You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

50 lines
1.3KB

from __future__ import division

import argparse
import fileinput
import os
import re
import sys

import pandas as pd
  6. # input arguments
  7. parser = argparse.ArgumentParser(description="this script is to extract mendelian concordance information")
  8. parser.add_argument('-LCL5', '--LCL5', type=str, help='LCL5 family info', required=True)
  9. parser.add_argument('-LCL6', '--LCL6', type=str, help='LCL6 family info', required=True)
  10. parser.add_argument('-family', '--family', type=str, help='family name', required=True)
  11. args = parser.parse_args()
  12. lcl5 = args.LCL5
  13. lcl6 = args.LCL6
  14. family = args.family
  15. # output file
  16. family_name = family + '.txt'
  17. family_file = open(family_name,'w')
  18. # input files
  19. lcl5_dat = pd.read_table(lcl5)
  20. lcl6_dat = pd.read_table(lcl6)
  21. merged_df = pd.merge(lcl5_dat, lcl6_dat, how='outer', left_on=['#CHROM','POS'], right_on = ['#CHROM','POS'])
  22. for row in merged_df.itertuples():
  23. if row.CHILD_x == row.CHILD_y:
  24. mendelian = '1'
  25. else:
  26. mendelian = '0'
  27. if pd.isnull(row.INFO_x) == True:
  28. mendelian = mendelian + ':.'
  29. else:
  30. mendelian = mendelian + ':' + row.INFO_x.split('=')[1]
  31. if pd.isnull(row.INFO_y) == True:
  32. mendelian = mendelian + ':.'
  33. else:
  34. mendelian = mendelian + ':' + row.INFO_y.split('=')[1]
  35. outline = row._1 + '\t' + str(row.POS) + '\t' + mendelian + '\n'
  36. family_file.write(outline)