You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

61 lines
1.4KB

  1. from __future__ import division
  2. import pandas as pd
  3. import sys, argparse, os
  4. # input arguments
  5. parser = argparse.ArgumentParser(description="this script is to calculate reproducibility between Quartet_D5 and Quartet_D6s")
  6. parser.add_argument('-sister', '--sister', type=str, help='sister.txt', required=True)
  7. parser.add_argument('-project', '--project', type=str, help='project name', required=True)
  8. args = parser.parse_args()
  9. sister_file = args.sister
  10. project_name = args.project
  11. # output file
  12. output_name = project_name + '.sister.reproducibility.txt'
  13. output_file = open(output_name,'w')
  14. # input files
  15. sister_dat = pd.read_table(sister_file)
  16. sister_same = 0
  17. sister_diff = 0
  18. for row in sister_dat.itertuples():
  19. # sister
  20. if row[5] == row[6]:
  21. if row[5] == './.':
  22. mendelian = 'noInfo'
  23. sister_count = "no"
  24. elif row[5] == '0/0':
  25. mendelian = 'Ref'
  26. sister_count = "no"
  27. else:
  28. mendelian = '1'
  29. sister_count = "yes_same"
  30. else:
  31. mendelian = '0'
  32. if (row[5] == './.' or row[5] == '0/0') and (row[6] == './.' or row[6] == '0/0'):
  33. sister_count = "no"
  34. else:
  35. sister_count = "yes_diff"
  36. if sister_count == 'yes_same':
  37. sister_same += 1
  38. elif sister_count == 'yes_diff':
  39. sister_diff += 1
  40. else:
  41. pass
  42. sister = sister_same/(sister_same + sister_diff)
  43. outcolumn = 'Project\tReproducibility_D5_D6\n'
  44. outResult = project_name + '\t' + str(sister) + '\n'
  45. output_file.write(outcolumn)
  46. output_file.write(outResult)