Vous ne pouvez pas sélectionner plus de 25 sujets. Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

50 lignes
3.8KB

  1. import pandas as pd
  2. import sys, argparse, os
  3. parser = argparse.ArgumentParser(description="This script is to get information from hap")
  4. parser.add_argument('-hap', '--happy', type=str, help='hap.py table', required=True)
  5. parser.add_argument('-name', '--name', type=str, help='sample name', required=True)
  6. args = parser.parse_args()
  7. hap_file = args.happy
  8. name = args.name
  9. dat = pd.read_table(hap_file)
  10. dat['QUERY.TP'] = dat['QUERY.TOTAL'].astype(int) - dat['QUERY.UNK'].astype(int) - dat['QUERY.FP'].astype(int)
  11. dat['QUERY'] = dat['QUERY.TOTAL'].astype(int) - dat['QUERY.UNK'].astype(int)
  12. indel = dat[['INDEL' in s for s in dat['Type']]]
  13. snv = dat[['SNP' in s for s in dat['Type']]]
  14. indel.reset_index(drop=True, inplace=True)
  15. snv.reset_index(drop=True, inplace=True)
  16. benchmark = pd.concat([snv, indel], axis=1)
  17. benchmark = benchmark[[ 'QUERY.TOTAL', 'QUERY','QUERY.TP','QUERY.FP','TRUTH.FN','METRIC.Precision', 'METRIC.Recall','METRIC.F1_Score']]
  18. benchmark.columns = ['SNV number','INDEL number','SNV query','INDEL query','SNV TP','INDEL TP','SNV FP','INDEL FP','SNV FN','INDEL FN','SNV precision','INDEL precision','SNV recall','INDEL recall','SNV F1','INDEL F1']
  19. benchmark = benchmark[['SNV number','INDEL number','SNV query','INDEL query','SNV TP','INDEL TP','SNV FP','INDEL FP','SNV FN','INDEL FN','SNV precision','INDEL precision','SNV recall','INDEL recall','SNV F1','INDEL F1']]
  20. benchmark['SNV precision'] = benchmark['SNV precision'].astype(float)
  21. benchmark['INDEL precision'] = benchmark['INDEL precision'].astype(float)
  22. benchmark['SNV recall'] = benchmark['SNV recall'].astype(float)
  23. benchmark['INDEL recall'] = benchmark['INDEL recall'].astype(float)
  24. benchmark['SNV F1'] = benchmark['SNV F1'].astype(float)
  25. benchmark['INDEL F1'] = benchmark['INDEL F1'].astype(float)
  26. benchmark = benchmark.round(2)
  27. name_array = name.split("_")
  28. LCL5_1 = name_array[0] + "_" + name_array[1] + "_" + name_array[2] + "_" + name_array[3] + "_" + name_array[4] + "_" + "LCL5_1" + "_" + name_array[5]
  29. LCL5_2 = name_array[0] + "_" + name_array[1] + "_" + name_array[2] + "_" + name_array[3] + "_" + name_array[4] + "_" + "LCL5_2" + "_" + name_array[5]
  30. LCL5_3 = name_array[0] + "_" + name_array[1] + "_" + name_array[2] + "_" + name_array[3] + "_" + name_array[4] + "_" + "LCL5_3" + "_" + name_array[5]
  31. LCL6_1 = name_array[0] + "_" + name_array[1] + "_" + name_array[2] + "_" + name_array[3] + "_" + name_array[4] + "_" + "LCL6_1" + "_" + name_array[5]
  32. LCL6_2 = name_array[0] + "_" + name_array[1] + "_" + name_array[2] + "_" + name_array[3] + "_" + name_array[4] + "_" + "LCL6_2" + "_" + name_array[5]
  33. LCL6_3 = name_array[0] + "_" + name_array[1] + "_" + name_array[2] + "_" + name_array[3] + "_" + name_array[4] + "_" + "LCL6_3" + "_" + name_array[5]
  34. LCL7_1 = name_array[0] + "_" + name_array[1] + "_" + name_array[2] + "_" + name_array[3] + "_" + name_array[4] + "_" + "LCL7_1" + "_" + name_array[5]
  35. LCL7_2 = name_array[0] + "_" + name_array[1] + "_" + name_array[2] + "_" + name_array[3] + "_" + name_array[4] + "_" + "LCL7_2" + "_" + name_array[5]
  36. LCL7_3 = name_array[0] + "_" + name_array[1] + "_" + name_array[2] + "_" + name_array[3] + "_" + name_array[4] + "_" + "LCL7_3" + "_" + name_array[5]
  37. LCL8_1 = name_array[0] + "_" + name_array[1] + "_" + name_array[2] + "_" + name_array[3] + "_" + name_array[4] + "_" + "LCL8_1" + "_" + name_array[5]
  38. LCL8_2 = name_array[0] + "_" + name_array[1] + "_" + name_array[2] + "_" + name_array[3] + "_" + name_array[4] + "_" + "LCL8_2" + "_" + name_array[5]
  39. LCL8_3 = name_array[0] + "_" + name_array[1] + "_" + name_array[2] + "_" + name_array[3] + "_" + name_array[4] + "_" + "LCL8_3" + "_" + name_array[5]
  40. benchmark.insert(loc=0, column='Sample', value=[LCL5_1,LCL5_2,LCL5_3,LCL6_1,LCL6_2,LCL6_3,LCL7_1,LCL7_2,LCL7_3,LCL8_1,LCL8_2,LCL8_3])
  41. benchmark.to_csv('variants.calling.qc.txt',sep="\t",index=0)