Nie możesz wybrać więcej niż 25 tematów. Tematy muszą się zaczynać od litery lub cyfry, mogą zawierać myślniki ('-') i mogą mieć do 35 znaków.

extract_multiqc.py 2.2KB

4 lata temu
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465
  1. import json
  2. import pandas as pd
  3. import sys, argparse, os
  4. parser = argparse.ArgumentParser(description="This script is to get information from multiqc")
  5. parser.add_argument('-fastqc_qualimap', '--fastqc_qualimap', type=str, help='multiqc_general_stats.txt', required=True)
  6. parser.add_argument('-fastqc', '--fastqc', type=str, help='multiqc_fastqc.txt', required=True)
  7. parser.add_argument('-fastqscreen', '--fastqscreen', type=str, help='multiqc_fastq_screen.txt', required=True)
  8. parser.add_argument('-hap', '--happy', type=str, help='multiqc_happy_data.json', required=True)
  9. args = parser.parse_args()
  10. # Rename input:
  11. fastqc_qualimap_file = args.fastqc_qualimap
  12. fastqc_file = args.fastqc
  13. fastqscreen_file = args.fastqscreen
  14. hap_file = args.happy
  15. # fastqc and qualimap
  16. dat = pd.read_table(fastqc_qualimap_file)
  17. fastqc = dat.loc[:, dat.columns.str.startswith('FastQC')]
  18. fastqc.insert(loc=0, column='Sample', value=dat['Sample'])
  19. fastqc_stat = fastqc.dropna()
  20. # qulimap
  21. qualimap = dat.loc[:, dat.columns.str.startswith('QualiMap')]
  22. qualimap.insert(loc=0, column='Sample', value=dat['Sample'])
  23. qualimap_stat = qualimap.dropna()
  24. # fastqc
  25. dat = pd.read_table(fastqc_file)
  26. fastqc_module = dat.loc[:, "per_base_sequence_quality":"kmer_content"]
  27. fastqc_module.insert(loc=0, column='Sample', value=dat['Sample'])
  28. fastqc_all = pd.merge(fastqc_stat,fastqc_module, how='outer', left_on=['Sample'], right_on = ['Sample'])
  29. # fastqscreen
  30. dat = pd.read_table(fastqscreen_file)
  31. fastqscreen = dat.loc[:, dat.columns.str.endswith('percentage')]
  32. dat['Sample'] = [i.replace('_screen','') for i in dat['Sample']]
  33. fastqscreen.insert(loc=0, column='Sample', value=dat['Sample'])
  34. # benchmark
  35. with open(hap_file) as hap_json:
  36. happy = json.load(hap_json)
  37. dat =pd.DataFrame.from_records(happy)
  38. dat = dat.loc[:, dat.columns.str.endswith('ALL')]
  39. dat_transposed = dat.T
  40. benchmark = dat_transposed.loc[:,['sample_id','METRIC.Precision','METRIC.Recall']]
  41. benchmark.columns = ['Sample','Precision','Recall']
  42. #output
  43. fastqc_all.to_csv('fastqc.final.result.txt',sep="\t",index=0)
  44. fastqscreen.to_csv('fastqscreen.final.result.txt',sep="\t",index=0)
  45. qualimap_stat.to_csv('qualimap.final.result.txt',sep="\t",index=0)
  46. benchmark.to_csv('benchmark.final.result.txt',sep="\t",index=0)