您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

37 行
1.7KB

  1. import sys
  2. import pandas as pd
  3. snpindel_txt=sys.argv[1]
  4. size=sys.argv[2]
  5. sample=sys.argv[3]
  6. #AF_filter:yes/no(yes:Filter out AF value < 0.05)
  7. AF_filter=sys.argv[4]
  8. size = int(size)
  9. print('Target region: %.2f Mb' % (size/1000000))
  10. # Read annotated txt
  11. df = pd.read_csv(snpindel_txt, sep="\t")
  12. if 'Otherinfo14' in df.columns:
  13. df = df.rename(columns={'AF':'AF_all', 'Otherinfo4':'#CHROM', 'Otherinfo5':'POS', 'Otherinfo6':'ID', 'Otherinfo7':'REF', 'Otherinfo8':'ALT', 'Otherinfo9':'QUAL', 'Otherinfo10':'FILTER', 'Otherinfo11':'INFO', 'Otherinfo12':'FORMAT', 'Otherinfo13':'Normal', 'Otherinfo14':'Tumor'})
  14. else:
  15. df = df.rename(columns={'AF':'AF_all', 'Otherinfo4':'#CHROM', 'Otherinfo5':'POS', 'Otherinfo6':'ID', 'Otherinfo7':'REF', 'Otherinfo8':'ALT', 'Otherinfo9':'QUAL', 'Otherinfo10':'FILTER', 'Otherinfo11':'INFO', 'Otherinfo12':'FORMAT', 'Otherinfo13':'Tumor'})
  16. df = df[df.columns.drop(list(df.filter(regex='Otherinfo')))]
  17. # Extract AF value
  18. df.insert(df.shape[1], 'AF', df.Tumor.str.split(':', expand = True)[2])
  19. df['AF'] = df['AF'].apply(lambda x:float(x))
  20. if str(AF_filter) == 'yes':
  21. #Filter out AF value < 0.05
  22. df = df[df['AF'] >= 0.05]
  23. df.to_csv('%s.snp_indel.txt' % sample, sep='\t', index=False)
  24. # TMB is measured by counting the total number of somatic, non-synonymous exonic variants per the total number of genes surveyed by the product.
  25. df_include = df[(df['Func.refGene'] == 'exonic') & (df['ExonicFunc.refGene'] != 'synonymous SNV') & (df['FILTER'] == 'PASS')]
  26. total_mb = '%.2f' % (size/1000000)
  27. num_muts = df_include.shape[0]
  28. tmb = '%.2f' % (num_muts/(size/1000000))
  29. res = pd.DataFrame({'Total_Mb': [total_mb], 'Number_of_Muts': [num_muts], 'TMB': [tmb]})
  30. res.to_csv('%s.TMB.txt' % sample, sep='\t', index=False)