|
|
|
|
|
|
|
|
|
|
|
import sys |
|
|
|
|
|
import pandas as pd |
|
|
|
|
|
|
|
|
|
|
|
snpindel_txt=sys.argv[1] |
|
|
|
|
|
size=sys.argv[2] |
|
|
|
|
|
sample=sys.argv[3] |
|
|
|
|
|
#AF_filter:yes/no(yes:Filter out AF value < 0.05) |
|
|
|
|
|
AF_filter=sys.argv[4] |
|
|
|
|
|
|
|
|
|
|
|
size = int(size) |
|
|
|
|
|
print('Target region: %.2f Mb' % (size/1000000)) |
|
|
|
|
|
# Read annotated txt |
|
|
|
|
|
df = pd.read_csv(snpindel_txt, sep="\t") |
|
|
|
|
|
if 'Otherinfo14' in df.columns: |
|
|
|
|
|
df = df.rename(columns={'AF':'AF_all', 'Otherinfo4':'#CHROM', 'Otherinfo5':'POS', 'Otherinfo6':'ID', 'Otherinfo7':'REF', 'Otherinfo8':'ALT', 'Otherinfo9':'QUAL', 'Otherinfo10':'FILTER', 'Otherinfo11':'INFO', 'Otherinfo12':'FORMAT', 'Otherinfo13':'Normal', 'Otherinfo14':'Tumor'}) |
|
|
|
|
|
else: |
|
|
|
|
|
df = df.rename(columns={'AF':'AF_all', 'Otherinfo4':'#CHROM', 'Otherinfo5':'POS', 'Otherinfo6':'ID', 'Otherinfo7':'REF', 'Otherinfo8':'ALT', 'Otherinfo9':'QUAL', 'Otherinfo10':'FILTER', 'Otherinfo11':'INFO', 'Otherinfo12':'FORMAT', 'Otherinfo13':'Tumor'}) |
|
|
|
|
|
|
|
|
|
|
|
df = df[df.columns.drop(list(df.filter(regex='Otherinfo')))] |
|
|
|
|
|
|
|
|
|
|
|
# Extract AF value |
|
|
|
|
|
|
|
|
|
|
|
df.insert(df.shape[1], 'AF', df.Tumor.str.split(':', expand = True)[2]) |
|
|
|
|
|
df['AF'] = df['AF'].apply(lambda x:float(x)) |
|
|
|
|
|
if str(AF_filter) == 'yes': |
|
|
|
|
|
#Filter out AF value < 0.05 |
|
|
|
|
|
df = df[df['AF'] >= 0.05] |
|
|
|
|
|
df.to_csv('%s.snp_indel.txt' % sample, sep='\t', index=False) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# TMB is measured by counting the total number of somatic, non-synonymous exonic variants per the total number of genes surveyed by the product. |
|
|
|
|
|
df_include = df[(df['Func.refGene'] == 'exonic') & (df['ExonicFunc.refGene'] != 'synonymous SNV') & (df['FILTER'] == 'PASS')] |
|
|
|
|
|
total_mb = '%.2f' % (size/1000000) |
|
|
|
|
|
num_muts = df_include.shape[0] |
|
|
|
|
|
tmb = '%.2f' % (num_muts/(size/1000000)) |
|
|
|
|
|
res = pd.DataFrame({'Total_Mb': [total_mb], 'Number_of_Muts': [num_muts], 'TMB': [tmb]}) |
|
|
|
|
|
res.to_csv('%s.TMB.txt' % sample, sep='\t', index=False) |