преди 2 години · 58e855226b
--- a/defaults
+++ b/defaults
@@ -0,0 +1,8 @@
 {
    "hg38_CDS":"oss://hcc1395/reference_genome/TMB_dir/genecode.v36.CDS_merged.bed",
    "regions":"",
    "docker_tmb":"registry.cn-shanghai.aliyuncs.com/hcc1395_aliyun/tmb:1.1.0",
    "cluster_config":"OnDemand ecs.g6.large img-ubuntu-vpc",
    "disk_size":"50",
    "AF_filter":true
 }
--- a/inputs
+++ b/inputs
@@ -0,0 +1,12 @@
 {
    "{{ project_name }}.sample": "{{ sample }}",
    "{{ project_name }}.hg38_CDS": "{{ hg38_CDS }}",
    "{{ project_name }}.regions": "{{ regions }}",
    "{{ project_name }}.snpindel_txt": "{{ snpindel_txt }}",
    "{{ project_name }}.docker_tmb": "{{docker_tmb}}",
    "{{ project_name }}.cluster_config": "{{ cluster_config }}",
    "{{ project_name }}.disk_size": "{{ disk_size }}",
    "{{ project_name }}.AF_filter": {{ AF_filter | tojson }}


 }
--- a/tmb.py
+++ b/tmb.py
@@ -0,0 +1,37 @@
 import sys
 import pandas as pd

 # Compute tumor mutational burden (TMB) from an annotated SNV/indel table.
 #
 # Command-line arguments:
 #   1. snpindel_txt - tab-separated, annotated variant table
 #   2. size         - size of the target region in bases (positive integer)
 #   3. sample       - sample name, used as the prefix of both output files
 #   4. AF_filter    - 'yes' or 'true' (case-insensitive) drops variants with
 #                     AF < 0.05; any other value keeps every variant
 #
 # Outputs (written to the current directory):
 #   <sample>.snp_indel.txt - the cleaned (and optionally AF-filtered) table
 #   <sample>.TMB.txt       - one row: Total_Mb, Number_of_Muts, TMB
 if len(sys.argv) < 5:
     sys.exit('Usage: tmb.py <snpindel_txt> <size> <sample> <AF_filter>')

 snpindel_txt = sys.argv[1]
 size = int(sys.argv[2])
 sample = sys.argv[3]
 AF_filter = sys.argv[4]

 # A WDL Boolean is typically rendered as 'true'/'false' on a command line,
 # so accept that spelling in addition to the historical 'yes'.
 apply_af_filter = str(AF_filter).strip().lower() in ('yes', 'true')
 MIN_AF = 0.05

 # Fail before any output is written instead of dividing by zero (or
 # reporting a negative TMB) at the very end.
 if size <= 0:
     sys.exit('Target region size must be a positive number of bases, got %d' % size)

 # Float divisor keeps the Mb value fractional regardless of interpreter.
 size_mb = size / 1000000.0
 print('Target region: %.2f Mb' % size_mb)

 # Read annotated txt
 df = pd.read_csv(snpindel_txt, sep="\t")

 # Map the generic 'OtherinfoN' columns back to their VCF names. The table's
 # own 'AF' column is renamed so that the per-sample AF extracted below can
 # take the 'AF' name.
 column_names = {
     'AF': 'AF_all',
     'Otherinfo4': '#CHROM',
     'Otherinfo5': 'POS',
     'Otherinfo6': 'ID',
     'Otherinfo7': 'REF',
     'Otherinfo8': 'ALT',
     'Otherinfo9': 'QUAL',
     'Otherinfo10': 'FILTER',
     'Otherinfo11': 'INFO',
     'Otherinfo12': 'FORMAT',
 }
 if 'Otherinfo14' in df.columns:
     # Two sample columns: the first is the normal, the second the tumor.
     column_names['Otherinfo13'] = 'Normal'
     column_names['Otherinfo14'] = 'Tumor'
 else:
     # Single sample column: tumor only.
     column_names['Otherinfo13'] = 'Tumor'
 df = df.rename(columns=column_names)

 # Drop whatever 'Otherinfo*' columns were not renamed above.
 df = df[df.columns.drop(list(df.filter(regex='Otherinfo')))]

 # Extract AF value: the third ':'-separated field of the tumor sample column.
 # NOTE(review): assumes AF is always the third FORMAT field - confirm against
 # the variant caller in use.
 # Element-wise .str[2] (rather than expand=True followed by [2]) also works
 # on a table with no variant rows; a row with fewer than three fields gets
 # NaN, and NaN never passes the AF threshold below.
 tumor_af = df['Tumor'].str.split(':').str[2].astype(float)
 df.insert(df.shape[1], 'AF', tumor_af)
 if apply_af_filter:
     # Filter out AF value < 0.05
     df = df[df['AF'] >= MIN_AF]
 df.to_csv('%s.snp_indel.txt' % sample, sep='\t', index=False)


 # TMB is measured by counting the total number of somatic, non-synonymous exonic variants per the total number of genes surveyed by the product.
 is_exonic = df['Func.refGene'] == 'exonic'
 is_nonsynonymous = df['ExonicFunc.refGene'] != 'synonymous SNV'
 is_pass = df['FILTER'] == 'PASS'
 df_include = df[is_exonic & is_nonsynonymous & is_pass]

 total_mb = '%.2f' % size_mb
 num_muts = df_include.shape[0]
 tmb = '%.2f' % (num_muts / size_mb)
 res = pd.DataFrame({'Total_Mb': [total_mb], 'Number_of_Muts': [num_muts], 'TMB': [tmb]})
 res.to_csv('%s.TMB.txt' % sample, sep='\t', index=False)
--- a/workflow.wdl
+++ b/workflow.wdl
@@ -0,0 +1,25 @@
 import "./tasks/tmb.wdl" as tmb

 # Workflow wrapper around the TMB task imported from ./tasks/tmb.wdl.
 # Every input declared below is forwarded unchanged to that single task call;
 # the only renaming is docker_tmb, which is passed as the task's `docker` input.
 # The workflow name is filled in by the templating step (project_name).
 workflow {{ project_name }}{
    # Sample name.
    String sample
    # Presumably a BED file of hg38 coding regions - confirm against the task.
    File hg38_CDS
    # Optional file; what the task does when it is absent is not visible here.
    File? regions
    # Annotated SNV/indel table handed to the task.
    File snpindel_txt
    # Docker image for the task (forwarded as `docker`).
    String docker_tmb
    # Execution-backend settings, forwarded as-is.
    String cluster_config
    String disk_size
    # Boolean switch forwarded to the task as AF_filter.
    # NOTE(review): tmb.py compares its AF_filter argument with the string 'yes';
    # confirm how the task renders this Boolean on the command line.
    Boolean AF_filter

    # Single call; the alias TMB matches the task's own name.
    call tmb.TMB as TMB{
        input:
            sample=sample,
            hg38_CDS=hg38_CDS,
            regions=regions,
            snpindel_txt=snpindel_txt,
            docker=docker_tmb,
            cluster_config=cluster_config,
            disk_size=disk_size,
            AF_filter=AF_filter
    }

 }