4 年之前 · 607454ab2f
--- a/defaults
+++ b/defaults
  "sentieon_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/sentieon-genomics:v202010.02",
  "varscan_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/varscan2:v2.4.3",
  "annovar_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/annovar:v2019.10",
  "vep_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/vep:v104.0",
  "cache": "oss://pgx-reference-data/ensembl_vep/",
  "vep_path": "/opt/vep/ensembl-vep",
  "species": "homo_sapiens_merged",
  "vcf2maf_path": "/opt/mskcc-vcf2maf",
  "tnseq_pon": "",
  "tnscope_pon": "",
  "disk_size": "200",
  "tnscope": false,
  "varscan": false,
  "annovar": false,
  "vep": false,
  "only_pass": true
 }
--- a/inputs
+++ b/inputs
  "{{ project_name }}.annovar_database": "{{ annovar_database }}",
  "{{ project_name }}.tnseq_pon": "{{ tnseq_pon }}",
  "{{ project_name }}.tnscope_pon": "{{ tnscope_pon }}",
  "{{ project_name }}.vep_path": "{{ vep_path }}",
  "{{ project_name }}.cache": "{{ cache }}",
  "{{ project_name }}.species": "{{ species }}",
  "{{ project_name }}.vcf2maf_path": "{{ vcf2maf_path }}",
  "{{ project_name }}.vep_docker": "{{ vep_docker }}",
  "{{ project_name }}.disk_size": "{{ disk_size }}",
  "{{ project_name }}.cluster_config": "{{ cluster_config }}",
  "{{ project_name }}.haplotyper": {{ haplotyper | tojson }},
  "{{ project_name }}.tnscope": {{ tnscope | tojson }},
  "{{ project_name }}.varscan": {{ varscan | tojson }},
  "{{ project_name }}.annovar": {{ annovar | tojson }},
  "{{ project_name }}.only_pass": {{ only_pass | tojson }}
  "{{ project_name }}.only_pass": {{ only_pass | tojson }},
  "{{ project_name }}.vep": {{ vep | tojson }}
 }
--- a/tasks/VEP.wdl
+++ b/tasks/VEP.wdl
 # Annotate one VCF in two passes:
 #   1. records that are not SVTYPE=BND are converted to MAF with vcf2maf.pl
 #      (which drives VEP itself);
 #   2. records that are SVTYPE=BND are annotated directly with VEP into a
 #      tab-delimited report.
 #
 # Inputs
 #   vcf            VCF to annotate.
 #   sample_id      Prefix used for the intermediate VCF files.
 #   basename       Output prefix; defaults to the VCF file name without ".vcf".
 #   tumor_id       Passed to vcf2maf.pl as --tumor-id.
 #   normal_id      Passed to vcf2maf.pl as --normal-id.
 #   ref_dir, fasta Reference directory and the FASTA file name inside it.
 #   vep_path       Directory that contains the `vep` script.
 #   cache          VEP data directory (--vep-data / --dir).
 #   hg             Genome build, "hg19" or "hg38"; mapped to GRCh37 / GRCh38.
 #   species        Passed to --species.
 #   only_pass      When true, only records whose FILTER column is PASS are kept.
 #   vcf2maf_path   Directory that contains vcf2maf.pl.
 #   docker, cluster_config, disk_size   Runtime settings.
 #
 # Outputs
 #   input_vcf      The (optionally PASS-filtered) VCF used as annotation input.
 #   maf            MAF produced by vcf2maf.pl, named <basename>.<hg>.maf.
 #   bnd_vep        VEP tab output for the BND records.
 #
 # NOTE(review): this task is named `vcf2maf`; a workflow importing this file
 # under the alias VEP has to call it as VEP.vcf2maf.
 task vcf2maf {
  File vcf
  String sample_id
  String basename = basename(vcf,".vcf")
  String tumor_id
  String normal_id
  File ref_dir
  String fasta
  String vep_path
  File cache
  String hg
  String species
  Boolean only_pass
  String vcf2maf_path
  String docker
  String cluster_config
  String disk_size
  command <<<
    set -o pipefail
    set -e
    nt=$(nproc)
    # The WDL Boolean is rendered as the word true/false; compare it explicitly,
    # because a bare non-empty word inside [ ] is always true.
    if [ "${only_pass}" = "true" ]; then
      awk -F'\t' '{if(($1~"^#")||($1!~"^#" && $7=="PASS")){print $0}}' ${vcf} > ${sample_id}.INPUT.vcf
    else
      cp ${vcf} ${sample_id}.INPUT.vcf
    fi
    # Map the UCSC-style build name onto the NCBI assembly name used by VEP.
    if [ "${hg}" = "hg19" ]; then
      ncbi_build="GRCh37"
    elif [ "${hg}" = "hg38" ]; then
      ncbi_build="GRCh38"
    else
      # Fail early instead of running the annotators with an empty assembly.
      echo "Unsupported genome build: ${hg} (expected hg19 or hg38)" >&2
      exit 1
    fi
    # Split the VCF: records without SVTYPE=BND go to vcf2maf, BND records go to VEP.
    # Header lines are kept in both files.
    awk -F'\t' '{if(($1~"^#")||($8!~".*SVTYPE=BND.*")){print $0}}' ${sample_id}.INPUT.vcf > ${sample_id}.INPUT.vcf2maf.vcf
    awk -F'\t' '{if(($1~"^#")||($8~".*SVTYPE=BND.*")){print $0}}' ${sample_id}.INPUT.vcf > ${sample_id}.INPUT.VEP.vcf
    # vcf2maf: the MAF name must match the `maf` entry of the output section.
    perl ${vcf2maf_path}/vcf2maf.pl \
    --input-vcf ${sample_id}.INPUT.vcf2maf.vcf --output-maf ${basename}.${hg}.maf \
    --tumor-id ${tumor_id} --normal-id ${normal_id} \
    --ref-fasta ${ref_dir}/${fasta} \
    --vep-path ${vep_path} \
    --vep-data ${cache} \
    --ncbi-build $ncbi_build \
    --species ${species} \
    --vep-fork $nt
    # vep: only annotate the BND
    perl ${vep_path}/vep \
    --input_file ${sample_id}.INPUT.VEP.vcf --output_file ${basename}.BND.VEP.txt \
    --fasta ${ref_dir}/${fasta} \
    --dir ${cache} \
    --assembly $ncbi_build \
    --species ${species} \
    --fork $nt \
    --no_progress --no_stats --buffer_size 5000 --sift b --ccds --uniprot --hgvs --symbol --numbers --domains --gene_phenotype --canonical --protein --biotype --tsl --variant_class --shift_hgvs 1 --check_existing --total_length --allele_number --no_escape --xref_refseq --failed 1 --tab --flag_pick_allele --pick_order canonical,tsl,biotype,rank,ccds,length --format vcf --force_overwrite --offline --pubmed --regulatory
  >>>
  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }
  output {
    File input_vcf = "${sample_id}.INPUT.vcf"
    File maf = "${basename}.${hg}.maf"
    File bnd_vep = "${basename}.BND.VEP.txt"
  }
 }
--- a/workflow.wdl
+++ b/workflow.wdl
 import "./tasks/processSomatic.wdl" as processSomatic
 import "./tasks/somaticFilter.wdl" as somaticFilter
 import "./tasks/ANNOVAR.wdl" as ANNOVAR
 import "./tasks/VEP.wdl" as VEP
 workflow {{ project_name }} {
  String sentieon_docker
  String varscan_docker
  String annovar_docker
  String vep_docker
  File ref_dir
  String fasta
  File? tnseq_pon
  File? tnscope_pon
  String vep_path
  File cache
  String species
  String vcf2maf_path
  String disk_size
  String cluster_config
  Boolean varscan
  Boolean annovar
  Boolean vep
  Boolean only_pass
  if (tumor_fastq_1!= "") {
        disk_size=disk_size
      }
    }
    # Annotate the TNseq VCF when the `vep` switch is on.
    # tasks/VEP.wdl (imported as VEP) defines the task `vcf2maf`; the visible
    # file has no task named VEP, so the call target must be VEP.vcf2maf.
    if (vep) {
      call VEP.vcf2maf as TNseq_VEP {
        input:
        vcf=TNseq.TNseq_vcf,
        hg=hg,
        only_pass=only_pass,
        sample_id=sample_id,
        # Tumor/normal IDs are derived from the sample ID.
        tumor_id=sample_id + "_tumor",
        normal_id=sample_id + "_normal",
        ref_dir=ref_dir,
        fasta=fasta,
        vep_path=vep_path,
        cache=cache,
        species=species,
        vcf2maf_path=vcf2maf_path,
        docker=vep_docker,
        cluster_config=cluster_config,
        disk_size=disk_size
      }
    }
  }
  if (tnscope) {
        disk_size=disk_size
      }
    }
    # Annotate the TNscope VCF when the `vep` switch is on.
    # tasks/VEP.wdl (imported as VEP) defines the task `vcf2maf`; the visible
    # file has no task named VEP, so the call target must be VEP.vcf2maf.
    if (vep) {
      call VEP.vcf2maf as TNscope_VEP {
        input:
        vcf=TNscope.TNscope_vcf,
        hg=hg,
        only_pass=only_pass,
        sample_id=sample_id,
        # Tumor/normal IDs are derived from the sample ID.
        tumor_id=sample_id + "_tumor",
        normal_id=sample_id + "_normal",
        ref_dir=ref_dir,
        fasta=fasta,
        vep_path=vep_path,
        cache=cache,
        species=species,
        vcf2maf_path=vcf2maf_path,
        docker=vep_docker,
        cluster_config=cluster_config,
        disk_size=disk_size
      }
    }
  }
  if (varscan) {
      sample=sample_id,
      varscan_snp=somatic.varscan_snp,
      varscan_indel=somatic.varscan_indel,   
      docker=varscan_docker,	
      docker=varscan_docker,
      disk_size=disk_size,
      cluster_config=cluster_config
    }
      varscan_indel_germline_hc=processSomatic.varscan_indel_germline_hc,
      varscan_indel_loh_hc=processSomatic.varscan_indel_loh_hc,
      varscan_indel=somatic.varscan_indel,   
      docker=varscan_docker,	
      docker=varscan_docker,
      disk_size=disk_size,
      cluster_config=cluster_config
    }
        disk_size=disk_size
      }
    }
    # Annotate the VarScan VCF when the `vep` switch is on.
    # tasks/VEP.wdl (imported as VEP) defines the task `vcf2maf`; the visible
    # file has no task named VEP, so the call target must be VEP.vcf2maf.
    if (vep) {
      call VEP.vcf2maf as VarScan_VEP {
        input:
        vcf=VarScan.VarScan_vcf,
        hg=hg,
        only_pass=only_pass,
        sample_id=sample_id,
        # Tumor/normal IDs are derived from the sample ID.
        tumor_id=sample_id + "_tumor",
        normal_id=sample_id + "_normal",
        ref_dir=ref_dir,
        fasta=fasta,
        vep_path=vep_path,
        cache=cache,
        species=species,
        vcf2maf_path=vcf2maf_path,
        docker=vep_docker,
        cluster_config=cluster_config,
        disk_size=disk_size
      }
    }
  }
 }