"sentieon_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/sentieon-genomics:v202010.02", | "sentieon_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/sentieon-genomics:v202010.02", | ||||
"varscan_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/varscan2:v2.4.3", | "varscan_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/varscan2:v2.4.3", | ||||
"annovar_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/annovar:v2019.10", | "annovar_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/annovar:v2019.10", | ||||
"vep_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/vep:v104.0", | |||||
"cache": "oss://pgx-reference-data/ensembl_vep/", | |||||
"vep_path": "/opt/vep/ensembl-vep", | |||||
"species": "homo_sapiens_merged", | |||||
"vcf2maf_path": "/opt/mskcc-vcf2maf", | |||||
"tnseq_pon": "", | "tnseq_pon": "", | ||||
"tnscope_pon": "", | "tnscope_pon": "", | ||||
"disk_size": "200", | "disk_size": "200", | ||||
"tnscope": false, | "tnscope": false, | ||||
"varscan": false, | "varscan": false, | ||||
"annovar": false, | "annovar": false, | ||||
"vep": false, | |||||
"only_pass": true | "only_pass": true | ||||
} | } |
"{{ project_name }}.annovar_database": "{{ annovar_database }}", | "{{ project_name }}.annovar_database": "{{ annovar_database }}", | ||||
"{{ project_name }}.tnseq_pon": "{{ tnseq_pon }}", | "{{ project_name }}.tnseq_pon": "{{ tnseq_pon }}", | ||||
"{{ project_name }}.tnscope_pon": "{{ tnscope_pon }}", | "{{ project_name }}.tnscope_pon": "{{ tnscope_pon }}", | ||||
"{{ project_name }}.vep_path": "{{ vep_path }}", | |||||
"{{ project_name }}.cache": "{{ cache }}", | |||||
"{{ project_name }}.species": "{{ species }}", | |||||
"{{ project_name }}.vcf2maf_path": "{{ vcf2maf_path }}", | |||||
"{{ project_name }}.vep_docker": "{{ vep_docker }}", | |||||
"{{ project_name }}.disk_size": "{{ disk_size }}", | "{{ project_name }}.disk_size": "{{ disk_size }}", | ||||
"{{ project_name }}.cluster_config": "{{ cluster_config }}", | "{{ project_name }}.cluster_config": "{{ cluster_config }}", | ||||
"{{ project_name }}.haplotyper": {{ haplotyper | tojson }}, | "{{ project_name }}.haplotyper": {{ haplotyper | tojson }}, | ||||
"{{ project_name }}.tnscope": {{ tnscope | tojson }}, | "{{ project_name }}.tnscope": {{ tnscope | tojson }}, | ||||
"{{ project_name }}.varscan": {{ varscan | tojson }}, | "{{ project_name }}.varscan": {{ varscan | tojson }}, | ||||
"{{ project_name }}.annovar": {{ annovar | tojson }}, | "{{ project_name }}.annovar": {{ annovar | tojson }}, | ||||
"{{ project_name }}.only_pass": {{ only_pass | tojson }} | |||||
"{{ project_name }}.only_pass": {{ only_pass | tojson }}, | |||||
"{{ project_name }}.vep": {{ vep | tojson }} | |||||
} | } |
# Annotate a somatic VCF with Ensembl VEP.
#
# Steps performed by the command:
#   1. Optionally keep only records whose FILTER column is PASS.
#   2. Split the records into non-BND records (converted to MAF by vcf2maf.pl)
#      and BND records (annotated directly with VEP as a tab-delimited file).
#
# Inputs:
#   vcf            input VCF; `basename` is derived from it by stripping ".vcf"
#   sample_id      prefix for the intermediate VCF files
#   tumor_id       value passed to vcf2maf --tumor-id
#   normal_id      value passed to vcf2maf --normal-id
#   ref_dir/fasta  directory and file name of the reference FASTA
#   vep_path       directory that contains the `vep` script
#   cache          VEP data/cache directory
#   hg             genome build, "hg19" or "hg38" (mapped to GRCh37/GRCh38)
#   species        species name handed to vcf2maf and VEP
#   only_pass      when true, non-PASS records are dropped before annotation
#   vcf2maf_path   directory that contains vcf2maf.pl
#   docker, cluster_config, disk_size   runtime settings
#
# Outputs:
#   input_vcf  the (optionally PASS-filtered) VCF that was annotated
#   maf        MAF produced by vcf2maf.pl from the non-BND records
#   bnd_vep    VEP tab output for the BND records
#
# NOTE(review): the workflow fragment visible alongside this task calls it as
# `VEP.VEP`, while the task is declared as `vcf2maf` -- confirm the names match.
task vcf2maf {
    File vcf
    String sample_id
    String basename = basename(vcf,".vcf")
    String tumor_id
    String normal_id
    File ref_dir
    String fasta
    String vep_path
    File cache
    String hg
    String species
    Boolean only_pass
    String vcf2maf_path
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        nt=$(nproc)

        # Keep header lines plus PASS records only when requested.
        # The WDL Boolean is interpolated as the literal text "true"/"false".
        if [ "${only_pass}" = "true" ]; then
            awk -F'\t' '{if(($1~"^#")||($1!~"^#" && $7=="PASS")){print $0}}' "${vcf}" > "${sample_id}.INPUT.vcf"
        else
            cp "${vcf}" "${sample_id}.INPUT.vcf"
        fi

        # Define ncbi_build from the genome build name; fail early on anything
        # else so the annotators never receive an empty assembly argument.
        if [ "${hg}" = "hg19" ]; then
            ncbi_build="GRCh37"
        elif [ "${hg}" = "hg38" ]; then
            ncbi_build="GRCh38"
        else
            echo "Unsupported genome build '${hg}': expected hg19 or hg38" >&2
            exit 1
        fi

        # Split the VCF: records without SVTYPE=BND go to vcf2maf,
        # records with SVTYPE=BND go to VEP. Header lines are kept in both.
        awk -F'\t' '{if(($1~"^#")||($8!~".*SVTYPE=BND.*")){print $0}}' "${sample_id}.INPUT.vcf" > "${sample_id}.INPUT.vcf2maf.vcf"
        awk -F'\t' '{if(($1~"^#")||($8~".*SVTYPE=BND.*")){print $0}}' "${sample_id}.INPUT.vcf" > "${sample_id}.INPUT.VEP.vcf"

        # vcf2maf: the MAF file name must match the `maf` output declared below.
        perl "${vcf2maf_path}/vcf2maf.pl" \
            --input-vcf "${sample_id}.INPUT.vcf2maf.vcf" --output-maf "${basename}.${hg}.maf" \
            --tumor-id "${tumor_id}" --normal-id "${normal_id}" \
            --ref-fasta "${ref_dir}/${fasta}" \
            --vep-path "${vep_path}" \
            --vep-data "${cache}" \
            --ncbi-build "$ncbi_build" \
            --species "${species}" \
            --vep-fork "$nt"

        # vep: only annotate the BND
        perl "${vep_path}/vep" \
            --input_file "${sample_id}.INPUT.VEP.vcf" --output_file "${basename}.BND.VEP.txt" \
            --fasta "${ref_dir}/${fasta}" \
            --dir "${cache}" \
            --assembly "$ncbi_build" \
            --species "${species}" \
            --fork "$nt" \
            --no_progress --no_stats --buffer_size 5000 \
            --sift b --ccds --uniprot --hgvs --symbol --numbers --domains \
            --gene_phenotype --canonical --protein --biotype --tsl \
            --variant_class --shift_hgvs 1 --check_existing --total_length \
            --allele_number --no_escape --xref_refseq --failed 1 --tab \
            --flag_pick_allele --pick_order canonical,tsl,biotype,rank,ccds,length \
            --format vcf --force_overwrite --offline --pubmed --regulatory
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File input_vcf = "${sample_id}.INPUT.vcf"
        File maf = "${basename}.${hg}.maf"
        File bnd_vep = "${basename}.BND.VEP.txt"
    }
}
import "./tasks/processSomatic.wdl" as processSomatic | import "./tasks/processSomatic.wdl" as processSomatic | ||||
import "./tasks/somaticFilter.wdl" as somaticFilter | import "./tasks/somaticFilter.wdl" as somaticFilter | ||||
import "./tasks/ANNOVAR.wdl" as ANNOVAR | import "./tasks/ANNOVAR.wdl" as ANNOVAR | ||||
import "./tasks/VEP.wdl" as VEP | |||||
workflow {{ project_name }} { | workflow {{ project_name }} { | ||||
String sentieon_docker | String sentieon_docker | ||||
String varscan_docker | String varscan_docker | ||||
String annovar_docker | String annovar_docker | ||||
String vep_docker | |||||
File ref_dir | File ref_dir | ||||
String fasta | String fasta | ||||
File? tnseq_pon | File? tnseq_pon | ||||
File? tnscope_pon | File? tnscope_pon | ||||
String vep_path | |||||
File cache | |||||
String species | |||||
String vcf2maf_path | |||||
String disk_size | String disk_size | ||||
String cluster_config | String cluster_config | ||||
Boolean varscan | Boolean varscan | ||||
Boolean annovar | Boolean annovar | ||||
Boolean vep | |||||
Boolean only_pass | Boolean only_pass | ||||
if (tumor_fastq_1!= "") { | if (tumor_fastq_1!= "") { | ||||
disk_size=disk_size | disk_size=disk_size | ||||
} | } | ||||
} | } | ||||
if (vep) { | |||||
call VEP.VEP as TNseq_VEP { | |||||
input: | |||||
vcf=TNseq.TNseq_vcf, | |||||
hg=hg, | |||||
only_pass=only_pass, | |||||
sample_id=sample_id, | |||||
tumor_id=sample_id + "_tumor", | |||||
normal_id=sample_id + "_normal", | |||||
ref_dir=ref_dir, | |||||
fasta=fasta, | |||||
vep_path=vep_path, | |||||
cache=cache, | |||||
species=species, | |||||
vcf2maf_path=vcf2maf_path, | |||||
docker=vep_docker, | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
} | |||||
} | } | ||||
if (tnscope) { | if (tnscope) { | ||||
disk_size=disk_size | disk_size=disk_size | ||||
} | } | ||||
} | } | ||||
if (vep) { | |||||
call VEP.VEP as TNscope_VEP { | |||||
input: | |||||
vcf=TNscope.TNscope_vcf, | |||||
hg=hg, | |||||
only_pass=only_pass, | |||||
sample_id=sample_id, | |||||
tumor_id=sample_id + "_tumor", | |||||
normal_id=sample_id + "_normal", | |||||
ref_dir=ref_dir, | |||||
fasta=fasta, | |||||
vep_path=vep_path, | |||||
cache=cache, | |||||
species=species, | |||||
vcf2maf_path=vcf2maf_path, | |||||
docker=vep_docker, | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
} | |||||
} | } | ||||
if (varscan) { | if (varscan) { | ||||
sample=sample_id, | sample=sample_id, | ||||
varscan_snp=somatic.varscan_snp, | varscan_snp=somatic.varscan_snp, | ||||
varscan_indel=somatic.varscan_indel, | varscan_indel=somatic.varscan_indel, | ||||
docker=varscan_docker, | |||||
docker=varscan_docker, | |||||
disk_size=disk_size, | disk_size=disk_size, | ||||
cluster_config=cluster_config | cluster_config=cluster_config | ||||
} | } | ||||
varscan_indel_germline_hc=processSomatic.varscan_indel_germline_hc, | varscan_indel_germline_hc=processSomatic.varscan_indel_germline_hc, | ||||
varscan_indel_loh_hc=processSomatic.varscan_indel_loh_hc, | varscan_indel_loh_hc=processSomatic.varscan_indel_loh_hc, | ||||
varscan_indel=somatic.varscan_indel, | varscan_indel=somatic.varscan_indel, | ||||
docker=varscan_docker, | |||||
docker=varscan_docker, | |||||
disk_size=disk_size, | disk_size=disk_size, | ||||
cluster_config=cluster_config | cluster_config=cluster_config | ||||
} | } | ||||
disk_size=disk_size | disk_size=disk_size | ||||
} | } | ||||
} | } | ||||
if (vep) { | |||||
call VEP.VEP as VarScan_VEP { | |||||
input: | |||||
vcf=VarScan.VarScan_vcf, | |||||
hg=hg, | |||||
only_pass=only_pass, | |||||
sample_id=sample_id, | |||||
tumor_id=sample_id + "_tumor", | |||||
normal_id=sample_id + "_normal", | |||||
ref_dir=ref_dir, | |||||
fasta=fasta, | |||||
vep_path=vep_path, | |||||
cache=cache, | |||||
species=species, | |||||
vcf2maf_path=vcf2maf_path, | |||||
docker=vep_docker, | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
} | |||||
} | } | ||||
} | } |