@@ -18,6 +18,11 @@ | |||
"sentieon_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/sentieon-genomics:v202010.02", | |||
"varscan_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/varscan2:v2.4.3", | |||
"annovar_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/annovar:v2019.10", | |||
"vep_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/vep:v104.0", | |||
"cache": "oss://pgx-reference-data/ensembl_vep/", | |||
"vep_path": "/opt/vep/ensembl-vep", | |||
"species": "homo_sapiens_merged", | |||
"vcf2maf_path": "/opt/mskcc-vcf2maf", | |||
"tnseq_pon": "", | |||
"tnscope_pon": "", | |||
"disk_size": "200", | |||
@@ -27,5 +32,6 @@ | |||
"tnscope": false, | |||
"varscan": false, | |||
"annovar": false, | |||
"vep": false, | |||
"only_pass": true | |||
} |
@@ -23,6 +23,11 @@ | |||
"{{ project_name }}.annovar_database": "{{ annovar_database }}", | |||
"{{ project_name }}.tnseq_pon": "{{ tnseq_pon }}", | |||
"{{ project_name }}.tnscope_pon": "{{ tnscope_pon }}", | |||
"{{ project_name }}.vep_path": "{{ vep_path }}", | |||
"{{ project_name }}.cache": "{{ cache }}", | |||
"{{ project_name }}.species": "{{ species }}", | |||
"{{ project_name }}.vcf2maf_path": "{{ vcf2maf_path }}", | |||
"{{ project_name }}.vep_docker": "{{ vep_docker }}", | |||
"{{ project_name }}.disk_size": "{{ disk_size }}", | |||
"{{ project_name }}.cluster_config": "{{ cluster_config }}", | |||
"{{ project_name }}.haplotyper": {{ haplotyper | tojson }}, | |||
@@ -30,5 +35,6 @@ | |||
"{{ project_name }}.tnscope": {{ tnscope | tojson }}, | |||
"{{ project_name }}.varscan": {{ varscan | tojson }}, | |||
"{{ project_name }}.annovar": {{ annovar | tojson }}, | |||
"{{ project_name }}.only_pass": {{ only_pass | tojson }} | |||
"{{ project_name }}.only_pass": {{ only_pass | tojson }}, | |||
"{{ project_name }}.vep": {{ vep | tojson }} | |||
} |
@@ -0,0 +1,77 @@ | |||
# Converts a VCF to MAF with vcf2maf.pl and, separately, annotates the
# breakend (SVTYPE=BND) records of the same VCF with VEP in tab format.
#
# Inputs:
#   vcf            VCF file to annotate.
#   sample_id      Prefix used for the intermediate *.INPUT*.vcf files.
#   basename       Output prefix; defaults to the VCF name without ".vcf".
#   tumor_id       Passed to vcf2maf.pl as --tumor-id.
#   normal_id      Passed to vcf2maf.pl as --normal-id.
#   ref_dir/fasta  Directory and file name of the reference FASTA.
#   vep_path       Directory containing the `vep` script.
#   cache          VEP cache directory (--vep-data / --dir).
#   hg             Genome build label; "hg19" or "hg38".
#   species        VEP species name.
#   only_pass      When true, keep only records whose FILTER column is PASS.
#   vcf2maf_path   Directory containing vcf2maf.pl.
#   docker, cluster_config, disk_size   Runtime settings.
#
# Outputs:
#   input_vcf      The (optionally PASS-filtered) VCF that was annotated.
#   maf            MAF produced by vcf2maf.pl from the non-BND records.
#   bnd_vep        VEP tab output for the BND records.
task vcf2maf {
    File vcf
    String sample_id
    String basename = basename(vcf,".vcf")
    String tumor_id
    String normal_id
    File ref_dir
    String fasta
    String vep_path
    File cache
    String hg
    String species
    Boolean only_pass
    String vcf2maf_path
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        nt=$(nproc)

        # The WDL Boolean must be interpolated and compared as a string:
        # a bare word inside [ ] is always true, which made the PASS filter
        # impossible to switch off.
        if [ "${only_pass}" = "true" ]; then
            awk -F'\t' '{if(($1~"^#")||($1!~"^#" && $7=="PASS")){print $0}}' ${vcf} > ${sample_id}.INPUT.vcf
        else
            cp ${vcf} ${sample_id}.INPUT.vcf
        fi

        # Define ncbi_build from the interpolated genome label. Fail loudly on
        # anything else instead of passing an empty build to the tools below.
        if [ "${hg}" = "hg19" ]; then
            ncbi_build="GRCh37"
        elif [ "${hg}" = "hg38" ]; then
            ncbi_build="GRCh38"
        else
            echo "Unsupported hg value: ${hg} (expected hg19 or hg38)" >&2
            exit 1
        fi

        # Split the input: records without SVTYPE=BND go to vcf2maf,
        # records with SVTYPE=BND go to VEP. Header lines go to both files.
        awk -F'\t' '{if(($1~"^#")||($8!~".*SVTYPE=BND.*")){print $0}}' ${sample_id}.INPUT.vcf > ${sample_id}.INPUT.vcf2maf.vcf
        awk -F'\t' '{if(($1~"^#")||($8~".*SVTYPE=BND.*")){print $0}}' ${sample_id}.INPUT.vcf > ${sample_id}.INPUT.VEP.vcf

        # vcf2maf: the MAF name must match the `maf` output declared below.
        perl ${vcf2maf_path}/vcf2maf.pl \
            --input-vcf ${sample_id}.INPUT.vcf2maf.vcf --output-maf ${basename}.${hg}.maf \
            --tumor-id ${tumor_id} --normal-id ${normal_id} \
            --ref-fasta ${ref_dir}/${fasta} \
            --vep-path ${vep_path} \
            --vep-data ${cache} \
            --ncbi-build $ncbi_build \
            --species ${species} \
            --vep-fork $nt

        # vep: only annotate the BND
        perl ${vep_path}/vep \
            --input_file ${sample_id}.INPUT.VEP.vcf --output_file ${basename}.BND.VEP.txt \
            --fasta ${ref_dir}/${fasta} \
            --dir ${cache} \
            --assembly $ncbi_build \
            --species ${species} \
            --fork $nt \
            --no_progress --no_stats --buffer_size 5000 --sift b --ccds --uniprot --hgvs --symbol --numbers --domains --gene_phenotype --canonical --protein --biotype --tsl --variant_class --shift_hgvs 1 --check_existing --total_length --allele_number --no_escape --xref_refseq --failed 1 --tab --flag_pick_allele --pick_order canonical,tsl,biotype,rank,ccds,length --format vcf --force_overwrite --offline --pubmed --regulatory
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File input_vcf = "${sample_id}.INPUT.vcf"
        File maf = "${basename}.${hg}.maf"
        File bnd_vep = "${basename}.BND.VEP.txt"
    }
}
@@ -10,6 +10,7 @@ import "./tasks/somatic.wdl" as somatic | |||
import "./tasks/processSomatic.wdl" as processSomatic | |||
import "./tasks/somaticFilter.wdl" as somaticFilter | |||
import "./tasks/ANNOVAR.wdl" as ANNOVAR | |||
import "./tasks/VEP.wdl" as VEP | |||
workflow {{ project_name }} { | |||
@@ -25,6 +26,7 @@ workflow {{ project_name }} { | |||
String sentieon_docker | |||
String varscan_docker | |||
String annovar_docker | |||
String vep_docker | |||
File ref_dir | |||
String fasta | |||
@@ -42,6 +44,11 @@ workflow {{ project_name }} { | |||
File? tnseq_pon | |||
File? tnscope_pon | |||
String vep_path | |||
File cache | |||
String species | |||
String vcf2maf_path | |||
String disk_size | |||
String cluster_config | |||
@@ -51,6 +58,7 @@ workflow {{ project_name }} { | |||
Boolean varscan | |||
Boolean annovar | |||
Boolean vep | |||
Boolean only_pass | |||
if (tumor_fastq_1!= "") { | |||
@@ -263,6 +271,27 @@ workflow {{ project_name }} { | |||
disk_size=disk_size | |||
} | |||
} | |||
if (vep) {
    # Annotate the TNseq VCF when the `vep` switch is on.
    # The annotation task shown in this change is named `vcf2maf`; no task
    # named `VEP` is defined, so the call must use `VEP.vcf2maf`.
    # NOTE(review): assumes that task lives in ./tasks/VEP.wdl (imported as VEP) - confirm.
    call VEP.vcf2maf as TNseq_VEP {
        input:
            vcf=TNseq.TNseq_vcf,
            hg=hg,
            only_pass=only_pass,
            sample_id=sample_id,
            tumor_id=sample_id + "_tumor",
            normal_id=sample_id + "_normal",
            ref_dir=ref_dir,
            fasta=fasta,
            vep_path=vep_path,
            cache=cache,
            species=species,
            vcf2maf_path=vcf2maf_path,
            docker=vep_docker,
            cluster_config=cluster_config,
            disk_size=disk_size
    }
}
} | |||
if (tnscope) { | |||
@@ -303,6 +332,27 @@ workflow {{ project_name }} { | |||
disk_size=disk_size | |||
} | |||
} | |||
if (vep) {
    # Annotate the TNscope VCF when the `vep` switch is on.
    # The annotation task shown in this change is named `vcf2maf`; no task
    # named `VEP` is defined, so the call must use `VEP.vcf2maf`.
    # NOTE(review): assumes that task lives in ./tasks/VEP.wdl (imported as VEP) - confirm.
    call VEP.vcf2maf as TNscope_VEP {
        input:
            vcf=TNscope.TNscope_vcf,
            hg=hg,
            only_pass=only_pass,
            sample_id=sample_id,
            tumor_id=sample_id + "_tumor",
            normal_id=sample_id + "_normal",
            ref_dir=ref_dir,
            fasta=fasta,
            vep_path=vep_path,
            cache=cache,
            species=species,
            vcf2maf_path=vcf2maf_path,
            docker=vep_docker,
            cluster_config=cluster_config,
            disk_size=disk_size
    }
}
} | |||
if (varscan) { | |||
@@ -325,7 +375,7 @@ workflow {{ project_name }} { | |||
sample=sample_id, | |||
varscan_snp=somatic.varscan_snp, | |||
varscan_indel=somatic.varscan_indel, | |||
docker=varscan_docker, | |||
docker=varscan_docker, | |||
disk_size=disk_size, | |||
cluster_config=cluster_config | |||
} | |||
@@ -340,7 +390,7 @@ workflow {{ project_name }} { | |||
varscan_indel_germline_hc=processSomatic.varscan_indel_germline_hc, | |||
varscan_indel_loh_hc=processSomatic.varscan_indel_loh_hc, | |||
varscan_indel=somatic.varscan_indel, | |||
docker=varscan_docker, | |||
docker=varscan_docker, | |||
disk_size=disk_size, | |||
cluster_config=cluster_config | |||
} | |||
@@ -357,5 +407,26 @@ workflow {{ project_name }} { | |||
disk_size=disk_size | |||
} | |||
} | |||
if (vep) {
    # Annotate the VarScan VCF when the `vep` switch is on.
    # The annotation task shown in this change is named `vcf2maf`; no task
    # named `VEP` is defined, so the call must use `VEP.vcf2maf`.
    # NOTE(review): assumes that task lives in ./tasks/VEP.wdl (imported as VEP) - confirm.
    call VEP.vcf2maf as VarScan_VEP {
        input:
            vcf=VarScan.VarScan_vcf,
            hg=hg,
            only_pass=only_pass,
            sample_id=sample_id,
            tumor_id=sample_id + "_tumor",
            normal_id=sample_id + "_normal",
            ref_dir=ref_dir,
            fasta=fasta,
            vep_path=vep_path,
            cache=cache,
            species=species,
            vcf2maf_path=vcf2maf_path,
            docker=vep_docker,
            cluster_config=cluster_config,
            disk_size=disk_size
    }
}
} | |||
} |