# Task VEP: keep only PASS records of a VCF, annotate them with Ensembl VEP
# (offline cache mode), then convert the annotated VCF to MAF with vcf2maf.
#
# Inputs
#   vcf            input VCF; ".vcf" is stripped from its file name to form `basename`
#   sample_id      declared for callers; not referenced by the command below
#   tumor_id       value passed to vcf2maf --tumor-id
#   normal_id      value passed to vcf2maf --normal-id (paired VCFs only)
#   ref_dir/fasta  directory and file name of the reference FASTA
#   vep_path       directory containing the `vep` script
#   cache          VEP cache directory (--dir_cache)
#   hg             "hg19" or "hg38"; mapped to GRCh37 / GRCh38
#   species        passed to both vep and vcf2maf as --species
#   vcf2maf_path   directory containing vcf2maf.pl
#   docker, cluster_config, disk_size   runtime settings
#
# Outputs
#   input_vcf   the PASS-filtered VCF
#   vep_vcf     the VEP-annotated VCF
#   maf         the MAF produced by vcf2maf
task VEP {

    File vcf
    String sample_id
    String basename = basename(vcf,".vcf")
    String tumor_id
    String normal_id
    File ref_dir
    String fasta
    String vep_path
    File cache
    String hg
    String species
    String vcf2maf_path
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        nt=$(nproc)

        # Keep header lines and records whose FILTER column (7th) is PASS.
        awk -F'\t' '$1 ~ /^#/ || $7 == "PASS"' ${vcf} > ${basename}.PASS.vcf

        # Define ncbi_build; stop early on an unsupported genome label instead of
        # running VEP with an empty --assembly value.
        if [ "${hg}" = "hg19" ]; then
            ncbi_build="GRCh37"
        elif [ "${hg}" = "hg38" ]; then
            ncbi_build="GRCh38"
        else
            echo "Unsupported hg value: ${hg} (expected hg19 or hg38)" >&2
            exit 1
        fi

        # Column count is read from the #CHROM header line so that it is a single
        # number even when no PASS record is present. A VCF has 9 fixed columns
        # followed by one column per sample, so 11 or more columns means at least
        # two sample columns (tumor and normal).
        ncol=$(awk -F'\t' '$1 == "#CHROM" {print NF; exit}' ${basename}.PASS.vcf)
        if [ -z "$ncol" ]; then
            ncol=0
        fi
        if [ "$ncol" -ge 11 ]; then
            vcf2maf_ID="--tumor-id ${tumor_id} --normal-id ${normal_id}"
        else
            vcf2maf_ID="--tumor-id ${tumor_id}"
        fi

        # Count data records (any non-header line, whatever the contig naming)
        # and use a smaller VEP buffer for large inputs.
        nrow=$(awk -F'\t' '$1 !~ /^#/ {n++} END {print n + 0}' ${basename}.PASS.vcf)
        if [ "$nrow" -lt 5000 ]; then
            buffer_size="--buffer_size 5000"
        else
            buffer_size="--buffer_size 500"
        fi

        # vep
        # $buffer_size is intentionally unquoted: it expands to an option and its value.
        perl ${vep_path}/vep --format vcf --vcf \
            --assembly $ncbi_build \
            --species ${species} \
            --everything --af_exac \
            --offline \
            --cache --dir_cache ${cache} \
            --fasta ${ref_dir}/${fasta} \
            $buffer_size \
            --input_file ${basename}.PASS.vcf --output_file ${basename}.PASS.vep.vcf

        # vcf2maf
        # --inhibit-vep: the VCF was already annotated by the vep call above.
        # $vcf2maf_ID is intentionally unquoted: it expands to several arguments.
        perl ${vcf2maf_path}/vcf2maf.pl \
            --inhibit-vep \
            --input-vcf ${basename}.PASS.vep.vcf --output-maf ${basename}.PASS.maf \
            $vcf2maf_ID \
            --ref-fasta ${ref_dir}/${fasta} \
            --ncbi-build $ncbi_build \
            --species ${species} \
            --vep-fork $nt
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File input_vcf = "${basename}.PASS.vcf"
        File vep_vcf = "${basename}.PASS.vep.vcf"
        File maf = "${basename}.PASS.maf"
    }
}