# Annotate a VCF with Ensembl VEP and convert the annotated VCF to MAF.
#
# Steps performed by the command section:
#   1. Keep header lines plus records whose 7th column (FILTER) is PASS.
#   2. Map `hg` (hg19 / hg38) to the build name passed to VEP and vcf2maf.
#   3. Run VEP offline against the supplied cache directory.
#   4. Run vcf2maf.pl with --inhibit-vep on the VEP-annotated VCF.
#
# Outputs: the PASS-only VCF, the VEP-annotated VCF and the MAF.
task VEP {

  File vcf
  String sample_id
  String basename = basename(vcf,".vcf")
  String tumor_id
  String normal_id
  File ref_dir
  String fasta
  String vep_path
  File cache
  String hg
  String species
  String vcf2maf_path
  String docker
  String cluster_config
  String disk_size

  # NOTE: WDL interpolates dollar-brace placeholders everywhere in the command
  # section, including inside shell comments, so shell-only variables must be
  # written without braces (e.g. $ncbi_build).
  command <<<
    set -o pipefail
    set -e
    nt=$(nproc)

    # Keep header lines and records whose FILTER column is PASS.
    awk -F'\t' '$1 ~ /^#/ || $7 == "PASS"' ${vcf} > ${basename}.PASS.vcf

    # Define ncbi_build
    if [ "${hg}" = "hg19" ]; then
      ncbi_build="GRCh37"
    elif [ "${hg}" = "hg38" ]; then
      ncbi_build="GRCh38"
    else
      # Fail early instead of running VEP/vcf2maf with an empty build name.
      echo "Unsupported hg value: ${hg} (expected hg19 or hg38)" >&2
      exit 1
    fi

    # Column count of the first data record decides which sample IDs are
    # handed to vcf2maf. An empty value (no data records) falls through to
    # the tumor-only branch.
    # NOTE(review): --normal-id is passed only when there are fewer than 11
    # columns; a paired tumor/normal VCF has 11 columns, so confirm that this
    # condition is not inverted.
    ncol=$(awk -F'\t' '$1 !~ /^#/ {print NF; exit}' ${basename}.PASS.vcf)
    if [ -n "$ncol" ] && [ "$ncol" -lt 11 ]; then
      vcf2maf_ID="--tumor-id ${tumor_id} --normal-id ${normal_id}"
    else
      vcf2maf_ID="--tumor-id ${tumor_id}"
    fi

    # Buffer size derived from the number of records on chr-prefixed contigs.
    # NOTE(review): buffer_size is only referenced by the commented-out VEP
    # invocation below; the active VEP call does not use it.
    nrow=$(awk -F'\t' '$1 ~ /^chr/' ${basename}.PASS.vcf | wc -l)
    if [ "$nrow" -lt 5000 ]; then
      buffer_size="--buffer_size 5000"
    else
      buffer_size="--buffer_size 500"
    fi

    # Extract the BND variants from VCF
    # awk -F'\t' '{if(($1~"^#")||($8!~".*SVTYPE=BND.*")){print $0}}' ${sample_id}.PASS.vcf > ${sample_id}.PASS.vcf2maf.vcf
    # awk -F'\t' '{if(($1~"^#")||($8~".*SVTYPE=BND.*")){print $0}}' ${sample_id}.PASS.vcf > ${sample_id}.INPUT.VEP.vcf

    # vcf2maf
    # perl ${vcf2maf_path}/vcf2maf.pl \
    #   --input-vcf ${sample_id}.PASS.vcf2maf.vcf --output-maf ${basename}.maf \
    #   --tumor-id ${tumor_id} --normal-id ${normal_id} \
    #   --ref-fasta ${ref_dir}/${fasta} \
    #   --vep-path ${vep_path} \
    #   --vep-data ${cache} \
    #   --ncbi-build $ncbi_build \
    #   --species ${species} \
    #   --vep-fork $nt

    # vep
    # perl ${vep_path}/vep \
    #   --input_file ${basename}.PASS.vcf --output_file ${basename}.PASS.vep.vcf \
    #   --fasta ${ref_dir}/${fasta} \
    #   --dir ${cache} \
    #   --assembly $ncbi_build \
    #   --species ${species} \
    #   --fork $nt \
    #   --format vcf --vcf \
    #   --no_progress \
    #   --no_stats \
    #   $buffer_size \
    #   --sift b \
    #   --ccds --uniprot --hgvs --symbol --numbers --domains --gene_phenotype --canonical --protein --biotype --uniprot --tsl --variant_class --shift_hgvs 1 --check_existing \
    #   --total_length --allele_number --no_escape --xref_refseq --failed 1 --flag_pick_allele --pick_order canonical,tsl,biotype,rank,ccds,length --force_overwrite --offline --pubmed --regulatory

    # Annotate the PASS-only VCF with VEP (offline, cache-based).
    perl ${vep_path}/vep --format vcf --vcf \
      --assembly $ncbi_build \
      --species ${species} \
      --everything --af_exac \
      --offline \
      --cache --dir_cache ${cache} \
      --fasta ${ref_dir}/${fasta} \
      --input_file ${basename}.PASS.vcf --output_file ${basename}.PASS.vep.vcf

    # vcf2maf
    # $vcf2maf_ID is intentionally unquoted so it splits into separate options.
    perl ${vcf2maf_path}/vcf2maf.pl \
      --inhibit-vep \
      --input-vcf ${basename}.PASS.vep.vcf --output-maf ${basename}.PASS.maf \
      $vcf2maf_ID \
      --ref-fasta ${ref_dir}/${fasta} \
      --ncbi-build $ncbi_build \
      --species ${species} \
      --vep-fork $nt
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File input_vcf = "${basename}.PASS.vcf"
    File vep_vcf = "${basename}.PASS.vep.vcf"
    File maf = "${basename}.PASS.maf"
  }
}