# Task VEP
#
# Normalizes a VCF with vcf2vcf.pl, annotates it with Ensembl VEP (offline,
# cache-based), and converts the annotated VCF to MAF with vcf2maf.pl.
#
# The sample layout is read from the "#CHROM" header line of the input VCF:
#   * two sample columns  -> column 10 is used as the normal ID and
#                            column 11 as the tumor ID;
#   * one sample column   -> that sample is used as the tumor ID. vcf2vcf.pl is
#                            given the same ID for tumor and normal, so the
#                            sample column is temporarily duplicated and the
#                            extra column is dropped again afterwards.
#
# Outputs: the normalized VCF, the VEP-annotated VCF, the VEP HTML summary
# written next to the annotated VCF, and the MAF file.
task VEP {

	# Input VCF to normalize and annotate.
	File vcf
	# Prefix for the intermediate *.tmp1.vcf / *.tmp2.vcf files.
	String sample_id
	# Prefix for all output files; defaults to the VCF file name without ".vcf".
	String basename = basename(vcf,".vcf")
	# Directory holding the reference FASTA, and the FASTA file name inside it.
	File ref_dir
	String fasta
	# Directory containing the `vep` script.
	String vep_path
	# VEP cache directory (passed to --dir_cache).
	File cache
	# Genome build, passed to VEP --assembly and vcf2maf --ncbi-build.
	String ncbi_build
	# Species name, passed to both VEP and vcf2maf.
	String species
	# Directory containing vcf2vcf.pl and vcf2maf.pl.
	String vcf2maf_path
	# Runtime settings.
	String docker
	String cluster_config
	String disk_size

	command <<<
		set -o pipefail
		set -e
		nt=$(nproc)
		source /etc/profile

		# NOTE: do not use bash brace expansion for shell variables in this
		# script; the dollar-brace form is reserved for WDL interpolation.

		#awk -F'\t' '{if(($1~"^#")||($1!~"^#" && $7=="PASS")){print $0}}' ${vcf} > ${sample_id}.vcf

		# Set the buffer_size based on the data size.
		# Count every non-header record so the decision does not depend on the
		# contigs being named with a "chr" prefix.
		nrow=$(awk '!/^#/ {n++} END {print n+0}' "${vcf}")
		if [ "$nrow" -lt 5000 ]; then
			buffer_size="--buffer_size 5000"
		else
			# NOTE(review): a small buffer for large inputs presumably bounds
			# memory use per fork -- confirm against the VEP documentation.
			buffer_size="--buffer_size 50"
		fi

		# Judge the SAMPLE info of vcf file: read sample names from the
		# "#CHROM" header line (columns 10 and 11).
		sample_col_1=$(awk -F'\t' '$1 ~ /^#CHROM/ {print $10; exit}' "${vcf}")
		sample_col_2=$(awk -F'\t' '$1 ~ /^#CHROM/ {print $11; exit}' "${vcf}")

		if [ -n "$sample_col_2" ]; then
			# This situation means there are pairs
			SAMPLE_vcf2maf="--tumor-id $sample_col_2 --normal-id $sample_col_1"
			SAMPLE_vcf2vcf="--vcf-tumor-id $sample_col_2 --vcf-normal-id $sample_col_1"
			cp "${vcf}" "${sample_id}.tmp1.vcf"
		else
			# Tumor-only or normal-only
			SAMPLE_vcf2maf="--tumor-id $sample_col_1"
			# Add a column and remove it after vcf2vcf
			SAMPLE_vcf2vcf="--vcf-tumor-id $sample_col_1 --vcf-normal-id $sample_col_1"
			awk -F'\t' 'BEGIN {OFS="\t"} {if ($1 !~ /^##/ && length($11) == 0) $11 = $10; print $0}' "${vcf}" > "${sample_id}.tmp1.vcf"
		fi

		# vcf2vcf: transfer into a standardized format
		# ($SAMPLE_vcf2vcf is intentionally unquoted so it splits into options.)
		echo "Transfer the VCF file into a standardized format..."
		perl "${vcf2maf_path}/vcf2vcf.pl" \
			--input-vcf "${sample_id}.tmp1.vcf" --output-vcf "${sample_id}.tmp2.vcf" \
			$SAMPLE_vcf2vcf \
			--ref-fasta "${ref_dir}/${fasta}"

		if [ -n "$sample_col_2" ]; then
			cp "${sample_id}.tmp2.vcf" "${basename}.norm.vcf"
		else
			# Drop the duplicated sample column that was added above.
			cut -f 1-10 "${sample_id}.tmp2.vcf" > "${basename}.norm.vcf"
		fi

		# VEP annotation
		# ($buffer_size is intentionally unquoted so it splits into flag + value.)
		echo "VEP annotation..."
		perl "${vep_path}/vep" --format vcf --vcf \
			--input_file "${basename}.norm.vcf" --output_file "${basename}.vep.vcf" \
			--assembly ${ncbi_build} \
			--species ${species} \
			--everything --af_exac \
			--offline \
			--cache --dir_cache "${cache}" \
			--fasta "${ref_dir}/${fasta}" \
			$buffer_size \
			--fork "$nt"

		# vcf2maf
		# --inhibit-vep: the input is already VEP-annotated by the step above.
		echo "vcf2maf..."
		perl "${vcf2maf_path}/vcf2maf.pl" \
			--inhibit-vep \
			--input-vcf "${basename}.vep.vcf" --output-maf "${basename}.maf" \
			$SAMPLE_vcf2maf \
			--ref-fasta "${ref_dir}/${fasta}" \
			--ncbi-build ${ncbi_build} \
			--species ${species}
	>>>

	runtime {
		docker: docker
		cluster: cluster_config
		systemDisk: "cloud_ssd 40"
		dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
	}

	output {
		File norm_vcf = "${basename}.norm.vcf"
		File vep_vcf = "${basename}.vep.vcf"
		File vep_vcf_summary = "${basename}.vep.vcf_summary.html"
		File maf = "${basename}.maf"
	}
}