|
- # Task VEP: normalise a VCF with vcf2vcf.pl, annotate it with Ensembl VEP, and
- # convert the annotated VCF into MAF with vcf2maf.pl.
- #
- # Inputs:
- #   vcf            Input VCF; its basename (without ".vcf") names all outputs.
- #   sample_id      Prefix used for intermediate files (and echoed to the log).
- #   ref_dir        Directory that contains the reference FASTA.
- #   fasta          File name of the reference FASTA inside ref_dir.
- #   vep_path       Directory that contains the `vep` script.
- #   cache          Passed to VEP as --dir_cache.
- #   ncbi_build     Passed to VEP as --assembly and to vcf2maf as --ncbi-build.
- #   species        Passed to both VEP and vcf2maf as --species.
- #   vcf2maf_path   Directory that contains vcf2vcf.pl and vcf2maf.pl.
- #   docker, cluster_config, disk_size   Runtime settings (see runtime block).
- #
- # Outputs:
- #   norm_vcf       VCF after vcf2vcf.pl normalisation.
- #   vep_vcf        VEP-annotated VCF.
- #   maf            MAF produced by vcf2maf.pl (VEP is not re-run: --inhibit-vep).
- task VEP {
-
-   File vcf
-   String sample_id
-   String basename = basename(vcf,".vcf")
-   File ref_dir
-   String fasta
-   String vep_path
-   File cache
-   String ncbi_build
-   String species
-   String vcf2maf_path
-   String docker
-   String cluster_config
-   String disk_size
-
-
-   command <<<
-     set -o pipefail
-     set -e
-     nt=$(nproc)
-
-     source /etc/profile
-     echo ${sample_id}
-     echo ${basename}
-
-     # Set the buffer_size based on the data size.
-     # NOTE(review): only records whose contig starts with "chr" are counted, so a
-     # VCF with un-prefixed contig names always gets nrow=0 -- confirm this is intended.
-     nrow=`awk -F'\t' '{if($1~"^chr"){print $0}}' ${vcf} | wc -l`
-     if [ "$nrow" -lt 5000 ]; then
-       buffer_size="--buffer_size 5000"
-     else
-       buffer_size="--buffer_size 1000"
-     fi
-
-
-     # Read the sample names from the #CHROM header line:
-     # column 10 is taken as the tumor sample, column 11 (if present) as the normal.
-     tumor_id=`awk -F'\t' '{if($1~"^#CHROM"){print $10}}' ${vcf}`
-     normal_id=`awk -F'\t' '{if($1~"^#CHROM"){print $11}}' ${vcf}`
-
-     # Without a tumor ID the option strings built below would be malformed
-     # ("--tumor-id" followed by nothing), so stop early with a clear message.
-     if [ -z "$tumor_id" ]; then
-       echo "ERROR: no sample column found on the #CHROM line of the input VCF" >&2
-       exit 1
-     fi
-
-     if [ -n "$normal_id" ]; then
-       SAMPLE_vcf2maf="--tumor-id $tumor_id --normal-id $normal_id"
-       SAMPLE_vcf2vcf="--vcf-tumor-id $tumor_id --vcf-normal-id $normal_id"
-       # Must be written to the same .tmp1.vcf that vcf2vcf.pl reads below.
-       cp ${vcf} ${sample_id}.tmp1.vcf
-     else
-       SAMPLE_vcf2maf="--tumor-id $tumor_id"
-       # Single-sample VCF: duplicate column 10 into column 11 so that vcf2vcf.pl
-       # can be given a normal ID; the extra column is removed again afterwards.
-       SAMPLE_vcf2vcf="--vcf-tumor-id $tumor_id --vcf-normal-id $tumor_id"
-       awk -F'\t' 'BEGIN{OFS="\t"} {if($1!~"^##" && length($11)==0) $11=$10; print $0}' ${vcf} > ${sample_id}.tmp1.vcf
-     fi
-
-
-     # vcf2vcf: transfer into a standardized format
-     echo "Transfer the VCF file into a standardized format..."
-     perl ${vcf2maf_path}/vcf2vcf.pl \
-       --input-vcf ${sample_id}.tmp1.vcf --output-vcf ${sample_id}.tmp2.vcf \
-       $SAMPLE_vcf2vcf \
-       --ref-fasta ${ref_dir}/${fasta}
-
-     if [ -n "$normal_id" ]; then
-       cp ${sample_id}.tmp2.vcf ${basename}.norm.vcf
-     else
-       # Keep only the first ten columns, i.e. everything up to the first sample.
-       cut -f 1-10 ${sample_id}.tmp2.vcf > ${basename}.norm.vcf
-     fi
-
-
-     # VEP annotation
-     echo "VEP annotation..."
-     perl ${vep_path}/vep --format vcf --vcf \
-       --input_file ${basename}.norm.vcf --output_file ${basename}.vep.vcf \
-       --assembly ${ncbi_build} \
-       --species ${species} \
-       --everything --af_exac \
-       --offline \
-       --cache --dir_cache ${cache} \
-       --fasta ${ref_dir}/${fasta} \
-       $buffer_size \
-       --fork $nt
-
-
-     # vcf2maf (VEP has already been run above, hence --inhibit-vep)
-     echo "vcf2maf..."
-     perl ${vcf2maf_path}/vcf2maf.pl \
-       --inhibit-vep \
-       --input-vcf ${basename}.vep.vcf --output-maf ${basename}.maf \
-       $SAMPLE_vcf2maf \
-       --ref-fasta ${ref_dir}/${fasta} \
-       --ncbi-build ${ncbi_build} \
-       --species ${species}
-
-   >>>
-
-   runtime {
-     docker: docker
-     cluster: cluster_config
-     systemDisk: "cloud_ssd 40"
-     dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
-   }
-
-   output {
-     File norm_vcf = "${basename}.norm.vcf"
-     File vep_vcf = "${basename}.vep.vcf"
-     File maf = "${basename}.maf"
-   }
- }
|