|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110 |
# Task VEP: keep only PASS records of a VCF, annotate them with Ensembl VEP,
# and convert the annotated VCF to MAF with vcf2maf (VEP step inhibited there).
#
# Inputs:
#   vcf            input VCF; its name minus ".vcf" becomes the output prefix
#   sample_id      only referenced by the disabled commands kept below
#   tumor_id       passed to vcf2maf as --tumor-id
#   normal_id      passed to vcf2maf as --normal-id (see column-count branch)
#   ref_dir/fasta  directory and file name of the reference FASTA
#   vep_path       directory containing the `vep` script
#   cache          VEP cache directory (passed as --dir_cache)
#   hg             genome build, "hg19" or "hg38"; mapped to GRCh37 / GRCh38
#   species        species name passed to both tools
#   vcf2maf_path   directory containing vcf2maf.pl
#   docker, cluster_config, disk_size   runtime settings
#
# Outputs:
#   input_vcf  PASS-filtered VCF
#   vep_vcf    VEP-annotated VCF
#   maf        MAF produced by vcf2maf
task VEP {

    File vcf
    String sample_id
    String basename = basename(vcf,".vcf")
    String tumor_id
    String normal_id
    File ref_dir
    String fasta
    String vep_path
    File cache
    String hg
    String species
    String vcf2maf_path
    String docker
    String cluster_config
    String disk_size


    command <<<
        set -o pipefail
        set -e
        nt=$(nproc)

        # Keep header lines plus records whose 7th (FILTER) column is exactly PASS.
        awk -F'\t' '{if(($1~"^#")||($1!~"^#" && $7=="PASS")){print $0}}' ${vcf} > ${basename}.PASS.vcf

        # Define ncbi_build from the requested genome build.
        # NOTE: ncbi_build is a *shell* variable, so it is referenced below as
        # $ncbi_build (no braces) to keep it out of WDL placeholder interpolation.
        if [ "${hg}" = "hg19" ]; then
            ncbi_build="GRCh37"
        elif [ "${hg}" = "hg38" ]; then
            ncbi_build="GRCh38"
        else
            echo "Unsupported genome build: ${hg} (expected hg19 or hg38)" >&2
            exit 1
        fi

        # Column count of the first data record; empty when no record passed the filter.
        ncol=$(awk -F'\t' '$1!~"^#"{print NF; exit}' ${basename}.PASS.vcf)
        # NOTE(review): fewer than 11 columns adds --normal-id, 11 or more omits it.
        # Confirm this is the intended mapping for single-sample vs paired VCFs.
        if [ -n "$ncol" ] && [ "$ncol" -lt 11 ]; then
            vcf2maf_ID="--tumor-id ${tumor_id} --normal-id ${normal_id}"
        else
            vcf2maf_ID="--tumor-id ${tumor_id}"
        fi

        # Count records on "chr"-prefixed contigs and derive a VEP buffer size.
        # NOTE(review): buffer_size is only consumed by the disabled VEP command below.
        nrow=$(awk -F'\t' '{if($1~"^chr"){print $0}}' ${basename}.PASS.vcf | wc -l)
        if [ "$nrow" -lt 5000 ]; then
            buffer_size="--buffer_size 5000"
        else
            buffer_size="--buffer_size 500"
        fi

        # ---- Disabled alternatives, kept for reference ----
        # Extract the BND variants from VCF
        # awk -F'\t' '{if(($1~"^#")||($8!~".*SVTYPE=BND.*")){print $0}}' ${sample_id}.PASS.vcf > ${sample_id}.PASS.vcf2maf.vcf
        # awk -F'\t' '{if(($1~"^#")||($8~".*SVTYPE=BND.*")){print $0}}' ${sample_id}.PASS.vcf > ${sample_id}.INPUT.VEP.vcf

        # vcf2maf
        # perl ${vcf2maf_path}/vcf2maf.pl \
        # --input-vcf ${sample_id}.PASS.vcf2maf.vcf --output-maf ${basename}.maf \
        # --tumor-id ${tumor_id} --normal-id ${normal_id} \
        # --ref-fasta ${ref_dir}/${fasta} \
        # --vep-path ${vep_path} \
        # --vep-data ${cache} \
        # --ncbi-build $ncbi_build \
        # --species ${species} \
        # --vep-fork $nt

        # vep
        # perl ${vep_path}/vep \
        # --input_file ${basename}.PASS.vcf --output_file ${basename}.PASS.vep.vcf \
        # --fasta ${ref_dir}/${fasta} \
        # --dir ${cache} \
        # --assembly $ncbi_build \
        # --species ${species} \
        # --fork $nt \
        # --format vcf --vcf \
        # --no_progress \
        # --no_stats \
        # $buffer_size \
        # --sift b \
        # --ccds --uniprot --hgvs --symbol --numbers --domains --gene_phenotype --canonical --protein --biotype --uniprot --tsl --variant_class --shift_hgvs 1 --check_existing --total_length --allele_number --no_escape --xref_refseq --failed 1 --flag_pick_allele --pick_order canonical,tsl,biotype,rank,ccds,length --force_overwrite --offline --pubmed --regulatory

        # Annotate the PASS-filtered VCF with VEP using the offline cache.
        perl ${vep_path}/vep --format vcf --vcf \
            --assembly $ncbi_build \
            --species ${species} \
            --everything --af_exac \
            --offline \
            --cache --dir_cache ${cache} \
            --fasta ${ref_dir}/${fasta} \
            --input_file ${basename}.PASS.vcf --output_file ${basename}.PASS.vep.vcf


        # vcf2maf: convert the already-annotated VCF; --inhibit-vep skips re-running VEP.
        perl ${vcf2maf_path}/vcf2maf.pl \
            --inhibit-vep \
            --input-vcf ${basename}.PASS.vep.vcf --output-maf ${basename}.PASS.maf \
            $vcf2maf_ID \
            --ref-fasta ${ref_dir}/${fasta} \
            --ncbi-build $ncbi_build \
            --species ${species} \
            --vep-fork $nt
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File input_vcf = "${basename}.PASS.vcf"
        File vep_vcf = "${basename}.PASS.vep.vcf"
        File maf = "${basename}.PASS.maf"
    }
}
|