|
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677 |
# Task VEP
#
# Annotates one VCF in two passes:
#   1. records whose INFO column does not contain SVTYPE=BND are converted to
#      MAF with vcf2maf.pl;
#   2. records whose INFO column contains SVTYPE=BND are annotated directly
#      with VEP and written as a tab-delimited report.
#
# Inputs
#   vcf            VCF to annotate.
#   sample_id      Prefix for the intermediate *.INPUT*.vcf files.
#   basename       Derived from vcf by stripping a trailing ".vcf"; prefix of
#                  the MAF and BND report file names.
#   tumor_id       Passed to vcf2maf as --tumor-id.
#   normal_id      Passed to vcf2maf as --normal-id.
#   ref_dir/fasta  Directory and file name of the reference FASTA.
#   vep_path       Directory containing the `vep` executable.
#   cache          Passed to vcf2maf as --vep-data and to VEP as --dir.
#   hg             Genome build; "hg19" maps to GRCh37 and "hg38" to GRCh38.
#                  Any other value makes the task fail.
#   species        Passed to both tools as --species.
#   only_pass      When true, keep only header lines and records whose
#                  FILTER column (7th field) equals "PASS".
#   vcf2maf_path   Directory containing vcf2maf.pl.
#   docker, cluster_config, disk_size
#                  Forwarded to the runtime section.
#
# Outputs
#   input_vcf      The (optionally PASS-filtered) VCF used as input.
#   maf            MAF produced by vcf2maf for the non-BND records.
#   bnd_vep        VEP tab-delimited annotation of the BND records.
task VEP {

    File vcf
    String sample_id
    String basename = basename(vcf, ".vcf")
    String tumor_id
    String normal_id
    File ref_dir
    String fasta
    String vep_path
    File cache
    String hg
    String species
    Boolean only_pass
    String vcf2maf_path
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        nt=$(nproc)

        # The WDL Boolean is interpolated as the text "true" or "false"; it
        # has to be compared explicitly, a bare word inside [ ] is always true.
        if [ "${only_pass}" = "true" ]; then
            awk -F'\t' '{if(($1~"^#")||($1!~"^#" && $7=="PASS")){print $0}}' "${vcf}" > "${sample_id}.INPUT.vcf"
        else
            cp "${vcf}" "${sample_id}.INPUT.vcf"
        fi

        # Map the genome build name onto the NCBI assembly name expected by
        # vcf2maf and VEP. Fail early on anything else instead of running the
        # annotators with an empty assembly.
        if [ "${hg}" = "hg19" ]; then
            ncbi_build="GRCh37"
        elif [ "${hg}" = "hg38" ]; then
            ncbi_build="GRCh38"
        else
            echo "Unsupported genome build '${hg}' (expected hg19 or hg38)" >&2
            exit 1
        fi

        # Split the VCF: header + non-BND records go to vcf2maf,
        # header + BND records go to VEP.
        awk -F'\t' '{if(($1~"^#")||($8!~".*SVTYPE=BND.*")){print $0}}' "${sample_id}.INPUT.vcf" > "${sample_id}.INPUT.vcf2maf.vcf"
        awk -F'\t' '{if(($1~"^#")||($8~".*SVTYPE=BND.*")){print $0}}' "${sample_id}.INPUT.vcf" > "${sample_id}.INPUT.VEP.vcf"

        # vcf2maf: the MAF name must match the `maf` entry of the output section.
        perl "${vcf2maf_path}/vcf2maf.pl" \
            --input-vcf "${sample_id}.INPUT.vcf2maf.vcf" --output-maf "${basename}.${hg}.maf" \
            --tumor-id "${tumor_id}" --normal-id "${normal_id}" \
            --ref-fasta "${ref_dir}/${fasta}" \
            --vep-path "${vep_path}" \
            --vep-data "${cache}" \
            --ncbi-build "$ncbi_build" \
            --species "${species}" \
            --vep-fork "$nt"

        # vep: only annotate the BND records
        perl "${vep_path}/vep" \
            --input_file "${sample_id}.INPUT.VEP.vcf" --output_file "${basename}.BND.VEP.txt" \
            --fasta "${ref_dir}/${fasta}" \
            --dir "${cache}" \
            --assembly "$ncbi_build" \
            --species "${species}" \
            --fork "$nt" \
            --no_progress --no_stats --buffer_size 5000 \
            --sift b --ccds --uniprot --hgvs --symbol --numbers --domains \
            --gene_phenotype --canonical --protein --biotype --tsl \
            --variant_class --shift_hgvs 1 --check_existing --total_length \
            --allele_number --no_escape --xref_refseq --failed 1 --tab \
            --flag_pick_allele --pick_order canonical,tsl,biotype,rank,ccds,length \
            --format vcf --force_overwrite --offline --pubmed --regulatory
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File input_vcf = "${sample_id}.INPUT.vcf"
        File maf = "${basename}.${hg}.maf"
        File bnd_vep = "${basename}.BND.VEP.txt"
    }
}
|