VEP (Variant Effect Predictor) predicts the functional effects of genomic variants. The VEP-annotated VCF is then converted to MAF format using vcf2maf.
Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

85 lines
2.5KB

  # Task VEP
  #
  # Keeps only header lines and FILTER == PASS records of the input VCF,
  # annotates them with Ensembl VEP (offline, using the supplied cache), and
  # converts the annotated VCF to MAF with vcf2maf (--inhibit-vep, so vcf2maf
  # reuses the annotation already present instead of running VEP again).
  #
  # Inputs:
  #   vcf            input VCF; its name without ".vcf" prefixes all outputs
  #   sample_id      not used by the active command (kept for callers)
  #   tumor_id       passed to vcf2maf as --tumor-id
  #   normal_id      passed to vcf2maf as --normal-id (see NOTE(review) below)
  #   ref_dir/fasta  directory and file name of the reference FASTA
  #   vep_path       directory containing the `vep` script
  #   cache          VEP cache directory (--dir)
  #   ncbi_build     assembly name given to VEP and vcf2maf
  #   species        species name given to VEP and vcf2maf
  #   vcf2maf_path   directory containing vcf2maf.pl
  #   docker, cluster_config, disk_size   runtime settings
  #
  # Outputs:
  #   input_vcf      PASS-filtered VCF
  #   vep_vcf        VEP-annotated VCF
  #   maf            MAF produced by vcf2maf
  task VEP {
    File vcf
    String sample_id
    String basename = basename(vcf, ".vcf")
    String tumor_id
    String normal_id
    File ref_dir
    String fasta
    String vep_path
    File cache
    String ncbi_build
    String species
    String vcf2maf_path
    String docker
    String cluster_config
    String disk_size

    command <<<
      set -eo pipefail

      nt=$(nproc)

      # Keep header lines and records whose FILTER column (7th) is PASS.
      awk -F'\t' '$1 ~ /^#/ || $7 == "PASS"' "${vcf}" > "${basename}.PASS.vcf"

      # Column count of the first data record only. Reading a single row avoids
      # the multi-value result that broke the numeric test when rows differed,
      # and the -n guard avoids a test error when there are no PASS records.
      ncol=$(awk -F'\t' '$1 !~ /^#/ {print NF; exit}' "${basename}.PASS.vcf")

      # NOTE(review): fewer than 11 columns (at most one sample column) passes
      # BOTH ids, while 11 or more passes only the tumor id. This looks inverted
      # for a tumor/normal VCF; the original behaviour is preserved here.
      # TODO confirm the intended mapping.
      if [ -n "$ncol" ] && [ "$ncol" -lt 11 ]; then
        vcf2maf_ID="--tumor-id ${tumor_id} --normal-id ${normal_id}"
      else
        vcf2maf_ID="--tumor-id ${tumor_id}"
      fi

      # An earlier revision split SVTYPE=BND records into a separate VCF and let
      # vcf2maf run VEP itself; that path is disabled. All PASS records now go
      # through VEP once and vcf2maf is run with --inhibit-vep.

      # VEP annotation
      perl ${vep_path}/vep \
        --input_file "${basename}.PASS.vcf" \
        --output_file "${basename}.PASS.vep.vcf" \
        --fasta "${ref_dir}/${fasta}" \
        --dir "${cache}" \
        --assembly ${ncbi_build} \
        --species ${species} \
        --fork $nt \
        --format vcf --vcf \
        --no_progress \
        --no_stats \
        --buffer_size 5000 \
        --sift b \
        --ccds --uniprot --hgvs --symbol --numbers --domains \
        --gene_phenotype --canonical --protein --biotype --tsl \
        --variant_class --shift_hgvs 1 --check_existing --total_length \
        --allele_number --no_escape --xref_refseq --failed 1 \
        --flag_pick_allele \
        --pick_order canonical,tsl,biotype,rank,ccds,length \
        --force_overwrite --offline --pubmed --regulatory

      # vcf2maf conversion. $vcf2maf_ID is intentionally unquoted so that it
      # splits into separate option/value words. --ncbi-build and --species are
      # passed so vcf2maf does not fall back to its own defaults.
      perl ${vcf2maf_path}/vcf2maf.pl \
        --inhibit-vep \
        --input-vcf "${basename}.PASS.vep.vcf" \
        --output-maf "${basename}.PASS.maf" \
        $vcf2maf_ID \
        --ref-fasta "${ref_dir}/${fasta}" \
        --ncbi-build ${ncbi_build} \
        --species ${species} \
        --vep-fork $nt
    >>>

    runtime {
      docker: docker
      cluster: cluster_config
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
      File input_vcf = "${basename}.PASS.vcf"
      File vep_vcf = "${basename}.PASS.vep.vcf"
      File maf = "${basename}.PASS.maf"
    }
  }