VEP (Variant Effect Predictor) predicts the functional effects of genomic variants. The VEP-annotated VCF is then converted to MAF format using vcf2maf.
Nevar pievienot vairāk kā 25 tēmas Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

116 rindas
3.5KB

  # Task VEP: keep header lines and PASS records of the input VCF, normalise the
  # result with vcf2vcf.pl, annotate it with VEP (offline, cache based), and
  # convert the annotated VCF to MAF with vcf2maf.pl (VEP is not re-run there).
  #
  # Outputs:
  #   vep_vcf - "<basename>.vep.vcf", the VEP-annotated VCF
  #   maf     - "<basename>.maf", the MAF produced from vep_vcf
  task VEP {
    # Input VCF; only header lines and records whose FILTER column is PASS are kept.
    File vcf
    # Names the filtered intermediate VCF and is used as tumour ID in one branch below.
    String sample_id
    # Prefix of all output files: the input file name without ".vcf".
    String basename = basename(vcf,".vcf")
    # Tumour / normal sample IDs handed to vcf2vcf.pl and vcf2maf.pl.
    String tumor_id
    String normal_id
    # Directory holding the reference FASTA, and the FASTA file name inside it.
    File ref_dir
    String fasta
    # Directory containing the `vep` script.
    String vep_path
    # VEP cache directory (passed as --dir_cache).
    File cache
    # Passed to VEP as --assembly and to vcf2maf.pl as --ncbi-build.
    String ncbi_build
    String species
    # Directory containing vcf2vcf.pl and vcf2maf.pl.
    String vcf2maf_path
    # Runtime settings (see the runtime section).
    String docker
    String cluster_config
    String disk_size

    command <<<
      # NOTE: this is a draft-2 style command section, so every dollar-brace
      # placeholder is substituted by WDL before bash runs. Shell variables are
      # therefore always written without braces ($nt, $ncol, ...).
      set -o pipefail
      set -e
      nt=$(nproc)
      source /etc/profile

      # Keep header lines and PASS records only.
      awk -F'\t' '$1 ~ /^#/ || $7 == "PASS"' "${vcf}" > "${sample_id}.vcf"

      # Judge the SAMPLE info of the VCF from the column count of the first
      # data record. Reading a single record (instead of piping every NF
      # through uniq) guarantees one value; an empty result (no PASS records)
      # is handled explicitly so the numeric test is never malformed.
      ncol=$(awk -F'\t' '$1 !~ /^#/ {print NF; exit}' "${sample_id}.vcf")
      # NOTE(review): fewer than 11 columns means at most one sample column,
      # yet this branch passes both tumour and normal IDs while the other
      # branch passes only sample_id. Behaviour is kept as it was; please
      # confirm the two branches are not inverted.
      if [ -n "$ncol" ] && [ "$ncol" -lt 11 ]; then
        SAMPLE_vcf2maf="--tumor-id ${tumor_id} --normal-id ${normal_id}"
        SAMPLE_vcf2vcf="--vcf-tumor-id ${tumor_id} --vcf-normal-id ${normal_id}"
      else
        SAMPLE_vcf2maf="--tumor-id ${sample_id}"
        SAMPLE_vcf2vcf="--vcf-tumor-id ${sample_id}"
      fi

      # Set the buffer_size based on the data size. Every non-header record is
      # counted, so contig names without a "chr" prefix are measured as well.
      nrow=$(awk -F'\t' '$1 !~ /^#/ {n++} END {print n+0}' "${sample_id}.vcf")
      if [ "$nrow" -lt 5000 ]; then
        buffer_size="--buffer_size 5000"
      else
        buffer_size="--buffer_size 1000"
      fi

      # vcf2vcf: transfer into a standardized format.
      # $SAMPLE_vcf2vcf is intentionally unquoted: it holds several arguments.
      perl "${vcf2maf_path}/vcf2vcf.pl" \
        --input-vcf "${sample_id}.vcf" --output-vcf "${basename}.norm.vcf" \
        $SAMPLE_vcf2vcf \
        --ref-fasta "${ref_dir}/${fasta}"

      # VEP annotation ($buffer_size is intentionally unquoted, see above).
      perl "${vep_path}/vep" --format vcf --vcf \
        --assembly ${ncbi_build} \
        --species ${species} \
        --everything --af_exac \
        --offline \
        --cache --dir_cache "${cache}" \
        --fasta "${ref_dir}/${fasta}" \
        $buffer_size \
        --input_file "${basename}.norm.vcf" --output_file "${basename}.vep.vcf"

      # vcf2maf: VEP already ran above, hence --inhibit-vep.
      # NOTE(review): --vep-fork presumably has no effect together with
      # --inhibit-vep; kept so the invocation stays as it was.
      perl "${vcf2maf_path}/vcf2maf.pl" \
        --inhibit-vep \
        --input-vcf "${basename}.vep.vcf" --output-maf "${basename}.maf" \
        $SAMPLE_vcf2maf \
        --ref-fasta "${ref_dir}/${fasta}" \
        --ncbi-build ${ncbi_build} \
        --species ${species} \
        --vep-fork $nt
    >>>

    runtime {
      docker: docker
      cluster: cluster_config
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
      File vep_vcf = "${basename}.vep.vcf"
      File maf = "${basename}.maf"
    }
  }