VEP (Variant Effect Predictor) predicts the functional effects of genomic variants. The annotated VCF is then converted to MAF format using vcf2maf.
Du kan inte välja fler än 25 ämnen. Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

95 lines
2.3KB

  task VEP {
    # Annotate a VCF with VEP and convert the annotated result to MAF.
    #
    # Steps run by the command section:
    #   1. vcf2vcf.pl writes a standardized copy of the input  -> <basename>.norm.vcf
    #   2. vep annotates that copy offline from a local cache  -> <basename>.vep.vcf
    #   3. vcf2maf.pl converts the annotated VCF (with --inhibit-vep, so VEP is
    #      not run a second time)                              -> <basename>.maf

    # Input VCF. Columns 10 and 11 of its "#CHROM" header line are read as the
    # tumor and normal sample IDs respectively.
    File vcf
    # Only referenced by the commented-out PASS filter in the command section.
    String sample_id
    # Input file name with the ".vcf" suffix removed; prefix of every output.
    String basename = basename(vcf,".vcf")
    # Directory that contains the reference FASTA, and the FASTA file name in it.
    File ref_dir
    String fasta
    # Directory that contains the `vep` script.
    String vep_path
    # Passed to VEP as --dir_cache.
    File cache
    # Passed to VEP as --assembly and to vcf2maf as --ncbi-build.
    String ncbi_build
    # Passed to both VEP and vcf2maf as --species.
    String species
    # Directory that contains vcf2vcf.pl and vcf2maf.pl.
    String vcf2maf_path
    # Runtime settings (see the runtime section).
    String docker
    String cluster_config
    String disk_size

    # NOTE: inside this command section "${...}" is a WDL placeholder, so shell
    # variables are deliberately written as "$name" without braces.
    command <<<
      set -o pipefail
      set -e
      nt=$(nproc)
      source /etc/profile

      #awk -F'\t' '{if(($1~"^#")||($1!~"^#" && $7=="PASS")){print $0}}' ${vcf} > ${sample_id}.vcf

      # Read the sample IDs from the "#CHROM" header line (column 10 = tumor,
      # column 11 = normal) and stop reading as soon as that line was seen.
      tumor_id=$(awk -F'\t' '$1 ~ /^#CHROM/ {print $10; exit}' "${vcf}")
      normal_id=$(awk -F'\t' '$1 ~ /^#CHROM/ {print $11; exit}' "${vcf}")

      # Only pass sample flags for IDs that are actually present; an empty ID
      # would leave a dangling flag that swallows the next option. When no
      # sample column exists the tools fall back to their own defaults.
      # The flag strings are expanded unquoted below, so sample IDs are assumed
      # to contain no whitespace.
      SAMPLE_vcf2maf=""
      SAMPLE_vcf2vcf=""
      if [ -n "$tumor_id" ]; then
        SAMPLE_vcf2maf="--tumor-id $tumor_id"
        SAMPLE_vcf2vcf="--vcf-tumor-id $tumor_id"
        if [ -n "$normal_id" ]; then
          SAMPLE_vcf2maf="$SAMPLE_vcf2maf --normal-id $normal_id"
          SAMPLE_vcf2vcf="$SAMPLE_vcf2vcf --vcf-normal-id $normal_id"
        fi
      fi

      # Set the buffer_size based on the data size.
      # Count every non-header, non-empty record so that contigs without a
      # "chr" prefix (e.g. "1", "MT") are counted as well.
      nrow=$(awk '!/^#/ && NF {n++} END {print n+0}' "${vcf}")
      if [ "$nrow" -lt 5000 ]; then
        buffer_size="--buffer_size 5000"
      else
        buffer_size="--buffer_size 1000"
      fi

      # vcf2vcf: transfer into a standardized format
      echo "Transfer the VCF file into a standardized format..."
      perl "${vcf2maf_path}/vcf2vcf.pl" \
        --input-vcf "${vcf}" --output-vcf "${basename}.norm.vcf" \
        $SAMPLE_vcf2vcf \
        --ref-fasta "${ref_dir}/${fasta}"

      # VEP annotation
      echo "VEP annotation..."
      perl "${vep_path}/vep" --format vcf --vcf \
        --assembly ${ncbi_build} \
        --species ${species} \
        --everything --af_exac \
        --offline \
        --cache --dir_cache "${cache}" \
        --fasta "${ref_dir}/${fasta}" \
        $buffer_size \
        --fork $nt \
        --input_file "${basename}.norm.vcf" --output_file "${basename}.vep.vcf"

      # vcf2maf
      echo "vcf2maf..."
      perl "${vcf2maf_path}/vcf2maf.pl" \
        --inhibit-vep \
        --input-vcf "${basename}.vep.vcf" --output-maf "${basename}.maf" \
        $SAMPLE_vcf2maf \
        --ref-fasta "${ref_dir}/${fasta}" \
        --ncbi-build ${ncbi_build} \
        --species ${species} \
        --vep-fork $nt
    >>>

    runtime {
      docker: docker
      cluster: cluster_config
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
      # Standardized VCF written by vcf2vcf.pl.
      File norm_vcf = "${basename}.norm.vcf"
      # VEP-annotated VCF.
      File vep_vcf = "${basename}.vep.vcf"
      # MAF written by vcf2maf.pl.
      File maf = "${basename}.maf"
    }
  }