VEP (Variant Effect Predictor) predicts the functional effects of genomic variants. The annotated VCF is then converted to MAF format using vcf2maf.
Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

103 linhas
2.7KB

  # Task VEP
  #
  # Normalises an input VCF with vcf2vcf.pl, annotates it with VEP, and
  # converts the annotated VCF into MAF with vcf2maf.pl.
  #
  # Inputs:
  #   vcf            input VCF; column 10 of the #CHROM header line is taken as
  #                  the tumor sample ID and column 11 (if present) as the
  #                  normal sample ID
  #   sample_id      prefix used for intermediate files
  #   ref_dir/fasta  directory and file name of the reference FASTA
  #   vep_path       directory containing the `vep` script
  #   cache          VEP cache directory (passed to --dir_cache)
  #   ncbi_build     assembly name passed to VEP and vcf2maf
  #   species        species name passed to VEP and vcf2maf
  #   vcf2maf_path   directory containing vcf2vcf.pl and vcf2maf.pl
  #   docker, cluster_config, disk_size   runtime settings
  #
  # Outputs:
  #   norm_vcf   VCF produced by vcf2vcf.pl (<basename>.norm.vcf)
  #   vep_vcf    VEP-annotated VCF (<basename>.vep.vcf)
  #   maf        MAF produced by vcf2maf.pl (<basename>.maf)
  task VEP {
    File vcf
    String sample_id
    String basename = basename(vcf,".vcf")
    File ref_dir
    String fasta
    String vep_path
    File cache
    String ncbi_build
    String species
    String vcf2maf_path
    String docker
    String cluster_config
    String disk_size

    command <<<
      set -o pipefail
      set -e
      nt=$(nproc)
      source /etc/profile

      #awk -F'\t' '{if(($1~"^#")||($1!~"^#" && $7=="PASS")){print $0}}' ${vcf} > ${sample_id}.vcf

      # Set the buffer_size based on the data size: count the records whose
      # first column starts with "chr".
      # NOTE(review): larger inputs get the smaller buffer -- presumably to
      # limit memory use; confirm this is intended.
      nrow=`awk -F'\t' '{if($1~"^chr"){print $0}}' ${vcf} | wc -l`
      if [ $nrow -lt 5000 ]; then
        buffer_size="--buffer_size 5000"
      else
        buffer_size="--buffer_size 1000"
      fi

      # Read the sample names from the #CHROM header line:
      # column 10 is the tumor sample, column 11 (optional) the normal sample.
      tumor_id=`awk -F'\t' '{if($1~"^#CHROM"){print $10}}' ${vcf}`
      normal_id=`awk -F'\t' '{if($1~"^#CHROM"){print $11}}' ${vcf}`

      if [ -n "$normal_id" ]; then
        # Paired tumor/normal VCF: use it as is.
        SAMPLE_vcf2maf="--tumor-id $tumor_id --normal-id $normal_id"
        SAMPLE_vcf2vcf="--vcf-tumor-id $tumor_id --vcf-normal-id $normal_id"
        # Must be written to .tmp1.vcf, which is the file vcf2vcf.pl reads below.
        cp ${vcf} ${sample_id}.tmp1.vcf
      else
        # Tumor-only VCF: duplicate column 10 into column 11 so that vcf2vcf.pl
        # receives two sample columns; the extra column is removed afterwards.
        SAMPLE_vcf2maf="--tumor-id $tumor_id"
        SAMPLE_vcf2vcf="--vcf-tumor-id $tumor_id --vcf-normal-id $tumor_id"
        awk -F'\t' 'OFS="\t" {if($1!~"^##" && length($11)==0) $11=$10; print $0}' ${vcf} > ${sample_id}.tmp1.vcf
      fi

      # vcf2vcf: transfer into a standardized format
      echo "Transfer the VCF file into a standardized format..."
      perl ${vcf2maf_path}/vcf2vcf.pl \
        --input-vcf ${sample_id}.tmp1.vcf --output-vcf ${sample_id}.tmp2.vcf \
        $SAMPLE_vcf2vcf \
        --ref-fasta ${ref_dir}/${fasta}

      if [ -n "$normal_id" ]; then
        cp ${sample_id}.tmp2.vcf ${basename}.norm.vcf
      else
        # Keep only the first 10 columns, dropping the duplicated sample column.
        cut -f 1,2,3,4,5,6,7,8,9,10 ${sample_id}.tmp2.vcf > ${basename}.norm.vcf
      fi

      # VEP annotation
      echo "VEP annotation..."
      perl ${vep_path}/vep --format vcf --vcf \
        --input_file ${basename}.norm.vcf --output_file ${basename}.vep.vcf \
        --assembly ${ncbi_build} \
        --species ${species} \
        --everything --af_exac \
        --offline \
        --cache --dir_cache ${cache} \
        --fasta ${ref_dir}/${fasta} \
        $buffer_size \
        --fork $nt

      # vcf2maf: VEP has already been run above, so --inhibit-vep is passed.
      echo "vcf2maf..."
      perl ${vcf2maf_path}/vcf2maf.pl \
        --inhibit-vep \
        --input-vcf ${basename}.vep.vcf --output-maf ${basename}.maf \
        $SAMPLE_vcf2maf \
        --ref-fasta ${ref_dir}/${fasta} \
        --ncbi-build ${ncbi_build} \
        --species ${species}
    >>>

    runtime {
      docker: docker
      cluster: cluster_config
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
      File norm_vcf = "${basename}.norm.vcf"
      File vep_vcf = "${basename}.vep.vcf"
      File maf = "${basename}.maf"
    }
  }