VEP (Variant Effect Predictor) predicts the functional effects of genomic variants. The annotated VCF is then converted to MAF format using vcf2maf.
No puede seleccionar más de 25 temas Los temas deben comenzar con una letra o número, pueden incluir guiones ('-') y pueden tener hasta 35 caracteres de largo.

112 líneas
3.3KB

  # Task VEP: keep only PASS records of a VCF, normalise it with vcf2vcf.pl,
  # annotate it with Ensembl VEP, and convert the annotated VCF to MAF with
  # vcf2maf.pl (VEP is not re-run by vcf2maf because --inhibit-vep is set).
  #
  # Outputs:
  #   vep_vcf  - "<basename>.vep.vcf", the VEP-annotated VCF
  #   maf      - "<basename>.maf", the MAF produced from vep_vcf
  task VEP {
    File vcf                                  # input VCF to filter and annotate
    String sample_id                          # names the filtered VCF; also used as --tumor-id in one branch below
    String basename = basename(vcf,".vcf")    # prefix of the .norm.vcf / .vep.vcf / .maf files
    String tumor_id                           # passed as --tumor-id in the paired branch below
    String normal_id                          # passed as --normal-id in the paired branch below
    File ref_dir                              # directory that contains the reference FASTA
    String fasta                              # FASTA file name inside ref_dir
    String vep_path                           # directory that contains the `vep` script
    File cache                                # passed to VEP as --dir_cache
    String ncbi_build                         # passed as --assembly (VEP) and --ncbi-build (vcf2maf)
    String species                            # passed as --species to VEP and vcf2maf
    String vcf2maf_path                       # directory that contains vcf2vcf.pl and vcf2maf.pl
    String docker                             # runtime: container image
    String cluster_config                     # runtime: cluster setting
    String disk_size                          # runtime: interpolated into the dataDisk string

    command <<<
      set -o pipefail
      set -e
      nt=$(nproc)

      # Keep header lines plus the records whose 7th column (FILTER) is exactly PASS.
      awk -F'\t' '$1 ~ /^#/ || $7 == "PASS"' ${vcf} > ${sample_id}.vcf

      # Judge the SAMPLE info of the VCF from the field count of the first data
      # record. ncol is empty when no record passed the filter; in that case the
      # else branch is taken.
      # NOTE(review): a VCF with one sample column has 10 fields and one with two
      # sample columns has 11, so this passes --normal-id when there are fewer
      # than 11 fields. Confirm that this direction is intended.
      ncol=$(awk -F'\t' '$1 !~ /^#/ {print NF; exit}' ${sample_id}.vcf)
      if [ -n "$ncol" ] && [ "$ncol" -lt 11 ]; then
        SAMPLE="--tumor-id ${tumor_id} --normal-id ${normal_id}"
      else
        SAMPLE="--tumor-id ${sample_id}"
      fi

      # Set the buffer_size based on the data size (number of records whose
      # first column starts with "chr").
      nrow=$(awk -F'\t' '$1 ~ /^chr/ {n++} END {print n+0}' ${sample_id}.vcf)
      if [ "$nrow" -lt 5000 ]; then
        buffer_size="--buffer_size 5000"
      else
        buffer_size="--buffer_size 1000"
      fi

      # ---- Disabled legacy invocations, kept for reference ----
      # Extract the BND variants from VCF
      # awk -F'\t' '{if(($1~"^#")||($8!~".*SVTYPE=BND.*")){print $0}}' ${sample_id}.PASS.vcf > ${sample_id}.PASS.vcf2maf.vcf
      # awk -F'\t' '{if(($1~"^#")||($8~".*SVTYPE=BND.*")){print $0}}' ${sample_id}.PASS.vcf > ${sample_id}.INPUT.VEP.vcf
      # vcf2maf
      # perl ${vcf2maf_path}/vcf2maf.pl \
      # --input-vcf ${sample_id}.PASS.vcf2maf.vcf --output-maf ${basename}.maf \
      # --tumor-id ${tumor_id} --normal-id ${normal_id} \
      # --ref-fasta ${ref_dir}/${fasta} \
      # --vep-path ${vep_path} \
      # --vep-data ${cache} \
      # --ncbi-build ${ncbi_build} \
      # --species ${species} \
      # --vep-fork $nt
      # vep
      # perl ${vep_path}/vep \
      # --input_file ${sample_id}.vcf --output_file ${basename}.PASS.vep.vcf \
      # --fasta ${ref_dir}/${fasta} \
      # --dir ${cache} \
      # --assembly ${ncbi_build} \
      # --species ${species} \
      # --fork $nt \
      # --format vcf --vcf \
      # --no_progress \
      # --no_stats \
      # $buffer_size \
      # --sift b \
      # --ccds --uniprot --hgvs --symbol --numbers --domains --gene_phenotype --canonical --protein --biotype --uniprot --tsl --variant_class --shift_hgvs 1 --check_existing --total_length --allele_number --no_escape --xref_refseq --failed 1 --flag_pick_allele --pick_order canonical,tsl,biotype,rank,ccds,length --force_overwrite --offline --pubmed --regulatory
      # ---- End of disabled legacy invocations ----

      # vcf2vcf: transfer into a standardized format.
      # The reference is ref_dir/fasta, the same path the VEP and vcf2maf steps
      # use; the task declares no separate `reference` input.
      # $SAMPLE is intentionally unquoted so it splits into separate arguments.
      perl ${vcf2maf_path}/vcf2vcf.pl \
        --input-vcf ${sample_id}.vcf --output-vcf ${basename}.norm.vcf \
        $SAMPLE \
        --ref-fasta ${ref_dir}/${fasta}

      # VEP annotation
      perl ${vep_path}/vep --format vcf --vcf \
        --assembly ${ncbi_build} \
        --species ${species} \
        --everything --af_exac \
        --offline \
        --cache --dir_cache ${cache} \
        --fasta ${ref_dir}/${fasta} \
        $buffer_size \
        --input_file ${basename}.norm.vcf --output_file ${basename}.vep.vcf

      # vcf2maf: convert the annotated VCF to MAF without running VEP again.
      # NOTE(review): --vep-fork is presumably unused when --inhibit-vep is set; confirm.
      perl ${vcf2maf_path}/vcf2maf.pl \
        --inhibit-vep \
        --input-vcf ${basename}.vep.vcf --output-maf ${basename}.maf \
        $SAMPLE \
        --ref-fasta ${ref_dir}/${fasta} \
        --ncbi-build ${ncbi_build} \
        --species ${species} \
        --vep-fork $nt
    >>>

    runtime {
      docker: docker
      cluster: cluster_config
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
      File vep_vcf = "${basename}.vep.vcf"
      File maf = "${basename}.maf"
    }
  }