VEP (Variant Effect Predictor) predicts the functional effects of genomic variants. The VEP-annotated VCF is then converted to MAF format using vcf2maf.
您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

3 年前
3 年前
3 年前
3 年前
3 年前
3 年前
3 年前
3 年前
3 年前
3 年前
3 年前
3 年前
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  # Task VEP
  #
  # Annotates the PASS records of a VCF with Ensembl VEP and converts the
  # annotated VCF to MAF with vcf2maf. Steps performed by the command:
  #   1. keep header lines and records whose FILTER column is "PASS"
  #   2. normalise the VCF with vcf2vcf.pl
  #   3. annotate with VEP (offline, cache based)
  #   4. convert the annotated VCF to MAF with vcf2maf.pl (--inhibit-vep)
  #
  # Inputs:
  #   vcf            input VCF file
  #   sample_id      used for the intermediate "<sample_id>.vcf" and as the
  #                  tumor id when the VCF has 11 or more columns
  #   basename       derived from vcf; prefix of all output files
  #   tumor_id       tumor sample id, used when the VCF has fewer than 11 columns
  #   normal_id      normal sample id, used when the VCF has fewer than 11 columns
  #   ref_dir/fasta  directory and file name of the reference FASTA
  #   vep_path       directory containing the "vep" script
  #   cache          VEP cache directory (passed to --dir_cache)
  #   ncbi_build     assembly name passed to VEP and vcf2maf
  #   species        species name passed to VEP and vcf2maf
  #   vcf2maf_path   directory containing vcf2vcf.pl and vcf2maf.pl
  #   docker, cluster_config, disk_size   runtime settings
  #
  # Outputs:
  #   vep_vcf        "<basename>.vep.vcf"
  #   maf            "<basename>.maf"
  task VEP {
    File vcf
    String sample_id
    String basename = basename(vcf,".vcf")
    String tumor_id
    String normal_id
    File ref_dir
    String fasta
    String vep_path
    File cache
    String ncbi_build
    String species
    String vcf2maf_path
    String docker
    String cluster_config
    String disk_size

    command <<<
      set -o pipefail
      set -e
      nt=$(nproc)
      source /etc/profile

      # Keep header lines and PASS records only.
      awk -F'\t' '/^#/ || $7 == "PASS"' "${vcf}" > "${sample_id}.vcf"

      # Number of tab-separated columns: taken from the first data record,
      # falling back to the #CHROM header line when no record passed the
      # filter, and to 0 when neither exists. Always a single integer, so the
      # numeric test below cannot fail on empty or multi-line input.
      ncol=$(awk -F'\t' '/^#CHROM/ {n = NF} !/^#/ {n = NF; exit} END {print n + 0}' "${sample_id}.vcf")

      # NOTE(review): fewer than 11 columns selects the tumor/normal ids and
      # 11 or more selects sample_id only. This looks inverted for a paired
      # (11-column) VCF; behaviour kept as-is, confirm against upstream callers.
      if [ "$ncol" -lt 11 ]; then
        SAMPLE_vcf2maf="--tumor-id ${tumor_id} --normal-id ${normal_id}"
        SAMPLE_vcf2vcf="--vcf-tumor-id ${tumor_id} --vcf-normal-id ${normal_id}"
      else
        SAMPLE_vcf2maf="--tumor-id ${sample_id}"
        SAMPLE_vcf2vcf="--vcf-tumor-id ${sample_id}"
      fi

      # Choose the VEP buffer size from the number of variant records.
      # All non-header lines are counted, so contigs without a "chr" prefix
      # are sized correctly as well.
      nrow=$(awk '!/^#/ {n++} END {print n + 0}' "${sample_id}.vcf")
      if [ "$nrow" -lt 5000 ]; then
        buffer_size="--buffer_size 5000"
      else
        buffer_size="--buffer_size 1000"
      fi

      # vcf2vcf: rewrite the VCF into a standardized format.
      # The SAMPLE_* and buffer_size variables are intentionally unquoted:
      # each holds several command-line words.
      perl "${vcf2maf_path}/vcf2vcf.pl" \
        --input-vcf "${sample_id}.vcf" --output-vcf "${basename}.norm.vcf" \
        $SAMPLE_vcf2vcf \
        --ref-fasta "${ref_dir}/${fasta}"

      # VEP annotation
      perl "${vep_path}/vep" --format vcf --vcf \
        --assembly ${ncbi_build} \
        --species ${species} \
        --everything --af_exac \
        --offline \
        --cache --dir_cache "${cache}" \
        --fasta "${ref_dir}/${fasta}" \
        $buffer_size \
        --input_file "${basename}.norm.vcf" --output_file "${basename}.vep.vcf"

      # vcf2maf on the already-annotated VCF.
      # NOTE(review): --vep-fork presumably has no effect together with
      # --inhibit-vep; kept so the invocation is unchanged.
      perl "${vcf2maf_path}/vcf2maf.pl" \
        --inhibit-vep \
        --input-vcf "${basename}.vep.vcf" --output-maf "${basename}.maf" \
        $SAMPLE_vcf2maf \
        --ref-fasta "${ref_dir}/${fasta}" \
        --ncbi-build ${ncbi_build} \
        --species ${species} \
        --vep-fork $nt
    >>>

    runtime {
      docker: docker
      cluster: cluster_config
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
      File vep_vcf = "${basename}.vep.vcf"
      File maf = "${basename}.maf"
    }
  }