VEP (Variant Effect Predictor) predicts the functional effects of genomic variants. The VEP-annotated VCF is then converted to MAF format using vcf2maf.
您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

3 年前
3 年前
3 年前
3 年前
3 年前
3 年前
3 年前
3 年前
3 年前
3 年前
3 年前
3 年前
3 年前
3 年前
3 年前
3 年前
3 年前
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105
  # Task VEP
  #
  # Normalizes a VCF with vcf2vcf.pl, annotates it with VEP, and converts the
  # annotated VCF to MAF with vcf2maf.pl.
  #
  # Inputs:
  #   vcf            - input VCF; column 10 of the #CHROM line is used as the
  #                    tumor sample ID and column 11 (if present) as the normal
  #   sample_id      - prefix for intermediate files
  #   basename       - prefix for output files; defaults to the VCF file name
  #                    with the ".vcf" suffix removed
  #   ref_dir/fasta  - directory and file name of the reference FASTA
  #   vep_path       - directory containing the `vep` script
  #   cache          - VEP cache directory (passed to --dir_cache)
  #   ncbi_build     - assembly name passed to VEP and vcf2maf
  #   species        - species name passed to VEP and vcf2maf
  #   vcf2maf_path   - directory containing vcf2vcf.pl and vcf2maf.pl
  #   docker, cluster_config, disk_size - runtime settings (backend-specific)
  #
  # Outputs:
  #   norm_vcf - normalized VCF produced by vcf2vcf
  #   vep_vcf  - VEP-annotated VCF
  #   maf      - MAF produced by vcf2maf
  task VEP {
    File vcf
    String sample_id
    String basename = basename(vcf,".vcf")
    File ref_dir
    String fasta
    String vep_path
    File cache
    String ncbi_build
    String species
    String vcf2maf_path
    String docker
    String cluster_config
    String disk_size

    command <<<
      set -o pipefail
      set -e
      nt=$(nproc)
      source /etc/profile
      echo ${sample_id}
      echo ${basename}

      #awk -F'\t' '{if(($1~"^#")||($1!~"^#" && $7=="PASS")){print $0}}' ${vcf} > ${sample_id}.vcf

      # Set the buffer_size based on the data size
      # (only records whose first column starts with "chr" are counted)
      nrow=`awk -F'\t' '{if($1~"^chr"){print $0}}' ${vcf} | wc -l`
      if [ "$nrow" -lt 5000 ]; then
        buffer_size="--buffer_size 5000"
      else
        buffer_size="--buffer_size 1000"
      fi

      # Judge the SAMPLE info of vcf file:
      # column 10 of the #CHROM line is the tumor, column 11 (optional) the normal
      tumor_id=`awk -F'\t' '{if($1~"^#CHROM"){print $10}}' ${vcf}`
      normal_id=`awk -F'\t' '{if($1~"^#CHROM"){print $11}}' ${vcf}`

      if [ -n "$normal_id" ]; then
        SAMPLE_vcf2maf="--tumor-id $tumor_id --normal-id $normal_id"
        SAMPLE_vcf2vcf="--vcf-tumor-id $tumor_id --vcf-normal-id $normal_id"
        # Paired VCF is used as-is; it must be written to the .tmp1.vcf name
        # that vcf2vcf reads below.
        cp ${vcf} ${sample_id}.tmp1.vcf
      else
        SAMPLE_vcf2maf="--tumor-id $tumor_id"
        # Add a column and remove it after vcf2vcf
        SAMPLE_vcf2vcf="--vcf-tumor-id $tumor_id --vcf-normal-id $tumor_id"
        awk -F'\t' 'OFS="\t" {if($1!~"^##" && length($11)==0) $11=$10; print $0}' ${vcf} > ${sample_id}.tmp1.vcf
      fi

      # vcf2vcf: transfer into a standardized format
      echo "Transfer the VCF file into a standardized format..."
      perl ${vcf2maf_path}/vcf2vcf.pl \
        --input-vcf ${sample_id}.tmp1.vcf --output-vcf ${sample_id}.tmp2.vcf \
        $SAMPLE_vcf2vcf \
        --ref-fasta ${ref_dir}/${fasta}

      if [ -n "$normal_id" ]; then
        cp ${sample_id}.tmp2.vcf ${basename}.norm.vcf
      else
        # Drop the duplicated sample column that was added before vcf2vcf
        cut -f 1,2,3,4,5,6,7,8,9,10 ${sample_id}.tmp2.vcf > ${basename}.norm.vcf
      fi

      # VEP annotation
      echo "VEP annotation..."
      perl ${vep_path}/vep --format vcf --vcf \
        --input_file ${basename}.norm.vcf --output_file ${basename}.vep.vcf \
        --assembly ${ncbi_build} \
        --species ${species} \
        --everything --af_exac \
        --offline \
        --cache --dir_cache ${cache} \
        --fasta ${ref_dir}/${fasta} \
        $buffer_size \
        --fork $nt

      # vcf2maf (VEP already ran above, hence --inhibit-vep)
      echo "vcf2maf..."
      perl ${vcf2maf_path}/vcf2maf.pl \
        --inhibit-vep \
        --input-vcf ${basename}.vep.vcf --output-maf ${basename}.maf \
        $SAMPLE_vcf2maf \
        --ref-fasta ${ref_dir}/${fasta} \
        --ncbi-build ${ncbi_build} \
        --species ${species}
    >>>

    # NOTE(review): cluster/systemDisk/dataDisk look like backend-specific
    # runtime keys - confirm against the execution backend in use.
    runtime {
      docker: docker
      cluster: cluster_config
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
      File norm_vcf = "${basename}.norm.vcf"
      File vep_vcf = "${basename}.vep.vcf"
      File maf = "${basename}.maf"
    }
  }