Germline & Somatic short variant discovery (SNVs + Indels) for WGS & WES.
Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.

81 lines
1.9KB

  # Task VEP: keep only PASS records of a VCF, annotate them with VEP (offline,
  # cache-based), and convert the annotated VCF to MAF with vcf2maf.
  #
  # Inputs
  #   vcf            input VCF; its name minus ".vcf" is the prefix of all outputs
  #   sample_id      declared but not referenced by the command
  #   tumor_id       value passed to vcf2maf --tumor-id
  #   normal_id      value passed to vcf2maf --normal-id (see note in the command)
  #   ref_dir/fasta  directory and file name of the reference FASTA
  #   vep_path       directory containing the `vep` script
  #   cache          VEP cache directory (--dir_cache)
  #   hg             genome build, "hg19" or "hg38"; mapped to GRCh37 / GRCh38
  #   species        species name passed to both VEP and vcf2maf
  #   vcf2maf_path   directory containing vcf2maf.pl
  #   docker, cluster_config, disk_size   runtime settings
  #
  # Outputs
  #   input_vcf  PASS-only VCF, vep_vcf  VEP-annotated VCF, maf  vcf2maf output
  task VEP {
    File vcf
    String sample_id
    String basename = basename(vcf,".vcf")
    String tumor_id
    String normal_id
    File ref_dir
    String fasta
    String vep_path
    File cache
    String hg
    String species
    String vcf2maf_path
    String docker
    String cluster_config
    String disk_size

    command <<<
      set -o pipefail
      set -e
      nt=$(nproc)

      # Keep header lines plus records whose FILTER column (7th) is exactly PASS.
      awk -F'\t' '{if(($1~"^#")||($1!~"^#" && $7=="PASS")){print $0}}' ${vcf} > ${basename}.PASS.vcf

      # Define ncbi_build. Unsupported builds abort early instead of leaving
      # ncbi_build empty for the downstream tools.
      if [ "${hg}" = "hg19" ]; then
        ncbi_build="GRCh37"
      elif [ "${hg}" = "hg38" ]; then
        ncbi_build="GRCh38"
      else
        echo "Unsupported genome build: ${hg} (expected hg19 or hg38)" >&2
        exit 1
      fi

      # Column count of the first data record; empty when no record passed the filter.
      ncol=$(awk -F'\t' '$1!~"^#"{print NF; exit}' ${basename}.PASS.vcf)

      # NOTE(review): fewer than 11 columns (i.e. at most one sample column)
      # selects BOTH ids, 11 or more selects tumor only. This looks inverted;
      # kept as in the original, confirm the intent.
      # An empty ncol takes the tumor-only branch.
      if [ -n "$ncol" ] && [ "$ncol" -lt 11 ]; then
        vcf2maf_ID="--tumor-id ${tumor_id} --normal-id ${normal_id}"
      else
        vcf2maf_ID="--tumor-id ${tumor_id}"
      fi

      # Number of records on "chr"-prefixed contigs; large inputs get a smaller VEP buffer.
      nrow=$(awk -F'\t' '$1~"^chr"' ${basename}.PASS.vcf | wc -l)
      if [ "$nrow" -lt 5000 ]; then
        buffer_size="--buffer_size 5000"
      else
        buffer_size="--buffer_size 500"
      fi

      # vep
      # $buffer_size is intentionally unquoted: it holds a flag and its value.
      perl ${vep_path}/vep --format vcf --vcf \
        --assembly "$ncbi_build" \
        --species ${species} \
        --everything --af_exac \
        --offline \
        --cache --dir_cache ${cache} \
        --fasta ${ref_dir}/${fasta} \
        $buffer_size \
        --input_file ${basename}.PASS.vcf --output_file ${basename}.PASS.vep.vcf

      # vcf2maf
      # --inhibit-vep: the input is already annotated by the step above.
      # $vcf2maf_ID is intentionally unquoted: it holds several flags.
      perl ${vcf2maf_path}/vcf2maf.pl \
        --inhibit-vep \
        --input-vcf ${basename}.PASS.vep.vcf --output-maf ${basename}.PASS.maf \
        $vcf2maf_ID \
        --ref-fasta ${ref_dir}/${fasta} \
        --ncbi-build "$ncbi_build" \
        --species ${species} \
        --vep-fork "$nt"
    >>>

    # Runtime keys are passed to the execution backend as written.
    runtime {
      docker: docker
      cluster: cluster_config
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
      File input_vcf = "${basename}.PASS.vcf"
      File vep_vcf = "${basename}.PASS.vep.vcf"
      File maf = "${basename}.PASS.maf"
    }
  }