VEP (Variant Effect Predictor) predicts the functional effects of genomic variants. The annotated VCF is then converted to MAF format using vcf2maf.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

70 lines
2.2KB

  # Task vcf2maf
  #
  # Splits an input VCF into non-BND and BND records, converts the non-BND
  # records to MAF with vcf2maf.pl, and annotates the BND records with VEP
  # in tab-delimited output mode.
  #
  # Outputs:
  #   input_vcf  - the (optionally PASS-filtered) VCF that was fed to both tools
  #   maf        - MAF produced by vcf2maf.pl from the non-BND records
  #   bnd_vep    - VEP tab output for the SVTYPE=BND records
  task vcf2maf {
    # Input VCF; its name minus the ".vcf" suffix is used to name the outputs.
    File vcf
    # Prefix for the intermediate VCF files written in the working directory.
    String sample_id
    String basename = basename(vcf, ".vcf")
    # Passed to vcf2maf.pl as --tumor-id / --normal-id.
    String tumor_id
    String normal_id
    # Directory holding the reference FASTA, and the FASTA file name inside it.
    File ref_dir
    String fasta
    # Directory containing the `vep` script.
    String vep_path
    # VEP cache location (vcf2maf --vep-data, vep --dir).
    File cache
    # Assembly name (vcf2maf --ncbi-build, vep --assembly).
    String ncbi_build
    String species
    # When true, only records whose FILTER column (field 7) equals PASS are kept.
    Boolean only_pass
    # Directory containing vcf2maf.pl.
    String vcf2maf_path
    # Runtime settings; cluster_config and disk_size are passed through to the
    # backend-specific `cluster` and `dataDisk` runtime attributes.
    String docker
    String cluster_config
    String disk_size

    command <<<
      set -o pipefail
      set -e
      nt=$(nproc)

      # The WDL Boolean is interpolated as the string true/false and compared
      # explicitly; a bare word inside [ ] is always a non-empty string and
      # would make the test unconditionally true.
      if [ "${only_pass}" = "true" ]; then
        # Keep header lines plus records whose FILTER column is PASS.
        awk -F'\t' '{if(($1~"^#")||($1!~"^#" && $7=="PASS")){print $0}}' ${vcf} > ${sample_id}.INPUT.vcf
      else
        cp ${vcf} ${sample_id}.INPUT.vcf
      fi

      # Split the VCF: header + non-BND records go to vcf2maf,
      # header + BND (SVTYPE=BND in INFO) records go to VEP only.
      awk -F'\t' '{if(($1~"^#")||($8!~".*SVTYPE=BND.*")){print $0}}' ${sample_id}.INPUT.vcf > ${sample_id}.INPUT.vcf2maf.vcf
      awk -F'\t' '{if(($1~"^#")||($8~".*SVTYPE=BND.*")){print $0}}' ${sample_id}.INPUT.vcf > ${sample_id}.INPUT.VEP.vcf

      # vcf2maf: convert the non-BND records to MAF
      perl ${vcf2maf_path}/vcf2maf.pl \
        --input-vcf ${sample_id}.INPUT.vcf2maf.vcf --output-maf ${basename}.maf \
        --tumor-id ${tumor_id} --normal-id ${normal_id} \
        --ref-fasta ${ref_dir}/${fasta} \
        --vep-path ${vep_path} \
        --vep-data ${cache} \
        --ncbi-build ${ncbi_build} \
        --species ${species} \
        --vep-fork $nt

      # vep: only annotate the BND records
      perl ${vep_path}/vep \
        --input_file ${sample_id}.INPUT.VEP.vcf --output_file ${basename}.BND.VEP.txt \
        --fasta ${ref_dir}/${fasta} \
        --dir ${cache} \
        --assembly ${ncbi_build} \
        --species ${species} \
        --fork $nt \
        --no_progress --no_stats --buffer_size 5000 \
        --sift b --ccds --uniprot --hgvs --symbol --numbers --domains \
        --gene_phenotype --canonical --protein --biotype --tsl --variant_class \
        --shift_hgvs 1 --check_existing --total_length --allele_number \
        --no_escape --xref_refseq --failed 1 --tab --flag_pick_allele \
        --pick_order canonical,tsl,biotype,rank,ccds,length \
        --format vcf --force_overwrite --offline --pubmed --regulatory
    >>>

    runtime {
      docker: docker
      cluster: cluster_config
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
      File input_vcf = "${sample_id}.INPUT.vcf"
      File maf = "${basename}.maf"
      File bnd_vep = "${basename}.BND.VEP.txt"
    }
  }