Annotation of genetic variants called against the human reference genomes hg19 and hg38.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

57 lines
1.9KB

  # Task: annovar
  #
  # Keeps the VCF header lines plus the records whose FILTER column (7th
  # tab-separated field) is exactly "PASS", then annotates the filtered VCF
  # with ANNOVAR's table_annovar.pl using a build-specific protocol list.
  #
  # Inputs:
  #   vcf               Input VCF; its name minus ".vcf" prefixes all outputs.
  #   hg                Genome build passed to -buildver; must be "hg19" or "hg38".
  #   annovar_database  ANNOVAR database location handed to table_annovar.pl.
  #   docker            Container image used for the task.
  #   cluster_config    Value forwarded as the `cluster` runtime attribute.
  #   disk_size         Size spliced into the `dataDisk` runtime attribute.
  #
  # Outputs:
  #   pass_vcf          The PASS-only VCF produced by the awk filter.
  #   avinput           <basename>.avinput written by table_annovar.pl.
  #   multianno_txt     <basename>.<hg>_multianno.txt
  #   multianno_vcf     <basename>.<hg>_multianno.vcf
  task annovar {
    File vcf
    String basename = basename(vcf, ".vcf")
    String hg
    File annovar_database
    String docker
    String cluster_config
    String disk_size

    command <<<
      set -o pipefail
      set -e

      # Use every CPU visible to the container for ANNOVAR's -thread option.
      nt=$(nproc)

      # Keep header lines (starting with '#') and records whose FILTER is PASS.
      awk -F'\t' '$1 ~ /^#/ || $7 == "PASS"' "${vcf}" > "${basename}.PASS.vcf"

      # 1000g2015aug contains 6 datasets: ALL.sites.2015_08,AFR.sites.2015_08,AMR.sites.2015_08,EAS.sites.2015_08,EUR.sites.2015_08,SAS.sites.2015_08
      # Alternative protocol set kept for reference:
      #-protocol refGene,cytoBand,genomicSuperDups,ljb26_all,dbnsfp35c,intervar_20180118,cosmic70,exac03,gnomad211_exome,clinvar_20210501 \
      #-operation g,r,r,f,f,f,f,f,f,f \

      # Dispatch on the genome build. Anything other than hg19/hg38 is rejected
      # here, so the task fails with a clear message instead of exiting 0 and
      # later failing on missing output files.
      case "${hg}" in
        hg38)
          /installations/annovar/table_annovar.pl "${basename}.PASS.vcf" \
            "${annovar_database}" -buildver "${hg}" \
            -out "${basename}" -remove \
            -protocol refGene,ensGene,knownGene,esp6500siv2_all,ALL.sites.2015_08,AFR.sites.2015_08,AMR.sites.2015_08,EAS.sites.2015_08,EUR.sites.2015_08,SAS.sites.2015_08,avsnp147,dbnsfp33a,clinvar_20210501,gnomad_genome,dbscsnv11,dbnsfp31a_interpro \
            -operation g,g,g,f,f,f,f,f,f,f,f,f,f,f,f,f \
            --gff3dbfile hg38_rmsk.gff \
            -nastring . -vcfinput -thread "$nt"
          ;;
        hg19)
          /installations/annovar/table_annovar.pl "${basename}.PASS.vcf" \
            "${annovar_database}" -buildver "${hg}" \
            -out "${basename}" -remove \
            -protocol refGene,cytoBand,genomicSuperDups,ljb26_all,snp138,cosmic78,intervar_20170202,popfreq_all_20150413,clinvar_20190305 \
            -operation g,r,r,f,f,f,f,f,f \
            -nastring . -vcfinput -thread "$nt"
          ;;
        *)
          echo "annovar: unsupported genome build '${hg}' (expected hg19 or hg38)" >&2
          exit 1
          ;;
      esac
    >>>

    # NOTE(review): cluster/systemDisk/dataDisk look like backend-specific
    # runtime attributes (presumably Cromwell's Alibaba Cloud BCS backend) - confirm.
    runtime {
      docker: docker
      cluster: cluster_config
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
      File pass_vcf = "${basename}.PASS.vcf"
      File avinput = "${basename}.avinput"
      File multianno_txt = "${basename}.${hg}_multianno.txt"
      File multianno_vcf = "${basename}.${hg}_multianno.vcf"
    }
  }