Du kan inte välja fler än 25 ämnen. Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

SepSnvIndel.wdl 2.8KB

5 år sedan
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
  # SepSnvIndel
  #
  # Splits a normalized VCF into SNV and indel subsets, each further divided
  # into a "train" part (records also present in keeped_vcf) and a "test" part
  # (records absent from keeped_vcf).
  #
  # Inputs:
  #   vcf            - input VCF; its basename minus ".normed.vcf" is the default
  #                    sampleName used as prefix for every output file.
  #   keeped_vcf     - uncompressed VCF of selected records; it is bgzipped and
  #                    indexed here and then used for --include-vcf/--exclude-vcf.
  #   docker         - container image for the task.
  #   cluster_config - value passed to the "cluster" runtime attribute.
  #   disk_size      - size inserted into the "dataDisk" runtime attribute.
  #
  # Records whose ALT column (field 5) contains a comma are set aside into
  # <sampleName>.MNP.vcf and excluded from all other outputs.
  # NOTE(review): a comma in ALT means a multi-allelic record; the "MNP" file
  # naming is kept unchanged because it is part of the task's output interface.
  task SepSnvIndel {
    File vcf
    String sampleName = basename(vcf,".normed.vcf")
    File keeped_vcf
    String docker
    String cluster_config
    String disk_size

    command <<<
      # Stop at the first failing step instead of continuing with missing or
      # empty intermediate files.
      set -e -o pipefail

      # Header lines are those starting with '#'. The pattern is anchored so a
      # record that merely contains '#' in one of its fields is not copied
      # into the header (and thereby duplicated in the rebuilt files).
      grep '^#' "${vcf}" > header

      # Partition the records on whether ALT (field 5) contains a comma.
      awk '!/^#/ && $5 !~ /,/' "${vcf}" > removed.body
      awk '!/^#/ && $5 ~ /,/' "${vcf}" > MNP.body
      cat header removed.body > "${sampleName}.MNPremoved.vcf"
      cat header MNP.body > "${sampleName}.MNP.vcf"

      # Compress and index both the cleaned VCF and the selection VCF.
      rtg bgzip "${sampleName}.MNPremoved.vcf"
      rtg index -f vcf "${sampleName}.MNPremoved.vcf.gz"
      rtg bgzip -c "${keeped_vcf}" > all.selected.vcf.gz
      rtg index -f vcf all.selected.vcf.gz

      # train = records present in the selection, test = records absent from it.
      rtg vcffilter -i "${sampleName}.MNPremoved.vcf.gz" -o "${sampleName}.normed.snv.train.vcf.gz" --include-vcf=all.selected.vcf.gz --snps-only
      rtg vcffilter -i "${sampleName}.MNPremoved.vcf.gz" -o "${sampleName}.normed.snv.test.vcf.gz" --exclude-vcf=all.selected.vcf.gz --snps-only
      rtg vcffilter -i "${sampleName}.MNPremoved.vcf.gz" -o "${sampleName}.normed.indel.train.vcf.gz" --include-vcf=all.selected.vcf.gz --non-snps-only
      rtg vcffilter -i "${sampleName}.MNPremoved.vcf.gz" -o "${sampleName}.normed.indel.test.vcf.gz" --exclude-vcf=all.selected.vcf.gz --non-snps-only

      # Unsplit SNV / indel subsets of the cleaned VCF.
      rtg vcffilter -i "${sampleName}.MNPremoved.vcf.gz" -o "${sampleName}.normed.snv.vcf.gz" --snps-only
      rtg vcffilter -i "${sampleName}.MNPremoved.vcf.gz" -o "${sampleName}.normed.indel.vcf.gz" --non-snps-only

      # Also provide uncompressed copies of the four train/test files; the
      # compressed files are kept because they are declared as outputs too.
      for part in snv.train snv.test indel.train indel.test; do
        gzip -dc "${sampleName}.normed.$part.vcf.gz" > "${sampleName}.normed.$part.vcf"
      done
    >>>

    runtime {
      docker:docker
      cluster: cluster_config
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    # The .tbi outputs are not created by an explicit command above; presumably
    # rtg vcffilter writes them next to each .vcf.gz it produces - TODO confirm.
    output {
      File MNP="${sampleName}.MNP.vcf"
      File snv_gz = "${sampleName}.normed.snv.vcf.gz"
      File snv_idx = "${sampleName}.normed.snv.vcf.gz.tbi"
      File indel_gz = "${sampleName}.normed.indel.vcf.gz"
      File indel_idx = "${sampleName}.normed.indel.vcf.gz.tbi"
      File snv_train = "${sampleName}.normed.snv.train.vcf"
      File snv_test = "${sampleName}.normed.snv.test.vcf"
      File indel_train = "${sampleName}.normed.indel.train.vcf"
      File indel_test = "${sampleName}.normed.indel.test.vcf"
      File snv_train_gz = "${sampleName}.normed.snv.train.vcf.gz"
      File snv_test_gz = "${sampleName}.normed.snv.test.vcf.gz"
      File indel_train_gz = "${sampleName}.normed.indel.train.vcf.gz"
      File indel_test_gz = "${sampleName}.normed.indel.test.vcf.gz"
      File snv_train_idx = "${sampleName}.normed.snv.train.vcf.gz.tbi"
      File snv_test_idx = "${sampleName}.normed.snv.test.vcf.gz.tbi"
      File indel_train_idx = "${sampleName}.normed.indel.train.vcf.gz.tbi"
      File indel_test_idx = "${sampleName}.normed.indel.test.vcf.gz.tbi"
    }
  }