Nevar pievienot vairāk kā 25 tēmas. Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

104 rindas
4.6KB

  # Task: region
  #
  # Splits one VCF into ten per-region VCFs (one per BED file supplied as input),
  # runs `rtg vcfstat` on each of them, and then writes per-region record counts.
  #
  # Inputs
  #   sub_vcf, sub_vcf_idx : the VCF to split and its index. The index is not
  #                          named in the command; it is declared as an input so
  #                          it is localized with the VCF (presumably required by
  #                          rtg for the compressed input; confirm).
  #   satellite .. mhc     : one BED file per region class, passed to
  #                          `rtg vcffilter --include-bed`.
  #   sample_name, tag     : used as the "<sample_name>.<tag>." prefix of every
  #                          per-region output file.
  #   docker, cluster_config, disk_size : runtime settings (see runtime block).
  #
  # Outputs
  #   <region>_vcf / <region>_stat : filtered VCF and vcfstat report per region.
  #   all, snv, snv_mc, indel, indel_mc : count files with one line per region
  #                          VCF, in the same order as the names in file_list.
  task region {
    File sub_vcf
    File sub_vcf_idx
    File satellite
    File simple_repeat
    File SINE
    File LINE
    File LTR
    File low_complexity
    File SD
    File ins_breakpoint
    File del_breakpoint
    File mhc
    String sample_name
    String tag
    String docker
    String cluster_config
    String disk_size

    command <<<
      # 1) Restrict the input VCF to each region class.
      rtg vcffilter -i ${sub_vcf} -o ${sample_name}.${tag}.satellite.vcf.gz --include-bed=${satellite}
      rtg vcffilter -i ${sub_vcf} -o ${sample_name}.${tag}.simple_repeat.vcf.gz --include-bed=${simple_repeat}
      rtg vcffilter -i ${sub_vcf} -o ${sample_name}.${tag}.SINE.vcf.gz --include-bed=${SINE}
      rtg vcffilter -i ${sub_vcf} -o ${sample_name}.${tag}.LINE.vcf.gz --include-bed=${LINE}
      rtg vcffilter -i ${sub_vcf} -o ${sample_name}.${tag}.LTR.vcf.gz --include-bed=${LTR}
      rtg vcffilter -i ${sub_vcf} -o ${sample_name}.${tag}.low_complexity.vcf.gz --include-bed=${low_complexity}
      rtg vcffilter -i ${sub_vcf} -o ${sample_name}.${tag}.SD.vcf.gz --include-bed=${SD}
      rtg vcffilter -i ${sub_vcf} -o ${sample_name}.${tag}.ins_breakpoint.vcf.gz --include-bed=${ins_breakpoint}
      rtg vcffilter -i ${sub_vcf} -o ${sample_name}.${tag}.del_breakpoint.vcf.gz --include-bed=${del_breakpoint}
      rtg vcffilter -i ${sub_vcf} -o ${sample_name}.${tag}.mhc.vcf.gz --include-bed=${mhc}

      # 2) Summary statistics for each per-region VCF.
      rtg vcfstat ${sample_name}.${tag}.satellite.vcf.gz > ${sample_name}.${tag}.satellite.vcfstat.txt
      rtg vcfstat ${sample_name}.${tag}.simple_repeat.vcf.gz > ${sample_name}.${tag}.simple_repeat.vcfstat.txt
      rtg vcfstat ${sample_name}.${tag}.SINE.vcf.gz > ${sample_name}.${tag}.SINE.vcfstat.txt
      rtg vcfstat ${sample_name}.${tag}.LINE.vcf.gz > ${sample_name}.${tag}.LINE.vcfstat.txt
      rtg vcfstat ${sample_name}.${tag}.LTR.vcf.gz > ${sample_name}.${tag}.LTR.vcfstat.txt
      rtg vcfstat ${sample_name}.${tag}.low_complexity.vcf.gz > ${sample_name}.${tag}.low_complexity.vcfstat.txt
      rtg vcfstat ${sample_name}.${tag}.SD.vcf.gz > ${sample_name}.${tag}.SD.vcfstat.txt
      rtg vcfstat ${sample_name}.${tag}.ins_breakpoint.vcf.gz > ${sample_name}.${tag}.ins_breakpoint.vcfstat.txt
      rtg vcfstat ${sample_name}.${tag}.del_breakpoint.vcf.gz > ${sample_name}.${tag}.del_breakpoint.vcfstat.txt
      rtg vcfstat ${sample_name}.${tag}.mhc.vcf.gz > ${sample_name}.${tag}.mhc.vcfstat.txt

      # 3) Per-region record counts; each count file gets one line per VCF,
      #    in glob order (the same names are listed in file.list.txt below).
      #
      #    The per-region VCFs are compressed, so they have to be decompressed
      #    before grep can see the records (plain cat would feed grep the
      #    compressed bytes). Header lines are removed with an anchored pattern
      #    so that records merely containing a hash character are still counted.
      for i in *vcf.gz
      do
        # Total number of variant records in this region.
        gzip -dc "$i" | grep -v '^#' | wc -l >> ${sample_name}.${tag}.all.count.txt
        # Records whose line contains the text SNV;MC.
        gzip -dc "$i" | grep -v '^#' | grep 'SNV;MC' | wc -l >> ${sample_name}.${tag}.snv.mc.count.txt
        # ... of those, records where columns 10-13 are identical strings.
        # NOTE(review): columns 10-13 are the first four sample columns of a VCF;
        # this presumably assumes a four-sample input and compares the full
        # sample field, not only the genotype. Confirm against the caller.
        gzip -dc "$i" | grep -v '^#' | grep 'SNV;MC' | awk '{ if (($10 == $11) && ($11 == $12) && ($12 == $13)) { print } }' | wc -l >> ${sample_name}.${tag}.snv.mc.same.genotype.count.txt
        # Same two counts for records whose line contains the text INDEL;MC.
        gzip -dc "$i" | grep -v '^#' | grep 'INDEL;MC' | wc -l >> ${sample_name}.${tag}.indel.mc.count.txt
        gzip -dc "$i" | grep -v '^#' | grep 'INDEL;MC' | awk '{ if (($10 == $11) && ($11 == $12) && ($12 == $13)) { print } }' | wc -l >> ${sample_name}.${tag}.indel.mc.same.genotype.count.txt
      done

      # Names of the counted VCFs, so each line of the count files can be labelled.
      ls *vcf.gz > file.list.txt
    >>>

    runtime {
      docker: docker
      cluster: cluster_config
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
      # Per-region filtered VCFs.
      File satellite_vcf = "${sample_name}.${tag}.satellite.vcf.gz"
      File simple_repeat_vcf = "${sample_name}.${tag}.simple_repeat.vcf.gz"
      File SINE_vcf = "${sample_name}.${tag}.SINE.vcf.gz"
      File LINE_vcf = "${sample_name}.${tag}.LINE.vcf.gz"
      File LTR_vcf = "${sample_name}.${tag}.LTR.vcf.gz"
      File low_complexity_vcf = "${sample_name}.${tag}.low_complexity.vcf.gz"
      File SD_vcf = "${sample_name}.${tag}.SD.vcf.gz"
      File ins_breakpoint_vcf = "${sample_name}.${tag}.ins_breakpoint.vcf.gz"
      File del_breakpoint_vcf = "${sample_name}.${tag}.del_breakpoint.vcf.gz"
      File mhc_vcf = "${sample_name}.${tag}.mhc.vcf.gz"
      # Per-region vcfstat reports.
      File satellite_stat = "${sample_name}.${tag}.satellite.vcfstat.txt"
      File simple_repeat_stat = "${sample_name}.${tag}.simple_repeat.vcfstat.txt"
      File SINE_stat = "${sample_name}.${tag}.SINE.vcfstat.txt"
      File LINE_stat = "${sample_name}.${tag}.LINE.vcfstat.txt"
      File LTR_stat = "${sample_name}.${tag}.LTR.vcfstat.txt"
      File low_complexity_stat = "${sample_name}.${tag}.low_complexity.vcfstat.txt"
      File SD_stat = "${sample_name}.${tag}.SD.vcfstat.txt"
      File ins_breakpoint_stat = "${sample_name}.${tag}.ins_breakpoint.vcfstat.txt"
      File del_breakpoint_stat = "${sample_name}.${tag}.del_breakpoint.vcfstat.txt"
      File mhc_stat = "${sample_name}.${tag}.mhc.vcfstat.txt"
      # Count files (one line per region VCF) and the list naming those VCFs.
      File snv = "${sample_name}.${tag}.snv.mc.count.txt"
      File snv_mc = "${sample_name}.${tag}.snv.mc.same.genotype.count.txt"
      File indel = "${sample_name}.${tag}.indel.mc.count.txt"
      File indel_mc = "${sample_name}.${tag}.indel.mc.same.genotype.count.txt"
      File file_list = "file.list.txt"
      File all = "${sample_name}.${tag}.all.count.txt"
    }
  }