You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

106 lines
4.8KB

  # Task `region`: split one VCF into ten per-region subsets and summarise each.
  #
  # For every region BED input the command writes
  #   <sample_name>.<tag>.<region>.vcf.gz       (rtg vcffilter --include-bed)
  #   <sample_name>.<tag>.<region>.vcfstat.txt  (rtg vcfstat on that subset)
  # and then, for every *vcf.gz file in the working directory, appends to six
  # shared text files (see the loop in the command section). The order of the
  # per-file entries is the shell glob order; file.list.txt lists the same files.
  #
  # Inputs
  #   sub_vcf           VCF to subset.
  #   sub_vcf_idx       Not referenced by the command; presumably declared so the
  #                     index is localised next to sub_vcf (TODO confirm).
  #   satellite ... mhc BED files, one per region, passed to --include-bed.
  #   sample_name, tag  Prefix parts of every output file name.
  #   docker, cluster_config, disk_size
  #                     Forwarded to the runtime section.
  task region {
    File sub_vcf
    File sub_vcf_idx
    File satellite
    File simple_repeat
    File SINE
    File LINE
    File LTR
    File low_complexity
    File SD
    File ins_breakpoint
    File del_breakpoint
    File mhc
    String sample_name
    String tag
    String docker
    String cluster_config
    String disk_size

    command <<<
      # Fail fast: without this the exit status of the task was that of the final
      # `ls`, so a failed rtg step went unnoticed as long as any subset existed.
      set -eo pipefail

      # subset_region LABEL BED
      # Writes the LABEL subset of the input VCF and its vcfstat report.
      subset_region() {
        rtg vcffilter -i ${sub_vcf} -o "${sample_name}.${tag}.$1.vcf.gz" --include-bed="$2"
        rtg vcfstat "${sample_name}.${tag}.$1.vcf.gz" > "${sample_name}.${tag}.$1.vcfstat.txt"
      }

      subset_region satellite "${satellite}"
      subset_region simple_repeat "${simple_repeat}"
      subset_region SINE "${SINE}"
      subset_region LINE "${LINE}"
      subset_region LTR "${LTR}"
      subset_region low_complexity "${low_complexity}"
      subset_region SD "${SD}"
      subset_region ins_breakpoint "${ins_breakpoint}"
      subset_region del_breakpoint "${del_breakpoint}"
      subset_region mhc "${mhc}"

      # Per-file summaries. awk is used instead of grep because grep exits 1 when
      # nothing matches, which would abort the task under `set -e`; awk exits 0.
      # Header lines are recognised by a leading '#' only, so a data record that
      # merely contains '#' somewhere is no longer discarded.
      for i in *vcf.gz
      do
        # NOTE(review): despite the ".count.txt" suffix these two files receive
        # the matching records themselves, not a count. Kept unchanged; confirm
        # with whatever consumes them.
        zcat "$i" | awk '!/^#/ && /SNV/' >> ${sample_name}.${tag}.all.snv.count.txt
        zcat "$i" | awk '!/^#/ && /INDEL/' >> ${sample_name}.${tag}.all.indel.count.txt

        # One number per input file: records tagged SNV;MC / INDEL;MC, and the
        # subset of those whose columns 10-13 are all identical.
        zcat "$i" | awk '!/^#/ && /SNV;MC/ { n++ } END { print n + 0 }' >> ${sample_name}.${tag}.snv.mc.count.txt
        zcat "$i" | awk '!/^#/ && /SNV;MC/ && ($10 == $11) && ($11 == $12) && ($12 == $13) { n++ } END { print n + 0 }' >> ${sample_name}.${tag}.snv.mc.same.genotype.count.txt
        zcat "$i" | awk '!/^#/ && /INDEL;MC/ { n++ } END { print n + 0 }' >> ${sample_name}.${tag}.indel.mc.count.txt
        zcat "$i" | awk '!/^#/ && /INDEL;MC/ && ($10 == $11) && ($11 == $12) && ($12 == $13) { n++ } END { print n + 0 }' >> ${sample_name}.${tag}.indel.mc.same.genotype.count.txt
      done

      # Record which files the loop above processed.
      ls *vcf.gz > file.list.txt
    >>>

    runtime {
      docker: docker
      cluster: cluster_config
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
      # Per-region VCF subsets.
      File satellite_vcf = "${sample_name}.${tag}.satellite.vcf.gz"
      File simple_repeat_vcf = "${sample_name}.${tag}.simple_repeat.vcf.gz"
      File SINE_vcf = "${sample_name}.${tag}.SINE.vcf.gz"
      File LINE_vcf = "${sample_name}.${tag}.LINE.vcf.gz"
      File LTR_vcf = "${sample_name}.${tag}.LTR.vcf.gz"
      File low_complexity_vcf = "${sample_name}.${tag}.low_complexity.vcf.gz"
      File SD_vcf = "${sample_name}.${tag}.SD.vcf.gz"
      File ins_breakpoint_vcf = "${sample_name}.${tag}.ins_breakpoint.vcf.gz"
      File del_breakpoint_vcf = "${sample_name}.${tag}.del_breakpoint.vcf.gz"
      File mhc_vcf = "${sample_name}.${tag}.mhc.vcf.gz"

      # Per-region vcfstat reports.
      File satellite_stat = "${sample_name}.${tag}.satellite.vcfstat.txt"
      File simple_repeat_stat = "${sample_name}.${tag}.simple_repeat.vcfstat.txt"
      File SINE_stat = "${sample_name}.${tag}.SINE.vcfstat.txt"
      File LINE_stat = "${sample_name}.${tag}.LINE.vcfstat.txt"
      File LTR_stat = "${sample_name}.${tag}.LTR.vcfstat.txt"
      File low_complexity_stat = "${sample_name}.${tag}.low_complexity.vcfstat.txt"
      File SD_stat = "${sample_name}.${tag}.SD.vcfstat.txt"
      File ins_breakpoint_stat = "${sample_name}.${tag}.ins_breakpoint.vcfstat.txt"
      File del_breakpoint_stat = "${sample_name}.${tag}.del_breakpoint.vcfstat.txt"
      File mhc_stat = "${sample_name}.${tag}.mhc.vcfstat.txt"

      # Summaries accumulated across all subset files. Note the output names:
      # `snv` / `indel` are the MC counts, `snv_mc` / `indel_mc` the
      # same-genotype counts.
      File snv = "${sample_name}.${tag}.snv.mc.count.txt"
      File snv_mc = "${sample_name}.${tag}.snv.mc.same.genotype.count.txt"
      File indel = "${sample_name}.${tag}.indel.mc.count.txt"
      File indel_mc = "${sample_name}.${tag}.indel.mc.same.genotype.count.txt"
      File file_list = "file.list.txt"
      File all_snv = "${sample_name}.${tag}.all.snv.count.txt"
      File all_indel = "${sample_name}.${tag}.all.indel.count.txt"
    }
  }