You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

121 lines
5.2KB

  # Task `region`: subsets one VCF by ten BED files with `rtg vcffilter --include-bed`,
  # runs `rtg vcfstat` on each subset, and then derives per-subset summary tables
  # (SNV/INDEL line counts, "MC"-annotated counts, and two values pulled from the
  # vcfstat reports).
  #
  # Inputs
  #   sub_vcf            VCF passed to every `rtg vcffilter -i` call.
  #   sub_vcf_idx        Not referenced in the command; presumably declared so the index
  #                      is localized next to sub_vcf for rtg -- TODO confirm.
  #   satellite ... mhc  One BED file per region class; each is used as --include-bed.
  #   sample_name, tag   Joined as "<sample_name>.<tag>." to prefix every output file.
  #   docker, cluster_config, disk_size
  #                      Forwarded to the runtime section unchanged.
  #
  # Outputs
  #   *_vcf / *_stat     The ten filtered VCFs and their vcfstat reports.
  #   snv, snv_mc, indel, indel_mc, all_snv, all_indel, ti_tv, het_homo
  #                      One value per line, one line per file matched by the shell glob.
  #   file_list          Output of `ls *vcf.gz`; presumably used to label the rows of the
  #                      count tables -- verify that consumers rely on the same ordering.
  task region {
    File sub_vcf
    File sub_vcf_idx
    File satellite
    File simple_repeat
    File SINE
    File LINE
    File LTR
    File low_complexity
    File SD
    File ins_breakpoint
    File del_breakpoint
    File mhc
    String sample_name
    String tag
    String docker
    String cluster_config
    String disk_size

    command <<<
      # Abort on the first failing command. Without this the script's exit status is
      # that of the final `ls`, so a failed rtg step could go unnoticed.
      set -e

      # 1) Restrict the input VCF to each region class.
      rtg vcffilter -i ${sub_vcf} -o ${sample_name}.${tag}.satellite.vcf.gz --include-bed=${satellite}
      rtg vcffilter -i ${sub_vcf} -o ${sample_name}.${tag}.simple_repeat.vcf.gz --include-bed=${simple_repeat}
      rtg vcffilter -i ${sub_vcf} -o ${sample_name}.${tag}.SINE.vcf.gz --include-bed=${SINE}
      rtg vcffilter -i ${sub_vcf} -o ${sample_name}.${tag}.LINE.vcf.gz --include-bed=${LINE}
      rtg vcffilter -i ${sub_vcf} -o ${sample_name}.${tag}.LTR.vcf.gz --include-bed=${LTR}
      rtg vcffilter -i ${sub_vcf} -o ${sample_name}.${tag}.low_complexity.vcf.gz --include-bed=${low_complexity}
      rtg vcffilter -i ${sub_vcf} -o ${sample_name}.${tag}.SD.vcf.gz --include-bed=${SD}
      rtg vcffilter -i ${sub_vcf} -o ${sample_name}.${tag}.ins_breakpoint.vcf.gz --include-bed=${ins_breakpoint}
      rtg vcffilter -i ${sub_vcf} -o ${sample_name}.${tag}.del_breakpoint.vcf.gz --include-bed=${del_breakpoint}
      rtg vcffilter -i ${sub_vcf} -o ${sample_name}.${tag}.mhc.vcf.gz --include-bed=${mhc}

      # 2) Summary statistics for each filtered VCF.
      rtg vcfstat ${sample_name}.${tag}.satellite.vcf.gz > ${sample_name}.${tag}.satellite.vcfstat.txt
      rtg vcfstat ${sample_name}.${tag}.simple_repeat.vcf.gz > ${sample_name}.${tag}.simple_repeat.vcfstat.txt
      rtg vcfstat ${sample_name}.${tag}.SINE.vcf.gz > ${sample_name}.${tag}.SINE.vcfstat.txt
      rtg vcfstat ${sample_name}.${tag}.LINE.vcf.gz > ${sample_name}.${tag}.LINE.vcfstat.txt
      rtg vcfstat ${sample_name}.${tag}.LTR.vcf.gz > ${sample_name}.${tag}.LTR.vcfstat.txt
      rtg vcfstat ${sample_name}.${tag}.low_complexity.vcf.gz > ${sample_name}.${tag}.low_complexity.vcfstat.txt
      rtg vcfstat ${sample_name}.${tag}.SD.vcf.gz > ${sample_name}.${tag}.SD.vcfstat.txt
      rtg vcfstat ${sample_name}.${tag}.ins_breakpoint.vcf.gz > ${sample_name}.${tag}.ins_breakpoint.vcfstat.txt
      rtg vcfstat ${sample_name}.${tag}.del_breakpoint.vcf.gz > ${sample_name}.${tag}.del_breakpoint.vcfstat.txt
      rtg vcfstat ${sample_name}.${tag}.mhc.vcf.gz > ${sample_name}.${tag}.mhc.vcfstat.txt

      # 3) Per-file line counts, appended in glob order (one row per matched file).
      #    Header lines are removed with an anchored '^#' so that a record which merely
      #    contains a '#' somewhere is still counted.
      for i in *vcf.gz
      do
        zcat "$i" | grep -v '^#' | grep 'SNV' | wc -l >> ${sample_name}.${tag}.all.snv.count.txt
        zcat "$i" | grep -v '^#' | grep 'INDEL' | wc -l >> ${sample_name}.${tag}.all.indel.count.txt
        zcat "$i" | grep -v '^#' | grep 'SNV;MC' | wc -l >> ${sample_name}.${tag}.snv.mc.count.txt
        # Keep only records whose fields 10-13 are string-identical
        # (presumably four per-sample columns -- TODO confirm against the input VCF).
        zcat "$i" | grep -v '^#' | grep 'SNV;MC' | awk '{ if (($10 == $11) && ($11 == $12) && ($12 == $13)) { print } }' | wc -l >> ${sample_name}.${tag}.snv.mc.same.genotype.count.txt
        zcat "$i" | grep -v '^#' | grep 'INDEL;MC' | wc -l >> ${sample_name}.${tag}.indel.mc.count.txt
        zcat "$i" | grep -v '^#' | grep 'INDEL;MC' | awk '{ if (($10 == $11) && ($11 == $12) && ($12 == $13)) { print } }' | wc -l >> ${sample_name}.${tag}.indel.mc.same.genotype.count.txt
      done

      # 4) Pull one whitespace-separated token from fixed lines of every vcfstat report:
      #    line 13 / token 3 goes to ti_tv.txt, line 14 / token 5 goes to het_hom.txt.
      #    NOTE(review): this depends on the exact vcfstat report layout -- confirm it
      #    still holds for the rtg version in the docker image.
      for i in *vcfstat.txt
      do
        sed -n '13p' "$i" | sed 's/\s\+/\t/g' | cut -f3 >> ${sample_name}.${tag}.ti_tv.txt
        sed -n '14p' "$i" | sed 's/\s\+/\t/g' | cut -f5 >> ${sample_name}.${tag}.het_hom.txt
      done

      # 5) Record which VCF files were matched by the glob.
      ls *vcf.gz > file.list.txt
    >>>

    runtime {
      docker: docker
      cluster: cluster_config
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
      # Region-restricted VCFs.
      File satellite_vcf = "${sample_name}.${tag}.satellite.vcf.gz"
      File simple_repeat_vcf = "${sample_name}.${tag}.simple_repeat.vcf.gz"
      File SINE_vcf = "${sample_name}.${tag}.SINE.vcf.gz"
      File LINE_vcf = "${sample_name}.${tag}.LINE.vcf.gz"
      File LTR_vcf = "${sample_name}.${tag}.LTR.vcf.gz"
      File low_complexity_vcf = "${sample_name}.${tag}.low_complexity.vcf.gz"
      File SD_vcf = "${sample_name}.${tag}.SD.vcf.gz"
      File ins_breakpoint_vcf = "${sample_name}.${tag}.ins_breakpoint.vcf.gz"
      File del_breakpoint_vcf = "${sample_name}.${tag}.del_breakpoint.vcf.gz"
      File mhc_vcf = "${sample_name}.${tag}.mhc.vcf.gz"

      # vcfstat reports, one per region-restricted VCF.
      File satellite_stat = "${sample_name}.${tag}.satellite.vcfstat.txt"
      File simple_repeat_stat = "${sample_name}.${tag}.simple_repeat.vcfstat.txt"
      File SINE_stat = "${sample_name}.${tag}.SINE.vcfstat.txt"
      File LINE_stat = "${sample_name}.${tag}.LINE.vcfstat.txt"
      File LTR_stat = "${sample_name}.${tag}.LTR.vcfstat.txt"
      File low_complexity_stat = "${sample_name}.${tag}.low_complexity.vcfstat.txt"
      File SD_stat = "${sample_name}.${tag}.SD.vcfstat.txt"
      File ins_breakpoint_stat = "${sample_name}.${tag}.ins_breakpoint.vcfstat.txt"
      File del_breakpoint_stat = "${sample_name}.${tag}.del_breakpoint.vcfstat.txt"
      File mhc_stat = "${sample_name}.${tag}.mhc.vcfstat.txt"

      # Count tables. Note the naming: `snv` / `indel` point at the "MC" counts, while
      # `snv_mc` / `indel_mc` point at the "MC with identical fields 10-13" counts.
      File snv = "${sample_name}.${tag}.snv.mc.count.txt"
      File snv_mc = "${sample_name}.${tag}.snv.mc.same.genotype.count.txt"
      File indel = "${sample_name}.${tag}.indel.mc.count.txt"
      File indel_mc = "${sample_name}.${tag}.indel.mc.same.genotype.count.txt"
      File file_list = "file.list.txt"
      File all_snv = "${sample_name}.${tag}.all.snv.count.txt"
      File all_indel = "${sample_name}.${tag}.all.indel.count.txt"
      File ti_tv = "${sample_name}.${tag}.ti_tv.txt"
      File het_homo = "${sample_name}.${tag}.het_hom.txt"
    }
  }