選択できるのは25トピックまでです。 トピックは、先頭が英数字で、英数字とダッシュ('-')を使用した35文字以内のものにしてください。

split_gvcf_files.wdl 3.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105
  # Splits a multi-sample (g)VCF into one single-sample VCF per sample column,
  # copies the twelve expected samples (LCL5..LCL8, replicates 1..3) to
  # project-prefixed names, and rebuilds one four-sample "family" VCF per
  # replicate with sample columns ordered LCL5, LCL6, LCL7, LCL8.
  #
  # Inputs:
  #   gvcf            multi-sample VCF read as plain text; sample columns start at field 10
  #   project         prefix for the per-sample and family output file names
  #   docker          container image handed to the runtime section
  #   cluster_config  value handed to the runtime `cluster` attribute
  #   disk_size       size spliced into the runtime `dataDisk` string
  #
  # Outputs:
  #   splited_vcf     every *.vcf left in the working directory
  #   family_vcf      the three <project>.<replicate>.family.vcf files
  #   LCLx_y          the project-prefixed single-sample VCF for each sample/replicate
  task split_gvcf_files {
    File gvcf
    String project
    String docker
    String cluster_config
    String disk_size

    command <<<
      # NOTE: draft-2 WDL interpolates every dollar-brace expression in this
      # block, so shell variables below use the bare $name form only.

      # One sample name per line: split the #CHROM header on tabs and drop the
      # nine fixed VCF columns (CHROM..FORMAT).
      grep '^#CHROM' "${gvcf}" | tr '\t' '\n' | tail -n +10 > name

      # Sample k (1-based) lives in field 9+k. Walking the names while
      # advancing the field index keeps every sample paired with its own
      # column; meta-information lines have no tabs and pass through cut
      # unchanged, so each per-sample file keeps the full header.
      col=10
      while read -r sample
      do
        cut -f1-9,$col "${gvcf}" > "$sample.vcf"
        # Copy the first matching expected sample label to its canonical name.
        for label in LCL5_1 LCL5_2 LCL5_3 LCL6_1 LCL6_2 LCL6_3 LCL7_1 LCL7_2 LCL7_3 LCL8_1 LCL8_2 LCL8_3
        do
          if [[ "$sample.vcf" == *"$label"* ]]; then
            cp "$sample.vcf" "${project}.$label.vcf"
            break
          fi
        done
        col=$((col + 1))
      done < name

      # Family header: meta-information lines from LCL5_1 plus a fixed column
      # line naming the four family members.
      printf '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tLCL5\tLCL6\tLCL7\tLCL8\n' > header_name
      grep '^##' "${project}.LCL5_1.vcf" | cat - header_name > header

      for rep in 1 2 3
      do
        # LCL5 contributes the nine fixed columns plus its genotype column;
        # the other members contribute only their genotype column (field 10).
        grep -v '^#' "${project}.LCL5_$rep.vcf" > "LCL5_$rep.body"
        for member in LCL6 LCL7 LCL8
        do
          grep -v '^#' "${project}.$member"_"$rep.vcf" | cut -f 10 > "$member"_"$rep.body"
        done
        paste "LCL5_$rep.body" "LCL6_$rep.body" "LCL7_$rep.body" "LCL8_$rep.body" > "family_$rep.body"
        cat header "family_$rep.body" > "${project}.$rep.family.vcf"
      done
    >>>

    runtime {
      docker: docker
      cluster: cluster_config
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
      Array[File] splited_vcf = glob("*.vcf")
      Array[File] family_vcf = glob("*.family.vcf")
      File LCL5_1 = "${project}.LCL5_1.vcf"
      File LCL5_2 = "${project}.LCL5_2.vcf"
      File LCL5_3 = "${project}.LCL5_3.vcf"
      File LCL6_1 = "${project}.LCL6_1.vcf"
      File LCL6_2 = "${project}.LCL6_2.vcf"
      File LCL6_3 = "${project}.LCL6_3.vcf"
      File LCL7_1 = "${project}.LCL7_1.vcf"
      File LCL7_2 = "${project}.LCL7_2.vcf"
      File LCL7_3 = "${project}.LCL7_3.vcf"
      File LCL8_1 = "${project}.LCL8_1.vcf"
      File LCL8_2 = "${project}.LCL8_2.vcf"
      File LCL8_3 = "${project}.LCL8_3.vcf"
    }
  }