您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

109 行
4.9KB

  # Task benchmark
  #
  # Filters a Quartet sample VCF down to chr1-22 and chrX, drops records whose line
  # contains a 0/0 genotype (and, when any record mentions PASS, keeps only lines
  # mentioning PASS), replaces the contig header lines with the supplied `contig`
  # file, and then:
  #   * runs hap.py against the high-confidence call set of the matching Quartet
  #     sample (LCL5, LCL6, LCL7 or LCL8, detected from the VCF file name), and
  #   * writes a copy of the filtered VCF whose sample column is renamed to the
  #     Quartet ID, bgzip-compressed and indexed with rtg-tools.
  #
  # Inputs
  #   vcf              Sample VCF, plain or gzip-compressed; its file name must
  #                    contain LCL5, LCL6, LCL7 or LCL8.
  #   benchmarking_dir Directory copied to /cromwell_root/tmp; the command expects
  #                    it to be named reference_datasets_v202103.
  #   contig           File with the ##contig header lines to splice into the VCF.
  #   sdf              Declared but not used by the command.
  #   ref_dir          Reference directory, copied to /cromwell_root/tmp; HGREF is
  #                    hard-coded to reference_data/GRCh38.d1.vd1.fa under that path.
  #   fasta            Reference FASTA, appended to ref_dir for hap.py's -r option.
  #   docker, cluster_config, disk_size  Runtime settings.
  #
  # The task fails with a non-zero exit code when the sample name does not contain
  # one of the four Quartet IDs.
  task benchmark {
    File vcf
    File benchmarking_dir
    File contig
    File sdf
    File ref_dir
    File fasta
    String sample = basename(vcf,".vcf.gz")
    String docker
    String cluster_config
    String disk_size

    command <<<
      # Work out which Quartet sample this is before doing any expensive work.
      # First match wins, in the order LCL5, LCL6, LCL7, LCL8.
      quartet_id=""
      for candidate in LCL5 LCL6 LCL7 LCL8; do
        if [[ "${sample}" == *"$candidate"* ]]; then
          quartet_id=$candidate
          break
        fi
      done
      if [[ -z "$quartet_id" ]]; then
        # None of the declared outputs can be produced, so fail explicitly instead
        # of exiting 0 and letting output collection fail later.
        echo "only for quartet samples" >&2
        exit 1
      fi

      nt=$(nproc)
      mkdir -p /cromwell_root/tmp
      cp -r "${ref_dir}" /cromwell_root/tmp/
      cp -r "${benchmarking_dir}" /cromwell_root/tmp/
      export HGREF=/cromwell_root/tmp/reference_data/GRCh38.d1.vd1.fa

      # Decide on decompression from the input path itself: `sample` already has
      # the .vcf.gz suffix stripped, so it cannot be used to detect gzip input.
      if [[ "${vcf}" == *.gz ]]; then
        gunzip -c "${vcf}" > unzip.vcf
      else
        cp "${vcf}" unzip.vcf
      fi

      # Split the VCF into its column-header line and its records. Patterns are
      # anchored so a '#' inside a record field does not misclassify the line.
      grep '^#CHROM' unzip.vcf > name
      grep -v '^#' unzip.vcf > body

      # Keep only chr1-22 and chrX; -w stops ^chr1 from also matching chr10 etc.
      grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' body > body.filtered

      # Drop lines containing 0/0; if the file mentions PASS at all, additionally
      # keep only lines that mention PASS.
      if grep -q PASS "body.filtered"; then
        grep -v '0/0' body.filtered | grep 'PASS' > body.filtered.gt
      else
        grep -v '0/0' body.filtered > body.filtered.gt
      fi

      # Reassemble: original meta lines minus contig lines, then the supplied
      # contig lines, the column header and the filtered records.
      grep '^##' unzip.vcf | grep -v 'contig' | cat - "${contig}" name body.filtered.gt > filtered.vcf

      # Column-header lines carrying each Quartet ID as the sample name. All four
      # are written so that `filelist` lists the same files as before.
      for id in LCL5 LCL6 LCL7 LCL8; do
        echo -e "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t$id" > "$id"_name
      done

      # Snapshot of the working directory taken before hap.py runs.
      ls > filelist

      # NOTE(review): `fasta` is declared as File, so ref_dir/fasta joins two
      # paths; this looks like it expects a bare file name - confirm with callers.
      /opt/hap.py/bin/hap.py /cromwell_root/tmp/reference_datasets_v202103/"$quartet_id".high.confidence.calls.vcf filtered.vcf -f /cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed --threads $nt -o "${sample}" -r "${ref_dir}/${fasta}"

      # Rebuild the filtered VCF with the sample column renamed to the Quartet ID.
      # This overwrites `body`, so the `body` output holds the filtered records.
      grep '^##' filtered.vcf > header
      grep -v '^#' filtered.vcf > body
      cat header "$quartet_id"_name body > "$quartet_id".vcf
      /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip "$quartet_id".vcf -c > "${sample}.reformed.vcf.gz"
      /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf "${sample}.reformed.vcf.gz"
    >>>

    runtime {
      docker:docker
      cluster:cluster_config
      systemDisk:"cloud_ssd 40"
      dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
      # Renamed-sample VCF produced by rtg bgzip / rtg index.
      File rtg_vcf = "${sample}.reformed.vcf.gz"
      File rtg_vcf_index = "${sample}.reformed.vcf.gz.tbi"
      # Files written by hap.py under the -o prefix.
      File gzip_vcf = "${sample}.vcf.gz"
      File gzip_vcf_index = "${sample}.vcf.gz.tbi"
      File roc_all_csv = "${sample}.roc.all.csv.gz"
      File roc_indel = "${sample}.roc.Locations.INDEL.csv.gz"
      File roc_indel_pass = "${sample}.roc.Locations.INDEL.PASS.csv.gz"
      File roc_snp = "${sample}.roc.Locations.SNP.csv.gz"
      File roc_snp_pass = "${sample}.roc.Locations.SNP.PASS.csv.gz"
      File summary = "${sample}.summary.csv"
      File extended = "${sample}.extended.csv"
      File metrics = "${sample}.metrics.json.gz"
      # Intermediate files exposed for inspection.
      File out_file = "filelist"
      File filtered_vcf = "filtered.vcf"
      File unzip_vcf = "unzip.vcf"
      File body_filter = "body.filtered"
      File body_filtered_gt = "body.filtered.gt"
      File name = "name"
      File body = "body"
    }
  }