You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

benchmark.wdl 5.7KB

4 vuotta sitten
4 vuotta sitten
4 vuotta sitten
4 vuotta sitten
4 vuotta sitten
4 vuotta sitten
4 vuotta sitten
4 vuotta sitten
4 vuotta sitten
4 vuotta sitten
4 vuotta sitten
4 vuotta sitten
4 vuotta sitten
4 vuotta sitten
4 vuotta sitten
4 vuotta sitten
4 vuotta sitten
4 vuotta sitten
4 vuotta sitten
4 vuotta sitten
4 vuotta sitten
4 vuotta sitten
4 vuotta sitten
4 vuotta sitten
4 vuotta sitten
4 vuotta sitten
4 vuotta sitten
4 vuotta sitten
4 vuotta sitten
4 vuotta sitten
4 vuotta sitten
4 vuotta sitten
4 vuotta sitten
4 vuotta sitten
4 vuotta sitten
4 vuotta sitten
4 vuotta sitten
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889
  1. task benchmark {
  2. File vcf
  3. File benchmarking_dir
  4. File contig
  5. File sdf
  6. String project
  7. String sample = basename(vcf,".raw.vcf.gz")
  8. String docker
  9. String cluster_config
  10. String disk_size
  11. command <<<
  12. set -o pipefail
  13. set -e
  14. nt=$(nproc)
  15. mkdir -p /cromwell_root/tmp
  16. cp -r ${benchmarking_dir} /cromwell_root/tmp/
  17. gunzip ${vcf} -c > unzip.vcf
  18. cat unzip.vcf | grep '#CHROM' > name
  19. cat unzip.vcf | grep -v '#' > body
  20. cat body | grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' > body.filtered
  21. cat unzip.vcf | grep '##' | grep -v 'contig' | cat - ${contig} name body.filtered > filtered.vcf
  22. echo -e "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tLCL5" > LCL5_name
  23. echo -e "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tLCL6" > LCL6_name
  24. echo -e "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tLCL7" > LCL7_name
  25. echo -e "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tLCL8" > LCL8_name
  26. /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip filtered.vcf -c > ${project}.${sample}.rtg.vcf.gz
  27. /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${project}.${sample}.rtg.vcf.gz
  28. /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcffilter -i ${project}.${sample}.rtg.vcf.gz -o ${sample}.rtg.SNV.vcf.gz --snps-only
  29. /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcffilter -i ${project}.${sample}.rtg.vcf.gz -o ${sample}.rtg.INDEL.vcf.gz --non-snps-only
  30. if [[ ${sample} =~ "LCL5" ]];then
  31. /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfeval -b /cromwell_root/tmp/reference_datasets_v202103/LCL5.high.confidence.calls.SNV.vcf.gz -c ${sample}.rtg.SNV.vcf.gz -o ${sample}_SNV -t ${sdf} --evaluation-regions=/cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed
  32. /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfeval -b /cromwell_root/tmp/reference_datasets_v202103/LCL5.high.confidence.calls.INDEL.vcf.gz -c ${sample}.rtg.INDEL.vcf.gz -o ${sample}_INDEL -t ${sdf} --evaluation-regions=/cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed
  33. cat filtered.vcf | grep '##' > header
  34. cat filtered.vcf | grep -v '#' > body
  35. cat header LCL5_name body > LCL5.vcf
  36. /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip LCL5.vcf -c > ${project}.${sample}.rtg.vcf.gz
  37. elif [[ ${sample} =~ "LCL6" ]]; then
  38. /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfeval -b /cromwell_root/tmp/reference_datasets_v202103/LCL6.high.confidence.calls.SNV.vcf.gz -c ${sample}.rtg.SNV.vcf.gz -o ${sample}_SNV -t ${sdf} --evaluation-regions=/cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed
  39. /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfeval -b /cromwell_root/tmp/reference_datasets_v202103/LCL6.high.confidence.calls.INDEL.vcf.gz -c ${sample}.rtg.INDEL.vcf.gz -o ${sample}_INDEL -t ${sdf} --evaluation-regions=/cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed
  40. cat filtered.vcf | grep '##' > header
  41. cat filtered.vcf | grep -v '#' > body
  42. cat header LCL6_name body > LCL6.vcf
  43. /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip LCL6.vcf -c > ${project}.${sample}.rtg.vcf.gz
  44. elif [[ ${sample} =~ "LCL7" ]]; then
  45. /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfeval -b /cromwell_root/tmp/reference_datasets_v202103/LCL7.high.confidence.calls.SNV.vcf.gz -c ${sample}.rtg.SNV.vcf.gz -o ${sample}_SNV -t ${sdf} --evaluation-regions=/cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed
  46. /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfeval -b /cromwell_root/tmp/reference_datasets_v202103/LCL7.high.confidence.calls.INDEL.vcf.gz -c ${sample}.rtg.INDEL.vcf.gz -o ${sample}_INDEL -t ${sdf} --evaluation-regions=/cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed
  47. cat filtered.vcf | grep '##' > header
  48. cat filtered.vcf | grep -v '#' > body
  49. cat header LCL7_name body > LCL7.vcf
  50. /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip LCL7.vcf -c > ${project}.${sample}.rtg.vcf.gz
  51. elif [[ ${sample} =~ "LCL8" ]]; then
  52. /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfeval -b /cromwell_root/tmp/reference_datasets_v202103/LCL8.high.confidence.calls.SNV.vcf.gz -c ${sample}.rtg.SNV.vcf.gz -o ${sample}_SNV -t ${sdf} --evaluation-regions=/cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed
  53. /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfeval -b /cromwell_root/tmp/reference_datasets_v202103/LCL8.high.confidence.calls.INDEL.vcf.gz -c ${sample}.rtg.INDEL.vcf.gz -o ${sample}_INDEL -t ${sdf} --evaluation-regions=/cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed
  54. cat filtered.vcf | grep '##' > header
  55. cat filtered.vcf | grep -v '#' > body
  56. cat header LCL8_name body > LCL8.vcf
  57. /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip LCL8.vcf -c > ${project}.${sample}.rtg.vcf.gz
  58. else
  59. echo "only for quartet samples"
  60. fi
  61. cat ${sample}_SNV/summary.txt > ${project}.${sample}_SNV_precision_recall.txt
  62. cat ${sample}_INDEL/summary.txt > ${project}.${sample}_INDEL_precision_recall.txt
  63. >>>
  64. runtime {
  65. docker:docker
  66. cluster:cluster_config
  67. systemDisk:"cloud_ssd 40"
  68. dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
  69. }
  70. output {
  71. File rtg_vcf = "${project}.${sample}.rtg.vcf.gz"
  72. File rtg_vcf_index = "${project}.${sample}.rtg.vcf.gz.tbi"
  73. File SNV_result = "${project}.${sample}_SNV_precision_recall.txt"
  74. File Indel_result = "${project}.${sample}_INDEL_precision_recall.txt"
  75. }
  76. }