Nie możesz wybrać więcej niż 25 tematów. Tematy muszą się zaczynać od litery lub cyfry, mogą zawierać myślniki ('-') i mogą mieć do 35 znaków.

70 lines
4.3KB

  task benchmark {
    # Benchmarks one Quartet sample's variant calls against the matching
    # high-confidence reference call set using RTG Tools (vcfeval).
    #
    # Inputs:
    #   vcf              - per-sample VCF; the sample name is the file's basename
    #                      with the ".splited.vcf" suffix removed.
    #   benchmarking_dir - directory copied into /cromwell_root/tmp/. The command
    #                      reads reference_datasets_v202103/ under that location,
    #                      so the directory is assumed to carry that name
    #                      (TODO confirm against the workflow inputs).
    #   sdf              - reference template passed to `rtg vcfeval -t`.
    #   project          - prefix used for all published output files.
    #   docker, cluster_config, disk_size - runtime settings, passed through as-is.
    #
    # Outputs:
    #   rtg_vcf / rtg_vcf_index  - genotype-only, bgzipped VCF and its index.
    #   SNV_result / Indel_result - copies of the vcfeval summary.txt reports.
    File vcf
    File benchmarking_dir
    File sdf
    String project
    String sample = basename(vcf, ".splited.vcf")
    String docker
    String cluster_config
    String disk_size

    command <<<
      set -o pipefail
      set -e

      # Shell variables are written without braces on purpose: in this WDL
      # dialect the brace form is reserved for WDL placeholders.
      rtg=/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg
      ref_dir=/cromwell_root/tmp/reference_datasets_v202103
      regions=$ref_dir/Quartet.high.confidence.region.v202103.bed

      mkdir -p /cromwell_root/tmp
      cp -r "${benchmarking_dir}" /cromwell_root/tmp/

      # Split header and records on a leading '#' only, so a record that merely
      # contains '#' somewhere in a field is not mistaken for a header line.
      grep '^#' "${vcf}" > header

      # Drop hom-ref (0/0) and no-call (./.) records, then reduce FORMAT and every
      # sample column to its first ':'-separated sub-field. Fields are split on
      # tabs so that a space inside a column cannot shift the columns.
      grep -v '^#' "${vcf}" | grep -v '0/0' | grep -v '\./\.' | awk '
        BEGIN { FS = "\t"; OFS = "\t" }
        {
          for (i = 9; i <= NF; i++) {
            split($i, a, ":"); $i = a[1]
          }
          print
        }
      ' > body
      cat header body > filtered.vcf

      # Write the compressed VCF under the project-prefixed name: that is the file
      # the vcffilter calls below read and the file the output section publishes.
      $rtg bgzip filtered.vcf -c > ${project}.${sample}.rtg.vcf.gz
      $rtg index -f vcf ${project}.${sample}.rtg.vcf.gz

      $rtg vcffilter -i ${project}.${sample}.rtg.vcf.gz -o ${sample}.rtg.SNV.vcf.gz --snps-only
      $rtg vcffilter -i ${project}.${sample}.rtg.vcf.gz -o ${sample}.rtg.INDEL.vcf.gz --non-snps-only

      # Pick the reference call set from the sample name (first match wins, in
      # the order LCL5, LCL6, LCL7, LCL8).
      case "${sample}" in
        *LCL5*) ref_sample=LCL5 ;;
        *LCL6*) ref_sample=LCL6 ;;
        *LCL7*) ref_sample=LCL7 ;;
        *LCL8*) ref_sample=LCL8 ;;
        *)
          # Without a reference there is no summary to publish, so stop here
          # instead of failing later on a missing summary.txt.
          echo "only for quartet samples" >&2
          exit 1
          ;;
      esac

      for kind in SNV INDEL; do
        $rtg vcfeval \
          -b $ref_dir/$ref_sample.high.confidence.calls.$kind.vcf.gz \
          -c ${sample}.rtg.$kind.vcf.gz \
          -o ${sample}_$kind \
          -t "${sdf}" \
          --evaluation-regions=$regions
        cat ${sample}_$kind/summary.txt > ${project}.${sample}_"$kind"_precision_recall.txt
      done
    >>>

    runtime {
      docker: docker
      cluster: cluster_config
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
      File rtg_vcf = "${project}.${sample}.rtg.vcf.gz"
      File rtg_vcf_index = "${project}.${sample}.rtg.vcf.gz.tbi"
      File SNV_result = "${project}.${sample}_SNV_precision_recall.txt"
      File Indel_result = "${project}.${sample}_INDEL_precision_recall.txt"
    }
  }