Du kannst nicht mehr als 25 Themen auswählen. Themen müssen entweder mit einem Buchstaben oder einer Ziffer beginnen. Sie können Bindestriche („-“) enthalten und bis zu 35 Zeichen lang sein.

83 Zeilen
4.0KB

  # Benchmark one Quartet sample's filtered VCF against the matching Quartet
  # high-confidence call set with hap.py, and additionally write a copy of the
  # input VCF whose column-header line names the sample by its Quartet ID
  # (LCL5/LCL6/LCL7/LCL8), compressed and indexed with rtg-tools.
  #
  # Inputs:
  #   filtered_vcf      VCF to evaluate. `sample` is its basename with the
  #                     ".filtered.vcf" suffix removed and must contain one of
  #                     LCL5, LCL6, LCL7 or LCL8 (checked in that order).
  #   benchmarking_dir  Copied into /cromwell_root/tmp/. The command then reads
  #                     /cromwell_root/tmp/reference_datasets_v202103/, so the
  #                     directory is presumably named reference_datasets_v202103
  #                     (TODO confirm against the workflow inputs).
  #   ref_dir           Copied into /cromwell_root/tmp/; HGREF points at
  #                     reference_data/GRCh38.d1.vd1.fa below that copy, while
  #                     hap.py's -r argument uses ref_dir/fasta directly.
  #   fasta             Reference FASTA file name inside ref_dir.
  #   docker, cluster_config, disk_size
  #                     Passed through to the runtime section.
  #
  # The command fails with a non-zero exit status when `sample` does not name a
  # Quartet sample, because none of the declared outputs could be produced.
  task benchmark {
    File filtered_vcf
    File benchmarking_dir
    File ref_dir
    String sample = basename(filtered_vcf,".filtered.vcf")
    String fasta
    String docker
    String cluster_config
    String disk_size

    command <<<
      set -o pipefail
      set -e
      nt=$(nproc)

      # Stage the reference and truth-set directories on the data disk.
      mkdir -p /cromwell_root/tmp
      cp -r "${ref_dir}" /cromwell_root/tmp/
      cp -r "${benchmarking_dir}" /cromwell_root/tmp/
      export HGREF=/cromwell_root/tmp/reference_data/GRCh38.d1.vd1.fa

      # NOTE: shell variables in this section are written as $name (no braces)
      # because dollar-brace is reserved for WDL placeholders here.

      # Work out which Quartet sample this is; first match wins (LCL5 -> LCL8).
      quartet_id=""
      for candidate in LCL5 LCL6 LCL7 LCL8; do
        if [[ "${sample}" == *"$candidate"* ]]; then
          quartet_id=$candidate
          break
        fi
      done

      # Fail loudly instead of exiting 0 without any of the declared outputs.
      if [[ -z "$quartet_id" ]]; then
        echo "only for quartet samples" >&2
        exit 1
      fi

      truth_dir=/cromwell_root/tmp/reference_datasets_v202103
      rtg=/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg

      # Compare against the truth set; hap.py writes its results using the
      # sample name as output prefix.
      /opt/hap.py/bin/hap.py \
        "$truth_dir/$quartet_id.high.confidence.calls.vcf" \
        "${filtered_vcf}" \
        -f "$truth_dir/Quartet.high.confidence.region.v202103.bed" \
        --threads "$nt" \
        -o "${sample}" \
        -r "${ref_dir}/${fasta}"

      # Rebuild the VCF with the Quartet ID as the sample column name:
      # meta lines (##...), a fresh #CHROM line, then the records.
      # Patterns are anchored so records that merely contain '#' are kept.
      # grep exit status 1 (no matching lines) is tolerated; real errors
      # (status 2) still abort the script via set -e.
      {
        grep '^##' "${filtered_vcf}" || [[ $? -eq 1 ]]
        printf '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t%s\n' "$quartet_id"
        grep -v '^#' "${filtered_vcf}" || [[ $? -eq 1 ]]
      } > "$quartet_id.vcf"

      # Compress and index the renamed VCF.
      "$rtg" bgzip "$quartet_id.vcf" -c > "${sample}.reformed.vcf.gz"
      "$rtg" index -f vcf "${sample}.reformed.vcf.gz"
    >>>

    runtime {
      docker:docker
      cluster:cluster_config
      systemDisk:"cloud_ssd 40"
      dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
      # Renamed-sample VCF produced by the rtg bgzip / rtg index steps.
      File rtg_vcf = "${sample}.reformed.vcf.gz"
      File rtg_vcf_index = "${sample}.reformed.vcf.gz.tbi"
      # Files expected from hap.py under the sample-name prefix.
      File gzip_vcf = "${sample}.vcf.gz"
      File gzip_vcf_index = "${sample}.vcf.gz.tbi"
      File roc_all_csv = "${sample}.roc.all.csv.gz"
      File roc_indel = "${sample}.roc.Locations.INDEL.csv.gz"
      File roc_indel_pass = "${sample}.roc.Locations.INDEL.PASS.csv.gz"
      File roc_snp = "${sample}.roc.Locations.SNP.csv.gz"
      File roc_snp_pass = "${sample}.roc.Locations.SNP.PASS.csv.gz"
      File summary = "${sample}.summary.csv"
      File extended = "${sample}.extended.csv"
      File metrics = "${sample}.metrics.json.gz"
    }
  }