You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

64 lines
2.7KB

  task benchmark {
    # Restricts the input VCF to records on chr1-chr22 and chrX, compresses and
    # indexes the result with rtg-tools, then runs hap.py against the truth
    # VCF / confident-region BED selected by sample_mark.
    #
    # Inputs:
    #   vcf              uncompressed VCF to benchmark (its ".vcf" suffix is
    #                    stripped to form the output prefix `sample`)
    #   benchmarking_dir directory holding <mark>.afterfilterdiffbed.vcf.gz and
    #                    <mark>.high.confidence.bed.gz for each supported mark
    #   ref_dir          directory holding the reference FASTA named by `fasta`
    #   sample_mark      one of LCL5, LCL6, LCL7, LCL8; anything else fails
    #   fasta            file name of the reference inside ref_dir
    #   docker, cluster_config, disk_size   passed straight to the runtime block
    File vcf
    File benchmarking_dir
    File ref_dir
    String sample = basename(vcf, ".vcf")
    String sample_mark
    String fasta
    String docker
    String cluster_config
    String disk_size

    command <<<
      set -o pipefail
      set -e

      # Validate the sample mark before doing any expensive work. Previously an
      # unsupported mark only printed a message and exited 0, so the task
      # failed later on missing output files without a clear cause.
      case "${sample_mark}" in
        LCL5|LCL6|LCL7|LCL8) ;;
        *)
          echo "only for quartet samples (LCL5, LCL6, LCL7, LCL8); got: ${sample_mark}" >&2
          exit 1
          ;;
      esac

      # Shell variables are written without braces on purpose: inside this
      # command block a dollar sign followed by a brace is a WDL placeholder.
      nt=$(nproc)
      rtg=/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg

      mkdir -p /cromwell_root/tmp
      cp -r "${ref_dir}" /cromwell_root/tmp/
      # NOTE(review): this path is hard-coded and does not follow the `fasta`
      # input; it presumes ref_dir is named "reference_data" and contains
      # GRCh38.d1.vd1.fa - confirm against the reference bundle in use.
      export HGREF=/cromwell_root/tmp/reference_data/GRCh38.d1.vd1.fa

      # Split header and records. The pattern is anchored to the start of the
      # line so that a record containing '#' in one of its fields is not
      # mistaken for a header line.
      grep '^#' "${vcf}" > header
      grep -v '^#' "${vcf}" > body

      # Keep only chr1-chr22 and chrX. -w requires the contig name to end at a
      # non-word character, so chr1 does not match chr10 or chr1_*_random.
      # grep exits non-zero when nothing matches, which (with set -e) aborts
      # the task on a VCF that has no records on these contigs.
      grep -w -E '^chr([1-9]|1[0-9]|2[0-2]|X)' body > body.filtered
      cat header body.filtered > "${sample}.filtered.vcf"

      "$rtg" bgzip "${sample}.filtered.vcf" -c > "${sample}.filtered.rtg.vcf.gz"
      "$rtg" index -f vcf "${sample}.filtered.rtg.vcf.gz"

      # One hap.py invocation for all supported marks; the truth VCF and the
      # high-confidence BED are both selected by sample_mark.
      /opt/hap.py/bin/hap.py \
        "${benchmarking_dir}/${sample_mark}.afterfilterdiffbed.vcf.gz" \
        "${sample}.filtered.rtg.vcf.gz" \
        -f "${benchmarking_dir}/${sample_mark}.high.confidence.bed.gz" \
        --threads "$nt" \
        -o "${sample}" \
        -r "${ref_dir}/${fasta}"
    >>>

    runtime {
      docker: docker
      cluster: cluster_config
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
      # Written by the rtg bgzip / rtg index steps above.
      File rtg_vcf = "${sample}.filtered.rtg.vcf.gz"
      File rtg_vcf_index = "${sample}.filtered.rtg.vcf.gz.tbi"
      # Expected to be written by hap.py under the -o prefix.
      File gzip_vcf = "${sample}.vcf.gz"
      File gzip_vcf_index = "${sample}.vcf.gz.tbi"
      File roc_all_csv = "${sample}.roc.all.csv.gz"
      File roc_indel = "${sample}.roc.Locations.INDEL.csv.gz"
      File roc_indel_pass = "${sample}.roc.Locations.INDEL.PASS.csv.gz"
      File roc_snp = "${sample}.roc.Locations.SNP.csv.gz"
      File roc_snp_pass = "${sample}.roc.Locations.SNP.PASS.csv.gz"
      File summary = "${sample}.summary.csv"
      File extended = "${sample}.extended.csv"
      File metrics = "${sample}.metrics.json.gz"
    }
  }