You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

преди 4 години
преди 3 години
преди 4 години
преди 3 години
преди 4 години
преди 3 години
преди 4 години
преди 3 години
преди 4 години
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071
  # Benchmark one Quartet sample VCF against the matching high-confidence
  # truth set with hap.py.
  #
  # Inputs:
  #   vcf              - query VCF; the sample name is the file basename with
  #                      the ".splited.vcf" suffix removed.
  #   benchmarking_dir - directory copied to /cromwell_root/tmp/; the command
  #                      reads reference_datasets_v202103/ from there
  #                      (presumably this directory - confirm with the caller).
  #   ref_dir          - reference directory; copied to /cromwell_root/tmp/ and
  #                      also used directly as the location of the fasta.
  #   fasta            - fasta file name inside ref_dir (passed to hap.py -r).
  #   docker, cluster_config, disk_size - forwarded to the runtime section.
  #
  # The sample name must contain LCL5, LCL6, LCL7 or LCL8; otherwise the task
  # exits with an error instead of silently producing no hap.py outputs.
  #
  # NOTE: inside the command section "${...}" is WDL interpolation, so shell
  # variables are always written as "$name" without braces.
  task benchmark {
    File vcf
    File benchmarking_dir
    File ref_dir
    String sample = basename(vcf,".splited.vcf")
    String fasta
    String docker
    String cluster_config
    String disk_size

    command <<<
      set -o pipefail
      set -e
      nt=$(nproc)
      rtg=/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg
      truth_dir=/cromwell_root/tmp/reference_datasets_v202103

      # Stage reference and truth data on the data disk.
      mkdir -p /cromwell_root/tmp
      cp -r ${ref_dir} /cromwell_root/tmp/
      cp -r ${benchmarking_dir} /cromwell_root/tmp/

      export HGREF=/cromwell_root/tmp/reference_data/GRCh38.d1.vd1.fa

      # Header lines are the ones that START with "#". Anchoring the pattern
      # keeps variant records that merely contain a "#" in the body.
      grep '^#' ${vcf} > header

      # Drop records containing a 0/0 or ./. genotype, then reduce columns
      # 9..NF (FORMAT and sample columns in VCF) to the text before the first
      # ":". With pipefail and set -e the task stops here if no record
      # survives the filters, because grep exits non-zero on empty output.
      grep -v '^#' ${vcf} | grep -v '0/0' | grep -v '\./\.' | awk '
        BEGIN { OFS = "\t" }
        {
          for ( i=9; i<=NF; i++ ) {
            split($i,a,":") ;$i = a[1];
          }
        }
        { print }
      ' > body

      cat header body > filtered.vcf

      # Compress and index the filtered VCF with rtg-tools.
      $rtg bgzip filtered.vcf -c > ${sample}.rtg.vcf.gz
      $rtg index -f vcf ${sample}.rtg.vcf.gz

      # Pick the truth set from the sample name. Patterns are tested in order,
      # so LCL5 wins over LCL6, and so on, as in the original if/elif chain.
      truth_id=""
      case "${sample}" in
        *LCL5*) truth_id=LCL5 ;;
        *LCL6*) truth_id=LCL6 ;;
        *LCL7*) truth_id=LCL7 ;;
        *LCL8*) truth_id=LCL8 ;;
      esac

      # Fail explicitly for non-Quartet samples; otherwise the task would only
      # fail later on the missing hap.py output files.
      if [[ -z "$truth_id" ]]; then
        echo "only for quartet samples" >&2
        exit 1
      fi

      /opt/hap.py/bin/hap.py \
        "$truth_dir/$truth_id.high.confidence.calls.vcf" \
        ${sample}.rtg.vcf.gz \
        -f "$truth_dir/Quartet.high.confidence.region.v202103.bed" \
        --threads $nt \
        -o ${sample} \
        -r ${ref_dir}/${fasta}
    >>>

    runtime {
      docker:docker
      cluster:cluster_config
      systemDisk:"cloud_ssd 40"
      dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
      # Filtered, bgzipped query VCF and its index (written by rtg above).
      File rtg_vcf = "${sample}.rtg.vcf.gz"
      File rtg_vcf_index = "${sample}.rtg.vcf.gz.tbi"
      # Remaining files share the "-o" prefix given to hap.py and are expected
      # to be written by it.
      File gzip_vcf = "${sample}.vcf.gz"
      File gzip_vcf_index = "${sample}.vcf.gz.tbi"
      File roc_all_csv = "${sample}.roc.all.csv.gz"
      File roc_indel = "${sample}.roc.Locations.INDEL.csv.gz"
      File roc_indel_pass = "${sample}.roc.Locations.INDEL.PASS.csv.gz"
      File roc_snp = "${sample}.roc.Locations.SNP.csv.gz"
      File roc_snp_pass = "${sample}.roc.Locations.SNP.PASS.csv.gz"
      File summary = "${sample}.summary.csv"
      File extended = "${sample}.extended.csv"
      File metrics = "${sample}.metrics.json.gz"
    }
  }