task benchmark { File vcf File benchmarking_dir File sdf String project String sample = basename(vcf,".splited.vcf") String docker String cluster_config String disk_size command <<< set -o pipefail set -e nt=$(nproc) mkdir -p /cromwell_root/tmp cp -r ${benchmarking_dir} /cromwell_root/tmp/ cat ${vcf} | grep '#' > header cat ${vcf} | grep -v '#' | grep -v '0/0' | grep -v '\./\.'| awk ' BEGIN { OFS = "\t" } { for ( i=9; i<=NF; i++ ) { split($i,a,":") ;$i = a[1]; } } { print } ' > body cat header body > filtered.vcf /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip filtered.vcf -c > ${sample}.rtg.vcf.gz /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${sample}.rtg.vcf.gz /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcffilter -i ${project}.${sample}.rtg.vcf.gz -o ${sample}.rtg.SNV.vcf.gz --snps-only /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcffilter -i ${project}.${sample}.rtg.vcf.gz -o ${sample}.rtg.INDEL.vcf.gz --non-snps-only if [[ ${sample} =~ "LCL5" ]];then /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfeval -b /cromwell_root/tmp/reference_datasets_v202103/LCL5.high.confidence.calls.SNV.vcf.gz -c ${sample}.rtg.SNV.vcf.gz -o ${sample}_SNV -t ${sdf} --evaluation-regions=/cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfeval -b /cromwell_root/tmp/reference_datasets_v202103/LCL5.high.confidence.calls.INDEL.vcf.gz -c ${sample}.rtg.INDEL.vcf.gz -o ${sample}_INDEL -t ${sdf} --evaluation-regions=/cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed elif [[ ${sample} =~ "LCL6" ]]; then /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfeval -b /cromwell_root/tmp/reference_datasets_v202103/LCL6.high.confidence.calls.SNV.vcf.gz -c ${sample}.rtg.SNV.vcf.gz -o ${sample}_SNV -t ${sdf} --evaluation-regions=/cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfeval -b /cromwell_root/tmp/reference_datasets_v202103/LCL6.high.confidence.calls.INDEL.vcf.gz -c ${sample}.rtg.INDEL.vcf.gz -o ${sample}_INDEL -t ${sdf} --evaluation-regions=/cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed elif [[ ${sample} =~ "LCL7" ]]; then /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfeval -b /cromwell_root/tmp/reference_datasets_v202103/LCL7.high.confidence.calls.SNV.vcf.gz -c ${sample}.rtg.SNV.vcf.gz -o ${sample}_SNV -t ${sdf} --evaluation-regions=/cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfeval -b /cromwell_root/tmp/reference_datasets_v202103/LCL7.high.confidence.calls.INDEL.vcf.gz -c ${sample}.rtg.INDEL.vcf.gz -o ${sample}_INDEL -t ${sdf} --evaluation-regions=/cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed elif [[ ${sample} =~ "LCL8" ]]; then /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfeval -b /cromwell_root/tmp/reference_datasets_v202103/LCL8.high.confidence.calls.SNV.vcf.gz -c ${sample}.rtg.SNV.vcf.gz -o ${sample}_SNV -t ${sdf} --evaluation-regions=/cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfeval -b /cromwell_root/tmp/reference_datasets_v202103/LCL8.high.confidence.calls.INDEL.vcf.gz -c ${sample}.rtg.INDEL.vcf.gz -o ${sample}_INDEL -t ${sdf} --evaluation-regions=/cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed else echo "only for quartet samples" fi cat ${sample}_SNV/summary.txt > ${project}.${sample}_SNV_precision_recall.txt cat ${sample}_INDEL/summary.txt > ${project}.${sample}_INDEL_precision_recall.txt >>> runtime { docker:docker cluster:cluster_config systemDisk:"cloud_ssd 40" dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/" } output { File rtg_vcf = "${project}.${sample}.rtg.vcf.gz" File rtg_vcf_index = "${project}.${sample}.rtg.vcf.gz.tbi" File SNV_result = "${project}.${sample}_SNV_precision_recall.txt" File Indel_result = "${project}.${sample}_INDEL_precision_recall.txt" } }