|
- # Task benchmark: filter one sample's VCF and score it against the Quartet
- # high-confidence reference calls with `rtg vcfeval`.
- #
- # Inputs:
- #   vcf              - call set to evaluate; decompressed when its path ends in .gz
- #   benchmarking_dir - copied into /cromwell_root/tmp; the command then reads
- #                      /cromwell_root/tmp/reference_datasets_v202103, so this
- #                      directory is presumably named reference_datasets_v202103
- #                      (TODO confirm against the workflow inputs)
- #   contig           - file whose lines replace the ##contig lines of the input header
- #   sdf              - reference passed to `rtg vcfeval -t`
- #   project          - prefix used in output file names
- #   sample           - vcf file name with ".raw.vcf.gz" stripped; it must contain
- #                      LCL5, LCL6, LCL7 or LCL8 or the task fails
- #   docker, cluster_config, disk_size - forwarded to the runtime section
- #
- # Outputs: the bgzipped "reformed" VCF (sample column renamed to the matched
- # LCL id), the .tbi index expected next to it, and the vcfeval summary.txt
- # for SNVs and for INDELs.
- task benchmark {
-   File vcf
-   File benchmarking_dir
-   File contig
-   File sdf
-   String project
-   String sample = basename(vcf,".raw.vcf.gz")
-   String docker
-   String cluster_config
-   String disk_size
-
-   command <<<
-     set -eo pipefail
-
-     # Bash variables are written brace-free ($name) so they cannot be
-     # mistaken for WDL placeholders, which use the brace form.
-     rtg=/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg
-     ref_dir=/cromwell_root/tmp/reference_datasets_v202103
-
-     # Pick the reference sample whose id occurs in the sample name
-     # (first match wins, checked in the order LCL5 -> LCL8).
-     ref_id=""
-     for id in LCL5 LCL6 LCL7 LCL8; do
-       if [[ "${sample}" == *"$id"* ]]; then
-         ref_id=$id
-         break
-       fi
-     done
-     # Fail before doing any work: without a matching reference there is
-     # nothing to evaluate and none of the declared outputs can be produced.
-     if [[ -z "$ref_id" ]]; then
-       echo "only for quartet samples" >&2
-       exit 1
-     fi
-
-     mkdir -p /cromwell_root/tmp
-     cp -r "${benchmarking_dir}" /cromwell_root/tmp/
-
-     # Decide on decompression from the input path itself. The sample name
-     # has ".raw.vcf.gz" stripped, so testing it for "gz" would miss exactly
-     # the compressed inputs it was derived from.
-     if [[ "${vcf}" == *.gz ]]; then
-       gunzip -c "${vcf}" > unzip.vcf
-     else
-       cp "${vcf}" unzip.vcf
-     fi
-
-     # Split the column header from the records. Patterns are anchored so a
-     # '#' occurring inside a record does not affect the split.
-     grep '^#CHROM' unzip.vcf > name
-     grep -v '^#' unzip.vcf > body
-     # Keep chr1-chr22 and chrX; -w requires the chromosome name to end at a
-     # word boundary, so "chr1" does not also select "chr10" or "chr1_...".
-     grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' body > body.filtered
-
-     # Drop records containing 0/0; additionally keep only PASS records when
-     # the call set contains any PASS at all.
-     if grep -q PASS body.filtered; then
-       grep -v '0/0' body.filtered | grep 'PASS' > body.filtered.gt
-     else
-       grep -v '0/0' body.filtered > body.filtered.gt
-     fi
-     # Rebuild the VCF: original meta lines minus ##contig, then the supplied
-     # contig lines, the column header and the filtered records.
-     grep '^##' unzip.vcf | grep -v '^##contig' | cat - "${contig}" name body.filtered.gt > filtered.vcf
-
-     "$rtg" bgzip filtered.vcf -c > "${project}.${sample}.rtg.vcf.gz"
-     "$rtg" index -f vcf "${project}.${sample}.rtg.vcf.gz"
-     "$rtg" vcffilter -i "${project}.${sample}.rtg.vcf.gz" -o "${sample}.rtg.SNV.vcf.gz" --snps-only
-     "$rtg" vcffilter -i "${project}.${sample}.rtg.vcf.gz" -o "${sample}.rtg.INDEL.vcf.gz" --non-snps-only
-
-     # Evaluate SNVs and INDELs separately against the matching reference calls.
-     for vtype in SNV INDEL; do
-       "$rtg" vcfeval \
-         -b "$ref_dir/$ref_id.high.confidence.calls.$vtype.vcf.gz" \
-         -c "${sample}.rtg.$vtype.vcf.gz" \
-         -o "${sample}_$vtype" \
-         -t "${sdf}" \
-         --evaluation-regions="$ref_dir/Quartet.high.confidence.region.v202103.bed"
-     done
-
-     # Write the "reformed" VCF: same meta lines and records, but with the
-     # sample column named after the matched LCL id.
-     printf '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t%s\n' "$ref_id" > sample_header
-     grep '^##' filtered.vcf > header
-     grep -v '^#' filtered.vcf > body
-     cat header sample_header body > "$ref_id.vcf"
-     "$rtg" bgzip "$ref_id.vcf" -c > "${project}.${sample}.reformed.vcf.gz"
-     "$rtg" index -f vcf "${project}.${sample}.reformed.vcf.gz"
-
-     # Publish the vcfeval summaries under project-prefixed names.
-     cp "${sample}_SNV/summary.txt" "${project}.${sample}_SNV_precision_recall.txt"
-     cp "${sample}_INDEL/summary.txt" "${project}.${sample}_INDEL_precision_recall.txt"
-   >>>
-
-   runtime {
-     docker:docker
-     cluster:cluster_config
-     systemDisk:"cloud_ssd 40"
-     dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
-   }
-
-   output {
-     File rtg_vcf = "${project}.${sample}.reformed.vcf.gz"
-     File rtg_vcf_index = "${project}.${sample}.reformed.vcf.gz.tbi"
-     File SNV_result = "${project}.${sample}_SNV_precision_recall.txt"
-     File Indel_result = "${project}.${sample}_INDEL_precision_recall.txt"
-   }
- }
|