|
|
@@ -1,7 +1,7 @@ |
|
|
|
task benchmark {
  # Gzip-compressed call set to benchmark; the command section decompresses
  # it with gunzip before re-compressing it with rtg bgzip.
  File vcf

  # Copied into /cromwell_root/tmp by the command section.
  # NOTE(review): presumably this is the reference_datasets_v202103 directory
  # that the command reads the truth sets from -- confirm against the caller.
  File benchmarking_dir

  # Reference directory; also copied into /cromwell_root/tmp, and joined with
  # `fasta` to form the reference path handed to hap.py via -r.
  File ref_dir

  # Passed to the rtg comparison steps as the -t argument.
  File sdf

  # Sample name: the basename of `vcf` with the ".raw.vcf.gz" suffix removed.
  # Used as the prefix of every file the task writes.
  String sample = basename(vcf,".raw.vcf.gz")

  # File name of the reference FASTA, relative to `ref_dir`.
  String fasta

  # NOTE(review): not referenced in the visible command; presumably consumed
  # by the runtime section -- confirm.
  String docker
|
|
@@ -14,29 +14,37 @@ task benchmark { |
|
|
|
# Benchmark one Quartet sample against its high-confidence truth set:
#   1. stage the reference and truth data under /cromwell_root/tmp
#   2. re-compress and index the input calls, then split them into SNV / INDEL
#   3. run hap.py on the full call set and rtg vcfeval on each variant class
#   4. copy each vcfeval summary to <sample>_<class>_precision_recall.txt
#
# NOTE: dollar-brace placeholders in this section are filled in from the task
# declarations before the shell runs, so shell-only variables defined below
# are deliberately referenced WITHOUT braces (like $nt).
set -e

nt=$(nproc)
rtg=/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg
truth_dir=/cromwell_root/tmp/reference_datasets_v202103
conf_bed=$truth_dir/Quartet.high.confidence.region.v202103.bed

# Stage reference data and truth sets.
mkdir -p /cromwell_root/tmp
cp -r ${ref_dir} /cromwell_root/tmp/
cp -r ${benchmarking_dir} /cromwell_root/tmp/

export HGREF=/cromwell_root/tmp/reference_data/GRCh38.d1.vd1.fa

# Record which truth-set files were staged (returned as the file_list output).
# The original wrote this identical listing twice; once is enough.
ls $truth_dir/ > files.txt

# Re-compress the calls with bgzip so they can be indexed.
gunzip -c ${vcf} > unzip.vcf
$rtg bgzip unzip.vcf -c > ${sample}.rtg.vcf.gz
$rtg index -f vcf ${sample}.rtg.vcf.gz

# Split the calls by variant class for the per-class comparisons.
$rtg vcffilter -i ${sample}.rtg.vcf.gz -o ${sample}.rtg.SNV.vcf.gz --snps-only
$rtg vcffilter -i ${sample}.rtg.vcf.gz -o ${sample}.rtg.INDEL.vcf.gz --non-snps-only

# Pick the truth set from the sample name (substring match, first hit wins,
# same precedence as the original if/elif chain).
case "${sample}" in
  *LCL5*) truth_id=LCL5 ;;
  *LCL6*) truth_id=LCL6 ;;
  *LCL7*) truth_id=LCL7 ;;
  *LCL8*) truth_id=LCL8 ;;
  *)
    echo "only for quartet samples"
    # Nothing below can succeed without a truth set, so stop here instead of
    # failing later on a missing summary file.
    exit 1
    ;;
esac

/opt/hap.py/bin/hap.py \
  $truth_dir/$truth_id.high.confidence.calls.vcf.gz \
  ${sample}.rtg.vcf.gz \
  -f $conf_bed \
  --threads $nt \
  -o ${sample} \
  -r ${ref_dir}/${fasta}

# FIX: the rtg launcher needs the `vcfeval` subcommand before -b/-c/-o/-t.
$rtg vcfeval \
  -b $truth_dir/$truth_id.high.confidence.calls.SNV.vcf.gz \
  -c ${sample}.rtg.SNV.vcf.gz \
  -o ${sample}_SNV \
  -t ${sdf} \
  --bed-regions=$conf_bed

# FIX: the INDEL comparison must read the INDEL-only call set; the original
# passed the SNV file here in every branch.
$rtg vcfeval \
  -b $truth_dir/$truth_id.high.confidence.calls.INDEL.vcf.gz \
  -c ${sample}.rtg.INDEL.vcf.gz \
  -o ${sample}_INDEL \
  -t ${sdf} \
  --bed-regions=$conf_bed

# FIX: redirect the summaries into the declared output files; without `>` cat
# tried to READ the (non-existent) *_precision_recall.txt files and failed.
cat ${sample}_SNV/summary.txt > ${sample}_SNV_precision_recall.txt
cat ${sample}_INDEL/summary.txt > ${sample}_INDEL_precision_recall.txt
|
|
|
>>> |
|
|
|
|
|
|
|
runtime { |
|
|
@@ -49,16 +57,8 @@ task benchmark { |
|
|
|
output {
  # bgzip-recompressed input calls and their index, written by the
  # `rtg bgzip` / `rtg index` steps of the command.
  File rtg_vcf = "${sample}.rtg.vcf.gz"
  File rtg_vcf_index = "${sample}.rtg.vcf.gz.tbi"

  # Files carrying the bare "${sample}" prefix.
  # NOTE(review): presumably produced by hap.py, which the command runs with
  # `-o ${sample}` -- confirm the exact file set against the hap.py version
  # in the image.
  File gzip_vcf = "${sample}.vcf.gz"
  File gzip_vcf_index = "${sample}.vcf.gz.tbi"
  File roc_all_csv = "${sample}.roc.all.csv.gz"
  File roc_indel = "${sample}.roc.Locations.INDEL.csv.gz"
  File roc_indel_pass = "${sample}.roc.Locations.INDEL.PASS.csv.gz"
  File roc_snp = "${sample}.roc.Locations.SNP.csv.gz"
  File roc_snp_pass = "${sample}.roc.Locations.SNP.PASS.csv.gz"
  File summary = "${sample}.summary.csv"
  File extended = "${sample}.extended.csv"
  File metrics = "${sample}.metrics.json.gz"

  # Per-class precision/recall text files named by the command's final
  # `cat` steps.
  File SNV_result = "${sample}_SNV_precision_recall.txt"
  File Indel_result = "${sample}_INDEL_precision_recall.txt"

  # Listing of the staged truth-set directory (`ls ... > files.txt`).
  File file_list = "files.txt"
}
|
|
|
} |