|
|
@@ -1,6 +1,5 @@ |
|
|
|
task benchmark { |
|
|
|
File vcf |
|
|
|
File vcf_idx |
|
|
|
File benchmarking_dir |
|
|
|
File ref_dir |
|
|
|
String sample |
|
|
@@ -16,17 +15,33 @@ task benchmark { |
|
|
|
nt=$(nproc) |
|
|
|
mkdir -p /cromwell_root/tmp |
|
|
|
cp -r ${ref_dir} /cromwell_root/tmp/ |
|
|
|
cp -r ${benchmarking_dir} /cromwell_root/tmp/ |
|
|
|
|
|
|
|
export HGREF=/cromwell_root/tmp/reference_data/GRCh38.d1.vd1.fa |
|
|
|
|
|
|
|
cat ${vcf} | grep '#' > header |
|
|
|
cat ${vcf} | grep -v '#' | grep -v '0/0' | grep -v '\./\.'| awk ' |
|
|
|
BEGIN { OFS = "\t" } |
|
|
|
{ |
|
|
|
for ( i=9; i<=NF; i++ ) { |
|
|
|
split($i,a,":") ;$i = a[1]; |
|
|
|
} |
|
|
|
} |
|
|
|
{ print } |
|
|
|
' > body |
|
|
|
cat header body > filtered.vcf |
|
|
|
|
|
|
|
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip filtered.vcf -c > ${sample}.rtg.vcf.gz |
|
|
|
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${sample}.rtg.vcf.gz |
|
|
|
|
|
|
|
if [[ ${sample} =~ "LCL5" ]];then |
|
|
|
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL5.ref.v20201103.vcf.gz ${vcf} -f ${benchmarking_dir}/Quartet.callable.voted.collapse.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta} |
|
|
|
/opt/hap.py/bin/hap.py /cromwell_root/tmp/reference_datasets_v202103/LCL5.high.confidence.calls.vcf ${sample}.rtg.vcf.gz -f /cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta} |
|
|
|
elif [[ ${sample} =~ "LCL6" ]]; then |
|
|
|
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL6.ref.v20201103.vcf.gz ${vcf} -f ${benchmarking_dir}/Quartet.callable.voted.collapse.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta} |
|
|
|
/opt/hap.py/bin/hap.py /cromwell_root/tmp/reference_datasets_v202103/LCL6.high.confidence.calls.vcf.gz ${sample}.rtg.vcf.gz -f /cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta} |
|
|
|
elif [[ ${sample} =~ "LCL7" ]]; then |
|
|
|
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL7.ref.v20201103.vcf.gz ${vcf} -f ${benchmarking_dir}/Quartet.callable.voted.collapse.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta} |
|
|
|
/opt/hap.py/bin/hap.py /cromwell_root/tmp/reference_datasets_v202103/LCL7.high.confidence.calls.vcf.gz ${sample}.rtg.vcf.gz -f /cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta} |
|
|
|
elif [[ ${sample} =~ "LCL8" ]]; then |
|
|
|
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL8.ref.v20201103.vcf.gz ${vcf} -f ${benchmarking_dir}/Quartet.callable.voted.collapse.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta} |
|
|
|
/opt/hap.py/bin/hap.py /cromwell_root/tmp/reference_datasets_v202103/LCL8.high.confidence.calls.vcf.gz ${sample}.rtg.vcf.gz -f /cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta} |
|
|
|
else |
|
|
|
echo "only for quartet samples" |
|
|
|
fi |
|
|
@@ -40,6 +55,8 @@ task benchmark { |
|
|
|
} |
|
|
|
|
|
|
|
output { |
|
|
|
File rtg_vcf = "${sample}.rtg.vcf.gz" |
|
|
|
File rtg_vcf_index = "${sample}.rtg.vcf.gz.tbi" |
|
|
|
File gzip_vcf = "${sample}.vcf.gz" |
|
|
|
File gzip_vcf_index = "${sample}.vcf.gz.tbi" |
|
|
|
File roc_all_csv = "${sample}.roc.all.csv.gz" |