@@ -0,0 +1,11 @@ | |||
{
  "{{ project_name }}.benchmarking_dir": "oss://chinese-quartet/quartet-result-data/NCTR_benchmarking_20181215/",
  "{{ project_name }}.fasta": "GRCh38.d1.vd1.fa",
  "{{ project_name }}.sample_mark": "{{ sample_mark }}",
  "{{ project_name }}.vcf_index": "{{ vcf_index }}",
  "{{ project_name }}.cluster_config": "OnDemand bcs.a2.7xlarge img-ubuntu-vpc",
  "{{ project_name }}.disk_size": "200",
  "{{ project_name }}.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-hap:latest",
  "{{ project_name }}.vcf": "{{ vcf }}",
  "{{ project_name }}.ref_dir": "oss://chinese-quartet/quartet-storage-data/reference_data/"
}
@@ -0,0 +1,56 @@ | |||
# Compare one Quartet sample's VCF against its benchmark call set with hap.py.
#
# Inputs:
#   vcf               query VCF to be evaluated
#   vcf_index         index of the query VCF; not referenced in the command
#                     (presumably declared so it is localized next to vcf - confirm)
#   benchmarking_dir  directory containing <sample_mark>.vcf.gz and <sample_mark>.bed.gz
#   ref_dir           directory containing the reference FASTA
#   sample_mark       which Quartet sample this is: LCL5, LCL6, LCL7 or LCL8
#   fasta             file name of the reference FASTA inside ref_dir
#   docker, cluster_config, disk_size
#                     runtime settings forwarded to the backend
#
# Outputs: the files hap.py writes under the "-o <sample>" prefix.
task benchmark {
    File vcf
    File vcf_index
    File benchmarking_dir
    File ref_dir
    # NOTE(review): only a literal ".vcf" suffix is stripped; a ".vcf.gz" input
    # keeps its full file name as the output prefix.
    String sample = basename(vcf,".vcf")
    String sample_mark
    String fasta
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        nt=$(nproc)

        # Fail fast (and with a non-zero status) for anything that is not a
        # Quartet sample, instead of finishing "successfully" without outputs.
        case "${sample_mark}" in
            LCL5|LCL6|LCL7|LCL8)
                ;;
            *)
                echo "only for quartet samples" >&2
                exit 1
                ;;
        esac

        # Copy the reference directory to local scratch and point HGREF at the
        # same FASTA that is passed to hap.py via -r. "cp -r" places the copy
        # under the source directory's own name, hence the basename lookup.
        mkdir -p /cromwell_root/tmp
        cp -r ${ref_dir} /cromwell_root/tmp/
        export HGREF=/cromwell_root/tmp/$(basename ${ref_dir})/${fasta}

        # Truth VCF and confident-region BED are selected by sample_mark.
        /opt/hap.py/bin/hap.py \
            ${benchmarking_dir}/${sample_mark}.vcf.gz \
            ${vcf} \
            -f ${benchmarking_dir}/${sample_mark}.bed.gz \
            --threads $nt \
            -o ${sample} \
            -r ${ref_dir}/${fasta}
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File gzip_vcf = "${sample}.vcf.gz"
        File gzip_vcf_index = "${sample}.vcf.gz.tbi"
        File roc_all_csv = "${sample}.roc.all.csv.gz"
        File roc_indel = "${sample}.roc.Locations.INDEL.csv.gz"
        File roc_indel_pass = "${sample}.roc.Locations.INDEL.PASS.csv.gz"
        File roc_snp = "${sample}.roc.Locations.SNP.csv.gz"
        File roc_snp_pass = "${sample}.roc.Locations.SNP.PASS.csv.gz"
        File summary = "${sample}.summary.csv"
        File extended = "${sample}.extended.csv"
        File metrics = "${sample}.metrics.json.gz"
    }
}
@@ -0,0 +1,26 @@ | |||
# Merge several per-VCF statistics files into one tab-separated table.
#
# Inputs:
#   vcfnumber   statistics files, one per VCF, made of "label : value" lines
#   docker, cluster_config, disk_size
#               runtime settings forwarded to the backend
#
# Output:
#   vcfstat     vcfstats.txt - a header row followed by one row per input file
task mergeNum {
    Array[File] vcfnumber
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e

        # Write the header first so the table exists (header only) even when
        # no statistics files were supplied.
        printf 'File\tFailed Filters\tPassed Filters\tSNPs\tMNPs\tInsertions\tDeletions\tIndels\tSame as reference\tSNP Transitions/Transversions\tTotal Het/Hom ratio\tSNP Het/Hom ratio\tMNP Het/Hom ratio\tInsertion Het/Hom ratio\tDeletion Het/Hom ratio\tIndel Het/Hom ratio\tInsertion/Deletion ratio\tIndel/SNP+MNP ratio\n' > vcfstats.txt

        for i in ${sep=" " vcfnumber}
        do
            # Keep the text after the first ':' of every line, join the lines
            # with tabs, and turn the trailing tab into the row's newline.
            cut -d':' -f2 "$i" | tr '\n' '\t' | sed 's/\t$/\n/' >> vcfstats.txt
        done
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File vcfstat = "vcfstats.txt"
    }
}
@@ -0,0 +1,31 @@ | |||
# Collect the given summary files in one directory and run MultiQC over it.
#
# Inputs:
#   summary     summary files to include in the report
#   docker, cluster_config, disk_size
#               runtime settings forwarded to the backend
#
# Outputs:
#   multiqc_html   multiqc_report.html
#   multiqc_txt    every file found under multiqc_data/
task multiqc {
    Array[File] summary
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -eo pipefail

        # Stage all summaries side by side, then scan the parent directory.
        mkdir -p /cromwell_root/tmp/benchmark
        cp ${sep=" " summary} /cromwell_root/tmp/benchmark
        multiqc /cromwell_root/tmp/
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File multiqc_html = "multiqc_report.html"
        Array[File] multiqc_txt = glob("multiqc_data/*")
    }
}
@@ -0,0 +1,24 @@ | |||
# Run "rtg vcfstats" on a single VCF and capture its report.
#
# Inputs:
#   rtg_vcf         VCF to summarise
#   rtg_vcf_index   index of rtg_vcf; not referenced in the command
#                   (presumably declared so it is localized next to the VCF - confirm)
#   docker, cluster_config, disk_size
#                   runtime settings forwarded to the backend
#
# Output:
#   vcfnumber       onestats.txt - the standard output of rtg vcfstats
task vcfstat {
    File rtg_vcf
    File rtg_vcf_index
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -eo pipefail

        /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfstats ${rtg_vcf} > onestats.txt
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File vcfnumber = "onestats.txt"
    }
}
@@ -0,0 +1,29 @@ | |||
import "./tasks/benchmark.wdl" as benchmark | |||
# Templated workflow ("{{ project_name }}" is substituted when the project is
# rendered). It declares the workflow-level inputs and forwards each of them,
# unchanged, to a single call of the benchmark task.
workflow {{ project_name }} {
    # Data inputs
    File vcf
    File vcf_index
    File benchmarking_dir
    File ref_dir

    # Sample and reference selection
    String sample_mark
    String fasta

    # Runtime settings
    String docker
    String cluster_config
    String disk_size

    call benchmark.benchmark as benchmark {
        input:
            vcf = vcf,
            vcf_index = vcf_index,
            benchmarking_dir = benchmarking_dir,
            ref_dir = ref_dir,
            sample_mark = sample_mark,
            fasta = fasta,
            docker = docker,
            cluster_config = cluster_config,
            disk_size = disk_size
    }
}