{
  "{{ project_name }}.benchmarking_dir": "oss://chinese-quartet/quartet-result-data/NCTR_benchmarking_20181215/",
  "{{ project_name }}.vcfstat.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-hap:latest",
  "{{ project_name }}.fasta": "GRCh38.d1.vd1.fa",
  "{{ project_name }}.benchmark.cluster_config": "OnDemand bcs.a2.3xlarge img-ubuntu-vpc",
  "{{ project_name }}.benchmark.disk_size": "150",
  "{{ project_name }}.vcfstat.disk_size": "100",
  "{{ project_name }}.benchmark.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-hap:latest",
  "{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}",
  "{{ project_name }}.mergeNum.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/gatk:v2019.01",
  "{{ project_name }}.mergeNum.disk_size": "100",
  "{{ project_name }}.multiqc.cluster_config": "OnDemand bcs.a2.3xlarge img-ubuntu-vpc",
  "{{ project_name }}.multiqc.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/multiqc:v1.8",
  "{{ project_name }}.mergeNum.cluster_config": "OnDemand bcs.a2.large img-ubuntu-vpc",
  "{{ project_name }}.vcfstat.cluster_config": "OnDemand bcs.a2.3xlarge img-ubuntu-vpc",
  "{{ project_name }}.multiqc.disk_size": "100",
  "{{ project_name }}.ref_dir": "oss://chinese-quartet/quartet-storage-data/reference_data/"
}
# Benchmark one VCF against a Quartet reference call set with hap.py.
#
# Inputs:
#   vcf              - VCF to evaluate; `sample` is its basename with ".vcf" removed.
#   benchmarking_dir - directory containing <sample_mark>.vcf.gz and <sample_mark>.bed.gz.
#   ref_dir          - directory containing the reference FASTA.
#   sample_mark      - reference to compare against: LCL5, LCL6, LCL7 or LCL8.
#   fasta            - file name of the reference FASTA inside ref_dir.
#   docker, cluster_config, disk_size - runtime settings.
#
# Steps:
#   1. Keep header lines plus records on chr1..chr22 and chrX.
#   2. Compress and index the filtered VCF with rtg as <sample>.rtg.vcf.gz.
#   3. Run hap.py with output prefix <sample>.
#
# The compressed file is written as <sample>.rtg.vcf.gz because that is the
# name hap.py reads and the name the output section declares.
# The task exits non-zero when sample_mark is not one of the four Quartet samples.
task benchmark {
  File vcf
  File benchmarking_dir
  File ref_dir
  String sample = basename(vcf,".vcf")
  String sample_mark
  String fasta
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -o pipefail
    set -e
    nt=$(nproc)

    # Stage a copy of the reference directory and export HGREF.
    # NOTE(review): HGREF hard-codes the "reference_data" directory and FASTA name,
    # while hap.py below is given -r from ref_dir and fasta; confirm both are needed.
    mkdir -p /cromwell_root/tmp
    cp -r ${ref_dir} /cromwell_root/tmp/
    export HGREF=/cromwell_root/tmp/reference_data/GRCh38.d1.vd1.fa

    # Split header from records, then keep only chr1..chr22 and chrX records.
    cat ${vcf} | grep '#' > header
    cat ${vcf} | grep -v '#' > body
    cat body | grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' > body.filtered
    cat header body.filtered > ${sample}.filtered.vcf

    # Compress and index under the name consumed by hap.py and by the outputs.
    /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip ${sample}.filtered.vcf -c > ${sample}.rtg.vcf.gz
    /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${sample}.rtg.vcf.gz

    # The truth VCF and confident-region BED are selected by sample_mark.
    case "${sample_mark}" in
      LCL5|LCL6|LCL7|LCL8)
        /opt/hap.py/bin/hap.py ${benchmarking_dir}/${sample_mark}.vcf.gz ${sample}.rtg.vcf.gz -f ${benchmarking_dir}/${sample_mark}.bed.gz --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
        ;;
      *)
        # Fail here instead of exiting 0 and failing later on missing outputs.
        echo "only for quartet samples" >&2
        exit 1
        ;;
    esac
  >>>

  runtime {
    docker:docker
    cluster:cluster_config
    systemDisk:"cloud_ssd 40"
    dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    # Filtered, compressed and indexed input VCF (written by rtg above).
    File rtg_vcf = "${sample}.rtg.vcf.gz"
    File rtg_vcf_index = "${sample}.rtg.vcf.gz.tbi"
    # Remaining files are expected from hap.py under the "-o" prefix.
    File gzip_vcf = "${sample}.vcf.gz"
    File gzip_vcf_index = "${sample}.vcf.gz.tbi"
    File roc_all_csv = "${sample}.roc.all.csv.gz"
    File roc_indel = "${sample}.roc.Locations.INDEL.csv.gz"
    File roc_indel_pass = "${sample}.roc.Locations.INDEL.PASS.csv.gz"
    File roc_snp = "${sample}.roc.Locations.SNP.csv.gz"
    File roc_snp_pass = "${sample}.roc.Locations.SNP.PASS.csv.gz"
    File summary = "${sample}.summary.csv"
    File extended = "${sample}.extended.csv"
    File metrics = "${sample}.metrics.json.gz"
  }
}
# Merge per-sample statistics reports into one tab-separated table.
#
# Inputs:
#   vcfnumber - report files, one per sample.
#   docker, cluster_config, disk_size - runtime settings.
#
# Each report becomes one row: the second ':'-delimited field of every line is
# taken and the values are joined with tabs. A fixed header row is then
# inserted before the first row, and the result is written to vcfstats.txt.
task mergeNum {
  Array[File] vcfnumber
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -e -o pipefail

    # One appended row per report file.
    for stats_file in ${sep=" " vcfnumber}
    do
      cut -d':' -f2 $stats_file | tr '\n' '\t' | sed s'/\t$/\n/g' >> vcfstats
    done

    # Insert the column header before the first row.
    sed '1i\File\tFailed Filters\tPassed Filters\tSNPs\tMNPs\tInsertions\tDeletions\tIndels\tSame as reference\tSNP Transitions/Transversions\tTotal Het/Hom ratio\tSNP Het/Hom ratio\tMNP Het/Hom ratio\tInsertion Het/Hom ratio\tDeletion Het/Hom ratio\tIndel Het/Hom ratio\tInsertion/Deletion ratio\tIndel/SNP+MNP ratio' vcfstats > vcfstats.txt
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File vcfstat = "vcfstats.txt"
  }
}
# Run MultiQC over a set of summary files.
#
# Inputs:
#   summary - summary files to aggregate.
#   docker, cluster_config, disk_size - runtime settings.
#
# The summaries are copied into /cromwell_root/tmp/benchmark and multiqc is
# run on /cromwell_root/tmp/. The task collects multiqc_report.html and every
# file under multiqc_data/ from the working directory.
task multiqc {
  Array[File] summary
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -e -o pipefail

    # Gather all summaries in one directory for multiqc to scan.
    mkdir -p /cromwell_root/tmp/benchmark
    cp ${sep=" " summary} /cromwell_root/tmp/benchmark

    multiqc /cromwell_root/tmp/
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File multiqc_html = "multiqc_report.html"
    Array[File] multiqc_txt = glob("multiqc_data/*")
  }
}
# Run `rtg vcfstats` on one compressed VCF.
#
# Inputs:
#   rtg_vcf       - VCF passed to rtg vcfstats.
#   rtg_vcf_index - index for rtg_vcf; declared as an input but not referenced
#                   by the command.
#   docker, cluster_config, disk_size - runtime settings.
#
# Output:
#   vcfnumber - the captured standard output of rtg vcfstats (onestats.txt).
task vcfstat {
  File rtg_vcf
  File rtg_vcf_index
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -e -o pipefail

    /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfstats ${rtg_vcf} > onestats.txt
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File vcfnumber = "onestats.txt"
  }
}
import "./tasks/benchmark.wdl" as benchmark | |||||
import "./tasks/multiqc.wdl" as multiqc | |||||
import "./tasks/vcfstat.wdl" as vcfstat | |||||
import "./tasks/mergeNum.wdl" as mergeNum | |||||
# Benchmarking workflow (the workflow name is filled in by the template).
#
# Inputs:
#   inputSamplesFile - TSV read with read_tsv; column 0 of each row is passed
#                      as the VCF and column 1 as sample_mark.
#   benchmarking_dir - passed to the benchmark task.
#   ref_dir          - passed to the benchmark task.
#   fasta            - passed to the benchmark task.
#
# Each row is benchmarked and its rtg VCF is passed to vcfstat. After the
# scatter, all benchmark summaries go to multiqc and all vcfstat reports go
# to mergeNum.
workflow {{ project_name }} {
  File inputSamplesFile
  Array[Array[File]] inputSamples = read_tsv(inputSamplesFile)
  File benchmarking_dir
  File ref_dir
  String fasta

  # One benchmark + vcfstat pair per row of the sample sheet.
  scatter (row in inputSamples) {
    call benchmark.benchmark as benchmark {
      input:
        vcf = row[0],
        sample_mark = row[1],
        benchmarking_dir = benchmarking_dir,
        ref_dir = ref_dir,
        fasta = fasta
    }

    call vcfstat.vcfstat as vcfstat {
      input:
        rtg_vcf = benchmark.rtg_vcf,
        rtg_vcf_index = benchmark.rtg_vcf_index
    }
  }

  # Aggregation steps over the gathered scatter outputs.
  call multiqc.multiqc as multiqc {
    input:
      summary = benchmark.summary
  }

  call mergeNum.mergeNum as mergeNum {
    input:
      vcfnumber = vcfstat.vcfnumber
  }
}