@@ -0,0 +1,18 @@ | |||
{ | |||
"{{ project_name }}.benchmarking_dir": "oss://chinese-quartet/quartet-result-data/NCTR_benchmarking_20181215/", | |||
"{{ project_name }}.vcfstat.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-hap:latest", | |||
"{{ project_name }}.fasta": "GRCh38.d1.vd1.fa", | |||
"{{ project_name }}.benchmark.cluster_config": "OnDemand bcs.a2.3xlarge img-ubuntu-vpc", | |||
"{{ project_name }}.benchmark.disk_size": "150", | |||
"{{ project_name }}.vcfstat.disk_size": "100", | |||
"{{ project_name }}.benchmark.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-hap:latest", | |||
"{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}", | |||
"{{ project_name }}.mergeNum.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/gatk:v2019.01", | |||
"{{ project_name }}.mergeNum.disk_size": "100", | |||
"{{ project_name }}.multiqc.cluster_config": "OnDemand bcs.a2.3xlarge img-ubuntu-vpc", | |||
"{{ project_name }}.multiqc.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/multiqc:v1.8", | |||
"{{ project_name }}.mergeNum.cluster_config": "OnDemand bcs.a2.large img-ubuntu-vpc", | |||
"{{ project_name }}.vcfstat.cluster_config": "OnDemand bcs.a2.3xlarge img-ubuntu-vpc", | |||
"{{ project_name }}.multiqc.disk_size": "100", | |||
"{{ project_name }}.ref_dir": "oss://chinese-quartet/quartet-storage-data/reference_data/" | |||
} |
@@ -0,0 +1,64 @@ | |||
task benchmark { | |||
File vcf | |||
File benchmarking_dir | |||
File ref_dir | |||
String sample = basename(vcf,".vcf") | |||
String sample_mark | |||
String fasta | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
command <<< | |||
set -o pipefail | |||
set -e | |||
nt=$(nproc) | |||
mkdir -p /cromwell_root/tmp | |||
cp -r ${ref_dir} /cromwell_root/tmp/ | |||
export HGREF=/cromwell_root/tmp/reference_data/GRCh38.d1.vd1.fa | |||
cat ${vcf} | grep '#' > header | |||
cat ${vcf} | grep -v '#' > body | |||
cat body | grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' > body.filtered | |||
cat header body.filtered > ${sample}.filtered.vcf | |||
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip ${sample}.filtered.vcf -c > ${sample}.filtered.rtg.vcf.gz | |||
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${sample}.filtered.rtg.vcf.gz | |||
if [ ${sample_mark} == "LCL5" ];then | |||
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL5.vcf.gz ${sample}.rtg.vcf.gz -f ${benchmarking_dir}/LCL5.bed.gz --threads $nt -o ${sample} -r ${ref_dir}/${fasta} | |||
elif [ ${sample_mark} == "LCL6" ]; then | |||
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL6.vcf.gz ${sample}.rtg.vcf.gz -f ${benchmarking_dir}/LCL6.bed.gz --threads $nt -o ${sample} -r ${ref_dir}/${fasta} | |||
elif [ ${sample_mark} == "LCL7" ]; then | |||
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL7.vcf.gz ${sample}.rtg.vcf.gz -f ${benchmarking_dir}/LCL7.bed.gz --threads $nt -o ${sample} -r ${ref_dir}/${fasta} | |||
elif [ ${sample_mark} == "LCL8" ]; then | |||
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL8.vcf.gz ${sample}.rtg.vcf.gz -f ${benchmarking_dir}/LCL8.bed.gz --threads $nt -o ${sample} -r ${ref_dir}/${fasta} | |||
else | |||
echo "only for quartet samples" | |||
fi | |||
>>> | |||
runtime { | |||
docker:docker | |||
cluster:cluster_config | |||
systemDisk:"cloud_ssd 40" | |||
dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File rtg_vcf = "${sample}.rtg.vcf.gz" | |||
File rtg_vcf_index = "${sample}.rtg.vcf.gz.tbi" | |||
File gzip_vcf = "${sample}.vcf.gz" | |||
File gzip_vcf_index = "${sample}.vcf.gz.tbi" | |||
File roc_all_csv = "${sample}.roc.all.csv.gz" | |||
File roc_indel = "${sample}.roc.Locations.INDEL.csv.gz" | |||
File roc_indel_pass = "${sample}.roc.Locations.INDEL.PASS.csv.gz" | |||
File roc_snp = "${sample}.roc.Locations.SNP.csv.gz" | |||
File roc_snp_pass = "${sample}.roc.Locations.SNP.PASS.csv.gz" | |||
File summary = "${sample}.summary.csv" | |||
File extended = "${sample}.extended.csv" | |||
File metrics = "${sample}.metrics.json.gz" | |||
} | |||
} |
@@ -0,0 +1,26 @@ | |||
task mergeNum { | |||
Array[File] vcfnumber | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
command <<< | |||
set -o pipefail | |||
set -e | |||
for i in ${sep=" " vcfnumber} | |||
do | |||
cat $i | cut -d':' -f2 | tr '\n' '\t' | sed s'/\t$/\n/g' >> vcfstats | |||
done | |||
sed '1i\File\tFailed Filters\tPassed Filters\tSNPs\tMNPs\tInsertions\tDeletions\tIndels\tSame as reference\tSNP Transitions/Transversions\tTotal Het/Hom ratio\tSNP Het/Hom ratio\tMNP Het/Hom ratio\tInsertion Het/Hom ratio\tDeletion Het/Hom ratio\tIndel Het/Hom ratio\tInsertion/Deletion ratio\tIndel/SNP+MNP ratio' vcfstats > vcfstats.txt | |||
>>> | |||
runtime { | |||
docker:docker | |||
cluster:cluster_config | |||
systemDisk:"cloud_ssd 40" | |||
dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File vcfstat="vcfstats.txt" | |||
} | |||
} |
@@ -0,0 +1,31 @@ | |||
task multiqc { | |||
Array[File] summary | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
command <<< | |||
set -o pipefail | |||
set -e | |||
mkdir -p /cromwell_root/tmp/benchmark | |||
cp ${sep=" " summary} /cromwell_root/tmp/benchmark | |||
multiqc /cromwell_root/tmp/ | |||
>>> | |||
runtime { | |||
docker:docker | |||
cluster:cluster_config | |||
systemDisk:"cloud_ssd 40" | |||
dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File multiqc_html = "multiqc_report.html" | |||
Array[File] multiqc_txt = glob("multiqc_data/*") | |||
} | |||
} |
@@ -0,0 +1,24 @@ | |||
task vcfstat { | |||
File rtg_vcf | |||
File rtg_vcf_index | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
command <<< | |||
set -o pipefail | |||
set -e | |||
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfstats ${rtg_vcf} > onestats.txt | |||
>>> | |||
runtime { | |||
docker:docker | |||
cluster:cluster_config | |||
systemDisk:"cloud_ssd 40" | |||
dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File vcfnumber="onestats.txt" | |||
} | |||
} |
@@ -0,0 +1,44 @@ | |||
import "./tasks/benchmark.wdl" as benchmark | |||
import "./tasks/multiqc.wdl" as multiqc | |||
import "./tasks/vcfstat.wdl" as vcfstat | |||
import "./tasks/mergeNum.wdl" as mergeNum | |||
workflow {{ project_name }} { | |||
File inputSamplesFile | |||
Array[Array[File]] inputSamples = read_tsv(inputSamplesFile) | |||
File benchmarking_dir | |||
File ref_dir | |||
String fasta | |||
scatter (sample in inputSamples) { | |||
call benchmark.benchmark as benchmark { | |||
input: | |||
vcf=sample[0], | |||
benchmarking_dir=benchmarking_dir, | |||
ref_dir=ref_dir, | |||
sample_mark=sample[1], | |||
fasta=fasta | |||
} | |||
call vcfstat.vcfstat as vcfstat { | |||
input: | |||
rtg_vcf=benchmark.rtg_vcf, | |||
rtg_vcf_index=benchmark.rtg_vcf_index | |||
} | |||
} | |||
call multiqc.multiqc as multiqc { | |||
input: | |||
summary=benchmark.summary | |||
} | |||
call mergeNum.mergeNum as mergeNum { | |||
input: | |||
vcfnumber=vcfstat.vcfnumber | |||
} | |||
} | |||