@@ -0,0 +1,42 @@ | |||
import "./tasks/benchmark.wdl" as benchmark | |||
import "./tasks/multiqc.wdl" as multiqc | |||
import "./tasks/vcfstat.wdl" as vcfstat | |||
import "./tasks/mergeNum.wdl" as mergeNum | |||
# Workflow: runs the benchmark and vcfstat tasks once per row of a TSV sample sheet, | |||
# then passes the per-row results to multiqc and mergeNum. | |||
# The workflow name is filled in from the `project_name` template variable. | |||
workflow {{ project_name }} { | |||
# Path to the TSV sample sheet; each row is one sample. | |||
File inputSamplesFile | |||
# Rows of the sheet as parsed by read_tsv. Column 0 is used as the VCF and | |||
# column 1 as the sample mark (see the benchmark call below). | |||
# NOTE(review): column 1 is typed as File here but is passed to `sample_mark`, | |||
# which looks like a plain label rather than a path - confirm the coercion is intended. | |||
Array[Array[File]] inputSamples = read_tsv(inputSamplesFile) | |||
# Forwarded unchanged to the benchmark task. | |||
File benchmarking_dir | |||
# Forwarded unchanged to the benchmark task. | |||
File ref_dir | |||
# Forwarded unchanged to the benchmark task. | |||
# presumably the reference FASTA file name inside ref_dir - verify against the task. | |||
String fasta | |||
# One shard per row of the sample sheet. | |||
scatter (sample in inputSamples) { | |||
call benchmark.benchmark as benchmark { | |||
input: | |||
vcf=sample[0], | |||
benchmarking_dir=benchmarking_dir, | |||
ref_dir=ref_dir, | |||
sample_mark=sample[1], | |||
fasta=fasta | |||
} | |||
# Runs on the rtg_vcf / rtg_vcf_index outputs of this shard's benchmark call. | |||
call vcfstat.vcfstat as vcfstat { | |||
input: | |||
rtg_vcf=benchmark.rtg_vcf, | |||
rtg_vcf_index=benchmark.rtg_vcf_index | |||
} | |||
} | |||
# Referenced outside the scatter, so benchmark.summary is the collection of | |||
# summary outputs from all shards. | |||
call multiqc.multiqc as multiqc { | |||
input: | |||
summary=benchmark.summary | |||
} | |||
# Likewise receives the vcfnumber outputs gathered from every vcfstat shard. | |||
call mergeNum.mergeNum as mergeNum { | |||
input: | |||
vcfnumber=vcfstat.vcfnumber | |||
} | |||
} | |||
@@ -1,18 +1,10 @@ | |||
{ | |||
"{{ project_name }}.benchmarking_dir": "oss://pgx-result/renluyao/manuscript/benchmark_calls_v3.0/", | |||
"{{ project_name }}.vcfstat.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-hap:latest", | |||
"{{ project_name }}.fasta": "GRCh38.d1.vd1.fa", | |||
"{{ project_name }}.benchmark.cluster_config": "OnDemand bcs.a2.7xlarge img-ubuntu-vpc", | |||
"{{ project_name }}.benchmark.disk_size": "500", | |||
"{{ project_name }}.vcfstat.disk_size": "100", | |||
"{{ project_name }}.benchmark.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-hap:latest", | |||
"{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}", | |||
"{{ project_name }}.mergeNum.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/gatk:v2019.01", | |||
"{{ project_name }}.mergeNum.disk_size": "100", | |||
"{{ project_name }}.multiqc.cluster_config": "OnDemand bcs.a2.3xlarge img-ubuntu-vpc", | |||
"{{ project_name }}.multiqc.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/multiqc:v1.8", | |||
"{{ project_name }}.mergeNum.cluster_config": "OnDemand bcs.a2.large img-ubuntu-vpc", | |||
"{{ project_name }}.vcfstat.cluster_config": "OnDemand bcs.a2.3xlarge img-ubuntu-vpc", | |||
"{{ project_name }}.multiqc.disk_size": "100", | |||
"{{ project_name }}.disk_size": "500", | |||
"{{ project_name }}.vcf_idx": "{{ vcf_idx }}", | |||
"{{ project_name }}.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-hap:latest", | |||
"{{ project_name }}.cluster_config": "OnDemand bcs.a2.7xlarge img-ubuntu-vpc", | |||
"{{ project_name }}.vcf": "{{ vcf }}", | |||
"{{ project_name }}.ref_dir": "oss://chinese-quartet/quartet-storage-data/reference_data/" | |||
} |
@@ -1,9 +1,9 @@ | |||
task benchmark { | |||
File vcf | |||
File vcf_idx | |||
File benchmarking_dir | |||
File ref_dir | |||
String sample = basename(vcf,".vcf") | |||
String sample_mark | |||
String sample = basename(vcf,".vcf.gz") | |||
String fasta | |||
String docker | |||
String cluster_config | |||
@@ -19,22 +19,14 @@ task benchmark { | |||
export HGREF=/cromwell_root/tmp/reference_data/GRCh38.d1.vd1.fa | |||
cat ${vcf} | grep '#' > header | |||
cat ${vcf} | grep -v '#' > body | |||
cat body | grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' > body.filtered | |||
cat header body.filtered > ${sample}.filtered.vcf | |||
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip ${sample}.filtered.vcf -c > ${sample}.filtered.rtg.vcf.gz | |||
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${sample}.filtered.rtg.vcf.gz | |||
if [ ${sample_mark} == "LCL5" ];then | |||
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL5.afterfilterdiffbed.vcf.gz ${sample}.filtered.rtg.vcf.gz -f ${benchmarking_dir}/LCL5.high.confidence.bed.gz --threads $nt -o ${sample} -r ${ref_dir}/${fasta} | |||
elif [ ${sample_mark} == "LCL6" ]; then | |||
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL6.afterfilterdiffbed.vcf.gz ${sample}.filtered.rtg.vcf.gz -f ${benchmarking_dir}/LCL6.high.confidence.bed.gz --threads $nt -o ${sample} -r ${ref_dir}/${fasta} | |||
elif [ ${sample_mark} == "LCL7" ]; then | |||
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL7.afterfilterdiffbed.vcf.gz ${sample}.filtered.rtg.vcf.gz -f ${benchmarking_dir}/LCL7.high.confidence.bed.gz --threads $nt -o ${sample} -r ${ref_dir}/${fasta} | |||
elif [ ${sample_mark} == "LCL8" ]; then | |||
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL8.afterfilterdiffbed.vcf.gz ${sample}.filtered.rtg.vcf.gz -f ${benchmarking_dir}/LCL8.high.confidence.bed.gz --threads $nt -o ${sample} -r ${ref_dir}/${fasta} | |||
if [[ ${sample} =~ "LCL5" ]];then | |||
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL5.afterfilterdiffbed.vcf.gz ${sample}.rtg.vcf.gz -f ${benchmarking_dir}/LCL5.high.confidence.bed.gz --threads $nt -o ${sample} -r ${ref_dir}/${fasta} | |||
elif [[ ${sample} =~ "LCL6" ]]; then | |||
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL6.afterfilterdiffbed.vcf.gz ${sample}.rtg.vcf.gz -f ${benchmarking_dir}/LCL6.high.confidence.bed.gz --threads $nt -o ${sample} -r ${ref_dir}/${fasta} | |||
elif [[ ${sample} =~ "LCL7" ]]; then | |||
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL7.afterfilterdiffbed.vcf.gz ${sample}.rtg.vcf.gz -f ${benchmarking_dir}/LCL7.high.confidence.bed.gz --threads $nt -o ${sample} -r ${ref_dir}/${fasta} | |||
elif [[ ${sample} =~ "LCL8" ]]; then | |||
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL8.afterfilterdiffbed.vcf.gz ${sample}.rtg.vcf.gz -f ${benchmarking_dir}/LCL8.high.confidence.bed.gz --threads $nt -o ${sample} -r ${ref_dir}/${fasta} | |||
else | |||
echo "only for quartet samples" | |||
fi | |||
@@ -48,8 +40,8 @@ task benchmark { | |||
} | |||
output { | |||
File rtg_vcf = "${sample}.filtered.rtg.vcf.gz" | |||
File rtg_vcf_index = "${sample}.filtered.rtg.vcf.gz.tbi" | |||
File rtg_vcf = "${sample}.rtg.vcf.gz" | |||
File rtg_vcf_index = "${sample}.rtg.vcf.gz.tbi" | |||
File gzip_vcf = "${sample}.vcf.gz" | |||
File gzip_vcf_index = "${sample}.vcf.gz.tbi" | |||
File roc_all_csv = "${sample}.roc.all.csv.gz" |
@@ -1,42 +1,25 @@ | |||
import "./tasks/benchmark.wdl" as benchmark | |||
import "./tasks/multiqc.wdl" as multiqc | |||
import "./tasks/vcfstat.wdl" as vcfstat | |||
import "./tasks/mergeNum.wdl" as mergeNum | |||
workflow {{ project_name }} { | |||
File inputSamplesFile | |||
Array[Array[File]] inputSamples = read_tsv(inputSamplesFile) | |||
File vcf | |||
File vcf_idx | |||
File benchmarking_dir | |||
File ref_dir | |||
String fasta | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
scatter (sample in inputSamples) { | |||
call benchmark.benchmark as benchmark { | |||
input: | |||
vcf=sample[0], | |||
benchmarking_dir=benchmarking_dir, | |||
ref_dir=ref_dir, | |||
sample_mark=sample[1], | |||
fasta=fasta | |||
} | |||
call vcfstat.vcfstat as vcfstat { | |||
input: | |||
rtg_vcf=benchmark.rtg_vcf, | |||
rtg_vcf_index=benchmark.rtg_vcf_index | |||
} | |||
} | |||
call multiqc.multiqc as multiqc { | |||
call benchmark.benchmark as benchmark { | |||
input: | |||
summary=benchmark.summary | |||
vcf=vcf, | |||
vcf_idx=vcf_idx, | |||
benchmarking_dir=benchmarking_dir, | |||
ref_dir=ref_dir, | |||
fasta=fasta, | |||
docker=docker, | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call mergeNum.mergeNum as mergeNum { | |||
input: | |||
vcfnumber=vcfstat.vcfnumber | |||
} | |||
} | |||