瀏覽代碼

only benchmark

master
LUYAO REN 4 年之前
父節點
當前提交
6531a6dee7
共有 4 個文件被更改,包括 74 次插入和 65 次删除
  1. +42
    -0
      big_workflow
  2. +5
    -13
      inputs
  3. +12
    -20
      tasks/benchmark.wdl
  4. +15
    -32
      workflow.wdl

+ 42
- 0
big_workflow 查看文件

@@ -0,0 +1,42 @@
import "./tasks/benchmark.wdl" as benchmark
import "./tasks/multiqc.wdl" as multiqc
import "./tasks/vcfstat.wdl" as vcfstat
import "./tasks/mergeNum.wdl" as mergeNum
workflow {{ project_name }} {
    # The workflow name is a `{{ project_name }}` placeholder; presumably a
    # templating step substitutes it before the file is executed.

    # ---- Workflow-level inputs -------------------------------------------
    File inputSamplesFile
    File benchmarking_dir
    File ref_dir
    String fasta

    # Parse the sample sheet into rows: column 0 is handed to the benchmark
    # task as `vcf`, column 1 as `sample_mark`.
    # NOTE(review): every TSV cell is typed File here, including the column
    # that is passed on as sample_mark — confirm that is intended.
    Array[Array[File]] inputSamples = read_tsv(inputSamplesFile)

    # ---- Per-row processing ----------------------------------------------
    scatter (sample in inputSamples) {

        # Benchmark one row of the sample sheet.
        call benchmark.benchmark as benchmark {
            input:
                vcf = sample[0],
                sample_mark = sample[1],
                benchmarking_dir = benchmarking_dir,
                ref_dir = ref_dir,
                fasta = fasta
        }

        # Feed the benchmark's rtg VCF and its index into vcfstat.
        call vcfstat.vcfstat as vcfstat {
            input:
                rtg_vcf = benchmark.rtg_vcf,
                rtg_vcf_index = benchmark.rtg_vcf_index
        }

    }

    # ---- Gather steps ----------------------------------------------------
    # These calls sit outside the scatter, so they reference the scattered
    # calls' outputs collected across all rows.
    call multiqc.multiqc as multiqc {
        input:
            summary = benchmark.summary
    }

    call mergeNum.mergeNum as mergeNum {
        input:
            vcfnumber = vcfstat.vcfnumber
    }

}


+ 5
- 13
inputs 查看文件

@@ -1,18 +1,10 @@
{
"{{ project_name }}.benchmarking_dir": "oss://pgx-result/renluyao/manuscript/benchmark_calls_v3.0/",
"{{ project_name }}.vcfstat.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-hap:latest",
"{{ project_name }}.fasta": "GRCh38.d1.vd1.fa",
"{{ project_name }}.benchmark.cluster_config": "OnDemand bcs.a2.7xlarge img-ubuntu-vpc",
"{{ project_name }}.benchmark.disk_size": "500",
"{{ project_name }}.vcfstat.disk_size": "100",
"{{ project_name }}.benchmark.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-hap:latest",
"{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}",
"{{ project_name }}.mergeNum.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/gatk:v2019.01",
"{{ project_name }}.mergeNum.disk_size": "100",
"{{ project_name }}.multiqc.cluster_config": "OnDemand bcs.a2.3xlarge img-ubuntu-vpc",
"{{ project_name }}.multiqc.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/multiqc:v1.8",
"{{ project_name }}.mergeNum.cluster_config": "OnDemand bcs.a2.large img-ubuntu-vpc",
"{{ project_name }}.vcfstat.cluster_config": "OnDemand bcs.a2.3xlarge img-ubuntu-vpc",
"{{ project_name }}.multiqc.disk_size": "100",
"{{ project_name }}.disk_size": "500",
"{{ project_name }}.vcf_idx": "{{ vcf_idx }}",
"{{ project_name }}.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-hap:latest",
"{{ project_name }}.cluster_config": "OnDemand bcs.a2.7xlarge img-ubuntu-vpc",
"{{ project_name }}.vcf": "{{ vcf }}",
"{{ project_name }}.ref_dir": "oss://chinese-quartet/quartet-storage-data/reference_data/"
}

+ 12
- 20
tasks/benchmark.wdl 查看文件

@@ -1,9 +1,9 @@
task benchmark {
File vcf
File vcf_idx
File benchmarking_dir
File ref_dir
String sample = basename(vcf,".vcf")
String sample_mark
String sample = basename(vcf,".vcf.gz")
String fasta
String docker
String cluster_config
@@ -19,22 +19,14 @@ task benchmark {

export HGREF=/cromwell_root/tmp/reference_data/GRCh38.d1.vd1.fa

cat ${vcf} | grep '#' > header
cat ${vcf} | grep -v '#' > body
cat body | grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' > body.filtered
cat header body.filtered > ${sample}.filtered.vcf

/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip ${sample}.filtered.vcf -c > ${sample}.filtered.rtg.vcf.gz
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${sample}.filtered.rtg.vcf.gz

if [ ${sample_mark} == "LCL5" ];then
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL5.afterfilterdiffbed.vcf.gz ${sample}.filtered.rtg.vcf.gz -f ${benchmarking_dir}/LCL5.high.confidence.bed.gz --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
elif [ ${sample_mark} == "LCL6" ]; then
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL6.afterfilterdiffbed.vcf.gz ${sample}.filtered.rtg.vcf.gz -f ${benchmarking_dir}/LCL6.high.confidence.bed.gz --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
elif [ ${sample_mark} == "LCL7" ]; then
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL7.afterfilterdiffbed.vcf.gz ${sample}.filtered.rtg.vcf.gz -f ${benchmarking_dir}/LCL7.high.confidence.bed.gz --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
elif [ ${sample_mark} == "LCL8" ]; then
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL8.afterfilterdiffbed.vcf.gz ${sample}.filtered.rtg.vcf.gz -f ${benchmarking_dir}/LCL8.high.confidence.bed.gz --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
if [[ ${sample} =~ "LCL5" ]];then
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL5.afterfilterdiffbed.vcf.gz ${sample}.rtg.vcf.gz -f ${benchmarking_dir}/LCL5.high.confidence.bed.gz --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
elif [[ ${sample} =~ "LCL6" ]]; then
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL6.afterfilterdiffbed.vcf.gz ${sample}.rtg.vcf.gz -f ${benchmarking_dir}/LCL6.high.confidence.bed.gz --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
elif [[ ${sample} =~ "LCL7" ]]; then
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL7.afterfilterdiffbed.vcf.gz ${sample}.rtg.vcf.gz -f ${benchmarking_dir}/LCL7.high.confidence.bed.gz --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
elif [[ ${sample} =~ "LCL8" ]]; then
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL8.afterfilterdiffbed.vcf.gz ${sample}.rtg.vcf.gz -f ${benchmarking_dir}/LCL8.high.confidence.bed.gz --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
else
echo "only for quartet samples"
fi
@@ -48,8 +40,8 @@ task benchmark {
}

output {
File rtg_vcf = "${sample}.filtered.rtg.vcf.gz"
File rtg_vcf_index = "${sample}.filtered.rtg.vcf.gz.tbi"
File rtg_vcf = "${sample}.rtg.vcf.gz"
File rtg_vcf_index = "${sample}.rtg.vcf.gz.tbi"
File gzip_vcf = "${sample}.vcf.gz"
File gzip_vcf_index = "${sample}.vcf.gz.tbi"
File roc_all_csv = "${sample}.roc.all.csv.gz"

+ 15
- 32
workflow.wdl 查看文件

@@ -1,42 +1,25 @@
import "./tasks/benchmark.wdl" as benchmark
import "./tasks/multiqc.wdl" as multiqc
import "./tasks/vcfstat.wdl" as vcfstat
import "./tasks/mergeNum.wdl" as mergeNum

workflow {{ project_name }} {
File inputSamplesFile
Array[Array[File]] inputSamples = read_tsv(inputSamplesFile)
File vcf
File vcf_idx
File benchmarking_dir
File ref_dir
String fasta
String docker
String cluster_config
String disk_size

scatter (sample in inputSamples) {

call benchmark.benchmark as benchmark {
input:
vcf=sample[0],
benchmarking_dir=benchmarking_dir,
ref_dir=ref_dir,
sample_mark=sample[1],
fasta=fasta
}

call vcfstat.vcfstat as vcfstat {
input:
rtg_vcf=benchmark.rtg_vcf,
rtg_vcf_index=benchmark.rtg_vcf_index
}

}

call multiqc.multiqc as multiqc {
call benchmark.benchmark as benchmark {
input:
summary=benchmark.summary
vcf=vcf,
vcf_idx=vcf_idx,
benchmarking_dir=benchmarking_dir,
ref_dir=ref_dir,
fasta=fasta,
docker=docker,
cluster_config=cluster_config,
disk_size=disk_size
}

call mergeNum.mergeNum as mergeNum {
input:
vcfnumber=vcfstat.vcfnumber
}

}


Loading…
取消
儲存