LUYAO REN 4 роки тому
джерело
коміт
b7a7bb0491
2 змінених файлів з 34 додано та 41 видалено
  1. +1
    -1
      inputs
  2. +33
    -40
      tasks/benchmark.wdl

+ 1
- 1
inputs Переглянути файл

@@ -11,7 +11,7 @@
"{{ project_name }}.BIGcluster_config": "OnDemand bcs.ps.g.2xlarge img-ubuntu-vpc",
"{{ project_name }}.LCL6": "{{ LCL6 }}",
"{{ project_name }}.sdf": "oss://pgx-reference-data/GRCh38.d1.vd1/GRCh38.d1.vd1.sdf/",
"{{ project_name }}.contig": "oss://pgx-result/renluyao/NCTR/contig.txt",
"{{ project_name }}.contig": "oss://pgx-result/renluyao/manuscript_v3.0/reference_datasets_v202103/contig.txt",
"{{ project_name }}.MENDELIANdocker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1",
"{{ project_name }}.LCL7": "{{ LCL7 }}",
"{{ project_name }}.DIYdocker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.4",

+ 33
- 40
tasks/benchmark.wdl Переглянути файл

@@ -4,7 +4,7 @@ task benchmark {
File contig
File sdf
String project
String sample = basename(vcf,".raw.vcf.gz")
String sample = basename(vcf,".vcf.gz")
String docker
String cluster_config
String disk_size
@@ -41,52 +41,37 @@ task benchmark {
echo -e "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tLCL8" > LCL8_name


/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip filtered.vcf -c > ${project}.${sample}.rtg.vcf.gz
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${project}.${sample}.rtg.vcf.gz
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcffilter -i ${project}.${sample}.rtg.vcf.gz -o ${sample}.rtg.SNV.vcf.gz --snps-only
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcffilter -i ${project}.${sample}.rtg.vcf.gz -o ${sample}.rtg.INDEL.vcf.gz --non-snps-only


if [[ ${sample} =~ "LCL5" ]];then
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfeval -b /cromwell_root/tmp/reference_datasets_v202103/LCL5.high.confidence.calls.SNV.vcf.gz -c ${sample}.rtg.SNV.vcf.gz -o ${sample}_SNV -t ${sdf} --evaluation-regions=/cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfeval -b /cromwell_root/tmp/reference_datasets_v202103/LCL5.high.confidence.calls.INDEL.vcf.gz -c ${sample}.rtg.INDEL.vcf.gz -o ${sample}_INDEL -t ${sdf} --evaluation-regions=/cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed
cat filtered.vcf | grep '##' > header
cat filtered.vcf | grep -v '#' > body
/opt/hap.py/bin/hap.py /cromwell_root/tmp/reference_datasets_v202103/LCL5.high.confidence.calls.vcf ${vcf} -f /cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
cat ${vcf} | grep '##' > header
cat ${vcf} | grep -v '#' > body
cat header LCL5_name body > LCL5.vcf
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip LCL5.vcf -c > ${project}.${sample}.reformed.vcf.gz
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${project}.${sample}.reformed.vcf.gz
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip LCL5.vcf -c > ${sample}.reformed.vcf.gz
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${sample}.reformed.vcf.gz
elif [[ ${sample} =~ "LCL6" ]]; then
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfeval -b /cromwell_root/tmp/reference_datasets_v202103/LCL6.high.confidence.calls.SNV.vcf.gz -c ${sample}.rtg.SNV.vcf.gz -o ${sample}_SNV -t ${sdf} --evaluation-regions=/cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfeval -b /cromwell_root/tmp/reference_datasets_v202103/LCL6.high.confidence.calls.INDEL.vcf.gz -c ${sample}.rtg.INDEL.vcf.gz -o ${sample}_INDEL -t ${sdf} --evaluation-regions=/cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed
cat filtered.vcf | grep '##' > header
cat filtered.vcf | grep -v '#' > body
/opt/hap.py/bin/hap.py /cromwell_root/tmp/reference_datasets_v202103/LCL6.high.confidence.calls.vcf ${vcf} -f /cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
cat ${vcf} | grep '##' > header
cat ${vcf} | grep -v '#' > body
cat header LCL6_name body > LCL6.vcf
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip LCL6.vcf -c > ${project}.${sample}.reformed.vcf.gz
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${project}.${sample}.reformed.vcf.gz
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip LCL6.vcf -c > ${sample}.reformed.vcf.gz
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${sample}.reformed.vcf.gz
elif [[ ${sample} =~ "LCL7" ]]; then
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfeval -b /cromwell_root/tmp/reference_datasets_v202103/LCL7.high.confidence.calls.SNV.vcf.gz -c ${sample}.rtg.SNV.vcf.gz -o ${sample}_SNV -t ${sdf} --evaluation-regions=/cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfeval -b /cromwell_root/tmp/reference_datasets_v202103/LCL7.high.confidence.calls.INDEL.vcf.gz -c ${sample}.rtg.INDEL.vcf.gz -o ${sample}_INDEL -t ${sdf} --evaluation-regions=/cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed
cat filtered.vcf | grep '##' > header
cat filtered.vcf | grep -v '#' > body
/opt/hap.py/bin/hap.py /cromwell_root/tmp/reference_datasets_v202103/LCL7.high.confidence.calls.vcf ${vcf} -f /cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
cat ${vcf} | grep '##' > header
cat ${vcf} | grep -v '#' > body
cat header LCL7_name body > LCL7.vcf
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip LCL7.vcf -c > ${project}.${sample}.reformed.vcf.gz
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${project}.${sample}.reformed.vcf.gz
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip LCL7.vcf -c > ${sample}.reformed.vcf.gz
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${sample}.reformed.vcf.gz
elif [[ ${sample} =~ "LCL8" ]]; then
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfeval -b /cromwell_root/tmp/reference_datasets_v202103/LCL8.high.confidence.calls.SNV.vcf.gz -c ${sample}.rtg.SNV.vcf.gz -o ${sample}_SNV -t ${sdf} --evaluation-regions=/cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfeval -b /cromwell_root/tmp/reference_datasets_v202103/LCL8.high.confidence.calls.INDEL.vcf.gz -c ${sample}.rtg.INDEL.vcf.gz -o ${sample}_INDEL -t ${sdf} --evaluation-regions=/cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed
cat filtered.vcf | grep '##' > header
cat filtered.vcf | grep -v '#' > body
/opt/hap.py/bin/hap.py /cromwell_root/tmp/reference_datasets_v202103/LCL8.high.confidence.calls.vcf ${vcf} -f /cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
cat ${vcf} | grep '##' > header
cat ${vcf} | grep -v '#' > body
cat header LCL8_name body > LCL8.vcf
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip LCL8.vcf -c > ${project}.${sample}.reformed.vcf.gz
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${project}.${sample}.reformed.vcf.gz
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip LCL8.vcf -c > ${sample}.reformed.vcf.gz
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${sample}.reformed.vcf.gz
else
echo "only for quartet samples"
fi


cat ${sample}_SNV/summary.txt > ${project}.${sample}_SNV_precision_recall.txt
cat ${sample}_INDEL/summary.txt > ${project}.${sample}_INDEL_precision_recall.txt
>>>

runtime {
@@ -97,9 +82,17 @@ task benchmark {
}

output {
File rtg_vcf = "${project}.${sample}.reformed.vcf.gz"
File rtg_vcf_index = "${project}.${sample}.reformed.vcf.gz.tbi"
File SNV_result = "${project}.${sample}_SNV_precision_recall.txt"
File Indel_result = "${project}.${sample}_INDEL_precision_recall.txt"
File rtg_vcf = "${sample}.reformed.vcf.gz"
File rtg_vcf_index = "${sample}.reformed.vcf.gz.tbi"
File gzip_vcf = "${sample}.vcf.gz"
File gzip_vcf_index = "${sample}.vcf.gz.tbi"
File roc_all_csv = "${sample}.roc.all.csv.gz"
File roc_indel = "${sample}.roc.Locations.INDEL.csv.gz"
File roc_indel_pass = "${sample}.roc.Locations.INDEL.PASS.csv.gz"
File roc_snp = "${sample}.roc.Locations.SNP.csv.gz"
File roc_snp_pass = "${sample}.roc.Locations.SNP.PASS.csv.gz"
File summary = "${sample}.summary.csv"
File extended = "${sample}.extended.csv"
File metrics = "${sample}.metrics.json.gz"
}
}

Завантаження…
Відмінити
Зберегти