LUYAO REN 5 років тому
коміт
a7375f4ff9
7 змінених файлів з 177 додано та 0 видалено
  1. +11
    -0
      inputs
  2. BIN
      tasks/.DS_Store
  3. +56
    -0
      tasks/benchmark.wdl
  4. +26
    -0
      tasks/mergeNum.wdl
  5. +31
    -0
      tasks/multiqc.wdl
  6. +24
    -0
      tasks/vcfstat.wdl
  7. +29
    -0
      workflow.wdl

+ 11
- 0
inputs Переглянути файл

@@ -0,0 +1,11 @@
{
"{{ project_name }}.benchmarking_dir": "oss://chinese-quartet/quartet-result-data/NCTR_benchmarking_20181215/",
"{{ project_name }}.fasta": "GRCh38.d1.vd1.fa",
"{{ project_name }}.sample_mark": "{{ sample_mark }}",
"{{ project_name }}.vcf_index": "{{ vcf_index }}",
"{{ project_name }}.cluster_config": "OnDemand bcs.a2.7xlarge img-ubuntu-vpc",
"{{ project_name }}.disk_size": "200",
"{{ project_name }}.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-hap:latest",
"{{ project_name }}.vcf": "{{ vcf }}",
"{{ project_name }}.ref_dir": "oss://chinese-quartet/quartet-storage-data/reference_data/"
}

BIN
tasks/.DS_Store Переглянути файл


+ 56
- 0
tasks/benchmark.wdl Переглянути файл

@@ -0,0 +1,56 @@
task benchmark {
    # Benchmarks one Quartet sample VCF with hap.py against the reference call
    # set stored under benchmarking_dir.
    #
    # Inputs:
    #   vcf              - VCF to evaluate; its basename (minus ".vcf") is the output prefix.
    #   vcf_index        - index of vcf; not referenced in the command, declared so it is
    #                      localized with the VCF (presumably required by hap.py - confirm).
    #   benchmarking_dir - directory expected to hold <sample_mark>.vcf.gz and <sample_mark>.bed.gz.
    #   ref_dir          - directory containing the reference FASTA named by `fasta`.
    #   sample_mark      - one of LCL5, LCL6, LCL7, LCL8; anything else fails the task.
    #   fasta            - file name of the reference FASTA inside ref_dir.
    #   docker, cluster_config, disk_size - runtime settings (see runtime section).
    File vcf
    File vcf_index
    File benchmarking_dir
    File ref_dir
    String sample = basename(vcf,".vcf")
    String sample_mark
    String fasta
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        nt=$(nproc)
        mkdir -p /cromwell_root/tmp
        cp -r ${ref_dir} /cromwell_root/tmp/

        # NOTE(review): HGREF is hard-coded to the GRCh38.d1.vd1.fa copy and does not
        # follow the `fasta` input; the hap.py call below passes -r explicitly.
        export HGREF=/cromwell_root/tmp/reference_data/GRCh38.d1.vd1.fa

        # Validate the sample up front and fail loudly. Previously an unknown
        # sample only printed a message and exited 0, so the task failed later
        # with a misleading "missing output file" error.
        case "${sample_mark}" in
            LCL5|LCL6|LCL7|LCL8)
                ;;
            *)
                echo "only for quartet samples" >&2
                exit 1
                ;;
        esac

        # Single invocation; the truth VCF and the BED passed to -f are selected by sample_mark.
        /opt/hap.py/bin/hap.py ${benchmarking_dir}/${sample_mark}.vcf.gz ${vcf} -f ${benchmarking_dir}/${sample_mark}.bed.gz --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
    >>>

    runtime {
        # cluster/systemDisk/dataDisk are backend-specific attributes
        # (presumably Alibaba Cloud BCS, judging by the cluster_config value - confirm).
        docker:docker
        cluster:cluster_config
        systemDisk:"cloud_ssd 40"
        dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        # Files expected in the working directory under the "-o" prefix given to hap.py.
        File gzip_vcf = "${sample}.vcf.gz"
        File gzip_vcf_index = "${sample}.vcf.gz.tbi"
        File roc_all_csv = "${sample}.roc.all.csv.gz"
        File roc_indel = "${sample}.roc.Locations.INDEL.csv.gz"
        File roc_indel_pass = "${sample}.roc.Locations.INDEL.PASS.csv.gz"
        File roc_snp = "${sample}.roc.Locations.SNP.csv.gz"
        File roc_snp_pass = "${sample}.roc.Locations.SNP.PASS.csv.gz"
        File summary = "${sample}.summary.csv"
        File extended = "${sample}.extended.csv"
        File metrics = "${sample}.metrics.json.gz"
    }
}

+ 26
- 0
tasks/mergeNum.wdl Переглянути файл

@@ -0,0 +1,26 @@
task mergeNum {
    # Merges per-sample statistics files (each a list of "name : value" lines)
    # into one tab-separated table, vcfstats.txt, with a fixed header row.
    #
    # Inputs:
    #   vcfnumber - statistics files, one per sample (e.g. the vcfstat task output).
    #   docker, cluster_config, disk_size - runtime settings.
    Array[File] vcfnumber
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e

        # One row per input file: keep the text after the first ':' on each line,
        # join the lines with tabs, and turn the trailing tab into a newline.
        for stats_file in ${sep=" " vcfnumber}; do
            cut -d':' -f2 $stats_file | tr '\n' '\t' | sed s'/\t$/\n/g' >> vcfstats
        done

        # Insert the column header before the first data row.
        sed '1i\File\tFailed Filters\tPassed Filters\tSNPs\tMNPs\tInsertions\tDeletions\tIndels\tSame as reference\tSNP Transitions/Transversions\tTotal Het/Hom ratio\tSNP Het/Hom ratio\tMNP Het/Hom ratio\tInsertion Het/Hom ratio\tDeletion Het/Hom ratio\tIndel Het/Hom ratio\tInsertion/Deletion ratio\tIndel/SNP+MNP ratio' vcfstats > vcfstats.txt
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        # Header row plus one row per input statistics file.
        File vcfstat = "vcfstats.txt"
    }
}

+ 31
- 0
tasks/multiqc.wdl Переглянути файл

@@ -0,0 +1,31 @@
task multiqc {
    # Collects summary files into a staging directory and runs MultiQC over it.
    #
    # Inputs:
    #   summary - summary files to aggregate (e.g. the benchmark task's `summary` output).
    #   docker, cluster_config, disk_size - runtime settings.
    Array[File] summary

    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e

        # MultiQC scans a directory, so gather every summary under one root first.
        scan_root=/cromwell_root/tmp
        mkdir -p $scan_root/benchmark
        cp ${sep=" " summary} $scan_root/benchmark

        multiqc $scan_root/
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        # Report and data directory are read from the task's working directory.
        File multiqc_html = "multiqc_report.html"
        Array[File] multiqc_txt = glob("multiqc_data/*")
    }
}

+ 24
- 0
tasks/vcfstat.wdl Переглянути файл

@@ -0,0 +1,24 @@
task vcfstat {
    # Runs `rtg vcfstats` on a single VCF and captures its stdout in onestats.txt.
    #
    # Inputs:
    #   rtg_vcf       - VCF to summarize.
    #   rtg_vcf_index - index of rtg_vcf; not referenced in the command, declared so
    #                   it is localized with the VCF.
    #   docker, cluster_config, disk_size - runtime settings.
    File rtg_vcf
    File rtg_vcf_index
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e

        # Path of the rtg launcher inside the container image (version-pinned).
        rtg_bin=/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg
        $rtg_bin vcfstats ${rtg_vcf} > onestats.txt
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        # Plain-text statistics as printed by rtg vcfstats.
        File vcfnumber = "onestats.txt"
    }
}

+ 29
- 0
workflow.wdl Переглянути файл

@@ -0,0 +1,29 @@
import "./tasks/benchmark.wdl" as benchmark

workflow {{ project_name }} {
    # Template workflow: "{{ project_name }}" is substituted before the WDL is
    # submitted. It runs the benchmark task once for a single sample, forwarding
    # every workflow input unchanged.

    # Data inputs
    File vcf
    File vcf_index
    File benchmarking_dir
    File ref_dir

    # Sample and reference selection
    String sample_mark
    String fasta

    # Runtime settings
    String docker
    String cluster_config
    String disk_size

    call benchmark.benchmark as benchmark {
        input:
            vcf = vcf,
            vcf_index = vcf_index,
            benchmarking_dir = benchmarking_dir,
            ref_dir = ref_dir,
            sample_mark = sample_mark,
            fasta = fasta,
            docker = docker,
            cluster_config = cluster_config,
            disk_size = disk_size
    }
}


Завантаження…
Відмінити
Зберегти