
recovery multiqc

master
junshang 3 years ago
parent commit 9ac96ab758
3 changed files with 121 additions and 0 deletions
  1. tasks/extract_tables.wdl (+41, -0)
  2. tasks/multiqc.wdl (+50, -0)
  3. workflow.wdl (+30, -0)

tasks/extract_tables.wdl (+41, -0)

@@ -0,0 +1,41 @@
task extract_tables {

    File quality_yield_summary
    File wgs_metrics_summary
    File aln_metrics_summary
    File is_metrics_summary
    File hap
    File fastqc
    File fastqscreen

    String project
    String docker
    String cluster_config
    String disk_size

    command <<<
        # Build the QC summary tables (pre/post-alignment and variant calling)
        # from the merged Sentieon metrics and the MultiQC extracts.
        python /opt/extract_tables.py -quality ${quality_yield_summary} -depth ${wgs_metrics_summary} -aln ${aln_metrics_summary} -is ${is_metrics_summary} -fastqc ${fastqc} -fastqscreen ${fastqscreen} -hap ${hap} -project ${project}

        # Per-column mean and population standard deviation of fields 12 onward
        # in the variant-calling QC table (the header row containing 'SNV' is skipped).
        cat variants.calling.qc.txt | cut -f12- | grep -v 'SNV' | awk '{for(i=1;i<=NF;i++) {sum[i] += $i; sumsq[i] += ($i)^2}}
        END {for (i=1;i<=NF;i++) {
            printf "%f %f \n", sum[i]/NR, sqrt((sumsq[i]-sum[i]^2/NR)/NR)}
        }' >> reference_datasets_aver-std.txt
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File pre_alignment = "pre_alignment.txt"
        File post_alignment = "post_alignment.txt"
        File variant_calling = "variants.calling.qc.txt"
        File precision_recall = "reference_datasets_aver-std.txt"
    }
}
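
For reference, a minimal Python sketch (not part of this commit) of what the awk one-liner above computes: the per-column mean and population standard deviation of fields 12 onward in variants.calling.qc.txt, with the header row containing 'SNV' skipped. It assumes the table is tab-delimited (cut's default) and that every remaining field is numeric.

import math

sums, sumsqs, n = [], [], 0
with open("variants.calling.qc.txt") as fh:
    for line in fh:
        if "SNV" in line:                       # grep -v 'SNV': drop the header row
            continue
        values = [float(x) for x in line.rstrip("\n").split("\t")[11:]]  # cut -f12-
        if not sums:
            sums = [0.0] * len(values)
            sumsqs = [0.0] * len(values)
        for i, v in enumerate(values):
            sums[i] += v
            sumsqs[i] += v * v
        n += 1

# One "mean std" line per column, matching the awk printf format.
with open("reference_datasets_aver-std.txt", "a") as out:
    for s, sq in zip(sums, sumsqs):
        out.write("%f %f \n" % (s / n, math.sqrt((sq - s * s / n) / n)))  # population std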

tasks/multiqc.wdl (+50, -0)

@@ -0,0 +1,50 @@
task multiqc {

    Array[File] read1_zip
    Array[File] read2_zip

    Array[File] txt1
    Array[File] txt2

    Array[File] summary

    Array[File] zip

    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e

        # Stage the FastQC, FastQ Screen and benchmark summaries into one
        # directory tree so MultiQC can aggregate them in a single run.
        mkdir -p /cromwell_root/tmp/fastqc
        mkdir -p /cromwell_root/tmp/fastqscreen
        mkdir -p /cromwell_root/tmp/benchmark

        cp ${sep=" " read1_zip} ${sep=" " read2_zip} /cromwell_root/tmp/fastqc
        cp ${sep=" " txt1} ${sep=" " txt2} /cromwell_root/tmp/fastqscreen
        cp ${sep=" " summary} /cromwell_root/tmp/benchmark

        multiqc /cromwell_root/tmp/

        # Copy the aggregated tables out of multiqc_data/ so they can be
        # declared as task outputs.
        cat multiqc_data/multiqc_general_stats.txt > multiqc_general_stats.txt
        cat multiqc_data/multiqc_fastq_screen.txt > multiqc_fastq_screen.txt
        cat multiqc_data/multiqc_happy_data.json > multiqc_happy_data.json
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File multiqc_html = "multiqc_report.html"
        Array[File] multiqc_txt = glob("multiqc_data/*")
        File fastqc = "multiqc_general_stats.txt"
        File fastqscreen = "multiqc_fastq_screen.txt"
        File hap = "multiqc_happy_data.json"
    }
}
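
As a usage note, the fastqc output above is MultiQC's general-stats table. A minimal Python sketch (not part of this commit, assuming MultiQC's usual tab-delimited layout with a leading "Sample" column) of reading it downstream:

import csv

# multiqc_general_stats.txt has one row per sample; the metric columns depend on
# which modules MultiQC detected (FastQC, FastQ Screen, hap.py, ...).
with open("multiqc_general_stats.txt") as fh:
    for row in csv.DictReader(fh, delimiter="\t"):
        sample = row.get("Sample", "")          # first column is typically "Sample"
        metrics = {k: v for k, v in row.items() if k != "Sample"}
        print(sample, metrics)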

workflow.wdl (+30, -0)

@@ -9,7 +9,9 @@ import "./tasks/Haplotyper_gVCF.wdl" as Haplotyper_gVCF
import "./tasks/GVCFtyper.wdl" as GVCFtyper
import "./tasks/split_gvcf_files.wdl" as split_gvcf_files
import "./tasks/benchmark.wdl" as benchmark
import "./tasks/multiqc.wdl" as multiqc
import "./tasks/merge_sentieon_metrics.wdl" as merge_sentieon_metrics
import "./tasks/extract_tables.wdl" as extract_tables
import "./tasks/mendelian.wdl" as mendelian
import "./tasks/merge_mendelian.wdl" as merge_mendelian
import "./tasks/quartet_mendelian.wdl" as quartet_mendelian
@@ -220,6 +222,19 @@ workflow {{ project_name }} {

}

    call multiqc.multiqc as multiqc {
        input:
            read1_zip=fastqc.read1_zip,
            read2_zip=fastqc.read2_zip,
            txt1=fastqscreen.txt1,
            txt2=fastqscreen.txt2,
            summary=benchmark.summary,
            zip=qualimap.zip,
            docker=MULTIQCdocker,
            cluster_config=SMALLcluster_config,
            disk_size=disk_size
    }

    call merge_sentieon_metrics.merge_sentieon_metrics as merge_sentieon_metrics {
        input:
            quality_yield_header=sentieon.quality_yield_header,
@@ -234,6 +249,21 @@ workflow {{ project_name }} {
            docker=MULTIQCdocker,
            cluster_config=SMALLcluster_config,
            disk_size=disk_size
    }

    call extract_tables.extract_tables as extract_tables {
        input:
            quality_yield_summary=merge_sentieon_metrics.quality_yield_summary,
            wgs_metrics_summary=merge_sentieon_metrics.wgs_metrics_summary,
            aln_metrics_summary=merge_sentieon_metrics.aln_metrics_summary,
            is_metrics_summary=merge_sentieon_metrics.is_metrics_summary,
            fastqc=multiqc.fastqc,
            fastqscreen=multiqc.fastqscreen,
            hap=multiqc.hap,
            project=project,
            docker=DIYdocker,
            cluster_config=SMALLcluster_config,
            disk_size=disk_size
    }

    Boolean sister_tag = read_boolean(split_gvcf_files.sister_tag)
