{
"fastqscreen_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastqscreen:0.12.0", | |||||
"fastqc_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastqc:0.11.8", | |||||
"qualimap_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/qualimap:2.0.0", | |||||
"sentieon_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/sentieon-genomics:v2019.11.28", | |||||
"CPU2_GB8_cluster": "OnDemand bcs.ps.g.large img-ubuntu-vpc", | |||||
"CPU4_GB16_cluster": "OnDemand bcs.ps.g.xlarge img-ubuntu-vpc", | |||||
"CPU8_GB32_cluster": "OnDemand bcs.ps.g.2xlarge img-ubuntu-vpc", | |||||
"disk_size": "300", | |||||
"SENTIEON_INSTALL_DIR": "/opt/sentieon-genomics", | |||||
"SENTIEON_LICENSE": "192.168.0.55:8990", | |||||
"covered_bed": "oss://pgx-reference-data/bed/cbcga/S07604514_Covered.bed", | |||||
"fastq_screen_conf": "oss://pgx-reference-data/fastq_screen_reference/fastq_screen.conf", | |||||
"screen_ref_dir": "oss://pgx-reference-data/fastq_screen_reference/", | |||||
"fasta": "GRCh38.d1.vd1.fa", | |||||
"ref_dir": "oss://pgx-reference-data/GRCh38.d1.vd1/" | |||||
"fastqscreen_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastqscreen:0.12.0", | |||||
"fastqc_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastqc:0.11.8", | |||||
"qualimap_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/qualimap:2.0.0", | |||||
"sentieon_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/sentieon-genomics:v2020.10.07", | |||||
"CPU2_GB8_cluster": "OnDemand bcs.ps.g.large img-ubuntu-vpc", | |||||
"CPU4_GB16_cluster": "OnDemand bcs.ps.g.xlarge img-ubuntu-vpc", | |||||
"CPU8_GB32_cluster": "OnDemand bcs.ps.g.2xlarge img-ubuntu-vpc", | |||||
"disk_size": "300", | |||||
"SENTIEON_INSTALL_DIR": "/opt/sentieon-genomics", | |||||
"SENTIEON_LICENSE": "192.168.0.55:8990", | |||||
"covered_bed": "oss://pgx-reference-data/bed/cbcga/S07604514_Covered.bed", | |||||
"fastq_screen_conf": "oss://pgx-reference-data/fastq_screen_reference/fastq_screen.conf", | |||||
"screen_ref_dir": "oss://pgx-reference-data/fastq_screen_reference/", | |||||
"fasta": "GRCh38.d1.vd1.fa", | |||||
"ref_dir": "oss://pgx-reference-data/GRCh38.d1.vd1/" | |||||
}
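
# The JSON above is the template's default inputs (Aliyun BatchCompute cluster specs
# and OSS reference paths). A minimal sketch of feeding it to Cromwell -- the jar and
# the merged-inputs file name here are illustrative, not part of this repo:
#   java -jar cromwell.jar run workflow.wdl --inputs inputs.json
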
task Dedup {
  String SENTIEON_INSTALL_DIR
  String SENTIEON_LICENSE
  String sample
  File sorted_bam
  File sorted_bam_index
  String docker
  String cluster_config
  String disk_size
  command <<<
    set -o pipefail
    set -e
    export SENTIEON_LICENSE=${SENTIEON_LICENSE}
    nt=$(nproc)
    # two-pass duplicate removal: LocusCollector scores candidate duplicates,
    # then Dedup --rmdup drops them and writes Picard-style metrics
    ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt -i ${sorted_bam} --algo LocusCollector --fun score_info ${sample}_score.txt
    ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt -i ${sorted_bam} --algo Dedup --rmdup --score_info ${sample}_score.txt --metrics ${sample}_dedup_metrics.txt ${sample}.sorted.deduped.bam
    # field 9 of the metrics data row (PERCENT_DUPLICATION in the Picard-style layout)
    # is a fraction; scale it to a percentage
    sed -n '3p' ${sample}_dedup_metrics.txt | awk -F'\t' '{print "'"${sample}"'""\t"$9*100}' > ${sample}_picard_duplication.txt
    # prepend a header line so ${sample}_marked_dup_metrics.txt can be recognized as Picard output
    sed '1i\#DuplicationMetrics' ${sample}_dedup_metrics.txt > ${sample}_marked_dup_metrics.txt
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }
  output {
    File score = "${sample}_score.txt"
    File dedup_metrics = "${sample}_marked_dup_metrics.txt"
    File duplication = "${sample}_picard_duplication.txt"
    File deduped_bam = "${sample}.sorted.deduped.bam"
    File deduped_bam_index = "${sample}.sorted.deduped.bam.bai"
  }
}
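
# Sketch of the corresponding call in the workflow body; the call shape mirrors the
# Realigner/BQSR calls further below, and the workflow-level variable names (sample_id,
# sentieon_docker, CPU8_GB32_cluster) are assumptions taken from the inputs JSON, not
# verbatim from this repo:
# call Dedup.Dedup as Dedup {
#   input:
#     SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR,
#     SENTIEON_LICENSE=SENTIEON_LICENSE,
#     sample=sample_id,
#     sorted_bam=mapping.sorted_bam,
#     sorted_bam_index=mapping.sorted_bam_index,
#     docker=sentieon_docker,
#     disk_size=disk_size,
#     cluster_config=CPU8_GB32_cluster
# }
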
task Metrics {
  File ref_dir
  String SENTIEON_INSTALL_DIR
  String SENTIEON_LICENSE
  String sample
  String fasta
  File sorted_bam
  File sorted_bam_index
  File? regions
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -o pipefail
    set -e
    export SENTIEON_LICENSE=${SENTIEON_LICENSE}
    nt=$(nproc)

    # restrict all metrics to the capture regions when a BED is supplied
    if [ ${regions} ]; then
      INTERVAL="--interval ${regions}"
    else
      INTERVAL=""
    fi

    ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt \
      -r ${ref_dir}/${fasta} $INTERVAL \
      -i ${sorted_bam} \
      --algo MeanQualityByCycle ${sample}_mq_metrics.txt \
      --algo QualDistribution ${sample}_qd_metrics.txt \
      --algo GCBias --summary ${sample}_gc_summary.txt ${sample}_gc_metrics.txt \
      --algo AlignmentStat ${sample}_aln_metrics.txt \
      --algo InsertSizeMetricAlgo ${sample}_is_metrics.txt \
      --algo CoverageMetrics --omit_base_output ${sample}_coverage_metrics

    ${SENTIEON_INSTALL_DIR}/bin/sentieon plot metrics -o ${sample}_metrics_report.pdf gc=${sample}_gc_metrics.txt qd=${sample}_qd_metrics.txt mq=${sample}_mq_metrics.txt isize=${sample}_is_metrics.txt
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File qd_metrics = "${sample}_qd_metrics.txt"
    File qd_metrics_pdf = "${sample}_qd_metrics.pdf"
    File mq_metrics = "${sample}_mq_metrics.txt"
    File mq_metrics_pdf = "${sample}_mq_metrics.pdf"
    File is_metrics = "${sample}_is_metrics.txt"
    File is_metrics_pdf = "${sample}_is_metrics.pdf"
    File gc_summary = "${sample}_gc_summary.txt"
    File gc_metrics = "${sample}_gc_metrics.txt"
    File gc_metrics_pdf = "${sample}_gc_metrics.pdf"
    File aln_metrics = "${sample}_aln_metrics.txt"
    File coverage_metrics_sample_summary = "${sample}_coverage_metrics.sample_summary"
    File coverage_metrics_sample_statistics = "${sample}_coverage_metrics.sample_statistics"
    File coverage_metrics_sample_interval_statistics = "${sample}_coverage_metrics.sample_interval_statistics"
    File coverage_metrics_sample_cumulative_coverage_proportions = "${sample}_coverage_metrics.sample_cumulative_coverage_proportions"
    File coverage_metrics_sample_cumulative_coverage_counts = "${sample}_coverage_metrics.sample_cumulative_coverage_counts"
  }
}
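
# How the optional interval behaves: this template appears to be draft-2 WDL, where an
# unset File? interpolates as an empty string, so `[ ${regions} ]` collapses to `[ ]`
# (false) and $INTERVAL stays empty, i.e. metrics run genome-wide. A plain-bash
# illustration with hypothetical values:
#   regions="";            [ $regions ] && echo targeted || echo genome-wide   # genome-wide
#   regions="covered.bed"; [ $regions ] && echo targeted || echo genome-wide   # targeted
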
task deduped_Metrics {
  File ref_dir
  String SENTIEON_INSTALL_DIR
  String SENTIEON_LICENSE
  String sample
  String fasta
  File deduped_bam
  File deduped_bam_index
  File? regions
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -o pipefail
    set -e
    export SENTIEON_LICENSE=${SENTIEON_LICENSE}
    nt=$(nproc)

    # same optional-interval switch as in Metrics
    if [ ${regions} ]; then
      INTERVAL="--interval ${regions}"
    else
      INTERVAL=""
    fi

    ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt \
      -r ${ref_dir}/${fasta} $INTERVAL \
      -i ${deduped_bam} \
      --algo CoverageMetrics --omit_base_output ${sample}_deduped_coverage_metrics \
      --algo MeanQualityByCycle ${sample}_deduped_mq_metrics.txt \
      --algo QualDistribution ${sample}_deduped_qd_metrics.txt \
      --algo GCBias --summary ${sample}_deduped_gc_summary.txt ${sample}_deduped_gc_metrics.txt \
      --algo AlignmentStat ${sample}_deduped_aln_metrics.txt \
      --algo InsertSizeMetricAlgo ${sample}_deduped_is_metrics.txt \
      --algo QualityYield ${sample}_deduped_QualityYield.txt \
      --algo WgsMetricsAlgo ${sample}_deduped_WgsMetricsAlgo.txt
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File deduped_coverage_metrics_sample_summary = "${sample}_deduped_coverage_metrics.sample_summary"
    File deduped_coverage_metrics_sample_statistics = "${sample}_deduped_coverage_metrics.sample_statistics"
    File deduped_coverage_metrics_sample_interval_statistics = "${sample}_deduped_coverage_metrics.sample_interval_statistics"
    File deduped_coverage_metrics_sample_cumulative_coverage_proportions = "${sample}_deduped_coverage_metrics.sample_cumulative_coverage_proportions"
    File deduped_coverage_metrics_sample_cumulative_coverage_counts = "${sample}_deduped_coverage_metrics.sample_cumulative_coverage_counts"
    File deduped_mean_quality = "${sample}_deduped_mq_metrics.txt"
    File deduped_qd_metrics = "${sample}_deduped_qd_metrics.txt"
    File deduped_gc_summary = "${sample}_deduped_gc_summary.txt"
    File deduped_gc_metrics = "${sample}_deduped_gc_metrics.txt"
    File deduped_aln_metrics = "${sample}_deduped_aln_metrics.txt"
    File deduped_is_metrics = "${sample}_deduped_is_metrics.txt"
    File deduped_QualityYield = "${sample}_deduped_QualityYield.txt"
    File deduped_wgsmetrics = "${sample}_deduped_WgsMetricsAlgo.txt"
  }
}
task fastqc {
  String sample
  File read1
  File read2
  String docker
  String cluster_config
  String disk_size
  command <<<
    set -o pipefail
    set -e
    nt=$(nproc)
    ln -s ${read1} ${sample}_R1.fastq.gz
    ln -s ${read2} ${sample}_R2.fastq.gz
    fastqc -t $nt -o ./ ${sample}_R1.fastq.gz
    fastqc -t $nt -o ./ ${sample}_R2.fastq.gz
  >>>
  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File read1_html = "${sample}_R1_fastqc.html"
    File read1_zip = "${sample}_R1_fastqc.zip"
    File read2_html = "${sample}_R2_fastqc.html"
    File read2_zip = "${sample}_R2_fastqc.zip"
  }
}
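
# Why the symlinks in fastqc's command: FastQC names its reports after the input file,
# so linking the delivered FASTQs to ${sample}_R1/_R2.fastq.gz is what guarantees the
# ${sample}_R*_fastqc.html/.zip paths declared in the output block.
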
task fastq_screen {
  String sample
  File read1
  File read2
  File screen_ref_dir
  File fastq_screen_conf
  String docker
  String cluster_config
  String disk_size
  command <<<
    set -o pipefail
    set -e
    nt=$(nproc)
    # stage the screening genomes on the local data disk (presumably the location
    # fastq_screen.conf points at)
    mkdir -p /cromwell_root/tmp
    cp -r ${screen_ref_dir} /cromwell_root/tmp/
    ln -s ${read1} ${sample}_R1.fastq.gz
    ln -s ${read2} ${sample}_R2.fastq.gz
    fastq_screen --aligner bowtie2 --conf ${fastq_screen_conf} --top 100000 --threads $nt ${sample}_R1.fastq.gz
    fastq_screen --aligner bowtie2 --conf ${fastq_screen_conf} --top 100000 --threads $nt ${sample}_R2.fastq.gz
  >>>
  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File png1 = "${sample}_R1_screen.png"
    File txt1 = "${sample}_R1_screen.txt"
    File html1 = "${sample}_R1_screen.html"
    File png2 = "${sample}_R2_screen.png"
    File txt2 = "${sample}_R2_screen.txt"
    File html2 = "${sample}_R2_screen.html"
  }
}
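
# Note on --top 100000: fastq_screen processes only the first 100,000 reads of each
# FASTQ, a speed/sensitivity trade-off that is normally enough to flag cross-species
# contamination without aligning the whole file.
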
task mapping {
  String sample
  File fastq_1
  File fastq_2
  String SENTIEON_INSTALL_DIR
  String SENTIEON_LICENSE
  String group
  String pl
  File ref_dir
  String fasta
  String docker
  String cluster_config
  String disk_size
  command <<<
    set -o pipefail
    set -e
    export SENTIEON_LICENSE=${SENTIEON_LICENSE}
    nt=$(nproc)
    ${SENTIEON_INSTALL_DIR}/bin/bwa mem -M -R "@RG\tID:${group}\tSM:${sample}\tPL:${pl}" -t $nt -K 10000000 ${ref_dir}/${fasta} ${fastq_1} ${fastq_2} | ${SENTIEON_INSTALL_DIR}/bin/sentieon util sort -o ${sample}.sorted.bam -t $nt --sam2bam -i -
  >>>
  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File sorted_bam = "${sample}.sorted.bam"
    File sorted_bam_index = "${sample}.sorted.bam.bai"
  }
}
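
# bwa flag notes: -M marks shorter split hits as secondary (for Picard-style downstream
# tools), and -K 10000000 fixes the input batch size so alignments are reproducible
# regardless of thread count; piping straight into `sentieon util sort` avoids writing
# an intermediate SAM to disk.
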
task qualimap {
  String sample
  File bam
  File bai
  File covered_bed
  String docker
  String cluster_config
  String disk_size
  command <<<
    set -o pipefail
    set -e
    nt=$(nproc)
    awk 'BEGIN{OFS="\t"}{sub("\r","",$3);print $1,$2,$3,"",0,"."}' ${covered_bed} > new.bed
    /opt/qualimap/qualimap bamqc -bam ${bam} -gff new.bed -outformat PDF:HTML -nt $nt -outdir ${sample} --java-mem-size=32G
    cat ${sample}/genome_results.txt | grep duplication | awk -F "= |%" '{print "'"${sample}"'""\t"$2}' > ${sample}_qualimap_duplication.txt
    tar -zcvf ${sample}_qualimap.tar ${sample}
  >>>
  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File tar = "${sample}_qualimap.tar"
    File duplication = "${sample}_qualimap_duplication.txt"
  }
}
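
# The awk line in qualimap's command pads the 3-column capture BED out to the 6-column
# layout expected by `qualimap bamqc -gff` (placeholder name/score/strand) and strips
# any trailing \r from the end coordinate; the grep/awk pair then reports duplication
# from genome_results.txt as "<sample><TAB><percent>" for aggregation.
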
import "./tasks/Metrics.wdl" as Metrics | import "./tasks/Metrics.wdl" as Metrics | ||||
import "./tasks/Dedup.wdl" as Dedup | import "./tasks/Dedup.wdl" as Dedup | ||||
import "./tasks/deduped_Metrics.wdl" as deduped_Metrics | import "./tasks/deduped_Metrics.wdl" as deduped_Metrics | ||||
import "./tasks/Realigner.wdl" as Realigner | |||||
import "./tasks/BQSR.wdl" as BQSR | |||||
workflow {{ project_name }} {
  call Metrics.Metrics as Metrics {
    input:
      SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR,
      SENTIEON_LICENSE=SENTIEON_LICENSE,
      fasta=fasta,
      ref_dir=ref_dir,
      sorted_bam=mapping.sorted_bam,
      sorted_bam_index=mapping.sorted_bam_index,
      sample=sample_id,
      regions=covered_bed,
      docker=sentieon_docker,
      disk_size=disk_size,
      cluster_config=CPU2_GB8_cluster
  }

  call deduped_Metrics.deduped_Metrics as deduped_Metrics {
    input:
      SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR,
      SENTIEON_LICENSE=SENTIEON_LICENSE,
      fasta=fasta,
      ref_dir=ref_dir,
      deduped_bam=Dedup.deduped_bam,
      deduped_bam_index=Dedup.deduped_bam_index,
      sample=sample_id,
      regions=covered_bed,
      docker=sentieon_docker,
      disk_size=disk_size,
      cluster_config=CPU2_GB8_cluster
  }

      disk_size=disk_size,
      cluster_config=CPU8_GB32_cluster
  }
  call Realigner.Realigner as Realigner {
    input:
      SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR,
      SENTIEON_LICENSE=SENTIEON_LICENSE,
      fasta=fasta,
      ref_dir=ref_dir,
      deduped_bam=Dedup.deduped_bam,
      deduped_bam_index=Dedup.deduped_bam_index,
      db_mills=db_mills,
      dbmills_dir=dbmills_dir,
      sample=sample_id + '_tumor',
      docker=sentieon_docker,
      disk_size=disk_size,
      cluster_config=cluster_config
  }

  call BQSR.BQSR as BQSR {
    input:
      SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR,
      SENTIEON_LICENSE=SENTIEON_LICENSE,
      fasta=fasta,
      ref_dir=ref_dir,
      realigned_bam=Realigner.realigner_bam,
      realigned_bam_index=Realigner.realigner_bam_index,
      db_mills=db_mills,
      dbmills_dir=dbmills_dir,
      dbsnp=dbsnp,
      dbsnp_dir=dbsnp_dir,
      sample=sample_id + '_tumor',
      docker=sentieon_docker,
      disk_size=disk_size,
      cluster_config=cluster_config
  }
  output {
    File fastqc_read1_html = fastqc.read1_html