@@ -0,0 +1,13 @@ | |||
{ | |||
"SENTIEON_INSTALL_DIR": "/opt/sentieon-genomics", | |||
"fasta": "GRCh38.d1.vd1.fa", | |||
"dbsnp_dir": "oss://pgx-reference-data/GRCh38.d1.vd1/", | |||
"disk_size": "800", | |||
"dbmills_dir": "oss://pgx-reference-data/GRCh38.d1.vd1/", | |||
"cluster_config": "OnDemand bcs.a2.7xlarge img-ubuntu-vpc", | |||
"SENTIEON_LICENSE": "192.168.0.55:8990", | |||
"SENTIEONdocker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/sentieon-genomics:v2019.11.28", | |||
"db_mills": "Mills_and_1000G_gold_standard.indels.hg38.vcf", | |||
"dbsnp": "dbsnp_146.hg38.vcf", | |||
"ref_dir": "oss://pgx-reference-data/GRCh38.d1.vd1/" | |||
} |
@@ -0,0 +1,16 @@ | |||
{ | |||
"{{ project_name }}.SENTIEON_INSTALL_DIR": "{{ SENTIEON_INSTALL_DIR }}", | |||
"{{ project_name }}.fasta": "{{ fasta }}", | |||
"{{ project_name }}.dbsnp_dir": "{{ dbsnp_dir }}", | |||
"{{ project_name }}.disk_size": "{{ disk_size }}", | |||
"{{ project_name }}.corealigner_bam": "{{ corealigner_bam }}", | |||
"{{ project_name }}.dbmills_dir": "{{ dbmills_dir }}", | |||
"{{ project_name }}.cluster_config": "{{ cluster_config }}", | |||
"{{ project_name }}.SENTIEON_LICENSE": "{{ SENTIEON_LICENSE }}", | |||
"{{ project_name }}.corealigner_bam_index": "{{ corealigner_bam_index }}", | |||
"{{ project_name }}.SENTIEONdocker": "{{ SENTIEONdocker }}", | |||
"{{ project_name }}.db_mills": "{{ db_mills }}", | |||
"{{ project_name }}.sample": "{{ sample }}", | |||
"{{ project_name }}.dbsnp": "{{ dbsnp }}", | |||
"{{ project_name }}.ref_dir": "{{ ref_dir }}" | |||
} |
@@ -0,0 +1,49 @@ | |||
# BQSR: runs Sentieon QualCal on a realigned BAM, writes the recalibrated BAM
# through ReadWriter, and renders a before/after recalibration report (CSV + PDF).
task BQSR {
  # Sentieon installation root inside the container and the license value
  # exported as SENTIEON_LICENSE before any driver call.
  String SENTIEON_INSTALL_DIR
  String SENTIEON_LICENSE

  # Reference directory and the FASTA file name inside it.
  File ref_dir
  String fasta

  # Known-sites resources handed to QualCal via -k (directory + file name).
  File dbsnp_dir
  String dbsnp
  File dbmills_dir
  String db_mills

  # Input alignment. The index is not referenced in the command; presumably it
  # is declared so that it is localized next to the BAM.
  File realigned_bam
  File realigned_bam_index

  # Prefix used for every output file name.
  String sample

  # Execution settings forwarded to the runtime section.
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -eo pipefail
    export SENTIEON_LICENSE=${SENTIEON_LICENSE}
    nt=$(nproc)

    # Pass 1: QualCal on the input BAM, producing the recalibration table.
    ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${realigned_bam} \
      --algo QualCal -k ${dbsnp_dir}/${dbsnp} -k ${dbmills_dir}/${db_mills} ${sample}_recal_data.table

    # Pass 2: apply the table with -q, compute the post-recalibration table and
    # write the recalibrated BAM in the same driver call.
    ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${realigned_bam} -q ${sample}_recal_data.table \
      --algo QualCal -k ${dbsnp_dir}/${dbsnp} -k ${dbmills_dir}/${db_mills} ${sample}_recal_data.table.post \
      --algo ReadWriter ${sample}.sorted.deduped.realigned.recaled.bam

    # Combine the before/after tables into a CSV, then plot it as a PDF.
    ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt --algo QualCal --plot \
      --before ${sample}_recal_data.table --after ${sample}_recal_data.table.post ${sample}_recal_data.csv
    ${SENTIEON_INSTALL_DIR}/bin/sentieon plot QualCal -o ${sample}_bqsrreport.pdf ${sample}_recal_data.csv
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File recal_table = "${sample}_recal_data.table"
    File recal_post = "${sample}_recal_data.table.post"
    File recaled_bam = "${sample}.sorted.deduped.realigned.recaled.bam"
    File recaled_bam_index = "${sample}.sorted.deduped.realigned.recaled.bam.bai"
    File recal_csv = "${sample}_recal_data.csv"
    File bqsrreport_pdf = "${sample}_bqsrreport.pdf"
  }
}
@@ -0,0 +1,44 @@ | |||
# Dedup: collects duplicate score info with LocusCollector, removes duplicates
# with the Sentieon Dedup algo, and derives two small summary files from the
# resulting metrics.
task Dedup {
  # Sentieon installation root and license value (exported in the command).
  String SENTIEON_INSTALL_DIR
  String SENTIEON_LICENSE

  # Sorted input alignment. The index is not referenced in the command;
  # presumably it is declared so that it is localized next to the BAM.
  File sorted_bam
  File sorted_bam_index

  # Prefix used for every output file name.
  String sample

  # Execution settings forwarded to the runtime section.
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -eo pipefail
    export SENTIEON_LICENSE=${SENTIEON_LICENSE}
    nt=$(nproc)

    # Step 1: gather per-locus score information.
    ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt -i ${sorted_bam} \
      --algo LocusCollector --fun score_info ${sample}_score.txt

    # Step 2: drop duplicates (--rmdup) using that score file and emit metrics.
    ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt -i ${sorted_bam} \
      --algo Dedup --rmdup --score_info ${sample}_score.txt --metrics ${sample}_dedup_metrics.txt ${sample}.sorted.deduped.bam

    # Take line 3 of the metrics file and print "<sample> TAB <column 9 * 100>".
    sed -n '3p' ${sample}_dedup_metrics.txt | awk -F'\t' '{print "'"${sample}"'""\t"$9*100}' > ${sample}_picard_duplication.txt

    # Prepend a "#DuplicationMetrics" header line so the marked_dup_metrics
    # file can be recognized as the picard output.
    sed '1i\#DuplicationMetrics' ${sample}_dedup_metrics.txt > ${sample}_marked_dup_metrics.txt
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File score = "${sample}_score.txt"
    File dedup_metrics = "${sample}_marked_dup_metrics.txt"
    File duplication = "${sample}_picard_duplication.txt"
    File Dedup_bam = "${sample}.sorted.deduped.bam"
    File Dedup_bam_index = "${sample}.sorted.deduped.bam.bai"
  }
}
@@ -0,0 +1,42 @@ | |||
# Realigner: runs the Sentieon Realigner algo on a deduplicated BAM using the
# Mills known-sites file.
task Realigner {
  # Sentieon installation root and license value (exported in the command).
  String SENTIEON_INSTALL_DIR
  String SENTIEON_LICENSE

  # Reference directory and the FASTA file name inside it.
  File ref_dir
  String fasta

  # Known-sites resource passed to Realigner via -k (directory + file name).
  File dbmills_dir
  String db_mills

  # Deduplicated input alignment. The index is not referenced in the command;
  # presumably it is declared so that it is localized next to the BAM.
  File Dedup_bam
  File Dedup_bam_index

  # Prefix used for the output file names.
  String sample

  # Execution settings forwarded to the runtime section.
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -eo pipefail
    export SENTIEON_LICENSE=${SENTIEON_LICENSE}
    nt=$(nproc)

    ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${Dedup_bam} \
      --algo Realigner -k ${dbmills_dir}/${db_mills} ${sample}.sorted.deduped.realigned.bam
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File realigner_bam = "${sample}.sorted.deduped.realigned.bam"
    File realigner_bam_index = "${sample}.sorted.deduped.realigned.bam.bai"
  }
}
@@ -0,0 +1,43 @@ | |||
# TNscope: tumor/normal variant calling with the Sentieon TNscope algo on a
# co-realigned BAM.
task TNscope {
  # Sentieon installation root and license value (exported in the command).
  String SENTIEON_INSTALL_DIR
  String SENTIEON_LICENSE

  # Reference directory and the FASTA file name inside it.
  File ref_dir
  String fasta

  # dbSNP resource passed via --dbsnp (directory + file name).
  File dbsnp_dir
  String dbsnp

  # Co-realigned input alignment. The index is not referenced in the command;
  # presumably it is declared so that it is localized next to the BAM.
  File corealigner_bam
  File corealigner_bam_index

  # Names given to --tumor_sample / --normal_sample.
  String tumor_name
  String normal_name

  # Prefix used for the output file names.
  String sample

  # Execution settings forwarded to the runtime section.
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -eo pipefail
    export SENTIEON_LICENSE=${SENTIEON_LICENSE}
    nt=$(nproc)

    ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${corealigner_bam} \
      --algo TNscope --tumor_sample ${tumor_name} --normal_sample ${normal_name} --dbsnp ${dbsnp_dir}/${dbsnp} ${sample}.TNscope.TN.vcf
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File TNscope_vcf = "${sample}.TNscope.TN.vcf"
    File TNscope_vcf_index = "${sample}.TNscope.TN.vcf.idx"
  }
}
@@ -0,0 +1,44 @@ | |||
# TNseq: tumor/normal variant calling with the Sentieon TNhaplotyper algo on a
# co-realigned BAM.
task TNseq {
  # Sentieon installation root and license value (exported in the command).
  String SENTIEON_INSTALL_DIR
  String SENTIEON_LICENSE

  # Reference directory and the FASTA file name inside it.
  File ref_dir
  String fasta

  # dbSNP resource passed via --dbsnp (directory + file name).
  File dbsnp_dir
  String dbsnp

  # Co-realigned input alignment. The index is not referenced in the command;
  # presumably it is declared so that it is localized next to the BAM.
  File corealigner_bam
  File corealigner_bam_index

  # Names given to --tumor_sample / --normal_sample.
  String tumor_name
  String normal_name

  # Prefix used for the output file names.
  String sample

  # Execution settings forwarded to the runtime section.
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -eo pipefail
    export SENTIEON_LICENSE=${SENTIEON_LICENSE}
    nt=$(nproc)

    ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${corealigner_bam} \
      --algo TNhaplotyper --tumor_sample ${tumor_name} --normal_sample ${normal_name} --dbsnp ${dbsnp_dir}/${dbsnp} ${sample}.TNseq.TN.vcf
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File TNseq_vcf = "${sample}.TNseq.TN.vcf"
    File TNseq_vcf_index = "${sample}.TNseq.TN.vcf.idx"
  }
}
@@ -0,0 +1,45 @@ | |||
# corealigner: feeds the tumor and normal recalibrated BAMs to a single
# Sentieon Realigner run and writes one combined BAM.
task corealigner {
  # Sentieon installation root and license value (exported in the command).
  String SENTIEON_INSTALL_DIR
  String SENTIEON_LICENSE

  # Reference directory and the FASTA file name inside it.
  File ref_dir
  String fasta

  # Known-sites resources passed to Realigner via -k (directory + file name).
  File dbmills_dir
  String db_mills
  File dbsnp_dir
  String dbsnp

  # Tumor and normal inputs. The indexes are not referenced in the command;
  # presumably they are declared so that they are localized next to the BAMs.
  File tumor_recaled_bam
  File tumor_recaled_bam_index
  File normal_recaled_bam
  File normal_recaled_bam_index

  # Prefix used for the output file names.
  String sample

  # Execution settings forwarded to the runtime section.
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -eo pipefail
    export SENTIEON_LICENSE=${SENTIEON_LICENSE}
    nt=$(nproc)

    ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${tumor_recaled_bam} -i ${normal_recaled_bam} \
      --algo Realigner -k ${dbmills_dir}/${db_mills} -k ${dbsnp_dir}/${dbsnp} ${sample}_corealigned.bam
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File corealigner_bam = "${sample}_corealigned.bam"
    File corealigner_bam_index = "${sample}_corealigned.bam.bai"
  }
}
@@ -0,0 +1,46 @@ | |||
# deduped_Metrics: one Sentieon driver pass over the deduplicated BAM that runs
# eight metrics algos (coverage, mean quality by cycle, quality distribution,
# GC bias, alignment stats, insert size, quality yield, WGS metrics).
task deduped_Metrics {
  # Sentieon installation root and license value (exported in the command).
  String SENTIEON_INSTALL_DIR
  String SENTIEON_LICENSE

  # Reference directory and the FASTA file name inside it.
  File ref_dir
  String fasta

  # Deduplicated input alignment. The index is not referenced in the command;
  # presumably it is declared so that it is localized next to the BAM.
  File Dedup_bam
  File Dedup_bam_index

  # Prefix used for every output file name.
  String sample

  # Execution settings forwarded to the runtime section.
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -eo pipefail
    export SENTIEON_LICENSE=${SENTIEON_LICENSE}
    nt=$(nproc)

    # All algos share a single driver invocation; one --algo per continued line.
    ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${Dedup_bam} \
      --algo CoverageMetrics --omit_base_output ${sample}_deduped_coverage_metrics \
      --algo MeanQualityByCycle ${sample}_deduped_mq_metrics.txt \
      --algo QualDistribution ${sample}_deduped_qd_metrics.txt \
      --algo GCBias --summary ${sample}_deduped_gc_summary.txt ${sample}_deduped_gc_metrics.txt \
      --algo AlignmentStat ${sample}_deduped_aln_metrics.txt \
      --algo InsertSizeMetricAlgo ${sample}_deduped_is_metrics.txt \
      --algo QualityYield ${sample}_deduped_QualityYield.txt \
      --algo WgsMetricsAlgo ${sample}_deduped_WgsMetricsAlgo.txt
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    # Files derived from the CoverageMetrics output prefix.
    File deduped_coverage_metrics_sample_summary = "${sample}_deduped_coverage_metrics.sample_summary"
    File deduped_coverage_metrics_sample_statistics = "${sample}_deduped_coverage_metrics.sample_statistics"
    File deduped_coverage_metrics_sample_interval_statistics = "${sample}_deduped_coverage_metrics.sample_interval_statistics"
    File deduped_coverage_metrics_sample_cumulative_coverage_proportions = "${sample}_deduped_coverage_metrics.sample_cumulative_coverage_proportions"
    File deduped_coverage_metrics_sample_cumulative_coverage_counts = "${sample}_deduped_coverage_metrics.sample_cumulative_coverage_counts"

    # One file per remaining algo (GCBias contributes two).
    File deduped_mean_quality = "${sample}_deduped_mq_metrics.txt"
    File deduped_qd_metrics = "${sample}_deduped_qd_metrics.txt"
    File deduped_gc_summary = "${sample}_deduped_gc_summary.txt"
    File deduped_gc_metrics = "${sample}_deduped_gc_metrics.txt"
    # NOTE(review): "dedeuped" is misspelled, but the output name is part of
    # the task interface, so it is kept as-is here.
    File dedeuped_aln_metrics = "${sample}_deduped_aln_metrics.txt"
    File deduped_is_metrics = "${sample}_deduped_is_metrics.txt"
    File deduped_QualityYield = "${sample}_deduped_QualityYield.txt"
    File deduped_wgsmetrics = "${sample}_deduped_WgsMetricsAlgo.txt"
  }
}
@@ -0,0 +1,37 @@ | |||
# fastqc: runs FastQC on the four tumor/normal paired-end FASTQ files and
# exposes the HTML report and ZIP archive produced for each of them.
task fastqc {
  # Paired-end reads for the tumor and the normal sample.
  File tumor_read1
  File tumor_read2
  File normal_read1
  File normal_read2

  # Execution settings forwarded to the runtime section.
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -o pipefail
    set -e
    nt=$(nproc)

    # FastQC's -t option sets how many FILES are processed simultaneously, so
    # four single-file invocations ran strictly one after another. Passing all
    # four files to one invocation lets them be analysed in parallel while
    # producing the same report files in the working directory.
    fastqc -t $nt -o ./ ${tumor_read1} ${tumor_read2} ${normal_read1} ${normal_read2}
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    # Report names: input base name with a trailing .fastq.gz / .fq.gz
    # replaced by _fastqc.html or _fastqc.zip.
    File tumor_read1_html = sub(basename(tumor_read1), "\\.(fastq|fq)\\.gz$", "_fastqc.html")
    File tumor_read1_zip = sub(basename(tumor_read1), "\\.(fastq|fq)\\.gz$", "_fastqc.zip")
    File tumor_read2_html = sub(basename(tumor_read2), "\\.(fastq|fq)\\.gz$", "_fastqc.html")
    File tumor_read2_zip = sub(basename(tumor_read2), "\\.(fastq|fq)\\.gz$", "_fastqc.zip")
    File normal_read1_html = sub(basename(normal_read1), "\\.(fastq|fq)\\.gz$", "_fastqc.html")
    File normal_read1_zip = sub(basename(normal_read1), "\\.(fastq|fq)\\.gz$", "_fastqc.zip")
    File normal_read2_html = sub(basename(normal_read2), "\\.(fastq|fq)\\.gz$", "_fastqc.html")
    File normal_read2_zip = sub(basename(normal_read2), "\\.(fastq|fq)\\.gz$", "_fastqc.zip")
  }
}
@@ -0,0 +1,48 @@ | |||
# fastqscreen: runs FastQ Screen (bowtie2 aligner) on the four tumor/normal
# FASTQ files and exposes the PNG, TXT and HTML report of each.
task fastqscreen {
  # Paired-end reads for the tumor and the normal sample.
  File tumor_read1
  File tumor_read2
  File normal_read1
  File normal_read2

  # Screening reference directory (copied to /cromwell_root/tmp/ before the
  # run) and the FastQ Screen configuration file.
  # NOTE(review): presumably the configuration file points at the copied
  # location -- confirm against the conf file.
  File screen_ref_dir
  File fastq_screen_conf

  # Report name stems: input base name without its .fastq.gz / .fq.gz suffix.
  # The same suffix pattern as the fastqc task is used so that .fq.gz inputs
  # resolve to the report names as well; .fastq.gz inputs yield the same stems
  # as the previous basename(x, ".fastq.gz") form.
  String tumor_read1name = sub(basename(tumor_read1), "\\.(fastq|fq)\\.gz$", "")
  String tumor_read2name = sub(basename(tumor_read2), "\\.(fastq|fq)\\.gz$", "")
  String normal_read1name = sub(basename(normal_read1), "\\.(fastq|fq)\\.gz$", "")
  String normal_read2name = sub(basename(normal_read2), "\\.(fastq|fq)\\.gz$", "")

  # Execution settings forwarded to the runtime section.
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -o pipefail
    set -e
    nt=$(nproc)

    # Stage the screening references under /cromwell_root/tmp/.
    mkdir -p /cromwell_root/tmp
    cp -r ${screen_ref_dir} /cromwell_root/tmp/

    # One FastQ Screen run per input file.
    fastq_screen --aligner bowtie2 --conf ${fastq_screen_conf} --top 100000 --threads $nt ${tumor_read1}
    fastq_screen --aligner bowtie2 --conf ${fastq_screen_conf} --top 100000 --threads $nt ${tumor_read2}
    fastq_screen --aligner bowtie2 --conf ${fastq_screen_conf} --top 100000 --threads $nt ${normal_read1}
    fastq_screen --aligner bowtie2 --conf ${fastq_screen_conf} --top 100000 --threads $nt ${normal_read2}
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File tumor_png1 = "${tumor_read1name}_screen.png"
    File tumor_txt1 = "${tumor_read1name}_screen.txt"
    File tumor_html1 = "${tumor_read1name}_screen.html"
    File tumor_png2 = "${tumor_read2name}_screen.png"
    File tumor_txt2 = "${tumor_read2name}_screen.txt"
    File tumor_html2 = "${tumor_read2name}_screen.html"
    File normal_png1 = "${normal_read1name}_screen.png"
    File normal_txt1 = "${normal_read1name}_screen.txt"
    File normal_html1 = "${normal_read1name}_screen.html"
    File normal_png2 = "${normal_read2name}_screen.png"
    File normal_txt2 = "${normal_read2name}_screen.txt"
    File normal_html2 = "${normal_read2name}_screen.html"
  }
}
@@ -0,0 +1,36 @@ | |||
# mapping: aligns a FASTQ pair with the bwa binary shipped in the Sentieon
# install directory and pipes the result into "sentieon util sort" to produce
# a sorted BAM.
task mapping {
  # Sentieon installation root and license value (exported in the command).
  String SENTIEON_INSTALL_DIR
  String SENTIEON_LICENSE

  # Reference directory and the FASTA file name inside it.
  File ref_dir
  String fasta

  # Paired-end reads.
  File fastq_1
  File fastq_2

  # Read-group fields: ID (group), SM (sample) and PL (pl). The sample value is
  # also the prefix of the output file names.
  String group
  String sample
  String pl

  # Execution settings forwarded to the runtime section.
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -eo pipefail
    export SENTIEON_LICENSE=${SENTIEON_LICENSE}
    nt=$(nproc)

    # bwa mem writes SAM to stdout; util sort reads it from stdin ("-i -") and
    # converts it to a sorted BAM (--sam2bam).
    ${SENTIEON_INSTALL_DIR}/bin/bwa mem -M -R "@RG\tID:${group}\tSM:${sample}\tPL:${pl}" -t $nt -K 10000000 ${ref_dir}/${fasta} ${fastq_1} ${fastq_2} \
      | ${SENTIEON_INSTALL_DIR}/bin/sentieon util sort -o ${sample}.sorted.bam -t $nt --sam2bam -i -
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File sorted_bam = "${sample}.sorted.bam"
    File sorted_bam_index = "${sample}.sorted.bam.bai"
  }
}
@@ -0,0 +1,27 @@ | |||
# qualimap: runs "qualimap bamqc" on a BAM and packs the report directory into
# a single archive.
task qualimap {
  # Input alignment. The index is not referenced in the command; presumably it
  # is declared so that it is localized next to the BAM.
  File bam
  File bai

  # Report directory / archive stem: BAM base name without the .bam suffix.
  String bamname = basename(bam,".bam")

  # Value passed to --java-mem-size. Defaults to the previously hard-coded
  # 60G so existing callers behave the same; override it when the node
  # selected by cluster_config has a different amount of memory.
  String java_mem_size = "60G"

  # Execution settings forwarded to the runtime section.
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -o pipefail
    set -e
    nt=$(nproc)
    /opt/qualimap/qualimap bamqc -bam ${bam} -outformat PDF:HTML -nt $nt -outdir ${bamname} --java-mem-size=${java_mem_size}

    # NOTE(review): despite the .zip extension this is a gzip-compressed tar
    # archive (tar -z). The file name is kept because it is the task output.
    tar -zcvf ${bamname}_qualimap.zip ${bamname}
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File zip = "${bamname}_qualimap.zip"
  }
}
@@ -0,0 +1,59 @@ | |||
import "./tasks/TNseq.wdl" as TNseq | |||
import "./tasks/TNscope.wdl" as TNscope | |||
# Templated workflow ({{ project_name }} is filled in before use): runs the
# TNseq and TNscope tasks on the same co-realigned tumor/normal BAM.
workflow {{ project_name }} {
  # Sentieon container image, installation root and license value.
  String SENTIEONdocker
  String SENTIEON_INSTALL_DIR
  String SENTIEON_LICENSE

  # Reference directory and the FASTA file name inside it.
  File ref_dir
  String fasta

  # dbSNP resource (directory + file name), forwarded to both calls.
  File dbsnp_dir
  String dbsnp

  # Mills resource. Declared as workflow inputs but not passed to either call
  # in this workflow.
  File dbmills_dir
  String db_mills

  # Co-realigned BAM and its index, shared by both callers.
  File corealigner_bam
  File corealigner_bam_index

  # Sample identifier: output prefix, and the stem of the tumor/normal names
  # (sample + "tumor" / sample + "normal").
  String sample

  # Execution settings forwarded to every task.
  String cluster_config
  String disk_size

  call TNseq.TNseq as TNseq {
    input:
      sample=sample,
      tumor_name=sample + "tumor",
      normal_name=sample + "normal",
      corealigner_bam=corealigner_bam,
      corealigner_bam_index=corealigner_bam_index,
      ref_dir=ref_dir,
      fasta=fasta,
      dbsnp_dir=dbsnp_dir,
      dbsnp=dbsnp,
      SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR,
      SENTIEON_LICENSE=SENTIEON_LICENSE,
      docker=SENTIEONdocker,
      cluster_config=cluster_config,
      disk_size=disk_size
  }

  call TNscope.TNscope as TNscope {
    input:
      sample=sample,
      tumor_name=sample + "tumor",
      normal_name=sample + "normal",
      corealigner_bam=corealigner_bam,
      corealigner_bam_index=corealigner_bam_index,
      ref_dir=ref_dir,
      fasta=fasta,
      dbsnp_dir=dbsnp_dir,
      dbsnp=dbsnp,
      SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR,
      SENTIEON_LICENSE=SENTIEON_LICENSE,
      docker=SENTIEONdocker,
      cluster_config=cluster_config,
      disk_size=disk_size
  }
}