Browse Source

first commit

master
LUYAO REN 4 years ago
commit
7ccbef6434
16 changed files with 549 additions and 0 deletions
  1. BIN
      assets/1549784133993.png
  2. BIN
      assets/somatic.png
  3. +13
    -0
      defaults
  4. +16
    -0
      inputs
  5. +49
    -0
      tasks/BQSR.wdl
  6. +44
    -0
      tasks/Dedup.wdl
  7. +42
    -0
      tasks/Realigner.wdl
  8. +43
    -0
      tasks/TNscope.wdl
  9. +44
    -0
      tasks/TNseq.wdl
  10. +45
    -0
      tasks/corealigner.wdl
  11. +46
    -0
      tasks/deduped_Metrics.wdl
  12. +37
    -0
      tasks/fastqc.wdl
  13. +48
    -0
      tasks/fastqscreen.wdl
  14. +36
    -0
      tasks/mapping.wdl
  15. +27
    -0
      tasks/qualimap.wdl
  16. +59
    -0
      workflow.wdl

BIN
assets/1549784133993.png View File

Before After
Width: 922  |  Height: 464  |  Size: 55KB

BIN
assets/somatic.png View File

Before After
Width: 1378  |  Height: 1049  |  Size: 97KB

+ 13
- 0
defaults View File

@@ -0,0 +1,13 @@
{
"SENTIEON_INSTALL_DIR": "/opt/sentieon-genomics",
"fasta": "GRCh38.d1.vd1.fa",
"dbsnp_dir": "oss://pgx-reference-data/GRCh38.d1.vd1/",
"disk_size": "800",
"dbmills_dir": "oss://pgx-reference-data/GRCh38.d1.vd1/",
"cluster_config": "OnDemand bcs.a2.7xlarge img-ubuntu-vpc",
"SENTIEON_LICENSE": "192.168.0.55:8990",
"SENTIEONdocker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/sentieon-genomics:v2019.11.28",
"db_mills": "Mills_and_1000G_gold_standard.indels.hg38.vcf",
"dbsnp": "dbsnp_146.hg38.vcf",
"ref_dir": "oss://pgx-reference-data/GRCh38.d1.vd1/"
}

+ 16
- 0
inputs View File

@@ -0,0 +1,16 @@
{
"{{ project_name }}.SENTIEON_INSTALL_DIR": "{{ SENTIEON_INSTALL_DIR }}",
"{{ project_name }}.fasta": "{{ fasta }}",
"{{ project_name }}.dbsnp_dir": "{{ dbsnp_dir }}",
"{{ project_name }}.disk_size": "{{ disk_size }}",
"{{ project_name }}.corealigner_bam": "{{ corealigner_bam }}",
"{{ project_name }}.dbmills_dir": "{{ dbmills_dir }}",
"{{ project_name }}.cluster_config": "{{ cluster_config }}",
"{{ project_name }}.SENTIEON_LICENSE": "{{ SENTIEON_LICENSE }}",
"{{ project_name }}.corealigner_bam_index": "{{ corealigner_bam_index }}",
"{{ project_name }}.SENTIEONdocker": "{{ SENTIEONdocker }}",
"{{ project_name }}.db_mills": "{{ db_mills }}",
"{{ project_name }}.sample": "{{ sample }}",
"{{ project_name }}.dbsnp": "{{ dbsnp }}",
"{{ project_name }}.ref_dir": "{{ ref_dir }}"
}

+ 49
- 0
tasks/BQSR.wdl View File

@@ -0,0 +1,49 @@
# Base quality score recalibration with the Sentieon driver.
#
# The command block runs four steps in order:
#   1. QualCal on the realigned BAM (both -k files supplied) -> <sample>_recal_data.table
#   2. QualCal again with the table applied via -q -> <sample>_recal_data.table.post,
#      chained with ReadWriter, which writes the recalibrated BAM
#   3. QualCal --plot combining the before/after tables -> <sample>_recal_data.csv
#   4. `sentieon plot QualCal` rendering the CSV -> <sample>_bqsrreport.pdf
task BQSR {
    # Reference genome: directory plus FASTA file name inside it
    File ref_dir
    String fasta

    # Files passed to QualCal with -k: directory plus file name for each
    File dbsnp_dir
    String dbsnp
    File dbmills_dir
    String db_mills

    # Input alignment. The index is not referenced in the command;
    # presumably it is declared so it gets localized next to the BAM.
    File realigned_bam
    File realigned_bam_index

    # Prefix used for every output file name
    String sample

    # Sentieon installation root and license server address
    String SENTIEON_INSTALL_DIR
    String SENTIEON_LICENSE

    # Execution environment
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        export SENTIEON_LICENSE=${SENTIEON_LICENSE}
        nt=$(nproc)

        ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${realigned_bam} \
            --algo QualCal -k ${dbsnp_dir}/${dbsnp} -k ${dbmills_dir}/${db_mills} ${sample}_recal_data.table

        ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${realigned_bam} -q ${sample}_recal_data.table \
            --algo QualCal -k ${dbsnp_dir}/${dbsnp} -k ${dbmills_dir}/${db_mills} ${sample}_recal_data.table.post \
            --algo ReadWriter ${sample}.sorted.deduped.realigned.recaled.bam

        ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt \
            --algo QualCal --plot --before ${sample}_recal_data.table --after ${sample}_recal_data.table.post ${sample}_recal_data.csv

        ${SENTIEON_INSTALL_DIR}/bin/sentieon plot QualCal -o ${sample}_bqsrreport.pdf ${sample}_recal_data.csv
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File recal_table = "${sample}_recal_data.table"
        File recal_post = "${sample}_recal_data.table.post"
        File recaled_bam = "${sample}.sorted.deduped.realigned.recaled.bam"
        # No command creates the .bai explicitly; presumably ReadWriter writes it with the BAM.
        File recaled_bam_index = "${sample}.sorted.deduped.realigned.recaled.bam.bai"
        File recal_csv = "${sample}_recal_data.csv"
        File bqsrreport_pdf = "${sample}_bqsrreport.pdf"
    }
}

+ 44
- 0
tasks/Dedup.wdl View File

@@ -0,0 +1,44 @@
# Duplicate removal with the Sentieon driver.
#
# Steps in the command block:
#   1. LocusCollector writes <sample>_score.txt
#   2. Dedup --rmdup consumes that score file and writes the deduplicated BAM plus a metrics file
#   3. Line 3, column 9 of the metrics file is multiplied by 100 and written, prefixed
#      with the sample name, to <sample>_picard_duplication.txt
#   4. A "#DuplicationMetrics" header line is prepended to a copy of the metrics file
task Dedup {
    # Input alignment. The index is not referenced in the command;
    # presumably it is declared so it gets localized next to the BAM.
    File sorted_bam
    File sorted_bam_index

    # Prefix used for every output file name
    String sample

    # Sentieon installation root and license server address
    String SENTIEON_INSTALL_DIR
    String SENTIEON_LICENSE

    # Execution environment
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        export SENTIEON_LICENSE=${SENTIEON_LICENSE}
        nt=$(nproc)

        ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt -i ${sorted_bam} \
            --algo LocusCollector --fun score_info ${sample}_score.txt

        ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt -i ${sorted_bam} \
            --algo Dedup --rmdup --score_info ${sample}_score.txt --metrics ${sample}_dedup_metrics.txt ${sample}.sorted.deduped.bam

        sed -n '3p' ${sample}_dedup_metrics.txt | awk -F'\t' '{print "'"${sample}"'""\t"$9*100}' > ${sample}_picard_duplication.txt

        # the header-prefixed copy below can be recognized as the picard output
        sed '1i\#DuplicationMetrics' ${sample}_dedup_metrics.txt > ${sample}_marked_dup_metrics.txt
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File score = "${sample}_score.txt"
        # This is the header-prefixed copy, not the raw <sample>_dedup_metrics.txt.
        File dedup_metrics = "${sample}_marked_dup_metrics.txt"
        File duplication = "${sample}_picard_duplication.txt"
        File Dedup_bam = "${sample}.sorted.deduped.bam"
        # No command creates the .bai explicitly; presumably the Dedup algo writes it with the BAM.
        File Dedup_bam_index = "${sample}.sorted.deduped.bam.bai"
    }
}






+ 42
- 0
tasks/Realigner.wdl View File

@@ -0,0 +1,42 @@
# Runs the Sentieon Realigner algo on a single deduplicated BAM, passing the
# Mills file with -k, and writes <sample>.sorted.deduped.realigned.bam.
task Realigner {
    # Reference genome: directory plus FASTA file name inside it
    File ref_dir
    String fasta

    # File passed to Realigner with -k: directory plus file name
    File dbmills_dir
    String db_mills

    # Input alignment. The index is not referenced in the command;
    # presumably it is declared so it gets localized next to the BAM.
    File Dedup_bam
    File Dedup_bam_index

    # Prefix used for the output file names
    String sample

    # Sentieon installation root and license server address
    String SENTIEON_INSTALL_DIR
    String SENTIEON_LICENSE

    # Execution environment
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        export SENTIEON_LICENSE=${SENTIEON_LICENSE}
        nt=$(nproc)

        ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${Dedup_bam} \
            --algo Realigner -k ${dbmills_dir}/${db_mills} ${sample}.sorted.deduped.realigned.bam
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File realigner_bam = "${sample}.sorted.deduped.realigned.bam"
        # No command creates the .bai explicitly; presumably Realigner writes it with the BAM.
        File realigner_bam_index = "${sample}.sorted.deduped.realigned.bam.bai"
    }
}



+ 43
- 0
tasks/TNscope.wdl View File

@@ -0,0 +1,43 @@
# Runs the Sentieon TNscope algo on a co-realigned tumor/normal BAM and writes
# <sample>.TNscope.TN.vcf.
task TNscope {
    # Reference genome: directory plus FASTA file name inside it
    File ref_dir
    String fasta

    # File passed to TNscope with --dbsnp: directory plus file name
    File dbsnp_dir
    String dbsnp

    # Input alignment. The index is not referenced in the command;
    # presumably it is declared so it gets localized next to the BAM.
    File corealigner_bam
    File corealigner_bam_index

    # Values passed to --tumor_sample / --normal_sample
    String tumor_name
    String normal_name

    # Prefix used for the output file names
    String sample

    # Sentieon installation root and license server address
    String SENTIEON_INSTALL_DIR
    String SENTIEON_LICENSE

    # Execution environment
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        export SENTIEON_LICENSE=${SENTIEON_LICENSE}
        nt=$(nproc)

        ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${corealigner_bam} \
            --algo TNscope --tumor_sample ${tumor_name} --normal_sample ${normal_name} \
            --dbsnp ${dbsnp_dir}/${dbsnp} ${sample}.TNscope.TN.vcf
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File TNscope_vcf = "${sample}.TNscope.TN.vcf"
        # No command creates the .idx explicitly; presumably the driver writes it with the VCF.
        File TNscope_vcf_index = "${sample}.TNscope.TN.vcf.idx"
    }
}

+ 44
- 0
tasks/TNseq.wdl View File

@@ -0,0 +1,44 @@
# Runs the Sentieon TNhaplotyper algo on a co-realigned tumor/normal BAM and
# writes <sample>.TNseq.TN.vcf.
task TNseq {
    # Reference genome: directory plus FASTA file name inside it
    File ref_dir
    String fasta

    # File passed to TNhaplotyper with --dbsnp: directory plus file name
    File dbsnp_dir
    String dbsnp

    # Input alignment. The index is not referenced in the command;
    # presumably it is declared so it gets localized next to the BAM.
    File corealigner_bam
    File corealigner_bam_index

    # Values passed to --tumor_sample / --normal_sample
    String tumor_name
    String normal_name

    # Prefix used for the output file names
    String sample

    # Sentieon installation root and license server address
    String SENTIEON_INSTALL_DIR
    String SENTIEON_LICENSE

    # Execution environment
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        export SENTIEON_LICENSE=${SENTIEON_LICENSE}
        nt=$(nproc)

        ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${corealigner_bam} \
            --algo TNhaplotyper --tumor_sample ${tumor_name} --normal_sample ${normal_name} \
            --dbsnp ${dbsnp_dir}/${dbsnp} ${sample}.TNseq.TN.vcf
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File TNseq_vcf = "${sample}.TNseq.TN.vcf"
        # No command creates the .idx explicitly; presumably the driver writes it with the VCF.
        File TNseq_vcf_index = "${sample}.TNseq.TN.vcf.idx"
    }
}



+ 45
- 0
tasks/corealigner.wdl View File

@@ -0,0 +1,45 @@
# Runs the Sentieon Realigner algo over the tumor and normal recalibrated BAMs
# together (two -i inputs), passing both the Mills and dbSNP files with -k,
# and writes a single <sample>_corealigned.bam.
task corealigner {
    # Reference genome: directory plus FASTA file name inside it
    File ref_dir
    String fasta

    # Files passed to Realigner with -k: directory plus file name for each
    File dbmills_dir
    String db_mills
    File dbsnp_dir
    String dbsnp

    # Input alignments. The indexes are not referenced in the command;
    # presumably they are declared so they get localized next to the BAMs.
    File tumor_recaled_bam
    File tumor_recaled_bam_index
    File normal_recaled_bam
    File normal_recaled_bam_index

    # Prefix used for the output file names
    String sample

    # Sentieon installation root and license server address
    String SENTIEON_INSTALL_DIR
    String SENTIEON_LICENSE

    # Execution environment
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        export SENTIEON_LICENSE=${SENTIEON_LICENSE}
        nt=$(nproc)

        ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt \
            -i ${tumor_recaled_bam} -i ${normal_recaled_bam} \
            --algo Realigner -k ${dbmills_dir}/${db_mills} -k ${dbsnp_dir}/${dbsnp} ${sample}_corealigned.bam
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File corealigner_bam = "${sample}_corealigned.bam"
        # No command creates the .bai explicitly; presumably Realigner writes it with the BAM.
        File corealigner_bam_index = "${sample}_corealigned.bam.bai"
    }
}




+ 46
- 0
tasks/deduped_Metrics.wdl View File

@@ -0,0 +1,46 @@
# Collects QC metrics on a deduplicated BAM with one Sentieon driver call that
# chains eight algos: CoverageMetrics, MeanQualityByCycle, QualDistribution,
# GCBias, AlignmentStat, InsertSizeMetricAlgo, QualityYield and WgsMetricsAlgo.
task deduped_Metrics {
    # Reference genome: directory plus FASTA file name inside it
    File ref_dir
    String fasta

    # Input alignment. The index is not referenced in the command;
    # presumably it is declared so it gets localized next to the BAM.
    File Dedup_bam
    File Dedup_bam_index

    # Prefix used for every output file name
    String sample

    # Sentieon installation root and license server address
    String SENTIEON_INSTALL_DIR
    String SENTIEON_LICENSE

    # Execution environment
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        export SENTIEON_LICENSE=${SENTIEON_LICENSE}
        nt=$(nproc)

        ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${Dedup_bam} \
            --algo CoverageMetrics --omit_base_output ${sample}_deduped_coverage_metrics \
            --algo MeanQualityByCycle ${sample}_deduped_mq_metrics.txt \
            --algo QualDistribution ${sample}_deduped_qd_metrics.txt \
            --algo GCBias --summary ${sample}_deduped_gc_summary.txt ${sample}_deduped_gc_metrics.txt \
            --algo AlignmentStat ${sample}_deduped_aln_metrics.txt \
            --algo InsertSizeMetricAlgo ${sample}_deduped_is_metrics.txt \
            --algo QualityYield ${sample}_deduped_QualityYield.txt \
            --algo WgsMetricsAlgo ${sample}_deduped_WgsMetricsAlgo.txt
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        # CoverageMetrics outputs; each shares the <sample>_deduped_coverage_metrics prefix
        File deduped_coverage_metrics_sample_summary = "${sample}_deduped_coverage_metrics.sample_summary"
        File deduped_coverage_metrics_sample_statistics = "${sample}_deduped_coverage_metrics.sample_statistics"
        File deduped_coverage_metrics_sample_interval_statistics = "${sample}_deduped_coverage_metrics.sample_interval_statistics"
        File deduped_coverage_metrics_sample_cumulative_coverage_proportions = "${sample}_deduped_coverage_metrics.sample_cumulative_coverage_proportions"
        File deduped_coverage_metrics_sample_cumulative_coverage_counts = "${sample}_deduped_coverage_metrics.sample_cumulative_coverage_counts"

        # One file per remaining algo (GCBias produces two)
        File deduped_mean_quality = "${sample}_deduped_mq_metrics.txt"
        File deduped_qd_metrics = "${sample}_deduped_qd_metrics.txt"
        File deduped_gc_summary = "${sample}_deduped_gc_summary.txt"
        File deduped_gc_metrics = "${sample}_deduped_gc_metrics.txt"
        # NOTE(review): "dedeuped" looks like a typo for "deduped"; kept because the
        # output name is part of the task interface and callers may reference it.
        File dedeuped_aln_metrics = "${sample}_deduped_aln_metrics.txt"
        File deduped_is_metrics = "${sample}_deduped_is_metrics.txt"
        File deduped_QualityYield = "${sample}_deduped_QualityYield.txt"
        File deduped_wgsmetrics = "${sample}_deduped_WgsMetricsAlgo.txt"
    }
}

+ 37
- 0
tasks/fastqc.wdl View File

@@ -0,0 +1,37 @@
# Runs FastQC on the four FASTQ files of a tumor/normal pair, writing reports
# into the task working directory.
task fastqc {
    # Paired-end reads for the tumor and the matched normal
    File tumor_read1
    File tumor_read2
    File normal_read1
    File normal_read2

    # Execution environment
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        nt=$(nproc)

        # tumor pair
        fastqc -t $nt -o ./ ${tumor_read1}
        fastqc -t $nt -o ./ ${tumor_read2}

        # normal pair
        fastqc -t $nt -o ./ ${normal_read1}
        fastqc -t $nt -o ./ ${normal_read2}
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        # Report names are derived from the input base name by replacing a trailing
        # ".fastq.gz" or ".fq.gz" with "_fastqc.html" / "_fastqc.zip".
        File tumor_read1_html = sub(basename(tumor_read1), "\\.(fastq|fq)\\.gz$", "_fastqc.html")
        File tumor_read1_zip = sub(basename(tumor_read1), "\\.(fastq|fq)\\.gz$", "_fastqc.zip")
        File tumor_read2_html = sub(basename(tumor_read2), "\\.(fastq|fq)\\.gz$", "_fastqc.html")
        File tumor_read2_zip = sub(basename(tumor_read2), "\\.(fastq|fq)\\.gz$", "_fastqc.zip")

        File normal_read1_html = sub(basename(normal_read1), "\\.(fastq|fq)\\.gz$", "_fastqc.html")
        File normal_read1_zip = sub(basename(normal_read1), "\\.(fastq|fq)\\.gz$", "_fastqc.zip")
        File normal_read2_html = sub(basename(normal_read2), "\\.(fastq|fq)\\.gz$", "_fastqc.html")
        File normal_read2_zip = sub(basename(normal_read2), "\\.(fastq|fq)\\.gz$", "_fastqc.zip")
    }
}

+ 48
- 0
tasks/fastqscreen.wdl View File

@@ -0,0 +1,48 @@
# Runs fastq_screen (bowtie2 aligner, --top 100000) on the four FASTQ files of
# a tumor/normal pair and collects the PNG, TXT and HTML report for each file.
#
# The reference directory is first copied to /cromwell_root/tmp/; presumably
# the supplied fastq_screen_conf points at databases under that path.
task fastqscreen {
    # Paired-end reads for the tumor and the matched normal
    File tumor_read1
    File tumor_read2
    File normal_read1
    File normal_read2

    # fastq_screen reference databases and the configuration file describing them
    File screen_ref_dir
    File fastq_screen_conf

    # Report prefixes: the input base name with a trailing ".fastq.gz" or ".fq.gz"
    # removed. This is the same pattern the fastqc task uses, so both tasks accept
    # the same FASTQ naming. Results for ".fastq.gz" inputs are unchanged.
    # NOTE(review): this relies on fastq_screen also dropping ".fq.gz" when it names
    # its reports; confirm against the fastq_screen version in the image.
    String tumor_read1name = sub(basename(tumor_read1), "\\.(fastq|fq)\\.gz$", "")
    String tumor_read2name = sub(basename(tumor_read2), "\\.(fastq|fq)\\.gz$", "")
    String normal_read1name = sub(basename(normal_read1), "\\.(fastq|fq)\\.gz$", "")
    String normal_read2name = sub(basename(normal_read2), "\\.(fastq|fq)\\.gz$", "")

    # Execution environment
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        nt=$(nproc)

        mkdir -p /cromwell_root/tmp
        cp -r ${screen_ref_dir} /cromwell_root/tmp/

        fastq_screen --aligner bowtie2 --conf ${fastq_screen_conf} --top 100000 --threads $nt ${tumor_read1}
        fastq_screen --aligner bowtie2 --conf ${fastq_screen_conf} --top 100000 --threads $nt ${tumor_read2}
        fastq_screen --aligner bowtie2 --conf ${fastq_screen_conf} --top 100000 --threads $nt ${normal_read1}
        fastq_screen --aligner bowtie2 --conf ${fastq_screen_conf} --top 100000 --threads $nt ${normal_read2}
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File tumor_png1 = "${tumor_read1name}_screen.png"
        File tumor_txt1 = "${tumor_read1name}_screen.txt"
        File tumor_html1 = "${tumor_read1name}_screen.html"
        File tumor_png2 = "${tumor_read2name}_screen.png"
        File tumor_txt2 = "${tumor_read2name}_screen.txt"
        File tumor_html2 = "${tumor_read2name}_screen.html"

        File normal_png1 = "${normal_read1name}_screen.png"
        File normal_txt1 = "${normal_read1name}_screen.txt"
        File normal_html1 = "${normal_read1name}_screen.html"
        File normal_png2 = "${normal_read2name}_screen.png"
        File normal_txt2 = "${normal_read2name}_screen.txt"
        File normal_html2 = "${normal_read2name}_screen.html"
    }
}

+ 36
- 0
tasks/mapping.wdl View File

@@ -0,0 +1,36 @@
# Aligns one pair of FASTQ files with the bwa binary shipped in the Sentieon
# install directory and pipes the SAM stream into `sentieon util sort`, which
# writes <sample>.sorted.bam.
task mapping {
    # Paired-end reads
    File fastq_1
    File fastq_2

    # Reference genome: directory plus FASTA file name inside it
    File ref_dir
    String fasta

    # Read-group fields: group -> ID, sample -> SM, pl -> PL.
    # sample is also the prefix of the output file names.
    String group
    String sample
    String pl

    # Sentieon installation root and license server address
    String SENTIEON_INSTALL_DIR
    String SENTIEON_LICENSE

    # Execution environment
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        export SENTIEON_LICENSE=${SENTIEON_LICENSE}
        nt=$(nproc)

        ${SENTIEON_INSTALL_DIR}/bin/bwa mem -M -R "@RG\tID:${group}\tSM:${sample}\tPL:${pl}" -t $nt -K 10000000 ${ref_dir}/${fasta} ${fastq_1} ${fastq_2} \
            | ${SENTIEON_INSTALL_DIR}/bin/sentieon util sort -o ${sample}.sorted.bam -t $nt --sam2bam -i -
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File sorted_bam = "${sample}.sorted.bam"
        # No command creates the .bai explicitly; presumably `util sort` writes it with the BAM.
        File sorted_bam_index = "${sample}.sorted.bam.bai"
    }
}

+ 27
- 0
tasks/qualimap.wdl View File

@@ -0,0 +1,27 @@
# Runs `qualimap bamqc` on one BAM, writing PDF and HTML reports into a
# directory named after the BAM, then archives that directory.
task qualimap {
    # Input alignment. The index is not referenced in the command;
    # presumably it is declared so it gets localized next to the BAM.
    File bam
    File bai

    # BAM base name without the ".bam" suffix; names the report directory and the archive
    String bamname = basename(bam,".bam")

    # Execution environment
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        nt=$(nproc)

        /opt/qualimap/qualimap bamqc -bam ${bam} -outformat PDF:HTML -nt $nt -outdir ${bamname} --java-mem-size=60G

        # NOTE(review): tar -z produces a gzip-compressed tarball even though the file
        # is named .zip; the name is kept because it is the declared output path.
        tar -zcvf ${bamname}_qualimap.zip ${bamname}
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File zip = "${bamname}_qualimap.zip"
    }
}

+ 59
- 0
workflow.wdl View File

@@ -0,0 +1,59 @@
import "./tasks/TNseq.wdl" as TNseq
import "./tasks/TNscope.wdl" as TNscope


# Somatic variant calling on an already co-realigned tumor/normal BAM.
# Calls TNseq and TNscope with identical inputs; neither call consumes the
# other's outputs.
workflow {{ project_name }} {

    # Input alignment and its index
    File corealigner_bam
    File corealigner_bam_index

    # Sample label; also the stem of the tumor/normal sample names built below
    String sample

    # Reference genome: directory plus FASTA file name inside it
    File ref_dir
    String fasta

    # dbSNP file: directory plus file name
    File dbsnp_dir
    String dbsnp

    # NOTE(review): dbmills_dir and db_mills are declared but not passed to either call
    # below; they are kept so existing inputs files that set them stay valid.
    File dbmills_dir
    String db_mills

    # Sentieon installation root, license server address and container image
    String SENTIEON_INSTALL_DIR
    String SENTIEON_LICENSE
    String SENTIEONdocker

    # Execution environment shared by both calls
    String cluster_config
    String disk_size

    # NOTE(review): the tumor/normal names are sample + "tumor" / sample + "normal" with
    # no separator; confirm they match the sample names recorded in the input BAM.
    call TNseq.TNseq as TNseq {
        input:
            sample=sample,
            tumor_name=sample + "tumor",
            normal_name=sample + "normal",
            corealigner_bam=corealigner_bam,
            corealigner_bam_index=corealigner_bam_index,
            ref_dir=ref_dir,
            fasta=fasta,
            dbsnp_dir=dbsnp_dir,
            dbsnp=dbsnp,
            SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR,
            SENTIEON_LICENSE=SENTIEON_LICENSE,
            docker=SENTIEONdocker,
            cluster_config=cluster_config,
            disk_size=disk_size
    }

    call TNscope.TNscope as TNscope {
        input:
            sample=sample,
            tumor_name=sample + "tumor",
            normal_name=sample + "normal",
            corealigner_bam=corealigner_bam,
            corealigner_bam_index=corealigner_bam_index,
            ref_dir=ref_dir,
            fasta=fasta,
            dbsnp_dir=dbsnp_dir,
            dbsnp=dbsnp,
            SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR,
            SENTIEON_LICENSE=SENTIEON_LICENSE,
            docker=SENTIEONdocker,
            cluster_config=cluster_config,
            disk_size=disk_size
    }
}

Loading…
Cancel
Save