Ver código fonte

first commit

master
YaqingLiu 5 anos atrás
commit
e8b7c05485
20 arquivos alterados com 813 adições e 0 exclusões
  1. BIN
      .DS_Store
  2. +0
    -0
      LICENSE.md
  3. +8
    -0
      README.md
  4. +23
    -0
      defaults
  5. +25
    -0
      inputs
  6. +52
    -0
      tasks/BQSR.wdl
  7. +43
    -0
      tasks/Dedup.wdl
  8. +39
    -0
      tasks/Haplotyper.wdl
  9. +57
    -0
      tasks/Metrics.wdl
  10. +46
    -0
      tasks/PON.wdl
  11. +42
    -0
      tasks/Realigner.wdl
  12. +57
    -0
      tasks/TNscope.wdl
  13. +57
    -0
      tasks/TNseq.wdl
  14. +35
    -0
      tasks/annovar.wdl
  15. +40
    -0
      tasks/bcftools.wdl
  16. +46
    -0
      tasks/corealigner.wdl
  17. +37
    -0
      tasks/deduped_Metrics.wdl
  18. +35
    -0
      tasks/mapping.wdl
  19. +26
    -0
      tasks/vcf2maf.wdl
  20. +145
    -0
      workflow.wdl

BIN
.DS_Store Ver arquivo


+ 0
- 0
LICENSE.md Ver arquivo


+ 8
- 0
README.md Ver arquivo

@@ -0,0 +1,8 @@
#### Usage
$ open-choppy-env
$ choppy install ...

#### inputSamplesFile
sample_id tumor_recaled_bam tumor_recaled_bam_index normal_recaled_bam normal_recaled_bam_index

inputSamplesFile is a tab-separated (TSV) file with one sample per row and no header; it must be uploaded to Aliyun OSS.

+ 23
- 0
defaults Ver arquivo

@@ -0,0 +1,23 @@
{
"fasta": "hg19_nochr.fa",
"ref_dir": "oss://pgx-reference-data/hg19/",
"dbsnp": "dbsnp_138.hg19_nochr_sorted.vcf",
"dbsnp_dir": "oss://pgx-reference-data/hg19/",
"SENTIEON_INSTALL_DIR": "/opt/sentieon-genomics",
"SENTIEON_LICENSE": "192.168.0.55:8990",
"dbmills_dir": "oss://pgx-reference-data/hg19/",
"db_mills": "Mills_and_1000G_gold_standard.indels.hg19_nochr.vcf",
"sentieon_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/sentieon-genomics:v2017.11.04",
"annovar_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/annovar:v2018.04",
"vcf2maf_r_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/r-base:4.0.2",
"bcftools_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9",
"database": "oss://pgx-reference-data/annovar/",
"regions": "oss://pgx-reference-data/reference/wes_bedfiles/agilent_v6/agilent_v6.bed",
"PONmode": true,
"cosmic_vcf": "CosmicCodingMuts.hg19.v91.vcf",
"cosmic_dir": "oss://pgx-reference-data/reference/cosmic/",
"disk_size": "200",
"cluster_config": "OnDemand bcs.a2.3xlarge img-ubuntu-vpc",
"set_annovar": true,
"set_vcf2maf": true
}

+ 25
- 0
inputs Ver arquivo

@@ -0,0 +1,25 @@
{
"{{ project_name }}.fasta": "{{ fasta }}",
"{{ project_name }}.ref_dir": "{{ ref_dir }}",
"{{ project_name }}.dbsnp": "{{ dbsnp }}",
"{{ project_name }}.dbsnp_dir": "{{ dbsnp_dir }}",
"{{ project_name }}.SENTIEON_INSTALL_DIR": "{{ SENTIEON_INSTALL_DIR }}",
"{{ project_name }}.SENTIEON_LICENSE": "{{ SENTIEON_LICENSE }}",
"{{ project_name }}.dbmills_dir": "{{ dbmills_dir }}",
"{{ project_name }}.db_mills": "{{ db_mills }}",
"{{ project_name }}.sentieon_docker": "{{ sentieon_docker }}",
"{{ project_name }}.annovar_docker": "{{ annovar_docker }}",
"{{ project_name }}.vcf2maf_r_docker": "{{ vcf2maf_r_docker }}",
"{{ project_name }}.bcftools_docker": "{{ bcftools_docker }}",
"{{ project_name }}.database": "{{ database }}",
"{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}",
"{{ project_name }}.inputponfile": "{{ inputponfile }}",
"{{ project_name }}.regions": "{{ regions }}",
"{{ project_name }}.PONmode": "{{ PONmode }}",
"{{ project_name }}.cosmic_vcf": "{{ cosmic_vcf }}",
"{{ project_name }}.cosmic_dir": "{{ cosmic_dir }}",
"{{ project_name }}.disk_size": "{{ disk_size }}",
"{{ project_name }}.cluster_config": "{{ cluster_config }}",
"{{ project_name }}.set_annovar": "{{ set_annovar }}",
"{{ project_name }}.set_vcf2maf": "{{ set_vcf2maf }}"
}

+ 52
- 0
tasks/BQSR.wdl Ver arquivo

@@ -0,0 +1,52 @@
# Base quality score recalibration of a realigned BAM using Sentieon QualCal.
# Writes a recalibration table, a post-recalibration table, the recalibrated
# BAM, and a CSV + PDF report comparing the two tables.
task BQSR {
    # --- Sentieon installation / licence and execution environment ---
    String SENTIEON_INSTALL_DIR
    String SENTIEON_LICENSE
    String docker
    String cluster_config
    String disk_size

    # --- Reference data: each *_dir is localized and the matching String
    #     names the file inside it ---
    File ref_dir
    String fasta
    File dbsnp_dir
    String dbsnp
    File dbmills_dir
    String db_mills
    File regions

    # --- Per-sample inputs ---
    String sample
    File realigned_bam1
    # Not referenced in the command; declared so the index is localized
    # alongside the BAM.
    File realigned_bam_index1

    command <<<
        set -o pipefail
        set -e
        export SENTIEON_LICENSE=${SENTIEON_LICENSE}
        nt=$(nproc)

        sentieon="${SENTIEON_INSTALL_DIR}/bin/sentieon"
        known_sites="-k ${dbsnp_dir}/${dbsnp} -k ${dbmills_dir}/${db_mills}"
        table="${sample}_recal_data.table"

        # Pass 1: recalibration table, restricted to the target regions.
        $sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${realigned_bam1} --interval ${regions} \
            --algo QualCal $known_sites $table

        # Pass 2: apply the table (-q), write the post table and the recalibrated BAM.
        $sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${realigned_bam1} -q $table \
            --algo QualCal $known_sites $table.post \
            --algo ReadWriter ${sample}.sorted.deduped.realigned.recaled.bam

        # Before/after comparison: CSV first, then the PDF plot.
        $sentieon driver -t $nt --algo QualCal --plot --before $table --after $table.post ${sample}_recal_data.csv
        $sentieon plot QualCal -o ${sample}_bqsrreport.pdf ${sample}_recal_data.csv
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File recal_table = "${sample}_recal_data.table"
        File recal_post = "${sample}_recal_data.table.post"
        File recaled_bam = "${sample}.sorted.deduped.realigned.recaled.bam"
        # The command never names the .bai explicitly; presumably ReadWriter
        # writes it next to the BAM -- confirm.
        File recaled_bam_index = "${sample}.sorted.deduped.realigned.recaled.bam.bai"
        File recal_csv = "${sample}_recal_data.csv"
        File bqsrreport_pdf = "${sample}_bqsrreport.pdf"
    }
}

+ 43
- 0
tasks/Dedup.wdl Ver arquivo

@@ -0,0 +1,43 @@
# Duplicate removal on a sorted BAM with Sentieon: LocusCollector writes a
# score file, then Dedup (--rmdup) consumes it and writes the deduplicated BAM
# plus a metrics file.
task Dedup {
    # --- Per-sample inputs ---
    String sample
    File sorted_bam
    # Not referenced in the command; declared so the index is localized
    # alongside the BAM.
    File sorted_bam_index

    # --- Sentieon installation / licence and execution environment ---
    String SENTIEON_INSTALL_DIR
    String SENTIEON_LICENSE
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        export SENTIEON_LICENSE=${SENTIEON_LICENSE}
        nt=$(nproc)

        # Both steps share the same driver invocation prefix.
        driver="${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt -i ${sorted_bam}"
        score_file="${sample}_score.txt"

        $driver --algo LocusCollector --fun score_info $score_file
        $driver --algo Dedup --rmdup --score_info $score_file --metrics ${sample}_dedup_metrics.txt ${sample}.sorted.deduped.bam
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File score = "${sample}_score.txt"
        File dedup_metrics = "${sample}_dedup_metrics.txt"
        File Dedup_bam = "${sample}.sorted.deduped.bam"
        # The command never names the .bai explicitly; presumably Sentieon
        # writes it next to the BAM -- confirm.
        File Dedup_bam_index = "${sample}.sorted.deduped.bam.bai"
    }
}

+ 39
- 0
tasks/Haplotyper.wdl Ver arquivo

@@ -0,0 +1,39 @@
# Variant calling on a recalibrated BAM with the Sentieon Haplotyper algo,
# restricted to the given regions and annotated against dbSNP (-d).
task Haplotyper {
    # --- Per-sample inputs ---
    String sample
    File recaled_bam
    # Not referenced in the command; declared so the index is localized
    # alongside the BAM.
    File recaled_bam_index

    # --- Reference data ---
    File ref_dir
    String fasta
    File dbsnp_dir
    String dbsnp
    File regions

    # --- Sentieon installation / licence and execution environment ---
    String SENTIEON_INSTALL_DIR
    String SENTIEON_LICENSE
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        export SENTIEON_LICENSE=${SENTIEON_LICENSE}
        nt=$(nproc)

        sentieon="${SENTIEON_INSTALL_DIR}/bin/sentieon"

        $sentieon driver --interval ${regions} -r ${ref_dir}/${fasta} -t $nt -i ${recaled_bam} \
            --algo Haplotyper -d ${dbsnp_dir}/${dbsnp} ${sample}_hc.vcf
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File vcf = "${sample}_hc.vcf"
        # The command never names the .idx explicitly; presumably Sentieon
        # writes it next to the VCF -- confirm.
        File vcf_idx = "${sample}_hc.vcf.idx"
    }
}

+ 57
- 0
tasks/Metrics.wdl Ver arquivo

@@ -0,0 +1,57 @@
# Collects alignment QC metrics for a sorted BAM in a single Sentieon driver
# pass (MeanQualityByCycle, QualDistribution, GCBias, AlignmentStat,
# InsertSizeMetricAlgo, CoverageMetrics) and then plots a PDF report.
task Metrics {
    # --- Per-sample inputs ---
    String sample
    File sorted_bam
    # Not referenced in the command; declared so the index is localized
    # alongside the BAM.
    File sorted_bam_index

    # --- Reference data ---
    File ref_dir
    String fasta

    # --- Sentieon installation / licence and execution environment ---
    String SENTIEON_INSTALL_DIR
    String SENTIEON_LICENSE
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        export SENTIEON_LICENSE=${SENTIEON_LICENSE}
        nt=$(nproc)

        sentieon="${SENTIEON_INSTALL_DIR}/bin/sentieon"

        # One driver pass, one --algo per metric.
        $sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${sorted_bam} \
            --algo MeanQualityByCycle ${sample}_mq_metrics.txt \
            --algo QualDistribution ${sample}_qd_metrics.txt \
            --algo GCBias --summary ${sample}_gc_summary.txt ${sample}_gc_metrics.txt \
            --algo AlignmentStat ${sample}_aln_metrics.txt \
            --algo InsertSizeMetricAlgo ${sample}_is_metrics.txt \
            --algo CoverageMetrics --omit_base_output ${sample}_coverage_metrics

        $sentieon plot metrics -o ${sample}_metrics_report.pdf \
            gc=${sample}_gc_metrics.txt \
            qd=${sample}_qd_metrics.txt \
            mq=${sample}_mq_metrics.txt \
            isize=${sample}_is_metrics.txt
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    # NOTE(review): the command only names <sample>_metrics_report.pdf, which
    # is not exported, while four per-metric *_metrics.pdf files are declared
    # below. Confirm this Sentieon version actually writes them; otherwise
    # output collection will fail.
    output {
        File qd_metrics = "${sample}_qd_metrics.txt"
        File qd_metrics_pdf = "${sample}_qd_metrics.pdf"
        File mq_metrics = "${sample}_mq_metrics.txt"
        File mq_metrics_pdf = "${sample}_mq_metrics.pdf"
        File is_metrics = "${sample}_is_metrics.txt"
        File is_metrics_pdf = "${sample}_is_metrics.pdf"
        File gc_summary = "${sample}_gc_summary.txt"
        File gc_metrics = "${sample}_gc_metrics.txt"
        File gc_metrics_pdf = "${sample}_gc_metrics.pdf"
        File aln_metrics = "${sample}_aln_metrics.txt"
        File coverage_metrics_sample_summary = "${sample}_coverage_metrics.sample_summary"
        File coverage_metrics_sample_statistics = "${sample}_coverage_metrics.sample_statistics"
        File coverage_metrics_sample_interval_statistics = "${sample}_coverage_metrics.sample_interval_statistics"
        File coverage_metrics_sample_cumulative_coverage_proportions = "${sample}_coverage_metrics.sample_cumulative_coverage_proportions"
        File coverage_metrics_sample_cumulative_coverage_counts = "${sample}_coverage_metrics.sample_cumulative_coverage_counts"
    }
}

+ 46
- 0
tasks/PON.wdl Ver arquivo

@@ -0,0 +1,46 @@
# Builds a per-sample panel-of-normals VCF from a recalibrated normal BAM with
# Sentieon TNhaplotyper (--detect_pon). When PONmode is false the task only
# creates an empty placeholder VCF so the declared output still exists.
task PON {

    String SENTIEON_INSTALL_DIR
    String SENTIEON_LICENSE
    String sample
    # Interpolated into the shell script as the literal word "true" or "false".
    Boolean PONmode

    # Reference data: each *_dir is localized and the matching String names
    # the file inside it.
    File ref_dir
    String fasta
    File cosmic_dir
    String cosmic_vcf
    File dbsnp_dir
    String dbsnp

    File normal_recaled_bam
    # Not referenced in the command; declared so the index is localized
    # alongside the BAM.
    File normal_recaled_bam_index
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        export SENTIEON_LICENSE=${SENTIEON_LICENSE}
        nt=$(nproc)
        # Compare against the literal "true": a bare `[ false ]` is a
        # non-empty-string test and therefore always succeeds, which made the
        # else branch unreachable.
        if [ "${PONmode}" = "true" ]; then
            # Copy the COSMIC VCF to a scratch dir and index the copy there.
            mkdir -p /cromwell_root/tmp/cosmic/
            cp ${cosmic_dir}/${cosmic_vcf} /cromwell_root/tmp/cosmic/
            ${SENTIEON_INSTALL_DIR}/bin/sentieon util vcfindex /cromwell_root/tmp/cosmic/${cosmic_vcf}
            ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt -r ${ref_dir}/${fasta} -i ${normal_recaled_bam} --algo TNhaplotyper --detect_pon --cosmic /cromwell_root/tmp/cosmic/${cosmic_vcf} --dbsnp ${dbsnp_dir}/${dbsnp} ${sample}_pon.vcf
        else
            # Placeholder so the declared output exists when PON is disabled.
            touch ${sample}_pon.vcf
        fi
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File pon_vcf = "${sample}_pon.vcf"
    }
}

+ 42
- 0
tasks/Realigner.wdl Ver arquivo

@@ -0,0 +1,42 @@
# Realignment of a deduplicated BAM with the Sentieon Realigner algo, using
# the Mills known-sites VCF (-k) and the target regions (--interval_list).
task Realigner {
    # --- Per-sample inputs ---
    String sample
    File Dedup_bam
    # Not referenced in the command; declared so the index is localized
    # alongside the BAM.
    File Dedup_bam_index

    # --- Reference data ---
    File ref_dir
    String fasta
    File dbmills_dir
    String db_mills
    File regions

    # --- Sentieon installation / licence and execution environment ---
    String SENTIEON_INSTALL_DIR
    String SENTIEON_LICENSE
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        export SENTIEON_LICENSE=${SENTIEON_LICENSE}
        nt=$(nproc)

        sentieon="${SENTIEON_INSTALL_DIR}/bin/sentieon"

        $sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${Dedup_bam} \
            --algo Realigner -k ${dbmills_dir}/${db_mills} --interval_list ${regions} \
            ${sample}.sorted.deduped.realigned.bam
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File realigner_bam = "${sample}.sorted.deduped.realigned.bam"
        # The command never names the .bai explicitly; presumably Sentieon
        # writes it next to the BAM -- confirm.
        File realigner_bam_index = "${sample}.sorted.deduped.realigned.bam.bai"
    }
}

+ 57
- 0
tasks/TNscope.wdl Ver arquivo

@@ -0,0 +1,57 @@
# Tumor/normal somatic variant calling on a co-realigned BAM with Sentieon
# TNscope. When PONmode is true the call additionally receives the panel of
# normals (--pon) and a freshly indexed copy of the COSMIC VCF (--cosmic).
task TNscope {
    File ref_dir
    File dbsnp_dir
    String sample
    String SENTIEON_INSTALL_DIR
    String SENTIEON_LICENSE
    # Sample names passed to --tumor_sample / --normal_sample.
    String tumor_name
    String normal_name
    String docker
    String cluster_config
    String fasta
    File corealigner_bam
    # Not referenced in the command; declared so the index is localized
    # alongside the BAM.
    File corealigner_bam_index
    String dbsnp
    String disk_size
    # Interpolated into the shell script as the literal word "true" or "false".
    Boolean PONmode
    # Only read inside the PONmode branch.
    String? cosmic_vcf
    File? cosmic_dir
    File panel_of_normal_vcf

    command <<<
        set -o pipefail
        set -e
        export SENTIEON_LICENSE=${SENTIEON_LICENSE}
        nt=$(nproc)
        # Compare against the literal "true": a bare `[ false ]` is a
        # non-empty-string test and therefore always succeeds.
        if [ "${PONmode}" = "true" ]; then
            mkdir -p /cromwell_root/tmp/cosmic/
            cp ${cosmic_dir}/${cosmic_vcf} /cromwell_root/tmp/cosmic/
            ${SENTIEON_INSTALL_DIR}/bin/sentieon util vcfindex /cromwell_root/tmp/cosmic/${cosmic_vcf}
            PONcommand="--pon ${panel_of_normal_vcf} --cosmic /cromwell_root/tmp/cosmic/${cosmic_vcf}"
        else
            PONcommand=""
        fi
        # $PONcommand is deliberately unquoted so it splits into separate
        # arguments and disappears when empty. The original passed the literal
        # word "PONcommand" to the driver instead of expanding the variable.
        ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt -r ${ref_dir}/${fasta} -i ${corealigner_bam} --algo TNscope --tumor_sample ${tumor_name} --normal_sample ${normal_name} --dbsnp ${dbsnp_dir}/${dbsnp} $PONcommand ${sample}.TNscope.TN.vcf
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File TNscope_vcf= "${sample}.TNscope.TN.vcf"
        # The command never names the .idx explicitly; presumably Sentieon
        # writes it next to the VCF -- confirm.
        File TNscope_vcf_index = "${sample}.TNscope.TN.vcf.idx"
    }
}

+ 57
- 0
tasks/TNseq.wdl Ver arquivo

@@ -0,0 +1,57 @@
# Tumor/normal somatic variant calling on co-realigned BAM(s) with Sentieon
# TNhaplotyper. When PONmode is true the call additionally receives the panel
# of normals (--pon) and a freshly indexed copy of the COSMIC VCF (--cosmic).
task TNseq {
    File ref_dir
    File dbsnp_dir
    # Joined with single spaces to form the output file-name prefix.
    Array[String] sample
    String SENTIEON_INSTALL_DIR
    String SENTIEON_LICENSE
    # Sample names passed to --tumor_sample / --normal_sample.
    String tumor_name
    String normal_name
    String docker
    String cluster_config
    String fasta
    # Each element becomes its own "-i <bam>" argument.
    Array[File] corealigner_bam
    # Not referenced in the command; declared so the indexes are localized
    # alongside the BAMs.
    Array[File] corealigner_bam_index
    String dbsnp
    String disk_size
    # Interpolated into the shell script as the literal word "true" or "false".
    Boolean PONmode
    # Only read inside the PONmode branch.
    String? cosmic_vcf
    File? cosmic_dir
    File panel_of_normal_vcf

    command <<<
        set -o pipefail
        set -e
        export SENTIEON_LICENSE=${SENTIEON_LICENSE}
        nt=$(nproc)
        # Compare against the literal "true": a bare `[ false ]` is a
        # non-empty-string test and therefore always succeeds.
        if [ "${PONmode}" = "true" ]; then
            mkdir -p /cromwell_root/tmp/cosmic/
            cp ${cosmic_dir}/${cosmic_vcf} /cromwell_root/tmp/cosmic/
            ${SENTIEON_INSTALL_DIR}/bin/sentieon util vcfindex /cromwell_root/tmp/cosmic/${cosmic_vcf}
            PONcommand="--pon ${panel_of_normal_vcf} --cosmic /cromwell_root/tmp/cosmic/${cosmic_vcf}"
        else
            PONcommand=""
        fi
        # $PONcommand is deliberately unquoted so it splits into separate
        # arguments and disappears when empty. The original passed the literal
        # word "PONcommand" to the driver instead of expanding the variable.
        ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt -r ${ref_dir}/${fasta} -i ${sep=" -i " corealigner_bam} --algo TNhaplotyper --tumor_sample ${tumor_name} --normal_sample ${normal_name} --dbsnp ${dbsnp_dir}/${dbsnp} $PONcommand ${sep=" " sample}.TNseq.TN.vcf
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File TNseq_vcf= "${sep=" " sample}.TNseq.TN.vcf"
        # The command never names the .idx explicitly; presumably Sentieon
        # writes it next to the VCF -- confirm.
        File TNseq_vcf_index = "${sep=" " sample}.TNseq.TN.vcf.idx"
    }
}

+ 35
- 0
tasks/annovar.wdl Ver arquivo

@@ -0,0 +1,35 @@
# Annotates the TNscope and TNseq VCFs with ANNOVAR table_annovar.pl (hg19
# build). Both calls use the same protocol/operation list and differ only in
# the input VCF and the output prefix.
task annovar {

    # --- Inputs to annotate ---
    String sample
    File tnscope_vcf_file
    File tnseq_vcf_file

    # --- ANNOVAR database location ---
    File database

    # --- Execution environment ---
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        nt=$(nproc)

        table_annovar=/installations/annovar/table_annovar.pl
        # Options shared by both calls; they follow the per-call "-out <prefix>".
        shared_opts="-remove -protocol refGene,cytoBand,genomicSuperDups,snp138,ljb26_all,cosmic78,clinvar_20170130,popfreq_all_20150413,intervar_20170202 -operation g,r,r,f,f,f,f,f,f -nastring . -vcfinput -thread $nt"

        $table_annovar ${tnscope_vcf_file} ${database} -buildver hg19 -out ${sample}_tnscope $shared_opts
        $table_annovar ${tnseq_vcf_file} ${database} -buildver hg19 -out ${sample}_tnseq $shared_opts
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File avinput_tnscope = "${sample}_tnscope.avinput"
        File multianno_tnscope_txt = "${sample}_tnscope.hg19_multianno.txt"
        File multianno_tnscope_vcf = "${sample}_tnscope.hg19_multianno.vcf"
        File avinput_tnseq = "${sample}_tnseq.avinput"
        File multianno_tnseq_txt = "${sample}_tnseq.hg19_multianno.txt"
        File multianno_tnseq_vcf = "${sample}_tnseq.hg19_multianno.vcf"
    }
}

+ 40
- 0
tasks/bcftools.wdl Ver arquivo

@@ -0,0 +1,40 @@
# Merges per-sample panel-of-normals VCFs into one panel_of_normal.vcf:
# each input is bgzip-compressed and indexed, the files are merged, AN/AC
# are filled in, and only records with SUM(AC)>1 are kept.
# When PONmode is false an empty placeholder VCF is created instead.
task bcftools {

    # Per-sample PON VCFs to merge.
    # NOTE(review): files are staged under their base names, so two inputs
    # with the same file name would overwrite each other -- confirm that
    # names are unique.
    Array[File] pon_vcf
    String docker
    String cluster_config
    String disk_size
    # Interpolated into the shell script as the literal word "true" or "false".
    Boolean PONmode

    command <<<
        set -o pipefail
        set -e
        # Compare against the literal "true": a bare `[ false ]` is a
        # non-empty-string test and therefore always succeeds.
        if [ "${PONmode}" = "true" ]; then
            mkdir -p /cromwell_root/tmp/bcftools/
            for i in ${sep=" " pon_vcf}
            do
                # $i is a full localized path. Appending it to the scratch dir
                # (as the original did) points into a directory that does not
                # exist, so stage the file under its base name instead.
                gz=/cromwell_root/tmp/bcftools/$(basename "$i").gz
                bcftools view -Oz -o "$gz" "$i"
                # bcftools merge needs every input to be indexed.
                bcftools index -t "$gz"
            done
            bcftools merge -m any -f PASS,. --force-samples /cromwell_root/tmp/bcftools/*.vcf.gz |\
            bcftools plugin fill-AN-AC |\
            bcftools filter -i 'SUM(AC)>1' > panel_of_normal.vcf
        else
            # Placeholder so the declared output exists when PON is disabled.
            touch panel_of_normal.vcf
        fi
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File panel_of_normal_vcf = "panel_of_normal.vcf"
    }
}

+ 46
- 0
tasks/corealigner.wdl Ver arquivo

@@ -0,0 +1,46 @@
# Joint ("co-") realignment of tumor and normal recalibrated BAMs in one
# Sentieon Realigner run, using the Mills and dbSNP VCFs as known sites (-k).
# The result is a single <sample>_corealigned.bam.
task corealigner {
    # --- Per-sample inputs ---
    # Joined with single spaces to form the output file-name prefix.
    Array[String] sample
    # Each BAM becomes its own "-i <bam>" argument, tumor first, then normal.
    Array[File] tumor_recaled_bam
    Array[File] normal_recaled_bam
    # Not referenced in the command; declared so the indexes are localized
    # alongside the BAMs.
    Array[File] tumor_recaled_bam_index
    Array[File] normal_recaled_bam_index

    # --- Reference data ---
    File ref_dir
    String fasta
    File dbsnp_dir
    String dbsnp
    File dbmills_dir
    String db_mills

    # --- Sentieon installation / licence and execution environment ---
    String SENTIEON_INSTALL_DIR
    String SENTIEON_LICENSE
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        export SENTIEON_LICENSE=${SENTIEON_LICENSE}
        nt=$(nproc)
        #${sep=" " tumor_recaled_bam}

        sentieon="${SENTIEON_INSTALL_DIR}/bin/sentieon"

        $sentieon driver -r ${ref_dir}/${fasta} -t $nt \
            -i ${sep=" -i " tumor_recaled_bam} \
            -i ${sep=" -i " normal_recaled_bam} \
            --algo Realigner -k ${dbmills_dir}/${db_mills} -k ${dbsnp_dir}/${dbsnp} \
            ${sep=" " sample}_corealigned.bam
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File corealigner_bam = "${sep=" " sample}_corealigned.bam"
        # The command never names the .bai explicitly; presumably Sentieon
        # writes it next to the BAM -- confirm.
        File corealigner_bam_index = "${sep=" " sample}_corealigned.bam.bai"
    }
}

+ 37
- 0
tasks/deduped_Metrics.wdl Ver arquivo

@@ -0,0 +1,37 @@
# Collects QC metrics for a deduplicated BAM in a single Sentieon driver pass.
# Only the CoverageMetrics files are exported as outputs; the other metric
# files are written to the working directory but not declared.
task deduped_Metrics {
    # --- Per-sample inputs ---
    String sample
    File Dedup_bam
    # Not referenced in the command; declared so the index is localized
    # alongside the BAM.
    File Dedup_bam_index

    # --- Reference data ---
    File ref_dir
    String fasta

    # --- Sentieon installation / licence and execution environment ---
    String SENTIEON_INSTALL_DIR
    String SENTIEON_LICENSE
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        export SENTIEON_LICENSE=${SENTIEON_LICENSE}
        nt=$(nproc)

        sentieon="${SENTIEON_INSTALL_DIR}/bin/sentieon"
        prefix="${sample}_deduped"

        # One driver pass, one --algo per metric.
        $sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${Dedup_bam} \
            --algo CoverageMetrics --omit_base_output "$prefix"_coverage_metrics \
            --algo MeanQualityByCycle "$prefix"_mq_metrics.txt \
            --algo QualDistribution "$prefix"_qd_metrics.txt \
            --algo GCBias --summary "$prefix"_gc_summary.txt "$prefix"_gc_metrics.txt \
            --algo AlignmentStat "$prefix"_aln_metrics.txt \
            --algo InsertSizeMetricAlgo "$prefix"_is_metrics.txt
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File deduped_coverage_metrics_sample_summary = "${sample}_deduped_coverage_metrics.sample_summary"
        File deduped_coverage_metrics_sample_statistics = "${sample}_deduped_coverage_metrics.sample_statistics"
        File deduped_coverage_metrics_sample_interval_statistics = "${sample}_deduped_coverage_metrics.sample_interval_statistics"
        File deduped_coverage_metrics_sample_cumulative_coverage_proportions = "${sample}_deduped_coverage_metrics.sample_cumulative_coverage_proportions"
        File deduped_coverage_metrics_sample_cumulative_coverage_counts = "${sample}_deduped_coverage_metrics.sample_cumulative_coverage_counts"
    }
}

+ 35
- 0
tasks/mapping.wdl Ver arquivo

@@ -0,0 +1,35 @@
# Aligns a paired-end FASTQ pair with the bwa binary shipped in the Sentieon
# install dir and pipes the SAM stream into `sentieon util sort`, which writes
# the sorted BAM.
task mapping {
    # --- Per-sample inputs ---
    String sample
    File fastq_1
    File fastq_2
    # Read-group fields: ID (group), SM (sample) and PL (pl).
    String group
    String pl

    # --- Reference data ---
    File ref_dir
    String fasta

    # --- Sentieon installation / licence and execution environment ---
    String SENTIEON_INSTALL_DIR
    String SENTIEON_LICENSE
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        export SENTIEON_LICENSE=${SENTIEON_LICENSE}
        nt=$(nproc)

        bin_dir="${SENTIEON_INSTALL_DIR}/bin"
        # The \t sequences stay literal inside double quotes; bwa receives them as-is.
        read_group="@RG\tID:${group}\tSM:${sample}\tPL:${pl}"

        $bin_dir/bwa mem -M -R "$read_group" -t $nt -K 10000000 ${ref_dir}/${fasta} ${fastq_1} ${fastq_2} \
            | $bin_dir/sentieon util sort -o ${sample}.sorted.bam -t $nt --sam2bam -i -
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File sorted_bam = "${sample}.sorted.bam"
        # The command never names the .bai explicitly; presumably the sort
        # step writes it next to the BAM -- confirm.
        File sorted_bam_index = "${sample}.sorted.bam.bai"
    }
}

+ 26
- 0
tasks/vcf2maf.wdl Ver arquivo

@@ -0,0 +1,26 @@
# Converts the ANNOVAR multianno tables for TNscope and TNseq into MAF files
# by calling the `vcf2maf` executable provided by the task's docker image.
task vcf2maf {

    # Optional because the upstream annotation step may be skipped.
    # NOTE(review): when either is undefined the placeholder expands to an
    # empty string and vcf2maf is called with only the output name -- confirm
    # callers never run this task without annotation results.
    File? multianno_tnscope_txt
    File? multianno_tnseq_txt
    String sample
    String docker
    String cluster_config
    String disk_size

    command <<<
        # Fail fast, as every other task in this pipeline does: without these
        # options a failed first conversion did not stop the script.
        set -o pipefail
        set -e
        vcf2maf ${multianno_tnscope_txt} ${sample}_tnscope.MAF
        vcf2maf ${multianno_tnseq_txt} ${sample}_tnseq.MAF
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File tnscope_maf = "${sample}_tnscope.MAF"
        File tnseq_maf = "${sample}_tnseq.MAF"
    }
}

+ 145
- 0
workflow.wdl Ver arquivo

@@ -0,0 +1,145 @@

import "./tasks/corealigner.wdl" as corealigner
import "./tasks/bcftools.wdl" as bcftools
import "./tasks/TNseq.wdl" as TNseq
import "./tasks/TNscope.wdl" as TNscope
import "./tasks/annovar.wdl" as annovar
import "./tasks/vcf2maf.wdl" as vcf2maf


# Tumor/normal somatic variant-calling workflow.
# Steps: (1) bcftools merges the panel-of-normals VCFs listed in inputponfile;
# (2) for every row of inputSamplesFile, corealigner co-realigns the tumor and
# normal BAMs, then TNseq and TNscope call variants on the result;
# (3) optionally, annovar annotates both VCFs and vcf2maf converts the
# annotation tables to MAF.
# "{{ project_name }}" is a template placeholder, so this file is not valid WDL
# until it has been rendered (presumably by choppy -- confirm).
workflow {{ project_name }} {

# TSV with one row per sample. As consumed by the calls below:
#   column 0 = sample id, 1 = tumor BAM, 2 = tumor BAM index,
#   column 3 = normal BAM, 4 = normal BAM index.
File inputSamplesFile
# NOTE(review): rows are typed Array[File], yet column 0 is used as a plain
# identifier and concatenated with "_tumor"/"_normal" -- confirm the engine
# treats it as a string here.
Array[Array[File]] inputSamples = read_tsv(inputSamplesFile)
#Array[String] sample

String SENTIEON_INSTALL_DIR
String SENTIEON_LICENSE
# One docker image per tool family.
String sentieon_docker
String annovar_docker
String vcf2maf_r_docker
String bcftools_docker
# Reference data: each *_dir is localized and the matching String names the
# file inside it.
File ref_dir
String fasta
File dbmills_dir
String db_mills
File dbsnp_dir
String dbsnp
# NOTE(review): regions is declared but not passed to any call in this workflow.
File regions
File database
String disk_size
String cluster_config

# Enables the panel-of-normals / COSMIC arguments in bcftools, TNseq and TNscope.
Boolean PONmode
File? cosmic_dir
String? cosmic_vcf

# Switches for the optional annotation and MAF-conversion steps.
Boolean set_annovar
Boolean set_vcf2maf


# TSV listing the per-sample panel-of-normals VCFs to merge.
File inputponfile
# NOTE(review): read_tsv yields Array[Array[File]], while the bcftools task
# declares "Array[File] pon_vcf" -- confirm this coerces as intended.
Array[Array[File]] bcf = read_tsv(inputponfile)

# Runs once, outside the scatter; its output is shared by every sample.
call bcftools.bcftools as bcftools {
input:
PONmode=PONmode,
pon_vcf=bcf,
docker=bcftools_docker,
disk_size=disk_size,
cluster_config=cluster_config
}
scatter (sample in inputSamples) {
# NOTE(review): corealigner declares Array[...] for sample and the four BAM /
# index inputs, but single row elements are passed here -- confirm the
# single-value-to-array coercion works on the target engine.
call corealigner.corealigner as corealigner {
input:
SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR,
SENTIEON_LICENSE=SENTIEON_LICENSE,
fasta=fasta,
ref_dir=ref_dir,
sample=sample[0],
docker=sentieon_docker,
db_mills=db_mills,
dbmills_dir=dbmills_dir,
dbsnp=dbsnp,
dbsnp_dir=dbsnp_dir,
tumor_recaled_bam=sample[1],
tumor_recaled_bam_index=sample[2],
normal_recaled_bam=sample[3],
normal_recaled_bam_index=sample[4],
disk_size=disk_size,
cluster_config=cluster_config
}

# NOTE(review): TNseq declares Array[...] for sample, corealigner_bam and
# corealigner_bam_index, but receives single values inside the scatter --
# same coercion question as for corealigner.
call TNseq.TNseq as TNseq {
input:
SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR,
SENTIEON_LICENSE=SENTIEON_LICENSE,
PONmode=PONmode,
fasta=fasta,
ref_dir=ref_dir,
corealigner_bam=corealigner.corealigner_bam,
corealigner_bam_index=corealigner.corealigner_bam_index,
dbsnp=dbsnp,
dbsnp_dir=dbsnp_dir,
tumor_name=sample[0] + "_tumor",
normal_name=sample[0] + "_normal",
cosmic_vcf=cosmic_vcf,
cosmic_dir=cosmic_dir,
panel_of_normal_vcf = bcftools.panel_of_normal_vcf,
docker=sentieon_docker,
sample=sample[0],
disk_size=disk_size,
cluster_config=cluster_config
}
# Same inputs as TNseq; the TNscope task declares scalar types for them.
call TNscope.TNscope as TNscope {
input:
SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR,
SENTIEON_LICENSE=SENTIEON_LICENSE,
PONmode=PONmode,
fasta=fasta,
ref_dir=ref_dir,
corealigner_bam=corealigner.corealigner_bam,
corealigner_bam_index=corealigner.corealigner_bam_index,
dbsnp=dbsnp,
dbsnp_dir=dbsnp_dir,
tumor_name=sample[0] + "_tumor",
normal_name=sample[0] + "_normal",
cosmic_vcf=cosmic_vcf,
cosmic_dir=cosmic_dir,
panel_of_normal_vcf = bcftools.panel_of_normal_vcf,
docker=sentieon_docker,
sample=sample[0],
disk_size=disk_size,
cluster_config=cluster_config
}

# Optional annotation of both callers' VCFs.
if (set_annovar){
call annovar.annovar as annovar {
input:
docker=annovar_docker,
database=database,
tnscope_vcf_file=TNscope.TNscope_vcf,
tnseq_vcf_file=TNseq.TNseq_vcf,
sample=sample[0],
cluster_config=cluster_config,
disk_size=disk_size
}
}
# Optional MAF conversion of the annovar tables.
# NOTE(review): this block is gated only on set_vcf2maf. With
# set_annovar=false and set_vcf2maf=true the annovar outputs are undefined
# and vcf2maf still runs -- confirm whether it should also require set_annovar.
if (set_vcf2maf){
call vcf2maf.vcf2maf as vcf2maf {
input:
docker=vcf2maf_r_docker,
multianno_tnscope_txt=annovar.multianno_tnscope_txt,
multianno_tnseq_txt=annovar.multianno_tnseq_txt,
sample=sample[0],
cluster_config=cluster_config,
disk_size=disk_size
}
}
}
}

Carregando…
Cancelar
Salvar