Bladeren bron

Update: TNseq.wdl

tags/v0.1.2
YaqingLiu 4 jaren geleden
bovenliggende
commit
24df48e237
13 gewijzigde bestanden met toevoegingen van 416 en 385 verwijderingen
  1. +4
    -1
      README.md
  2. +3
    -2
      defaults
  3. +3
    -2
      inputs
  4. +32
    -34
      tasks/Dedup.wdl
  5. +56
    -39
      tasks/Haplotyper.wdl
  6. +60
    -47
      tasks/Metrics.wdl
  7. +11
    -4
      tasks/Realigner.wdl
  8. +59
    -67
      tasks/TNscope.wdl
  9. +78
    -62
      tasks/TNseq.wdl
  10. +0
    -39
      tasks/bcftools.wdl
  11. +57
    -39
      tasks/deduped_Metrics.wdl
  12. +29
    -29
      tasks/mapping.wdl
  13. +24
    -20
      workflow.wdl

+ 4
- 1
README.md Bestand weergeven

@@ -1,7 +1,10 @@
### Variant Calling
This APP developed for somatic short variant discovery (SNVs + Indels).
This APP was developed for germline and somatic short variant discovery (SNVs + Indels).

***Supported callers***
> Germline
* Haplotyper
> Somatic
* TNseq (TNhaplotyper2)
* TNscope
* VarScan

+ 3
- 2
defaults Bestand weergeven

@@ -8,6 +8,7 @@
"dbmills_dir": "oss://pgx-reference-data/GRCh38.d1.vd1/",
"db_mills": "Mills_and_1000G_gold_standard.indels.hg38.vcf",
"germline_resource": "oss://pgx-reference-data/gnomAD/af-only-gnomad.v3.1.1.vcf.gz",
"germline_resource_tbi": "oss://pgx-reference-data/gnomAD/af-only-gnomad.v3.1.1.vcf.gz.tbi",
"sentieon_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/sentieon-genomics:v2020.10.07",
"varscan_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/varscan2:v2.4.3",
"annovar_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/annovar:v2018.04",
@@ -15,8 +16,8 @@
"bcftools_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9",
"database": "oss://pgx-reference-data/annovar_hg38/",
"regions": "oss://pgx-reference-data/bed/cbcga/S07604514_Covered.bed",
"set_pon": false,
"pon_vcf": "",
"tnseq_pon": "",
"tnscope_pon": "",
"cosmic_vcf": "CosmicCodingMuts.hg38.v91.vcf",
"cosmic_dir": "oss://pgx-reference-data/reference/cosmic/",
"disk_size": "200",

+ 3
- 2
inputs Bestand weergeven

@@ -13,14 +13,15 @@
"{{ project_name }}.dbmills_dir": "{{ dbmills_dir }}",
"{{ project_name }}.db_mills": "{{ db_mills }}",
"{{ project_name }}.germline_resource": "{{ germline_resource }}",
"{{ project_name }}.germline_resource_tbi": "{{ germline_resource_tbi }}",
"{{ project_name }}.sentieon_docker": "{{ sentieon_docker }}",
"{{ project_name }}.varscan_docker": "{{ varscan_docker }}",
"{{ project_name }}.annovar_docker": "{{ annovar_docker }}",
"{{ project_name }}.maftools_docker": "{{ maftools_docker }}",
"{{ project_name }}.database": "{{ database }}",
"{{ project_name }}.regions": "{{ regions }}",
"{{ project_name }}.set_pon": {{ set_pon | tojson }},
"{{ project_name }}.pon_vcf": "{{ pon_vcf }}",
"{{ project_name }}.tnseq_pon": "{{ tnseq_pon }}",
"{{ project_name }}.tnscope_pon": "{{ tnscope_pon }}",
"{{ project_name }}.cosmic_vcf": "{{ cosmic_vcf }}",
"{{ project_name }}.cosmic_dir": "{{ cosmic_dir }}",
"{{ project_name }}.disk_size": "{{ disk_size }}",

+ 32
- 34
tasks/Dedup.wdl Bestand weergeven

@@ -1,39 +1,37 @@
task Dedup {
String SENTIEON_INSTALL_DIR
String SENTIEON_LICENSE
String sample
File sorted_bam
File sorted_bam_index
String docker
String cluster_config
String disk_size
command <<<
set -o pipefail
set -e
export SENTIEON_LICENSE=${SENTIEON_LICENSE}
nt=$(nproc)
${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt -i ${sorted_bam} --algo LocusCollector --fun score_info ${sample}_score.txt
${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt -i ${sorted_bam} --algo Dedup --rmdup --score_info ${sample}_score.txt --metrics ${sample}_dedup_metrics.txt ${sample}.sorted.deduped.bam
>>>
runtime {
docker: docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File score = "${sample}_score.txt"
File dedup_metrics = "${sample}_dedup_metrics.txt"
File Dedup_bam = "${sample}.sorted.deduped.bam"
File Dedup_bam_index = "${sample}.sorted.deduped.bam.bai"
}
String SENTIEON_INSTALL_DIR
String SENTIEON_LICENSE
String sample
File sorted_bam
File sorted_bam_index
String docker
String cluster_config
String disk_size
command <<<
set -o pipefail
set -e
export SENTIEON_LICENSE=${SENTIEON_LICENSE}
nt=$(nproc)
${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt -i ${sorted_bam} --algo LocusCollector --fun score_info ${sample}_score.txt
${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt -i ${sorted_bam} --algo Dedup --rmdup --score_info ${sample}_score.txt --metrics ${sample}_dedup_metrics.txt ${sample}.sorted.deduped.bam
>>>
runtime {
docker: docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File score = "${sample}_score.txt"
File dedup_metrics = "${sample}_dedup_metrics.txt"
File Dedup_bam = "${sample}.sorted.deduped.bam"
File Dedup_bam_index = "${sample}.sorted.deduped.bam.bai"
}
}

+ 56
- 39
tasks/Haplotyper.wdl Bestand weergeven

@@ -1,43 +1,60 @@
task Haplotyper {
File ref_dir
File dbsnp_dir
String SENTIEON_INSTALL_DIR
String SENTIEON_LICENSE
String fasta
File recaled_bam
File recaled_bam_index
File? regions
String dbsnp
String sample
String docker
String cluster_config
String disk_size
command <<<
set -o pipefail
set -e
export SENTIEON_LICENSE=${SENTIEON_LICENSE}
nt=$(nproc)
if [ ${regions} != "" ]; then
${SENTIEON_INSTALL_DIR}/bin/sentieon driver --interval ${regions} -r ${ref_dir}/${fasta} -t $nt -i ${recaled_bam} --algo Haplotyper -d ${dbsnp_dir}/${dbsnp} ${sample}_hc.vcf
else
${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${recaled_bam} --algo Haplotyper -d ${dbsnp_dir}/${dbsnp} ${sample}_hc.vcf
fi
>>>
runtime {
docker: docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File vcf = "${sample}_hc.vcf"
File vcf_idx = "${sample}_hc.vcf.idx"
}
File ref_dir
File dbsnp_dir
String SENTIEON_INSTALL_DIR
String SENTIEON_LICENSE
String fasta
File recaled_bam
File recaled_bam_index
File recal_table
String dbsnp
String sample
String docker
String cluster_config
String disk_size
File? regions
Int? interval_padding
command <<<
  # Germline variant calling with Sentieon Haplotyper on the recalibrated BAM.
  # Writes ${sample}_hc.vcf (the task output section also expects the .idx
  # that the driver writes next to it).
  set -o pipefail
  set -e
  export SENTIEON_LICENSE=${SENTIEON_LICENSE}
  nt=$(nproc)

  # Restrict calling to the target regions only when a regions file was given.
  # The padding placeholder renders as empty text when interval_padding is
  # unset, so the flag is never emitted without a value.
  if [ -n "${regions}" ]; then
    INTERVAL="--interval ${regions} ${'--interval_padding ' + interval_padding}"
  else
    INTERVAL=""
  fi

  # No panel-of-normals handling here: this task declares no pon_vcf input,
  # and the Haplotyper invocation below never consumed one.
  ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt \
    -r ${ref_dir}/${fasta} $INTERVAL \
    -i ${recaled_bam} -q ${recal_table} \
    --algo Haplotyper -d ${dbsnp_dir}/${dbsnp} \
    ${sample}_hc.vcf
>>>
runtime {
docker: docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File vcf = "${sample}_hc.vcf"
File vcf_idx = "${sample}_hc.vcf.idx"
}
}

+ 60
- 47
tasks/Metrics.wdl Bestand weergeven

@@ -1,54 +1,67 @@
task Metrics {
File ref_dir
String SENTIEON_INSTALL_DIR
String SENTIEON_LICENSE
String sample
String docker
String cluster_config
File ref_dir
String SENTIEON_INSTALL_DIR
String SENTIEON_LICENSE
String sample
String docker
String cluster_config
String fasta
File sorted_bam
File sorted_bam_index
String disk_size
command <<<
set -o pipefail
set -e
export SENTIEON_LICENSE=${SENTIEON_LICENSE}
nt=$(nproc)
${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${sorted_bam} --algo MeanQualityByCycle ${sample}_mq_metrics.txt --algo QualDistribution ${sample}_qd_metrics.txt --algo GCBias --summary ${sample}_gc_summary.txt ${sample}_gc_metrics.txt --algo AlignmentStat ${sample}_aln_metrics.txt --algo InsertSizeMetricAlgo ${sample}_is_metrics.txt --algo CoverageMetrics --omit_base_output ${sample}_coverage_metrics
${SENTIEON_INSTALL_DIR}/bin/sentieon plot metrics -o ${sample}_metrics_report.pdf gc=${sample}_gc_metrics.txt qd=${sample}_qd_metrics.txt mq=${sample}_mq_metrics.txt isize=${sample}_is_metrics.txt
>>>
runtime {
docker: docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File qd_metrics = "${sample}_qd_metrics.txt"
File qd_metrics_pdf = "${sample}_qd_metrics.pdf"
File mq_metrics = "${sample}_mq_metrics.txt"
File mq_metrics_pdf = "${sample}_mq_metrics.pdf"
File is_metrics = "${sample}_is_metrics.txt"
File is_metrics_pdf = "${sample}_is_metrics.pdf"
File gc_summary = "${sample}_gc_summary.txt"
File gc_metrics = "${sample}_gc_metrics.txt"
File gc_metrics_pdf = "${sample}_gc_metrics.pdf"
File aln_metrics = "${sample}_aln_metrics.txt"
File coverage_metrics_sample_summary = "${sample}_coverage_metrics.sample_summary"
File coverage_metrics_sample_statistics = "${sample}_coverage_metrics.sample_statistics"
File coverage_metrics_sample_interval_statistics = "${sample}_coverage_metrics.sample_interval_statistics"
File coverage_metrics_sample_cumulative_coverage_proportions = "${sample}_coverage_metrics.sample_cumulative_coverage_proportions"
File coverage_metrics_sample_cumulative_coverage_counts = "${sample}_coverage_metrics.sample_cumulative_coverage_counts"
}
String fasta
File sorted_bam
File sorted_bam_index
String disk_size
File? regions
command <<<
set -o pipefail
set -e
export SENTIEON_LICENSE=${SENTIEON_LICENSE}
nt=$(nproc)
if [ ${regions} ]; then
INTERVAL="--interval ${regions}"
else
INTERVAL=""
fi
${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt \
-r ${ref_dir}/${fasta} $INTERVAL \
-i ${sorted_bam} \
--algo MeanQualityByCycle ${sample}_mq_metrics.txt \
--algo QualDistribution ${sample}_qd_metrics.txt \
--algo GCBias --summary ${sample}_gc_summary.txt ${sample}_gc_metrics.txt \
--algo AlignmentStat ${sample}_aln_metrics.txt \
--algo InsertSizeMetricAlgo ${sample}_is_metrics.txt \
--algo CoverageMetrics --omit_base_output ${sample}_coverage_metrics
${SENTIEON_INSTALL_DIR}/bin/sentieon plot metrics -o ${sample}_metrics_report.pdf gc=${sample}_gc_metrics.txt qd=${sample}_qd_metrics.txt mq=${sample}_mq_metrics.txt isize=${sample}_is_metrics.txt
>>>
runtime {
docker: docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File qd_metrics = "${sample}_qd_metrics.txt"
File qd_metrics_pdf = "${sample}_qd_metrics.pdf"
File mq_metrics = "${sample}_mq_metrics.txt"
File mq_metrics_pdf = "${sample}_mq_metrics.pdf"
File is_metrics = "${sample}_is_metrics.txt"
File is_metrics_pdf = "${sample}_is_metrics.pdf"
File gc_summary = "${sample}_gc_summary.txt"
File gc_metrics = "${sample}_gc_metrics.txt"
File gc_metrics_pdf = "${sample}_gc_metrics.pdf"
File aln_metrics = "${sample}_aln_metrics.txt"
File coverage_metrics_sample_summary = "${sample}_coverage_metrics.sample_summary"
File coverage_metrics_sample_statistics = "${sample}_coverage_metrics.sample_statistics"
File coverage_metrics_sample_interval_statistics = "${sample}_coverage_metrics.sample_interval_statistics"
File coverage_metrics_sample_cumulative_coverage_proportions = "${sample}_coverage_metrics.sample_cumulative_coverage_proportions"
File coverage_metrics_sample_cumulative_coverage_counts = "${sample}_coverage_metrics.sample_cumulative_coverage_counts"
}
}

+ 11
- 4
tasks/Realigner.wdl Bestand weergeven

@@ -21,11 +21,18 @@ task Realigner {
set -e
export SENTIEON_LICENSE=${SENTIEON_LICENSE}
nt=$(nproc)
if [ ${regions} != "" ]; then
${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${Dedup_bam} --algo Realigner -k ${dbmills_dir}/${db_mills} --interval_list ${regions} ${sample}.sorted.deduped.realigned.bam
if [ ${regions} ]; then
INTERVAL="--interval_list ${regions}"
else
${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${Dedup_bam} --algo Realigner -k ${dbmills_dir}/${db_mills} ${sample}.sorted.deduped.realigned.bam
fi
INTERVAL=""
fi
# Indel realignment of the deduplicated BAM.
# $INTERVAL holds "--interval_list <regions>" (or is empty); that flag is an
# option of the Realigner algorithm, so it must follow --algo Realigner rather
# than sit among the driver-level options.
${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt \
  -r ${ref_dir}/${fasta} \
  -i ${Dedup_bam} \
  --algo Realigner -k ${dbmills_dir}/${db_mills} $INTERVAL \
  ${sample}.sorted.deduped.realigned.bam
>>>
runtime {

+ 59
- 67
tasks/TNscope.wdl Bestand weergeven

@@ -1,77 +1,69 @@
task TNscope {
String sample
String SENTIEON_INSTALL_DIR
String SENTIEON_LICENSE
File tumor_recaled_bam
File tumor_recaled_bam_index
File tumor_recal_table
File normal_recaled_bam
File normal_recaled_bam_index
File normal_recal_table
String tumor_name
String normal_name
File ref_dir
String fasta
File dbsnp_dir
String dbsnp
File? regions
Int? interval_padding
File? pon_vcf
String sample
String SENTIEON_INSTALL_DIR
String SENTIEON_LICENSE
File tumor_recaled_bam
File tumor_recaled_bam_index
File tumor_recal_table
File normal_recaled_bam
File normal_recaled_bam_index
File normal_recal_table
String tumor_name
String normal_name
String docker
String cluster_config
String disk_size
File ref_dir
String fasta
File dbsnp_dir
String dbsnp
File? regions
String docker
String cluster_config
String disk_size
command <<<
set -o pipefail
set -e
export SENTIEON_LICENSE=${SENTIEON_LICENSE}
nt=$(nproc)
Boolean set_pon
File? pon_vcf
String pon_command = if set_pon then "--pon /cromwell_root/tmp/PON/$(basename ${pon_vcf})" else ""
command <<<
set -o pipefail
set -e
export SENTIEON_LICENSE=${SENTIEON_LICENSE}
nt=$(nproc)
# Build the driver-level interval options. interval_padding is an optional
# Int: the WDL placeholder below renders as empty text when it is unset, so
# "--interval_padding" is never emitted without a value.
if [ -n "${regions}" ]; then
  INTERVAL="--interval ${regions} ${'--interval_padding ' + interval_padding}"
else
  INTERVAL=""
fi
if ${set_pon}; then
mkdir -p /cromwell_root/tmp/PON/
cp ${pon_vcf} /cromwell_root/tmp/PON/
${SENTIEON_INSTALL_DIR}/bin/sentieon util vcfindex /cromwell_root/tmp/PON/$(basename ${pon_vcf})
fi
if [ ${regions} != "" ]; then
${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt \
--interval ${regions} -r ${ref_dir}/${fasta} \
-i ${tumor_recaled_bam} -q ${tumor_recal_table} \
-i ${normal_recaled_bam} -q ${normal_recal_table} \
--algo TNscope \
--tumor_sample ${tumor_name} --normal_sample ${normal_name} \
--dbsnp ${dbsnp_dir}/${dbsnp} \
${pon_command} \
${sample}.TNscope.TN.vcf
else
${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt \
-r ${ref_dir}/${fasta} \
-i ${tumor_recaled_bam} -q ${tumor_recal_table} \
-i ${normal_recaled_bam} -q ${normal_recal_table} \
--algo TNscope \
--tumor_sample ${tumor_name} --normal_sample ${normal_name} \
--dbsnp ${dbsnp_dir}/${dbsnp} \
${pon_command} \
${sample}.TNscope.TN.vcf
fi
>>>
if [ ${pon_vcf} ]; then
PON="--pon ${pon_vcf}"
${SENTIEON_INSTALL_DIR}/bin/sentieon util vcfindex ${pon_vcf}
else
PON=""
fi
${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt \
-r ${ref_dir}/${fasta} $INTERVAL \
-i ${tumor_recaled_bam} -q ${tumor_recal_table} \
-i ${normal_recaled_bam} -q ${normal_recal_table} \
--algo TNscope \
--tumor_sample ${tumor_name} --normal_sample ${normal_name} \
--dbsnp ${dbsnp_dir}/${dbsnp} \
$PON \
${sample}.TNscope.TN.vcf
>>>
runtime {
docker: docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
runtime {
docker: docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File TNscope_vcf= "${sample}.TNscope.TN.vcf"
File TNscope_vcf_index = "${sample}.TNscope.TN.vcf.idx"
}
output {
File TNscope_vcf= "${sample}.TNscope.TN.vcf"
File TNscope_vcf_index = "${sample}.TNscope.TN.vcf.idx"
}
}

+ 78
- 62
tasks/TNseq.wdl Bestand weergeven

@@ -1,72 +1,88 @@
task TNseq {
File ref_dir
File dbsnp_dir
String sample
String SENTIEON_INSTALL_DIR
String SENTIEON_LICENSE
String tumor_name
String normal_name
String docker
String cluster_config

String fasta
File germline_resource
File tumor_recaled_bam
File tumor_recaled_bam_index
File tumor_recal_table
File normal_recaled_bam
File normal_recaled_bam_index
File normal_recal_table
File TNseq_PoN
String dbsnp
String disk_size
File regions
String sample
String SENTIEON_INSTALL_DIR
String SENTIEON_LICENSE
File tumor_recaled_bam
File tumor_recaled_bam_index
File tumor_recal_table
File normal_recaled_bam
File normal_recaled_bam_index
File normal_recal_table
String tumor_name
String normal_name

command <<<
set -o pipefail
set -e
export SENTIEON_LICENSE=${SENTIEON_LICENSE}
nt=$(nproc)
File ref_dir
String fasta
File germline_resource
File germline_resource_tbi
File? regions
Int? interval_padding
File? pon_vcf

${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt --interval ${regions} -r ${ref_dir}/${fasta} -i ${corealigner_bam} --algo TNhaplotyper2 --tumor_sample ${tumor_name} --normal_sample ${normal_name} --dbsnp ${dbsnp_dir}/${dbsnp} ${sample}.TNseq.TN.vcf
String docker
String cluster_config
String disk_size

command <<<
set -o pipefail
set -e
export SENTIEON_LICENSE=${SENTIEON_LICENSE}
nt=$(nproc)

${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt --interval ${regions} -r ${ref_dir}/${fasta} \
-i ${tumor_recaled_bam} -q ${tumor_recal_table} \
-i ${normal_recaled_bam} -q ${normal_recal_table} \
--algo TNseq --tumor_sample ${tumor_name} \
--normal_sample ${normal_name} \
--germline_vcf ${germline_resource} \
--pon ${TNseq_PoN} \
${sample}.TNseq.TN.tmp.vcf \
--algo OrientationBias --tumor_sample ${tumor_name} \
ORIENTATION_DATA \
--algo ContaminationModel --tumor_sample ${tumor_name} \
--normal_sample ${normal_name} \
--vcf ${germline_resource} \
--tumor_segments CONTAMINATION_DATA.segments \
CONTAMINATION_DATA
sentieon driver -r REFERENCE \
--algo TNfilter --tumor_sample ${tumor_name} \
--normal_sample ${normal_name} \
-v ${sample}.TNseq.TN.tmp.vcf \
--contamination CONTAMINATION_DATA \
--tumor_segments CONTAMINATION_DATA.segments \
--orientation_priors ORIENTATION_DATA \
${sample}.TNseq.TN.vcf
>>>
# Build the driver-level interval options. interval_padding is an optional
# Int: the WDL placeholder below renders as empty text when it is unset, so
# "--interval_padding" is never emitted without a value.
if [ -n "${regions}" ]; then
  INTERVAL="--interval ${regions} ${'--interval_padding ' + interval_padding}"
else
  INTERVAL=""
fi

runtime {
docker: docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
if [ ${pon_vcf} ]; then
PON="--pon ${pon_vcf}"
${SENTIEON_INSTALL_DIR}/bin/sentieon util vcfindex ${pon_vcf}
else
PON=""
fi

output {
File TNseq_vcf= "${sample}.TNseq.TN.vcf"
File TNseq_vcf_index = "${sample}.TNseq.TN.vcf.idx"
}
# Step 1: one driver pass runs three algorithms on the tumor/normal pair:
#   TNhaplotyper2      -> unfiltered calls (${sample}.TNseq.TN.tmp.vcf)
#   OrientationBias    -> ${sample}.orientation
#   ContaminationModel -> ${sample}.contamination and its .segments file
${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt \
  -r ${ref_dir}/${fasta} $INTERVAL \
  -i ${tumor_recaled_bam} -q ${tumor_recal_table} \
  -i ${normal_recaled_bam} -q ${normal_recal_table} \
  --algo TNhaplotyper2 \
  --tumor_sample ${tumor_name} --normal_sample ${normal_name} \
  --germline_vcf ${germline_resource} \
  $PON \
  ${sample}.TNseq.TN.tmp.vcf \
  --algo OrientationBias --tumor_sample ${tumor_name} \
  ${sample}.orientation \
  --algo ContaminationModel \
  --tumor_sample ${tumor_name} --normal_sample ${normal_name} \
  --vcf ${germline_resource} \
  --tumor_segments ${sample}.contamination.segments \
  ${sample}.contamination

# Step 2: filter the raw calls with the models produced above.
# Uses the same sentieon binary and reference as step 1; the previous text
# still carried the documentation placeholder "REFERENCE" and relied on a
# bare "sentieon" being on PATH.
${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt \
  -r ${ref_dir}/${fasta} \
  --algo TNfilter \
  --tumor_sample ${tumor_name} --normal_sample ${normal_name} \
  -v ${sample}.TNseq.TN.tmp.vcf \
  --contamination ${sample}.contamination \
  --tumor_segments ${sample}.contamination.segments \
  --orientation_priors ${sample}.orientation \
  ${sample}.TNseq.TN.vcf
>>>

runtime {
docker: docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}

output {
File TNseq_vcf = "${sample}.TNseq.TN.vcf"
File TNseq_vcf_index = "${sample}.TNseq.TN.vcf.idx"
File contamination = "${sample}.contamination"
File contamination_segments = "${sample}.contamination.segments"
File orientation = "${sample}.orientation"
}

}

+ 0
- 39
tasks/bcftools.wdl Bestand weergeven

@@ -1,39 +0,0 @@
# Merge a set of normal-sample VCFs into a single panel-of-normals VCF.
#
# Inputs:
#   pon_vcf        - VCF files to merge (used only when set_pon is true)
#   set_pon        - when false, an empty panel_of_normal.vcf is created instead
#   docker         - container image providing bcftools
#   cluster_config - value passed to the runtime "cluster" attribute
#   disk_size      - data disk size, spliced into the dataDisk runtime string
# Output:
#   panel_of_normal_vcf - panel_of_normal.vcf in the task working directory
task bcftools {

  Array[File] pon_vcf
  String docker
  String cluster_config
  String disk_size
  Boolean set_pon


  command <<<
    set -o pipefail
    set -e
    if ${set_pon} ; then
      mkdir -p /cromwell_root/tmp/bcftools/
      for i in ${sep=" " pon_vcf}
      do
        # $i is the full localized path of the input; keep only its file
        # name so the compressed copy lands inside the scratch directory.
        gz=/cromwell_root/tmp/bcftools/$(basename "$i").gz
        bcftools view "$i" -Oz -o "$gz"
        # bcftools merge requires every input to be bgzipped and indexed.
        bcftools index "$gz"
      done
      # Keep sites that are PASS or unfiltered, recompute AN/AC, and retain
      # only sites whose summed allele count exceeds 1.
      bcftools merge -m any -f PASS,. --force-samples /cromwell_root/tmp/bcftools/*.vcf.gz |\
        bcftools plugin fill-AN-AC |\
        bcftools filter -i 'SUM(AC)>1' > panel_of_normal.vcf
    else
      # The output file must exist even when no panel is requested.
      touch panel_of_normal.vcf
    fi
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }
  output {
    File panel_of_normal_vcf = "panel_of_normal.vcf"
  }
}

+ 57
- 39
tasks/deduped_Metrics.wdl Bestand weergeven

@@ -1,45 +1,63 @@
task deduped_Metrics {
File ref_dir
String SENTIEON_INSTALL_DIR
String SENTIEON_LICENSE
String sample
String fasta
File Dedup_bam
File Dedup_bam_index
String docker
String cluster_config
String disk_size
File ref_dir
String SENTIEON_INSTALL_DIR
String SENTIEON_LICENSE
String sample
String fasta
File Dedup_bam
File Dedup_bam_index
String docker
String cluster_config
String disk_size
command <<<
set -o pipefail
set -e
export SENTIEON_LICENSE=${SENTIEON_LICENSE}
nt=$(nproc)
${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${Dedup_bam} --algo CoverageMetrics --omit_base_output ${sample}_deduped_coverage_metrics --algo MeanQualityByCycle ${sample}_deduped_mq_metrics.txt --algo QualDistribution ${sample}_deduped_qd_metrics.txt --algo GCBias --summary ${sample}_deduped_gc_summary.txt ${sample}_deduped_gc_metrics.txt --algo AlignmentStat ${sample}_deduped_aln_metrics.txt --algo InsertSizeMetricAlgo ${sample}_deduped_is_metrics.txt --algo QualityYield ${sample}_deduped_QualityYield.txt --algo WgsMetricsAlgo ${sample}_deduped_WgsMetricsAlgo.txt
>>>
File? regions
runtime {
docker: docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
command <<<
set -o pipefail
set -e
export SENTIEON_LICENSE=${SENTIEON_LICENSE}
nt=$(nproc)
output {
File deduped_coverage_metrics_sample_summary = "${sample}_deduped_coverage_metrics.sample_summary"
File deduped_coverage_metrics_sample_statistics = "${sample}_deduped_coverage_metrics.sample_statistics"
File deduped_coverage_metrics_sample_interval_statistics = "${sample}_deduped_coverage_metrics.sample_interval_statistics"
File deduped_coverage_metrics_sample_cumulative_coverage_proportions = "${sample}_deduped_coverage_metrics.sample_cumulative_coverage_proportions"
File deduped_coverage_metrics_sample_cumulative_coverage_counts = "${sample}_deduped_coverage_metrics.sample_cumulative_coverage_counts"
File deduped_mean_quality = "${sample}_deduped_mq_metrics.txt"
File deduped_qd_metrics = "${sample}_deduped_qd_metrics.txt"
File deduped_gc_summary = "${sample}_deduped_gc_summary.txt"
File deduped_gc_metrics = "${sample}_deduped_gc_metrics.txt"
File dedeuped_aln_metrics = "${sample}_deduped_aln_metrics.txt"
File deduped_is_metrics = "${sample}_deduped_is_metrics.txt"
File deduped_QualityYield = "${sample}_deduped_QualityYield.txt"
File deduped_wgsmetrics = "${sample}_deduped_WgsMetricsAlgo.txt"
}
if [ ${regions} ]; then
INTERVAL="--interval ${regions}"
else
INTERVAL=""
fi
${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt \
-r ${ref_dir}/${fasta} $INTERVAL \
-i ${Dedup_bam} \
--algo CoverageMetrics --omit_base_output ${sample}_deduped_coverage_metrics \
--algo MeanQualityByCycle ${sample}_deduped_mq_metrics.txt \
--algo QualDistribution ${sample}_deduped_qd_metrics.txt \
--algo GCBias --summary ${sample}_deduped_gc_summary.txt ${sample}_deduped_gc_metrics.txt \
--algo AlignmentStat ${sample}_deduped_aln_metrics.txt \
--algo InsertSizeMetricAlgo ${sample}_deduped_is_metrics.txt \
--algo QualityYield ${sample}_deduped_QualityYield.txt \
--algo WgsMetricsAlgo ${sample}_deduped_WgsMetricsAlgo.txt
>>>
runtime {
docker: docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File deduped_coverage_metrics_sample_summary = "${sample}_deduped_coverage_metrics.sample_summary"
File deduped_coverage_metrics_sample_statistics = "${sample}_deduped_coverage_metrics.sample_statistics"
File deduped_coverage_metrics_sample_interval_statistics = "${sample}_deduped_coverage_metrics.sample_interval_statistics"
File deduped_coverage_metrics_sample_cumulative_coverage_proportions = "${sample}_deduped_coverage_metrics.sample_cumulative_coverage_proportions"
File deduped_coverage_metrics_sample_cumulative_coverage_counts = "${sample}_deduped_coverage_metrics.sample_cumulative_coverage_counts"
File deduped_mean_quality = "${sample}_deduped_mq_metrics.txt"
File deduped_qd_metrics = "${sample}_deduped_qd_metrics.txt"
File deduped_gc_summary = "${sample}_deduped_gc_summary.txt"
File deduped_gc_metrics = "${sample}_deduped_gc_metrics.txt"
File dedeuped_aln_metrics = "${sample}_deduped_aln_metrics.txt"
File deduped_is_metrics = "${sample}_deduped_is_metrics.txt"
File deduped_QualityYield = "${sample}_deduped_QualityYield.txt"
File deduped_wgsmetrics = "${sample}_deduped_WgsMetricsAlgo.txt"
}
}

+ 29
- 29
tasks/mapping.wdl Bestand weergeven

@@ -1,35 +1,35 @@
task mapping {
File ref_dir
String fasta
File fastq_1
File fastq_2
File ref_dir
String fasta
File fastq_1
File fastq_2
String SENTIEON_INSTALL_DIR
String SENTIEON_LICENSE
String group
String sample
String pl
String docker
String cluster_config
String disk_size
String SENTIEON_INSTALL_DIR
String SENTIEON_LICENSE
String group
String sample
String pl
String docker
String cluster_config
String disk_size
command <<<
set -o pipefail
set -e
export SENTIEON_LICENSE=${SENTIEON_LICENSE}
nt=$(nproc)
${SENTIEON_INSTALL_DIR}/bin/bwa mem -M -R "@RG\tID:${group}\tSM:${sample}\tPL:${pl}" -t $nt -K 10000000 ${ref_dir}/${fasta} ${fastq_1} ${fastq_2} | ${SENTIEON_INSTALL_DIR}/bin/sentieon util sort -o ${sample}.sorted.bam -t $nt --sam2bam -i -
>>>
command <<<
set -o pipefail
set -e
export SENTIEON_LICENSE=${SENTIEON_LICENSE}
nt=$(nproc)
${SENTIEON_INSTALL_DIR}/bin/bwa mem -M -R "@RG\tID:${group}\tSM:${sample}\tPL:${pl}" -t $nt -K 10000000 ${ref_dir}/${fasta} ${fastq_1} ${fastq_2} | ${SENTIEON_INSTALL_DIR}/bin/sentieon util sort -o ${sample}.sorted.bam -t $nt --sam2bam -i -
>>>
runtime {
docker: docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File sorted_bam = "${sample}.sorted.bam"
File sorted_bam_index = "${sample}.sorted.bam.bai"
}
runtime {
docker: docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File sorted_bam = "${sample}.sorted.bam"
File sorted_bam_index = "${sample}.sorted.bam.bai"
}
}

+ 24
- 20
workflow.wdl Bestand weergeven

@@ -37,13 +37,17 @@ workflow {{ project_name }} {
File dbsnp_dir
String dbsnp
File germline_resource
File germline_resource_tbi
File? regions
Int? interval_padding

File database
String disk_size
String cluster_config

Boolean set_pon
File? pon_vcf
File? tnseq_pon
File? tnscope_pon
File? cosmic_dir
String? cosmic_vcf

@@ -51,8 +55,7 @@ workflow {{ project_name }} {
Boolean tnseq
Boolean tnscope
Boolean varscan


call mapping.mapping as tumor_mapping {
input:
group=sample_id + '_tumor',
@@ -170,6 +173,7 @@ workflow {{ project_name }} {
sorted_bam=normal_mapping.sorted_bam,
sorted_bam_index=normal_mapping.sorted_bam_index,
sample=sample_id + '_normal',
regions=regions,
docker=sentieon_docker,
disk_size=disk_size,
cluster_config=cluster_config
@@ -196,6 +200,7 @@ workflow {{ project_name }} {
Dedup_bam=normal_Dedup.Dedup_bam,
Dedup_bam_index=normal_Dedup.Dedup_bam_index,
sample=sample_id + '_normal',
regions=regions,
docker=sentieon_docker,
disk_size=disk_size,
cluster_config=cluster_config
@@ -247,6 +252,7 @@ workflow {{ project_name }} {
ref_dir=ref_dir,
recaled_bam=normal_BQSR.recaled_bam,
recaled_bam_index=normal_BQSR.recaled_bam_index,
# NOTE(review): this hunk presumably belongs to the Haplotyper call; that task
# declares the input as `recal_table`, so the key must match that name.
recal_table=normal_BQSR.recal_table,
dbsnp=dbsnp,
dbsnp_dir=dbsnp_dir,
regions=regions,
@@ -273,10 +279,10 @@ workflow {{ project_name }} {
fasta=fasta,
ref_dir=ref_dir,
regions=regions,
dbsnp=dbsnp,
dbsnp_dir=dbsnp_dir,
set_pon=set_pon,
pon_vcf=pon_vcf,
interval_padding=interval_padding,
germline_resource=germline_resource,
germline_resource_tbi=germline_resource_tbi,
pon_vcf=tnseq_pon,
docker=sentieon_docker,
cluster_config=cluster_config,
disk_size=disk_size
@@ -298,27 +304,25 @@ workflow {{ project_name }} {
input:
SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR,
SENTIEON_LICENSE=SENTIEON_LICENSE,
fasta=fasta,
ref_dir=ref_dir,
regions=regions,
sample=sample_id,
normal_recaled_bam=normal_BQSR.recaled_bam,
normal_recaled_bam_index=normal_BQSR.recaled_bam_index,
normal_recal_table=normal_BQSR.recal_table,
tumor_recaled_bam=tumor_BQSR.recaled_bam,
tumor_recaled_bam_index=tumor_BQSR.recaled_bam_index,
tumor_recal_table=tumor_BQSR.recal_table,
normal_name=sample_id + "_normal",
tumor_name=sample_id + "_tumor",
fasta=fasta,
ref_dir=ref_dir,
regions=regions,
interval_padding=interval_padding,
dbsnp=dbsnp,
dbsnp_dir=dbsnp_dir,
set_pon=set_pon,
pon_vcf=pon_vcf,
cosmic_vcf=cosmic_vcf,
cosmic_dir=cosmic_dir,
tumor_name=sample_id + "_tumor",
normal_name=sample_id + "_normal",
pon_vcf=tnscope_pon,
docker=sentieon_docker,
sample=sample_id,
disk_size=disk_size,
cluster_config=cluster_config
cluster_config=cluster_config,
disk_size=disk_size
}
call annovar.annovar as TNscope_annovar {

Laden…
Annuleren
Opslaan