### Variant Calling

This APP is developed for germline and somatic short variant discovery (SNVs + Indels).

***Supported callers***

> Germline
* Haplotyper

> Somatic
* TNseq (TNhaplotyper2)
* TNscope
* VarScan
"dbmills_dir": "oss://pgx-reference-data/GRCh38.d1.vd1/",
"db_mills": "Mills_and_1000G_gold_standard.indels.hg38.vcf",
"germline_resource": "oss://pgx-reference-data/gnomAD/af-only-gnomad.v3.1.1.vcf.gz",
"germline_resource_tbi": "oss://pgx-reference-data/gnomAD/af-only-gnomad.v3.1.1.vcf.gz.tbi",
"sentieon_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/sentieon-genomics:v2020.10.07",
"varscan_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/varscan2:v2.4.3",
"annovar_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/annovar:v2018.04",
"bcftools_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9",
"database": "oss://pgx-reference-data/annovar_hg38/",
"regions": "oss://pgx-reference-data/bed/cbcga/S07604514_Covered.bed",
"set_pon": false,
"pon_vcf": "",
"tnseq_pon": "",
"tnscope_pon": "",
"cosmic_vcf": "CosmicCodingMuts.hg38.v91.vcf",
"cosmic_dir": "oss://pgx-reference-data/reference/cosmic/",
"disk_size": "200",
"{{ project_name }}.dbmills_dir": "{{ dbmills_dir }}",
"{{ project_name }}.db_mills": "{{ db_mills }}",
"{{ project_name }}.germline_resource": "{{ germline_resource }}",
"{{ project_name }}.germline_resource_tbi": "{{ germline_resource_tbi }}",
"{{ project_name }}.sentieon_docker": "{{ sentieon_docker }}",
"{{ project_name }}.varscan_docker": "{{ varscan_docker }}",
"{{ project_name }}.annovar_docker": "{{ annovar_docker }}",
"{{ project_name }}.maftools_docker": "{{ maftools_docker }}",
"{{ project_name }}.database": "{{ database }}",
"{{ project_name }}.regions": "{{ regions }}",
"{{ project_name }}.set_pon": {{ set_pon | tojson }},
"{{ project_name }}.pon_vcf": "{{ pon_vcf }}",
"{{ project_name }}.tnseq_pon": "{{ tnseq_pon }}",
"{{ project_name }}.tnscope_pon": "{{ tnscope_pon }}",
"{{ project_name }}.cosmic_vcf": "{{ cosmic_vcf }}",
"{{ project_name }}.cosmic_dir": "{{ cosmic_dir }}",
"{{ project_name }}.disk_size": "{{ disk_size }}",
task Dedup {
    # Mark and remove duplicate reads from a coordinate-sorted BAM using
    # Sentieon LocusCollector (score pass) followed by the Dedup algo.
    # NOTE(review): the original block contained two identical copies of the
    # entire task body (an unresolved merge); this is the single clean version.
    String SENTIEON_INSTALL_DIR   # root of the Sentieon installation inside the container
    String SENTIEON_LICENSE       # license server address, exported before running the driver
    String sample                 # sample name, used as the prefix of all output files
    File sorted_bam
    File sorted_bam_index
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        export SENTIEON_LICENSE=${SENTIEON_LICENSE}
        nt=$(nproc)
        # Pass 1: collect duplicate scores; pass 2: remove duplicates (--rmdup).
        ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt -i ${sorted_bam} --algo LocusCollector --fun score_info ${sample}_score.txt
        ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt -i ${sorted_bam} --algo Dedup --rmdup --score_info ${sample}_score.txt --metrics ${sample}_dedup_metrics.txt ${sample}.sorted.deduped.bam
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File score = "${sample}_score.txt"
        File dedup_metrics = "${sample}_dedup_metrics.txt"
        File Dedup_bam = "${sample}.sorted.deduped.bam"
        # The .bai is produced alongside the BAM by the Sentieon driver.
        File Dedup_bam_index = "${sample}.sorted.deduped.bam.bai"
    }
}
task Haplotyper {
    # Germline short-variant calling with Sentieon Haplotyper on a recalibrated BAM.
    # NOTE(review): the original block contained both the old and the new version of
    # this task concatenated; this keeps the newer form (recal table applied via -q,
    # optional interval restriction) and fixes two defects:
    #   1. the new version computed $INTERVAL but then unconditionally passed
    #      "--interval ${regions}", which breaks when no regions file is supplied;
    #   2. a stray panel-of-normals block referenced ${pon_vcf}, which is not a
    #      declared input of this task (and a PoN is not used for germline calling).
    File ref_dir
    File dbsnp_dir
    String SENTIEON_INSTALL_DIR
    String SENTIEON_LICENSE
    String fasta
    File recaled_bam
    File recaled_bam_index
    File recal_table             # BQSR table, applied on the fly with -q
    String dbsnp
    String sample
    String docker
    String cluster_config
    String disk_size
    File? regions                # optional BED of target regions
    Int? interval_padding        # optional padding around regions; defaults to 0

    command <<<
        set -o pipefail
        set -e
        export SENTIEON_LICENSE=${SENTIEON_LICENSE}
        nt=$(nproc)
        if [ ${regions} ]; then
            INTERVAL="--interval ${regions} --interval_padding ${default=0 interval_padding}"
        else
            INTERVAL=""
        fi
        ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt \
        -r ${ref_dir}/${fasta} $INTERVAL \
        -i ${recaled_bam} -q ${recal_table} \
        --algo Haplotyper -d ${dbsnp_dir}/${dbsnp} \
        ${sample}_hc.vcf
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File vcf = "${sample}_hc.vcf"
        File vcf_idx = "${sample}_hc.vcf.idx"
    }
}
task Metrics {
    # Alignment QC on the raw sorted BAM: quality-by-cycle, quality distribution,
    # GC bias, alignment stats, insert size, and coverage metrics, plus a combined
    # PDF report.
    # NOTE(review): the original block had its declarations and command duplicated
    # (old + new versions concatenated); this keeps the newer, region-aware version.
    File ref_dir
    String SENTIEON_INSTALL_DIR
    String SENTIEON_LICENSE
    String sample
    String docker
    String cluster_config
    String fasta
    File sorted_bam
    File sorted_bam_index
    String disk_size
    File? regions                # optional BED restricting metrics to target regions

    command <<<
        set -o pipefail
        set -e
        export SENTIEON_LICENSE=${SENTIEON_LICENSE}
        nt=$(nproc)
        if [ ${regions} ]; then
            INTERVAL="--interval ${regions}"
        else
            INTERVAL=""
        fi
        # One driver invocation computes all metrics in a single pass over the BAM.
        ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt \
        -r ${ref_dir}/${fasta} $INTERVAL \
        -i ${sorted_bam} \
        --algo MeanQualityByCycle ${sample}_mq_metrics.txt \
        --algo QualDistribution ${sample}_qd_metrics.txt \
        --algo GCBias --summary ${sample}_gc_summary.txt ${sample}_gc_metrics.txt \
        --algo AlignmentStat ${sample}_aln_metrics.txt \
        --algo InsertSizeMetricAlgo ${sample}_is_metrics.txt \
        --algo CoverageMetrics --omit_base_output ${sample}_coverage_metrics
        ${SENTIEON_INSTALL_DIR}/bin/sentieon plot metrics -o ${sample}_metrics_report.pdf gc=${sample}_gc_metrics.txt qd=${sample}_qd_metrics.txt mq=${sample}_mq_metrics.txt isize=${sample}_is_metrics.txt
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File qd_metrics = "${sample}_qd_metrics.txt"
        File qd_metrics_pdf = "${sample}_qd_metrics.pdf"
        File mq_metrics = "${sample}_mq_metrics.txt"
        File mq_metrics_pdf = "${sample}_mq_metrics.pdf"
        File is_metrics = "${sample}_is_metrics.txt"
        File is_metrics_pdf = "${sample}_is_metrics.pdf"
        File gc_summary = "${sample}_gc_summary.txt"
        File gc_metrics = "${sample}_gc_metrics.txt"
        File gc_metrics_pdf = "${sample}_gc_metrics.pdf"
        File aln_metrics = "${sample}_aln_metrics.txt"
        File coverage_metrics_sample_summary = "${sample}_coverage_metrics.sample_summary"
        File coverage_metrics_sample_statistics = "${sample}_coverage_metrics.sample_statistics"
        File coverage_metrics_sample_interval_statistics = "${sample}_coverage_metrics.sample_interval_statistics"
        File coverage_metrics_sample_cumulative_coverage_proportions = "${sample}_coverage_metrics.sample_cumulative_coverage_proportions"
        File coverage_metrics_sample_cumulative_coverage_counts = "${sample}_coverage_metrics.sample_cumulative_coverage_counts"
    }
}
set -e | set -e | ||||
export SENTIEON_LICENSE=${SENTIEON_LICENSE} | export SENTIEON_LICENSE=${SENTIEON_LICENSE} | ||||
nt=$(nproc) | nt=$(nproc) | ||||
if [ ${regions} != "" ]; then | |||||
${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${Dedup_bam} --algo Realigner -k ${dbmills_dir}/${db_mills} --interval_list ${regions} ${sample}.sorted.deduped.realigned.bam | |||||
if [ ${regions} ]; then | |||||
INTERVAL="--interval_list ${regions}" | |||||
else | else | ||||
${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${Dedup_bam} --algo Realigner -k ${dbmills_dir}/${db_mills} ${sample}.sorted.deduped.realigned.bam | |||||
fi | |||||
INTERVAL="" | |||||
fi | |||||
${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt \ | |||||
-r ${ref_dir}/${fasta} $INTERVAL \ | |||||
-i ${Dedup_bam} \ | |||||
--algo Realigner -k ${dbmills_dir}/${db_mills} ${sample}.sorted.deduped.realigned.bam | |||||
>>> | >>> | ||||
runtime { | runtime { |
task TNscope {
    # Paired tumor/normal somatic variant calling with Sentieon TNscope.
    # NOTE(review): the original block interleaved two versions of this task
    # (an older set_pon/pon_command form and a newer $INTERVAL/$PON shell form)
    # and duplicated the runtime and output sections. This is the single clean
    # newer version: the optional PoN is indexed and passed via --pon, and the
    # optional regions/padding are collapsed into one $INTERVAL variable.
    String sample
    String SENTIEON_INSTALL_DIR
    String SENTIEON_LICENSE
    File tumor_recaled_bam
    File tumor_recaled_bam_index
    File tumor_recal_table
    File normal_recaled_bam
    File normal_recaled_bam_index
    File normal_recal_table
    String tumor_name            # tumor sample name as written in the BAM read groups
    String normal_name           # normal sample name as written in the BAM read groups
    File ref_dir
    String fasta
    File dbsnp_dir
    String dbsnp
    File? regions                # optional BED of target regions
    Int? interval_padding        # optional padding around regions; defaults to 0
    File? pon_vcf                # optional panel-of-normals VCF
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        export SENTIEON_LICENSE=${SENTIEON_LICENSE}
        nt=$(nproc)
        if [ ${regions} ]; then
            INTERVAL="--interval ${regions} --interval_padding ${default=0 interval_padding}"
        else
            INTERVAL=""
        fi
        if [ ${pon_vcf} ]; then
            PON="--pon ${pon_vcf}"
            # The PoN must be indexed before the driver can read it.
            ${SENTIEON_INSTALL_DIR}/bin/sentieon util vcfindex ${pon_vcf}
        else
            PON=""
        fi
        ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt \
        -r ${ref_dir}/${fasta} $INTERVAL \
        -i ${tumor_recaled_bam} -q ${tumor_recal_table} \
        -i ${normal_recaled_bam} -q ${normal_recal_table} \
        --algo TNscope \
        --tumor_sample ${tumor_name} --normal_sample ${normal_name} \
        --dbsnp ${dbsnp_dir}/${dbsnp} \
        $PON \
        ${sample}.TNscope.TN.vcf
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File TNscope_vcf = "${sample}.TNscope.TN.vcf"
        File TNscope_vcf_index = "${sample}.TNscope.TN.vcf.idx"
    }
}
task TNseq {
    # Paired tumor/normal somatic calling with Sentieon TNhaplotyper2 (Mutect2-like),
    # followed by orientation-bias and contamination modelling, then TNfilter to
    # produce the final filtered VCF.
    # NOTE(review): the original block interleaved an older TNseq-algo version with
    # the newer TNhaplotyper2 version; this keeps the newer one and fixes the
    # TNfilter step, which invoked a bare "sentieon driver -r REFERENCE" — a literal
    # placeholder instead of ${SENTIEON_INSTALL_DIR} and the real reference path.
    String sample
    String SENTIEON_INSTALL_DIR
    String SENTIEON_LICENSE
    File tumor_recaled_bam
    File tumor_recaled_bam_index
    File tumor_recal_table
    File normal_recaled_bam
    File normal_recaled_bam_index
    File normal_recal_table
    String tumor_name            # tumor sample name as written in the BAM read groups
    String normal_name           # normal sample name as written in the BAM read groups
    File ref_dir
    String fasta
    File germline_resource       # af-only gnomAD VCF used for germline priors
    File germline_resource_tbi   # its tabix index (must be localized next to it)
    File? regions                # optional BED of target regions
    Int? interval_padding        # optional padding around regions; defaults to 0
    File? pon_vcf                # optional panel-of-normals VCF
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        export SENTIEON_LICENSE=${SENTIEON_LICENSE}
        nt=$(nproc)
        if [ ${regions} ]; then
            INTERVAL="--interval ${regions} --interval_padding ${default=0 interval_padding}"
        else
            INTERVAL=""
        fi
        if [ ${pon_vcf} ]; then
            PON="--pon ${pon_vcf}"
            # The PoN must be indexed before the driver can read it.
            ${SENTIEON_INSTALL_DIR}/bin/sentieon util vcfindex ${pon_vcf}
        else
            PON=""
        fi
        # Single pass: call candidates, collect orientation-bias priors, and
        # estimate cross-sample contamination.
        ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt \
        -r ${ref_dir}/${fasta} $INTERVAL \
        -i ${tumor_recaled_bam} -q ${tumor_recal_table} \
        -i ${normal_recaled_bam} -q ${normal_recal_table} \
        --algo TNhaplotyper2 \
        --tumor_sample ${tumor_name} --normal_sample ${normal_name} \
        --germline_vcf ${germline_resource} \
        $PON \
        ${sample}.TNseq.TN.tmp.vcf \
        --algo OrientationBias --tumor_sample ${tumor_name} \
        ${sample}.orientation \
        --algo ContaminationModel \
        --tumor_sample ${tumor_name} --normal_sample ${normal_name} \
        --vcf ${germline_resource} \
        --tumor_segments ${sample}.contamination.segments \
        ${sample}.contamination
        # Filter the candidate calls using the contamination and orientation data.
        ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt -r ${ref_dir}/${fasta} \
        --algo TNfilter \
        --tumor_sample ${tumor_name} --normal_sample ${normal_name} \
        -v ${sample}.TNseq.TN.tmp.vcf \
        --contamination ${sample}.contamination \
        --tumor_segments ${sample}.contamination.segments \
        --orientation_priors ${sample}.orientation \
        ${sample}.TNseq.TN.vcf
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File TNseq_vcf = "${sample}.TNseq.TN.vcf"
        File TNseq_vcf_index = "${sample}.TNseq.TN.vcf.idx"
        File contamination = "${sample}.contamination"
        File contamination_segments = "${sample}.contamination.segments"
        File orientation = "${sample}.orientation"
    }
}
task bcftools {
    # Build a panel-of-normals VCF by merging per-sample normal VCFs and keeping
    # sites seen in more than one sample (SUM(AC)>1). When set_pon is false, an
    # empty placeholder file is produced so downstream wiring stays uniform.
    Array[File] pon_vcf          # per-normal VCFs to merge into the PoN
    String docker
    String cluster_config
    String disk_size
    Boolean set_pon              # whether to actually build the PoN

    command <<<
        set -o pipefail
        set -e
        if ${set_pon} ; then
            mkdir -p /cromwell_root/tmp/bcftools/
            for i in ${sep=" " pon_vcf}
            do
                # BUGFIX: the original wrote -o .../$i.gz, re-appending the full
                # localized input path under the tmp dir (non-existent nested
                # directories). Use the basename so files land in the tmp dir.
                bcftools view $i -Oz -o /cromwell_root/tmp/bcftools/$(basename $i).gz
                # bcftools merge requires its inputs to be indexed.
                bcftools index -t /cromwell_root/tmp/bcftools/$(basename $i).gz
            done
            bcftools merge -m any -f PASS,. --force-samples /cromwell_root/tmp/bcftools/*.vcf.gz |\
            bcftools plugin fill-AN-AC |\
            bcftools filter -i 'SUM(AC)>1' > panel_of_normal.vcf
        else
            touch panel_of_normal.vcf
        fi
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File panel_of_normal_vcf = "panel_of_normal.vcf"
    }
}
task deduped_Metrics {
    # QC metrics on the deduplicated BAM: coverage, quality-by-cycle, quality
    # distribution, GC bias, alignment stats, insert size, quality yield, and
    # WGS metrics, all in a single driver pass.
    # NOTE(review): the original block contained both the old (non-interval) and
    # new (interval-aware) command sections plus duplicated declarations and
    # runtime/output sections; this keeps the newer version only.
    File ref_dir
    String SENTIEON_INSTALL_DIR
    String SENTIEON_LICENSE
    String sample
    String fasta
    File Dedup_bam
    File Dedup_bam_index
    String docker
    String cluster_config
    String disk_size
    File? regions                # optional BED restricting metrics to target regions

    command <<<
        set -o pipefail
        set -e
        export SENTIEON_LICENSE=${SENTIEON_LICENSE}
        nt=$(nproc)
        if [ ${regions} ]; then
            INTERVAL="--interval ${regions}"
        else
            INTERVAL=""
        fi
        ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt \
        -r ${ref_dir}/${fasta} $INTERVAL \
        -i ${Dedup_bam} \
        --algo CoverageMetrics --omit_base_output ${sample}_deduped_coverage_metrics \
        --algo MeanQualityByCycle ${sample}_deduped_mq_metrics.txt \
        --algo QualDistribution ${sample}_deduped_qd_metrics.txt \
        --algo GCBias --summary ${sample}_deduped_gc_summary.txt ${sample}_deduped_gc_metrics.txt \
        --algo AlignmentStat ${sample}_deduped_aln_metrics.txt \
        --algo InsertSizeMetricAlgo ${sample}_deduped_is_metrics.txt \
        --algo QualityYield ${sample}_deduped_QualityYield.txt \
        --algo WgsMetricsAlgo ${sample}_deduped_WgsMetricsAlgo.txt
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File deduped_coverage_metrics_sample_summary = "${sample}_deduped_coverage_metrics.sample_summary"
        File deduped_coverage_metrics_sample_statistics = "${sample}_deduped_coverage_metrics.sample_statistics"
        File deduped_coverage_metrics_sample_interval_statistics = "${sample}_deduped_coverage_metrics.sample_interval_statistics"
        File deduped_coverage_metrics_sample_cumulative_coverage_proportions = "${sample}_deduped_coverage_metrics.sample_cumulative_coverage_proportions"
        File deduped_coverage_metrics_sample_cumulative_coverage_counts = "${sample}_deduped_coverage_metrics.sample_cumulative_coverage_counts"
        File deduped_mean_quality = "${sample}_deduped_mq_metrics.txt"
        File deduped_qd_metrics = "${sample}_deduped_qd_metrics.txt"
        File deduped_gc_summary = "${sample}_deduped_gc_summary.txt"
        File deduped_gc_metrics = "${sample}_deduped_gc_metrics.txt"
        File dedeuped_aln_metrics = "${sample}_deduped_aln_metrics.txt"
        File deduped_is_metrics = "${sample}_deduped_is_metrics.txt"
        File deduped_QualityYield = "${sample}_deduped_QualityYield.txt"
        File deduped_wgsmetrics = "${sample}_deduped_WgsMetricsAlgo.txt"
    }
}
task mapping {
    # Align paired-end FASTQs with Sentieon bwa mem and pipe straight into
    # "sentieon util sort" to produce a coordinate-sorted, indexed BAM.
    # NOTE(review): the original block contained two identical copies of the
    # entire task body (an unresolved merge); this is the single clean version.
    File ref_dir
    String fasta
    File fastq_1
    File fastq_2
    String SENTIEON_INSTALL_DIR
    String SENTIEON_LICENSE
    String group                 # read-group ID written into @RG
    String sample                # sample name written into @RG SM and output names
    String pl                    # platform written into @RG PL (e.g. ILLUMINA)
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        export SENTIEON_LICENSE=${SENTIEON_LICENSE}
        nt=$(nproc)
        # -K 10000000 fixes the chunk size so results are deterministic across
        # runs with different thread counts; --sam2bam sorts and converts in one step.
        ${SENTIEON_INSTALL_DIR}/bin/bwa mem -M -R "@RG\tID:${group}\tSM:${sample}\tPL:${pl}" -t $nt -K 10000000 ${ref_dir}/${fasta} ${fastq_1} ${fastq_2} | ${SENTIEON_INSTALL_DIR}/bin/sentieon util sort -o ${sample}.sorted.bam -t $nt --sam2bam -i -
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File sorted_bam = "${sample}.sorted.bam"
        File sorted_bam_index = "${sample}.sorted.bam.bai"
    }
}
File dbsnp_dir | File dbsnp_dir | ||||
String dbsnp | String dbsnp | ||||
File germline_resource | File germline_resource | ||||
File germline_resource_tbi | |||||
File? regions | File? regions | ||||
Int? interval_padding | |||||
File database | File database | ||||
String disk_size | String disk_size | ||||
String cluster_config | String cluster_config | ||||
Boolean set_pon | |||||
File? pon_vcf | |||||
File? tnseq_pon | |||||
File? tnscope_pon | |||||
File? cosmic_dir | File? cosmic_dir | ||||
String? cosmic_vcf | String? cosmic_vcf | ||||
Boolean tnseq | Boolean tnseq | ||||
Boolean tnscope | Boolean tnscope | ||||
Boolean varscan | Boolean varscan | ||||
call mapping.mapping as tumor_mapping { | call mapping.mapping as tumor_mapping { | ||||
input: | input: | ||||
group=sample_id + '_tumor', | group=sample_id + '_tumor', | ||||
sorted_bam=normal_mapping.sorted_bam, | sorted_bam=normal_mapping.sorted_bam, | ||||
sorted_bam_index=normal_mapping.sorted_bam_index, | sorted_bam_index=normal_mapping.sorted_bam_index, | ||||
sample=sample_id + '_normal', | sample=sample_id + '_normal', | ||||
regions=regions, | |||||
docker=sentieon_docker, | docker=sentieon_docker, | ||||
disk_size=disk_size, | disk_size=disk_size, | ||||
cluster_config=cluster_config | cluster_config=cluster_config | ||||
Dedup_bam=normal_Dedup.Dedup_bam, | Dedup_bam=normal_Dedup.Dedup_bam, | ||||
Dedup_bam_index=normal_Dedup.Dedup_bam_index, | Dedup_bam_index=normal_Dedup.Dedup_bam_index, | ||||
sample=sample_id + '_normal', | sample=sample_id + '_normal', | ||||
regions=regions, | |||||
docker=sentieon_docker, | docker=sentieon_docker, | ||||
disk_size=disk_size, | disk_size=disk_size, | ||||
cluster_config=cluster_config | cluster_config=cluster_config | ||||
ref_dir=ref_dir, | ref_dir=ref_dir, | ||||
recaled_bam=normal_BQSR.recaled_bam, | recaled_bam=normal_BQSR.recaled_bam, | ||||
recaled_bam_index=normal_BQSR.recaled_bam_index, | recaled_bam_index=normal_BQSR.recaled_bam_index, | ||||
recaled_table=normal_BQSR.recal_table, | |||||
dbsnp=dbsnp, | dbsnp=dbsnp, | ||||
dbsnp_dir=dbsnp_dir, | dbsnp_dir=dbsnp_dir, | ||||
regions=regions, | regions=regions, | ||||
fasta=fasta, | fasta=fasta, | ||||
ref_dir=ref_dir, | ref_dir=ref_dir, | ||||
regions=regions, | regions=regions, | ||||
dbsnp=dbsnp, | |||||
dbsnp_dir=dbsnp_dir, | |||||
set_pon=set_pon, | |||||
pon_vcf=pon_vcf, | |||||
interval_padding=interval_padding, | |||||
germline_resource=germline_resource, | |||||
germline_resource_tbi=germline_resource_tbi, | |||||
pon_vcf=tnseq_pon, | |||||
docker=sentieon_docker, | docker=sentieon_docker, | ||||
cluster_config=cluster_config, | cluster_config=cluster_config, | ||||
disk_size=disk_size | disk_size=disk_size | ||||
input: | input: | ||||
SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR, | SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR, | ||||
SENTIEON_LICENSE=SENTIEON_LICENSE, | SENTIEON_LICENSE=SENTIEON_LICENSE, | ||||
fasta=fasta, | |||||
ref_dir=ref_dir, | |||||
regions=regions, | |||||
sample=sample_id, | |||||
normal_recaled_bam=normal_BQSR.recaled_bam, | normal_recaled_bam=normal_BQSR.recaled_bam, | ||||
normal_recaled_bam_index=normal_BQSR.recaled_bam_index, | normal_recaled_bam_index=normal_BQSR.recaled_bam_index, | ||||
normal_recal_table=normal_BQSR.recal_table, | normal_recal_table=normal_BQSR.recal_table, | ||||
tumor_recaled_bam=tumor_BQSR.recaled_bam, | tumor_recaled_bam=tumor_BQSR.recaled_bam, | ||||
tumor_recaled_bam_index=tumor_BQSR.recaled_bam_index, | tumor_recaled_bam_index=tumor_BQSR.recaled_bam_index, | ||||
tumor_recal_table=tumor_BQSR.recal_table, | tumor_recal_table=tumor_BQSR.recal_table, | ||||
normal_name=sample_id + "_normal", | |||||
tumor_name=sample_id + "_tumor", | |||||
fasta=fasta, | |||||
ref_dir=ref_dir, | |||||
regions=regions, | |||||
interval_padding=interval_padding, | |||||
dbsnp=dbsnp, | dbsnp=dbsnp, | ||||
dbsnp_dir=dbsnp_dir, | dbsnp_dir=dbsnp_dir, | ||||
set_pon=set_pon, | |||||
pon_vcf=pon_vcf, | |||||
cosmic_vcf=cosmic_vcf, | |||||
cosmic_dir=cosmic_dir, | |||||
tumor_name=sample_id + "_tumor", | |||||
normal_name=sample_id + "_normal", | |||||
pon_vcf=tnscope_pon, | |||||
docker=sentieon_docker, | docker=sentieon_docker, | ||||
sample=sample_id, | |||||
disk_size=disk_size, | |||||
cluster_config=cluster_config | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | } | ||||
call annovar.annovar as TNscope_annovar { | call annovar.annovar as TNscope_annovar { |