
first commit

master
YaqingLiu 2 years ago
parent commit 63217d1ae2
18 changed files with 1209 additions and 0 deletions
  1. +33 -0 defaults
  2. +33 -0 inputs
  3. +35 -0 tasks/ANNOVAR.wdl
  4. +35 -0 tasks/AnnotSV.wdl
  5. +56 -0 tasks/BQSR.wdl
  6. +96 -0 tasks/CNVkit.wdl
  7. +34 -0 tasks/Dedup.wdl
  8. +70 -0 tasks/HRD.wdl
  9. +51 -0 tasks/Haplotyper.wdl
  10. +42 -0 tasks/MSIsensor.wdl
  11. +58 -0 tasks/Manta.wdl
  12. +69 -0 tasks/Metrics.wdl
  13. +32 -0 tasks/TMB.wdl
  14. +79 -0 tasks/TNseq.wdl
  15. +29 -0 tasks/bcftools.wdl
  16. +62 -0 tasks/deduped_Metrics.wdl
  17. +50 -0 tasks/mapping.wdl
  18. +345 -0 workflow.wdl

+33 -0 defaults

@@ -0,0 +1,33 @@
{
"tumor_fastq_1": "",
"tumor_fastq_2": "",
"normal_fastq_1": "",
"normal_fastq_2": "",
"read_structure": "",
"duplex_umi": "",
"regions": "oss://genomics-platform-reference-data/bed/cbcga/S07604514_Padded.bed",
"interval_padding": "0",
"SENTIEON_LICENSE": "172.25.164.226:8990",
"fasta": "GRCh38_full_analysis_set_plus_decoy_hla.fa",
"ref_dir": "oss://ivd-product/reference/refGenome/",
"ref_flat": "oss://ivd-product/reference/refGenome/refFlat.hg38.txt",
"dbmills_dir": "oss://genomics-platform-reference-data/GRCh38.d1.vd1/",
"db_mills": "Mills_and_1000G_gold_standard.indels.hg38.vcf",
"dbsnp": "dbsnp_146.hg38.vcf",
"dbsnp_dir": "oss://genomics-platform-reference-data/GRCh38.d1.vd1/",
"annovar_database": "oss://genomics-platform-reference-data/annovar/",
"annotsv_database": "oss://ivd-product/reference/AnnotSV/",
"germline_resource": "oss://genomics-platform-reference-data/gnomAD/af-only-gnomad.v3.1.1.vcf.gz",
"germline_resource_tbi": "oss://genomics-platform-reference-data/gnomAD/af-only-gnomad.v3.1.1.vcf.gz.tbi",
"gc": "oss://ivd-product/reference/Sequenza/GRCh38.gc50Base.wig.gz",
"sentieon_docker": "registry.cn-shanghai.aliyuncs.com/choppy-pipe/sentieon-genomics:v202112.05",
"annovar_docker": "registry.cn-shanghai.aliyuncs.com/choppy-pipe/annovar:v20191024",
"manta_docker": "registry.cn-shanghai.aliyuncs.com/choppy-pipe/manta:1.6.0",
"annotsv_docker": "registry.cn-shanghai.aliyuncs.com/choppy-pipe/annotsv:3.1.3",
"cnvkit_docker": "registry.cn-shanghai.aliyuncs.com/choppy-pipe/cnvkit:0.9.8",
"sequenza_docker": "registry.cn-shanghai.aliyuncs.com/choppy-pipe/sequenza:3.0.0",
"msisensor_docker": "registry.cn-shanghai.aliyuncs.com/choppy-pipe/msisensor-pro:1.2.0",
"tmb_docker": "registry.cn-shanghai.aliyuncs.com/choppy-pipe/tmb_docker:1.0.0",
"disk_size": "200",
"cluster_config": "OnDemand bcs.a2.3xlarge img-ubuntu-vpc"
}

+33 -0 inputs

@@ -0,0 +1,33 @@
{
"{{ project_name }}.sample_id": "{{ sample_id }}",
"{{ project_name }}.tumor_fastq_1": "{{ tumor_fastq_1 }}",
"{{ project_name }}.tumor_fastq_2": "{{ tumor_fastq_2 }}",
"{{ project_name }}.normal_fastq_1": "{{ normal_fastq_1 }}",
"{{ project_name }}.normal_fastq_2": "{{ normal_fastq_2 }}",
"{{ project_name }}.read_structure": "{{ read_structure }}",
"{{ project_name }}.duplex_umi": "{{ duplex_umi }}",
"{{ project_name }}.SENTIEON_LICENSE": "{{ SENTIEON_LICENSE }}",
"{{ project_name }}.sentieon_docker": "{{ sentieon_docker }}",
"{{ project_name }}.manta_docker": "{{ manta_docker }}",
"{{ project_name }}.annovar_docker": "{{ annovar_docker }}",
"{{ project_name }}.annotsv_docker": "{{ annotsv_docker }}",
"{{ project_name }}.cnvkit_docker": "{{ cnvkit_docker }}",
"{{ project_name }}.sequenza_docker": "{{ sequenza_docker }}",
"{{ project_name }}.msisensor_docker": "{{ msisensor_docker }}",
"{{ project_name }}.tmb_docker": "{{ tmb_docker }}",
"{{ project_name }}.platform": "{{ platform }}",
"{{ project_name }}.fasta": "{{ fasta }}",
"{{ project_name }}.ref_dir": "{{ ref_dir }}",
"{{ project_name }}.dbsnp": "{{ dbsnp }}",
"{{ project_name }}.dbsnp_dir": "{{ dbsnp_dir }}",
"{{ project_name }}.dbmills_dir": "{{ dbmills_dir }}",
"{{ project_name }}.db_mills": "{{ db_mills }}",
"{{ project_name }}.germline_resource": "{{ germline_resource }}",
"{{ project_name }}.germline_resource_tbi": "{{ germline_resource_tbi }}",
"{{ project_name }}.regions": "{{ regions }}",
"{{ project_name }}.interval_padding": "{{ interval_padding }}",
"{{ project_name }}.annovar_database": "{{ annovar_database }}",
"{{ project_name }}.annotsv_database": "{{ annotsv_database }}",
"{{ project_name }}.disk_size": "{{ disk_size }}",
"{{ project_name }}.cluster_config": "{{ cluster_config }}"
}
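
The inputs file above is a Jinja-style template: each {{ ... }} placeholder is filled in from the defaults file plus per-run values (project_name, sample_id, platform) before the resulting JSON is handed to Cromwell. Below is a minimal standalone sketch of that rendering step, assuming plain Jinja2 semantics; the per-run values used here are hypothetical.

import json
from jinja2 import Template  # pip install jinja2

with open("defaults") as f:
    defaults = json.load(f)        # the shared values shown above
with open("inputs") as f:
    template = Template(f.read())  # the {{ ... }} template shown above

# project_name, sample_id and platform are per-run values, not in defaults;
# the values used here are hypothetical placeholders.
rendered = template.render(project_name="demo_project",
                           sample_id="S001",
                           platform="ILLUMINA",
                           **defaults)
inputs = json.loads(rendered)      # keys become "demo_project.sample_id", ...
print(json.dumps(inputs, indent=2))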

+35 -0 tasks/ANNOVAR.wdl

@@ -0,0 +1,35 @@
task ANNOVAR {

File vcf
String basename = basename(vcf,".vcf")
File annovar_database
String docker
String cluster_config
String disk_size

command <<<
set -o pipefail
set -e
nt=$(nproc)
/installations/annovar/table_annovar.pl ${vcf} \
${annovar_database} -buildver hg38 \
-out ${basename} -remove \
-protocol refGene,cytoBand,genomicSuperDups,clinvar_20220320,intervar_20180118,cosmic95_coding,cosmic95_noncoding,gnomad211_exome,dbnsfp42c,avsnp150 \
-operation g,r,r,f,f,f,f,f,f,f \
-nastring . -vcfinput -polish -thread $nt
>>>
runtime {
docker: docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File avinput = "${basename}.avinput"
File multianno_txt = "${basename}.hg38_multianno.txt"
File multianno_vcf = "${basename}.hg38_multianno.vcf"
}
}

+35 -0 tasks/AnnotSV.wdl

@@ -0,0 +1,35 @@
task AnnotSV {
String sample
File somatic_vcf
File? germline_vcf
File annotsv_database
String docker
String cluster_config
String disk_size

command <<<
set -o pipefail
set -e
nt=$(nproc)
export ANNOTSV=/opt/AnnotSV
# Annotate the somatic SVs ($ANNOTSV is a shell variable; referencing it without braces avoids a WDL placeholder lookup)
$ANNOTSV/bin/AnnotSV -SVinputFile ${somatic_vcf} -outputFile ${sample}.somatic.SV.annotated.tsv -genomeBuild GRCh38 -annotationsDir ${annotsv_database}
# Annotate the germline SVs when a germline VCF is provided (paired tumor-normal runs)
if [ ${germline_vcf} ]; then
$ANNOTSV/bin/AnnotSV -SVinputFile ${germline_vcf} -outputFile ${sample}.germline.SV.annotated.tsv -genomeBuild GRCh38 -annotationsDir ${annotsv_database}
fi
>>>
runtime {
docker: docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File AnnotSV_somatic_SV = "${sample}.somatic.SV.annotated.tsv"
File? AnnotSV_germline_SV = "${sample}.germline.SV.annotated.tsv"
}
}

+56 -0 tasks/BQSR.wdl

@@ -0,0 +1,56 @@
task BQSR {
File ref_dir
File dbsnp_dir
File dbmills_dir
String sample
String SENTIEON_LICENSE
String fasta
String dbsnp
String db_mills
File deduped_bam
File deduped_bam_index
String docker
String cluster_config
String disk_size
command <<<
set -o pipefail
set -e
export SENTIEON_LICENSE=${SENTIEON_LICENSE}
nt=$(nproc)
sentieon driver -t $nt \
-r ${ref_dir}/${fasta} -i ${deduped_bam} \
--algo QualCal \
-k ${dbsnp_dir}/${dbsnp} -k ${dbmills_dir}/${db_mills} \
${sample}_recal_data.table
sentieon driver -t $nt \
-r ${ref_dir}/${fasta} -i ${deduped_bam} -q ${sample}_recal_data.table \
--algo QualCal -k ${dbsnp_dir}/${dbsnp} -k ${dbmills_dir}/${db_mills} \
${sample}_recal_data.table.post --algo ReadWriter ${sample}.sorted.deduped.recaled.bam
sentieon driver -t $nt --algo QualCal \
--plot --before ${sample}_recal_data.table --after ${sample}_recal_data.table.post ${sample}_recal_data.csv
sentieon plot bqsr -o ${sample}_bqsrreport.pdf ${sample}_recal_data.csv
>>>
runtime {
docker: docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File recal_table = "${sample}_recal_data.table"
File recal_post = "${sample}_recal_data.table.post"
File recaled_bam = "${sample}.sorted.deduped.recaled.bam"
File recaled_bam_index = "${sample}.sorted.deduped.recaled.bam.bai"
File recal_csv = "${sample}_recal_data.csv"
File bqsrreport_pdf = "${sample}_bqsrreport.pdf"
}
}

+96 -0 tasks/CNVkit.wdl

@@ -0,0 +1,96 @@
task CNVkit {

String sample
File tumor_bam
File tumor_bam_index
File? normal_bam
File? normal_bam_index
File regions
File ref_dir
String fasta
File ref_flat
File hrd
String docker
String cluster_config
String disk_size
command <<<
set -o pipefail
set -e
nt=$(nproc)
mkdir -p /cromwell_root/tmp/cnvkit
cd /cromwell_root/tmp/cnvkit
cnvkit.py access ${ref_dir}/${fasta} -o access.bed
# Prepare the target bed
cnvkit.py target ${regions} --annotate ${ref_flat} --split --short-names -o my_baits.bed
if [ ${normal_bam} ]; then
cnvkit.py autobin ${tumor_bam} ${normal_bam} -t my_baits.bed -g access.bed
else
cnvkit.py autobin ${tumor_bam} -t my_baits.bed -g access.bed
fi
# For each sample...
cnvkit.py coverage ${tumor_bam} my_baits.target.bed -o ${sample}.T.targetcoverage.cnn
cnvkit.py coverage ${tumor_bam} my_baits.antitarget.bed -o ${sample}.T.antitargetcoverage.cnn
if [ ${normal_bam} ]; then
cnvkit.py coverage ${normal_bam} my_baits.target.bed -o ${sample}.N.targetcoverage.cnn
cnvkit.py coverage ${normal_bam} my_baits.antitarget.bed -o ${sample}.N.antitargetcoverage.cnn
# With paired or pooled normals
cnvkit.py reference *.N.{,anti}targetcoverage.cnn --fasta ${ref_dir}/${fasta} -o reference.cnn
else
# With no control sample
cnvkit.py reference -o reference.cnn -f ${ref_dir}/${fasta} -t my_baits.target.bed -a my_baits.antitarget.bed
fi
# For each tumor sample...
cnvkit.py fix ${sample}.T.targetcoverage.cnn ${sample}.T.antitargetcoverage.cnn reference.cnn -o ${sample}.cnr
cnvkit.py segment ${sample}.cnr -o ${sample}.cns
# Check noise
cnvkit.py metrics ${sample}.cnr -s ${sample}.cns > ${sample}.stats

# Derive each segment's absolute integer copy number; purity comes from the Sequenza/HRD output (column 6, row 2)
purity=`awk -F'\t' '{print $6}' ${hrd} | sed -n '2p'`
# Use $purity (shell variable) so Cromwell does not treat it as a WDL placeholder
cnvkit.py call ${sample}.cns -y -m clonal --purity $purity -o ${sample}.call.cns
# Plot the results
cnvkit.py scatter ${sample}.cnr -s ${sample}.call.cns -o ${sample}.scatter.pdf
cnvkit.py diagram ${sample}.cnr -s ${sample}.call.cns -o ${sample}.diagram.pdf
cnvkit.py heatmap ${sample}.cnr ${sample}.call.cns -o ${sample}.heatmap.pdf
# Genemetrics
mkdir gainloss
cnvkit.py genemetrics ${sample}.cnr -s ${sample}.call.cns -t 0 -m 0 -o ${sample}.cnv.txt
# Filter genes
cnvkit.py genemetrics ${sample}.cnr -t 0.2 -m 3 -o ${sample}.ratio-genes.txt
cnvkit.py genemetrics ${sample}.cnr -s ${sample}.call.cns -t 0.2 -m 3 -o ${sample}.segment-genes.txt
cat ${sample}.ratio-genes.txt | tail -n+2 | cut -f1 | sort | uniq > ratio-genes.txt
cat ${sample}.segment-genes.txt | tail -n+2 | cut -f1 | sort | uniq > segment-genes.txt
comm -12 ratio-genes.txt segment-genes.txt > trusted_cnv_genes.txt
for gene in `cat trusted_cnv_genes.txt`
do
cnvkit.py scatter ${sample}.cnr -s ${sample}.call.cns -g $gene -o ./gainloss/${sample}.$gene.scatter.pdf
done
>>>
runtime {
docker: docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File scatter_pdf = "/cromwell_root/tmp/cnvkit/${sample}.scatter.pdf"
File diagram_pdf = "/cromwell_root/tmp/cnvkit/${sample}.diagram.pdf"
File heatmap_pdf = "/cromwell_root/tmp/cnvkit/${sample}.heatmap.pdf"
File cnr = "/cromwell_root/tmp/cnvkit/${sample}.cnr"
File cns = "/cromwell_root/tmp/cnvkit/${sample}.cns"
File stats = "/cromwell_root/tmp/cnvkit/${sample}.stats"
File call_cns = "/cromwell_root/tmp/cnvkit/${sample}.call.cns"
File genemetrics = "/cromwell_root/tmp/cnvkit/${sample}.cnv.txt"
File gainloss_genes = "/cromwell_root/tmp/cnvkit/trusted_cnv_genes.txt"
Array[File] gainloss = glob("/cromwell_root/tmp/cnvkit/gainloss/*")
}
}

+34 -0 tasks/Dedup.wdl

@@ -0,0 +1,34 @@
task Dedup {
String SENTIEON_LICENSE
String sample
File sorted_bam
File sorted_bam_index
String docker
String cluster_config
String disk_size
command <<<
set -o pipefail
set -e
export SENTIEON_LICENSE=${SENTIEON_LICENSE}
nt=$(nproc)
sentieon driver -t $nt -i ${sorted_bam} --algo LocusCollector --fun score_info ${sample}_score.txt
sentieon driver -t $nt -i ${sorted_bam} --algo Dedup --rmdup --score_info ${sample}_score.txt --metrics ${sample}_dedup_metrics.txt ${sample}.sorted.deduped.bam
>>>
runtime {
docker: docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File score = "${sample}_score.txt"
File dedup_metrics = "${sample}_dedup_metrics.txt"
File deduped_bam = "${sample}.sorted.deduped.bam"
File deduped_bam_index = "${sample}.sorted.deduped.bam.bai"
}
}

+70 -0 tasks/HRD.wdl

@@ -0,0 +1,70 @@
task HRD {
String sample
File ref_dir
String fasta
File gc
File tumor_bam
File tumor_bam_index
File? normal_bam
File? normal_bam_index
String docker
String cluster_config
String disk_size
command <<<
set -o pipefail
set -e
nt=$(nproc)
output_dir="/cromwell_root/tmp"
mkdir ${output_dir}
seqz=${output_dir}'/'${sample}'.seqz.gz'
small=${output_dir}'/'${sample}'.small.seqz.gz'
# bam2seqz
sequenza-utils bam2seqz -gc ${gc} --fasta ${ref_dir}/${fasta} -n ${normal_bam} -t ${tumor_bam} -o $seqz -C chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX chrY --parallel 24
# merge and remove
cd $output_dir
zcat ${sample}_*.seqz.gz | awk '{if (NR == 1 || (NR != 1 && $1 != "chromosome")) {print $0}}' | bgzip > $seqz
tabix -f -s 1 -b 2 -e 2 -S 1 $seqz
rm ${sample}_*.seqz.gz; rm ${sample}_*.seqz.gz.tbi
# seqz_binning: WES: 50; WGS: 200
sequenza-utils seqz_binning --seqz $seqz -w 50 -o $small
tabix -f -s 1 -b 2 -e 2 -S 1 $small
# analysis in R (sequenza extraction, model fitting and scarHRD scoring)
Rscript ~/sequenza.r $output_dir ${sample}
>>>
runtime {
docker: docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
hrd="${sample}.HRD.txt"
alternative_fit="${sample}_alternative_fit.pdf"
alternative_solutions="${sample}_alternative_solutions.txt"
chromosome_depths="${sample}_chromosome_depths.pdf"
chromosome_view="${sample}_chromosome_view.pdf"
CN_bars="${sample}_CN_bars.pdf"
confints_CP="${sample}_confints_CP.txt"
contours_CP="${sample}_contours_CP.pdf"
CP_contours="${sample}_CP_contours.pdf"
gc_plots="${sample}_gc_plots.pdf"
genome_view="${sample}_genome_view.pdf"
model_fit="${sample}_model_fit.pdf"
mutations="${sample}_mutations.txt"
scarHRD_input="${sample}_scarHRD_input.txt"
segments="${sample}_segments.txt"
sequenza_cp_table="${sample}_sequenza_cp_table.RData"
sequenza_extract="${sample}_sequenza_extract.RData"
sequenza_log="${sample}_sequenza_log.txt"
small_seqz="${sample}.small.seqz.gz"
small_seqz_index="${sample}.small.seqz.gz.tbi"
}
}

+51 -0 tasks/Haplotyper.wdl

@@ -0,0 +1,51 @@
task Haplotyper {
File ref_dir
String fasta
File dbsnp_dir
String SENTIEON_LICENSE
File recaled_bam
File recaled_bam_index
String dbsnp
String sample
String docker
String cluster_config
String disk_size
File? regions
Int? interval_padding
command <<<
set -o pipefail
set -e
export SENTIEON_LICENSE=${SENTIEON_LICENSE}
nt=$(nproc)
if [ ${regions} ]; then
INTERVAL="--interval ${regions} --interval_padding ${interval_padding}"
else
INTERVAL=""
fi
sentieon driver -t $nt \
$INTERVAL -r ${ref_dir}/${fasta} \
-i ${recaled_bam} \
--algo Haplotyper -d ${dbsnp_dir}/${dbsnp} \
${sample}.Haplotyper.vcf
>>>
runtime {
docker: docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File vcf = "${sample}.Haplotyper.vcf"
File vcf_idx = "${sample}.Haplotyper.vcf.idx"
}
}

+42 -0 tasks/MSIsensor.wdl

@@ -0,0 +1,42 @@
task MSIsensor {
String sample
File ref_dir
String fasta
File tumor_bam
File tumor_bam_index
File? normal_bam
File? normal_bam_index
File baseline
String docker
String cluster_config
String disk_size

command <<<
set -o pipefail
set -e
nt=$(nproc)
# MSI
mkdir -p /cromwell_root/tmp
msisensor-pro scan -d ${ref_dir}/${fasta} -o reference.list
if [ ${normal_bam} ]; then
msisensor-pro msi -d reference.list -n ${normal_bam} -t ${tumor_bam} -o /cromwell_root/tmp/${sample}
else
msisensor-pro pro -d ${baseline} -t ${tumor_bam} -o /cromwell_root/tmp/${sample}
fi
cp /cromwell_root/tmp/${sample} ${sample}.MSI.txt
>>>
runtime {
docker: docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File msi = "${sample}.MSI.txt"
}
}

+58 -0 tasks/Manta.wdl

@@ -0,0 +1,58 @@
task Manta {
File ref_dir
String fasta
File regions
File tumor_bam
File tumor_bam_index
File? normal_bam
File? normal_bam_index
String sample
String docker
String cluster_config
String disk_size
command <<<
set -o pipefail
set -e
nt=$(nproc)
MANTA_INSTALL_PATH="/opt/manta-1.6.0.centos6_x86_64"
MANTA_ANALYSIS_PATH="/cromwell_root/tmp"
mkdir -p $MANTA_ANALYSIS_PATH
# Manta requires the callRegions BED to be bgzip-compressed and tabix-indexed (bgzip/tabix assumed available in the image)
sort -k1,1 -k2,2n ${regions} | bgzip -c > $MANTA_ANALYSIS_PATH/regions.bed.gz
tabix -p bed $MANTA_ANALYSIS_PATH/regions.bed.gz
# input files
if [ ${normal_bam} ]; then
INPUT="--normalBam ${normal_bam} --tumorBam ${tumor_bam}"
else
INPUT="--tumorBam ${tumor_bam}"
fi
# configManta: shell variables are referenced as $var so Cromwell does not treat them as WDL placeholders
$MANTA_INSTALL_PATH/bin/configManta.py \
$INPUT \
--callRegions $MANTA_ANALYSIS_PATH/regions.bed.gz --exome \
--referenceFasta ${ref_dir}/${fasta} \
--runDir $MANTA_ANALYSIS_PATH
# runWorkflow
$MANTA_ANALYSIS_PATH/runWorkflow.py -j $nt
# results
if [ ${normal_bam} ]; then
cp $MANTA_ANALYSIS_PATH/results/variants/somaticSV.vcf.gz ${sample}.Manta.somaticSV.vcf.gz
cp $MANTA_ANALYSIS_PATH/results/variants/diploidSV.vcf.gz ${sample}.Manta.germlineSV.vcf.gz
else
cp $MANTA_ANALYSIS_PATH/results/variants/tumorSV.vcf.gz ${sample}.Manta.somaticSV.vcf.gz
fi
>>>
runtime {
docker: docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File somatic_vcf = "${sample}.Manta.somaticSV.vcf.gz"
File? germline_vcf = "${sample}.Manta.germlineSV.vcf.gz"
}
}

+69 -0 tasks/Metrics.wdl

@@ -0,0 +1,69 @@
task Metrics {
File ref_dir
String SENTIEON_LICENSE
String sample
String docker
String cluster_config
String fasta
File sorted_bam
File sorted_bam_index
String disk_size
File? regions
command <<<
set -o pipefail
set -e
export SENTIEON_LICENSE=${SENTIEON_LICENSE}
nt=$(nproc)
if [ ${regions} ]; then
INTERVAL="--interval ${regions}"
else
INTERVAL=""
fi
sentieon driver -t $nt \
-r ${ref_dir}/${fasta} $INTERVAL \
-i ${sorted_bam} \
--algo CoverageMetrics --omit_base_output ${sample}_coverage_metrics \
--algo MeanQualityByCycle ${sample}_mq_metrics.txt \
--algo QualDistribution ${sample}_qd_metrics.txt \
--algo GCBias --summary ${sample}_gc_summary.txt ${sample}_gc_metrics.txt \
--algo AlignmentStat ${sample}_aln_metrics.txt \
--algo InsertSizeMetricAlgo ${sample}_is_metrics.txt \
--algo QualityYield ${sample}_QualityYield.txt \
--algo WgsMetricsAlgo ${sample}_WgsMetricsAlgo.txt
sentieon plot metrics -o ${sample}_metrics_report.pdf gc=${sample}_gc_metrics.txt qd=${sample}_qd_metrics.txt mq=${sample}_mq_metrics.txt isize=${sample}_is_metrics.txt
>>>
runtime {
docker: docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File coverage_metrics_sample_summary = "${sample}_coverage_metrics.sample_summary"
File coverage_metrics_sample_statistics = "${sample}_coverage_metrics.sample_statistics"
File coverage_metrics_sample_interval_statistics = "${sample}_coverage_metrics.sample_interval_statistics"
File coverage_metrics_sample_cumulative_coverage_proportions = "${sample}_coverage_metrics.sample_cumulative_coverage_proportions"
File coverage_metrics_sample_cumulative_coverage_counts = "${sample}_coverage_metrics.sample_cumulative_coverage_counts"
File qd_metrics = "${sample}_qd_metrics.txt"
File mq_metrics = "${sample}_mq_metrics.txt"
File is_metrics = "${sample}_is_metrics.txt"
File gc_summary = "${sample}_gc_summary.txt"
File gc_metrics = "${sample}_gc_metrics.txt"
File aln_metrics = "${sample}_aln_metrics.txt"
File QualityYield = "${sample}_QualityYield.txt"
File wgsmetrics = "${sample}_WgsMetricsAlgo.txt"
File qd_metrics_pdf = "${sample}_qd_metrics.pdf"
File mq_metrics_pdf = "${sample}_mq_metrics.pdf"
File is_metrics_pdf = "${sample}_is_metrics.pdf"
File gc_metrics_pdf = "${sample}_gc_metrics.pdf"
}
}

+32 -0 tasks/TMB.wdl

@@ -0,0 +1,32 @@
task TMB {

String sample
File regions
File snpindel_txt
String docker
String cluster_config
String disk_size
command <<<
set -o pipefail
set -e
nt=$(nproc)
sort -k1,1 -k2,2n ${regions} | bedtools merge -i - > merged.bed
# total size (bp) of the merged target regions; use $size so Cromwell does not treat it as a WDL placeholder
size=`awk -F'\t' 'BEGIN{SUM=0}{SUM+=$3-$2}END{print SUM}' merged.bed`
# analysis in python
python ~/tmb.py ${snpindel_txt} $size ${sample}
>>>
runtime {
docker: docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
snp_indel="${sample}.snp_indel.txt"
tmb="${sample}.TMB.txt"
}
}
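
tasks/TMB.wdl calls ~/tmb.py, which ships inside the tmb_docker image and is not part of this commit. The sketch below is only a hypothetical reconstruction of its interface: an ANNOVAR hg38_multianno table in, ${sample}.snp_indel.txt and ${sample}.TMB.txt out, with TMB computed as mutations per megabase of the merged target size. The variant filter and column names are assumptions.

#!/usr/bin/env python3
"""Hypothetical sketch of tmb.py; the real script may filter differently."""
import csv
import sys

def main(multianno_txt, panel_size_bp, sample):
    kept = []
    with open(multianno_txt) as f:
        for row in csv.DictReader(f, delimiter="\t"):
            # Assumed filter: exonic, non-synonymous variants only.
            if (row.get("Func.refGene") == "exonic"
                    and row.get("ExonicFunc.refGene") != "synonymous SNV"):
                kept.append(row)
    tmb = len(kept) * 1e6 / float(panel_size_bp)  # mutations per megabase
    with open(sample + ".snp_indel.txt", "w") as out:
        writer = csv.DictWriter(out, fieldnames=kept[0].keys() if kept else [],
                                delimiter="\t")
        writer.writeheader()
        writer.writerows(kept)
    with open(sample + ".TMB.txt", "w") as out:
        out.write("sample\tmutations\tsize_bp\tTMB\n")
        out.write("%s\t%d\t%s\t%.2f\n" % (sample, len(kept), panel_size_bp, tmb))

if __name__ == "__main__":
    main(sys.argv[1], sys.argv[2], sys.argv[3])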

+79 -0 tasks/TNseq.wdl

@@ -0,0 +1,79 @@
task TNseq {
String sample
String SENTIEON_LICENSE
File tumor_bam
File tumor_bam_index
File? normal_bam
File? normal_bam_index
String tumor_name
String normal_name
File ref_dir
String fasta
File germline_resource
File germline_resource_tbi
File? regions
Int? interval_padding
String docker
String cluster_config
String disk_size

command <<<
set -o pipefail
set -e
export SENTIEON_LICENSE=${SENTIEON_LICENSE}
nt=$(nproc)
if [ ${regions} ]; then
INTERVAL="--interval ${regions} --interval_padding ${interval_padding}"
else
INTERVAL=""
fi
if [ ${normal_bam} ]; then
INPUT="-i ${tumor_bam} -i ${normal_bam}"
SAMPLE="--tumor_sample ${tumor_name} --normal_sample ${normal_name}"
else
INPUT="-i ${tumor_bam}"
SAMPLE="--tumor_sample ${tumor_name}"
fi
sentieon driver -t $nt -r ${ref_dir}/${fasta} \
$INPUT $INTERVAL \
--algo TNhaplotyper2 $SAMPLE \
--germline_vcf ${germline_resource} \
${sample}.TNseq.raw.vcf \
--algo OrientationBias --tumor_sample ${tumor_name} \
${sample}.orientation \
--algo ContaminationModel $SAMPLE \
--vcf ${germline_resource} \
--tumor_segments ${sample}.contamination.segments \
${sample}.contamination
sentieon driver -t $nt \
-r ${ref_dir}/${fasta} \
--algo TNfilter $SAMPLE \
-v ${sample}.TNseq.raw.vcf \
--contamination ${sample}.contamination \
--tumor_segments ${sample}.contamination.segments \
--orientation_priors ${sample}.orientation \
${sample}.TNseq.vcf
>>>
runtime {
docker: docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File vcf = "${sample}.TNseq.vcf"
File vcf_index = "${sample}.TNseq.vcf.idx"
File contamination = "${sample}.contamination"
File contamination_segments = "${sample}.contamination.segments"
File orientation = "${sample}.orientation"
}
}

+29 -0 tasks/bcftools.wdl

@@ -0,0 +1,29 @@
task bcftools {
File ref_dir
String fasta
File vcf
String basename = basename(vcf,".vcf")
String docker
String cluster_config
String disk_size

command <<<
set -o pipefail
set -e
nt=$(nproc)
bcftools norm -m -both ${vcf} | bcftools norm -f ${ref_dir}/${fasta} -Ov -o ${basename}.norm.vcf
>>>
runtime {
docker: docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File norm_vcf = "${basename}.norm.vcf"
}
}

+62 -0 tasks/deduped_Metrics.wdl

@@ -0,0 +1,62 @@
task deduped_Metrics {
File ref_dir
String SENTIEON_LICENSE
String sample
String fasta
File deduped_bam
File deduped_bam_index
String docker
String cluster_config
String disk_size
File? regions
command <<<
set -o pipefail
set -e
export SENTIEON_LICENSE=${SENTIEON_LICENSE}
nt=$(nproc)
if [ ${regions} ]; then
INTERVAL="--interval ${regions}"
else
INTERVAL=""
fi
sentieon driver -t $nt \
-r ${ref_dir}/${fasta} $INTERVAL \
-i ${deduped_bam} \
--algo CoverageMetrics --omit_base_output ${sample}_deduped_coverage_metrics \
--algo MeanQualityByCycle ${sample}_deduped_mq_metrics.txt \
--algo QualDistribution ${sample}_deduped_qd_metrics.txt \
--algo GCBias --summary ${sample}_deduped_gc_summary.txt ${sample}_deduped_gc_metrics.txt \
--algo AlignmentStat ${sample}_deduped_aln_metrics.txt \
--algo InsertSizeMetricAlgo ${sample}_deduped_is_metrics.txt \
--algo QualityYield ${sample}_deduped_QualityYield.txt \
--algo WgsMetricsAlgo ${sample}_deduped_WgsMetricsAlgo.txt
>>>
runtime {
docker: docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File deduped_coverage_metrics_sample_summary = "${sample}_deduped_coverage_metrics.sample_summary"
File deduped_coverage_metrics_sample_statistics = "${sample}_deduped_coverage_metrics.sample_statistics"
File deduped_coverage_metrics_sample_interval_statistics = "${sample}_deduped_coverage_metrics.sample_interval_statistics"
File deduped_coverage_metrics_sample_cumulative_coverage_proportions = "${sample}_deduped_coverage_metrics.sample_cumulative_coverage_proportions"
File deduped_coverage_metrics_sample_cumulative_coverage_counts = "${sample}_deduped_coverage_metrics.sample_cumulative_coverage_counts"
File deduped_mean_quality = "${sample}_deduped_mq_metrics.txt"
File deduped_qd_metrics = "${sample}_deduped_qd_metrics.txt"
File deduped_gc_summary = "${sample}_deduped_gc_summary.txt"
File deduped_gc_metrics = "${sample}_deduped_gc_metrics.txt"
File deduped_aln_metrics = "${sample}_deduped_aln_metrics.txt"
File deduped_is_metrics = "${sample}_deduped_is_metrics.txt"
File deduped_QualityYield = "${sample}_deduped_QualityYield.txt"
File deduped_wgsmetrics = "${sample}_deduped_WgsMetricsAlgo.txt"
}
}

+50 -0 tasks/mapping.wdl

@@ -0,0 +1,50 @@
task mapping {
File ref_dir
String fasta
File fastq_1
File fastq_2
String SENTIEON_LICENSE
String group
String sample
String platform
String? read_structure
String? duplex_umi
String docker
String cluster_config
String disk_size
command <<<
set -o pipefail
set -e
export SENTIEON_LICENSE=${SENTIEON_LICENSE}
nt=$(nproc)
if [ ${read_structure} ]; then
# UMI-aware mapping: extract UMIs, align, build consensus reads, then re-align the consensus FASTQ
if [ "${duplex_umi}" == "true" ]; then
READ_STRUCTURE="-d ${read_structure}"
else
READ_STRUCTURE="${read_structure}"
fi
sentieon umi extract $READ_STRUCTURE ${fastq_1} ${fastq_2} | \
sentieon bwa mem -p -C -R "@RG\tID:${group}\tSM:${sample}\tPL:${platform}" -t $nt -K 10000000 ${ref_dir}/${fasta} - | \
sentieon umi consensus -o ${sample}.umi_consensus.fastq.gz
sentieon bwa mem -p -C -R "@RG\tID:${group}\tSM:${sample}\tPL:${platform}" -t $nt -K 10000000 ${ref_dir}/${fasta} ${sample}.umi_consensus.fastq.gz | \
sentieon util sort --umi_post_process --sam2bam -i - -o ${sample}.sorted.bam
else
sentieon bwa mem -R "@RG\tID:${group}\tSM:${sample}\tPL:${platform}" \
-t $nt -K 10000000 ${ref_dir}/${fasta} ${fastq_1} ${fastq_2} | \
sentieon util sort -o ${sample}.sorted.bam -t $nt --sam2bam -i -
fi
>>>
runtime {
docker: docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File sorted_bam = "${sample}.sorted.bam"
File sorted_bam_index = "${sample}.sorted.bam.bai"
}
}

+345 -0 workflow.wdl

@@ -0,0 +1,345 @@
import "./tasks/mapping.wdl" as mapping
import "./tasks/Metrics.wdl" as Metrics
import "./tasks/Dedup.wdl" as Dedup
import "./tasks/deduped_Metrics.wdl" as deduped_Metrics
import "./tasks/BQSR.wdl" as BQSR
import "./tasks/Haplotyper.wdl" as Haplotyper
import "./tasks/TNseq.wdl" as TNseq
import "./tasks/bcftools.wdl" as bcftools
import "./tasks/ANNOVAR.wdl" as ANNOVAR
import "./tasks/Manta.wdl" as Manta
import "./tasks/AnnotSV.wdl" as AnnotSV
import "./tasks/CNVkit.wdl" as CNVkit
import "./tasks/MSIsensor.wdl" as MSIsensor
import "./tasks/HRD.wdl" as HRD
import "./tasks/TMB.wdl" as TMB

workflow {{ project_name }} {
String sample_id
File? tumor_fastq_1
File? tumor_fastq_2
File? normal_fastq_1
File? normal_fastq_2
String? duplex_umi
String? read_structure
String SENTIEON_LICENSE
String sentieon_docker
String annovar_docker
String manta_docker
String annotsv_docker
String cnvkit_docker
String sequenza_docker
String msisensor_docker
String tmb_docker
String bcftools_docker
String platform
File ref_dir
String fasta
File dbmills_dir
String db_mills
File dbsnp_dir
String dbsnp
File germline_resource
File germline_resource_tbi
File annovar_database
File annotsv_database
File ref_flat
File gc
File baseline
File? regions
Int? interval_padding
String disk_size
String cluster_config
if (tumor_fastq_1 != "") {
call mapping.mapping as tumor_mapping {
input:
group=sample_id + '.T',
sample=sample_id + '.T',
fastq_1=tumor_fastq_1,
fastq_2=tumor_fastq_2,
read_structure=read_structure,
duplex_umi=duplex_umi,
SENTIEON_LICENSE=SENTIEON_LICENSE,
platform=platform,
fasta=fasta,
ref_dir=ref_dir,
docker=sentieon_docker,
disk_size=disk_size,
cluster_config=cluster_config
}

call Metrics.Metrics as tumor_Metrics {
input:
SENTIEON_LICENSE=SENTIEON_LICENSE,
fasta=fasta,
ref_dir=ref_dir,
sorted_bam=tumor_mapping.sorted_bam,
sorted_bam_index=tumor_mapping.sorted_bam_index,
sample=sample_id + '.T',
regions=regions,
docker=sentieon_docker,
disk_size=disk_size,
cluster_config=cluster_config
}

call Dedup.Dedup as tumor_Dedup {
input:
SENTIEON_LICENSE=SENTIEON_LICENSE,
sorted_bam=tumor_mapping.sorted_bam,
sorted_bam_index=tumor_mapping.sorted_bam_index,
sample=sample_id + '.T',
docker=sentieon_docker,
disk_size=disk_size,
cluster_config=cluster_config
}

call deduped_Metrics.deduped_Metrics as tumor_deduped_Metrics {
input:
SENTIEON_LICENSE=SENTIEON_LICENSE,
fasta=fasta,
ref_dir=ref_dir,
deduped_bam=tumor_Dedup.deduped_bam,
deduped_bam_index=tumor_Dedup.deduped_bam_index,
sample=sample_id + '.T',
regions=regions,
docker=sentieon_docker,
disk_size=disk_size,
cluster_config=cluster_config
}

call BQSR.BQSR as tumor_BQSR {
input:
SENTIEON_LICENSE=SENTIEON_LICENSE,
fasta=fasta,
ref_dir=ref_dir,
deduped_bam=tumor_Dedup.deduped_bam,
deduped_bam_index=tumor_Dedup.deduped_bam_index,
db_mills=db_mills,
dbmills_dir=dbmills_dir,
dbsnp=dbsnp,
dbsnp_dir=dbsnp_dir,
sample=sample_id + '.T',
docker=sentieon_docker,
disk_size=disk_size,
cluster_config=cluster_config
}
}
if (normal_fastq_1 != "") {
call mapping.mapping as normal_mapping {
input:
group=sample_id + '.N',
sample=sample_id + '.N',
fastq_1=normal_fastq_1,
fastq_2=normal_fastq_2,
read_structure=read_structure,
duplex_umi=duplex_umi,
SENTIEON_LICENSE=SENTIEON_LICENSE,
platform=platform,
fasta=fasta,
ref_dir=ref_dir,
docker=sentieon_docker,
disk_size=disk_size,
cluster_config=cluster_config
}

call Metrics.Metrics as normal_Metrics {
input:
SENTIEON_LICENSE=SENTIEON_LICENSE,
fasta=fasta,
ref_dir=ref_dir,
sorted_bam=normal_mapping.sorted_bam,
sorted_bam_index=normal_mapping.sorted_bam_index,
sample=sample_id + '.N',
regions=regions,
docker=sentieon_docker,
disk_size=disk_size,
cluster_config=cluster_config
}

call Dedup.Dedup as normal_Dedup {
input:
SENTIEON_LICENSE=SENTIEON_LICENSE,
sorted_bam=normal_mapping.sorted_bam,
sorted_bam_index=normal_mapping.sorted_bam_index,
sample=sample_id + '.N',
docker=sentieon_docker,
disk_size=disk_size,
cluster_config=cluster_config
}

call deduped_Metrics.deduped_Metrics as normal_deduped_Metrics {
input:
SENTIEON_LICENSE=SENTIEON_LICENSE,
fasta=fasta,
ref_dir=ref_dir,
deduped_bam=normal_Dedup.deduped_bam,
deduped_bam_index=normal_Dedup.deduped_bam_index,
sample=sample_id + '.N',
regions=regions,
docker=sentieon_docker,
disk_size=disk_size,
cluster_config=cluster_config
}

call BQSR.BQSR as normal_BQSR {
input:
SENTIEON_LICENSE=SENTIEON_LICENSE,
fasta=fasta,
ref_dir=ref_dir,
deduped_bam=normal_Dedup.deduped_bam,
deduped_bam_index=normal_Dedup.deduped_bam_index,
db_mills=db_mills,
dbmills_dir=dbmills_dir,
dbsnp=dbsnp,
dbsnp_dir=dbsnp_dir,
sample=sample_id + '.N',
docker=sentieon_docker,
disk_size=disk_size,
cluster_config=cluster_config
}

call Haplotyper.Haplotyper as Haplotyper {
input:
SENTIEON_LICENSE=SENTIEON_LICENSE,
sample=sample_id + '.N',
fasta=fasta,
ref_dir=ref_dir,
recaled_bam=normal_BQSR.recaled_bam,
recaled_bam_index=normal_BQSR.recaled_bam_index,
dbsnp=dbsnp,
dbsnp_dir=dbsnp_dir,
regions=regions,
interval_padding=interval_padding,
docker=sentieon_docker,
disk_size=disk_size,
cluster_config=cluster_config
}

call bcftools.bcftools as Haplotyper_bcftools {
input:
vcf=Haplotyper.vcf,
fasta=fasta,
ref_dir=ref_dir,
docker=bcftools_docker,
cluster_config=cluster_config,
disk_size=disk_size
}
}
call Manta.Manta as Manta {
input:
sample=sample_id,
fasta=fasta,
ref_dir=ref_dir,
regions=regions,
normal_bam=normal_BQSR.recaled_bam,
normal_bam_index=normal_BQSR.recaled_bam_index,
tumor_bam=tumor_BQSR.recaled_bam,
tumor_bam_index=tumor_BQSR.recaled_bam_index,
docker=manta_docker,
cluster_config=cluster_config,
disk_size=disk_size
}
call AnnotSV.AnnotSV as Manta_AnnotSV {
input:
sample=sample_id,
somatic_vcf=Manta.somatic_vcf,
germline_vcf=Manta.germline_vcf,
annotsv_database=annotsv_database,
docker=annotsv_docker,
cluster_config=cluster_config,
disk_size=disk_size
}
call TNseq.TNseq as TNseq {
input:
SENTIEON_LICENSE=SENTIEON_LICENSE,
sample=sample_id,
normal_bam=normal_BQSR.recaled_bam,
normal_bam_index=normal_BQSR.recaled_bam_index,
tumor_bam=tumor_BQSR.recaled_bam,
tumor_bam_index=tumor_BQSR.recaled_bam_index,
normal_name=sample_id + ".N",
tumor_name=sample_id + ".T",
fasta=fasta,
ref_dir=ref_dir,
regions=regions,
interval_padding=interval_padding,
germline_resource=germline_resource,
germline_resource_tbi=germline_resource_tbi,
docker=sentieon_docker,
cluster_config=cluster_config,
disk_size=disk_size
}
call bcftools.bcftools as TNseq_bcftools {
input:
vcf=TNseq.vcf,
fasta=fasta,
ref_dir=ref_dir,
docker=bcftools_docker,
cluster_config=cluster_config,
disk_size=disk_size
}
call ANNOVAR.ANNOVAR as TNseq_ANNOVAR {
input:
vcf=TNseq_bcftools.norm_vcf,
annovar_database=annovar_database,
docker=annovar_docker,
cluster_config=cluster_config,
disk_size=disk_size
}
call CNVkit.CNVkit as CNVkit {
input:
sample=sample_id,
fasta=fasta,
ref_dir=ref_dir,
regions=regions,
ref_flat=ref_flat,
hrd=HRD.hrd,
normal_bam=normal_BQSR.recaled_bam,
normal_bam_index=normal_BQSR.recaled_bam_index,
tumor_bam=tumor_BQSR.recaled_bam,
tumor_bam_index=tumor_BQSR.recaled_bam_index,
docker=cnvkit_docker,
cluster_config=cluster_config,
disk_size=disk_size
}
call MSIsensor.MSIsensor as MSIsensor {
input:
sample=sample_id,
fasta=fasta,
ref_dir=ref_dir,
normal_bam=normal_BQSR.recaled_bam,
normal_bam_index=normal_BQSR.recaled_bam_index,
tumor_bam=tumor_BQSR.recaled_bam,
tumor_bam_index=tumor_BQSR.recaled_bam_index,
baseline=baseline,
docker=msisensor_docker,
cluster_config=cluster_config,
disk_size=disk_size
}
call HRD.HRD as HRD {
input:
sample=sample_id,
fasta=fasta,
ref_dir=ref_dir,
gc=gc,
normal_bam=normal_BQSR.recaled_bam,
normal_bam_index=normal_BQSR.recaled_bam_index,
tumor_bam=tumor_BQSR.recaled_bam,
tumor_bam_index=tumor_BQSR.recaled_bam_index,
docker=sequenza_docker,
cluster_config=cluster_config,
disk_size=disk_size
}
call TMB.TMB as TMB {
input:
sample=sample_id,
regions=regions,
snpindel_txt=TNseq_ANNOVAR.multianno_txt,
docker=tmb_docker,
cluster_config=cluster_config,
disk_size=disk_size
}
}
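
Once the template placeholders are rendered (including the {{ project_name }} workflow name), the workflow can be submitted to a Cromwell server. Below is a minimal submission sketch against Cromwell's REST API; the server address is a deployment-specific assumption, and the tasks/*.wdl imports are bundled as a dependencies zip so Cromwell can resolve them.

import io
import zipfile
import requests  # pip install requests

CROMWELL = "http://localhost:8000"  # assumed server address

# Bundle the imported task files (run from the repository root).
deps = io.BytesIO()
with zipfile.ZipFile(deps, "w") as z:
    for name in ("mapping", "Metrics", "Dedup", "deduped_Metrics", "BQSR",
                 "Haplotyper", "TNseq", "bcftools", "ANNOVAR", "Manta",
                 "AnnotSV", "CNVkit", "MSIsensor", "HRD", "TMB"):
        z.write("tasks/%s.wdl" % name)
deps.seek(0)

resp = requests.post(
    CROMWELL + "/api/workflows/v1",
    files={
        "workflowSource": open("workflow.wdl", "rb"),
        "workflowInputs": open("inputs.json", "rb"),
        "workflowDependencies": ("tasks.zip", deps, "application/zip"),
    },
)
resp.raise_for_status()
print(resp.json())  # e.g. {"id": "<workflow-id>", "status": "Submitted"}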
