LUYAO REN před 4 roky
rodič
revize
0b7a5dd9ba
11 změnil soubory, kde provedl 272 přidání a 117 odebrání
  1. +0
    -66
      codescripts/extract_multiqc.py
  2. +101
    -0
      codescripts/post-alignment.py
  3. +132
    -0
      codescripts/pre_alignment.py
  4. +3
    -0
      tasks/Metrics.wdl
  5. +4
    -4
      tasks/benchmark.wdl
  6. +9
    -1
      tasks/deduped_Metrics.wdl
  7. +1
    -2
      tasks/extract_tables.wdl
  8. +13
    -10
      tasks/mergeSentieon.wdl
  9. +1
    -3
      tasks/multiqc.wdl
  10. +2
    -3
      tasks/sentieon.wdl
  11. +6
    -28
      workflow.wdl

+ 0
- 66
codescripts/extract_multiqc.py Zobrazit soubor

"""Extract per-tool summary tables from MultiQC output.

Reads four MultiQC data files (general stats, FastQC module table,
FastQ Screen table, hap.py JSON) and writes one TSV per tool:
``fastqc/fastqscreen/qualimap/benchmark .final.result.txt``.
"""
import json
import pandas as pd
import sys, argparse, os

parser = argparse.ArgumentParser(description="This script is to get information from multiqc")

parser.add_argument('-fastqc_qualimap', '--fastqc_qualimap', type=str, help='multiqc_general_stats.txt', required=True)
parser.add_argument('-fastqc', '--fastqc', type=str, help='multiqc_fastqc.txt', required=True)
parser.add_argument('-fastqscreen', '--fastqscreen', type=str, help='multiqc_fastq_screen.txt', required=True)
parser.add_argument('-hap', '--happy', type=str, help='multiqc_happy_data.json', required=True)

args = parser.parse_args()

# Rename inputs for readability.
fastqc_qualimap_file = args.fastqc_qualimap
fastqc_file = args.fastqc
fastqscreen_file = args.fastqscreen
hap_file = args.happy

# FastQC and Qualimap both live in multiqc_general_stats.txt; split them
# apart by their column-name prefixes.
dat = pd.read_table(fastqc_qualimap_file)

fastqc = dat.loc[:, dat.columns.str.startswith('FastQC')]
fastqc.insert(loc=0, column='Sample', value=dat['Sample'])
fastqc_stat = fastqc.dropna()  # rows carrying only Qualimap values become NaN here

# Qualimap columns from the same general-stats table.
qualimap = dat.loc[:, dat.columns.str.startswith('QualiMap')]
qualimap.insert(loc=0, column='Sample', value=dat['Sample'])
qualimap_stat = qualimap.dropna()

# FastQC per-module status table (per_base_sequence_quality ... kmer_content).
dat = pd.read_table(fastqc_file)

fastqc_module = dat.loc[:, "per_base_sequence_quality":"kmer_content"]
fastqc_module.insert(loc=0, column='Sample', value=dat['Sample'])
fastqc_all = pd.merge(fastqc_stat, fastqc_module, how='outer', on='Sample')

# FastQ Screen contamination percentages; strip the '_screen' suffix MultiQC
# appends to sample names so they match the other tables.
dat = pd.read_table(fastqscreen_file)
fastqscreen = dat.loc[:, dat.columns.str.endswith('percentage')]
dat['Sample'] = [i.replace('_screen', '') for i in dat['Sample']]
fastqscreen.insert(loc=0, column='Sample', value=dat['Sample'])

# hap.py benchmark results: keep only '*ALL' columns, then transpose so
# each row is a sample with Precision/Recall columns.
with open(hap_file) as hap_json:
    happy = json.load(hap_json)
dat = pd.DataFrame.from_records(happy)
dat = dat.loc[:, dat.columns.str.endswith('ALL')]
dat_transposed = dat.T
benchmark = dat_transposed.loc[:, ['sample_id', 'METRIC.Precision', 'METRIC.Recall']]
benchmark['sample_id'] = benchmark.index  # sample name is carried on the index
benchmark.columns = ['Sample', 'Precision', 'Recall']

# Outputs: one TSV per tool.
fastqc_all.to_csv('fastqc.final.result.txt', sep="\t", index=False)
fastqscreen.to_csv('fastqscreen.final.result.txt', sep="\t", index=False)
qualimap_stat.to_csv('qualimap.final.result.txt', sep="\t", index=False)
benchmark.to_csv('benchmark.final.result.txt', sep="\t", index=False)







+ 101
- 0
codescripts/post-alignment.py Zobrazit soubor

"""Summarise post-alignment QC metrics into a table plus plot-data TSVs.

NOTE(review): this script appears to be a half-edited copy of
pre_alignment.py — it references ``fastqc_stat``, ``html_file``,
``fastqscreen_file`` and ``json_file``, none of which are defined from the
arguments parsed below, so it will fail with NameError at runtime.
TODO: confirm the intended inputs and wire them through argparse.
"""
import json
import pandas as pd
import sys, argparse, os
import statistics

parser = argparse.ArgumentParser(description="This script is to summary information for pre-alignment QC")

parser.add_argument('-general', '--general_stat', type=str, help='multiqc_general_stats.txt', required=True)
parser.add_argument('-is', '--is_metrics', type=str, help='_is_metrics.txt', required=True)
parser.add_argument('-wgsmetrics', '--WgsMetricsAlgo', type=str, help='deduped_WgsMetricsAlgo', required=True)
parser.add_argument('-qualityyield', '--QualityYield', type=str, help='deduped_QualityYield', required=True)
parser.add_argument('-aln', '--aln_metrics', type=str, help='aln_metrics.txt', required=True)

args = parser.parse_args()

general_file = args.general_stat
is_file = args.is_metrics
# Fix: argparse stores these under the long-option dests, so the original
# ``args.wgsmetrics`` / ``args.qualityyield`` raised AttributeError.
wgsmetrics_file = args.WgsMetricsAlgo
qualityyield_file = args.QualityYield
aln_file = args.aln_metrics

##### Table
## general stat: % GC
dat = pd.read_table(general_file)
qualimap = dat.loc[:, dat.columns.str.startswith('QualiMap')]
qualimap.insert(loc=0, column='Sample', value=dat['Sample'])
qualimap_stat = qualimap.dropna()
# NOTE(review): fastqc_stat is undefined here (leftover from pre_alignment.py).
part1 = fastqc_stat.loc[:, ['Sample', 'FastQC_mqc-generalstats-fastqc-percent_duplicates', 'FastQC_mqc-generalstats-fastqc-total_sequences']]

## is_metrics: median insert size
## deduped_WgsMetricsAlgo: 1x, 5x, 10x, 30x, median coverage
# NOTE(review): html_file is undefined in this script — TODO confirm input.
with open(html_file) as file:
    origDict = json.load(file)
# Flatten the two-level {sample: {position: counts}} dict into a frame
# indexed by (sample, position).
newdict = {(k1, k2): v2 for k1, v1 in origDict.items()
           for k2, v2 in origDict[k1].items()}
df = pd.DataFrame([newdict[i] for i in sorted(newdict)],
                  index=pd.MultiIndex.from_tuples([i for i in sorted(newdict.keys())]))
gc = []
at = []
for i in part1['Sample']:
    sub_df = df.loc[i, :]
    # Mean per-position base ratios; assumes columns 'a'/'t'/'g'/'c' hold
    # per-position percentages — TODO confirm against the extracted JSON.
    gc.append(statistics.mean(sub_df['g'] / sub_df['c']))
    at.append(statistics.mean(sub_df['a'] / sub_df['t']))

## fastq_screen: keep percentage columns, drop spike-in genomes
# NOTE(review): fastqscreen_file is undefined in this script.
dat = pd.read_table(fastqscreen_file)
fastqscreen = dat.loc[:, dat.columns.str.endswith('percentage')]
del fastqscreen['ERCC percentage']
del fastqscreen['Phix percentage']

### merge all information
part1.insert(loc=3, column='G/C ratio', value=gc)
part1.insert(loc=4, column='A/T ratio', value=at)
part1.reset_index(drop=True, inplace=True)
fastqscreen.reset_index(drop=True, inplace=True)
df = pd.concat([part1, fastqscreen], axis=1)
df = df.append(df.mean(axis=0), ignore_index=True)  # add a batch-average row
df = df.fillna('Batch average value')  # labels the average row's 'Sample' cell
df.columns = ['Sample','Total sequences (million)','% Dup','G/C ratio','A/T ratio','% Human','% EColi','% Adapter' , '% Vector','% rRNA' , '% Virus','% Yeast' ,'% Mitoch' ,'% No hits']
# NOTE(review): 'per-alignment' looks like a typo for 'post-alignment'; kept
# as-is because downstream tasks may expect this exact filename.
df.to_csv('per-alignment_table_summary.txt', sep='\t', index=False)

##### Picture
## cumulative genome coverage
# NOTE(review): json_file is undefined in this script — TODO confirm input.
with open(json_file) as file:
    all_dat = json.load(file)
genome_coverage_json = all_dat['report_plot_data']['qualimap_genome_fraction']['datasets'][0]
dat = pd.DataFrame.from_records(genome_coverage_json)
# Build one column per sample, indexed by coverage depth.
genome_coverage = pd.DataFrame(index=pd.DataFrame(dat.loc[0, 'data'])[0])
for i in range(dat.shape[0]):
    one_sample = pd.DataFrame(dat.loc[i, 'data'])
    one_sample.index = one_sample[0]
    genome_coverage[dat.loc[i, 'name']] = one_sample[1]
genome_coverage = genome_coverage.transpose()
genome_coverage['Sample'] = genome_coverage.index
genome_coverage.to_csv('post-alignment_genome_coverage.txt', sep='\t', index=False)

## insert size histogram
insert_size_json = all_dat['report_plot_data']['qualimap_insert_size']['datasets'][0]
dat = pd.DataFrame.from_records(insert_size_json)
insert_size = pd.DataFrame(index=pd.DataFrame(dat.loc[0, 'data'])[0])
for i in range(dat.shape[0]):
    one_sample = pd.DataFrame(dat.loc[i, 'data'])
    one_sample.index = one_sample[0]
    insert_size[dat.loc[i, 'name']] = one_sample[1]
insert_size = insert_size.transpose()
insert_size['Sample'] = insert_size.index
insert_size.to_csv('post-alignment_insert_size.txt', sep='\t', index=False)

## GC content distribution
gc_content_json = all_dat['report_plot_data']['qualimap_gc_content']['datasets'][0]
dat = pd.DataFrame.from_records(gc_content_json)
gc_content = pd.DataFrame(index=pd.DataFrame(dat.loc[0, 'data'])[0])
for i in range(dat.shape[0]):
    one_sample = pd.DataFrame(dat.loc[i, 'data'])
    one_sample.index = one_sample[0]
    gc_content[dat.loc[i, 'name']] = one_sample[1]
gc_content = gc_content.transpose()
gc_content['Sample'] = gc_content.index
gc_content.to_csv('post-alignment_gc_content.txt', sep='\t', index=False)



+ 132
- 0
codescripts/pre_alignment.py Zobrazit soubor

"""Summarise pre-alignment QC into one table plus per-plot TSVs.

Inputs are MultiQC products: the general-stats table, a JSON extracted from
the report HTML (per-base sequence content), the FastQ Screen table and the
report's plot-data JSON. Writes a summary table and two plot tables.
"""
import json
import pandas as pd
import sys, argparse, os
import statistics

parser = argparse.ArgumentParser(description="This script is to summary information for pre-alignment QC")

parser.add_argument('-general', '--general_stat', type=str, help='multiqc_general_stats.txt', required=True)
parser.add_argument('-html', '--html', type=str, help='multiqc_report.html', required=True)
parser.add_argument('-fastqscreen', '--fastqscreen', type=str, help='multiqc_fastq_screen.txt', required=True)
parser.add_argument('-json', '--json', type=str, help='multiqc_happy_data.json', required=True)

args = parser.parse_args()

general_file = args.general_stat
html_file = args.html
fastqscreen_file = args.fastqscreen
json_file = args.json

##### Table
## general stat: 1. Total sequences; 2. %Dup
dat = pd.read_table(general_file)

fastqc = dat.loc[:, dat.columns.str.startswith('FastQC')]
fastqc.insert(loc=0, column='Sample', value=dat['Sample'])
fastqc_stat = fastqc.dropna()  # rows carrying only non-FastQC values become NaN
part1 = fastqc_stat.loc[:, ['Sample', 'FastQC_mqc-generalstats-fastqc-percent_duplicates', 'FastQC_mqc-generalstats-fastqc-total_sequences']]

## report html: 1. G/C ratio; 2. A/T ratio
## The input JSON is pre-extracted from the report with:
## cat multiqc_report.html | grep 'fastqc_seq_content_data = ' | sed s'/fastqc_seq_content_data\ =\ //g' | sed 's/^[ \t]*//g' | sed s'/;//g' > fastqc_sequence_content.json
with open(html_file) as file:
    origDict = json.load(file)
# Flatten the two-level {sample: {position: bases}} dict into a frame
# indexed by (sample, position).
newdict = {(k1, k2): v2 for k1, v1 in origDict.items()
           for k2, v2 in origDict[k1].items()}
df = pd.DataFrame([newdict[i] for i in sorted(newdict)],
                  index=pd.MultiIndex.from_tuples([i for i in sorted(newdict.keys())]))
gc = []
at = []
for i in part1['Sample']:
    sub_df = df.loc[i, :]
    # Mean per-position base ratios; assumes columns 'a'/'t'/'g'/'c' hold
    # per-position percentages — TODO confirm against the extracted JSON.
    gc.append(statistics.mean(sub_df['g'] / sub_df['c']))
    at.append(statistics.mean(sub_df['a'] / sub_df['t']))

## fastq_screen: keep percentage columns, drop spike-in genomes
dat = pd.read_table(fastqscreen_file)
fastqscreen = dat.loc[:, dat.columns.str.endswith('percentage')]
del fastqscreen['ERCC percentage']
del fastqscreen['Phix percentage']

### merge all information
part1.insert(loc=3, column='G/C ratio', value=gc)
part1.insert(loc=4, column='A/T ratio', value=at)
part1.reset_index(drop=True, inplace=True)
fastqscreen.reset_index(drop=True, inplace=True)
df = pd.concat([part1, fastqscreen], axis=1)
df = df.append(df.mean(axis=0), ignore_index=True)  # add a batch-average row
df = df.fillna('Batch average value')  # labels the average row's 'Sample' cell
df.columns = ['Sample','Total sequences (million)','% Dup','G/C ratio','A/T ratio','% Human','% EColi','% Adapter' , '% Vector','% rRNA' , '% Virus','% Yeast' ,'% Mitoch' ,'% No hits']
# NOTE(review): 'per-alignment' looks like a typo for 'pre-alignment'; kept
# as-is because downstream tasks may expect this exact filename.
df.to_csv('per-alignment_table_summary.txt', sep='\t', index=False)

##### Picture
## mean quality scores
with open(json_file) as file:
    all_dat = json.load(file)
mean_quality_json = all_dat['report_plot_data']['fastqc_per_base_sequence_quality_plot']['datasets'][0]
dat = pd.DataFrame.from_records(mean_quality_json)
# Build one column per sample, indexed by read position.
mean_quality = pd.DataFrame(index=pd.DataFrame(dat.loc[0, 'data'])[0])
for i in range(dat.shape[0]):
    one_sample = pd.DataFrame(dat.loc[i, 'data'])
    one_sample.index = one_sample[0]
    mean_quality[dat.loc[i, 'name']] = one_sample[1]
mean_quality = mean_quality.transpose()
mean_quality['Sample'] = mean_quality.index
mean_quality.to_csv('pre-alignment_mean_quality.txt', sep='\t', index=False)

## per sequence GC content
gc_content_json = all_dat['report_plot_data']['fastqc_per_sequence_gc_content_plot']['datasets'][0]
dat = pd.DataFrame.from_records(gc_content_json)
gc_content = pd.DataFrame(index=pd.DataFrame(dat.loc[0, 'data'])[0])
for i in range(dat.shape[0]):
    one_sample = pd.DataFrame(dat.loc[i, 'data'])
    one_sample.index = one_sample[0]
    gc_content[dat.loc[i, 'name']] = one_sample[1]
gc_content = gc_content.transpose()
gc_content['Sample'] = gc_content.index
gc_content.to_csv('pre-alignment_gc_content.txt', sep='\t', index=False)

# NOTE(review): everything below duplicates codescripts/extract_multiqc.py and
# references variables never defined in this file (fastqc_qualimap_file,
# fastqc_file, hap_file) — it will raise NameError if reached. This looks like
# copy/paste leftover; TODO remove it or wire up the missing inputs via argparse.

# fastqc and qualimap: split multiqc_general_stats.txt by column prefix.
dat = pd.read_table(fastqc_qualimap_file)

fastqc = dat.loc[:, dat.columns.str.startswith('FastQC')]
fastqc.insert(loc=0, column='Sample', value=dat['Sample'])
fastqc_stat = fastqc.dropna()

# qualimap columns from the same table
qualimap = dat.loc[:, dat.columns.str.startswith('QualiMap')]
qualimap.insert(loc=0, column='Sample', value=dat['Sample'])
qualimap_stat = qualimap.dropna()

# fastqc per-module status table
dat = pd.read_table(fastqc_file)

fastqc_module = dat.loc[:, "per_base_sequence_quality":"kmer_content"]
fastqc_module.insert(loc=0, column='Sample', value=dat['Sample'])
fastqc_all = pd.merge(fastqc_stat, fastqc_module, how='outer', left_on=['Sample'], right_on=['Sample'])

# fastqscreen percentages; strip MultiQC's '_screen' sample-name suffix
dat = pd.read_table(fastqscreen_file)
fastqscreen = dat.loc[:, dat.columns.str.endswith('percentage')]
dat['Sample'] = [i.replace('_screen', '') for i in dat['Sample']]
fastqscreen.insert(loc=0, column='Sample', value=dat['Sample'])

# benchmark: hap.py '*ALL' columns transposed to one row per sample
with open(hap_file) as hap_json:
    happy = json.load(hap_json)
dat = pd.DataFrame.from_records(happy)
dat = dat.loc[:, dat.columns.str.endswith('ALL')]
dat_transposed = dat.T
benchmark = dat_transposed.loc[:, ['sample_id', 'METRIC.Precision', 'METRIC.Recall']]
benchmark['sample_id'] = benchmark.index
benchmark.columns = ['Sample', 'Precision', 'Recall']

# output: one TSV per tool
fastqc_all.to_csv('fastqc.final.result.txt', sep="\t", index=0)
fastqscreen.to_csv('fastqscreen.final.result.txt', sep="\t", index=0)
qualimap_stat.to_csv('qualimap.final.result.txt', sep="\t", index=0)
benchmark.to_csv('benchmark.final.result.txt', sep="\t", index=0)





+ 3
- 0
tasks/Metrics.wdl Zobrazit soubor

task Metrics { task Metrics {



File ref_dir File ref_dir
String SENTIEON_INSTALL_DIR String SENTIEON_INSTALL_DIR
String sample String sample
File sorted_bam File sorted_bam
File sorted_bam_index File sorted_bam_index
String disk_size String disk_size



command <<< command <<<
set -o pipefail set -o pipefail

+ 4
- 4
tasks/benchmark.wdl Zobrazit soubor

/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${sample}.rtg.vcf.gz /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${sample}.rtg.vcf.gz


if [[ ${sample} =~ "LCL5" ]];then if [[ ${sample} =~ "LCL5" ]];then
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL5.afterfilterdiffbed.vcf.gz ${sample}.rtg.vcf.gz -f ${benchmarking_dir}/LCL5.high.confidence.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL5.afterfilterdiffbed.vcf.gz ${sample}.rtg.vcf.gz -f ${benchmarking_dir}/LCL5.high.confidence.bed.gz --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
elif [[ ${sample} =~ "LCL6" ]]; then elif [[ ${sample} =~ "LCL6" ]]; then
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL6.afterfilterdiffbed.vcf.gz ${sample}.rtg.vcf.gz -f ${benchmarking_dir}/LCL6.high.confidence.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL6.afterfilterdiffbed.vcf.gz ${sample}.rtg.vcf.gz -f ${benchmarking_dir}/LCL6.high.confidence.bed.gz --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
elif [[ ${sample} =~ "LCL7" ]]; then elif [[ ${sample} =~ "LCL7" ]]; then
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL7.afterfilterdiffbed.vcf.gz ${sample}.rtg.vcf.gz -f ${benchmarking_dir}/LCL7.high.confidence.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL7.afterfilterdiffbed.vcf.gz ${sample}.rtg.vcf.gz -f ${benchmarking_dir}/LCL7.high.confidence.bed.gz --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
elif [[ ${sample} =~ "LCL8" ]]; then elif [[ ${sample} =~ "LCL8" ]]; then
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL8.afterfilterdiffbed.vcf.gz ${sample}.rtg.vcf.gz -f ${benchmarking_dir}/LCL8.high.confidence.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL8.afterfilterdiffbed.vcf.gz ${sample}.rtg.vcf.gz -f ${benchmarking_dir}/LCL8.high.confidence.bed.gz --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
else else
echo "only for quartet samples" echo "only for quartet samples"
fi fi

+ 9
- 1
tasks/deduped_Metrics.wdl Zobrazit soubor

set -e set -e
export SENTIEON_LICENSE=192.168.0.55:8990 export SENTIEON_LICENSE=192.168.0.55:8990
nt=$(nproc) nt=$(nproc)
${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${Dedup_bam} --algo CoverageMetrics --omit_base_output ${sample}_deduped_coverage_metrics --algo MeanQualityByCycle ${sample}_deduped_mq_metrics.txt --algo QualDistribution ${sample}_deduped_qd_metrics.txt --algo GCBias --summary ${sample}_deduped_gc_summary.txt ${sample}_deduped_gc_metrics.txt --algo AlignmentStat ${sample}_deduped_aln_metrics.txt --algo InsertSizeMetricAlgo ${sample}_deduped_is_metrics.txt
${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${Dedup_bam} --algo CoverageMetrics --omit_base_output ${sample}_deduped_coverage_metrics --algo MeanQualityByCycle ${sample}_deduped_mq_metrics.txt --algo QualDistribution ${sample}_deduped_qd_metrics.txt --algo GCBias --summary ${sample}_deduped_gc_summary.txt ${sample}_deduped_gc_metrics.txt --algo AlignmentStat ${sample}_deduped_aln_metrics.txt --algo InsertSizeMetricAlgo ${sample}_deduped_is_metrics.txt --algo QualityYield ${sample}_deduped_QualityYield.txt --algo WgsMetricsAlgo ${sample}_deduped_WgsMetricsAlgo.txt
>>> >>>


runtime { runtime {
File deduped_coverage_metrics_sample_interval_statistics = "${sample}_deduped_coverage_metrics.sample_interval_statistics" File deduped_coverage_metrics_sample_interval_statistics = "${sample}_deduped_coverage_metrics.sample_interval_statistics"
File deduped_coverage_metrics_sample_cumulative_coverage_proportions = "${sample}_deduped_coverage_metrics.sample_cumulative_coverage_proportions" File deduped_coverage_metrics_sample_cumulative_coverage_proportions = "${sample}_deduped_coverage_metrics.sample_cumulative_coverage_proportions"
File deduped_coverage_metrics_sample_cumulative_coverage_counts = "${sample}_deduped_coverage_metrics.sample_cumulative_coverage_counts" File deduped_coverage_metrics_sample_cumulative_coverage_counts = "${sample}_deduped_coverage_metrics.sample_cumulative_coverage_counts"
File deduped_mean_quality = "${sample}_deduped_mq_metrics.txt"
File deduped_qd_metrics = "${sample}_deduped_qd_metrics.txt"
File deduped_gc_summary = "${sample}_deduped_gc_summary.txt"
File deduped_gc_metrics = "${sample}_deduped_gc_metrics.txt"
File dedeuped_aln_metrics = "${sample}_deduped_aln_metrics.txt"
File deduped_is_metrics = "${sample}_deduped_is_metrics.txt"
File deduped_QualityYield = "${sample}_deduped_QualityYield.txt"
File deduped_wgsmetrics = "${sample}_deduped_WgsMetricsAlgo.txt"
} }
} }

tasks/extract_multiqc.wdl → tasks/extract_tables.wdl Zobrazit soubor

task extract_multiqc {
task extract_tables {


File fastqc_qualimap
File fastqc File fastqc
File fastqscreen File fastqscreen
File hap File hap

+ 13
- 10
tasks/mergeSentieon.wdl Zobrazit soubor

Array[File] aln_metrics_header Array[File] aln_metrics_header
Array[File] aln_metrics_data Array[File] aln_metrics_data


Array[File] dedup_metrics_header
Array[File] dedup_metrics_data

Array[File] is_metrics_header Array[File] is_metrics_header
Array[File] is_metrics_data Array[File] is_metrics_data


Array[File] deduped_coverage_header
Array[File] deduped_coverage_data
Array[File] quality_yield_header
Array[File] quality_yield_data

Array[File] wgs_metrics_header
Array[File] wgs_metrics_data


String docker String docker
String cluster_config String cluster_config
command <<< command <<<
set -o pipefail set -o pipefail
set -e set -e
cat ${sep=" " aln_metrics_header} | sed -n '1,1p' | cat - ${sep=" " aln_metrics_data} > aln_metrics.txt
cat ${sep=" " dedup_metrics_header} | sed -n '1,1p' | cat - ${sep=" " dedup_metrics_data} > dedup_metrics.txt
echo '''Sample''' > sample_column
cat ${sep=" " aln_metrics_header} | sed -n '1,1p' | cat - ${sep=" " aln_metrics_data} > aln_metrics
ls ${sep=" " aln_metrics_data} | cut -d '.' -f1 | cat sample_column - | paste - aln_metrics > aln_metrics.txt
cat ${sep=" " is_metrics_header} | sed -n '1,1p' | cat - ${sep=" " is_metrics_data} > is_metrics.txt cat ${sep=" " is_metrics_header} | sed -n '1,1p' | cat - ${sep=" " is_metrics_data} > is_metrics.txt
cat ${sep=" " deduped_coverage_header} | sed -n '1,1p' | cat - ${sep=" " deduped_coverage_data} > deduped_coverage.txt
cat ${sep=" " quality_yield_header} | sed -n '1,1p' | cat - ${sep=" " quality_yield_data} > quality_yield_data.txt
cat ${sep=" " wgs_metrics_header} | sed -n '1,1p' | cat - ${sep=" " wgs_metrics_data} > wgs_metrics_data.txt
>>> >>>


runtime { runtime {


output { output {
File aln_metrics_merge = "aln_metrics.txt" File aln_metrics_merge = "aln_metrics.txt"
File dedup_metrics_merge = "dedup_metrics.txt"
File is_metrics_merge = "is_metrics.txt" File is_metrics_merge = "is_metrics.txt"
File deduped_coverage_merge = "deduped_coverage.txt"
File quality_yield_merge = "quality_yield_data.txt"
File wgs_metrics_merge = "wgs_metrics_data.txt"
} }
} }

+ 1
- 3
tasks/multiqc.wdl Zobrazit soubor

multiqc /cromwell_root/tmp/ multiqc /cromwell_root/tmp/


cat multiqc_data/multiqc_general_stats.txt > multiqc_general_stats.txt cat multiqc_data/multiqc_general_stats.txt > multiqc_general_stats.txt
cat multiqc_data/multiqc_fastqc.txt > multiqc_fastqc.txt
cat multiqc_data/multiqc_fastq_screen.txt > multiqc_fastq_screen.txt cat multiqc_data/multiqc_fastq_screen.txt > multiqc_fastq_screen.txt
cat multiqc_data/multiqc_happy_data.json > multiqc_happy_data.json cat multiqc_data/multiqc_happy_data.json > multiqc_happy_data.json
output { output {
File multiqc_html = "multiqc_report.html" File multiqc_html = "multiqc_report.html"
Array[File] multiqc_txt = glob("multiqc_data/*") Array[File] multiqc_txt = glob("multiqc_data/*")
File fastqc_qualimap = "multiqc_general_stats.txt"
File fastqc = "multiqc_fastqc.txt"
File fastqc = "multiqc_general_stats.txt"
File fastqscreen = "multiqc_fastq_screen.txt" File fastqscreen = "multiqc_fastq_screen.txt"
File hap = "multiqc_happy_data.json" File hap = "multiqc_happy_data.json"
} }

+ 2
- 3
tasks/sentieon.wdl Zobrazit soubor

task sentieon { task sentieon {
File aln_metrics File aln_metrics
File dedup_metrics
File is_metrics File is_metrics
File deduped_coverage
String sample_name
File wgsmetrics
File quality_yield
String docker String docker
String cluster_config String cluster_config
String disk_size String disk_size

+ 6
- 28
workflow.wdl Zobrazit soubor

import "./tasks/mapping.wdl" as mapping import "./tasks/mapping.wdl" as mapping
import "./tasks/Metrics.wdl" as Metrics
import "./tasks/Dedup.wdl" as Dedup import "./tasks/Dedup.wdl" as Dedup
import "./tasks/deduped_Metrics.wdl" as deduped_Metrics import "./tasks/deduped_Metrics.wdl" as deduped_Metrics
import "./tasks/Realigner.wdl" as Realigner import "./tasks/Realigner.wdl" as Realigner
import "./tasks/quartet_mendelian.wdl" as quartet_mendelian import "./tasks/quartet_mendelian.wdl" as quartet_mendelian
import "./tasks/fastqc.wdl" as fastqc import "./tasks/fastqc.wdl" as fastqc
import "./tasks/fastqscreen.wdl" as fastqscreen import "./tasks/fastqscreen.wdl" as fastqscreen
import "./tasks/qualimap.wdl" as qualimap
import "./tasks/extract_multiqc.wdl" as extract_multiqc
import "./tasks/extract_tables.wdl" as extract_tables
import "./tasks/D5_D6.wdl" as D5_D6 import "./tasks/D5_D6.wdl" as D5_D6
import "./tasks/merge_family.wdl" as merge_family import "./tasks/merge_family.wdl" as merge_family


disk_size=disk_size disk_size=disk_size
} }


call Metrics.Metrics as Metrics {
input:
SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR,
fasta=fasta,
ref_dir=ref_dir,
sorted_bam=mapping.sorted_bam,
sorted_bam_index=mapping.sorted_bam_index,
sample=quartet[2],
docker=SENTIEONdocker,
disk_size=disk_size,
cluster_config=BIGcluster_config
}

call Dedup.Dedup as Dedup { call Dedup.Dedup as Dedup {
input: input:
SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR, SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR,
cluster_config=BIGcluster_config cluster_config=BIGcluster_config
} }


call qualimap.qualimap as qualimap {
input:
bam=Dedup.Dedup_bam,
bai=Dedup.Dedup_bam_index,
docker=QUALIMAPdocker,
cluster_config=BIGcluster_config,
disk_size=disk_size
}

call deduped_Metrics.deduped_Metrics as deduped_Metrics { call deduped_Metrics.deduped_Metrics as deduped_Metrics {
input: input:
SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR, SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR,
disk_size=disk_size, disk_size=disk_size,
cluster_config=BIGcluster_config cluster_config=BIGcluster_config
} }

call Haplotyper_gVCF.Haplotyper_gVCF as Haplotyper_gVCF { call Haplotyper_gVCF.Haplotyper_gVCF as Haplotyper_gVCF {
input: input:
SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR, SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR,
read2_zip=fastqc.read2_zip, read2_zip=fastqc.read2_zip,
txt1=fastqscreen.txt1, txt1=fastqscreen.txt1,
txt2=fastqscreen.txt2, txt2=fastqscreen.txt2,
zip=qualimap.zip,
summary=benchmark.summary, summary=benchmark.summary,
docker=MULTIQCdocker, docker=MULTIQCdocker,
cluster_config=SMALLcluster_config, cluster_config=SMALLcluster_config,
disk_size=disk_size disk_size=disk_size
} }


call extract_multiqc.extract_multiqc as extract_multiqc {
call extract_tables.extract_tables as extract_tables {
input: input:
fastqc_qualimap=multiqc.fastqc_qualimap,
fastqc=multiqc.fastqc, fastqc=multiqc.fastqc,
fastqscreen=multiqc.fastqscreen, fastqscreen=multiqc.fastqscreen,
hap=multiqc.hap, hap=multiqc.hap,
aln=deduped_Metrics.dedeuped_aln_metrics,
quality_yield=deduped_Metrics.deduped_QualityYield,
wgs_metrics=deduped_Metrics.deduped_wgsmetrics,
docker=DIYdocker, docker=DIYdocker,
cluster_config=SMALLcluster_config, cluster_config=SMALLcluster_config,
disk_size=disk_size disk_size=disk_size

Načítá se…
Zrušit
Uložit