import json
import sys, argparse, os

import pandas as pd

# Extract per-tool QC tables from MultiQC outputs and write one
# tab-separated file per tool:
#   fastqc.final.result.txt      - FastQC general stats + per-module flags
#   fastqscreen.final.result.txt - FastQ Screen contamination percentages
#   qualimap.final.result.txt    - QualiMap general stats
#   benchmark.final.result.txt   - hap.py precision/recall ("*ALL" columns)
parser = argparse.ArgumentParser(description="This script is to get information from multiqc")
parser.add_argument('-fastqc_qualimap', '--fastqc_qualimap', type=str, help='multiqc_general_stats.txt', required=True)
parser.add_argument('-fastqc', '--fastqc', type=str, help='multiqc_fastqc.txt', required=True)
parser.add_argument('-fastqscreen', '--fastqscreen', type=str, help='multiqc_fastq_screen.txt', required=True)
parser.add_argument('-hap', '--happy', type=str, help='multiqc_happy_data.json', required=True)
args = parser.parse_args()

# Rename input:
fastqc_qualimap_file = args.fastqc_qualimap
fastqc_file = args.fastqc
fastqscreen_file = args.fastqscreen
hap_file = args.happy

# fastqc and qualimap: split multiqc_general_stats.txt by column prefix.
dat = pd.read_table(fastqc_qualimap_file)
# .copy() so insert() works on an independent frame rather than a column
# slice of dat (avoids SettingWithCopyWarning / lost writes under
# pandas copy-on-write).
fastqc = dat.loc[:, dat.columns.str.startswith('FastQC')].copy()
fastqc.insert(loc=0, column='Sample', value=dat['Sample'])
fastqc_stat = fastqc.dropna()

# qualimap
qualimap = dat.loc[:, dat.columns.str.startswith('QualiMap')].copy()
qualimap.insert(loc=0, column='Sample', value=dat['Sample'])
qualimap_stat = qualimap.dropna()

# fastqc per-module pass/warn/fail columns from multiqc_fastqc.txt
dat = pd.read_table(fastqc_file)
fastqc_module = dat.loc[:, "per_base_sequence_quality":"kmer_content"].copy()
fastqc_module.insert(loc=0, column='Sample', value=dat['Sample'])
fastqc_all = pd.merge(fastqc_stat, fastqc_module, how='outer', on='Sample')

# fastqscreen: MultiQC suffixes fastq_screen sample names with "_screen";
# strip it so names line up with the other tables.
dat = pd.read_table(fastqscreen_file)
fastqscreen = dat.loc[:, dat.columns.str.endswith('percentage')].copy()
fastqscreen.insert(loc=0, column='Sample',
                   value=dat['Sample'].str.replace('_screen', '', regex=False))

# benchmark: keep only the "*ALL" columns of the hap.py JSON, then pull
# per-sample precision/recall.
with open(hap_file) as hap_json:
    happy = json.load(hap_json)
dat = pd.DataFrame.from_records(happy)
dat = dat.loc[:, dat.columns.str.endswith('ALL')]
dat_transposed = dat.T
benchmark = dat_transposed.loc[:, ['sample_id', 'METRIC.Precision', 'METRIC.Recall']].copy()
# Overwrite sample_id with the row index (the hap.py result name).
benchmark['sample_id'] = benchmark.index
benchmark.columns = ['Sample', 'Precision', 'Recall']

# output (index=False instead of the less explicit index=0)
fastqc_all.to_csv('fastqc.final.result.txt', sep="\t", index=False)
fastqscreen.to_csv('fastqscreen.final.result.txt', sep="\t", index=False)
qualimap_stat.to_csv('qualimap.final.result.txt', sep="\t", index=False)
benchmark.to_csv('benchmark.final.result.txt', sep="\t", index=False)
import json
import sys, argparse, os
import statistics

import pandas as pd

# Summarise QC information into one table plus plot-ready tables.
# NOTE(review): this block looks like a half-edited copy of the
# pre-alignment summary script that follows it in this file: the
# arguments declared below (-is/-wgsmetrics/-qualityyield/-aln) are never
# used, while the body reads html_file and json_file, which are not
# defined anywhere before this point (NameError at runtime), and
# fastqc_stat / fastqscreen_file, which would silently pick up stale
# values from the first script in this concatenated file. Each such use
# is flagged with a TODO where it occurs.
parser = argparse.ArgumentParser(description="This script is to summary information for pre-alignment QC")
parser.add_argument('-general', '--general_stat', type=str, help='multiqc_general_stats.txt', required=True)
parser.add_argument('-is', '--is_metrics', type=str, help='_is_metrics.txt', required=True)
parser.add_argument('-wgsmetrics', '--WgsMetricsAlgo', type=str, help='deduped_WgsMetricsAlgo', required=True)
parser.add_argument('-qualityyield', '--QualityYield', type=str, help='deduped_QualityYield', required=True)
parser.add_argument('-aln', '--aln_metrics', type=str, help='aln_metrics.txt', required=True)
args = parser.parse_args()

general_file = args.general_stat
is_file = args.is_metrics
# BUG FIX: argparse names attributes after the long option strings, so
# these values are stored as args.WgsMetricsAlgo / args.QualityYield; the
# original read args.wgsmetrics / args.qualityyield, which raises
# AttributeError.
wgsmetrics_file = args.WgsMetricsAlgo
qualityyield_file = args.QualityYield
aln_file = args.aln_metrics

##### Table
## general stat: % GC
dat = pd.read_table(general_file)
# .copy() so insert() does not write into a column slice of dat.
qualimap = dat.loc[:, dat.columns.str.startswith('QualiMap')].copy()
qualimap.insert(loc=0, column='Sample', value=dat['Sample'])
qualimap_stat = qualimap.dropna()  # NOTE(review): computed but never used below

# TODO(review): fastqc_stat is not defined by this script (it comes from
# the first script in this concatenated file); confirm the intended source.
part1 = fastqc_stat.loc[:, ['Sample', 'FastQC_mqc-generalstats-fastqc-percent_duplicates', 'FastQC_mqc-generalstats-fastqc-total_sequences']].copy()

## is_metrics: median insert size
## deduped_WgsMetricsAlgo: 1x, 5x, 10x, 30x, median coverage
# TODO(review): html_file is undefined at this point -> NameError.
with open(html_file) as file:
    origDict = json.load(file)
# Flatten {outer: {inner: value}} into {(outer, inner): value} so the
# result can be indexed per sample via a MultiIndex.
newdict = {(k1, k2): v2 for k1, v1 in origDict.items()
           for k2, v2 in v1.items()}
df = pd.DataFrame([newdict[i] for i in sorted(newdict)],
                  index=pd.MultiIndex.from_tuples(sorted(newdict.keys())))
gc = []
at = []
for i in part1['Sample']:
    sub_df = df.loc[i, :]
    gc.append(statistics.mean(sub_df['g'] / sub_df['c']))
    at.append(statistics.mean(sub_df['a'] / sub_df['t']))

## fastq_screen
# TODO(review): fastqscreen_file is defined only by the first script in
# this file; confirm the intended input.
dat = pd.read_table(fastqscreen_file)
fastqscreen = dat.loc[:, dat.columns.str.endswith('percentage')].copy()
del fastqscreen['ERCC percentage']
del fastqscreen['Phix percentage']

### merge all information
part1.insert(loc=3, column='G/C ratio', value=gc)
part1.insert(loc=4, column='A/T ratio', value=at)
part1.reset_index(drop=True, inplace=True)
fastqscreen.reset_index(drop=True, inplace=True)
df = pd.concat([part1, fastqscreen], axis=1)
# BUG FIX: DataFrame.append was removed in pandas 2.0; append the
# batch-average row (numeric columns only, Sample becomes NaN and is
# filled below) with pd.concat instead.
df = pd.concat([df, df.mean(axis=0, numeric_only=True).to_frame().T],
               ignore_index=True)
df = df.fillna('Batch average value')
df.columns = ['Sample','Total sequences (million)','% Dup','G/C ratio','A/T ratio','% Human','% EColi','% Adapter' , '% Vector','% rRNA' , '% Virus','% Yeast' ,'% Mitoch' ,'% No hits']
df.to_csv('per-alignment_table_summary.txt', sep='\t', index=False)

##### Picture
## cumulative genome coverage
# TODO(review): json_file is undefined at this point -> NameError.
with open(json_file) as file:
    all_dat = json.load(file)


def _plot_table(plot_json, out_path):
    """Pivot one MultiQC line-plot dataset (a list of {name, data: [[x, y], ...]}
    records) into a samples-as-rows table and write it as TSV."""
    dat = pd.DataFrame.from_records(plot_json)
    table = pd.DataFrame(index=pd.DataFrame(dat.loc[0, 'data'])[0])
    for i in range(dat.shape[0]):
        one_sample = pd.DataFrame(dat.loc[i, 'data'])
        one_sample.index = one_sample[0]
        table[dat.loc[i, 'name']] = one_sample[1]
    table = table.transpose()
    table['Sample'] = table.index
    table.to_csv(out_path, sep='\t', index=False)


_plot_table(all_dat['report_plot_data']['qualimap_genome_fraction']['datasets'][0],
            'post-alignment_genome_coverage.txt')
## insert size histogram
_plot_table(all_dat['report_plot_data']['qualimap_insert_size']['datasets'][0],
            'post-alignment_insert_size.txt')
## GC content distribution
_plot_table(all_dat['report_plot_data']['qualimap_gc_content']['datasets'][0],
            'post-alignment_gc_content.txt')
import json
import sys, argparse, os
import statistics

import pandas as pd

# Pre-alignment QC summary:
#   - table: total sequences, % dup, G/C and A/T base ratios, FastQ Screen %s
#   - plot tables: per-base mean quality, per-sequence GC content
parser = argparse.ArgumentParser(description="This script is to summary information for pre-alignment QC")
parser.add_argument('-general', '--general_stat', type=str, help='multiqc_general_stats.txt', required=True)
parser.add_argument('-html', '--html', type=str, help='multiqc_report.html', required=True)
parser.add_argument('-fastqscreen', '--fastqscreen', type=str, help='multiqc_fastq_screen.txt', required=True)
parser.add_argument('-json', '--json', type=str, help='multiqc_happy_data.json', required=True)
args = parser.parse_args()

general_file = args.general_stat
html_file = args.html
fastqscreen_file = args.fastqscreen
json_file = args.json

##### Table
## general stat: 1. Total sequences; 2. %Dup
dat = pd.read_table(general_file)
# .copy() so insert() works on an independent frame, not a slice of dat
# (avoids SettingWithCopyWarning / lost writes under copy-on-write).
fastqc = dat.loc[:, dat.columns.str.startswith('FastQC')].copy()
fastqc.insert(loc=0, column='Sample', value=dat['Sample'])
fastqc_stat = fastqc.dropna()
part1 = fastqc_stat.loc[:, ['Sample', 'FastQC_mqc-generalstats-fastqc-percent_duplicates', 'FastQC_mqc-generalstats-fastqc-total_sequences']].copy()

## report html: 1. G/C ratio; 2. A/T ratio
## cat multiqc_report.html | grep 'fastqc_seq_content_data = ' | sed s'/fastqc_seq_content_data\ =\ //g' | sed 's/^[ \t]*//g' | sed s'/;//g' > fastqc_sequence_content.json
with open(html_file) as file:
    origDict = json.load(file)
# Flatten {outer: {inner: value}} into {(outer, inner): value}; the outer
# keys are indexed by sample name below.
newdict = {(k1, k2): v2 for k1, v1 in origDict.items()
           for k2, v2 in v1.items()}
df = pd.DataFrame([newdict[i] for i in sorted(newdict)],
                  index=pd.MultiIndex.from_tuples(sorted(newdict.keys())))
gc = []
at = []
for i in part1['Sample']:
    sub_df = df.loc[i, :]
    gc.append(statistics.mean(sub_df['g'] / sub_df['c']))
    at.append(statistics.mean(sub_df['a'] / sub_df['t']))

## fastq_screen
dat = pd.read_table(fastqscreen_file)
fastqscreen = dat.loc[:, dat.columns.str.endswith('percentage')].copy()
del fastqscreen['ERCC percentage']
del fastqscreen['Phix percentage']

### merge all information
part1.insert(loc=3, column='G/C ratio', value=gc)
part1.insert(loc=4, column='A/T ratio', value=at)
part1.reset_index(drop=True, inplace=True)
fastqscreen.reset_index(drop=True, inplace=True)
df = pd.concat([part1, fastqscreen], axis=1)
# BUG FIX: DataFrame.append was removed in pandas 2.0; append the
# batch-average row (numeric columns only, Sample becomes NaN and is
# filled below) with pd.concat instead.
df = pd.concat([df, df.mean(axis=0, numeric_only=True).to_frame().T],
               ignore_index=True)
df = df.fillna('Batch average value')
df.columns = ['Sample','Total sequences (million)','% Dup','G/C ratio','A/T ratio','% Human','% EColi','% Adapter' , '% Vector','% rRNA' , '% Virus','% Yeast' ,'% Mitoch' ,'% No hits']
# NOTE(review): "per-alignment" looks like a typo for "pre-alignment", but
# downstream consumers may expect this exact name - confirm before renaming.
df.to_csv('per-alignment_table_summary.txt', sep='\t', index=False)

##### Picture
## mean quality scores
with open(json_file) as file:
    all_dat = json.load(file)


def _plot_table(plot_json, out_path):
    """Pivot one MultiQC line-plot dataset (a list of {name, data: [[x, y], ...]}
    records) into a samples-as-rows table and write it as TSV."""
    dat = pd.DataFrame.from_records(plot_json)
    table = pd.DataFrame(index=pd.DataFrame(dat.loc[0, 'data'])[0])
    for i in range(dat.shape[0]):
        one_sample = pd.DataFrame(dat.loc[i, 'data'])
        one_sample.index = one_sample[0]
        table[dat.loc[i, 'name']] = one_sample[1]
    table = table.transpose()
    table['Sample'] = table.index
    table.to_csv(out_path, sep='\t', index=False)


_plot_table(all_dat['report_plot_data']['fastqc_per_base_sequence_quality_plot']['datasets'][0],
            'pre-alignment_mean_quality.txt')
## per sequence GC content
_plot_table(all_dat['report_plot_data']['fastqc_per_sequence_gc_content_plot']['datasets'][0],
            'pre-alignment_gc_content.txt')
# NOTE(review): this section duplicates the MultiQC-extraction logic at the
# top of this file and reuses its input paths (fastqc_qualimap_file,
# fastqc_file, fastqscreen_file, hap_file), none of which are defined by
# the immediately preceding script - it only runs because of the earlier
# definitions in this concatenated file, and it overwrites the four
# *.final.result.txt outputs written there. Confirm whether this copy
# should exist at all.

# fastqc and qualimap: split multiqc_general_stats.txt by column prefix.
dat = pd.read_table(fastqc_qualimap_file)
# .copy() so insert() works on an independent frame, not a slice of dat.
fastqc = dat.loc[:, dat.columns.str.startswith('FastQC')].copy()
fastqc.insert(loc=0, column='Sample', value=dat['Sample'])
fastqc_stat = fastqc.dropna()
# qualimap
qualimap = dat.loc[:, dat.columns.str.startswith('QualiMap')].copy()
qualimap.insert(loc=0, column='Sample', value=dat['Sample'])
qualimap_stat = qualimap.dropna()
# fastqc per-module pass/warn/fail columns
dat = pd.read_table(fastqc_file)
fastqc_module = dat.loc[:, "per_base_sequence_quality":"kmer_content"].copy()
fastqc_module.insert(loc=0, column='Sample', value=dat['Sample'])
fastqc_all = pd.merge(fastqc_stat, fastqc_module, how='outer', on='Sample')
# fastqscreen: strip MultiQC's "_screen" suffix so sample names match.
dat = pd.read_table(fastqscreen_file)
fastqscreen = dat.loc[:, dat.columns.str.endswith('percentage')].copy()
fastqscreen.insert(loc=0, column='Sample',
                   value=dat['Sample'].str.replace('_screen', '', regex=False))
# benchmark: hap.py "*ALL" columns -> per-sample precision/recall.
with open(hap_file) as hap_json:
    happy = json.load(hap_json)
dat = pd.DataFrame.from_records(happy)
dat = dat.loc[:, dat.columns.str.endswith('ALL')]
dat_transposed = dat.T
benchmark = dat_transposed.loc[:, ['sample_id', 'METRIC.Precision', 'METRIC.Recall']].copy()
benchmark['sample_id'] = benchmark.index
benchmark.columns = ['Sample', 'Precision', 'Recall']
# output (index=False instead of the less explicit index=0)
fastqc_all.to_csv('fastqc.final.result.txt', sep="\t", index=False)
fastqscreen.to_csv('fastqscreen.final.result.txt', sep="\t", index=False)
qualimap_stat.to_csv('qualimap.final.result.txt', sep="\t", index=False)
benchmark.to_csv('benchmark.final.result.txt', sep="\t", index=False)
task Metrics { | task Metrics { | ||||
File ref_dir | File ref_dir | ||||
String SENTIEON_INSTALL_DIR | String SENTIEON_INSTALL_DIR | ||||
String sample | String sample | ||||
File sorted_bam | File sorted_bam | ||||
File sorted_bam_index | File sorted_bam_index | ||||
String disk_size | String disk_size | ||||
command <<< | command <<< | ||||
set -o pipefail | set -o pipefail |
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${sample}.rtg.vcf.gz | /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${sample}.rtg.vcf.gz | ||||
if [[ ${sample} =~ "LCL5" ]];then | if [[ ${sample} =~ "LCL5" ]];then | ||||
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL5.afterfilterdiffbed.vcf.gz ${sample}.rtg.vcf.gz -f ${benchmarking_dir}/LCL5.high.confidence.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta} | |||||
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL5.afterfilterdiffbed.vcf.gz ${sample}.rtg.vcf.gz -f ${benchmarking_dir}/LCL5.high.confidence.bed.gz --threads $nt -o ${sample} -r ${ref_dir}/${fasta} | |||||
elif [[ ${sample} =~ "LCL6" ]]; then | elif [[ ${sample} =~ "LCL6" ]]; then | ||||
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL6.afterfilterdiffbed.vcf.gz ${sample}.rtg.vcf.gz -f ${benchmarking_dir}/LCL6.high.confidence.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta} | |||||
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL6.afterfilterdiffbed.vcf.gz ${sample}.rtg.vcf.gz -f ${benchmarking_dir}/LCL6.high.confidence.bed.gz --threads $nt -o ${sample} -r ${ref_dir}/${fasta} | |||||
elif [[ ${sample} =~ "LCL7" ]]; then | elif [[ ${sample} =~ "LCL7" ]]; then | ||||
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL7.afterfilterdiffbed.vcf.gz ${sample}.rtg.vcf.gz -f ${benchmarking_dir}/LCL7.high.confidence.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta} | |||||
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL7.afterfilterdiffbed.vcf.gz ${sample}.rtg.vcf.gz -f ${benchmarking_dir}/LCL7.high.confidence.bed.gz --threads $nt -o ${sample} -r ${ref_dir}/${fasta} | |||||
elif [[ ${sample} =~ "LCL8" ]]; then | elif [[ ${sample} =~ "LCL8" ]]; then | ||||
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL8.afterfilterdiffbed.vcf.gz ${sample}.rtg.vcf.gz -f ${benchmarking_dir}/LCL8.high.confidence.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta} | |||||
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL8.afterfilterdiffbed.vcf.gz ${sample}.rtg.vcf.gz -f ${benchmarking_dir}/LCL8.high.confidence.bed.gz --threads $nt -o ${sample} -r ${ref_dir}/${fasta} | |||||
else | else | ||||
echo "only for quartet samples" | echo "only for quartet samples" | ||||
fi | fi |
set -e | set -e | ||||
export SENTIEON_LICENSE=192.168.0.55:8990 | export SENTIEON_LICENSE=192.168.0.55:8990 | ||||
nt=$(nproc) | nt=$(nproc) | ||||
${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${Dedup_bam} --algo CoverageMetrics --omit_base_output ${sample}_deduped_coverage_metrics --algo MeanQualityByCycle ${sample}_deduped_mq_metrics.txt --algo QualDistribution ${sample}_deduped_qd_metrics.txt --algo GCBias --summary ${sample}_deduped_gc_summary.txt ${sample}_deduped_gc_metrics.txt --algo AlignmentStat ${sample}_deduped_aln_metrics.txt --algo InsertSizeMetricAlgo ${sample}_deduped_is_metrics.txt | |||||
${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} -t $nt -i ${Dedup_bam} --algo CoverageMetrics --omit_base_output ${sample}_deduped_coverage_metrics --algo MeanQualityByCycle ${sample}_deduped_mq_metrics.txt --algo QualDistribution ${sample}_deduped_qd_metrics.txt --algo GCBias --summary ${sample}_deduped_gc_summary.txt ${sample}_deduped_gc_metrics.txt --algo AlignmentStat ${sample}_deduped_aln_metrics.txt --algo InsertSizeMetricAlgo ${sample}_deduped_is_metrics.txt --algo QualityYield ${sample}_deduped_QualityYield.txt --algo WgsMetricsAlgo ${sample}_deduped_WgsMetricsAlgo.txt | |||||
>>> | >>> | ||||
runtime { | runtime { | ||||
File deduped_coverage_metrics_sample_interval_statistics = "${sample}_deduped_coverage_metrics.sample_interval_statistics" | File deduped_coverage_metrics_sample_interval_statistics = "${sample}_deduped_coverage_metrics.sample_interval_statistics" | ||||
File deduped_coverage_metrics_sample_cumulative_coverage_proportions = "${sample}_deduped_coverage_metrics.sample_cumulative_coverage_proportions" | File deduped_coverage_metrics_sample_cumulative_coverage_proportions = "${sample}_deduped_coverage_metrics.sample_cumulative_coverage_proportions" | ||||
File deduped_coverage_metrics_sample_cumulative_coverage_counts = "${sample}_deduped_coverage_metrics.sample_cumulative_coverage_counts" | File deduped_coverage_metrics_sample_cumulative_coverage_counts = "${sample}_deduped_coverage_metrics.sample_cumulative_coverage_counts" | ||||
File deduped_mean_quality = "${sample}_deduped_mq_metrics.txt" | |||||
File deduped_qd_metrics = "${sample}_deduped_qd_metrics.txt" | |||||
File deduped_gc_summary = "${sample}_deduped_gc_summary.txt" | |||||
File deduped_gc_metrics = "${sample}_deduped_gc_metrics.txt" | |||||
File dedeuped_aln_metrics = "${sample}_deduped_aln_metrics.txt" | |||||
File deduped_is_metrics = "${sample}_deduped_is_metrics.txt" | |||||
File deduped_QualityYield = "${sample}_deduped_QualityYield.txt" | |||||
File deduped_wgsmetrics = "${sample}_deduped_WgsMetricsAlgo.txt" | |||||
} | } | ||||
} | } |
task extract_multiqc { | |||||
task extract_tables { | |||||
File fastqc_qualimap | |||||
File fastqc | File fastqc | ||||
File fastqscreen | File fastqscreen | ||||
File hap | File hap |
Array[File] aln_metrics_header | Array[File] aln_metrics_header | ||||
Array[File] aln_metrics_data | Array[File] aln_metrics_data | ||||
Array[File] dedup_metrics_header | |||||
Array[File] dedup_metrics_data | |||||
Array[File] is_metrics_header | Array[File] is_metrics_header | ||||
Array[File] is_metrics_data | Array[File] is_metrics_data | ||||
Array[File] deduped_coverage_header | |||||
Array[File] deduped_coverage_data | |||||
Array[File] quality_yield_header | |||||
Array[File] quality_yield_data | |||||
Array[File] wgs_metrics_header | |||||
Array[File] wgs_metrics_data | |||||
String docker | String docker | ||||
String cluster_config | String cluster_config | ||||
command <<< | command <<< | ||||
set -o pipefail | set -o pipefail | ||||
set -e | set -e | ||||
cat ${sep=" " aln_metrics_header} | sed -n '1,1p' | cat - ${sep=" " aln_metrics_data} > aln_metrics.txt | |||||
cat ${sep=" " dedup_metrics_header} | sed -n '1,1p' | cat - ${sep=" " dedup_metrics_data} > dedup_metrics.txt | |||||
echo '''Sample''' > sample_column | |||||
cat ${sep=" " aln_metrics_header} | sed -n '1,1p' | cat - ${sep=" " aln_metrics_data} > aln_metrics | |||||
ls ${sep=" " aln_metrics_data} | cut -d '.' -f1 | cat sample_column - | paste - aln_metrics > aln_metrics.txt | |||||
cat ${sep=" " is_metrics_header} | sed -n '1,1p' | cat - ${sep=" " is_metrics_data} > is_metrics.txt | cat ${sep=" " is_metrics_header} | sed -n '1,1p' | cat - ${sep=" " is_metrics_data} > is_metrics.txt | ||||
cat ${sep=" " deduped_coverage_header} | sed -n '1,1p' | cat - ${sep=" " deduped_coverage_data} > deduped_coverage.txt | |||||
cat ${sep=" " quality_yield_header} | sed -n '1,1p' | cat - ${sep=" " quality_yield_data} > quality_yield_data.txt | |||||
cat ${sep=" " wgs_metrics_header} | sed -n '1,1p' | cat - ${sep=" " wgs_metrics_data} > wgs_metrics_data.txt | |||||
>>> | >>> | ||||
runtime { | runtime { | ||||
output { | output { | ||||
File aln_metrics_merge = "aln_metrics.txt" | File aln_metrics_merge = "aln_metrics.txt" | ||||
File dedup_metrics_merge = "dedup_metrics.txt" | |||||
File is_metrics_merge = "is_metrics.txt" | File is_metrics_merge = "is_metrics.txt" | ||||
File deduped_coverage_merge = "deduped_coverage.txt" | |||||
File quality_yield_merge = "quality_yield_data.txt" | |||||
File wgs_metrics_merge = "wgs_metrics_data.txt" | |||||
} | } | ||||
} | } |
multiqc /cromwell_root/tmp/ | multiqc /cromwell_root/tmp/ | ||||
cat multiqc_data/multiqc_general_stats.txt > multiqc_general_stats.txt | cat multiqc_data/multiqc_general_stats.txt > multiqc_general_stats.txt | ||||
cat multiqc_data/multiqc_fastqc.txt > multiqc_fastqc.txt | |||||
cat multiqc_data/multiqc_fastq_screen.txt > multiqc_fastq_screen.txt | cat multiqc_data/multiqc_fastq_screen.txt > multiqc_fastq_screen.txt | ||||
cat multiqc_data/multiqc_happy_data.json > multiqc_happy_data.json | cat multiqc_data/multiqc_happy_data.json > multiqc_happy_data.json | ||||
output { | output { | ||||
File multiqc_html = "multiqc_report.html" | File multiqc_html = "multiqc_report.html" | ||||
Array[File] multiqc_txt = glob("multiqc_data/*") | Array[File] multiqc_txt = glob("multiqc_data/*") | ||||
File fastqc_qualimap = "multiqc_general_stats.txt" | |||||
File fastqc = "multiqc_fastqc.txt" | |||||
File fastqc = "multiqc_general_stats.txt" | |||||
File fastqscreen = "multiqc_fastq_screen.txt" | File fastqscreen = "multiqc_fastq_screen.txt" | ||||
File hap = "multiqc_happy_data.json" | File hap = "multiqc_happy_data.json" | ||||
} | } |
task sentieon { | task sentieon { | ||||
File aln_metrics | File aln_metrics | ||||
File dedup_metrics | |||||
File is_metrics | File is_metrics | ||||
File deduped_coverage | |||||
String sample_name | |||||
File wgsmetrics | |||||
File quality_yield | |||||
String docker | String docker | ||||
String cluster_config | String cluster_config | ||||
String disk_size | String disk_size |
import "./tasks/mapping.wdl" as mapping | import "./tasks/mapping.wdl" as mapping | ||||
import "./tasks/Metrics.wdl" as Metrics | |||||
import "./tasks/Dedup.wdl" as Dedup | import "./tasks/Dedup.wdl" as Dedup | ||||
import "./tasks/deduped_Metrics.wdl" as deduped_Metrics | import "./tasks/deduped_Metrics.wdl" as deduped_Metrics | ||||
import "./tasks/Realigner.wdl" as Realigner | import "./tasks/Realigner.wdl" as Realigner | ||||
import "./tasks/quartet_mendelian.wdl" as quartet_mendelian | import "./tasks/quartet_mendelian.wdl" as quartet_mendelian | ||||
import "./tasks/fastqc.wdl" as fastqc | import "./tasks/fastqc.wdl" as fastqc | ||||
import "./tasks/fastqscreen.wdl" as fastqscreen | import "./tasks/fastqscreen.wdl" as fastqscreen | ||||
import "./tasks/qualimap.wdl" as qualimap | |||||
import "./tasks/extract_multiqc.wdl" as extract_multiqc | |||||
import "./tasks/extract_tables.wdl" as extract_tables | |||||
import "./tasks/D5_D6.wdl" as D5_D6 | import "./tasks/D5_D6.wdl" as D5_D6 | ||||
import "./tasks/merge_family.wdl" as merge_family | import "./tasks/merge_family.wdl" as merge_family | ||||
disk_size=disk_size | disk_size=disk_size | ||||
} | } | ||||
call Metrics.Metrics as Metrics { | |||||
input: | |||||
SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR, | |||||
fasta=fasta, | |||||
ref_dir=ref_dir, | |||||
sorted_bam=mapping.sorted_bam, | |||||
sorted_bam_index=mapping.sorted_bam_index, | |||||
sample=quartet[2], | |||||
docker=SENTIEONdocker, | |||||
disk_size=disk_size, | |||||
cluster_config=BIGcluster_config | |||||
} | |||||
call Dedup.Dedup as Dedup { | call Dedup.Dedup as Dedup { | ||||
input: | input: | ||||
SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR, | SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR, | ||||
cluster_config=BIGcluster_config | cluster_config=BIGcluster_config | ||||
} | } | ||||
call qualimap.qualimap as qualimap { | |||||
input: | |||||
bam=Dedup.Dedup_bam, | |||||
bai=Dedup.Dedup_bam_index, | |||||
docker=QUALIMAPdocker, | |||||
cluster_config=BIGcluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call deduped_Metrics.deduped_Metrics as deduped_Metrics { | call deduped_Metrics.deduped_Metrics as deduped_Metrics { | ||||
input: | input: | ||||
SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR, | SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR, | ||||
disk_size=disk_size, | disk_size=disk_size, | ||||
cluster_config=BIGcluster_config | cluster_config=BIGcluster_config | ||||
} | } | ||||
call Haplotyper_gVCF.Haplotyper_gVCF as Haplotyper_gVCF { | call Haplotyper_gVCF.Haplotyper_gVCF as Haplotyper_gVCF { | ||||
input: | input: | ||||
SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR, | SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR, | ||||
read2_zip=fastqc.read2_zip, | read2_zip=fastqc.read2_zip, | ||||
txt1=fastqscreen.txt1, | txt1=fastqscreen.txt1, | ||||
txt2=fastqscreen.txt2, | txt2=fastqscreen.txt2, | ||||
zip=qualimap.zip, | |||||
summary=benchmark.summary, | summary=benchmark.summary, | ||||
docker=MULTIQCdocker, | docker=MULTIQCdocker, | ||||
cluster_config=SMALLcluster_config, | cluster_config=SMALLcluster_config, | ||||
disk_size=disk_size | disk_size=disk_size | ||||
} | } | ||||
call extract_multiqc.extract_multiqc as extract_multiqc { | |||||
call extract_tables.extract_tables as extract_tables { | |||||
input: | input: | ||||
fastqc_qualimap=multiqc.fastqc_qualimap, | |||||
fastqc=multiqc.fastqc, | fastqc=multiqc.fastqc, | ||||
fastqscreen=multiqc.fastqscreen, | fastqscreen=multiqc.fastqscreen, | ||||
hap=multiqc.hap, | hap=multiqc.hap, | ||||
aln=deduped_Metrics.dedeuped_aln_metrics, | |||||
quality_yield=deduped_Metrics.deduped_QualityYield, | |||||
wgs_metrics=deduped_Metrics.deduped_wgsmetrics, | |||||
docker=DIYdocker, | docker=DIYdocker, | ||||
cluster_config=SMALLcluster_config, | cluster_config=SMALLcluster_config, | ||||
disk_size=disk_size | disk_size=disk_size |