@@ -0,0 +1,65 @@ | |||
import json
import pandas as pd
import sys, argparse, os

# Extract per-tool summary tables from MultiQC output files and write one
# tab-separated result file per tool (FastQC, FastQ Screen, Qualimap, hap.py).
parser = argparse.ArgumentParser(description="This script is to get information from multiqc")
parser.add_argument('-fastqc_qualimap', '--fastqc_qualimap', type=str, help='multiqc_general_stats.txt', required=True)
parser.add_argument('-fastqc', '--fastqc', type=str, help='multiqc_fastqc.txt', required=True)
parser.add_argument('-fastqscreen', '--fastqscreen', type=str, help='multiqc_fastq_screen.txt', required=True)
parser.add_argument('-hap', '--happy', type=str, help='multiqc_happy_data.json', required=True)
args = parser.parse_args()

# Rename input:
fastqc_qualimap_file = args.fastqc_qualimap
fastqc_file = args.fastqc
fastqscreen_file = args.fastqscreen
hap_file = args.happy

# FastQC and Qualimap share multiqc_general_stats.txt; split it by column prefix.
dat = pd.read_table(fastqc_qualimap_file)
# .copy() so insert() works on an independent frame, not a view of `dat`
# (avoids pandas SettingWithCopyWarning).
fastqc = dat.loc[:, dat.columns.str.startswith('FastQC')].copy()
fastqc.insert(loc=0, column='Sample', value=dat['Sample'])
fastqc_stat = fastqc.dropna()

# qualimap
qualimap = dat.loc[:, dat.columns.str.startswith('QualiMap')].copy()
qualimap.insert(loc=0, column='Sample', value=dat['Sample'])
qualimap_stat = qualimap.dropna()

# FastQC per-module pass/warn/fail flags (columns between these two markers).
dat = pd.read_table(fastqc_file)
fastqc_module = dat.loc[:, "per_base_sequence_quality":"kmer_content"].copy()
fastqc_module.insert(loc=0, column='Sample', value=dat['Sample'])
fastqc_all = pd.merge(fastqc_stat, fastqc_module, how='outer', on='Sample')

# FastQ Screen percentage columns; MultiQC appends "_screen" to sample names,
# so strip it to match the other tables.
dat = pd.read_table(fastqscreen_file)
fastqscreen = dat.loc[:, dat.columns.str.endswith('percentage')].copy()
fastqscreen.insert(loc=0, column='Sample',
                   value=dat['Sample'].str.replace('_screen', '', regex=False))

# benchmark: hap.py results — keep only the *ALL columns (whole-genome rows),
# transpose so samples become rows, then pull precision/recall.
with open(hap_file) as hap_json:
    happy = json.load(hap_json)
dat = pd.DataFrame.from_records(happy)
dat = dat.loc[:, dat.columns.str.endswith('ALL')]
dat_transposed = dat.T
benchmark = dat_transposed.loc[:, ['sample_id', 'METRIC.Precision', 'METRIC.Recall']]
benchmark.columns = ['Sample', 'Precision', 'Recall']

# Output: file names must match the WDL task's declared outputs.
fastqc_all.to_csv('fastqc.final.result.txt', sep="\t", index=False)
fastqscreen.to_csv('fastqscreen.final.result.txt', sep="\t", index=False)
qualimap_stat.to_csv('qualimap.final.result.txt', sep="\t", index=False)
benchmark.to_csv('benchmark.final.result.txt', sep="\t", index=False)
@@ -1,6 +1,5 @@ | |||
task Haplotyper_gVCF { | |||
File ref_dir | |||
String SENTIEON_INSTALL_DIR | |||
String fasta |
@@ -0,0 +1,29 @@ | |||
# Run extract_multiqc.py on the MultiQC tables/json to produce one
# final tab-separated result file per QC tool.
task extract_multiqc {
	File fastqc_qualimap   # multiqc_general_stats.txt
	File fastqc            # multiqc_fastqc.txt
	File fastqscreen       # multiqc_fastq_screen.txt
	File hap               # multiqc_happy_data.json
	String docker
	String cluster_config
	String disk_size

	command <<<
		python /opt/extract_multiqc.py -fastqc_qualimap ${fastqc_qualimap} -fastqc ${fastqc} -fastqscreen ${fastqscreen} -hap ${hap}
	>>>

	runtime {
		docker: docker
		cluster: cluster_config
		systemDisk: "cloud_ssd 40"
		dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
	}

	# File names are fixed by extract_multiqc.py's to_csv() calls.
	output {
		File fastqc_result = "fastqc.final.result.txt"
		File fastqscreen_result = "fastqscreen.final.result.txt"
		File qualimap_result = "qualimap.final.result.txt"
		File hap_result = "benchmark.final.result.txt"
	}
}
@@ -1,8 +1,8 @@ | |||
task mendelian { | |||
File family_vcf | |||
File ref_dir | |||
String family_name = basename(family_vcf,".family.vcf") | |||
String fasta | |||
String project | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
@@ -11,19 +11,19 @@ task mendelian { | |||
export LD_LIBRARY_PATH=/opt/htslib-1.9 | |||
nt=$(nproc) | |||
echo -e "${project}\tLCL8\t0\t0\t2\t-9\n${project}\tLCL7\t0\t0\t1\t-9\n${project}\tLCL5\tLCL7\tLCL8\t2\t-9" > ${project}.D5.ped | |||
echo -e "${family_name}\tLCL8\t0\t0\t2\t-9\n${family_name}\tLCL7\t0\t0\t1\t-9\n${family_name}\tLCL5\tLCL7\tLCL8\t2\t-9" > ${family_name}.D5.ped | |||
mkdir VBT_D5 | |||
/opt/VBT-TrioAnalysis/vbt mendelian -ref ${ref_dir}/${fasta} -mother ${family_vcf} -father ${family_vcf} -child ${family_vcf} -pedigree ${project}.D5.ped -outDir VBT_D5 -out-prefix ${project}.D5 --output-violation-regions -thread-count $nt | |||
/opt/VBT-TrioAnalysis/vbt mendelian -ref ${ref_dir}/${fasta} -mother ${family_vcf} -father ${family_vcf} -child ${family_vcf} -pedigree ${family_name}.D5.ped -outDir VBT_D5 -out-prefix ${family_name}.D5 --output-violation-regions -thread-count $nt | |||
cat VBT_D5/${project}.D5_trio.vcf > ${project}.D5.vcf | |||
cat VBT_D5/${family_name}.D5_trio.vcf > ${family_name}.D5.vcf | |||
echo -e "${project}\tLCL8\t0\t0\t2\t-9\n${project}\tLCL7\t0\t0\t1\t-9\n${project}\tLCL6\tLCL7\tLCL8\t2\t-9" > ${project}.D6.ped | |||
echo -e "${family_name}\tLCL8\t0\t0\t2\t-9\n${family_name}\tLCL7\t0\t0\t1\t-9\n${family_name}\tLCL6\tLCL7\tLCL8\t2\t-9" > ${family_name}.D6.ped | |||
mkdir VBT_D6 | |||
/opt/VBT-TrioAnalysis/vbt mendelian -ref ${ref_dir}/${fasta} -mother ${family_vcf} -father ${family_vcf} -child ${family_vcf} -pedigree ${project}.D6.ped -outDir VBT_D6 -out-prefix ${project}.D6 --output-violation-regions -thread-count $nt | |||
/opt/VBT-TrioAnalysis/vbt mendelian -ref ${ref_dir}/${fasta} -mother ${family_vcf} -father ${family_vcf} -child ${family_vcf} -pedigree ${family_name}.D6.ped -outDir VBT_D6 -out-prefix ${family_name}.D6 --output-violation-regions -thread-count $nt | |||
cat VBT_D6/${project}.D6_trio.vcf > ${project}.D6.vcf | |||
cat VBT_D6/${family_name}.D6_trio.vcf > ${family_name}.D6.vcf | |||
>>> | |||
runtime { | |||
@@ -33,13 +33,12 @@ task mendelian { | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File D5_ped = "${project}.D5.ped" | |||
File D6_ped = "${project}.D6.ped" | |||
File D5_ped = "${family_name}.D5.ped" | |||
File D6_ped = "${family_name}.D6.ped" | |||
Array[File] D5_mendelian = glob("VBT_D5/*") | |||
Array[File] D6_mendelian = glob("VBT_D6/*") | |||
File D5_trio_vcf = "${project}.D5.vcf" | |||
File D6_trio_vcf = "${project}.D6.vcf" | |||
File family_all_vcf = "${project}.vcf" | |||
File D5_trio_vcf = "${family_name}.D5.vcf" | |||
File D6_trio_vcf = "${family_name}.D6.vcf" | |||
} | |||
} | |||
@@ -2,14 +2,14 @@ task merge_mendelian { | |||
File D5_trio_vcf | |||
File D6_trio_vcf | |||
File family_vcf | |||
String project | |||
String family_name = basename(family_vcf,".family.vcf") | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
command <<< | |||
cat ${D5_trio_vcf} | grep -v '##' > ${project}.D5.txt | |||
cat ${D6_trio_vcf} | grep -v '##' > ${project}.D6.txt | |||
cat ${D5_trio_vcf} | grep -v '##' > ${family_name}.D5.txt | |||
cat ${D6_trio_vcf} | grep -v '##' > ${family_name}.D6.txt | |||
cat ${family_vcf} | grep -v '##' | awk ' | |||
BEGIN { OFS = "\t" } | |||
NF > 2 && FNR > 1 { | |||
@@ -18,8 +18,8 @@ task merge_mendelian { | |||
} | |||
} | |||
{ print } | |||
' > ${project}.consensus.txt | |||
python /opt/merge_two_family_with_genotype.py -LCL5 ${project}.D5.txt -LCL6 ${project}.D6.txt -genotype ${project}.consensus.txt -family ${project}.mendelian | |||
' > ${family_name}.consensus.txt | |||
python /opt/merge_two_family_with_genotype.py -LCL5 ${family_name}.D5.txt -LCL6 ${family_name}.D6.txt -genotype ${family_name}.consensus.txt -family ${family_name} | |||
>>> | |||
runtime { | |||
@@ -29,7 +29,7 @@ task merge_mendelian { | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File project_mendelian = "${project}.mendelian.txt" | |||
File project_mendelian_summary = "${project}.mendelian.summary.txt" | |||
File project_mendelian = "${family_name}.mendelian.txt" | |||
File project_mendelian_summary = "${family_name}.mendelian.summary.txt" | |||
} | |||
} |
@@ -30,6 +30,11 @@ task multiqc { | |||
done | |||
multiqc /cromwell_root/tmp/ | |||
cp multiqc_data/multiqc_general_stats.txt multiqc_general_stats.txt
cp multiqc_data/multiqc_fastqc.txt multiqc_fastqc.txt
cp multiqc_data/multiqc_fastq_screen.txt multiqc_fastq_screen.txt
cp multiqc_data/multiqc_happy_data.json multiqc_happy_data.json
>>> | |||
@@ -43,5 +48,9 @@ task multiqc { | |||
output { | |||
File multiqc_html = "multiqc_report.html" | |||
Array[File] multiqc_txt = glob("multiqc_data/*") | |||
File fastqc_qualimap = "multiqc_general_stats.txt" | |||
File fastqc = "multiqc_fastqc.txt" | |||
File fastqscreen = "multiqc_fastq_screen.txt" | |||
File hap = "multiqc_happy_data.json" | |||
} | |||
} |
@@ -15,6 +15,7 @@ import "./tasks/quartet_mendelian.wdl" as quartet_mendelian | |||
import "./tasks/fastqc.wdl" as fastqc | |||
import "./tasks/fastqscreen.wdl" as fastqscreen | |||
import "./tasks/qualimap.wdl" as qualimap | |||
import "./tasks/extract_multiqc.wdl" as extract_multiqc | |||
workflow {{ project_name }} { | |||
@@ -224,6 +225,17 @@ workflow {{ project_name }} { | |||
disk_size=disk_size | |||
} | |||
call extract_multiqc.extract_multiqc as extract_multiqc { | |||
input: | |||
fastqc_qualimap=multiqc.fastqc_qualimap, | |||
fastqc=multiqc.fastqc, | |||
fastqscreen=multiqc.fastqscreen, | |||
hap=multiqc.hap, | |||
docker=DIYdocker, | |||
cluster_config=SMALLcluster_config, | |||
disk_size=disk_size | |||
} | |||
Array[File] family_vcfs = split_gvcf_files.family_vcf | |||
scatter (idx in range(length(family_vcfs))) { | |||
@@ -232,7 +244,6 @@ workflow {{ project_name }} { | |||
family_vcf=family_vcfs[idx], | |||
ref_dir=ref_dir, | |||
fasta=fasta, | |||
project=project, | |||
docker=MENDELIANdocker, | |||
cluster_config=BIGcluster_config, | |||
disk_size=disk_size | |||
@@ -243,7 +254,6 @@ workflow {{ project_name }} { | |||
D5_trio_vcf=mendelian.D5_trio_vcf, | |||
D6_trio_vcf=mendelian.D6_trio_vcf, | |||
family_vcf=family_vcfs[idx], | |||
project=project, | |||
docker=DIYdocker, | |||
cluster_config=SMALLcluster_config, | |||
disk_size=disk_size |