{
  "{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}",
  "{{ project_name }}.sample_id": "{{ sample_id }}",
  "{{ project_name }}.read1": "{{ read1 }}",
  "{{ project_name }}.read2": "{{ read2 }}",
  "{{ project_name }}.idx": "oss://pgx-reference-data/reference/hisat2/grch38_snp_tran/",
  "{{ project_name }}.gtf": "oss://pgx-reference-data/reference/annotation/Homo_sapiens.GRCh38.93.gtf",
  "{{ project_name }}.idx_prefix": "genome_snp_tran",
  "{{ project_name }}.hisat2.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/hisat2:v2.1.0-2",
  "{{ project_name }}.hisat2.cluster": "OnDemand bcs.a2.3xlarge img-ubuntu-vpc",
  "{{ project_name }}.samtools.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/samtools:v1.3.1",
  "{{ project_name }}.samtools.cluster": "OnDemand bcs.a2.large img-ubuntu-vpc",
  "{{ project_name }}.stringtie.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/stringtie:v1.3.4",
  "{{ project_name }}.stringtie.cluster": "OnDemand bcs.a2.large img-ubuntu-vpc",
  "{{ project_name }}.fastp.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastp:0.19.6",
  "{{ project_name }}.fastp.cluster": "OnDemand bcs.a2.large img-ubuntu-vpc",
  "{{ project_name }}.adapter_sequence": "{{ adapter_sequence }}",
  "{{ project_name }}.adapter_sequence_r2": "{{ adapter_sequence_r2 }}",
  "{{ project_name }}.fasta": "GRCh38.d1.vd1.fa",
  "{{ project_name }}.fastqc.disk_size": "150",
  "{{ project_name }}.fastqscreen.cluster_config": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc",
  "{{ project_name }}.fastqc.cluster_config": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc",
  "{{ project_name }}.qualimapBAMqc.disk_size": "500",
  "{{ project_name }}.fastqc.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastqc:v0.11.5",
  "{{ project_name }}.fastqscreen.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastqscreen:0.12.0",
  "{{ project_name }}.qualimapRNAseq.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/qualimap:2.0.0",
  "{{ project_name }}.screen_ref_dir": "oss://pgx-reference-data/fastq_screen_reference/",
  "{{ project_name }}.fastq_screen_conf": "oss://pgx-reference-data/fastq_screen_reference/fastq_screen.conf",
  "{{ project_name }}.multiqc.cluster_config": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc",
  "{{ project_name }}.multiqc.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/multiqc:v1.8",
  "{{ project_name }}.qualimapRNAseq.disk_size": "500",
  "{{ project_name }}.qualimapBAMqc.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/qualimap:2.0.0",
  "{{ project_name }}.qualimapBAMqc.cluster_config": "OnDemand bcs.a2.7xlarge img-ubuntu-vpc",
  "{{ project_name }}.qualimapRNAseq.cluster_config": "OnDemand bcs.a2.7xlarge img-ubuntu-vpc",
  "{{ project_name }}.fastqscreen.disk_size": "100",
  "{{ project_name }}.multiqc.disk_size": "100",
  "{{ project_name }}.ref_dir": "oss://chinese-quartet/quartet-storage-data/reference_data/"
}
# Trim and quality-filter one paired-end sample with fastp.
#
# Inputs:
#   read1 / read2                          raw paired-end FASTQ files
#   sample_id                              prefix for every file written by the task
#   adapter_sequence / adapter_sequence_r2 adapter sequences for read 1 / read 2
#   docker / cluster                       container image and backend instance spec
# Outputs:
#   Trim_R1 / Trim_R2  trimmed reads; json / report  fastp JSON and HTML reports
task fastp {
  File read1
  File read2
  String sample_id
  String adapter_sequence
  String adapter_sequence_r2
  String docker
  String cluster

  command <<<
    fastp --thread 4 -l 50 -q 20 -u 20 \
      --adapter_sequence ${adapter_sequence} \
      --adapter_sequence_r2 ${adapter_sequence_r2} \
      --detect_adapter_for_pe \
      -i ${read1} -I ${read2} \
      -o ${sample_id}_R1.fastq.gz -O ${sample_id}_R2.fastq.gz \
      -j ${sample_id}.json -h ${sample_id}.html
  >>>

  runtime {
    docker: docker
    cluster: cluster
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd 200 /cromwell_root/"
  }

  output {
    # Paths mirror the -j / -h / -o / -O arguments of the command above.
    File json = sample_id + ".json"
    File report = sample_id + ".html"
    File Trim_R1 = sample_id + "_R1.fastq.gz"
    File Trim_R2 = sample_id + "_R2.fastq.gz"
  }
}
# Run FastQC on both mates of a read pair.
#
# Inputs:
#   read1 / read2   FASTQ files to assess (output naming expects .fastq.gz or .fq.gz)
#   docker          container image
#   cluster_config  backend instance spec
#   disk_size       data-disk size, spliced into the dataDisk runtime string
# Outputs:
#   <basename>_fastqc.html and <basename>_fastqc.zip for each mate
task fastqc {
  File read1
  File read2
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -o pipefail
    set -e
    nt=$(nproc)
    # Mates are processed one after the other, each with all available cores.
    for fq in ${read1} ${read2}
    do
      fastqc -t $nt -o ./ $fq
    done
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    # Report names are the read basename with its .fastq.gz / .fq.gz suffix
    # replaced by _fastqc.html / _fastqc.zip.
    File read1_html = sub(basename(read1), "\\.(fastq|fq)\\.gz$", "_fastqc.html")
    File read1_zip = sub(basename(read1), "\\.(fastq|fq)\\.gz$", "_fastqc.zip")
    File read2_html = sub(basename(read2), "\\.(fastq|fq)\\.gz$", "_fastqc.html")
    File read2_zip = sub(basename(read2), "\\.(fastq|fq)\\.gz$", "_fastqc.zip")
  }
}
# Screen both mates of a read pair against a panel of reference genomes
# with FastQ Screen (bowtie2 aligner, first 100000 reads).
#
# Inputs:
#   read1 / read2      FASTQ files to screen (.fastq.gz or .fq.gz)
#   screen_ref_dir     directory holding the screening references
#   fastq_screen_conf  FastQ Screen configuration file
#   docker             container image
#   cluster_config     backend instance spec
#   disk_size          data-disk size, spliced into the dataDisk runtime string
# Outputs:
#   <stem>_screen.png / .txt / .html for each mate
task fastq_screen {
  File read1
  File read2
  File screen_ref_dir
  File fastq_screen_conf
  # Stem used for the output file names. The suffix pattern matches the one
  # used by the fastqc task, so .fq.gz inputs resolve to the right files
  # (a plain basename(read, ".fastq.gz") leaves ".fq.gz" in place).
  String read1name = sub(basename(read1), "\\.(fastq|fq)\\.gz$", "")
  String read2name = sub(basename(read2), "\\.(fastq|fq)\\.gz$", "")
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -o pipefail
    set -e
    nt=$(nproc)
    # NOTE(review): the references are staged under /cromwell_root/tmp/,
    # presumably because fastq_screen.conf points there - confirm.
    mkdir -p /cromwell_root/tmp
    cp -r ${screen_ref_dir} /cromwell_root/tmp/
    fastq_screen --aligner bowtie2 --conf ${fastq_screen_conf} --top 100000 --threads $nt ${read1}
    fastq_screen --aligner bowtie2 --conf ${fastq_screen_conf} --top 100000 --threads $nt ${read2}
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File png1 = "${read1name}_screen.png"
    File txt1 = "${read1name}_screen.txt"
    File html1 = "${read1name}_screen.html"
    File png2 = "${read2name}_screen.png"
    File txt2 = "${read2name}_screen.txt"
    File html2 = "${read2name}_screen.html"
  }
}
# Align trimmed paired-end reads with HISAT2.
#
# Inputs:
#   idx               directory containing the HISAT2 index
#   idx_prefix        index base name inside idx
#   Trim_R1 / Trim_R2 trimmed mate files
#   sample_id         prefix for every file written by the task
#   docker / cluster  container image and backend instance spec
# Outputs:
#   sam                          alignment in SAM format
#   unmapread_1p / unmapread_2p  gzip-compressed mates written by --un-conc-gz
task hisat2 {
  File idx
  String idx_prefix
  File Trim_R1
  File Trim_R2
  String sample_id
  String docker
  String cluster

  command <<<
    nt=$(nproc)
    hisat2 -t -p $nt \
      -x ${idx}/${idx_prefix} \
      -1 ${Trim_R1} -2 ${Trim_R2} \
      -S ${sample_id}.sam \
      --un-conc-gz ${sample_id}_un.fq.gz
  >>>

  runtime {
    docker: docker
    cluster: cluster
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd 200 /cromwell_root/"
  }

  output {
    File sam = sample_id + ".sam"
    # The mate number sits before the final ".gz" of the --un-conc-gz path.
    File unmapread_1p = sample_id + "_un.fq.1.gz"
    File unmapread_2p = sample_id + "_un.fq.2.gz"
  }
}
# Aggregate the per-sample QC results into a single MultiQC report.
#
# Inputs:
#   read1_zip / read2_zip  FastQC zip archives for each mate
#   txt1 / txt2            FastQ Screen text reports for each mate
#   bamqc_zip              gzip-compressed tar archives of Qualimap bamqc results
#   rnaseq_zip             gzip-compressed tar archives of Qualimap rnaseq results
#   docker                 container image
#   cluster_config         backend instance spec
#   disk_size              data-disk size, spliced into the dataDisk runtime string
# Outputs:
#   multiqc_html  the HTML report
#   multiqc_txt   every file under multiqc_data/
task multiqc {
  Array[File] read1_zip
  Array[File] read2_zip
  Array[File] txt1
  Array[File] txt2
  Array[File] bamqc_zip
  Array[File] rnaseq_zip
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -o pipefail
    set -e

    # One staging directory per QC tool; MultiQC scans the common parent.
    mkdir -p /cromwell_root/tmp/fastqc /cromwell_root/tmp/fastqscreen /cromwell_root/tmp/bamqc /cromwell_root/tmp/rnaseq

    cp ${sep=" " read1_zip} ${sep=" " read2_zip} /cromwell_root/tmp/fastqc
    cp ${sep=" " txt1} ${sep=" " txt2} /cromwell_root/tmp/fastqscreen

    # The Qualimap archives are unpacked with tar -z, so they must be
    # gzip-compressed tarballs regardless of their file extension.
    for archive in ${sep=" " bamqc_zip}
    do
      tar -zxvf $archive -C /cromwell_root/tmp/bamqc
    done
    for archive in ${sep=" " rnaseq_zip}
    do
      tar -zxvf $archive -C /cromwell_root/tmp/rnaseq
    done

    multiqc /cromwell_root/tmp/
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File multiqc_html = "multiqc_report.html"
    Array[File] multiqc_txt = glob("multiqc_data/*")
  }
}
# Run Qualimap bamqc on one BAM file and pack the result directory.
#
# Inputs:
#   bam             alignment file to assess
#   docker          container image
#   cluster_config  backend instance spec
#   disk_size       data-disk size, spliced into the dataDisk runtime string
# Outputs:
#   bamqc_zip  archive of the <bamname>_bamqc result directory
task qualimapBAMqc {
  File bam
  # BAM file name without directory and ".bam" suffix; prefixes all outputs.
  String bamname = basename(bam,".bam")
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -o pipefail
    set -e
    nt=$(nproc)
    /opt/qualimap/qualimap bamqc \
      -bam ${bam} \
      -outformat PDF:HTML \
      -nt $nt \
      -outdir ${bamname}_bamqc \
      --java-mem-size=32G
    # Written with tar -z: the archive is a gzip-compressed tarball even
    # though its name ends in ".zip".
    tar -zcvf ${bamname}_bamqc_qualimap.zip ${bamname}_bamqc
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File bamqc_zip = bamname + "_bamqc_qualimap.zip"
  }
}
# Convert a SAM file to a coordinate-sorted, indexed BAM and extract the
# insert-size rows from `samtools stats`.
#
# Inputs:
#   sam               alignment in SAM format
#   sample_id         prefix used to derive the default output names
#   docker / cluster  container image and backend instance spec
# Outputs:
#   out_bam        sorted BAM
#   out_bam_index  index of the sorted BAM
#   out_ins_size   the "IS" rows of samtools stats with the row tag removed
task samtools {
  File sam
  String sample_id
  # File names written by the command. The command refers to these
  # declarations rather than rebuilding the names from sample_id, so the
  # output section always points at what was actually written.
  String bam = sample_id + ".bam"
  String sorted_bam = sample_id + ".sorted.bam"
  String sorted_bam_index = sample_id + ".sorted.bam.bai"
  String ins_size = sample_id + ".ins_size"
  String docker
  String cluster

  command <<<
    set -o pipefail
    set -e
    /opt/conda/bin/samtools view -bS ${sam} > ${bam}
    /opt/conda/bin/samtools sort -m 1000000000 ${bam} -o ${sorted_bam}
    # The index path is passed explicitly so it matches out_bam_index.
    /opt/conda/bin/samtools index ${sorted_bam} ${sorted_bam_index}
    # Keep only the insert-size ("IS") rows and drop the leading tag column.
    /opt/conda/bin/samtools stats -i 8000 ${sorted_bam} | grep '^IS' | cut -f 2- > ${ins_size}
  >>>

  runtime {
    docker: docker
    cluster: cluster
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd 200 /cromwell_root/"
  }

  output {
    File out_bam = sorted_bam
    File out_bam_index = sorted_bam_index
    File out_ins_size = ins_size
  }
}
# Quantify expression for one sample with StringTie against a reference
# annotation and write Ballgown input tables.
#
# Inputs:
#   bam               alignment file
#   gtf               reference annotation
#   sample_id         prefix for every file written by the task
#   docker / cluster  container image and backend instance spec
# Outputs:
#   covered_transcripts  file written by -C
#   gene_abundance       file written by -A
#   ballgown             assembled GTF plus the five .ctab tables
#   genecount            <sample_id>_genecount.csv
task stringtie {
  File bam
  File gtf
  String docker
  String sample_id
  String cluster

  command <<<
    nt=$(nproc)
    mkdir ballgown
    # NOTE(review): upstream StringTie documents -g as an integer gap
    # setting, not an output path; this image presumably ships a build
    # that writes gene counts with -g - confirm.
    /opt/conda/bin/stringtie -e -B -p $nt -G ${gtf} -o ballgown/${sample_id}/${sample_id}.gtf -C ${sample_id}.cov.ref.gtf -A ${sample_id}.gene.abundance.txt ${bam} -g ${sample_id}_genecount.csv
  >>>

  runtime {
    docker: docker
    cluster: cluster
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd 150 /cromwell_root/"
  }

  output {
    File covered_transcripts = "${sample_id}.cov.ref.gtf"
    File gene_abundance = "${sample_id}.gene.abundance.txt"
    Array[File] ballgown = ["ballgown/${sample_id}/${sample_id}.gtf", "ballgown/${sample_id}/e2t.ctab", "ballgown/${sample_id}/e_data.ctab", "ballgown/${sample_id}/i2t.ctab", "ballgown/${sample_id}/i_data.ctab", "ballgown/${sample_id}/t_data.ctab"]
    # Interpolated with "${...}"; without the "$" the path is the literal
    # text "{sample_id}_genecount.csv", which the command never creates.
    File genecount = "${sample_id}_genecount.csv"
  }
}
import "./tasks/fastp.wdl" as fastp
import "./tasks/hisat2.wdl" as hisat2
import "./tasks/fastqc.wdl" as fastqc
import "./tasks/multiqc.wdl" as multiqc
import "./tasks/samtools.wdl" as samtools
import "./tasks/fastqscreen.wdl" as fastqscreen
import "./tasks/qualimapBAMqc.wdl" as qualimapBAMqc
import "./tasks/qualimapRNAseq.wdl" as qualimapRNAseq
import "./tasks/stringtie.wdl" as stringtie
# Per-sample RNA-seq processing followed by one aggregated MultiQC report.
#
# For every row of inputSamplesFile the scatter runs:
#   fastp -> fastqc, fastq_screen, hisat2 -> samtools
#         -> qualimapBAMqc, qualimapRNAseq, stringtie
# and MultiQC then collects the gathered QC outputs of all samples.
workflow {{ project_name }} {
  File inputSamplesFile
  # One row per sample. Columns, as consumed below:
  #   [0] read1  [1] read2  [2] sample_id
  #   [3] adapter_sequence  [4] adapter_sequence_r2
  Array[Array[File]] inputSamples = read_tsv(inputSamplesFile)
  File screen_ref_dir
  File fastq_screen_conf
  # Reference annotation (declared once; a second declaration of the same
  # name is a duplicate-declaration error).
  File gtf
  File idx
  String idx_prefix
  # The following inputs are not referenced by any call in this workflow
  # (per-sample values come from inputSamples); they are kept so existing
  # inputs JSON files that set them remain valid.
  String fasta
  String sample_id
  File read1
  File read2
  String adapter_sequence
  String adapter_sequence_r2

  scatter (quartet in inputSamples) {
    call fastp.fastp as fastp {
      input:
        sample_id = quartet[2],
        read1 = quartet[0],
        read2 = quartet[1],
        adapter_sequence = quartet[3],
        adapter_sequence_r2 = quartet[4]
    }

    call fastqc.fastqc as fastqc {
      input:
        read1 = fastp.Trim_R1,
        read2 = fastp.Trim_R2
    }

    # The task is defined as "fastq_screen"; the "fastqscreen" alias keeps
    # the fastqscreen.* keys of the inputs JSON and the references below.
    call fastqscreen.fastq_screen as fastqscreen {
      input:
        read1 = fastp.Trim_R1,
        read2 = fastp.Trim_R2,
        screen_ref_dir = screen_ref_dir,
        fastq_screen_conf = fastq_screen_conf
    }

    call hisat2.hisat2 as hisat2 {
      input:
        sample_id = quartet[2],
        idx = idx,
        idx_prefix = idx_prefix,
        Trim_R1 = fastp.Trim_R1,
        Trim_R2 = fastp.Trim_R2
    }

    call samtools.samtools as samtools {
      input:
        sample_id = quartet[2],
        sam = hisat2.sam
    }

    call qualimapBAMqc.qualimapBAMqc as qualimapBAMqc {
      input:
        bam = samtools.out_bam
    }

    # NOTE(review): the qualimapRNAseq task is not visible from this file.
    # The call assumes it takes `bam` and `gtf` and exposes `rnaseq_zip`
    # (the output multiqc consumes below) - confirm against
    # ./tasks/qualimapRNAseq.wdl.
    call qualimapRNAseq.qualimapRNAseq as qualimapRNAseq {
      input:
        bam = samtools.out_bam,
        gtf = gtf
    }

    call stringtie.stringtie as stringtie {
      input:
        sample_id = quartet[2],
        gtf = gtf,
        bam = samtools.out_bam
    }
  }

  # Outputs of scattered calls are gathered into arrays, one element per sample.
  call multiqc.multiqc as multiqc {
    input:
      read1_zip = fastqc.read1_zip,
      read2_zip = fastqc.read2_zip,
      txt1 = fastqscreen.txt1,
      txt2 = fastqscreen.txt2,
      bamqc_zip = qualimapBAMqc.bamqc_zip,
      rnaseq_zip = qualimapRNAseq.rnaseq_zip
  }
}