{
    "{{ project_name }}.read1": "{{ read1 }}",
    "{{ project_name }}.read2": "{{ read2 }}",
    "{{ project_name }}.idx": "oss://pgx-reference-data/reference/hisat2/grch38_snp_tran/",
    "{{ project_name }}.gtf": "oss://pgx-reference-data/reference/annotation/Homo_sapiens.GRCh38.93.gtf",
    "{{ project_name }}.idx_prefix": "genome_snp_tran",
    "{{ project_name }}.hisat2.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/hisat2:v2.0.5-1-deb-cv1",
    "{{ project_name }}.hisat2.cluster": "OnDemand bcs.a2.3xlarge img-ubuntu-vpc",
    "{{ project_name }}.samtools.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/samtools:v1.3.1",
    "{{ project_name }}.samtools.cluster": "OnDemand bcs.a2.large img-ubuntu-vpc",
    "{{ project_name }}.stringtie.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/stringtie:v1.3.4",
    "{{ project_name }}.stringtie.cluster": "OnDemand bcs.a2.large img-ubuntu-vpc"
}
# Run FastQC on one FASTQ file and collect its HTML report and zip archive.
#
# Inputs:
#   read    - FASTQ file to check. Its name must end in .fq.gz or .fastq.gz,
#             because the output names are derived by replacing that suffix.
#   docker  - container image, forwarded to the runtime section.
#   cluster - backend cluster specification, forwarded to the runtime section.
#   out_dir - directory handed to `fastqc -o` (default "./").
#
# Outputs:
#   html - <read stem>_fastqc.html inside out_dir
#   zip  - <read stem>_fastqc.zip inside out_dir
task qc {
    File read
    String docker
    String cluster
    String out_dir = "./"

    # Prefix that places the declared outputs inside out_dir. For the default
    # (current directory) it is empty, so the output paths stay the bare file
    # names; otherwise trailing slashes are collapsed to a single "/".
    String out_prefix = if (out_dir == "./" || out_dir == ".") then "" else (sub(out_dir, "/+$", "") + "/")

    command {
        set -e
        mkdir -p ${out_dir}
        fastqc -o ${out_dir} ${read}
    }

    runtime {
        docker: docker
        cluster: cluster
    }

    # The input file suffix must be .fq.gz or .fastq.gz
    output {
        File html = out_prefix + sub(basename(read), "\\.(fastq|fq)\\.gz$", "_fastqc.html")
        File zip = out_prefix + sub(basename(read), "\\.(fastq|fq)\\.gz$", "_fastqc.zip")
    }
}
# Align one pair of FASTQ files with HISAT2 and emit the alignments as SAM.
#
# Inputs:
#   idx        - location of the HISAT2 index; used as the directory part of -x.
#   idx_prefix - index base name appended to idx for -x.
#   read_1P    - first-mate reads (-1).
#   read_2P    - second-mate reads (-2).
#   docker     - container image, forwarded to the runtime section.
#   cluster    - backend cluster specification, forwarded to the runtime section.
#
# Outputs:
#   sam - <base>.sam written in the working directory.
task hisat2 {
    File read_1P
    File read_2P
    File idx
    String idx_prefix
    String docker
    String cluster

    # Output stem: the file name of read_1P with everything from the first
    # "." onward removed (the pattern matches a dot followed by non-whitespace
    # characters up to the end of the name).
    String base = sub(basename(read_1P), "\\.\\S+$", "")

    command <<<
        nt=$(nproc)
        hisat2 -t -p $nt \
            -x ${idx}/${idx_prefix} \
            -1 ${read_1P} \
            -2 ${read_2P} \
            -S ${base}.sam
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd 200 /cromwell_root/"
    }

    output {
        File sam = "${base}.sam"
    }
}
# Convert a SAM file to BAM, coordinate-sort it and build the BAM index.
#
# Inputs:
#   sam     - alignments in SAM format; the ".sam" suffix is dropped to form
#             the stem of every file produced here.
#   docker  - container image, forwarded to the runtime section.
#   cluster - backend cluster specification, forwarded to the runtime section.
#
# Outputs:
#   out_bam       - <base>.sorted.bam
#   out_bam_index - <base>.sorted.bam.bai
task samtools {
    File sam
    String docker
    String cluster

    # File names derived from the input name.
    String base = basename(sam, ".sam")
    String bam = "${base}.bam"
    String sorted_bam = "${base}.sorted.bam"
    String sorted_bam_index = "${base}.sorted.bam.bai"

    command <<<
        set -eo pipefail
        /opt/conda/bin/samtools view -bS ${sam} > ${bam}
        /opt/conda/bin/samtools sort -m 5000000000 ${bam} -o ${sorted_bam}
        /opt/conda/bin/samtools index ${sorted_bam}
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd 200 /cromwell_root/"
    }

    output {
        File out_bam = sorted_bam
        File out_bam_index = sorted_bam_index
    }
}
# Run StringTie against a reference annotation (-e -B -G) on a sorted BAM and
# collect the abundance tables it writes.
#
# Inputs:
#   bam     - alignments; the ".sorted.bam" suffix is dropped to form the
#             stem used for all output names.
#   gtf     - reference annotation passed to -G.
#   docker  - container image, forwarded to the runtime section.
#   cluster - backend cluster specification, forwarded to the runtime section.
#
# Outputs:
#   covered_transcripts - <base>.cov.ref.gtf (the -C file)
#   gene_abundance      - <base>.gene.abundance.txt (the -A file)
#   ballgown            - the -o GTF plus the five .ctab tables under
#                         ballgown/<base>/
task stringtie {
    File bam
    File gtf
    String docker
    String cluster
    String base = basename(bam, ".sorted.bam")

    command <<<
        nt=$(nproc)
        mkdir ballgown
        /opt/conda/bin/stringtie -e -B -p $nt \
            -G ${gtf} \
            -o ballgown/${base}/${base}.gtf \
            -C ${base}.cov.ref.gtf \
            -A ${base}.gene.abundance.txt \
            ${bam}
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd 150 /cromwell_root/"
    }

    output {
        File covered_transcripts = base + ".cov.ref.gtf"
        File gene_abundance = base + ".gene.abundance.txt"
        Array[File] ballgown = [
            "ballgown/${base}/${base}.gtf",
            "ballgown/${base}/e2t.ctab",
            "ballgown/${base}/e_data.ctab",
            "ballgown/${base}/i2t.ctab",
            "ballgown/${base}/i_data.ctab",
            "ballgown/${base}/t_data.ctab"
        ]
    }
}
# Trim a pair of FASTQ files with Trimmomatic in paired-end mode.
#
# Inputs:
#   read1   - first-mate reads.
#   read2   - second-mate reads.
#   adapter - adapter FASTA handed to the ILLUMINACLIP step.
#   baseout - stem for the output files; ".fq.gz" is appended and the result
#             is given to -baseout.
#   docker  - container image, forwarded to the runtime section.
#   cluster - backend cluster specification, forwarded to the runtime section.
#
# Outputs:
#   read_1p - <baseout>_1P.fq.gz
#   read_2p - <baseout>_2P.fq.gz
task trimmomatic {
    File read1
    File read2
    File adapter
    String baseout
    String baseout_gz = baseout + ".fq.gz"
    String docker
    String cluster

    # The thread count follows the number of cores on the instance instead of
    # a fixed value, matching how the hisat2 and stringtie tasks pick theirs.
    command {
        nt=$(nproc)
        /opt/conda/bin/trimmomatic PE -threads $nt -phred33 ${read1} ${read2} -baseout ${baseout_gz} ILLUMINACLIP:${adapter}:2:30:10:1:true HEADCROP:10 LEADING:10 TRAILING:10 SLIDINGWINDOW:4:15 MINLEN:36
    }

    runtime {
        docker: docker
        cluster: cluster
    }

    output {
        File read_1p = baseout + "_1P.fq.gz"
        File read_2p = baseout + "_2P.fq.gz"
    }
}
import "./tasks/hisat2.wdl" as hisat2 | |||||
import "./tasks/samtools.wdl" as samtools | |||||
import "./tasks/stringtie.wdl" as stringtie | |||||
# Workflow template: align paired reads with hisat2, convert and sort the
# alignments with samtools, then run stringtie on the sorted BAM.
# `{{ project_name }}` is a template placeholder that is replaced with the
# workflow name when the file is rendered.
#
# The calls below do not pass `docker` or `cluster`, so those task inputs
# have to be supplied from outside the workflow (e.g. through the inputs file).
workflow {{ project_name }} {
    # Paired-end reads
    File read1
    File read2

    # HISAT2 index location and index base name
    File idx
    String idx_prefix

    # Reference annotation handed to stringtie
    File gtf

    call hisat2.hisat2 as hisat2 {
        input:
            idx = idx,
            idx_prefix = idx_prefix,
            read_1P = read1,
            read_2P = read2
    }

    call samtools.samtools as samtools {
        input:
            sam = hisat2.sam
    }

    call stringtie.stringtie as stringtie {
        input:
            gtf = gtf,
            bam = samtools.out_bam
    }
}