@@ -0,0 +1,18 @@ | |||
{
  "{{ project_name }}.sample_id": "{{ sample_id }}",
  "{{ project_name }}.read1": "{{ read1 }}",
  "{{ project_name }}.read2": "{{ read2 }}",
  "{{ project_name }}.idx": "oss://pgx-reference-data/reference/hisat2/grch38_snp_tran/",
  "{{ project_name }}.gtf": "oss://pgx-reference-data/reference/annotation/Homo_sapiens.GRCh38.93.gtf",
  "{{ project_name }}.idx_prefix": "genome_snp_tran",
  "{{ project_name }}.hisat2.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/hisat2:v2.0.5-1-deb-cv1",
  "{{ project_name }}.hisat2.cluster": "OnDemand bcs.a2.3xlarge img-ubuntu-vpc",
  "{{ project_name }}.samtools.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/samtools:v1.3.1",
  "{{ project_name }}.samtools.cluster": "OnDemand bcs.a2.large img-ubuntu-vpc",
  "{{ project_name }}.stringtie.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/stringtie:v1.3.4",
  "{{ project_name }}.stringtie.cluster": "OnDemand bcs.a2.large img-ubuntu-vpc",
  "{{ project_name }}.fastp.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastp:0.19.6",
  "{{ project_name }}.fastp.cluster": "OnDemand bcs.a2.large img-ubuntu-vpc",
  "{{ project_name }}.adapter_sequence": "{{ adapter_sequence | default('AGATCGGAAGAGCACACGTCTGAACTCCAGTCA', true) }}",
  "{{ project_name }}.adapter_sequence_r2": "{{ adapter_sequence_r2 | default('AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT', true) }}"
}
@@ -0,0 +1,30 @@ | |||
# Trims and filters one paired-end sample with fastp.
#
# Inputs:
#   sample_id            prefix used for every file this task writes
#   read1, read2         the two mate FASTQ files of the sample
#   adapter_sequence     adapter passed to fastp via --adapter_sequence
#   adapter_sequence_r2  adapter passed to fastp via --adapter_sequence_r2
#   docker, cluster      container image and backend instance spec (runtime)
#
# Outputs: the trimmed mate files plus fastp's JSON and HTML reports.
task fastp {
    # Execution environment
    String docker
    String cluster

    # Sample data
    String sample_id
    File read1
    File read2

    # Adapters; both are spliced into the command unquoted, so they must be
    # non-empty or the following flag is consumed as the value.
    String adapter_sequence
    String adapter_sequence_r2

    command <<<
        fastp \
            --thread 4 \
            -l 50 \
            -q 20 \
            -u 20 \
            --adapter_sequence ${adapter_sequence} \
            --adapter_sequence_r2 ${adapter_sequence_r2} \
            --detect_adapter_for_pe \
            -i ${read1} \
            -I ${read2} \
            -o ${sample_id}_R1.fastq.gz \
            -O ${sample_id}_R2.fastq.gz \
            -j ${sample_id}.json \
            -h ${sample_id}.html
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd 200 /cromwell_root/"
    }

    output {
        # Trimmed reads, consumed by the alignment step
        File Trim_R1 = "${sample_id}_R1.fastq.gz"
        File Trim_R2 = "${sample_id}_R2.fastq.gz"
        # QC reports
        File json = "${sample_id}.json"
        File report = "${sample_id}.html"
    }
}
@@ -0,0 +1,27 @@ | |||
# Aligns trimmed paired-end reads with hisat2 and writes a SAM file.
#
# Inputs:
#   idx              localized index location; the index base name is
#                    built as <idx>/<idx_prefix>
#   idx_prefix       base name of the index files inside idx
#   Trim_R1/Trim_R2  trimmed mate FASTQ files
#   sample_id        prefix used for every file this task writes
#   docker, cluster  container image and backend instance spec (runtime)
task hisat2 {
    # Execution environment
    String docker
    String cluster

    # Reference index
    File idx
    String idx_prefix

    # Sample data
    String sample_id
    File Trim_R1
    File Trim_R2

    command <<<
        hisat2 -t -p $(nproc) \
            -x ${idx}/${idx_prefix} \
            -1 ${Trim_R1} \
            -2 ${Trim_R2} \
            -S ${sample_id}.sam \
            --un-conc-gz ${sample_id}_un.fq.gz
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd 200 /cromwell_root/"
    }

    output {
        File sam = "${sample_id}.sam"
        # The --un-conc-gz target is expected to be split per mate into
        # <sample_id>_un.fq.1.gz and <sample_id>_un.fq.2.gz.
        File unmapread_1p = "${sample_id}_un.fq.1.gz"
        File unmapread_2p = "${sample_id}_un.fq.2.gz"
    }
}
@@ -0,0 +1,34 @@ | |||
# Converts a SAM alignment into a sorted, indexed BAM and extracts the
# insert-size (IS) records from `samtools stats`.
#
# Inputs:
#   sam              alignment to convert
#   sample_id        prefix from which all default file names are derived
#   bam, sorted_bam, sorted_bam_index, ins_size
#                    file names used by the command/outputs; default to
#                    names derived from sample_id
#   docker, cluster  container image and backend instance spec (runtime)
#
# Outputs: the sorted BAM, its index and the insert-size table.
task samtools {
    File sam
    String sample_id
    String bam = sample_id + ".bam"
    String sorted_bam = sample_id + ".sorted.bam"
    String sorted_bam_index = sample_id + ".sorted.bam.bai"
    String ins_size = sample_id + ".ins_size"
    String docker
    String cluster

    command <<<
        set -o pipefail
        set -e
        /opt/conda/bin/samtools view -bS ${sam} > ${bam}
        /opt/conda/bin/samtools sort -m 1000000000 ${bam} -o ${sorted_bam}
        /opt/conda/bin/samtools index ${sorted_bam}
        # grep exits 1 when no IS record exists; under pipefail + errexit that
        # would fail the whole task, so exit status 1 (and only 1) is tolerated
        # and an empty table is written instead. A samtools failure still aborts.
        /opt/conda/bin/samtools stats -i 8000 ${sorted_bam} \
            | { grep '^IS' || [ $? -eq 1 ]; } \
            | cut -f 2- > ${ins_size}
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd 200 /cromwell_root/"
    }

    output {
        File out_bam = sorted_bam
        File out_bam_index = sorted_bam_index
        # Same name the command writes to, so the two cannot drift apart
        File out_ins_size = ins_size
    }
}
@@ -0,0 +1,26 @@ | |||
# Runs StringTie on a BAM against a reference annotation and collects the
# per-sample result files.
#
# Inputs:
#   bam              alignment to quantify
#   gtf              reference annotation passed via -G
#   sample_id        prefix / directory name for every file this task writes
#   docker, cluster  container image and backend instance spec (runtime)
task stringtie {
    # Execution environment
    String docker
    String cluster

    # Sample data and annotation
    String sample_id
    File bam
    File gtf

    command <<<
        cpus=$(nproc)
        mkdir ballgown
        /opt/conda/bin/stringtie -e -B -p $cpus \
            -G ${gtf} \
            -o ballgown/${sample_id}/${sample_id}.gtf \
            -C ${sample_id}.cov.ref.gtf \
            -A ${sample_id}.gene.abundance.txt \
            ${bam}
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd 150 /cromwell_root/"
    }

    output {
        File covered_transcripts = "${sample_id}.cov.ref.gtf"
        File gene_abundance = "${sample_id}.gene.abundance.txt"
        # Assembled GTF plus the .ctab tables expected next to it
        Array[File] ballgown = [
            "ballgown/${sample_id}/${sample_id}.gtf",
            "ballgown/${sample_id}/e2t.ctab",
            "ballgown/${sample_id}/e_data.ctab",
            "ballgown/${sample_id}/i2t.ctab",
            "ballgown/${sample_id}/i_data.ctab",
            "ballgown/${sample_id}/t_data.ctab"
        ]
    }
}
@@ -0,0 +1,49 @@ | |||
import "./tasks/fastp.wdl" as fastp | |||
import "./tasks/hisat2.wdl" as hisat2 | |||
import "./tasks/samtools.wdl" as samtools | |||
import "./tasks/stringtie.wdl" as stringtie | |||
# Per-sample pipeline: fastp -> hisat2 -> samtools -> stringtie.
# The workflow name is filled in by the template engine.
#
# None of the calls below wires `docker` or `cluster`; those task inputs
# have no default and therefore have to be supplied per call through the
# workflow inputs file.
workflow {{ project_name }} {
    # Sample
    String sample_id
    File read1
    File read2

    # Adapter sequences handed to fastp
    String adapter_sequence
    String adapter_sequence_r2

    # Reference data
    File idx
    String idx_prefix
    File gtf

    # 1. Trim and filter the raw reads
    call fastp.fastp as fastp {
        input:
            sample_id = sample_id,
            read1 = read1,
            read2 = read2,
            adapter_sequence = adapter_sequence,
            adapter_sequence_r2 = adapter_sequence_r2
    }

    # 2. Align the trimmed reads
    call hisat2.hisat2 as hisat2 {
        input:
            sample_id = sample_id,
            idx = idx,
            idx_prefix = idx_prefix,
            Trim_R1 = fastp.Trim_R1,
            Trim_R2 = fastp.Trim_R2
    }

    # 3. SAM -> sorted, indexed BAM
    call samtools.samtools as samtools {
        input:
            sample_id = sample_id,
            sam = hisat2.sam
    }

    # 4. Run StringTie on the sorted BAM
    call stringtie.stringtie as stringtie {
        input:
            gtf = gtf,
            bam = samtools.out_bam,
            sample_id = sample_id
    }
}