@@ -1,6 +1,6 @@ | |||
{ | |||
"idx": "oss://pgx-reference-data/reference/hisat2/grch38_snp_tran/", | |||
"gtf": "oss://pgx-reference-data/reference/tophat2/annotation/gencode.v22.annotation.gtf", | |||
"gtf": "oss://pgx-reference-data/reference/annotation/Homo_sapiens.GRCh38.93.gtf", | |||
"idx_prefix": "genome_snp_tran", | |||
"screen_ref_dir": "oss://pgx-reference-data/fastq_screen_reference/", | |||
"fastq_screen_conf": "oss://pgx-reference-data/fastq_screen_reference/fastq_screen.conf", | |||
@@ -8,6 +8,24 @@ | |||
"fastqscreen_cluster": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc", | |||
"fastqc_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastqc:v0.11.5", | |||
"fastqc_cluster": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc", | |||
"fastp_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastp:0.19.6", | |||
"fastp_cluster": "OnDemand bcs.a2.xlarge img-ubuntu-vpc", | |||
"trim_front1": "0", | |||
"trim_tail1": "0", | |||
"max_len1": "0", | |||
"trim_front2": "0", | |||
"trim_tail2": "0", | |||
"max_len2": "0", | |||
"adapter_sequence": "AGATCGGAAGAGCACACGTCTGAACTCCAGTCA", | |||
"adapter_sequence_r2": "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT", | |||
"disable_adapter_trimming": "0", | |||
"length_required": "50", | |||
"length_required1": "20", | |||
"UMI": "0", | |||
"umi_len": "0", | |||
"umi_loc": "umi_loc", | |||
"qualified_quality_phred": "20", | |||
"disable_quality_filtering": "1", | |||
"hisat2_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/hisat2:v2.1.0-2", | |||
"hisat2_cluster": "OnDemand bcs.a2.3xlarge img-ubuntu-vpc", | |||
"samtools_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/samtools:v1.3.1", |
@@ -12,6 +12,24 @@ | |||
"{{ project_name }}.fastqscreen_cluster": "{{ fastqscreen_cluster }}", | |||
"{{ project_name }}.fastqc_cluster": "{{ fastqc_cluster }}", | |||
"{{ project_name }}.fastqc_docker": "{{ fastqc_docker }}", | |||
"{{ project_name }}.fastp_docker": "{{ fastp_docker }}", | |||
"{{ project_name }}.fastp_cluster": "{{ fastp_cluster }}", | |||
"{{ project_name }}.trim_front1": "{{ trim_front1 }}", | |||
"{{ project_name }}.trim_tail1": "{{ trim_tail1 }}", | |||
"{{ project_name }}.max_len1": "{{ max_len1 }}", | |||
"{{ project_name }}.trim_front2": "{{ trim_front2 }}", | |||
"{{ project_name }}.trim_tail2": "{{ trim_tail2 }}", | |||
"{{ project_name }}.max_len2": "{{ max_len2 }}", | |||
"{{ project_name }}.adapter_sequence": "{{ adapter_sequence }}", | |||
"{{ project_name }}.adapter_sequence_r2": "{{ adapter_sequence_r2 }}", | |||
"{{ project_name }}.disable_adapter_trimming": "{{ disable_adapter_trimming }}", | |||
"{{ project_name }}.length_required1": "{{ length_required1 }}", | |||
"{{ project_name }}.UMI": "{{ UMI }}", | |||
"{{ project_name }}.umi_loc": "{{ umi_loc }}", | |||
"{{ project_name }}.umi_len": "{{ umi_len }}", | |||
"{{ project_name }}.length_required": "{{ length_required }}", | |||
"{{ project_name }}.qualified_quality_phred": "{{ qualified_quality_phred }}", | |||
"{{ project_name }}.disable_quality_filtering": "{{ disable_quality_filtering }}", | |||
"{{ project_name }}.hisat2_docker": "{{ hisat2_docker }}", | |||
"{{ project_name }}.hisat2_cluster": "{{ hisat2_cluster }}", | |||
"{{ project_name }}.insert_size": "{{ insert_size }}", | |||
@@ -26,5 +44,7 @@ | |||
"{{ project_name }}.count_docker": "{{ count_docker }}", | |||
"{{ project_name }}.count_cluster": "{{ count_cluster }}", | |||
"{{ project_name }}.count_length": "{{ count_length }}", | |||
"{{ project_name }}.pre_alignment_qc": "{{ pre_alignment_qc }}" | |||
"{{ project_name }}.fastp_run": "{{ fastp_run }}", | |||
"{{ project_name }}.pre_alignment_qc": "{{ pre_alignment_qc }}", | |||
"{{ project_name }}.qualimap_qc": "{{ qualimap_qc }}" | |||
} |
@@ -0,0 +1,69 @@ | |||
# Pre-process a paired-end FASTQ pair with fastp in up to three sequential
# stages, each of which is either run through fastp or bypassed with a plain
# copy depending on an integer switch:
#   1. disable_quality_filtering : 0 -> copy inputs, otherwise run fastp
#   2. UMI                       : 0 -> copy, otherwise run fastp with -U
#   3. disable_adapter_trimming  : 0 -> run fastp with adapter/length/quality
#                                       options, otherwise copy
#
# Inputs:
#   sample_id                 prefix used for every output file name
#   read1 / read2             paired FASTQ files
#   adapter_sequence(_r2)     adapters passed to fastp in stage 3
#   umi_loc / umi_len         passed to fastp --umi_loc / --umi_len in stage 2
#   trim_front*/trim_tail*/max_len*   passed to every fastp invocation
#   length_required           passed to fastp -l in stage 3
#   qualified_quality_phred   passed to fastp -q in stage 3
#   length_required1          passed to fastp -u in stage 3
#   docker / cluster / disk_size      runtime settings
#
# Outputs:
#   json, report              fastp JSON / HTML report (written by whichever
#                             fastp invocation ran last)
#   Trim_R1, Trim_R2          processed FASTQ pair
#
# NOTE(review): stage 1 runs fastp when disable_quality_filtering is non-zero
# and only copies when it is 0, which reads as inverted relative to the
# parameter name -- confirm the intended semantics with the author.
# NOTE(review): if stage 1 and stage 3 are both bypassed and UMI is 0, no
# fastp invocation runs, so the json/html outputs will not exist.
task fastp {
    String sample_id
    File read1
    File read2
    String adapter_sequence
    String adapter_sequence_r2
    String docker
    String cluster
    String disk_size
    String umi_loc
    Int trim_front1
    Int trim_tail1
    Int max_len1
    Int trim_front2
    Int trim_tail2
    Int max_len2
    Int disable_adapter_trimming
    Int length_required
    Int umi_len
    Int UMI
    Int qualified_quality_phred
    Int length_required1
    Int disable_quality_filtering

    command <<<
        # Abort on the first failing command, consistent with the samtools task.
        set -o pipefail
        set -e

        nt=$(nproc)
        tmp=/cromwell_root/tmp/fastp
        mkdir -p $tmp/

        # Trimming options shared by every fastp invocation below.
        trim_opts="--trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2}"

        ## 1.Disable_quality_filtering
        if [ "${disable_quality_filtering}" == 0 ]
        then
            cp ${read1} $tmp/${sample_id}_R1.fastq.tmp1.gz
            cp ${read2} $tmp/${sample_id}_R2.fastq.tmp1.gz
        else
            fastp --thread $nt $trim_opts \
                -i ${read1} -I ${read2} \
                -o $tmp/${sample_id}_R1.fastq.tmp1.gz \
                -O $tmp/${sample_id}_R2.fastq.tmp1.gz \
                -j ${sample_id}.json -h ${sample_id}.html
        fi

        ## 2.UMI
        if [ "${UMI}" == 0 ]
        then
            cp $tmp/${sample_id}_R1.fastq.tmp1.gz $tmp/${sample_id}_R1.fastq.tmp2.gz
            cp $tmp/${sample_id}_R2.fastq.tmp1.gz $tmp/${sample_id}_R2.fastq.tmp2.gz
        else
            fastp --thread $nt -U --umi_loc=${umi_loc} --umi_len=${umi_len} $trim_opts \
                -i $tmp/${sample_id}_R1.fastq.tmp1.gz \
                -I $tmp/${sample_id}_R2.fastq.tmp1.gz \
                -o $tmp/${sample_id}_R1.fastq.tmp2.gz \
                -O $tmp/${sample_id}_R2.fastq.tmp2.gz \
                -j ${sample_id}.json -h ${sample_id}.html
        fi

        ## 3.Trim
        if [ "${disable_adapter_trimming}" == 0 ]
        then
            fastp --thread $nt -l ${length_required} -q ${qualified_quality_phred} -u ${length_required1} \
                --adapter_sequence ${adapter_sequence} --adapter_sequence_r2 ${adapter_sequence_r2} \
                --detect_adapter_for_pe $trim_opts \
                -i $tmp/${sample_id}_R1.fastq.tmp2.gz \
                -I $tmp/${sample_id}_R2.fastq.tmp2.gz \
                -o ${sample_id}_R1.fastq.gz -O ${sample_id}_R2.fastq.gz \
                -j ${sample_id}.json -h ${sample_id}.html
        else
            cp $tmp/${sample_id}_R1.fastq.tmp2.gz ${sample_id}_R1.fastq.gz
            cp $tmp/${sample_id}_R2.fastq.tmp2.gz ${sample_id}_R2.fastq.gz
        fi
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File json = "${sample_id}.json"
        File report = "${sample_id}.html"
        File Trim_R1 = "${sample_id}_R1.fastq.gz"
        File Trim_R2 = "${sample_id}_R2.fastq.gz"
    }
}
@@ -3,7 +3,7 @@ task hisat2 { | |||
File read_1P | |||
File read_2P | |||
String idx_prefix | |||
String base=sub(basename(read_1P),"\\.\\S+$", "") | |||
String sample_id | |||
String docker | |||
String cluster | |||
String disk_size | |||
@@ -11,7 +11,7 @@ task hisat2 { | |||
command { | |||
nt=$(nproc) | |||
hisat2 -t -p $nt -x ${idx}/${idx_prefix} -1 ${read_1P} -2 ${read_2P} -S ${base}.sam --un-conc-gz ${base}_un.fq.gz | |||
hisat2 -t -p $nt -x ${idx}/${idx_prefix} -1 ${read_1P} -2 ${read_2P} -S ${sample_id}.sam --un-conc-gz ${sample_id}_un.fq.gz | |||
} | |||
runtime { | |||
@@ -22,8 +22,8 @@ task hisat2 { | |||
} | |||
output { | |||
File sam=base + ".sam" | |||
File unmapread_1p=base + "_un.fq.1.gz" | |||
File unmapread_2p=base + "_un.fq.2.gz" | |||
File sam=sample_id + ".sam" | |||
File unmapread_1p=sample_id + "_un.fq.1.gz" | |||
File unmapread_2p=sample_id + "_un.fq.2.gz" | |||
} | |||
} |
@@ -1,6 +1,5 @@ | |||
task qualimap { | |||
File bam | |||
File bam_percent | |||
File gtf | |||
String bamname=basename(bam, ".sorted.bam") | |||
String docker |
@@ -4,7 +4,6 @@ task samtools { | |||
String bam=base + ".bam" | |||
String sorted_bam=base + ".sorted.bam" | |||
String sorted_bam_index=base + ".sorted.bam.bai" | |||
String percent_bam=base + ".percent.bam" | |||
String samstats=base + ".samstats" | |||
String ins_size=base + ".ins_size" | |||
String docker | |||
@@ -15,13 +14,12 @@ task samtools { | |||
command <<< | |||
set -o pipefail | |||
set -e | |||
/opt/conda/bin/samtools view -bS ${sam} > ${bam} | |||
/opt/conda/bin/samtools sort -m 1000000000 ${bam} -o ${sorted_bam} | |||
/opt/conda/bin/samtools index ${sorted_bam} | |||
/opt/conda/bin/samtools view -bs 42.1 ${sorted_bam} > ${percent_bam} | |||
/opt/conda/bin/samtools stats ${sorted_bam} > ${samstats} | |||
/opt/conda/bin/samtools stats -i ${insert_size} ${sorted_bam} |grep ^IS|cut -f 2- > ${ins_size} | |||
# Use every available core; the thread count must be passed as the shell
# variable $nt (a bare "nt" is not a valid integer argument for -@).
# NOTE(review): confirm the samtools build in the task image accepts -@ for
# the index and stats subcommands.
nt=$(nproc)
/opt/conda/bin/samtools view -@ $nt -bS ${sam} > ${bam}
/opt/conda/bin/samtools sort -@ $nt -m 2G ${bam} -o ${sorted_bam}
/opt/conda/bin/samtools index -@ $nt ${sorted_bam}
/opt/conda/bin/samtools stats -@ $nt ${sorted_bam} > ${samstats}
/opt/conda/bin/samtools stats -@ $nt -i ${insert_size} ${sorted_bam} |grep ^IS|cut -f 2- > ${ins_size}
>>> | |||
runtime { | |||
@@ -32,11 +30,10 @@ task samtools { | |||
} | |||
output { | |||
File out_sort_bam=sorted_bam | |||
File out_sort_bam_index=sorted_bam_index | |||
File out_percent = percent_bam | |||
File out_samstats=samstats | |||
File out_ins_size=ins_size | |||
File out_sort_bam="${sorted_bam}" | |||
File out_sort_bam_index="${sorted_bam_index}" | |||
File out_samstats="${samstats}" | |||
File out_ins_size="${ins_size}" | |||
} | |||
} |
@@ -1,5 +1,6 @@ | |||
import "./tasks/fastqc.wdl" as fastqc | |||
import "./tasks/fastqscreen.wdl" as fastqscreen | |||
import "./tasks/fastp.wdl" as fastp | |||
import "./tasks/qualimap.wdl" as qualimap | |||
import "./tasks/hisat2.wdl" as hisat2 | |||
import "./tasks/samtools.wdl" as samtools | |||
@@ -8,7 +9,7 @@ import "./tasks/ballgown.wdl" as ballgown | |||
import "./tasks/count.wdl" as count | |||
workflow {{ project_name }} { | |||
File read1 | |||
File read2 | |||
File idx | |||
@@ -21,8 +22,14 @@ workflow {{ project_name }} { | |||
String fastqc_cluster | |||
String fastqscreen_docker | |||
String fastqscreen_cluster | |||
String fastp_docker | |||
String fastp_cluster | |||
String adapter_sequence | |||
String adapter_sequence_r2 | |||
String umi_loc | |||
String hisat2_docker | |||
String hisat2_cluster | |||
String idx_prefix | |||
String stringtie_docker | |||
String stringtie_cluster | |||
String samtools_docker | |||
@@ -37,7 +44,23 @@ workflow {{ project_name }} { | |||
String count_length | |||
String sample_id | |||
Int trim_front1 | |||
Int trim_tail1 | |||
Int max_len1 | |||
Int trim_front2 | |||
Int trim_tail2 | |||
Int max_len2 | |||
Int disable_adapter_trimming | |||
Int length_required | |||
Int umi_len | |||
Int UMI | |||
Int qualified_quality_phred | |||
Int length_required1 | |||
Int disable_quality_filtering | |||
Boolean pre_alignment_qc | |||
Boolean qualimap_qc | |||
Boolean fastp_run | |||
if (pre_alignment_qc) { | |||
@@ -62,8 +85,47 @@ workflow {{ project_name }} { | |||
} | |||
} | |||
call hisat2.hisat2 as hisat2 { | |||
if (fastp_run){ | |||
call fastp.fastp as fastp { | |||
input: | |||
sample_id=sample_id, | |||
read1=read1, | |||
read2=read2, | |||
docker=fastp_docker, | |||
cluster=fastp_cluster, | |||
disk_size=disk_size, | |||
adapter_sequence=adapter_sequence, | |||
adapter_sequence_r2=adapter_sequence_r2, | |||
umi_loc=umi_loc, | |||
trim_front1=trim_front1, | |||
trim_tail1=trim_tail1, | |||
max_len1=max_len1, | |||
trim_front2=trim_front2, | |||
trim_tail2=trim_tail2, | |||
max_len2=max_len2, | |||
disable_adapter_trimming=disable_adapter_trimming, | |||
length_required=length_required, | |||
umi_len=umi_len, | |||
UMI=UMI, | |||
qualified_quality_phred=qualified_quality_phred, | |||
length_required1=length_required1, | |||
disable_quality_filtering=disable_quality_filtering | |||
} | |||
call hisat2.hisat2 as hisat2 { | |||
input: | |||
sample_id=sample_id, | |||
idx=idx, | |||
idx_prefix=idx_prefix, | |||
read_1P=fastp.Trim_R1, | |||
read_2P=fastp.Trim_R2, | |||
docker=hisat2_docker, | |||
cluster=hisat2_cluster, | |||
disk_size=disk_size | |||
} | |||
} else { | |||
call hisat2.hisat2 as hisat2 { | |||
input: | |||
docker=hisat2_docker, | |||
cluster=hisat2_cluster, | |||
@@ -73,7 +135,8 @@ workflow {{ project_name }} { | |||
read_2P=read2, | |||
disk_size=disk_size | |||
} | |||
} | |||
call samtools.samtools as samtools { | |||
input: | |||
docker=samtools_docker, | |||
@@ -83,15 +146,16 @@ workflow {{ project_name }} { | |||
disk_size=disk_size | |||
} | |||
call qualimap.qualimap as qualimap { | |||
if (qualimap_qc){ | |||
call qualimap.qualimap as qualimap { | |||
input: | |||
bam=samtools.out_sort_bam, | |||
bam_percent=samtools.out_percent, | |||
gtf=gtf, | |||
docker=qualimap_docker, | |||
cluster=qualimap_cluster, | |||
disk_size=disk_size | |||
} | |||
} | |||
call stringtie.stringtie as stringtie { | |||
input: |