瀏覽代碼

add fastp and revise samtools thread

tags/v0.1.0
stead99 4 年之前
父節點
當前提交
9ab997b8fd
共有 7 個文件被更改,包括 194 次插入27 次删除
  1. +19
    -1
      defaults
  2. +21
    -1
      inputs
  3. +69
    -0
      tasks/fastp.wdl
  4. +5
    -5
      tasks/hisat2.wdl
  5. +0
    -1
      tasks/qualimap.wdl
  6. +10
    -13
      tasks/samtools.wdl
  7. +70
    -6
      workflow.wdl

+ 19
- 1
defaults 查看文件

@@ -1,6 +1,6 @@
{
"idx": "oss://pgx-reference-data/reference/hisat2/grch38_snp_tran/",
"gtf": "oss://pgx-reference-data/reference/tophat2/annotation/gencode.v22.annotation.gtf",
"gtf": "oss://pgx-reference-data/reference/annotation/Homo_sapiens.GRCh38.93.gtf",
"idx_prefix": "genome_snp_tran",
"screen_ref_dir": "oss://pgx-reference-data/fastq_screen_reference/",
"fastq_screen_conf": "oss://pgx-reference-data/fastq_screen_reference/fastq_screen.conf",
@@ -8,6 +8,24 @@
"fastqscreen_cluster": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc",
"fastqc_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastqc:v0.11.5",
"fastqc_cluster": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc",
"fastp_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastp:0.19.6",
"fastp_cluster": "OnDemand bcs.a2.xlarge img-ubuntu-vpc",
"trim_front1": "0",
"trim_tail1": "0",
"max_len1": "0",
"trim_front2": "0",
"trim_tail2": "0",
"max_len2": "0",
"adapter_sequence": "AGATCGGAAGAGCACACGTCTGAACTCCAGTCA",
"adapter_sequence_r2": "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT",
"disable_adapter_trimming": "0",
"length_required": "50",
"length_required1": "20",
"UMI": "0",
"umi_len": "0",
"umi_loc": "umi_loc",
"qualified_quality_phred": "20",
"disable_quality_filtering": "1",
"hisat2_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/hisat2:v2.1.0-2",
"hisat2_cluster": "OnDemand bcs.a2.3xlarge img-ubuntu-vpc",
"samtools_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/samtools:v1.3.1",

+ 21
- 1
inputs 查看文件

@@ -12,6 +12,24 @@
"{{ project_name }}.fastqscreen_cluster": "{{ fastqscreen_cluster }}",
"{{ project_name }}.fastqc_cluster": "{{ fastqc_cluster }}",
"{{ project_name }}.fastqc_docker": "{{ fastqc_docker }}",
"{{ project_name }}.fastp_docker": "{{ fastp_docker }}",
"{{ project_name }}.fastp_cluster": "{{ fastp_cluster }}",
"{{ project_name }}.trim_front1": "{{ trim_front1 }}",
"{{ project_name }}.trim_tail1": "{{ trim_tail1 }}",
"{{ project_name }}.max_len1": "{{ max_len1 }}",
"{{ project_name }}.trim_front2": "{{ trim_front2 }}",
"{{ project_name }}.trim_tail2": "{{ trim_tail2 }}",
"{{ project_name }}.max_len2": "{{ max_len2 }}",
"{{ project_name }}.adapter_sequence": "{{ adapter_sequence }}",
"{{ project_name }}.adapter_sequence_r2": "{{ adapter_sequence_r2 }}",
"{{ project_name }}.disable_adapter_trimming": "{{ disable_adapter_trimming }}",
"{{ project_name }}.length_required1": "{{ length_required1 }}",
"{{ project_name }}.UMI": "{{ UMI }}",
"{{ project_name }}.umi_loc": "{{ umi_loc }}",
"{{ project_name }}.umi_len": "{{ umi_len }}",
"{{ project_name }}.length_required": "{{ length_required }}",
"{{ project_name }}.qualified_quality_phred": "{{ qualified_quality_phred }}",
"{{ project_name }}.disable_quality_filtering": "{{ disable_quality_filtering }}",
"{{ project_name }}.hisat2_docker": "{{ hisat2_docker }}",
"{{ project_name }}.hisat2_cluster": "{{ hisat2_cluster }}",
"{{ project_name }}.insert_size": "{{ insert_size }}",
@@ -26,5 +44,7 @@
"{{ project_name }}.count_docker": "{{ count_docker }}",
"{{ project_name }}.count_cluster": "{{ count_cluster }}",
"{{ project_name }}.count_length": "{{ count_length }}",
"{{ project_name }}.pre_alignment_qc": "{{ pre_alignment_qc }}"
"{{ project_name }}.fastp_run": "{{ fastp_run }}",
"{{ project_name }}.pre_alignment_qc": "{{ pre_alignment_qc }}",
"{{ project_name }}.qualimap_qc": "{{ qualimap_qc }}"
}

+ 69
- 0
tasks/fastp.wdl 查看文件

@@ -0,0 +1,69 @@
# Pre-processes one paired-end sample with fastp in up to three sequential
# passes, each of which is either run or replaced by a plain file copy:
#   1. a general fastp pass       (controlled by disable_quality_filtering)
#   2. UMI extraction             (controlled by UMI)
#   3. adapter / quality trimming (controlled by disable_adapter_trimming)
# Intermediate files live under /cromwell_root/tmp/fastp/; the final reads are
# written to the task working directory as <sample_id>_R1/_R2.fastq.gz.
#
# Inputs:
#   sample_id                    prefix used for every output file name
#   read1 / read2                paired FASTQ inputs
#   adapter_sequence(_r2)        adapter sequences handed to fastp in pass 3
#   umi_loc / umi_len            forwarded to fastp --umi_loc / --umi_len in pass 2
#   trim_front*, trim_tail*,
#   max_len*                     forwarded unchanged to every fastp invocation
#   length_required              forwarded to fastp -l in pass 3
#   qualified_quality_phred      forwarded to fastp -q in pass 3
#   length_required1             forwarded to fastp -u in pass 3
#   docker / cluster / disk_size runtime settings
#
# Outputs:
#   json, report                 fastp JSON / HTML report (overwritten by each
#                                pass, so they describe the last pass that ran)
#   Trim_R1, Trim_R2             final processed reads
task fastp {
    String sample_id
    File read1
    File read2
    String adapter_sequence
    String adapter_sequence_r2
    String docker
    String cluster
    String disk_size
    String umi_loc
    Int trim_front1
    Int trim_tail1
    Int max_len1
    Int trim_front2
    Int trim_tail2
    Int max_len2
    Int disable_adapter_trimming
    Int length_required
    Int umi_len
    Int UMI
    Int qualified_quality_phred
    Int length_required1
    Int disable_quality_filtering

    command <<<
        # Abort on the first failing command instead of carrying on with
        # missing intermediate files.
        set -e
        set -o pipefail

        # Thread count for fastp; referenced below as the shell variable $nt.
        nt=$(nproc)
        mkdir -p /cromwell_root/tmp/fastp/

        ## 1.Disable_quality_filtering
        # NOTE(review): fastp runs here when the flag is non-zero and is skipped
        # when it is 0, which looks inverted relative to the flag name - confirm
        # the intended semantics before changing it.
        if [ "${disable_quality_filtering}" == 0 ]
        then
            cp ${read1} /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz
            cp ${read2} /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz
        else
            fastp --thread $nt \
                --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} \
                --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} \
                -i ${read1} -I ${read2} \
                -o /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz \
                -O /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz \
                -j ${sample_id}.json -h ${sample_id}.html
        fi

        ## 2.UMI
        if [ "${UMI}" == 0 ]
        then
            cp /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz
            cp /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz
        else
            fastp --thread $nt -U --umi_loc=${umi_loc} --umi_len=${umi_len} \
                --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} \
                --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} \
                -i /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz \
                -I /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz \
                -o /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz \
                -O /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz \
                -j ${sample_id}.json -h ${sample_id}.html
        fi

        ## 3.Trim
        if [ "${disable_adapter_trimming}" == 0 ]
        then
            fastp --thread $nt -l ${length_required} -q ${qualified_quality_phred} -u ${length_required1} \
                --adapter_sequence ${adapter_sequence} --adapter_sequence_r2 ${adapter_sequence_r2} \
                --detect_adapter_for_pe \
                --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} \
                --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} \
                -i /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz \
                -I /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz \
                -o ${sample_id}_R1.fastq.gz -O ${sample_id}_R2.fastq.gz \
                -j ${sample_id}.json -h ${sample_id}.html
        else
            cp /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz ${sample_id}_R1.fastq.gz
            cp /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz ${sample_id}_R2.fastq.gz
        fi
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    # NOTE(review): json/report are only produced when at least one of the
    # three passes actually runs fastp; if all three are skipped these outputs
    # will be missing - confirm whether that combination must be supported.
    output {
        File json = "${sample_id}.json"
        File report = "${sample_id}.html"
        File Trim_R1 = "${sample_id}_R1.fastq.gz"
        File Trim_R2 = "${sample_id}_R2.fastq.gz"
    }
}

+ 5
- 5
tasks/hisat2.wdl 查看文件

@@ -3,7 +3,7 @@ task hisat2 {
File read_1P
File read_2P
String idx_prefix
String base=sub(basename(read_1P),"\\.\\S+$", "")
String sample_id
String docker
String cluster
String disk_size
@@ -11,7 +11,7 @@ task hisat2 {

command {
nt=$(nproc)
hisat2 -t -p $nt -x ${idx}/${idx_prefix} -1 ${read_1P} -2 ${read_2P} -S ${base}.sam --un-conc-gz ${base}_un.fq.gz
hisat2 -t -p $nt -x ${idx}/${idx_prefix} -1 ${read_1P} -2 ${read_2P} -S ${sample_id}.sam --un-conc-gz ${sample_id}_un.fq.gz
}
runtime {
@@ -22,8 +22,8 @@ task hisat2 {
}

output {
File sam=base + ".sam"
File unmapread_1p=base + "_un.fq.1.gz"
File unmapread_2p=base + "_un.fq.2.gz"
File sam=sample_id + ".sam"
File unmapread_1p=sample_id + "_un.fq.1.gz"
File unmapread_2p=sample_id + "_un.fq.2.gz"
}
}

+ 0
- 1
tasks/qualimap.wdl 查看文件

@@ -1,6 +1,5 @@
task qualimap {
File bam
File bam_percent
File gtf
String bamname=basename(bam, ".sorted.bam")
String docker

+ 10
- 13
tasks/samtools.wdl 查看文件

@@ -4,7 +4,6 @@ task samtools {
String bam=base + ".bam"
String sorted_bam=base + ".sorted.bam"
String sorted_bam_index=base + ".sorted.bam.bai"
String percent_bam=base + ".percent.bam"
String samstats=base + ".samstats"
String ins_size=base + ".ins_size"
String docker
@@ -15,13 +14,12 @@ task samtools {
command <<<
set -o pipefail
set -e
/opt/conda/bin/samtools view -bS ${sam} > ${bam}
/opt/conda/bin/samtools sort -m 1000000000 ${bam} -o ${sorted_bam}
/opt/conda/bin/samtools index ${sorted_bam}
/opt/conda/bin/samtools view -bs 42.1 ${sorted_bam} > ${percent_bam}
/opt/conda/bin/samtools stats ${sorted_bam} > ${samstats}
/opt/conda/bin/samtools stats -i ${insert_size} ${sorted_bam} |grep ^IS|cut -f 2- > ${ins_size}

# Number of CPUs on the node; used as the samtools thread count below.
nt=$(nproc)
# Pass the shell variable $nt to -@ (the bare word "nt" is not a number,
# so the requested multi-threading never took effect).
/opt/conda/bin/samtools view -@ $nt -bS ${sam} > ${bam}
# NOTE(review): samtools sort -m is a per-thread limit, so peak memory is
# roughly nt x 2G - confirm the cluster instance has enough RAM.
/opt/conda/bin/samtools sort -@ $nt -m 2G ${bam} -o ${sorted_bam}
# NOTE(review): confirm that the samtools version in the docker image accepts
# -@ for the index and stats subcommands.
/opt/conda/bin/samtools index -@ $nt ${sorted_bam}
/opt/conda/bin/samtools stats -@ $nt ${sorted_bam} > ${samstats}
# Keep only the insert-size (IS) rows of the stats report, without the tag column.
/opt/conda/bin/samtools stats -@ $nt -i ${insert_size} ${sorted_bam} |grep ^IS|cut -f 2- > ${ins_size}
>>>

runtime {
@@ -32,11 +30,10 @@ task samtools {
}

output {
File out_sort_bam=sorted_bam
File out_sort_bam_index=sorted_bam_index
File out_percent = percent_bam
File out_samstats=samstats
File out_ins_size=ins_size
File out_sort_bam="${sorted_bam}"
File out_sort_bam_index="${sorted_bam_index}"
File out_samstats="${samstats}"
File out_ins_size="${ins_size}"
}

}

+ 70
- 6
workflow.wdl 查看文件

@@ -1,5 +1,6 @@
import "./tasks/fastqc.wdl" as fastqc
import "./tasks/fastqscreen.wdl" as fastqscreen
import "./tasks/fastp.wdl" as fastp
import "./tasks/qualimap.wdl" as qualimap
import "./tasks/hisat2.wdl" as hisat2
import "./tasks/samtools.wdl" as samtools
@@ -8,7 +9,7 @@ import "./tasks/ballgown.wdl" as ballgown
import "./tasks/count.wdl" as count

workflow {{ project_name }} {
File read1
File read2
File idx
@@ -21,8 +22,14 @@ workflow {{ project_name }} {
String fastqc_cluster
String fastqscreen_docker
String fastqscreen_cluster
String fastp_docker
String fastp_cluster
String adapter_sequence
String adapter_sequence_r2
String umi_loc
String hisat2_docker
String hisat2_cluster
String idx_prefix
String stringtie_docker
String stringtie_cluster
String samtools_docker
@@ -37,7 +44,23 @@ workflow {{ project_name }} {
String count_length
String sample_id

Int trim_front1
Int trim_tail1
Int max_len1
Int trim_front2
Int trim_tail2
Int max_len2
Int disable_adapter_trimming
Int length_required
Int umi_len
Int UMI
Int qualified_quality_phred
Int length_required1
Int disable_quality_filtering

Boolean pre_alignment_qc
Boolean qualimap_qc
Boolean fastp_run
if (pre_alignment_qc) {
@@ -62,8 +85,47 @@ workflow {{ project_name }} {
}
}
call hisat2.hisat2 as hisat2 {
if (fastp_run){

call fastp.fastp as fastp {
input:
sample_id=sample_id,
read1=read1,
read2=read2,
docker=fastp_docker,
cluster=fastp_cluster,
disk_size=disk_size,
adapter_sequence=adapter_sequence,
adapter_sequence_r2=adapter_sequence_r2,
umi_loc=umi_loc,
trim_front1=trim_front1,
trim_tail1=trim_tail1,
max_len1=max_len1,
trim_front2=trim_front2,
trim_tail2=trim_tail2,
max_len2=max_len2,
disable_adapter_trimming=disable_adapter_trimming,
length_required=length_required,
umi_len=umi_len,
UMI=UMI,
qualified_quality_phred=qualified_quality_phred,
length_required1=length_required1,
disable_quality_filtering=disable_quality_filtering
}

call hisat2.hisat2 as hisat2 {
input:
sample_id=sample_id,
idx=idx,
idx_prefix=idx_prefix,
read_1P=fastp.Trim_R1,
read_2P=fastp.Trim_R2,
docker=hisat2_docker,
cluster=hisat2_cluster,
disk_size=disk_size
}
} else {
call hisat2.hisat2 as hisat2 {
input:
docker=hisat2_docker,
cluster=hisat2_cluster,
@@ -73,7 +135,8 @@ workflow {{ project_name }} {
read_2P=read2,
disk_size=disk_size
}

}
call samtools.samtools as samtools {
input:
docker=samtools_docker,
@@ -83,15 +146,16 @@ workflow {{ project_name }} {
disk_size=disk_size
}

call qualimap.qualimap as qualimap {
if (qualimap_qc){
call qualimap.qualimap as qualimap {
input:
bam=samtools.out_sort_bam,
bam_percent=samtools.out_percent,
gtf=gtf,
docker=qualimap_docker,
cluster=qualimap_cluster,
disk_size=disk_size
}
}

call stringtie.stringtie as stringtie {
input:

Loading…
取消
儲存