20110700030@fudan.edu.cn 4 роки тому
коміт
a9735f4c3a
8 змінених файлів з 425 додано та 0 видалено
  1. +45
    -0
      README.md
  2. +41
    -0
      defaults
  3. +50
    -0
      inputs
  4. +67
    -0
      tasks/fastp.wdl
  5. +29
    -0
      tasks/featureCounts.wdl
  6. +34
    -0
      tasks/hisat2.wdl
  7. +37
    -0
      tasks/samtools.wdl
  8. +122
    -0
      workflow.wdl

+ 45
- 0
README.md Переглянути файл

@@ -0,0 +1,45 @@
# README.md

> Author: Qingwang Chen
>
> Email: [qwch20@fudan.edu.cn](mailto:qwch20@fudan.edu.cn)
>
> Last Updates: 05/11/2020

#### Requirements

- choppy
- Ali-Cloud
- Linux

```
# 激活choppy环境
$ source activate choppy (open-choppy-env)

# 第一次安装
$ choppy install chenqingwang/lncRNAseq
# 非第一次安装
$ choppy install chenqingwang/lncRNAseq -f

# 查询已安装APP
$ choppy apps
```

#### Quick Start

```
# 准备 samples.csv 文件
$ choppy samples chenqingwang/lncRNAseq-latest > samples.csv

# 提交任务
$ choppy batch chenqingwang/lncRNAseq-latest samples.csv -p Your_project_name -l Your_label

# 查询任务运行状况
$ choppy query -L Your_label | grep "status"

# 查询失败任务
$ choppy search -s Failed -p Your_project_name -u chenqingwang --short-format

# 结果文件地址
$ oss://choppy-cromwell-result/test-choppy/Your_project_name/
```

+ 41
- 0
defaults Переглянути файл

@@ -0,0 +1,41 @@
{
"adapter_sequence": "AGATCGGAAGAGCACACGTCTGAACTCCAGTCA",
"adapter_sequence_r2": "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT",
"fastp_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastp:0.19.6",
"fastp_cluster": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc",
"umi_loc": "umi_loc",
"trim_front1": "0",
"trim_tail1": "0",
"max_len1": "0",
"trim_front2": "0",
"trim_tail2": "0",
"max_len2": "0",
"disable_adapter_trimming": "0",
"length_required": "50",
"umi_len": "0",
"UMI": "0",
"qualified_quality_phred": "20",
"length_required1": "20",
"disable_quality_filtering": "1",
"idx": "oss://pgx-reference-data/reference/hisat2/grch38_snp_tran/",
"idx_prefix": "genome_snp_tran",
"pen_intronlen":"G,-8,1",
"hisat2_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/hisat2:v2.1.0-2",
"hisat2_cluster": "OnDemand bcs.a2.3xlarge img-ubuntu-vpc",
"pen_cansplice":"0",
"pen_noncansplice":"3",
"min_intronlen":"30",
"max_intronlen":"500000",
"maxins":"500",
"minins":"0",
"samtools_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/samtools:v1.3.1",
"samtools_cluster": "OnDemand bcs.a2.large img-ubuntu-vpc",
"insert_size":"8000",
"lnc_gtf_file": "oss://pgx-reference-data/reference/subread/lncRNAKB_hg38_v7.gtf",
"subread_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/subread:v1.6.4",
"subread_cluster": "OnDemand bcs.a2.large img-ubuntu-vpc",
"cpu_num": "4",
"strand_information": "0",
"gtf_dir": "oss://pgx-reference-data/reference/subread/",
"fasta": "GRCh38.d1.vd1.fa"
}

+ 50
- 0
inputs Переглянути файл

@@ -0,0 +1,50 @@
{
"{{ project_name }}.sample_id": "{{ sample_id }}",
"{{ project_name }}.read1": "{{ read1 }}",
"{{ project_name }}.read2": "{{ read2 }}",
"{{ project_name }}.adapter_sequence": "{{ adapter_sequence }}",
"{{ project_name }}.adapter_sequence_r2": "{{ adapter_sequence_r2 }}",
"{{ project_name }}.fastp_docker": "{{ fastp_docker }}",
"{{ project_name }}.fastp_cluster": "{{ fastp_cluster }}",
"{{ project_name }}.umi_loc": "{{ umi_loc }}",
"{{ project_name }}.trim_front1": "{{ trim_front1 }}",
"{{ project_name }}.trim_tail1": "{{ trim_tail1 }}",
"{{ project_name }}.max_len1": "{{ max_len1 }}",
"{{ project_name }}.trim_front2": "{{ trim_front2 }}",
"{{ project_name }}.trim_tail2": "{{ trim_tail2 }}",
"{{ project_name }}.max_len2": "{{ max_len2 }}",
"{{ project_name }}.disable_adapter_trimming": "{{ disable_adapter_trimming }}",
"{{ project_name }}.length_required": "{{ length_required }}",
"{{ project_name }}.umi_len": "{{ umi_len }}",
"{{ project_name }}.UMI": "{{ UMI }}",
"{{ project_name }}.qualified_quality_phred": "{{ qualified_quality_phred }}",
"{{ project_name }}.length_required1": "{{ length_required1 }}",
"{{ project_name }}.disable_quality_filtering": "{{ disable_quality_filtering }}",
"{{ project_name }}.idx": "{{ idx }}",
"{{ project_name }}.Trim_R1": "{{ Trim_R1 }}",
"{{ project_name }}.Trim_R2": "{{ Trim_R2 }}",
"{{ project_name }}.idx_prefix": "{{ idx_prefix }}",
"{{ project_name }}.pen_intronlen": "{{ pen_intronlen }}",
"{{ project_name }}.hisat2_docker": "{{ hisat2_docker }}",
"{{ project_name }}.hisat2_cluster": "{{ hisat2_cluster }}",
"{{ project_name }}.pen_cansplice": "{{ pen_cansplice }}",
"{{ project_name }}.pen_noncansplice": "{{ pen_noncansplice }}",
"{{ project_name }}.min_intronlen": "{{ min_intronlen }}",
"{{ project_name }}.max_intronlen": "{{ max_intronlen }}",
"{{ project_name }}.maxins": "{{ maxins }}",
"{{ project_name }}.minins": "{{ minins }}",
"{{ project_name }}.sam": "{{ sam }}",
"{{ project_name }}.sorted_bam": "{{ sorted_bam }}",
"{{ project_name }}.percent_bam": "{{ percent_bam }}",
"{{ project_name }}.sorted_bam_index": "{{ sorted_bam_index }}",
"{{ project_name }}.ins_size": "{{ ins_size }}",
"{{ project_name }}.samtools_docker": "{{ samtools_docker }}",
"{{ project_name }}.samtools_cluster": "{{ samtools_cluster }}",
"{{ project_name }}.insert_size": "{{ insert_size }}",
"{{ project_name }}.bam_file": "{{ bam_file }}",
"{{ project_name }}.lnc_gtf_file": "{{ lnc_gtf_file }}",
"{{ project_name }}.subread_docker": "{{ subread_docker }}",
"{{ project_name }}.subread_cluster": "{{ subread_cluster }}",
"{{ project_name }}.cpu_num": "{{ cpu_num }}",
"{{ project_name }}.strand_information": "{{ strand_information }}"
}

+ 67
- 0
tasks/fastp.wdl Переглянути файл

@@ -0,0 +1,67 @@
# Pre-process a paired-end FASTQ sample with fastp in up to three passes.
#
# Each pass is switched by an integer flag and either runs fastp or copies the
# files through unchanged:
#   1. disable_quality_filtering == 0 -> copy inputs; otherwise run a first
#      fastp pass with the front/tail/max-length trimming options.
#   2. UMI == 0 -> copy; otherwise run fastp with -U, --umi_loc and --umi_len.
#   3. disable_adapter_trimming == 0 -> run fastp with the adapter sequences,
#      -l, -q and -u; otherwise copy.
#
# Inputs:
#   sample_id                     prefix for every file name produced here
#   read1 / read2                 paired FASTQ files
#   adapter_sequence(_r2)         adapter sequences passed to pass 3
#   docker / cluster              runtime image and backend cluster spec
#   umi_loc / umi_len             passed to --umi_loc / --umi_len in pass 2
#   trim_front*/trim_tail*/max_len*  passed to every fastp invocation
#   length_required               passed to fastp -l
#   qualified_quality_phred       passed to fastp -q
#   length_required1              passed to fastp -u
#
# Outputs: the fastp JSON and HTML reports plus the final R1/R2 FASTQ files.
#
# NOTE(review): every fastp invocation writes the same <sample_id>.json/.html,
# so a later pass overwrites the report of an earlier one. If all three passes
# take the copy branch, no report is written and the json/report outputs will
# be missing -- confirm that combination is never used.
task fastp {
    String sample_id
    File read1
    File read2
    String adapter_sequence
    String adapter_sequence_r2
    String docker
    String cluster
    String umi_loc
    Int trim_front1
    Int trim_tail1
    Int max_len1
    Int trim_front2
    Int trim_tail2
    Int max_len2
    Int disable_adapter_trimming
    Int length_required
    Int umi_len
    Int UMI
    Int qualified_quality_phred
    Int length_required1
    Int disable_quality_filtering

    command <<<
        mkdir -p /cromwell_root/tmp/fastp/

        ##1.Disable_quality_filtering
        if [ "${disable_quality_filtering}" == 0 ]
        then
            # Pass-through: the target names must use the interpolated sample id
            # so that step 2 can find them.
            cp ${read1} /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz
            cp ${read2} /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz
        else
            fastp --thread 4 --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} -i ${read1} -I ${read2} -o /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz -O /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz -j ${sample_id}.json -h ${sample_id}.html
        fi

        ##2.UMI
        if [ "${UMI}" == 0 ]
        then
            cp /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz
            cp /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz
        else
            fastp --thread 4 -U --umi_loc=${umi_loc} --umi_len=${umi_len} --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} -i /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz -I /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz -o /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz -O /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz -j ${sample_id}.json -h ${sample_id}.html
        fi

        ##3.Trim
        if [ "${disable_adapter_trimming}" == 0 ]
        then
            fastp --thread 4 -l ${length_required} -q ${qualified_quality_phred} -u ${length_required1} --adapter_sequence ${adapter_sequence} --adapter_sequence_r2 ${adapter_sequence_r2} --detect_adapter_for_pe --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} -i /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz -I /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz -o ${sample_id}_R1.fastq.gz -O ${sample_id}_R2.fastq.gz -j ${sample_id}.json -h ${sample_id}.html
        else
            cp /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz ${sample_id}_R1.fastq.gz
            cp /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz ${sample_id}_R2.fastq.gz
        fi
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd 200 /cromwell_root/"
    }

    output {
        File json = "${sample_id}.json"
        File report = "${sample_id}.html"
        File Trim_R1 = "${sample_id}_R1.fastq.gz"
        File Trim_R2 = "${sample_id}_R2.fastq.gz"
    }
}

+ 29
- 0
tasks/featureCounts.wdl Переглянути файл

@@ -0,0 +1,29 @@
# Count paired-end fragments per gene with featureCounts (Subread).
#
# Inputs:
#   bam_file            alignment file to count
#   lnc_gtf_file        GTF annotation, passed to featureCounts -a
#   gtf_dir             kept for interface compatibility; no longer used in the
#                       command because lnc_gtf_file is a File and already
#                       expands to a usable path
#   sample_id           names the output directory and the count table
#   docker / cluster    runtime image and backend cluster spec
#   cpu_num             passed to featureCounts -T
#   strand_information  passed to featureCounts -s
#
# Outputs:
#   out_profile   the count table <sample_id>/<sample_id>.genefeaturecount.txt
#   out_summary   the ".summary" file featureCounts writes next to the table
task featureCounts {
    File bam_file
    File lnc_gtf_file = "lncRNAKB_hg38_v7.gtf"
    String gtf_dir = "oss://pgx-reference-data/reference/subread/"
    String sample_id
    String docker
    String cluster
    Int cpu_num = 4
    Int strand_information = 0

    command <<<
        mkdir -p ${sample_id}
        featureCounts -T ${cpu_num} -t exon -g gene_id -a ${lnc_gtf_file} -s ${strand_information} -p -o ${sample_id}/${sample_id}.genefeaturecount.txt ${bam_file}
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd 200 /cromwell_root/"
    }

    output {
        # Paths must match the -o target used in the command above.
        File out_profile = "${sample_id}/${sample_id}.genefeaturecount.txt"
        File out_summary = "${sample_id}/${sample_id}.genefeaturecount.txt.summary"
    }
}

+ 34
- 0
tasks/hisat2.wdl Переглянути файл

@@ -0,0 +1,34 @@
# Align trimmed paired-end reads with HISAT2.
#
# Inputs:
#   idx / idx_prefix     index location and basename, joined as idx/idx_prefix for -x
#   Trim_R1 / Trim_R2    mate 1 / mate 2 FASTQ files (-1 / -2)
#   sample_id            prefix for the SAM file and the unaligned-read files
#   docker / cluster     runtime image and backend cluster spec
#   pen_*, *_intronlen, maxins, minins
#                        forwarded unchanged to the HISAT2 options of the same name
#
# Outputs: the SAM alignment and the two gzip files named by --un-conc-gz.
task hisat2 {
    File idx
    File Trim_R1
    File Trim_R2
    String idx_prefix
    String sample_id
    String docker
    String cluster
    String pen_intronlen
    Int pen_cansplice
    Int pen_noncansplice
    Int min_intronlen
    Int max_intronlen
    Int maxins
    Int minins

    command <<<
        # Use as many threads as the container reports processors.
        threads=$(nproc)
        hisat2 -t -p $threads \
            -x ${idx}/${idx_prefix} \
            --pen-cansplice ${pen_cansplice} \
            --pen-noncansplice ${pen_noncansplice} \
            --pen-intronlen ${pen_intronlen} \
            --min-intronlen ${min_intronlen} \
            --max-intronlen ${max_intronlen} \
            --maxins ${maxins} \
            --minins ${minins} \
            --un-conc-gz ${sample_id}_un.fq.gz \
            -1 ${Trim_R1} \
            -2 ${Trim_R2} \
            -S ${sample_id}.sam
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd 200 /cromwell_root/"
    }

    output {
        File sam = "${sample_id}.sam"
        File unmapread_1p = "${sample_id}_un.fq.1.gz"
        File unmapread_2p = "${sample_id}_un.fq.2.gz"
    }
}

+ 37
- 0
tasks/samtools.wdl Переглянути файл

@@ -0,0 +1,37 @@
# Convert a SAM file to a sorted, indexed BAM and derive two side products.
#
# Steps in the command: SAM -> BAM, sort, index, subsample the sorted BAM with
# "view -bs 42.1", and extract the IS rows of "samtools stats" into a file.
#
# Inputs:
#   sam                 alignment to convert
#   sample_id           prefix from which the default file names are derived
#   bam, sorted_bam, percent_bam, sorted_bam_index, ins_size
#                       file names; default to sample_id plus a fixed suffix
#   docker / cluster    runtime image and backend cluster spec
#   insert_size         passed to "samtools stats -i"
#
# Outputs: sorted BAM, subsampled BAM, BAM index and the insert-size table.
task samtools {
    File sam
    String sample_id
    String bam = sample_id + ".bam"
    String sorted_bam = sample_id + ".sorted.bam"
    String percent_bam = sample_id + ".percent.bam"
    String sorted_bam_index = sample_id + ".sorted.bam.bai"
    String ins_size = sample_id + ".ins_size"
    String docker
    String cluster
    Int insert_size

    command <<<
        # Abort on the first failing command, including failures inside a pipe.
        set -o pipefail
        set -e
        /opt/conda/bin/samtools view -bS ${sam} > ${bam}
        /opt/conda/bin/samtools sort -m 1000000000 ${bam} -o ${sorted_bam}
        /opt/conda/bin/samtools index ${sorted_bam}
        /opt/conda/bin/samtools view -bs 42.1 ${sorted_bam} > ${percent_bam}
        # Write to the declared ins_size name so the file always matches the
        # out_ins_size output, even when ins_size is overridden.
        /opt/conda/bin/samtools stats -i ${insert_size} ${sorted_bam} |grep ^IS|cut -f 2- > ${ins_size}
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd 200 /cromwell_root/"
    }

    output {
        File out_bam = sorted_bam
        File out_percent = percent_bam
        File out_bam_index = sorted_bam_index
        File out_ins_size = ins_size
    }
}


+ 122
- 0
workflow.wdl Переглянути файл

@@ -0,0 +1,122 @@
import "./tasks/fastp.wdl" as fastp
import "./tasks/hisat2.wdl" as hisat2
import "./tasks/samtools.wdl" as samtools
import "./tasks/featureCounts.wdl" as featureCounts


# lncRNA-seq workflow: fastp -> hisat2 -> samtools -> featureCounts.
#
# The workflow name is a template placeholder filled in before submission.
# Each call receives its own docker image and cluster spec; every other
# parameter is forwarded unchanged to the task input of the same name.
#
# NOTE(review): Trim_R1, Trim_R2, sam, bam_file and the bam/sorted_bam/
# percent_bam/sorted_bam_index/ins_size strings are declared here but never
# passed to any call; the calls take those values from upstream task outputs.
# They are kept so the existing inputs template still matches -- confirm
# whether they can be dropped.
workflow {{ project_name }} {
    # fastp inputs
    String sample_id
    File read1
    File read2
    String adapter_sequence
    String adapter_sequence_r2
    String fastp_docker
    String fastp_cluster
    String umi_loc
    Int trim_front1
    Int trim_tail1
    Int max_len1
    Int trim_front2
    Int trim_tail2
    Int max_len2
    Int disable_adapter_trimming
    Int length_required
    Int umi_len
    Int UMI
    Int qualified_quality_phred
    Int length_required1
    Int disable_quality_filtering

    # hisat2 inputs
    File idx
    File Trim_R1
    File Trim_R2
    String idx_prefix
    String pen_intronlen
    String hisat2_docker
    String hisat2_cluster
    Int pen_cansplice
    Int pen_noncansplice
    Int min_intronlen
    Int max_intronlen
    Int maxins
    Int minins

    # samtools inputs
    File sam
    String bam = sample_id + ".bam"
    String sorted_bam = sample_id + ".sorted.bam"
    String percent_bam = sample_id + ".percent.bam"
    String sorted_bam_index = sample_id + ".sorted.bam.bai"
    String ins_size = sample_id + ".ins_size"
    String samtools_docker
    String samtools_cluster
    Int insert_size

    # featureCounts inputs
    File bam_file
    File lnc_gtf_file = "lncRNAKB_hg38_v7.gtf"
    String gtf_dir = "oss://pgx-reference-data/reference/subread/"
    String subread_docker
    String subread_cluster
    Int cpu_num = 4
    Int strand_information = 0

    call fastp.fastp as fastp {
        input:
            sample_id = sample_id,
            read1 = read1,
            read2 = read2,
            docker = fastp_docker,
            cluster = fastp_cluster,
            adapter_sequence = adapter_sequence,
            adapter_sequence_r2 = adapter_sequence_r2,
            umi_loc = umi_loc,
            trim_front1 = trim_front1,
            trim_tail1 = trim_tail1,
            max_len1 = max_len1,
            trim_front2 = trim_front2,
            trim_tail2 = trim_tail2,
            max_len2 = max_len2,
            disable_adapter_trimming = disable_adapter_trimming,
            length_required = length_required,
            umi_len = umi_len,
            UMI = UMI,
            qualified_quality_phred = qualified_quality_phred,
            length_required1 = length_required1,
            disable_quality_filtering = disable_quality_filtering
    }

    call hisat2.hisat2 as hisat2 {
        input:
            sample_id = sample_id,
            idx = idx,
            idx_prefix = idx_prefix,
            Trim_R1 = fastp.Trim_R1,
            Trim_R2 = fastp.Trim_R2,
            docker = hisat2_docker,
            cluster = hisat2_cluster,
            pen_intronlen = pen_intronlen,
            pen_cansplice = pen_cansplice,
            pen_noncansplice = pen_noncansplice,
            min_intronlen = min_intronlen,
            max_intronlen = max_intronlen,
            maxins = maxins,
            minins = minins
    }

    call samtools.samtools as samtools {
        input:
            sample_id = sample_id,
            sam = hisat2.sam,
            docker = samtools_docker,
            cluster = samtools_cluster,
            insert_size = insert_size
    }

    call featureCounts.featureCounts as featureCounts {
        input:
            sample_id = sample_id,
            # The samtools task exposes its sorted BAM as out_bam.
            bam_file = samtools.out_bam,
            lnc_gtf_file = lnc_gtf_file,
            gtf_dir = gtf_dir,
            docker = subread_docker,
            cluster = subread_cluster,
            cpu_num = cpu_num,
            strand_information = strand_information
    }
}

Завантаження…
Відмінити
Зберегти