@@ -0,0 +1,45 @@ | |||
# README.md | |||
> Author: Qingwang Chen | |||
> | |||
> Email: [qwch20@fudan.edu.cn](mailto:qwch20@fudan.edu.cn) | |||
> | |||
> Last Updates: 05/11/2020 | |||
#### Requirements | |||
- choppy | |||
- Ali-Cloud | |||
- Linux | |||
``` | |||
# 激活choppy环境 | |||
$ source activate choppy (open-choppy-env) | |||
# 第一次安装 | |||
$ choppy install chenqingwang/lncRNAseq | |||
# 非第一次安装 | |||
$ choppy install chenqingwang/lncRNAseq -f | |||
# 查询已安装APP | |||
$ choppy apps | |||
``` | |||
#### Quick Start | |||
``` | |||
# 准备 samples.csv 文件 | |||
$ choppy samples chenqingwang/lncRNAseq-latest > samples.csv | |||
# 提交任务 | |||
$ choppy batch chenqingwang/lncRNAseq-latest samples.csv -p Your_project_name -l Your_label
# 查询任务运行状况 | |||
$ choppy query -L Your_label | grep "status" | |||
# 查询失败任务 | |||
$ choppy search -s Failed -p Your_project_name -u chenqingwang --short-format | |||
# 结果文件地址 | |||
$ oss://choppy-cromwell-result/test-choppy/Your_project_name/ | |||
``` |
@@ -0,0 +1,41 @@ | |||
{
    "adapter_sequence": "AGATCGGAAGAGCACACGTCTGAACTCCAGTCA",
    "adapter_sequence_r2": "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT",
    "fastp_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastp:0.19.6",
    "fastp_cluster": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc",
    "umi_loc": "umi_loc",
    "trim_front1": "0",
    "trim_tail1": "0",
    "max_len1": "0",
    "trim_front2": "0",
    "trim_tail2": "0",
    "max_len2": "0",
    "disable_adapter_trimming": "0",
    "length_required": "50",
    "umi_len": "0",
    "UMI": "0",
    "qualified_quality_phred": "20",
    "length_required1": "20",
    "disable_quality_filtering": "1",
    "idx": "oss://pgx-reference-data/reference/hisat2/grch38_snp_tran/",
    "idx_prefix": "genome_snp_tran",
    "pen_intronlen":"G,-8,1",
    "hisat2_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/hisat2:v2.1.0-2",
    "hisat2_cluster": "OnDemand bcs.a2.3xlarge img-ubuntu-vpc",
    "pen_cansplice":"0",
    "pen_noncansplice":"3",
    "min_intronlen":"30",
    "max_intronlen":"500000",
    "maxins":"500",
    "minins":"0",
    "samtools_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/samtools:v1.3.1",
    "samtools_cluster": "OnDemand bcs.a2.large img-ubuntu-vpc",
    "insert_size":"8000",
    "lnc_gtf_file": "oss://pgx-reference-data/reference/subread/lncRNAKB_hg38_v7.gtf",
    "subread_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/subread:v1.6.4",
    "subread_cluster": "OnDemand bcs.a2.large img-ubuntu-vpc",
    "cpu_num": "4",
    "strand_information": "0",
    "gtf_dir": "oss://pgx-reference-data/reference/subread/",
    "fasta": "GRCh38.d1.vd1.fa"
}
@@ -0,0 +1,50 @@ | |||
{
    "{{ project_name }}.sample_id": "{{ sample_id }}",
    "{{ project_name }}.read1": "{{ read1 }}",
    "{{ project_name }}.read2": "{{ read2 }}",
    "{{ project_name }}.adapter_sequence": "{{ adapter_sequence }}",
    "{{ project_name }}.adapter_sequence_r2": "{{ adapter_sequence_r2 }}",
    "{{ project_name }}.fastp_docker": "{{ fastp_docker }}",
    "{{ project_name }}.fastp_cluster": "{{ fastp_cluster }}",
    "{{ project_name }}.umi_loc": "{{ umi_loc }}",
    "{{ project_name }}.trim_front1": "{{ trim_front1 }}",
    "{{ project_name }}.trim_tail1": "{{ trim_tail1 }}",
    "{{ project_name }}.max_len1": "{{ max_len1 }}",
    "{{ project_name }}.trim_front2": "{{ trim_front2 }}",
    "{{ project_name }}.trim_tail2": "{{ trim_tail2 }}",
    "{{ project_name }}.max_len2": "{{ max_len2 }}",
    "{{ project_name }}.disable_adapter_trimming": "{{ disable_adapter_trimming }}",
    "{{ project_name }}.length_required": "{{ length_required }}",
    "{{ project_name }}.umi_len": "{{ umi_len }}",
    "{{ project_name }}.UMI": "{{ UMI }}",
    "{{ project_name }}.qualified_quality_phred": "{{ qualified_quality_phred }}",
    "{{ project_name }}.length_required1": "{{ length_required1 }}",
    "{{ project_name }}.disable_quality_filtering": "{{ disable_quality_filtering }}",
    "{{ project_name }}.idx": "{{ idx }}",
    "{{ project_name }}.Trim_R1": "{{ Trim_R1 }}",
    "{{ project_name }}.Trim_R2": "{{ Trim_R2 }}",
    "{{ project_name }}.idx_prefix": "{{ idx_prefix }}",
    "{{ project_name }}.pen_intronlen": "{{ pen_intronlen }}",
    "{{ project_name }}.hisat2_docker": "{{ hisat2_docker }}",
    "{{ project_name }}.hisat2_cluster": "{{ hisat2_cluster }}",
    "{{ project_name }}.pen_cansplice": "{{ pen_cansplice }}",
    "{{ project_name }}.pen_noncansplice": "{{ pen_noncansplice }}",
    "{{ project_name }}.min_intronlen": "{{ min_intronlen }}",
    "{{ project_name }}.max_intronlen": "{{ max_intronlen }}",
    "{{ project_name }}.maxins": "{{ maxins }}",
    "{{ project_name }}.minins": "{{ minins }}",
    "{{ project_name }}.sam": "{{ sam }}",
    "{{ project_name }}.sorted_bam": "{{ sorted_bam }}",
    "{{ project_name }}.percent_bam": "{{ percent_bam }}",
    "{{ project_name }}.sorted_bam_index": "{{ sorted_bam_index }}",
    "{{ project_name }}.ins_size": "{{ ins_size }}",
    "{{ project_name }}.samtools_docker": "{{ samtools_docker }}",
    "{{ project_name }}.samtools_cluster": "{{ samtools_cluster }}",
    "{{ project_name }}.insert_size": "{{ insert_size }}",
    "{{ project_name }}.bam_file": "{{ bam_file }}",
    "{{ project_name }}.lnc_gtf_file": "{{ lnc_gtf_file }}",
    "{{ project_name }}.subread_docker": "{{ subread_docker }}",
    "{{ project_name }}.subread_cluster": "{{ subread_cluster }}",
    "{{ project_name }}.cpu_num": "{{ cpu_num }}",
    "{{ project_name }}.strand_information": "{{ strand_information }}"
}
@@ -0,0 +1,67 @@ | |||
# Pre-process one paired-end sample with fastp in three sequential stages.
# Each stage either runs fastp or copies its input through unchanged, based
# on an integer switch. Intermediate files are kept under
# /cromwell_root/tmp/fastp/; only the final pair and the fastp reports are
# declared as outputs.
#
# Inputs:
#   sample_id                  prefix for every file this task writes
#   read1 / read2              raw paired-end FASTQ files
#   adapter_sequence(_r2)      adapters handed to fastp in stage 3
#   umi_loc / umi_len          UMI options handed to fastp in stage 2
#   trim_front*/trim_tail*/max_len*  per-read trimming options (all stages)
#   disable_quality_filtering  stage 1 switch (0 = copy, otherwise run fastp)
#   UMI                        stage 2 switch (0 = copy, otherwise run fastp -U)
#   disable_adapter_trimming   stage 3 switch (0 = run fastp, otherwise copy)
#   length_required, qualified_quality_phred, length_required1
#                              passed to fastp -l, -q and -u in stage 3
#   docker / cluster           runtime image and instance type
task fastp {
    String sample_id
    File read1
    File read2
    String adapter_sequence
    String adapter_sequence_r2
    String docker
    String cluster
    String umi_loc
    Int trim_front1
    Int trim_tail1
    Int max_len1
    Int trim_front2
    Int trim_tail2
    Int max_len2
    Int disable_adapter_trimming
    Int length_required
    Int umi_len
    Int UMI
    Int qualified_quality_phred
    Int length_required1
    Int disable_quality_filtering

    command <<<
        mkdir -p /cromwell_root/tmp/fastp/

        ##1.Disable_quality_filtering
        # 0 copies the raw reads through; any other value runs fastp with the
        # trimming options only.
        # NOTE(review): the switch name reads as the inverse of what the
        # branches do -- confirm the intended polarity.
        if [ "${disable_quality_filtering}" == 0 ]
        then
            # Fixed: the destination previously used a literal "{sample_id}"
            # (missing the dollar sign), so stage 2 could not find these files.
            cp ${read1} /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz
            cp ${read2} /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz
        else
            fastp --thread 4 --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} -i ${read1} -I ${read2} -o /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz -O /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz -j ${sample_id}.json -h ${sample_id}.html
        fi

        ##2.UMI
        # 0 copies stage-1 output through; any other value runs fastp with UMI
        # processing enabled (-U).
        if [ "${UMI}" == 0 ]
        then
            cp /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz
            cp /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz
        else
            fastp --thread 4 -U --umi_loc=${umi_loc} --umi_len=${umi_len} --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} -i /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz -I /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz -o /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz -O /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz -j ${sample_id}.json -h ${sample_id}.html
        fi

        ##3.Trim
        # 0 runs fastp with the adapter, length and quality options; any other
        # value copies stage-2 output to the final file names.
        if [ "${disable_adapter_trimming}" == 0 ]
        then
            fastp --thread 4 -l ${length_required} -q ${qualified_quality_phred} -u ${length_required1} --adapter_sequence ${adapter_sequence} --adapter_sequence_r2 ${adapter_sequence_r2} --detect_adapter_for_pe --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} -i /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz -I /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz -o ${sample_id}_R1.fastq.gz -O ${sample_id}_R2.fastq.gz -j ${sample_id}.json -h ${sample_id}.html
        else
            cp /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz ${sample_id}_R1.fastq.gz
            cp /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz ${sample_id}_R2.fastq.gz
        fi
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd 200 /cromwell_root/"
    }

    # Every fastp invocation above writes the same .json/.html names, so the
    # reports describe whichever stage ran last.
    # NOTE(review): if all three stages take their copy branch, no report is
    # written and the json/report outputs will not exist.
    output {
        File json = "${sample_id}.json"
        File report = "${sample_id}.html"
        File Trim_R1 = "${sample_id}_R1.fastq.gz"
        File Trim_R2 = "${sample_id}_R2.fastq.gz"
    }
}
@@ -0,0 +1,29 @@ | |||
# Count read pairs per gene with featureCounts (Subread) against a lncRNA GTF.
#
# Inputs:
#   bam_file            alignment file to count
#   lnc_gtf_file        GTF annotation; declared as File, so the command
#                       receives the localized path
#   gtf_dir             kept only for interface compatibility; no longer used
#                       in the command (see the note there)
#   sample_id           names the output directory and files
#   cpu_num             thread count passed to -T
#   strand_information  strandedness passed to -s
#   docker / cluster    runtime image and instance type
task featureCounts {
    File bam_file
    File lnc_gtf_file = "lncRNAKB_hg38_v7.gtf"
    String gtf_dir = "oss://pgx-reference-data/reference/subread/"
    String sample_id
    String docker
    String cluster
    Int cpu_num = 4
    Int strand_information = 0

    command <<<
        # Fixed: WDL inputs must be referenced with the brace placeholder form.
        # The previous bare dollar-name form was left for the shell, where
        # those variables are unset and expand to empty strings.
        mkdir -p ${sample_id}

        # Fixed: the annotation is passed as the localized File path.
        # Prepending gtf_dir (an OSS URL string) to it produced a path that
        # cannot exist inside the container.
        featureCounts -T ${cpu_num} -t exon -g gene_id -a ${lnc_gtf_file} -s ${strand_information} -p -o ${sample_id}/${sample_id}.genefeaturecount.txt ${bam_file}
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd 200 /cromwell_root/"
    }

    # Fixed: paths now match what the command writes. The count table goes to
    # the -o path, and featureCounts writes its summary next to it with a
    # ".summary" suffix.
    output {
        File out_profile = "${sample_id}/${sample_id}.genefeaturecount.txt"
        File out_summary = "${sample_id}/${sample_id}.genefeaturecount.txt.summary"
    }
}
@@ -0,0 +1,34 @@ | |||
# Align a trimmed read pair with HISAT2 and emit the SAM alignment plus the
# two gzip files named by --un-conc-gz.
#
# Inputs:
#   idx / idx_prefix    index location and basename, joined as idx/idx_prefix
#   Trim_R1 / Trim_R2   trimmed mate files passed to -1 / -2
#   sample_id           prefix for every file this task writes
#   pen_* / *_intronlen / maxins / minins
#                       forwarded unchanged to the matching HISAT2 options
#   docker / cluster    runtime image and instance type
task hisat2 {
    File idx
    File Trim_R1
    File Trim_R2
    String idx_prefix
    String sample_id
    String docker
    String cluster
    String pen_intronlen
    Int pen_cansplice
    Int pen_noncansplice
    Int min_intronlen
    Int max_intronlen
    Int maxins
    Int minins

    command <<<
        # Use every core the instance reports.
        threads=$(nproc)
        hisat2 -t -p $threads \
            -x ${idx}/${idx_prefix} \
            --pen-cansplice ${pen_cansplice} \
            --pen-noncansplice ${pen_noncansplice} \
            --pen-intronlen ${pen_intronlen} \
            --min-intronlen ${min_intronlen} \
            --max-intronlen ${max_intronlen} \
            --maxins ${maxins} \
            --minins ${minins} \
            --un-conc-gz ${sample_id}_un.fq.gz \
            -1 ${Trim_R1} \
            -2 ${Trim_R2} \
            -S ${sample_id}.sam
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd 200 /cromwell_root/"
    }

    # The un-conc files are expected with ".1"/".2" inserted before ".gz".
    output {
        File sam = "${sample_id}.sam"
        File unmapread_1p = "${sample_id}_un.fq.1.gz"
        File unmapread_2p = "${sample_id}_un.fq.2.gz"
    }
}
@@ -0,0 +1,37 @@ | |||
# Convert a SAM file to BAM, sort and index it, write a subsampled BAM, and
# extract the insert-size rows from `samtools stats`.
#
# Inputs:
#   sam               alignment in SAM format
#   sample_id         prefix used to derive the default file names below
#   bam, sorted_bam, percent_bam, sorted_bam_index, ins_size
#                     file names, defaulting to sample_id plus a suffix
#   insert_size       value passed to `samtools stats -i`
#   docker / cluster  runtime image and instance type
task samtools {
    File sam
    String sample_id
    String bam = sample_id + ".bam"
    String sorted_bam = sample_id + ".sorted.bam"
    String percent_bam = sample_id + ".percent.bam"
    String sorted_bam_index = sample_id + ".sorted.bam.bai"
    String ins_size = sample_id + ".ins_size"
    String docker
    String cluster
    Int insert_size

    command <<<
        # Abort on the first failing command, including failures inside a pipe.
        set -o pipefail
        set -e

        /opt/conda/bin/samtools view -bS ${sam} > ${bam}
        /opt/conda/bin/samtools sort -m 1000000000 ${bam} -o ${sorted_bam}
        /opt/conda/bin/samtools index ${sorted_bam}

        # "-s 42.1": integer part is the sampling seed, fractional part the
        # fraction of reads kept.
        /opt/conda/bin/samtools view -bs 42.1 ${sorted_bam} > ${percent_bam}

        # Keep only the IS rows and drop their tag column.
        # Fixed: write to the ins_size name rather than a hard-coded
        # sample_id-based one, so the file always matches the declared
        # out_ins_size output even when ins_size is overridden.
        /opt/conda/bin/samtools stats -i ${insert_size} ${sorted_bam} | grep '^IS' | cut -f 2- > ${ins_size}
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd 200 /cromwell_root/"
    }

    output {
        File out_bam = sorted_bam
        File out_percent = percent_bam
        File out_bam_index = sorted_bam_index
        File out_ins_size = ins_size
    }
}
@@ -0,0 +1,122 @@ | |||
import "./tasks/fastp.wdl" as fastp | |||
import "./tasks/hisat2.wdl" as hisat2 | |||
import "./tasks/samtools.wdl" as samtools | |||
import "./tasks/featureCounts.wdl" as featureCounts | |||
# lncRNA-seq workflow template: fastp -> hisat2 -> samtools -> featureCounts.
# The workflow name is filled in by the template engine. Each call receives
# the sample id plus its own docker/cluster pair, and each downstream call
# consumes the output of the previous one.
workflow {{ project_name }} {
    # Sample and raw reads
    String sample_id
    File read1
    File read2

    # fastp settings
    String adapter_sequence
    String adapter_sequence_r2
    String fastp_docker
    String fastp_cluster
    String umi_loc
    Int trim_front1
    Int trim_tail1
    Int max_len1
    Int trim_front2
    Int trim_tail2
    Int max_len2
    Int disable_adapter_trimming
    Int length_required
    Int umi_len
    Int UMI
    Int qualified_quality_phred
    Int length_required1
    Int disable_quality_filtering

    # hisat2 settings
    # NOTE(review): Trim_R1, Trim_R2, sam and bam_file (plus the derived file
    # names further down) are declared here but never referenced by any call;
    # the calls use upstream task outputs instead. They are kept so existing
    # inputs files keep matching -- consider removing them.
    File idx
    File Trim_R1
    File Trim_R2
    String idx_prefix
    String pen_intronlen
    String hisat2_docker
    String hisat2_cluster
    Int pen_cansplice
    Int pen_noncansplice
    Int min_intronlen
    Int max_intronlen
    Int maxins
    Int minins

    # samtools settings
    File sam
    String bam = sample_id + ".bam"
    String sorted_bam = sample_id + ".sorted.bam"
    String percent_bam = sample_id + ".percent.bam"
    String sorted_bam_index = sample_id + ".sorted.bam.bai"
    String ins_size = sample_id + ".ins_size"
    String samtools_docker
    String samtools_cluster
    Int insert_size

    # featureCounts settings
    File bam_file
    File lnc_gtf_file = "lncRNAKB_hg38_v7.gtf"
    String gtf_dir = "oss://pgx-reference-data/reference/subread/"
    String subread_docker
    String subread_cluster
    Int cpu_num = 4
    Int strand_information = 0

    call fastp.fastp as fastp {
        input:
            sample_id = sample_id,
            read1 = read1,
            read2 = read2,
            docker = fastp_docker,
            cluster = fastp_cluster,
            adapter_sequence = adapter_sequence,
            adapter_sequence_r2 = adapter_sequence_r2,
            umi_loc = umi_loc,
            trim_front1 = trim_front1,
            trim_tail1 = trim_tail1,
            max_len1 = max_len1,
            trim_front2 = trim_front2,
            trim_tail2 = trim_tail2,
            max_len2 = max_len2,
            disable_adapter_trimming = disable_adapter_trimming,
            length_required = length_required,
            umi_len = umi_len,
            UMI = UMI,
            qualified_quality_phred = qualified_quality_phred,
            length_required1 = length_required1,
            disable_quality_filtering = disable_quality_filtering
    }

    # Align the trimmed reads produced by fastp.
    call hisat2.hisat2 as hisat2 {
        input:
            sample_id = sample_id,
            idx = idx,
            idx_prefix = idx_prefix,
            Trim_R1 = fastp.Trim_R1,
            Trim_R2 = fastp.Trim_R2,
            docker = hisat2_docker,
            cluster = hisat2_cluster,
            pen_intronlen = pen_intronlen,
            pen_cansplice = pen_cansplice,
            pen_noncansplice = pen_noncansplice,
            min_intronlen = min_intronlen,
            max_intronlen = max_intronlen,
            maxins = maxins,
            minins = minins
    }

    # Convert, sort and index the hisat2 alignment.
    call samtools.samtools as samtools {
        input:
            sample_id = sample_id,
            sam = hisat2.sam,
            docker = samtools_docker,
            cluster = samtools_cluster,
            insert_size = insert_size
    }

    # Count against the sorted BAM.
    # Fixed: the samtools task has no output named "bam"; its sorted BAM is
    # exposed as out_bam. Also restored the comma that was missing after the
    # gtf_dir binding, which made the call block unparseable.
    call featureCounts.featureCounts as featureCounts {
        input:
            sample_id = sample_id,
            bam_file = samtools.out_bam,
            lnc_gtf_file = lnc_gtf_file,
            gtf_dir = gtf_dir,
            docker = subread_docker,
            cluster = subread_cluster,
            cpu_num = cpu_num,
            strand_information = strand_information
    }
}