4 years ago · a9735f4c3a
--- a/README.md
+++ b/README.md
@@ -0,0 +1,45 @@
 # README.md

 > Author: Qingwang Chen
 >
 > Email: [qwch20@fudan.edu.cn](mailto:qwch20@fudan.edu.cn)
 >
 > Last Updates: 05/11/2020

 #### Requirements

 - choppy
 - Ali-Cloud
 - Linux

 ```
 # 激活choppy环境
 $ source activate choppy (open-choppy-env)

 # 第一次安装
 $ choppy install chenqingwang/lncRNAseq
 # 非第一次安装
 $ choppy install chenqingwang/lncRNAseq -f 

 # 查询已安装APP
 $ choppy apps
 ```

 #### Quick Start

 ```
 # 准备 samples.csv 文件
 $ choppy samples chenqingwang/lncRNAseq-latest > samples.csv

 # 提交任务
 $ choppy batch chenqingwang/lncRNAseq-latest samples.csv -p Your_project_name -l Your_label

 # 查询任务运行状况
 $ choppy query -L Your_label | grep "status"

 # 查询失败任务
 $ choppy search -s Failed -p Your_project_name -u chenqingwang --short-format

 # 结果文件地址
 $ oss://choppy-cromwell-result/test-choppy/Your_project_name/
 ```
--- a/defaults
+++ b/defaults
@@ -0,0 +1,41 @@
 {
    "adapter_sequence": "AGATCGGAAGAGCACACGTCTGAACTCCAGTCA",
    "adapter_sequence_r2": "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT",
    "fastp_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastp:0.19.6",
    "fastp_cluster": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc",
    "umi_loc": "umi_loc",
    "trim_front1": "0",
    "trim_tail1": "0",
    "max_len1": "0",
    "trim_front2": "0",
    "trim_tail2": "0",
    "max_len2": "0",
    "disable_adapter_trimming": "0",
    "length_required": "50",
    "umi_len": "0",
    "UMI": "0",
    "qualified_quality_phred": "20",
    "length_required1": "20",
    "disable_quality_filtering": "1",
    "idx": "oss://pgx-reference-data/reference/hisat2/grch38_snp_tran/",
    "idx_prefix": "genome_snp_tran",
    "pen_intronlen":"G,-8,1",
    "hisat2_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/hisat2:v2.1.0-2",
    "hisat2_cluster": "OnDemand bcs.a2.3xlarge img-ubuntu-vpc",
    "pen_cansplice":"0",
    "pen_noncansplice":"3",
    "min_intronlen":"30",
    "max_intronlen":"500000",
    "maxins":"500",
    "minins":"0",
    "samtools_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/samtools:v1.3.1",
    "samtools_cluster": "OnDemand bcs.a2.large img-ubuntu-vpc",
    "insert_size":"8000",
    "lnc_gtf_file": "oss://pgx-reference-data/reference/subread/lncRNAKB_hg38_v7.gtf",
    "subread_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/subread:v1.6.4",
    "subread_cluster": "OnDemand bcs.a2.large img-ubuntu-vpc",
    "cpu_num": "4",
    "strand_information": "0",
    "gtf_dir": "oss://pgx-reference-data/reference/subread/",
    "fasta": "GRCh38.d1.vd1.fa"
    }
--- a/inputs
+++ b/inputs
@@ -0,0 +1,50 @@
 {
 	"{{ project_name }}.sample_id": "{{ sample_id }}",
 	"{{ project_name }}.read1": "{{ read1 }}",
 	"{{ project_name }}.read2": "{{ read2 }}",
 	"{{ project_name }}.adapter_sequence": "{{ adapter_sequence }}",
 	"{{ project_name }}.adapter_sequence_r2": "{{ adapter_sequence_r2 }}",
 	"{{ project_name }}.fastp_docker": "{{ fastp_docker }}",
 	"{{ project_name }}.fastp_cluster": "{{ fastp_cluster }}",
 	"{{ project_name }}.umi_loc": "{{ umi_loc }}",
 	"{{ project_name }}.trim_front1": "{{ trim_front1 }}",
 	"{{ project_name }}.trim_tail1": "{{ trim_tail1 }}",
 	"{{ project_name }}.max_len1": "{{ max_len1 }}",
 	"{{ project_name }}.trim_front2": "{{ trim_front2 }}",
 	"{{ project_name }}.trim_tail2": "{{ trim_tail2 }}",
 	"{{ project_name }}.max_len2": "{{ max_len2 }}",
 	"{{ project_name }}.disable_adapter_trimming": "{{ disable_adapter_trimming }}",
 	"{{ project_name }}.length_required": "{{ length_required }}",
 	"{{ project_name }}.umi_len": "{{ umi_len }}",
 	"{{ project_name }}.UMI": "{{ UMI }}",
 	"{{ project_name }}.qualified_quality_phred": "{{ qualified_quality_phred }}",
 	"{{ project_name }}.length_required1": "{{ length_required1 }}",
 	"{{ project_name }}.disable_quality_filtering": "{{ disable_quality_filtering }}",
 	"{{ project_name }}.idx": "{{ idx }}",
 	"{{ project_name }}.Trim_R1": "{{ Trim_R1 }}",
 	"{{ project_name }}.Trim_R2": "{{ Trim_R2 }}",
 	"{{ project_name }}.idx_prefix": "{{ idx_prefix }}",
 	"{{ project_name }}.pen_intronlen": "{{ pen_intronlen }}",
 	"{{ project_name }}.hisat2_docker": "{{ hisat2_docker }}",
 	"{{ project_name }}.hisat2_cluster": "{{ hisat2_cluster }}",
 	"{{ project_name }}.pen_cansplice": "{{ pen_cansplice }}",
 	"{{ project_name }}.pen_noncansplice": "{{ pen_noncansplice }}",
 	"{{ project_name }}.min_intronlen": "{{ min_intronlen }}",
 	"{{ project_name }}.max_intronlen": "{{ max_intronlen }}",
 	"{{ project_name }}.maxins": "{{ maxins }}",
 	"{{ project_name }}.minins": "{{ minins }}",
 	"{{ project_name }}.sam": "{{ sam }}",
 	"{{ project_name }}.sorted_bam": "{{ sorted_bam }}",
 	"{{ project_name }}.percent_bam": "{{ percent_bam }}",
 	"{{ project_name }}.sorted_bam_index": "{{ sorted_bam_index }}",
 	"{{ project_name }}.ins_size": "{{ ins_size }}",
 	"{{ project_name }}.samtools_docker": "{{ samtools_docker }}",
 	"{{ project_name }}.samtools_cluster": "{{ samtools_cluster }}",
 	"{{ project_name }}.insert_size": "{{ insert_size }}",
 	"{{ project_name }}.bam_file": "{{ bam_file }}",
 	"{{ project_name }}.lnc_gtf_file": "{{ lnc_gtf_file }}",
 	"{{ project_name }}.subread_docker": "{{ subread_docker }}",
 	"{{ project_name }}.subread_cluster": "{{ subread_cluster }}",
 	"{{ project_name }}.cpu_num": "{{ cpu_num }}",
 	"{{ project_name }}.strand_information": "{{ strand_information }}"
 }
--- a/tasks/fastp.wdl
+++ b/tasks/fastp.wdl
@@ -0,0 +1,67 @@
 # Three-stage fastp preprocessing of one paired-end sample.
 #
 # Each stage either runs fastp or copies its input through unchanged,
 # depending on an integer flag:
 #   1. disable_quality_filtering == 0 -> copy raw reads; otherwise run fastp
 #      with the front/tail/max_len trimming options only.
 #   2. UMI == 0 -> copy stage-1 output; otherwise run fastp with -U and the
 #      umi_loc / umi_len settings.
 #   3. disable_adapter_trimming == 0 -> run fastp with adapter, length and
 #      quality options; otherwise copy stage-2 output.
 #
 # Outputs: the fastp JSON and HTML reports written by the last fastp run,
 # and the final R1/R2 FASTQ files.
 #
 # NOTE(review): if all three stages take their copy branch, no fastp run
 # happens and the json/html outputs are never created - confirm whether
 # that combination is expected to be supported.
 # NOTE(review): the stage-1 flag name reads inverted relative to what the
 # branches do (0 means "copy") - confirm the intended semantics with the
 # pipeline owner before changing it.
 task fastp {
     String sample_id
     File read1
     File read2
     String adapter_sequence
     String adapter_sequence_r2
     String docker
     String cluster
     String umi_loc
     Int trim_front1
     Int trim_tail1
     Int max_len1
     Int trim_front2
     Int trim_tail2
     Int max_len2
     Int disable_adapter_trimming
     Int length_required
     Int umi_len
     Int UMI
     Int qualified_quality_phred
     # NOTE(review): passed to fastp as -u; verify the name matches the meaning.
     Int length_required1
     Int disable_quality_filtering

     command <<<
         # Scratch directory for the intermediate tmp1/tmp2 FASTQ files.
         mkdir -p /cromwell_root/tmp/fastp/

         ## 1. Disable_quality_filtering
         if [ "${disable_quality_filtering}" == 0 ]
         then
             # Pass-through. The destination must use the interpolated sample id
             # so that stage 2 can find the tmp1 files.
             cp ${read1} /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz
             cp ${read2} /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz
         else
             fastp --thread 4 --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} -i ${read1} -I ${read2} -o /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz -O /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz -j ${sample_id}.json -h ${sample_id}.html
         fi

         ## 2. UMI
         if [ "${UMI}" == 0 ]
         then
             cp /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz
             cp /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz
         else
             fastp --thread 4 -U --umi_loc=${umi_loc} --umi_len=${umi_len} --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} -i /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz -I /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz -o /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz -O /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz -j ${sample_id}.json -h ${sample_id}.html
         fi

         ## 3. Trim
         if [ "${disable_adapter_trimming}" == 0 ]
         then
             fastp --thread 4 -l ${length_required} -q ${qualified_quality_phred} -u ${length_required1} --adapter_sequence ${adapter_sequence} --adapter_sequence_r2 ${adapter_sequence_r2} --detect_adapter_for_pe --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} -i /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz -I /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz -o ${sample_id}_R1.fastq.gz -O ${sample_id}_R2.fastq.gz -j ${sample_id}.json -h ${sample_id}.html
         else
             cp /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz ${sample_id}_R1.fastq.gz
             cp /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz ${sample_id}_R2.fastq.gz
         fi
     >>>

     runtime {
         docker: docker
         cluster: cluster
         systemDisk: "cloud_ssd 40"
         dataDisk: "cloud_ssd 200 /cromwell_root/"
     }

     output {
         File json = "${sample_id}.json"
         File report = "${sample_id}.html"
         File Trim_R1 = "${sample_id}_R1.fastq.gz"
         File Trim_R2 = "${sample_id}_R2.fastq.gz"
     }
 }
--- a/tasks/featureCounts.wdl
+++ b/tasks/featureCounts.wdl
@@ -0,0 +1,29 @@
 # Count reads per gene for one sample with featureCounts (Subread).
 #
 # Inputs:
 #   bam_file            alignment file to count.
 #   lnc_gtf_file        annotation GTF, passed to featureCounts via -a.
 #   gtf_dir             retained so existing callers that pass it keep working;
 #                       the command does not use it, because lnc_gtf_file is a
 #                       File and already resolves to a usable path.
 #   sample_id           names the output directory and files.
 #   cpu_num             thread count (-T).
 #   strand_information  value passed to -s.
 #
 # Outputs:
 #   out_profile  the count table written by -o.
 #   out_summary  the ".summary" file featureCounts writes next to the table.
 task featureCounts {
     File bam_file
     File lnc_gtf_file = "lncRNAKB_hg38_v7.gtf"
     String gtf_dir = "oss://pgx-reference-data/reference/subread/"
     String sample_id
     String docker
     String cluster
     Int cpu_num = 4
     Int strand_information = 0

     command <<<
         # All task inputs must be referenced with the brace placeholder form;
         # a bare dollar-name is an unset shell variable and expands to nothing.
         mkdir -p ${sample_id}
         featureCounts -T ${cpu_num} -t exon -g gene_id -a ${lnc_gtf_file} -s ${strand_information} -p -o ${sample_id}/${sample_id}.genefeaturecount.txt ${bam_file}
     >>>

     runtime {
         docker: docker
         cluster: cluster
         systemDisk: "cloud_ssd 40"
         dataDisk: "cloud_ssd 200 /cromwell_root/"
     }

     output {
         # Paths mirror the -o argument above, including the per-sample directory.
         File out_profile = "${sample_id}/${sample_id}.genefeaturecount.txt"
         File out_summary = "${sample_id}/${sample_id}.genefeaturecount.txt.summary"
     }
 }

--- a/tasks/hisat2.wdl
+++ b/tasks/hisat2.wdl
@@ -0,0 +1,34 @@
 # Align one sample's trimmed paired-end reads with HISAT2.
 #
 # Writes <sample_id>.sam and, through --un-conc-gz, the two gzip files
 # <sample_id>_un.fq.1.gz and <sample_id>_un.fq.2.gz declared as outputs.
 task hisat2 {
     # Sample and reads
     String sample_id
     File Trim_R1
     File Trim_R2

     # Index location (idx) and the index base name inside it (idx_prefix)
     File idx
     String idx_prefix

     # Alignment options, forwarded to the hisat2 flags of the same name
     String pen_intronlen
     Int pen_cansplice
     Int pen_noncansplice
     Int min_intronlen
     Int max_intronlen
     Int maxins
     Int minins

     # Execution environment
     String docker
     String cluster

     command <<<
         # Use every core reported by the container.
         threads=$(nproc)
         hisat2 -t -p $threads \
             -x ${idx}/${idx_prefix} \
             --pen-cansplice ${pen_cansplice} \
             --pen-noncansplice ${pen_noncansplice} \
             --pen-intronlen ${pen_intronlen} \
             --min-intronlen ${min_intronlen} \
             --max-intronlen ${max_intronlen} \
             --maxins ${maxins} \
             --minins ${minins} \
             --un-conc-gz ${sample_id}_un.fq.gz \
             -1 ${Trim_R1} \
             -2 ${Trim_R2} \
             -S ${sample_id}.sam
     >>>

     runtime {
         docker: docker
         cluster: cluster
         systemDisk: "cloud_ssd 40"
         dataDisk: "cloud_ssd 200 /cromwell_root/"
     }

     output {
         File sam = "${sample_id}.sam"
         File unmapread_1p = "${sample_id}_un.fq.1.gz"
         File unmapread_2p = "${sample_id}_un.fq.2.gz"
     }
 }
--- a/tasks/samtools.wdl
+++ b/tasks/samtools.wdl
@@ -0,0 +1,37 @@
 # Convert a SAM file to a sorted, indexed BAM, and derive a subsampled BAM
 # and an insert-size table from it.
 #
 # Outputs: the sorted BAM, its .bai index, the subsampled BAM and the
 # insert-size file. The unsorted intermediate BAM is not exported.
 task samtools {
     File sam
     String sample_id

     # File names derived from the sample id.
     String bam = sample_id + ".bam"
     String sorted_bam = sample_id + ".sorted.bam"
     String percent_bam = sample_id + ".percent.bam"
     String sorted_bam_index = sample_id + ".sorted.bam.bai"
     String ins_size = sample_id + ".ins_size"

     String docker
     String cluster
     Int insert_size

     command <<<
         # Abort on the first failing command, including failures inside a pipe.
         set -eo pipefail

         samtools_bin=/opt/conda/bin/samtools

         # SAM -> BAM -> coordinate-sorted BAM -> index
         $samtools_bin view -bS ${sam} > ${bam}
         $samtools_bin sort -m 1000000000 ${bam} -o ${sorted_bam}
         $samtools_bin index ${sorted_bam}

         # Subsample the sorted BAM (-s 42.1; presumably seed 42, fraction 0.1 - confirm).
         $samtools_bin view -bs 42.1 ${sorted_bam} > ${percent_bam}

         # Keep only the IS rows of the stats report, dropping the row label.
         $samtools_bin stats -i ${insert_size} ${sorted_bam} | grep ^IS | cut -f 2- > ${sample_id}.ins_size
     >>>

     runtime {
         docker: docker
         cluster: cluster
         systemDisk: "cloud_ssd 40"
         dataDisk: "cloud_ssd 200 /cromwell_root/"
     }

     output {
         File out_bam = sorted_bam
         File out_percent = percent_bam
         File out_bam_index = sorted_bam_index
         File out_ins_size = ins_size
     }
 }

--- a/workflow.wdl
+++ b/workflow.wdl
@@ -0,0 +1,122 @@
 import "./tasks/fastp.wdl" as fastp
 import "./tasks/hisat2.wdl" as hisat2
 import "./tasks/samtools.wdl" as samtools
 import "./tasks/featureCounts.wdl" as featureCounts


 # lncRNA-seq pipeline for one paired-end sample:
 #   fastp (read preprocessing) -> hisat2 (alignment) -> samtools (sort, index,
 #   subsample, insert-size stats) -> featureCounts (gene-level counts).
 #
 # The workflow name is filled in by the template engine at install time.
 workflow {{ project_name }} {
     # --- fastp inputs ---
     String sample_id
     File read1
     File read2
     String adapter_sequence
     String adapter_sequence_r2
     String fastp_docker
     String fastp_cluster
     String umi_loc
     Int trim_front1
     Int trim_tail1
     Int max_len1
     Int trim_front2
     Int trim_tail2
     Int max_len2
     Int disable_adapter_trimming
     Int length_required
     Int umi_len
     Int UMI
     Int qualified_quality_phred
     Int length_required1
     Int disable_quality_filtering

     # --- hisat2 inputs ---
     File idx
     # NOTE(review): Trim_R1, Trim_R2, sam and bam_file are declared as workflow
     # inputs but no call below reads them; the calls take the upstream task
     # outputs instead. They are kept so existing inputs files stay valid.
     File Trim_R1
     File Trim_R2
     String idx_prefix
     String pen_intronlen
     String hisat2_docker
     String hisat2_cluster
     Int pen_cansplice
     Int pen_noncansplice
     Int min_intronlen
     Int max_intronlen
     Int maxins
     Int minins

     # --- samtools inputs ---
     File sam
     # The derived names below are not forwarded to the samtools call, which
     # computes the same names itself from sample_id.
     String bam = sample_id + ".bam"
     String sorted_bam = sample_id + ".sorted.bam"
     String percent_bam = sample_id + ".percent.bam"
     String sorted_bam_index = sample_id + ".sorted.bam.bai"
     String ins_size = sample_id + ".ins_size"
     String samtools_docker
     String samtools_cluster
     Int insert_size

     # --- featureCounts inputs ---
     File bam_file
     File lnc_gtf_file = "lncRNAKB_hg38_v7.gtf"
     String gtf_dir = "oss://pgx-reference-data/reference/subread/"
     String subread_docker
     String subread_cluster
     Int cpu_num = 4
     Int strand_information = 0

     call fastp.fastp as fastp {
         input:
         sample_id = sample_id,
         read1 = read1,
         read2 = read2,
         docker = fastp_docker,
         cluster = fastp_cluster,
         adapter_sequence = adapter_sequence,
         adapter_sequence_r2 = adapter_sequence_r2,
         umi_loc = umi_loc,
         trim_front1 = trim_front1,
         trim_tail1 = trim_tail1,
         max_len1 = max_len1,
         trim_front2 = trim_front2,
         trim_tail2 = trim_tail2,
         max_len2 = max_len2,
         disable_adapter_trimming = disable_adapter_trimming,
         length_required = length_required,
         umi_len = umi_len,
         UMI = UMI,
         qualified_quality_phred = qualified_quality_phred,
         length_required1 = length_required1,
         disable_quality_filtering = disable_quality_filtering
     }

     # Align the reads produced by fastp.
     call hisat2.hisat2 as hisat2 {
         input:
         sample_id = sample_id,
         idx = idx,
         idx_prefix = idx_prefix,
         Trim_R1 = fastp.Trim_R1,
         Trim_R2 = fastp.Trim_R2,
         docker = hisat2_docker,
         cluster = hisat2_cluster,
         pen_intronlen = pen_intronlen,
         pen_cansplice = pen_cansplice,
         pen_noncansplice = pen_noncansplice,
         min_intronlen = min_intronlen,
         max_intronlen = max_intronlen,
         maxins = maxins,
         minins = minins
     }

     call samtools.samtools as samtools {
         input:
         sample_id = sample_id,
         sam = hisat2.sam,
         docker = samtools_docker,
         cluster = samtools_cluster,
         insert_size = insert_size
     }

     # Count on the sorted BAM; the samtools task exposes it as out_bam.
     call featureCounts.featureCounts as featureCounts {
         input:
         sample_id = sample_id,
         bam_file = samtools.out_bam,
         lnc_gtf_file = lnc_gtf_file,
         gtf_dir = gtf_dir,
         docker = subread_docker,
         cluster = subread_cluster,
         cpu_num = cpu_num,
         strand_information = strand_information
     }
 }