
first commit

master
chenqingwang 3 years ago
commit
51e95ec973
5 files changed with 184 additions and 0 deletions
  1. README.md (+59, -0)
  2. defaults (+9, -0)
  3. inputs (+13, -0)
  4. tasks/spladder_EC.wdl (+71, -0)
  5. workflow.wdl (+32, -0)

+59 -0 README.md

@@ -0,0 +1,59 @@
# README.md

> Author: Qingwang Chen
>
> Email: [qwch20@fudan.edu.cn](mailto:qwch20@fudan.edu.cn)
>
> Last Updated: 24/08/2021

#### Brief Introduction

Once the joint graph is fully quantified, we can move on to use it for downstream analyses, for instance to extract the different AS event types:

`--event-types exon_skip,intron_retention,alt_3prime,alt_5prime,mutex_exons,mult_exon_skip` (pick one event type at a time for analysis)
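
For reference, a minimal sketch of the SplAdder call that `tasks/spladder_EC.wdl` issues for a single event type; `sample_01` and the local GTF filename are placeholders, while the remaining options mirror the task defaults:

```
spladder build -o sample_01/spladder_out \
    --annotation Homo_sapiens.GRCh38.103.gtf \
    --bams alignment.txt \
    --confidence 2 \
    --merge-strat merge_graphs \
    --readlen 150 \
    --parallel $(nproc) \
    --event-types exon_skip
```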

#### Requirements

- choppy
- Ali-Cloud
- Linux

```
# Activate the choppy environment
$ source activate choppy  # or: open-choppy-env

# First-time installation
$ choppy install chenqingwang/SplAdder-event-calling
# Reinstallation (overwrite an existing copy)
$ choppy install chenqingwang/SplAdder-event-calling -f

# List the installed apps
$ choppy apps
```

#### Quick Start

```
# Prepare the samples.csv file
$ choppy samples chenqingwang/SplAdder-event-calling-latest > samples.csv
# Prepare a samples.csv file without default values
$ choppy samples --no-default chenqingwang/SplAdder-event-calling-latest > samples.csv

# Submit the tasks
$ choppy batch chenqingwang/SplAdder-event-calling-latest samples.csv -p Your_project_name -l Your_label

# Check task status
$ choppy query -L Your_label | grep "status"

# Query failed tasks
$ choppy search -s Failed -p Your_project_name -u chenqingwang --short-format

# Result files are written to:
# oss://choppy-cromwell-result/test-choppy/Your_project_name/
```
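
To fetch result files locally, one option is Alibaba Cloud's `ossutil` tool; a minimal sketch, assuming `ossutil` is installed and configured with credentials for the result bucket:

```
# Recursively copy the project's results to a local directory
$ ossutil cp -r oss://choppy-cromwell-result/test-choppy/Your_project_name/ ./results/
```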

#### Description
```
# samples: a plain-text file listing, one per line, the names of the
# BAM files to be analyzed; upload this file to Ali-Cloud (OSS).
```
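
For illustration, a hypothetical `samples` file with three placeholder IDs; each line is matched (via grep) against the staged BAM, index, splice-graph, and count file names in `tasks/spladder_EC.wdl`:

```
sample_01
sample_02
sample_03
```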

+9 -0 defaults

@@ -0,0 +1,9 @@
{
    "reference_gtf_file": "oss://pgx-reference-data/reference/spladder/SplAdder/data/reference/Homo_sapiens.GRCh38.103.gtf",
    "bam": "oss://pgx-source-data/CBCGA2020/RNA-seq/intermediate-results-temp/Alternative_Splicing/spladder/tmp_bam/",
    "pickle": "oss://pgx-source-data/CBCGA2020/RNA-seq/intermediate-results-temp/Alternative_Splicing/spladder/genes_graph_conf2_sorted_pickle/",
    "count_hdf5": "oss://pgx-source-data/CBCGA2020/RNA-seq/intermediate-results-temp/Alternative_Splicing/spladder/single_count_hdf5/",
    "spladder_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/spladder:v2.4.2",
    "spladder_cluster": "OnDemand bcs.a2.3xlarge img-ubuntu-vpc",
    "disk_size": "200"
}

+13 -0 inputs

@@ -0,0 +1,13 @@
{
    "{{ project_name }}.sample_id": "{{ sample_id }}",
    "{{ project_name }}.bam": "{{ bam }}",
    "{{ project_name }}.pickle": "{{ pickle }}",
    "{{ project_name }}.merge_graphs": "{{ merge_graphs }}",
    "{{ project_name }}.count_hdf5": "{{ count_hdf5 }}",
    "{{ project_name }}.reference_gtf_file": "{{ reference_gtf_file }}",
    "{{ project_name }}.samples": "{{ samples }}",
    "{{ project_name }}.event_types": "{{ event_types }}",
    "{{ project_name }}.spladder_docker": "{{ spladder_docker }}",
    "{{ project_name }}.spladder_cluster": "{{ spladder_cluster }}",
    "{{ project_name }}.disk_size": "{{ disk_size }}"
}
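
For illustration, a rendered inputs file might look like the following once choppy substitutes the template variables. `my_project`, `sample_01`, and the `merge_graphs`/`samples` OSS paths below are hypothetical placeholders; the remaining values come from `defaults`:

```
{
    "my_project.sample_id": "sample_01",
    "my_project.bam": "oss://pgx-source-data/CBCGA2020/RNA-seq/intermediate-results-temp/Alternative_Splicing/spladder/tmp_bam/",
    "my_project.pickle": "oss://pgx-source-data/CBCGA2020/RNA-seq/intermediate-results-temp/Alternative_Splicing/spladder/genes_graph_conf2_sorted_pickle/",
    "my_project.merge_graphs": "oss://my-bucket/spladder/merge_graphs.pickle",
    "my_project.count_hdf5": "oss://pgx-source-data/CBCGA2020/RNA-seq/intermediate-results-temp/Alternative_Splicing/spladder/single_count_hdf5/",
    "my_project.reference_gtf_file": "oss://pgx-reference-data/reference/spladder/SplAdder/data/reference/Homo_sapiens.GRCh38.103.gtf",
    "my_project.samples": "oss://my-bucket/spladder/samples.txt",
    "my_project.event_types": "exon_skip",
    "my_project.spladder_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/spladder:v2.4.2",
    "my_project.spladder_cluster": "OnDemand bcs.a2.3xlarge img-ubuntu-vpc",
    "my_project.disk_size": "200"
}
```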

+71 -0 tasks/spladder_EC.wdl

@@ -0,0 +1,71 @@
task spladder_EC {
    String sample_id
    File bam
    File pickle
    File merge_graphs
    File count_hdf5
    File reference_gtf_file
    File samples
    String event_types
    String spladder_docker
    String spladder_cluster
    String disk_size

    command <<<
        set -o pipefail
        set -e

        # Assemble the directory layout expected by SplAdder
        mkdir -p ${sample_id}/spladder_out/spladder

        # Stage the BAM files and their indices, and record the ones
        # listed in the samples file
        ln -s ${bam}/*.bam ./
        cat ${samples} | while read a; do ls ./ | grep bam$ | grep $a; done > alignment.txt
        ln -s ${bam}/*.bai ./
        cat ${samples} | while read a; do ls ./ | grep bai$ | grep $a; done > alignment_bai.txt

        # Stage the per-sample splice graphs and the merged joint graph
        ln -s ${pickle}/*.pickle ${sample_id}/spladder_out/spladder/
        cat ${samples} | while read a; do ls ${sample_id}/spladder_out/spladder/ | grep pickle$ | grep $a; done > pickle.txt
        cp -r ${merge_graphs} ${sample_id}/spladder_out/spladder/

        # Stage the per-sample count files
        cp -r ${count_hdf5}/* ${sample_id}/spladder_out/spladder/
        cat ${samples} | while read a; do ls ${sample_id}/spladder_out/spladder/ | grep count | grep $a; done > count.txt

        # Extract AS events from the fully quantified joint graph
        nt=$(nproc)
        spladder build -o ${sample_id}/spladder_out \
            --annotation ${reference_gtf_file} \
            --bams alignment.txt \
            --confidence 2 \
            --merge-strat merge_graphs \
            --readlen 150 \
            --parallel $nt \
            --event-types ${event_types}

        # Record the full output tree for inspection
        find . -depth > fileList.txt
    >>>

    runtime {
        docker: spladder_docker
        cluster: spladder_cluster
        systemDisk: "cloud_ssd 50"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
        timeout: 129600
    }

    output {
        File fileList = "fileList.txt"
        File alignment = "alignment.txt"
        File alignment_bai = "alignment_bai.txt"
        File pickle_txt = "pickle.txt"
        File count_hdf5_txt = "count.txt"
        Array[File] AS_gff = glob("${sample_id}/spladder_out/*.gff3")
        Array[File] AS_gz = glob("${sample_id}/spladder_out/*.gz")
        Array[File] spladder_out = glob("${sample_id}/spladder_out/*")
        Array[File] spladder = glob("${sample_id}/spladder_out/spladder/*")
    }
}

+32 -0 workflow.wdl

@@ -0,0 +1,32 @@
import "./tasks/spladder_EC.wdl" as spladder_quantification


workflow {{ project_name }} {
    String sample_id
    File bam
    File pickle
    File merge_graphs
    File count_hdf5
    File reference_gtf_file
    File samples

    String event_types
    String spladder_docker
    String spladder_cluster
    String disk_size

    call spladder_quantification.spladder_EC as spladder_EC {
        input:
            reference_gtf_file=reference_gtf_file,
            sample_id=sample_id,
            bam=bam,
            pickle=pickle,
            merge_graphs=merge_graphs,
            count_hdf5=count_hdf5,
            samples=samples,
            event_types=event_types,
            spladder_docker=spladder_docker,
            spladder_cluster=spladder_cluster,
            disk_size=disk_size
    }
}
