
first commit

master
chenqingwang 3 years ago
commit
51e95ec973
5 files changed with 184 additions and 0 deletions
  1. README.md (+59, -0)
  2. defaults (+9, -0)
  3. inputs (+13, -0)
  4. tasks/spladder_EC.wdl (+71, -0)
  5. workflow.wdl (+32, -0)

+59 -0 README.md

@@ -0,0 +1,59 @@
# README.md

> Author: Qingwang Chen
>
> Email: [qwch20@fudan.edu.cn](mailto:qwch20@fudan.edu.cn)
>
> Last Updated: 24/08/2021

#### Brief Introduction

Once the joint graph is fully quantified, we can move on to use it for downstream analyses, for instance to extract the different AS event types:

`--event-types exon_skip,intron_retention,alt_3prime,alt_5prime,mutex_exons,mult_exon_skip` (pick one event type at a time for analysis)
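
For reference, a minimal sketch of the SplAdder call that `tasks/spladder_EC.wdl` issues for a single event type; `sample_01` and the local GTF filename are placeholders, while the remaining options mirror the task defaults:

```
spladder build -o sample_01/spladder_out \
    --annotation Homo_sapiens.GRCh38.103.gtf \
    --bams alignment.txt \
    --confidence 2 \
    --merge-strat merge_graphs \
    --readlen 150 \
    --parallel $(nproc) \
    --event-types exon_skip
```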

#### Requirements

- choppy
- Ali-Cloud
- Linux

```
# Activate the choppy environment
$ source activate choppy  # or: open-choppy-env

# First-time installation
$ choppy install chenqingwang/SplAdder-event-calling
# Reinstallation (overwrite an existing copy)
$ choppy install chenqingwang/SplAdder-event-calling -f

# List the installed apps
$ choppy apps
```

#### Quick Start

```
# Prepare the samples.csv file
$ choppy samples chenqingwang/SplAdder-event-calling-latest > samples.csv
# Prepare a samples.csv file without default values
$ choppy samples --no-default chenqingwang/SplAdder-event-calling-latest > samples.csv

# Submit the tasks
$ choppy batch chenqingwang/SplAdder-event-calling-latest samples.csv -p Your_project_name -l Your_label

# Check task status
$ choppy query -L Your_label | grep "status"

# Query failed tasks
$ choppy search -s Failed -p Your_project_name -u chenqingwang --short-format

# Result files are written to:
# oss://choppy-cromwell-result/test-choppy/Your_project_name/
```
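
To fetch result files locally, one option is Alibaba Cloud's `ossutil` tool; a minimal sketch, assuming `ossutil` is installed and configured with credentials for the result bucket:

```
# Recursively copy the project's results to a local directory
$ ossutil cp -r oss://choppy-cromwell-result/test-choppy/Your_project_name/ ./results/
```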

#### Description
```
# samples: a plain-text file listing, one per line, the names of the
# BAM files to be analyzed; upload this file to Ali-Cloud (OSS).
```
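
For illustration, a hypothetical `samples` file with three placeholder IDs; each line is matched (via grep) against the staged BAM, index, splice-graph, and count file names in `tasks/spladder_EC.wdl`:

```
sample_01
sample_02
sample_03
```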

+9 -0 defaults

@@ -0,0 +1,9 @@
{
    "reference_gtf_file": "oss://pgx-reference-data/reference/spladder/SplAdder/data/reference/Homo_sapiens.GRCh38.103.gtf",
    "bam": "oss://pgx-source-data/CBCGA2020/RNA-seq/intermediate-results-temp/Alternative_Splicing/spladder/tmp_bam/",
    "pickle": "oss://pgx-source-data/CBCGA2020/RNA-seq/intermediate-results-temp/Alternative_Splicing/spladder/genes_graph_conf2_sorted_pickle/",
    "count_hdf5": "oss://pgx-source-data/CBCGA2020/RNA-seq/intermediate-results-temp/Alternative_Splicing/spladder/single_count_hdf5/",
    "spladder_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/spladder:v2.4.2",
    "spladder_cluster": "OnDemand bcs.a2.3xlarge img-ubuntu-vpc",
    "disk_size": "200"
}

+13 -0 inputs

@@ -0,0 +1,13 @@
{
    "{{ project_name }}.sample_id": "{{ sample_id }}",
    "{{ project_name }}.bam": "{{ bam }}",
    "{{ project_name }}.pickle": "{{ pickle }}",
    "{{ project_name }}.merge_graphs": "{{ merge_graphs }}",
    "{{ project_name }}.count_hdf5": "{{ count_hdf5 }}",
    "{{ project_name }}.reference_gtf_file": "{{ reference_gtf_file }}",
    "{{ project_name }}.samples": "{{ samples }}",
    "{{ project_name }}.event_types": "{{ event_types }}",
    "{{ project_name }}.spladder_docker": "{{ spladder_docker }}",
    "{{ project_name }}.spladder_cluster": "{{ spladder_cluster }}",
    "{{ project_name }}.disk_size": "{{ disk_size }}"
}
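
For illustration, a rendered inputs file might look like the following once choppy substitutes the template variables. `my_project`, `sample_01`, and the `merge_graphs`/`samples` OSS paths below are hypothetical placeholders; the remaining values come from `defaults`:

```
{
    "my_project.sample_id": "sample_01",
    "my_project.bam": "oss://pgx-source-data/CBCGA2020/RNA-seq/intermediate-results-temp/Alternative_Splicing/spladder/tmp_bam/",
    "my_project.pickle": "oss://pgx-source-data/CBCGA2020/RNA-seq/intermediate-results-temp/Alternative_Splicing/spladder/genes_graph_conf2_sorted_pickle/",
    "my_project.merge_graphs": "oss://my-bucket/spladder/merge_graphs.pickle",
    "my_project.count_hdf5": "oss://pgx-source-data/CBCGA2020/RNA-seq/intermediate-results-temp/Alternative_Splicing/spladder/single_count_hdf5/",
    "my_project.reference_gtf_file": "oss://pgx-reference-data/reference/spladder/SplAdder/data/reference/Homo_sapiens.GRCh38.103.gtf",
    "my_project.samples": "oss://my-bucket/spladder/samples.txt",
    "my_project.event_types": "exon_skip",
    "my_project.spladder_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/spladder:v2.4.2",
    "my_project.spladder_cluster": "OnDemand bcs.a2.3xlarge img-ubuntu-vpc",
    "my_project.disk_size": "200"
}
```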

+71 -0 tasks/spladder_EC.wdl

@@ -0,0 +1,71 @@
task spladder_EC {
    String sample_id
    File bam
    File pickle
    File merge_graphs
    File count_hdf5
    File reference_gtf_file
    File samples
    String event_types
    String spladder_docker
    String spladder_cluster
    String disk_size

    command <<<
        set -o pipefail
        set -e

        # Assemble the directory layout expected by SplAdder
        mkdir -p ${sample_id}/spladder_out/spladder

        # Stage the BAM files and their indices, and record the ones
        # listed in the samples file
        ln -s ${bam}/*.bam ./
        cat ${samples} | while read a; do ls ./ | grep bam$ | grep $a; done > alignment.txt
        ln -s ${bam}/*.bai ./
        cat ${samples} | while read a; do ls ./ | grep bai$ | grep $a; done > alignment_bai.txt

        # Stage the per-sample splice graphs and the merged joint graph
        ln -s ${pickle}/*.pickle ${sample_id}/spladder_out/spladder/
        cat ${samples} | while read a; do ls ${sample_id}/spladder_out/spladder/ | grep pickle$ | grep $a; done > pickle.txt
        cp -r ${merge_graphs} ${sample_id}/spladder_out/spladder/

        # Stage the per-sample count files
        cp -r ${count_hdf5}/* ${sample_id}/spladder_out/spladder/
        cat ${samples} | while read a; do ls ${sample_id}/spladder_out/spladder/ | grep count | grep $a; done > count.txt

        # Extract AS events from the fully quantified joint graph
        nt=$(nproc)
        spladder build -o ${sample_id}/spladder_out \
            --annotation ${reference_gtf_file} \
            --bams alignment.txt \
            --confidence 2 \
            --merge-strat merge_graphs \
            --readlen 150 \
            --parallel $nt \
            --event-types ${event_types}

        # Record the full output tree for inspection
        find . -depth > fileList.txt
    >>>

    runtime {
        docker: spladder_docker
        cluster: spladder_cluster
        systemDisk: "cloud_ssd 50"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
        timeout: 129600
    }

    output {
        File fileList = "fileList.txt"
        File alignment = "alignment.txt"
        File alignment_bai = "alignment_bai.txt"
        File pickle_txt = "pickle.txt"
        File count_hdf5_txt = "count.txt"
        Array[File] AS_gff = glob("${sample_id}/spladder_out/*.gff3")
        Array[File] AS_gz = glob("${sample_id}/spladder_out/*.gz")
        Array[File] spladder_out = glob("${sample_id}/spladder_out/*")
        Array[File] spladder = glob("${sample_id}/spladder_out/spladder/*")
    }
}

+32 -0 workflow.wdl

@@ -0,0 +1,32 @@
import "./tasks/spladder_EC.wdl" as spladder_quantification


workflow {{ project_name }} {
    String sample_id
    File bam
    File pickle
    File merge_graphs
    File count_hdf5
    File reference_gtf_file
    File samples

    String event_types
    String spladder_docker
    String spladder_cluster
    String disk_size

    call spladder_quantification.spladder_EC as spladder_EC {
        input:
            reference_gtf_file=reference_gtf_file,
            sample_id=sample_id,
            bam=bam,
            pickle=pickle,
            merge_graphs=merge_graphs,
            count_hdf5=count_hdf5,
            samples=samples,
            event_types=event_types,
            spladder_docker=spladder_docker,
            spladder_cluster=spladder_cluster,
            disk_size=disk_size
    }
}
