@@ -0,0 +1,59 @@ | |||
# README.md | |||
> Author: Qingwang Chen | |||
> | |||
> Email: [qwch20@fudan.edu.cn](mailto:qwch20@fudan.edu.cn) | |||
> | |||
> Last Updated: 24/08/2021 | |||
#### Brief Introduction | |||
Collect the individual quantifications and aggregate them in a joint database. Then the joint graph is fully quantified, and we can move on to use it for downstream analyses, for instance to extract different AS events: | |||
--event-types exon_skip,intron_retention,alt_3prime,alt_5prime,mutex_exons,mult_exon_skip | |||
#### Requirements | |||
- choppy | |||
- Ali-Cloud | |||
- Linux | |||
``` | |||
# 激活choppy环境 | |||
$ source activate choppy (open-choppy-env) | |||
# 第一次安装 | |||
$ choppy install chenqingwang/SplAdder-quantification-merge | |||
# 非第一次安装 | |||
$ choppy install chenqingwang/SplAdder-quantification-merge -f | |||
# 查询已安装APP | |||
$ choppy apps | |||
``` | |||
#### Quick Start | |||
``` | |||
# 准备 samples.csv 文件 | |||
$ choppy samples chenqingwang/SplAdder-quantification-merge-latest > samples.csv | |||
# 准备无默认参数的samples.csv 文件 | |||
$ choppy samples --no-default chenqingwang/SplAdder-quantification-merge-latest > samples.csv | |||
# 提交任务 | |||
$ choppy batch chenqingwang/SplAdder-quantification-merge-latest samples.csv -p Your_project_name -l Your_label | |||
# 查询任务运行状况 | |||
$ choppy query -L Your_label | grep "status" | |||
# 查询失败任务 | |||
$ choppy search -s Failed -p Your_project_name -u chenqingwang --short-format | |||
# 结果文件地址 | |||
$ oss://choppy-cromwell-result/test-choppy/Your_project_name/ | |||
``` | |||
#### Description | |||
``` | |||
# samples: The file names to be analyzed should be written line by line in this file, and the file should be uploaded to AliCloud. | |||
``` |
@@ -0,0 +1,9 @@ | |||
{ | |||
"reference_gtf_file":"oss://pgx-reference-data/reference/spladder/SplAdder/data/reference/Homo_sapiens.GRCh38.103.gtf", | |||
"bam":"oss://pgx-source-data/CBCGA2020/RNA-seq/intermediate-results-temp/Alternative_Splicing/spladder/tmp_bam/", | |||
"pickle":"oss://pgx-source-data/CBCGA2020/RNA-seq/intermediate-results-temp/Alternative_Splicing/spladder/genes_graph_conf2_sorted_pickle/", | |||
"count_hdf5":"oss://pgx-source-data/CBCGA2020/RNA-seq/intermediate-results-temp/Alternative_Splicing/spladder/conf2_count_hdf5/", | |||
"spladder_docker":"registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/spladder:v2.4.2", | |||
"spladder_cluster":"OnDemand bcs.a2.3xlarge img-ubuntu-vpc", | |||
"disk_size":"500" | |||
} |
@@ -0,0 +1,12 @@ | |||
{ | |||
"{{ project_name }}.sample_id": "{{ sample_id }}", | |||
"{{ project_name }}.bam": "{{ bam }}", | |||
"{{ project_name }}.pickle": "{{ pickle }}", | |||
"{{ project_name }}.merge_graphs": "{{ merge_graphs }}", | |||
"{{ project_name }}.count_hdf5": "{{ count_hdf5 }}", | |||
"{{ project_name }}.reference_gtf_file": "{{ reference_gtf_file }}", | |||
"{{ project_name }}.samples": "{{ samples }}", | |||
"{{ project_name }}.spladder_docker": "{{ spladder_docker }}", | |||
"{{ project_name }}.spladder_cluster": "{{ spladder_cluster }}", | |||
"{{ project_name }}.disk_size": "{{ disk_size }}" | |||
} |
@@ -0,0 +1,71 @@ | |||
task spladder_quantification {
    # Collect per-sample SplAdder quantifications against a merged splicing
    # graph and extract alternative-splicing events (qmode "collect").
    String sample_id            # sample identifier; names the output directory
    File bam                    # OSS directory containing *.bam and *.bai files
    File pickle                 # OSS directory with per-sample genes_graph *.pickle files
    File merge_graphs           # merged splicing graph produced by the merge step
    File count_hdf5             # OSS directory with per-sample count *.hdf5 files
    File reference_gtf_file     # reference annotation (GTF)
    File samples                # text file listing, one per line, the sample names to include
    String docker               # docker image for the runtime
    String cluster              # Ali-Cloud BCS cluster spec
    String disk_size            # data-disk size in GB
    command <<<
        set -o pipefail
        set -e
        mkdir -p ${sample_id}/spladder_out/spladder
        # Link BAMs/BAIs into the work dir and record, for each requested sample,
        # the matching file names ("$a" quoted so IDs with special characters
        # don't break the grep filter).
        ln -s ${bam}/*.bam ./
        cat ${samples} | while read a; do ls ./ | grep bam$ | grep "$a"; done > alignment.txt
        ln -s ${bam}/*.bai ./
        cat ${samples} | while read a; do ls ./ | grep bai$ | grep "$a"; done > alignment_bai.txt
        # Stage per-sample splicing-graph pickles expected by SplAdder.
        # Use truncating ">" (not ">>") so re-runs in a reused workspace do not
        # accumulate duplicate entries — consistent with alignment.txt above.
        ln -s ${pickle}/*.pickle ${sample_id}/spladder_out/spladder/
        cat ${samples} | while read a; do ls ${sample_id}/spladder_out/spladder/| grep pickle$| grep "$a"; done > pickle.txt
        # Stage the merged graph and per-sample count files.
        cp -r ${merge_graphs} ${sample_id}/spladder_out/spladder/
        ln -s ${count_hdf5}/*.hdf5 ${sample_id}/spladder_out/spladder/
        cat ${samples} | while read a; do ls ${sample_id}/spladder_out/spladder/| grep hdf5$| grep "$a"; done > count_hdf5.txt
        # Use all available cores for the SplAdder run.
        nt=$(nproc)
        spladder build -o ${sample_id}/spladder_out \
        --annotation ${reference_gtf_file} \
        --bams alignment.txt \
        --confidence 2 \
        --merge-strat merge_graphs \
        --readlen 150 \
        --parallel $nt \
        --quantify-graph --qmode collect \
        --event-types exon_skip,intron_retention,alt_3prime,alt_5prime,mutex_exons,mult_exon_skip
        # Snapshot of the workspace tree for debugging/provenance.
        find . -depth > fileList.txt
    >>>
    runtime {
        docker: docker
        cluster: cluster
        # NOTE(review): system disk is hard-coded to 500 GB and does not follow
        # disk_size (which only sizes the data disk) — confirm this is intended.
        systemDisk: "cloud_ssd 500"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }
    output {
        File fileList = "fileList.txt"
        File alignment = "alignment.txt"
        File alignment_bai = "alignment_bai.txt"
        File pickle_txt = "pickle.txt"
        File count_hdf5_txt = "count_hdf5.txt"
        Array[File] AS_gff = glob("${sample_id}/spladder_out/*.gff3")
        Array[File] AS_gz = glob("${sample_id}/spladder_out/*.gz")
        Array[File] spladder_out = glob("${sample_id}/spladder_out/*")
        Array[File] spladder = glob("${sample_id}/spladder_out/spladder/*")
    }
}
@@ -0,0 +1,30 @@ | |||
import "./tasks/spladder_quantification.wdl" as spladder_quantification | |||
workflow {{ project_name }} {
    # Thin orchestration layer: forwards the submission inputs straight to the
    # spladder_quantification task (inputs are bound by name, so order is free).

    # Sample identity and staged data locations.
    String sample_id
    File bam
    File pickle
    File merge_graphs
    File count_hdf5
    File reference_gtf_file
    File samples

    # Runtime configuration.
    String spladder_docker
    String spladder_cluster
    String disk_size

    call spladder_quantification.spladder_quantification as spladder_quantification {
        input:
            sample_id=sample_id,
            bam=bam,
            pickle=pickle,
            merge_graphs=merge_graphs,
            count_hdf5=count_hdf5,
            reference_gtf_file=reference_gtf_file,
            samples=samples,
            docker=spladder_docker,
            cluster=spladder_cluster,
            disk_size=disk_size
    }
}