# README.md
> Author: Qingwang Chen
>
> Email: [qwch20@fudan.edu.cn](mailto:qwch20@fudan.edu.cn)
>
> Last updated: 24/08/2021

#### Brief Introduction
Collect the individual quantifications and aggregate them in a joint database. Then the joint graph is fully quantified and we can move on to use it for downstream analyses, for instance to extract different AS events:
`--event-types exon_skip,intron_retention,alt_3prime,alt_5prime,mutex_exons,mult_exon_skip`
#### Requirements
- choppy
- Ali-Cloud
- Linux

```
# Activate the choppy environment
$ source activate choppy (open-choppy-env)
# First-time installation
$ choppy install chenqingwang/SplAdder-quantification-merge
# Re-installation (force overwrite)
$ choppy install chenqingwang/SplAdder-quantification-merge -f
# List installed apps
$ choppy apps
```
#### Quick Start
```
# Prepare the samples.csv file
$ choppy samples chenqingwang/SplAdder-quantification-merge-latest > samples.csv
# Prepare a samples.csv file without default parameters
$ choppy samples --no-default chenqingwang/SplAdder-quantification-merge-latest > samples.csv
# Submit jobs
$ choppy batch chenqingwang/SplAdder-quantification-merge-latest samples.csv -p Your_project_name -l Your_label
# Check job status
$ choppy query -L Your_label | grep "status"
# Find failed jobs
$ choppy search -s Failed -p Your_project_name -u chenqingwang --short-format
# Location of result files
$ oss://choppy-cromwell-result/test-choppy/Your_project_name/
```
#### Description
```
# samples: The names of the files to be analyzed should be written in this file, one per line, and the file should be uploaded to AliCloud.
```
{
    "reference_gtf_file": "oss://pgx-reference-data/reference/spladder/SplAdder/data/reference/Homo_sapiens.GRCh38.103.gtf",
    "bam": "oss://pgx-source-data/CBCGA2020/RNA-seq/intermediate-results-temp/Alternative_Splicing/spladder/tmp_bam/",
    "pickle": "oss://pgx-source-data/CBCGA2020/RNA-seq/intermediate-results-temp/Alternative_Splicing/spladder/genes_graph_conf2_sorted_pickle/",
    "count_hdf5": "oss://pgx-source-data/CBCGA2020/RNA-seq/intermediate-results-temp/Alternative_Splicing/spladder/conf2_count_hdf5/",
    "spladder_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/spladder:v2.4.2",
    "spladder_cluster": "OnDemand bcs.a2.3xlarge img-ubuntu-vpc",
    "disk_size": "500"
}
{
    "{{ project_name }}.sample_id": "{{ sample_id }}",
    "{{ project_name }}.bam": "{{ bam }}",
    "{{ project_name }}.pickle": "{{ pickle }}",
    "{{ project_name }}.merge_graphs": "{{ merge_graphs }}",
    "{{ project_name }}.count_hdf5": "{{ count_hdf5 }}",
    "{{ project_name }}.reference_gtf_file": "{{ reference_gtf_file }}",
    "{{ project_name }}.samples": "{{ samples }}",
    "{{ project_name }}.spladder_docker": "{{ spladder_docker }}",
    "{{ project_name }}.spladder_cluster": "{{ spladder_cluster }}",
    "{{ project_name }}.disk_size": "{{ disk_size }}"
}
# Runs the SplAdder "collect" quantification step for one sample set:
# stages BAMs, splice-graph pickles, the merged graph and count HDF5 files
# into the layout `spladder build` expects, then collects the individual
# quantifications into the joint database and extracts all AS event types.
task spladder_quantification {
    String sample_id
    # NOTE(review): bam, pickle and count_hdf5 are declared as File but are
    # used as directories (OSS prefixes) below — confirm the backend mounts
    # them as directories.
    File bam
    File pickle
    File merge_graphs
    File count_hdf5
    File reference_gtf_file
    # samples: one sample name per line; used to filter the staged files.
    File samples
    String docker
    String cluster
    String disk_size

    command <<<
        set -o pipefail
        set -e
        mkdir -p ${sample_id}/spladder_out/spladder

        # Link all BAM/BAI files into the working directory and build the
        # alignment lists restricted to the names listed in ${samples}.
        # "$a" is quoted so empty or whitespace-containing lines cannot be
        # word-split into bogus grep arguments.
        ln -s ${bam}/*.bam ./
        cat ${samples} | while read a; do ls ./ | grep bam$ | grep "$a"; done > alignment.txt
        ln -s ${bam}/*.bai ./
        cat ${samples} | while read a; do ls ./ | grep bai$ | grep "$a"; done > alignment_bai.txt

        # Stage the per-sample splice graphs, the merged graph and the count
        # files where spladder expects them. Use ">" (not ">>") so the
        # manifests are rebuilt from scratch, consistent with the alignment
        # lists above; in a fresh execution directory this is equivalent.
        ln -s ${pickle}/*.pickle ${sample_id}/spladder_out/spladder/
        cat ${samples} | while read a; do ls ${sample_id}/spladder_out/spladder/ | grep pickle$ | grep "$a"; done > pickle.txt
        cp -r ${merge_graphs} ${sample_id}/spladder_out/spladder/
        ln -s ${count_hdf5}/*.hdf5 ${sample_id}/spladder_out/spladder/
        cat ${samples} | while read a; do ls ${sample_id}/spladder_out/spladder/ | grep hdf5$ | grep "$a"; done > count_hdf5.txt

        # Collect the individual quantifications into the joint database
        # (--qmode collect) and extract every supported AS event type.
        nt=$(nproc)
        spladder build -o ${sample_id}/spladder_out \
        --annotation ${reference_gtf_file} \
        --bams alignment.txt \
        --confidence 2 \
        --merge-strat merge_graphs \
        --readlen 150 \
        --parallel $nt \
        --quantify-graph --qmode collect \
        --event-types exon_skip,intron_retention,alt_3prime,alt_5prime,mutex_exons,mult_exon_skip

        # Manifest of everything produced, kept for debugging.
        find . -depth > fileList.txt
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 500"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File fileList = "fileList.txt"
        File alignment = "alignment.txt"
        File alignment_bai = "alignment_bai.txt"
        File pickle_txt = "pickle.txt"
        File count_hdf5_txt = "count_hdf5.txt"
        Array[File] AS_gff = glob("${sample_id}/spladder_out/*.gff3")
        Array[File] AS_gz = glob("${sample_id}/spladder_out/*.gz")
        Array[File] spladder_out = glob("${sample_id}/spladder_out/*")
        Array[File] spladder = glob("${sample_id}/spladder_out/spladder/*")
    }
}
import "./tasks/spladder_quantification.wdl" as spladder_quantification

# Single-step workflow: forwards its inputs verbatim to the
# spladder_quantification task, which collects the per-sample
# quantifications into the joint graph and extracts AS events.
workflow {{ project_name }} {
    String sample_id
    File bam
    File pickle
    File merge_graphs
    File count_hdf5
    File reference_gtf_file
    File samples
    String spladder_docker
    String spladder_cluster
    String disk_size

    call spladder_quantification.spladder_quantification as spladder_quantification {
        input:
            sample_id=sample_id,
            bam=bam,
            pickle=pickle,
            merge_graphs=merge_graphs,
            count_hdf5=count_hdf5,
            reference_gtf_file=reference_gtf_file,
            samples=samples,
            docker=spladder_docker,
            cluster=spladder_cluster,
            disk_size=disk_size
    }
}