@@ -21,14 +21,14 @@ choppy install XXXX | |||
### 甲基化原理简述 | |||
 | |||
全基因组亚硫酸氢盐测序(WGBS)用于在单碱基分辨率下研究全基因组范围的DNA甲基化模式。亚硫酸氢盐处理会将未甲基化的胞嘧啶转化为尿嘧啶,而甲基化的胞嘧啶保持不变。 | |||
比对软件(如Bismark)会先对测序读段和参考基因组序列进行相应的碱基转换(C→T、G→A),然后再进行比对。 | |||
 | |||
### APP功能简述 | |||
为了更好地分析全基因组甲基化数据,我们选用了目前广泛使用的比对软件Bismark,构建了分析流程(pipeline),用于提取全基因组的CpG、CHH、CHG甲基化模式信息。 | |||
@@ -37,7 +37,9 @@ choppy install XXXX | |||
### WGBS分析流程 | |||
 | |||
 | |||
### 参数说明 | |||
@@ -57,6 +59,8 @@ choppy install XXXX | |||
-p 多线程 | |||
``` | |||
## 输入和输出 | |||
@@ -80,13 +84,11 @@ CpG_context_test_data_bismark_bt2.txt,CHG_context_test_data_bismark_bt2.txt, | |||
以CpG_context_test_data_bismark_bt2.txt为例 | |||
``` | |||
Bismark methylation extractor version v0.19.0 | |||
SRR15024317_length=86 - 1 57798691 z | |||
SRR15024319_length=86 + 2 10166600 Z | |||
SRR15024331_length=86 + 11 77736289 Z | |||
SRR15024338_length=86 + 3 197272186 Z | |||
``` | |||
第一行为Bismark的版本信息 | |||
@@ -96,7 +98,6 @@ SRR15024338_length=86 + 3 197272186 Z | |||
不同字母表明不同的甲基化状态: | |||
``` | |||
X 代表CHG中甲基化的C | |||
x 代表CHG中非甲基化的C | |||
H 代表CHH中甲基化的C | |||
@@ -105,31 +106,21 @@ Z 代表CpG中甲基化的C | |||
z 代表CpG中非甲基化的C | |||
U 代表其他情况的甲基化C(CN或者CHN) | |||
u 代表其他情况的非甲基化C (CN或者CHN) | |||
``` | |||
#### 补充文件 | |||
上面的文件是methylation calling 最直接的证据,但是对于甲基化水平的定量来说,缺少了相关信息。运行bismark_methylation_extractor时,除了生成上述文件之外,还会有下列3个文件: | |||
``` | |||
test_data_bismark_bt2_splitting_report.txt | |||
test_data_bismark_bt2.M-bias.txt | |||
test_data_bismark_bt2.M-bias_R1.png | |||
``` | |||
##### test_data_bismark_bt2_splitting_report.txt | |||
记录了该样本甲基化的汇总信息 | |||
``` | |||
Final Cytosine Methylation Report | |||
Total number of C’s analysed: 40348 | |||
Total methylated C’s in CpG context: 1365 | |||
@@ -141,7 +132,6 @@ Total C to T conversions in CHH context: 28105 | |||
C methylated in CpG context: 66.8% | |||
C methylated in CHG context: 0.2% | |||
C methylated in CHH context: 0.4% | |||
``` | |||
##### test_data_bismark_bt2.M-bias.txt | |||
@@ -150,12 +140,10 @@ C methylated in CHH context: 0.4% | |||
部分文件内容如下 | |||
``` | |||
CpG context | |||
position count methylated count unmethylated % methylation coverage | |||
1 42 13 76.36 55 | |||
2 31 9 77.50 40 | |||
``` | |||
@@ -1,12 +1,10 @@ | |||
{
  "{{ project_name }}.fasta": "GRCh38.d1.vd1.fa",
  "{{ project_name }}.ref_dir": "oss://pgx-test-data/wgbs/genome/hg38/",
  "{{ project_name }}.fastq_1": "{{ read1 }}",
  "{{ project_name }}.cluster_config": "{{ cluster if cluster != '' else 'OnDemand ecs.sn2ne.2xlarge img-ubuntu-vpc' }}",
  "{{ project_name }}.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bismark",
  "{{ project_name }}.sample": "{{ sample_name }}",
  "{{ project_name }}.disk_size": "{{ disk_size }}",
  "{{ project_name }}.regions": "{{ regions }}",
  "{{ project_name }}.fastq_2": "{{ read2 }}"
}
@@ -12,7 +12,6 @@ task Dedup { | |||
command <<< | |||
set -o pipefail | |||
set -e | |||
nt=$(nproc) | |||
deduplicate_bismark -p --bam ${unsorted_bam} | |||
>>> | |||
runtime { |
@@ -0,0 +1,34 @@ | |||
# Runs deduplicate_bismark on a Bismark alignment BAM.
#
# Inputs:
#   sample          sample name; only used to build the expected output file name
#   unsorted_bam    BAM file handed to deduplicate_bismark through --bam
#   docker          container image used by the runtime
#   cluster_config  value of the runtime `cluster` attribute
#   disk_size       size inserted into the runtime `dataDisk` attribute
#
# Output:
#   Dedup_bam       "<sample>_R1_val_1_bismark_bt2_pe.deduplicated.bam"
#
# NOTE(review): the output name is derived from `sample`, not from the input
# file, so it assumes unsorted_bam is named
# "<sample>_R1_val_1_bismark_bt2_pe.bam" -- confirm against the upstream task.
task Dedup {
    String sample
    File unsorted_bam
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        # -p: treat the input as paired-end (see the Bismark user guide).
        # The previously computed core count was never used, so it was removed.
        deduplicate_bismark -p --bam ${unsorted_bam}
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File Dedup_bam = "${sample}_R1_val_1_bismark_bt2_pe.deduplicated.bam"
    }
}
@@ -12,8 +12,7 @@ task mapping { | |||
command <<< | |||
set -o pipefail | |||
set -e | |||
nt=$(nproc) | |||
bismark --bowtie2 -p ${nt} --bam ${ref_dir} -1 ${trim_read1} -2 ${trim_read2} | |||
bismark --bowtie2 -p 8 --bam ${ref_dir} -1 ${trim_read1} -2 ${trim_read2} | |||
>>> | |||
@@ -0,0 +1,29 @@ | |||
# Aligns a pair of trimmed FASTQ files with Bismark (Bowtie 2 backend) and
# writes the alignments as BAM.
#
# Inputs:
#   ref_dir         reference location passed to bismark as the genome folder
#   trim_read1      read 1 FASTQ, passed with -1
#   trim_read2      read 2 FASTQ, passed with -2
#   sample          sample name; only used to build the expected output file name
#   docker          container image used by the runtime
#   cluster_config  value of the runtime `cluster` attribute
#   disk_size       size inserted into the runtime `dataDisk` attribute
#
# Output:
#   unsorted_bam    "<sample>_R1_val_1_bismark_bt2_pe.bam"
#
# NOTE(review): the output name is derived from `sample`, so it assumes
# trim_read1 is named "<sample>_R1_val_1.<ext>" -- confirm against the
# trimming task.
task mapping {
    File ref_dir
    File trim_read1
    File trim_read2
    String sample
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        # Size the Bowtie 2 thread count from the machine instead of a fixed 8,
        # so a different cluster_config is used fully and not oversubscribed.
        nt=$(nproc)
        # The shell variable is written without braces on purpose: the braced
        # form would be parsed as a WDL placeholder and fail to resolve.
        bismark --bowtie2 -p "$nt" --bam ${ref_dir} -1 ${trim_read1} -2 ${trim_read2}
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File unsorted_bam = "${sample}_R1_val_1_bismark_bt2_pe.bam"
    }
}