Przeglądaj źródła

first commit

master
LUYAO REN 5 lat temu
commit
43fa001853
5 zmienionych plików z 180 dodań i 0 usunięć
  1. +94
    -0
      codescript/EPIC.modified.R
  2. +7
    -0
      inputs
  3. +32
    -0
      readme.md
  4. +28
    -0
      tasks/minfi.wdl
  5. +19
    -0
      workflow.wdl

+ 94
- 0
codescript/EPIC.modified.R Wyświetl plik

@@ -0,0 +1,94 @@
suppressPackageStartupMessages(library("optparse"))
suppressPackageStartupMessages(library("stats"))

# specify our desired options in a list
# by default OptionParser will add an help option equivalent to
# make_option(c("-h", "--help"), action="store_true", default=FALSE,
# help="Show this help message and exit")

# Command-line options understood by this script. OptionParser adds the
# -h/--help option on its own.
option_list <- list(
  make_option(c("-p", "--prefix"), type = "character", default = "./",
              help = "The output files prefix [default ./]"),
  make_option(c("-i", "--input"), type = "character", default = NULL,
              help = "The directory of input EPIC files. required!")
)

# Keep the parser in a variable: it is needed a second time below to print the
# usage text when the required --input option is missing.
opt_parser <- OptionParser(option_list = option_list)

# Parse the command line; options that were not supplied take their defaults.
opt <- parse_args(opt_parser)

# --input has no usable default, so show the usage text and abort without it.
if (is.null(opt$input)) {
  print_help(opt_parser)
  stop("At least one argument must be supplied (input file).", call. = FALSE)
}

# load libraries

library("minfi")
library("IlluminaHumanMethylationEPICmanifest")
library("IlluminaHumanMethylationEPICanno.ilm10b4.hg19")

## 1. data import
# Read the sample sheet from the input directory, then load the arrays it
# lists into an RGChannelSet.
targets <- read.metharray.sheet(opt$input)
rgSet <- read.metharray.exp(targets = targets)
# Label every sample as "<Sample_Group>.<Sample_Name>".
targets$ID <- paste(targets$Sample_Group, targets$Sample_Name, sep = ".")
sampleNames(rgSet) <- targets$ID
#phenoData <- pData(rgSet)
message("data import:finished")

## 2. Quality control
# 2.1 qc report by minfi (disabled)
# qcReport(rgSet, sampNames=targets$ID, sampGroups=targets$Sample_Group,pdf="qcReport.pdf")
# 2.2 data filtering
# a. keep only samples whose mean detection p value is below 0.05
#    (samples with a mean of 0.05 or above are dropped)
detP <- detectionP(rgSet)
keep_samples <- colMeans(detP) < 0.05
rgSet_sample_removed <- rgSet[, keep_samples]
targets_sample_removed <- targets[keep_samples, ]
message("sample p value filtration:finished")

# b. normalization (on the retained samples only)
mSetSq <- preprocessFunnorm(rgSet_sample_removed)
message("Funnorm normalization:finished")

# c. probe filtering on detection p value
# Put the rows of detP in the same probe order as mSetSq. detP keeps all
# samples here because it is saved as-is later in the script.
detP <- detP[match(featureNames(mSetSq), rownames(detP)), ]
# Count passing samples among the retained samples only; otherwise samples
# already dropped in step (a) would still be counted against a threshold
# that is based on ncol(mSetSq), i.e. on the retained samples.
detP_retained <- detP[, keep_samples, drop = FALSE]
# Keep probes detected (p < 0.01) in more than 90% of the retained samples.
keep_probes <- rowSums(detP_retained < 0.01) > ncol(mSetSq) * 0.9
mSetSqFlt <- mSetSq[keep_probes, ]
message("probe p value filteration:finished")

# d. remove probes on the sex chromosomes, then probes overlapping SNPs
annotation <- getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
sex_probe_names <- annotation$Name[annotation$chr %in% c("chrX", "chrY")]
mSetSqFlt <- mSetSqFlt[!(featureNames(mSetSqFlt) %in% sex_probe_names), ]
mSetSqFlt <- dropLociWithSnps(mSetSqFlt)
message("remove:finished")

## get M and beta value
# filtered: taken from the normalized, probe-filtered set
mVals <- getM(mSetSqFlt)
bVals <- getBeta(mSetSqFlt)
# raw: taken from the imported rgSet with preprocessRaw only, so all imported
# samples and probes are present
mSetRaw <- preprocessRaw(rgSet)
raw_mVals <- getM(mSetRaw)
raw_bVals <- getBeta(mSetRaw)
message("m value and beta value output:finished")

# write output
# Every output name is the user-supplied prefix followed by a fixed suffix.
message("saving R data")
rdata_filename <- paste0(opt$prefix, ".RData")
save(rgSet, targets, detP, file = rdata_filename)

# Tab-separated tables with probe names as row names and sample names as
# column names, written without quoting.
message("writing filtered table")
m_filename <- paste0(opt$prefix, ".filter.p.sex.snp.mVal.txt")
write.table(mVals, m_filename,
            col.names = TRUE, row.names = TRUE, sep = "\t", quote = FALSE)
b_filename <- paste0(opt$prefix, ".filter.p.sex.snp.bVal.txt")
write.table(bVals, b_filename,
            col.names = TRUE, row.names = TRUE, sep = "\t", quote = FALSE)

message("writing raw table")
m_raw_filename <- paste0(opt$prefix, ".raw.mVal.txt")
write.table(raw_mVals, m_raw_filename,
            col.names = TRUE, row.names = TRUE, sep = "\t", quote = FALSE)
b_raw_filename <- paste0(opt$prefix, ".raw.bVal.txt")
write.table(raw_bVals, b_raw_filename,
            col.names = TRUE, row.names = TRUE, sep = "\t", quote = FALSE)

+ 7
- 0
inputs Wyświetl plik

@@ -0,0 +1,7 @@
{
"{{ project_name }}.disk_size": "200",
"{{ project_name }}.prefix": "{{ prefix }}",
"{{ project_name }}.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/r-base:v1.0",
"{{ project_name }}.cluster_config": "OnDemand bcs.b4.xlarge img-ubuntu-vpc",
"{{ project_name }}.input_dir": "{{ input_dir }}"
}

+ 32
- 0
readme.md Wyświetl plik

@@ -0,0 +1,32 @@
## minfi分析Illumina 850K(EPIC)
# APP介绍
### 甲基化原理简述
Illumina 850K甲基化芯片可同时检测>850,000个位点,覆盖>95%的CpG岛,99%的RefSeq基因,已经成为精准医学研究的重要方法之一。
850K芯片采用了两种探针Infinium Ⅰ 和Infinium Ⅱ对样品甲基化进行测定,Infinium I采用了两种bead(甲基化M和非甲基化U,如图显示),而II只有一种bead(即甲基化和非甲基化在一起),这也导致了它们在后续荧光探测的不同,而根据不同探针的bead的荧光值,就可以得到样品各个位点上的甲基化水平。
### APP简介
为了更好更便捷的分析全基因组甲基化数据,我们选用了分析850K芯片的R包——minfi包,构建了分析pipeline,可以得到全基因组的各个位点甲基化表达谱。
# 流程和参数
#### 850K array分析流程
# 输入和输出
### 输入
需要一个文件夹,其中包含:
▪ idat文件:样本的芯片扫描结果文件,命名方式为:“'Sentrix_ID'_'Sentrix_Position'_Grn.idat”和“'Sentrix_ID'_'Sentrix_Position'_Red.idat”
▪ sample_sheet文件:样本的注释信息文件,命名为“sample_sheet.csv”,其中包含Sample_Name,Sentrix_ID, Sentrix_Position, Sample_Group等注释信息
### 输出
在850K芯片的分析中,beta 值是最常用的甲基化水平的定量方式,其主要用于差异分析。

+ 28
- 0
tasks/minfi.wdl Wyświetl plik

@@ -0,0 +1,28 @@
# Runs /opt/EPIC.modified.R from the docker image and collects the five files
# it writes.
#
# Inputs:
#   prefix         - output file name prefix, passed to the script as -p; every
#                    declared output is "<prefix><fixed suffix>"
#   input_dir      - passed to the script as -i
#                    NOTE(review): declared as File although the name suggests a
#                    directory; presumably the backend localizes it as one - confirm
#   docker         - value of the docker runtime attribute
#   cluster_config - value of the cluster runtime attribute
#   disk_size      - size field spliced into the dataDisk runtime attribute
task minfi {
  # A String, not a File: prefix is only used to build output file names and
  # never refers to an existing input file.
  String prefix
  File input_dir
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -o pipefail
    set -e
    Rscript /opt/EPIC.modified.R -p ${prefix} -i ${input_dir}
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File rdata = "${prefix}.RData"
    File filter_mVal = "${prefix}.filter.p.sex.snp.mVal.txt"
    File filter_bVal = "${prefix}.filter.p.sex.snp.bVal.txt"
    File raw_mVal = "${prefix}.raw.mVal.txt"
    File raw_bVal = "${prefix}.raw.bVal.txt"
  }
}

+ 19
- 0
workflow.wdl Wyświetl plik

@@ -0,0 +1,19 @@
import "./tasks/minfi.wdl" as minfi

# Single-step workflow: forwards all of its inputs unchanged to the minfi task.
# The workflow name is a template placeholder filled in before submission.
workflow {{ project_name }} {
  # Output file name prefix; a plain string, not a path to an existing file.
  String prefix
  File input_dir
  String docker
  String cluster_config
  String disk_size

  call minfi.minfi as minfi {
    input:
      prefix = prefix,
      input_dir = input_dir,
      docker = docker,
      cluster_config = cluster_config,
      disk_size = disk_size
  }
}


Ładowanie…
Anuluj
Zapisz