|
|
@@ -0,0 +1,94 @@ |
|
|
|
suppressPackageStartupMessages(library("optparse")) |
|
|
|
suppressPackageStartupMessages(library("stats")) |
|
|
|
|
|
|
|
# specify our desired options in a list |
|
|
|
# by default OptionParser will add an help option equivalent to |
|
|
|
# make_option(c("-h", "--help"), action="store_true", default=FALSE, |
|
|
|
# help="Show this help message and exit") |
|
|
|
|
|
|
|
option_list <- list( |
|
|
|
make_option(c("-p", "--prefix"), type="character",default="./", |
|
|
|
help="The output files prefix [default ./]"), |
|
|
|
make_option(c("-i", "--input"),type="character", default=NULL, |
|
|
|
help="The directory of input EPIC files. required!") |
|
|
|
) |
|
|
|
|
|
|
|
# get command line options, if help option encountered print help and exit, |
|
|
|
# otherwise if options not found on command line then set defaults, |
|
|
|
opt <- parse_args(OptionParser(option_list=option_list)) |
|
|
|
|
|
|
|
if (is.null(opt$input)){ |
|
|
|
print_help(opt_parser) |
|
|
|
stop("At least one argument must be supplied (input file).", call.=FALSE) |
|
|
|
} |
|
|
|
|
|
|
|
# load libraries |
|
|
|
|
|
|
|
library("minfi") |
|
|
|
library("IlluminaHumanMethylationEPICmanifest") |
|
|
|
library("IlluminaHumanMethylationEPICanno.ilm10b4.hg19") |
|
|
|
|
|
|
|
## 1. data import |
|
|
|
targets <- read.metharray.sheet(opt$input) |
|
|
|
rgSet <- read.metharray.exp(targets = targets) |
|
|
|
targets$ID <- paste(targets$Sample_Group,targets$Sample_Name,sep=".") |
|
|
|
sampleNames(rgSet) <- targets$ID |
|
|
|
#phenoData <- pData(rgSet) |
|
|
|
message("data import:finished") |
|
|
|
|
|
|
|
## 2. Quality control |
|
|
|
# 2.1 qc report by minfi |
|
|
|
# qcReport(rgSet, sampNames=targets$ID, sampGroups=targets$Sample_Group,pdf="qcReport.pdf") |
|
|
|
|
|
|
|
# 2.2 data filtering |
|
|
|
# get detected p value |
|
|
|
# a. remove samples with average p value less than 0.05 |
|
|
|
detP <- detectionP(rgSet) |
|
|
|
keep <- colMeans(detP) < 0.05 |
|
|
|
rgSet_sample_removed <- rgSet[,keep] |
|
|
|
targets_sample_removed <- targets[keep,] |
|
|
|
message("sample p value filtration:finished") |
|
|
|
|
|
|
|
# b. normalization |
|
|
|
mSetSq <- preprocessFunnorm(rgSet_sample_removed) |
|
|
|
message("Funnorm normalization:finished") |
|
|
|
|
|
|
|
# c. filter probes with p value less than 0.01 |
|
|
|
# ensure probes are in the same order in the mSetSq and detP objects |
|
|
|
detP <- detP[match(featureNames(mSetSq),rownames(detP)),] |
|
|
|
# remove any probes that have failed in one or more samples |
|
|
|
keep <- rowSums(detP < 0.01) > ncol(mSetSq) * 0.9 |
|
|
|
mSetSqFlt <- mSetSq[keep,] |
|
|
|
message("probe p value filteration:finished") |
|
|
|
|
|
|
|
# d. remove sex probes |
|
|
|
annotation <- getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19) |
|
|
|
keep <- !(featureNames(mSetSqFlt) %in% annotation$Name[annotation$chr %in% c("chrX","chrY")]) |
|
|
|
mSetSqFlt <- mSetSqFlt[keep,] |
|
|
|
mSetSqFlt <- dropLociWithSnps(mSetSqFlt) |
|
|
|
message("remove:finished") |
|
|
|
|
|
|
|
## get M and beta value |
|
|
|
# filtered |
|
|
|
mVals <- getM(mSetSqFlt) |
|
|
|
bVals <- getBeta(mSetSqFlt) |
|
|
|
# raw |
|
|
|
mSetRaw <- preprocessRaw(rgSet) |
|
|
|
raw_mVals <- getM(mSetRaw) |
|
|
|
raw_bVals <- getBeta(mSetRaw) |
|
|
|
message("m value and beta value output:finished") |
|
|
|
|
|
|
|
# write output |
|
|
|
message("saving R data") |
|
|
|
rdata_filename = paste(opt$prefix, '.RData',sep="") |
|
|
|
save(rgSet, targets, detP, file = rdata_filename) |
|
|
|
message("writing filtered table") |
|
|
|
m_filename = paste(opt$prefix,'.filter.p.sex.snp.mVal.txt',sep="") |
|
|
|
write.table(mVals,m_filename,col.names = T,row.names = T,sep="\t",quote=F) |
|
|
|
b_filename = paste(opt$prefix,'.filter.p.sex.snp.bVal.txt',sep="") |
|
|
|
write.table(bVals,b_filename,col.names = T,row.names = T,sep="\t",quote=F) |
|
|
|
message("writing raw table") |
|
|
|
m_raw_filename = paste(opt$prefix,'.raw.mVal.txt',sep="") |
|
|
|
write.table(raw_mVals,m_raw_filename,col.names = T,row.names = T,sep="\t",quote=F) |
|
|
|
b_raw_filename = paste(opt$prefix,'.raw.bVal.txt',sep="") |
|
|
|
write.table(raw_bVals,b_raw_filename,col.names = T,row.names = T,sep="\t",quote=F) |