suppressPackageStartupMessages(library("optparse")) suppressPackageStartupMessages(library("stats")) # specify our desired options in a list # by default OptionParser will add an help option equivalent to # make_option(c("-h", "--help"), action="store_true", default=FALSE, # help="Show this help message and exit") option_list <- list( make_option(c("-p", "--prefix"), type="character",default="./", help="The output files prefix [default ./]"), make_option(c("-i", "--input"),type="character", default=NULL, help="The directory of input EPIC files. required!") ) # get command line options, if help option encountered print help and exit, # otherwise if options not found on command line then set defaults, opt <- parse_args(OptionParser(option_list=option_list)) if (is.null(opt$input)){ print_help(opt_parser) stop("At least one argument must be supplied (input file).", call.=FALSE) } # load libraries library("minfi") library("IlluminaHumanMethylationEPICmanifest") library("IlluminaHumanMethylationEPICanno.ilm10b4.hg19") ## 1. data import targets <- read.metharray.sheet(opt$input) rgSet <- read.metharray.exp(targets = targets) targets$ID <- paste(targets$Sample_Group,targets$Sample_Name,sep=".") sampleNames(rgSet) <- targets$ID #phenoData <- pData(rgSet) message("data import:finished") ## 2. Quality control # 2.1 qc report by minfi # qcReport(rgSet, sampNames=targets$ID, sampGroups=targets$Sample_Group,pdf="qcReport.pdf") # 2.2 data filtering # get detected p value # a. remove samples with average p value less than 0.05 detP <- detectionP(rgSet) keep <- colMeans(detP) < 0.05 rgSet_sample_removed <- rgSet[,keep] targets_sample_removed <- targets[keep,] message("sample p value filtration:finished") # b. normalization mSetSq <- preprocessFunnorm(rgSet_sample_removed) message("Funnorm normalization:finished") # c. filter probes with p value less than 0.01 # ensure probes are in the same order in the mSetSq and detP objects detP <- detP[match(featureNames(mSetSq),rownames(detP)),] # remove any probes that have failed in one or more samples keep <- rowSums(detP < 0.01) > ncol(mSetSq) * 0.9 mSetSqFlt <- mSetSq[keep,] message("probe p value filteration:finished") # d. remove sex probes annotation <- getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19) keep <- !(featureNames(mSetSqFlt) %in% annotation$Name[annotation$chr %in% c("chrX","chrY")]) mSetSqFlt <- mSetSqFlt[keep,] mSetSqFlt <- dropLociWithSnps(mSetSqFlt) message("remove:finished") ## get M and beta value # filtered mVals <- getM(mSetSqFlt) bVals <- getBeta(mSetSqFlt) # raw mSetRaw <- preprocessRaw(rgSet) raw_mVals <- getM(mSetRaw) raw_bVals <- getBeta(mSetRaw) message("m value and beta value output:finished") # write output message("saving R data") rdata_filename = paste(opt$prefix, '.RData',sep="") save(rgSet, targets, detP, file = rdata_filename) message("writing filtered table") m_filename = paste(opt$prefix,'.filter.p.sex.snp.mVal.txt',sep="") write.table(mVals,m_filename,col.names = T,row.names = T,sep="\t",quote=F) b_filename = paste(opt$prefix,'.filter.p.sex.snp.bVal.txt',sep="") write.table(bVals,b_filename,col.names = T,row.names = T,sep="\t",quote=F) message("writing raw table") m_raw_filename = paste(opt$prefix,'.raw.mVal.txt',sep="") write.table(raw_mVals,m_raw_filename,col.names = T,row.names = T,sep="\t",quote=F) b_raw_filename = paste(opt$prefix,'.raw.bVal.txt',sep="") write.table(raw_bVals,b_raw_filename,col.names = T,row.names = T,sep="\t",quote=F)