|
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394 |
- suppressPackageStartupMessages(library("optparse"))
- suppressPackageStartupMessages(library("stats"))
-
- # specify our desired options in a list
- # by default OptionParser will add an help option equivalent to
- # make_option(c("-h", "--help"), action="store_true", default=FALSE,
- # help="Show this help message and exit")
-
- option_list <- list(
- make_option(c("-p", "--prefix"), type="character",default="./",
- help="The output files prefix [default ./]"),
- make_option(c("-i", "--input"),type="character", default=NULL,
- help="The directory of input EPIC files. required!")
- )
-
- # get command line options, if help option encountered print help and exit,
- # otherwise if options not found on command line then set defaults,
- opt <- parse_args(OptionParser(option_list=option_list))
-
- if (is.null(opt$input)){
- print_help(opt_parser)
- stop("At least one argument must be supplied (input file).", call.=FALSE)
- }
-
- # load libraries
-
- library("minfi")
- library("IlluminaHumanMethylationEPICmanifest")
- library("IlluminaHumanMethylationEPICanno.ilm10b4.hg19")
-
- ## 1. data import
- targets <- read.metharray.sheet(opt$input)
- rgSet <- read.metharray.exp(targets = targets)
- targets$ID <- paste(targets$Sample_Group,targets$Sample_Name,sep=".")
- sampleNames(rgSet) <- targets$ID
- #phenoData <- pData(rgSet)
- message("data import:finished")
-
- ## 2. Quality control
- # 2.1 qc report by minfi
- # qcReport(rgSet, sampNames=targets$ID, sampGroups=targets$Sample_Group,pdf="qcReport.pdf")
-
- # 2.2 data filtering
- # get detected p value
- # a. remove samples with average p value less than 0.05
- detP <- detectionP(rgSet)
- keep <- colMeans(detP) < 0.05
- rgSet_sample_removed <- rgSet[,keep]
- targets_sample_removed <- targets[keep,]
- message("sample p value filtration:finished")
-
- # b. normalization
- mSetSq <- preprocessFunnorm(rgSet_sample_removed)
- message("Funnorm normalization:finished")
-
- # c. filter probes with p value less than 0.01
- # ensure probes are in the same order in the mSetSq and detP objects
- detP <- detP[match(featureNames(mSetSq),rownames(detP)),]
- # remove any probes that have failed in one or more samples
- keep <- rowSums(detP < 0.01) > ncol(mSetSq) * 0.9
- mSetSqFlt <- mSetSq[keep,]
- message("probe p value filteration:finished")
-
- # d. remove sex probes
- annotation <- getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
- keep <- !(featureNames(mSetSqFlt) %in% annotation$Name[annotation$chr %in% c("chrX","chrY")])
- mSetSqFlt <- mSetSqFlt[keep,]
- mSetSqFlt <- dropLociWithSnps(mSetSqFlt)
- message("remove:finished")
-
- ## get M and beta value
- # filtered
- mVals <- getM(mSetSqFlt)
- bVals <- getBeta(mSetSqFlt)
- # raw
- mSetRaw <- preprocessRaw(rgSet)
- raw_mVals <- getM(mSetRaw)
- raw_bVals <- getBeta(mSetRaw)
- message("m value and beta value output:finished")
-
- # write output
- message("saving R data")
- rdata_filename = paste(opt$prefix, '.RData',sep="")
- save(rgSet, targets, detP, file = rdata_filename)
- message("writing filtered table")
- m_filename = paste(opt$prefix,'.filter.p.sex.snp.mVal.txt',sep="")
- write.table(mVals,m_filename,col.names = T,row.names = T,sep="\t",quote=F)
- b_filename = paste(opt$prefix,'.filter.p.sex.snp.bVal.txt',sep="")
- write.table(bVals,b_filename,col.names = T,row.names = T,sep="\t",quote=F)
- message("writing raw table")
- m_raw_filename = paste(opt$prefix,'.raw.mVal.txt',sep="")
- write.table(raw_mVals,m_raw_filename,col.names = T,row.names = T,sep="\t",quote=F)
- b_raw_filename = paste(opt$prefix,'.raw.bVal.txt',sep="")
- write.table(raw_bVals,b_raw_filename,col.names = T,row.names = T,sep="\t",quote=F)
|