您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

95 行
3.5KB

  1. suppressPackageStartupMessages(library("optparse"))
  2. suppressPackageStartupMessages(library("stats"))
  3. # specify our desired options in a list
  4. # by default OptionParser will add an help option equivalent to
  5. # make_option(c("-h", "--help"), action="store_true", default=FALSE,
  6. # help="Show this help message and exit")
  7. option_list <- list(
  8. make_option(c("-p", "--prefix"), type="character",default="./",
  9. help="The output files prefix [default ./]"),
  10. make_option(c("-i", "--input"),type="character", default=NULL,
  11. help="The directory of input EPIC files. required!")
  12. )
  13. # get command line options, if help option encountered print help and exit,
  14. # otherwise if options not found on command line then set defaults,
  15. opt <- parse_args(OptionParser(option_list=option_list))
  16. if (is.null(opt$input)){
  17. print_help(opt_parser)
  18. stop("At least one argument must be supplied (input file).", call.=FALSE)
  19. }
  20. # load libraries
  21. library("minfi")
  22. library("IlluminaHumanMethylationEPICmanifest")
  23. library("IlluminaHumanMethylationEPICanno.ilm10b4.hg19")
  24. ## 1. data import
  25. targets <- read.metharray.sheet(opt$input)
  26. rgSet <- read.metharray.exp(targets = targets)
  27. targets$ID <- paste(targets$Sample_Group,targets$Sample_Name,sep=".")
  28. sampleNames(rgSet) <- targets$ID
  29. #phenoData <- pData(rgSet)
  30. message("data import:finished")
  31. ## 2. Quality control
  32. # 2.1 qc report by minfi
  33. # qcReport(rgSet, sampNames=targets$ID, sampGroups=targets$Sample_Group,pdf="qcReport.pdf")
  34. # 2.2 data filtering
  35. # get detected p value
  36. # a. remove samples with average p value less than 0.05
  37. detP <- detectionP(rgSet)
  38. keep <- colMeans(detP) < 0.05
  39. rgSet_sample_removed <- rgSet[,keep]
  40. targets_sample_removed <- targets[keep,]
  41. message("sample p value filtration:finished")
  42. # b. normalization
  43. mSetSq <- preprocessFunnorm(rgSet_sample_removed)
  44. message("Funnorm normalization:finished")
  45. # c. filter probes with p value less than 0.01
  46. # ensure probes are in the same order in the mSetSq and detP objects
  47. detP <- detP[match(featureNames(mSetSq),rownames(detP)),]
  48. # remove any probes that have failed in one or more samples
  49. keep <- rowSums(detP < 0.01) > ncol(mSetSq) * 0.9
  50. mSetSqFlt <- mSetSq[keep,]
  51. message("probe p value filteration:finished")
  52. # d. remove sex probes
  53. annotation <- getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
  54. keep <- !(featureNames(mSetSqFlt) %in% annotation$Name[annotation$chr %in% c("chrX","chrY")])
  55. mSetSqFlt <- mSetSqFlt[keep,]
  56. mSetSqFlt <- dropLociWithSnps(mSetSqFlt)
  57. message("remove:finished")
  58. ## get M and beta value
  59. # filtered
  60. mVals <- getM(mSetSqFlt)
  61. bVals <- getBeta(mSetSqFlt)
  62. # raw
  63. mSetRaw <- preprocessRaw(rgSet)
  64. raw_mVals <- getM(mSetRaw)
  65. raw_bVals <- getBeta(mSetRaw)
  66. message("m value and beta value output:finished")
  67. # write output
  68. message("saving R data")
  69. rdata_filename = paste(opt$prefix, '.RData',sep="")
  70. save(rgSet, targets, detP, file = rdata_filename)
  71. message("writing filtered table")
  72. m_filename = paste(opt$prefix,'.filter.p.sex.snp.mVal.txt',sep="")
  73. write.table(mVals,m_filename,col.names = T,row.names = T,sep="\t",quote=F)
  74. b_filename = paste(opt$prefix,'.filter.p.sex.snp.bVal.txt',sep="")
  75. write.table(bVals,b_filename,col.names = T,row.names = T,sep="\t",quote=F)
  76. message("writing raw table")
  77. m_raw_filename = paste(opt$prefix,'.raw.mVal.txt',sep="")
  78. write.table(raw_mVals,m_raw_filename,col.names = T,row.names = T,sep="\t",quote=F)
  79. b_raw_filename = paste(opt$prefix,'.raw.bVal.txt',sep="")
  80. write.table(raw_bVals,b_raw_filename,col.names = T,row.names = T,sep="\t",quote=F)