5 lat temu · 43fa001853
--- a/codescript/EPIC.modified.R
+++ b/codescript/EPIC.modified.R
@@ -0,0 +1,94 @@
 suppressPackageStartupMessages(library("optparse"))
 suppressPackageStartupMessages(library("stats"))

 # Command-line interface.
 # OptionParser adds a -h/--help flag on its own, so only the script-specific
 # options are declared here.
 option_list <- list(
   make_option(c("-p", "--prefix"), type = "character", default = "./",
               help = "The output files prefix [default ./]"),
   make_option(c("-i", "--input"), type = "character", default = NULL,
               help = "The directory of input EPIC files. required!")
 )

 # Keep the parser in a variable: it is needed a second time below to print the
 # usage text when the required option is missing.
 opt_parser <- OptionParser(option_list = option_list)
 # Options absent from the command line fall back to the defaults above.
 opt <- parse_args(opt_parser)

 # --input defaults to NULL; without it, print the usage text and abort.
 if (is.null(opt$input)) {
   print_help(opt_parser)
   stop("At least one argument must be supplied (input file).", call. = FALSE)
 }

 # load libraries

 library("minfi")
 library("IlluminaHumanMethylationEPICmanifest")
 library("IlluminaHumanMethylationEPICanno.ilm10b4.hg19")

 ## 1. data import
 # Read the sample sheet from the input directory, then the arrays it lists.
 targets <- read.metharray.sheet(opt$input)
 rgSet <- read.metharray.exp(targets = targets)

 # Label each sample "<Sample_Group>.<Sample_Name>" and use the same labels for
 # the sheet's ID column and for the sample names of rgSet.
 sample_ids <- paste(targets$Sample_Group, targets$Sample_Name, sep = ".")
 targets$ID <- sample_ids
 sampleNames(rgSet) <- targets$ID
 #phenoData <- pData(rgSet)
 message("data import:finished")

 ## 2. Quality control
 # 2.1 qc report by minfi
 # qcReport(rgSet, sampNames=targets$ID, sampGroups=targets$Sample_Group,pdf="qcReport.pdf")

 # 2.2 data filtering
 # a. sample filter: keep only the samples whose mean detection p-value is
 #    below 0.05; samples at or above that value are dropped.
 detP <- detectionP(rgSet)
 keep <- colMeans(detP) < 0.05
 rgSet_sample_removed <- rgSet[, keep]
 targets_sample_removed <- targets[keep, ]
 message("sample p value filtration:finished")

 # b. normalization (on the retained samples only)
 mSetSq <- preprocessFunnorm(rgSet_sample_removed)
 message("Funnorm normalization:finished")

 # c. probe filter
 # Put the detection p-values in the same probe order as mSetSq. detP keeps the
 # columns of every sample; drop = FALSE keeps it a matrix even with one sample.
 detP <- detP[match(featureNames(mSetSq), rownames(detP)), , drop = FALSE]
 # Count passing samples only among those retained in step a, so the count is
 # compared against a threshold derived from the same set of samples
 # (ncol(mSetSq)). `keep` still holds the sample mask at this point.
 detP_retained <- detP[, keep, drop = FALSE]
 # Keep a probe when it has p < 0.01 in more than 90% of the retained samples.
 keep <- rowSums(detP_retained < 0.01) > ncol(mSetSq) * 0.9
 mSetSqFlt <- mSetSq[keep, ]
 message("probe p value filteration:finished")

 # d. remove sex probes
 # Names of all probes the EPIC annotation places on chrX or chrY.
 annotation <- getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
 sex_probe_names <- annotation$Name[annotation$chr %in% c("chrX", "chrY")]
 on_sex_chr <- featureNames(mSetSqFlt) %in% sex_probe_names
 keep <- !on_sex_chr
 # Drop those probes, then pass the result through dropLociWithSnps().
 mSetSqFlt <- dropLociWithSnps(mSetSqFlt[keep, ])
 message("remove:finished")

 ## get M and beta value
 # filtered: taken from the normalized, filtered object mSetSqFlt
 mVals <- getM(mSetSqFlt)
 bVals <- getBeta(mSetSqFlt)
 # raw: taken from the complete rgSet via preprocessRaw(), i.e. without the
 # sample/probe filters applied above
 mSetRaw <- preprocessRaw(rgSet)
 raw_mVals <- getM(mSetRaw)
 raw_bVals <- getBeta(mSetRaw)
 message("m value and beta value output:finished")

 # write output
 # Every output name is opt$prefix followed directly by a fixed suffix.
 message("saving R data")
 rdata_filename <- paste0(opt$prefix, ".RData")
 save(rgSet, targets, detP, file = rdata_filename)

 # Tab-separated tables with row and column names, unquoted.
 message("writing filtered table")
 m_filename <- paste0(opt$prefix, ".filter.p.sex.snp.mVal.txt")
 write.table(mVals, m_filename, col.names = TRUE, row.names = TRUE,
             sep = "\t", quote = FALSE)
 b_filename <- paste0(opt$prefix, ".filter.p.sex.snp.bVal.txt")
 write.table(bVals, b_filename, col.names = TRUE, row.names = TRUE,
             sep = "\t", quote = FALSE)

 message("writing raw table")
 m_raw_filename <- paste0(opt$prefix, ".raw.mVal.txt")
 write.table(raw_mVals, m_raw_filename, col.names = TRUE, row.names = TRUE,
             sep = "\t", quote = FALSE)
 b_raw_filename <- paste0(opt$prefix, ".raw.bVal.txt")
 write.table(raw_bVals, b_raw_filename, col.names = TRUE, row.names = TRUE,
             sep = "\t", quote = FALSE)
--- a/inputs
+++ b/inputs
@@ -0,0 +1,7 @@
 {
  "{{ project_name }}.disk_size": "200",
  "{{ project_name }}.prefix": "{{ prefix }}",
  "{{ project_name }}.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/r-base:v1.0",
  "{{ project_name }}.cluster_config": "OnDemand bcs.b4.xlarge img-ubuntu-vpc",
  "{{ project_name }}.input_dir": "{{ input_dir }}"
 }
--- a/readme.md
+++ b/readme.md
@@ -0,0 +1,32 @@
 ## minfi分析Illumina 850K(EPIC)

 # APP介绍

 ### 甲基化原理简述

 Illumina 850K甲基化芯片可同时检测>850,000个位点，覆盖>95%的CpG岛，99%的RefSeq基因，已经成为精准医学研究的重要方法之一。

 850K芯片采用了两种探针Infinium Ⅰ 和Infinium Ⅱ对样品甲基化进行测定，Infinium I采用了两种bead（甲基化M和非甲基化U，如图显示），而II只有一种bead（即甲基化和非甲基化在一起），这也导致了它们在后续荧光探测的不同，而根据不同探针的bead的荧光值，就可以得到样品各个位点上的甲基化水平。

 ### APP简介

 为了更好更便捷的分析全基因组甲基化数据，我们选用了分析850K芯片的R包——minfi包，构建了分析pipeline，可以得到全基因组的各个位点甲基化表达谱。

 # 流程和参数

 #### 850K array分析流程

 # 输入和输出

 ### 输入

 需要一个文件夹，其中包含：

 ▪   idat文件：样本的芯片扫描结果文件，每个样本包含绿色和红色通道两个文件，命名方式为：“'Sentrix_ID'_'Sentrix_Position'_Grn.idat”和“'Sentrix_ID'_'Sentrix_Position'_Red.idat”

 ▪   sample_sheet文件：样本的注释信息文件，命名为“sample_sheet.csv”，其中包含Sample_Name，Sentrix_ID, Sentrix_Position, Sample_Group等注释信息

 ### 输出

 在850K芯片的分析中，beta 值是最常用的甲基化水平的定量方式，其主要用于差异分析。

--- a/tasks/minfi.wdl
+++ b/tasks/minfi.wdl
@@ -0,0 +1,28 @@
 # Runs /opt/EPIC.modified.R on one input directory and collects the five files
 # the script writes.
 task minfi {
   # Output name prefix passed to the script's -p option. It is a plain string
   # that the output names below are built from, not an existing file, so it is
   # declared String rather than File.
   String prefix
   # Input location passed to the script's -i option.
   File input_dir
   # Container image for the runtime section.
   String docker
   # Value of the runtime "cluster" attribute.
   String cluster_config
   # Size inserted into the runtime "dataDisk" string.
   String disk_size

   command <<<
     set -o pipefail
     set -e
     Rscript /opt/EPIC.modified.R -p ${prefix} -i ${input_dir}
   >>>

   runtime {
     docker: docker
     cluster: cluster_config
     systemDisk: "cloud_ssd 40"
     dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
   }

   # Names mirror the suffixes the R script appends to the prefix.
   output {
     File rdata = "${prefix}.RData"
     File filter_mVal = "${prefix}.filter.p.sex.snp.mVal.txt"
     File filter_bVal = "${prefix}.filter.p.sex.snp.bVal.txt"
     File raw_mVal = "${prefix}.raw.mVal.txt"
     File raw_bVal = "${prefix}.raw.bVal.txt"
   }
 }
--- a/workflow.wdl
+++ b/workflow.wdl
@@ -0,0 +1,19 @@
 import "./tasks/minfi.wdl" as minfi

 # Single-step workflow: forwards every input unchanged to the minfi task.
 workflow {{ project_name }} {
   # Output name prefix; a plain string, not a path to an existing file.
   String prefix
   File input_dir
   String docker
   String cluster_config
   String disk_size

   call minfi.minfi as minfi {
     input:
       prefix = prefix,
       input_dir = input_dir,
       docker = docker,
       cluster_config = cluster_config,
       disk_size = disk_size
   }
 }