#!/usr/bin/env Rscript
### Copyright 2019 Ying Yu from Fudan-PGx group
#
# GO and KEGG over-representation analysis for one or more gene lists.
#
# Inputs:
#   * a csv file (--input) whose first column holds gene identifiers and
#     whose optional second column holds a group label;
#   * ID_convert_table.rds in the working directory, read with readRDS() and
#     accessed through its EntrezID, EnsemblID and GeneSymbol columns.
# Outputs (only written when at least one term is returned):
#   * <out_dir>/<project_code>_GOenrich.csv
#   * <out_dir>/<project_code>_KEGGenrich.csv
#
# example:
# Rscript RNAseq_6_enrichfunc.R -o /home/yuying/rnaseqreport_test -i ballgown_geneexp_log2fpkm_floor0p01_c3r58395_2019-04-29.txt -g group1.txt -p organoid
# NOTE(review): the example above passes `-g`, which is not among the options
# defined below -- confirm the intended invocation.

suppressPackageStartupMessages(library("optparse"))
suppressPackageStartupMessages(library("clusterProfiler"))

# Command line options ----
# By default OptionParser adds a help option equivalent to
# make_option(c("-h", "--help"), action="store_true", default=FALSE,
#             help="Show this help message and exit")
option_list <- list(
  make_option(c("-o", "--out_dir"), type = "character", default = "./",
              help = "The output directory [default ./]"),
  make_option(c("-i", "--input"), type = "character", default = NULL,
              help = "The input DEG list in csv format. The first column: gene; second column: group. Required! "),
  make_option(c("-e", "--type_gene_id"), type = "character",
              default = "EnsemblGID",
              help = "The type of gene symbol. Could be either of EnsemblGID/EntrezID/GeneSymbol [default: EnsemblGID]"),
  make_option(c("-f", "--pvalueCutoff"), type = "double", default = 0.05,
              metavar = "number",
              help = "Cutoff value of p value. [default: 0.05]"),
  make_option(c("-m", "--pAdjustMethod"), type = "character", default = "BH",
              help = "Method of adjust p value. One of \"holm\", \"hochberg\", \"hommel\", \"bonferroni\", \"BH\", \"BY\", \"fdr\", \"none\". [default: BH]"),
  make_option(c("-q", "--qvalueCutoff"), type = "double", default = 0.2,
              metavar = "number",
              help = "Cutoff value of q value. [default: 0.2]"),
  make_option(c("-p", "--project_code"), type = "character",
              default = "rnaseq",
              help = "Project code, which is used as prefix of output file. \n[default: rnaseq]")
)

# Keep the parser object around: print_help() needs it when the required
# input is missing.
opt_parser <- OptionParser(option_list = option_list)
opt <- parse_args(opt_parser)

if (is.null(opt$input)) {
  print_help(opt_parser)
  stop("At least one argument must be supplied (input file).", call. = FALSE)
}

# Reject unknown identifier types up front; otherwise no Entrez IDs would be
# produced and the failure would surface much later.
valid_id_types <- c("EnsemblGID", "EntrezID", "GeneSymbol")
if (!opt$type_gene_id %in% valid_id_types) {
  stop("Unknown --type_gene_id '", opt$type_gene_id, "'. Must be one of: ",
       paste(valid_id_types, collapse = ", "), ".", call. = FALSE)
}

# Import file ----
# Normalise the output directory so that it ends with exactly one "/".
out_dir <- paste0(gsub("/$", "", opt$out_dir), "/")
if (!dir.exists(out_dir)) {
  dir.create(out_dir, recursive = TRUE)
}

gene <- read.csv(opt$input, header = TRUE, stringsAsFactors = FALSE)

# Capture identifiers and group labels before anything else touches `gene`,
# so a single-column input is treated as "no group information".
gene_ids <- gene[[1]]
group <- if (ncol(gene) >= 2) gene[[2]] else NULL

# ID convert ----
if (!file.exists("ID_convert_table.rds")) {
  stop("Cannot find ID_convert_table.rds in the working folder. Exit!",
       call. = FALSE)
}
idconvert <- readRDS("ID_convert_table.rds")

entrez <- switch(
  opt$type_gene_id,
  EnsemblGID = idconvert$EntrezID[match(gene_ids, idconvert$EnsemblID)],
  GeneSymbol = idconvert$EntrezID[match(gene_ids, idconvert$GeneSymbol)],
  EntrezID = gene_ids
)

if (opt$type_gene_id == "EnsemblGID" && all(is.na(entrez))) {
  stop("Cannot convert Ensembl gene ID to Entrez gene ID. Exit!",
       call. = FALSE)
}
if (opt$type_gene_id == "GeneSymbol" && all(is.na(entrez))) {
  stop("Cannot convert GeneSymbol to Entrez gene ID. Exit!", call. = FALSE)
}

# Enrichment helpers ----

# Prefix an enrichment table with the group label; NULL when it has no rows.
label_result <- function(res, label) {
  if (nrow(res) == 0) {
    return(NULL)
  }
  data.frame(versus = label, res, check.names = FALSE,
             stringsAsFactors = FALSE)
}

# Run GO and KEGG enrichment for one set of Entrez IDs.
# Returns list(go = <data.frame or NULL>, kegg = <data.frame or NULL>).
enrich_one <- function(ids, label) {
  # Drop identifiers that could not be converted (NA) or are empty strings.
  ids <- ids[!is.na(ids) & ids != ""]
  if (length(ids) == 0) {
    warning("No usable Entrez gene ID for group '", label,
            "'; enrichment skipped.", call. = FALSE)
    return(list(go = NULL, kegg = NULL))
  }
  go <- data.frame(enrichGO(ids, "org.Hs.eg.db", ont = "ALL",
                            pvalueCutoff = opt$pvalueCutoff,
                            pAdjustMethod = opt$pAdjustMethod,
                            qvalueCutoff = opt$qvalueCutoff))
  kegg <- data.frame(enrichKEGG(ids, organism = "hsa", keyType = "kegg",
                                pvalueCutoff = opt$pvalueCutoff,
                                pAdjustMethod = opt$pAdjustMethod,
                                qvalueCutoff = opt$qvalueCutoff))
  list(go = label_result(go, label), kegg = label_result(kegg, label))
}

# Row-bind the non-empty per-group tables; NULL when there is nothing to bind.
combine_results <- function(parts) {
  parts <- Filter(Negate(is.null), parts)
  if (length(parts) == 0) {
    return(NULL)
  }
  do.call(rbind, parts)
}

# Report the number of hits and, if any, round the statistics to four
# significant digits and write the table as csv into out_dir.
write_enrichment <- function(res, none_msg, found_msg, file_suffix) {
  if (is.null(res) || nrow(res) == 0) {
    message(none_msg)
    return(invisible(NULL))
  }
  message(paste(nrow(res), found_msg))
  rownames(res) <- seq_len(nrow(res))
  for (stat in c("pvalue", "p.adjust", "qvalue")) {
    res[[stat]] <- signif(res[[stat]], 4)
  }
  write.csv(res, paste0(out_dir, opt$project_code, file_suffix))
  invisible(res)
}

# Enrich GO / KEGG ----
groupn <- unique(group)
if (length(groupn) == 0) {
  message("Warning: no group infomation. Function enrichment will be conducted as one group.")
  # No group column: analyse every gene together under a single label.
  results <- list(enrich_one(entrez, "all"))
} else {
  message(paste0("A number of ", length(groupn), " group(s) is detected. \nFunction enrichment will be conducted in ", length(groupn), " group(s)."))
  # %in% (rather than ==) keeps NA group labels from producing NA rows.
  results <- lapply(groupn, function(grp) {
    enrich_one(entrez[group %in% grp], grp)
  })
}

egoall <- combine_results(lapply(results, function(x) x$go))
ekeggall <- combine_results(lapply(results, function(x) x$kegg))

# Write output ----
write_enrichment(egoall,
                 "No significant GO term is identified.",
                 "significant GO term(s) is(are) identified.",
                 "_GOenrich.csv")
write_enrichment(ekeggall,
                 "No significant KEGG pathway is identified.",
                 "significant KEGG pathway(s) is(are) identified.",
                 "_KEGGenrich.csv")
########