|
|
@@ -1,7 +1,7 @@ |
|
|
|
#!/usr/bin/env Rscript |
|
|
|
###Copyright 2019 Ying Yu from Fudan-PGx group |
|
|
|
# example: |
|
|
|
# Rscript RNAseq_6_enrichfunc.R -o /home/yuying/rnaseqreport_test -i ballgown_geneexp_log2fpkm_floor0p01_c3r58395_2019-04-29.txt -g group1.txt -p organoid |
|
|
|
# Rscript RNAseq_6_enrichfunc.R -i rnaseq_degs_acrossgroups.csv |
|
|
|
|
|
|
|
suppressPackageStartupMessages(library("optparse")) |
|
|
|
suppressPackageStartupMessages(library("clusterProfiler")) |
|
|
@@ -39,16 +39,19 @@ if (is.null(opt$input)){ |
|
|
|
stop("At least one argument must be supplied (input file).", call.=FALSE) |
|
|
|
} |
|
|
|
|
|
|
|
# Tell the user up front that this run needs Internet access
# (presumably for the enrichment queries later in the script -- confirm).
message("Need to connected to the Internet.")

## import file
# Strip one trailing "/" from the requested output directory (if present) and
# append one, so file names can be pasted directly onto `out_dir`.
out_dir <- paste0(sub("/$", "", opt$out_dir), "/")

# Read the input table with a header row; text columns stay character.
# TRUE/FALSE are spelled out because T/F are ordinary, reassignable variables.
gene <- read.csv(opt$input, header = TRUE, stringsAsFactors = FALSE)
|
|
|
|
|
|
|
|
|
|
|
########################## |
|
|
|
#########ID convert####### |
|
|
|
########################## |
|
|
|
if(length(grep("ID_convert_table.rds",dir()))>0){ |
|
|
|
|
|
|
|
message("Begin ID conversion.") |
|
|
|
|
|
|
|
if(length(grep("ID_convert_table.rds",dir()))>0){ |
|
|
|
idconvert<-readRDS("ID_convert_table.rds") |
|
|
|
}else{ |
|
|
|
stop("Cannot find ID_convert_table.rds in the working folder. Exit!", call.=FALSE) |
|
|
@@ -72,6 +75,8 @@ if(opt$type_gene_id=="EntrezID"){ |
|
|
|
gene$EntrezID<-gene[,1] |
|
|
|
} |
|
|
|
|
|
|
|
message("Finish ID conversion.") |
|
|
|
|
|
|
|
########################## |
|
|
|
#########Enrich GO####### |
|
|
|
########################## |
|
|
@@ -91,8 +96,10 @@ g1<-gene$EntrezID |
|
|
|
# Drop missing and empty Entrez IDs before enrichment. Testing `g1 != ""`
# alone lets NA through: NA == "" is NA, and an NA subscript yields an NA
# element instead of removing it.
g1 <- g1[!is.na(g1) & g1 != ""]

# conduct enrichment
message("Contucting enrichment analysis on GO terms...")
# GO enrichment over all ontologies (ont = "ALL") against org.Hs.eg.db, using
# the cut-offs and adjustment method supplied on the command line; the result
# is converted to a plain data frame.
ego <- data.frame(
  enrichGO(
    g1, "org.Hs.eg.db",
    ont = "ALL",
    pvalueCutoff = opt$pvalueCutoff,
    pAdjustMethod = opt$pAdjustMethod,
    qvalueCutoff = opt$qvalueCutoff
  )
)

message("Contucting enrichment analysis on KEGG pathways...")
# KEGG pathway enrichment for organism "hsa" with the same cut-offs, again
# converted to a plain data frame.
ekg <- data.frame(
  enrichKEGG(
    g1,
    organism = "hsa",
    keyType = "kegg",
    pvalueCutoff = opt$pvalueCutoff,
    pAdjustMethod = opt$pAdjustMethod,
    qvalueCutoff = opt$qvalueCutoff
  )
)
|
|
|
|
|
|
|
#modify output |
|
|
@@ -109,34 +116,47 @@ ekeggall<-rbind(ekeggall,ekg1) |
|
|
|
} |
|
|
|
|
|
|
|
}else{ |
|
|
|
|
|
|
|
# Run GO and KEGG enrichment once per entry of `groupn`. Significant results
# are appended to `egoall` / `ekeggall` with a leading "versus" column holding
# the group label; a progress message is printed for every step.
#
# seq_along() replaces 1:length(): with an empty `groupn`, 1:0 would iterate
# over c(1, 0) and index out of range instead of skipping the loop.
for (i in seq_along(groupn)) {
  message("Group ", groupn[i])

  # Entrez IDs of the rows whose second column equals this group label.
  # which() ignores rows where the comparison is NA, and the second filter
  # removes missing as well as empty IDs (`!g1 == ""` alone keeps NA).
  g1 <- gene$EntrezID[which(gene[, 2] == groupn[i])]
  g1 <- g1[!is.na(g1) & g1 != ""]

  # conduct enrichment
  message("Contucting enrichment analysis on GO terms...")
  ego <- data.frame(
    enrichGO(
      g1, "org.Hs.eg.db",
      ont = "ALL",
      pvalueCutoff = opt$pvalueCutoff,
      pAdjustMethod = opt$pAdjustMethod,
      qvalueCutoff = opt$qvalueCutoff
    )
  )

  message("Contucting enrichment analysis on KEGG pathways...")
  ekg <- data.frame(
    enrichKEGG(
      g1,
      organism = "hsa",
      keyType = "kegg",
      pvalueCutoff = opt$pvalueCutoff,
      pAdjustMethod = opt$pAdjustMethod,
      qvalueCutoff = opt$qvalueCutoff
    )
  )

  # Tag GO hits with the group label and append them to the running table.
  if (nrow(ego) > 0) {
    ego1 <- cbind(versus = groupn[i], ego)
    egoall <- rbind(egoall, ego1)
    message(nrow(ego), " significant GO term(s) is(are) identified.")
  } else {
    message("No significant GO term is identified.")
  }

  # Same bookkeeping for the KEGG hits.
  if (nrow(ekg) > 0) {
    ekg1 <- cbind(versus = groupn[i], ekg)
    ekeggall <- rbind(ekeggall, ekg1)
    message(nrow(ekg), " significant KEGG pathway(s) is(are) identified.")
  } else {
    message("No significant KEGG pathway is identified.")
  }
}
|
|
|
} |
|
|
|
message("Wrinting output...") |
|
|
|
|
|
|
|
#write output |
|
|
|
if(nrow(egoall)==0){ |
|
|
|
message("No significant GO term is identified.") |
|
|
|
message("No significant GO term is identified across all tested groups.") |
|
|
|
}else{ |
|
|
|
message(paste(nrow(egoall),"significant GO term(s) is(are) identified.")) |
|
|
|
message(paste(nrow(egoall),"significant GO term(s) is(are) identified across all tested groups.")) |
|
|
|
rownames(egoall)<-c(1:nrow(egoall)) |
|
|
|
egoall$pvalue<-signif(egoall$pvalue,4) |
|
|
|
egoall$p.adjust<-signif(egoall$p.adjust,4) |
|
|
@@ -145,9 +165,9 @@ write.csv(egoall,paste(out_dir,opt$project_code,"_GOenrich.csv",sep="")) |
|
|
|
} |
|
|
|
|
|
|
|
if(nrow(ekeggall)==0){ |
|
|
|
message("No significant KEGG pathway is identified.") |
|
|
|
message("No significant KEGG pathway is identified across all tested groups.") |
|
|
|
}else{ |
|
|
|
message(paste(nrow(ekeggall),"significant KEGG pathway(s) is(are) identified.")) |
|
|
|
message(paste(nrow(ekeggall),"significant KEGG pathway(s) is(are) identified across all tested groups.")) |
|
|
|
rownames(ekeggall)<-c(1:nrow(ekeggall)) |
|
|
|
ekeggall$pvalue<-signif(ekeggall$pvalue,4) |
|
|
|
ekeggall$p.adjust<-signif(ekeggall$p.adjust,4) |