|
|
@@ -5,6 +5,7 @@ |
|
|
|
|
|
|
|
suppressPackageStartupMessages(library("optparse")) |
|
|
|
suppressPackageStartupMessages(library("fgsea")) |
|
|
|
suppressPackageStartupMessages(library("data.table")) |
|
|
|
|
|
|
|
# specify our desired options in a list |
|
|
|
# by default OptionParser will add a help option equivalent to |
|
|
@@ -21,11 +22,13 @@ option_list <- list( |
|
|
|
make_option(c("-e", "--type_gene_id"),type="character", default="EnsemblGID", |
|
|
|
help="The type of gene symbol. Could be either of EnsemblGID/EntrezID/GeneSymbol [default: EnsemblGID]"), |
|
|
|
make_option(c("-g", "--sample_group"),type="character", default=NULL, |
|
|
|
help="File for sample group infomation.The input file containing sample name and group infomation. note colname must be like: sample group1 group2... Required! "), |
|
|
|
help="File in tab-delimited format for sample group infomation. The input file containing sample name and group infomation. note colname must be like: sample group1 group2... Required! "), |
|
|
|
make_option(c("-q", "--padjvalueCutoff"), type="double",default=0.2,metavar="number", |
|
|
|
help="Cutoff value of adjusted p value. [default: 0.2]"), |
|
|
|
make_option(c("-p", "--project_code"), type="character",default="rnaseq", |
|
|
|
help="Project code, which is used as prefix of output file. [default: rnaseq]") |
|
|
|
help="Project code, which is used as prefix of output file. [default: rnaseq]"), |
|
|
|
make_option(c("-d", "--ref_rdata_dir"), type="character",default="./", |
|
|
|
help="The directory of reference files: human_c2_v5p2.rdata, human_c5_v5p2.rdata and ID_convert_table.rds. [default: ./]") |
|
|
|
) |
|
|
|
|
|
|
|
# get command line options, if help option encountered print help and exit, |
|
|
@@ -43,7 +46,7 @@ if (is.null(opt$sample_group)){ |
|
|
|
|
|
|
|
##import file
# Normalise the output directory so it ends with exactly one "/".
out_dir <- paste0(gsub("/$", "", opt$out_dir), "/")

# Read the expression table with data.table::fread(). fread() has no
# `row.names` argument (passing one is an "unused argument" error), so the
# first column is promoted to row names by hand, which is what
# read.table(..., row.names = 1) used to do. `data.table = FALSE` makes
# fread() return a plain data.frame, and `check.names = FALSE` keeps the
# column names exactly as they appear in the file header.
logexpr <- fread(
  opt$input,
  header = TRUE,
  stringsAsFactors = FALSE,
  check.names = FALSE,
  data.table = FALSE
)
rownames(logexpr) <- logexpr[[1]]
# drop = FALSE keeps a data.frame even when only one data column remains.
logexpr <- logexpr[, -1, drop = FALSE]
|
|
|
|
|
|
|
#check exp file is log scale |
|
|
|
if(max(logexpr[,1])-min(logexpr[,1])>100){ |
|
|
@@ -56,10 +59,23 @@ sample_group<-read.table(opt$sample_group,sep="\t",header=T) |
|
|
|
# Abort unless at least one column name of sample_group contains "group".
if (!any(grepl("group", colnames(sample_group)))) {
  stop("No group is identified in sample_group file. Make sure the head of sample_group file is like sample, group1, group2.")
}
|
|
|
|
|
|
|
#refdir
# Normalise the reference directory so it ends with exactly one "/".
refdir <- paste0(gsub("/$", "", opt$ref_rdata_dir), "/")

# Load the reference gene-set files from refdir:
#   c2: curated gene sets (rdata file)
#   c5: GO gene sets (rdata file)
# NOTE(review): these files presumably define Hs.c2 / Hs.c5, which the fgsea()
# calls later in the script use; confirm against the rdata contents.
# Existence is checked on the exact path rather than by regex-matching the
# directory listing, and the error names the file that is actually missing.
# load() runs at top level here, so the objects land in the same environment
# as before.
for (ref_file in c("human_c2_v5p2.rdata", "human_c5_v5p2.rdata")) {
  ref_path <- paste0(refdir, ref_file)
  if (!file.exists(ref_path)) {
    stop("Cannot find ", ref_file, " in the ref_rdata_dir. Exit!", call. = FALSE)
  }
  load(ref_path)
}
|
|
|
|
|
|
|
########################## |
|
|
|
#########ID convert####### |
|
|
@@ -67,8 +83,8 @@ load("./human_c5_v5p2.rdata") |
|
|
|
|
|
|
|
message("Begin ID conversion.")

# The ID conversion table is looked up in the reference directory (refdir),
# not in the current working directory. The exact path is tested with
# file.exists() and the error message names the directory actually searched.
id_table_path <- paste0(refdir, "ID_convert_table.rds")
if (!file.exists(id_table_path)) {
  stop("Cannot find ID_convert_table.rds in the ref_rdata_dir. Exit!", call. = FALSE)
}
idconvert <- readRDS(id_table_path)
|
|
@@ -122,33 +138,36 @@ logfc<-logfc[order(-logfc)] |
|
|
|
#GSEA in GO term
# Run fgsea on the ranked log fold changes against the GO gene sets and keep
# the pathways whose adjusted p value is below the user-supplied cutoff.
fgseaRes.c5 <- fgsea(Hs.c5, logfc, minSize = 15, maxSize = 500, nperm = 1000)
c5sig <- fgseaRes.c5[fgseaRes.c5$padj < opt$padjvalueCutoff, ]

if (nrow(c5sig) == 0) {
  message(paste0("No significant GO term is identified in group ", nam, "."))
} else {
  message(paste0(nrow(c5sig), " significant GO term(s) is(are) identified in group ", nam, "."))

  # Post-process only when there is something to report: order by p value,
  # convert to a plain data.frame, and flatten each leadingEdge entry into a
  # single comma-separated string. vapply() guarantees a character vector.
  c5sig <- c5sig[order(c5sig$pval), ]
  c5sig <- data.frame(c5sig)
  c5sig$leadingEdge <- vapply(
    c5sig$leadingEdge,
    function(x) paste0(unlist(x), collapse = ", "),
    character(1)
  )

  # Append this comparison's hits, tagged with the comparison label.
  c5sigall <- rbind(c5sigall, cbind(versus, c5sig))
}
|
|
|
#GSEA in curated gene sets
# Run fgsea on the ranked log fold changes against the curated gene sets and
# keep the sets whose adjusted p value is below the user-supplied cutoff.
fgseaRes.c2 <- fgsea(Hs.c2, logfc, minSize = 15, maxSize = 500, nperm = 1000)
c2sig <- fgseaRes.c2[fgseaRes.c2$padj < opt$padjvalueCutoff, ]

if (nrow(c2sig) == 0) {
  message(paste0("No significant curated gene sets is identified in group ", nam, "."))
} else {
  message(paste0(nrow(c2sig), " significant curated gene sets are identified in group ", nam, "."))

  # Post-process only when there is something to report: order by p value,
  # convert to a plain data.frame, and flatten each leadingEdge entry into a
  # single comma-separated string. vapply() guarantees a character vector.
  c2sig <- c2sig[order(c2sig$pval), ]
  c2sig <- data.frame(c2sig)
  c2sig$leadingEdge <- vapply(
    c2sig$leadingEdge,
    function(x) paste0(unlist(x), collapse = ", "),
    character(1)
  )

  # Append this comparison's hits, tagged with the comparison label.
  c2sigall <- rbind(c2sigall, cbind(versus, c2sig))
}
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
if(nrow(c5sigall)==0){ |
|
|
|
if(length(c5sigall)==0){ |
|
|
|
message("No significant GO term is identified.") |
|
|
|
}else{ |
|
|
|
c5sigall$pval<-signif(c5sigall$pval,4) |
|
|
@@ -159,7 +178,7 @@ rownames(c5sigall)<-c(1:nrow(c5sigall)) |
|
|
|
write.csv(c5sigall,paste(out_dir,opt$project_code,"_gsea_go.csv",sep="")) |
|
|
|
} |
|
|
|
|
|
|
|
if(nrow(c2sigall)==0){ |
|
|
|
if(length(c2sigall)==0){ |
|
|
|
message("No significant GO term is identified.") |
|
|
|
}else{ |
|
|
|
c2sigall$pval<-signif(c2sigall$pval,4) |