6 years ago · 506e33ea32
--- a/RNAseq_2_pca.R
+++ b/RNAseq_2_pca.R
@@ -4,6 +4,7 @@
 # Rscript RNAseq_2_pca.R -o /home/yuying/rnaseqreport_test -i ballgown_geneexp_log2fpkm_floor0p01_c3r58395_2019-04-29.txt  -g group1.txt -p organoid 

 suppressPackageStartupMessages(library("optparse"))
 suppressPackageStartupMessages(library("data.table"))

 # specify our desired options in a list
 # by default OptionParser will add an help option equivalent to 
@@ -16,7 +17,7 @@ option_list <- list(
    make_option(c("-i", "--input"),type="character", default=NULL,
        help="The input expression files. required!"),
    make_option(c("-g", "--sample_group"),type="character",  default=NULL,
        help="File for sample group infomation. The input file containing sample name and group infomation. note colname must be like: sample	group1	group2... "),
        help="File in tab-delimited format for sample group infomation. The input file containing sample name and group infomation. note colname must be like: sample	group1	group2... "),
   	make_option(c("-p", "--project_code"), type="character",default="rnaseq",
        help="Project code, which is used as prefix of output file. [default: rnaseq]")
 		)
@@ -32,7 +33,7 @@ if (is.null(opt$input)){

 ##import exp file
 out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="")
 logexpr<-read.table(opt$input,header=T,stringsAsFactors=F,row.names=1)
 # fread() has no row.names argument (passing one is an "unused argument" error),
 # so read the table first and then promote the first column to row names.
 # check.names=FALSE keeps the sample column names exactly as written in the file.
 logexpr<-fread(opt$input,header=TRUE,stringsAsFactors=FALSE,check.names=FALSE,data.table=FALSE)
 rownames(logexpr)<-logexpr[[1]]
 # drop=FALSE keeps a data.frame even when only one sample column remains
 logexpr<-logexpr[,-1,drop=FALSE]

 #check exp file is log scale
 if(max(logexpr[,1])-min(logexpr[,1])>100){
--- a/RNAseq_3_cor.R
+++ b/RNAseq_3_cor.R
@@ -4,6 +4,7 @@
 # Rscript RNAseq_3_cor.R -o /home/yuying/rnaseqreport_test -i ballgown_geneexp_log2fpkm_floor0p01_c3r58395_2019-04-29.txt  -g group1.txt -p organoid 

 suppressPackageStartupMessages(library("optparse"))
 suppressPackageStartupMessages(library("data.table"))

 # specify our desired options in a list
 # by default OptionParser will add an help option equivalent to 
@@ -16,7 +17,7 @@ option_list <- list(
    make_option(c("-i", "--input"),type="character", default=NULL,
        help="The input expression files. required!"),
    make_option(c("-g", "--sample_group"),type="character",  default=NULL,
        help="File for sample group infomation.The input file containing sample name and group infomation. note colname must be like: sample	group1	group2... "),
        help="File in tab-delimited format for sample group infomation. The input file containing sample name and group infomation. note colname must be like: sample	group1	group2... "),
   	make_option(c("-p", "--project_code"), type="character",default="rnaseq",
        help="Project code, which is used as prefix of output file. [default: rnaseq]")
 		)
@@ -32,7 +33,7 @@ if (is.null(opt$input)){

 ##import exp file
 out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="")
 logexpr<-read.table(opt$input,header=T,stringsAsFactors=F,row.names=1)
 # fread() has no row.names argument (passing one is an "unused argument" error),
 # so read the table first and then promote the first column to row names.
 # check.names=FALSE keeps the sample column names exactly as written in the file.
 logexpr<-fread(opt$input,header=TRUE,stringsAsFactors=FALSE,check.names=FALSE,data.table=FALSE)
 rownames(logexpr)<-logexpr[[1]]
 # drop=FALSE keeps a data.frame even when only one sample column remains
 logexpr<-logexpr[,-1,drop=FALSE]

 #check exp file is log scale
 if(max(logexpr[,1])-min(logexpr[,1])>100){
--- a/RNAseq_4_pwDEG.R
+++ b/RNAseq_4_pwDEG.R
@@ -6,6 +6,7 @@
 # choppy report script like : @scatter-plot(dataFile='/mnt/c/Users/YY/Documents/working/choppy_report/data/zhanggroup_P1-6vsP7-13_choppy_scatterplot_degs.rds', dataType='rds', xAxis='log2FC', xTitle="log2FC",yAxis='log10p',yTitle="-log10 (p)")

 suppressPackageStartupMessages(library("optparse"))
 suppressPackageStartupMessages(library("data.table"))

 # specify our desired options in a list
 # by default OptionParser will add an help option equivalent to 
@@ -18,7 +19,7 @@ option_list <- list(
    make_option(c("-i", "--input"),type="character", default=NULL,
        help="The input expression files. Required!"),
    make_option(c("-g", "--sample_group"),type="character",  default=NULL,
        help="File for sample group infomation.The input file containing sample name and group infomation. note colname must be like: sample	group1	group2... Required! "),
        help="File in tab-delimited format for sample group infomation. The input file containing sample name and group infomation. note colname must be like: sample	group1	group2... Required! "),
   	make_option(c("-p", "--project_code"), type="character",default="rnaseq",
        help="Project code, which is used as prefix of output file. [default: rnaseq]"),
 	make_option(c("-a", "--output_all_genes"), metavar="FALSE", default=FALSE,
@@ -42,7 +43,7 @@ if (is.null(opt$sample_group)){

 ##import files
 out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="")
 logexpr<-read.table(opt$input,header=T,stringsAsFactors=F,row.names=1)
 # fread() has no row.names argument (passing one is an "unused argument" error),
 # so read the table first and then promote the first column to row names.
 # check.names=FALSE keeps the sample column names exactly as written in the file.
 logexpr<-fread(opt$input,header=TRUE,stringsAsFactors=FALSE,check.names=FALSE,data.table=FALSE)
 rownames(logexpr)<-logexpr[[1]]
 # drop=FALSE keeps a data.frame even when only one sample column remains
 logexpr<-logexpr[,-1,drop=FALSE]

 #check exp file is log scale
 if(max(logexpr[,1])-min(logexpr[,1])>100){
@@ -161,5 +162,5 @@ degstat$number<-as.numeric(as.character(degstat$number))
 ########
 write.csv(degstat,paste(out_dir,opt$project_code,"_degs_stats.csv",sep=""),row.names=F)
 saveRDS(degstat,paste(out_dir,opt$project_code,"_degs_stats.rds",sep=""))

 message("RNAseq_4_pwDEG.R finished!")
 }
--- a/RNAseq_5_pwGSEA.R
+++ b/RNAseq_5_pwGSEA.R
@@ -5,6 +5,7 @@

 suppressPackageStartupMessages(library("optparse"))
 suppressPackageStartupMessages(library("fgsea"))
 suppressPackageStartupMessages(library("data.table"))

 # specify our desired options in a list
 # by default OptionParser will add an help option equivalent to 
@@ -21,11 +22,13 @@ option_list <- list(
 	make_option(c("-e", "--type_gene_id"),type="character", default="EnsemblGID",
        help="The type of gene symbol. Could be either of EnsemblGID/EntrezID/GeneSymbol [default: EnsemblGID]"),
 	make_option(c("-g", "--sample_group"),type="character",  default=NULL,
        help="File for sample group infomation.The input file containing sample name and group infomation. note colname must be like: sample	group1	group2... Required! "),
        help="File in tab-delimited format for sample group infomation. The input file containing sample name and group infomation. note colname must be like: sample	group1	group2... Required! "),
 	make_option(c("-q", "--padjvalueCutoff"), type="double",default=0.2,metavar="number",
 		help="Cutoff value of adjusted p value. [default: 0.2]"),
   	make_option(c("-p", "--project_code"), type="character",default="rnaseq",
        help="Project code, which is used as prefix of output file. [default: rnaseq]")
        help="Project code, which is used as prefix of output file. [default: rnaseq]"),
 	make_option(c("-d", "--ref_rdata_dir"), type="character",default="./",
        help="The directory of reference files: human_c2_v5p2.rdata, human_c5_v5p2.rdata and ID_convert_table.rds. [default: ./]")
 		)

 # get command line options, if help option encountered print help and exit,
@@ -43,7 +46,7 @@ if (is.null(opt$sample_group)){

 ##import file
 out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="")
 logexpr<-read.table(opt$input,header=T,stringsAsFactors=F,row.names=1)
 # fread() has no row.names argument (passing one is an "unused argument" error),
 # so read the table first and then promote the first column to row names.
 # check.names=FALSE keeps the sample column names exactly as written in the file.
 logexpr<-fread(opt$input,header=TRUE,stringsAsFactors=FALSE,check.names=FALSE,data.table=FALSE)
 rownames(logexpr)<-logexpr[[1]]
 # drop=FALSE keeps a data.frame even when only one sample column remains
 logexpr<-logexpr[,-1,drop=FALSE]

 #check exp file is log scale
 if(max(logexpr[,1])-min(logexpr[,1])>100){
@@ -56,10 +59,23 @@ sample_group<-read.table(opt$sample_group,sep="\t",header=T)
 if(length(grep("group",colnames(sample_group)))==0){
 stop("No group is identified in sample_group file. Make sure the head of sample_group file is like sample, group1, group2.")
 }

 #refdir
 refdir<-paste(gsub("/$","",opt$ref_rdata_dir),"/",sep="")
 #c2: curated gene sets (rdata file)
 load("./human_c2_v5p2.rdata")
 #c5: GO gene sets (rdata file)
 load("./human_c5_v5p2.rdata")

 # Load the c2 gene sets (rdata file) from the reference directory.
 # file.exists() tests the exact path; grep() over dir() treated the file name
 # as a regular expression and also accepted names that merely contain it.
 c2_rdata<-paste(refdir,"human_c2_v5p2.rdata",sep="")
 if(file.exists(c2_rdata)){
 load(c2_rdata)
 }else{
 # the message names the file that is actually looked up (.rdata, not .rds)
 stop("Cannot find human_c2_v5p2.rdata in the ref_rdata_dir. Exit!", call.=FALSE)
 }

 # Load the c5 gene sets (rdata file) from the reference directory.
 # file.exists() tests the exact path; grep() over dir() treated the file name
 # as a regular expression and also accepted names that merely contain it.
 c5_rdata<-paste(refdir,"human_c5_v5p2.rdata",sep="")
 if(file.exists(c5_rdata)){
 load(c5_rdata)
 }else{
 # the message names the file that is actually looked up (.rdata, not .rds)
 stop("Cannot find human_c5_v5p2.rdata in the ref_rdata_dir. Exit!", call.=FALSE)
 }

 ##########################
 #########ID convert#######
@@ -67,8 +83,8 @@ load("./human_c5_v5p2.rdata")

 message("Begin ID conversion.")

 if(length(grep("ID_convert_table.rds",dir()))>0){
 idconvert<-readRDS("./ID_convert_table.rds")
 # Read the ID conversion table from the reference directory.
 # file.exists() tests the exact path instead of regex-matching the directory listing.
 idconvert_rds<-paste(refdir,"ID_convert_table.rds",sep="")
 if(file.exists(idconvert_rds)){
 idconvert<-readRDS(idconvert_rds)
 }else{
 # the file is looked up in refdir (--ref_rdata_dir), not in the working folder
 stop("Cannot find ID_convert_table.rds in the ref_rdata_dir. Exit!", call.=FALSE)
 }
@@ -122,33 +138,36 @@ logfc<-logfc[order(-logfc)]
 #GSEA in GO term
 fgseaRes.c5 <- fgsea(Hs.c5, logfc, minSize=15, maxSize = 500, nperm=1000)
 c5sig<-fgseaRes.c5[fgseaRes.c5$padj<opt$padjvalueCutoff,]
 c5sig<-c5sig[order(c5sig$pval),]
 c5sig<-data.frame(c5sig)
 c5sig$leadingEdge<-sapply(c5sig$leadingEdge,function(x){paste0(unlist(x),collapse=", ")})

 if(nrow(c5sig)==0){
 message(paste("No significant GO term is identified in group ",nam,".",sep=""))
 }else{
 message(paste(nrow(c5sig)," significant GO term(s) is(are) identified in group ",nam,".",sep=""))

 c5sig<-c5sig[order(c5sig$pval),]
 c5sig<-data.frame(c5sig)
 c5sig$leadingEdge<-sapply(c5sig$leadingEdge,function(x){paste0(unlist(x),collapse=", ")})

 c5sigall<-rbind(c5sigall,cbind(versus,c5sig))
 }
 #GSEA in curated gene sets

 fgseaRes.c2 <- fgsea(Hs.c2, logfc, minSize=15, maxSize = 500, nperm=1000)
 c2sig<-fgseaRes.c2[fgseaRes.c2$padj<opt$padjvalueCutoff,]
 c2sig<-c2sig[order(c2sig$pval),]
 c2sig<-data.frame(c2sig)
 c2sig$leadingEdge<-sapply(c2sig$leadingEdge,function(x){paste0(unlist(x),collapse=", ")})

 if(nrow(c2sig)==0){
 message(paste("No significant curated gene sets is identified in group ",nam,".",sep=""))
 }else{
 message(paste(nrow(c2sig)," significant curated gene sets are identified in group ",nam,".",sep=""))

 c2sig<-c2sig[order(c2sig$pval),]
 c2sig<-data.frame(c2sig)
 c2sig$leadingEdge<-sapply(c2sig$leadingEdge,function(x){paste0(unlist(x),collapse=", ")})
 c2sigall<-rbind(c2sigall,cbind(versus,c2sig))
 }
 }
 }

 if(nrow(c5sigall)==0){
 if(length(c5sigall)==0){
 message("No significant GO term is identified.")
 }else{
 c5sigall$pval<-signif(c5sigall$pval,4)
@@ -159,7 +178,7 @@ rownames(c5sigall)<-c(1:nrow(c5sigall))
 write.csv(c5sigall,paste(out_dir,opt$project_code,"_gsea_go.csv",sep=""))
 }

 if(nrow(c2sigall)==0){
 if(length(c2sigall)==0){
 message("No significant GO term is identified.")
 }else{
 c2sigall$pval<-signif(c2sigall$pval,4)