vor 6 Jahren · e37fcb7190
--- a/RNAseq_2_pca.R
+++ b/RNAseq_2_pca.R
@@ -33,7 +33,7 @@ if (is.null(opt$input)){

 ##import exp file
 # Build the output directory prefix: strip one trailing "/" from opt$out_dir
 # if present, then append "/" so file names can be pasted directly after it.
 out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="")
 # Load the expression table from opt$input: first line is the header, the
 # first column becomes the row names, and column names are kept as written
 # (check.names=F).
 # NOTE(review): logexpr is assigned twice in a row, so the read.table() result
 # replaces the fread() result. This looks like the before/after pair of a diff
 # hunk -- confirm which reader is intended.
 logexpr<-fread(opt$input,header=T,stringsAsFactors=F,row.names=1,check.names=F,data.table=F)
 logexpr<-read.table(opt$input,header=T,stringsAsFactors=F,row.names=1,check.names=F)

 #check exp file is log scale
 if(max(logexpr[,1])-min(logexpr[,1])>100){
--- a/RNAseq_3_cor.R
+++ b/RNAseq_3_cor.R
@@ -4,7 +4,6 @@
 # Rscript RNAseq_3_cor.R -o /home/yuying/rnaseqreport_test -i ballgown_geneexp_log2fpkm_floor0p01_c3r58395_2019-04-29.txt  -g group1.txt -p organoid 

 suppressPackageStartupMessages(library("optparse"))
 suppressPackageStartupMessages(library("data.table"))

 # specify our desired options in a list
 # by default OptionParser will add an help option equivalent to 
@@ -33,7 +32,7 @@ if (is.null(opt$input)){

 ##import exp file
 # Build the output directory prefix: strip one trailing "/" from opt$out_dir
 # if present, then append "/" so file names can be pasted directly after it.
 out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="")
 # Load the expression table from opt$input: first line is the header, the
 # first column becomes the row names, and column names are kept as written
 # (check.names=F).
 # NOTE(review): logexpr is assigned twice in a row, so the read.table() result
 # replaces the fread() result. This looks like the before/after pair of a diff
 # hunk -- confirm which reader is intended.
 logexpr<-fread(opt$input,header=T,stringsAsFactors=F,row.names=1,check.names=F,data.table=F)
 logexpr<-read.table(opt$input,header=T,stringsAsFactors=F,row.names=1,check.names=F)

 #check exp file is log scale
 if(max(logexpr[,1])-min(logexpr[,1])>100){
--- a/RNAseq_4_pwDEG.R
+++ b/RNAseq_4_pwDEG.R
@@ -6,7 +6,6 @@
 # choppy report script like : @scatter-plot(dataFile='/mnt/c/Users/YY/Documents/working/choppy_report/data/zhanggroup_P1-6vsP7-13_choppy_scatterplot_degs.rds', dataType='rds', xAxis='log2FC', xTitle="log2FC",yAxis='log10p',yTitle="-log10 (p)")

 suppressPackageStartupMessages(library("optparse"))
 suppressPackageStartupMessages(library("data.table"))

 # specify our desired options in a list
 # by default OptionParser will add an help option equivalent to 
@@ -43,7 +42,7 @@ if (is.null(opt$sample_group)){

 ##import files
 # Build the output directory prefix: strip one trailing "/" from opt$out_dir
 # if present, then append "/" so file names can be pasted directly after it.
 out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="")
 # Load the expression table from opt$input: first line is the header, the
 # first column becomes the row names, and column names are kept as written
 # (check.names=F).
 # NOTE(review): logexpr is assigned twice in a row, so the read.table() result
 # replaces the fread() result. This looks like the before/after pair of a diff
 # hunk -- confirm which reader is intended.
 logexpr<-fread(opt$input,header=T,stringsAsFactors=F,row.names=1,check.names=F,data.table=F)
 logexpr<-read.table(opt$input,header=T,stringsAsFactors=F,row.names=1,check.names=F)

 #check exp file is log scale
 if(max(logexpr[,1])-min(logexpr[,1])>100){
--- a/RNAseq_5_pwGSEA.R
+++ b/RNAseq_5_pwGSEA.R
@@ -5,7 +5,6 @@

 suppressPackageStartupMessages(library("optparse"))
 suppressPackageStartupMessages(library("fgsea"))
 suppressPackageStartupMessages(library("data.table"))

 # specify our desired options in a list
 # by default OptionParser will add an help option equivalent to 
@@ -46,7 +45,7 @@ if (is.null(opt$sample_group)){

 ##import file
 # Build the output directory prefix: strip one trailing "/" from opt$out_dir
 # if present, then append "/" so file names can be pasted directly after it.
 out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="")
 # Load the expression table from opt$input: first line is the header, the
 # first column becomes the row names, and column names are kept as written
 # (check.names=F).
 # NOTE(review): logexpr is assigned twice in a row, so the read.table() result
 # replaces the fread() result. This looks like the before/after pair of a diff
 # hunk -- confirm which reader is intended.
 logexpr<-fread(opt$input,header=T,stringsAsFactors=F,row.names=1,check.names=F,data.table=F)
 logexpr<-read.table(opt$input,header=T,stringsAsFactors=F,row.names=1,check.names=F)

 #check exp file is log scale
 if(max(logexpr[,1])-min(logexpr[,1])>100){
--- a/RNAseq_6_enrichfunc.R
+++ b/RNAseq_6_enrichfunc.R
@@ -1,7 +1,7 @@
 #!/usr/bin/env Rscript
 ###Copyright 2019 Ying Yu from Fudan-PGx group 
 # example:
 # Rscript RNAseq_6_enrichfunc.R -o /home/yuying/rnaseqreport_test -i ballgown_geneexp_log2fpkm_floor0p01_c3r58395_2019-04-29.txt  -g group1.txt -p organoid 
 # Rscript RNAseq_6_enrichfunc.R -i rnaseq_degs_acrossgroups.csv

 suppressPackageStartupMessages(library("optparse"))
 suppressPackageStartupMessages(library("clusterProfiler"))
@@ -39,16 +39,19 @@ if (is.null(opt$input)){
  stop("At least one argument must be supplied (input file).", call.=FALSE)
 }

 message("Need to connected to the Internet.")

 ##import file
 # Build the output directory prefix: strip one trailing "/" from opt$out_dir
 # if present, then append "/" so file names can be pasted directly after it.
 out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="")
 # Read the gene list CSV given by opt$input; the first line is the header and
 # character columns are kept as character rather than converted to factors.
 gene<-read.csv(opt$input,header=T,stringsAsFactors=F)


 ##########################
 #########ID convert#######
 ##########################
 if(length(grep("ID_convert_table.rds",dir()))>0){

 message("Begin ID conversion.")

 if(length(grep("ID_convert_table.rds",dir()))>0){
 idconvert<-readRDS("ID_convert_table.rds")
 }else{
 stop("Cannot find ID_convert_table.rds in the working folder. Exit!", call.=FALSE)
@@ -72,6 +75,8 @@ if(opt$type_gene_id=="EntrezID"){
 gene$EntrezID<-gene[,1]
 }

 message("Finish ID conversion.")

 ##########################
 #########Enrich GO#######
 ##########################
@@ -91,8 +96,10 @@ g1<-gene$EntrezID
 # Drop empty-string IDs before enrichment.
 g1<-g1[!g1==""]
 #conduct enrichment

 # GO enrichment against org.Hs.eg.db over all ontologies (ont = 'ALL'), with
 # the cutoffs and p-value adjustment method taken from the command-line
 # options; the result is coerced to a plain data.frame.
 message("Contucting enrichment analysis on GO terms...")
 ego<-data.frame(enrichGO(g1, 'org.Hs.eg.db', ont = 'ALL', pvalueCutoff = opt$pvalueCutoff, pAdjustMethod = opt$pAdjustMethod, qvalueCutoff = opt$qvalueCutoff))
 

 # KEGG pathway enrichment for organism "hsa" with the same cutoffs, also
 # coerced to a plain data.frame.
 message("Contucting enrichment analysis on KEGG pathways...")
 ekg<- data.frame( enrichKEGG(g1, organism = "hsa", keyType = "kegg", pvalueCutoff = opt$pvalueCutoff, pAdjustMethod = opt$pAdjustMethod, qvalueCutoff = opt$qvalueCutoff))
  
 #modify output
@@ -109,34 +116,47 @@ ekeggall<-rbind(ekeggall,ekg1)
 }

 }else{

 # Run GO and KEGG enrichment separately for every group in groupn and append
 # the significant results to egoall / ekeggall, each prefixed with a "versus"
 # column holding the group label.
 # seq_along() replaces 1:length(): with an empty groupn the loop now runs zero
 # times instead of iterating over c(1, 0) and indexing groupn out of range.
 for (i in seq_along(groupn)){

   message(paste0("Group ", groupn[i]))

   # Entrez IDs of the rows whose second column equals the current group,
   # with empty-string IDs removed.
   g1<-gene$EntrezID[gene[,2]==groupn[i]]
   g1<-g1[!g1==""]

   #conduct enrichment
   # GO enrichment against org.Hs.eg.db over all ontologies; cutoffs and the
   # adjustment method come from the command-line options.
   message("Contucting enrichment analysis on GO terms...")
   ego<-data.frame(enrichGO(g1, 'org.Hs.eg.db', ont = 'ALL', pvalueCutoff = opt$pvalueCutoff, pAdjustMethod = opt$pAdjustMethod, qvalueCutoff = opt$qvalueCutoff))

   # KEGG pathway enrichment for organism "hsa" with the same cutoffs.
   message("Contucting enrichment analysis on KEGG pathways...")
   ekg<-data.frame(enrichKEGG(g1, organism = "hsa", keyType = "kegg", pvalueCutoff = opt$pvalueCutoff, pAdjustMethod = opt$pAdjustMethod, qvalueCutoff = opt$qvalueCutoff))

   # Accumulate GO hits for this group, labelled by group in column "versus".
   if(nrow(ego)>0){
     ego1<-cbind(versus=groupn[i],ego)
     egoall<-rbind(egoall,ego1)
     message(paste(nrow(ego),"significant GO term(s) is(are) identified."))
   }else{
     message("No significant GO term is identified.")
   }

   # Accumulate KEGG hits for this group in the same way.
   if(nrow(ekg)>0){
     ekg1<-cbind(versus=groupn[i],ekg)
     ekeggall<-rbind(ekeggall,ekg1)
     message(paste(nrow(ekg),"significant KEGG pathway(s) is(are) identified."))
   }else{
     message("No significant KEGG pathway is identified.")
   }
 }
 }
 message("Wrinting output...")

 #write output
 if(nrow(egoall)==0){
 message("No significant GO term is identified.")
 message("No significant GO term is identified across all tested groups.")
 }else{
 message(paste(nrow(egoall),"significant GO term(s) is(are) identified."))
 message(paste(nrow(egoall),"significant GO term(s) is(are) identified across all tested groups."))
 rownames(egoall)<-c(1:nrow(egoall))
 egoall$pvalue<-signif(egoall$pvalue,4)
 egoall$p.adjust<-signif(egoall$p.adjust,4)
@@ -145,9 +165,9 @@ write.csv(egoall,paste(out_dir,opt$project_code,"_GOenrich.csv",sep=""))
 }

 if(nrow(ekeggall)==0){
 message("No significant KEGG pathway is identified.")
 message("No significant KEGG pathway is identified across all tested groups.")
 }else{
 message(paste(nrow(ekeggall),"significant KEGG pathway(s) is(are) identified."))
 message(paste(nrow(ekeggall),"significant KEGG pathway(s) is(are) identified across all tested groups."))
 rownames(ekeggall)<-c(1:nrow(ekeggall))
 ekeggall$pvalue<-signif(ekeggall$pvalue,4)
 ekeggall$p.adjust<-signif(ekeggall$p.adjust,4)