Browse Source

上传文件至 '.'

master
yingyu 6 years ago
parent
commit
506e33ea32
4 changed files with 45 additions and 23 deletions
  1. +3
    -2
      RNAseq_2_pca.R
  2. +3
    -2
      RNAseq_3_cor.R
  3. +4
    -3
      RNAseq_4_pwDEG.R
  4. +35
    -16
      RNAseq_5_pwGSEA.R

+ 3
- 2
RNAseq_2_pca.R View File

@@ -4,6 +4,7 @@
# Rscript RNAseq_2_pca.R -o /home/yuying/rnaseqreport_test -i ballgown_geneexp_log2fpkm_floor0p01_c3r58395_2019-04-29.txt -g group1.txt -p organoid

suppressPackageStartupMessages(library("optparse"))
suppressPackageStartupMessages(library("data.table"))

# specify our desired options in a list
# by default OptionParser will add a help option equivalent to
@@ -16,7 +17,7 @@ option_list <- list(
make_option(c("-i", "--input"),type="character", default=NULL,
help="The input expression files. required!"),
make_option(c("-g", "--sample_group"),type="character", default=NULL,
help="File for sample group infomation. The input file containing sample name and group infomation. note colname must be like: sample group1 group2... "),
help="File in tab-delimited format for sample group infomation. The input file containing sample name and group infomation. note colname must be like: sample group1 group2... "),
make_option(c("-p", "--project_code"), type="character",default="rnaseq",
help="Project code, which is used as prefix of output file. [default: rnaseq]")
)
@@ -32,7 +33,7 @@ if (is.null(opt$input)){

##import exp file
out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="")
logexpr<-read.table(opt$input,header=T,stringsAsFactors=F,row.names=1)
logexpr<-fread(opt$input,header=T,stringsAsFactors=F,row.names=1,check.names=F,data.table=F)

# check that the expression file is on a log scale
if(max(logexpr[,1])-min(logexpr[,1])>100){

+ 3
- 2
RNAseq_3_cor.R View File

@@ -4,6 +4,7 @@
# Rscript RNAseq_3_cor.R -o /home/yuying/rnaseqreport_test -i ballgown_geneexp_log2fpkm_floor0p01_c3r58395_2019-04-29.txt -g group1.txt -p organoid

suppressPackageStartupMessages(library("optparse"))
suppressPackageStartupMessages(library("data.table"))

# specify our desired options in a list
# by default OptionParser will add a help option equivalent to
@@ -16,7 +17,7 @@ option_list <- list(
make_option(c("-i", "--input"),type="character", default=NULL,
help="The input expression files. required!"),
make_option(c("-g", "--sample_group"),type="character", default=NULL,
help="File for sample group infomation.The input file containing sample name and group infomation. note colname must be like: sample group1 group2... "),
help="File in tab-delimited format for sample group infomation. The input file containing sample name and group infomation. note colname must be like: sample group1 group2... "),
make_option(c("-p", "--project_code"), type="character",default="rnaseq",
help="Project code, which is used as prefix of output file. [default: rnaseq]")
)
@@ -32,7 +33,7 @@ if (is.null(opt$input)){

##import exp file
out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="")
logexpr<-read.table(opt$input,header=T,stringsAsFactors=F,row.names=1)
logexpr<-fread(opt$input,header=T,stringsAsFactors=F,row.names=1,check.names=F,data.table=F)

# check that the expression file is on a log scale
if(max(logexpr[,1])-min(logexpr[,1])>100){

+ 4
- 3
RNAseq_4_pwDEG.R View File

@@ -6,6 +6,7 @@
# choppy report script like : @scatter-plot(dataFile='/mnt/c/Users/YY/Documents/working/choppy_report/data/zhanggroup_P1-6vsP7-13_choppy_scatterplot_degs.rds', dataType='rds', xAxis='log2FC', xTitle="log2FC",yAxis='log10p',yTitle="-log10 (p)")

suppressPackageStartupMessages(library("optparse"))
suppressPackageStartupMessages(library("data.table"))

# specify our desired options in a list
# by default OptionParser will add a help option equivalent to
@@ -18,7 +19,7 @@ option_list <- list(
make_option(c("-i", "--input"),type="character", default=NULL,
help="The input expression files. Required!"),
make_option(c("-g", "--sample_group"),type="character", default=NULL,
help="File for sample group infomation.The input file containing sample name and group infomation. note colname must be like: sample group1 group2... Required! "),
help="File in tab-delimited format for sample group infomation. The input file containing sample name and group infomation. note colname must be like: sample group1 group2... Required! "),
make_option(c("-p", "--project_code"), type="character",default="rnaseq",
help="Project code, which is used as prefix of output file. [default: rnaseq]"),
make_option(c("-a", "--output_all_genes"), metavar="FALSE", default=FALSE,
@@ -42,7 +43,7 @@ if (is.null(opt$sample_group)){

##import files
out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="")
logexpr<-read.table(opt$input,header=T,stringsAsFactors=F,row.names=1)
logexpr<-fread(opt$input,header=T,stringsAsFactors=F,row.names=1,check.names=F,data.table=F)

# check that the expression file is on a log scale
if(max(logexpr[,1])-min(logexpr[,1])>100){
@@ -161,5 +162,5 @@ degstat$number<-as.numeric(as.character(degstat$number))
########
write.csv(degstat,paste(out_dir,opt$project_code,"_degs_stats.csv",sep=""),row.names=F)
saveRDS(degstat,paste(out_dir,opt$project_code,"_degs_stats.rds",sep=""))
message("RNAseq_4_pwDEG.R finished!")
}

+ 35
- 16
RNAseq_5_pwGSEA.R View File

@@ -5,6 +5,7 @@

suppressPackageStartupMessages(library("optparse"))
suppressPackageStartupMessages(library("fgsea"))
suppressPackageStartupMessages(library("data.table"))

# specify our desired options in a list
# by default OptionParser will add a help option equivalent to
@@ -21,11 +22,13 @@ option_list <- list(
make_option(c("-e", "--type_gene_id"),type="character", default="EnsemblGID",
help="The type of gene symbol. Could be either of EnsemblGID/EntrezID/GeneSymbol [default: EnsemblGID]"),
make_option(c("-g", "--sample_group"),type="character", default=NULL,
help="File for sample group infomation.The input file containing sample name and group infomation. note colname must be like: sample group1 group2... Required! "),
help="File in tab-delimited format for sample group infomation. The input file containing sample name and group infomation. note colname must be like: sample group1 group2... Required! "),
make_option(c("-q", "--padjvalueCutoff"), type="double",default=0.2,metavar="number",
help="Cutoff value of adjusted p value. [default: 0.2]"),
make_option(c("-p", "--project_code"), type="character",default="rnaseq",
help="Project code, which is used as prefix of output file. [default: rnaseq]")
help="Project code, which is used as prefix of output file. [default: rnaseq]"),
make_option(c("-d", "--ref_rdata_dir"), type="character",default="./",
help="The directory of reference files: human_c2_v5p2.rdata, human_c5_v5p2.rdata and ID_convert_table.rds. [default: ./]")
)

# get command line options, if help option encountered print help and exit,
@@ -43,7 +46,7 @@ if (is.null(opt$sample_group)){

##import file
out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="")
logexpr<-read.table(opt$input,header=T,stringsAsFactors=F,row.names=1)
logexpr<-fread(opt$input,header=T,stringsAsFactors=F,row.names=1,check.names=F,data.table=F)

# check that the expression file is on a log scale
if(max(logexpr[,1])-min(logexpr[,1])>100){
@@ -56,10 +59,23 @@ sample_group<-read.table(opt$sample_group,sep="\t",header=T)
if(length(grep("group",colnames(sample_group)))==0){
stop("No group is identified in sample_group file. Make sure the head of sample_group file is like sample, group1, group2.")
}

#refdir
refdir<-paste(gsub("/$","",opt$ref_rdata_dir),"/",sep="")
#c2: curated gene sets (rdata file)
load("./human_c2_v5p2.rdata")
#c5: GO gene sets (rdata file)
load("./human_c5_v5p2.rdata")

if(length(grep("human_c2_v5p2.rdata",dir(refdir)))>0){
load(paste(refdir,"human_c2_v5p2.rdata",sep=""))
}else{
stop("Cannot find human_c2_v5p2.rds in the ref_rdata_dir. Exit!", call.=FALSE)
}

if(length(grep("human_c5_v5p2.rdata",dir(refdir)))>0){
load(paste(refdir,"human_c5_v5p2.rdata",sep=""))
}else{
stop("Cannot find human_c5_v5p2.rds in the ref_rdata_dir. Exit!", call.=FALSE)
}

##########################
#########ID convert#######
@@ -67,8 +83,8 @@ load("./human_c5_v5p2.rdata")

message("Begin ID conversion.")

if(length(grep("ID_convert_table.rds",dir()))>0){
idconvert<-readRDS("./ID_convert_table.rds")
if(length(grep("ID_convert_table.rds",dir(refdir)))>0){
idconvert<-readRDS(paste(refdir,"ID_convert_table.rds",sep=""))
}else{
stop("Cannot find ID_convert_table.rds in the working folder. Exit!", call.=FALSE)
}
@@ -122,33 +138,36 @@ logfc<-logfc[order(-logfc)]
#GSEA in GO term
fgseaRes.c5 <- fgsea(Hs.c5, logfc, minSize=15, maxSize = 500, nperm=1000)
c5sig<-fgseaRes.c5[fgseaRes.c5$padj<opt$padjvalueCutoff,]
c5sig<-c5sig[order(c5sig$pval),]
c5sig<-data.frame(c5sig)
c5sig$leadingEdge<-sapply(c5sig$leadingEdge,function(x){paste0(unlist(x),collapse=", ")})

if(nrow(c5sig)==0){
message(paste("No significant GO term is identified in group ",nam,".",sep=""))
}else{
message(paste(nrow(c5sig)," significant GO term(s) is(are) identified in group ",nam,".",sep=""))

c5sig<-c5sig[order(c5sig$pval),]
c5sig<-data.frame(c5sig)
c5sig$leadingEdge<-sapply(c5sig$leadingEdge,function(x){paste0(unlist(x),collapse=", ")})

c5sigall<-rbind(c5sigall,cbind(versus,c5sig))
}
#GSEA in curated gene sets

fgseaRes.c2 <- fgsea(Hs.c2, logfc, minSize=15, maxSize = 500, nperm=1000)
c2sig<-fgseaRes.c2[fgseaRes.c2$padj<opt$padjvalueCutoff,]
c2sig<-c2sig[order(c2sig$pval),]
c2sig<-data.frame(c2sig)
c2sig$leadingEdge<-sapply(c2sig$leadingEdge,function(x){paste0(unlist(x),collapse=", ")})

if(nrow(c2sig)==0){
message(paste("No significant curated gene sets is identified in group ",nam,".",sep=""))
}else{
message(paste(nrow(c2sig)," significant curated gene sets are identified in group ",nam,".",sep=""))

c2sig<-c2sig[order(c2sig$pval),]
c2sig<-data.frame(c2sig)
c2sig$leadingEdge<-sapply(c2sig$leadingEdge,function(x){paste0(unlist(x),collapse=", ")})
c2sigall<-rbind(c2sigall,cbind(versus,c2sig))
}
}
}

if(nrow(c5sigall)==0){
if(length(c5sigall)==0){
message("No significant GO term is identified.")
}else{
c5sigall$pval<-signif(c5sigall$pval,4)
@@ -159,7 +178,7 @@ rownames(c5sigall)<-c(1:nrow(c5sigall))
write.csv(c5sigall,paste(out_dir,opt$project_code,"_gsea_go.csv",sep=""))
}

if(nrow(c2sigall)==0){
if(length(c2sigall)==0){
message("No significant GO term is identified.")
}else{
c2sigall$pval<-signif(c2sigall$pval,4)

Loading…
Cancel
Save