瀏覽代碼

上传文件至 '.'

master
yingyu 6 年之前
父節點
當前提交
506e33ea32
共有 4 個文件被更改,包括 45 次插入 和 23 次删除
  1. +3
    -2
      RNAseq_2_pca.R
  2. +3
    -2
      RNAseq_3_cor.R
  3. +4
    -3
      RNAseq_4_pwDEG.R
  4. +35
    -16
      RNAseq_5_pwGSEA.R

+ 3
- 2
RNAseq_2_pca.R 查看文件

# Rscript RNAseq_2_pca.R -o /home/yuying/rnaseqreport_test -i ballgown_geneexp_log2fpkm_floor0p01_c3r58395_2019-04-29.txt -g group1.txt -p organoid # Rscript RNAseq_2_pca.R -o /home/yuying/rnaseqreport_test -i ballgown_geneexp_log2fpkm_floor0p01_c3r58395_2019-04-29.txt -g group1.txt -p organoid


suppressPackageStartupMessages(library("optparse")) suppressPackageStartupMessages(library("optparse"))
suppressPackageStartupMessages(library("data.table"))


# specify our desired options in a list # specify our desired options in a list
# by default OptionParser will add an help option equivalent to # by default OptionParser will add an help option equivalent to
make_option(c("-i", "--input"),type="character", default=NULL, make_option(c("-i", "--input"),type="character", default=NULL,
help="The input expression files. required!"), help="The input expression files. required!"),
make_option(c("-g", "--sample_group"),type="character", default=NULL, make_option(c("-g", "--sample_group"),type="character", default=NULL,
help="File for sample group infomation. The input file containing sample name and group infomation. note colname must be like: sample group1 group2... "),
help="File in tab-delimited format for sample group infomation. The input file containing sample name and group infomation. note colname must be like: sample group1 group2... "),
make_option(c("-p", "--project_code"), type="character",default="rnaseq", make_option(c("-p", "--project_code"), type="character",default="rnaseq",
help="Project code, which is used as prefix of output file. [default: rnaseq]") help="Project code, which is used as prefix of output file. [default: rnaseq]")
) )


##import exp file ##import exp file
out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="") out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="")
logexpr<-read.table(opt$input,header=T,stringsAsFactors=F,row.names=1)
logexpr<-fread(opt$input,header=T,stringsAsFactors=F,row.names=1,check.names=F,data.table=F)


#check exp file is log scale #check exp file is log scale
if(max(logexpr[,1])-min(logexpr[,1])>100){ if(max(logexpr[,1])-min(logexpr[,1])>100){

+ 3
- 2
RNAseq_3_cor.R 查看文件

# Rscript RNAseq_3_cor.R -o /home/yuying/rnaseqreport_test -i ballgown_geneexp_log2fpkm_floor0p01_c3r58395_2019-04-29.txt -g group1.txt -p organoid # Rscript RNAseq_3_cor.R -o /home/yuying/rnaseqreport_test -i ballgown_geneexp_log2fpkm_floor0p01_c3r58395_2019-04-29.txt -g group1.txt -p organoid


suppressPackageStartupMessages(library("optparse")) suppressPackageStartupMessages(library("optparse"))
suppressPackageStartupMessages(library("data.table"))


# specify our desired options in a list # specify our desired options in a list
# by default OptionParser will add an help option equivalent to # by default OptionParser will add an help option equivalent to
make_option(c("-i", "--input"),type="character", default=NULL, make_option(c("-i", "--input"),type="character", default=NULL,
help="The input expression files. required!"), help="The input expression files. required!"),
make_option(c("-g", "--sample_group"),type="character", default=NULL, make_option(c("-g", "--sample_group"),type="character", default=NULL,
help="File for sample group infomation.The input file containing sample name and group infomation. note colname must be like: sample group1 group2... "),
help="File in tab-delimited format for sample group infomation. The input file containing sample name and group infomation. note colname must be like: sample group1 group2... "),
make_option(c("-p", "--project_code"), type="character",default="rnaseq", make_option(c("-p", "--project_code"), type="character",default="rnaseq",
help="Project code, which is used as prefix of output file. [default: rnaseq]") help="Project code, which is used as prefix of output file. [default: rnaseq]")
) )


##import exp file ##import exp file
out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="") out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="")
logexpr<-read.table(opt$input,header=T,stringsAsFactors=F,row.names=1)
logexpr<-fread(opt$input,header=T,stringsAsFactors=F,row.names=1,check.names=F,data.table=F)


#check exp file is log scale #check exp file is log scale
if(max(logexpr[,1])-min(logexpr[,1])>100){ if(max(logexpr[,1])-min(logexpr[,1])>100){

+ 4
- 3
RNAseq_4_pwDEG.R 查看文件

# choppy report script like : @scatter-plot(dataFile='/mnt/c/Users/YY/Documents/working/choppy_report/data/zhanggroup_P1-6vsP7-13_choppy_scatterplot_degs.rds', dataType='rds', xAxis='log2FC', xTitle="log2FC",yAxis='log10p',yTitle="-log10 (p)") # choppy report script like : @scatter-plot(dataFile='/mnt/c/Users/YY/Documents/working/choppy_report/data/zhanggroup_P1-6vsP7-13_choppy_scatterplot_degs.rds', dataType='rds', xAxis='log2FC', xTitle="log2FC",yAxis='log10p',yTitle="-log10 (p)")


suppressPackageStartupMessages(library("optparse")) suppressPackageStartupMessages(library("optparse"))
suppressPackageStartupMessages(library("data.table"))


# specify our desired options in a list # specify our desired options in a list
# by default OptionParser will add an help option equivalent to # by default OptionParser will add an help option equivalent to
make_option(c("-i", "--input"),type="character", default=NULL, make_option(c("-i", "--input"),type="character", default=NULL,
help="The input expression files. Required!"), help="The input expression files. Required!"),
make_option(c("-g", "--sample_group"),type="character", default=NULL, make_option(c("-g", "--sample_group"),type="character", default=NULL,
help="File for sample group infomation.The input file containing sample name and group infomation. note colname must be like: sample group1 group2... Required! "),
help="File in tab-delimited format for sample group infomation. The input file containing sample name and group infomation. note colname must be like: sample group1 group2... Required! "),
make_option(c("-p", "--project_code"), type="character",default="rnaseq", make_option(c("-p", "--project_code"), type="character",default="rnaseq",
help="Project code, which is used as prefix of output file. [default: rnaseq]"), help="Project code, which is used as prefix of output file. [default: rnaseq]"),
make_option(c("-a", "--output_all_genes"), metavar="FALSE", default=FALSE, make_option(c("-a", "--output_all_genes"), metavar="FALSE", default=FALSE,


##import files ##import files
out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="") out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="")
logexpr<-read.table(opt$input,header=T,stringsAsFactors=F,row.names=1)
logexpr<-fread(opt$input,header=T,stringsAsFactors=F,row.names=1,check.names=F,data.table=F)


#check exp file is log scale #check exp file is log scale
if(max(logexpr[,1])-min(logexpr[,1])>100){ if(max(logexpr[,1])-min(logexpr[,1])>100){
######## ########
write.csv(degstat,paste(out_dir,opt$project_code,"_degs_stats.csv",sep=""),row.names=F) write.csv(degstat,paste(out_dir,opt$project_code,"_degs_stats.csv",sep=""),row.names=F)
saveRDS(degstat,paste(out_dir,opt$project_code,"_degs_stats.rds",sep="")) saveRDS(degstat,paste(out_dir,opt$project_code,"_degs_stats.rds",sep=""))
message("RNAseq_4_pwDEG.R finished!")
} }

+ 35
- 16
RNAseq_5_pwGSEA.R 查看文件



suppressPackageStartupMessages(library("optparse")) suppressPackageStartupMessages(library("optparse"))
suppressPackageStartupMessages(library("fgsea")) suppressPackageStartupMessages(library("fgsea"))
suppressPackageStartupMessages(library("data.table"))


# specify our desired options in a list # specify our desired options in a list
# by default OptionParser will add an help option equivalent to # by default OptionParser will add an help option equivalent to
make_option(c("-e", "--type_gene_id"),type="character", default="EnsemblGID", make_option(c("-e", "--type_gene_id"),type="character", default="EnsemblGID",
help="The type of gene symbol. Could be either of EnsemblGID/EntrezID/GeneSymbol [default: EnsemblGID]"), help="The type of gene symbol. Could be either of EnsemblGID/EntrezID/GeneSymbol [default: EnsemblGID]"),
make_option(c("-g", "--sample_group"),type="character", default=NULL, make_option(c("-g", "--sample_group"),type="character", default=NULL,
help="File for sample group infomation.The input file containing sample name and group infomation. note colname must be like: sample group1 group2... Required! "),
help="File in tab-delimited format for sample group infomation. The input file containing sample name and group infomation. note colname must be like: sample group1 group2... Required! "),
make_option(c("-q", "--padjvalueCutoff"), type="double",default=0.2,metavar="number", make_option(c("-q", "--padjvalueCutoff"), type="double",default=0.2,metavar="number",
help="Cutoff value of adjusted p value. [default: 0.2]"), help="Cutoff value of adjusted p value. [default: 0.2]"),
make_option(c("-p", "--project_code"), type="character",default="rnaseq", make_option(c("-p", "--project_code"), type="character",default="rnaseq",
help="Project code, which is used as prefix of output file. [default: rnaseq]")
help="Project code, which is used as prefix of output file. [default: rnaseq]"),
make_option(c("-d", "--ref_rdata_dir"), type="character",default="./",
help="The directory of reference files: human_c2_v5p2.rdata, human_c5_v5p2.rdata and ID_convert_table.rds. [default: ./]")
) )


# get command line options, if help option encountered print help and exit, # get command line options, if help option encountered print help and exit,


##import file ##import file
out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="") out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="")
logexpr<-read.table(opt$input,header=T,stringsAsFactors=F,row.names=1)
logexpr<-fread(opt$input,header=T,stringsAsFactors=F,row.names=1,check.names=F,data.table=F)


#check exp file is log scale #check exp file is log scale
if(max(logexpr[,1])-min(logexpr[,1])>100){ if(max(logexpr[,1])-min(logexpr[,1])>100){
if(length(grep("group",colnames(sample_group)))==0){ if(length(grep("group",colnames(sample_group)))==0){
stop("No group is identified in sample_group file. Make sure the head of sample_group file is like sample, group1, group2.") stop("No group is identified in sample_group file. Make sure the head of sample_group file is like sample, group1, group2.")
} }

#refdir
refdir<-paste(gsub("/$","",opt$ref_rdata_dir),"/",sep="")
#c2: curated gene sets (rdata file) #c2: curated gene sets (rdata file)
load("./human_c2_v5p2.rdata")
#c5: GO gene sets (rdata file) #c5: GO gene sets (rdata file)
load("./human_c5_v5p2.rdata")

# Load the reference gene-set files from `refdir` (the directory given by
# --ref_rdata_dir, already normalised to end with "/").
# NOTE(review): these .rdata files presumably define Hs.c2 / Hs.c5, which the
# fgsea() calls later in this script consume -- confirm against the files.
#
# file.exists() checks the exact path. The previous grep() over dir(refdir) was
# a regex substring match, so e.g. "human_c2_v5p2.rdata.bak" passed the check
# and load() then failed with an unrelated error.
for (ref_file in c("human_c2_v5p2.rdata", "human_c5_v5p2.rdata")) {
  ref_path <- paste0(refdir, ref_file)
  if (!file.exists(ref_path)) {
    # Report the real file name (.rdata); the old message said ".rds".
    stop("Cannot find ", ref_file, " in the ref_rdata_dir. Exit!", call. = FALSE)
  }
  # At top level load() restores the objects into the global environment.
  load(ref_path)
}


########################## ##########################
#########ID convert####### #########ID convert#######


message("Begin ID conversion.") message("Begin ID conversion.")


if(length(grep("ID_convert_table.rds",dir()))>0){
idconvert<-readRDS("./ID_convert_table.rds")
if(length(grep("ID_convert_table.rds",dir(refdir)))>0){
idconvert<-readRDS(paste(refdir,"ID_convert_table.rds",sep=""))
}else{ }else{
stop("Cannot find ID_convert_table.rds in the working folder. Exit!", call.=FALSE) stop("Cannot find ID_convert_table.rds in the working folder. Exit!", call.=FALSE)
} }
#GSEA in GO term #GSEA in GO term
fgseaRes.c5 <- fgsea(Hs.c5, logfc, minSize=15, maxSize = 500, nperm=1000) fgseaRes.c5 <- fgsea(Hs.c5, logfc, minSize=15, maxSize = 500, nperm=1000)
c5sig<-fgseaRes.c5[fgseaRes.c5$padj<opt$padjvalueCutoff,] c5sig<-fgseaRes.c5[fgseaRes.c5$padj<opt$padjvalueCutoff,]
c5sig<-c5sig[order(c5sig$pval),]
c5sig<-data.frame(c5sig)
c5sig$leadingEdge<-sapply(c5sig$leadingEdge,function(x){paste0(unlist(x),collapse=", ")})

if(nrow(c5sig)==0){ if(nrow(c5sig)==0){
message(paste("No significant GO term is identified in group ",nam,".",sep="")) message(paste("No significant GO term is identified in group ",nam,".",sep=""))
}else{ }else{
message(paste(nrow(c5sig)," significant GO term(s) is(are) identified in group ",nam,".",sep="")) message(paste(nrow(c5sig)," significant GO term(s) is(are) identified in group ",nam,".",sep=""))

c5sig<-c5sig[order(c5sig$pval),]
c5sig<-data.frame(c5sig)
c5sig$leadingEdge<-sapply(c5sig$leadingEdge,function(x){paste0(unlist(x),collapse=", ")})

c5sigall<-rbind(c5sigall,cbind(versus,c5sig)) c5sigall<-rbind(c5sigall,cbind(versus,c5sig))
} }
#GSEA in curated gene sets #GSEA in curated gene sets


fgseaRes.c2 <- fgsea(Hs.c2, logfc, minSize=15, maxSize = 500, nperm=1000) fgseaRes.c2 <- fgsea(Hs.c2, logfc, minSize=15, maxSize = 500, nperm=1000)
c2sig<-fgseaRes.c2[fgseaRes.c2$padj<opt$padjvalueCutoff,] c2sig<-fgseaRes.c2[fgseaRes.c2$padj<opt$padjvalueCutoff,]
c2sig<-c2sig[order(c2sig$pval),]
c2sig<-data.frame(c2sig)
c2sig$leadingEdge<-sapply(c2sig$leadingEdge,function(x){paste0(unlist(x),collapse=", ")})

if(nrow(c2sig)==0){ if(nrow(c2sig)==0){
message(paste("No significant curated gene sets is identified in group ",nam,".",sep="")) message(paste("No significant curated gene sets is identified in group ",nam,".",sep=""))
}else{ }else{
message(paste(nrow(c2sig)," significant curated gene sets are identified in group ",nam,".",sep="")) message(paste(nrow(c2sig)," significant curated gene sets are identified in group ",nam,".",sep=""))

c2sig<-c2sig[order(c2sig$pval),]
c2sig<-data.frame(c2sig)
c2sig$leadingEdge<-sapply(c2sig$leadingEdge,function(x){paste0(unlist(x),collapse=", ")})
c2sigall<-rbind(c2sigall,cbind(versus,c2sig)) c2sigall<-rbind(c2sigall,cbind(versus,c2sig))
} }
} }
} }


if(nrow(c5sigall)==0){
if(length(c5sigall)==0){
message("No significant GO term is identified.") message("No significant GO term is identified.")
}else{ }else{
c5sigall$pval<-signif(c5sigall$pval,4) c5sigall$pval<-signif(c5sigall$pval,4)
write.csv(c5sigall,paste(out_dir,opt$project_code,"_gsea_go.csv",sep="")) write.csv(c5sigall,paste(out_dir,opt$project_code,"_gsea_go.csv",sep=""))
} }


if(nrow(c2sigall)==0){
if(length(c2sigall)==0){
message("No significant GO term is identified.") message("No significant GO term is identified.")
}else{ }else{
c2sigall$pval<-signif(c2sigall$pval,4) c2sigall$pval<-signif(c2sigall$pval,4)

Loading…
取消
儲存