# Rscript RNAseq_2_pca.R -o /home/yuying/rnaseqreport_test -i ballgown_geneexp_log2fpkm_floor0p01_c3r58395_2019-04-29.txt -g group1.txt -p organoid | # Rscript RNAseq_2_pca.R -o /home/yuying/rnaseqreport_test -i ballgown_geneexp_log2fpkm_floor0p01_c3r58395_2019-04-29.txt -g group1.txt -p organoid | ||||
suppressPackageStartupMessages(library("optparse")) | suppressPackageStartupMessages(library("optparse")) | ||||
suppressPackageStartupMessages(library("data.table")) | |||||
# specify our desired options in a list | # specify our desired options in a list | ||||
# by default OptionParser will add an help option equivalent to | # by default OptionParser will add an help option equivalent to | ||||
make_option(c("-i", "--input"),type="character", default=NULL, | make_option(c("-i", "--input"),type="character", default=NULL, | ||||
help="The input expression files. required!"), | help="The input expression files. required!"), | ||||
make_option(c("-g", "--sample_group"),type="character", default=NULL, | make_option(c("-g", "--sample_group"),type="character", default=NULL, | ||||
help="File for sample group infomation. The input file containing sample name and group infomation. note colname must be like: sample group1 group2... "), | |||||
help="File in tab-delimited format for sample group information. The input file contains sample name and group information. Note: column names must be like: sample group1 group2... "), | |||||
make_option(c("-p", "--project_code"), type="character",default="rnaseq", | make_option(c("-p", "--project_code"), type="character",default="rnaseq", | ||||
help="Project code, which is used as prefix of output file. [default: rnaseq]") | help="Project code, which is used as prefix of output file. [default: rnaseq]") | ||||
) | ) | ||||
##import exp file | ##import exp file | ||||
out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="") | out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="") | ||||
logexpr<-read.table(opt$input,header=T,stringsAsFactors=F,row.names=1) | |||||
logexpr<-fread(opt$input,header=T,stringsAsFactors=F,row.names=1,check.names=F,data.table=F) | |||||
#check exp file is log scale | #check exp file is log scale | ||||
if(max(logexpr[,1])-min(logexpr[,1])>100){ | if(max(logexpr[,1])-min(logexpr[,1])>100){ |
# Rscript RNAseq_3_cor.R -o /home/yuying/rnaseqreport_test -i ballgown_geneexp_log2fpkm_floor0p01_c3r58395_2019-04-29.txt -g group1.txt -p organoid | # Rscript RNAseq_3_cor.R -o /home/yuying/rnaseqreport_test -i ballgown_geneexp_log2fpkm_floor0p01_c3r58395_2019-04-29.txt -g group1.txt -p organoid | ||||
suppressPackageStartupMessages(library("optparse")) | suppressPackageStartupMessages(library("optparse")) | ||||
suppressPackageStartupMessages(library("data.table")) | |||||
# specify our desired options in a list | # specify our desired options in a list | ||||
# by default OptionParser will add an help option equivalent to | # by default OptionParser will add an help option equivalent to | ||||
make_option(c("-i", "--input"),type="character", default=NULL, | make_option(c("-i", "--input"),type="character", default=NULL, | ||||
help="The input expression files. required!"), | help="The input expression files. required!"), | ||||
make_option(c("-g", "--sample_group"),type="character", default=NULL, | make_option(c("-g", "--sample_group"),type="character", default=NULL, | ||||
help="File for sample group infomation.The input file containing sample name and group infomation. note colname must be like: sample group1 group2... "), | |||||
help="File in tab-delimited format for sample group information. The input file contains sample name and group information. Note: column names must be like: sample group1 group2... "), | |||||
make_option(c("-p", "--project_code"), type="character",default="rnaseq", | make_option(c("-p", "--project_code"), type="character",default="rnaseq", | ||||
help="Project code, which is used as prefix of output file. [default: rnaseq]") | help="Project code, which is used as prefix of output file. [default: rnaseq]") | ||||
) | ) | ||||
##import exp file | ##import exp file | ||||
out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="") | out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="") | ||||
logexpr<-read.table(opt$input,header=T,stringsAsFactors=F,row.names=1) | |||||
logexpr<-fread(opt$input,header=T,stringsAsFactors=F,row.names=1,check.names=F,data.table=F) | |||||
#check exp file is log scale | #check exp file is log scale | ||||
if(max(logexpr[,1])-min(logexpr[,1])>100){ | if(max(logexpr[,1])-min(logexpr[,1])>100){ |
# choppy report script like : @scatter-plot(dataFile='/mnt/c/Users/YY/Documents/working/choppy_report/data/zhanggroup_P1-6vsP7-13_choppy_scatterplot_degs.rds', dataType='rds', xAxis='log2FC', xTitle="log2FC",yAxis='log10p',yTitle="-log10 (p)") | # choppy report script like : @scatter-plot(dataFile='/mnt/c/Users/YY/Documents/working/choppy_report/data/zhanggroup_P1-6vsP7-13_choppy_scatterplot_degs.rds', dataType='rds', xAxis='log2FC', xTitle="log2FC",yAxis='log10p',yTitle="-log10 (p)") | ||||
suppressPackageStartupMessages(library("optparse")) | suppressPackageStartupMessages(library("optparse")) | ||||
suppressPackageStartupMessages(library("data.table")) | |||||
# specify our desired options in a list | # specify our desired options in a list | ||||
# by default OptionParser will add an help option equivalent to | # by default OptionParser will add an help option equivalent to | ||||
make_option(c("-i", "--input"),type="character", default=NULL, | make_option(c("-i", "--input"),type="character", default=NULL, | ||||
help="The input expression files. Required!"), | help="The input expression files. Required!"), | ||||
make_option(c("-g", "--sample_group"),type="character", default=NULL, | make_option(c("-g", "--sample_group"),type="character", default=NULL, | ||||
help="File for sample group infomation.The input file containing sample name and group infomation. note colname must be like: sample group1 group2... Required! "), | |||||
help="File in tab-delimited format for sample group information. The input file contains sample name and group information. Note: column names must be like: sample group1 group2... Required! "), | |||||
make_option(c("-p", "--project_code"), type="character",default="rnaseq", | make_option(c("-p", "--project_code"), type="character",default="rnaseq", | ||||
help="Project code, which is used as prefix of output file. [default: rnaseq]"), | help="Project code, which is used as prefix of output file. [default: rnaseq]"), | ||||
make_option(c("-a", "--output_all_genes"), metavar="FALSE", default=FALSE, | make_option(c("-a", "--output_all_genes"), metavar="FALSE", default=FALSE, | ||||
##import files | ##import files | ||||
out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="") | out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="") | ||||
logexpr<-read.table(opt$input,header=T,stringsAsFactors=F,row.names=1) | |||||
logexpr<-fread(opt$input,header=T,stringsAsFactors=F,row.names=1,check.names=F,data.table=F) | |||||
#check exp file is log scale | #check exp file is log scale | ||||
if(max(logexpr[,1])-min(logexpr[,1])>100){ | if(max(logexpr[,1])-min(logexpr[,1])>100){ | ||||
######## | ######## | ||||
write.csv(degstat,paste(out_dir,opt$project_code,"_degs_stats.csv",sep=""),row.names=F) | write.csv(degstat,paste(out_dir,opt$project_code,"_degs_stats.csv",sep=""),row.names=F) | ||||
saveRDS(degstat,paste(out_dir,opt$project_code,"_degs_stats.rds",sep="")) | saveRDS(degstat,paste(out_dir,opt$project_code,"_degs_stats.rds",sep="")) | ||||
message("RNAseq_4_pwDEG.R finished!") | |||||
} | } |
suppressPackageStartupMessages(library("optparse")) | suppressPackageStartupMessages(library("optparse")) | ||||
suppressPackageStartupMessages(library("fgsea")) | suppressPackageStartupMessages(library("fgsea")) | ||||
suppressPackageStartupMessages(library("data.table")) | |||||
# specify our desired options in a list | # specify our desired options in a list | ||||
# by default OptionParser will add an help option equivalent to | # by default OptionParser will add an help option equivalent to | ||||
make_option(c("-e", "--type_gene_id"),type="character", default="EnsemblGID", | make_option(c("-e", "--type_gene_id"),type="character", default="EnsemblGID", | ||||
help="The type of gene symbol. Could be either of EnsemblGID/EntrezID/GeneSymbol [default: EnsemblGID]"), | help="The type of gene symbol. Could be either of EnsemblGID/EntrezID/GeneSymbol [default: EnsemblGID]"), | ||||
make_option(c("-g", "--sample_group"),type="character", default=NULL, | make_option(c("-g", "--sample_group"),type="character", default=NULL, | ||||
help="File for sample group infomation.The input file containing sample name and group infomation. note colname must be like: sample group1 group2... Required! "), | |||||
help="File in tab-delimited format for sample group information. The input file contains sample name and group information. Note: column names must be like: sample group1 group2... Required! "), | |||||
make_option(c("-q", "--padjvalueCutoff"), type="double",default=0.2,metavar="number", | make_option(c("-q", "--padjvalueCutoff"), type="double",default=0.2,metavar="number", | ||||
help="Cutoff value of adjusted p value. [default: 0.2]"), | help="Cutoff value of adjusted p value. [default: 0.2]"), | ||||
make_option(c("-p", "--project_code"), type="character",default="rnaseq", | make_option(c("-p", "--project_code"), type="character",default="rnaseq", | ||||
help="Project code, which is used as prefix of output file. [default: rnaseq]") | |||||
help="Project code, which is used as prefix of output file. [default: rnaseq]"), | |||||
make_option(c("-d", "--ref_rdata_dir"), type="character",default="./", | |||||
help="The directory of reference files: human_c2_v5p2.rdata, human_c5_v5p2.rdata and ID_convert_table.rds. [default: ./]") | |||||
) | ) | ||||
# get command line options, if help option encountered print help and exit, | # get command line options, if help option encountered print help and exit, | ||||
##import file | ##import file | ||||
out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="") | out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="") | ||||
logexpr<-read.table(opt$input,header=T,stringsAsFactors=F,row.names=1) | |||||
logexpr<-fread(opt$input,header=T,stringsAsFactors=F,row.names=1,check.names=F,data.table=F) | |||||
#check exp file is log scale | #check exp file is log scale | ||||
if(max(logexpr[,1])-min(logexpr[,1])>100){ | if(max(logexpr[,1])-min(logexpr[,1])>100){ | ||||
if(length(grep("group",colnames(sample_group)))==0){ | if(length(grep("group",colnames(sample_group)))==0){ | ||||
stop("No group is identified in sample_group file. Make sure the head of sample_group file is like sample, group1, group2.") | stop("No group is identified in sample_group file. Make sure the head of sample_group file is like sample, group1, group2.") | ||||
} | } | ||||
#refdir | |||||
refdir<-paste(gsub("/$","",opt$ref_rdata_dir),"/",sep="") | |||||
#c2: curated gene sets (rdata file) | #c2: curated gene sets (rdata file) | ||||
load("./human_c2_v5p2.rdata") | |||||
#c5: GO gene sets (rdata file) | #c5: GO gene sets (rdata file) | ||||
load("./human_c5_v5p2.rdata") | |||||
if(length(grep("human_c2_v5p2.rdata",dir(refdir)))>0){ | |||||
load(paste(refdir,"human_c2_v5p2.rdata",sep="")) | |||||
}else{ | |||||
stop("Cannot find human_c2_v5p2.rdata in the ref_rdata_dir. Exit!", call.=FALSE) | |||||
} | |||||
if(length(grep("human_c5_v5p2.rdata",dir(refdir)))>0){ | |||||
load(paste(refdir,"human_c5_v5p2.rdata",sep="")) | |||||
}else{ | |||||
stop("Cannot find human_c5_v5p2.rdata in the ref_rdata_dir. Exit!", call.=FALSE) | |||||
} | |||||
########################## | ########################## | ||||
#########ID convert####### | #########ID convert####### | ||||
message("Begin ID conversion.") | message("Begin ID conversion.") | ||||
if(length(grep("ID_convert_table.rds",dir()))>0){ | |||||
idconvert<-readRDS("./ID_convert_table.rds") | |||||
if(length(grep("ID_convert_table.rds",dir(refdir)))>0){ | |||||
idconvert<-readRDS(paste(refdir,"ID_convert_table.rds",sep="")) | |||||
}else{ | }else{ | ||||
stop("Cannot find ID_convert_table.rds in the working folder. Exit!", call.=FALSE) | stop("Cannot find ID_convert_table.rds in the ref_rdata_dir. Exit!", call.=FALSE) | ||||
} | } | ||||
#GSEA in GO term | #GSEA in GO term | ||||
fgseaRes.c5 <- fgsea(Hs.c5, logfc, minSize=15, maxSize = 500, nperm=1000) | fgseaRes.c5 <- fgsea(Hs.c5, logfc, minSize=15, maxSize = 500, nperm=1000) | ||||
c5sig<-fgseaRes.c5[fgseaRes.c5$padj<opt$padjvalueCutoff,] | c5sig<-fgseaRes.c5[fgseaRes.c5$padj<opt$padjvalueCutoff,] | ||||
c5sig<-c5sig[order(c5sig$pval),] | |||||
c5sig<-data.frame(c5sig) | |||||
c5sig$leadingEdge<-sapply(c5sig$leadingEdge,function(x){paste0(unlist(x),collapse=", ")}) | |||||
if(nrow(c5sig)==0){ | if(nrow(c5sig)==0){ | ||||
message(paste("No significant GO term is identified in group ",nam,".",sep="")) | message(paste("No significant GO term is identified in group ",nam,".",sep="")) | ||||
}else{ | }else{ | ||||
message(paste(nrow(c5sig)," significant GO term(s) is(are) identified in group ",nam,".",sep="")) | message(paste(nrow(c5sig)," significant GO term(s) is(are) identified in group ",nam,".",sep="")) | ||||
c5sig<-c5sig[order(c5sig$pval),] | |||||
c5sig<-data.frame(c5sig) | |||||
c5sig$leadingEdge<-sapply(c5sig$leadingEdge,function(x){paste0(unlist(x),collapse=", ")}) | |||||
c5sigall<-rbind(c5sigall,cbind(versus,c5sig)) | c5sigall<-rbind(c5sigall,cbind(versus,c5sig)) | ||||
} | } | ||||
#GSEA in curated gene sets | #GSEA in curated gene sets | ||||
fgseaRes.c2 <- fgsea(Hs.c2, logfc, minSize=15, maxSize = 500, nperm=1000) | fgseaRes.c2 <- fgsea(Hs.c2, logfc, minSize=15, maxSize = 500, nperm=1000) | ||||
c2sig<-fgseaRes.c2[fgseaRes.c2$padj<opt$padjvalueCutoff,] | c2sig<-fgseaRes.c2[fgseaRes.c2$padj<opt$padjvalueCutoff,] | ||||
c2sig<-c2sig[order(c2sig$pval),] | |||||
c2sig<-data.frame(c2sig) | |||||
c2sig$leadingEdge<-sapply(c2sig$leadingEdge,function(x){paste0(unlist(x),collapse=", ")}) | |||||
if(nrow(c2sig)==0){ | if(nrow(c2sig)==0){ | ||||
message(paste("No significant curated gene sets is identified in group ",nam,".",sep="")) | message(paste("No significant curated gene sets is identified in group ",nam,".",sep="")) | ||||
}else{ | }else{ | ||||
message(paste(nrow(c2sig)," significant curated gene sets are identified in group ",nam,".",sep="")) | message(paste(nrow(c2sig)," significant curated gene sets are identified in group ",nam,".",sep="")) | ||||
c2sig<-c2sig[order(c2sig$pval),] | |||||
c2sig<-data.frame(c2sig) | |||||
c2sig$leadingEdge<-sapply(c2sig$leadingEdge,function(x){paste0(unlist(x),collapse=", ")}) | |||||
c2sigall<-rbind(c2sigall,cbind(versus,c2sig)) | c2sigall<-rbind(c2sigall,cbind(versus,c2sig)) | ||||
} | } | ||||
} | } | ||||
} | } | ||||
if(nrow(c5sigall)==0){ | |||||
if(length(c5sigall)==0){ | |||||
message("No significant GO term is identified.") | message("No significant GO term is identified.") | ||||
}else{ | }else{ | ||||
c5sigall$pval<-signif(c5sigall$pval,4) | c5sigall$pval<-signif(c5sigall$pval,4) | ||||
write.csv(c5sigall,paste(out_dir,opt$project_code,"_gsea_go.csv",sep="")) | write.csv(c5sigall,paste(out_dir,opt$project_code,"_gsea_go.csv",sep="")) | ||||
} | } | ||||
if(nrow(c2sigall)==0){ | |||||
if(length(c2sigall)==0){ | |||||
message("No significant curated gene set is identified.") | message("No significant curated gene set is identified.") | ||||
}else{ | }else{ | ||||
c2sigall$pval<-signif(c2sigall$pval,4) | c2sigall$pval<-signif(c2sigall$pval,4) |