RNA-seq下游数据分析-ballgown到报告。 以Rscript为主,对接PGx RNA-seq choppy现有pipeline,到生成RNA-seq分析报告所需的rds和csv文件。
r
Nevar pievienot vairāk kā 25 tēmas Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

184 rindas
6.4KB

  1. #!/usr/bin/env Rscript
  2. ###Copyright 2019 Ying Yu from Fudan-PGx group
  3. # example:
  4. # Rscript RNAseq_6_enrichfunc.R -i rnaseq_degs_acrossgroups.csv
  5. suppressPackageStartupMessages(library("optparse"))
  6. suppressPackageStartupMessages(library("clusterProfiler"))
  7. # specify our desired options in a list
  8. # by default OptionParser will add an help option equivalent to
  9. # make_option(c("-h", "--help"), action="store_true", default=FALSE,
  10. # help="Show this help message and exit")
  11. # input input list , rds, from * to *
  12. option_list <- list(
  13. make_option(c("-o", "--out_dir"), type="character",default="./",
  14. help="The output directory [default ./]"),
  15. make_option(c("-i", "--input"),type="character", default=NULL,
  16. help="The input DEG list in csv format. The first column: gene; second column: group. Required! "),
  17. make_option(c("-e", "--type_gene_id"),type="character", default="EnsemblGID",
  18. help="The type of gene symbol. Could be either of EnsemblGID/EntrezID/GeneSymbol [default: EnsemblGID]"),
  19. make_option(c("-f", "--pvalueCutoff"), type="double",default=0.05,metavar="number",
  20. help="Cutoff value of p value. [default: 0.05]"),
  21. make_option(c("-m", "--pAdjustMethod"), type="character",default="BH",
  22. help="Method of adjust p value. One of \"holm\", \"hochberg\", \"hommel\", \"bonferroni\", \"BH\", \"BY\", \"fdr\", \"none\". [default: BH]"),
  23. make_option(c("-q", "--qvalueCutoff"), type="double",default=0.2,metavar="number",
  24. help="Cutoff value of q value. [default: 0.2]"),
  25. make_option(c("-p", "--project_code"), type="character",default="rnaseq",
  26. help="Project code, which is used as prefix of output file. [default: rnaseq]"),
  27. make_option(c("-d", "--ref_rdata_dir"), type="character",default="./",
  28. help="The directory of reference files: human_c2_v5p2.rdata, human_c5_v5p2.rdata and ID_convert_table.rds. [default: ./]")
  29. )
  30. # get command line options, if help option encountered print help and exit,
  31. # otherwise if options not found on command line then set defaults,
  32. opt <- parse_args(OptionParser(option_list=option_list))
  33. if (is.null(opt$input)){
  34. print_help(opt_parser)
  35. stop("At least one argument must be supplied (input file).", call.=FALSE)
  36. }
  37. message("Need to connected to the Internet.")
  38. ##import file
  39. out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="")
  40. gene<-read.csv(opt$input,header=T,stringsAsFactors=F)
  41. ##########################
  42. #########ID convert#######
  43. ##########################
  44. message("Begin ID conversion.")
  45. refdir<-paste(gsub("/$","",opt$ref_rdata_dir),"/",sep="")
  46. if(length(grep("ID_convert_table.rds",dir(refdir)))>0){
  47. idconvert<-readRDS(paste(refdir,"ID_convert_table.rds",sep=""))
  48. }else{
  49. stop("Cannot find ID_convert_table.rds in the ref_rdata_dir. Exit!", call.=FALSE)
  50. }
  51. if(opt$type_gene_id=="EnsemblGID"){
  52. gene$EntrezID<-idconvert$EntrezID[match(gene[,1],idconvert$EnsemblID)]
  53. if(length(which(is.na(gene$EntrezID)))==nrow(gene)){
  54. stop("Cannot convert Ensembl gene ID to Entrez gene ID. Exit!", call.=FALSE)
  55. }
  56. }
  57. if(opt$type_gene_id=="GeneSymbol"){
  58. gene$EntrezID<-idconvert$EntrezID[match(gene[,1],idconvert$GeneSymbol)]
  59. if(length(which(is.na(gene$EntrezID)))==nrow(gene)){
  60. stop("Cannot convert GeneSymbol to Entrez gene ID. Exit!", call.=FALSE)
  61. }
  62. }
  63. if(opt$type_gene_id=="EntrezID"){
  64. gene$EntrezID<-gene[,1]
  65. }
  66. message("Finish ID conversion.")
  67. ##########################
  68. #########Enrich GO#######
  69. ##########################
  70. groupn<-unique(gene[,2])
  71. if(length(groupn)==0){
  72. message("Warning: no group infomation. Function enrichment will be conducted as one group.")
  73. }else{
  74. message(paste("A number of ", length(groupn)," group(s) is detected. Function enrichment will be conducted in ",length(groupn), " group(s).",sep=""))
  75. }
  76. ####
  77. egoall<-c()
  78. ekeggall<-c()
  79. if(length(groupn)==0){
  80. g1<-gene$EntrezID
  81. g1<-g1[!g1==""]
  82. #conduct enrichment
  83. message("Contucting enrichment analysis on GO terms...")
  84. ego<-data.frame(enrichGO(g1, 'org.Hs.eg.db', ont = 'ALL', pvalueCutoff = opt$pvalueCutoff, pAdjustMethod = opt$pAdjustMethod, qvalueCutoff = opt$qvalueCutoff))
  85. message("Contucting enrichment analysis on KEGG pathways...")
  86. ekg<- data.frame( enrichKEGG(g1, organism = "hsa", keyType = "kegg", pvalueCutoff = opt$pvalueCutoff, pAdjustMethod = opt$pAdjustMethod, qvalueCutoff = opt$qvalueCutoff))
  87. #modify output
  88. if(!nrow(ego)==0){
  89. ego1<-cbind(groupn[i],ego)
  90. colnames(ego1)[1]<-c("versus")
  91. egoall<-rbind(egoall,ego1)
  92. }
  93. if(!nrow(ekg)==0){
  94. ekg1<-cbind(groupn[i],ekg)
  95. colnames(ekg1)[1]<-c("versus")
  96. ekeggall<-rbind(ekeggall,ekg1)
  97. }
  98. }else{
  99. for (i in 1:length(groupn)){
  100. message(paste("Group ", groupn[i],sep=""))
  101. g1<-gene$EntrezID[gene[,2]==groupn[i]]
  102. g1<-g1[!g1==""]
  103. #conduct enrichment
  104. message("Contucting enrichment analysis on GO terms...")
  105. ego<-data.frame(enrichGO(g1, 'org.Hs.eg.db', ont = 'ALL', pvalueCutoff = opt$pvalueCutoff, pAdjustMethod = opt$pAdjustMethod, qvalueCutoff = opt$qvalueCutoff))
  106. message("Contucting enrichment analysis on KEGG pathways...")
  107. ekg<- data.frame( enrichKEGG(g1, organism = "hsa", keyType = "kegg", pvalueCutoff = opt$pvalueCutoff, pAdjustMethod = opt$pAdjustMethod, qvalueCutoff = opt$qvalueCutoff))
  108. if(!nrow(ego)==0){
  109. ego1<-cbind(groupn[i],ego)
  110. colnames(ego1)[1]<-c("versus")
  111. egoall<-rbind(egoall,ego1)
  112. message(paste(nrow(ego),"significant GO term(s) is(are) identified."))
  113. }else{
  114. message("No significant GO term is identified.")
  115. }
  116. if(!nrow(ekg)==0){
  117. ekg1<-cbind(groupn[i],ekg)
  118. colnames(ekg1)[1]<-c("versus")
  119. ekeggall<-rbind(ekeggall,ekg1)
  120. message(paste(nrow(ekg),"significant KEGG pathway(s) is(are) identified."))
  121. }else{
  122. message("No significant KEGG pathway is identified.")
  123. }
  124. }
  125. }
  126. message("Wrinting output...")
  127. #write output
  128. if(nrow(egoall)==0){
  129. message("No significant GO term is identified across all tested groups.")
  130. }else{
  131. message(paste(nrow(egoall),"significant GO term(s) is(are) identified across all tested groups."))
  132. rownames(egoall)<-c(1:nrow(egoall))
  133. egoall$pvalue<-signif(egoall$pvalue,4)
  134. egoall$p.adjust<-signif(egoall$p.adjust,4)
  135. egoall$qvalue<-signif(egoall$qvalue,4)
  136. write.csv(egoall,paste(out_dir,opt$project_code,"_GOenrich.csv",sep=""))
  137. }
  138. if(nrow(ekeggall)==0){
  139. message("No significant KEGG pathway is identified across all tested groups.")
  140. }else{
  141. message(paste(nrow(ekeggall),"significant KEGG pathway(s) is(are) identified across all tested groups."))
  142. rownames(ekeggall)<-c(1:nrow(ekeggall))
  143. ekeggall$pvalue<-signif(ekeggall$pvalue,4)
  144. ekeggall$p.adjust<-signif(ekeggall$p.adjust,4)
  145. ekeggall$qvalue<-signif(ekeggall$qvalue,4)
  146. write.csv(ekeggall,paste(out_dir,opt$project_code,"_KEGGenrich.csv",sep=""))
  147. }
  148. ########