RNA-seq下游数据分析-ballgown到报告。 以Rscript为主,对接PGx RNA-seq choppy现有pipeline,到生成RNA-seq分析报告所需的rds和csv文件。
r
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

71 line
2.8KB

  1. #!/usr/bin/env Rscript
  2. # example:
  3. # Rscript RNAseq_1_ballgown.R -o /home/yuying/rnaseqreport_test -i ./ballgown/ -l FALSE -p test
  4. # Rscript RNAseq_1_ballgown.R -o /home/yuying/rnaseqreport_test -i ./ballgown/
  5. suppressPackageStartupMessages(library("optparse"))
  6. suppressPackageStartupMessages(library("ballgown"))
  7. # specify our desired options in a list
  8. # by default OptionParser will add an help option equivalent to
  9. # make_option(c("-h", "--help"), action="store_true", default=FALSE,
  10. # help="Show this help message and exit")
  11. option_list <- list(
  12. make_option(c("-o", "--out_dir"), type="character",default="./",
  13. help="The output directory [default ./]"),
  14. make_option(c("-i", "--input"),type="character", default=NULL,
  15. help="The directory input of expression files. It is output from ballgown software."),
  16. make_option(c("-f", "--floor_value"),metavar="number",default=0.01,
  17. help="A number to add to each value before log2 transformation to avoid infinite value.[default: 0.01]"),
  18. make_option(c("-l", "--log2_norm"), metavar="TRUE", default=TRUE,
  19. help="Perform log2 transformation on FPKM value. [default: TRUE]"),
  20. make_option(c("-p", "--project_code"), type="character",default="rnaseq",
  21. help="Project code, which is used as prefix of output file. [default: rnaseq]")
  22. )
  23. # get command line options, if help option encountered print help and exit,
  24. # otherwise if options not found on command line then set defaults,
  25. opt <- parse_args(OptionParser(option_list=option_list))
  26. if (is.null(opt$input)){
  27. print_help(opt_parser)
  28. stop("At least one argument must be supplied (input file).", call.=FALSE)
  29. }
  30. #generate FPKM expression profile from ballgown outputs
  31. geballgown_expr <- ballgown(dataDir = opt$input ,samplePattern = ".*",meas = "all")
  32. expr <- gexpr(geballgown_expr)
  33. message("finish ballgown\n")
  34. #remove _1P and FPKM from colnames, _1P is from alicloud app, FPKM is added due to default output of stringtie/ballgown.
  35. nam<-colnames(expr)
  36. nam<-gsub("_1P$","",nam)
  37. nam<-gsub("^FPKM.","",nam)
  38. colnames(expr) <- nam
  39. out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="")
  40. if(opt$log2_norm==TRUE){
  41. message("start log2 transformation\n")
  42. logexpr<-apply(expr,2,function(x){log2(x+as.numeric(opt$floor_value))})
  43. logexpr_out<-cbind(rownames(logexpr),round(logexpr,3))
  44. colnames(logexpr_out)[1]<-"Gene"
  45. message("Write log2 expression file\n")
  46. write.table(logexpr_out,file = paste(out_dir,opt$project_code,"_geneexp_log2FPKM.txt",sep=""),sep="\t",row.names=F,quote=F)
  47. }else{
  48. #output expression file with fpkm
  49. expr<-cbind(rownames(expr),round(expr,3))
  50. colnames(expr)[1]<-"Gene"
  51. message("Write fpkm expression file\n")
  52. write.table(expr,file = paste(out_dir,opt$project_code,"_geneexp_FPKM.txt",sep=""),sep="\t",row.names=F,quote=F)
  53. }