RNA-seq下游数据分析-ballgown到报告。 以Rscript为主,对接PGx RNA-seq choppy现有pipeline,到生成RNA-seq分析报告所需的rds和csv文件。
r
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

76 satır
2.8KB

  1. #!/usr/bin/env Rscript
  2. ###Copyright 2019 Ying Yu from Fudan-PGx group
  3. # example:
  4. # Rscript RNAseq_2_pca.R -o /home/yuying/rnaseqreport_test -i ballgown_geneexp_log2fpkm_floor0p01_c3r58395_2019-04-29.txt -g group1.txt -p organoid
  5. suppressPackageStartupMessages(library("optparse"))
  # ---- Command-line options ----
  # Options accepted by this script. OptionParser adds a help option by
  # default, equivalent to:
  #   make_option(c("-h", "--help"), action = "store_true", default = FALSE,
  #               help = "Show this help message and exit")
  option_list <- list(
    make_option(c("-o", "--out_dir"), type = "character", default = "./",
                help = "The output directory [default ./]"),
    make_option(c("-i", "--input"), type = "character", default = NULL,
                help = "The input expression files. required!"),
    make_option(c("-g", "--sample_group"), type = "character", default = NULL,
                help = "File for sample group infomation. The input file containing sample name and group infomation. note colname must be like: sample group1 group2... "),
    make_option(c("-p", "--project_code"), type = "character", default = "rnaseq",
                help = "Project code, which is used as prefix of output file. [default: rnaseq]")
  )
  # Build the parser once and keep it in a variable: print_help() below needs
  # the parser object itself, not the parsed option values.
  opt_parser <- OptionParser(option_list = option_list)
  # Parse the command line. If the help option is encountered, help is printed
  # and the script exits; options not given on the command line take their
  # defaults.
  opt <- parse_args(opt_parser)
  # --input has no default, so a NULL here means the user did not supply it.
  if (is.null(opt$input)) {
    print_help(opt_parser)
    stop("At least one argument must be supplied (input file).", call. = FALSE)
  }
  ## Import the expression file
  # Drop one trailing "/" (if any) and append one, because the output paths
  # are later built by plain string concatenation onto out_dir.
  out_dir <- paste0(sub("/$", "", opt$out_dir), "/")
  # The header row supplies the column names and the first column supplies the
  # row names of the expression table.
  # NOTE(review): read.table() is left at check.names = TRUE, so column names
  # that are not syntactic R names get rewritten by make.names(); confirm they
  # still match the `sample` values of the sample_group file.
  logexpr <- read.table(opt$input, header = TRUE, stringsAsFactors = FALSE,
                        row.names = 1)
  # Check that the expression values look log-scaled: a spread of more than
  # 100 units is treated as evidence of raw (non-log) values. The range is
  # taken over every column, not just the first one, and NA values are
  # ignored so that a missing value cannot break the `if` condition.
  expr_range <- range(logexpr, na.rm = TRUE)
  if (expr_range[2] - expr_range[1] > 100) {
    stop("PCA anlaysis shoulc be conducted based on expression profile on log scale.", call.=FALSE)
  }
  # ---- PCA ----
  # Run the principal component analysis on the transposed expression table,
  # so that the columns of logexpr become the rows (observations) of the PCA.
  pc.cr <- prcomp(x = t(logexpr), retx = TRUE)
  # Turn the rotated data (one row per column of logexpr) into a data frame
  # and expose the row names as an explicit `sample` column.
  pca <- data.frame(pc.cr$x)
  pca[["sample"]] <- rownames(pca)
  # pcanew starts as a copy of pca; it is the object written to disk at the
  # end of the script.
  pcanew <- pca
  message("PCA finished.")
  # ---- PCA finished ----
  # ---- Optional group annotation ----
  # When a sample_group file is given, append every column whose name
  # contains "group" to the PCA table, aligned to the PCA rows by sample name.
  if (is.null(opt$sample_group)) {
    message("Warning: no group sample file. PCA will not be able to colored by group.")
  } else {
    sample_group <- read.table(opt$sample_group, sep = "\t", header = TRUE)
    # Positions of the group columns, located once and reused below.
    groupn <- grep("group", colnames(sample_group))
    if (length(groupn) == 0) {
      message("No group is identified in sample_group file. Make sure the head of sample_group file is like sample, group1, group2.")
    } else {
      # For each PCA row, the matching row of the sample_group table (NA when
      # the sample is not listed there).
      sample_key <- sample_group$sample
      row_idx <- match(pca$sample, sample_key)
      # Unmatched samples still get NA group values, as before, but the user
      # is now told about it instead of getting silent NA columns.
      if (is.null(sample_key)) {
        warning("sample_group file has no 'sample' column; all group values will be NA.",
                call. = FALSE)
      } else if (anyNA(row_idx)) {
        warning(sprintf("%d of %d samples have no entry in the sample_group file; their group values will be NA.",
                        sum(is.na(row_idx)), length(row_idx)),
                call. = FALSE)
      }
      # Pull all group columns in one step instead of growing pcanew inside a
      # loop; drop = FALSE keeps a data frame (and the column name) even when
      # there is only one group column.
      group_cols <- sample_group[row_idx, groupn, drop = FALSE]
      # Discard the subset's row names so the sample row names of pcanew are
      # the ones kept by cbind().
      rownames(group_cols) <- NULL
      pcanew <- cbind(pcanew, group_cols)
    }
  }
  # ---- Write output ----
  # Create the output directory when it does not exist yet; otherwise the
  # script would only fail here, after all the computation has been done.
  if (!dir.exists(out_dir)) {
    dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
  }
  # Both files share the prefix <out_dir><project_code>; out_dir already ends
  # with "/".
  out_prefix <- paste0(out_dir, opt$project_code)
  write.csv(pcanew, paste0(out_prefix, "_pca.csv"))
  saveRDS(pcanew, paste0(out_prefix, "_pca.rds"))
  ########