RNA-seq下游数据分析-ballgown到报告。 以Rscript为主,对接PGx RNA-seq choppy现有pipeline,到生成RNA-seq分析报告所需的rds和csv文件。
r
No puede seleccionar más de 25 temas Los temas deben comenzar con una letra o número, pueden incluir guiones ('-') y pueden tener hasta 35 caracteres de largo.

113 líneas
5.1KB

  1. #!/usr/bin/env Rscript
  2. # example:
  3. # Rscript RNAseq_1_expr_stringtieout.R
  4. #Rscript RNAseq_1_expr_stringtieout.R -o ../ -l FALSE -s samplenames.txt -p 111
  5. suppressPackageStartupMessages(library("optparse"))
  6. suppressPackageStartupMessages(library("data.table"))
  7. # specify our desired options in a list
  8. # by default OptionParser will add an help option equivalent to
  9. # make_option(c("-h", "--help"), action="store_true", default=FALSE,
  10. # help="Show this help message and exit")
  11. option_list <- list(
  12. make_option(c("-o", "--out_dir"), type="character",default="./",
  13. help="The output directory [default ./]"),
  14. make_option(c("-i", "--input_dir"),type="character", default="./",
  15. help="The directory input of expression files. It is output from stringtie software named as \".gene.abundance.txt\"."),
  16. make_option(c("-f", "--floor_value"),metavar="number",default=0.01,
  17. help="A number to add to each value before log2 transformation to avoid infinite value.[default: 0.01]"),
  18. make_option(c("-l", "--log2_norm"), metavar="TRUE", default=TRUE,
  19. help="Perform log2 transformation on FPKM/TPM value. [default: TRUE]"),
  20. make_option(c("-s", "--sample_name"),type="character", default=NULL,
  21. help="File in tab-delimited format for sample name if usr want to rename sample name. The input file containing sample name as file name and sample name to be renamed."),
  22. make_option(c("-p", "--project_code"), type="character",default="rnaseq",
  23. help="Project code, which is used as prefix of output file. [default: rnaseq]")
  24. )
  25. # get command line options, if help option encountered print help and exit,
  26. # otherwise if options not found on command line then set defaults,
  27. opt <- parse_args(OptionParser(option_list=option_list))
  28. #modify dir input
  29. in_dir<-paste(gsub("/$","",opt$input_dir),"/",sep="")
  30. out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="")
  31. #read gene.abundance.txt files into
  32. if(length(grep("gene.abundance.txt",dir(in_dir)))==0){
  33. stop("Cannot find *gene.abundance.txt files in the working folder. Exit!", call.=FALSE)
  34. }
  35. if(length(grep("gene.abundance.txt",dir(in_dir)))==1){
  36. stop("Only one *gene.abundance.txt files in the working folder. Exit!", call.=FALSE)
  37. }
  38. genefile<-dir(in_dir)[grep("gene.abundance.txt",dir(in_dir))]
  39. message(paste("Detect ",length(genefile)," files named as *gene.abundance.txt. \nMerging. ",sep=""))
  40. #read first one
  41. eval(parse(text =paste("a<-fread(\"",genefile[1],"\",header=T,sep=\"\\t\",stringsAsFactors=F,check.names = F,data.table=F)",sep="")))
  42. atpm<-tapply(a$TPM,as.factor(a$"Gene ID"),mean)
  43. expr_fpkm<-matrix(0,ncol=length(genefile),nrow=length(atpm))
  44. expr_tpm<-matrix(0,ncol=length(genefile),nrow=length(atpm))
  45. rownames(expr_fpkm)<-names(atpm)
  46. rownames(expr_tpm)<-names(atpm)
  47. #merge gene file
  48. for (i in 1:length(genefile)){
  49. eval(parse(text =paste("a<-fread(\"",genefile[i],"\",header=T,sep=\"\\t\",stringsAsFactors=F,check.names = F,data.table=F)",sep="")))
  50. atpm<-tapply(a$TPM,as.factor(a$"Gene ID"),mean)
  51. afpkm<-tapply(a$FPKM,as.factor(a$"Gene ID"),mean)
  52. expr_tpm[,i]<-atpm[match(rownames(expr_tpm),names(atpm))]
  53. expr_fpkm[,i]<-afpkm[match(rownames(expr_fpkm),names(afpkm))]
  54. message(paste("Merge ", i, "/",length(genefile)," gene.abundance.txt files ",sep=""))
  55. }
  56. #colnames
  57. #remove _1P.gene.abundance.txt from colnames, _1P is from alicloud app
  58. if (is.null(opt$sample_name)){
  59. samplename<-gsub("_1P.gene.abundance.txt","",genefile)
  60. message("Sample name is not specified. Using file names instead.")
  61. }else{
  62. sample_name<-read.table(opt$sample_name,sep="\t",header=T,stringsAsFactors=F,check.names=F)
  63. samplename<-gsub("_1P.gene.abundance.txt","",genefile)
  64. sample_name[,1]<-gsub(".gene.abundance.txt","",sample_name[,1])
  65. sample_name[,1]<-gsub("_1P$","",sample_name[,1])
  66. samplename<-sample_name[match(samplename,sample_name[,1]),2]
  67. }
  68. colnames(expr_tpm)<-samplename
  69. colnames(expr_fpkm)<-samplename
  70. if(opt$log2_norm==TRUE){
  71. message("start log2 transformation")
  72. #tpm
  73. logexpr_tpm<-apply(expr_tpm,2,function(x){log2(x+as.numeric(opt$floor_value))})
  74. logexpr_tpm_out<-cbind(rownames(logexpr_tpm),round(logexpr_tpm,3))
  75. colnames(logexpr_tpm_out)[1]<-"Gene"
  76. write.table(logexpr_tpm_out,file = paste(out_dir,opt$project_code,"_geneexp_log2TPM.txt",sep=""),sep="\t",row.names=F,quote=F)
  77. #fpkm
  78. logexpr_fpkm<-apply(expr_fpkm,2,function(x){log2(x+as.numeric(opt$floor_value))})
  79. logexpr_fpkm_out<-cbind(rownames(logexpr_fpkm),round(logexpr_fpkm,3))
  80. colnames(logexpr_fpkm_out)[1]<-"Gene"
  81. write.table(logexpr_fpkm_out,file = paste(out_dir,opt$project_code,"_geneexp_log2FPKM.txt",sep=""),sep="\t",row.names=F,quote=F)
  82. message("Write log2 TPM and FPKM expression file.")
  83. }else{
  84. #output expression file
  85. #tpm
  86. expr_tpm_out<-cbind(rownames(expr_tpm),round(expr_tpm,3))
  87. colnames(expr_tpm_out)[1]<-"Gene"
  88. write.table(expr_tpm_out,file = paste(out_dir,opt$project_code,"_geneexp_TPM.txt",sep=""),sep="\t",row.names=F,quote=F)
  89. #fpkm
  90. expr_fpkm_out<-cbind(rownames(expr_fpkm),round(expr_fpkm,3))
  91. colnames(expr_fpkm_out)[1]<-"Gene"
  92. write.table(expr_fpkm_out,file = paste(out_dir,opt$project_code,"_geneexp_FPKM.txt",sep=""),sep="\t",row.names=F,quote=F)
  93. message("Write TPM and FPKM expression file.")
  94. }