#!/usr/bin/env Rscript
# Merge StringTie per-sample gene abundance tables into one TPM matrix and one
# FPKM matrix (genes in rows, samples in columns) and write both as
# tab-delimited text, optionally log2-transformed.
#
# example:
# Rscript RNAseq_1_expr_stringtieout.R
# Rscript RNAseq_1_expr_stringtieout.R -o ../ -l FALSE -s samplenames.txt -p 111
#
# Input : every file in --input_dir whose name contains "gene.abundance.txt".
#         Each file must provide the columns "Gene ID", "TPM" and "FPKM".
# Output: <out_dir><project_code>_geneexp_log2TPM.txt / _geneexp_log2FPKM.txt
#         when --log2_norm is TRUE, otherwise _geneexp_TPM.txt /
#         _geneexp_FPKM.txt. The first column of each file is "Gene".

suppressPackageStartupMessages(library("optparse"))
suppressPackageStartupMessages(library("data.table"))

# specify our desired options in a list
# by default OptionParser will add an help option equivalent to
# make_option(c("-h", "--help"), action="store_true", default=FALSE,
#             help="Show this help message and exit")
option_list <- list(
  make_option(c("-o", "--out_dir"), type = "character", default = "./",
              help = "The output directory [default ./]"),
  make_option(c("-i", "--input_dir"), type = "character", default = "./",
              help = "The directory input of expression files. It is output from stringtie software named as \".gene.abundance.txt\"."),
  make_option(c("-f", "--floor_value"), metavar = "number", default = 0.01,
              help = "A number to add to each value before log2 transformation to avoid infinite value.[default: 0.01]"),
  make_option(c("-l", "--log2_norm"), metavar = "TRUE", default = TRUE,
              help = "Perform log2 transformation on FPKM/TPM value. [default: TRUE]"),
  make_option(c("-s", "--sample_name"), type = "character", default = NULL,
              help = "File in tab-delimited format for sample name if usr want to rename sample name. The input file containing sample name as file name and sample name to be renamed."),
  make_option(c("-p", "--project_code"), type = "character", default = "rnaseq",
              help = "Project code, which is used as prefix of output file. [default: rnaseq]")
)

# ---- helpers ---------------------------------------------------------------

# Read one abundance table and return the per-gene mean TPM and FPKM
# (rows sharing a "Gene ID" are averaged), as a list with elements
# `tpm` and `fpkm`, each named by gene ID.
read_gene_abundance <- function(path) {
  tab <- fread(path, header = TRUE, sep = "\t", stringsAsFactors = FALSE,
               check.names = FALSE, data.table = FALSE)
  absent <- setdiff(c("Gene ID", "TPM", "FPKM"), colnames(tab))
  if (length(absent) > 0) {
    stop("File ", path, " lacks required column(s): ",
         paste(absent, collapse = ", "), call. = FALSE)
  }
  gene <- as.factor(tab[["Gene ID"]])
  list(
    tpm = tapply(tab[["TPM"]], gene, mean),
    fpkm = tapply(tab[["FPKM"]], gene, mean)
  )
}

# Reduce a file name (or a file-name-like entry of the sample sheet) to a bare
# sample identifier: drop ".gene.abundance.txt" together with an optional
# "_1P" in front of it (the "_1P" comes from the alicloud app), then drop a
# trailing "_1P". The same rule is applied to file names and to the sample
# sheet so that the two can always be matched against each other.
normalise_sample_id <- function(x) {
  x <- gsub("(_1P)?\\.gene\\.abundance\\.txt", "", x)
  sub("_1P$", "", x)
}

# Write an expression matrix, rounded to 3 decimals, as a tab-delimited file
# with the gene IDs in a leading "Gene" column.
write_expr_table <- function(mat, path) {
  out <- cbind(rownames(mat), round(mat, 3))
  colnames(out)[1] <- "Gene"
  write.table(out, file = path, sep = "\t", row.names = FALSE, quote = FALSE)
}

# ---- command line ----------------------------------------------------------

# get command line options, if help option encountered print help and exit,
# otherwise if options not found on command line then set defaults,
opt <- parse_args(OptionParser(option_list = option_list))

if (length(opt$log2_norm) != 1 || is.na(opt$log2_norm)) {
  stop("--log2_norm must be TRUE or FALSE.", call. = FALSE)
}
floor_value <- as.numeric(opt$floor_value)
if (length(floor_value) != 1 || is.na(floor_value)) {
  stop("--floor_value must be a number.", call. = FALSE)
}

# normalise both directories so that they end with exactly one "/"
in_dir <- paste0(sub("/$", "", opt$input_dir), "/")
out_dir <- paste0(sub("/$", "", opt$out_dir), "/")

# ---- locate input files ----------------------------------------------------

dir_content <- dir(in_dir)
genefile <- dir_content[grepl("gene.abundance.txt", dir_content, fixed = TRUE)]
if (length(genefile) == 0) {
  stop("Cannot find *gene.abundance.txt files in the working folder. \nExit!",
       call. = FALSE)
}
if (length(genefile) == 1) {
  stop("Only one *gene.abundance.txt files in the working folder. Exit!",
       call. = FALSE)
}
# Files are read from the input directory, not from the working directory.
genepath <- file.path(sub("/$", "", in_dir), genefile)
if (in_dir == "/") {
  genepath <- paste0(in_dir, genefile)
}

# Fail early (before the merge) if the output location is unusable.
if (!dir.exists(out_dir) && !dir.create(out_dir, recursive = TRUE)) {
  stop("Cannot create output directory: ", out_dir, call. = FALSE)
}

message(paste0("Detect ", length(genefile),
               " files named as *gene.abundance.txt. \nMerging. "))

# ---- merge -----------------------------------------------------------------

# The first file defines the gene set and the row order of both matrices.
first <- read_gene_abundance(genepath[1])
genes <- names(first$tpm)
expr_tpm <- matrix(0, ncol = length(genefile), nrow = length(genes))
expr_fpkm <- matrix(0, ncol = length(genefile), nrow = length(genes))
rownames(expr_tpm) <- genes
rownames(expr_fpkm) <- genes

for (i in seq_along(genefile)) {
  abundance <- if (i == 1L) first else read_gene_abundance(genepath[i])
  # Genes absent from this file become NA; genes that only occur in this file
  # (and not in the first one) are not reported.
  expr_tpm[, i] <- abundance$tpm[match(genes, names(abundance$tpm))]
  expr_fpkm[, i] <- abundance$fpkm[match(genes, names(abundance$fpkm))]
  n_absent <- sum(!genes %in% names(abundance$tpm))
  n_extra <- sum(!names(abundance$tpm) %in% genes)
  if (n_absent > 0 || n_extra > 0) {
    warning(genefile[i], ": ", n_absent, " gene(s) of the first file are ",
            "missing (set to NA) and ", n_extra, " gene(s) not in the first ",
            "file are ignored.", call. = FALSE)
  }
  message(paste0("Merge ", i, "/", length(genefile),
                 " gene.abundance.txt files "))
}

# ---- column names ----------------------------------------------------------

samplename <- normalise_sample_id(genefile)
if (is.null(opt$sample_name)) {
  message("Sample name is not specified. \nUsing file names instead.")
} else {
  if (!file.exists(opt$sample_name)) {
    stop("Cannot find sample name file: ", opt$sample_name, call. = FALSE)
  }
  # column 1: file name (or sample id), column 2: name to use in the output
  sample_name <- read.table(opt$sample_name, sep = "\t", header = TRUE,
                            stringsAsFactors = FALSE, check.names = FALSE)
  if (ncol(sample_name) < 2) {
    stop("Sample name file must have at least two tab-delimited columns.",
         call. = FALSE)
  }
  sample_name[, 1] <- normalise_sample_id(sample_name[, 1])
  renamed <- sample_name[match(samplename, sample_name[, 1]), 2]
  unmatched <- is.na(renamed)
  if (any(unmatched)) {
    # Keep the file-derived name rather than emitting an "NA" column header.
    warning("No sample name found for: ",
            paste(samplename[unmatched], collapse = ", "),
            ". Using file names for these samples.", call. = FALSE)
    renamed[unmatched] <- samplename[unmatched]
  }
  samplename <- renamed
}
colnames(expr_tpm) <- samplename
colnames(expr_fpkm) <- samplename

# ---- output ----------------------------------------------------------------

if (opt$log2_norm) {
  message("start log2 transformation")
  # Vectorised on the whole matrix so that dimnames survive even when there
  # is a single gene.
  logexpr_tpm <- log2(expr_tpm + floor_value)
  write_expr_table(
    logexpr_tpm,
    paste0(out_dir, opt$project_code, "_geneexp_log2TPM.txt")
  )
  logexpr_fpkm <- log2(expr_fpkm + floor_value)
  write_expr_table(
    logexpr_fpkm,
    paste0(out_dir, opt$project_code, "_geneexp_log2FPKM.txt")
  )
  message("Write log2 TPM and FPKM expression file.")
} else {
  write_expr_table(
    expr_tpm,
    paste0(out_dir, opt$project_code, "_geneexp_TPM.txt")
  )
  write_expr_table(
    expr_fpkm,
    paste0(out_dir, opt$project_code, "_geneexp_FPKM.txt")
  )
  message("Write TPM and FPKM expression file.")
}