|
- #!/usr/bin/env Rscript
- # example:
- # Rscript RNAseq_1_expr_stringtieout.R
- #Rscript RNAseq_1_expr_stringtieout.R -o ../ -l FALSE -s samplenames.txt -p 111
- suppressPackageStartupMessages(library("optparse"))
- suppressPackageStartupMessages(library("data.table"))
-
-
- # Command-line interface definition.
- # OptionParser adds a help flag on its own, equivalent to
- # make_option(c("-h", "--help"), action="store_true", default=FALSE,
- # help="Show this help message and exit")
-
- option_list <- list(
-   make_option(
-     c("-o", "--out_dir"),
-     type = "character",
-     default = "./",
-     help = "The output directory [default ./]"
-   ),
-   make_option(
-     c("-i", "--input_dir"),
-     type = "character",
-     default = "./",
-     help = "The directory input of expression files. It is output from stringtie software named as \".gene.abundance.txt\"."
-   ),
-   make_option(
-     c("-f", "--floor_value"),
-     metavar = "number",
-     default = 0.01,
-     help = "A number to add to each value before log2 transformation to avoid infinite value.[default: 0.01]"
-   ),
-   make_option(
-     c("-l", "--log2_norm"),
-     metavar = "TRUE",
-     default = TRUE,
-     help = "Perform log2 transformation on FPKM/TPM value. [default: TRUE]"
-   ),
-   make_option(
-     c("-s", "--sample_name"),
-     type = "character",
-     default = NULL,
-     help = "File in tab-delimited format for sample name if usr want to rename sample name. The input file containing sample name as file name and sample name to be renamed."
-   ),
-   make_option(
-     c("-p", "--project_code"),
-     type = "character",
-     default = "rnaseq",
-     help = "Project code, which is used as prefix of output file. [default: rnaseq]"
-   )
- )
-
- # Parse the command line: the help option prints usage and exits, and
- # options that are not supplied keep the defaults declared above.
- cli_parser <- OptionParser(option_list = option_list)
- opt <- parse_args(cli_parser)
-
- # Directory arguments: drop one trailing "/" if present, then append one,
- # so file names can be pasted directly onto in_dir / out_dir.
- in_dir <- paste0(sub("/$", "", opt$input_dir), "/")
- out_dir <- paste0(sub("/$", "", opt$out_dir), "/")
-
- # Locate the gene abundance tables in in_dir. The file-name fragment is
- # matched literally (fixed = TRUE) so the dots are not regex wildcards.
- genefile <- grep("gene.abundance.txt", dir(in_dir), fixed = TRUE, value = TRUE)
-
- if (length(genefile) == 0) {
-   stop("Cannot find *gene.abundance.txt files in the working folder. Exit!", call. = FALSE)
- }
-
- if (length(genefile) == 1) {
-   stop("Only one *gene.abundance.txt files in the working folder. Exit!", call. = FALSE)
- }
-
- message(paste0("Detect ", length(genefile), " files named as *gene.abundance.txt. \nMerging. "))
-
- # Read one abundance table from in_dir and collapse it to one value per
- # "Gene ID" (mean of the rows sharing that id).
- #
- # file_name: name of a file inside in_dir (not a full path).
- # Returns a list with two named vectors, `tpm` and `fpkm`, keyed by gene id.
- # Stops with an explicit message when a required column is absent.
- read_abundance <- function(file_name) {
-   # The path is built from in_dir; reading the bare file name would only
-   # work when the input directory is also the working directory.
-   tab <- fread(
-     paste0(in_dir, file_name),
-     header = TRUE,
-     sep = "\t",
-     stringsAsFactors = FALSE,
-     check.names = FALSE,
-     data.table = FALSE
-   )
-   missing_cols <- setdiff(c("Gene ID", "TPM", "FPKM"), colnames(tab))
-   if (length(missing_cols) > 0) {
-     stop(
-       "File ", file_name, " lacks required column(s): ",
-       paste(missing_cols, collapse = ", "),
-       call. = FALSE
-     )
-   }
-   gene_id <- as.factor(tab[["Gene ID"]])
-   list(
-     tpm = tapply(tab[["TPM"]], gene_id, mean),
-     fpkm = tapply(tab[["FPKM"]], gene_id, mean)
-   )
- }
-
- # The first file defines the gene set and row order of both matrices.
- # Genes that only occur in later files are not added; genes missing from a
- # later file are NA in that file's column.
- first_abundance <- read_abundance(genefile[1])
- gene_ids <- names(first_abundance$tpm)
-
- expr_fpkm <- matrix(0, ncol = length(genefile), nrow = length(gene_ids))
- expr_tpm <- matrix(0, ncol = length(genefile), nrow = length(gene_ids))
- rownames(expr_fpkm) <- gene_ids
- rownames(expr_tpm) <- gene_ids
-
- # Merge gene files, one column per file (the first file is reused rather
- # than read a second time).
- for (i in seq_along(genefile)) {
-   abundance <- if (i == 1) first_abundance else read_abundance(genefile[i])
-   expr_tpm[, i] <- abundance$tpm[match(gene_ids, names(abundance$tpm))]
-   expr_fpkm[, i] <- abundance$fpkm[match(gene_ids, names(abundance$fpkm))]
-   message(paste0("Merge ", i, "/", length(genefile), " gene.abundance.txt files "))
- }
-
- # Column names.
- # A sample id is derived from a file name by removing ".gene.abundance.txt"
- # and then a trailing "_1P" (the "_1P" part comes from the alicloud app).
- # The same rule is applied to the abundance file names and to the first
- # column of the renaming table, so files without "_1P" match as well.
- strip_abundance_suffix <- function(x) {
-   sub("_1P$", "", gsub(".gene.abundance.txt", "", x, fixed = TRUE))
- }
-
- file_sample_id <- strip_abundance_suffix(genefile)
-
- if (is.null(opt$sample_name)) {
-   samplename <- file_sample_id
-   message("Sample name is not specified. Using file names instead.")
- } else {
-   # Renaming table: column 1 = file/sample name, column 2 = new sample name.
-   sample_name <- read.table(
-     opt$sample_name,
-     sep = "\t",
-     header = TRUE,
-     stringsAsFactors = FALSE,
-     check.names = FALSE
-   )
-   if (ncol(sample_name) < 2) {
-     stop("Sample name file must contain at least two tab-delimited columns.", call. = FALSE)
-   }
-   sample_name[, 1] <- strip_abundance_suffix(sample_name[, 1])
-   samplename <- as.character(
-     sample_name[match(file_sample_id, sample_name[, 1]), 2]
-   )
-   # Files without an entry in the table would otherwise get NA as their
-   # column name; keep the file-derived id for them and report it.
-   unmatched <- is.na(samplename)
-   if (any(unmatched)) {
-     warning(
-       "No sample name found for: ",
-       paste(file_sample_id[unmatched], collapse = ", "),
-       ". Using file names for these samples.",
-       call. = FALSE
-     )
-     samplename[unmatched] <- file_sample_id[unmatched]
-   }
- }
- colnames(expr_tpm) <- samplename
- colnames(expr_fpkm) <- samplename
-
-
- # Write one expression matrix to
- # <out_dir><project_code><file_suffix> as a tab-delimited table.
- #
- # expr: numeric matrix with gene ids as row names and samples as columns.
- # file_suffix: file name ending, e.g. "_geneexp_TPM.txt".
- # Values are rounded to 3 decimals and the gene ids become a leading
- # "Gene" column.
- write_expr_table <- function(expr, file_suffix) {
-   expr_out <- cbind(rownames(expr), round(expr, 3))
-   colnames(expr_out)[1] <- "Gene"
-   write.table(
-     expr_out,
-     file = paste0(out_dir, opt$project_code, file_suffix),
-     sep = "\t",
-     row.names = FALSE,
-     quote = FALSE
-   )
- }
-
- # An NA flag would make `if` fail with an unhelpful message, so the value
- # of --log2_norm is checked explicitly first.
- log2_norm <- as.logical(opt$log2_norm)
- if (length(log2_norm) != 1 || is.na(log2_norm)) {
-   stop("--log2_norm must be TRUE or FALSE.", call. = FALSE)
- }
-
- # write.table does not create directories; make sure the target exists.
- if (!dir.exists(out_dir)) {
-   dir.create(out_dir, recursive = TRUE)
- }
-
- if (log2_norm) {
-   message("start log2 transformation")
-   floor_value <- as.numeric(opt$floor_value)
-   # log2() is applied to the whole matrix at once, which keeps the
-   # dimensions and dimnames even when there is a single gene row.
-   # tpm
-   write_expr_table(log2(expr_tpm + floor_value), "_geneexp_log2TPM.txt")
-   # fpkm
-   write_expr_table(log2(expr_fpkm + floor_value), "_geneexp_log2FPKM.txt")
-   message("Write log2 TPM and FPKM expression file.")
- } else {
-   # output expression file without transformation
-   # tpm
-   write_expr_table(expr_tpm, "_geneexp_TPM.txt")
-   # fpkm
-   write_expr_table(expr_fpkm, "_geneexp_FPKM.txt")
-   message("Write TPM and FPKM expression file.")
- }
-
-
-
|