# yingyu / RNAseqDownstream2report


			
							#!/usr/bin/env Rscript
# example:
# Rscript RNAseq_1_expr_stringtieout.R 
#Rscript RNAseq_1_expr_stringtieout.R -o ../ -l FALSE -s samplenames.txt -p 111
suppressPackageStartupMessages(library("optparse"))
suppressPackageStartupMessages(library("data.table"))


# Command-line options understood by this script.
# OptionParser adds a help option on its own, equivalent to
# make_option(c("-h", "--help"), action = "store_true", default = FALSE,
#             help = "Show this help message and exit")

option_list <- list(
  make_option(
    c("-o", "--out_dir"),
    type = "character",
    default = "./",
    help = "The output directory [default ./]"
  ),
  make_option(
    c("-i", "--input_dir"),
    type = "character",
    default = "./",
    help = "The directory input of expression files. It is output from stringtie software named as \".gene.abundance.txt\"."
  ),
  make_option(
    c("-f", "--floor_value"),
    metavar = "number",
    default = 0.01,
    help = "A number to add to each value before log2 transformation to avoid infinite value.[default: 0.01]"
  ),
  make_option(
    c("-l", "--log2_norm"),
    metavar = "TRUE",
    default = TRUE,
    help = "Perform log2 transformation on FPKM/TPM value. [default: TRUE]"
  ),
  make_option(
    c("-s", "--sample_name"),
    type = "character",
    default = NULL,
    help = "File in tab-delimited format for sample name if usr want to rename sample name. The input file containing sample name as file name and sample name to be renamed."
  ),
  make_option(
    c("-p", "--project_code"),
    type = "character",
    default = "rnaseq",
    help = "Project code, which is used as prefix of output file. [default: rnaseq]"
  )
)

# Parse the command line. The help option prints the usage text and exits;
# every option that is not given on the command line takes its default.
cli_parser <- OptionParser(option_list = option_list)
opt <- parse_args(cli_parser)

# Make both directory arguments end in "/" so a file name can be pasted
# directly behind them: drop one trailing slash if present, then append one.
with_trailing_slash <- function(path) {
  paste0(sub("/$", "", path), "/")
}
in_dir <- with_trailing_slash(opt$input_dir)
out_dir <- with_trailing_slash(opt$out_dir)

# Collect the names of the gene abundance files found in the input directory.
# At least two files are required, otherwise there is nothing to merge.
genefile <- grep("gene.abundance.txt", dir(in_dir), value = TRUE)
n_genefile <- length(genefile)

if (n_genefile == 0) {
  stop("Cannot find *gene.abundance.txt files in the working folder. Exit!", call. = FALSE)
}

if (n_genefile == 1) {
  stop("Only one *gene.abundance.txt files in the working folder. Exit!", call. = FALSE)
}

message(paste0("Detect ", n_genefile, " files named as *gene.abundance.txt. \nMerging. "))

# Merge the per-sample gene abundance tables into two gene x sample matrices,
# expr_tpm and expr_fpkm (one column per entry of genefile).

# Read one abundance table.
# genefile holds bare file names listed from in_dir, so the name is resolved
# against in_dir (which ends in "/") instead of the current working directory.
# Stops with an explicit message when a required column is absent.
read_abundance <- function(file_name) {
  tab <- fread(paste0(in_dir, file_name), header = TRUE, sep = "\t",
               stringsAsFactors = FALSE, check.names = FALSE,
               data.table = FALSE)
  missing_cols <- setdiff(c("Gene ID", "TPM", "FPKM"), colnames(tab))
  if (length(missing_cols) > 0) {
    stop("File ", file_name, " lacks required column(s): ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }
  tab
}

# Average one value column over all rows that share the same "Gene ID".
# Returns a vector named by gene ID.
gene_mean <- function(tab, column) {
  tapply(tab[[column]], as.factor(tab[["Gene ID"]]), mean)
}

# The first file fixes the set and order of genes used as matrix rows.
a <- read_abundance(genefile[1])
atpm <- gene_mean(a, "TPM")

expr_fpkm <- matrix(0, ncol = length(genefile), nrow = length(atpm))
expr_tpm <- matrix(0, ncol = length(genefile), nrow = length(atpm))
rownames(expr_fpkm) <- names(atpm)
rownames(expr_tpm) <- names(atpm)

for (i in seq_along(genefile)) {
  # The first table is already in memory; only read the remaining ones.
  if (i > 1) {
    a <- read_abundance(genefile[i])
  }
  atpm <- gene_mean(a, "TPM")
  afpkm <- gene_mean(a, "FPKM")
  # Align on gene ID: genes missing from this file become NA, genes that are
  # not in the first file are dropped.
  expr_tpm[, i] <- atpm[match(rownames(expr_tpm), names(atpm))]
  expr_fpkm[, i] <- afpkm[match(rownames(expr_fpkm), names(afpkm))]
  message(paste("Merge ", i, "/", length(genefile), " gene.abundance.txt files ", sep = ""))
}

# Column names for the merged matrices.
# Default: the file name with "_1P.gene.abundance.txt" removed ("_1P" is a tag
# added by the alicloud app).
samplename <- gsub("_1P.gene.abundance.txt", "", genefile)

if (is.null(opt$sample_name)) {
  message("Sample name is not specified. Using file names instead.")
} else {
  # Reduce a file name or a sample-sheet entry to its bare sample key by
  # stripping the ".gene.abundance.txt" suffix and then an optional "_1P" tag.
  # Both sides of the lookup go through this same function, so files without
  # the "_1P" tag are matched as well.
  sample_key <- function(x) {
    sub("_1P$", "", sub("\\.gene\\.abundance\\.txt$", "", x))
  }

  # Sample sheet: column 1 = name as it appears in the file name,
  # column 2 = name to use in the output.
  sample_name <- read.table(opt$sample_name, sep = "\t", header = TRUE,
                            stringsAsFactors = FALSE, check.names = FALSE)
  if (ncol(sample_name) < 2) {
    stop("Sample name file must have two tab-delimited columns: ",
         "file/sample name and new sample name.", call. = FALSE)
  }

  renamed <- sample_name[match(sample_key(genefile),
                               sample_key(sample_name[, 1])), 2]

  # Samples without an entry keep their file-derived name instead of silently
  # turning into an NA column header.
  unmatched <- is.na(renamed)
  if (any(unmatched)) {
    warning("No entry in ", opt$sample_name, " for: ",
            paste(genefile[unmatched], collapse = ", "),
            ". Keeping file-derived names for these samples.", call. = FALSE)
  }
  samplename[!unmatched] <- as.character(renamed[!unmatched])
}
colnames(expr_tpm) <- samplename
colnames(expr_fpkm) <- samplename


# Write the merged TPM and FPKM matrices as tab-delimited text files, either
# log2-transformed or untransformed depending on the log2_norm option.

# Write one expression matrix to <out_dir><project_code>_geneexp_<label>.txt.
# Gene IDs (row names) go into a leading "Gene" column; values are rounded to
# three decimals.
write_expr_table <- function(expr, label) {
  out <- cbind(rownames(expr), round(expr, 3))
  colnames(out)[1] <- "Gene"
  write.table(out,
              file = paste0(out_dir, opt$project_code, "_geneexp_", label, ".txt"),
              sep = "\t", row.names = FALSE, quote = FALSE)
}

# Create the output directory if it is missing, so the run does not fail at
# the very last step; this is a no-op when the directory already exists.
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

if (opt$log2_norm == TRUE) {
  message("start log2 transformation")
  floor_value <- as.numeric(opt$floor_value)
  # log2() is applied to the whole matrix at once. Unlike apply(), this keeps
  # the matrix shape and row names even when there is only one gene row.
  write_expr_table(log2(expr_tpm + floor_value), "log2TPM")
  write_expr_table(log2(expr_fpkm + floor_value), "log2FPKM")
  message("Write log2 TPM and FPKM expression file.")
} else {
  # Output the expression values without transformation.
  write_expr_table(expr_tpm, "TPM")
  write_expr_table(expr_fpkm, "FPKM")
  message("Write TPM and FPKM expression file.")
}