|
|
@@ -0,0 +1,112 @@ |
|
|
|
#!/usr/bin/env Rscript |
|
|
|
# Example usage:
#   Rscript RNAseq_1_expr_stringtieout.R
#   Rscript RNAseq_1_expr_stringtieout.R -o ../ -l FALSE -s samplenames.txt -p 111
|
|
|
suppressPackageStartupMessages(library("optparse")) |
|
|
|
suppressPackageStartupMessages(library("data.table")) |
|
|
|
|
|
|
|
|
|
|
|
# Specify the desired options in a list.
# By default OptionParser adds a help option equivalent to
#   make_option(c("-h", "--help"), action="store_true", default=FALSE,
#               help="Show this help message and exit")
|
|
|
|
|
|
|
# Command-line options accepted by this script, in the form expected by
# optparse::OptionParser(). Options without an explicit `type` take their type
# from the default value.
option_list <- list(
  # -o: directory the merged expression tables are written to.
  make_option(
    c("-o", "--out_dir"),
    type = "character",
    default = "./",
    help = "The output directory [default ./]"
  ),
  # -i: directory that is searched for the StringTie abundance files.
  make_option(
    c("-i", "--input_dir"),
    type = "character",
    default = "./",
    help = "The directory input of expression files. It is output from stringtie software named as \".gene.abundance.txt\"."
  ),
  # -f: offset added to every value before taking log2.
  make_option(
    c("-f", "--floor_value"),
    metavar = "number",
    default = 0.01,
    help = "A number to add to each value before log2 transformation to avoid infinite value.[default: 0.01]"
  ),
  # -l: switch between log2-transformed and raw output tables.
  make_option(
    c("-l", "--log2_norm"),
    metavar = "TRUE",
    default = TRUE,
    help = "Perform log2 transformation on FPKM/TPM value. [default: TRUE]"
  ),
  # -s: optional two-column mapping table used to rename samples.
  make_option(
    c("-s", "--sample_name"),
    type = "character",
    default = NULL,
    help = "File in tab-delimited format for sample name if usr want to rename sample name. The input file containing sample name as file name and sample name to be renamed."
  ),
  # -p: prefix placed in front of every output file name.
  make_option(
    c("-p", "--project_code"),
    type = "character",
    default = "rnaseq",
    help = "Project code, which is used as prefix of output file. [default: rnaseq]"
  )
)
|
|
|
|
|
|
|
# Parse the command line. Options that are not supplied keep the defaults
# declared in option_list; the automatically added help option prints the
# usage text and exits.
cli_parser <- OptionParser(option_list = option_list)
opt <- parse_args(cli_parser)

# Strip one trailing "/" (if present) and append one, so that each directory
# can be pasted directly in front of a file name.
with_trailing_slash <- function(path) {
  paste0(sub("/$", "", path), "/")
}

in_dir <- with_trailing_slash(opt$input_dir)
out_dir <- with_trailing_slash(opt$out_dir)
|
|
|
|
|
|
|
# Locate the StringTie gene abundance tables inside the input directory.
# The directory is listed once, and the pattern is matched literally
# (fixed = TRUE) so that "." cannot stand in for an arbitrary character.
# `genefile` holds bare file names (no directory part).
genefile <- grep("gene.abundance.txt", dir(in_dir), fixed = TRUE, value = TRUE)

# Merging needs at least two files. The error names the directory that was
# actually searched, which is not necessarily the working directory when -i
# is given.
if (length(genefile) == 0) {
  stop("Cannot find *gene.abundance.txt files in ", in_dir, ". Exit!",
       call. = FALSE)
}

if (length(genefile) == 1) {
  stop("Only one *gene.abundance.txt file in ", in_dir, ". Exit!",
       call. = FALSE)
}

message(paste0("Detect ", length(genefile),
               " files named as *gene.abundance.txt. \nMerging. "))
|
|
|
|
|
|
|
# Read one gene abundance table as a plain data.frame and make sure it has the
# columns used below. fread() is called directly rather than through
# eval(parse()), so unusual characters in a file name cannot be interpreted as
# R code.
read_abundance <- function(path) {
  tab <- fread(path, header = TRUE, sep = "\t", stringsAsFactors = FALSE,
               check.names = FALSE, data.table = FALSE)
  missing_cols <- setdiff(c("Gene ID", "TPM", "FPKM"), colnames(tab))
  if (length(missing_cols) > 0) {
    stop("File ", path, " lacks required column(s): ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }
  tab
}

# Mean of one value column per gene ID (a gene ID may occur on several rows).
gene_means <- function(tab, column) {
  tapply(tab[[column]], as.factor(tab[["Gene ID"]]), mean)
}

# Files are listed from in_dir, so they must be read from in_dir as well;
# reading by bare name only works when in_dir is the working directory.
first_tab <- read_abundance(paste0(in_dir, genefile[1]))

# The gene IDs of the first file define the rows of both matrices. Genes that
# are missing from a later file become NA in that column; genes that appear
# only in later files are not added.
gene_ids <- names(gene_means(first_tab, "TPM"))

expr_fpkm <- matrix(0, ncol = length(genefile), nrow = length(gene_ids))
expr_tpm <- matrix(0, ncol = length(genefile), nrow = length(gene_ids))
rownames(expr_fpkm) <- gene_ids
rownames(expr_tpm) <- gene_ids

# Fill one column per file.
for (i in seq_along(genefile)) {
  # The first table is already in memory; do not read it a second time.
  a <- if (i == 1L) first_tab else read_abundance(paste0(in_dir, genefile[i]))
  atpm <- gene_means(a, "TPM")
  afpkm <- gene_means(a, "FPKM")
  expr_tpm[, i] <- atpm[match(gene_ids, names(atpm))]
  expr_fpkm[, i] <- afpkm[match(gene_ids, names(afpkm))]
  message(paste0("Merge ", i, "/", length(genefile),
                 " gene.abundance.txt files "))
}
|
|
|
|
|
|
|
# Column names for the expression matrices.
#
# Turn a file name into a sample key: drop ".gene.abundance.txt" (matched
# literally) and then a trailing "_1P" (added by the alicloud app). The same
# rule is applied to the file names and to the first column of the mapping
# table, so both sides of the lookup agree whether or not "_1P" is present.
strip_abundance_suffix <- function(x) {
  x <- gsub(".gene.abundance.txt", "", x, fixed = TRUE)
  sub("_1P$", "", x)
}

samplename <- strip_abundance_suffix(genefile)

if (is.null(opt$sample_name)) {
  message("Sample name is not specified. Using file names instead.")
} else {
  # Mapping table: column 1 = file/sample name, column 2 = new sample name.
  sample_name <- read.table(opt$sample_name, sep = "\t", header = TRUE,
                            stringsAsFactors = FALSE, check.names = FALSE)
  if (ncol(sample_name) < 2) {
    stop("Sample name file must have at least two tab-delimited columns.",
         call. = FALSE)
  }
  sample_name[, 1] <- strip_abundance_suffix(sample_name[, 1])

  renamed <- as.character(sample_name[match(samplename, sample_name[, 1]), 2])

  # Samples without an entry in the mapping table keep their file-derived
  # name instead of silently becoming an NA column header.
  unmatched <- is.na(renamed)
  if (any(unmatched)) {
    warning("No new name found for: ",
            paste(samplename[unmatched], collapse = ", "),
            ". Using file names for these samples.", call. = FALSE)
    renamed[unmatched] <- samplename[unmatched]
  }
  samplename <- renamed
}

colnames(expr_tpm) <- samplename
colnames(expr_fpkm) <- samplename
|
|
|
|
|
|
|
|
|
|
|
# Write one expression matrix as a tab-delimited table: a leading "Gene"
# column holding the row names, followed by the values rounded to 3 decimals.
# The file is written to out_dir and named <project_code><suffix>.
write_expr_table <- function(mat, suffix) {
  out <- cbind(rownames(mat), round(mat, 3))
  colnames(out)[1] <- "Gene"
  write.table(out, file = paste0(out_dir, opt$project_code, suffix),
              sep = "\t", row.names = FALSE, quote = FALSE)
}

# A value for -l that cannot be read as TRUE/FALSE would otherwise fail in
# if() with an unhelpful "missing value" error.
if (length(opt$log2_norm) != 1 || is.na(opt$log2_norm)) {
  stop("--log2_norm must be TRUE or FALSE.", call. = FALSE)
}

if (opt$log2_norm) {
  message("start log2 transformation")
  floor_value <- as.numeric(opt$floor_value)
  # log2() is vectorised over the whole matrix, so dimensions and dimnames are
  # kept even when there is only a single gene row (apply() over columns would
  # collapse that case to a plain vector).
  write_expr_table(log2(expr_tpm + floor_value), "_geneexp_log2TPM.txt")
  write_expr_table(log2(expr_fpkm + floor_value), "_geneexp_log2FPKM.txt")
  message("Write log2 TPM and FPKM expression file.")
} else {
  # Output the untransformed expression values.
  write_expr_table(expr_tpm, "_geneexp_TPM.txt")
  write_expr_table(expr_fpkm, "_geneexp_FPKM.txt")
  message("Write TPM and FPKM expression file.")
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|