Pārlūkot izejas kodu

上传文件至 '.'

master
yingyu pirms 6 gadiem
vecāks
revīzija
999a7d950f
2 mainītis faili ar 185 papildinājumiem un 0 dzēšanām
  1. +73
    -0
      RNAseq_1_expr_ballgown.R
  2. +112
    -0
      RNAseq_1_expr_stringtie.R

+ 73
- 0
RNAseq_1_expr_ballgown.R Parādīt failu

@@ -0,0 +1,73 @@
#!/usr/bin/env Rscript
# example:
# Rscript RNAseq_1_expr_ballgown.R -o /home/yuying/rnaseqreport_test -i ./ballgown/ -l FALSE -p test
# Rscript RNAseq_1_expr_ballgown.R -o /home/yuying/rnaseqreport_test -i ./ballgown/


suppressPackageStartupMessages(library("optparse"))
suppressPackageStartupMessages(library("ballgown"))

# Command-line interface of the script.
# OptionParser appends a help flag on its own, equivalent to
#   make_option(c("-h", "--help"), action = "store_true", default = FALSE,
#               help = "Show this help message and exit")
# so only the script-specific options are declared below.

option_list <- list(
  # Destination directory of the expression table.
  make_option(
    c("-o", "--out_dir"),
    type = "character",
    default = "./",
    help = "The output directory [default ./]"
  ),
  # Directory holding the ballgown input; there is no default, the script
  # checks for NULL after parsing.
  make_option(
    c("-i", "--input"),
    type = "character",
    default = NULL,
    help = "The directory input of expression files. It is output from ballgown software."
  ),
  # Offset added to every value before taking log2.
  make_option(
    c("-f", "--floor_value"),
    metavar = "number",
    default = 0.01,
    help = "A number to add to each value before log2 transformation to avoid infinite value.[default: 0.01]"
  ),
  # Switch between log2-transformed and raw FPKM output.
  make_option(
    c("-l", "--log2_norm"),
    metavar = "TRUE",
    default = TRUE,
    help = "Perform log2 transformation on FPKM value. [default: TRUE]"
  ),
  # Prefix used when naming the output file.
  make_option(
    c("-p", "--project_code"),
    type = "character",
    default = "rnaseq",
    help = "Project code, which is used as prefix of output file. [default: rnaseq]"
  )
)

# Parse the command line. If the help option is encountered optparse prints
# the help and exits; options that are not supplied fall back to the defaults
# declared in option_list.
# The parser object is kept in its own variable because print_help() below
# needs it (it was previously referenced without ever being defined).
opt_parser <- OptionParser(option_list = option_list)
opt <- parse_args(opt_parser)

# --input has no default: without it there is nothing to process, so show the
# usage text and abort with a clear message.
if (is.null(opt$input)) {
  print_help(opt_parser)
  stop("At least one argument must be supplied (input file).", call. = FALSE)
}

# Build the FPKM expression profile from the ballgown output directory.
# Every entry of the directory is taken as a sample (samplePattern ".*").
geballgown_expr <- ballgown(
  dataDir = opt$input,
  samplePattern = ".*",
  meas = "all"
)
expr <- gexpr(geballgown_expr)
message("finish ballgown\n")

# Tidy the column names in one pass: drop the trailing "_1P" (added by the
# alicloud app) first, then the leading "FPKM" prefix that the default
# stringtie/ballgown output puts in front of each sample.
colnames(expr) <- gsub("^FPKM.", "", gsub("_1P$", "", colnames(expr)))

# Strip one trailing "/" from the output directory (if present) and append
# one, so that file names can be concatenated directly onto it.
out_dir <- paste0(gsub("/$", "", opt$out_dir), "/")

# Dimensions of the expression matrix, captured BEFORE the "Gene" column is
# prepended so that the c<samples>r<genes> tag in the file name is the same
# in both branches (the raw-FPKM branch used to count the Gene column too).
n_samples <- ncol(expr)
n_genes <- nrow(expr)

if (opt$log2_norm == TRUE) {
  message("start log2 transformation\n")

  floor_value <- as.numeric(opt$floor_value)

  # log2() is vectorised over the whole matrix and keeps its dimnames; the
  # floor value avoids -Inf for zero expression.
  logexpr <- log2(expr + floor_value)
  logexpr_out <- cbind(rownames(logexpr), round(logexpr, 3))
  colnames(logexpr_out)[1] <- "Gene"

  message("output log2 expression file\n")

  # File-name tag for the floor actually used, with "." written as "p"
  # (0.01 -> "0p01", which matches the previously hard-coded tag).
  floor_tag <- gsub(".", "p", format(floor_value), fixed = TRUE)

  write.table(
    logexpr_out,
    file = paste0(
      out_dir, opt$project_code, "_geneexp_log2fpkm_floor", floor_tag,
      "_c", n_samples, "r", n_genes, "_", Sys.Date(), ".txt"
    ),
    sep = "\t",
    row.names = FALSE,
    quote = FALSE
  )
} else {
  # Output the expression file with plain FPKM values. A separate object is
  # used for the table so that `expr` keeps holding the numeric matrix.
  expr_out <- cbind(rownames(expr), round(expr, 3))
  colnames(expr_out)[1] <- "Gene"

  message("output fpkm expression file\n")

  write.table(
    expr_out,
    file = paste0(
      out_dir, opt$project_code, "_geneexp_fpkm_c", n_samples,
      "r", n_genes, "_", Sys.Date(), ".txt"
    ),
    sep = "\t",
    row.names = FALSE,
    quote = FALSE
  )
}





+ 112
- 0
RNAseq_1_expr_stringtie.R Parādīt failu

@@ -0,0 +1,112 @@
#!/usr/bin/env Rscript
# example:
# Rscript RNAseq_1_expr_stringtie.R
# Rscript RNAseq_1_expr_stringtie.R -o ../ -l FALSE -s samplenames.txt -p 111
suppressPackageStartupMessages(library("optparse"))
suppressPackageStartupMessages(library("data.table"))


# Command-line interface of the script.
# OptionParser appends a help flag on its own, equivalent to
#   make_option(c("-h", "--help"), action = "store_true", default = FALSE,
#               help = "Show this help message and exit")
# so only the script-specific options are declared below.

option_list <- list(
  # Destination directory of the merged expression tables.
  make_option(
    c("-o", "--out_dir"),
    type = "character",
    default = "./",
    help = "The output directory [default ./]"
  ),
  # Directory that is searched for the per-sample abundance files.
  make_option(
    c("-i", "--input_dir"),
    type = "character",
    default = "./",
    help = "The directory input of expression files. It is output from stringtie software named as \".gene.abundance.txt\"."
  ),
  # Offset added to every value before taking log2.
  make_option(
    c("-f", "--floor_value"),
    metavar = "number",
    default = 0.01,
    help = "A number to add to each value before log2 transformation to avoid infinite value.[default: 0.01]"
  ),
  # Switch between log2-transformed and raw FPKM/TPM output.
  make_option(
    c("-l", "--log2_norm"),
    metavar = "TRUE",
    default = TRUE,
    help = "Perform log2 transformation on FPKM/TPM value. [default: TRUE]"
  ),
  # Optional tab-delimited mapping file used to rename samples.
  make_option(
    c("-s", "--sample_name"),
    type = "character",
    default = NULL,
    help = "File in tab-delimited format for sample name if usr want to rename sample name. The input file containing sample name as file name and sample name to be renamed."
  ),
  # Prefix used when naming the output files.
  make_option(
    c("-p", "--project_code"),
    type = "character",
    default = "rnaseq",
    help = "Project code, which is used as prefix of output file. [default: rnaseq]"
  )
)

# Parse the command line. The help option prints the usage and exits; any
# option missing from the command line takes its declared default.
cli_parser <- OptionParser(option_list = option_list)
opt <- parse_args(cli_parser)

# Normalise both directories: drop one trailing "/" if there is one, then
# append one, so file names can be pasted straight onto the result.
in_dir <- paste0(sub("/$", "", opt$input_dir), "/")
out_dir <- paste0(sub("/$", "", opt$out_dir), "/")

# Locate the per-sample gene abundance tables in the input directory.
# The name is matched as a regular expression, exactly as before; the
# directory is now listed only once.
genefile <- grep("gene.abundance.txt", dir(in_dir), value = TRUE)

if (length(genefile) == 0) {
  stop("Cannot find *gene.abundance.txt files in the working folder. Exit!", call. = FALSE)
}

if (length(genefile) == 1) {
  stop("Only one *gene.abundance.txt files in the working folder. Exit!", call. = FALSE)
}

message(paste("Detect ", length(genefile), " files named as *gene.abundance.txt. \nMerging. ", sep = ""))

# Merge the gene files into one TPM and one FPKM matrix (genes x files).
# The row set is defined by the gene IDs of the FIRST file; genes absent from
# a later file end up as NA in that file's column, genes that only occur in
# later files are not added.
for (i in seq_along(genefile)) {
  # Read from the input directory (the files used to be opened by bare name,
  # i.e. relative to the working directory, which only worked for -i ./).
  # `file =` makes fread treat the string strictly as a path, and the direct
  # call replaces the former eval(parse(text = ...)) construction, which
  # turned file names into R code.
  a <- fread(
    file = paste0(in_dir, genefile[i]),
    header = TRUE,
    sep = "\t",
    stringsAsFactors = FALSE,
    check.names = FALSE,
    data.table = FALSE
  )

  # A gene ID may occur on several rows; collapse those rows to their mean.
  gene_id <- as.factor(a[["Gene ID"]])
  atpm <- tapply(a[["TPM"]], gene_id, mean)
  afpkm <- tapply(a[["FPKM"]], gene_id, mean)

  # Allocate the result matrices once the gene set of the first file is
  # known, instead of reading that file a second time up front.
  if (i == 1) {
    expr_tpm <- matrix(0, ncol = length(genefile), nrow = length(atpm))
    expr_fpkm <- matrix(0, ncol = length(genefile), nrow = length(atpm))
    rownames(expr_tpm) <- names(atpm)
    rownames(expr_fpkm) <- names(atpm)
  }

  # Align this file's genes to the reference row order.
  expr_tpm[, i] <- atpm[match(rownames(expr_tpm), names(atpm))]
  expr_fpkm[, i] <- afpkm[match(rownames(expr_fpkm), names(afpkm))]
  message(paste("Merge ", i, "/", length(genefile), " gene.abundance.txt files ", sep = ""))
}

# Column names of the merged matrices.

# Turn a file name (or a mapping-file key) into a bare sample ID: drop the
# ".gene.abundance.txt" part, then a trailing "_1P" (added by the alicloud
# app). The same normalisation is applied to the file names and to the keys
# of the mapping file; previously only "_1P.gene.abundance.txt" was removed
# from the file names, so files without "_1P" could never be matched and
# their columns were silently named NA.
clean_sample_id <- function(x) {
  x <- gsub(".gene.abundance.txt", "", x)
  gsub("_1P$", "", x)
}

samplename <- clean_sample_id(genefile)

if (is.null(opt$sample_name)) {
  message("Sample name is not specified. Using file names instead.")
} else {
  # Mapping file: column 1 = sample name as used in the file name,
  # column 2 = the name the sample should be renamed to.
  sample_name <- read.table(
    opt$sample_name,
    sep = "\t",
    header = TRUE,
    stringsAsFactors = FALSE,
    check.names = FALSE
  )
  if (ncol(sample_name) < 2) {
    stop("The sample name file must contain at least two tab-delimited columns.", call. = FALSE)
  }

  renamed <- as.character(
    sample_name[match(samplename, clean_sample_id(sample_name[, 1])), 2]
  )

  # Samples missing from the mapping keep their file-derived name instead of
  # ending up with an NA column header.
  unmatched <- is.na(renamed)
  if (any(unmatched)) {
    warning(
      "No entry in the sample name file for: ",
      paste(samplename[unmatched], collapse = ", "),
      ". Using file names for these samples.",
      call. = FALSE
    )
    renamed[unmatched] <- samplename[unmatched]
  }
  samplename <- renamed
}
colnames(expr_tpm) <- samplename
colnames(expr_fpkm) <- samplename


# Write the TPM and FPKM matrices, either log2-transformed or as they are.

# Round a matrix to three decimals, put the row names in front as a "Gene"
# column and write it tab-delimited to <out_dir><project_code><file_tag>.
write_gene_table <- function(values, file_tag) {
  tab <- cbind(rownames(values), round(values, 3))
  colnames(tab)[1] <- "Gene"
  write.table(
    tab,
    file = paste0(out_dir, opt$project_code, file_tag),
    sep = "\t",
    row.names = FALSE,
    quote = FALSE
  )
}

# Column-wise log2 after adding the floor value given on the command line.
log2_with_floor <- function(values) {
  apply(values, 2, function(x) {
    log2(x + as.numeric(opt$floor_value))
  })
}

if (opt$log2_norm == TRUE) {
  message("start log2 transformation")
  # TPM first, then FPKM
  write_gene_table(log2_with_floor(expr_tpm), "_geneexp_log2TPM.txt")
  write_gene_table(log2_with_floor(expr_fpkm), "_geneexp_log2FPKM.txt")
  message("Write log2 TPM and FPKM expression file.")
} else {
  # untransformed values: TPM first, then FPKM
  write_gene_table(expr_tpm, "_geneexp_TPM.txt")
  write_gene_table(expr_fpkm, "_geneexp_FPKM.txt")
  message("Write TPM and FPKM expression file.")
}





Notiek ielāde…
Atcelt
Saglabāt