#!/usr/bin/env Rscript
###Copyright 2019 Ying Yu from Fudan-PGx group 
# example:
# Rscript RNAseq_2_pca.R -o /home/yuying/rnaseqreport_test -i ballgown_geneexp_log2fpkm_floor0p01_c3r58395_2019-04-29.txt  -g group1.txt -p organoid 

suppressPackageStartupMessages(library("optparse"))

# specify our desired options in a list
# by default OptionParser will add a help option equivalent to
# make_option(c("-h", "--help"), action="store_true", default=FALSE, 
#               help="Show this help message and exit")

# Command-line interface of the script:
#   -o/--out_dir       output directory (default "./")
#   -i/--input         expression table to analyse (required)
#   -g/--sample_group  optional tab-separated sample/group table
#   -p/--project_code  prefix used for the output file names (default "rnaseq")
option_list <- list(
  make_option(c("-o", "--out_dir"), type = "character", default = "./",
    help = "The output directory [default ./]"),
  make_option(c("-i", "--input"), type = "character", default = NULL,
    help = "The input expression files. required!"),
  make_option(c("-g", "--sample_group"), type = "character", default = NULL,
    help = "File for sample group infomation. The input file containing sample name and group infomation. note colname must be like: sample	group1	group2... "),
  make_option(c("-p", "--project_code"), type = "character", default = "rnaseq",
    help = "Project code, which is used as prefix of output file. [default: rnaseq]")
)

# Keep the parser object in a variable: it is needed both to parse the
# arguments and to print the usage text when validation fails below.
opt_parser <- OptionParser(option_list = option_list)

# Get command line options; if the help option is encountered, print help and
# exit, otherwise options not given on the command line take their defaults.
opt <- parse_args(opt_parser)

# The input file is the only mandatory argument: show the usage and abort
# when it is missing.
if (is.null(opt$input)) {
  print_help(opt_parser)
  stop("At least one argument must be supplied (input file).", call. = FALSE)
}

## Import the expression file
# Normalise the output directory so that it ends with exactly one "/".
out_dir <- paste0(sub("/$", "", opt$out_dir), "/")
# The first column of the file supplies the row names; every remaining column
# is kept as data (columns become the PCA observations after transposition).
logexpr <- read.table(opt$input, header = TRUE, stringsAsFactors = FALSE,
                      row.names = 1)

# Fail early with a readable message when nothing is left to analyse, rather
# than with "undefined columns selected" from the check below.
if (ncol(logexpr) == 0) {
  stop("The input expression file contains no sample columns.", call. = FALSE)
}

# Check that the expression file is on log scale.
# Heuristic: a spread of more than 100 between the smallest and largest value
# of the first data column is taken as a sign of non-log data. Missing values
# are ignored here so that an NA does not turn the condition itself into NA.
first_col_range <- range(logexpr[, 1], na.rm = TRUE)
if (diff(first_col_range) > 100) {
  stop("PCA anlaysis shoulc be conducted based on expression profile on log scale.", call. = FALSE)
}
## ---------------------------------------------------------------------
## PCA
## ---------------------------------------------------------------------
# prcomp() treats rows as observations, so the table is transposed to put the
# columns of logexpr (the samples) in rows.
pc.cr <- prcomp(t(logexpr), retx = TRUE)

# Scores on each principal component, one row per sample, with the row name
# repeated in an explicit `sample` column.
pca <- data.frame(pc.cr$x)
pca[["sample"]] <- row.names(pca)

# pcanew starts as a plain copy of the score table.
pcanew <- pca
message("PCA finished.")
## PCA done

#### Add group information when a sample-group file was supplied
# The group file is tab separated with a header. Rows are matched to the PCA
# table through its `sample` column, and every column whose name contains
# "group" is appended to pcanew under its original name.
if (is.null(opt$sample_group)) {
  message("Warning: no group sample file. PCA will not be able to colored by group.")
} else {
  sample_group <- read.table(opt$sample_group, sep = "\t", header = TRUE)
  groupn <- grep("group", colnames(sample_group))

  if (length(groupn) == 0) {
    message("No group is identified in sample_group file. Make sure the head of sample_group file is like sample, group1, group2.")
  } else {
    sample_ids <- sample_group$sample
    if (is.null(sample_ids)) {
      # Without a sample column nothing can be matched; say so instead of
      # silently producing all-NA group columns.
      message("Warning: no 'sample' column found in sample_group file. All group values will be NA.")
    }

    # Position of every PCA sample in the group table (NA when absent).
    row_idx <- match(pca$sample, sample_ids)
    unmatched <- pca$sample[is.na(row_idx)]
    if (!is.null(sample_ids) && length(unmatched) > 0) {
      # NOTE(review): read.table() may alter column names of the expression
      # file (check.names), which is a possible cause of mismatches - confirm.
      message("Warning: ", length(unmatched),
              " sample(s) not found in sample_group file (group set to NA): ",
              paste(unmatched, collapse = ", "))
    }

    # Take all group columns in one step; drop = FALSE keeps a data frame
    # (and therefore the column names) even for a single group column.
    group_cols <- sample_group[row_idx, groupn, drop = FALSE]
    # Discard the subset's row names so that pcanew keeps the sample names.
    rownames(group_cols) <- NULL
    pcanew <- cbind(pcanew, group_cols)
  }
}

# Write output: the PCA table (plus any group columns) as CSV and as RDS,
# named <out_dir><project_code>_pca.csv / _pca.rds.
# Create the output directory first when it does not exist yet; otherwise
# write.csv() fails with "cannot open file".
target_dir <- sub("/$", "", out_dir)
if (nzchar(target_dir) && !dir.exists(target_dir)) {
  created <- dir.create(target_dir, recursive = TRUE, showWarnings = FALSE)
  if (!created) {
    stop("Cannot create output directory: ", target_dir, call. = FALSE)
  }
}

out_prefix <- paste0(out_dir, opt$project_code)
write.csv(pcanew, paste0(out_prefix, "_pca.csv"))
saveRDS(pcanew, paste0(out_prefix, "_pca.rds"))
########