| @@ -17,11 +17,9 @@ option_list <- list( | |||
| help="The input expression files. required!"), | |||
| make_option(c("-e", "--type_gene_id"),type="character", default="EnsemblID", | |||
| help="The type of gene symbol. Could be either of EnsemblID/EntrezID/GeneSymbol [default: EnsemblID]"), | |||
| make_option(c("-b", "--pre_lowexpr_filtered"), metavar="FALSE",default=FALSE, | |||
| help="Where pre-filterd low expressed genes. [default: FALSE]"), | |||
| make_option(c("-s", "--sex_genes"),type="character", default="./sexgenelist.txt", | |||
| help="File in tab-delimited format sex gene list with EnsemblID/EntrezID/GeneSymbol. [default: ./sexgenelist.txt ]"), | |||
| make_option(c("-p", "--project_code"), type="character",default="rnaseq", | |||
| make_option(c("-p", "--project_code"), type="character",default="rnaseq", | |||
| help="Project code, which is used as prefix of output file. [default: rnaseq]") | |||
| ) | |||
| @@ -29,6 +27,8 @@ option_list <- list( | |||
| # otherwise if options not found on command line then set defaults, | |||
| # Bind the parser to a name first: the missing-input branch below calls | |||
| # print_help(opt_parser), which needs this object to exist. | |||
| opt_parser <- OptionParser(option_list=option_list) | |||
| opt <- parse_args(opt_parser) | |||
| message("NOTE: If low expresssed genes were filtered, this analysis might be successful. ") | |||
| #pre analysis | |||
| if (is.null(opt$input)){ | |||
| print_help(opt_parser) | |||
| @@ -50,42 +50,49 @@ sexgene<-read.delim(opt$sex_genes,header=T,stringsAsFactors=F,check.names=F) | |||
| # | |||
| # Map the --type_gene_id value onto the matching ID column of the sex gene table. | |||
| # Patterns are tested case-insensitively in this order; if more than one matches, | |||
| # the last match wins (same outcome as a chain of independent if statements). | |||
| id_patterns<-c(EnsemblID="Ensembl",EntrezID="Entrez",GeneSymbol="Symbol") | |||
| id_hits<-names(id_patterns)[vapply(id_patterns,function(p){grepl(p,opt$type_gene_id,ignore.case=TRUE)},logical(1))] | |||
| # Fail early with a clear message instead of leaving the gene lists undefined. | |||
| if(length(id_hits)==0){ | |||
|   stop("Unknown type_gene_id. Could be either of EnsemblID/EntrezID/GeneSymbol.", call.=FALSE) | |||
| } | |||
| # SexSpecific values starting with M / F (any case) mark male- / female-specific genes. | |||
| is_male<-grepl("^M",sexgene$SexSpecific,ignore.case=TRUE) | |||
| is_female<-grepl("^F",sexgene$SexSpecific,ignore.case=TRUE) | |||
| for(id_col in id_hits){ | |||
|   message("The type of gene symbol is set: ",id_col,". Could be either of EnsemblID/EntrezID/GeneSymbol.") | |||
|   # All listed sex genes, then the male- and female-specific subsets, as character IDs. | |||
|   sexgenelist<-as.character(sexgene[[id_col]]) | |||
|   malegene<-as.character(sexgene[[id_col]][is_male]) | |||
|   femalegene<-as.character(sexgene[[id_col]][is_female]) | |||
| } | |||
| # Expression rows of all listed sex genes. drop=FALSE keeps the two-dimensional | |||
| # shape even when a single row (or a single sample column) is selected, so the | |||
| # nrow() checks below always receive a number. | |||
| sexexpr<-logexpr[rownames(logexpr) %in% sexgenelist, ,drop=FALSE] | |||
| # Stop unless more than half of the listed sex genes are present in the expression table. | |||
| if(nrow(sexexpr)<=(length(sexgenelist)/2)){ | |||
|   stop("Not sufficent expression profile sex specific genes were detected for sex prediction. Please check rowname is matched with type_gene_id in the command.", call.=FALSE) | |||
| } | |||
| male_expr<-logexpr[rownames(logexpr) %in% malegene, ,drop=FALSE] | |||
| female_expr<-logexpr[rownames(logexpr) %in% femalegene, ,drop=FALSE] | |||
| #get median value without | |||
| #if pre_lowexpr_filtered = FALSE, remove not expressed values before obtaining median value | |||
| # Stop if fewer expression rows were matched than male-specific genes are listed. | |||
| if(nrow(male_expr)<(length(malegene))){ | |||
|   stop("Not sufficent expression profile male-specific genes were detected for sex prediction. Please check rowname is matched with type_gene_id in the command.", call.=FALSE) | |||
| } | |||
| # Per-column medians of logexpr: over all values when low-expressed genes were | |||
| # pre-filtered, otherwise only over values above the global non-NA minimum. | |||
| if (opt$pre_lowexpr_filtered){ | |||
| medians<-apply(logexpr,2,median) | |||
| }else{ | |||
| minvalue<- min(as.numeric(logexpr)[!is.na(as.numeric(logexpr))]) | |||
| medians<-apply(logexpr,2,function(x){median(x[which(x> minvalue)])}) | |||
| # Compare against the number of listed female-specific genes, mirroring the male | |||
| # check. length() of the expression subset is its cell count (matrix) or column | |||
| # count (data frame), not the number of genes, so it must not be used here. | |||
| if(nrow(female_expr)<(length(femalegene))){ | |||
| stop("Not sufficent expression profile female-specific genes were detected for sex prediction. Please check rowname is matched with type_gene_id in the command.", call.=FALSE) | |||
| } | |||
| # Transpose so that samples are in rows and genes in columns. | |||
| male_expr_t<-t(male_expr) | |||
| female_expr_t<-t(female_expr) | |||
| # For every female-specific gene, subtract its per-sample expression from each | |||
| # male-specific gene. s ends up with one column per (male gene, female gene) pair. | |||
| # seq_len() avoids the 1:0 trap, and the pieces are bound once rather than | |||
| # growing s with cbind() on every iteration. | |||
| s<-do.call(cbind,lapply(seq_len(ncol(female_expr_t)),function(i){ | |||
|   male_expr_t-female_expr_t[,i] | |||
| })) | |||
| #sexexpr1<-sexexpr[apply(sexexpr,1,function(x){length(which(x<( -6)))}<13),] | |||
| # A sample is called "Female" when at least this many male-minus-female | |||
| # differences are negative, otherwise "Male". | |||
| # NOTE(review): 8 is a fixed cut-off; presumably tuned to the default sex gene list -- confirm. | |||
| min_negative_diffs<-8 | |||
| sexpredict<-data.frame( | |||
|   SampleID=colnames(logexpr), | |||
|   # One call per row of s (one row per sample); NA differences are not counted. | |||
|   Sex=apply(s,1,function(x){if(sum(x<0,na.rm=TRUE)>=min_negative_diffs) "Female" else "Male"})) | |||
| # Replace the sample-name row names with plain 1..n indices. | |||
| rownames(sexpredict)<-seq_len(nrow(sexpredict)) | |||
| # Median-based call: for each sample, count how many rows of sexexpr (sex genes) | |||
| # lie above that sample's entry in medians; more than 2 such genes gives "Male", | |||
| # otherwise "Female". The result is a character vector named by sample. | |||
| sexpredict<-ifelse(rowSums(apply(sexexpr,1,function(x){ifelse(x-medians>0,1,0)}))>2,"Male","Female") | |||
| # Two-column table (Sample, Sex) built from the named vector above. | |||
| sexpredict_tab<-data.frame( | |||
| Sample=names(sexpredict), | |||
| Sex=sexpredict | |||
| ) | |||
| # Use plain 1..n row names instead of the sample names. | |||
| rownames(sexpredict_tab)<-c(1:nrow(sexpredict_tab)) | |||
| # Both result objects go to <out_dir><project_code>_sexpredict.csv / .rds. | |||
| csv_path<-paste0(out_dir,opt$project_code,"_sexpredict.csv") | |||
| rds_path<-paste0(out_dir,opt$project_code,"_sexpredict.rds") | |||
| write.csv(sexpredict,file=csv_path,quote=FALSE,row.names=FALSE) | |||
| saveRDS(sexpredict,file=rds_path) | |||
| # Same target paths as above, so these two writes replace the files just written. | |||
| write.csv(sexpredict_tab,file=csv_path,quote=FALSE,row.names=FALSE) | |||
| saveRDS(sexpredict_tab,file=rds_path) | |||
| message("RNAseq_sexcheck.R finished!") | |||