#!/usr/bin/env Rscript # example: # Rscript RNAseq_sexcheck.R -i geneexp_log2fpkm.txt # Rscript RNAseq_sexcheck.R -i FUSCCTNBC_RNAseqShi.Complete_log2_448x45308_V15_190209.txt -p 111 -s ./sexgenelist.txt -e GeneSymbol suppressPackageStartupMessages(library("optparse")) # specify our desired options in a list # by default OptionParser will add an help option equivalent to # make_option(c("-h", "--help"), action="store_true", default=FALSE, # help="Show this help message and exit") option_list <- list( make_option(c("-o", "--out_dir"), type="character",default="./", help="The output directory [default ./]"), make_option(c("-i", "--input"),type="character", default=NULL, help="The input expression files. required!"), make_option(c("-e", "--type_gene_id"),type="character", default="EnsemblID", help="The type of gene symbol. Could be either of EnsemblID/EntrezID/GeneSymbol [default: EnsemblID]"), make_option(c("-s", "--sex_genes"),type="character", default="./sexgenelist.txt", help="File in tab-delimited format sex gene list with EnsemblID/EntrezID/GeneSymbol. [default: ./sexgenelist.txt ]"), make_option(c("-p", "--project_code"), type="character",default="rnaseq", help="Project code, which is used as prefix of output file. [default: rnaseq]") ) # get command line options, if help option encountered print help and exit, # otherwise if options not found on command line then set defaults, opt <- parse_args(OptionParser(option_list=option_list)) message("NOTE: If low expresssed genes were filtered, this analysis might be successful. ") #pre analysis if (is.null(opt$input)){ print_help(opt_parser) stop("At least one argument must be supplied (input file).", call.=FALSE) } ##import exp file out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="") logexpr<-read.table(opt$input,header=T,stringsAsFactors=F,row.names=1,check.names=F) #check exp file is log scale if(max(logexpr[,1])-min(logexpr[,1])>100){ stop("sex check anlaysis should be conducted based on expression profile on log scale.", call.=FALSE) } ####import sex gene list sexgene<-read.delim(opt$sex_genes,header=T,stringsAsFactors=F,check.names=F) # if(grepl("Ensembl",opt$type_gene_id,ignore.case=T)){ message("The type of gene symbol is set: EnsembleID. Could be either of EnsemblID/EntrezID/GeneSymbol.") malegene<-as.character(sexgene$EnsemblID[grep("^M",sexgene$SexSpecific,ignore.case=T)]) femalegene<-as.character(sexgene$EnsemblID[grep("^F",sexgene$SexSpecific,ignore.case=T)]) } if(grepl("Entrez",opt$type_gene_id,ignore.case=T)){ message("The type of gene symbol is set: EntrezID. Could be either of EnsemblID/EntrezID/GeneSymbol.") malegene<-as.character(sexgene$EntrezID[grep("^M",sexgene$SexSpecific,ignore.case=T)]) femalegene<-as.character(sexgene$EntrezID[grep("^F",sexgene$SexSpecific,ignore.case=T)]) } if(grepl("Symbol",opt$type_gene_id,ignore.case=T)){ message("The type of gene symbol is set: GeneSymbol. Could be either of EnsemblID/EntrezID/GeneSymbol.") malegene<-as.character(sexgene$GeneSymbol[grep("^M",sexgene$SexSpecific,ignore.case=T)]) femalegene<-as.character(sexgene$GeneSymbol[grep("^F",sexgene$SexSpecific,ignore.case=T)]) } male_expr<-logexpr[rownames(logexpr) %in% malegene, ] female_expr<-logexpr[rownames(logexpr) %in% femalegene, ] if(nrow(male_expr)<(length(malegene))){ stop("Not sufficent expression profile male-specific genes were detected for sex prediction. Please check rowname is matched with type_gene_id in the command.", call.=FALSE) } if(nrow(female_expr)<(length(female_expr))){ stop("Not sufficent expression profile female-specific genes were detected for sex prediction. Please check rowname is matched with type_gene_id in the command.", call.=FALSE) } s<-c() male_expr_t<-t(male_expr) female_expr_t<-t(female_expr) for (i in 1:nrow(female_expr)){ s<-cbind(s,male_expr_t-female_expr_t[,i]) } sexpredict<-data.frame( SampleID=colnames(logexpr), Sex=apply(s,1,function(x){ifelse(length(which(x<0))>=8,"Female","Male")})) rownames(sexpredict)<-c(1:nrow(sexpredict)) write.csv(sexpredict,paste(out_dir,opt$project_code,"_sexpredict.csv",sep=""),quote=F,row.names=F) saveRDS(sexpredict,paste(out_dir,opt$project_code,"_sexpredict.rds",sep="")) message("RNAseq_sexcheck.R finished!")