|
|
@@ -1,91 +1,91 @@ |
|
|
|
#!/usr/bin/env Rscript |
|
|
|
# example: |
|
|
|
# Rscript RNAseq_sexcheck.R -i geneexp_log2fpkm.txt |
|
|
|
# Rscript RNAseq_sexcheck.R -i FUSCCTNBC_RNAseqShi.Complete_log2_448x45308_V15_190209.txt -p 111 -s ./sexgenelist.txt -e GeneSymbol |
|
|
|
|
|
|
|
suppressPackageStartupMessages(library("optparse")) |
|
|
|
|
|
|
|
# specify our desired options in a list |
|
|
|
# by default OptionParser will add an help option equivalent to |
|
|
|
# make_option(c("-h", "--help"), action="store_true", default=FALSE, |
|
|
|
# help="Show this help message and exit") |
|
|
|
|
|
|
|
option_list <- list( |
|
|
|
make_option(c("-o", "--out_dir"), type="character",default="./", |
|
|
|
help="The output directory [default ./]"), |
|
|
|
make_option(c("-i", "--input"),type="character", default=NULL, |
|
|
|
help="The input expression files. required!"), |
|
|
|
make_option(c("-e", "--type_gene_id"),type="character", default="EnsemblID", |
|
|
|
help="The type of gene symbol. Could be either of EnsemblID/EntrezID/GeneSymbol [default: EnsemblID]"), |
|
|
|
make_option(c("-b", "--pre_lowexpr_filtered"), metavar="FALSE",default=FALSE, |
|
|
|
help="Whether pre-filterd low expressed genes in the input file or not. [default: FALSE]"), |
|
|
|
make_option(c("-s", "--sex_genes"),type="character", default="./sexgenelist.txt", |
|
|
|
help="File in tab-delimited format sex gene list with EnsemblID/EntrezID/GeneSymbol. [default: ./sexgenelist.txt ]"), |
|
|
|
make_option(c("-p", "--project_code"), type="character",default="rnaseq", |
|
|
|
help="Project code, which is used as prefix of output file. [default: rnaseq]") |
|
|
|
) |
|
|
|
|
|
|
|
# get command line options, if help option encountered print help and exit, |
|
|
|
# otherwise if options not found on command line then set defaults, |
|
|
|
opt <- parse_args(OptionParser(option_list=option_list)) |
|
|
|
|
|
|
|
#pre analysis |
|
|
|
if (is.null(opt$input)){ |
|
|
|
print_help(opt_parser) |
|
|
|
stop("At least one argument must be supplied (input file).", call.=FALSE) |
|
|
|
} |
|
|
|
|
|
|
|
##import exp file |
|
|
|
out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="") |
|
|
|
|
|
|
|
logexpr<-read.table(opt$input,header=T,stringsAsFactors=F,row.names=1,check.names=F) |
|
|
|
|
|
|
|
#check exp file is log scale |
|
|
|
if(max(logexpr[,1])-min(logexpr[,1])>100){ |
|
|
|
stop("sex check anlaysis should be conducted based on expression profile on log scale.", call.=FALSE) |
|
|
|
} |
|
|
|
|
|
|
|
####import sex gene list |
|
|
|
sexgene<-read.delim(opt$sex_genes,header=T,stringsAsFactors=F,check.names=F) |
|
|
|
|
|
|
|
# |
|
|
|
if(grepl("Ensembl",opt$type_gene_id,ignore.case=T)){ |
|
|
|
sexgenelist<-as.character(sexgene$EnsemblID) |
|
|
|
} |
|
|
|
if(grepl("Entrez",opt$type_gene_id,ignore.case=T)){ |
|
|
|
sexgenelist<-as.character(sexgene$EntrezID) |
|
|
|
} |
|
|
|
if(grepl("Symbol",opt$type_gene_id,ignore.case=T)){ |
|
|
|
sexgenelist<-as.character(sexgene$GeneSymbol) |
|
|
|
} |
|
|
|
|
|
|
|
sexexpr<-logexpr[rownames(logexpr) %in% sexgenelist, ] |
|
|
|
|
|
|
|
if(nrow(sexexpr)<=(length(sexgenelist)/2)){ |
|
|
|
stop("Not sufficent expression profile sex specific genes were detected for sex prediction. Please check rowname is matched with type_gene_id in the command.", call.=FALSE) |
|
|
|
} |
|
|
|
|
|
|
|
#get median value without |
|
|
|
#if pre_lowexpr_filtered = FALSE, remove not expressed values before obtaining median value |
|
|
|
|
|
|
|
if (opt$pre_lowexpr_filtered){ |
|
|
|
medians<-apply(logexpr,2,median) |
|
|
|
}else{ |
|
|
|
minvalue<- min(logexpr) |
|
|
|
medians<-apply(logexpr,2,function(x){median(x[which(x> minvalue)])}) |
|
|
|
} |
|
|
|
|
|
|
|
#sexexpr1<-sexexpr[apply(sexexpr,1,function(x){length(which(x<( -6)))}<13),] |
|
|
|
|
|
|
|
#if all of the genes expressed lower than median: female, else male |
|
|
|
sexpredict<-ifelse(rowSums(apply(sexexpr,1,function(x){ifelse(x-medians>0,1,0)}))>0,"Male","Female") |
|
|
|
sexpredict_tab<-data.frame( |
|
|
|
Sample=names(sexpredict), |
|
|
|
Sex=sexpredict |
|
|
|
) |
|
|
|
rownames(sexpredict_tab)<-c(1:nrow(sexpredict_tab)) |
|
|
|
|
|
|
|
write.csv(sexpredict_tab,paste(out_dir,opt$project_code,"_sexpredict.csv",sep=""),quote=F,row.names=F) |
|
|
|
saveRDS(sexpredict_tab,paste(out_dir,opt$project_code,"_sexpredict.rds",sep="")) |
|
|
|
message("RNAseq_sexcheck.R finished!") |
|
|
|
|
|
|
|
#!/usr/bin/env Rscript
|
|
|
|
# example:
|
|
|
|
# Rscript RNAseq_sexcheck.R -i geneexp_log2fpkm.txt
|
|
|
|
# Rscript RNAseq_sexcheck.R -i FUSCCTNBC_RNAseqShi.Complete_log2_448x45308_V15_190209.txt -p 111 -s ./sexgenelist.txt -e GeneSymbol
|
|
|
|
|
|
|
|
suppressPackageStartupMessages(library("optparse"))
|
|
|
|
|
|
|
|
# specify our desired options in a list
|
|
|
|
# by default OptionParser will add an help option equivalent to
|
|
|
|
# make_option(c("-h", "--help"), action="store_true", default=FALSE,
|
|
|
|
# help="Show this help message and exit")
|
|
|
|
|
|
|
|
option_list <- list(
|
|
|
|
make_option(c("-o", "--out_dir"), type="character",default="./",
|
|
|
|
help="The output directory [default ./]"),
|
|
|
|
make_option(c("-i", "--input"),type="character", default=NULL,
|
|
|
|
help="The input expression files. required!"),
|
|
|
|
make_option(c("-e", "--type_gene_id"),type="character", default="EnsemblID",
|
|
|
|
help="The type of gene symbol. Could be either of EnsemblID/EntrezID/GeneSymbol [default: EnsemblID]"),
|
|
|
|
make_option(c("-b", "--pre_lowexpr_filtered"), metavar="FALSE",default=FALSE,
|
|
|
|
help="Where pre-filterd low expressed genes. [default: FALSE]"),
|
|
|
|
make_option(c("-s", "--sex_genes"),type="character", default="./sexgenelist.txt",
|
|
|
|
help="File in tab-delimited format sex gene list with EnsemblID/EntrezID/GeneSymbol. [default: ./sexgenelist.txt ]"),
|
|
|
|
make_option(c("-p", "--project_code"), type="character",default="rnaseq",
|
|
|
|
help="Project code, which is used as prefix of output file. [default: rnaseq]")
|
|
|
|
)
|
|
|
|
|
|
|
|
# get command line options, if help option encountered print help and exit,
|
|
|
|
# otherwise if options not found on command line then set defaults,
|
|
|
|
opt <- parse_args(OptionParser(option_list=option_list))
|
|
|
|
|
|
|
|
#pre analysis
|
|
|
|
if (is.null(opt$input)){
|
|
|
|
print_help(opt_parser)
|
|
|
|
stop("At least one argument must be supplied (input file).", call.=FALSE)
|
|
|
|
}
|
|
|
|
|
|
|
|
##import exp file
|
|
|
|
out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="")
|
|
|
|
|
|
|
|
logexpr<-read.table(opt$input,header=T,stringsAsFactors=F,row.names=1,check.names=F)
|
|
|
|
|
|
|
|
#check exp file is log scale
|
|
|
|
if(max(logexpr[,1])-min(logexpr[,1])>100){
|
|
|
|
stop("sex check anlaysis should be conducted based on expression profile on log scale.", call.=FALSE)
|
|
|
|
}
|
|
|
|
|
|
|
|
####import sex gene list
|
|
|
|
sexgene<-read.delim(opt$sex_genes,header=T,stringsAsFactors=F,check.names=F)
|
|
|
|
|
|
|
|
#
|
|
|
|
if(grepl("Ensembl",opt$type_gene_id,ignore.case=T)){
|
|
|
|
sexgenelist<-as.character(sexgene$EnsemblID)
|
|
|
|
}
|
|
|
|
if(grepl("Entrez",opt$type_gene_id,ignore.case=T)){
|
|
|
|
sexgenelist<-as.character(sexgene$EntrezID)
|
|
|
|
}
|
|
|
|
if(grepl("Symbol",opt$type_gene_id,ignore.case=T)){
|
|
|
|
sexgenelist<-as.character(sexgene$GeneSymbol)
|
|
|
|
}
|
|
|
|
|
|
|
|
sexexpr<-logexpr[rownames(logexpr) %in% sexgenelist, ]
|
|
|
|
|
|
|
|
if(nrow(sexexpr)<=(length(sexgenelist)/2)){
|
|
|
|
stop("Not sufficent expression profile sex specific genes were detected for sex prediction. Please check rowname is matched with type_gene_id in the command.", call.=FALSE)
|
|
|
|
}
|
|
|
|
|
|
|
|
#get median value without
|
|
|
|
#if pre_lowexpr_filtered = FALSE, remove not expressed values before obtaining median value
|
|
|
|
|
|
|
|
if (opt$pre_lowexpr_filtered){
|
|
|
|
medians<-apply(logexpr,2,median)
|
|
|
|
}else{
|
|
|
|
minvalue<- min(as.numeric(logexpr)[!is.na(as.numeric(logexpr))])
|
|
|
|
medians<-apply(logexpr,2,function(x){median(x[which(x> minvalue)])})
|
|
|
|
}
|
|
|
|
|
|
|
|
#sexexpr1<-sexexpr[apply(sexexpr,1,function(x){length(which(x<( -6)))}<13),]
|
|
|
|
|
|
|
|
#if all of the genes expressed lower than median: female, else male
|
|
|
|
sexpredict<-ifelse(rowSums(apply(sexexpr,1,function(x){ifelse(x-medians>0,1,0)}))>2,"Male","Female")
|
|
|
|
sexpredict_tab<-data.frame(
|
|
|
|
Sample=names(sexpredict),
|
|
|
|
Sex=sexpredict
|
|
|
|
)
|
|
|
|
rownames(sexpredict_tab)<-c(1:nrow(sexpredict_tab))
|
|
|
|
|
|
|
|
write.csv(sexpredict_tab,paste(out_dir,opt$project_code,"_sexpredict.csv",sep=""),quote=F,row.names=F)
|
|
|
|
saveRDS(sexpredict_tab,paste(out_dir,opt$project_code,"_sexpredict.rds",sep=""))
|
|
|
|
message("RNAseq_sexcheck.R finished!")
|
|
|
|
|