소스 검색

上传文件至 '.'

master
yingyu 5 년 전
부모
커밋
4829ad94a6
1개의 변경된 파일34개의 추가작업 그리고 27개의 파일을 삭제
  1. +34
    -27
      RNAseq_sexcheck.R

+ 34
- 27
RNAseq_sexcheck.R 파일 보기

@@ -17,11 +17,9 @@ option_list <- list(
help="The input expression files. required!"),
make_option(c("-e", "--type_gene_id"),type="character", default="EnsemblID",
help="The type of gene symbol. Could be either of EnsemblID/EntrezID/GeneSymbol [default: EnsemblID]"),
make_option(c("-b", "--pre_lowexpr_filtered"), metavar="FALSE",default=FALSE,
help="Where pre-filterd low expressed genes. [default: FALSE]"),
make_option(c("-s", "--sex_genes"),type="character", default="./sexgenelist.txt",
help="File in tab-delimited format sex gene list with EnsemblID/EntrezID/GeneSymbol. [default: ./sexgenelist.txt ]"),
make_option(c("-p", "--project_code"), type="character",default="rnaseq",
make_option(c("-p", "--project_code"), type="character",default="rnaseq",
help="Project code, which is used as prefix of output file. [default: rnaseq]")
)
@@ -29,6 +27,8 @@ option_list <- list(
# otherwise if options not found on command line then set defaults,
# Build the parser once and keep it in `opt_parser`, so that the later
# print_help(opt_parser) call has an object to print; then parse the command
# line into `opt`.
opt_parser <- OptionParser(option_list=option_list)
opt <- parse_args(opt_parser)
message("NOTE: If low expresssed genes were filtered, this analysis might be successful. ")
#pre analysis
if (is.null(opt$input)){
print_help(opt_parser)
@@ -50,42 +50,49 @@ sexgene<-read.delim(opt$sex_genes,header=T,stringsAsFactors=F,check.names=F)
#
# Pick the identifier column of the sex-gene table that corresponds to
# --type_gene_id (case-insensitive substring match) and derive from it:
#   sexgenelist - all listed sex-specific gene IDs
#   malegene    - IDs whose SexSpecific value starts with "M"
#   femalegene  - IDs whose SexSpecific value starts with "F"
# The three ID types are tried in order and a later match overrides an
# earlier one, exactly as three independent `if` statements would.
id_types <- list(
  list(pattern = "Ensembl", column = "EnsemblID",
       note = "The type of gene symbol is set: EnsembleID. Could be either of EnsemblID/EntrezID/GeneSymbol."),
  list(pattern = "Entrez", column = "EntrezID",
       note = "The type of gene symbol is set: EntrezID. Could be either of EnsemblID/EntrezID/GeneSymbol."),
  list(pattern = "Symbol", column = "GeneSymbol",
       note = "The type of gene symbol is set: GeneSymbol. Could be either of EnsemblID/EntrezID/GeneSymbol.")
)
id_type_found <- FALSE
for (id_type in id_types) {
  if (grepl(id_type[["pattern"]], opt$type_gene_id, ignore.case = TRUE)) {
    ids <- as.character(sexgene[[id_type[["column"]]]])
    sexgenelist <- ids
    message(id_type[["note"]])
    malegene <- ids[grep("^M", sexgene$SexSpecific, ignore.case = TRUE)]
    femalegene <- ids[grep("^F", sexgene$SexSpecific, ignore.case = TRUE)]
    id_type_found <- TRUE
  }
}
# Fail early with a clear message instead of hitting an
# "object 'sexgenelist' not found" error further down.
if (!id_type_found) {
  stop("Unknown type_gene_id: ", opt$type_gene_id,
       ". Must be one of EnsemblID/EntrezID/GeneSymbol.", call. = FALSE)
}
# Keep only the rows of the expression data whose row names are listed
# sex-specific genes. drop = FALSE keeps every subset two-dimensional even
# when a single row (or a single sample column) is selected, so the nrow()
# checks and the later t() calls behave consistently.
# NOTE(review): logexpr is defined outside this view; presumably genes are
# rows and samples are columns - confirm.
sexexpr <- logexpr[rownames(logexpr) %in% sexgenelist, , drop = FALSE]
# Abort when at most half of the listed sex genes are present in the data.
if (nrow(sexexpr) <= (length(sexgenelist) / 2)) {
  stop("Not sufficent expression profile sex specific genes were detected for sex prediction. Please check rowname is matched with type_gene_id in the command.", call.=FALSE)
}
male_expr <- logexpr[rownames(logexpr) %in% malegene, , drop = FALSE]
female_expr <- logexpr[rownames(logexpr) %in% femalegene, , drop = FALSE]
# Get the per-sample median expression value.
# If pre_lowexpr_filtered = FALSE, drop non-expressed values (those equal to the global minimum) before taking the median.
# Every listed male-specific gene must have a matching row in the expression
# data; abort as soon as at least one is missing.
if (length(malegene) > nrow(male_expr)) {
  stop(
    "Not sufficent expression profile male-specific genes were detected for sex prediction. Please check rowname is matched with type_gene_id in the command.",
    call. = FALSE
  )
}
if (opt$pre_lowexpr_filtered){
medians<-apply(logexpr,2,median)
}else{
minvalue<- min(as.numeric(logexpr)[!is.na(as.numeric(logexpr))])
medians<-apply(logexpr,2,function(x){median(x[which(x> minvalue)])})
# Every listed female-specific gene must have a matching row in the expression
# data. The row count is compared against the number of listed genes
# (length(femalegene)), mirroring the male-specific check; length(female_expr)
# would count cells/columns of the expression subset instead.
if (nrow(female_expr) < length(femalegene)) {
  stop("Not sufficent expression profile female-specific genes were detected for sex prediction. Please check rowname is matched with type_gene_id in the command.", call.=FALSE)
}
# Build `s`: one row per sample and one column per (female gene, male gene)
# pair, holding male-gene expression minus female-gene expression.
# The transposed subsets have samples as rows; subtracting one female-gene
# column recycles it down every male-gene column.
male_expr_t <- t(male_expr)
female_expr_t <- t(female_expr)
# seq_len() avoids the 1:0 trap when no female gene row is present, and the
# blocks are bound once instead of growing `s` inside a loop.
s <- do.call(
  cbind,
  lapply(seq_len(nrow(female_expr)), function(i) {
    male_expr_t - female_expr_t[, i]
  })
)
#sexexpr1<-sexexpr[apply(sexexpr,1,function(x){length(which(x<( -6)))}<13),]
# Call a sample "Female" when at least 8 of its male-minus-female differences
# in `s` are negative, otherwise "Male".
# NOTE(review): the threshold 8 is hard-coded; presumably tuned for the
# default sex gene list - confirm before using another list.
sexpredict <- data.frame(
  SampleID = colnames(logexpr),
  Sex = apply(s, 1, function(x) {
    # which() ignores NA comparisons, so missing differences are not counted
    if (length(which(x < 0)) >= 8) "Female" else "Male"
  })
)
# Replace the sample-name row names with a plain running index.
rownames(sexpredict) <- seq_len(nrow(sexpredict))
# Median-based prediction: for each sample, count the genes in sexexpr whose
# expression exceeds that sample's entry in `medians`; the sample is called
# "Male" when more than 2 genes do, otherwise "Female".
# NOTE(review): this reassigns `sexpredict` as a character vector - confirm
# which definition of `sexpredict` is meant to remain in the script.
sexpredict<-ifelse(rowSums(apply(sexexpr,1,function(x){ifelse(x-medians>0,1,0)}))>2,"Male","Female")
# Two-column table (sample name, predicted sex) built from the vector above.
sexpredict_tab<-data.frame(
Sample=names(sexpredict),
Sex=sexpredict
)
# Replace the row names with a plain running index.
rownames(sexpredict_tab)<-c(1:nrow(sexpredict_tab))
# Write the prediction as <out_dir><project_code>_sexpredict.csv and .rds.
# NOTE(review): the sexpredict and sexpredict_tab calls build identical file
# paths, so the later pair overwrites the earlier one - presumably only one
# pair is meant to remain; confirm.
write.csv(sexpredict,paste(out_dir,opt$project_code,"_sexpredict.csv",sep=""),quote=F,row.names=F)
saveRDS(sexpredict,paste(out_dir,opt$project_code,"_sexpredict.rds",sep=""))
write.csv(sexpredict_tab,paste(out_dir,opt$project_code,"_sexpredict.csv",sep=""),quote=F,row.names=F)
saveRDS(sexpredict_tab,paste(out_dir,opt$project_code,"_sexpredict.rds",sep=""))
message("RNAseq_sexcheck.R finished!")

Loading…
취소
저장