RNA-seq Download QC analysis
Nie możesz wybrać więcej, niż 25 tematów Tematy muszą się zaczynać od litery lub cyfry, mogą zawierać myślniki ('-') i mogą mieć do 35 znaków.

92 lines
3.8KB

  1. #!/usr/bin/env Rscript
  2. # example:
  3. # Rscript RNAseq_sexcheck.R -i geneexp_log2fpkm.txt
  4. # Rscript RNAseq_sexcheck.R -i FUSCCTNBC_RNAseqShi.Complete_log2_448x45308_V15_190209.txt -p 111 -s ./sexgenelist.txt -e GeneSymbol
  5. suppressPackageStartupMessages(library("optparse"))
  6. # specify our desired options in a list
  7. # by default OptionParser will add an help option equivalent to
  8. # make_option(c("-h", "--help"), action="store_true", default=FALSE,
  9. # help="Show this help message and exit")
  10. option_list <- list(
  11. make_option(c("-o", "--out_dir"), type="character",default="./",
  12. help="The output directory [default ./]"),
  13. make_option(c("-i", "--input"),type="character", default=NULL,
  14. help="The input expression files. required!"),
  15. make_option(c("-e", "--type_gene_id"),type="character", default="EnsemblID",
  16. help="The type of gene symbol. Could be either of EnsemblID/EntrezID/GeneSymbol [default: EnsemblID]"),
  17. make_option(c("-b", "--pre_lowexpr_filtered"), metavar="FALSE",default=FALSE,
  18. help="Where pre-filterd low expressed genes. [default: FALSE]"),
  19. make_option(c("-s", "--sex_genes"),type="character", default="./sexgenelist.txt",
  20. help="File in tab-delimited format sex gene list with EnsemblID/EntrezID/GeneSymbol. [default: ./sexgenelist.txt ]"),
  21. make_option(c("-p", "--project_code"), type="character",default="rnaseq",
  22. help="Project code, which is used as prefix of output file. [default: rnaseq]")
  23. )
  24. # get command line options, if help option encountered print help and exit,
  25. # otherwise if options not found on command line then set defaults,
  26. opt <- parse_args(OptionParser(option_list=option_list))
  27. #pre analysis
  28. if (is.null(opt$input)){
  29. print_help(opt_parser)
  30. stop("At least one argument must be supplied (input file).", call.=FALSE)
  31. }
  32. ##import exp file
  33. out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="")
  34. logexpr<-read.table(opt$input,header=T,stringsAsFactors=F,row.names=1,check.names=F)
  35. #check exp file is log scale
  36. if(max(logexpr[,1])-min(logexpr[,1])>100){
  37. stop("sex check anlaysis should be conducted based on expression profile on log scale.", call.=FALSE)
  38. }
  39. ####import sex gene list
  40. sexgene<-read.delim(opt$sex_genes,header=T,stringsAsFactors=F,check.names=F)
  41. #
  42. if(grepl("Ensembl",opt$type_gene_id,ignore.case=T)){
  43. sexgenelist<-as.character(sexgene$EnsemblID)
  44. }
  45. if(grepl("Entrez",opt$type_gene_id,ignore.case=T)){
  46. sexgenelist<-as.character(sexgene$EntrezID)
  47. }
  48. if(grepl("Symbol",opt$type_gene_id,ignore.case=T)){
  49. sexgenelist<-as.character(sexgene$GeneSymbol)
  50. }
  51. sexexpr<-logexpr[rownames(logexpr) %in% sexgenelist, ]
  52. if(nrow(sexexpr)<=(length(sexgenelist)/2)){
  53. stop("Not sufficent expression profile sex specific genes were detected for sex prediction. Please check rowname is matched with type_gene_id in the command.", call.=FALSE)
  54. }
  55. #get median value without
  56. #if pre_lowexpr_filtered = FALSE, remove not expressed values before obtaining median value
  57. if (opt$pre_lowexpr_filtered){
  58. medians<-apply(logexpr,2,median)
  59. }else{
  60. minvalue<- min(as.numeric(logexpr)[!is.na(as.numeric(logexpr))])
  61. medians<-apply(logexpr,2,function(x){median(x[which(x> minvalue)])})
  62. }
  63. #sexexpr1<-sexexpr[apply(sexexpr,1,function(x){length(which(x<( -6)))}<13),]
  64. #if all of the genes expressed lower than median: female, else male
  65. sexpredict<-ifelse(rowSums(apply(sexexpr,1,function(x){ifelse(x-medians>0,1,0)}))>2,"Male","Female")
  66. sexpredict_tab<-data.frame(
  67. Sample=names(sexpredict),
  68. Sex=sexpredict
  69. )
  70. rownames(sexpredict_tab)<-c(1:nrow(sexpredict_tab))
  71. write.csv(sexpredict_tab,paste(out_dir,opt$project_code,"_sexpredict.csv",sep=""),quote=F,row.names=F)
  72. saveRDS(sexpredict_tab,paste(out_dir,opt$project_code,"_sexpredict.rds",sep=""))
  73. message("RNAseq_sexcheck.R finished!")