RNA-seq Download QC analysis
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

99 lines
4.3KB

  1. #!/usr/bin/env Rscript
  2. # example:
  3. # Rscript RNAseq_sexcheck.R -i geneexp_log2fpkm.txt
  4. # Rscript RNAseq_sexcheck.R -i FUSCCTNBC_RNAseqShi.Complete_log2_448x45308_V15_190209.txt -p 111 -s ./sexgenelist.txt -e GeneSymbol
  5. suppressPackageStartupMessages(library("optparse"))
  6. # specify our desired options in a list
  7. # by default OptionParser will add an help option equivalent to
  8. # make_option(c("-h", "--help"), action="store_true", default=FALSE,
  9. # help="Show this help message and exit")
  10. option_list <- list(
  11. make_option(c("-o", "--out_dir"), type="character",default="./",
  12. help="The output directory [default ./]"),
  13. make_option(c("-i", "--input"),type="character", default=NULL,
  14. help="The input expression files. required!"),
  15. make_option(c("-e", "--type_gene_id"),type="character", default="EnsemblID",
  16. help="The type of gene symbol. Could be either of EnsemblID/EntrezID/GeneSymbol [default: EnsemblID]"),
  17. make_option(c("-s", "--sex_genes"),type="character", default="./sexgenelist.txt",
  18. help="File in tab-delimited format sex gene list with EnsemblID/EntrezID/GeneSymbol. [default: ./sexgenelist.txt ]"),
  19. make_option(c("-p", "--project_code"), type="character",default="rnaseq",
  20. help="Project code, which is used as prefix of output file. [default: rnaseq]")
  21. )
  22. # get command line options, if help option encountered print help and exit,
  23. # otherwise if options not found on command line then set defaults,
  24. opt <- parse_args(OptionParser(option_list=option_list))
  25. message("NOTE: If low expresssed genes were filtered, this analysis might be successful. ")
  26. #pre analysis
  27. if (is.null(opt$input)){
  28. print_help(opt_parser)
  29. stop("At least one argument must be supplied (input file).", call.=FALSE)
  30. }
  31. ##import exp file
  32. out_dir<-paste(gsub("/$","",opt$out_dir),"/",sep="")
  33. logexpr<-read.table(opt$input,header=T,stringsAsFactors=F,row.names=1,check.names=F)
  34. #check exp file is log scale
  35. if(max(logexpr[,1])-min(logexpr[,1])>100){
  36. stop("sex check anlaysis should be conducted based on expression profile on log scale.", call.=FALSE)
  37. }
  38. ####import sex gene list
  39. sexgene<-read.delim(opt$sex_genes,header=T,stringsAsFactors=F,check.names=F)
  40. #
  41. if(grepl("Ensembl",opt$type_gene_id,ignore.case=T)){
  42. message("The type of gene symbol is set: EnsembleID. Could be either of EnsemblID/EntrezID/GeneSymbol.")
  43. malegene<-as.character(sexgene$EnsemblID[grep("^M",sexgene$SexSpecific,ignore.case=T)])
  44. femalegene<-as.character(sexgene$EnsemblID[grep("^F",sexgene$SexSpecific,ignore.case=T)])
  45. }
  46. if(grepl("Entrez",opt$type_gene_id,ignore.case=T)){
  47. message("The type of gene symbol is set: EntrezID. Could be either of EnsemblID/EntrezID/GeneSymbol.")
  48. malegene<-as.character(sexgene$EntrezID[grep("^M",sexgene$SexSpecific,ignore.case=T)])
  49. femalegene<-as.character(sexgene$EntrezID[grep("^F",sexgene$SexSpecific,ignore.case=T)])
  50. }
  51. if(grepl("Symbol",opt$type_gene_id,ignore.case=T)){
  52. message("The type of gene symbol is set: GeneSymbol. Could be either of EnsemblID/EntrezID/GeneSymbol.")
  53. malegene<-as.character(sexgene$GeneSymbol[grep("^M",sexgene$SexSpecific,ignore.case=T)])
  54. femalegene<-as.character(sexgene$GeneSymbol[grep("^F",sexgene$SexSpecific,ignore.case=T)])
  55. }
  56. male_expr<-logexpr[rownames(logexpr) %in% malegene, ]
  57. female_expr<-logexpr[rownames(logexpr) %in% femalegene, ]
  58. if(nrow(male_expr)<(length(malegene))){
  59. stop("Not sufficent expression profile male-specific genes were detected for sex prediction. Please check rowname is matched with type_gene_id in the command.", call.=FALSE)
  60. }
  61. if(nrow(female_expr)<(length(female_expr))){
  62. stop("Not sufficent expression profile female-specific genes were detected for sex prediction. Please check rowname is matched with type_gene_id in the command.", call.=FALSE)
  63. }
  64. s<-c()
  65. male_expr_t<-t(male_expr)
  66. female_expr_t<-t(female_expr)
  67. for (i in 1:nrow(female_expr)){
  68. s<-cbind(s,male_expr_t-female_expr_t[,i])
  69. }
  70. sexpredict<-data.frame(
  71. SampleID=colnames(logexpr),
  72. Sex=apply(s,1,function(x){ifelse(length(which(x<0))>=8,"Female","Male")}))
  73. rownames(sexpredict)<-c(1:nrow(sexpredict))
  74. write.csv(sexpredict,paste(out_dir,opt$project_code,"_sexpredict.csv",sep=""),quote=F,row.names=F)
  75. saveRDS(sexpredict,paste(out_dir,opt$project_code,"_sexpredict.rds",sep=""))
  76. message("RNAseq_sexcheck.R finished!")