You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

148 lines
4.6KB

  1. ###################
  2. ## cn.mops ##
  3. ## Jiyang Zhang ##
  4. ## 2019.01.21 ##
  5. ###################
  6. #
  7. #1 Introduction
  8. ##The cn.mops package is part of the Bioconductor (http://www.bioconductor.org) project.
  9. ##The package allows to detect copy number variations (CNVs) from next generation sequencing (NGS) data sets based on a generative model.
  10. ##Please visit http://www.bioinf.jku.at/software/cnmops/cnmops.htmlfor additional information.
  11. #
  12. #2 Set up
  13. #
  14. ##2.1 load package
  15. library(cn.mops)
  16. #
  17. ##2.2 load options
  18. #
  19. ###2.2.1 Collect arguments
  20. args <- commandArgs(TRUE)
  21. #
  22. ###2.2.2 Help section
  23. #### Default setting when no arguments passed
  24. if(length(args) < 1) {
  25. args <- c("--help")
  26. }
  27. #### Help section
  28. if("--help" %in% args) {
  29. cat("
  30. The R Script
  31. Arguments:
  32. --Tumor=the name of the TUMOR sample BAMfile - string
  33. --Normal=the name of the NORMAL sample BAMfile - string
  34. --TumorSampleID=the name of the TUMOR sample - string
  35. --bed_file=BED (Browser Extensible Data) lines have four required fields included chr, start, end and gene annotation. - string
  36. --workDir=working directory - character
  37. --outputDir=where you want to output the pictures - character
  38. --help - print this text
  39. Example:
  40. Rscript cn.mops.R --Tumor=TumorSample.bam --Normal=NormalSample.bam --TumorSampleID=TumorSampleID --bed_file=bedFile --workDir=./ --outputDir=./ \n\n")
  41. q(save="no")
  42. }
  43. #
  44. ####2.2.3 Parse arguments (we expect the form --arg=value)
  45. parseArgs <- function(x) strsplit(sub("^--", "", x), "=")
  46. argsDF <- as.data.frame(do.call("rbind", parseArgs(args)))
  47. argsL <- as.list(as.character(argsDF$V2))
  48. names(argsL) <- argsDF$V1
  49. args<-argsL
  50. #
  51. ## Arg1 default
  52. if(is.null(args$Tumor)) {
  53. stop("Tumor sample bamFile is missing")
  54. }else{
  55. Tumor=args$Tumor
  56. }
  57. #
  58. ## Arg2 default
  59. if(is.null(args$Normal)) {
  60. stop("Normal sample bamFileis missing")
  61. }else{
  62. Normal=args$Normal
  63. }
  64. #
  65. ## Arg3 default
  66. if(is.null(args$TumorSampleID)) {
  67. stop("TumorSampleID missing")
  68. }else{
  69. TumorSampleID=args$TumorSampleID
  70. }
  71. #
  72. ## Arg4 default
  73. if(is.null(args$bed_file)) {
  74. stop("bed_file is missing")
  75. }else{
  76. bed_file=args$bed_file
  77. }
  78. #
  79. ## Arg5 default
  80. if(is.null(args$workDir)) {
  81. stop("workDir is missing")
  82. }else{
  83. workDir=args$workDir
  84. }
  85. #
  86. ## Arg6 default
  87. if(is.null(args$outputDir)) {
  88. stop("outputDir is missing")
  89. }else{
  90. outputDir=args$outputDir
  91. }
  92. #
  93. #3 Run cn.mops
  94. # Tumor <- c("Illumina_pt2_B1700.chr20_X.sorted.deduped.bam")
  95. # Normal <- c("Illumina_pt2_B17NC.chr20_X.sorted.deduped.bam")
  96. # workDir <- "/home/zhangjiyang/shiJzhiP/shijianzhiP2/scripts"
  97. # outputDir <- "/home/zhangjiyang/shiJzhiP/shijianzhiP2/scripts"
  98. # bed_file <- "Illumina_pt2_exonanno_exonfrom1_chr20_X.bed"
  99. #
  100. ##3.1 BAM files and bed file as input.
  101. BAMFiles <- c(Tumor,Normal)
  102. segments <- read.table(paste0(workDir,"/", bed_file),sep="\t",as.is=TRUE)
  103. #
  104. ##3.2 Get read counts from BAM.
  105. gr <- GRanges(segments[,1],IRanges(segments[,2],segments[,3]))
  106. #
  107. ##3.3 The result object
  108. sample <- getSegmentReadCountsFromBAM(BAMFiles,GR=gr,mode="paired")
  109. resRef <- referencecn.mops(cases=sample[,1],controls=sample[,2],
  110. classes=c("CN0", "CN1", "CN2", "CN3", "CN4", "CN5", "CN6",
  111. "CN7","CN8","CN16","CN32","CN64","CN128"),
  112. I = c(0.025, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 8, 16, 32, 64),
  113. segAlgorithm="DNAcopy")
  114. result<- calcIntegerCopyNumbers(resRef)
  115. CNVs <- as.data.frame(cnvs(result))
  116. colnames(segments) <- c("chr","start","end","anno")
  117. colnames(CNVs)[colnames(CNVs)=="seqnames"] <- c("chr")
  118. bed.anno <- segments
  119. bed.anno$start_idx <- paste(bed.anno$chr,bed.anno$start,sep="_")
  120. bed.anno$end_idx <- paste(bed.anno$chr,bed.anno$end,sep="_")
  121. CNV_number <- nrow(CNVs)
  122. Res <- data.frame()
  123. for(i in 1:CNV_number) {
  124. CNVs.start.idx <- paste(CNVs[i,]$chr,CNVs[i,]$start,sep="_")
  125. CNVs.end.idx <- paste(CNVs[i,]$chr,CNVs[i,]$end,sep="_")
  126. exon_start <- rownames(bed.anno[bed.anno$start_idx%in%CNVs.start.idx,])
  127. exon_end <- rownames(bed.anno[bed.anno$end_idx%in%CNVs.end.idx,])
  128. res <- CNVs[i,]
  129. res$gene <- unlist(strsplit(bed.anno[exon_start,]$anno,split = "_"))[1]
  130. res$exon_start <- unlist(strsplit(bed.anno[exon_start,]$anno,split = "_"))[2]
  131. res$exon_end <- unlist(strsplit(bed.anno[exon_end,]$anno,split = "_"))[2]
  132. Res <- rbind(Res,res)
  133. }
  134. Res <- data.frame(sampleName=Res$sampleName,chr=Res$chr,
  135. start=Res$start,end=Res$end,gene=Res$gene,
  136. exon_start=Res$exon_start,exon_end=Res$exon_end,
  137. CN=Res$CN)
  138. #
  139. ##3.4 Output
  140. write.table(Res,paste0(outputDir,"/",TumorSampleID,"cn.mops.res.txt"),sep="\t",
  141. col.names=T,row.names=F,quote=F)