################### ## cn.mops ## ## Jiyang Zhang ## ## 2019.01.21 ## ################### # #1 Introduction ##The cn.mops package is part of the Bioconductor (http://www.bioconductor.org) project. ##The package allows to detect copy number variations (CNVs) from next generation sequencing (NGS) data sets based on a generative model. ##Please visit http://www.bioinf.jku.at/software/cnmops/cnmops.htmlfor additional information. # #2 Set up # ##2.1 load package library(cn.mops) #cn.mops_1.16.2 # ##2.2 load options # ###2.2.1 Collect arguments args <- commandArgs(TRUE) # ###2.2.2 Help section #### Default setting when no arguments passed if(length(args) < 1) { args <- c("--help") } #### Help section if("--help" %in% args) { cat(" The R Script Arguments: --Tumor=the name of the TUMOR sample BAMfile - string --Normal=the name of the NORMAL sample BAMfile - string --TumorSampleID=the name of the TUMOR sample - string --bed_file=BED (Browser Extensible Data) lines have four required fields included chr, start, end and gene annotation. - string --workDir=working directory - character --help - print this text Example: Rscript cn.mops.R --Tumor=TumorSample.bam --Normal=NormalSample.bam --TumorSampleID=TumorSampleID --bed_file=bedFile --workDir=./ \n\n") #--outputDir=./ q(save="no") } # ####2.2.3 Parse arguments (we expect the form --arg=value) parseArgs <- function(x) strsplit(sub("^--", "", x), "=") argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) argsL <- as.list(as.character(argsDF$V2)) names(argsL) <- argsDF$V1 args<-argsL # ## Arg1 default if(is.null(args$Tumor)) { stop("Tumor sample bamFile is missing") }else{ Tumor=args$Tumor } # ## Arg2 default if(is.null(args$Normal)) { stop("Normal sample bamFileis missing") }else{ Normal=args$Normal } # ## Arg3 default if(is.null(args$TumorSampleID)) { stop("TumorSampleID missing") }else{ TumorSampleID=args$TumorSampleID } # ## Arg4 default if(is.null(args$bed_file)) { stop("bed_file is missing") }else{ bed_file=args$bed_file } # ## Arg5 default if(is.null(args$workDir)) { stop("workDir is missing") }else{ workDir=args$workDir } # #3 Run cn.mops # Tumor <- c("Illumina_pt2_B1700.chr20_X.sorted.deduped.bam") # Normal <- c("Illumina_pt2_B17NC.chr20_X.sorted.deduped.bam") # workDir <- "/home/zhangjiyang/shiJzhiP/shijianzhiP2/scripts" # outputDir <- "/home/zhangjiyang/shiJzhiP/shijianzhiP2/scripts" # bed_file <- "Illumina_pt2_exonanno_exonfrom1_chr20_X.bed" # ##3.1 BAM files and bed file as input. BAMFiles <- c(Tumor,Normal) segments <- read.table(paste0(workDir,"/", bed_file),sep="\t",as.is=TRUE) # ##3.2 Get read counts from BAM. gr <- GRanges(segments[,1],IRanges(segments[,2],segments[,3])) # ##3.3 The result object sample <- getSegmentReadCountsFromBAM(BAMFiles,GR=gr,mode="paired") resRef <- referencecn.mops(cases=sample[,1],controls=sample[,2], classes=c("CN0", "CN1", "CN2", "CN3", "CN4", "CN5", "CN6", "CN7","CN8","CN16","CN32","CN64","CN128"), I = c(0.025, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 8, 16, 32, 64), segAlgorithm="DNAcopy") result<- calcIntegerCopyNumbers(resRef) CNVs <- as.data.frame(cnvs(result)) colnames(segments) <- c("chr","start","end","anno") colnames(CNVs)[colnames(CNVs)=="seqnames"] <- c("chr") bed.anno <- segments bed.anno$start_idx <- paste(bed.anno$chr,bed.anno$start,sep="_") bed.anno$end_idx <- paste(bed.anno$chr,bed.anno$end,sep="_") CNV_number <- nrow(CNVs) Res <- data.frame() for(i in 1:CNV_number) { CNVs.start.idx <- paste(CNVs[i,]$chr,CNVs[i,]$start,sep="_") CNVs.end.idx <- paste(CNVs[i,]$chr,CNVs[i,]$end,sep="_") exon_start <- rownames(bed.anno[bed.anno$start_idx%in%CNVs.start.idx,]) exon_end <- rownames(bed.anno[bed.anno$end_idx%in%CNVs.end.idx,]) res <- CNVs[i,] res$gene <- unlist(strsplit(bed.anno[exon_start,]$anno,split = "_"))[1] res$exon_start <- unlist(strsplit(bed.anno[exon_start,]$anno,split = "_"))[2] res$exon_end <- unlist(strsplit(bed.anno[exon_end,]$anno,split = "_"))[2] Res <- rbind(Res,res) } Res <- data.frame(sampleName=Res$sampleName,chr=Res$chr, start=Res$start,end=Res$end,gene=Res$gene, exon_start=Res$exon_start,exon_end=Res$exon_end, CN=Res$CN) # ##3.4 Output write.table(Res,paste0(TumorSampleID,"cn.mops.res.txt"),sep="\t", col.names=T,row.names=F,quote=F)