|
- ###################
- ## cn.mops ##
- ## Jiyang Zhang ##
- ## 2019.01.21 ##
- ###################
- #
- #1 Introduction
- ##The cn.mops package is part of the Bioconductor (http://www.bioconductor.org) project.
- ##The package allows to detect copy number variations (CNVs) from next generation sequencing (NGS) data sets based on a generative model.
- ##Please visit http://www.bioinf.jku.at/software/cnmops/cnmops.htmlfor additional information.
- #
- #2 Set up
- #
- ##2.1 load package
- library(cn.mops) #cn.mops_1.16.2
- #
- ##2.2 load options
- #
- ###2.2.1 Collect arguments
- args <- commandArgs(TRUE)
- #
- ###2.2.2 Help section
- #### Default setting when no arguments passed
- if(length(args) < 1) {
- args <- c("--help")
- }
- #### Help section
- if("--help" %in% args) {
- cat("
- The R Script
-
- Arguments:
- --Tumor=the name of the TUMOR sample BAMfile - string
- --Normal=the name of the NORMAL sample BAMfile - string
- --TumorSampleID=the name of the TUMOR sample - string
- --bed_file=BED (Browser Extensible Data) lines have four required fields included chr, start, end and gene annotation. - string
- --workDir=working directory - character
- --help - print this text
-
-
- Example:
- Rscript cn.mops.R --Tumor=TumorSample.bam --Normal=NormalSample.bam --TumorSampleID=TumorSampleID --bed_file=bedFile --workDir=./ \n\n") #--outputDir=./
-
- q(save="no")
- }
- #
- ####2.2.3 Parse arguments (we expect the form --arg=value)
- parseArgs <- function(x) strsplit(sub("^--", "", x), "=")
- argsDF <- as.data.frame(do.call("rbind", parseArgs(args)))
- argsL <- as.list(as.character(argsDF$V2))
- names(argsL) <- argsDF$V1
- args<-argsL
- #
- ## Arg1 default
- if(is.null(args$Tumor)) {
- stop("Tumor sample bamFile is missing")
- }else{
- Tumor=args$Tumor
- }
- #
- ## Arg2 default
- if(is.null(args$Normal)) {
- stop("Normal sample bamFileis missing")
- }else{
- Normal=args$Normal
- }
- #
- ## Arg3 default
- if(is.null(args$TumorSampleID)) {
- stop("TumorSampleID missing")
- }else{
- TumorSampleID=args$TumorSampleID
- }
- #
- ## Arg4 default
- if(is.null(args$bed_file)) {
- stop("bed_file is missing")
- }else{
- bed_file=args$bed_file
- }
- #
- ## Arg5 default
- if(is.null(args$workDir)) {
- stop("workDir is missing")
- }else{
- workDir=args$workDir
- }
- #
- #3 Run cn.mops
- # Tumor <- c("Illumina_pt2_B1700.chr20_X.sorted.deduped.bam")
- # Normal <- c("Illumina_pt2_B17NC.chr20_X.sorted.deduped.bam")
- # workDir <- "/home/zhangjiyang/shiJzhiP/shijianzhiP2/scripts"
- # outputDir <- "/home/zhangjiyang/shiJzhiP/shijianzhiP2/scripts"
- # bed_file <- "Illumina_pt2_exonanno_exonfrom1_chr20_X.bed"
- #
- ##3.1 BAM files and bed file as input.
- BAMFiles <- c(Tumor,Normal)
- segments <- read.table(paste0(workDir,"/", bed_file),sep="\t",as.is=TRUE)
- #
- ##3.2 Get read counts from BAM.
- gr <- GRanges(segments[,1],IRanges(segments[,2],segments[,3]))
- #
- ##3.3 The result object
- sample <- getSegmentReadCountsFromBAM(BAMFiles,GR=gr,mode="paired")
- resRef <- referencecn.mops(cases=sample[,1],controls=sample[,2],
- classes=c("CN0", "CN1", "CN2", "CN3", "CN4", "CN5", "CN6",
- "CN7","CN8","CN16","CN32","CN64","CN128"),
- I = c(0.025, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 8, 16, 32, 64),
- segAlgorithm="DNAcopy")
- result<- calcIntegerCopyNumbers(resRef)
- CNVs <- as.data.frame(cnvs(result))
- colnames(segments) <- c("chr","start","end","anno")
- colnames(CNVs)[colnames(CNVs)=="seqnames"] <- c("chr")
- bed.anno <- segments
- bed.anno$start_idx <- paste(bed.anno$chr,bed.anno$start,sep="_")
- bed.anno$end_idx <- paste(bed.anno$chr,bed.anno$end,sep="_")
- CNV_number <- nrow(CNVs)
- Res <- data.frame()
- for(i in 1:CNV_number) {
- CNVs.start.idx <- paste(CNVs[i,]$chr,CNVs[i,]$start,sep="_")
- CNVs.end.idx <- paste(CNVs[i,]$chr,CNVs[i,]$end,sep="_")
- exon_start <- rownames(bed.anno[bed.anno$start_idx%in%CNVs.start.idx,])
- exon_end <- rownames(bed.anno[bed.anno$end_idx%in%CNVs.end.idx,])
- res <- CNVs[i,]
- res$gene <- unlist(strsplit(bed.anno[exon_start,]$anno,split = "_"))[1]
- res$exon_start <- unlist(strsplit(bed.anno[exon_start,]$anno,split = "_"))[2]
- res$exon_end <- unlist(strsplit(bed.anno[exon_end,]$anno,split = "_"))[2]
- Res <- rbind(Res,res)
- }
- Res <- data.frame(sampleName=Res$sampleName,chr=Res$chr,
- start=Res$start,end=Res$end,gene=Res$gene,
- exon_start=Res$exon_start,exon_end=Res$exon_end,
- CN=Res$CN)
- #
- ##3.4 Output
- write.table(Res,paste0(TumorSampleID,"cn.mops.res.txt"),sep="\t",
- col.names=T,row.names=F,quote=F)
-
-
|