|
|
@@ -0,0 +1,147 @@ |
|
|
|
################### |
|
|
|
## cn.mops ## |
|
|
|
## Jiyang Zhang ## |
|
|
|
## 2019.01.21 ## |
|
|
|
################### |
|
|
|
# |
|
|
|
#1 Introduction |
|
|
|
##The cn.mops package is part of the Bioconductor (http://www.bioconductor.org) project. |
|
|
|
##The package allows to detect copy number variations (CNVs) from next generation sequencing (NGS) data sets based on a generative model. |
|
|
|
##Please visit http://www.bioinf.jku.at/software/cnmops/cnmops.htmlfor additional information. |
|
|
|
# |
|
|
|
#2 Set up |
|
|
|
# |
|
|
|
##2.1 load package |
|
|
|
library(cn.mops) |
|
|
|
# |
|
|
|
##2.2 load options |
|
|
|
# |
|
|
|
###2.2.1 Collect arguments |
|
|
|
args <- commandArgs(TRUE) |
|
|
|
# |
|
|
|
###2.2.2 Help section |
|
|
|
#### Default setting when no arguments passed |
|
|
|
if(length(args) < 1) { |
|
|
|
args <- c("--help") |
|
|
|
} |
|
|
|
#### Help section |
|
|
|
if("--help" %in% args) { |
|
|
|
cat(" |
|
|
|
The R Script |
|
|
|
|
|
|
|
Arguments: |
|
|
|
--Tumor=the name of the TUMOR sample BAMfile - string |
|
|
|
--Normal=the name of the NORMAL sample BAMfile - string |
|
|
|
--TumorSampleID=the name of the TUMOR sample - string |
|
|
|
--bed_file=BED (Browser Extensible Data) lines have four required fields included chr, start, end and gene annotation. - string |
|
|
|
--workDir=working directory - character |
|
|
|
--outputDir=where you want to output the pictures - character |
|
|
|
--help - print this text |
|
|
|
|
|
|
|
|
|
|
|
Example: |
|
|
|
Rscript cn.mops.R --Tumor=TumorSample.bam --Normal=NormalSample.bam --TumorSampleID=TumorSampleID --bed_file=bedFile --workDir=./ --outputDir=./ \n\n") |
|
|
|
|
|
|
|
q(save="no") |
|
|
|
} |
|
|
|
# |
|
|
|
####2.2.3 Parse arguments (we expect the form --arg=value) |
|
|
|
parseArgs <- function(x) strsplit(sub("^--", "", x), "=") |
|
|
|
argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) |
|
|
|
argsL <- as.list(as.character(argsDF$V2)) |
|
|
|
names(argsL) <- argsDF$V1 |
|
|
|
args<-argsL |
|
|
|
# |
|
|
|
## Arg1 default |
|
|
|
if(is.null(args$Tumor)) { |
|
|
|
stop("Tumor sample bamFile is missing") |
|
|
|
}else{ |
|
|
|
Tumor=args$Tumor |
|
|
|
} |
|
|
|
# |
|
|
|
## Arg2 default |
|
|
|
if(is.null(args$Normal)) { |
|
|
|
stop("Normal sample bamFileis missing") |
|
|
|
}else{ |
|
|
|
Normal=args$Normal |
|
|
|
} |
|
|
|
# |
|
|
|
## Arg3 default |
|
|
|
if(is.null(args$TumorSampleID)) { |
|
|
|
stop("TumorSampleID missing") |
|
|
|
}else{ |
|
|
|
TumorSampleID=args$TumorSampleID |
|
|
|
} |
|
|
|
# |
|
|
|
## Arg4 default |
|
|
|
if(is.null(args$bed_file)) { |
|
|
|
stop("bed_file is missing") |
|
|
|
}else{ |
|
|
|
bed_file=args$bed_file |
|
|
|
} |
|
|
|
# |
|
|
|
## Arg5 default |
|
|
|
if(is.null(args$workDir)) { |
|
|
|
stop("workDir is missing") |
|
|
|
}else{ |
|
|
|
workDir=args$workDir |
|
|
|
} |
|
|
|
# |
|
|
|
## Arg6 default |
|
|
|
if(is.null(args$outputDir)) { |
|
|
|
stop("outputDir is missing") |
|
|
|
}else{ |
|
|
|
outputDir=args$outputDir |
|
|
|
} |
|
|
|
# |
|
|
|
#3 Run cn.mops |
|
|
|
# Tumor <- c("Illumina_pt2_B1700.chr20_X.sorted.deduped.bam") |
|
|
|
# Normal <- c("Illumina_pt2_B17NC.chr20_X.sorted.deduped.bam") |
|
|
|
# workDir <- "/home/zhangjiyang/shiJzhiP/shijianzhiP2/scripts" |
|
|
|
# outputDir <- "/home/zhangjiyang/shiJzhiP/shijianzhiP2/scripts" |
|
|
|
# bed_file <- "Illumina_pt2_exonanno_exonfrom1_chr20_X.bed" |
|
|
|
# |
|
|
|
##3.1 BAM files and bed file as input. |
|
|
|
BAMFiles <- c(Tumor,Normal) |
|
|
|
segments <- read.table(paste0(workDir,"/", bed_file),sep="\t",as.is=TRUE) |
|
|
|
# |
|
|
|
##3.2 Get read counts from BAM. |
|
|
|
gr <- GRanges(segments[,1],IRanges(segments[,2],segments[,3])) |
|
|
|
# |
|
|
|
##3.3 The result object |
|
|
|
sample <- getSegmentReadCountsFromBAM(BAMFiles,GR=gr,mode="paired") |
|
|
|
resRef <- referencecn.mops(cases=sample[,1],controls=sample[,2], |
|
|
|
classes=c("CN0", "CN1", "CN2", "CN3", "CN4", "CN5", "CN6", |
|
|
|
"CN7","CN8","CN16","CN32","CN64","CN128"), |
|
|
|
I = c(0.025, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 8, 16, 32, 64), |
|
|
|
segAlgorithm="DNAcopy") |
|
|
|
result<- calcIntegerCopyNumbers(resRef) |
|
|
|
CNVs <- as.data.frame(cnvs(result)) |
|
|
|
colnames(segments) <- c("chr","start","end","anno") |
|
|
|
colnames(CNVs)[colnames(CNVs)=="seqnames"] <- c("chr") |
|
|
|
bed.anno <- segments |
|
|
|
bed.anno$start_idx <- paste(bed.anno$chr,bed.anno$start,sep="_") |
|
|
|
bed.anno$end_idx <- paste(bed.anno$chr,bed.anno$end,sep="_") |
|
|
|
CNV_number <- nrow(CNVs) |
|
|
|
Res <- data.frame() |
|
|
|
for(i in 1:CNV_number) { |
|
|
|
CNVs.start.idx <- paste(CNVs[i,]$chr,CNVs[i,]$start,sep="_") |
|
|
|
CNVs.end.idx <- paste(CNVs[i,]$chr,CNVs[i,]$end,sep="_") |
|
|
|
exon_start <- rownames(bed.anno[bed.anno$start_idx%in%CNVs.start.idx,]) |
|
|
|
exon_end <- rownames(bed.anno[bed.anno$end_idx%in%CNVs.end.idx,]) |
|
|
|
res <- CNVs[i,] |
|
|
|
res$gene <- unlist(strsplit(bed.anno[exon_start,]$anno,split = "_"))[1] |
|
|
|
res$exon_start <- unlist(strsplit(bed.anno[exon_start,]$anno,split = "_"))[2] |
|
|
|
res$exon_end <- unlist(strsplit(bed.anno[exon_end,]$anno,split = "_"))[2] |
|
|
|
Res <- rbind(Res,res) |
|
|
|
} |
|
|
|
Res <- data.frame(sampleName=Res$sampleName,chr=Res$chr, |
|
|
|
start=Res$start,end=Res$end,gene=Res$gene, |
|
|
|
exon_start=Res$exon_start,exon_end=Res$exon_end, |
|
|
|
CN=Res$CN) |
|
|
|
# |
|
|
|
##3.4 Output |
|
|
|
write.table(Res,paste0(outputDir,"/",TumorSampleID,"cn.mops.res.txt"),sep="\t", |
|
|
|
col.names=T,row.names=F,quote=F) |
|
|
|
|
|
|
|
|