Przeglądaj źródła

First Commit

master
ISCAP 6 lat temu
commit
6a4a414312
4 zmienionych plików z 211 dodań i 0 usunięć
  1. +147
    -0
      cn.mops.R
  2. +12
    -0
      inputs
  3. +25
    -0
      tasks/cn_mops.wdl
  4. +27
    -0
      workflow.wdl

+ 147
- 0
cn.mops.R Wyświetl plik

@@ -0,0 +1,147 @@
###################
## cn.mops ##
## Jiyang Zhang ##
## 2019.01.21 ##
###################
#
#1 Introduction
##The cn.mops package is part of the Bioconductor (http://www.bioconductor.org) project.
##The package detects copy number variations (CNVs) in next generation sequencing (NGS) data sets using a generative model.
##Please visit http://www.bioinf.jku.at/software/cnmops/cnmops.html for additional information.
#
#2 Set up
#
##2.1 load package
library(cn.mops)
#
##2.2 load options
#
###2.2.1 Collect arguments
args <- commandArgs(trailingOnly = TRUE)
#
###2.2.2 Help section
#### Usage text shown for --help (its lines stay flush left: they are string content)
usageText <- "
The R Script
Arguments:
--Tumor=the name of the TUMOR sample BAMfile - string
--Normal=the name of the NORMAL sample BAMfile - string
--TumorSampleID=the name of the TUMOR sample - string
--bed_file=BED (Browser Extensible Data) lines have four required fields included chr, start, end and gene annotation. - string
--workDir=working directory - character
--outputDir=where you want to output the pictures - character
--help - print this text
Example:
Rscript cn.mops.R --Tumor=TumorSample.bam --Normal=NormalSample.bam --TumorSampleID=TumorSampleID --bed_file=bedFile --workDir=./ --outputDir=./ \n\n"
#### A call without any argument is treated as a request for help
if (length(args) == 0L) {
  args <- "--help"
}
#### Print the usage text and leave without saving the workspace
if (any(args == "--help")) {
  cat(usageText)
  quit(save = "no")
}
#
####2.2.3 Parse arguments (we expect the form --arg=value)
# Split every "--key=value" token into c(key, value).
#
# x: character vector of raw command-line tokens.
# Returns a list with one character vector per token: c(key, value) when the
# token contains "=", or just the bare token when it does not.
#
# Only the FIRST "=" separates key from value, so values that themselves
# contain "=" are kept intact instead of being cut into extra pieces.
parseArgs <- function(x) {
  stripped <- sub("^--", "", x)
  regmatches(stripped, regexpr("=", stripped, fixed = TRUE), invert = TRUE)
}
argsKV <- parseArgs(args)
# Tokens without "=" carry no value; drop them so the later is.null() checks
# report the argument as missing rather than receiving a recycled key name.
hasValue <- vapply(argsKV, length, integer(1)) == 2L
argsL <- lapply(argsKV[hasValue], function(kv) kv[2])
names(argsL) <- vapply(argsKV[hasValue], function(kv) kv[1], character(1))
# From here on `args` is a named list: argument name -> value (character).
args <- argsL
#
## Required arguments (Arg1 - Arg6): every one must be supplied
#
# Fetch one required argument from the parsed argument list.
#
# argList: named list of argument values.
# name:    exact argument name to look up.
# msg:     error message raised when the argument is absent.
# Returns the argument value; stops with `msg` when the argument is absent,
# NA or an empty string.
#
# `[[` is used instead of `$` because `$` partially matches list names:
# with only --TumorSampleID given, args$Tumor would silently return the
# sample ID and the missing --Tumor would go undetected.
requireArg <- function(argList, name, msg) {
  value <- argList[[name]]
  if (is.null(value) || is.na(value[1]) || !nzchar(value[1])) {
    # call. = FALSE keeps the message in the same "Error: ..." form that a
    # top-level stop() produces.
    stop(msg, call. = FALSE)
  }
  value
}
#
## Arg1: tumor BAM file
Tumor <- requireArg(args, "Tumor", "Tumor sample bamFile is missing")
#
## Arg2: normal BAM file
Normal <- requireArg(args, "Normal", "Normal sample bamFileis missing")
#
## Arg3: tumor sample ID (used in the output file name)
TumorSampleID <- requireArg(args, "TumorSampleID", "TumorSampleID missing")
#
## Arg4: BED file with the target segments
bed_file <- requireArg(args, "bed_file", "bed_file is missing")
#
## Arg5: directory that contains the BED file
workDir <- requireArg(args, "workDir", "workDir is missing")
#
## Arg6: directory the result table is written to
outputDir <- requireArg(args, "outputDir", "outputDir is missing")
#
#3 Run cn.mops
# Tumor <- c("Illumina_pt2_B1700.chr20_X.sorted.deduped.bam")
# Normal <- c("Illumina_pt2_B17NC.chr20_X.sorted.deduped.bam")
# workDir <- "/home/zhangjiyang/shiJzhiP/shijianzhiP2/scripts"
# outputDir <- "/home/zhangjiyang/shiJzhiP/shijianzhiP2/scripts"
# bed_file <- "Illumina_pt2_exonanno_exonfrom1_chr20_X.bed"
#
##3.1 BAM files and bed file as input.
# Tumor first, normal second: the column order of the read counts below
# depends on it. The BAM paths are used exactly as given on the command line.
BAMFiles <- c(Tumor, Normal)
# The BED file is looked up inside workDir.
segments <- read.table(file.path(workDir, bed_file), sep = "\t", as.is = TRUE)
if (ncol(segments) < 4) {
  stop("bed_file must have at least four tab-separated columns: ",
       "chr, start, end, annotation")
}
# Only the first four columns are used.
segments <- segments[, 1:4]
colnames(segments) <- c("chr", "start", "end", "anno")
#
##3.2 Get read counts from BAM.
gr <- GRanges(segments$chr, IRanges(segments$start, segments$end))
#
##3.3 The result object
read_counts <- getSegmentReadCountsFromBAM(BAMFiles, GR = gr, mode = "paired")
# Column 1 = tumor (case), column 2 = normal (control), following BAMFiles.
resRef <- referencecn.mops(
  cases = read_counts[, 1],
  controls = read_counts[, 2],
  classes = c("CN0", "CN1", "CN2", "CN3", "CN4", "CN5", "CN6",
              "CN7", "CN8", "CN16", "CN32", "CN64", "CN128"),
  I = c(0.025, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 8, 16, 32, 64),
  segAlgorithm = "DNAcopy"
)
result <- calcIntegerCopyNumbers(resRef)
CNVs <- as.data.frame(cnvs(result))
colnames(CNVs)[colnames(CNVs) == "seqnames"] <- "chr"
#
##3.3.1 Annotate every CNV with the gene / exon of its boundary segments
# Segments are keyed by "<chr>_<position>" so a CNV boundary can be matched
# against a segment start (or end) with a single lookup.
bed.anno <- segments
bed.anno$start_idx <- paste(bed.anno$chr, bed.anno$start, sep = "_")
bed.anno$end_idx <- paste(bed.anno$chr, bed.anno$end, sep = "_")
if (nrow(CNVs) == 0) {
  # No CNV reported: write a header-only table instead of looping over 1:0.
  Res <- data.frame(sampleName = character(0), chr = character(0),
                    start = integer(0), end = integer(0),
                    gene = character(0), exon_start = character(0),
                    exon_end = character(0), CN = character(0))
} else {
  # match() returns the first matching segment, or NA when a boundary does
  # not coincide with any segment.
  start_row <- match(paste(CNVs$chr, CNVs$start, sep = "_"), bed.anno$start_idx)
  end_row <- match(paste(CNVs$chr, CNVs$end, sep = "_"), bed.anno$end_idx)
  # The annotation is split on "_"; field 1 is reported as the gene and
  # field 2 as the exon label.
  anno_parts <- strsplit(as.character(bed.anno$anno), split = "_", fixed = TRUE)
  # k-th annotation field of the given segment rows, NA when the row is
  # unmatched or the annotation has fewer than k fields.
  annoField <- function(rows, k) {
    vapply(anno_parts[rows], function(parts) {
      if (length(parts) >= k) parts[k] else NA_character_
    }, character(1))
  }
  Res <- data.frame(sampleName = CNVs$sampleName, chr = CNVs$chr,
                    start = CNVs$start, end = CNVs$end,
                    gene = annoField(start_row, 1),
                    exon_start = annoField(start_row, 2),
                    exon_end = annoField(end_row, 2),
                    CN = CNVs$CN)
}
#
##3.4 Output
# File name is "<TumorSampleID>.cn.mops.res.txt"; the "." separator matches
# the output declared by tasks/cn_mops.wdl.
write.table(Res,
            file.path(outputDir, paste0(TumorSampleID, ".cn.mops.res.txt")),
            sep = "\t", col.names = TRUE, row.names = FALSE, quote = FALSE)



+ 12
- 0
inputs Wyświetl plik

@@ -0,0 +1,12 @@
{
"{{ project_name }}.TumorBam": "{{ TumorBam }}",
"{{ project_name }}.TumorSampleID": "{{ TumorSampleID }}",
"{{ project_name }}.outputDir": "{{ outputDir }}",
"{{ project_name }}.docker": "{{ docker }}",
"{{ project_name }}.cluster_config": "{{ cluster_config }}",
"{{ project_name }}.workDir": "{{ workDir }}",
"{{ project_name }}.NormalBam": "{{ NormalBam }}",
"{{ project_name }}.disk_size": "{{ disk_size }}",
"{{ project_name }}.bed_file": "{{ bed_file }}"
}


+ 25
- 0
tasks/cn_mops.wdl Wyświetl plik

@@ -0,0 +1,25 @@
# Runs cn.mops.R on one tumor/normal BAM pair and exposes its result table.
#
# Inputs:
#   TumorBam, NormalBam - BAM paths passed to the script as --Tumor / --Normal
#   TumorSampleID       - sample ID; also the prefix of the result file name
#   bed_file            - BED file name, passed as --bed_file
#   workDir, outputDir  - passed through as --workDir / --outputDir
#   docker, cluster_config, disk_size - runtime settings
task CNV {
  String TumorBam
  String NormalBam
  String TumorSampleID
  String bed_file
  String workDir
  String outputDir
  String docker
  String disk_size
  String cluster_config

  # The flag must be exactly --TumorSampleID=<value>: cn.mops.R stops with
  # "TumorSampleID missing" for any other spelling.
  command <<<
    Rscript cn.mops.R --Tumor=${TumorBam} --Normal=${NormalBam} --TumorSampleID=${TumorSampleID} --bed_file=${bed_file} --workDir=${workDir} --outputDir=${outputDir}
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    # The output keeps its original name `sorted_bam` for callers, although
    # the file is the cn.mops result table.
    # NOTE(review): this relative path must equal the file cn.mops.R writes
    # (inside outputDir) - verify the file name and directory agree.
    File sorted_bam = "${TumorSampleID}.cn.mops.res.txt"
  }
}

+ 27
- 0
workflow.wdl Wyświetl plik

@@ -0,0 +1,27 @@
import "./tasks/cn_mops.wdl" as CNV

workflow {{project_name}} {

  # Sample inputs
  String TumorSampleID
  String TumorBam
  String NormalBam
  String bed_file

  # Directories handed to the script
  String workDir
  String outputDir

  # Runtime settings
  String docker
  String cluster_config
  String disk_size

  # Single step: every workflow input is forwarded unchanged to the CNV task.
  call CNV.CNV as CNV {
    input:
      TumorSampleID = TumorSampleID,
      TumorBam = TumorBam,
      NormalBam = NormalBam,
      bed_file = bed_file,
      workDir = workDir,
      outputDir = outputDir,
      docker = docker,
      cluster_config = cluster_config,
      disk_size = disk_size
  }
}

Ładowanie…
Anuluj
Zapisz