6 lat temu · 6a4a414312
--- a/cn.mops.R
+++ b/cn.mops.R
@@ -0,0 +1,147 @@
 ###################
 ##   cn.mops     ## 
 ##  Jiyang Zhang ##
 ##  2019.01.21   ##
 ###################
 #
 #1 Introduction
 ##The cn.mops package is part of the Bioconductor (http://www.bioconductor.org) project.
 ##The package detects copy number variations (CNVs) in next generation sequencing (NGS) data sets using a generative model.
 ##Please visit http://www.bioinf.jku.at/software/cnmops/cnmops.html for additional information.
 #
 #2 Set up
 #
 ##2.1 load package
 library(cn.mops)
 #
 ##2.2 load options
 #
 ###2.2.1 Collect arguments
 args <- commandArgs(trailingOnly = TRUE)
 #
 ###2.2.2 Help section
 #### Usage text, one element per printed line, so the layout of the help
 #### output does not depend on the indentation (or stray tabs) of this file.
 usage_text <- c(
   "",
   "The R Script",
   "",
   "Arguments:",
   "  --Tumor=the name of the TUMOR sample BAM file - string",
   "  --Normal=the name of the NORMAL sample BAM file - string",
   "  --TumorSampleID=the name of the TUMOR sample - string",
   "  --bed_file=BED (Browser Extensible Data) file name; each line has four required fields: chr, start, end and gene annotation - string",
   "  --workDir=working directory that contains bed_file - character",
   "  --outputDir=directory where the result table is written - character",
   "  --help              - print this text",
   "",
   "Example:",
   "  Rscript cn.mops.R --Tumor=TumorSample.bam --Normal=NormalSample.bam --TumorSampleID=TumorSampleID --bed_file=bedFile --workDir=./ --outputDir=./",
   ""
 )
 #### The usage is shown both on an explicit --help and when the script is
 #### started without any argument. Only the explicit request exits with
 #### status 0; a call without arguments exits with status 1 so that a
 #### pipeline step launched without options is reported as failed.
 no_args <- length(args) < 1
 if (no_args || "--help" %in% args) {
   cat(usage_text, sep = "\n")
   q(save = "no", status = if (no_args) 1L else 0L)
 }
 #
 ####2.2.3 Parse arguments (we expect the form --arg=value)
 # Split every "--name=value" token at the FIRST "=" only, so that values which
 # themselves contain "=" stay intact. Returns a list with one
 # c(name, value) character vector per token.
 # Tokens without "=" or with an empty name are rejected: with the previous
 # strsplit()-based parser they produced pieces of unequal length that rbind()
 # silently recycled into wrong name/value pairs.
 parseArgs <- function(x) {
   stripped <- sub("^--", "", x)
   eq_pos <- as.integer(regexpr("=", stripped, fixed = TRUE))
   malformed <- eq_pos < 2
   if (any(malformed)) {
     stop("Malformed argument(s), expected --name=value: ",
          paste(x[malformed], collapse = ", "), call. = FALSE)
   }
   unname(Map(
     function(token, pos) {
       c(substr(token, 1, pos - 1), substr(token, pos + 1, nchar(token)))
     },
     stripped, eq_pos
   ))
 }
 # stringsAsFactors = FALSE and as.character() keep names and values plain
 # character vectors regardless of the R version's data.frame defaults.
 argsDF <- as.data.frame(do.call("rbind", parseArgs(args)),
                         stringsAsFactors = FALSE)
 argsL <- as.list(as.character(argsDF$V2))
 names(argsL) <- as.character(argsDF$V1)
 # From here on `args` is a named list: args[["Tumor"]], args[["Normal"]], ...
 args <- argsL
 #
 ## Required arguments
 # Every option is mandatory; the value of this vector is the error message
 # raised when the option named by the element is absent.
 required_args <- c(
   Tumor = "Tumor sample bamFile is missing",
   Normal = "Normal sample bamFile is missing",
   TumorSampleID = "TumorSampleID missing",
   bed_file = "bed_file is missing",
   workDir = "workDir is missing",
   outputDir = "outputDir is missing"
 )
 for (arg_name in names(required_args)) {
   # `[[` matches names exactly. `$` on a list matches partially, so
   # `args$Tumor` would silently return the value of a lone --TumorSampleID
   # and a missing --Tumor would go unnoticed.
   arg_value <- args[[arg_name]]
   if (is.null(arg_value) || is.na(arg_value) || !nzchar(arg_value)) {
     stop(required_args[[arg_name]], call. = FALSE)
   }
 }
 #
 # Expose the validated values under the names used by the rest of the script.
 Tumor <- args[["Tumor"]]
 Normal <- args[["Normal"]]
 TumorSampleID <- args[["TumorSampleID"]]
 bed_file <- args[["bed_file"]]
 workDir <- args[["workDir"]]
 outputDir <- args[["outputDir"]]
 #
 #3 Run cn.mops
 # Tumor <- c("Illumina_pt2_B1700.chr20_X.sorted.deduped.bam")
 # Normal <- c("Illumina_pt2_B17NC.chr20_X.sorted.deduped.bam")
 # workDir <- "/home/zhangjiyang/shiJzhiP/shijianzhiP2/scripts"
 # outputDir <- "/home/zhangjiyang/shiJzhiP/shijianzhiP2/scripts"
 # bed_file <- "Illumina_pt2_exonanno_exonfrom1_chr20_X.bed"
 #
 ##3.1 BAM files and bed file as input.
 # Tumor first, Normal second: the calls below use column 1 of the read-count
 # object as the case and column 2 as the control.
 BAMFiles <- c(Tumor, Normal)
 # bed_file is resolved relative to workDir; the BAM paths are used as given.
 segments <- read.table(file.path(workDir, bed_file), sep = "\t", as.is = TRUE)
 if (ncol(segments) < 4) {
   stop("bed_file must have at least four columns: chr, start, end, annotation",
        call. = FALSE)
 }
 #
 ##3.2 Get read counts from BAM.
 gr <- GRanges(segments[, 1], IRanges(segments[, 2], segments[, 3]))
 #
 ##3.3 The result object
 sample <- getSegmentReadCountsFromBAM(BAMFiles, GR = gr, mode = "paired")
 resRef <- referencecn.mops(
   cases = sample[, 1], controls = sample[, 2],
   classes = c("CN0", "CN1", "CN2", "CN3", "CN4", "CN5", "CN6",
               "CN7", "CN8", "CN16", "CN32", "CN64", "CN128"),
   I = c(0.025, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 8, 16, 32, 64),
   segAlgorithm = "DNAcopy"
 )
 result <- calcIntegerCopyNumbers(resRef)
 CNVs <- as.data.frame(cnvs(result))
 # Only the first four BED columns are named; extra columns keep their names.
 colnames(segments)[1:4] <- c("chr", "start", "end", "anno")
 colnames(CNVs)[colnames(CNVs) == "seqnames"] <- "chr"
 bed.anno <- segments
 # "<chr>_<position>" keys used to look CNV boundaries up among BED intervals.
 bed.anno$start_idx <- paste(bed.anno$chr, bed.anno$start, sep = "_")
 bed.anno$end_idx <- paste(bed.anno$chr, bed.anno$end, sep = "_")
 CNV_number <- nrow(CNVs)
 # For every CNV, find the first BED row whose start (resp. end) coincides with
 # the CNV start (resp. end). match() is vectorised over all CNVs and yields NA
 # where no BED interval matches, instead of the per-row loop with rbind().
 start_row <- match(paste(CNVs$chr, CNVs$start, sep = "_"), bed.anno$start_idx)
 end_row <- match(paste(CNVs$chr, CNVs$end, sep = "_"), bed.anno$end_idx)
 # k-th "_"-separated field of the annotation of the given BED rows;
 # NA for unmatched rows or annotations with fewer than k fields.
 # The annotation is read as "<gene>_<exon>..." (field 1 = gene, field 2 = exon).
 anno_field <- function(rows, k) {
   parts <- strsplit(as.character(bed.anno$anno[rows]), split = "_")
   vapply(parts, function(p) p[k], character(1))
 }
 if (CNV_number == 0) {
   # No CNV rows: still produce a table with the expected header. The old
   # `for (i in 1:CNV_number)` loop would have iterated over c(1, 0) here.
   Res <- data.frame(sampleName = character(0), chr = character(0),
                     start = integer(0), end = integer(0),
                     gene = character(0), exon_start = character(0),
                     exon_end = character(0), CN = character(0))
 } else {
   Res <- data.frame(sampleName = CNVs$sampleName, chr = CNVs$chr,
                     start = CNVs$start, end = CNVs$end,
                     gene = anno_field(start_row, 1),
                     exon_start = anno_field(start_row, 2),
                     exon_end = anno_field(end_row, 2),
                     CN = CNVs$CN)
 }
 #
 ##3.4 Output
 # File name is "<TumorSampleID>.cn.mops.res.txt"; the "." separator was
 # missing before, which glued the sample ID directly onto "cn.mops.res.txt".
 write.table(Res,
             file.path(outputDir, paste0(TumorSampleID, ".cn.mops.res.txt")),
             sep = "\t", col.names = TRUE, row.names = FALSE, quote = FALSE)


--- a/inputs
+++ b/inputs
@@ -0,0 +1,12 @@
 {
  "{{ project_name }}.TumorBam": "{{ TumorBam }}",
  "{{ project_name }}.TumorSampleID": "{{ TumorSampleID }}",
  "{{ project_name }}.outputDir": "{{ outputDir }}",
  "{{ project_name }}.docker": "{{ docker }}",
  "{{ project_name }}.cluster_config": "{{ cluster_config }}",
  "{{ project_name }}.workDir": "{{ workDir }}",
  "{{ project_name }}.NormalBam": "{{ NormalBam }}",
  "{{ project_name }}.disk_size": "{{ disk_size }}",
  "{{ project_name }}.bed_file": "{{ bed_file }}"
 }

--- a/tasks/cn_mops.wdl
+++ b/tasks/cn_mops.wdl
@@ -0,0 +1,25 @@
 # Runs cn.mops.R on one tumor/normal BAM pair.
 # All inputs are plain strings that are interpolated into the command line
 # or the runtime section below.
 task CNV {
 	String TumorBam
 	String NormalBam
 	String TumorSampleID
 	String bed_file
 	String workDir
 	String outputDir
 	String docker
 	String disk_size
 	String cluster_config
 	
 	# The option names must match what cn.mops.R parses: the sample ID is read
 	# from --TumorSampleID=<value>. The previous "--TumorID==" spelling made the
 	# script stop with "TumorSampleID missing".
 	command <<<
 		Rscript cn.mops.R --Tumor=${TumorBam} --Normal=${NormalBam} --TumorSampleID=${TumorSampleID} --bed_file=${bed_file} --workDir=${workDir} --outputDir=${outputDir}
 	>>>

 	runtime {
 		docker: docker
 		cluster: cluster_config
 		systemDisk: "cloud_ssd 40"
 		dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
 	}
 	# NOTE(review): this path is relative to the task's working directory and
 	# does not use outputDir; confirm that the file name and directory written
 	# by cn.mops.R match it. The output keeps the name "sorted_bam" for
 	# compatibility, although the file is the cn.mops result table.
 	output {
 		File sorted_bam = "${TumorSampleID}.cn.mops.res.txt"
 	}
 }
--- a/workflow.wdl
+++ b/workflow.wdl
@@ -0,0 +1,27 @@
 import "./tasks/cn_mops.wdl" as CNV

 # Single-step workflow that forwards its inputs to the CNV task.
 # NOTE(review): {{project_name}} looks like a template placeholder that is
 # presumably substituted before the WDL is parsed — confirm with the
 # rendering step.
 workflow {{project_name}} {

 	# Workflow inputs; each one is passed unchanged to the CNV task below.
 	String TumorBam
 	String NormalBam
 	String TumorSampleID
 	String bed_file
 	String workDir
 	String outputDir
 	String docker
 	String disk_size
 	String cluster_config

 	# Call the task "CNV" from the imported namespace "CNV".
 	call CNV.CNV as CNV {
 		input: 
 			TumorBam=TumorBam,
 			NormalBam=NormalBam,
 			TumorSampleID=TumorSampleID,
 			bed_file=bed_file,
 			workDir=workDir,
 			outputDir=outputDir,
 			docker=docker,
 			disk_size=disk_size,
 			cluster_config=cluster_config
 	}
 }