瀏覽代碼

first commit

master
LUYAO REN 5 年之前
當前提交
94b5cd976a
共有 9 個文件被更改,包括 244 次插入0 次删除
  1. +55
    -0
      codescripts/callable_bed_voting.py
  2. +18
    -0
      codescripts/count_bed.py
  3. +10
    -0
      inputs
  4. 二進制
      tasks/.DS_Store
  5. +29
    -0
      tasks/CallableLoci.wdl
  6. +31
    -0
      tasks/bedVote.wdl
  7. +33
    -0
      tasks/gvcf_homo_ref.wdl
  8. +30
    -0
      tasks/mergeBed.wdl
  9. +38
    -0
      workflow.wdl

+ 55
- 0
codescripts/callable_bed_voting.py 查看文件

@@ -0,0 +1,55 @@
# import modules
import sys, argparse, os
import fileinput
from operator import itemgetter

# Command-line interface: both options are mandatory.
parser = argparse.ArgumentParser(
    description="this script is to vote callable bed region"
)
parser.add_argument(
    '-bed', '--multiSampleBED',
    required=True,
    type=str,
    help='The bed file to get high confidence region',
)
parser.add_argument(
    '-prefix', '--prefix',
    required=True,
    type=str,
    help='The output file you want to name',
)
args = parser.parse_args()

# Short aliases for the parsed options.
input_file, prefix = args.multiSampleBED, args.prefix

# Two outputs share the same prefix: lines that pass the vote go to the
# consensus file, every other line goes to the filtered file. Both handles
# stay open at module level because consensus_bed() writes to them.
consensus_filename = prefix + '.27consensus.bed'
filter_filename = prefix + '.filtered.bed'
outCONSENSUS = open(consensus_filename, 'w')
outFiltered = open(filter_filename, 'w')
#initial
#sequence_tech = ['SEQ2000','SEQ2000','SEQ2000','SEQ2000','SEQ2000','SEQ500','SEQ500','SEQ500','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','XTen','XTen','XTen','XTen','XTen','XTen','XTen','XTen','XTen','XTen','XTen','XTen']
#sequence_site = ['BGI','BGI','BGI','WGE','WGE','BGI','BGI','BGI','ARD','ARD','ARD','ARD','ARD','ARD','BRG','BRG','BRG','BRG','GAC','NVG','WUX','ARD','ARD','ARD','NVG','NVG','NVG','WUX','WUX','WUX','WUX','WUX','WUX']

def consensus_bed(oneLine):
    """Write one merged-BED line to the consensus or the filtered output.

    The line is stripped and split on tabs. Column indices 5 through 31 are
    read as nine consecutive triplets of replicate flags; a triplet passes
    when more than one of its three values is the string '1'. The triplets
    are then combined into two library-level votes (named ``pcr`` and
    ``pcr_free`` by the original author), and the line is accepted only when
    both library votes pass.

    The unmodified input line (including its newline) is written to the
    module-level ``outCONSENSUS`` handle when accepted, otherwise to
    ``outFiltered``. Nothing is returned.
    """
    columns = oneLine.strip().split('\t')

    # One 0/1 flag per replicate triplet, in column order (5:8, 8:11, ... 29:32).
    (seq2000_bgi, t7_wge, nova_ard_1, nova_ard_2, nova_brg, nova_wux,
     xten_ard, xten_nvg, xten_wux) = [
        int(columns[first:first + 3].count('1') > 1)
        for first in range(5, 32, 3)
    ]

    # Library-level tallies: the pcr group has four members and needs more
    # than two passing; the pcr_free group has five and needs more than three.
    pcr_votes = seq2000_bgi + xten_ard + xten_wux + xten_nvg
    pcr_free_votes = t7_wge + nova_ard_1 + nova_ard_2 + nova_brg + nova_wux

    # Both library groups must agree for the region to count as consensus.
    if pcr_votes > 2 and pcr_free_votes > 3:
        target = outCONSENSUS
    else:
        target = outFiltered
    target.write(oneLine)

# Stream the merged BED one line at a time and route each line through the
# vote. The finally clause guarantees that the input stream and both output
# files are closed (and therefore flushed) even when a line fails to process.
try:
    for oneLine in fileinput.input(input_file):
        consensus_bed(oneLine)
finally:
    fileinput.close()
    outCONSENSUS.close()
    outFiltered.close()



+ 18
- 0
codescripts/count_bed.py 查看文件

@@ -0,0 +1,18 @@
import sys,getopt
import fileinput

def process(line):
    """Return the length of one BED record, i.e. end minus start.

    Args:
        line: One raw line of a tab-separated BED file. The second and third
            columns are parsed as integers.

    Returns:
        ``int(end) - int(start)`` for a data line, or 0 for a line that
        carries no interval (blank line, ``#`` comment, or a ``track`` /
        ``browser`` header line).

    Raises:
        IndexError: If a data line has fewer than three tab-separated columns.
        ValueError: If the start or end column is not an integer.
    """
    record = line.strip()
    # Blank lines (e.g. a trailing newline at end of file) and BED header
    # lines have no coordinates; count them as zero instead of crashing.
    if not record or record.startswith('#'):
        return 0
    if record.split(None, 1)[0] in ('track', 'browser'):
        return 0

    strings = record.split('\t')
    return int(strings[2]) - int(strings[1])

# Add up the per-record lengths over every line of the BED file named by the
# first command-line argument, then print the total.
result = sum(process(line) for line in fileinput.input(sys.argv[1]))

print(result)

+ 10
- 0
inputs 查看文件

@@ -0,0 +1,10 @@
{
"{{ project_name }}.disk_size": "150",
"{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}",
"{{ project_name }}.bedVote.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1",
"{{ project_name }}.cluster_config": "OnDemand bcs.a2.large img-ubuntu-vpc",
"{{ project_name }}.mergeBed.docker": "registry-internal.cn-shanghai.aliyuncs.com/pgx-docker-registry/bedtools:v2.27.1",
"{{ project_name }}.CallableLoci.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/gatk:3.8-1",
"{{ project_name }}.quartet_sample": "{{ quartet_sample }}"
}


二進制
tasks/.DS_Store 查看文件


+ 29
- 0
tasks/CallableLoci.wdl 查看文件

@@ -0,0 +1,29 @@
# Keep only the lines of the input BED that contain the string CALLABLE and
# write them to <sample>.CALLABLE.bed.
task CallableLoci {
  # Input BED whose lines are filtered.
  File bed
  # Prefix for the output file name.
  String sample
  # Container image and cluster settings forwarded to the runtime section.
  String docker
  String disk_size
  String cluster_config

  command <<<
    grep CALLABLE ${bed} > ${sample}.CALLABLE.bed
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File callable_bed = "${sample}.CALLABLE.bed"
  }
}





+ 31
- 0
tasks/bedVote.wdl 查看文件

@@ -0,0 +1,31 @@
# Run the voting script on a merged multi-sample BED and expose the two files
# it produces: the lines that passed the vote and the lines that did not.
task bedVote {
# Merged BED passed to the script's -bed option.
File merged_bed
# Prefix passed to the script's -prefix option; also the stem of both outputs.
String sample
# Container image and cluster settings forwarded to the runtime section.
# NOTE(review): the image is expected to ship /opt/callable_bed_voting.py - confirm.
String docker
String disk_size
String cluster_config

command <<<
python /opt/callable_bed_voting.py -bed ${merged_bed} -prefix ${sample}

>>>

runtime {
docker:docker
cluster:cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}

# File names must match what the script writes: <prefix>.27consensus.bed and
# <prefix>.filtered.bed.
output {
File consensus_bed = "${sample}.27consensus.bed"
File filtered_bed = "${sample}.filtered.bed"
}
}





+ 33
- 0
tasks/gvcf_homo_ref.wdl 查看文件

@@ -0,0 +1,33 @@
# Extract reference-block records from a gVCF, keep those whose 16th
# colon-expanded column is at least 30, and merge the resulting intervals
# into <sample>.homo_ref.bed.
#
# Pipeline steps:
#   1. awk keeps records whose ALT column (5) is exactly <NON_REF>.
#   2. sed turns every ':' into a tab so the FORMAT/sample sub-fields become
#      separate columns.
#   3. awk keeps rows whose column 16 is >= 30.
#      NOTE(review): column 16 is presumably GQ for a GT:DP:GQ:... FORMAT
#      layout - confirm against the gVCF producer.
#   4. cut keeps columns 1, 2 and 8 (chromosome, POS, INFO) and sed strips
#      the "END=" prefix, leaving chromosome / start / end.
#      NOTE(review): VCF POS is 1-based while BED start is 0-based, so the
#      start may need POS-1 - confirm the intended coordinate convention.
#   5. bedtools merge collapses overlapping/adjacent intervals.
task gvcf_homo_ref {
  # Input gVCF (plain text; it is read directly by awk).
  File gvcf
  # Prefix for both output file names.
  String sample
  # Container image and cluster settings forwarded to the runtime section.
  String docker
  String disk_size
  String cluster_config

  command <<<

    awk '{ if ($5 == "<NON_REF>") { print } }' ${gvcf} | sed s'/:/\t/'g | awk '{ if($16 >= 30) { print }}' | cut -f1,2,8 | sed s'/END=//g' > ${sample}.filtered.bed

    bedtools merge -i ${sample}.filtered.bed > ${sample}.homo_ref.bed

  >>>

  runtime {
    docker:docker
    cluster:cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File filtered_bed = "${sample}.filtered.bed"
    File homo_ref_bed = "${sample}.homo_ref.bed"
  }
}





+ 30
- 0
tasks/mergeBed.wdl 查看文件

@@ -0,0 +1,30 @@
# Combine several BED files with `bedtools multiinter` into a single
# <sample>.CALLABLE.merged.bed.
task mergeBed {
# BED files to intersect; passed space-separated after -i, in array order.
Array[File] callable_bed
# Prefix for the output file name.
String sample
# Container image and cluster settings forwarded to the runtime section.
# NOTE(review): the image must provide bedtools at the absolute path used below - confirm.
String docker
String disk_size
String cluster_config

command <<<
/opt/ccdg/bedtools-2.27.1/bin/bedtools multiinter -i ${sep=" " callable_bed} > ${sample}.CALLABLE.merged.bed

>>>

runtime {
docker:docker
cluster:cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}

output {
File merged_bed = "${sample}.CALLABLE.merged.bed"
}
}





+ 38
- 0
workflow.wdl 查看文件

@@ -0,0 +1,38 @@
import "./tasks/gvcf_homo_ref.wdl" as gvcf_homo_ref
import "./tasks/mergeBed.wdl" as mergeBed
import "./tasks/bedVote.wdl" as bedVote


# Workflow: for every row of the samples TSV, derive a homozygous-reference
# BED from the gVCF; then merge all per-sample BEDs and vote on the merged file.
workflow project_name {

# TSV with one row per sample: column 0 is used as the gVCF, column 1 as the
# sample name.
File inputSamplesFile
Array[Array[File]] inputSamples = read_tsv(inputSamplesFile)
# Prefix for the merged and voted output files.
String quartet_sample
# Forwarded unchanged to every task.
String disk_size
String cluster_config
# NOTE(review): no call below passes `docker`, so each task's image must be
# supplied through the inputs file as <workflow>.<call>.docker - confirm that
# an entry exists for gvcf_homo_ref.
scatter (quartet in inputSamples){
call gvcf_homo_ref.gvcf_homo_ref as gvcf_homo_ref {
input:
gvcf=quartet[0],
sample=quartet[1],
disk_size=disk_size,
cluster_config=cluster_config
}
}
# Outside the scatter, gvcf_homo_ref.homo_ref_bed is the array of all
# per-sample BEDs gathered from the scattered calls.
call mergeBed.mergeBed as mergeBed {
input:
callable_bed=gvcf_homo_ref.homo_ref_bed,
sample=quartet_sample,
disk_size=disk_size,
cluster_config=cluster_config
}
# Split the merged BED into consensus and filtered regions.
call bedVote.bedVote as bedVote {
input:
merged_bed=mergeBed.merged_bed,
sample=quartet_sample,
disk_size=disk_size,
cluster_config=cluster_config
}
}


Loading…
取消
儲存