LUYAO REN 4 роки тому
коміт
af7065fb15
9 змінених файлів з 332 додано та 0 видалено
  1. +55
    -0
      codescripts/callable_bed_voting.py
  2. +18
    -0
      codescripts/count_bed.py
  3. +17
    -0
      inputs
  4. BIN
      tasks/.DS_Store
  5. +49
    -0
      tasks/callable_loci.wdl
  6. +37
    -0
      tasks/final_merge.wdl
  7. +47
    -0
      tasks/homo_bed.wdl
  8. +39
    -0
      tasks/variant_bed.wdl
  9. +70
    -0
      workflow.wdl

+ 55
- 0
codescripts/callable_bed_voting.py Переглянути файл

@@ -0,0 +1,55 @@
# import modules
import sys, argparse, os
import fileinput
from operator import itemgetter

# Command-line interface: one multi-sample BED file in, one output prefix.
parser = argparse.ArgumentParser(
    description="this script is to vote callable bed region",
)
parser.add_argument(
    '-bed', '--multiSampleBED',
    required=True,
    type=str,
    help='The bed file to get high confidence region',
)
parser.add_argument(
    '-prefix', '--prefix',
    required=True,
    type=str,
    help='The output file you want to name',
)
args = parser.parse_args()

# Short aliases for the parsed options.
input_file, prefix = args.multiSampleBED, args.prefix

# Both output paths are derived from the prefix.
consensus_filename = prefix + '.27consensus.bed'
filter_filename = prefix + '.filtered.bed'

# Lines that pass the vote go to the consensus file, all others to the
# filtered file. Both handles stay open for the rest of the script.
outCONSENSUS = open(consensus_filename, 'w')
outFiltered = open(filter_filename, 'w')
#initial
#sequence_tech = ['chr','start','end','number','sample','SEQ2000','SEQ2000','SEQ2000','SEQT7','SEQT7','SEQT7','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','XTen','XTen','XTen','XTen','XTen','XTen','XTen','XTen','XTen']
#sequence_site = ['chr','start','end','number','sample','BGI','BGI','BGI','WGE','WGE','WGE','ARD','ARD','ARD','ARD','ARD','ARD','BRG','BRG','BRG','WUX','WUX','WUX','ARD','ARD','ARD','NVG','NVG','NVG','WUX','WUX','WUX']

# Column layout of one tab-separated input line: five leading columns
# followed by nine groups of three replicate flags ('1' = callable).
# Each entry is (group name, first column, one-past-last column).
_PCR_GROUPS = (
    ('SEQ2000_BGI', 5, 8),
    ('Nova_WUX', 20, 23),
    ('XTen_ARD', 23, 26),
    ('XTen_WUX', 29, 32),
    ('XTen_NVG', 26, 29),
)
_PCR_FREE_GROUPS = (
    ('T7_WGE', 8, 11),
    ('Nova_ARD_1', 11, 14),
    ('Nova_ARD_2', 14, 17),
    ('Nova_BRG', 17, 20),
)

# A library type supports a region when MORE than this many of its groups do.
_PCR_MIN_EXCLUSIVE = 3
_PCR_FREE_MIN_EXCLUSIVE = 2


def _count_supporting_groups(fields, groups):
    """Return how many groups have '1' in more than one of their columns."""
    return sum(
        1
        for _name, first, stop in groups
        if fields[first:stop].count('1') > 1
    )


def _is_consensus(fields):
    """Return True when both the PCR and the PCR-free libraries support the region."""
    pcr = _count_supporting_groups(fields, _PCR_GROUPS) > _PCR_MIN_EXCLUSIVE
    pcr_free = (
        _count_supporting_groups(fields, _PCR_FREE_GROUPS)
        > _PCR_FREE_MIN_EXCLUSIVE
    )
    return pcr and pcr_free


def consensus_bed(oneLine):
    """Vote on one multi-sample BED line and write it to the matching output.

    The line is split on tabs. A replicate group supports the region when
    at least two of its three flags are '1'. The line is written unchanged
    to ``outCONSENSUS`` when more than 3 of the 5 PCR groups and more than
    2 of the 4 PCR-free groups support it, otherwise to ``outFiltered``.

    Lines with fewer columns than expected raise no error: missing flags
    simply do not count as support, so such lines end up in the filtered
    output.

    Args:
        oneLine: One raw input line, including its trailing newline.
    """
    fields = oneLine.strip().split('\t')
    target = outCONSENSUS if _is_consensus(fields) else outFiltered
    target.write(oneLine)

# Stream the input line by line. The finally clause guarantees that the
# input stream and both output files are closed (and the outputs flushed)
# even if reading or voting raises part-way through.
try:
    for oneLine in fileinput.input(input_file):
        consensus_bed(oneLine)
finally:
    fileinput.close()
    outCONSENSUS.close()
    outFiltered.close()



+ 18
- 0
codescripts/count_bed.py Переглянути файл

@@ -0,0 +1,18 @@
import sys,getopt
import fileinput

def process(line):
    """Return the length (end - start) of one tab-separated BED record.

    Blank lines and BED header lines (starting with '#', or whose first
    word is 'track' or 'browser') carry no interval and count as 0, so a
    trailing empty line or a header no longer aborts the whole count.

    Args:
        line: One raw line of a BED file.

    Returns:
        Column 3 minus column 2 as an int, or 0 for blank/header lines.

    Raises:
        IndexError: If a data line has fewer than three tab-separated columns.
        ValueError: If the start or end column is not an integer.
    """
    record = line.strip()
    if not record or record.startswith('#'):
        return 0
    if record.split(None, 1)[0] in ('track', 'browser'):
        return 0
    strings = record.split('\t')
    return int(strings[2]) - int(strings[1])

# Sum of the interval lengths of every line in the BED file named by the
# first command-line argument.
result = sum(process(record) for record in fileinput.input(sys.argv[1]))

print(result)

+ 17
- 0
inputs Переглянути файл

@@ -0,0 +1,17 @@
{
"{{ project_name }}.LCL8_callable_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL8.consensus.merged.bed",
"{{ project_name }}.LCL6_variants_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL6.variants.bed",
"{{ project_name }}.LCL5_variants_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL5.variants.bed",
"{{ project_name }}.LCL8_HR_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL8.27.homo_ref.consensus.bed",
"{{ project_name }}.disk_size": "150",
"{{ project_name }}.LCL7_variants_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL7.variants.bed",
"{{ project_name }}.LCL7_callable_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL7.consensus.merged.bed",
"{{ project_name }}.docker": "registry-internal.cn-shanghai.aliyuncs.com/pgx-docker-registry/bedtools:v2.27.1",
"{{ project_name }}.LCL5_HR_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL5.27.homo_ref.consensus.bed",
"{{ project_name }}.LCL7_HR_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL7.27.homo_ref.consensus.bed",
"{{ project_name }}.LCL5_callable_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL5.consensus.merged.bed",
"{{ project_name }}.LCL6_callable_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL6.consensus.merged.bed",
"{{ project_name }}.LCL8_variants_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL8.variants.bed",
"{{ project_name }}.cluster_config": "OnDemand bcs.a2.large img-ubuntu-vpc",
"{{ project_name }}.LCL6_HR_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL6.27.homo_ref.consensus.bed"
}

BIN
tasks/.DS_Store Переглянути файл


+ 49
- 0
tasks/callable_loci.wdl Переглянути файл

@@ -0,0 +1,49 @@
# Restrict the four callable-region BED files to chr1-22 and chrX, then
# keep the regions present in all four of them.
task callable_loci {
    # Callable-region BED files for LCL5, LCL6, LCL7 and LCL8.
    File LCL5_callable_bed
    File LCL6_callable_bed
    File LCL7_callable_bed
    File LCL8_callable_bed

    # Runtime settings passed straight to the runtime section below.
    String docker
    String disk_size
    String cluster_config

    command <<<
        # Keep only records whose line starts with chr1..chr22 or chrX as a whole word.
        grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' ${LCL5_callable_bed} > LCL5_callable_bed.chr1-22.x.bed
        grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' ${LCL6_callable_bed} > LCL6_callable_bed.chr1-22.x.bed
        grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' ${LCL7_callable_bed} > LCL7_callable_bed.chr1-22.x.bed
        grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' ${LCL8_callable_bed} > LCL8_callable_bed.chr1-22.x.bed

        # Combine the four filtered files into one multi-file intersection table.
        /opt/ccdg/bedtools-2.27.1/bin/bedtools multiinter -i LCL5_callable_bed.chr1-22.x.bed LCL6_callable_bed.chr1-22.x.bed LCL7_callable_bed.chr1-22.x.bed LCL8_callable_bed.chr1-22.x.bed > Quartet.callable.merged.bed

        # Keep rows listing all four inputs ("1,2,3,4") and reduce them to chrom/start/end.
        grep "1,2,3,4" Quartet.callable.merged.bed | cut -f1-3 > Quartet.callable.merged.intersect.bed
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File LCL5_callable_filtered_bed = "LCL5_callable_bed.chr1-22.x.bed"
        File LCL6_callable_filtered_bed = "LCL6_callable_bed.chr1-22.x.bed"
        File LCL7_callable_filtered_bed = "LCL7_callable_bed.chr1-22.x.bed"
        File LCL8_callable_filtered_bed = "LCL8_callable_bed.chr1-22.x.bed"
        File callable_merged_bed = "Quartet.callable.merged.bed"
        File callable_merged_intersect_bed = "Quartet.callable.merged.intersect.bed"
    }
}





+ 37
- 0
tasks/final_merge.wdl Переглянути файл

@@ -0,0 +1,37 @@
# Build the final benchmark regions: the union of the homozygous-reference
# and variant regions, restricted to the callable regions.
task final_merge {
    # Regions callable in all four samples.
    File callable_merged_intersect_bed
    # Homozygous-reference regions shared by all four samples.
    File HR_merged_intersect_bed
    # Union of the per-sample variant regions.
    File variants_merged_bed

    # Runtime settings passed straight to the runtime section below.
    String docker
    String disk_size
    String cluster_config

    command <<<
        # bedtools merge takes exactly one coordinate-sorted input, so the two
        # BED files are concatenated and sorted before being merged from stdin.
        cat ${HR_merged_intersect_bed} ${variants_merged_bed} | sort -k1,1 -k2,2n | /opt/ccdg/bedtools-2.27.1/bin/bedtools merge -i - > variant_invariant.bed

        # Keep only the parts of the merged regions that are also callable.
        /opt/ccdg/bedtools-2.27.1/bin/bedtools intersect -a variant_invariant.bed -b ${callable_merged_intersect_bed} > benchmark_regions.bed
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File variant_invariant = "variant_invariant.bed"
        File benchmark = "benchmark_regions.bed"
    }
}








+ 47
- 0
tasks/homo_bed.wdl Переглянути файл

@@ -0,0 +1,47 @@
# Restrict the four homozygous-reference BED files to chr1-22 and chrX,
# then keep the regions present in all four of them.
task homo_bed {
    # Homozygous-reference BED files for LCL5, LCL6, LCL7 and LCL8.
    File LCL5_HR_bed
    File LCL6_HR_bed
    File LCL7_HR_bed
    File LCL8_HR_bed

    # Runtime settings passed straight to the runtime section below.
    String docker
    String disk_size
    String cluster_config

    command <<<
        # Keep only records whose line starts with chr1..chr22 or chrX as a whole word.
        cat ${LCL5_HR_bed} | grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' > LCL5_HR_bed.chr1-22.x.bed

        cat ${LCL6_HR_bed} | grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' > LCL6_HR_bed.chr1-22.x.bed

        cat ${LCL7_HR_bed} | grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' > LCL7_HR_bed.chr1-22.x.bed

        cat ${LCL8_HR_bed} | grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' > LCL8_HR_bed.chr1-22.x.bed

        # Combine the four filtered files. The fourth name must match the file
        # written just above (LCL8, upper-case L), otherwise bedtools cannot open it.
        /opt/ccdg/bedtools-2.27.1/bin/bedtools multiinter -i LCL5_HR_bed.chr1-22.x.bed LCL6_HR_bed.chr1-22.x.bed LCL7_HR_bed.chr1-22.x.bed LCL8_HR_bed.chr1-22.x.bed > Quartet.HR.merged.bed

        # Keep rows listing all four inputs ("1,2,3,4") and reduce them to chrom/start/end.
        cat Quartet.HR.merged.bed | grep "1,2,3,4" | cut -f1-3 > Quartet.HR.merged.intersect.bed
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File LCL5_HR_filtered_bed = "LCL5_HR_bed.chr1-22.x.bed"
        File LCL6_HR_filtered_bed = "LCL6_HR_bed.chr1-22.x.bed"
        File LCL7_HR_filtered_bed = "LCL7_HR_bed.chr1-22.x.bed"
        File LCL8_HR_filtered_bed = "LCL8_HR_bed.chr1-22.x.bed"
        File HR_merged_bed = "Quartet.HR.merged.bed"
        File HR_merged_intersect_bed = "Quartet.HR.merged.intersect.bed"
    }
}





+ 39
- 0
tasks/variant_bed.wdl Переглянути файл

@@ -0,0 +1,39 @@
# Reduce the four per-sample variant BED files to three columns and merge
# them into one union BED.
task variant_bed {
    # Variant BED files for LCL5, LCL6, LCL7 and LCL8.
    File LCL5_variants_bed
    File LCL6_variants_bed
    File LCL7_variants_bed
    File LCL8_variants_bed

    # Runtime settings passed straight to the runtime section below.
    String docker
    String disk_size
    String cluster_config

    command <<<
        # Keep columns 1, 11 and 12 of each input as the three-column BED record.
        cut -f1,11,12 ${LCL5_variants_bed} > LCL5_variants_bed.chr1-22.x.bed
        cut -f1,11,12 ${LCL6_variants_bed} > LCL6_variants_bed.chr1-22.x.bed
        cut -f1,11,12 ${LCL7_variants_bed} > LCL7_variants_bed.chr1-22.x.bed
        cut -f1,11,12 ${LCL8_variants_bed} > LCL8_variants_bed.chr1-22.x.bed

        # Concatenate, coordinate-sort, and merge overlapping records into the union.
        cat LCL5_variants_bed.chr1-22.x.bed LCL6_variants_bed.chr1-22.x.bed LCL7_variants_bed.chr1-22.x.bed LCL8_variants_bed.chr1-22.x.bed | sort -k1,1 -k2,2n | /opt/ccdg/bedtools-2.27.1/bin/bedtools merge -i - > Quartet.variants.merged.union.bed
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File variants_merged_bed = "Quartet.variants.merged.union.bed"
    }
}








+ 70
- 0
workflow.wdl Переглянути файл

@@ -0,0 +1,70 @@
import "./tasks/callable_loci.wdl" as callable_loci
import "./tasks/final_merge.wdl" as final_merge
import "./tasks/homo_bed.wdl" as homo_bed
import "./tasks/variant_bed.wdl" as variant_bed



# Top-level workflow: runs the three per-category tasks on the four samples,
# then combines their results in final_merge.
workflow {{ project_name }} {

    # Callable-region BED files, one per sample.
    File LCL5_callable_bed
    File LCL6_callable_bed
    File LCL7_callable_bed
    File LCL8_callable_bed

    # Homozygous-reference BED files, one per sample.
    File LCL5_HR_bed
    File LCL6_HR_bed
    File LCL7_HR_bed
    File LCL8_HR_bed

    # Variant BED files, one per sample.
    File LCL5_variants_bed
    File LCL6_variants_bed
    File LCL7_variants_bed
    File LCL8_variants_bed

    # Runtime settings shared by every task.
    String docker
    String disk_size
    String cluster_config

    call callable_loci.callable_loci as callable_loci {
        input:
            LCL5_callable_bed = LCL5_callable_bed,
            LCL6_callable_bed = LCL6_callable_bed,
            LCL7_callable_bed = LCL7_callable_bed,
            LCL8_callable_bed = LCL8_callable_bed,
            docker = docker,
            disk_size = disk_size,
            cluster_config = cluster_config
    }

    call homo_bed.homo_bed as homo_bed {
        input:
            LCL5_HR_bed = LCL5_HR_bed,
            LCL6_HR_bed = LCL6_HR_bed,
            LCL7_HR_bed = LCL7_HR_bed,
            LCL8_HR_bed = LCL8_HR_bed,
            docker = docker,
            disk_size = disk_size,
            cluster_config = cluster_config
    }

    call variant_bed.variant_bed as variant_bed {
        input:
            LCL5_variants_bed = LCL5_variants_bed,
            LCL6_variants_bed = LCL6_variants_bed,
            LCL7_variants_bed = LCL7_variants_bed,
            LCL8_variants_bed = LCL8_variants_bed,
            docker = docker,
            disk_size = disk_size,
            cluster_config = cluster_config
    }

    # Consumes one output from each of the three tasks above.
    call final_merge.final_merge as final_merge {
        input:
            callable_merged_intersect_bed = callable_loci.callable_merged_intersect_bed,
            HR_merged_intersect_bed = homo_bed.HR_merged_intersect_bed,
            variants_merged_bed = variant_bed.variants_merged_bed,
            docker = docker,
            disk_size = disk_size,
            cluster_config = cluster_config
    }

}


Завантаження…
Відмінити
Зберегти