@@ -0,0 +1,55 @@ | |||
# import modules | |||
import sys, argparse, os | |||
import fileinput | |||
from operator import itemgetter | |||
# Command-line interface: one multi-sample BED file in, two prefix-named BED files out.
parser = argparse.ArgumentParser(
    description="this script is to vote callable bed region"
)
parser.add_argument(
    '-bed', '--multiSampleBED',
    type=str,
    required=True,
    help='The bed file to get high confidence region',
)
parser.add_argument(
    '-prefix', '--prefix',
    type=str,
    required=True,
    help='The output file you want to name',
)
args = parser.parse_args()

# Short aliases for the two parsed options.
input_file, prefix = args.multiSampleBED, args.prefix

# Both output names are derived from the prefix.
consensus_filename = '{}{}'.format(prefix, '.27consensus.bed')
filter_filename = '{}{}'.format(prefix, '.filtered.bed')

# Opened (and truncated) immediately; consensus_bed() writes to these
# module-level handles and the driver loop at the end of the script closes them.
outCONSENSUS = open(consensus_filename, 'w')
outFiltered = open(filter_filename, 'w')
#initial | |||
#sequence_tech = ['chr','start','end','number','sample','SEQ2000','SEQ2000','SEQ2000','SEQT7','SEQT7','SEQT7','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','XTen','XTen','XTen','XTen','XTen','XTen','XTen','XTen','XTen'] | |||
#sequence_site = ['chr','start','end','number','sample','BGI','BGI','BGI','WGE','WGE','WGE','ARD','ARD','ARD','ARD','ARD','ARD','BRG','BRG','BRG','WUX','WUX','WUX','ARD','ARD','ARD','NVG','NVG','NVG','WUX','WUX','WUX'] | |||
def consensus_bed(oneLine):
    """Vote on one tab-separated record and write it to the matching output.

    Fields 5..31 (0-based) are read as nine consecutive triplets of
    per-replicate flags. A triplet passes when at least two of its three
    flags are the string '1'. The triplets are then pooled into two groups
    (named ``pcr`` and ``pcr_free`` in the original code -- presumably the
    library preparation type; confirm with the data owner):

    * the five-member group passes when at least four triplets pass;
    * the four-member group passes when at least three triplets pass.

    The unmodified input line goes to ``outCONSENSUS`` when both groups
    pass and to ``outFiltered`` otherwise. Records with fewer fields simply
    yield failing triplets, because out-of-range slices are empty.
    """
    fields = oneLine.strip().split('\t')

    def triplet_passes(first):
        # Majority vote over the three flags starting at index `first`.
        return int(fields[first:first + 3].count('1') >= 2)

    # Triplet start offsets 5, 8, ..., 29 in the same order as the original
    # per-batch variables.
    (seq2000_bgi, t7_wge, nova_ard_1, nova_ard_2, nova_brg,
     nova_wux, xten_ard, xten_nvg, xten_wux) = (
        triplet_passes(first) for first in range(5, 32, 3)
    )

    # Group-level votes.
    pcr_passes = sum(
        (seq2000_bgi, nova_wux, xten_ard, xten_wux, xten_nvg)
    ) >= 4
    pcr_free_passes = sum(
        (t7_wge, nova_ard_1, nova_ard_2, nova_brg)
    ) >= 3

    # Both groups must agree for the region to count as consensus.
    destination = outCONSENSUS if (pcr_passes and pcr_free_passes) else outFiltered
    destination.write(oneLine)
# Feed every line of the input BED through the vote.
# The try/finally guarantees that both output files are flushed and closed
# even when reading the input or writing a record raises part-way through.
try:
    for oneLine in fileinput.input(input_file):
        consensus_bed(oneLine)
finally:
    outCONSENSUS.close()
    outFiltered.close()
@@ -0,0 +1,18 @@ | |||
import sys,getopt | |||
import fileinput | |||
def process(line):
    """Return the interval length (end - start) of one BED record.

    Args:
        line: One raw line of a tab-separated BED file; field 1 is the
            start coordinate and field 2 the end coordinate (0-based
            field indices).

    Returns:
        ``int(end) - int(start)`` for a data line, or 0 for a blank line or
        a ``#`` / ``track`` / ``browser`` header line, so such lines do not
        abort the running total.

    Raises:
        IndexError: A data line has fewer than three tab-separated fields.
        ValueError: The start or end field is not an integer.
    """
    record = line.strip()
    # Blank lines and BED header/comment lines carry no interval.
    if not record or record.startswith(('#', 'track', 'browser')):
        return 0
    fields = record.split('\t')
    return int(fields[2]) - int(fields[1])
# Sum the per-record lengths over the BED file named by the first
# command-line argument and print the total.
result = sum(process(line) for line in fileinput.input(sys.argv[1]))
print(result)
@@ -0,0 +1,17 @@ | |||
{
  "{{ project_name }}.LCL8_callable_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL8.consensus.merged.bed",
  "{{ project_name }}.LCL6_variants_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL6.variants.bed",
  "{{ project_name }}.LCL5_variants_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL5.variants.bed",
  "{{ project_name }}.LCL8_HR_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL8.27.homo_ref.consensus.bed",
  "{{ project_name }}.disk_size": "150",
  "{{ project_name }}.LCL7_variants_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL7.variants.bed",
  "{{ project_name }}.LCL7_callable_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL7.consensus.merged.bed",
  "{{ project_name }}.docker": "registry-internal.cn-shanghai.aliyuncs.com/pgx-docker-registry/bedtools:v2.27.1",
  "{{ project_name }}.LCL5_HR_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL5.27.homo_ref.consensus.bed",
  "{{ project_name }}.LCL7_HR_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL7.27.homo_ref.consensus.bed",
  "{{ project_name }}.LCL5_callable_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL5.consensus.merged.bed",
  "{{ project_name }}.LCL6_callable_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL6.consensus.merged.bed",
  "{{ project_name }}.LCL8_variants_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL8.variants.bed",
  "{{ project_name }}.cluster_config": "OnDemand bcs.a2.large img-ubuntu-vpc",
  "{{ project_name }}.LCL6_HR_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL6.27.homo_ref.consensus.bed"
}
@@ -0,0 +1,49 @@ | |||
# Restrict each sample's callable-region BED to chr1-22 and chrX, run
# bedtools multiinter over the four filtered files, and keep the intervals
# whose multiinter line contains the file list "1,2,3,4".
task callable_loci {
    # One callable-region BED per sample.
    File LCL5_callable_bed
    File LCL6_callable_bed
    File LCL7_callable_bed
    File LCL8_callable_bed

    # Execution settings consumed by the runtime section.
    String docker
    String disk_size
    String cluster_config

    command <<<
        grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' ${LCL5_callable_bed} > LCL5_callable_bed.chr1-22.x.bed
        grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' ${LCL6_callable_bed} > LCL6_callable_bed.chr1-22.x.bed
        grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' ${LCL7_callable_bed} > LCL7_callable_bed.chr1-22.x.bed
        grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' ${LCL8_callable_bed} > LCL8_callable_bed.chr1-22.x.bed

        /opt/ccdg/bedtools-2.27.1/bin/bedtools multiinter -i LCL5_callable_bed.chr1-22.x.bed LCL6_callable_bed.chr1-22.x.bed LCL7_callable_bed.chr1-22.x.bed LCL8_callable_bed.chr1-22.x.bed > Quartet.callable.merged.bed

        grep "1,2,3,4" Quartet.callable.merged.bed | cut -f1-3 > Quartet.callable.merged.intersect.bed
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        # Per-sample BEDs after the chromosome filter.
        File LCL5_callable_filtered_bed = "LCL5_callable_bed.chr1-22.x.bed"
        File LCL6_callable_filtered_bed = "LCL6_callable_bed.chr1-22.x.bed"
        File LCL7_callable_filtered_bed = "LCL7_callable_bed.chr1-22.x.bed"
        File LCL8_callable_filtered_bed = "LCL8_callable_bed.chr1-22.x.bed"
        # Raw multiinter table and its "1,2,3,4" subset (first three columns).
        File callable_merged_bed = "Quartet.callable.merged.bed"
        File callable_merged_intersect_bed = "Quartet.callable.merged.intersect.bed"
    }
}
@@ -0,0 +1,37 @@ | |||
# Build the union of the HR intersect BED and the merged variant BED, then
# intersect that union with the callable-region BED.
task final_merge {
    File callable_merged_intersect_bed
    File HR_merged_intersect_bed
    File variants_merged_bed

    # Execution settings consumed by the runtime section.
    String docker
    String disk_size
    String cluster_config

    command <<<
        # Fail the task as soon as any step (including a pipe stage) fails.
        set -eo pipefail

        # bedtools merge takes a single, coordinate-sorted input, so the two
        # BED files are concatenated and sorted before being merged from stdin.
        cat ${HR_merged_intersect_bed} ${variants_merged_bed} | sort -k1,1 -k2,2n | /opt/ccdg/bedtools-2.27.1/bin/bedtools merge -i - > variant_invariant.bed

        /opt/ccdg/bedtools-2.27.1/bin/bedtools intersect -a variant_invariant.bed -b ${callable_merged_intersect_bed} > benchmark_regions.bed
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        # Union of the HR and variant intervals.
        File variant_invariant = "variant_invariant.bed"
        # The union restricted to the callable intervals.
        File benchmark = "benchmark_regions.bed"
    }
}
@@ -0,0 +1,47 @@ | |||
# Restrict each sample's HR BED to chr1-22 and chrX, run bedtools multiinter
# over the four filtered files, and keep the intervals whose multiinter line
# contains the file list "1,2,3,4".
task homo_bed {
    # One HR BED per sample.
    File LCL5_HR_bed
    File LCL6_HR_bed
    File LCL7_HR_bed
    File LCL8_HR_bed

    # Execution settings consumed by the runtime section.
    String docker
    String disk_size
    String cluster_config

    command <<<
        cat ${LCL5_HR_bed} | grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' > LCL5_HR_bed.chr1-22.x.bed
        cat ${LCL6_HR_bed} | grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' > LCL6_HR_bed.chr1-22.x.bed
        cat ${LCL7_HR_bed} | grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' > LCL7_HR_bed.chr1-22.x.bed
        cat ${LCL8_HR_bed} | grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' > LCL8_HR_bed.chr1-22.x.bed

        # The fourth input must be spelled exactly like the file written above
        # (LCL8, upper-case L); file names are case-sensitive.
        /opt/ccdg/bedtools-2.27.1/bin/bedtools multiinter -i LCL5_HR_bed.chr1-22.x.bed LCL6_HR_bed.chr1-22.x.bed LCL7_HR_bed.chr1-22.x.bed LCL8_HR_bed.chr1-22.x.bed > Quartet.HR.merged.bed

        cat Quartet.HR.merged.bed | grep "1,2,3,4" | cut -f1-3 > Quartet.HR.merged.intersect.bed
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        # Per-sample BEDs after the chromosome filter.
        File LCL5_HR_filtered_bed = "LCL5_HR_bed.chr1-22.x.bed"
        File LCL6_HR_filtered_bed = "LCL6_HR_bed.chr1-22.x.bed"
        File LCL7_HR_filtered_bed = "LCL7_HR_bed.chr1-22.x.bed"
        File LCL8_HR_filtered_bed = "LCL8_HR_bed.chr1-22.x.bed"
        # Raw multiinter table and its "1,2,3,4" subset (first three columns).
        File HR_merged_bed = "Quartet.HR.merged.bed"
        File HR_merged_intersect_bed = "Quartet.HR.merged.intersect.bed"
    }
}
@@ -0,0 +1,39 @@ | |||
# Reduce each sample's variant file to three columns (1, 11 and 12), then
# concatenate, sort and merge them into one union BED.
task variant_bed {
    # One variant file per sample.
    # NOTE(review): columns 11 and 12 are presumably the start/end
    # coordinates of each variant interval -- confirm against the producer.
    File LCL5_variants_bed
    File LCL6_variants_bed
    File LCL7_variants_bed
    File LCL8_variants_bed

    # Execution settings consumed by the runtime section.
    String docker
    String disk_size
    String cluster_config

    command <<<
        cut -f1,11,12 ${LCL5_variants_bed} > LCL5_variants_bed.chr1-22.x.bed
        cut -f1,11,12 ${LCL6_variants_bed} > LCL6_variants_bed.chr1-22.x.bed
        cut -f1,11,12 ${LCL7_variants_bed} > LCL7_variants_bed.chr1-22.x.bed
        cut -f1,11,12 ${LCL8_variants_bed} > LCL8_variants_bed.chr1-22.x.bed

        cat LCL5_variants_bed.chr1-22.x.bed LCL6_variants_bed.chr1-22.x.bed LCL7_variants_bed.chr1-22.x.bed LCL8_variants_bed.chr1-22.x.bed | sort -k1,1 -k2,2n | /opt/ccdg/bedtools-2.27.1/bin/bedtools merge -i - > Quartet.variants.merged.union.bed
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        # Sorted, merged union of the four three-column files.
        File variants_merged_bed = "Quartet.variants.merged.union.bed"
    }
}
@@ -0,0 +1,70 @@ | |||
import "./tasks/callable_loci.wdl" as callable_loci | |||
import "./tasks/final_merge.wdl" as final_merge | |||
import "./tasks/homo_bed.wdl" as homo_bed | |||
import "./tasks/variant_bed.wdl" as variant_bed | |||
workflow {{ project_name }} {
    # Per-sample inputs for the callable_loci task.
    File LCL5_callable_bed
    File LCL6_callable_bed
    File LCL7_callable_bed
    File LCL8_callable_bed

    # Per-sample inputs for the homo_bed task.
    File LCL5_HR_bed
    File LCL6_HR_bed
    File LCL7_HR_bed
    File LCL8_HR_bed

    # Per-sample inputs for the variant_bed task.
    File LCL5_variants_bed
    File LCL6_variants_bed
    File LCL7_variants_bed
    File LCL8_variants_bed

    # Execution settings passed unchanged to every task.
    String docker
    String disk_size
    String cluster_config

    # The first three calls depend only on workflow inputs.
    call callable_loci.callable_loci as callable_loci {
        input:
            docker=docker,
            disk_size=disk_size,
            cluster_config=cluster_config,
            LCL5_callable_bed=LCL5_callable_bed,
            LCL6_callable_bed=LCL6_callable_bed,
            LCL7_callable_bed=LCL7_callable_bed,
            LCL8_callable_bed=LCL8_callable_bed
    }

    call homo_bed.homo_bed as homo_bed {
        input:
            docker=docker,
            disk_size=disk_size,
            cluster_config=cluster_config,
            LCL5_HR_bed=LCL5_HR_bed,
            LCL6_HR_bed=LCL6_HR_bed,
            LCL7_HR_bed=LCL7_HR_bed,
            LCL8_HR_bed=LCL8_HR_bed
    }

    call variant_bed.variant_bed as variant_bed {
        input:
            docker=docker,
            disk_size=disk_size,
            cluster_config=cluster_config,
            LCL5_variants_bed=LCL5_variants_bed,
            LCL6_variants_bed=LCL6_variants_bed,
            LCL7_variants_bed=LCL7_variants_bed,
            LCL8_variants_bed=LCL8_variants_bed
    }

    # final_merge consumes one output from each of the three calls above.
    call final_merge.final_merge as final_merge {
        input:
            docker=docker,
            disk_size=disk_size,
            cluster_config=cluster_config,
            callable_merged_intersect_bed=callable_loci.callable_merged_intersect_bed,
            HR_merged_intersect_bed=homo_bed.HR_merged_intersect_bed,
            variants_merged_bed=variant_bed.variants_merged_bed
    }
}