# import modules | |||||
import sys, argparse, os | |||||
import fileinput | |||||
from operator import itemgetter | |||||
# Command-line interface: the multi-sample BED to read and the prefix used
# to name the two output files.
parser = argparse.ArgumentParser(
    description="this script is to vote callable bed region"
)
parser.add_argument(
    '-bed', '--multiSampleBED',
    type=str,
    required=True,
    help='The bed file to get high confidence region',
)
parser.add_argument(
    '-prefix', '--prefix',
    type=str,
    required=True,
    help='The output file you want to name',
)
args = parser.parse_args()

# Short aliases for the parsed options.
input_file = args.multiSampleBED
prefix = args.prefix

# Output handle for <prefix>.27consensus.bed.
consensus_filename = '{}.27consensus.bed'.format(prefix)
outCONSENSUS = open(consensus_filename, 'w')
# Output handle for <prefix>.filtered.bed.
filter_filename = '{}.filtered.bed'.format(prefix)
outFiltered = open(filter_filename, 'w')
#initial | |||||
#sequence_tech = ['chr','start','end','number','sample','SEQ2000','SEQ2000','SEQ2000','SEQT7','SEQT7','SEQT7','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','XTen','XTen','XTen','XTen','XTen','XTen','XTen','XTen','XTen'] | |||||
#sequence_site = ['chr','start','end','number','sample','BGI','BGI','BGI','WGE','WGE','WGE','ARD','ARD','ARD','ARD','ARD','ARD','BRG','BRG','BRG','WUX','WUX','WUX','ARD','ARD','ARD','NVG','NVG','NVG','WUX','WUX','WUX'] | |||||
def consensus_bed(oneLine):
    """Route one tab-separated line to the consensus or the filtered output.

    Fields 5-31 (0-based) are read as nine consecutive groups of three
    flags. A group passes when at least two of its three fields are the
    string '1'. The group results are pooled into two sets: ``pcr`` (five
    groups, at least four must pass) and ``pcr_free`` (four groups, at
    least three must pass). The line is written unchanged to
    ``outCONSENSUS`` only when both sets pass; otherwise it is written to
    ``outFiltered``.
    """
    fields = oneLine.strip().split('\t')

    def majority(first):
        # 1 when two or more of the three fields starting at `first` are '1'.
        return int(fields[first:first + 3].count('1') >= 2)

    # One vote per replicate group, in column order.
    (SEQ2000_BGI, T7_WGE, Nova_ARD_1, Nova_ARD_2, Nova_BRG,
     Nova_WUX, XTen_ARD, XTen_NVG, XTen_WUX) = [
        majority(first) for first in range(5, 32, 3)
    ]

    # Pool the group votes by library type.
    pcr_votes = SEQ2000_BGI + Nova_WUX + XTen_ARD + XTen_WUX + XTen_NVG
    pcr_free_votes = T7_WGE + Nova_ARD_1 + Nova_ARD_2 + Nova_BRG

    # Both library types must agree for the line to count as consensus.
    if pcr_votes >= 4 and pcr_free_votes >= 3:
        destination = outCONSENSUS
    else:
        destination = outFiltered
    destination.write(oneLine)
# Classify every line of the input BED, then close both output files.
for bed_line in fileinput.input(input_file):
    consensus_bed(bed_line)
for handle in (outCONSENSUS, outFiltered):
    handle.close()
import sys,getopt | |||||
import fileinput | |||||
def process(line):
    """Return the length (end - start) of one tab-separated BED line.

    Field 1 is taken as the start and field 2 as the end (0-based field
    indices). Blank lines and BED header lines (starting with '#', or
    whose first token is 'track' or 'browser') carry no interval and
    contribute 0, so a trailing newline or a header no longer aborts the
    whole sum.

    Raises:
        IndexError: a data line has fewer than three tab-separated fields.
        ValueError: the start or end field is not an integer.
    """
    stripped = line.strip()
    if not stripped or stripped.startswith('#'):
        return 0
    if stripped.split(None, 1)[0] in ('track', 'browser'):
        return 0
    fields = stripped.split('\t')
    end = int(fields[2])
    start = int(fields[1])
    return end - start
# Sum of the interval lengths over every line of the file named by the
# first command-line argument.
result = sum(process(line) for line in fileinput.input(sys.argv[1]))
print(result)
{ | |||||
"{{ project_name }}.LCL8_callable_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL8.consensus.merged.bed", | |||||
"{{ project_name }}.LCL6_variants_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL6.variants.bed", | |||||
"{{ project_name }}.LCL5_variants_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL5.variants.bed", | |||||
"{{ project_name }}.LCL8_HR_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL8.27.homo_ref.consensus.bed", | |||||
"{{ project_name }}.disk_size": "150", | |||||
"{{ project_name }}.LCL7_variants_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL7.variants.bed", | |||||
"{{ project_name }}.LCL7_callable_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL7.consensus.merged.bed", | |||||
"{{ project_name }}.docker": "registry-internal.cn-shanghai.aliyuncs.com/pgx-docker-registry/bedtools:v2.27.1", | |||||
"{{ project_name }}.LCL5_HR_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL5.27.homo_ref.consensus.bed", | |||||
"{{ project_name }}.LCL7_HR_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL7.27.homo_ref.consensus.bed", | |||||
"{{ project_name }}.LCL5_callable_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL5.consensus.merged.bed", | |||||
"{{ project_name }}.LCL6_callable_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL6.consensus.merged.bed", | |||||
"{{ project_name }}.LCL8_variants_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL8.variants.bed", | |||||
"{{ project_name }}.cluster_config": "OnDemand bcs.a2.large img-ubuntu-vpc", | |||||
"{{ project_name }}.LCL6_HR_bed": "oss://pgx-result/renluyao/manuscript_v3.0/benchmark_region/dat/LCL6.27.homo_ref.consensus.bed" | |||||
} |
# Restrict each sample's callable BED to chr1-22 and chrX, run bedtools
# multiinter over the four filtered files, and keep the intervals whose
# multiinter line contains the sample list "1,2,3,4".
task callable_loci {
    File LCL5_callable_bed
    File LCL6_callable_bed
    File LCL7_callable_bed
    File LCL8_callable_bed
    String docker
    String disk_size
    String cluster_config

    command <<<
        # One shared pattern for chr1-22 and chrX; grep -w makes each match end on a word boundary.
        CHROMS='^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX'
        grep -w "$CHROMS" ${LCL5_callable_bed} > LCL5_callable_bed.chr1-22.x.bed
        grep -w "$CHROMS" ${LCL6_callable_bed} > LCL6_callable_bed.chr1-22.x.bed
        grep -w "$CHROMS" ${LCL7_callable_bed} > LCL7_callable_bed.chr1-22.x.bed
        grep -w "$CHROMS" ${LCL8_callable_bed} > LCL8_callable_bed.chr1-22.x.bed
        /opt/ccdg/bedtools-2.27.1/bin/bedtools multiinter -i LCL5_callable_bed.chr1-22.x.bed LCL6_callable_bed.chr1-22.x.bed LCL7_callable_bed.chr1-22.x.bed LCL8_callable_bed.chr1-22.x.bed > Quartet.callable.merged.bed
        grep "1,2,3,4" Quartet.callable.merged.bed | cut -f1-3 > Quartet.callable.merged.intersect.bed
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File LCL5_callable_filtered_bed = "LCL5_callable_bed.chr1-22.x.bed"
        File LCL6_callable_filtered_bed = "LCL6_callable_bed.chr1-22.x.bed"
        File LCL7_callable_filtered_bed = "LCL7_callable_bed.chr1-22.x.bed"
        File LCL8_callable_filtered_bed = "LCL8_callable_bed.chr1-22.x.bed"
        File callable_merged_bed = "Quartet.callable.merged.bed"
        File callable_merged_intersect_bed = "Quartet.callable.merged.intersect.bed"
    }
}
# Build the final benchmark regions: merge the homozygous-reference and
# variant intervals into one set (variant_invariant.bed), then intersect
# that set with the callable intervals (benchmark_regions.bed).
task final_merge {
    File callable_merged_intersect_bed
    File HR_merged_intersect_bed
    File variants_merged_bed
    String docker
    String disk_size
    String cluster_config

    command <<<
        # bedtools merge takes a single, position-sorted input, so the two
        # BED files are concatenated and sorted before being merged from stdin.
        cat ${HR_merged_intersect_bed} ${variants_merged_bed} | sort -k1,1 -k2,2n | /opt/ccdg/bedtools-2.27.1/bin/bedtools merge -i - > variant_invariant.bed
        /opt/ccdg/bedtools-2.27.1/bin/bedtools intersect -a variant_invariant.bed -b ${callable_merged_intersect_bed} > benchmark_regions.bed
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File variant_invariant = "variant_invariant.bed"
        File benchmark = "benchmark_regions.bed"
    }
}
# Restrict each sample's homozygous-reference (HR) BED to chr1-22 and chrX,
# run bedtools multiinter over the four filtered files, and keep the
# intervals whose multiinter line contains the sample list "1,2,3,4".
task homo_bed {
    File LCL5_HR_bed
    File LCL6_HR_bed
    File LCL7_HR_bed
    File LCL8_HR_bed
    String docker
    String disk_size
    String cluster_config

    command <<<
        cat ${LCL5_HR_bed} | grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' > LCL5_HR_bed.chr1-22.x.bed
        cat ${LCL6_HR_bed} | grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' > LCL6_HR_bed.chr1-22.x.bed
        cat ${LCL7_HR_bed} | grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' > LCL7_HR_bed.chr1-22.x.bed
        cat ${LCL8_HR_bed} | grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' > LCL8_HR_bed.chr1-22.x.bed
        # The fourth input must be the LCL8 file written on the line above
        # (file names are case-sensitive).
        /opt/ccdg/bedtools-2.27.1/bin/bedtools multiinter -i LCL5_HR_bed.chr1-22.x.bed LCL6_HR_bed.chr1-22.x.bed LCL7_HR_bed.chr1-22.x.bed LCL8_HR_bed.chr1-22.x.bed > Quartet.HR.merged.bed
        cat Quartet.HR.merged.bed | grep "1,2,3,4" | cut -f1-3 > Quartet.HR.merged.intersect.bed
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File LCL5_HR_filtered_bed = "LCL5_HR_bed.chr1-22.x.bed"
        File LCL6_HR_filtered_bed = "LCL6_HR_bed.chr1-22.x.bed"
        File LCL7_HR_filtered_bed = "LCL7_HR_bed.chr1-22.x.bed"
        File LCL8_HR_filtered_bed = "LCL8_HR_bed.chr1-22.x.bed"
        File HR_merged_bed = "Quartet.HR.merged.bed"
        File HR_merged_intersect_bed = "Quartet.HR.merged.intersect.bed"
    }
}
# Reduce each sample's variants BED to columns 1, 11 and 12 (presumably
# chrom/start/end of the region -- confirm against the input files), then
# sort and merge the four reduced files into one union BED.
task variant_bed {
    File LCL5_variants_bed
    File LCL6_variants_bed
    File LCL7_variants_bed
    File LCL8_variants_bed
    String docker
    String disk_size
    String cluster_config

    command <<<
        cut -f1,11,12 ${LCL5_variants_bed} > LCL5_variants_bed.chr1-22.x.bed
        cut -f1,11,12 ${LCL6_variants_bed} > LCL6_variants_bed.chr1-22.x.bed
        cut -f1,11,12 ${LCL7_variants_bed} > LCL7_variants_bed.chr1-22.x.bed
        cut -f1,11,12 ${LCL8_variants_bed} > LCL8_variants_bed.chr1-22.x.bed
        # sort reads all four files as one stream; bedtools merge then collapses overlapping intervals.
        sort -k1,1 -k2,2n LCL5_variants_bed.chr1-22.x.bed LCL6_variants_bed.chr1-22.x.bed LCL7_variants_bed.chr1-22.x.bed LCL8_variants_bed.chr1-22.x.bed | /opt/ccdg/bedtools-2.27.1/bin/bedtools merge -i - > Quartet.variants.merged.union.bed
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File variants_merged_bed = "Quartet.variants.merged.union.bed"
    }
}
import "./tasks/callable_loci.wdl" as callable_loci | |||||
import "./tasks/final_merge.wdl" as final_merge | |||||
import "./tasks/homo_bed.wdl" as homo_bed | |||||
import "./tasks/variant_bed.wdl" as variant_bed | |||||
# Top-level workflow: three independent per-category tasks (callable loci,
# homozygous-reference regions, variant regions) feed a final merge step.
workflow {{ project_name }} {
    # Per-sample callable-region BEDs.
    File LCL5_callable_bed
    File LCL6_callable_bed
    File LCL7_callable_bed
    File LCL8_callable_bed

    # Per-sample homozygous-reference BEDs.
    File LCL5_HR_bed
    File LCL6_HR_bed
    File LCL7_HR_bed
    File LCL8_HR_bed

    # Per-sample variant BEDs.
    File LCL5_variants_bed
    File LCL6_variants_bed
    File LCL7_variants_bed
    File LCL8_variants_bed

    # Runtime settings passed unchanged to every task.
    String docker
    String disk_size
    String cluster_config

    call callable_loci.callable_loci as callable_loci {
        input:
            LCL5_callable_bed = LCL5_callable_bed,
            LCL6_callable_bed = LCL6_callable_bed,
            LCL7_callable_bed = LCL7_callable_bed,
            LCL8_callable_bed = LCL8_callable_bed,
            docker = docker,
            disk_size = disk_size,
            cluster_config = cluster_config
    }

    call homo_bed.homo_bed as homo_bed {
        input:
            LCL5_HR_bed = LCL5_HR_bed,
            LCL6_HR_bed = LCL6_HR_bed,
            LCL7_HR_bed = LCL7_HR_bed,
            LCL8_HR_bed = LCL8_HR_bed,
            docker = docker,
            disk_size = disk_size,
            cluster_config = cluster_config
    }

    call variant_bed.variant_bed as variant_bed {
        input:
            LCL5_variants_bed = LCL5_variants_bed,
            LCL6_variants_bed = LCL6_variants_bed,
            LCL7_variants_bed = LCL7_variants_bed,
            LCL8_variants_bed = LCL8_variants_bed,
            docker = docker,
            disk_size = disk_size,
            cluster_config = cluster_config
    }

    # Consumes one output from each of the three calls above.
    call final_merge.final_merge as final_merge {
        input:
            callable_merged_intersect_bed = callable_loci.callable_merged_intersect_bed,
            HR_merged_intersect_bed = homo_bed.HR_merged_intersect_bed,
            variants_merged_bed = variant_bed.variants_merged_bed,
            docker = docker,
            disk_size = disk_size,
            cluster_config = cluster_config
    }
}