# import modules | |||||
################################ | |||||
import sys, argparse, os | |||||
import fileinput | |||||
from operator import itemgetter | |||||
parser = argparse.ArgumentParser(description="this script is to vote callable bed region") | |||||
parser.add_argument('-bed', '--multiSampleBED', type=str, help='The bed file to get high confidence region', required=True) | |||||
parser.add_argument('-prefix', '--prefix', type=str, help='The output file you want to name', required=True) | |||||
args = parser.parse_args() | |||||
# Rename input: | |||||
input_file = args.multiSampleBED | |||||
prefix = args.prefix | |||||
consensus_filename = prefix + '.27consensus.bed' | |||||
outCONSENSUS = open(consensus_filename,'w') | |||||
filter_filename = prefix + '.filtered.bed' | |||||
outFiltered = open(filter_filename,'w') | |||||
#initial | |||||
#sequence_tech = ['SEQ2000','SEQ2000','SEQ2000','SEQ2000','SEQ2000','SEQ500','SEQ500','SEQ500','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','XTen','XTen','XTen','XTen','XTen','XTen','XTen','XTen','XTen','XTen','XTen','XTen'] | |||||
#sequence_site = ['BGI','BGI','BGI','WGE','WGE','BGI','BGI','BGI','ARD','ARD','ARD','ARD','ARD','ARD','BRG','BRG','BRG','BRG','GAC','NVG','WUX','ARD','ARD','ARD','NVG','NVG','NVG','WUX','WUX','WUX','WUX','WUX','WUX'] | |||||
def consensus_bed(oneLine): | |||||
line = oneLine.strip() | |||||
strings = line.split('\t') | |||||
# replicate | |||||
SEQ2000_BGI = 1 if strings[5:8].count('1') > 1 else 0 | |||||
T7_WGE = 1 if strings[8:11].count('1') > 1 else 0 | |||||
Nova_ARD_1 = 1 if strings[11:14].count('1') > 1 else 0 | |||||
Nova_ARD_2 = 1 if strings[14:17].count('1') > 1 else 0 | |||||
Nova_BRG = 1 if strings[17:20].count('1') > 1 else 0 | |||||
Nova_WUX = 1 if strings[20:23].count('1') > 1 else 0 | |||||
XTen_ARD = 1 if strings[23:26].count('1') >1 else 0 | |||||
XTen_NVG = 1 if strings[26:29].count('1') > 1 else 0 | |||||
XTen_WUX = 1 if strings[29:32].count('1') > 1 else 0 | |||||
# library | |||||
pcr = 1 if [SEQ2000_BGI,XTen_ARD,XTen_WUX,XTen_NVG].count(1) > 2 else 0 | |||||
pcr_free = 1 if [T7_WGE,Nova_ARD_1,Nova_ARD_2,Nova_BRG,Nova_WUX].count(1) > 3 else 0 | |||||
voted = 1 if [pcr,pcr_free].count(1) > 1 else 0 | |||||
# get consensus bed and tech specific bed | |||||
if voted == 1: | |||||
outCONSENSUS.write(oneLine) | |||||
else: | |||||
outFiltered.write(oneLine) | |||||
for oneLine in fileinput.input(input_file): | |||||
consensus_bed(oneLine) | |||||
outCONSENSUS.close() | |||||
outFiltered.close() | |||||
import sys,getopt | |||||
import fileinput | |||||
def process(line): | |||||
strings = line.strip().split('\t') | |||||
pos2 = int(strings[2]) | |||||
pos1 = int(strings[1]) | |||||
c = pos2 - pos1 | |||||
return c | |||||
result = 0 | |||||
for line in fileinput.input(sys.argv[1]): | |||||
C = process(line) | |||||
result = result + C | |||||
print(result) |
{ | |||||
"{{ project_name }}.disk_size": "150", | |||||
"{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}", | |||||
"{{ project_name }}.bedVote.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1", | |||||
"{{ project_name }}.cluster_config": "OnDemand bcs.a2.large img-ubuntu-vpc", | |||||
"{{ project_name }}.mergeBed.docker": "registry-internal.cn-shanghai.aliyuncs.com/pgx-docker-registry/bedtools:v2.27.1", | |||||
"{{ project_name }}.CallableLoci.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/gatk:3.8-1", | |||||
"{{ project_name }}.quartet_sample": "{{ quartet_sample }}" | |||||
} | |||||
task CallableLoci { | |||||
File bed | |||||
String sample | |||||
String docker | |||||
String disk_size | |||||
String cluster_config | |||||
command <<< | |||||
cat ${bed} | grep CALLABLE > ${sample}.CALLABLE.bed | |||||
>>> | |||||
runtime { | |||||
docker:docker | |||||
cluster:cluster_config | |||||
systemDisk: "cloud_ssd 40" | |||||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||||
} | |||||
output { | |||||
File callable_bed = "${sample}.CALLABLE.bed" | |||||
} | |||||
} | |||||
task bedVote { | |||||
File merged_bed | |||||
String sample | |||||
String docker | |||||
String disk_size | |||||
String cluster_config | |||||
command <<< | |||||
python /opt/callable_bed_voting.py -bed ${merged_bed} -prefix ${sample} | |||||
>>> | |||||
runtime { | |||||
docker:docker | |||||
cluster:cluster_config | |||||
systemDisk: "cloud_ssd 40" | |||||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||||
} | |||||
output { | |||||
File consensus_bed = "${sample}.27consensus.bed" | |||||
File filtered_bed = "${sample}.filtered.bed" | |||||
} | |||||
} | |||||
task mergeBed { | |||||
Array[File] callable_bed | |||||
String sample | |||||
String docker | |||||
String disk_size | |||||
String cluster_config | |||||
command <<< | |||||
/opt/ccdg/bedtools-2.27.1/bin/bedtools multiinter -i ${sep=" " callable_bed} > ${sample}.CALLABLE.merged.bed | |||||
>>> | |||||
runtime { | |||||
docker:docker | |||||
cluster:cluster_config | |||||
systemDisk: "cloud_ssd 40" | |||||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||||
} | |||||
output { | |||||
File merged_bed = "${sample}.CALLABLE.merged.bed" | |||||
} | |||||
} | |||||
import "./tasks/CallableLoci.wdl" as CallableLoci | |||||
import "./tasks/mergeBed.wdl" as mergeBed | |||||
import "./tasks/bedVote.wdl" as bedVote | |||||
workflow {{ project_name }} { | |||||
File inputSamplesFile | |||||
Array[Array[File]] inputSamples = read_tsv(inputSamplesFile) | |||||
String quartet_sample | |||||
String disk_size | |||||
String cluster_config | |||||
scatter (quartet in inputSamples){ | |||||
call CallableLoci.CallableLoci as CallableLoci { | |||||
input: | |||||
bed=quartet[0], | |||||
sample=quartet[1], | |||||
disk_size=disk_size, | |||||
cluster_config=cluster_config | |||||
} | |||||
} | |||||
call mergeBed.mergeBed as mergeBed { | |||||
input: | |||||
callable_bed=CallableLoci.callable_bed, | |||||
sample=quartet_sample, | |||||
disk_size=disk_size, | |||||
cluster_config=cluster_config | |||||
} | |||||
call bedVote.bedVote as bedVote { | |||||
input: | |||||
merged_bed=mergeBed.merged_bed, | |||||
sample=quartet_sample, | |||||
disk_size=disk_size, | |||||
cluster_config=cluster_config | |||||
} | |||||
} | |||||