@@ -0,0 +1,55 @@ | |||
# import modules | |||
import sys, argparse, os | |||
import fileinput | |||
from operator import itemgetter | |||
# Command-line interface: the merged multi-sample BED to vote on, and the
# prefix used to name both output files.
parser = argparse.ArgumentParser(
    description="this script is to vote callable bed region"
)
parser.add_argument(
    '-bed', '--multiSampleBED',
    type=str,
    required=True,
    help='The bed file to get high confidence region',
)
parser.add_argument(
    '-prefix', '--prefix',
    type=str,
    required=True,
    help='The output file you want to name',
)
args = parser.parse_args()

# Short aliases for the parsed options.
input_file, prefix = args.multiSampleBED, args.prefix

# Rows that pass the vote are written to <prefix>.27consensus.bed, all other
# rows to <prefix>.filtered.bed. Both handles stay open at module level and
# are closed once the whole input has been processed.
consensus_filename = '{0}.27consensus.bed'.format(prefix)
filter_filename = '{0}.filtered.bed'.format(prefix)
outCONSENSUS = open(consensus_filename, 'w')
outFiltered = open(filter_filename, 'w')
# Sequencing platform and site labels kept for reference only (commented out; nothing below reads them).
#sequence_tech = ['SEQ2000','SEQ2000','SEQ2000','SEQ2000','SEQ2000','SEQ500','SEQ500','SEQ500','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','XTen','XTen','XTen','XTen','XTen','XTen','XTen','XTen','XTen','XTen','XTen','XTen'] | |||
#sequence_site = ['BGI','BGI','BGI','WGE','WGE','BGI','BGI','BGI','ARD','ARD','ARD','ARD','ARD','ARD','BRG','BRG','BRG','BRG','GAC','NVG','WUX','ARD','ARD','ARD','NVG','NVG','NVG','WUX','WUX','WUX','WUX','WUX','WUX'] | |||
# 0-based index of the first of the three replicate columns of every batch.
# Columns 5..31 of a row hold one '1'/'0' flag per sample (27 samples in
# nine batches of three). The batch names are the ones used by the
# original per-batch variables.
_PCR_BATCH_STARTS = (5, 23, 26, 29)           # SEQ2000_BGI, XTen_ARD, XTen_NVG, XTen_WUX
_PCR_FREE_BATCH_STARTS = (8, 11, 14, 17, 20)  # T7_WGE, Nova_ARD_1, Nova_ARD_2, Nova_BRG, Nova_WUX


def _batches_supporting(fields, batch_starts):
    """Count the batches in which at least two of the three replicates are '1'."""
    return sum(
        1 for start in batch_starts
        if fields[start:start + 3].count('1') > 1
    )


def _is_consensus(fields):
    """Return True when both library groups vote for the region.

    The PCR group needs at least 3 of its 4 batches, the PCR-free group at
    least 4 of its 5 batches. Rows with too few columns simply collect no
    votes (slicing never raises) and therefore return False.
    """
    pcr_ok = _batches_supporting(fields, _PCR_BATCH_STARTS) > 2
    pcr_free_ok = _batches_supporting(fields, _PCR_FREE_BATCH_STARTS) > 3
    return pcr_ok and pcr_free_ok


def consensus_bed(oneLine):
    """Vote on one row of the merged BED and write it to the matching output.

    Args:
        oneLine: one raw, tab-separated line of the merged multi-sample BED,
            including its line terminator.

    The unmodified line is written to the module-level ``outCONSENSUS``
    handle when the vote passes and to ``outFiltered`` otherwise. Blank
    lines are skipped so that no empty record ends up in either BED file.
    """
    stripped = oneLine.strip()
    if not stripped:
        return
    if _is_consensus(stripped.split('\t')):
        outCONSENSUS.write(oneLine)
    else:
        outFiltered.write(oneLine)
# Vote on every row of the input BED. The try/finally guarantees that the
# input stream and both output files are flushed and closed even when a row
# raises part-way through the file.
try:
    for oneLine in fileinput.input(input_file):
        consensus_bed(oneLine)
finally:
    fileinput.close()
    outCONSENSUS.close()
    outFiltered.close()
@@ -0,0 +1,18 @@ | |||
import sys,getopt | |||
import fileinput | |||
def process(line):
    """Return the length (end - start) of one tab-separated BED record.

    Args:
        line: one raw line of a BED file; column 2 is the start and
            column 3 the end coordinate.

    Returns:
        ``end - start`` as an int. Blank lines and BED header lines
        (starting with ``#``, ``track`` or ``browser``) carry no interval
        and contribute 0 instead of raising.

    Raises:
        IndexError: a data line has fewer than three columns.
        ValueError: the start or end column is not an integer.
    """
    record = line.strip()
    if not record or record.startswith(('#', 'track', 'browser')):
        return 0
    fields = record.split('\t')
    return int(fields[2]) - int(fields[1])
# Add up the interval lengths of every line of the BED file named by the
# first command-line argument, then print the total.
result = sum(process(line) for line in fileinput.input(sys.argv[1]))
print(result)
@@ -0,0 +1,10 @@ | |||
{ | |||
"{{ project_name }}.disk_size": "150", | |||
"{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}", | |||
"{{ project_name }}.bedVote.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1", | |||
"{{ project_name }}.cluster_config": "OnDemand bcs.a2.large img-ubuntu-vpc", | |||
"{{ project_name }}.mergeBed.docker": "registry-internal.cn-shanghai.aliyuncs.com/pgx-docker-registry/bedtools:v2.27.1", | |||
"{{ project_name }}.CallableLoci.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/gatk:3.8-1", | |||
"{{ project_name }}.quartet_sample": "{{ quartet_sample }}" | |||
} | |||
@@ -0,0 +1,29 @@ | |||
# Keep only the lines of a BED file that contain the text "CALLABLE".
#
# Inputs:
#   bed            - BED file to filter
#                    (presumably GATK CallableLoci output - TODO confirm)
#   sample         - prefix of the output file name
#   docker         - container image the command runs in
#   disk_size      - size inserted into the dataDisk runtime string
#   cluster_config - value of the `cluster` runtime attribute
# Output:
#   callable_bed   - <sample>.CALLABLE.bed
task CallableLoci {
  File bed
  String sample
  String docker
  String disk_size
  String cluster_config

  command <<<
    grep CALLABLE ${bed} > ${sample}.CALLABLE.bed
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File callable_bed = "${sample}.CALLABLE.bed"
  }
}
@@ -0,0 +1,31 @@ | |||
# Run the voting script on a merged multi-sample BED file.
#
# Inputs:
#   merged_bed     - BED file passed to the script's -bed option
#   sample         - value of the script's -prefix option; also the prefix
#                    of both output file names
#   docker         - container image; it must provide
#                    /opt/callable_bed_voting.py
#   disk_size      - size inserted into the dataDisk runtime string
#   cluster_config - value of the `cluster` runtime attribute
# Outputs:
#   consensus_bed  - <sample>.27consensus.bed
#   filtered_bed   - <sample>.filtered.bed
task bedVote {
  File merged_bed
  String sample
  String docker
  String disk_size
  String cluster_config

  command <<<
    python /opt/callable_bed_voting.py \
      -bed ${merged_bed} \
      -prefix ${sample}
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File consensus_bed = "${sample}.27consensus.bed"
    File filtered_bed = "${sample}.filtered.bed"
  }
}
@@ -0,0 +1,33 @@ | |||
# Turn the reference blocks of a gVCF into a merged BED file.
#
# Inputs:
#   gvcf           - gVCF file to read
#   sample         - prefix of both output file names
#   docker         - container image; `bedtools` must be on its PATH
#   disk_size      - size inserted into the dataDisk runtime string
#   cluster_config - value of the `cluster` runtime attribute
# Outputs:
#   filtered_bed   - <sample>.filtered.bed, one interval per retained block
#   homo_ref_bed   - <sample>.homo_ref.bed, the same intervals after
#                    `bedtools merge`
#
# Command, step by step:
#   1. keep records whose ALT column is exactly <NON_REF>
#   2. turn every ':' into a tab and keep records whose field 16 is >= 30
#      NOTE(review): field 16 is presumably the GQ value of a
#      GT:DP:GQ:MIN_DP:PL sample column - confirm against the gVCF producer
#   3. keep CHROM, POS and INFO, and strip the "END=" key from INFO
#   4. convert the 1-based VCF POS into a 0-based BED start (POS - 1), so the
#      first base of each block is kept and adjacent blocks are book-ended
#      and can be merged
#   5. merge the intervals. The sub-command is `merge`; bedtools has no
#      `merged` sub-command.
task gvcf_homo_ref {
  File gvcf
  String sample
  String docker
  String disk_size
  String cluster_config

  command <<<
    awk '{ if ($5 == "<NON_REF>") { print } }' ${gvcf} | sed s'/:/\t/'g | awk '{ if($16 >= 30) { print }}' | cut -f1,2,8 | sed s'/END=//g' | awk 'BEGIN { OFS = "\t" } { print $1, $2 - 1, $3 }' > ${sample}.filtered.bed
    bedtools merge -i ${sample}.filtered.bed > ${sample}.homo_ref.bed
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File filtered_bed = "${sample}.filtered.bed"
    File homo_ref_bed = "${sample}.homo_ref.bed"
  }
}
@@ -0,0 +1,30 @@ | |||
# Intersect several BED files with `bedtools multiinter`.
#
# Inputs:
#   callable_bed   - BED files, passed space-separated to the -i option
#   sample         - prefix of the output file name
#   docker         - container image; it must provide
#                    /opt/ccdg/bedtools-2.27.1/bin/bedtools
#   disk_size      - size inserted into the dataDisk runtime string
#   cluster_config - value of the `cluster` runtime attribute
# Output:
#   merged_bed     - <sample>.CALLABLE.merged.bed
task mergeBed {
  Array[File] callable_bed
  String sample
  String docker
  String disk_size
  String cluster_config

  command <<<
    /opt/ccdg/bedtools-2.27.1/bin/bedtools multiinter \
      -i ${sep=" " callable_bed} \
      > ${sample}.CALLABLE.merged.bed
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File merged_bed = "${sample}.CALLABLE.merged.bed"
  }
}
@@ -0,0 +1,38 @@ | |||
import "./tasks/gvcf_homo_ref.wdl" as gvcf_homo_ref | |||
import "./tasks/mergeBed.wdl" as mergeBed | |||
import "./tasks/bedVote.wdl" as bedVote | |||
# Per-sample gVCF -> reference-block BED, then a multi-sample intersection,
# then the consensus vote.
#
# Inputs:
#   inputSamplesFile - TSV read with read_tsv(); column 0 is passed on as the
#                      gVCF and column 1 as the sample name of each row
#   quartet_sample   - prefix used for the merged and voted output files
#   disk_size        - forwarded to every task
#   cluster_config   - forwarded to every task
#
# None of the calls binds a task's `docker` input, so each one has to be
# supplied from outside the workflow (for example in the inputs JSON).
workflow project_name {
  File inputSamplesFile
  Array[Array[File]] inputSamples = read_tsv(inputSamplesFile)
  String quartet_sample
  String disk_size
  String cluster_config

  # One gvcf_homo_ref call per row of the samples table.
  scatter (quartet in inputSamples) {
    call gvcf_homo_ref.gvcf_homo_ref as gvcf_homo_ref {
      input:
        gvcf = quartet[0],
        sample = quartet[1],
        disk_size = disk_size,
        cluster_config = cluster_config
    }
  }

  # Combine the per-sample BED files gathered from the scatter.
  call mergeBed.mergeBed as mergeBed {
    input:
      callable_bed = gvcf_homo_ref.homo_ref_bed,
      sample = quartet_sample,
      disk_size = disk_size,
      cluster_config = cluster_config
  }

  # Split the merged intervals into consensus and filtered regions.
  call bedVote.bedVote as bedVote {
    input:
      merged_bed = mergeBed.merged_bed,
      sample = quartet_sample,
      disk_size = disk_size,
      cluster_config = cluster_config
  }
}