# import modules | |||||
import sys, argparse, os | |||||
import fileinput | |||||
from operator import itemgetter | |||||
parser = argparse.ArgumentParser(description="this script is to vote callable bed region") | |||||
parser.add_argument('-bed', '--multiSampleBED', type=str, help='The bed file to get high confidence region', required=True) | |||||
parser.add_argument('-prefix', '--prefix', type=str, help='The output file you want to name', required=True) | |||||
args = parser.parse_args() | |||||
# Rename input: | |||||
input_file = args.multiSampleBED | |||||
prefix = args.prefix | |||||
consensus_filename = prefix + '.27consensus.bed' | |||||
outCONSENSUS = open(consensus_filename,'w') | |||||
filter_filename = prefix + '.filtered.bed' | |||||
outFiltered = open(filter_filename,'w') | |||||
#initial | |||||
#sequence_tech = ['SEQ2000','SEQ2000','SEQ2000','SEQ2000','SEQ2000','SEQ500','SEQ500','SEQ500','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','XTen','XTen','XTen','XTen','XTen','XTen','XTen','XTen','XTen','XTen','XTen','XTen'] | |||||
#sequence_site = ['BGI','BGI','BGI','WGE','WGE','BGI','BGI','BGI','ARD','ARD','ARD','ARD','ARD','ARD','BRG','BRG','BRG','BRG','GAC','NVG','WUX','ARD','ARD','ARD','NVG','NVG','NVG','WUX','WUX','WUX','WUX','WUX','WUX'] | |||||
def consensus_bed(oneLine): | |||||
line = oneLine.strip() | |||||
strings = line.split('\t') | |||||
# replicate | |||||
SEQ2000_BGI = 1 if strings[5:8].count('1') > 1 else 0 | |||||
T7_WGE = 1 if strings[8:11].count('1') > 1 else 0 | |||||
Nova_ARD_1 = 1 if strings[11:14].count('1') > 1 else 0 | |||||
Nova_ARD_2 = 1 if strings[14:17].count('1') > 1 else 0 | |||||
Nova_BRG = 1 if strings[17:20].count('1') > 1 else 0 | |||||
Nova_WUX = 1 if strings[20:23].count('1') > 1 else 0 | |||||
XTen_ARD = 1 if strings[23:26].count('1') >1 else 0 | |||||
XTen_NVG = 1 if strings[26:29].count('1') > 1 else 0 | |||||
XTen_WUX = 1 if strings[29:32].count('1') > 1 else 0 | |||||
# library | |||||
pcr = 1 if [SEQ2000_BGI,XTen_ARD,XTen_WUX,XTen_NVG].count(1) > 2 else 0 | |||||
pcr_free = 1 if [T7_WGE,Nova_ARD_1,Nova_ARD_2,Nova_BRG,Nova_WUX].count(1) > 3 else 0 | |||||
voted = 1 if [pcr,pcr_free].count(1) > 1 else 0 | |||||
# get consensus bed and tech specific bed | |||||
if voted == 1: | |||||
outCONSENSUS.write(oneLine) | |||||
else: | |||||
outFiltered.write(oneLine) | |||||
for oneLine in fileinput.input(input_file): | |||||
consensus_bed(oneLine) | |||||
outCONSENSUS.close() | |||||
outFiltered.close() | |||||
import sys,getopt | |||||
import fileinput | |||||
def process(line): | |||||
strings = line.strip().split('\t') | |||||
pos2 = int(strings[2]) | |||||
pos1 = int(strings[1]) | |||||
c = pos2 - pos1 | |||||
return c | |||||
result = 0 | |||||
for line in fileinput.input(sys.argv[1]): | |||||
C = process(line) | |||||
result = result + C | |||||
print(result) |
{ | |||||
"{{ project_name }}.fasta": "String", | |||||
"{{ project_name }}.disk_size": "String", | |||||
"{{ project_name }}.inputSamplesFile": "File", | |||||
"{{ project_name }}.bedVote.docker": "String", | |||||
"{{ project_name }}.cluster_config": "String", | |||||
"{{ project_name }}.mergeBed.docker": "String", | |||||
"{{ project_name }}.CallableLoci.docker": "String", | |||||
"{{ project_name }}.sample": "String", | |||||
"{{ project_name }}.ref_dir": "File" | |||||
} |
task CallableLoci { | |||||
File bam | |||||
File bam_index | |||||
File ref_dir | |||||
String fasta | |||||
String sample | |||||
String docker | |||||
String disk_size | |||||
String cluster_config | |||||
command <<< | |||||
set -o pipefail | |||||
set -e | |||||
java -jar /usr/GenomeAnalysisTK.jar \ | |||||
-T CallableLoci \ | |||||
-R ${ref_dir}/${fasta} \ | |||||
-I ${bam} \ | |||||
--maxDepth 300 \ | |||||
--maxFractionOfReadsWithLowMAPQ 0.1 \ | |||||
--maxLowMAPQ 1 \ | |||||
--minBaseQuality 20 \ | |||||
--minMappingQuality 20 \ | |||||
--minDepth 10 \ | |||||
--minDepthForLowMAPQ 10 \ | |||||
-summary ${sample}_table.txt \ | |||||
-o ${sample}_callable_status.bed | |||||
cat ${sample}_callable_status.bed | grep CALLABLE > ${sample}.CALLABLE.bed | |||||
>>> | |||||
runtime { | |||||
docker:docker | |||||
cluster:cluster_config | |||||
systemDisk: "cloud_ssd 40" | |||||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||||
} | |||||
output { | |||||
File summary = "${sample}_table.txt" | |||||
File bed = "${sample}_callable_status.bed" | |||||
File callable_bed = "${sample}.CALLABLE.bed" | |||||
} | |||||
} | |||||
task bedVote { | |||||
File merged_bed | |||||
String sample | |||||
String docker | |||||
String disk_size | |||||
String cluster_config | |||||
command <<< | |||||
python /opt/callable_bed_voting.py -bed ${merged_bed} -prefix ${sample} | |||||
>>> | |||||
runtime { | |||||
docker:docker | |||||
cluster:cluster_config | |||||
systemDisk: "cloud_ssd 40" | |||||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||||
} | |||||
output { | |||||
File consensus_bed = "${sample}.27consensus.bed" | |||||
File filtered_bed = "${sample}.filtered.bed" | |||||
} | |||||
} | |||||
task mergeBed { | |||||
Array[File] callable_bed | |||||
String sample | |||||
String docker | |||||
String disk_size | |||||
String cluster_config | |||||
command <<< | |||||
bedtools multiinter -i ${sep=" " callable_bed} > ${sample}.CALLABLE.bed | |||||
>>> | |||||
runtime { | |||||
docker:docker | |||||
cluster:cluster_config | |||||
systemDisk: "cloud_ssd 40" | |||||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||||
} | |||||
output { | |||||
File merged_bed = "${sample}.CALLABLE.merged.bed" | |||||
} | |||||
} | |||||
import "./tasks/CallableLoci.wdl" as CallableLoci | |||||
import "./tasks/mergeBed.wdl" as mergeBed | |||||
import "./tasks/bedVote.wdl" as bedVote | |||||
workflow {{ project_name }} { | |||||
File inputSamplesFile | |||||
Array[Array[File]] inputSamples = read_tsv(inputSamplesFile) | |||||
File ref_dir | |||||
String fasta | |||||
String sample | |||||
String disk_size | |||||
String cluster_config | |||||
scatter (sample in inputSamples){ | |||||
call CallableLoci.CallableLoci as CallableLoci { | |||||
input: | |||||
bam=sample[0], | |||||
bam_index=sample[1], | |||||
ref_dir=ref_dir, | |||||
fasta=fasta, | |||||
sample=sample, | |||||
disk_size=disk_size, | |||||
cluster_config=cluster_config | |||||
} | |||||
} | |||||
call mergeBed.mergeBed as mergeBed { | |||||
input: | |||||
callable_bed=CallableLoci.callable_bed, | |||||
sample=sample, | |||||
disk_size=disk_size, | |||||
cluster_config=cluster_config | |||||
} | |||||
call bedVote.bedVote as bedVote { | |||||
input: | |||||
merged_bed=mergeBed.merged_bed, | |||||
sample=sample, | |||||
disk_size=disk_size, | |||||
cluster_config=cluster_config | |||||
} | |||||
} | |||||