5 years ago · f9bbc8520e
--- a/codescripts/high_confidence_call_vote.py
+++ b/codescripts/high_confidence_call_vote.py
@@ -0,0 +1,197 @@
 # import modules
 import sys, argparse, os
 import fileinput
 import re
 import pandas as pd
 from operator import itemgetter
 from collections import Counter
 from itertools import islice  

 # command-line interface
 def _build_parser():
 	"""Create the parser; all four options are required strings."""
 	cli = argparse.ArgumentParser(description="this script is to count voting number")
 	option_specs = (
 		('-vcf', '--multi_sample_vcf', 'The VCF file you want to count the voting number'),
 		('-dup', '--dup_list', 'Duplication list'),
 		('-sample', '--sample_name', 'which sample of quartet'),
 		('-prefix', '--prefix', 'Prefix of output file name'),
 	)
 	for short_flag, long_flag, help_text in option_specs:
 		cli.add_argument(short_flag, long_flag, type=str, help=help_text, required=True)
 	return cli

 parser = _build_parser()

 # parse once and expose each option as a module-level name
 args = parser.parse_args()
 multi_sample_vcf, dup_list = args.multi_sample_vcf, args.dup_list
 prefix, sample_name = args.prefix, args.sample_name

 # Meta-information lines written at the top of the annotated VCF.
 # - TWINS/TRIO5/TRIO6 are per-sample 0/1 values, so they are declared as
 #   Number=1 Integer (Flag is not a legal FORMAT type in VCF 4.2).
 # - A value of 1 marks the consistent case: decide_by_rep treats '1:1:1' as
 #   Mendelian consistent.
 # - The 'reproducible' FILTER value set by main() is declared here as well.
 vcf_header = '''##fileformat=VCFv4.2
 ##fileDate=20191224
 ##source=high_confidence_calls_intergration(choppy app)
 ##reference=GRCh38.d1.vd1
 ##FILTER=<ID=reproducible,Description="Same non-reference genotype in the PCR and PCR-free consensus">
 ##INFO=<ID=PCT,Number=1,Type=Float,Description="Percentage of votes">
 ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
 ##FORMAT=<ID=TWINS,Number=1,Type=Integer,Description="1 for sister consistent, 0 for sister inconsistent">
 ##FORMAT=<ID=TRIO5,Number=1,Type=Integer,Description="1 for trio consistent, 0 for trio inconsistent">
 ##FORMAT=<ID=TRIO6,Number=1,Type=Integer,Description="1 for trio consistent, 0 for trio inconsistent">
 ##contig=<ID=chr1,length=248956422>
 ##contig=<ID=chr2,length=242193529>
 ##contig=<ID=chr3,length=198295559>
 ##contig=<ID=chr4,length=190214555>
 ##contig=<ID=chr5,length=181538259>
 ##contig=<ID=chr6,length=170805979>
 ##contig=<ID=chr7,length=159345973>
 ##contig=<ID=chr8,length=145138636>
 ##contig=<ID=chr9,length=138394717>
 ##contig=<ID=chr10,length=133797422>
 ##contig=<ID=chr11,length=135086622>
 ##contig=<ID=chr12,length=133275309>
 ##contig=<ID=chr13,length=114364328>
 ##contig=<ID=chr14,length=107043718>
 ##contig=<ID=chr15,length=101991189>
 ##contig=<ID=chr16,length=90338345>
 ##contig=<ID=chr17,length=83257441>
 ##contig=<ID=chr18,length=80373285>
 ##contig=<ID=chr19,length=58617616>
 ##contig=<ID=chr20,length=64444167>
 ##contig=<ID=chr21,length=46709983>
 ##contig=<ID=chr22,length=50818468>
 ##contig=<ID=chrX,length=156040895>
 '''

 # read in duplication list
 # The list is empty when the merged VCF has no duplicated positions; pandas
 # raises EmptyDataError on an empty file, so treat that as "no duplicates".
 # dtype=str keeps the ids comparable with the 'CHROM_POS' strings built in main().
 try:
 	dup = pd.read_table(dup_list, header=None, dtype=str)
 	var_dup = dup[0].tolist()
 except pd.errors.EmptyDataError:
 	dup = pd.DataFrame()
 	var_dup = []

 # output file
 file_name = prefix + '_annotated.vcf'
 outfile = open(file_name,'w')

 # write VCF
 # Replicate ids in the column order main() relies on (columns 9..41).
 # NOTE(review): the original header hard-coded the '_LCL5' suffix for every
 # sample; the suffix now follows sample_name - confirm the merged VCF uses the
 # same naming for LCL6/LCL7/LCL8.
 replicate_ids = [
 	'Quartet_DNA_BGI_SEQ2000_BGI_1_20180518',
 	'Quartet_DNA_BGI_SEQ2000_BGI_2_20180530',
 	'Quartet_DNA_BGI_SEQ2000_BGI_3_20180530',
 	'Quartet_DNA_BGI_SEQ2000_WGE_1_20190402',
 	'Quartet_DNA_BGI_SEQ2000_WGE_2_20190402',
 	'Quartet_DNA_BGI_SEQ500_BGI_1_20180328',
 	'Quartet_DNA_BGI_SEQ500_BGI_2_20180328',
 	'Quartet_DNA_BGI_SEQ500_BGI_3_20180328',
 	'Quartet_DNA_ILM_Nova_ARD_1_20181108',
 	'Quartet_DNA_ILM_Nova_ARD_2_20181108',
 	'Quartet_DNA_ILM_Nova_ARD_3_20181108',
 	'Quartet_DNA_ILM_Nova_ARD_4_20190111',
 	'Quartet_DNA_ILM_Nova_ARD_5_20190111',
 	'Quartet_DNA_ILM_Nova_ARD_6_20190111',
 	'Quartet_DNA_ILM_Nova_BRG_1_20171024',
 	'Quartet_DNA_ILM_Nova_BRG_1_20180930',
 	'Quartet_DNA_ILM_Nova_BRG_2_20180930',
 	'Quartet_DNA_ILM_Nova_BRG_3_20180930',
 	'Quartet_DNA_ILM_Nova_GAC_1_20171025',
 	'Quartet_DNA_ILM_Nova_NVG_1_20171024',
 	'Quartet_DNA_ILM_Nova_WUX_1_20171024',
 	'Quartet_DNA_ILM_XTen_ARD_1_20170403',
 	'Quartet_DNA_ILM_XTen_ARD_2_20170403',
 	'Quartet_DNA_ILM_XTen_ARD_3_20170403',
 	'Quartet_DNA_ILM_XTen_NVG_1_20170329',
 	'Quartet_DNA_ILM_XTen_NVG_2_20170329',
 	'Quartet_DNA_ILM_XTen_NVG_3_20170329',
 	'Quartet_DNA_ILM_XTen_WUX_1_20170216',
 	'Quartet_DNA_ILM_XTen_WUX_2_20170216',
 	'Quartet_DNA_ILM_XTen_WUX_3_20170216',
 	'Quartet_DNA_ILM_XTen_WUX_4_20180703',
 	'Quartet_DNA_ILM_XTen_WUX_5_20180703',
 	'Quartet_DNA_ILM_XTen_WUX_6_20180703',
 ]
 fixed_columns = ['#CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO', 'FORMAT']
 sample_columns = [rep_id + '_' + sample_name for rep_id in replicate_ids]
 # the three columns main() appends to every record
 summary_columns = [sample_name + '_pcr', sample_name + '_pcr-free', sample_name + '_consensus']
 outputcolumn = '\t'.join(fixed_columns + sample_columns + summary_columns) + '\n'
 outfile.write(vcf_header)
 outfile.write(outputcolumn)

 #function
 def vote_percentage(strings, total=33):
 	"""Return the fraction of replicates that carry a non-reference call.

 	An entry counts as "no vote" when its genotype (the text before the
 	first ':') is '.' or '0/0'.

 	strings -- per-sample column values of one VCF record
 	total   -- number of replicates used as the denominator; defaults to the
 	           33 sample columns of the merged VCF

 	Returns the fraction rounded to two decimals, as a string.
 	Raises ValueError when total is not positive.
 	"""
 	if total <= 0:
 		raise ValueError('total must be a positive number of replicates')
 	gt = [x.replace('0/0', '.').split(':')[0] for x in strings]
 	voted = total - gt.count('.')
 	# float() keeps this a true division even under Python 2
 	percentage = round(float(voted) / total, 2)
 	return str(percentage)

 def decide_by_rep(strings):
 	"""Collapse one group of replicate entries into a single label.

 	strings -- sample-column values of the replicates in one group

 	Returns the majority genotype when at least two entries end in '1:1:1'
 	and at least two share the same genotype; otherwise one of:
 	  'inconGT'  - '1:1:1' wins but no genotype is seen twice
 	  'noInfo'   - the most common tail is '.' (seen at least twice)
 	  'inconMen' - any other case
 	Raises IndexError when strings is empty.
 	"""
 	# last five characters of each entry (presumably the TWINS:TRIO5:TRIO6 values)
 	tail_counts = Counter([entry[-5:] for entry in strings])
 	# every '.' in an entry becomes '0/0' before the genotype field is taken
 	genotypes = [entry.replace('.', '0/0').split(':')[0] for entry in strings]

 	top_tail, top_tail_n = tail_counts.most_common(1)[0]
 	if top_tail_n < 2:
 		return 'inconMen'
 	if top_tail == '.':
 		return 'noInfo'
 	if top_tail != '1:1:1':
 		return 'inconMen'

 	# consistent replicates: the genotype must be backed by two or more of them
 	top_gt, top_gt_n = Counter(genotypes).most_common(1)[0]
 	if top_gt_n >= 2:
 		return top_gt
 	return 'inconGT'


 def _site_vote(site_calls, min_votes):
 	"""Return the call shared by more than min_votes sites, else 'inconSequenceSite'."""
 	candidate, freq = Counter(site_calls).most_common(1)[0]
 	if freq > min_votes:
 		return candidate
 	return 'inconSequenceSite'


 def _combine_calls(pcr_consensus, pcr_free_consensus):
 	"""Merge the PCR and PCR-free consensus into (consensus_call, FILTER value)."""
 	tag = ('inconGT', 'noInfo', 'inconMen', 'inconSequenceSite')
 	if (pcr_consensus in tag) or (pcr_free_consensus in tag):
 		return 'filtered', '.'
 	if pcr_consensus == pcr_free_consensus:
 		if pcr_consensus == '0/0':
 			return 'confirm for parents', '.'
 		return pcr_consensus, 'reproducible'
 	# NOTE(review): label spellings are kept byte-for-byte; downstream
 	# consumers may match on them.
 	if pcr_consensus == '0/0':
 		return 'pcr-free-speicifc', '.'
 	if pcr_free_consensus == '0/0':
 		return 'pcr-speicifc', '.'
 	# both libraries call a variant but disagree on the genotype
 	return 'filtered', '.'


 def main():
 	"""Annotate every record of the merged VCF with consensus calls.

 	Reads multi_sample_vcf line by line, skips header lines and writes each
 	record to outfile followed by three extra columns: the PCR consensus,
 	the PCR-free consensus and the combined call. Records whose CHROM_POS id
 	is listed in var_dup get '.', '.', 'dupVar' instead. For all other
 	records FILTER (column 6) and INFO (column 7, 'PCT=<fraction>') are
 	overwritten. outfile is closed when processing ends.
 	"""
 	# set membership is O(1); the duplicate list is checked once per record
 	duplicated = set(var_dup)
 	# column indices of the PCR and PCR-free libraries in the merged VCF
 	pick_pcr = itemgetter(9,10,11,12,14,15,16,23,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41)
 	pick_pcr_free = itemgetter(13,17,18,19,20,21,22,24,25,26)
 	try:
 		for line in fileinput.input(multi_sample_vcf):
 			if line.startswith('#'):
 				continue
 			line = line.strip()
 			if not line:
 				# a blank line has no CHROM/POS fields to index
 				continue
 			strings = line.split('\t')
 			variant_id = '_'.join([strings[0],strings[1]])
 			# check if the variants location is duplicated
 			if variant_id in duplicated:
 				outfile.write('\t'.join(strings) + '\t' + '.' + '\t' + '.' + '\t' + 'dupVar' + '\n')
 				continue
 			# pcr: seven platform/site groups, more than four must agree
 			pcr = pick_pcr(strings)
 			pcr_sites = [
 				decide_by_rep(pcr[0:4]),    # SEQ2000
 				decide_by_rep(pcr[4:7]),    # SEQ500
 				decide_by_rep(pcr[7:11]),   # Nova
 				decide_by_rep(pcr[11:14]),  # XTen_ARD
 				decide_by_rep(pcr[14:17]),  # XTen_NVG
 				decide_by_rep(pcr[17:20]),  # XTen_WUX_1
 				decide_by_rep(pcr[20:23]),  # XTen_WUX_2
 			]
 			pcr_consensus = _site_vote(pcr_sites, 4)
 			# pcr-free: four groups, more than two must agree. The vote is
 			# counted on the PCR-free groups themselves (the PCR counter was
 			# previously reused here by mistake).
 			pcr_free = pick_pcr_free(strings)
 			pcr_free_sites = [
 				# one-element slice so the entry is treated as a replicate
 				# list, not iterated character by character.
 				# NOTE(review): a single replicate can never reach the
 				# two-replicate agreement decide_by_rep requires, so this
 				# group always yields 'inconMen' - confirm that is intended.
 				decide_by_rep(pcr_free[0:1]),   # SEQ2000
 				decide_by_rep(pcr_free[1:4]),   # Nova_ARD_1
 				decide_by_rep(pcr_free[4:7]),   # Nova_ARD_2
 				decide_by_rep(pcr_free[7:10]),  # Nova_BRG
 			]
 			pcr_free_consensus = _site_vote(pcr_free_sites, 2)
 			# pcr and pcr-free
 			consensus_call, strings[6] = _combine_calls(pcr_consensus, pcr_free_consensus)
 			# percentage
 			strings[7] = 'PCT=' + vote_percentage(strings[9:])
 			# output
 			outLine = '\t'.join(strings) + '\t' + pcr_consensus + '\t' + pcr_free_consensus + '\t' + consensus_call + '\n'
 			outfile.write(outLine)
 	finally:
 		# release the input stream and flush the annotated VCF to disk
 		fileinput.close()
 		outfile.close()


 # Run the annotation only when executed as a script.
 if __name__ == '__main__':
 	main()












--- a/codescripts/reformVCF.py
+++ b/codescripts/reformVCF.py
@@ -28,11 +28,11 @@ familyfile = open(family_filename,'w')

 # default columns, which will be included in the classifier
 vcfheader = '''##fileformat=VCFv4.2
 ##FILTER=<ID=PASS,Description="Voted by at least two replicates, six callers and two sequencing sites">
 ##FILTER=<ID=PASS,Description="the same genotype between twin sister and mendelian consistent in 578 and 678">
 ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
 ##INFO=<ID=sister,Number=0,Type=Flag,Description="0 for sister consistent, 1 for sister inconsistent">
 ##INFO=<ID=trioLCL5,Number=0,Type=Flag,Description="0 for trio consistent, 1 for trio inconsistent">
 ##INFO=<ID=trioLCL6,Number=0,Type=Flag,Description="0 for trio consistent, 1 for trio inconsistent">
 ##FORMAT=<ID=TWINS,Number=0,Type=Flag,Description="0 for sister consistent, 1 for sister inconsistent">
 ##FORMAT=<ID=TRIO5,Number=0,Type=Flag,Description="0 for trio consistent, 1 for trio inconsistent">
 ##FORMAT=<ID=TRIO6,Number=0,Type=Flag,Description="0 for trio consistent, 1 for trio inconsistent">
 ##contig=<ID=chr1,length=248956422>
 ##contig=<ID=chr2,length=242193529>
 ##contig=<ID=chr3,length=198295559>
@@ -99,33 +99,38 @@ def process(oneLine):
 		pass
 	# sister
 	if strings[11] == strings[14]:
 		info = "sister=1"
 		add_format = ":1"
 	else:
 		info = "sister=0"
 		add_format = ":0"
 	# trioLCL5
 	if strings[15] == 'MD=1':
 		info = info + ";trioLCL5=1"
 		add_format = add_format + ":1"
 	else:
 		info = info + ";trioLCL5=0"
 		add_format = add_format + ":0"
 	# trioLCL6
 	if strings[7] == 'MD=1':
 		info = info + ";trioLCL6=1"
 		add_format = add_format + ":1"
 	else:
 		info = info + ";trioLCL6=0"
 		add_format = add_format + ":0"
 	# filter
 	if (strings[11] == strings[14]) and (strings[15] == 'MD=1') and (strings[7] == 'MD=1'):
 		strings[6] = 'PASS'
 	else:
 		strings[6] = '.'
 	# output LCL5
 	LCL5outLine = strings[0]+'\t'+strings[1]+'\t'+strings[2]+'\t'+strings[3]+'\t'+strings[4]+'\t'+strings[5]+'\t'+strings[6]+'\t'+ info +'\t'+ strings[8] + '\t' + strings[14] + '\n'
 	LCL5outLine = strings[0]+'\t'+strings[1]+'\t'+strings[2]+'\t'+strings[3]+'\t'+strings[4]+'\t'+'.'+'\t'+strings[6]+'\t'+ '.' +'\t'+ 'GT:TWINS:TRIO5:TRIO6' + '\t' + strings[14] + add_format + '\n'
 	LCL5file.write(LCL5outLine)
 	# output LCL6
 	LCL6outLine = strings[0]+'\t'+strings[1]+'\t'+strings[2]+'\t'+strings[3]+'\t'+strings[4]+'\t'+strings[5]+'\t'+strings[6]+'\t'+ info +'\t'+ strings[8] + '\t' + strings[11] + '\n'
 	LCL6outLine = strings[0]+'\t'+strings[1]+'\t'+strings[2]+'\t'+strings[3]+'\t'+strings[4]+'\t'+'.'+'\t'+strings[6]+'\t'+ '.' +'\t'+ 'GT:TWINS:TRIO5:TRIO6' + '\t' + strings[11] + add_format + '\n'
 	LCL6file.write(LCL6outLine)
 	# output LCL7
 	LCL7outLine = strings[0]+'\t'+strings[1]+'\t'+strings[2]+'\t'+strings[3]+'\t'+strings[4]+'\t'+strings[5]+'\t'+strings[6]+'\t'+ info +'\t'+ strings[8] + '\t' + strings[10] + '\n'
 	LCL7outLine = strings[0]+'\t'+strings[1]+'\t'+strings[2]+'\t'+strings[3]+'\t'+strings[4]+'\t'+'.'+'\t'+strings[6]+'\t'+ '.' +'\t'+ 'GT:TWINS:TRIO5:TRIO6' + '\t' + strings[10] + add_format + '\n'
 	LCL7file.write(LCL7outLine)
 	# output LCL8
 	LCL8outLine = strings[0]+'\t'+strings[1]+'\t'+strings[2]+'\t'+strings[3]+'\t'+strings[4]+'\t'+strings[5]+'\t'+strings[6]+'\t'+ info +'\t'+ strings[8] + '\t' + strings[9] + '\n'
 	LCL8outLine = strings[0]+'\t'+strings[1]+'\t'+strings[2]+'\t'+strings[3]+'\t'+strings[4]+'\t'+'.'+'\t'+strings[6]+'\t'+ '.' +'\t'+ 'GT:TWINS:TRIO5:TRIO6' + '\t' + strings[9] + add_format + '\n'
 	LCL8file.write(LCL8outLine)
 	# output family
 	familyoutLine = strings[0]+'\t'+strings[1]+'\t'+strings[2]+'\t'+strings[3]+'\t'+strings[4]+'\t'+strings[5]+'\t'+strings[6]+'\t'+ info +'\t'+ strings[8] + '\t' + strings[14] + '\t' + strings[11] + '\t' + strings[10] + '\t' + strings[9] + '\n'
 	familyoutLine = strings[0]+'\t'+strings[1]+'\t'+strings[2]+'\t'+strings[3]+'\t'+strings[4]+'\t'+ '.'+'\t'+strings[6]+'\t'+ '.' +'\t'+ 'GT:TWINS:TRIO5:TRIO6' + '\t' + strings[14] + add_format +'\t' + strings[11] + add_format + '\t' + strings[10] + add_format +'\t' + strings[9] + add_format + '\n'
 	familyfile.write(familyoutLine)


--- a/inputs
+++ b/inputs
@@ -2,6 +2,8 @@
  "{{ project_name }}.LCL7merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
  "{{ project_name }}.fasta": "GRCh38.d1.vd1.fa",
  "{{ project_name }}.LCL6familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
  "{{ project_name }}.LCL7votes.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call:v1.1",
  "{{ project_name }}.LCL6votes.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call:v1.1",
  "{{ project_name }}.LCL5VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
  "{{ project_name }}.LCL6mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1",
  "{{ project_name }}.mergeSister.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
@@ -11,10 +13,12 @@
  "{{ project_name }}.LCL6merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
  "{{ project_name }}.LCL6variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9",
  "{{ project_name }}.LCL6zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
  "{{ project_name }}.LCL5votes.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call:v1.1",
  "{{ project_name }}.LCL7familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
  "{{ project_name }}.LCL5familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
  "{{ project_name }}.LCL6VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
  "{{ project_name }}.LCL5merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
  "{{ project_name }}.LCL8votes.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call:v1.1",
  "{{ project_name }}.reformVCF.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call:v1.1",
  "{{ project_name }}.LCL5zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
  "{{ project_name }}.cluster_config": "OnDemand bcs.a2.xlarge img-ubuntu-vpc",
@@ -26,3 +30,4 @@
  "{{ project_name }}.ref_dir": "oss://chinese-quartet/quartet-storage-data/reference_data/"
 }


--- a/tasks/indelNorm.wdl
+++ b/tasks/indelNorm.wdl
@@ -0,0 +1,30 @@
 # Keep only records on chr1-chr22 and chrX, then normalize the VCF against
 # the reference FASTA with `bcftools norm`.
 task indelNorm {
 	File vcf
 	File ref_dir
 	String fasta
 	String sampleName
 	String docker
 	String cluster_config
 	String disk_size
 	
 	command <<<

 		# Header lines start with '#'; anchor the match so a record that merely
 		# contains '#' somewhere is not treated as header.
 		grep '^#' ${vcf} > header
 		grep -v '^#' ${vcf} > body
 		# -w keeps e.g. chr1 from matching chr10 or chr1_*_random contigs.
 		# The pattern must not end in a backslash (grep rejects it).
 		grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' body > body.filtered
 		cat header body.filtered > ${sampleName}.filtered.vcf

 		/opt/hall-lab/bcftools-1.9/bin/bcftools norm -f ${ref_dir}/${fasta} ${sampleName}.filtered.vcf > ${sampleName}.normed.vcf

 	>>>

 	runtime {
 		docker:docker
 		cluster: cluster_config
 		systemDisk: "cloud_ssd 40"
 		dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
 	}
 	output {
 		File normed_vcf = "${sampleName}.normed.vcf"
 	}
 }
--- a/tasks/merge.wdl
+++ b/tasks/merge.wdl
@@ -10,6 +10,8 @@ task merge {

 		rtg vcfmerge --force-merge-all --no-gzip -o ${sample}.merged.vcf ${sep=" " family_vcf_gz}

 		cat ${sample}.merged.vcf | grep -v '#' | cut -f1-2 | sed s'/\t/_/g' | sort | uniq -c | sed 's/\s\+/\t/g' | awk '{ if ($1 != 1) { print } }' | cut -f3 > ${sample}.vcf_dup.txt

 	>>>

 	runtime {
@@ -20,5 +22,6 @@ task merge {
 	}
 	output {
 		File merged_vcf = "${sample}.merged.vcf"
 		File vcf_dup = "${sample}.vcf_dup.txt"
 	}
 }
--- a/tasks/votes.wdl
+++ b/tasks/votes.wdl
@@ -1,20 +1,14 @@
 task votes {
 	Array[File] mother_vcf_gz
 	Array[File] mother_vcf_idx
 	Array[File] father_vcf_gz
 	Array[File] father_vcf_idx
 	Array[File] twins_vcf_gz
 	Array[File] twins_vcf_idx
 	File merged_vcf
 	String vcf_dup
 	String sample
 	String prefix
 	String docker
 	String cluster_config
 	String disk_size
 	
 	command <<<
 		rtg vcfmerge --force-merge-all --no-gzip -o LCL8.sister.consistent.merged.vcf ${sep=" " mother_vcf_gz}

 		rtg vcfmerge --force-merge-all --no-gzip -o LCL7.sister.consistent.merged.vcf ${sep=" " father_vcf_gz}

 		rtg vcfmerge --force-merge-all --no-gzip -o Twins.sister.consistent.vcf ${sep=" " twins_vcf_gz}
 		python /opt/high_confidence_call_vote.py -vcf ${merged_vcf} -dup ${vcf_dup} -sample ${sample} -prefix ${prefix}
 	>>>

 	runtime {
@@ -24,8 +18,6 @@ task votes {
 		dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
 	}
 	output {
 		File mother_merged_vcf = "LCL8.sister.consistent.merged.vcf"
 		File father_merged_vcf = "LCL7.sister.consistent.merged.vcf"
 		File twins_merged_vcf = "Twins.sister.consistent.merged.vcf"
 		File annotated_vcf = "${prefix}_annotated.vcf"
 	}
 }
--- a/workflow.wdl
+++ b/workflow.wdl
@@ -5,6 +5,7 @@ import "./tasks/VCFrename.wdl" as VCFrename
 import "./tasks/mergeSister.wdl" as mergeSister
 import "./tasks/reformVCF.wdl" as reformVCF
 import "./tasks/merge.wdl" as merge
 import "./tasks/votes.wdl" as votes

 workflow {{ project_name }} {
 	File inputSamplesFile
@@ -175,6 +176,15 @@ workflow {{ project_name }} {
 		cluster_config=cluster_config,
 		disk_size=disk_size
 	}
 	call votes.votes as LCL5votes{
 		input:
 		merged_vcf=LCL5merge.merged_vcf,
 		vcf_dup=LCL5merge.vcf_dup,
 		sample='LCL5',
 		prefix='LCL5_consensus',
 		cluster_config=cluster_config,
 		disk_size=disk_size
 	}
 	call merge.merge as LCL6merge {
 		input:
 		family_vcf_gz=LCL6familyzipIndex.vcf_gz,
@@ -183,6 +193,15 @@ workflow {{ project_name }} {
 		cluster_config=cluster_config,
 		disk_size=disk_size
 	}
 	call votes.votes as LCL6votes {
 		input:
 		merged_vcf=LCL6merge.merged_vcf,
 		vcf_dup=LCL6merge.vcf_dup,
 		sample='LCL6',
 		prefix='LCL6_consensus',
 		cluster_config=cluster_config,
 		disk_size=disk_size
 	}
 	call merge.merge as LCL7merge {
 		input:
 		family_vcf_gz=LCL7familyzipIndex.vcf_gz,
@@ -191,6 +210,15 @@ workflow {{ project_name }} {
 		cluster_config=cluster_config,
 		disk_size=disk_size
 	}
 	call votes.votes as LCL7votes {
 		input:
 		merged_vcf=LCL7merge.merged_vcf,
 		vcf_dup=LCL7merge.vcf_dup,
 		sample='LCL7',
 		prefix='LCL7_consensus',
 		cluster_config=cluster_config,
 		disk_size=disk_size
 	}
 	call merge.merge as LCL8merge {
 		input:
 		family_vcf_gz=LCL8familyzipIndex.vcf_gz,
@@ -199,4 +227,13 @@ workflow {{ project_name }} {
 		cluster_config=cluster_config,
 		disk_size=disk_size
 	}
 	call votes.votes as LCL8votes {
 		input:
 		merged_vcf=LCL8merge.merged_vcf,
 		vcf_dup=LCL8merge.vcf_dup,
 		sample='LCL8',
 		prefix='LCL8_consensus',
 		cluster_config=cluster_config,
 		disk_size=disk_size
 	}
 }