@@ -0,0 +1,197 @@ | |||
# import modules | |||
import sys, argparse, os | |||
import fileinput | |||
import re | |||
import pandas as pd | |||
from operator import itemgetter | |||
from collections import Counter | |||
from itertools import islice | |||
# input arguments | |||
parser = argparse.ArgumentParser(description="this script is to count voting number") | |||
parser.add_argument('-vcf', '--multi_sample_vcf', type=str, help='The VCF file you want to count the voting number', required=True) | |||
parser.add_argument('-dup', '--dup_list', type=str, help='Duplication list', required=True) | |||
parser.add_argument('-sample', '--sample_name', type=str, help='which sample of quartet', required=True) | |||
parser.add_argument('-prefix', '--prefix', type=str, help='Prefix of output file name', required=True) | |||
args = parser.parse_args() | |||
multi_sample_vcf = args.multi_sample_vcf | |||
dup_list = args.dup_list | |||
prefix = args.prefix | |||
sample_name = args.sample_name | |||
vcf_header = '''##fileformat=VCFv4.2 | |||
##fileDate=20191224 | |||
##source=high_confidence_calls_intergration(choppy app) | |||
##reference=GRCh38.d1.vd1 | |||
##INFO=<ID=PCT,Number=1,Type=Float,Description="Percentage of votes"> | |||
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> | |||
##FORMAT=<ID=TWINS,Number=0,Type=Flag,Description="0 for sister consistent, 1 for sister inconsistent"> | |||
##FORMAT=<ID=TRIO5,Number=0,Type=Flag,Description="0 for trio consistent, 1 for trio inconsistent"> | |||
##FORMAT=<ID=TRIO6,Number=0,Type=Flag,Description="0 for trio consistent, 1 for trio inconsistent"> | |||
##contig=<ID=chr1,length=248956422> | |||
##contig=<ID=chr2,length=242193529> | |||
##contig=<ID=chr3,length=198295559> | |||
##contig=<ID=chr4,length=190214555> | |||
##contig=<ID=chr5,length=181538259> | |||
##contig=<ID=chr6,length=170805979> | |||
##contig=<ID=chr7,length=159345973> | |||
##contig=<ID=chr8,length=145138636> | |||
##contig=<ID=chr9,length=138394717> | |||
##contig=<ID=chr10,length=133797422> | |||
##contig=<ID=chr11,length=135086622> | |||
##contig=<ID=chr12,length=133275309> | |||
##contig=<ID=chr13,length=114364328> | |||
##contig=<ID=chr14,length=107043718> | |||
##contig=<ID=chr15,length=101991189> | |||
##contig=<ID=chr16,length=90338345> | |||
##contig=<ID=chr17,length=83257441> | |||
##contig=<ID=chr18,length=80373285> | |||
##contig=<ID=chr19,length=58617616> | |||
##contig=<ID=chr20,length=64444167> | |||
##contig=<ID=chr21,length=46709983> | |||
##contig=<ID=chr22,length=50818468> | |||
##contig=<ID=chrX,length=156040895> | |||
''' | |||
# read in duplication list | |||
dup = pd.read_table(dup_list,header=None) | |||
var_dup = dup[0].tolist() | |||
# output file | |||
file_name = prefix + '_annotated.vcf' | |||
outfile = open(file_name,'w') | |||
# write VCF | |||
outputcolumn = '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tQuartet_DNA_BGI_SEQ2000_BGI_1_20180518_LCL5\tQuartet_DNA_BGI_SEQ2000_BGI_2_20180530_LCL5\tQuartet_DNA_BGI_SEQ2000_BGI_3_20180530_LCL5\tQuartet_DNA_BGI_SEQ2000_WGE_1_20190402_LCL5\tQuartet_DNA_BGI_SEQ2000_WGE_2_20190402_LCL5\tQuartet_DNA_BGI_SEQ500_BGI_1_20180328_LCL5 \tQuartet_DNA_BGI_SEQ500_BGI_2_20180328_LCL5\tQuartet_DNA_BGI_SEQ500_BGI_3_20180328_LCL5\tQuartet_DNA_ILM_Nova_ARD_1_20181108_LCL5\tQuartet_DNA_ILM_Nova_ARD_2_20181108_LCL5\tQuartet_DNA_ILM_Nova_ARD_3_20181108_LCL5\tQuartet_DNA_ILM_Nova_ARD_4_20190111_LCL5\tQuartet_DNA_ILM_Nova_ARD_5_20190111_LCL5\tQuartet_DNA_ILM_Nova_ARD_6_20190111_LCL5\tQuartet_DNA_ILM_Nova_BRG_1_20171024_LCL5\tQuartet_DNA_ILM_Nova_BRG_1_20180930_LCL5\tQuartet_DNA_ILM_Nova_BRG_2_20180930_LCL5\tQuartet_DNA_ILM_Nova_BRG_3_20180930_LCL5\tQuartet_DNA_ILM_Nova_GAC_1_20171025_LCL5\tQuartet_DNA_ILM_Nova_NVG_1_20171024_LCL5\tQuartet_DNA_ILM_Nova_WUX_1_20171024_LCL5\tQuartet_DNA_ILM_XTen_ARD_1_20170403_LCL5\tQuartet_DNA_ILM_XTen_ARD_2_20170403_LCL5\tQuartet_DNA_ILM_XTen_ARD_3_20170403_LCL5\tQuartet_DNA_ILM_XTen_NVG_1_20170329_LCL5\tQuartet_DNA_ILM_XTen_NVG_2_20170329_LCL5\tQuartet_DNA_ILM_XTen_NVG_3_20170329_LCL5\tQuartet_DNA_ILM_XTen_WUX_1_20170216_LCL5\tQuartet_DNA_ILM_XTen_WUX_2_20170216_LCL5\tQuartet_DNA_ILM_XTen_WUX_3_20170216_LCL5\tQuartet_DNA_ILM_XTen_WUX_4_20180703_LCL5\tQuartet_DNA_ILM_XTen_WUX_5_20180703_LCL5\tQuartet_DNA_ILM_XTen_WUX_6_20180703_LCL5' +'\t'+ sample_name+'_pcr'+'\t' + sample_name+'_pcr-free'+ '\t'+ sample_name +'consensus' + '\n' | |||
outfile.write(vcf_header) | |||
outfile.write(outputcolumn) | |||
#function | |||
def vote_percentage(strings): | |||
strings = [x.replace('0/0','.') for x in strings] | |||
gt = [x.split(':')[0] for x in strings] | |||
percentage = round((33 - gt.count('.'))/33,2) | |||
return(str(percentage)) | |||
def decide_by_rep(strings): | |||
consensus_rep = '' | |||
mendelian = [x[-5:] for x in strings] | |||
strings = [x.replace('.','0/0') for x in strings] | |||
gt = [x.split(':')[0] for x in strings] | |||
# mendelian consistent? | |||
mendelian_dict = Counter(mendelian) | |||
highest_mendelian = mendelian_dict.most_common(1) | |||
candidate_mendelian = highest_mendelian[0][0] | |||
freq_mendelian = highest_mendelian[0][1] | |||
if (candidate_mendelian == '1:1:1') and (freq_mendelian >= 2): | |||
gt_num_dict = Counter(gt) | |||
highest_gt = gt_num_dict.most_common(1) | |||
candidate_gt = highest_gt[0][0] | |||
freq_gt = highest_gt[0][1] | |||
if (candidate_gt != '0/0') and (freq_gt >= 2): | |||
consensus_rep = candidate_gt | |||
elif (candidate_gt == '0/0') and (freq_gt >= 2): | |||
consensus_rep = '0/0' | |||
else: | |||
consensus_rep = 'inconGT' | |||
elif (candidate_mendelian == '.') and (freq_mendelian >= 2): | |||
consensus_rep = 'noInfo' | |||
else: | |||
consensus_rep = 'inconMen' | |||
return consensus_rep | |||
def main(): | |||
for line in fileinput.input(multi_sample_vcf): | |||
headline = re.match('^\#',line) | |||
if headline is not None: | |||
pass | |||
else: | |||
line = line.strip() | |||
strings = line.split('\t') | |||
variant_id = '_'.join([strings[0],strings[1]]) | |||
# check if the variants location is duplicated | |||
if variant_id in var_dup: | |||
outLine = '\t'.join(strings) + '\t' + '.' +'\t' + '.' + '\t' + 'dupVar' + '\n' | |||
outfile.write(outLine) | |||
else: | |||
# pre-define | |||
pcr_consensus = '' | |||
pcr_free_consensus = '' | |||
consensus_call = '' | |||
# pcr | |||
pcr = itemgetter(*[9,10,11,12,14,15,16,23,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41])(strings) | |||
SEQ2000 = decide_by_rep(pcr[0:4]) | |||
SEQ500 = decide_by_rep(pcr[4:7]) | |||
Nova = decide_by_rep(pcr[7:11]) | |||
XTen_ARD = decide_by_rep(pcr[11:14]) | |||
XTen_NVG = decide_by_rep(pcr[14:17]) | |||
XTen_WUX_1 = decide_by_rep(pcr[17:20]) | |||
XTen_WUX_2 = decide_by_rep(pcr[20:23]) | |||
sequence_site = [SEQ2000,SEQ500,Nova,XTen_ARD,XTen_NVG,XTen_WUX_1,XTen_WUX_2] | |||
sequence_dict = Counter(sequence_site) | |||
highest_sequence = sequence_dict.most_common(1) | |||
candidate_sequence = highest_sequence[0][0] | |||
freq_sequence = highest_sequence[0][1] | |||
if freq_sequence > 4: | |||
pcr_consensus = candidate_sequence | |||
else: | |||
pcr_consensus = 'inconSequenceSite' | |||
# pcr-free | |||
pcr_free = itemgetter(*[13,17,18,19,20,21,22,24,25,26])(strings) | |||
SEQ2000 = decide_by_rep(pcr_free[0]) | |||
Nova_ARD_1 = decide_by_rep(pcr_free[1:4]) | |||
Nova_ARD_2 = decide_by_rep(pcr_free[4:7]) | |||
Nova_BRG = decide_by_rep(pcr_free[7:10]) | |||
sequence_site = [SEQ2000,Nova_ARD_1,Nova_ARD_2,Nova_BRG] | |||
highest_sequence = sequence_dict.most_common(1) | |||
candidate_sequence = highest_sequence[0][0] | |||
freq_sequence = highest_sequence[0][1] | |||
if freq_sequence > 2: | |||
pcr_free_consensus = candidate_sequence | |||
else: | |||
pcr_free_consensus = 'inconSequenceSite' | |||
# pcr and pcr-free | |||
tag = ['inconGT','noInfo','inconMen','inconSequenceSite'] | |||
if (pcr_consensus == pcr_free_consensus) and (pcr_consensus not in tag) and (pcr_consensus != '0/0'): | |||
consensus_call = pcr_consensus | |||
strings[6] = 'reproducible' | |||
elif (pcr_consensus in tag) or (pcr_free_consensus in tag): | |||
consensus_call = 'filtered' | |||
strings[6] = '.' | |||
elif (pcr_consensus == '0/0') and (pcr_free_consensus not in tag) and (pcr_free_consensus != '0/0'): | |||
consensus_call = 'pcr-free-speicifc' | |||
strings[6] = '.' | |||
elif (pcr_consensus != '0/0') and (pcr_consensus not in tag) and (pcr_free_consensus == '0/0'): | |||
consensus_call = 'pcr-speicifc' | |||
strings[6] = '.' | |||
elif (pcr_consensus == '0/0') and (pcr_free_consensus == '0/0'): | |||
consensus_call = 'confirm for parents' | |||
strings[6] = '.' | |||
else: | |||
consensus_call = 'filtered' | |||
strings[6] = '.' | |||
# percentage | |||
percentage = vote_percentage(strings[9:]) | |||
strings[7] = 'PCT=' + percentage | |||
# output | |||
outLine = '\t'.join(strings) + '\t' + pcr_consensus +'\t' + pcr_free_consensus + '\t' + consensus_call + '\n' | |||
outfile.write(outLine) | |||
if __name__ == '__main__': | |||
main() | |||
@@ -28,11 +28,11 @@ familyfile = open(family_filename,'w') | |||
# default columns, which will be included in the included in the calssifier | |||
vcfheader = '''##fileformat=VCFv4.2 | |||
##FILTER=<ID=PASS,Description="Voted by at least two replicates, six callers and two sequencing sites"> | |||
##FILTER=<ID=PASS,Description="the same genotype between twin sister and mendelian consistent in 578 and 678"> | |||
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> | |||
##INFO=<ID=sister,Number=0,Type=Flag,Description="0 for sister consistent, 1 for sister inconsistent"> | |||
##INFO=<ID=trioLCL5,Number=0,Type=Flag,Description="0 for trio consistent, 1 for trio inconsistent"> | |||
##INFO=<ID=trioLCL6,Number=0,Type=Flag,Description="0 for trio consistent, 1 for trio inconsistent"> | |||
##FORMAT=<ID=TWINS,Number=0,Type=Flag,Description="0 for sister consistent, 1 for sister inconsistent"> | |||
##FORMAT=<ID=TRIO5,Number=0,Type=Flag,Description="0 for trio consistent, 1 for trio inconsistent"> | |||
##FORMAT=<ID=TRIO6,Number=0,Type=Flag,Description="0 for trio consistent, 1 for trio inconsistent"> | |||
##contig=<ID=chr1,length=248956422> | |||
##contig=<ID=chr2,length=242193529> | |||
##contig=<ID=chr3,length=198295559> | |||
@@ -99,33 +99,38 @@ def process(oneLine): | |||
pass | |||
# sister | |||
if strings[11] == strings[14]: | |||
info = "sister=1" | |||
add_format = ":1" | |||
else: | |||
info = "sister=0" | |||
add_format = ":0" | |||
# trioLCL5 | |||
if strings[15] == 'MD=1': | |||
info = info + ";trioLCL5=1" | |||
add_format = add_format + ":1" | |||
else: | |||
info = info + ";trioLCL5=0" | |||
add_format = add_format + ":0" | |||
# trioLCL6 | |||
if strings[7] == 'MD=1': | |||
info = info + ";trioLCL6=1" | |||
add_format = add_format + ":1" | |||
else: | |||
info = info + ";trioLCL6=0" | |||
add_format = add_format + ":0" | |||
# filter | |||
if (strings[11] == strings[14]) and (strings[15] == 'MD=1') and (strings[7] == 'MD=1'): | |||
strings[6] = 'PASS' | |||
else: | |||
strings[6] = '.' | |||
# output LCL5 | |||
LCL5outLine = strings[0]+'\t'+strings[1]+'\t'+strings[2]+'\t'+strings[3]+'\t'+strings[4]+'\t'+strings[5]+'\t'+strings[6]+'\t'+ info +'\t'+ strings[8] + '\t' + strings[14] + '\n' | |||
LCL5outLine = strings[0]+'\t'+strings[1]+'\t'+strings[2]+'\t'+strings[3]+'\t'+strings[4]+'\t'+'.'+'\t'+strings[6]+'\t'+ '.' +'\t'+ 'GT:TWINS:TRIO5:TRIO6' + '\t' + strings[14] + add_format + '\n' | |||
LCL5file.write(LCL5outLine) | |||
# output LCL6 | |||
LCL6outLine = strings[0]+'\t'+strings[1]+'\t'+strings[2]+'\t'+strings[3]+'\t'+strings[4]+'\t'+strings[5]+'\t'+strings[6]+'\t'+ info +'\t'+ strings[8] + '\t' + strings[11] + '\n' | |||
LCL6outLine = strings[0]+'\t'+strings[1]+'\t'+strings[2]+'\t'+strings[3]+'\t'+strings[4]+'\t'+'.'+'\t'+strings[6]+'\t'+ '.' +'\t'+ 'GT:TWINS:TRIO5:TRIO6' + '\t' + strings[11] + add_format + '\n' | |||
LCL6file.write(LCL6outLine) | |||
# output LCL7 | |||
LCL7outLine = strings[0]+'\t'+strings[1]+'\t'+strings[2]+'\t'+strings[3]+'\t'+strings[4]+'\t'+strings[5]+'\t'+strings[6]+'\t'+ info +'\t'+ strings[8] + '\t' + strings[10] + '\n' | |||
LCL7outLine = strings[0]+'\t'+strings[1]+'\t'+strings[2]+'\t'+strings[3]+'\t'+strings[4]+'\t'+'.'+'\t'+strings[6]+'\t'+ '.' +'\t'+ 'GT:TWINS:TRIO5:TRIO6' + '\t' + strings[10] + add_format + '\n' | |||
LCL7file.write(LCL7outLine) | |||
# output LCL8 | |||
LCL8outLine = strings[0]+'\t'+strings[1]+'\t'+strings[2]+'\t'+strings[3]+'\t'+strings[4]+'\t'+strings[5]+'\t'+strings[6]+'\t'+ info +'\t'+ strings[8] + '\t' + strings[9] + '\n' | |||
LCL8outLine = strings[0]+'\t'+strings[1]+'\t'+strings[2]+'\t'+strings[3]+'\t'+strings[4]+'\t'+'.'+'\t'+strings[6]+'\t'+ '.' +'\t'+ 'GT:TWINS:TRIO5:TRIO6' + '\t' + strings[9] + add_format + '\n' | |||
LCL8file.write(LCL8outLine) | |||
# output family | |||
familyoutLine = strings[0]+'\t'+strings[1]+'\t'+strings[2]+'\t'+strings[3]+'\t'+strings[4]+'\t'+strings[5]+'\t'+strings[6]+'\t'+ info +'\t'+ strings[8] + '\t' + strings[14] + '\t' + strings[11] + '\t' + strings[10] + '\t' + strings[9] + '\n' | |||
familyoutLine = strings[0]+'\t'+strings[1]+'\t'+strings[2]+'\t'+strings[3]+'\t'+strings[4]+'\t'+ '.'+'\t'+strings[6]+'\t'+ '.' +'\t'+ 'GT:TWINS:TRIO5:TRIO6' + '\t' + strings[14] + add_format +'\t' + strings[11] + add_format + '\t' + strings[10] + add_format +'\t' + strings[9] + add_format + '\n' | |||
familyfile.write(familyoutLine) | |||
@@ -2,6 +2,8 @@ | |||
"{{ project_name }}.LCL7merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.fasta": "GRCh38.d1.vd1.fa", | |||
"{{ project_name }}.LCL6familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL7votes.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call:v1.1", | |||
"{{ project_name }}.LCL6votes.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call:v1.1", | |||
"{{ project_name }}.LCL5VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL6mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", | |||
"{{ project_name }}.mergeSister.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
@@ -11,10 +13,12 @@ | |||
"{{ project_name }}.LCL6merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL6variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9", | |||
"{{ project_name }}.LCL6zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL5votes.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call:v1.1", | |||
"{{ project_name }}.LCL7familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL5familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL6VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL5merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL8votes.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call:v1.1", | |||
"{{ project_name }}.reformVCF.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call:v1.1", | |||
"{{ project_name }}.LCL5zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.cluster_config": "OnDemand bcs.a2.xlarge img-ubuntu-vpc", | |||
@@ -26,3 +30,4 @@ | |||
"{{ project_name }}.ref_dir": "oss://chinese-quartet/quartet-storage-data/reference_data/" | |||
} | |||
@@ -0,0 +1,30 @@ | |||
task indelNorm { | |||
File vcf | |||
File ref_dir | |||
String fasta | |||
String sampleName | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
command <<< | |||
cat ${vcf} | grep '#' > header | |||
cat ${vcf} | grep -v '#' > body | |||
cat body | grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX\' > body.filtered | |||
cat header body.filtered > ${sampleName}.filtered.vcf | |||
/opt/hall-lab/bcftools-1.9/bin/bcftools norm -f ${ref_dir}/${fasta} ${sampleName}.filtered.vcf > ${sampleName}.normed.vcf | |||
>>> | |||
runtime { | |||
docker:docker | |||
cluster: cluster_config | |||
systemDisk: "cloud_ssd 40" | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File normed_vcf = "${sampleName}.normed.vcf" | |||
} | |||
} |
@@ -10,6 +10,8 @@ task merge { | |||
rtg vcfmerge --force-merge-all --no-gzip -o ${sample}.merged.vcf ${sep=" " family_vcf_gz} | |||
cat ${sample}.merged.vcf | grep -v '#' | cut -f1-2 | sed s'/\t/_/g' | sort | uniq -c | sed 's/\s\+/\t/g' | awk '{ if ($1 != 1) { print } }' | cut -f3 > ${sample}.vcf_dup.txt | |||
>>> | |||
runtime { | |||
@@ -20,5 +22,6 @@ task merge { | |||
} | |||
output { | |||
File merged_vcf = "${sample}.merged.vcf" | |||
File vcf_dup = "${sample}.vcf_dup.txt" | |||
} | |||
} |
@@ -1,20 +1,14 @@ | |||
task votes { | |||
Array[File] mother_vcf_gz | |||
Array[File] mother_vcf_idx | |||
Array[File] father_vcf_gz | |||
Array[File] father_vcf_idx | |||
Array[File] twins_vcf_gz | |||
Array[File] twins_vcf_idx | |||
File merged_vcf | |||
String vcf_dup | |||
String sample | |||
String prefix | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
command <<< | |||
rtg vcfmerge --force-merge-all --no-gzip -o LCL8.sister.consistent.merged.vcf ${sep=" " mother_vcf_gz} | |||
rtg vcfmerge --force-merge-all --no-gzip -o LCL7.sister.consistent.merged.vcf ${sep=" " father_vcf_gz} | |||
rtg vcfmerge --force-merge-all --no-gzip -o Twins.sister.consistent.vcf ${sep=" " twins_vcf_gz} | |||
python /opt/high_confidence_call_vote.py -vcf ${merged_vcf} -dup ${vcf_dup} -sample ${sample} -prefix ${prefix} | |||
>>> | |||
runtime { | |||
@@ -24,8 +18,6 @@ task votes { | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File mother_merged_vcf = "LCL8.sister.consistent.merged.vcf" | |||
File father_merged_vcf = "LCL7.sister.consistent.merged.vcf" | |||
File twins_merged_vcf = "Twins.sister.consistent.merged.vcf" | |||
File annotated_vcf = "${prefix}_annotated.vcf" | |||
} | |||
} |
@@ -5,6 +5,7 @@ import "./tasks/VCFrename.wdl" as VCFrename | |||
import "./tasks/mergeSister.wdl" as mergeSister | |||
import "./tasks/reformVCF.wdl" as reformVCF | |||
import "./tasks/merge.wdl" as merge | |||
import "./tasks/votes.wdl" as votes | |||
workflow {{ project_name }} { | |||
File inputSamplesFile | |||
@@ -175,6 +176,15 @@ workflow {{ project_name }} { | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call votes.votes as LCL5votes{ | |||
input: | |||
merged_vcf=LCL5merge.merged_vcf, | |||
vcf_dup=LCL5merge.vcf_dup, | |||
sample='LCL5', | |||
prefix='LCL5_consensus', | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call merge.merge as LCL6merge { | |||
input: | |||
family_vcf_gz=LCL6familyzipIndex.vcf_gz, | |||
@@ -183,6 +193,15 @@ workflow {{ project_name }} { | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call votes.votes as LCL6votes { | |||
input: | |||
merged_vcf=LCL6merge.merged_vcf, | |||
vcf_dup=LCL6merge.vcf_dup, | |||
sample='LCL6', | |||
prefix='LCL6_consensus', | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call merge.merge as LCL7merge { | |||
input: | |||
family_vcf_gz=LCL7familyzipIndex.vcf_gz, | |||
@@ -191,6 +210,15 @@ workflow {{ project_name }} { | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call votes.votes as LCL7votes { | |||
input: | |||
merged_vcf=LCL7merge.merged_vcf, | |||
vcf_dup=LCL7merge.vcf_dup, | |||
sample='LCL7', | |||
prefix='LCL7_consensus', | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call merge.merge as LCL8merge { | |||
input: | |||
family_vcf_gz=LCL8familyzipIndex.vcf_gz, | |||
@@ -199,4 +227,13 @@ workflow {{ project_name }} { | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call votes.votes as LCL8votes { | |||
input: | |||
merged_vcf=LCL8merge.merged_vcf, | |||
vcf_dup=LCL8merge.vcf_dup, | |||
sample='LCL8', | |||
prefix='LCL8_consensus', | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
} |