|
|
@@ -62,7 +62,7 @@ file_name = prefix + '_annotated.vcf' |
|
|
|
# The handle is intentionally left open here: only the header is written in
# this section.  NOTE(review): presumably the variant records are written
# through `outfile` later in the script -- confirm it is closed there.
outfile = open(file_name, 'w')

# write VCF
# Reference callset columns that follow the nine fixed VCF columns.  The
# order matters: it must match the order of the per-sample genotype fields
# in every record line.
reference_samples = (
    'Quartet_DNA_BGI_SEQ2000_BGI_1_20180518_LCL5',
    'Quartet_DNA_BGI_SEQ2000_BGI_2_20180530_LCL5',
    'Quartet_DNA_BGI_SEQ2000_BGI_3_20180530_LCL5',
    'Quartet_DNA_BGI_T7_WGE_1_20191105_LCL5',
    'Quartet_DNA_BGI_T7_WGE_2_20191105_LCL5',
    'Quartet_DNA_BGI_T7_WGE_3_20191105_LCL5',
    'Quartet_DNA_ILM_Nova_ARD_1_20181108_LCL5',
    'Quartet_DNA_ILM_Nova_ARD_2_20181108_LCL5',
    'Quartet_DNA_ILM_Nova_ARD_3_20181108_LCL5',
    'Quartet_DNA_ILM_Nova_ARD_4_20190111_LCL5',
    'Quartet_DNA_ILM_Nova_ARD_5_20190111_LCL5',
    'Quartet_DNA_ILM_Nova_ARD_6_20190111_LCL5',
    'Quartet_DNA_ILM_Nova_BRG_1_20180930_LCL5',
    'Quartet_DNA_ILM_Nova_BRG_2_20180930_LCL5',
    'Quartet_DNA_ILM_Nova_BRG_3_20180930_LCL5',
    'Quartet_DNA_ILM_Nova_WUX_1_20190917_LCL5',
    'Quartet_DNA_ILM_Nova_WUX_2_20190917_LCL5',
    'Quartet_DNA_ILM_Nova_WUX_3_20190917_LCL5',
    'Quartet_DNA_ILM_XTen_ARD_1_20170403_LCL5',
    'Quartet_DNA_ILM_XTen_ARD_2_20170403_LCL5',
    'Quartet_DNA_ILM_XTen_ARD_3_20170403_LCL5',
    'Quartet_DNA_ILM_XTen_NVG_1_20170329_LCL5',
    'Quartet_DNA_ILM_XTen_NVG_2_20170329_LCL5',
    'Quartet_DNA_ILM_XTen_NVG_3_20170329_LCL5',
    'Quartet_DNA_ILM_XTen_WUX_1_20170216_LCL5',
    'Quartet_DNA_ILM_XTen_WUX_2_20170216_LCL5',
    'Quartet_DNA_ILM_XTen_WUX_3_20170216_LCL5',
)

# Tab-separated column header: fixed VCF columns, the reference samples, then
# the four summary columns derived from `sample_name`.
outputcolumn = '\t'.join(
    ['#CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO', 'FORMAT']
    + list(reference_samples)
    + [
        sample_name + '_pcr',
        sample_name + '_pcr-free',
        sample_name + '_consensus',
        sample_name + '_consensus_alt_seq',
    ]
) + '\n'

outfile.write(vcf_header)
outfile.write(outputcolumn)
|
|
|
|
|
|
@@ -71,14 +71,14 @@ outfile.write(outputcolumn) |
|
|
|
def detected_percentage(strings, total=27):
    """Return the share of calls whose genotype is neither '.' nor '0/0'.

    Each entry of *strings* is a colon-separated sample field; only the part
    before the first ':' (the genotype) is inspected.

    Args:
        strings: iterable of per-sample call strings, e.g. '0/1:12:30'.
        total: denominator of the ratio.  Defaults to 27, the value the
            original code hard-coded (presumably the number of reference
            call sets -- verify against the caller).

    Returns:
        The ratio ``(total - undetected) / total`` rounded to four decimal
        places, converted to ``str``.
    """
    # A genotype of '0/0' is treated the same as a missing call ('.').
    undetected = sum(
        1 for call in strings if call.split(':')[0] in ('.', '0/0')
    )
    percentage = round((total - undetected) / total, 4)
    return str(percentage)
|
|
|
|
|
|
|
def vote_percentage(strings, consensus_call, total=27):
    """Return the share of calls whose genotype equals *consensus_call*.

    Each entry of *strings* is a colon-separated sample field; only the part
    before the first ':' (the genotype) is used.  Every '.' in that genotype
    is replaced by '0/0' and the result is passed through ``gt_uniform``
    (defined elsewhere in this file; presumably it normalises genotype
    notation -- verify) before being compared with *consensus_call*.

    Args:
        strings: iterable of per-sample call strings.
        consensus_call: genotype to count, in the form ``gt_uniform`` returns.
        total: denominator of the ratio.  Defaults to 27, the value the
            original code hard-coded (presumably the number of reference
            call sets -- verify against the caller).

    Returns:
        The ratio ``matches / total`` rounded to four decimal places,
        converted to ``str``.
    """
    genotypes = [
        gt_uniform(call.split(':')[0].replace('.', '0/0')) for call in strings
    ]
    percentage = round(genotypes.count(consensus_call) / total, 4)
    return str(percentage)
|
|
|
|
|
|
|
def family_vote(strings,consensus_call): |
|
|
@@ -143,36 +143,35 @@ def main(): |
|
|
|
pcr_consensus = '' |
|
|
|
pcr_free_consensus = '' |
|
|
|
consensus_call = '' |
|
|
|
consensus_alt_seq = '.' |
|
|
|
consensus_alt_seq = '' |
|
|
|
# pcr |
|
|
|
pcr = itemgetter(*[9,10,11,12,14,15,16,23,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41])(strings) |
|
|
|
pcr = itemgetter(*[9,10,11,27,28,29,30,31,32,33,34,35])(strings) |
|
|
|
SEQ2000 = decide_by_rep(pcr[0:3]) |
|
|
|
SEQ500 = decide_by_rep(pcr[4:7]) |
|
|
|
Nova = decide_by_rep(pcr[7:11]) |
|
|
|
XTen_ARD = decide_by_rep(pcr[11:14]) |
|
|
|
XTen_NVG = decide_by_rep(pcr[14:17]) |
|
|
|
XTen_WUX_1 = decide_by_rep(pcr[17:20]) |
|
|
|
XTen_WUX_2 = decide_by_rep(pcr[20:23]) |
|
|
|
sequence_site = [SEQ2000,SEQ500,Nova,XTen_ARD,XTen_NVG,XTen_WUX_1,XTen_WUX_2] |
|
|
|
XTen_ARD = decide_by_rep(pcr[3:6]) |
|
|
|
XTen_NVG = decide_by_rep(pcr[6:9]) |
|
|
|
XTen_WUX = decide_by_rep(pcr[9:12]) |
|
|
|
sequence_site = [SEQ2000,XTen_ARD,XTen_NVG,XTen_WUX] |
|
|
|
sequence_dict = Counter(sequence_site) |
|
|
|
highest_sequence = sequence_dict.most_common(1) |
|
|
|
candidate_sequence = highest_sequence[0][0] |
|
|
|
freq_sequence = highest_sequence[0][1] |
|
|
|
if freq_sequence > 4: |
|
|
|
if freq_sequence > 2: |
|
|
|
pcr_consensus = candidate_sequence |
|
|
|
else: |
|
|
|
pcr_consensus = 'inconSequenceSite' |
|
|
|
# pcr-free |
|
|
|
pcr_free = itemgetter(*[13,17,18,19,20,21,22,24,25,26])(strings) |
|
|
|
pcr_free = itemgetter(*[12,13,14,15,16,17,18,19,20,21,22,23,24,25,26])(strings) |
|
|
|
#SEQ2000 = decide_by_rep(pcr_free[0]) |
|
|
|
Nova_ARD_1 = decide_by_rep(pcr_free[1:4]) |
|
|
|
Nova_ARD_2 = decide_by_rep(pcr_free[4:7]) |
|
|
|
Nova_BRG = decide_by_rep(pcr_free[7:10]) |
|
|
|
sequence_site = [SEQ2000,Nova_ARD_1,Nova_ARD_2,Nova_BRG] |
|
|
|
T7_WGE = decide_by_rep(pcr_free[0:3]) |
|
|
|
Nova_ARD_1 = decide_by_rep(pcr_free[3:6]) |
|
|
|
Nova_ARD_2 = decide_by_rep(pcr_free[6:9]) |
|
|
|
Nova_BRG = decide_by_rep(pcr_free[9:12]) |
|
|
|
Nova_WUX = decide_by_rep(pcr_free[12:15]) |
|
|
|
sequence_site = [T7_WGE,Nova_ARD_1,Nova_ARD_2,Nova_BRG,Nova_WUX] |
|
|
|
highest_sequence = sequence_dict.most_common(1) |
|
|
|
candidate_sequence = highest_sequence[0][0] |
|
|
|
freq_sequence = highest_sequence[0][1] |
|
|
|
if freq_sequence > 2: |
|
|
|
if freq_sequence > 3: |
|
|
|
pcr_free_consensus = candidate_sequence |
|
|
|
else: |
|
|
|
pcr_free_consensus = 'inconSequenceSite' |