from __future__ import division
import sys, argparse, os
import fileinput
import re
import pandas as pd
from operator import itemgetter
from collections import Counter
from itertools import islice
from numpy import *
import statistics

# input arguments
parser = argparse.ArgumentParser(description="Count the number of votes supporting each variant call in a multi-sample Quartet VCF")
parser.add_argument('-vcf', '--multi_sample_vcf', type=str, help='The multi-sample VCF file to count votes from', required=True)
parser.add_argument('-dup', '--dup_list', type=str, help='List of duplicated variant positions', required=True)
parser.add_argument('-sample', '--sample_name', type=str, help='Which sample of the Quartet (e.g. LCL5)', required=True)
parser.add_argument('-prefix', '--prefix', type=str, help='Prefix of the output file names', required=True)
args = parser.parse_args()
multi_sample_vcf = args.multi_sample_vcf
dup_list = args.dup_list
prefix = args.prefix
sample_name = args.sample_name
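
# Example invocation (script and file names here are hypothetical placeholders):
#   python count_voting_number.py -vcf quartet_27samples.vcf -dup duplicated_positions.txt \
#       -sample LCL5 -prefix LCL5_high_confidence
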
vcf_header = '''##fileformat=VCFv4.2
##fileDate=20200331
##source=high_confidence_calls_integration(choppy app)
##reference=GRCh38.d1.vd1
##INFO=<ID=location,Number=1,Type=String,Description="Repeat region">
##INFO=<ID=DETECTED,Number=1,Type=Integer,Description="Number of detected votes">
##INFO=<ID=VOTED,Number=1,Type=Integer,Description="Number of consensus votes">
##INFO=<ID=FAM,Number=1,Type=Integer,Description="Number of Mendelian consistent votes">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Sum depth of all samples">
##FORMAT=<ID=ALT,Number=1,Type=Integer,Description="Sum alternative depth of all samples">
##FORMAT=<ID=AF,Number=1,Type=Float,Description="Allele frequency, sum alternative depth / sum depth">
##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Average genotype quality">
##FORMAT=<ID=QD,Number=1,Type=Float,Description="Average Variant Confidence/Quality by Depth">
##FORMAT=<ID=MQ,Number=1,Type=Float,Description="Average mapping quality">
##FORMAT=<ID=FS,Number=1,Type=Float,Description="Average Phred-scaled p-value using Fisher's exact test to detect strand bias">
##FORMAT=<ID=QUAL,Number=1,Type=Float,Description="Average variant quality">
##contig=<ID=chr1,length=248956422>
##contig=<ID=chr2,length=242193529>
##contig=<ID=chr3,length=198295559>
##contig=<ID=chr4,length=190214555>
##contig=<ID=chr5,length=181538259>
##contig=<ID=chr6,length=170805979>
##contig=<ID=chr7,length=159345973>
##contig=<ID=chr8,length=145138636>
##contig=<ID=chr9,length=138394717>
##contig=<ID=chr10,length=133797422>
##contig=<ID=chr11,length=135086622>
##contig=<ID=chr12,length=133275309>
##contig=<ID=chr13,length=114364328>
##contig=<ID=chr14,length=107043718>
##contig=<ID=chr15,length=101991189>
##contig=<ID=chr16,length=90338345>
##contig=<ID=chr17,length=83257441>
##contig=<ID=chr18,length=80373285>
##contig=<ID=chr19,length=58617616>
##contig=<ID=chr20,length=64444167>
##contig=<ID=chr21,length=46709983>
##contig=<ID=chr22,length=50818468>
##contig=<ID=chrX,length=156040895>
'''
vcf_header_all_sample = '''##fileformat=VCFv4.2
##fileDate=20200331
##reference=GRCh38.d1.vd1
##INFO=<ID=location,Number=1,Type=String,Description="Repeat region">
##INFO=<ID=DUP,Number=1,Type=Flag,Description="Duplicated variant records">
##INFO=<ID=DETECTED,Number=1,Type=Integer,Description="Number of detected votes">
##INFO=<ID=VOTED,Number=1,Type=Integer,Description="Number of consensus votes">
##INFO=<ID=FAM,Number=1,Type=Integer,Description="Number of Mendelian consistent votes">
##INFO=<ID=ALL_ALT,Number=1,Type=Float,Description="Sum of alternative reads of all samples">
##INFO=<ID=ALL_DP,Number=1,Type=Float,Description="Sum of depth of all samples">
##INFO=<ID=ALL_AF,Number=1,Type=Float,Description="Allele frequency of net alternative reads and net depth">
##INFO=<ID=GQ_MEAN,Number=1,Type=Float,Description="Mean of genotype quality of all samples">
##INFO=<ID=QD_MEAN,Number=1,Type=Float,Description="Average Variant Confidence/Quality by Depth">
##INFO=<ID=MQ_MEAN,Number=1,Type=Float,Description="Mean of mapping quality of all samples">
##INFO=<ID=FS_MEAN,Number=1,Type=Float,Description="Average Phred-scaled p-value using Fisher's exact test to detect strand bias">
##INFO=<ID=QUAL_MEAN,Number=1,Type=Float,Description="Average variant quality">
##INFO=<ID=PCR,Number=1,Type=String,Description="Consensus of PCR votes">
##INFO=<ID=PCR_FREE,Number=1,Type=String,Description="Consensus of PCR-free votes">
##INFO=<ID=CONSENSUS,Number=1,Type=String,Description="Consensus calls">
##INFO=<ID=CONSENSUS_SEQ,Number=1,Type=String,Description="Consensus sequence">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=DP,Number=1,Type=String,Description="Depth">
##FORMAT=<ID=ALT,Number=1,Type=Integer,Description="Alternative Depth">
##FORMAT=<ID=AF,Number=1,Type=String,Description="Allele frequency">
##FORMAT=<ID=GQ,Number=1,Type=String,Description="Genotype quality">
##FORMAT=<ID=MQ,Number=1,Type=String,Description="Mapping quality">
##FORMAT=<ID=TWINS,Number=1,Type=String,Description="1 is twins shared, 0 is twins discordant">
##FORMAT=<ID=TRIO5,Number=1,Type=String,Description="1 is LCL7, LCL8 and LCL5 Mendelian consistent, 0 is Mendelian violation">
##FORMAT=<ID=TRIO6,Number=1,Type=String,Description="1 is LCL7, LCL8 and LCL6 Mendelian consistent, 0 is Mendelian violation">
##contig=<ID=chr1,length=248956422>
##contig=<ID=chr2,length=242193529>
##contig=<ID=chr3,length=198295559>
##contig=<ID=chr4,length=190214555>
##contig=<ID=chr5,length=181538259>
##contig=<ID=chr6,length=170805979>
##contig=<ID=chr7,length=159345973>
##contig=<ID=chr8,length=145138636>
##contig=<ID=chr9,length=138394717>
##contig=<ID=chr10,length=133797422>
##contig=<ID=chr11,length=135086622>
##contig=<ID=chr12,length=133275309>
##contig=<ID=chr13,length=114364328>
##contig=<ID=chr14,length=107043718>
##contig=<ID=chr15,length=101991189>
##contig=<ID=chr16,length=90338345>
##contig=<ID=chr17,length=83257441>
##contig=<ID=chr18,length=80373285>
##contig=<ID=chr19,length=58617616>
##contig=<ID=chr20,length=64444167>
##contig=<ID=chr21,length=46709983>
##contig=<ID=chr22,length=50818468>
##contig=<ID=chrX,length=156040895>
'''

# read in duplication list
dup = pd.read_table(dup_list, header=None)
var_dup = dup[0].tolist()

# output files
benchmark_file_name = prefix + '_voted.vcf'
benchmark_outfile = open(benchmark_file_name, 'w')
all_sample_file_name = prefix + '_all_sample_information.vcf'
all_sample_outfile = open(all_sample_file_name, 'w')

# write VCF headers and column lines
outputcolumn = '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t' + sample_name + '_benchmark_calls\n'
benchmark_outfile.write(vcf_header)
benchmark_outfile.write(outputcolumn)
outputcolumn_all_sample = '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t' + \
    'Quartet_DNA_BGI_SEQ2000_BGI_1_20180518\tQuartet_DNA_BGI_SEQ2000_BGI_2_20180530\tQuartet_DNA_BGI_SEQ2000_BGI_3_20180530\t' + \
    'Quartet_DNA_BGI_T7_WGE_1_20191105\tQuartet_DNA_BGI_T7_WGE_2_20191105\tQuartet_DNA_BGI_T7_WGE_3_20191105\t' + \
    'Quartet_DNA_ILM_Nova_ARD_1_20181108\tQuartet_DNA_ILM_Nova_ARD_2_20181108\tQuartet_DNA_ILM_Nova_ARD_3_20181108\t' + \
    'Quartet_DNA_ILM_Nova_ARD_4_20190111\tQuartet_DNA_ILM_Nova_ARD_5_20190111\tQuartet_DNA_ILM_Nova_ARD_6_20190111\t' + \
    'Quartet_DNA_ILM_Nova_BRG_1_20180930\tQuartet_DNA_ILM_Nova_BRG_2_20180930\tQuartet_DNA_ILM_Nova_BRG_3_20180930\t' + \
    'Quartet_DNA_ILM_Nova_WUX_1_20190917\tQuartet_DNA_ILM_Nova_WUX_2_20190917\tQuartet_DNA_ILM_Nova_WUX_3_20190917\t' + \
    'Quartet_DNA_ILM_XTen_ARD_1_20170403\tQuartet_DNA_ILM_XTen_ARD_2_20170403\tQuartet_DNA_ILM_XTen_ARD_3_20170403\t' + \
    'Quartet_DNA_ILM_XTen_NVG_1_20170329\tQuartet_DNA_ILM_XTen_NVG_2_20170329\tQuartet_DNA_ILM_XTen_NVG_3_20170329\t' + \
    'Quartet_DNA_ILM_XTen_WUX_1_20170216\tQuartet_DNA_ILM_XTen_WUX_2_20170216\tQuartet_DNA_ILM_XTen_WUX_3_20170216\n'
all_sample_outfile.write(vcf_header_all_sample)
all_sample_outfile.write(outputcolumn_all_sample)
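
# Layout of the 27 per-sample columns above (as grouped by the itemgetter index lists in main() below):
# PCR libraries      -> columns 9-11 (BGI SEQ2000) and 27-35 (ILM X Ten at ARD, NVG, WUX), 3 replicates each
# PCR-free libraries -> columns 12-26 (BGI T7 at WGE; ILM NovaSeq at ARD x2 batches, BRG, WUX), 3 replicates each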

# functions
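
# replace_nan: pad missing sample entries ('.') with a placeholder that still splits
# into the expected number of ':'-separated sub-fields.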
def replace_nan(strings_list):
    updated_list = []
    for i in strings_list:
        if i == '.':
            updated_list.append('.:.:.:.:.:.:.:.:.:.:.:.')
        else:
            updated_list.append(i)
    return updated_list
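
# remove_dot: drop missing ('.') values before numeric conversion.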
def remove_dot(strings_list):
    updated_list = []
    for i in strings_list:
        if i == '.':
            pass
        else:
            updated_list.append(i)
    return updated_list
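
# detected_number: how many of the 27 replicate calls carry a non-missing genotype.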
def detected_number(strings):
    gt = [x.split(':')[0] for x in strings]
    detected_num = 27 - gt.count('.')
    return str(detected_num)
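
# vote_number: how many replicates' order-normalized genotypes equal the consensus call
# (missing genotypes are treated as 0/0).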
def vote_number(strings, consensus_call):
    gt = [x.split(':')[0] for x in strings]
    gt = [x.replace('.', '0/0') for x in gt]
    gt = list(map(gt_uniform, [i for i in gt]))
    vote_num = gt.count(consensus_call)
    return str(vote_num)
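
# family_vote: among the replicates that match the consensus genotype, count those that are
# Mendelian consistent in all three family checks (TWINS:TRIO5:TRIO6 == '1:1:1').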
def family_vote(strings, consensus_call):
    gt = [x.split(':')[0] for x in strings]
    gt = [x.replace('.', '0/0') for x in gt]
    gt = list(map(gt_uniform, [i for i in gt]))
    mendelian = [':'.join(x.split(':')[1:4]) for x in strings]
    indices = [i for i, x in enumerate(gt) if x == consensus_call]
    matched_mendelian = itemgetter(*indices)(mendelian)
    mendelian_num = matched_mendelian.count('1:1:1')
    return str(mendelian_num)
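
# gt_uniform: normalize an unphased genotype so the smaller allele index comes first (e.g. 2/1 -> 1/2).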
def gt_uniform(strings):
    uniformed_gt = ''
    allele1 = strings.split('/')[0]
    allele2 = strings.split('/')[1]
    if int(allele1) > int(allele2):
        uniformed_gt = allele2 + '/' + allele1
    else:
        uniformed_gt = allele1 + '/' + allele2
    return uniformed_gt
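
# decide_by_rep: consensus of one library's three replicates. Requires at least two replicates that are
# Mendelian consistent ('1:1:1') and at least two that agree on the genotype; otherwise returns one of
# the tags 'inconGT', 'noInfo' or 'inconMen'.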
def decide_by_rep(strings):
    consensus_rep = ''
    mendelian = [':'.join(x.split(':')[1:4]) for x in strings]
    gt = [x.split(':')[0] for x in strings]
    gt = [x.replace('.', '0/0') for x in gt]
    # modified gt: turn 2/1 into 1/2
    gt = list(map(gt_uniform, [i for i in gt]))
    # mendelian consistent?
    mendelian_dict = Counter(mendelian)
    highest_mendelian = mendelian_dict.most_common(1)
    candidate_mendelian = highest_mendelian[0][0]
    freq_mendelian = highest_mendelian[0][1]
    if (candidate_mendelian == '1:1:1') and (freq_mendelian >= 2):
        gt_num_dict = Counter(gt)
        highest_gt = gt_num_dict.most_common(1)
        candidate_gt = highest_gt[0][0]
        freq_gt = highest_gt[0][1]
        if (candidate_gt != '0/0') and (freq_gt >= 2):
            consensus_rep = candidate_gt
        elif (candidate_gt == '0/0') and (freq_gt >= 2):
            consensus_rep = '0/0'
        else:
            consensus_rep = 'inconGT'
    elif (candidate_mendelian == '') and (freq_mendelian >= 2):
        consensus_rep = 'noInfo'
    else:
        consensus_rep = 'inconMen'
    return consensus_rep
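
# main: walk the multi-sample VCF, flag duplicated positions, derive per-library consensus calls for the
# PCR and PCR-free groups, vote across libraries, and write the benchmark call plus the annotated
# all-sample record.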
def main():
    for line in fileinput.input(multi_sample_vcf):
        headline = re.match(r'^\#', line)
        if headline is not None:
            pass
        else:
            line = line.strip()
            strings = line.split('\t')
            variant_id = '_'.join([strings[0], strings[1]])
            # check if the variant's location is duplicated
            if variant_id in var_dup:
                strings[7] = strings[7] + ';DUP'
                outLine = '\t'.join(strings) + '\n'
                all_sample_outfile.write(outLine)
            else:
                # pre-define
                pcr_consensus = '.'
                pcr_free_consensus = '.'
                consensus_call = '.'
                consensus_alt_seq = '.'
                # pcr
                strings[9:] = replace_nan(strings[9:])
                pcr = itemgetter(*[9, 10, 11, 27, 28, 29, 30, 31, 32, 33, 34, 35])(strings)
                SEQ2000 = decide_by_rep(pcr[0:3])
                XTen_ARD = decide_by_rep(pcr[3:6])
                XTen_NVG = decide_by_rep(pcr[6:9])
                XTen_WUX = decide_by_rep(pcr[9:12])
                sequence_site = [SEQ2000, XTen_ARD, XTen_NVG, XTen_WUX]
                sequence_dict = Counter(sequence_site)
                highest_sequence = sequence_dict.most_common(1)
                candidate_sequence = highest_sequence[0][0]
                freq_sequence = highest_sequence[0][1]
                if freq_sequence > 2:
                    pcr_consensus = candidate_sequence
                else:
                    pcr_consensus = 'inconSequenceSite'
                # pcr-free
                pcr_free = itemgetter(*[12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26])(strings)
                T7_WGE = decide_by_rep(pcr_free[0:3])
                Nova_ARD_1 = decide_by_rep(pcr_free[3:6])
                Nova_ARD_2 = decide_by_rep(pcr_free[6:9])
                Nova_BRG = decide_by_rep(pcr_free[9:12])
                Nova_WUX = decide_by_rep(pcr_free[12:15])
                sequence_site = [T7_WGE, Nova_ARD_1, Nova_ARD_2, Nova_BRG, Nova_WUX]
                # recount votes over the PCR-free sites
                sequence_dict = Counter(sequence_site)
                highest_sequence = sequence_dict.most_common(1)
                candidate_sequence = highest_sequence[0][0]
                freq_sequence = highest_sequence[0][1]
                if freq_sequence > 3:
                    pcr_free_consensus = candidate_sequence
                else:
                    pcr_free_consensus = 'inconSequenceSite'
                # pcr and pcr-free
                tag = ['inconGT', 'noInfo', 'inconMen', 'inconSequenceSite']
                if (pcr_consensus == pcr_free_consensus) and (pcr_consensus not in tag) and (pcr_consensus != '0/0'):
                    consensus_call = pcr_consensus
                    VOTED = vote_number(strings[9:], consensus_call)
                    strings[7] = strings[7] + ';VOTED=' + VOTED
                    DETECTED = detected_number(strings[9:])
                    strings[7] = strings[7] + ';DETECTED=' + DETECTED
                    FAM = family_vote(strings[9:], consensus_call)
                    strings[7] = strings[7] + ';FAM=' + FAM
                    # reduce multi-allelic ALT fields to the alleles actually called
                    alt = strings[4]
                    alt_gt = alt.split(',')
                    if len(alt_gt) > 1:
                        allele1 = consensus_call.split('/')[0]
                        allele2 = consensus_call.split('/')[1]
                        if allele1 == '0':
                            allele2_seq = alt_gt[int(allele2) - 1]
                            consensus_alt_seq = allele2_seq
                            consensus_call = '0/1'
                        else:
                            allele1_seq = alt_gt[int(allele1) - 1]
                            allele2_seq = alt_gt[int(allele2) - 1]
                            if int(allele1) > int(allele2):
                                consensus_alt_seq = allele2_seq + ',' + allele1_seq
                                consensus_call = '1/2'
                            elif int(allele1) < int(allele2):
                                consensus_alt_seq = allele1_seq + ',' + allele2_seq
                                consensus_call = '1/2'
                            else:
                                consensus_alt_seq = allele1_seq
                                consensus_call = '1/1'
                    else:
                        consensus_alt_seq = alt
                    # benchmark FORMAT:   GT:DP:ALT:AF:GQ:QD:MQ:FS:QUAL
                    # per-sample FORMAT:  GT:TWINS:TRIO5:TRIO6:DP:ALT:AF:GQ:QD:MQ:FS:QUAL:rawGT
                    # DP
                    DP = [x.split(':')[4] for x in strings[9:]]
                    DP = remove_dot(DP)
                    DP = [int(x) for x in DP]
                    ALL_DP = sum(DP)
                    # ALT and AF
                    ALT = [x.split(':')[5] for x in strings[9:]]
                    ALT = remove_dot(ALT)
                    ALT = [int(x) for x in ALT]
                    ALL_ALT = sum(ALT)
                    ALL_AF = round(ALL_ALT / ALL_DP, 2)
                    # GQ
                    GQ = [x.split(':')[7] for x in strings[9:]]
                    GQ = remove_dot(GQ)
                    GQ = [int(x) for x in GQ]
                    GQ_MEAN = round(mean(GQ), 2)
                    # QD
                    QD = [x.split(':')[8] for x in strings[9:]]
                    QD = remove_dot(QD)
                    QD = [float(x) for x in QD]
                    QD_MEAN = round(mean(QD), 2)
                    # MQ
                    MQ = [x.split(':')[9] for x in strings[9:]]
                    MQ = remove_dot(MQ)
                    MQ = [float(x) for x in MQ]
                    MQ_MEAN = round(mean(MQ), 2)
                    # FS
                    FS = [x.split(':')[10] for x in strings[9:]]
                    FS = remove_dot(FS)
                    FS = [float(x) for x in FS]
                    FS_MEAN = round(mean(FS), 2)
                    # QUAL
                    QUAL = [x.split(':')[11] for x in strings[9:]]
                    QUAL = remove_dot(QUAL)
                    QUAL = [float(x) for x in QUAL]
                    QUAL_MEAN = round(mean(QUAL), 2)
                    # benchmark output
                    output_format = consensus_call + ':' + str(ALL_DP) + ':' + str(ALL_ALT) + ':' + str(ALL_AF) + ':' + str(GQ_MEAN) + ':' + str(QD_MEAN) + ':' + str(MQ_MEAN) + ':' + str(FS_MEAN) + ':' + str(QUAL_MEAN)
                    outLine = strings[0] + '\t' + strings[1] + '\t' + strings[2] + '\t' + strings[3] + '\t' + consensus_alt_seq + '\t' + '.' + '\t' + '.' + '\t' + strings[7] + '\t' + 'GT:DP:ALT:AF:GQ:QD:MQ:FS:QUAL' + '\t' + output_format + '\n'
                    benchmark_outfile.write(outLine)
                    # all sample output
                    strings[7] = strings[7] + ';ALL_ALT=' + str(ALL_ALT) + ';ALL_DP=' + str(ALL_DP) + ';ALL_AF=' + str(ALL_AF) \
                        + ';GQ_MEAN=' + str(GQ_MEAN) + ';QD_MEAN=' + str(QD_MEAN) + ';MQ_MEAN=' + str(MQ_MEAN) + ';FS_MEAN=' + str(FS_MEAN) \
                        + ';QUAL_MEAN=' + str(QUAL_MEAN) + ';PCR=' + consensus_call + ';PCR_FREE=' + consensus_call + ';CONSENSUS=' + consensus_call \
                        + ';CONSENSUS_SEQ=' + consensus_alt_seq
                    all_sample_outLine = '\t'.join(strings) + '\n'
                    all_sample_outfile.write(all_sample_outLine)
                elif (pcr_consensus in tag) and (pcr_free_consensus in tag):
                    consensus_call = 'filtered'
                    DETECTED = detected_number(strings[9:])
                    strings[7] = strings[7] + ';DETECTED=' + DETECTED
                    strings[7] = strings[7] + ';CONSENSUS=' + consensus_call
                    all_sample_outLine = '\t'.join(strings) + '\n'
                    all_sample_outfile.write(all_sample_outLine)
                elif ((pcr_consensus == '0/0') or (pcr_consensus in tag)) and ((pcr_free_consensus not in tag) and (pcr_free_consensus != '0/0')):
                    consensus_call = 'pcr-free-specific'
                    DETECTED = detected_number(strings[9:])
                    strings[7] = strings[7] + ';DETECTED=' + DETECTED
                    strings[7] = strings[7] + ';CONSENSUS=' + consensus_call
                    all_sample_outLine = '\t'.join(strings) + '\n'
                    all_sample_outfile.write(all_sample_outLine)
                # mirror of the PCR-free-specific condition above, with the two groups swapped
                elif ((pcr_consensus not in tag) and (pcr_consensus != '0/0')) and ((pcr_free_consensus == '0/0') or (pcr_free_consensus in tag)):
                    consensus_call = 'pcr-specific'
                    DETECTED = detected_number(strings[9:])
                    strings[7] = strings[7] + ';DETECTED=' + DETECTED
                    strings[7] = strings[7] + ';CONSENSUS=' + consensus_call + ';PCR=' + pcr_consensus + ';PCR_FREE=' + pcr_free_consensus
                    all_sample_outLine = '\t'.join(strings) + '\n'
                    all_sample_outfile.write(all_sample_outLine)
                elif (pcr_consensus == '0/0') and (pcr_free_consensus == '0/0'):
                    consensus_call = 'confirm for parents'
                    DETECTED = detected_number(strings[9:])
                    strings[7] = strings[7] + ';DETECTED=' + DETECTED
                    strings[7] = strings[7] + ';CONSENSUS=' + consensus_call
                    all_sample_outLine = '\t'.join(strings) + '\n'
                    all_sample_outfile.write(all_sample_outLine)
                else:
                    consensus_call = 'filtered'
                    DETECTED = detected_number(strings[9:])
                    strings[7] = strings[7] + ';DETECTED=' + DETECTED
                    strings[7] = strings[7] + ';CONSENSUS=' + consensus_call
                    all_sample_outLine = '\t'.join(strings) + '\n'
                    all_sample_outfile.write(all_sample_outLine)

if __name__ == '__main__':
    main()