You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

преди 4 години
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295
  1. from __future__ import division
  2. from glob import glob
  3. import sys, argparse, os
  4. import fileinput
  5. import re
  6. import pandas as pd
  7. from operator import itemgetter
  8. from collections import Counter
  9. from itertools import islice
  10. from numpy import *
  11. import statistics
  12. # input arguments
  13. parser = argparse.ArgumentParser(description="this script is to merge mendelian and vcfinfo, and extract high_confidence_calls")
  14. parser.add_argument('-folder', '--folder', type=str, help='directory that holds all the mendelian info', required=True)
  15. parser.add_argument('-vcf', '--vcf', type=str, help='merged multiple sample vcf', required=True)
  16. args = parser.parse_args()
  17. folder = args.folder
  18. vcf = args.vcf
  19. # input files
  20. folder = folder + '/*.txt'
  21. filenames = glob(folder)
  22. dataframes = []
  23. for filename in filenames:
  24. dataframes.append(pd.read_table(filename,header=None))
  25. dfs = [df.set_index([0, 1]) for df in dataframes]
  26. merged_mendelian = pd.concat(dfs, axis=1).reset_index()
  27. family_name = [i.split('/')[-1].replace('.txt','') for i in filenames]
  28. columns = ['CHROM','POS'] + family_name
  29. merged_mendelian.columns = columns
  30. vcf_dat = pd.read_table(vcf)
  31. merged_df = pd.merge(merged_mendelian, vcf_dat, how='outer', left_on=['CHROM','POS'], right_on = ['#CHROM','POS'])
  32. merged_df = merged_df.fillna('nan')
  33. vcf_header = '''##fileformat=VCFv4.2
  34. ##fileDate=20200501
  35. ##source=high_confidence_calls_intergration(choppy app)
  36. ##reference=GRCh38.d1.vd1
  37. ##INFO=<ID=VOTED,Number=1,Type=Integer,Description="Number mendelian consisitent votes">
  38. ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
  39. ##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Sum depth of all samples">
  40. ##FORMAT=<ID=ALT,Number=1,Type=Integer,Description="Sum alternative depth of all samples">
  41. ##contig=<ID=chr1,length=248956422>
  42. ##contig=<ID=chr2,length=242193529>
  43. ##contig=<ID=chr3,length=198295559>
  44. ##contig=<ID=chr4,length=190214555>
  45. ##contig=<ID=chr5,length=181538259>
  46. ##contig=<ID=chr6,length=170805979>
  47. ##contig=<ID=chr7,length=159345973>
  48. ##contig=<ID=chr8,length=145138636>
  49. ##contig=<ID=chr9,length=138394717>
  50. ##contig=<ID=chr10,length=133797422>
  51. ##contig=<ID=chr11,length=135086622>
  52. ##contig=<ID=chr12,length=133275309>
  53. ##contig=<ID=chr13,length=114364328>
  54. ##contig=<ID=chr14,length=107043718>
  55. ##contig=<ID=chr15,length=101991189>
  56. ##contig=<ID=chr16,length=90338345>
  57. ##contig=<ID=chr17,length=83257441>
  58. ##contig=<ID=chr18,length=80373285>
  59. ##contig=<ID=chr19,length=58617616>
  60. ##contig=<ID=chr20,length=64444167>
  61. ##contig=<ID=chr21,length=46709983>
  62. ##contig=<ID=chr22,length=50818468>
  63. ##contig=<ID=chrX,length=156040895>
  64. '''
  65. # output files
  66. benchmark_LCL5 = open('LCL5_voted.vcf','w')
  67. benchmark_LCL6 = open('LCL6_voted.vcf','w')
  68. benchmark_LCL7 = open('LCL7_voted.vcf','w')
  69. benchmark_LCL8 = open('LCL8_voted.vcf','w')
  70. all_sample_outfile = open('all_sample_information.txt','w')
  71. # write VCF
  72. LCL5_col = '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tLCL5_benchmark_calls\n'
  73. LCL6_col = '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tLCL6_benchmark_calls\n'
  74. LCL7_col = '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tLCL7_benchmark_calls\n'
  75. LCL8_col = '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tLCL8_benchmark_calls\n'
  76. benchmark_LCL5.write(vcf_header)
  77. benchmark_LCL5.write(LCL5_col)
  78. benchmark_LCL6.write(vcf_header)
  79. benchmark_LCL6.write(LCL6_col)
  80. benchmark_LCL7.write(vcf_header)
  81. benchmark_LCL7.write(LCL7_col)
  82. benchmark_LCL8.write(vcf_header)
  83. benchmark_LCL8.write(LCL8_col)
  84. # all info
  85. all_info_col = 'CHROM\tPOS\tLCL5_pcr_consensus\tLCL5_pcr_free_consensus\tLCL5_mendelian_num\tLCL5_consensus_call\tLCL5_consensus_alt_seq\tLCL5_alt\tLCL5_dp\tLCL5_detected_num\tLCL6_pcr_consensus\tLCL6_pcr_free_consensus\tLCL6_mendelian_num\tLCL6_consensus_call\tLCL6_consensus_alt_seq\tLCL6_alt\tLCL6_dp\tLCL6_detected_num\tLCL7_pcr_consensus\tLCL7_pcr_free_consensus\tLCL7_mendelian_num\t LCL7_consensus_call\tLCL7_consensus_alt_seq\tLCL7_alt\tLCL7_dp\tLCL7_detected_num\tLCL8_pcr_consensus\tLCL8_pcr_free_consensus\tLCL8_mendelian_num\tLCL8_consensus_call\tLCL8_consensus_alt_seq\tLCL8_alt\tLCL8_dp\tLCL8_detected_num\n'
  86. all_sample_outfile.write(all_info_col)
  87. # function
  88. def decide_by_rep(vcf_list,mendelian_list):
  89. consensus_rep = ''
  90. gt = [x.split(':')[0] for x in vcf_list]
  91. # mendelian consistent?
  92. mendelian_dict = Counter(mendelian_list)
  93. highest_mendelian = mendelian_dict.most_common(1)
  94. candidate_mendelian = highest_mendelian[0][0]
  95. freq_mendelian = highest_mendelian[0][1]
  96. if (candidate_mendelian == '1:1:1') and (freq_mendelian >= 2):
  97. con_loc = [i for i in range(len(mendelian_list)) if mendelian_list[i] == '1:1:1']
  98. gt_con = itemgetter(*con_loc)(gt)
  99. gt_num_dict = Counter(gt_con)
  100. highest_gt = gt_num_dict.most_common(1)
  101. candidate_gt = highest_gt[0][0]
  102. freq_gt = highest_gt[0][1]
  103. if (candidate_gt != './.') and (freq_gt >= 2):
  104. consensus_rep = candidate_gt
  105. elif (candidate_gt == './.') and (freq_gt >= 2):
  106. consensus_rep = 'noGTInfo'
  107. else:
  108. consensus_rep = 'inconGT'
  109. elif (candidate_mendelian == 'nan') and (freq_mendelian >= 2):
  110. consensus_rep = 'noMenInfo'
  111. else:
  112. consensus_rep = 'inconMen'
  113. return consensus_rep
  114. def consensus_call(vcf_info_list,mendelian_list,alt_seq):
  115. pcr_consensus = '.'
  116. pcr_free_consensus = '.'
  117. mendelian_num = '.'
  118. consensus_call = '.'
  119. consensus_alt_seq = '.'
  120. # pcr
  121. SEQ2000 = decide_by_rep(vcf_info_list[0:3],mendelian_list[0:3])
  122. XTen_ARD = decide_by_rep(vcf_info_list[18:21],mendelian_list[18:21])
  123. XTen_NVG = decide_by_rep(vcf_info_list[21:24],mendelian_list[21:24])
  124. XTen_WUX = decide_by_rep(vcf_info_list[24:27],mendelian_list[24:27])
  125. pcr_sequence_site = [SEQ2000,XTen_ARD,XTen_NVG,XTen_WUX]
  126. pcr_sequence_dict = Counter(pcr_sequence_site)
  127. pcr_highest_sequence = pcr_sequence_dict.most_common(1)
  128. pcr_candidate_sequence = pcr_highest_sequence[0][0]
  129. pcr_freq_sequence = pcr_highest_sequence[0][1]
  130. if pcr_freq_sequence > 2:
  131. pcr_consensus = pcr_candidate_sequence
  132. else:
  133. pcr_consensus = 'inconSequenceSite'
  134. # pcr-free
  135. T7_WGE = decide_by_rep(vcf_info_list[3:6],mendelian_list[3:6])
  136. Nova_ARD_1 = decide_by_rep(vcf_info_list[6:9],mendelian_list[6:9])
  137. Nova_ARD_2 = decide_by_rep(vcf_info_list[9:12],mendelian_list[9:12])
  138. Nova_BRG = decide_by_rep(vcf_info_list[12:15],mendelian_list[12:15])
  139. Nova_WUX = decide_by_rep(vcf_info_list[15:18],mendelian_list[15:18])
  140. sequence_site = [T7_WGE,Nova_ARD_1,Nova_ARD_2,Nova_BRG,Nova_WUX]
  141. sequence_dict = Counter(sequence_site)
  142. highest_sequence = sequence_dict.most_common(1)
  143. candidate_sequence = highest_sequence[0][0]
  144. freq_sequence = highest_sequence[0][1]
  145. if freq_sequence > 3:
  146. pcr_free_consensus = candidate_sequence
  147. else:
  148. pcr_free_consensus = 'inconSequenceSite'
  149. # net alt, dp
  150. # alt
  151. AD = [x.split(':')[1] for x in vcf_info_list]
  152. ALT = [x.split(',')[1] for x in AD]
  153. ALT = [int(x) for x in ALT]
  154. ALL_ALT = sum(ALT)
  155. # dp
  156. DP = [x.split(':')[2] for x in vcf_info_list]
  157. DP = [int(x) for x in DP]
  158. ALL_DP = sum(DP)
  159. # detected number
  160. gt = [x.split(':')[0] for x in vcf_info_list]
  161. gt = [x.replace('0/0','.') for x in gt]
  162. gt = [x.replace('./.','.') for x in gt]
  163. detected_num = 27 - gt.count('.')
  164. # decide consensus calls
  165. tag = ['inconGT','noMenInfo','inconMen','inconSequenceSite','noGTInfo']
  166. if (pcr_consensus != '0/0') and (pcr_consensus == pcr_free_consensus) and (pcr_consensus not in tag):
  167. consensus_call = pcr_consensus
  168. gt = [x.split(':')[0] for x in vcf_info_list]
  169. indices = [i for i, x in enumerate(gt) if x == consensus_call]
  170. matched_mendelian = itemgetter(*indices)(mendelian_list)
  171. mendelian_num = matched_mendelian.count('1:1:1')
  172. # Delete multiple alternative genotype to necessary expression
  173. alt_gt = alt_seq.split(',')
  174. if len(alt_gt) > 1:
  175. allele1 = consensus_call.split('/')[0]
  176. allele2 = consensus_call.split('/')[1]
  177. if allele1 == '0':
  178. allele2_seq = alt_gt[int(allele2) - 1]
  179. consensus_alt_seq = allele2_seq
  180. consensus_call = '0/1'
  181. else:
  182. allele1_seq = alt_gt[int(allele1) - 1]
  183. allele2_seq = alt_gt[int(allele2) - 1]
  184. if int(allele1) > int(allele2):
  185. consensus_alt_seq = allele2_seq + ',' + allele1_seq
  186. consensus_call = '1/2'
  187. elif int(allele1) < int(allele2):
  188. consensus_alt_seq = allele1_seq + ',' + allele2_seq
  189. consensus_call = '1/2'
  190. else:
  191. consensus_alt_seq = allele1_seq
  192. consensus_call = '1/1'
  193. else:
  194. consensus_alt_seq = alt_seq
  195. elif (pcr_consensus in tag) and (pcr_free_consensus in tag):
  196. consensus_call = 'filtered'
  197. elif ((pcr_consensus == './.') or (pcr_consensus in tag)) and ((pcr_free_consensus not in tag) and (pcr_free_consensus != './.')):
  198. consensus_call = 'pcr-free-speicifc'
  199. elif ((pcr_consensus != './.') or (pcr_consensus not in tag)) and ((pcr_free_consensus in tag) and (pcr_free_consensus == './.')):
  200. consensus_call = 'pcr-speicifc'
  201. elif (pcr_consensus == '0/0') and (pcr_free_consensus == '0/0'):
  202. consensus_call = '0/0'
  203. else:
  204. consensus_call = 'filtered'
  205. return pcr_consensus, pcr_free_consensus, mendelian_num, consensus_call, consensus_alt_seq, ALL_ALT, ALL_DP, detected_num
  206. for row in merged_df.itertuples():
  207. mendelian_list = [row.Quartet_DNA_BGI_SEQ2000_BGI_1_20180518,row.Quartet_DNA_BGI_SEQ2000_BGI_2_20180530,row.Quartet_DNA_BGI_SEQ2000_BGI_3_20180530, \
  208. row.Quartet_DNA_BGI_T7_WGE_1_20191105,row.Quartet_DNA_BGI_T7_WGE_2_20191105,row.Quartet_DNA_BGI_T7_WGE_3_20191105, \
  209. row.Quartet_DNA_ILM_Nova_ARD_1_20181108,row.Quartet_DNA_ILM_Nova_ARD_2_20181108,row.Quartet_DNA_ILM_Nova_ARD_3_20181108, \
  210. row.Quartet_DNA_ILM_Nova_ARD_4_20190111,row.Quartet_DNA_ILM_Nova_ARD_5_20190111,row.Quartet_DNA_ILM_Nova_ARD_6_20190111, \
  211. row.Quartet_DNA_ILM_Nova_BRG_1_20180930,row.Quartet_DNA_ILM_Nova_BRG_2_20180930,row.Quartet_DNA_ILM_Nova_BRG_3_20180930, \
  212. row.Quartet_DNA_ILM_Nova_WUX_1_20190917,row.Quartet_DNA_ILM_Nova_WUX_2_20190917,row.Quartet_DNA_ILM_Nova_WUX_3_20190917, \
  213. row.Quartet_DNA_ILM_XTen_ARD_1_20170403,row.Quartet_DNA_ILM_XTen_ARD_2_20170403,row.Quartet_DNA_ILM_XTen_ARD_3_20170403, \
  214. row.Quartet_DNA_ILM_XTen_NVG_1_20170329,row.Quartet_DNA_ILM_XTen_NVG_2_20170329,row.Quartet_DNA_ILM_XTen_NVG_3_20170329, \
  215. row.Quartet_DNA_ILM_XTen_WUX_1_20170216,row.Quartet_DNA_ILM_XTen_WUX_2_20170216,row.Quartet_DNA_ILM_XTen_WUX_3_20170216]
  216. lcl5_list = [row.Quartet_DNA_BGI_SEQ2000_BGI_LCL5_1_20180518,row.Quartet_DNA_BGI_SEQ2000_BGI_LCL5_2_20180530,row.Quartet_DNA_BGI_SEQ2000_BGI_LCL5_3_20180530, \
  217. row.Quartet_DNA_BGI_T7_WGE_LCL5_1_20191105,row.Quartet_DNA_BGI_T7_WGE_LCL5_2_20191105,row.Quartet_DNA_BGI_T7_WGE_LCL5_3_20191105, \
  218. row.Quartet_DNA_ILM_Nova_ARD_LCL5_1_20181108,row.Quartet_DNA_ILM_Nova_ARD_LCL5_2_20181108,row.Quartet_DNA_ILM_Nova_ARD_LCL5_3_20181108, \
  219. row.Quartet_DNA_ILM_Nova_ARD_LCL5_4_20190111,row.Quartet_DNA_ILM_Nova_ARD_LCL5_5_20190111,row.Quartet_DNA_ILM_Nova_ARD_LCL5_6_20190111, \
  220. row.Quartet_DNA_ILM_Nova_BRG_LCL5_1_20180930,row.Quartet_DNA_ILM_Nova_BRG_LCL5_2_20180930,row.Quartet_DNA_ILM_Nova_BRG_LCL5_3_20180930, \
  221. row.Quartet_DNA_ILM_Nova_WUX_LCL5_1_20190917,row.Quartet_DNA_ILM_Nova_WUX_LCL5_2_20190917,row.Quartet_DNA_ILM_Nova_WUX_LCL5_3_20190917, \
  222. row.Quartet_DNA_ILM_XTen_ARD_LCL5_1_20170403,row.Quartet_DNA_ILM_XTen_ARD_LCL5_2_20170403,row.Quartet_DNA_ILM_XTen_ARD_LCL5_3_20170403, \
  223. row.Quartet_DNA_ILM_XTen_NVG_LCL5_1_20170329,row.Quartet_DNA_ILM_XTen_NVG_LCL5_2_20170329,row.Quartet_DNA_ILM_XTen_NVG_LCL5_3_20170329, \
  224. row.Quartet_DNA_ILM_XTen_WUX_LCL5_1_20170216,row.Quartet_DNA_ILM_XTen_WUX_LCL5_2_20170216,row.Quartet_DNA_ILM_XTen_WUX_LCL5_3_20170216]
  225. lcl6_list = [row.Quartet_DNA_BGI_SEQ2000_BGI_LCL6_1_20180518,row.Quartet_DNA_BGI_SEQ2000_BGI_LCL6_2_20180530,row.Quartet_DNA_BGI_SEQ2000_BGI_LCL6_3_20180530, \
  226. row.Quartet_DNA_BGI_T7_WGE_LCL6_1_20191105,row.Quartet_DNA_BGI_T7_WGE_LCL6_2_20191105,row.Quartet_DNA_BGI_T7_WGE_LCL6_3_20191105, \
  227. row.Quartet_DNA_ILM_Nova_ARD_LCL6_1_20181108,row.Quartet_DNA_ILM_Nova_ARD_LCL6_2_20181108,row.Quartet_DNA_ILM_Nova_ARD_LCL6_3_20181108, \
  228. row.Quartet_DNA_ILM_Nova_ARD_LCL6_4_20190111,row.Quartet_DNA_ILM_Nova_ARD_LCL6_5_20190111,row.Quartet_DNA_ILM_Nova_ARD_LCL6_6_20190111, \
  229. row.Quartet_DNA_ILM_Nova_BRG_LCL6_1_20180930,row.Quartet_DNA_ILM_Nova_BRG_LCL6_2_20180930,row.Quartet_DNA_ILM_Nova_BRG_LCL6_3_20180930, \
  230. row.Quartet_DNA_ILM_Nova_WUX_LCL6_1_20190917,row.Quartet_DNA_ILM_Nova_WUX_LCL6_2_20190917,row.Quartet_DNA_ILM_Nova_WUX_LCL6_3_20190917, \
  231. row.Quartet_DNA_ILM_XTen_ARD_LCL6_1_20170403,row.Quartet_DNA_ILM_XTen_ARD_LCL6_2_20170403,row.Quartet_DNA_ILM_XTen_ARD_LCL6_3_20170403, \
  232. row.Quartet_DNA_ILM_XTen_NVG_LCL6_1_20170329,row.Quartet_DNA_ILM_XTen_NVG_LCL6_2_20170329,row.Quartet_DNA_ILM_XTen_NVG_LCL6_3_20170329, \
  233. row.Quartet_DNA_ILM_XTen_WUX_LCL6_1_20170216,row.Quartet_DNA_ILM_XTen_WUX_LCL6_2_20170216,row.Quartet_DNA_ILM_XTen_WUX_LCL6_3_20170216]
  234. lcl7_list = [row.Quartet_DNA_BGI_SEQ2000_BGI_LCL7_1_20180518,row.Quartet_DNA_BGI_SEQ2000_BGI_LCL7_2_20180530,row.Quartet_DNA_BGI_SEQ2000_BGI_LCL7_3_20180530, \
  235. row.Quartet_DNA_BGI_T7_WGE_LCL7_1_20191105,row.Quartet_DNA_BGI_T7_WGE_LCL7_2_20191105,row.Quartet_DNA_BGI_T7_WGE_LCL7_3_20191105, \
  236. row.Quartet_DNA_ILM_Nova_ARD_LCL7_1_20181108,row.Quartet_DNA_ILM_Nova_ARD_LCL7_2_20181108,row.Quartet_DNA_ILM_Nova_ARD_LCL7_3_20181108, \
  237. row.Quartet_DNA_ILM_Nova_ARD_LCL7_4_20190111,row.Quartet_DNA_ILM_Nova_ARD_LCL7_5_20190111,row.Quartet_DNA_ILM_Nova_ARD_LCL7_6_20190111, \
  238. row.Quartet_DNA_ILM_Nova_BRG_LCL7_1_20180930,row.Quartet_DNA_ILM_Nova_BRG_LCL7_2_20180930,row.Quartet_DNA_ILM_Nova_BRG_LCL7_3_20180930, \
  239. row.Quartet_DNA_ILM_Nova_WUX_LCL7_1_20190917,row.Quartet_DNA_ILM_Nova_WUX_LCL7_2_20190917,row.Quartet_DNA_ILM_Nova_WUX_LCL7_3_20190917, \
  240. row.Quartet_DNA_ILM_XTen_ARD_LCL7_1_20170403,row.Quartet_DNA_ILM_XTen_ARD_LCL7_2_20170403,row.Quartet_DNA_ILM_XTen_ARD_LCL7_3_20170403, \
  241. row.Quartet_DNA_ILM_XTen_NVG_LCL7_1_20170329,row.Quartet_DNA_ILM_XTen_NVG_LCL7_2_20170329,row.Quartet_DNA_ILM_XTen_NVG_LCL7_3_20170329, \
  242. row.Quartet_DNA_ILM_XTen_WUX_LCL7_1_20170216,row.Quartet_DNA_ILM_XTen_WUX_LCL7_2_20170216,row.Quartet_DNA_ILM_XTen_WUX_LCL7_3_20170216]
  243. lcl8_list = [row.Quartet_DNA_BGI_SEQ2000_BGI_LCL8_1_20180518,row.Quartet_DNA_BGI_SEQ2000_BGI_LCL8_2_20180530,row.Quartet_DNA_BGI_SEQ2000_BGI_LCL8_3_20180530, \
  244. row.Quartet_DNA_BGI_T7_WGE_LCL8_1_20191105,row.Quartet_DNA_BGI_T7_WGE_LCL8_2_20191105,row.Quartet_DNA_BGI_T7_WGE_LCL8_3_20191105, \
  245. row.Quartet_DNA_ILM_Nova_ARD_LCL8_1_20181108,row.Quartet_DNA_ILM_Nova_ARD_LCL8_2_20181108,row.Quartet_DNA_ILM_Nova_ARD_LCL8_3_20181108, \
  246. row.Quartet_DNA_ILM_Nova_ARD_LCL8_4_20190111,row.Quartet_DNA_ILM_Nova_ARD_LCL8_5_20190111,row.Quartet_DNA_ILM_Nova_ARD_LCL8_6_20190111, \
  247. row.Quartet_DNA_ILM_Nova_BRG_LCL8_1_20180930,row.Quartet_DNA_ILM_Nova_BRG_LCL8_2_20180930,row.Quartet_DNA_ILM_Nova_BRG_LCL8_3_20180930, \
  248. row.Quartet_DNA_ILM_Nova_WUX_LCL8_1_20190917,row.Quartet_DNA_ILM_Nova_WUX_LCL8_2_20190917,row.Quartet_DNA_ILM_Nova_WUX_LCL8_3_20190917, \
  249. row.Quartet_DNA_ILM_XTen_ARD_LCL8_1_20170403,row.Quartet_DNA_ILM_XTen_ARD_LCL8_2_20170403,row.Quartet_DNA_ILM_XTen_ARD_LCL8_3_20170403, \
  250. row.Quartet_DNA_ILM_XTen_NVG_LCL8_1_20170329,row.Quartet_DNA_ILM_XTen_NVG_LCL8_2_20170329,row.Quartet_DNA_ILM_XTen_NVG_LCL8_3_20170329, \
  251. row.Quartet_DNA_ILM_XTen_WUX_LCL8_1_20170216,row.Quartet_DNA_ILM_XTen_WUX_LCL8_2_20170216,row.Quartet_DNA_ILM_XTen_WUX_LCL8_3_20170216]
  252. # LCL5
  253. LCL5_pcr_consensus, LCL5_pcr_free_consensus, LCL5_mendelian_num, LCL5_consensus_call, LCL5_consensus_alt_seq, LCL5_alt, LCL5_dp, LCL5_detected_num = consensus_call(lcl5_list,mendelian_list,row.ALT)
  254. if LCL5_mendelian_num != '.':
  255. LCL5_output = row.CHROM + '\t' + str(row.POS) + '\t' + '.' + '\t' + row.REF + '\t' + LCL5_consensus_alt_seq + '\t' + '.' + '\t' + '.' + '\t' +'VOTED=' + str(LCL5_mendelian_num) + '\t' + 'GT:ALT:DP' + '\t' + LCL5_consensus_call + ':' + str(LCL5_alt) + ':' + str(LCL5_dp) + '\n'
  256. benchmark_LCL5.write(LCL5_output)
  257. # LCL6
  258. LCL6_pcr_consensus, LCL6_pcr_free_consensus, LCL6_mendelian_num, LCL6_consensus_call, LCL6_consensus_alt_seq, LCL6_alt, LCL6_dp, LCL6_detected_num = consensus_call(lcl6_list,mendelian_list,row.ALT)
  259. if LCL6_mendelian_num != '.':
  260. LCL6_output = row.CHROM + '\t' + str(row.POS) + '\t' + '.' + '\t' + row.REF + '\t' + LCL6_consensus_alt_seq + '\t' + '.' + '\t' + '.' + '\t' +'VOTED=' + str(LCL6_mendelian_num) + '\t' + 'GT:ALT:DP' + '\t' + LCL6_consensus_call + ':' + str(LCL6_alt) + ':' + str(LCL6_dp) + '\n'
  261. benchmark_LCL6.write(LCL6_output)
  262. # LCL7
  263. LCL7_pcr_consensus, LCL7_pcr_free_consensus, LCL7_mendelian_num, LCL7_consensus_call, LCL7_consensus_alt_seq, LCL7_alt, LCL7_dp, LCL7_detected_num = consensus_call(lcl7_list,mendelian_list,row.ALT)
  264. if LCL7_mendelian_num != '.':
  265. LCL7_output = row.CHROM + '\t' + str(row.POS) + '\t' + '.' + '\t' + row.REF + '\t' + LCL7_consensus_alt_seq + '\t' + '.' + '\t' + '.' + '\t' +'VOTED=' + str(LCL7_mendelian_num) + '\t' + 'GT:ALT:DP' + '\t' + LCL7_consensus_call + ':' + str(LCL7_alt) + ':' + str(LCL7_dp) + '\n'
  266. benchmark_LCL7.write(LCL7_output)
  267. # LCL8
  268. LCL8_pcr_consensus, LCL8_pcr_free_consensus, LCL8_mendelian_num, LCL8_consensus_call, LCL8_consensus_alt_seq, LCL8_alt, LCL8_dp, LCL8_detected_num = consensus_call(lcl8_list,mendelian_list,row.ALT)
  269. if LCL8_mendelian_num != '.':
  270. LCL8_output = row.CHROM + '\t' + str(row.POS) + '\t' + '.' + '\t' + row.REF + '\t' + LCL8_consensus_alt_seq + '\t' + '.' + '\t' + '.' + '\t' +'VOTED=' + str(LCL8_mendelian_num) + '\t' + 'GT:ALT:DP' + '\t' + LCL8_consensus_call + ':' + str(LCL8_alt) + ':' + str(LCL8_dp) + '\n'
  271. benchmark_LCL8.write(LCL8_output)
  272. # all data
  273. all_output = row.CHROM + '\t' + str(row.POS) + '\t' + LCL5_pcr_consensus + '\t' + LCL5_pcr_free_consensus + '\t' + str(LCL5_mendelian_num) + '\t' + LCL5_consensus_call + '\t' + LCL5_consensus_alt_seq + '\t' + str(LCL5_alt) + '\t' + str(LCL5_dp) + '\t' + str(LCL5_detected_num) + '\t' +\
  274. LCL6_pcr_consensus + '\t' + LCL6_pcr_free_consensus + '\t' + str(LCL6_mendelian_num) + '\t' + LCL6_consensus_call + '\t' + LCL6_consensus_alt_seq + '\t' + str(LCL6_alt) + '\t' + str(LCL6_dp) + '\t' + str(LCL6_detected_num) + '\t' +\
  275. LCL7_pcr_consensus + '\t' + LCL7_pcr_free_consensus + '\t' + str(LCL7_mendelian_num) + '\t' + LCL7_consensus_call + '\t' + LCL7_consensus_alt_seq + '\t' + str(LCL7_alt) + '\t' + str(LCL7_dp) + '\t' + str(LCL7_detected_num) + '\t' +\
  276. LCL8_pcr_consensus + '\t' + LCL8_pcr_free_consensus + '\t' + str(LCL8_mendelian_num) + '\t' + LCL8_consensus_call + '\t' + LCL8_consensus_alt_seq + '\t' + str(LCL8_alt) + '\t' + str(LCL8_dp) + '\t' + str(LCL8_detected_num) + '\n'
  277. all_sample_outfile.write(all_output)