Você não pode selecionar mais de 25 tópicos. Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

296 linhas
23KB

  1. from __future__ import division
  2. import pandas as pd
  3. import sys, argparse, os
  4. import fileinput
  5. import re
  6. # input arguments
  7. parser = argparse.ArgumentParser(description="this script is to get final high confidence calls and information of all replicates")
  8. parser.add_argument('-vcfInfo', '--vcfInfo', type=str, help='The txt file of variants information, this file is named as prefix__variant_quality_location.txt', required=True)
  9. parser.add_argument('-mendelianInfo', '--mendelianInfo', type=str, help='The merged mendelian information of all samples', required=True)
  10. parser.add_argument('-prefix', '--prefix', type=str, help='The prefix of output filenames', required=True)
  11. parser.add_argument('-sample', '--sample_name', type=str, help='which sample of quartet', required=True)
  12. args = parser.parse_args()
  13. vcfInfo = args.vcfInfo
  14. mendelianInfo = args.mendelianInfo
  15. prefix = args.prefix
  16. sample_name = args.sample_name
  17. vcf_header = '''##fileformat=VCFv4.2
  18. ##fileDate=20200331
  19. ##source=high_confidence_calls_intergration(choppy app)
  20. ##reference=GRCh38.d1.vd1
  21. ##INFO=<ID=location,Number=1,Type=String,Description="Repeat region">
  22. ##INFO=<ID=DPCT,Number=1,Type=Float,Description="Percentage of detected votes">
  23. ##INFO=<ID=VPCT,Number=1,Type=Float,Description="Percentage of consnesus votes">
  24. ##INFO=<ID=FPCT,Number=1,Type=Float,Description="Percentage of mendelian consisitent votes">
  25. ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
  26. ##FORMAT=<ID=DP,Number=1,Type=Int,Description="Depth">
  27. ##FORMAT=<ID=AF,Number=1,Type=Float,Description="Allele frequency">
  28. ##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype quality">
  29. ##FORMAT=<ID=MQ,Number=1,Type=Float,Description="Mapping quality">
  30. ##contig=<ID=chr1,length=248956422>
  31. ##contig=<ID=chr2,length=242193529>
  32. ##contig=<ID=chr3,length=198295559>
  33. ##contig=<ID=chr4,length=190214555>
  34. ##contig=<ID=chr5,length=181538259>
  35. ##contig=<ID=chr6,length=170805979>
  36. ##contig=<ID=chr7,length=159345973>
  37. ##contig=<ID=chr8,length=145138636>
  38. ##contig=<ID=chr9,length=138394717>
  39. ##contig=<ID=chr10,length=133797422>
  40. ##contig=<ID=chr11,length=135086622>
  41. ##contig=<ID=chr12,length=133275309>
  42. ##contig=<ID=chr13,length=114364328>
  43. ##contig=<ID=chr14,length=107043718>
  44. ##contig=<ID=chr15,length=101991189>
  45. ##contig=<ID=chr16,length=90338345>
  46. ##contig=<ID=chr17,length=83257441>
  47. ##contig=<ID=chr18,length=80373285>
  48. ##contig=<ID=chr19,length=58617616>
  49. ##contig=<ID=chr20,length=64444167>
  50. ##contig=<ID=chr21,length=46709983>
  51. ##contig=<ID=chr22,length=50818468>
  52. ##contig=<ID=chrX,length=156040895>
  53. '''
  54. vcf_header_all_sample = '''##fileformat=VCFv4.2
  55. ##fileDate=20200331
  56. ##reference=GRCh38.d1.vd1
  57. ##INFO=<ID=location,Number=1,Type=String,Description="Repeat region">
  58. ##INFO=<ID=DPCT,Number=1,Type=Float,Description="Percentage of detected votes">
  59. ##INFO=<ID=VPCT,Number=1,Type=Float,Description="Percentage of consnesus votes">
  60. ##INFO=<ID=FPCT,Number=1,Type=Float,Description="Percentage of mendelian consisitent votes">
  61. ##INFO=<ID=ALL_ALT,Number=1,Type=Float,Description="Sum of alternative reads of all samples">
  62. ##INFO=<ID=ALL_DP,Number=1,Type=Float,Description="Sum of depth of all samples">
  63. ##INFO=<ID=ALL_AF,Number=1,Type=Float,Description="Allele frequency of net alternatice reads and net depth">
  64. ##INFO=<ID=GQ_MEAN,Number=1,Type=Float,Description="Mean of genotype quality of all samples">
  65. ##INFO=<ID=MQ_MEAN,Number=1,Type=Float,Description="Mean of mapping quality of all samples">
  66. ##INFO=<ID=PCR,Number=1,Type=String,Description="Consensus of PCR votes">
  67. ##INFO=<ID=PCR_FREE,Number=1,Type=String,Description="Consensus of PCR-free votes">
  68. ##INFO=<ID=CONSENSUS,Number=1,Type=String,Description="Consensus calls">
  69. ##INFO=<ID=CONSENSUS_SEQ,Number=1,Type=String,Description="Consensus sequence">
  70. ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
  71. ##FORMAT=<ID=DP,Number=1,Type=String,Description="Depth">
  72. ##FORMAT=<ID=AF,Number=1,Type=String,Description="Allele frequency">
  73. ##FORMAT=<ID=GQ,Number=1,Type=String,Description="Genotype quality">
  74. ##FORMAT=<ID=MQ,Number=1,Type=String,Description="Mapping quality">
  75. ##FORMAT=<ID=TWINS,Number=1,Type=String,Description="1 is twins shared, 0 is twins discordant ">
  76. ##FORMAT=<ID=TRIO5,Number=1,Type=String,Description="1 is LCL7, LCL8 and LCL5 mendelian consistent, 0 is mendelian vioaltion">
  77. ##FORMAT=<ID=TRIO6,Number=1,Type=String,Description="1 is LCL7, LCL8 and LCL6 mendelian consistent, 0 is mendelian vioaltion">
  78. ##contig=<ID=chr1,length=248956422>
  79. ##contig=<ID=chr2,length=242193529>
  80. ##contig=<ID=chr3,length=198295559>
  81. ##contig=<ID=chr4,length=190214555>
  82. ##contig=<ID=chr5,length=181538259>
  83. ##contig=<ID=chr6,length=170805979>
  84. ##contig=<ID=chr7,length=159345973>
  85. ##contig=<ID=chr8,length=145138636>
  86. ##contig=<ID=chr9,length=138394717>
  87. ##contig=<ID=chr10,length=133797422>
  88. ##contig=<ID=chr11,length=135086622>
  89. ##contig=<ID=chr12,length=133275309>
  90. ##contig=<ID=chr13,length=114364328>
  91. ##contig=<ID=chr14,length=107043718>
  92. ##contig=<ID=chr15,length=101991189>
  93. ##contig=<ID=chr16,length=90338345>
  94. ##contig=<ID=chr17,length=83257441>
  95. ##contig=<ID=chr18,length=80373285>
  96. ##contig=<ID=chr19,length=58617616>
  97. ##contig=<ID=chr20,length=64444167>
  98. ##contig=<ID=chr21,length=46709983>
  99. ##contig=<ID=chr22,length=50818468>
  100. ##contig=<ID=chrX,length=156040895>
  101. '''
  102. # output file
  103. file_name = prefix + '_benchmarking_calls.vcf'
  104. outfile = open(file_name,'w')
  105. all_sample_file_name = prefix + '_all_sample_information.vcf'
  106. all_sample_outfile = open(all_sample_file_name, 'w')
  107. # write VCF
  108. outputcolumn = '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t' + sample_name + '_high_confidence_calls\n'
  109. outfile.write(vcf_header)
  110. outfile.write(outputcolumn)
  111. outputcolumn_all_sample = '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t'+ \
  112. 'Quartet_DNA_BGI_SEQ2000_BGI_1_20180518\tQuartet_DNA_BGI_SEQ2000_BGI_2_20180530\tQuartet_DNA_BGI_SEQ2000_BGI_3_20180530\t' + \
  113. 'Quartet_DNA_BGI_T7_WGE_1_20191105\tQuartet_DNA_BGI_T7_WGE_2_20191105\tQuartet_DNA_BGI_T7_WGE_3_20191105\t' + \
  114. 'Quartet_DNA_ILM_Nova_ARD_1_20181108\tQuartet_DNA_ILM_Nova_ARD_2_20181108\tQuartet_DNA_ILM_Nova_ARD_3_20181108\t' + \
  115. 'Quartet_DNA_ILM_Nova_ARD_4_20190111\tQuartet_DNA_ILM_Nova_ARD_5_20190111\tQuartet_DNA_ILM_Nova_ARD_6_20190111\t' + \
  116. 'Quartet_DNA_ILM_Nova_BRG_1_20180930\tQuartet_DNA_ILM_Nova_BRG_2_20180930\tQuartet_DNA_ILM_Nova_BRG_3_20180930\t' + \
  117. 'Quartet_DNA_ILM_Nova_WUX_1_20190917\tQuartet_DNA_ILM_Nova_WUX_2_20190917\tQuartet_DNA_ILM_Nova_WUX_3_20190917\t' + \
  118. 'Quartet_DNA_ILM_XTen_ARD_1_20170403\tQuartet_DNA_ILM_XTen_ARD_2_20170403\tQuartet_DNA_ILM_XTen_ARD_3_20170403\t' + \
  119. 'Quartet_DNA_ILM_XTen_NVG_1_20170329\tQuartet_DNA_ILM_XTen_NVG_2_20170329\tQuartet_DNA_ILM_XTen_NVG_3_20170329\t' + \
  120. 'Quartet_DNA_ILM_XTen_WUX_1_20170216\tQuartet_DNA_ILM_XTen_WUX_2_20170216\tQuartet_DNA_ILM_XTen_WUX_3_20170216\n'
  121. all_sample_outfile.write(vcf_header_all_sample)
  122. all_sample_outfile.write(outputcolumn_all_sample)
  123. # input files
  124. vcf_info = pd.read_table(vcfInfo)
  125. mendelian_info = pd.read_table(mendelianInfo)
  126. merged_df = pd.merge(vcf_info, mendelian_info, how='outer', left_on=['#CHROM','POS'], right_on = ['#CHROM','POS'])
  127. merged_df = merged_df.fillna('.')
  128. # function
  129. def single_sample_format(format_x,strings_x,strings_y):
  130. gt = '.'
  131. dp = '.'
  132. af = '.'
  133. gq = '.'
  134. mq = '.'
  135. twins = '.'
  136. trio5 = '.'
  137. trio6 = '.'
  138. # GT:DP:AF:GQ:MQ:TWINS:TRIO5:TRIO6
  139. # strings_x
  140. format_strings = format_x.split(':')
  141. if (strings_x == '.') and (strings_y != '.'):
  142. element_strings_y = str(strings_y).split(':')
  143. gt = '0/0'
  144. dp = '.'
  145. af = '.'
  146. gq = '.'
  147. mq = '.'
  148. twins = element_strings_y[1]
  149. trio5 = element_strings_y[2]
  150. trio6 = element_strings_y[3]
  151. elif (strings_x != '.') and (strings_y == '.'):
  152. element_strings_x = strings_x.split(':')
  153. formatDict = dict(zip(format_strings, element_strings_x))
  154. gt = formatDict['GT']
  155. dp = formatDict['DP']
  156. af = formatDict['AF']
  157. gq = formatDict['GQ']
  158. mq = formatDict['MQ']
  159. twins = '.'
  160. trio5 = '.'
  161. trio6 = '.'
  162. elif (strings_x != '.') and (strings_y != '.'):
  163. element_strings_y = str(strings_y).split(':')
  164. element_strings_x = strings_x.split(':')
  165. formatDict = dict(zip(format_strings, element_strings_x))
  166. gt = formatDict['GT']
  167. dp = formatDict['DP']
  168. af = formatDict['AF']
  169. gq = formatDict['GQ']
  170. mq = formatDict['MQ']
  171. twins = element_strings_y[1]
  172. trio5 = element_strings_y[2]
  173. trio6 = element_strings_y[3]
  174. else:
  175. pass
  176. merged_format = gt + ':' + dp + ':' + af + ':' + gq + ':' + mq + ':' + twins + ':' + trio5 + ':' + trio6
  177. return(merged_format)
  178. #
  179. for row in merged_df.itertuples():
  180. vcf_count = row[10:37].count('.')
  181. mendelian_count = row[50:77].count('.')
  182. if vcf_count == mendelian_count:
  183. info = 'location=' + str(row.location) + ';' + str(row.INFO_y)
  184. if row.FILTER_y == 'reproducible':
  185. ref = row.DP - row._42
  186. FORMAT = row[79] + ':' + str(int(ref)) + ',' + str(int(row._42)) + ':' + str(int(row.DP)) + ':' + str(round(row.AF,2)) + ':' + str(round(row.GQ,2)) + ':' + str(round(row.MQ,2))
  187. outline1 = str(row._1) + '\t' + str(row.POS) + '\t' + str(row.ID_x) + '\t' + str(row.REF_y) + '\t' + str(row[80]) + '\t' + '.' + '\t' + '.' + '\t' + str(info) + '\t' + 'GT:AD:DP:AF:GQ:MQ' + '\t' + str(FORMAT) + '\n'
  188. outfile.write(outline1)
  189. else:
  190. pass
  191. if row.INFO_x != '.':
  192. if row.AF=='.':
  193. info = 'location=' + str(row.location) + ';' + str(row.INFO_y) + ';' + 'ALL_ALT=' + str(int(row._42)) + ';' + 'ALL_DP=' + str(int(row.DP)) + ';' + 'ALL_AF=' + 'NA' + ';' + 'GQ_MEAN=' + str(row.GQ) + ';' + 'MQ_MEAN=' + str(row.MQ) + ';' + 'PCR=' + str(row[77]) + ';' + 'PCR_FREE=' + str(row[78]) + ';' + 'CONSENSUS=' + str(row[79]) + ';' + 'CONSENSUS_SEQ=' + str(row[80])
  194. else:
  195. info = 'location=' + str(row.location) + ';' + str(row.INFO_y) + ';' + 'ALL_ALT=' + str(int(row._42)) + ';' + 'ALL_DP=' + str(int(row.DP)) + ';' + 'ALL_AF=' + str(round(float(row.AF),2)) + ';' + 'GQ_MEAN=' + str(row.GQ) + ';' + 'MQ_MEAN=' + str(row.MQ) + ';' + 'PCR=' + str(row[77]) + ';' + 'PCR_FREE=' + str(row[78]) + ';' + 'CONSENSUS=' + str(row[79]) + ';' + 'CONSENSUS_SEQ=' + str(row[80])
  196. Quartet_DNA_BGI_SEQ2000_BGI_1_20180518_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_BGI_SEQ2000_BGI_1_20180518_LCL5_x, row.Quartet_DNA_BGI_SEQ2000_BGI_1_20180518_LCL5_y)
  197. Quartet_DNA_BGI_SEQ2000_BGI_2_20180530_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_BGI_SEQ2000_BGI_2_20180530_LCL5_x, row.Quartet_DNA_BGI_SEQ2000_BGI_2_20180530_LCL5_y)
  198. Quartet_DNA_BGI_SEQ2000_BGI_3_20180530_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_BGI_SEQ2000_BGI_3_20180530_LCL5_x, row.Quartet_DNA_BGI_SEQ2000_BGI_3_20180530_LCL5_y)
  199. Quartet_DNA_BGI_T7_WGE_1_20191105_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_BGI_T7_WGE_1_20191105_LCL5_x, row.Quartet_DNA_BGI_T7_WGE_1_20191105_LCL5_y)
  200. Quartet_DNA_BGI_T7_WGE_2_20191105_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_BGI_T7_WGE_2_20191105_LCL5_x, row.Quartet_DNA_BGI_T7_WGE_2_20191105_LCL5_y)
  201. Quartet_DNA_BGI_T7_WGE_3_20191105_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_BGI_T7_WGE_3_20191105_LCL5_x, row.Quartet_DNA_BGI_T7_WGE_3_20191105_LCL5_y)
  202. Quartet_DNA_ILM_Nova_ARD_1_20181108_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_ARD_1_20181108_LCL5_x, row.Quartet_DNA_ILM_Nova_ARD_1_20181108_LCL5_y)
  203. Quartet_DNA_ILM_Nova_ARD_2_20181108_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_ARD_2_20181108_LCL5_x, row.Quartet_DNA_ILM_Nova_ARD_2_20181108_LCL5_y)
  204. Quartet_DNA_ILM_Nova_ARD_3_20181108_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_ARD_3_20181108_LCL5_x, row.Quartet_DNA_ILM_Nova_ARD_3_20181108_LCL5_y)
  205. Quartet_DNA_ILM_Nova_ARD_4_20190111_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_ARD_4_20190111_LCL5_x, row.Quartet_DNA_ILM_Nova_ARD_4_20190111_LCL5_y)
  206. Quartet_DNA_ILM_Nova_ARD_5_20190111_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_ARD_5_20190111_LCL5_x, row.Quartet_DNA_ILM_Nova_ARD_5_20190111_LCL5_y)
  207. Quartet_DNA_ILM_Nova_ARD_6_20190111_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_ARD_6_20190111_LCL5_x, row.Quartet_DNA_ILM_Nova_ARD_6_20190111_LCL5_y)
  208. Quartet_DNA_ILM_Nova_BRG_1_20180930_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_BRG_1_20180930_LCL5_x, row.Quartet_DNA_ILM_Nova_BRG_1_20180930_LCL5_y)
  209. Quartet_DNA_ILM_Nova_BRG_2_20180930_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_BRG_2_20180930_LCL5_x, row.Quartet_DNA_ILM_Nova_BRG_2_20180930_LCL5_y)
  210. Quartet_DNA_ILM_Nova_BRG_3_20180930_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_BRG_3_20180930_LCL5_x, row.Quartet_DNA_ILM_Nova_BRG_3_20180930_LCL5_y)
  211. Quartet_DNA_ILM_Nova_WUX_1_20190917_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_WUX_1_20190917_LCL5_x, row.Quartet_DNA_ILM_Nova_WUX_1_20190917_LCL5_y)
  212. Quartet_DNA_ILM_Nova_WUX_2_20190917_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_WUX_2_20190917_LCL5_x, row.Quartet_DNA_ILM_Nova_WUX_2_20190917_LCL5_y)
  213. Quartet_DNA_ILM_Nova_WUX_3_20190917_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_WUX_3_20190917_LCL5_x, row.Quartet_DNA_ILM_Nova_WUX_3_20190917_LCL5_y)
  214. Quartet_DNA_ILM_XTen_ARD_1_20170403_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_ARD_1_20170403_LCL5_x, row.Quartet_DNA_ILM_XTen_ARD_1_20170403_LCL5_y)
  215. Quartet_DNA_ILM_XTen_ARD_2_20170403_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_ARD_2_20170403_LCL5_x, row.Quartet_DNA_ILM_XTen_ARD_2_20170403_LCL5_y)
  216. Quartet_DNA_ILM_XTen_ARD_3_20170403_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_ARD_3_20170403_LCL5_x, row.Quartet_DNA_ILM_XTen_ARD_3_20170403_LCL5_y)
  217. Quartet_DNA_ILM_XTen_NVG_1_20170329_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_NVG_1_20170329_LCL5_x, row.Quartet_DNA_ILM_XTen_NVG_1_20170329_LCL5_y)
  218. Quartet_DNA_ILM_XTen_NVG_2_20170329_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_NVG_2_20170329_LCL5_x, row.Quartet_DNA_ILM_XTen_NVG_2_20170329_LCL5_y)
  219. Quartet_DNA_ILM_XTen_NVG_3_20170329_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_NVG_3_20170329_LCL5_x, row.Quartet_DNA_ILM_XTen_NVG_3_20170329_LCL5_y)
  220. Quartet_DNA_ILM_XTen_WUX_1_20170216_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_WUX_1_20170216_LCL5_x, row.Quartet_DNA_ILM_XTen_WUX_1_20170216_LCL5_y)
  221. Quartet_DNA_ILM_XTen_WUX_2_20170216_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_WUX_2_20170216_LCL5_x, row.Quartet_DNA_ILM_XTen_WUX_2_20170216_LCL5_y)
  222. Quartet_DNA_ILM_XTen_WUX_3_20170216_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_WUX_3_20170216_LCL5_x, row.Quartet_DNA_ILM_XTen_WUX_3_20170216_LCL5_y)
  223. outline2 = str(row._1) + '\t' + str(row.POS) + '\t' + str(row.ID_x) +'\t' + str(row.REF_x) + '\t' + str(row.ALT_x) + '\t' + '.' + '\t' + '.' + '\t' + str(info) + '\t' + 'GT:DP:AF:GQ:MQ:TWINS:TRIO5:TRIO6' + '\t' \
  224. + str(Quartet_DNA_BGI_SEQ2000_BGI_1_20180518_LCL5) + '\t' + str(Quartet_DNA_BGI_SEQ2000_BGI_2_20180530_LCL5) + '\t' + str(Quartet_DNA_BGI_SEQ2000_BGI_3_20180530_LCL5) + '\t' \
  225. + str(Quartet_DNA_BGI_T7_WGE_1_20191105_LCL5) + '\t' + str(Quartet_DNA_BGI_T7_WGE_2_20191105_LCL5) + '\t' + str(Quartet_DNA_BGI_T7_WGE_3_20191105_LCL5) + '\t' \
  226. + str(Quartet_DNA_ILM_Nova_ARD_1_20181108_LCL5) + '\t' + str(Quartet_DNA_ILM_Nova_ARD_2_20181108_LCL5) + '\t' + str(Quartet_DNA_ILM_Nova_ARD_3_20181108_LCL5) + '\t' \
  227. + str(Quartet_DNA_ILM_Nova_ARD_4_20190111_LCL5) + '\t' + str(Quartet_DNA_ILM_Nova_ARD_5_20190111_LCL5) + '\t' + str(Quartet_DNA_ILM_Nova_ARD_6_20190111_LCL5) + '\t' \
  228. + str(Quartet_DNA_ILM_Nova_BRG_1_20180930_LCL5) + '\t' + str(Quartet_DNA_ILM_Nova_BRG_2_20180930_LCL5) + '\t' + str(Quartet_DNA_ILM_Nova_BRG_3_20180930_LCL5) + '\t' \
  229. + str(Quartet_DNA_ILM_Nova_WUX_1_20190917_LCL5) + '\t' + str(Quartet_DNA_ILM_Nova_WUX_2_20190917_LCL5) + '\t' + str(Quartet_DNA_ILM_Nova_WUX_3_20190917_LCL5) + '\t' \
  230. + str(Quartet_DNA_ILM_XTen_ARD_1_20170403_LCL5) + '\t' + str(Quartet_DNA_ILM_XTen_ARD_2_20170403_LCL5) + '\t' + str(Quartet_DNA_ILM_XTen_ARD_3_20170403_LCL5) + '\t' \
  231. + str(Quartet_DNA_ILM_XTen_NVG_1_20170329_LCL5) + '\t' + str(Quartet_DNA_ILM_XTen_NVG_2_20170329_LCL5) + '\t' + str(Quartet_DNA_ILM_XTen_NVG_3_20170329_LCL5) + '\t' \
  232. + str(Quartet_DNA_ILM_XTen_WUX_1_20170216_LCL5) + '\t' + str(Quartet_DNA_ILM_XTen_WUX_2_20170216_LCL5) + '\t' + str(Quartet_DNA_ILM_XTen_WUX_3_20170216_LCL5) + '\n'
  233. all_sample_outfile.write(outline2)
  234. else:
  235. info = '.'
  236. Quartet_DNA_BGI_SEQ2000_BGI_1_20180518_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_BGI_SEQ2000_BGI_1_20180518_LCL5_x, row.Quartet_DNA_BGI_SEQ2000_BGI_1_20180518_LCL5_y)
  237. Quartet_DNA_BGI_SEQ2000_BGI_2_20180530_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_BGI_SEQ2000_BGI_2_20180530_LCL5_x, row.Quartet_DNA_BGI_SEQ2000_BGI_2_20180530_LCL5_y)
  238. Quartet_DNA_BGI_SEQ2000_BGI_3_20180530_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_BGI_SEQ2000_BGI_3_20180530_LCL5_x, row.Quartet_DNA_BGI_SEQ2000_BGI_3_20180530_LCL5_y)
  239. Quartet_DNA_BGI_T7_WGE_1_20191105_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_BGI_T7_WGE_1_20191105_LCL5_x, row.Quartet_DNA_BGI_T7_WGE_1_20191105_LCL5_y)
  240. Quartet_DNA_BGI_T7_WGE_2_20191105_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_BGI_T7_WGE_2_20191105_LCL5_x, row.Quartet_DNA_BGI_T7_WGE_2_20191105_LCL5_y)
  241. Quartet_DNA_BGI_T7_WGE_3_20191105_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_BGI_T7_WGE_3_20191105_LCL5_x, row.Quartet_DNA_BGI_T7_WGE_3_20191105_LCL5_y)
  242. Quartet_DNA_ILM_Nova_ARD_1_20181108_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_ARD_1_20181108_LCL5_x, row.Quartet_DNA_ILM_Nova_ARD_1_20181108_LCL5_y)
  243. Quartet_DNA_ILM_Nova_ARD_2_20181108_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_ARD_2_20181108_LCL5_x, row.Quartet_DNA_ILM_Nova_ARD_2_20181108_LCL5_y)
  244. Quartet_DNA_ILM_Nova_ARD_3_20181108_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_ARD_3_20181108_LCL5_x, row.Quartet_DNA_ILM_Nova_ARD_3_20181108_LCL5_y)
  245. Quartet_DNA_ILM_Nova_ARD_4_20190111_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_ARD_4_20190111_LCL5_x, row.Quartet_DNA_ILM_Nova_ARD_4_20190111_LCL5_y)
  246. Quartet_DNA_ILM_Nova_ARD_5_20190111_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_ARD_5_20190111_LCL5_x, row.Quartet_DNA_ILM_Nova_ARD_5_20190111_LCL5_y)
  247. Quartet_DNA_ILM_Nova_ARD_6_20190111_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_ARD_6_20190111_LCL5_x, row.Quartet_DNA_ILM_Nova_ARD_6_20190111_LCL5_y)
  248. Quartet_DNA_ILM_Nova_BRG_1_20180930_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_BRG_1_20180930_LCL5_x, row.Quartet_DNA_ILM_Nova_BRG_1_20180930_LCL5_y)
  249. Quartet_DNA_ILM_Nova_BRG_2_20180930_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_BRG_2_20180930_LCL5_x, row.Quartet_DNA_ILM_Nova_BRG_2_20180930_LCL5_y)
  250. Quartet_DNA_ILM_Nova_BRG_3_20180930_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_BRG_3_20180930_LCL5_x, row.Quartet_DNA_ILM_Nova_BRG_3_20180930_LCL5_y)
  251. Quartet_DNA_ILM_Nova_WUX_1_20190917_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_WUX_1_20190917_LCL5_x, row.Quartet_DNA_ILM_Nova_WUX_1_20190917_LCL5_y)
  252. Quartet_DNA_ILM_Nova_WUX_2_20190917_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_WUX_2_20190917_LCL5_x, row.Quartet_DNA_ILM_Nova_WUX_2_20190917_LCL5_y)
  253. Quartet_DNA_ILM_Nova_WUX_3_20190917_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_WUX_3_20190917_LCL5_x, row.Quartet_DNA_ILM_Nova_WUX_3_20190917_LCL5_y)
  254. Quartet_DNA_ILM_XTen_ARD_1_20170403_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_ARD_1_20170403_LCL5_x, row.Quartet_DNA_ILM_XTen_ARD_1_20170403_LCL5_y)
  255. Quartet_DNA_ILM_XTen_ARD_2_20170403_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_ARD_2_20170403_LCL5_x, row.Quartet_DNA_ILM_XTen_ARD_2_20170403_LCL5_y)
  256. Quartet_DNA_ILM_XTen_ARD_3_20170403_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_ARD_3_20170403_LCL5_x, row.Quartet_DNA_ILM_XTen_ARD_3_20170403_LCL5_y)
  257. Quartet_DNA_ILM_XTen_NVG_1_20170329_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_NVG_1_20170329_LCL5_x, row.Quartet_DNA_ILM_XTen_NVG_1_20170329_LCL5_y)
  258. Quartet_DNA_ILM_XTen_NVG_2_20170329_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_NVG_2_20170329_LCL5_x, row.Quartet_DNA_ILM_XTen_NVG_2_20170329_LCL5_y)
  259. Quartet_DNA_ILM_XTen_NVG_3_20170329_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_NVG_3_20170329_LCL5_x, row.Quartet_DNA_ILM_XTen_NVG_3_20170329_LCL5_y)
  260. Quartet_DNA_ILM_XTen_WUX_1_20170216_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_WUX_1_20170216_LCL5_x, row.Quartet_DNA_ILM_XTen_WUX_1_20170216_LCL5_y)
  261. Quartet_DNA_ILM_XTen_WUX_2_20170216_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_WUX_2_20170216_LCL5_x, row.Quartet_DNA_ILM_XTen_WUX_2_20170216_LCL5_y)
  262. Quartet_DNA_ILM_XTen_WUX_3_20170216_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_WUX_3_20170216_LCL5_x, row.Quartet_DNA_ILM_XTen_WUX_3_20170216_LCL5_y)
  263. outline2 = str(row._1) + '\t' + str(row.POS) + '\t' + str(row.ID_x) +'\t' + str(row.REF_x) + '\t' + str(row.ALT_x) + '\t' + '.' + '\t' + '.' + '\t' + str(info) + '\t' + 'GT:DP:AF:GQ:MQ:TWINS:TRIO5:TRIO6' + '\t' \
  264. + str(Quartet_DNA_BGI_SEQ2000_BGI_1_20180518_LCL5) + '\t' + str(Quartet_DNA_BGI_SEQ2000_BGI_2_20180530_LCL5) + '\t' + str(Quartet_DNA_BGI_SEQ2000_BGI_3_20180530_LCL5) + '\t' \
  265. + str(Quartet_DNA_BGI_T7_WGE_1_20191105_LCL5) + '\t' + str(Quartet_DNA_BGI_T7_WGE_2_20191105_LCL5) + '\t' + str(Quartet_DNA_BGI_T7_WGE_3_20191105_LCL5) + '\t' \
  266. + str(Quartet_DNA_ILM_Nova_ARD_1_20181108_LCL5) + '\t' + str(Quartet_DNA_ILM_Nova_ARD_2_20181108_LCL5) + '\t' + str(Quartet_DNA_ILM_Nova_ARD_3_20181108_LCL5) + '\t' \
  267. + str(Quartet_DNA_ILM_Nova_ARD_4_20190111_LCL5) + '\t' + str(Quartet_DNA_ILM_Nova_ARD_5_20190111_LCL5) + '\t' + str(Quartet_DNA_ILM_Nova_ARD_6_20190111_LCL5) + '\t' \
  268. + str(Quartet_DNA_ILM_Nova_BRG_1_20180930_LCL5) + '\t' + str(Quartet_DNA_ILM_Nova_BRG_2_20180930_LCL5) + '\t' + str(Quartet_DNA_ILM_Nova_BRG_3_20180930_LCL5) + '\t' \
  269. + str(Quartet_DNA_ILM_Nova_WUX_1_20190917_LCL5) + '\t' + str(Quartet_DNA_ILM_Nova_WUX_2_20190917_LCL5) + '\t' + str(Quartet_DNA_ILM_Nova_WUX_3_20190917_LCL5) + '\t' \
  270. + str(Quartet_DNA_ILM_XTen_ARD_1_20170403_LCL5) + '\t' + str(Quartet_DNA_ILM_XTen_ARD_2_20170403_LCL5) + '\t' + str(Quartet_DNA_ILM_XTen_ARD_3_20170403_LCL5) + '\t' \
  271. + str(Quartet_DNA_ILM_XTen_NVG_1_20170329_LCL5) + '\t' + str(Quartet_DNA_ILM_XTen_NVG_2_20170329_LCL5) + '\t' + str(Quartet_DNA_ILM_XTen_NVG_3_20170329_LCL5) + '\t' \
  272. + str(Quartet_DNA_ILM_XTen_WUX_1_20170216_LCL5) + '\t' + str(Quartet_DNA_ILM_XTen_WUX_2_20170216_LCL5) + '\t' + str(Quartet_DNA_ILM_XTen_WUX_3_20170216_LCL5) + '\n'
  273. all_sample_outfile.write(outline2)
  274. else: