# renluyao / benchmark_calls_gvcf


			
							from __future__ import division
import pandas as pd
import sys, argparse, os
import fileinput
import re

# command-line interface: every option below is a required string
parser = argparse.ArgumentParser(
    description="this script is to extract mendelian concordance information"
)

# (option name, help text) pairs; the short and long flags of each option
# are spelled the same and differ only in the number of leading dashes
for _opt_name, _opt_help in (
    ('LCL5', 'LCL5 family info'),
    ('LCL6', 'LCL6 family info'),
    ('family', 'family name'),
):
    parser.add_argument('-' + _opt_name, '--' + _opt_name, type=str,
                        help=_opt_help, required=True)

# parse once and expose the three values under short module-level names
args = parser.parse_args()
lcl5, lcl6, family = args.LCL5, args.LCL6, args.family


# output file: "<family>.txt" in the current working directory
family_name = family + '.txt'


def _info_value(info):
    """Return the text after the first '=' of an INFO cell, or '.' if missing.

    A null cell (a site absent from one side of the outer merge, or an empty
    INFO field) is reported as '.'.  Otherwise the cell is split on '=' and
    the second piece is returned, exactly as the original inline code did.
    """
    if pd.isnull(info):
        return '.'
    return info.split('=')[1]


# input files: read both tables before creating the output, so a failed read
# does not leave an empty output file behind
lcl5_dat = pd.read_table(lcl5)
lcl6_dat = pd.read_table(lcl6)

# outer join on chromosome/position keeps sites present in only one table;
# non-key columns shared by both tables get the _x (LCL5) / _y (LCL6) suffixes
merged_df = pd.merge(lcl5_dat, lcl6_dat, how='outer', left_on=['#CHROM','POS'], right_on = ['#CHROM','POS'])

# Select the needed columns by name instead of relying on itertuples()
# renaming '#CHROM' to a positional attribute such as `_1`, which is only
# correct when '#CHROM' happens to be the first column of the merged frame.
_needed = ['#CHROM', 'POS', 'CHILD_x', 'CHILD_y', 'INFO_x', 'INFO_y']

# the context manager guarantees the output is flushed and closed
with open(family_name, 'w') as family_file:
    for chrom, pos, child_x, child_y, info_x, info_y in merged_df[_needed].itertuples(index=False, name=None):
        # first field: '1' when the CHILD values of both tables are equal,
        # '0' otherwise (a site missing on one side compares unequal)
        mendelian = '1' if child_x == child_y else '0'
        # second and third fields: INFO value from each table, '.' if absent
        mendelian = mendelian + ':' + _info_value(info_x) + ':' + _info_value(info_y)

        # str(chrom) keeps this working when '#CHROM' was parsed as numeric
        outline = str(chrom) + '\t' + str(pos) + '\t' + mendelian + '\n'
        family_file.write(outline)