|
# Extract Mendelian concordance information for one family.
#
# Two tab-separated tables (the LCL5 and LCL6 family calls) are outer-joined
# on (#CHROM, POS). For every site one line is written to "<family>.txt":
#
#     <#CHROM> TAB <POS> TAB <flag>:<LCL5 info>:<LCL6 info>
#
# where <flag> is 1 when the CHILD value is identical in both tables and 0
# otherwise, and each info field is the text after the first '=' of that
# table's INFO column ('.' when the site is absent from that table).
from __future__ import division

import argparse
import fileinput
import os
import re
import sys

import pandas as pd

# Columns that identify a site; the two input tables are joined on them.
KEY_COLUMNS = ['#CHROM', 'POS']

# Columns of the merged table consumed per output record. Plain `merge`
# adds the _x (left / LCL5) and _y (right / LCL6) suffixes to the
# overlapping non-key columns.
RECORD_COLUMNS = ['#CHROM', 'POS', 'CHILD_x', 'CHILD_y', 'INFO_x', 'INFO_y']


def build_parser():
    """Return the command-line parser for this script."""
    parser = argparse.ArgumentParser(description="this script is to extract mendelian concordance information")
    parser.add_argument('-LCL5', '--LCL5', type=str, help='LCL5 family info', required=True)
    parser.add_argument('-LCL6', '--LCL6', type=str, help='LCL6 family info', required=True)
    parser.add_argument('-family', '--family', type=str, help='family name', required=True)
    return parser


def read_family_table(path):
    """Read one tab-separated family table into a DataFrame.

    ``#CHROM`` is forced to ``str``. With inferred dtypes a file whose
    chromosome names are all numeric is parsed as integers, which cannot be
    concatenated into the output line and may not match the other table's
    key column during the merge.
    """
    return pd.read_table(path, dtype={'#CHROM': str})


def merge_family_tables(lcl5_dat, lcl6_dat):
    """Outer-join the LCL5 and LCL6 tables on ``#CHROM`` and ``POS``.

    Sites present in only one table are kept; the columns coming from the
    other table are null for those rows.
    """
    return pd.merge(lcl5_dat, lcl6_dat, how='outer', on=KEY_COLUMNS)


def _info_value(info):
    """Return the text after the first '=' of *info*, or '.' when it is null."""
    if pd.isnull(info):
        return '.'
    return info.split('=')[1]


def mendelian_tag(child_x, child_y, info_x, info_y):
    """Build the ``flag:info_x:info_y`` field for one site.

    The flag is '1' when both CHILD values compare equal and '0' otherwise.
    A site missing from one table has a null CHILD there, which never
    compares equal, so such sites are always flagged '0'.
    """
    flag = '1' if child_x == child_y else '0'
    return ':'.join((flag, _info_value(info_x), _info_value(info_y)))


def concordance_lines(merged):
    """Return an iterable of output lines for a merged family table.

    Columns are looked up by name up front, so a missing column raises
    ``KeyError`` immediately and the result does not depend on ``#CHROM``
    being the first column of the input.
    """
    columns = [merged[name] for name in RECORD_COLUMNS]
    return (
        '\t'.join((str(chrom), str(pos), mendelian_tag(child_x, child_y, info_x, info_y))) + '\n'
        for chrom, pos, child_x, child_y, info_x, info_y in zip(*columns)
    )


def main(argv=None):
    """Parse the command line, merge both tables and write ``<family>.txt``.

    *argv* defaults to ``sys.argv[1:]`` (argparse's behaviour for ``None``).
    """
    args = build_parser().parse_args(argv)

    # Read and merge before touching the output, so a bad input path does
    # not leave an empty result file behind.
    merged = merge_family_tables(read_family_table(args.LCL5),
                                 read_family_table(args.LCL6))
    lines = concordance_lines(merged)

    family_name = args.family + '.txt'
    # The context manager guarantees the file is flushed and closed.
    with open(family_name, 'w') as family_file:
        family_file.writelines(lines)


if __name__ == '__main__':
    main()
|