"""Calculate genotype reproducibility between the sister samples D5 and D6.

Reads a tab-separated table (``--sister``), compares the two sister genotype
columns site by site, and writes the fraction of concordant calls, separately
for SNVs and INDELs, to ``<project>.sister.reproducibility.txt``.

Rows are consumed through ``DataFrame.itertuples()``, so position 0 is the
index; positions 3 and 4 are used as the REF and ALT alleles and positions
5 and 6 as the two sister genotypes (i.e. the 3rd to 6th table columns).
"""
from __future__ import division

import argparse
import os
import sys

import pandas as pd

# Genotypes that do not count as a variant call: missing and homozygous-ref.
NON_VARIANT_GENOTYPES = ('./.', '0/0')


def classify_variant(ref, alt):
    """Return 'SNV' or 'INDEL' for a site.

    ``alt`` may hold several comma-separated alleles; the longest one decides.
    A site is an SNV only when REF and the longest ALT are both one character
    long; every other length combination (including equal lengths > 1) is
    reported as 'INDEL'.
    """
    longest_alt = max(len(allele) for allele in alt.split(','))
    if len(ref) == 1 and longest_alt == 1:
        return 'SNV'
    return 'INDEL'


def sister_status(gt_d5, gt_d6):
    """Compare the two sister genotypes of one site.

    Returns:
        'no'       -- both genotypes are missing/hom-ref; the site is ignored.
        'yes_same' -- identical genotypes carrying a variant call.
        'yes_diff' -- genotypes differ and at least one carries a variant call.
    """
    if gt_d5 in NON_VARIANT_GENOTYPES and gt_d6 in NON_VARIANT_GENOTYPES:
        return 'no'
    if gt_d5 == gt_d6:
        return 'yes_same'
    return 'yes_diff'


def tally_sisters(rows):
    """Count concordant and discordant sister calls per variant category.

    Args:
        rows: iterable of tuples laid out like ``DataFrame.itertuples()``
            output (index at position 0, REF/ALT at 3/4, genotypes at 5/6).

    Returns:
        dict mapping 'SNV' and 'INDEL' to ``{'yes_same': n, 'yes_diff': n}``.
    """
    counts = {
        'SNV': {'yes_same': 0, 'yes_diff': 0},
        'INDEL': {'yes_same': 0, 'yes_diff': 0},
    }
    for row in rows:
        status = sister_status(row[5], row[6])
        if status == 'no':
            continue
        counts[classify_variant(row[3], row[4])][status] += 1
    return counts


def reproducibility(same, diff):
    """Return same / (same + diff), or None when nothing was counted."""
    total = same + diff
    if total == 0:
        # Avoids the ZeroDivisionError raised when a category has no sites.
        return None
    return same / total


def format_ratio(value):
    """Render a reproducibility value for the report ('NA' when undefined)."""
    return 'NA' if value is None else str(value)


def main(argv=None):
    """Parse the command line, tally the sister table and write the report.

    Args:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description="this script is to calculate reproducibility between Quartet_D5 and Quartet_D6s")
    parser.add_argument('-sister', '--sister', type=str, help='sister.txt', required=True)
    parser.add_argument('-project', '--project', type=str, help='project name', required=True)
    args = parser.parse_args(argv)

    project_name = args.project
    sister_dat = pd.read_table(args.sister)
    counts = tally_sisters(sister_dat.itertuples())

    output_name = project_name + '.sister.reproducibility.txt'
    # The context manager guarantees the report is flushed and closed.
    with open(output_name, 'w') as output_file:
        output_file.write('Project\tReproducibility_D5_D6\n')
        # INDEL line first, then SNV, matching the established report layout.
        for cate in ('INDEL', 'SNV'):
            ratio = reproducibility(counts[cate]['yes_same'],
                                    counts[cate]['yes_diff'])
            output_file.write(
                project_name + '.' + cate + '\t' + format_ratio(ratio) + '\n')


if __name__ == '__main__':
    main()