from __future__ import division | |||||
import pandas as pd | import pandas as pd | ||||
import sys, argparse, os | import sys, argparse, os | ||||
import fileinput | import fileinput | ||||
# Load the two input tables. pd.read_table reads tab-separated text and takes
# the first line as the column header.
vcf_info = pd.read_table(vcfInfo)
mendelian_info = pd.read_table(mendelianInfo)

# Outer join on chromosome and position so that a site present in only one of
# the two tables is still kept. Non-key columns that exist in both tables get
# pandas' default '_x' (vcf_info) and '_y' (mendelian_info) suffixes.
merged_df = pd.merge(
    vcf_info,
    mendelian_info,
    how='outer',
    left_on=['#CHROM', 'POS'],
    right_on=['#CHROM', 'POS'],
)

# Cells left empty by the outer join (or missing in the inputs) are replaced
# by '.', so downstream code tests for the string '.' rather than NaN.
merged_df = merged_df.fillna('.')
# function | # function | ||||
# | # | ||||
# Per-replicate column names, in the exact order in which the replicate
# fields are written to the all-sample output. In merged_df each name occurs
# twice: with an '_x' suffix and with a '_y' suffix.
_LCL5_REPLICATE_COLUMNS = (
    'Quartet_DNA_BGI_SEQ2000_BGI_1_20180518_LCL5',
    'Quartet_DNA_BGI_SEQ2000_BGI_2_20180530_LCL5',
    'Quartet_DNA_BGI_SEQ2000_BGI_3_20180530_LCL5',
    'Quartet_DNA_BGI_T7_WGE_1_20191105_LCL5',
    'Quartet_DNA_BGI_T7_WGE_2_20191105_LCL5',
    'Quartet_DNA_BGI_T7_WGE_3_20191105_LCL5',
    'Quartet_DNA_ILM_Nova_ARD_1_20181108_LCL5',
    'Quartet_DNA_ILM_Nova_ARD_2_20181108_LCL5',
    'Quartet_DNA_ILM_Nova_ARD_3_20181108_LCL5',
    'Quartet_DNA_ILM_Nova_ARD_4_20190111_LCL5',
    'Quartet_DNA_ILM_Nova_ARD_5_20190111_LCL5',
    'Quartet_DNA_ILM_Nova_ARD_6_20190111_LCL5',
    'Quartet_DNA_ILM_Nova_BRG_1_20180930_LCL5',
    'Quartet_DNA_ILM_Nova_BRG_2_20180930_LCL5',
    'Quartet_DNA_ILM_Nova_BRG_3_20180930_LCL5',
    'Quartet_DNA_ILM_Nova_WUX_1_20190917_LCL5',
    'Quartet_DNA_ILM_Nova_WUX_2_20190917_LCL5',
    'Quartet_DNA_ILM_Nova_WUX_3_20190917_LCL5',
    'Quartet_DNA_ILM_XTen_ARD_1_20170403_LCL5',
    'Quartet_DNA_ILM_XTen_ARD_2_20170403_LCL5',
    'Quartet_DNA_ILM_XTen_ARD_3_20170403_LCL5',
    'Quartet_DNA_ILM_XTen_NVG_1_20170329_LCL5',
    'Quartet_DNA_ILM_XTen_NVG_2_20170329_LCL5',
    'Quartet_DNA_ILM_XTen_NVG_3_20170329_LCL5',
    'Quartet_DNA_ILM_XTen_WUX_1_20170216_LCL5',
    'Quartet_DNA_ILM_XTen_WUX_2_20170216_LCL5',
    'Quartet_DNA_ILM_XTen_WUX_3_20170216_LCL5',
)

# For every merged site write
#   * one record to `outfile` when the site is flagged 'reproducible', and
#   * one record with all replicate columns to `all_sample_outfile`.
# row._1 and row._42 are the names itertuples() gives to the 1st and 42nd
# columns because their headers are not valid Python identifiers
# (presumably '#CHROM' for _1 -- confirm against the input header).
# Positional indices (row[5], row[75]..row[78]) depend on the exact column
# layout of merged_df and are kept as in the original code.
for row in merged_df.itertuples():
    info = 'location=' + str(row.location) + ';' + str(row.INFO_y)

    if row.FILTER_y == 'reproducible':
        # Reference depth is total depth minus the alternate-allele count.
        ref = row.DP - row._42
        # row[77] / row[78] are the same columns that are reported as
        # CONSENSUS / CONSENSUS_SEQ in the INFO field further down.
        FORMAT = ':'.join([
            row[77],
            str(int(ref)) + ',' + str(int(row._42)),
            str(int(row.DP)),
            str(round(row.AF, 2)),
            str(round(row.GQ, 2)),
            str(round(row.MQ, 2)),
        ])
        outline1 = '\t'.join([
            str(row._1), str(row.POS), str(row.ID_x), str(row.REF),
            str(row[78]), '.', '.', str(info), 'GT:AD:DP:AF:GQ:MQ',
            str(FORMAT),
        ]) + '\n'
        outfile.write(outline1)

    if row.INFO_x != '.':
        # Missing values were replaced by '.' upstream; round('.') would
        # raise TypeError, so a missing allele frequency is reported as NA.
        if row.AF == '.':
            all_af = 'NA'
        else:
            all_af = str(round(row.AF, 1))
        info = ';'.join([
            'location=' + str(row.location),
            str(row.INFO_y),
            'ALL_ALT=' + str(int(row._42)),
            'ALL_DP=' + str(int(row.DP)),
            'ALL_AF=' + all_af,
            'GQ_MEAN=' + str(round(row.GQ, 1)),
            'MQ_MEAN=' + str(round(row.MQ, 1)),
            'PCR=' + str(row[75]),
            'PCR_FREE=' + str(row[76]),
            'CONSENSUS=' + str(row[77]),
            'CONSENSUS_SEQ=' + str(row[78]),
        ])
    else:
        # No INFO available from the '_x' side of the merge for this site.
        info = '.'

    # One field per replicate, built from the shared FORMAT_x string and the
    # replicate's '_x' and '_y' values by single_sample_format (a helper that
    # is not visible in this part of the file).
    replicate_fields = [
        str(single_sample_format(row.FORMAT_x,
                                 getattr(row, column + '_x'),
                                 getattr(row, column + '_y')))
        for column in _LCL5_REPLICATE_COLUMNS
    ]
    fixed_fields = [
        str(row._1), str(row.POS), str(row.ID_x), str(row.REF), str(row[5]),
        '.', '.', str(info), 'GT:DP:AF:GQ:MQ:TWINS:TRIO5:TRIO6',
    ]
    outline2 = '\t'.join(fixed_fields + replicate_fields) + '\n'
    all_sample_outfile.write(outline2)
vcf_count = row[10:37].count('.') | |||||
mendelian_count = row[50:77].count('.') | |||||
if vcf_count == mendelian_count: | |||||
info = 'location=' + str(row.location) + ';' + str(row.INFO_y) | |||||
if row.FILTER_y == 'reproducible': | |||||
ref = row.DP - row._42 | |||||
FORMAT = row[79] + ':' + str(int(ref)) + ',' + str(int(row._42)) + ':' + str(int(row.DP)) + ':' + str(round(row.AF,2)) + ':' + str(round(row.GQ,2)) + ':' + str(round(row.MQ,2)) | |||||
outline1 = str(row._1) + '\t' + str(row.POS) + '\t' + str(row.ID_x) + '\t' + str(row.REF_y) + '\t' + str(row[80]) + '\t' + '.' + '\t' + '.' + '\t' + str(info) + '\t' + 'GT:AD:DP:AF:GQ:MQ' + '\t' + str(FORMAT) + '\n' | |||||
outfile.write(outline1) | |||||
else: | |||||
pass | |||||
if row.INFO_x != '.': | |||||
if row.AF=='.': | |||||
info = 'location=' + str(row.location) + ';' + str(row.INFO_y) + ';' + 'ALL_ALT=' + str(int(row._42)) + ';' + 'ALL_DP=' + str(int(row.DP)) + ';' + 'ALL_AF=' + 'NA' + ';' + 'GQ_MEAN=' + str(row.GQ) + ';' + 'MQ_MEAN=' + str(row.MQ) + ';' + 'PCR=' + str(row[77]) + ';' + 'PCR_FREE=' + str(row[78]) + ';' + 'CONSENSUS=' + str(row[79]) + ';' + 'CONSENSUS_SEQ=' + str(row[80]) | |||||
else: | |||||
info = 'location=' + str(row.location) + ';' + str(row.INFO_y) + ';' + 'ALL_ALT=' + str(int(row._42)) + ';' + 'ALL_DP=' + str(int(row.DP)) + ';' + 'ALL_AF=' + str(round(float(row.AF),2)) + ';' + 'GQ_MEAN=' + str(row.GQ) + ';' + 'MQ_MEAN=' + str(row.MQ) + ';' + 'PCR=' + str(row[77]) + ';' + 'PCR_FREE=' + str(row[78]) + ';' + 'CONSENSUS=' + str(row[79]) + ';' + 'CONSENSUS_SEQ=' + str(row[80]) | |||||
Quartet_DNA_BGI_SEQ2000_BGI_1_20180518_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_BGI_SEQ2000_BGI_1_20180518_LCL5_x, row.Quartet_DNA_BGI_SEQ2000_BGI_1_20180518_LCL5_y) | |||||
Quartet_DNA_BGI_SEQ2000_BGI_2_20180530_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_BGI_SEQ2000_BGI_2_20180530_LCL5_x, row.Quartet_DNA_BGI_SEQ2000_BGI_2_20180530_LCL5_y) | |||||
Quartet_DNA_BGI_SEQ2000_BGI_3_20180530_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_BGI_SEQ2000_BGI_3_20180530_LCL5_x, row.Quartet_DNA_BGI_SEQ2000_BGI_3_20180530_LCL5_y) | |||||
Quartet_DNA_BGI_T7_WGE_1_20191105_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_BGI_T7_WGE_1_20191105_LCL5_x, row.Quartet_DNA_BGI_T7_WGE_1_20191105_LCL5_y) | |||||
Quartet_DNA_BGI_T7_WGE_2_20191105_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_BGI_T7_WGE_2_20191105_LCL5_x, row.Quartet_DNA_BGI_T7_WGE_2_20191105_LCL5_y) | |||||
Quartet_DNA_BGI_T7_WGE_3_20191105_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_BGI_T7_WGE_3_20191105_LCL5_x, row.Quartet_DNA_BGI_T7_WGE_3_20191105_LCL5_y) | |||||
Quartet_DNA_ILM_Nova_ARD_1_20181108_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_ARD_1_20181108_LCL5_x, row.Quartet_DNA_ILM_Nova_ARD_1_20181108_LCL5_y) | |||||
Quartet_DNA_ILM_Nova_ARD_2_20181108_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_ARD_2_20181108_LCL5_x, row.Quartet_DNA_ILM_Nova_ARD_2_20181108_LCL5_y) | |||||
Quartet_DNA_ILM_Nova_ARD_3_20181108_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_ARD_3_20181108_LCL5_x, row.Quartet_DNA_ILM_Nova_ARD_3_20181108_LCL5_y) | |||||
Quartet_DNA_ILM_Nova_ARD_4_20190111_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_ARD_4_20190111_LCL5_x, row.Quartet_DNA_ILM_Nova_ARD_4_20190111_LCL5_y) | |||||
Quartet_DNA_ILM_Nova_ARD_5_20190111_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_ARD_5_20190111_LCL5_x, row.Quartet_DNA_ILM_Nova_ARD_5_20190111_LCL5_y) | |||||
Quartet_DNA_ILM_Nova_ARD_6_20190111_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_ARD_6_20190111_LCL5_x, row.Quartet_DNA_ILM_Nova_ARD_6_20190111_LCL5_y) | |||||
Quartet_DNA_ILM_Nova_BRG_1_20180930_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_BRG_1_20180930_LCL5_x, row.Quartet_DNA_ILM_Nova_BRG_1_20180930_LCL5_y) | |||||
Quartet_DNA_ILM_Nova_BRG_2_20180930_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_BRG_2_20180930_LCL5_x, row.Quartet_DNA_ILM_Nova_BRG_2_20180930_LCL5_y) | |||||
Quartet_DNA_ILM_Nova_BRG_3_20180930_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_BRG_3_20180930_LCL5_x, row.Quartet_DNA_ILM_Nova_BRG_3_20180930_LCL5_y) | |||||
Quartet_DNA_ILM_Nova_WUX_1_20190917_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_WUX_1_20190917_LCL5_x, row.Quartet_DNA_ILM_Nova_WUX_1_20190917_LCL5_y) | |||||
Quartet_DNA_ILM_Nova_WUX_2_20190917_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_WUX_2_20190917_LCL5_x, row.Quartet_DNA_ILM_Nova_WUX_2_20190917_LCL5_y) | |||||
Quartet_DNA_ILM_Nova_WUX_3_20190917_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_WUX_3_20190917_LCL5_x, row.Quartet_DNA_ILM_Nova_WUX_3_20190917_LCL5_y) | |||||
Quartet_DNA_ILM_XTen_ARD_1_20170403_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_ARD_1_20170403_LCL5_x, row.Quartet_DNA_ILM_XTen_ARD_1_20170403_LCL5_y) | |||||
Quartet_DNA_ILM_XTen_ARD_2_20170403_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_ARD_2_20170403_LCL5_x, row.Quartet_DNA_ILM_XTen_ARD_2_20170403_LCL5_y) | |||||
Quartet_DNA_ILM_XTen_ARD_3_20170403_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_ARD_3_20170403_LCL5_x, row.Quartet_DNA_ILM_XTen_ARD_3_20170403_LCL5_y) | |||||
Quartet_DNA_ILM_XTen_NVG_1_20170329_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_NVG_1_20170329_LCL5_x, row.Quartet_DNA_ILM_XTen_NVG_1_20170329_LCL5_y) | |||||
Quartet_DNA_ILM_XTen_NVG_2_20170329_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_NVG_2_20170329_LCL5_x, row.Quartet_DNA_ILM_XTen_NVG_2_20170329_LCL5_y) | |||||
Quartet_DNA_ILM_XTen_NVG_3_20170329_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_NVG_3_20170329_LCL5_x, row.Quartet_DNA_ILM_XTen_NVG_3_20170329_LCL5_y) | |||||
Quartet_DNA_ILM_XTen_WUX_1_20170216_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_WUX_1_20170216_LCL5_x, row.Quartet_DNA_ILM_XTen_WUX_1_20170216_LCL5_y) | |||||
Quartet_DNA_ILM_XTen_WUX_2_20170216_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_WUX_2_20170216_LCL5_x, row.Quartet_DNA_ILM_XTen_WUX_2_20170216_LCL5_y) | |||||
Quartet_DNA_ILM_XTen_WUX_3_20170216_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_WUX_3_20170216_LCL5_x, row.Quartet_DNA_ILM_XTen_WUX_3_20170216_LCL5_y) | |||||
outline2 = str(row._1) + '\t' + str(row.POS) + '\t' + str(row.ID_x) +'\t' + str(row.REF_x) + '\t' + str(row.ALT_x) + '\t' + '.' + '\t' + '.' + '\t' + str(info) + '\t' + 'GT:DP:AF:GQ:MQ:TWINS:TRIO5:TRIO6' + '\t' \ | |||||
+ str(Quartet_DNA_BGI_SEQ2000_BGI_1_20180518_LCL5) + '\t' + str(Quartet_DNA_BGI_SEQ2000_BGI_2_20180530_LCL5) + '\t' + str(Quartet_DNA_BGI_SEQ2000_BGI_3_20180530_LCL5) + '\t' \ | |||||
+ str(Quartet_DNA_BGI_T7_WGE_1_20191105_LCL5) + '\t' + str(Quartet_DNA_BGI_T7_WGE_2_20191105_LCL5) + '\t' + str(Quartet_DNA_BGI_T7_WGE_3_20191105_LCL5) + '\t' \ | |||||
+ str(Quartet_DNA_ILM_Nova_ARD_1_20181108_LCL5) + '\t' + str(Quartet_DNA_ILM_Nova_ARD_2_20181108_LCL5) + '\t' + str(Quartet_DNA_ILM_Nova_ARD_3_20181108_LCL5) + '\t' \ | |||||
+ str(Quartet_DNA_ILM_Nova_ARD_4_20190111_LCL5) + '\t' + str(Quartet_DNA_ILM_Nova_ARD_5_20190111_LCL5) + '\t' + str(Quartet_DNA_ILM_Nova_ARD_6_20190111_LCL5) + '\t' \ | |||||
+ str(Quartet_DNA_ILM_Nova_BRG_1_20180930_LCL5) + '\t' + str(Quartet_DNA_ILM_Nova_BRG_2_20180930_LCL5) + '\t' + str(Quartet_DNA_ILM_Nova_BRG_3_20180930_LCL5) + '\t' \ | |||||
+ str(Quartet_DNA_ILM_Nova_WUX_1_20190917_LCL5) + '\t' + str(Quartet_DNA_ILM_Nova_WUX_2_20190917_LCL5) + '\t' + str(Quartet_DNA_ILM_Nova_WUX_3_20190917_LCL5) + '\t' \ | |||||
+ str(Quartet_DNA_ILM_XTen_ARD_1_20170403_LCL5) + '\t' + str(Quartet_DNA_ILM_XTen_ARD_2_20170403_LCL5) + '\t' + str(Quartet_DNA_ILM_XTen_ARD_3_20170403_LCL5) + '\t' \ | |||||
+ str(Quartet_DNA_ILM_XTen_NVG_1_20170329_LCL5) + '\t' + str(Quartet_DNA_ILM_XTen_NVG_2_20170329_LCL5) + '\t' + str(Quartet_DNA_ILM_XTen_NVG_3_20170329_LCL5) + '\t' \ | |||||
+ str(Quartet_DNA_ILM_XTen_WUX_1_20170216_LCL5) + '\t' + str(Quartet_DNA_ILM_XTen_WUX_2_20170216_LCL5) + '\t' + str(Quartet_DNA_ILM_XTen_WUX_3_20170216_LCL5) + '\n' | |||||
all_sample_outfile.write(outline2) | |||||
else: | |||||
info = '.' | |||||
Quartet_DNA_BGI_SEQ2000_BGI_1_20180518_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_BGI_SEQ2000_BGI_1_20180518_LCL5_x, row.Quartet_DNA_BGI_SEQ2000_BGI_1_20180518_LCL5_y) | |||||
Quartet_DNA_BGI_SEQ2000_BGI_2_20180530_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_BGI_SEQ2000_BGI_2_20180530_LCL5_x, row.Quartet_DNA_BGI_SEQ2000_BGI_2_20180530_LCL5_y) | |||||
Quartet_DNA_BGI_SEQ2000_BGI_3_20180530_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_BGI_SEQ2000_BGI_3_20180530_LCL5_x, row.Quartet_DNA_BGI_SEQ2000_BGI_3_20180530_LCL5_y) | |||||
Quartet_DNA_BGI_T7_WGE_1_20191105_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_BGI_T7_WGE_1_20191105_LCL5_x, row.Quartet_DNA_BGI_T7_WGE_1_20191105_LCL5_y) | |||||
Quartet_DNA_BGI_T7_WGE_2_20191105_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_BGI_T7_WGE_2_20191105_LCL5_x, row.Quartet_DNA_BGI_T7_WGE_2_20191105_LCL5_y) | |||||
Quartet_DNA_BGI_T7_WGE_3_20191105_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_BGI_T7_WGE_3_20191105_LCL5_x, row.Quartet_DNA_BGI_T7_WGE_3_20191105_LCL5_y) | |||||
Quartet_DNA_ILM_Nova_ARD_1_20181108_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_ARD_1_20181108_LCL5_x, row.Quartet_DNA_ILM_Nova_ARD_1_20181108_LCL5_y) | |||||
Quartet_DNA_ILM_Nova_ARD_2_20181108_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_ARD_2_20181108_LCL5_x, row.Quartet_DNA_ILM_Nova_ARD_2_20181108_LCL5_y) | |||||
Quartet_DNA_ILM_Nova_ARD_3_20181108_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_ARD_3_20181108_LCL5_x, row.Quartet_DNA_ILM_Nova_ARD_3_20181108_LCL5_y) | |||||
Quartet_DNA_ILM_Nova_ARD_4_20190111_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_ARD_4_20190111_LCL5_x, row.Quartet_DNA_ILM_Nova_ARD_4_20190111_LCL5_y) | |||||
Quartet_DNA_ILM_Nova_ARD_5_20190111_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_ARD_5_20190111_LCL5_x, row.Quartet_DNA_ILM_Nova_ARD_5_20190111_LCL5_y) | |||||
Quartet_DNA_ILM_Nova_ARD_6_20190111_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_ARD_6_20190111_LCL5_x, row.Quartet_DNA_ILM_Nova_ARD_6_20190111_LCL5_y) | |||||
Quartet_DNA_ILM_Nova_BRG_1_20180930_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_BRG_1_20180930_LCL5_x, row.Quartet_DNA_ILM_Nova_BRG_1_20180930_LCL5_y) | |||||
Quartet_DNA_ILM_Nova_BRG_2_20180930_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_BRG_2_20180930_LCL5_x, row.Quartet_DNA_ILM_Nova_BRG_2_20180930_LCL5_y) | |||||
Quartet_DNA_ILM_Nova_BRG_3_20180930_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_BRG_3_20180930_LCL5_x, row.Quartet_DNA_ILM_Nova_BRG_3_20180930_LCL5_y) | |||||
Quartet_DNA_ILM_Nova_WUX_1_20190917_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_WUX_1_20190917_LCL5_x, row.Quartet_DNA_ILM_Nova_WUX_1_20190917_LCL5_y) | |||||
Quartet_DNA_ILM_Nova_WUX_2_20190917_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_WUX_2_20190917_LCL5_x, row.Quartet_DNA_ILM_Nova_WUX_2_20190917_LCL5_y) | |||||
Quartet_DNA_ILM_Nova_WUX_3_20190917_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_Nova_WUX_3_20190917_LCL5_x, row.Quartet_DNA_ILM_Nova_WUX_3_20190917_LCL5_y) | |||||
Quartet_DNA_ILM_XTen_ARD_1_20170403_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_ARD_1_20170403_LCL5_x, row.Quartet_DNA_ILM_XTen_ARD_1_20170403_LCL5_y) | |||||
Quartet_DNA_ILM_XTen_ARD_2_20170403_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_ARD_2_20170403_LCL5_x, row.Quartet_DNA_ILM_XTen_ARD_2_20170403_LCL5_y) | |||||
Quartet_DNA_ILM_XTen_ARD_3_20170403_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_ARD_3_20170403_LCL5_x, row.Quartet_DNA_ILM_XTen_ARD_3_20170403_LCL5_y) | |||||
Quartet_DNA_ILM_XTen_NVG_1_20170329_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_NVG_1_20170329_LCL5_x, row.Quartet_DNA_ILM_XTen_NVG_1_20170329_LCL5_y) | |||||
Quartet_DNA_ILM_XTen_NVG_2_20170329_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_NVG_2_20170329_LCL5_x, row.Quartet_DNA_ILM_XTen_NVG_2_20170329_LCL5_y) | |||||
Quartet_DNA_ILM_XTen_NVG_3_20170329_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_NVG_3_20170329_LCL5_x, row.Quartet_DNA_ILM_XTen_NVG_3_20170329_LCL5_y) | |||||
Quartet_DNA_ILM_XTen_WUX_1_20170216_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_WUX_1_20170216_LCL5_x, row.Quartet_DNA_ILM_XTen_WUX_1_20170216_LCL5_y) | |||||
Quartet_DNA_ILM_XTen_WUX_2_20170216_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_WUX_2_20170216_LCL5_x, row.Quartet_DNA_ILM_XTen_WUX_2_20170216_LCL5_y) | |||||
Quartet_DNA_ILM_XTen_WUX_3_20170216_LCL5 = single_sample_format(row.FORMAT_x, row.Quartet_DNA_ILM_XTen_WUX_3_20170216_LCL5_x, row.Quartet_DNA_ILM_XTen_WUX_3_20170216_LCL5_y) | |||||
outline2 = str(row._1) + '\t' + str(row.POS) + '\t' + str(row.ID_x) +'\t' + str(row.REF_x) + '\t' + str(row.ALT_x) + '\t' + '.' + '\t' + '.' + '\t' + str(info) + '\t' + 'GT:DP:AF:GQ:MQ:TWINS:TRIO5:TRIO6' + '\t' \ | |||||
+ str(Quartet_DNA_BGI_SEQ2000_BGI_1_20180518_LCL5) + '\t' + str(Quartet_DNA_BGI_SEQ2000_BGI_2_20180530_LCL5) + '\t' + str(Quartet_DNA_BGI_SEQ2000_BGI_3_20180530_LCL5) + '\t' \ | |||||
+ str(Quartet_DNA_BGI_T7_WGE_1_20191105_LCL5) + '\t' + str(Quartet_DNA_BGI_T7_WGE_2_20191105_LCL5) + '\t' + str(Quartet_DNA_BGI_T7_WGE_3_20191105_LCL5) + '\t' \ | |||||
+ str(Quartet_DNA_ILM_Nova_ARD_1_20181108_LCL5) + '\t' + str(Quartet_DNA_ILM_Nova_ARD_2_20181108_LCL5) + '\t' + str(Quartet_DNA_ILM_Nova_ARD_3_20181108_LCL5) + '\t' \ | |||||
+ str(Quartet_DNA_ILM_Nova_ARD_4_20190111_LCL5) + '\t' + str(Quartet_DNA_ILM_Nova_ARD_5_20190111_LCL5) + '\t' + str(Quartet_DNA_ILM_Nova_ARD_6_20190111_LCL5) + '\t' \ | |||||
+ str(Quartet_DNA_ILM_Nova_BRG_1_20180930_LCL5) + '\t' + str(Quartet_DNA_ILM_Nova_BRG_2_20180930_LCL5) + '\t' + str(Quartet_DNA_ILM_Nova_BRG_3_20180930_LCL5) + '\t' \ | |||||
+ str(Quartet_DNA_ILM_Nova_WUX_1_20190917_LCL5) + '\t' + str(Quartet_DNA_ILM_Nova_WUX_2_20190917_LCL5) + '\t' + str(Quartet_DNA_ILM_Nova_WUX_3_20190917_LCL5) + '\t' \ | |||||
+ str(Quartet_DNA_ILM_XTen_ARD_1_20170403_LCL5) + '\t' + str(Quartet_DNA_ILM_XTen_ARD_2_20170403_LCL5) + '\t' + str(Quartet_DNA_ILM_XTen_ARD_3_20170403_LCL5) + '\t' \ | |||||
+ str(Quartet_DNA_ILM_XTen_NVG_1_20170329_LCL5) + '\t' + str(Quartet_DNA_ILM_XTen_NVG_2_20170329_LCL5) + '\t' + str(Quartet_DNA_ILM_XTen_NVG_3_20170329_LCL5) + '\t' \ | |||||
+ str(Quartet_DNA_ILM_XTen_WUX_1_20170216_LCL5) + '\t' + str(Quartet_DNA_ILM_XTen_WUX_2_20170216_LCL5) + '\t' + str(Quartet_DNA_ILM_XTen_WUX_3_20170216_LCL5) + '\n' | |||||
all_sample_outfile.write(outline2) | |||||
else: | |||||
from __future__ import division | |||||
import pandas as pd | |||||
import sys, argparse, os | |||||
import fileinput | |||||
import re | |||||
# input arguments | |||||
parser = argparse.ArgumentParser(description="this script is to get final high confidence calls and information of all replicates") | |||||
parser.add_argument('-vcfInfo', '--vcfInfo', type=str, help='The txt file of variants information, this file is named as prefix__variant_quality_location.txt', required=True) | |||||
parser.add_argument('-mendelianInfo', '--mendelianInfo', type=str, help='The merged mendelian information of all samples', required=True) | |||||
parser.add_argument('-sample', '--sample_name', type=str, help='which sample of quartet', required=True) | |||||
args = parser.parse_args() | |||||
vcfInfo = args.vcfInfo | |||||
mendelianInfo = args.mendelianInfo | |||||
sample_name = args.sample_name | |||||
# VCF meta-information block written at the top of the output file.
# Per-sample FORMAT layout produced by this script:
#   GT:TWINS:TRIO5:TRIO6:DP:AF:GQ:QD:MQ:FS:QUAL
# Every meta-information line must start with '##', and FORMAT "Type" must be
# one of Integer / Float / Character / String, otherwise VCF parsers reject or
# ignore the declaration.
vcf_header = '''##fileformat=VCFv4.2
##fileDate=20200331
##source=high_confidence_calls_intergration(choppy app)
##reference=GRCh38.d1.vd1
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=TWINS,Number=1,Type=String,Description="1 for sister consistent, 0 for sister different">
##FORMAT=<ID=TRIO5,Number=1,Type=String,Description="1 for LCL7, LCL8 and LCL5 mendelian consistent, 0 for family violation">
##FORMAT=<ID=TRIO6,Number=1,Type=String,Description="1 for LCL7, LCL8 and LCL6 mendelian consistent, 0 for family violation">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Depth">
##FORMAT=<ID=AF,Number=1,Type=Float,Description="Allele frequency">
##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype quality">
##FORMAT=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">
##FORMAT=<ID=MQ,Number=1,Type=Float,Description="Mapping quality">
##FORMAT=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias">
##FORMAT=<ID=QUAL,Number=1,Type=Float,Description="variant quality">
##contig=<ID=chr1,length=248956422>
##contig=<ID=chr2,length=242193529>
##contig=<ID=chr3,length=198295559>
##contig=<ID=chr4,length=190214555>
##contig=<ID=chr5,length=181538259>
##contig=<ID=chr6,length=170805979>
##contig=<ID=chr7,length=159345973>
##contig=<ID=chr8,length=145138636>
##contig=<ID=chr9,length=138394717>
##contig=<ID=chr10,length=133797422>
##contig=<ID=chr11,length=135086622>
##contig=<ID=chr12,length=133275309>
##contig=<ID=chr13,length=114364328>
##contig=<ID=chr14,length=107043718>
##contig=<ID=chr15,length=101991189>
##contig=<ID=chr16,length=90338345>
##contig=<ID=chr17,length=83257441>
##contig=<ID=chr18,length=80373285>
##contig=<ID=chr19,length=58617616>
##contig=<ID=chr20,length=64444167>
##contig=<ID=chr21,length=46709983>
##contig=<ID=chr22,length=50818468>
##contig=<ID=chrX,length=156040895>
'''
# Output: one VCF named after the sample; the meta-information block and the
# column header line are written immediately.
file_name = sample_name + '_mendelian_vcfInfo.vcf'
outfile = open(file_name, 'w')
outputcolumn = '\t'.join(
    ['#CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO', 'FORMAT', sample_name]
) + '\n'
outfile.write(vcf_header)
outfile.write(outputcolumn)

# Input: both tables are tab-separated and share the '#CHROM' and 'POS' columns.
vcf_info = pd.read_table(vcfInfo)
mendelian_info = pd.read_table(mendelianInfo)

# Outer join on chromosome + position keeps rows found in only one table;
# the cells missing on the other side are then filled with '.'.
_join_keys = ['#CHROM', 'POS']
merged_df = pd.merge(
    vcf_info,
    mendelian_info,
    how='outer',
    left_on=_join_keys,
    right_on=_join_keys,
).fillna('.')
# | |||||
def parse_INFO(info):
    """Parse a VCF INFO string into a dict of key -> value strings.

    The string is split on ';'. Each 'KEY=VALUE' entry maps KEY to VALUE
    (split on the first '=' only, so a VALUE may itself contain '=').
    A value-less flag entry such as 'DB' maps to '1'. Empty entries and the
    missing-value placeholder '.' are skipped, so '' and '.' both yield {}.
    When a key occurs more than once, the last occurrence wins.

    :param info: raw INFO column text, e.g. 'DP=10;DB;MQ=60.00'
    :return: dict mapping INFO keys to their string values
    """
    infoDict = {}
    for field in info.strip().split(';'):
        if field in ('', '.'):
            # nothing to record for an empty or missing entry
            continue
        key, sep, value = field.partition('=')
        # no '=' means a presence-only flag; record it as '1'
        infoDict[key] = value if sep else '1'
    return infoDict
# | |||||
# Emit one VCF record per merged row.
# The sample field follows GT:TWINS:TRIO5:TRIO6:DP:AF:GQ:QD:MQ:FS:QUAL.
# The first four sub-fields come from the mendelian column when it holds a
# value, and are written as '.:.:.:.' when that column is the '.' placeholder.
_format_keys = 'GT:TWINS:TRIO5:TRIO6:DP:AF:GQ:QD:MQ:FS:QUAL'
for row in merged_df.itertuples():
    mendelian_call = row.Quartet_DNA_BGI_SEQ2000_BGI_1_20180518_LCL5
    if mendelian_call == '.':
        mendelian_call = '.:.:.:.'

    # Per-replicate genotype column from the vcf-info table.
    # assumes the layout is GT:AD:DP:GQ (index 1 = 'ref,alt' depths,
    # index 2 = depth, index 3 = genotype quality) - TODO confirm
    # NOTE(review): a row present only in the mendelian table has '.' here
    # and will fail on the indexing below - confirm whether such rows occur.
    FORMAT_x = row.Quartet_DNA_BGI_SEQ2000_BGI_LCL5_1_20180518.split(':')
    depth = FORMAT_x[2]
    ALT = int(FORMAT_x[1].split(',')[1])
    if int(depth) != 0:
        AF = round(ALT / int(depth), 2)
    else:
        # no reads: allele frequency is undefined
        AF = '.'

    INFO_x = parse_INFO(row.INFO_x)
    # QD is forced to '.' at zero depth; any annotation absent from INFO is
    # also written as '.' instead of raising KeyError.
    if depth == '0':
        QD = '.'
    else:
        QD = INFO_x.get('QD', '.')
    MQ = INFO_x.get('MQ', '.')
    FS = INFO_x.get('FS', '.')

    FORMAT = ':'.join([mendelian_call, depth, str(AF), FORMAT_x[3], QD, MQ, FS, str(row.QUAL_x)])

    # itertuples() exposes the '#CHROM' column as `_1` because the original
    # name is not a valid Python identifier.
    # QUAL, FILTER and INFO columns are written as '.'.
    outline = '\t'.join([
        row._1, str(row.POS), row.ID_x, row.REF_y, row.ALT_y,
        '.', '.', '.', _format_keys, FORMAT,
    ]) + '\n'
    outfile.write(outline)
from __future__ import division | |||||
import sys, argparse, os | import sys, argparse, os | ||||
import fileinput | import fileinput | ||||
import re | import re |
{ | { | ||||
"{{ project_name }}.LCL6normZip.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||||
"{{ project_name }}.LCL5extract_info.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1", | |||||
"{{ project_name }}.LCL7merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | "{{ project_name }}.LCL7merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | ||||
"{{ project_name }}.fasta": "GRCh38.d1.vd1.fa", | "{{ project_name }}.fasta": "GRCh38.d1.vd1.fa", | ||||
"{{ project_name }}.LCL6familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||||
"{{ project_name }}.LCL7votes.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1", | |||||
"{{ project_name }}.LCL6votes.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1", | |||||
"{{ project_name }}.LCL6FinalResult.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1", | |||||
"{{ project_name }}.LCL5VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | "{{ project_name }}.LCL5VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | ||||
"{{ project_name }}.LCL6allSampleReform.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1", | |||||
"{{ project_name }}.LCL8mergeInfo.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1", | |||||
"{{ project_name }}.LCL6mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", | "{{ project_name }}.LCL6mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", | ||||
"{{ project_name }}.LCL5mergeInfo.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1", | |||||
"{{ project_name }}.mergeSister.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | "{{ project_name }}.mergeSister.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | ||||
"{{ project_name }}.LCL7normZip.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||||
"{{ project_name }}.LCL5mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", | "{{ project_name }}.LCL5mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", | ||||
"{{ project_name }}.LCL6allInfozipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||||
"{{ project_name }}.LCL5allInfozipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||||
"{{ project_name }}.disk_size": "150", | "{{ project_name }}.disk_size": "150", | ||||
"{{ project_name }}.LCL7allSampleReform.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1", | |||||
"{{ project_name }}.LCL7mergeInfo.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1", | |||||
"{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}", | "{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}", | ||||
"{{ project_name }}.LCL6bedAnnotation.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||||
"{{ project_name }}.repeat_bed": "oss://pgx-result/renluyao/manuscript/all.repeat.bed", | |||||
"{{ project_name }}.LCL6merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | "{{ project_name }}.LCL6merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | ||||
"{{ project_name }}.LCL5FinalResult.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1", | |||||
"{{ project_name }}.LCL6variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9", | "{{ project_name }}.LCL6variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9", | ||||
"{{ project_name }}.LCL5mergeVCF.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||||
"{{ project_name }}.LCL6zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | "{{ project_name }}.LCL6zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | ||||
"{{ project_name }}.LCL8mergeVCF.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||||
"{{ project_name }}.LCL5votes.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1", | |||||
"{{ project_name }}.LCL7familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||||
"{{ project_name }}.LCL8extract_info.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1", | |||||
"{{ project_name }}.LCL7FinalResult.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1", | |||||
"{{ project_name }}.LCL8FinalResult.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1", | |||||
"{{ project_name }}.LCL5allSampleReform.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1", | |||||
"{{ project_name }}.LCL5familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||||
"{{ project_name }}.LCL5normZip.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||||
"{{ project_name }}.LCL7allInfozipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||||
"{{ project_name }}.LCL6mergeInfo.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1", | |||||
"{{ project_name }}.LCL6VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | "{{ project_name }}.LCL6VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | ||||
"{{ project_name }}.LCL7extract_info.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1", | |||||
"{{ project_name }}.LCL5merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | "{{ project_name }}.LCL5merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | ||||
"{{ project_name }}.LCL8votes.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1", | |||||
"{{ project_name }}.reformVCF.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call:v1.1", | "{{ project_name }}.reformVCF.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call:v1.1", | ||||
"{{ project_name }}.LCL5zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | "{{ project_name }}.LCL5zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | ||||
"{{ project_name }}.cluster_config": "OnDemand bcs.a2.xlarge img-ubuntu-vpc", | |||||
"{{ project_name }}.LCL8familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||||
"{{ project_name }}.LCL5bedAnnotation.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||||
"{{ project_name }}.LCL6mergeVCF.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||||
"{{ project_name }}.LCL6extract_info.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1", | |||||
"{{ project_name }}.cluster_config": "OnDemand bcs.b4.xlarge img-ubuntu-vpc", | |||||
"{{ project_name }}.LCL8allInfozipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||||
"{{ project_name }}.LCL7variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9", | "{{ project_name }}.LCL7variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9", | ||||
"{{ project_name }}.LCL7mergeVCF.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||||
"{{ project_name }}.LCL8allSampleReform.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1", | |||||
"{{ project_name }}.LCL8merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | "{{ project_name }}.LCL8merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | ||||
"{{ project_name }}.LCL5variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9", | "{{ project_name }}.LCL5variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9", | ||||
"{{ project_name }}.LCL7bedAnnotation.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||||
"{{ project_name }}.LCL8variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9", | "{{ project_name }}.LCL8variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9", | ||||
"{{ project_name }}.LCL8bedAnnotation.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||||
"{{ project_name }}.LCL8normZip.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||||
"{{ project_name }}.ref_dir": "oss://chinese-quartet/quartet-storage-data/reference_data/" | "{{ project_name }}.ref_dir": "oss://chinese-quartet/quartet-storage-data/reference_data/" | ||||
} | } | ||||
python /opt/variants_quality_location_intergration.py -vcf ${repeat_annotated_vcf} -prefix ${sample} | python /opt/variants_quality_location_intergration.py -vcf ${repeat_annotated_vcf} -prefix ${sample} | ||||
cat ${sample}_variant_quality_location.txt | grep '#CHROM' > header | |||||
for i in chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX | |||||
do | |||||
cat ${sample}_variant_quality_location.txt | grep -w $i | cat header - > ${sample}.$i.vcfInfo.txt | |||||
done | |||||
>>> | >>> | ||||
runtime { | runtime { | ||||
} | } | ||||
output { | output { | ||||
File extracted_info = "${sample}_variant_quality_location.txt" | File extracted_info = "${sample}_variant_quality_location.txt" | ||||
Array[File] chromo_vcfInfo = glob("*.vcfInfo.txt") | |||||
} | } | ||||
} | } |
task FinalResult { | task FinalResult { | ||||
File extracted_info | File extracted_info | ||||
File annotated_txt | File annotated_txt | ||||
String prefix | |||||
String prefix = basename(annotated_txt,".mendelian.txt") | |||||
String sample | String sample | ||||
String docker | String docker | ||||
String cluster_config | String cluster_config |
task extract_info { | |||||
File normed_vcf | |||||
String sampleName | |||||
task merge_info { | |||||
File vcfInfo | |||||
File mendelianInfo | |||||
String sample | |||||
String docker | String docker | ||||
String cluster_config | String cluster_config | ||||
String disk_size | String disk_size | ||||
command <<< | command <<< | ||||
python /opt/vcf_mq_af.py -vcf ${normed_vcf} -prefix ${sampleName} | |||||
python /opt/merge_mendelian_vcfinfo.py -vcfInfo ${vcfInfo} -mendelianInfo ${mendelianInfo} -sample ${sample} | |||||
>>> | >>> | ||||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | ||||
} | } | ||||
output { | output { | ||||
File vcf_info = "${sampleName}_variant_quality_location.vcf" | |||||
File all_info = "${sample}_mendelian_vcfInfo.vcf" | |||||
} | } | ||||
} | } |
/opt/hall-lab/bcftools-1.9/bin/bcftools norm -f ${ref_dir}/${fasta} ${sampleName}.filtered.vcf > ${sampleName}.normed.vcf | /opt/hall-lab/bcftools-1.9/bin/bcftools norm -f ${ref_dir}/${fasta} ${sampleName}.filtered.vcf > ${sampleName}.normed.vcf | ||||
cat ${sampleName}.normed.vcf | grep -v '#' | cut -f8 | sed s'/MQ=/\t/g' | cut -f2 | sed s'/;/\t/g' | cut -f1 > MQ | |||||
cat ${sampleName}.normed.vcf | grep -v '#' | awk '{print $1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6"\t"$7"\t"$8"\t"$9":MQ""\t"$10}' | paste - MQ -d ":" > body | |||||
cat ${sampleName}.normed.vcf | grep '#' | cat - body > ${sampleName}.normed.mq.vcf | |||||
>>> | >>> | ||||
runtime { | runtime { | ||||
} | } | ||||
output { | output { | ||||
File normed_vcf = "${sampleName}.normed.vcf" | File normed_vcf = "${sampleName}.normed.vcf" | ||||
File normed_mq_vcf = "${sampleName}.normed.mq.vcf" | |||||
} | } | ||||
} | } |
python /opt/high_confidence_call_vote.py -vcf ${merged_vcf} -dup ${vcf_dup} -sample ${sample} -prefix ${prefix} | python /opt/high_confidence_call_vote.py -vcf ${merged_vcf} -dup ${vcf_dup} -sample ${sample} -prefix ${prefix} | ||||
cat ${prefix}_annotated.vcf | grep -v '##' > ${prefix}.txt | cat ${prefix}_annotated.vcf | grep -v '##' > ${prefix}.txt | ||||
cat ${prefix}.txt | grep '#CHROM' > header | |||||
for i in chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX | |||||
do | |||||
cat ${prefix}.txt | grep -w $i | cat header - > ${sample}.$i.mendelian.txt | |||||
done | |||||
>>> | >>> | ||||
runtime { | runtime { | ||||
output { | output { | ||||
File annotated_vcf = "${prefix}_annotated.vcf" | File annotated_vcf = "${prefix}_annotated.vcf" | ||||
File annotated_txt = "${prefix}.txt" | File annotated_txt = "${prefix}.txt" | ||||
Array[File] chromo_votes = glob("*.mendelian.txt") | |||||
} | } | ||||
} | } | ||||
import "./tasks/variantsNorm.wdl" as variantsNorm | import "./tasks/variantsNorm.wdl" as variantsNorm | ||||
import "./tasks/extract_info.wdl" as extract_info | |||||
import "./tasks/merge_info.wdl" as merge_info | |||||
import "./tasks/mendelian.wdl" as mendelian | import "./tasks/mendelian.wdl" as mendelian | ||||
import "./tasks/zipIndex.wdl" as zipIndex | import "./tasks/zipIndex.wdl" as zipIndex | ||||
import "./tasks/VCFrename.wdl" as VCFrename | import "./tasks/VCFrename.wdl" as VCFrename | ||||
import "./tasks/mergeSister.wdl" as mergeSister | import "./tasks/mergeSister.wdl" as mergeSister | ||||
import "./tasks/reformVCF.wdl" as reformVCF | import "./tasks/reformVCF.wdl" as reformVCF | ||||
import "./tasks/merge.wdl" as merge | import "./tasks/merge.wdl" as merge | ||||
import "./tasks/votes.wdl" as votes | |||||
import "./tasks/bed_annotation.wdl" as bed_annotation | |||||
import "./tasks/mergeVCFInfo.wdl" as mergeVCFInfo | |||||
import "./tasks/VCFinfo.wdl" as VCFinfo | |||||
import "./tasks/final_result.wdl" as FinalResult | |||||
workflow {{ project_name }} { | workflow {{ project_name }} { | ||||
File inputSamplesFile | File inputSamplesFile | ||||
Array[Array[File]] inputSamples = read_tsv(inputSamplesFile) | Array[Array[File]] inputSamples = read_tsv(inputSamplesFile) | ||||
File ref_dir | File ref_dir | ||||
File repeat_bed | |||||
String fasta | String fasta | ||||
String cluster_config | String cluster_config | ||||
String disk_size | String disk_size | ||||
cluster_config=cluster_config, | cluster_config=cluster_config, | ||||
disk_size=disk_size | disk_size=disk_size | ||||
} | } | ||||
call extract_info.extract_info as LCL5extract_info { | |||||
input: | |||||
normed_vcf=LCL5variantsNorm.normed_vcf, | |||||
sampleName=quartet[4], | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call extract_info.extract_info as LCL6extract_info { | |||||
input: | |||||
normed_vcf=LCL6variantsNorm.normed_vcf, | |||||
sampleName=quartet[5], | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call extract_info.extract_info as LCL7extract_info { | |||||
input: | |||||
normed_vcf=LCL7variantsNorm.normed_vcf, | |||||
sampleName=quartet[6], | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call extract_info.extract_info as LCL8extract_info { | |||||
input: | |||||
normed_vcf=LCL8variantsNorm.normed_vcf, | |||||
sampleName=quartet[8], | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call zipIndex.zipIndex as LCL5normZip{ | |||||
input: | |||||
vcf=LCL5extract_info.vcf_info, | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call zipIndex.zipIndex as LCL6normZip{ | |||||
input: | |||||
vcf=LCL6extract_info.vcf_info, | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call zipIndex.zipIndex as LCL7normZip{ | |||||
input: | |||||
vcf=LCL7extract_info.vcf_info, | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call zipIndex.zipIndex as LCL8normZip{ | |||||
input: | |||||
vcf=LCL8extract_info.vcf_info, | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call mendelian.mendelian as LCL5mendelian { | call mendelian.mendelian as LCL5mendelian { | ||||
input: | input: | ||||
child_vcf=LCL5variantsNorm.normed_vcf, | child_vcf=LCL5variantsNorm.normed_vcf, | ||||
cluster_config=cluster_config, | cluster_config=cluster_config, | ||||
disk_size=disk_size | disk_size=disk_size | ||||
} | } | ||||
call zipIndex.zipIndex as LCL5familyzipIndex { | |||||
call merge_info.merge_info as LCL5mergeInfo { | |||||
input: | |||||
vcfInfo=LCL5variantsNorm.normed_vcf, | |||||
mendelianInfo=reformVCF.LCL5_family_info, | |||||
sample=quartet[4], | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call merge_info.merge_info as LCL6mergeInfo { | |||||
input: | |||||
vcfInfo=LCL6variantsNorm.normed_vcf, | |||||
mendelianInfo=reformVCF.LCL6_family_info, | |||||
sample=quartet[5], | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call merge_info.merge_info as LCL7mergeInfo { | |||||
input: | |||||
vcfInfo=LCL7variantsNorm.normed_vcf, | |||||
mendelianInfo=reformVCF.LCL7_family_info, | |||||
sample=quartet[6], | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call merge_info.merge_info as LCL8mergeInfo { | |||||
input: | |||||
vcfInfo=LCL8variantsNorm.normed_vcf, | |||||
mendelianInfo=reformVCF.LCL8_family_info, | |||||
sample=quartet[7], | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call zipIndex.zipIndex as LCL5allInfozipIndex { | |||||
input: | input: | ||||
vcf=reformVCF.LCL5_family_info, | |||||
vcf=LCL5mergeInfo.all_info, | |||||
cluster_config=cluster_config, | cluster_config=cluster_config, | ||||
disk_size=disk_size | disk_size=disk_size | ||||
} | } | ||||
call zipIndex.zipIndex as LCL6familyzipIndex { | |||||
call zipIndex.zipIndex as LCL6allInfozipIndex { | |||||
input: | input: | ||||
vcf=reformVCF.LCL6_family_info, | |||||
vcf=LCL6mergeInfo.all_info, | |||||
cluster_config=cluster_config, | cluster_config=cluster_config, | ||||
disk_size=disk_size | disk_size=disk_size | ||||
} | } | ||||
call zipIndex.zipIndex as LCL7familyzipIndex { | |||||
call zipIndex.zipIndex as LCL7allInfozipIndex { | |||||
input: | input: | ||||
vcf=reformVCF.LCL7_family_info, | |||||
vcf=LCL7mergeInfo.all_info, | |||||
cluster_config=cluster_config, | cluster_config=cluster_config, | ||||
disk_size=disk_size | disk_size=disk_size | ||||
} | } | ||||
call zipIndex.zipIndex as LCL8familyzipIndex { | |||||
call zipIndex.zipIndex as LCL8allInfozipIndex { | |||||
input: | input: | ||||
vcf=reformVCF.LCL8_family_info, | |||||
vcf=LCL8mergeInfo.all_info, | |||||
cluster_config=cluster_config, | cluster_config=cluster_config, | ||||
disk_size=disk_size | disk_size=disk_size | ||||
} | } | ||||
### family info merge | ### family info merge | ||||
call merge.merge as LCL5merge { | call merge.merge as LCL5merge { | ||||
input: | input: | ||||
family_vcf_gz=LCL5familyzipIndex.vcf_gz, | |||||
family_vcf_idx=LCL5familyzipIndex.vcf_idx, | |||||
family_vcf_gz=LCL5allInfozipIndex.vcf_gz, | |||||
family_vcf_idx=LCL5allInfozipIndex.vcf_idx, | |||||
sample="LCL5", | sample="LCL5", | ||||
cluster_config=cluster_config, | cluster_config=cluster_config, | ||||
disk_size=disk_size | disk_size=disk_size | ||||
} | } | ||||
call votes.votes as LCL5votes{ | |||||
input: | |||||
merged_vcf=LCL5merge.merged_vcf, | |||||
vcf_dup=LCL5merge.vcf_dup, | |||||
sample='LCL5', | |||||
prefix='LCL5_consensus', | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call merge.merge as LCL6merge { | call merge.merge as LCL6merge { | ||||
input: | input: | ||||
family_vcf_gz=LCL6familyzipIndex.vcf_gz, | |||||
family_vcf_idx=LCL6familyzipIndex.vcf_idx, | |||||
family_vcf_gz=LCL6allInfozipIndex.vcf_gz, | |||||
family_vcf_idx=LCL6allInfozipIndex.vcf_idx, | |||||
sample="LCL6", | sample="LCL6", | ||||
cluster_config=cluster_config, | cluster_config=cluster_config, | ||||
disk_size=disk_size | disk_size=disk_size | ||||
} | } | ||||
call votes.votes as LCL6votes { | |||||
input: | |||||
merged_vcf=LCL6merge.merged_vcf, | |||||
vcf_dup=LCL6merge.vcf_dup, | |||||
sample='LCL6', | |||||
prefix='LCL6_consensus', | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call merge.merge as LCL7merge { | call merge.merge as LCL7merge { | ||||
input: | input: | ||||
family_vcf_gz=LCL7familyzipIndex.vcf_gz, | |||||
family_vcf_idx=LCL7familyzipIndex.vcf_idx, | |||||
family_vcf_gz=LCL7allInfozipIndex.vcf_gz, | |||||
family_vcf_idx=LCL7allInfozipIndex.vcf_idx, | |||||
sample="LCL7", | sample="LCL7", | ||||
cluster_config=cluster_config, | cluster_config=cluster_config, | ||||
disk_size=disk_size | disk_size=disk_size | ||||
} | } | ||||
call votes.votes as LCL7votes { | |||||
input: | |||||
merged_vcf=LCL7merge.merged_vcf, | |||||
vcf_dup=LCL7merge.vcf_dup, | |||||
sample='LCL7', | |||||
prefix='LCL7_consensus', | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call merge.merge as LCL8merge { | call merge.merge as LCL8merge { | ||||
input: | input: | ||||
family_vcf_gz=LCL8familyzipIndex.vcf_gz, | |||||
family_vcf_idx=LCL8familyzipIndex.vcf_idx, | |||||
family_vcf_gz=LCL8allInfozipIndex.vcf_gz, | |||||
family_vcf_idx=LCL8allInfozipIndex.vcf_idx, | |||||
sample="LCL8", | sample="LCL8", | ||||
cluster_config=cluster_config, | cluster_config=cluster_config, | ||||
disk_size=disk_size | disk_size=disk_size | ||||
} | } | ||||
call votes.votes as LCL8votes { | |||||
input: | |||||
merged_vcf=LCL8merge.merged_vcf, | |||||
vcf_dup=LCL8merge.vcf_dup, | |||||
sample='LCL8', | |||||
prefix='LCL8_consensus', | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
### vcf original information | |||||
call mergeVCFInfo.mergeVCFInfo as LCL5mergeVCF { | |||||
input: | |||||
vcf_gz=LCL5normZip.vcf_gz, | |||||
vcf_idx=LCL5normZip.vcf_idx, | |||||
sample='LCL5', | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call bed_annotation.bed_annotation as LCL5bedAnnotation { | |||||
input: | |||||
merged_vcf=LCL5mergeVCF.merged_vcf, | |||||
merged_vcf_idx=LCL5mergeVCF.merged_vcf_idx, | |||||
repeat_bed=repeat_bed, | |||||
sample='LCL5', | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call VCFinfo.VCFinfo as LCL5allSampleReform { | |||||
input: | |||||
repeat_annotated_vcf=LCL5bedAnnotation.repeat_annotated_vcf, | |||||
sample='LCL5', | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call FinalResult.FinalResult as LCL5FinalResult { | |||||
input: | |||||
extracted_info=LCL5allSampleReform.extracted_info, | |||||
annotated_txt=LCL5votes.annotated_txt, | |||||
prefix='LCL5', | |||||
sample='LCL5', | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size, | |||||
} | |||||
call mergeVCFInfo.mergeVCFInfo as LCL6mergeVCF { | |||||
input: | |||||
vcf_gz=LCL6normZip.vcf_gz, | |||||
vcf_idx=LCL6normZip.vcf_idx, | |||||
sample='LCL6', | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call bed_annotation.bed_annotation as LCL6bedAnnotation { | |||||
input: | |||||
merged_vcf=LCL6mergeVCF.merged_vcf, | |||||
merged_vcf_idx=LCL6mergeVCF.merged_vcf_idx, | |||||
repeat_bed=repeat_bed, | |||||
sample='LCL6', | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call VCFinfo.VCFinfo as LCL6allSampleReform { | |||||
input: | |||||
repeat_annotated_vcf=LCL6bedAnnotation.repeat_annotated_vcf, | |||||
sample='LCL6', | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call FinalResult.FinalResult as LCL6FinalResult { | |||||
input: | |||||
extracted_info=LCL6allSampleReform.extracted_info, | |||||
annotated_txt=LCL6votes.annotated_txt, | |||||
prefix='LCL6', | |||||
sample='LCL6', | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size, | |||||
} | |||||
call mergeVCFInfo.mergeVCFInfo as LCL7mergeVCF { | |||||
input: | |||||
vcf_gz=LCL7normZip.vcf_gz, | |||||
vcf_idx=LCL7normZip.vcf_idx, | |||||
sample='LCL7', | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call bed_annotation.bed_annotation as LCL7bedAnnotation { | |||||
input: | |||||
merged_vcf=LCL7mergeVCF.merged_vcf, | |||||
merged_vcf_idx=LCL7mergeVCF.merged_vcf_idx, | |||||
repeat_bed=repeat_bed, | |||||
sample='LCL7', | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call VCFinfo.VCFinfo as LCL7allSampleReform { | |||||
input: | |||||
repeat_annotated_vcf=LCL7bedAnnotation.repeat_annotated_vcf, | |||||
sample='LCL7', | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call FinalResult.FinalResult as LCL7FinalResult { | |||||
input: | |||||
extracted_info=LCL7allSampleReform.extracted_info, | |||||
annotated_txt=LCL7votes.annotated_txt, | |||||
prefix='LCL7', | |||||
sample='LCL7', | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size, | |||||
} | |||||
call mergeVCFInfo.mergeVCFInfo as LCL8mergeVCF { | |||||
input: | |||||
vcf_gz=LCL8normZip.vcf_gz, | |||||
vcf_idx=LCL8normZip.vcf_idx, | |||||
sample='LCL8', | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call bed_annotation.bed_annotation as LCL8bedAnnotation { | |||||
input: | |||||
merged_vcf=LCL8mergeVCF.merged_vcf, | |||||
merged_vcf_idx=LCL8mergeVCF.merged_vcf_idx, | |||||
repeat_bed=repeat_bed, | |||||
sample='LCL8', | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call VCFinfo.VCFinfo as LCL8allSampleReform { | |||||
input: | |||||
repeat_annotated_vcf=LCL8bedAnnotation.repeat_annotated_vcf, | |||||
sample='LCL8', | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call FinalResult.FinalResult as LCL8FinalResult { | |||||
input: | |||||
extracted_info=LCL8allSampleReform.extracted_info, | |||||
annotated_txt=LCL8votes.annotated_txt, | |||||
prefix='LCL8', | |||||
sample='LCL8', | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size, | |||||
} | |||||
} | |||||
} |