5 år sedan · bb7ab9341a
--- a/codescripts/library_concordance.py
+++ b/codescripts/library_concordance.py
 # Fill two square sample-by-sample tables with len(inter) and len(union)
 # for every pair of sample columns, then write both as tab-separated files.
 # NOTE(review): this excerpt looks like diff text in which both the old
 # (offset 2) and new (offset 4) versions of several lines are shown one
 # after the other -- confirm against the actual file before relying on it.
 dat = pd.read_table(input_dat)
 # output files
 # Number of sample columns = total columns minus the leading non-sample
 # columns. Two assignments appear here; the second (-4) overrides the first.
 sample_size = dat.shape[1]-2
 sample_size = dat.shape[1]-4
 # Square tables indexed 0..sample_size-1 on both axes, relabelled further down.
 inter_number = pd.DataFrame(index=range(sample_size),columns=range(sample_size))
 union_number = pd.DataFrame(index=range(sample_size),columns=range(sample_size))
 for i in range(sample_size):
    # One key per row: "<column 0>_<column 1>_<value in the i-th sample column>".
    # The i+4 assignment overrides the i+2 one directly above it.
    oneSNV_GT = dat.iloc[:,0].astype(str) + '_' + dat.iloc[:,1].astype(str) + '_' + dat.iloc[:,i+2].astype(str)
    oneSNV_GT = dat.iloc[:,0].astype(str) + '_' + dat.iloc[:,1].astype(str) + '_' + dat.iloc[:,i+4].astype(str)
    # Progress output: 1-based index of the sample currently being compared.
    print(i+1)
    for j in range(sample_size):
        # Same key construction for the j-th sample column (j+4 overrides j+2).
        anotherSNV_GT = dat.iloc[:,0].astype(str) + '_' + dat.iloc[:,1].astype(str) + '_' + dat.iloc[:,j+2].astype(str)
        anotherSNV_GT = dat.iloc[:,0].astype(str) + '_' + dat.iloc[:,1].astype(str) + '_' + dat.iloc[:,j+4].astype(str)
        #remove './.' and '0/0'
        # oneSNV_GT becomes a plain list here; on later j iterations these two
        # filters run again on the already-filtered list and change nothing.
        oneSNV_GT = [e for e in oneSNV_GT if './.' not in e]
        oneSNV_GT = [e for e in oneSNV_GT if '0/0' not in e]
        # NOTE(review): `inter` and `union` are not defined in the visible
        # lines; presumably they are built from oneSNV_GT and anotherSNV_GT
        # in code omitted from this excerpt -- confirm against the full file.
        inter_number.iloc[i,j] = len(inter)
        union_number.iloc[i,j] = len(union)
 # Relabel rows and columns of both tables with names taken from dat.columns.
 # NOTE(review): the [2:] slices hold dat.shape[1]-2 labels while the tables
 # have dat.shape[1]-4 rows/columns, so they do not match the table size if
 # both groups run as shown; they look like the pre-change side of the diff.
 inter_number.columns = dat.columns[2:dat.shape[1]]
 inter_number.index = dat.columns[2:dat.shape[1]]
 union_number.columns = dat.columns[2:dat.shape[1]]
 union_number.index = dat.columns[2:dat.shape[1]]
 # The [4:] slices hold dat.shape[1]-4 labels, matching sample_size above.
 inter_number.columns = dat.columns[4:dat.shape[1]]
 inter_number.index = dat.columns[4:dat.shape[1]]
 union_number.columns = dat.columns[4:dat.shape[1]]
 union_number.index = dat.columns[4:dat.shape[1]]
 # Write both tables as tab-separated text, including row and column labels.
 inter_number.to_csv(output_inter_name,sep='\t')
 union_number.to_csv(output_union_name,sep='\t')
--- a/tasks/Jaccard_Index.wdl
+++ b/tasks/Jaccard_Index.wdl
 	output {
 		File genotype = "${chromo}.gt"
 		File snv = "${chromo}.gt.snv.txt"
 		File indel = "${chromo}.indel.txt"
 		File indel = "${chromo}.gt.indel.txt"
 		File snv_inter = "${chromo}.snv.inter.txt"
 		File snv_union = "${chromo}.snv.union.txt"
 		File indel_inter = "${chromo}.indel.inter.txt"