Browse Source

python

master
LUYAO REN 5 years ago
parent
commit
bb7ab9341a
2 changed files with 8 additions and 8 deletions
  1. +7
    -7
      codescripts/library_concordance.py
  2. +1
    -1
      tasks/Jaccard_Index.wdl

+ 7
- 7
codescripts/library_concordance.py View File

dat = pd.read_table(input_dat) dat = pd.read_table(input_dat)


# output files # output files
sample_size = dat.shape[1]-2
sample_size = dat.shape[1]-4
inter_number = pd.DataFrame(index=range(sample_size),columns=range(sample_size)) inter_number = pd.DataFrame(index=range(sample_size),columns=range(sample_size))
union_number = pd.DataFrame(index=range(sample_size),columns=range(sample_size)) union_number = pd.DataFrame(index=range(sample_size),columns=range(sample_size))


for i in range(sample_size): for i in range(sample_size):
oneSNV_GT = dat.iloc[:,0].astype(str) + '_' + dat.iloc[:,1].astype(str) + '_' + dat.iloc[:,i+2].astype(str)
oneSNV_GT = dat.iloc[:,0].astype(str) + '_' + dat.iloc[:,1].astype(str) + '_' + dat.iloc[:,i+4].astype(str)
print(i+1) print(i+1)
for j in range(sample_size): for j in range(sample_size):
anotherSNV_GT = dat.iloc[:,0].astype(str) + '_' + dat.iloc[:,1].astype(str) + '_' + dat.iloc[:,j+2].astype(str)
anotherSNV_GT = dat.iloc[:,0].astype(str) + '_' + dat.iloc[:,1].astype(str) + '_' + dat.iloc[:,j+4].astype(str)
#remove './.' and '0/0' #remove './.' and '0/0'
oneSNV_GT = [e for e in oneSNV_GT if './.' not in e] oneSNV_GT = [e for e in oneSNV_GT if './.' not in e]
oneSNV_GT = [e for e in oneSNV_GT if '0/0' not in e] oneSNV_GT = [e for e in oneSNV_GT if '0/0' not in e]
inter_number.iloc[i,j] = len(inter) inter_number.iloc[i,j] = len(inter)
union_number.iloc[i,j] = len(union) union_number.iloc[i,j] = len(union)


inter_number.columns = dat.columns[2:dat.shape[1]]
inter_number.index = dat.columns[2:dat.shape[1]]
union_number.columns = dat.columns[2:dat.shape[1]]
union_number.index = dat.columns[2:dat.shape[1]]
inter_number.columns = dat.columns[4:dat.shape[1]]
inter_number.index = dat.columns[4:dat.shape[1]]
union_number.columns = dat.columns[4:dat.shape[1]]
union_number.index = dat.columns[4:dat.shape[1]]


inter_number.to_csv(output_inter_name,sep='\t') inter_number.to_csv(output_inter_name,sep='\t')
union_number.to_csv(output_union_name,sep='\t') union_number.to_csv(output_union_name,sep='\t')

+ 1
- 1
tasks/Jaccard_Index.wdl View File

output { output {
File genotype = "${chromo}.gt" File genotype = "${chromo}.gt"
File snv = "${chromo}.gt.snv.txt" File snv = "${chromo}.gt.snv.txt"
File indel = "${chromo}.indel.txt"
File indel = "${chromo}.gt.indel.txt"
File snv_inter = "${chromo}.snv.inter.txt" File snv_inter = "${chromo}.snv.inter.txt"
File snv_union = "${chromo}.snv.union.txt" File snv_union = "${chromo}.snv.union.txt"
File indel_inter = "${chromo}.indel.inter.txt" File indel_inter = "${chromo}.indel.inter.txt"

Loading…
Cancel
Save