import pandas as pd
import numpy as np

# Pairwise Jaccard similarity of boundary calls between runs that differ only
# in their seed.  For every unordered pair of seeds, the first T columns of each
# per-chromosome table are compared element-wise (zero/missing = absent,
# anything else = present); intersection and union counts are pooled over all
# chromosomes and columns, and one "seed1 seed2 jaccard" row is written per pair.
#
# NOTE(review): presumably the first T columns of each table correspond to
# `timepoints` in order, and the tables of two seeds list the same rows in the
# same order -- confirm against the files; this script only relies on position.
seeds = [5811, 6060, 1010, 8912]
timepoints = ['day'+str(i) for i in [0,2,5,7,15,80]]
T = len(timepoints)
chros = ['chr'+str(i) for i in range(4,16)]


def _load_mask(seed, chrom):
    """Return a boolean array (rows x T) of non-zero entries for one seed/chromosome.

    Missing values are treated as 0 (absent).  Raises IndexError when the table
    has fewer than T columns, mirroring the failure of indexing a missing column.
    """
    path = 'scan_seed/output_{:d}/{:}/significant_boundaries_summit_only.txt'.format(seed, chrom)
    frame = pd.read_csv(path, sep="\t", index_col=False)
    values = frame.fillna(0).values
    if values.shape[1] < T:
        raise IndexError(
            '{} has {} columns, expected at least {}'.format(path, values.shape[1], T))
    # Explicit bool conversion so the sums below count entries regardless of the
    # dtype pandas inferred for the table.
    return values[:, :T].astype(bool)


# Read every table exactly once instead of once per seed pair.  This also runs
# before the output file is opened, so a missing input does not truncate an
# existing results file.
masks = {seed: [_load_mask(seed, c) for c in chros] for seed in seeds}

with open('interseed_similarity.txt', 'w') as o:
    o.write('seed1\tseed2\tjaccard\n')
    for idx, seed_a in enumerate(seeds):
        for seed_b in seeds[idx + 1:]:
            union = 0
            inter = 0
            for mask_a, mask_b in zip(masks[seed_a], masks[seed_b]):
                union += np.sum(np.logical_or(mask_a, mask_b))
                inter += np.sum(np.logical_and(mask_a, mask_b))
            # An empty union means neither seed has any call; report nan
            # explicitly rather than relying on a 0/0 division.
            jaccard = inter / union if union else float('nan')
            o.write('{:d}\t{:d}\t{:.3f}\n'.format(seed_a, seed_b, jaccard))
