'''
Matches motifs to significant windows found from conservation analysis.
Combines significant windows if they are right next to each other.
Usage: python3 match_motifs_sigWindows.py <sigWindows file> <motif files> <output file>
'''

import bisect
import sys

def _read_motifs(filenames):
    """Parse every motif file and return all motif records sorted by position.

    Each file is tab-separated. Its first line is always skipped as a header;
    lines starting with '#' and blank lines are ignored as well. Columns used
    (0-based index): 0 and 1 build the motif name, 3 and 4 are the 1-based
    start/stop, 5 is the strand.

    Returns a list of (start, stop, strand, motif, filename) tuples with
    0-based coordinates, sorted ascending (so primarily by start).
    """
    motifs = []
    for filename in filenames:
        with open(filename, 'r') as f:
            f.readline()  # first line is a header, never a record
            for line in f:
                if line.startswith('#') or line.strip() == '':
                    continue
                fields = line.rstrip('\n').split('\t')
                if fields[1] == '':
                    motif = fields[0]
                else:
                    motif = fields[1] + '_' + fields[0]
                start = int(fields[3]) - 1  # change to 0-based
                stop = int(fields[4]) - 1  # change to 0-based
                motifs.append((start, stop, fields[5], motif, filename))
    motifs.sort()
    return motifs


def _read_windows(path):
    """Read significant windows and merge runs of adjacent ones.

    The file is tab-separated with the window start in column 2 and the stop
    in column 3 (column 1 is an identifier that is not used). A window whose
    start is exactly one more than the start of the line before it is folded
    into the previous window: the earlier start is kept and the stop is
    extended. Blank lines are skipped.

    Returns a list of [start, stop] pairs in file order.
    """
    windows = []
    prev_start = None  # None (not a magic number) so no real start can collide
    with open(path, 'r') as f:
        for line in f:
            if line.strip() == '':
                continue
            fields = line.rstrip('\n').split('\t')
            start = int(fields[1])
            stop = int(fields[2])
            if prev_start is not None and prev_start + 1 == start:
                # previous and current window are right next to each other:
                # keep the previous start, update the stop position
                windows[-1][1] = stop
            else:
                windows.append([start, stop])
            prev_start = start
    return windows


def _motifs_within(window_start, window_stop, sorted_motifs, motif_starts):
    """Return the motifs lying 100% inside [window_start, window_stop].

    Both window boundaries are inclusive: a motif may begin on the first
    position of the window and/or end on its last position.

    sorted_motifs must be sorted by start and motif_starts must hold the
    start of each motif in the same order, so that bisect can skip every
    motif beginning before the window instead of scanning them.
    """
    hits = []
    first = bisect.bisect_left(motif_starts, window_start)
    for i in range(first, len(sorted_motifs)):
        motif_info = sorted_motifs[i]
        if window_stop < motif_info[0]:  # all subsequent motifs are after window
            break
        if motif_info[1] <= window_stop:  # motif also ends inside the window
            hits.append(motif_info)
    return hits


def main(argv=None):
    """Match motifs to (merged) significant windows and write a TSV report.

    argv follows the sys.argv layout and defaults to it:
    [program, sigWindows file, motif file(s)..., output file].
    Exits with the module usage text when fewer than four items are given.

    One output row is written per motif contained in a window; a window
    without any contained motif gets a single row filled with 'NA'.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 4:
        sys.exit(__doc__)

    sorted_motifs = _read_motifs(argv[2:-1])
    motif_starts = [motif_info[0] for motif_info in sorted_motifs]
    windows = _read_windows(argv[1])

    with open(argv[-1], 'w') as o:
        o.write('Window_start\tWindow_stop\tMotif\tMotif_start\tMotif_stop\tMotif_strand\tMotif_filename\n')
        for window_start, window_stop in windows:
            prefix = str(window_start) + '\t' + str(window_stop)
            hits = _motifs_within(window_start, window_stop, sorted_motifs, motif_starts)
            if not hits:
                o.write(prefix + '\tNA\tNA\tNA\tNA\tNA\n')
                continue
            for motif_start, motif_stop, strand, motif, filename in hits:
                o.write('\t'.join([prefix, motif, str(motif_start), str(motif_stop),
                                   strand, filename]) + '\n')


if __name__ == '__main__':
    main()

