import sys
import collections

# Positional command-line arguments (all five are required):
#   1. end1_fn          - lifted over coordinates for end 1 of each interaction
#   2. end2_fn          - lifted over coordinates for end 2 of each interaction
#   3. end1_original_fn - end1_originalGenome.bed, provided so we can check for interactions
#                         that were dropped because both ends failed to liftOver
#   4. out_fn           - file that receives the interactions passing every filter
#   5. out_header_fn    - file whose first line is used as the header of out_fn
# Any additional arguments are ignored.
if len(sys.argv) < 6:
	# Fail with a usage message rather than a bare IndexError on a missing argument.
	sys.exit("Usage: %s <end1> <end2> <end1_original> <out> <out_header>" % sys.argv[0])

end1_fn = sys.argv[1]
end2_fn = sys.argv[2]
end1_original_fn = sys.argv[3]
out_fn = sys.argv[4]
out_header_fn = sys.argv[5]

# Read the header into a variable.
# Only the first line of the header file is used, so read just that line
# instead of loading the whole file.
with open(out_header_fn, "r") as out_header_file:
	out_header = out_header_file.readline()

# readline() returns "" only when the file is empty; report that explicitly
# instead of failing with an IndexError.
if not out_header:
	sys.exit("Header file %s is empty" % out_header_fn)

# Make sure the header terminates its own line, otherwise the first interaction
# written after it would end up on the same line as the header.
if not out_header.endswith("\n"):
	out_header += "\n"

# Read lifted over coordinates for both ends of the interaction into dictionaries.
# The keys of the dictionaries will be unique interaction ID.
# The values of the dictionaries will be a list of lists to keep track of any cases where one region may lift to multiple regions in the target genome.
def _read_lifted_regions(fn):
	"""Group the rows of a tab-delimited file by interaction ID.

	The interaction ID is read from the fifth column (index 4) of each row.

	Args:
		fn: path of the tab-delimited file to read.

	Returns:
		A collections.defaultdict(list) mapping each interaction ID to the list
		of rows carrying that ID; every row is a list of column strings.

	Raises:
		IndexError: if a non-blank row has fewer than five columns.
	"""
	regions = collections.defaultdict(list)
	with open(fn, "r") as region_file:
		for line in region_file:
			# Blank lines carry no record; skip them instead of crashing on data[4].
			if not line.strip():
				continue
			# Strip only the line terminator. A bare rstrip() would also remove
			# trailing tabs and silently drop empty trailing columns.
			data = line.rstrip("\r\n").split("\t")
			regions[data[4]].append(data)
	return regions


end1 = _read_lifted_regions(end1_fn)
end2 = _read_lifted_regions(end2_fn)

# Collect the interaction ID (fifth tab-separated column) of every record in the
# end1_original file, preserving file order.
with open(end1_original_fn, "r") as end1_original_file:
	interaction_IDs = [
		record.rstrip().split("\t")[4]
		for record in end1_original_file
	]

# Synchronize the ends.
# An interaction is written to the out file only when both of its ends lifted over,
# each to exactly one location, and neither end changed chromosome. Every other
# interaction is written, together with the reason it was rejected, to
# unmapped_interactions.txt in the current working directory.
with open(out_fn, "w") as out_file, \
	open("unmapped_interactions.txt", "w") as unmapped_file:

	# Write an out file header
	out_file.write(out_header)

	for i in interaction_IDs:

		# Remove any interactions where either end did not lift over.
		# Test membership on the dictionaries themselves: going through .keys() is
		# unnecessary, and on Python 2 it builds a full key list on every iteration.
		if (i not in end1) or (i not in end2):
			unmapped_file.write("%s\tNot lifted on both ends\n" % i)
			continue

		# Get the lifted coordinates for each end
		end1_list = end1[i]
		end2_list = end2[i]

		# Remove any interactions where either end lifted to multiple locations in the target genome.
		# Because the values of the dictionaries are lists of lists, the length of the outer list should be 1 if the region only mapped to one location.
		if (len(end1_list) > 1) or (len(end2_list) > 1):
			unmapped_file.write("%s\tOne or both ends mapped to more than location in the target genome\n" % i)
			continue

		# Pull out the information for the given lifted region
		end1_data = end1_list[0]
		end2_data = end2_list[0]

		# Remove interactions where either end changed chromosomes between builds.
		# Column 0 is compared with column 5 for end 1 and with column 9 for end 2.
		# NOTE(review): presumably column 0 is the lifted chromosome and columns 5 / 9 hold the
		# original chromosome of end 1 / end 2 - confirm against the upstream file layout.
		if (end1_data[0] != end1_data[5]) or (end2_data[0] != end2_data[9]):
			unmapped_file.write("%s\tOne or both ends mapped to a different chromosome in the target genome\n" % i)
			continue

		# Write interactions that passed filters to the out file:
		# first four columns of end 1, first four columns of end 2, then columns 13 onward of end 1.
		# The three groups are joined separately so the layout is unchanged even when
		# there are no columns past index 12 (the trailing tab is kept).
		end1_coords = "\t".join(end1_data[:4])
		end2_coords = "\t".join(end2_data[:4])
		remaining_columns = "\t".join(end1_data[13:])
		out_file.write("%s\t%s\t%s\n" % (end1_coords, end2_coords, remaining_columns))
