#!/usr/bin/env ruby

require 'set'
require 'gene_info'

# Gene lookup for human, obtained from GeneInfo.
gi = GeneInfo.for_species(GeneInfo::Human)

# The castle dataset refers to some genes by names the lookup does not use;
# register each of those names as an alias of the entry found under the
# corresponding lookup key.
{ 'TDP43' => 'TARDBP', 'hnRNPC' => 'HNRNPC' }.each do |castle_name, lookup_name|
  gi[castle_name] = gi[lookup_name]
end

# Read the tab-separated map file (first column: event, second column: `sf`
# name) and collect, per Entrez id reported by the gene lookup for that name,
# the set of events listed for it.
events_filename = File.join(File.dirname(__FILE__), '../data_jul_31/castle_to_event_map.txt')
# Each id starts out with its own empty Set the first time it is looked up.
events = Hash.new { |hash, key| hash[key] = Set.new }
File.foreach(events_filename) do |line|
  event, sf = line.chomp.split(/\t/)

  unless gi.include?(sf)
    # Warn and skip the line: a name the lookup does not know has no record
    # to take an Entrez id from (assumes gi[sf] is unusable here -- TODO confirm).
    $stderr.puts("Cannot match #{sf}")
    next
  end

  # Store the event parsed from this line (not the `events` hash itself), so
  # that later set intersections compare actual events.
  events[gi[sf].entrez_id] << event
end

# Collect every pair of ids whose event sets have at least one element in
# common. Each pair is stored with the smaller id first, so a membership test
# does not depend on the order in which the two ids are given.
sharing_sfs = Set.new
# combination(2) yields each unordered pair of distinct hash entries exactly
# once, so an entry is never paired with itself.
events.to_a.combination(2) do |(id_a, events_a), (id_b, events_b)|
  ordered_pair = id_a > id_b ? [id_b, id_a] : [id_a, id_b]

  sharing_sfs << ordered_pair if events_a.intersect?(events_b)
end

# Print, unchanged, every line of the tab-separated distances file whose two
# leading id columns form a pair recorded in sharing_sfs.
distances_filename = File.join(File.dirname(__FILE__), '../data_jul_31/sf_distances.txt')

File.foreach(distances_filename) do |row|
  source, target, _distance = row.chomp.split(/\t/)
  # Lines whose two id columns are textually identical are ignored.
  next if source == target

  # sharing_sfs stores pairs smaller-id-first, so look up in that order.
  lookup_key = [source.to_i, target.to_i].minmax

  puts row if sharing_sfs.include?(lookup_key)
end
