#!/usr/bin/env ruby

require 'set'
require 'gene_info'

# Gene lookup for the human species, provided by the project-local gene_info
# library. It is queried below with include? and [].
gi = GeneInfo.for_species(GeneInfo::Human)

# GO annotation file consumed by the annotation-parsing step later in the script.
go_annotation = '/home/bnet/atiasnir/go/gene_association.goa_human'

# List of splicing-factor names (one per line), located relative to this script.
sf_filename = File.join(File.dirname(__FILE__), '../data_aug_5/OnlyAlternativeSplicingFactors.txt')

# Translate every listed name into its default id. Names the lookup does not
# know are reported on stderr and left out of the result.
mapped_ids = IO.foreach(sf_filename).each_with_object([]) do |raw, ids|
	# strip also removes the trailing newline, so no separate chomp is needed.
	symbol = raw.strip

	unless gi.include?(symbol)
		$stderr.puts "Cannot map #{symbol}"
		next
	end

	ids << gi[symbol].default_id
end

# Several names may resolve to the same id; keep each id once.
sfs = mapped_ids.uniq

# Collect, for every splicing factor in `sfs`, the GO biological-process
# categories it is annotated with. Keys are default gene ids (the same id space
# as `sfs`); values are arrays of GO category ids (duplicates are possible).
annotations = Hash.new { |hash, key| hash[key] = [] }

# Gene symbols already reported as unmappable, so each is reported only once.
seen = Set.new

# Build the membership index once instead of scanning `sfs` for every line.
sf_ids = sfs.to_set

IO.foreach(go_annotation) do |line|
	# Lines starting with '!' are header/comment lines in the annotation file.
	next if line.start_with?('!')

	# 0-based GAF columns: 2 = object symbol, 3 = qualifier, 4 = GO id, 8 = aspect.
	# The qualifier column (not the DB:Reference column at index 5) is where a
	# 'NOT' marker appears.
	gene, flag, category, ns = line.chomp.split(/\t/).values_at(2, 3, 4, 8)

	# Keep only biological-process ('P') annotations that are not negated.
	next unless ns == 'P'
	next if flag.to_s.include?('NOT')

	unless gi.include? gene
		unless seen.include? gene
			$stderr.puts("cannot map #{gene} in line")
			seen << gene
		end
		next
	end

	# Compare and store by default id: `sfs` holds default ids, so testing the
	# raw symbol against it would miss genes whose symbol differs from the id.
	gene_id = gi[gene].default_id
	next unless sf_ids.include? gene_id

	annotations[gene_id] << category
end

# Score every unordered pair of annotated splicing factors by the Jaccard index
# of their GO category lists (|intersection| / |union|) and print the pairs with
# a positive score as tab-separated lines, in ascending order of score.
annotated_sfs = annotations.keys

results = []
annotated_sfs.combination(2).each do |sf1, sf2|
	go_sf1 = annotations[sf1]
	go_sf2 = annotations[sf2]

	# Array#| and Array#& de-duplicate, so repeated categories count once.
	union = go_sf1 | go_sf2

	# Guard on the annotation lists (not on the identifiers): an empty union
	# would make the ratio 0/0. Checking the ids themselves could never trigger
	# for two distinct keys and fails for ids that do not respond to empty?.
	next if union.empty?

	jaccard = (go_sf1 & go_sf2).size.to_f / union.size

	results << [sf1, sf2, jaccard] if jaccard > 0
end

results.sort_by(&:last).each { |r| puts r.join("\t") }
