#!/usr/bin/env ruby

require 'set'
require 'gene_info'

# Input locations: the reference network is read from a fixed absolute path,
# while the supplementary interaction table is resolved relative to this script.
anat_network_filename = '/home/bnet/atiasnir/anat/AnatData/H_sapiens/H_sapiens.net'
additional_interactions_filename = File.join(
	File.dirname(__FILE__), '..', 'data_nov_1', 'Luhrmann', 'TableS3.txt'
)

# Gene lookup used below both to check Entrez IDs and to map gene names to IDs.
gi = GeneInfo.for_species(GeneInfo::Human)

# Load the reference PPI network into a symmetric adjacency map, so that
# ppi[a][b] == ppi[b][a] == confidence. Each line is read as tab-separated
# with source ID, target ID and confidence in the first three columns.
ppi = Hash.new { |hash, key| hash[key] = {} }
ppi_count = 0
IO.foreach(anat_network_filename) do |line|
	source, target, confidence = line.chomp.split(/\t/).first(3)

	# Blank or truncated lines would otherwise be coerced by to_i/to_f into a
	# bogus 0 <-> 0 edge with weight 0.0 and counted as a PPI; skip them.
	if confidence.nil?
		$stderr.puts "Skipping malformed line: #{line.chomp.inspect}" unless line.strip.empty?
		next
	end

	from = source.to_i
	to = target.to_i
	confidence = confidence.to_f

	# Unmapped IDs are only reported; the edge is still kept in the network.
	$stderr.puts "Cannot map Entrez ID: #{from} (#{source})" unless gi.include? from
	$stderr.puts "Cannot map Entrez ID: #{to} (#{target})" unless gi.include? to

	# Store both directions so lookups work regardless of endpoint order.
	ppi[from][to] = confidence
	ppi[to][from] = confidence

	ppi_count += 1
end
$stderr.puts "#{ppi_count} PPIs loaded from #{anat_network_filename}"


# Load the supplementary interactions as a set of [from, to] Entrez ID pairs.
# Gene names are taken from the 3rd and 6th tab-separated columns; the first
# line of the file is treated as a header and skipped.
additional = Set.new
IO.foreach(additional_interactions_filename).each_with_index do |line, index|
	next if index.zero? # header row

	from_name, to_name = line.chomp.split(/\t/).values_at(2, 5)

	next if from_name == to_name

	unless gi.include? from_name
		$stderr.puts "Cannot map name: #{from_name}"
		next
	end

	unless gi.include? to_name
		$stderr.puts "Cannot map name: #{to_name}"
		next
	end

	from = gi[from_name].entrez_id
	to = gi[to_name].entrez_id

	# Two different names can resolve to the same gene; that is still a
	# self-interaction even though the name comparison above did not catch it.
	next if from == to

	# The reference network is stored symmetrically, so [a, b] and [b, a] are
	# the same interaction. Keep only the first orientation seen, otherwise the
	# pair is counted (and later emitted) twice.
	next if additional.include?([to, from])

	additional << [from, to]
end
$stderr.puts "#{additional.size} PPIs loaded from #{additional_interactions_filename}"

# Split the supplementary interactions into those already present in the
# reference network and those that are new. key? is used instead of ppi[from]
# so that the lookup does not trigger the hash's default block and insert
# empty entries into ppi.
known, unknown = additional.to_a.partition do |from, to|
	ppi.key?(from) && ppi[from].key?(to)
end

$stderr.puts "#{known.size} known PPIs"
$stderr.puts "#{unknown.size} unknown PPIs"

# Without any overlap there is no weight to average; fail with a clear message
# rather than a NoMethodError from dividing nil.
abort 'No known PPIs: cannot compute an average weight for the unknown ones' if known.empty?

# Mean confidence of the interactions that the reference network already has.
# NOTE(review): the values are sorted before summing, presumably to limit
# floating-point error - kept as in the original.
weights = known.map { |from, to| ppi[from][to] }
average = weights.sort.reduce(:+) / weights.size.to_f
$stderr.puts "Average weight of known PPIs: #{average}"

# Emit every new interaction as a tab-separated line, weighted with that
# average and followed by a constant 0 in the fourth column.
unknown.each do |from, to|
	puts [from, to, average, 0].join("\t")
end
