#!/usr/bin/env ruby

require 'gene_info'
require 'returning'
require 'mean'
require 'set'
require 'thread'
require 'peach'

# Sums -ln(confidence) over every edge line of a network file.
#
# Each line is expected to hold at least three whitespace-separated fields;
# the third one is read as the edge confidence. Fields are split on runs of
# whitespace (the same rule read_network applies), so aligned or
# double-spaced columns do not shift the confidence out of position.
#
# filename    - path of the network file to read.
# anchor_name - unused; accepted for signature parity with the other
#               per-network helpers.
#
# Returns 0 for an empty file, otherwise a Float. A line with fewer than
# three fields contributes Infinity, because nil.to_f is 0.0 and
# -Math.log(0.0) is Infinity.
def total_network_size(filename, anchor_name)
	IO.foreach(filename).inject(0) do |total, line|
		confidence = line.chomp.split(/\s+/)[2]
		total - Math.log(confidence.to_f)
	end
end

# Loads an edge list into a symmetric adjacency table.
#
# Every line supplies two node names as its first two whitespace-separated
# fields. Any further field (e.g. a confidence) is ignored: each edge is
# stored with weight 1 in both directions.
#
# filename - path of the network file to read.
#
# Returns a Hash of node => { neighbour => 1 }. The Hash auto-creates an
# empty neighbour table when an unknown node is looked up.
def read_network(filename)
	adjacency = Hash.new { |table, node| table[node] = {} }

	IO.foreach(filename) do |row|
		source, target = row.chomp.split(/\s+/)
		weight = 1
		adjacency[source][target] = weight
		adjacency[target][source] = weight
	end

	adjacency
end

# Single-source shortest path lengths (Dijkstra with a linear-scan queue).
#
# from    - the start node.
# network - adjacency table: node => { neighbour => edge length }.
#
# Returns a Hash of node => distance from `from`. Only nodes that were
# actually reached get an entry; looking up any other node yields
# Float::INFINITY through the Hash default.
def shortest_paths(from, network)
	returning(Hash.new(Float::INFINITY)) do |distances|
		distances[from] = 0
		queue = network.keys

		until queue.empty?
			u = queue.min_by { |v| distances[v] }
			queue.delete(u)

			# Once the closest remaining node is at infinite distance, everything
			# still queued is unreachable. The test must look at the node's
			# distance: comparing the node itself to infinity never matches.
			break if distances[u] == Float::INFINITY

			network[u].each do |v, d|
				alt = distances[u] + d
				distances[v] = alt if alt < distances[v]
			end
		end
	end
end

# Largest shortest-path distance from the anchor to any node it can reach.
#
# network     - adjacency table handed to shortest_paths.
# filename    - unused; present so every scoring method takes the same arguments.
# anchor_name - the start node.
def max_distance_from_anchor(network, filename, anchor_name)
	distances = shortest_paths(anchor_name, network)
	distances.values.max
end

# Mean shortest-path distance from the anchor over the nodes it can reach
# (the anchor itself, at distance 0, is part of the average).
#
# network     - adjacency table handed to shortest_paths.
# filename    - unused; present so every scoring method takes the same arguments.
# anchor_name - the start node.
def mean_distance_from_anchor(network, filename, anchor_name)
	distances = shortest_paths(anchor_name, network)
	distances.values.mean
end

# Counts the nodes of the network whose integer id is known to @gi.
#
# Nodes that fail the lookup are left out; per the original author's note
# these are the terminals, whose names are not valid gene ids.
#
# network     - adjacency table keyed by node name.
# filename    - unused; present so every scoring method takes the same arguments.
# anchor_name - unused.
def num_nodes(network, filename, anchor_name)
	known_nodes = network.keys.select { |node| @gi.include?(node.to_i) }
	known_nodes.size
end

# Counts the undirected edges whose two endpoints are both known to @gi.
#
# The adjacency table stores every edge once per direction, so the number
# of qualifying directed entries is halved. The fold is seeded with 0 so
# an empty network yields 0 instead of raising on nil.
#
# network     - adjacency table: node => { neighbour => weight }.
# filename    - unused; present so every scoring method takes the same arguments.
# anchor_name - unused.
def num_edges(network, filename, anchor_name)
	directed = network.inject(0) do |sum, (node, neighbours)|
		next sum unless @gi.include?(node.to_i)

		sum + neighbours.count { |neighbour, _weight| @gi.include?(neighbour.to_i) }
	end

	directed / 2
end

# Sums the absolute expression values of the genes appearing in a network file.
#
# The first two fields of every line are read as integer gene ids. Fields
# are split on runs of whitespace, the same rule read_network applies to
# the same file, so a double-spaced line does not lose its second gene.
# Ids that convert to 0 (non-numeric names) are skipped. Ids known to @gi
# are replaced by their default_id before the @expression lookup. The
# anchor gene is left out of the sum.
#
# network     - unused; present so every scoring method takes the same arguments.
# filename    - path of the network file to read.
# anchor_name - id of the anchor gene.
# tissue      - @expression column to sum; defaults to 'colondiff', the
#               value that used to be hard-coded here. A filename-derived
#               tissue (text after the last underscore plus 'diff') existed
#               previously but was disabled.
#
# Returns the sum as a Float (0.0 when no gene has a value for the tissue).
def network_expression(network, filename, anchor_name, tissue = 'colondiff')
	anchor_id = anchor_name.to_i
	anchor_name = @gi[anchor_id].default_id if @gi.include?(anchor_id)

	genes = Set.new
	IO.foreach(filename) do |line|
		line.chomp.split(/\s+/).first(2).each do |field|
			id = field.to_i
			next if id == 0

			genes << (@gi.include?(id) ? @gi[id].default_id : id)
		end
	end

	genes.inject(0.0) do |score, gene|
		next score if gene == anchor_name

		# Check include? first: @expression may auto-create entries on lookup.
		value = @expression.include?(gene) ? @expression[gene][tissue] : nil
		value.nil? ? score : score + value.abs
	end
end

# Names of the scoring methods applied to every network, in output-column
# order. Each is invoked as method(network, filename, anchor_name).
ScoringMethods = [
	:max_distance_from_anchor,
	:mean_distance_from_anchor,
	:num_nodes,
	:num_edges,
	:network_expression
]
require 'optparse'

# Command-line handling: one optional -t/--terminals switch plus a
# positional folder argument.
terminals_filename = nil
parser = OptionParser.new do |opts|
	opts.banner = "Usage: #{File.basename($PROGRAM_NAME)} folder"

	opts.on( '-t', '--terminals FILENAME', String,
	         'terminals filename (default: <folder>.terminals.txt)') do |opt|
		terminals_filename = opt
	end
end
parser.parse!

# NOTE: folder_name stays nil when no positional argument is given; the
# default terminals path then evaluates to ".terminals.txt".
folder_name = ARGV.shift unless ARGV.empty?
terminals_filename ||= "#{folder_name}.terminals.txt"

# Gene lookup table used by the scoring methods and the loaders below.
@gi = GeneInfo.for_species(GeneInfo::Human)

# Terminal names, one per line; a name known to @gi is stored as its default id.
terminals = Set.new
IO.foreach(terminals_filename) do |line|
	raw_name = line.chomp
	terminals << (@gi.include?(raw_name) ? @gi[raw_name].default_id : raw_name)
end

# Expression table: tab-separated, with the gene name in the last column.
# The first line holds the column titles (stored lower-cased, last column
# dropped). Every other line becomes @expression[gene] = { title => Float },
# leaving out cells whose text is 'NA'.
expression_filename = File.join(File.dirname(__FILE__), '../data_jun_9_13/expression.diff.txt')
@expression = Hash.new { |hash, key| hash[key] = {} }
titles = []
IO.foreach(expression_filename).each_with_index do |line, row_index|
	fields = line.chomp.split(/\t/)

	if row_index.zero?
		titles = fields[0...-1].map(&:downcase)
		next
	end

	gene = fields.pop
	gene = @gi[gene].default_id if @gi.include? gene
	present = titles.zip(fields).reject { |_title, value| value == 'NA' }
	@expression[gene] = Hash[present.map { |title, value| [title, value.to_f] }]
end

# Symmetric lookup of the third tab-separated column for every (from, to)
# pair in the interaction file. Within this file it is referenced only by
# the disabled xgmml export in the scoring loop.
pubmed_filename = '/home/bnet/atiasnir/anat/AnatData/H_sapiens/H_sapiens-ppi-pdna.eat'
pubmed = Hash.new { |hash, key| hash[key] = {} }
IO.foreach(pubmed_filename) do |line|
	source, target, ids = line.chomp.split(/\t/)
	[[source, target], [target, source]].each do |a, b|
		pubmed[a][b] = ids
	end
end

#require 'debugger'; Debugger.start(:post_mortem => true)
# scores = Hash.new {|h,k| h[k] = Hash.new {|h,k| h[k] = {}}}
# Scores every *.net file under <folder>/rnd_*/random_*/ through peach(20).
# Rows are collected per integer anchor id; appends to the shared table go
# through a mutex.
scores = Hash.new { |table, anchor| table[anchor] = [] }
scores_mutex = Mutex.new
Dir["#{folder_name}/rnd_*/random_*/*.net"].peach(20) do |filename|
	next if [/.msg$/, /.freq$/, /.xgmml$/].any? { |pattern| filename =~ pattern }

	# The basename is split on dots: the first part is the anchor id and the
	# second the control. With fewer than three parts the control is reported
	# as "None".
	anchor_field, control, third_part = File.basename(filename).split('.')
	control = "None" if third_part.nil?

	# Show the gene name when the id is known; otherwise the raw first part.
	anchor_id = anchor_field.to_i
	anchor_symbol = @gi.include?(anchor_id) ? @gi[anchor_id].name : anchor_field

	network = read_network(filename)
	metric_values = ScoringMethods.map do |method_name|
		send(method_name, network, filename, anchor_id.to_s)
	end
	row = [anchor_symbol, control] + metric_values

	scores_mutex.synchronize do
		scores[anchor_id] << row
	end

	# Disabled xgmml export, kept for reference:
	#File.open(filename + '.xgmml', 'w') do |f|
	#	anat_to_xgmml(filename, @gi[anchor_name].default_id, terminals, @gi, 0, pubmed).write_xml_to(f)
	#end unless File.exists? filename + '.xgmml'
end

# require "debugger"; debugger

# Tab-separated report: a header line, then one line per scored network.
# Anchors are ordered by comparing their lists of rows.
header = ['anchor', 'control'] + ScoringMethods
puts header.join("\t")
scores.sort_by { |_anchor, rows| rows }.each do |_anchor, rows|
	rows.each { |row| puts row.join("\t") }
end

