#!/usr/bin/env ruby

require 'gene_info'
require 'returning'
require 'nokogiri'
require 'mean'
require 'set'

# Computes the mean absolute expression of the genes that appear in a network
# file, for the tissue named in the file name.
#
# Every line of +filename+ is split on single whitespace characters and its
# first two fields are read as integer node ids. Lines where either id parses
# to 0 are ignored. Ids known to @gi are replaced by their default id. The
# tissue is the second dot-separated component of the file's base name.
#
# Reads the script-level @gi and @expression, which must be set before the
# call.
#
# @param filename [String] path of the network file
# @return [Float] mean of |expression| over the genes that have a value for
#   the tissue; NaN when no gene has one
def network_expression(filename)
	genes = Set.new
	IO.foreach(filename) do |line|
		from, to = line.chomp.split(/\s/).first(2).map(&:to_i)
		next if from == 0 || to == 0
		[from, to].each do |id|
			genes << (@gi.include?(id) ? @gi[id].default_id : id)
		end
	end

	tissue = File.basename(filename).split(/\./)[1]
	values = []
	genes.each do |g|
		next unless @expression.include? g
		value = @expression[g][tissue]
		# 'NA' measurements are dropped when the expression table is loaded, so
		# a known gene may still have no value for this tissue; skip it instead
		# of calling abs on nil.
		values << value.abs unless value.nil?
	end

	# Float divided by an Integer zero yields NaN (no exception), which is the
	# result for a network with no measured genes.
	values.inject(0.0) { |total, v| total + v } / values.size
end

# Positional command-line arguments: first the folder name, then the condition
# name. Array#shift returns nil on an empty array, so a missing argument simply
# leaves the variable nil.
folder_name = ARGV.shift
condition_name = ARGV.shift

# Gene lookup obtained from GeneInfo for the Human species; the rest of the
# script reaches it through @gi.
@gi = GeneInfo.for_species(GeneInfo::Human)

# Read "<folder_name>.terminals.txt", one name per line, into a set. A name
# known to @gi is stored as its default id; any other name is stored as read.
terminals_filename = "#{folder_name}.terminals.txt"
terminals = Set.new
IO.foreach(terminals_filename) do |raw|
	symbol = raw.chomp
	terminals << (@gi.include?(symbol) ? @gi[symbol].default_id : symbol)
end

# Load the tab-separated expression table into @expression, keyed by gene and
# then by column title.
#
# The first line is the header: every column except the last is a title. On
# each following line the last column is the gene and the preceding columns are
# the values, paired with the titles by position. Values equal to 'NA' are left
# out; all others are converted with to_f.
expression_filename = '../data_jun_9_13/expression.txt'
@expression = Hash.new { |hash, key| hash[key] = {} }
titles = []
first = true
IO.foreach(expression_filename) do |row|
	fields = row.chomp.split(/\t/)

	if first
		first = false
		titles = fields[0...-1]
		next
	end

	gene = fields.pop
	# Prefer the default id when @gi knows this gene.
	gene = @gi[gene].default_id if @gi.include?(gene)

	measurements = {}
	titles.zip(fields).each do |title, raw|
		measurements[title] = raw.to_f unless raw == 'NA'
	end
	@expression[gene] = measurements
end

#pubmed_filename = '/home/bnet/atiasnir/anat/AnatData/H_sapiens/H_sapiens-ppi-pdna.eat'
#pubmed = Hash.new { |hash, key| hash[key] = {} }
#IO.foreach(pubmed_filename) do |line| 
#	from, to, ids = line.chomp.split(/\t/)
#	pubmed[from][to] = ids
#	pubmed[to][from] = ids
#end

# Score every file in <folder_name>/ whose path ends with
# ".<condition_name>.<folder_name>".
#
# scores maps an anchor id to a list of rows; each row is
# [anchor symbol, control, *one value per scoring method].
scores = Hash.new {|h,k| h[k] = []}
Dir["#{folder_name}/*"].each do |filename|
	# Literal suffix comparison: both names come from the command line and must
	# not be interpreted as regular-expression syntax.
	next unless filename.end_with?(".#{condition_name}.#{folder_name}")

	anchor_name, control, tissue = File.basename(filename).split('.')
	anchor_symbol = anchor_name
	# A base name with only two components carries no control part.
	control = "None" if tissue.nil?

	# Use the entrez id as the key when @gi knows the anchor; the original
	# symbol is kept for display in the output row.
	anchor_name = @gi[anchor_name].entrez_id if @gi.include? anchor_name

	# NOTE(review): ScoringMethods is not defined in this file and must come
	# from elsewhere. Each method is invoked with (filename, anchor id string),
	# whereas network_expression above accepts only the filename - confirm
	# which methods are meant to be listed.
	scores[anchor_name] << ([anchor_symbol, control] + ScoringMethods.map { |m| self.send(m, filename, anchor_name.to_s) })

	# Disabled: XGMML export of each network.
	# File.open(filename + '.xgmml', 'w') do |f| 
	# 	anat_to_xgmml(filename, gi[anchor_name].default_id, terminals, gi, 0, pubmed).write_xml_to(f)
	# end unless File.exists? filename + '.xgmml'
end

# require "debugger"; debugger

# Print the results as a tab-separated table: a header line naming the two
# fixed columns followed by the scoring methods, then every stored row, with
# the anchors ordered by comparing their row lists.
header = ['anchor', 'control'] + ScoringMethods
puts header.join("\t")
scores.sort_by { |_, rows| rows }.each do |_, rows|
	rows.each { |row| puts row.join("\t") }
end

