#!/usr/bin/env ruby

require 'fileutils'

require 'tempfile'
require 'gene_info'
require 'optparse'
require 'set'
require 'nokogiri'
require 'returning'

# Replaces everything after the first '.' of the file's basename with +newext+.
#
# The cut is made at the FIRST dot, so a multi-part extension such as
# "archive.tar.gz" is replaced as a whole, and a dot-file such as ".rc" is
# reduced to an empty stem.
#
# filename - path whose extension should be replaced
# newext   - new extension, without the leading dot
#
# Returns the rewritten path. The directory part comes from File.dirname, so
# a bare file name comes back prefixed with "./".
def change_extension(filename, newext)
	directory = File.dirname(filename)
	leaf = File.basename(filename)
	dot_at = leaf.index('.')
	stem = dot_at ? leaf[0...dot_at] : leaf
	File.join(directory, "#{stem}." + newext)
end

# Builds a direction-independent key for the edge between two nodes: the
# endpoint that compares smaller (via <) is always written first, so
# (a, b) and (b, a) produce the same name.
#
# Returns the two endpoints joined by an underscore.
def edge_name(from, to)
	lower, upper = from < to ? [from, to] : [to, from]
	"#{lower}_#{upper}"
end

# Records the undirected interaction between +from+ and +to+ in +edges+.
#
# edges      - Hash of edge name => attribute Hash; entries are reached
#              through edges[name], so it is expected to create missing
#              entries on access
# from, to   - raw endpoint ids; each one is replaced by
#              gi[id.to_i].default_id when gi.include?(id.to_i)
# confidence - stored unchanged under 'confidence'
# gi         - lookup answering include? and []
#
# The entry is keyed by edge_name, so the same pair seen in either direction
# updates a single entry and the most recent call wins.
#
# Returns the edge name.
def add_edge(edges, from, to, confidence, gi)
	from = gi[from.to_i].default_id if gi.include?(from.to_i)
	to = gi[to.to_i].default_id if gi.include?(to.to_i)

	# Object#tap is core Ruby: it runs the block for its side effects and
	# hands back the receiver, so no third-party helper is needed here.
	edge_name(from, to).tap do |edge_str|
		edge = edges[edge_str]
		edge['source'] = from
		edge['target'] = to
		edge['confidence'] = confidence
	end
end

# Registers one endpoint of an interaction in +nodes+.
#
# nodes     - Hash of node id => attribute Hash; the entry is reached through
#             nodes[id], so it is expected to create missing entries on access
# terminals - collection queried with include? for terminal node ids
# anchor    - id of the anchor node
# gi        - lookup answering include? and []; when it knows from.to_i, the
#             entry's default_id replaces +from+
# from      - raw node id as read from the input
#
# "type" is only written for terminals and the anchor; when a node is both,
# 'anchor' wins. "name" is always set to the node id.
#
# Returns the (possibly translated) node id.
def add_node(nodes, terminals, anchor, gi, from)
	numeric_id = from.to_i
	from = gi[numeric_id].default_id if gi.include?(numeric_id)

	kind = if from == anchor
		'anchor'
	elsif terminals.include?(from)
		'terminal'
	end

	node = nodes[from]
	node["type"] = kind if kind
	node["name"] = from
end

# Gene lookup for GeneInfo::Human. It is handed to add_node and add_edge,
# which use it to replace a numeric id with the matching entry's default_id.
gi = GeneInfo.for_species(GeneInfo::Human)

# Reads a gene set from a text file holding one name per line.
#
# filename - path of the file to read
# gi       - unused; kept so existing callers need not change
#
# Returns a Set with every line of the file, line terminator removed
# (a blank line contributes the empty string).
def read_gene_set(filename, gi)
	# File.foreach always opens +filename+ as a file, whereas IO.foreach can
	# run a name that starts with "|" as a command.
	File.foreach(filename).each_with_object(Set.new) do |line, geneset|
		geneset << line.chomp
	end
end

# Main script: every command-line argument is a tissue folder. Each file in
# it whose name ends in ".<tissue>" is read as a whitespace-separated edge
# list (from, to, confidence) and written next to it as "<file>.xgmml".
#
# The part of the file name before the first dot is the anchor node; the
# rest names the "<rest>.terminals.txt" file in the parent folder that lists
# the terminal nodes.

# Fill colour per node "type"; nodes of any other type use default_fill.
node_fills = { 'anchor' => "#9999FF", 'terminal' => "#00CCFF" }
default_fill = "#CCCCCC"

ARGV.each do |foldername|
	tissue_name = File.basename(foldername)

	$stderr.puts "Tissue " + tissue_name
	Dir[File.join(foldername, "*")].each do |filename|
		# Literal suffix test: the folder name may contain characters that
		# are special inside a regular expression.
		next unless filename.end_with?(".#{tissue_name}")

		$stderr.puts "Processing " + filename

		names = File.basename(filename).split('.')
		parent_folder = File.join(foldername, '..')
		terminals_filename = File.join(parent_folder, names[1..-1].join('.') + '.terminals.txt')

		terminals = read_gene_set(terminals_filename, gi)
		anchor = names[0]

		# Both hashes create an empty attribute hash on first access, which
		# add_node and add_edge rely on.
		nodes = Hash.new { |hash, key| hash[key] = {} }
		edges = Hash.new { |hash, key| hash[key] = {} }

		File.foreach(filename) do |line|
			# A bare split ignores leading whitespace and the line terminator.
			from, to, confidence = line.split.first(3)
			# Blank or truncated lines carry no edge to record.
			next unless from && to

			add_node(nodes, terminals, anchor, gi, from)
			add_node(nodes, terminals, anchor, gi, to)

			add_edge(edges, from, to, confidence, gi)
		end

		out_filename = filename + ".xgmml"

		doc = Nokogiri::XML::Builder.new do |xml|
			# The graph is labelled with the name of the file it is written to.
			label = File.basename(out_filename)
			xml.graph('xmlns' => 'http://www.cs.rpi.edu/XGMML', 'label' => label, 'directed' => '0') {
				nodes.each do |id, attr|
					xml.node(:id => id, :label => id) {
						xml.att :name => 'type', :type => 'string', :value => attr['type']
						xml.att(:name => 'name', :type => 'string', :value => attr['name']) if attr.include? 'name'
						# Same ellipse for every node; only the fill depends on the type.
						xml.graphics :type => "ELLIPSE", :h => "40.0", :w => "40.0",
							:fill => node_fills.fetch(attr['type'], default_fill),
							:width => "1", :outline => "#666666"
					}
				end

				edges.each_value do |attr|
					source = attr['source']
					target = attr['target']
					xml.edge(:source => source,
						:target => target,
						:label => "#{source} (pp) #{target}",
						:id => "#{source} (pp) #{target}") {
						xml.att(:name => 'interaction',
								:type => 'string',
								:value => 'pp')
						# Only emitted for edges that carry an 'anat' attribute.
						xml.att(:name => 'anat',
								:type => 'real',
								:value => 1) if attr['anat']
						xml.att(:name => 'confidence',
								:type => 'real',
								:value => attr['confidence'])
						# Line width is the confidence times ten, truncated to an integer.
						xml.graphics :fill => "#000000", :width => (attr['confidence'].to_f * 10).to_i
					}
				end
			}
		end.doc

		# The block form closes (and therefore flushes) the output file.
		File.open(out_filename, "w") { |out| doc.write_xml_to(out) }
	end
end
