#!/usr/bin/env ruby

require 'bsearch'

# Lightweight record for one exon: its name, the name of its gene, and its
# coordinate object (anything exposing the fields the callers read).
Exon = Struct.new(:name, :gene, :coord)

# A named gene together with the coordinate object describing where it lies.
class Gene
	attr_accessor :name, :coord

	# n - the gene's name.
	# c - the gene's coordinate; it must respond to contains? for
	#     Gene#contains? to work.
	def initialize(n, c)
		@name, @coord = n, c
	end

	# Returns the inspected name and the inspected coordinate separated by "|".
	def inspect
		"#{@name.inspect}|#{@coord.inspect}"
	end

	# Forwards the containment question to the gene's coordinate and returns
	# whatever that object answers.
	def contains?(c)
		@coord.contains?(c)
	end
end

# A stranded interval on a chromosome.
#
# chromosome - the chromosome identifier as given to the constructor.
# strand     - true when the constructor received '+', false otherwise.
# interval   - inclusive Range built from the start and end positions.
class Coord
	attr_accessor :chromosome, :strand, :interval

	# chr    - chromosome identifier, stored as-is.
	# st     - strand marker; only the exact string '+' yields a true strand.
	# startp - start position, converted with to_i.
	# endp   - end position, converted with to_i.
	def initialize(chr, st, startp, endp)
		@chromosome = chr
		@strand = st == '+'
		@interval = Range.new(startp.to_i, endp.to_i)
	end

	# True when +coord+ is on the same strand and chromosome and both of its
	# interval endpoints fall inside this interval.
	def contains?(coord)
		same_strand = coord.strand == strand
		return false unless same_strand && coord.chromosome == chromosome

		other = coord.interval
		@interval.include?(other.first) && @interval.include?(other.last)
	end

	# Widens the interval so that it reaches down to +startp+ and up to +endp+;
	# it never shrinks. The result is whatever the final conditional yields:
	# the new Range when the upper end grew, nil otherwise.
	def update(startp, endp)
		if startp < @interval.first
			@interval = Range.new(startp, @interval.last)
		end
		if endp > @interval.last
			@interval = Range.new(@interval.first, endp)
		end
	end

	# Returns [chromosome, interval start, interval end, '+' or '-'].
	def format
		strand_symbol = strand ? '+' : '-'
		[chromosome, interval.first, interval.last, strand_symbol]
	end
end

# Reads a tab-separated event file and appends to +interactions+ one entry for
# every exon in @exons that lies close enough to an accepted event.
#
# The first line of the file is skipped as a header. A line is accepted only
# when its event name matches CASEX_, ININ, ALT3 or ALT5.
#
# file1        - path of the tab-separated file to read.
# interactions - Array receiving the results. Each appended element is
#                [exon, start, stop, "<chromosome>: <start>-<stop>",
#                 event name, id].
# columns      - six indexes into each split line, in the order: chromosome,
#                start, stop, strand (read but not used), event name, and an
#                id column that is passed through unchanged.
# score_args   - optional: a score column index, optionally followed by a
#                cutoff.
#
# Score filtering only happens when a score column is given. With a block,
# the integer score is yielded and the line is kept when the block returns a
# truthy value. Without a block, the line is dropped when its score exceeds
# the cutoff; if no cutoff was given nothing is dropped. (Previously a block
# without a score column, or a score column without a cutoff, crashed on the
# first accepted line.)
#
# Matching rules against the exons stored under the line's chromosome:
# * ININ events: the event start is within 100 of the exon's interval end, or
#   the event stop is within 100 of the exon's interval start.
# * all other events: the event start is not below the exon's interval start
#   minus 20 and the event stop is not above the exon's interval end plus 20.
def read_interaction_file(file1, interactions, columns, *score_args)
	score_column, cutoff = score_args

	header = true
	# File.foreach, unlike IO.foreach, never treats a path starting with "|"
	# as a command to execute.
	File.foreach(file1) do |line|
		if header
			header = false
			next
		end

		records = line.chomp.split(/\t/)
		chr, start, stop, _strand, castle_event_name, dror_id = records.values_at(*columns)

		next unless castle_event_name =~ /CASEX_|ININ|ALT3|ALT5/

		start = start.to_i
		stop = stop.to_i

		unless score_column.nil?
			score = records[score_column].to_i
			if block_given?
				next unless yield(score)
			elsif !cutoff.nil?
				next if score > cutoff
			end
		end

		# fetch bypasses the hash's default block, so looking up a chromosome
		# with no exons does not insert an empty entry into the shared index.
		exons = @exons.fetch(chr, [])

		if castle_event_name =~ /ININ/
			candidates = exons.find_all do |exon|
				span = exon.coord.interval
				(start - span.last).abs <= 100 || (stop - span.first).abs <= 100
			end
		else
			candidates = exons.find_all do |exon|
				span = exon.coord.interval
				span.first - 20 <= start && span.last + 20 >= stop
			end
		end

		label_range = "#{start}-#{stop}"
		candidates.each do |candidate|
			interactions << [candidate, start, stop, "#{candidate.coord.chromosome}: #{label_range}", castle_event_name, dror_id]
		end
	end
end

# Command-line arguments, both optional and positional: the exon table path,
# then the output path.
exons_filename, out_filename = ARGV.shift(2)
exons_filename ||= File.join(File.dirname(__FILE__), '../data_jul_12/exons_hg18_names.txt')

# Exons indexed by chromosome; asking for an unseen chromosome creates and
# stores a fresh empty list.
@exons = Hash.new { |hash, key| hash[key] = [] }
# File.foreach, unlike IO.foreach, never treats a path starting with "|" as a
# command to execute.
File.foreach(exons_filename) do |line|
	chr, start, stop, strand, gene, name = line.chomp.split(/\t/)
	@exons[chr] << Exon.new(name, gene, Coord.new(chr, strand, start, stop))
end

interactions = []

#######################################################################
file1 = File.join(File.dirname(__FILE__), '../input/CasFullCoor200FlankAllFields110712.txt')
read_interaction_file(file1, interactions, [1, 8, 9, 0, 6, 7])

# Writes one "<exon name>\t<event name>" line per collected interaction.
write_report = lambda do |io|
	interactions.each do |i|
		exon, event_name = i.values_at(0, -2)
		io.puts([exon.name, event_name].join("\t"))
	end
end

# Without an output path the script used to fail in File.open(nil, 'w') only
# after all input had been processed; the report now goes to standard output.
if out_filename
	File.open(out_filename, 'w', &write_report)
else
	write_report.call($stdout)
end
