#!/usr/bin/env ruby

require 'set'
require 'optparse'

# Command-line handling. The only option is -f/--filename; the brackets in
# '-f [FILENAME]' make its argument optional, so a bare -f leaves the value nil.
# NOTE(review): out_filename is not read anywhere else in the visible part of
# this script -- confirm whether the output was meant to go to that file.
out_filename = nil
option_parser = OptionParser.new
option_parser.banner = "Usage: #{File.basename($PROGRAM_NAME)} -f output_filename"
option_parser.on('-f [FILENAME]', '--filename', String, 'Output filename') do |filename|
	out_filename = filename
end
# parse! consumes the recognised switches from ARGV in place.
option_parser.parse!

# Lightweight exon record: its name, the gene it belongs to and its coordinate.
Exon = Struct.new(:name, :gene, :coord)

# A named gene paired with a coordinate object.
#
# The coordinate is only required to respond to +inspect+ and +contains?+;
# both calls are forwarded to it unchanged.
class Gene
	attr_accessor :name, :coord

	# name::  identifier of the gene
	# coord:: coordinate object describing where the gene lies
	def initialize(name, coord)
		@name = name
		@coord = coord
	end

	# Renders the gene as "<name.inspect>|<coord.inspect>".
	def inspect
		"#{@name.inspect}|#{@coord.inspect}"
	end

	# True when the gene's coordinate reports that it contains +other+.
	def contains?(other)
		@coord.contains?(other)
	end
end

# A stranded interval on a chromosome.
#
# +strand+ is stored as a boolean (true only for '+'), and +interval+ is an
# inclusive Range built from the integer-converted start and end positions.
class Coord
	attr_accessor :chromosome, :strand, :interval

	# chr::    chromosome name
	# st::     strand symbol; '+' maps to true, anything else to false
	# startp:: start position (converted with to_i)
	# endp::   end position (converted with to_i)
	def initialize(chr, st, startp, endp)
		@chromosome = chr
		@strand = st == '+'
		@interval = Range.new(startp.to_i, endp.to_i)
	end

	# True when +coord+ is on the same strand and chromosome and both of its
	# end points fall inside this interval.
	def contains?(coord)
		return false unless coord.strand == strand && coord.chromosome == chromosome

		inner = coord.interval
		@interval.include?(inner.first) && @interval.include?(inner.last)
	end

	# Widens the interval so that it reaches +startp+ and +endp+; it never
	# shrinks. Returns the new interval when the upper bound moved, nil otherwise.
	def update(startp, endp)
		low = @interval.first
		high = @interval.last
		@interval = (startp..high) if startp < low
		@interval = (@interval.first..endp) if endp > high
	end

	# Array form: [chromosome, first position, last position, '+' or '-'].
	def format
		sign = strand ? '+' : '-'
		[chromosome, interval.first, interval.last, sign]
	end
end

# Reads a tab-separated interaction file and appends one entry to
# +interactions+ for every exon in @exons whose interval, widened by 300
# positions on both sides, fully covers the site described by a line.
#
# The first line of the file is treated as a header and skipped.
#
# file1::        path of the tab-separated file
# sf::           splicing-factor label. An Integer is taken as the index of
#                the column that holds the label; any other value is used as
#                the label itself.
# interactions:: collection responding to <<; receives
#                [label, exon, start, stop, symbol] entries
# columns::      column indices of chromosome, start, stop, strand and symbol,
#                in that order (the strand column is read but not used)
# score_args::   optional score column index, optionally followed by a cutoff
#
# Score filtering: when a block is given it is called with the integer score
# of each line and the line is kept only if the block returns a truthy value
# (a score column must be supplied in that case). Without a block, lines whose
# integer score exceeds the cutoff are dropped when a score column was given
# (a cutoff must then be supplied as well). Scores are parsed with to_i, so
# fractional parts are discarded.
#
# Relies on @exons, a Hash mapping chromosome name to an Array of exons that
# respond to coord.interval.
def read_interaction_file(file1, sf, interactions, columns, *score_args)
	score_column, cutoff = score_args
	flank = 300
	# Integer instead of Fixnum: the Fixnum constant no longer exists on
	# Ruby 3.2+, while Integer matches on every Ruby version.
	label_from_column = sf.is_a?(Integer)

	# File.foreach (not IO.foreach) so a path starting with "|" is never
	# interpreted as a command to run.
	File.foreach(file1).with_index do |line, line_number|
		next if line_number.zero? # header row

		records = line.chomp.split(/\t/)
		chr, start, stop, _strand, symbol = records.values_at(*columns)
		start = start.to_i
		stop = stop.to_i

		if block_given?
			next unless yield(records[score_column].to_i)
		elsif !score_column.nil?
			next if records[score_column].to_i > cutoff
		end

		label = label_from_column ? records[sf] : sf
		@exons[chr].each do |exon|
			span = exon.coord.interval
			next unless span.first - flank <= start && span.last + flank >= stop

			interactions << [label, exon, start, stop, symbol]
		end
	end
end

# Build the exon index: chromosome name => list of Exon records.
# Unknown chromosomes start out with an empty list on first access.
castle_filename = File.join(File.dirname(__FILE__), '../data_jul_12/CasFullCoor200FlankAllFields110712.txt')
@exons = Hash.new { |index, chromosome| index[chromosome] = [] }
File.foreach(castle_filename) do |row|
	fields = row.chomp.split(/\t/)
	# Column 0 holds the strand, 1 the chromosome, 6 the name, 8/9 the positions.
	strand, chr, name, start, stop = fields.values_at(0, 1, 6, 8, 9)
	# Only rows whose name matches one of these tags are indexed.
	next unless name =~ /CASEX_|ININ|ALT3|ALT5/

	# No gene is attached to the exon at this point.
	@exons[chr] << Exon.new(name, nil, Coord.new(chr, strand, start, stop))
end


# Collects every [label, exon, start, stop, symbol] hit; being a Set, exact
# duplicates are stored once.
interactions = Set.new
# All interaction files below sit in the same directory relative to this script.
clip_dir = File.join(File.dirname(__FILE__), '../data_jun_5')

#######################################################################
file1 = File.join(clip_dir, 'SFRS1clip.txt')
read_interaction_file(file1, 'SFRS1', interactions, [0, 1, 2, 3, -2])

#######################################################################
file2 = File.join(clip_dir, 'PTBP1clip.txt')
# Disabled variant with score column 5 and cutoff 18:
#read_interaction_file(file2, 'PTBP1', interactions, [0,1,2,3,-2], 5, 18)
read_interaction_file(file2, 'PTBP1', interactions, [0, 1, 2, 3, -2])

#######################################################################
file3 = File.join(clip_dir, 'RBM9clip.txt')
# Disabled variant with score column -2 and cutoff 275.8:
#read_interaction_file(file3, 'RBM9', interactions, [0,1,2,3,-3], -2, 275.8)
read_interaction_file(file3, 'RBM9', interactions, [0, 1, 2, 3, -3])

#######################################################################
file4 = File.join(clip_dir, 'doRiNA240512.txt')
# The label is read from column 0 of each line here (integer second argument).
# Disabled variant that filters on the score with a block:
#read_interaction_file(file4, 0, interactions, [3,4,5,6,1], -2, 95) {|score| score <= 5}
read_interaction_file(file4, 0, interactions, [3, 4, 5, 6, 1])

####################################################################### 

# Reduce each interaction to an [exon name, splicing factor] pair; the Set
# keeps one copy of every pair.
resultset = interactions.each_with_object(Set.new) do |(splicing_factor, exon), pairs|
	pairs << [exon.name, splicing_factor]
end

# Print the pairs in sorted order, one tab-separated pair per line.
resultset.sort.each { |pair| puts pair.join("\t") }
