#!/usr/bin/env ruby

require 'set'
require 'gene_info'

# Removes, in place, every entry of +genes+ that +mapping+ does not report
# via +include?+, writing one "Unknown gene" line to STDERR per removed entry.
#
# genes   - Array of gene identifiers; mutated by this call.
# mapping - any object responding to +include?+.
#
# Returns the same +genes+ array (the receiver of +delete_if+).
def self.filter_unknowns(genes, mapping)
	genes.delete_if do |candidate|
		# Known genes are kept without any output.
		next false if mapping.include?(candidate)

		STDERR.write("Unknown gene: #{candidate}\n")
		true
	end
end

# Run parameters. All paths are resolved relative to this script's directory.
script_dir = File.dirname(__FILE__)

# Fold-change cutoff; the signature threshold is derived from it further down.
fc = 10
# Tissue name: matched against the regulator table and also used as the
# expression file name inside expresion_folder.
current_tissue = "Tumor_lung"
expresion_folder = File.join(script_dir, '../data')
regulators_filename = File.join(script_dir, '../data_new/TfEnrGeneCancer.txt')

# Collect the names of regulators listed for the current tissue.
#
# The regulator table is read as tab-separated text: the first line is skipped
# (treated as a header), the first column is taken as the regulator name and
# the second-to-last column as its tissue. Tissue names are compared
# case-insensitively.
regulators = []
wanted_tissue = current_tissue.downcase
# File.foreach (rather than IO.foreach) never interprets the path as a command.
File.foreach(regulators_filename).with_index do |line, line_index|
	next if line_index.zero?

	name, tissue = line.chomp.split(/\t/).values_at(0, -2)
	# Blank or single-column lines have no tissue column; skip them instead of
	# raising NoMethodError on nil.
	next if tissue.nil?

	regulators << name if tissue.downcase == wanted_tissue
end

STDERR.write("found #{regulators.size} regulators\n")

# Base-10 logarithm of the fold-change cutoff `fc`, used as the threshold when
# selecting signature genes. Math.log10 avoids the rounding error of dividing
# two natural logarithms (e.g. for 1000 the quotient is 2.9999999999999996).
LogTh = Math.log10(fc)

# Build the expression signature for the current tissue.
#
# The expression file is read as tab-separated text: the first line is skipped
# (treated as a header), column 0 is taken as the gene and column 5 as the
# value compared against the threshold (presumably a log10 fold change, given
# the variable name -- confirm against the data files). A gene is kept when
# |value / LogTh| > 1. Missing or non-numeric values become 0.0 via to_f and
# are therefore never selected.
signature = []
# File.foreach (rather than IO.foreach) never interprets the path as a command.
File.foreach(File.join(expresion_folder, current_tissue)).with_index do |line, line_index|
	next if line_index.zero?

	gene, logfc = line.chomp.split(/\t/).values_at(0, 5)
	signature << gene if (logfc.to_f / LogTh).abs > 1
end

STDERR.write("found #{signature.size} differentially expressed genes\n")
if signature.size > 100
	STDERR.write("ABORTING - large signature\n")
	# Exit with a failure status so callers can tell that no pairs were produced;
	# a bare `exit` would report success.
	exit 1
end

# Gene lookup for human, used both to drop unknown genes and to read each
# gene's entrez_id for the output.
gi = GeneInfo.for_species(GeneInfo::Human)

# Drop every regulator / signature gene the lookup does not include.
STDERR.write("Filtering regulators\n")
filter_unknowns(regulators, gi)
STDERR.write("Filtering signature\n")
filter_unknowns(signature, gi)

# Print one tab-separated line of entrez ids per (regulator, signature gene)
# pair, regulators varying slowest.
regulators.each do |regulator|
	signature.each do |target|
		puts [gi[regulator].entrez_id, gi[target].entrez_id].join("\t")
	end
end
