require 'rake/clean'
require 'gene_info'
require 'tempfile'
require 'peach'

# Default pipeline settings. The hash is left mutable on purpose: later code
# in this file merges a local config.json into it.
# Input files live in ../input relative to the directory of this file.
input_path = lambda { |name| File.join(File.dirname(__FILE__), '../input', name) }

Configuration = {
	'debug'                => false,
	'windowsize'           => 300,
	'refseq_filename'      => input_path.call('refSeq_hg18.txt'),
	'exon_filename'        => input_path.call('allExonsCast.txt'),
	'cancer_diff_genes'    => input_path.call('cancerDiffAS.txt'),
	'cancer_control_genes' => input_path.call('notCancerAS.txt'),
	'network'              => input_path.call('H_sapiens-ppi-pdna.net'),
	'network-ppi'          => input_path.call('H_sapiens.net'),
	'alpha'                => 0.25,
	'cores'                => 15,
	'random_repeats'       => 100
}

# Overlay settings from ./config.json (looked up in the current working
# directory) on top of the defaults, when that file is present.
# File.exist? is used because File.exists? was deprecated and finally removed
# in Ruby 3.2, where calling it raises NoMethodError.
if File.exist?('config.json')
	require 'json'
	$stderr.puts "Merging local configuration"
	# Keys from the JSON document replace same-named defaults.
	Configuration.merge!(JSON.parse(File.read('config.json')))
	$stderr.puts "Effective configuration #{Configuration.inspect}"
end

# Load additional Ruby files named by the optional 'include' setting.
# The setting may be a single path or a collection of paths; a single value is
# wrapped in an array (and written back to Configuration) before iterating.
unless Configuration['include'].nil?
	Configuration['include'] = [Configuration['include']] unless Configuration['include'].is_a? Enumerable
	Configuration['include'].each do |filename|
		$stderr.puts "Working dir: #{Dir.getwd}"
		$stderr.puts "Including: #{filename}"
		load filename
	end
	# Printed again because a loaded file may have modified Configuration.
	$stderr.puts "Effective configuration #{Configuration.inspect}"
end

# Default worker count used when invoking task prerequisites.
# A dry run always uses a single worker; otherwise the configured 'cores' value applies.
Cores = if Rake.application.options.dryrun
	1
else
	Configuration['cores']
end

# Reopens Rake::Task to override its private invoke_prerequisites so that
# prerequisites are dispatched through `peach` (from the 'peach' library
# required at the top of this file; presumably a parallel `each`).
class Rake::Task
	# Worker count passed to `peach` for this task; defaults to Cores on first use.
	attr_accessor :cores

	private

	# Looks up every prerequisite in the application and invokes it with the
	# current arguments and invocation chain.
	def invoke_prerequisites(args, invocation_chain)
		@cores = Cores unless @cores
		@prerequisites.peach(@cores) { |prerequisite| application[prerequisite].invoke_with_call_chain(args, invocation_chain) }
	end
end


# Directory containing this file; the helper scripts invoked below are addressed relative to it.
ScriptFolder = File.dirname(__FILE__)
# Command path used by run_anat: 'anat' one level above the script folder.
Steiner = File.join(ScriptFolder, '../anat')

# Runs the anat executable (Steiner) for one anchor against a list of terminals.
#
# terminals       - path of a text file; each line becomes one row of the
#                   generated anat input
# anchor          - value written in the first column of every generated row
# output_filename - handed to anat through its -r option
#
# The generated input is a temporary file of "anchor<TAB>terminal" rows which
# is removed again when the method returns. Everything anat prints on stdout
# is echoed to $stderr.
#
# The method never raises: any error, including a non-zero exit status of the
# child process, is reported on $stderr together with its reason.
def run_anat(terminals, anchor, output_filename)
	anat_input = Tempfile.new('set')

	File.foreach(terminals) do |line|
		anat_input.puts [anchor, line.chomp].join("\t")
	end
	# Make sure the rows are on disk before the child process reads the file.
	anat_input.flush

	cmdline = "#{Steiner} -f . -n human.integrated.net -c 0 -b #{Configuration['alpha']} -s #{anat_input.path} -r #{output_filename} -l 0.75"

	$stderr.puts "RUNNING: #{cmdline}"
	IO.popen(cmdline) do |p|
		p.each {|l| $stderr.puts l}
	end

	# The block form of IO.popen sets $? once the child has exited. Without
	# this check a failing anat run would go completely unnoticed.
	raise "anat exited abnormally (#{$?})" unless $?.success?
rescue => e
	# cmdline is still nil when the failure happened while preparing the input.
	$stderr.puts "!!! FAILED to run anat: " + (cmdline.nil? ? output_filename : cmdline) + " (#{e.message})"
ensure
	# close(true) also unlinks the temporary file.
	anat_input.close(true) unless anat_input.nil?
end

# Builds the exon name table from the configured RefSeq and exon files.
# Declared with file_create, so the rule only fires while the target is missing.
file_create 'exons_hg18_names.txt' do |t|
	command = [
		RUBY,
		"#{ScriptFolder}/create_splicing_event_list.rb",
		Configuration['refseq_filename'],
		Configuration['exon_filename'],
		t.name
	]
	sh command.join(' ')
end

# Maps alternative-splicing microarray ids onto the exon ids of the table above.
file 'event_mapping.txt' => 'exons_hg18_names.txt' do |t|
	script = "#{ScriptFolder}/map_alternative_sp_microarray_id_to_our_exon_id.rb"
	ruby [script, 'exons_hg18_names.txt', t.name].join(' ')
end

# Intersects exons with CLIP data using the configured window size.
# The script writes the target through its -f option; its stdout is captured
# in dror_interactions.txt, which the enriched_sfs.txt rule reads.
# RUBY (the interpreter running rake) is used instead of whatever `ruby` is
# first on PATH, matching how the other rules in this file launch scripts.
file 'sf_interactions.txt' => 'exons_hg18_names.txt' do |t|
	sh %{#{RUBY} #{ScriptFolder}/create_exon_clip_intersect.rb exons_hg18_names.txt -w #{Configuration["windowsize"]} -f #{t.name} > dror_interactions.txt}
end

# Copies sf_interactions.txt, replacing the names in the first two columns by
# their Entrez ids whenever the gene table knows them. Rows are written with
# exactly four tab-separated columns.
file 'sf_interactions.entrez.txt' => 'sf_interactions.txt' do |t|
	gene_info = GeneInfo.for_species(GeneInfo::Human)

	# Make the spellings on the left resolve to the entries stored under the
	# keys on the right.
	[
		%w{HnRNPA1 HNRNPA1},
		%w{HnRNPA2B1 HNRNPA2B1},
		%w{hnRNPC HNRNPC},
		%w{HnRNPF HNRNPF},
		%w{HnRNPH HNRNPH1},
		%w{HnRNPM HNRNPM},
		%w{HnRNPU HNRNPU},
		%w{TDP43 TARDBP}
	].each do |spelling, known_key|
		gene_info[spelling] = gene_info[known_key]
	end

	File.open(t.name, 'w') do |out|
		IO.foreach('sf_interactions.txt') do |row|
			source, target, confidence, flag = row.chomp.split(/\t/)

			# Names the gene table does not know are passed through unchanged.
			source = gene_info[source].entrez_id if gene_info.include?(source)
			target = gene_info[target].entrez_id if gene_info.include?(target)

			out.puts [source, target, confidence, flag].join("\t")
		end
	end
end

# Integrated network: configured network + y2h interactions + the Entrez-mapped
# splicing-factor interactions, concatenated in that order.
file 'human.integrated.net' => 'sf_interactions.entrez.txt' do |t|
	y2h = File.join(File.dirname(__FILE__), '../input/y2h_interactions.txt')
	parts = [Configuration['network'], y2h, 'sf_interactions.entrez.txt']
	sh "cat #{parts.join(' ')} > #{t.name}"
end

# PPI-only variant: configured 'network-ppi' file followed by the y2h interactions.
# No prerequisites are declared for this rule.
file 'human-ppi.integrated.net' do |t|
	y2h = File.join(File.dirname(__FILE__), '../input/y2h_interactions.txt')
	parts = [Configuration['network-ppi'], y2h]
	sh "cat #{parts.join(' ')} > #{t.name}"
end

# Converts the splicing-factor interaction list with interaction_to_bg_network.rb.
file 'sf_interactions.xml' => 'sf_interactions.txt' do |t|
	arguments = ["#{ScriptFolder}/interaction_to_bg_network.rb", 'sf_interactions.txt', t.name]
	ruby arguments.join(' ')
end

# Runs sf_enrichments.rb on the event mapping and dror_interactions.txt.
# dror_interactions.txt is produced as a side output of the sf_interactions.txt
# rule, which is why that rule is listed as a prerequisite.
file 'enriched_sfs.txt' => %w{sf_interactions.txt event_mapping.txt} do |t|
	arguments = ["#{ScriptFolder}/sf_enrichments.rb", 'event_mapping.txt', 'dror_interactions.txt', t.name]
	ruby arguments.join(' ')
end

# Tissues to process. Wider selections used previously, kept for reference:
#tissues = %w{A549 HCT116 SW480 Tumor_colon Tumor_lung}
#tissues = %w{Tumor_colon Tumor_lung}
tissues = %w{Tumor_colon}

# Control names: first tab-separated column of the control file with "/"
# replaced by ".", de-duplicated, and without the "tissue" entry.
controls = IO.readlines(Configuration["cancer_control_genes"]).
	map { |row| row.chomp.split(/\t/).first.gsub("/", ".") }.
	uniq.
	reject { |name| name == "tissue" }

$stderr.puts "Tissues: " + tissues.join(' ')
$stderr.puts "Controls: " + controls.join(' ')

# Extracts the per-tissue terminal files with extract_splicing_terminals.r
# (run directly, so the script has to be executable). The last argument './'
# is presumably the output directory - confirm against the R script.
# The extraction is skipped when Tumor_lung.terminals.txt already exists; that
# one file serves as the marker for the whole step.
# File.exist? replaces File.exists?, which no longer exists as of Ruby 3.2.
task :create_terminals_per_tissue => 'event_mapping.txt' do |t|
	unless File.exist? 'Tumor_lung.terminals.txt'
		sh %{#{ScriptFolder}/extract_splicing_terminals.r #{Configuration["cancer_diff_genes"]} event_mapping.txt ./}
	end
end

# Directory receiving the randomised terminal sets.
directory 'random'

# Names of all random pseudo-tissues, filled in below.
random_tissues = []

tissues.each do |tissue_name|
	terminals_filename = tissue_name + '.terminals.txt'

	# Every per-tissue terminals file is produced by the shared extraction task.
	file terminals_filename => ['event_mapping.txt'] do |_task|
		Rake::Task[:create_terminals_per_tissue].invoke
	end

	# Tag file written after the random terminal sets of this tissue were generated.
	done_tag = "random_#{tissue_name}_terminals_created.tag"
	file done_tag => ['sf_interactions.txt', 'random', terminals_filename] do |t|
		ruby "#{ScriptFolder}/generate_random_terminals.rb -r #{Configuration['random_repeats']} sf_interactions.txt #{terminals_filename} random"
		sh %{echo "DONE" > #{t.name}}
	end

	# One entry per repeat, each depending only on the tag file.
	(0...Configuration['random_repeats']).each do |repeat|
		random_name = File.join('random', "random_#{repeat}_#{tissue_name}")
		file random_name => [done_tag]
		random_tissues << random_name
	end
end

task :create_random_tissues => random_tissues

# From here on the random pseudo-tissues are processed like the real ones.
tissues += random_tissues

# Collect the anchors used for the anat runs: the first column of every row of
# the configured network whose fourth column is '1' and whose third column is
# exactly the string '0.6' (presumably transcription-factor edges, judging by
# the is_tf name - confirm against the network file format).
# Set is only autoloaded from Ruby 3.2 on; older interpreters need the require.
require 'set'
tfs = Set.new
File.foreach(Configuration['network']) do |line|
	from, _to, conf, is_tf = line.chomp.split(/\t/)
	next unless is_tf == '1' && conf == '0.6'

	tfs << from
end

# Gene table shared with the rule blocks defined further down in this file.
gi ||= GeneInfo.for_species(GeneInfo::Human)
# Both lists receive the same two files per tissue (scores and expression diff).
tf_tasks = []
score_tasks = []

tissues.each do |tissue|
	# Each tissue gets its own output directory.
	directory tissue

	terminals = tissue + '.terminals.txt'
	network_label = File.basename(terminals, '.terminals.txt')

	# One anat run per anchor in tfs; the output file is named after the
	# gene table's default id when the anchor is known, else after the raw value.
	tissue_tasks = tfs.map do |tf|
		numeric_id = tf.to_i
		tf_name = (gi.include?(numeric_id) && gi[numeric_id].default_id) || tf

		task_name = File.join(tissue, "#{tf_name}.#{network_label}")
		file task_name => [tissue, 'human.integrated.net', terminals] do |t|
			run_anat(terminals, tf, t.name)
		end

		task_name
	end

	score_file = "#{tissue}_scores.txt"
	file score_file => tissue_tasks do |t|
		ruby "#{ScriptFolder}/score_network.rb #{tissue} > #{t.name} "
	end

	diff_file = "#{tissue}_diff_exp.txt"
	file diff_file => tissue_tasks do |t|
		ruby "#{ScriptFolder}/extract_expression_diff.rb #{tissue} > #{t.name} "
	end

	[score_file, diff_file].each do |produced|
		tf_tasks << produced
		score_tasks << produced
	end
end

# Translates the core spliceosome list (one name per line) to Entrez ids;
# names missing from the gene table are copied unchanged.
spliceosome_core_filename = File.join(File.dirname(__FILE__), '../input/splicosome_core.txt')
file 'spliceosome_core.entrez.txt' => [spliceosome_core_filename] do |t|
	# Only builds a gene table when gi has not been set yet.
	gi ||= GeneInfo.for_species(GeneInfo::Human)

	File.open(t.name, 'w') do |out|
		IO.foreach(spliceosome_core_filename) do |row|
			symbol = row.chomp
			out.puts(gi.include?(symbol) ? gi[symbol].entrez_id : symbol)
		end
	end
end

# Translates the alternative-splicing-factor list (one name per line) to
# Entrez ids; names missing from the gene table are copied unchanged.
asf_only_filename = File.join(File.dirname(__FILE__), '../input/OnlyAlternativeSplicingFactors.txt')
file 'alternative_sf.entrez.txt' => [asf_only_filename] do |t|
	# Only builds a gene table when gi has not been set yet.
	gi ||= GeneInfo.for_species(GeneInfo::Human)

	File.open(t.name, 'w') do |out|
		IO.foreach(asf_only_filename) do |row|
			symbol = row.chomp
			out.puts(gi.include?(symbol) ? gi[symbol].entrez_id : symbol)
		end
	end
end


# Distances in the PPI network for the core spliceosome genes. The Python
# script runs inside bash after sourcing the activate script of the Python
# environment at /home/bnet/atiasnir/mypy (machine-specific path).
file 'core.spliceosome.distances.txt' => ['human-ppi.integrated.net', 'spliceosome_core.entrez.txt'] do |t|
	sh %{bash -c 'source /home/bnet/atiasnir/mypy/bin/activate; python #{ScriptFolder}/ppi_extract_sf_distances.py human-ppi.integrated.net spliceosome_core.entrez.txt #{t.name}'}
end

# Same computation for the alternative splicing factors.
# (An unused `input_file` local that used to be assigned here was dropped.)
file 'alt.sf.distances.txt' => ['human-ppi.integrated.net', 'alternative_sf.entrez.txt'] do |t|
	sh %{bash -c 'source /home/bnet/atiasnir/mypy/bin/activate; python #{ScriptFolder}/ppi_extract_sf_distances.py human-ppi.integrated.net alternative_sf.entrez.txt #{t.name}'}
end

# Plot built from both distance files by sf_distances_graph.r (run directly,
# so the script has to be executable).
file 'ppi.distances.png' => %w{core.spliceosome.distances.txt alt.sf.distances.txt} do |t|
	sh %{#{ScriptFolder}/sf_distances_graph.r core.spliceosome.distances.txt alt.sf.distances.txt #{t.name}}
end

# One p-value file per score file; each depends on every entry of score_tasks.
# NOTE(review): the score files are listed literally here and include
# Tumor_lung, independent of the `tissues` selection above - confirm intent.
pvalue_tasks = %w{Tumor_lung_scores.txt Tumor_colon_scores.txt}.map do |score_file|
	pvalue_file = File.basename(score_file, ".txt") + "_pvalue.txt"
	file pvalue_file => score_tasks do |t|
		ruby "#{ScriptFolder}/evaluate_randoms_score.rb #{score_file} > #{t.name}"
	end
	pvalue_file
end

# Aggregate entry points.
task :score_networks => pvalue_tasks
task :default => %w{sf_interactions.xml enriched_sfs.txt ppi.distances.png} + tf_tasks
