# functions for finding maximal directed acyclic graphs using networkx

import networkx
import sys
import random

# Command line: <links_file> <min_arcset>
#
# links_file is a whitespace-delimited list of links, one per line; anything
# after the first two fields is ignored.  min_arcset is the number of removed
# links at or below which the shuffle search further down stops.
if len(sys.argv) < 3:
    sys.exit('usage: %s <links_file> <min_arcset>' % sys.argv[0])

min_arcset = int(sys.argv[2])

# The context manager closes the input even if reading fails part-way.
with open(sys.argv[1]) as netfile:
    links = netfile.readlines()
print('read links')

# The output name is the input name with 'raw' replaced by 'dag'.  When the
# input name does not contain 'raw' the two paths are identical, and opening
# the output for writing would truncate the input file, so refuse to continue.
out_path = sys.argv[1].replace('raw','dag')
if out_path == sys.argv[1]:
    sys.exit("input file name must contain 'raw' so that the output name "
             "differs from the input: %s" % sys.argv[1])
outnet = open(out_path,'w')

def all_predecessors(g,x,all):
    """Collect every node from which ``x`` can be reached in ``g``.

    g   -- graph object exposing ``predecessors(node)``
    x   -- node whose ancestors are wanted
    all -- set of nodes collected so far; nodes already in it are treated as
           visited and are not expanded again.  Callers in this file pass a
           fresh ``set()``.  (The name shadows the builtin ``all``; it is kept
           so that existing callers are unaffected.)

    Returns ``all``, updated in place with every ancestor found.  ``x`` itself
    ends up in the result only when it was already in ``all`` or lies on a
    cycle.
    """
    # An explicit stack replaces the earlier recursion: the traversal no
    # longer hits the interpreter's recursion limit on long chains, and the
    # set is no longer copied at every step.
    pending = [x]
    while pending:
        node = pending.pop()
        for pre in g.predecessors(node):
            if pre not in all:
                all.add(pre)
                pending.append(pre)
    return all

def all_successors(g,x,all):
    """Collect every node that can be reached from ``x`` in ``g``.

    g   -- graph object exposing ``successors(node)``
    x   -- node whose descendants are wanted
    all -- set of nodes collected so far; nodes already in it are treated as
           visited and are not expanded again.  Callers in this file pass a
           fresh ``set()``.  (The name shadows the builtin ``all``; it is kept
           so that existing callers are unaffected.)

    Returns ``all``, updated in place with every descendant found.  ``x``
    itself ends up in the result only when it was already in ``all`` or lies
    on a cycle.
    """
    # An explicit stack replaces the earlier recursion: the traversal no
    # longer hits the interpreter's recursion limit on long chains, and the
    # set is no longer copied at every step.
    pending = [x]
    while pending:
        node = pending.pop()
        for suc in g.successors(node):
            if suc not in all:
                all.add(suc)
                pending.append(suc)
    return all
# put this in a function to call for looking for different solutions
def feedback_arc_set(linklist,smallest):
    """Build an acyclic graph from ``linklist``, dropping links that close a cycle.

    Links are taken in the order given.  Each link is added to the graph
    unless doing so would create a cycle, in which case it is printed and
    recorded as removed instead.

    linklist -- iterable of text lines; the first two whitespace-separated
                fields of each line are the source and target node.  Lines
                starting with '#' and lines with fewer than two fields are
                skipped.
    smallest -- give up early once more than this many links have been
                removed; the test is made after each accepted link.

    Returns ``[num_edges, g, removed_edges]``: the number of removed links,
    the networkx.DiGraph built so far (incomplete after an early exit), and
    the removed links as ``[source, target]`` lists.
    """
    num_edges = 0
    g = networkx.DiGraph()
    removed_edges = []
    for link in linklist:
        if link.startswith('#'):
            continue
        nodes = link.strip().split()
        if len(nodes) < 2:
            # blank or malformed line: there is no link to add
            continue
        source, target = nodes[0], nodes[1]

        # g is acyclic at this point, so the new link closes a cycle exactly
        # when it is a self-loop or when source is already reachable from
        # target.  Testing that on g directly avoids copying the whole graph
        # for every link.  A node missing from g cannot be on any path, and
        # successors() must not be asked about an unknown node.
        closes_cycle = source == target or (
            source in g and target in g
            and source in all_successors(g, target, set())
        )
        if closes_cycle:
            num_edges += 1
            print(nodes[0:2])
            removed_edges.append(nodes[0:2])
            continue

        g.add_edge(source, target)
        if num_edges > smallest:
            # already worse than the best known solution; stop early
            return [num_edges,g,removed_edges]
    # NOTE(review): the hard-coded 2 looks like leftover debugging output; it
    # is kept so that what the script prints is unchanged.
    if num_edges == 2:
        print('\n'.join([' '.join(badedge) for badedge in removed_edges]))
    return [num_edges,g,removed_edges]


# Shuffle the links repeatedly, pruning greedily each time, and keep the
# acyclic network that needed the fewest links removed.  The search stops as
# soon as a shuffle removes no more than min_arcset links.
MAX_SHUFFLES = 1000

edgenums = []
lowest = None           # best complete network seen so far
smallest = len(links)
for _attempt in range(MAX_SHUFFLES):
    linklist = links
    random.shuffle(links)
    num, net, nonedges = feedback_arc_set(linklist, smallest)

    # feedback_arc_set only exits early when num exceeds `smallest`, so a run
    # that beats the previous best is always a complete network.  Recording it
    # here (rather than only once the threshold is reached) means the best
    # network is available when the threshold is never reached.
    if lowest is None or num < smallest:
        lowest = net
    edgenums.append(num)
    smallest = min(edgenums)

    if num <= min_arcset:
        # Every earlier shuffle exceeded min_arcset (otherwise the loop would
        # already have stopped), so this run is also the best one.
        print(num)
        print('smaller')
        g = net
        nonedges.sort()
        for nonedge in nonedges:
            print(' '.join(nonedge))
        break
    print(num)

if edgenums:
    print(min(edgenums))
    if min(edgenums) > min_arcset:
        # threshold never reached: fall back to the best network found
        g = lowest
        print('failed to reach defined minimal FAS of %d' % min_arcset)

# removed toposort function and step here

arcsetless_net = g.copy()

try:
    for edge_line in networkx.generate_edgelist(arcsetless_net, data=False):
        outnet.write(edge_line + '\n')
finally:
    # close the output even if writing fails part-way
    outnet.close()
# alternately can do an MCL cluster of the network first to rarefy to more self-contained networks,
# to avoid very large groupings in the sort output.  this would also probably get rid of some cycles.
