#!/usr/bin/env python

import sys
import graph_functions as gf
import networkx as nx

# Three positional arguments are required. Fail with a usage line instead of
# a bare IndexError traceback when any of them is missing (exit status stays 1).
if len(sys.argv) < 4:
    sys.exit("usage: %s <graph_file> <align_file> <chr_id>" % sys.argv[0])

graph_file = sys.argv[1]
align_file = sys.argv[2]
chr_id = sys.argv[3]

# node -> 1-based rank of its connected component (1 = component with most nodes)
belongs_to_component = {}
# node -> number of nodes in the connected component containing it
comp_len = {}

# Load the graph into an undirected networkx graph so that networkx's
# connected-component search can be reused rather than re-implemented.
# NOTE(review): load_indirect_graph presumably populates G in place (its
# return value is not used) -- confirm against graph_functions.
G = nx.Graph()
gf.load_indirect_graph(graph_file, G)

# Components are numbered from 1 in order of decreasing node count.
# `comp` is initialised so it is still defined (as 0) when the graph is empty.
comp = 0
for comp, current_component in enumerate(
        sorted(nx.connected_components(G), key=len, reverse=True), start=1):
    size = len(current_component)
    for node in current_component:
        belongs_to_component[node] = comp
        comp_len[node] = size

# Ids of the graph components that have at least one alignment passing the
# filters below.
components = set()

with open(align_file) as f:
    for line in f:
        record = line.strip()
        # Skip blank lines (e.g. a trailing empty line at end of file); they
        # would otherwise fail with IndexError when indexing the columns.
        if not record:
            continue
        parts = record.split('\t')
        # Second-to-last column, value after the last ':'.
        # NOTE(review): presumably an identity tag of the form "xx:f:0.99" --
        # confirm against the aligner's output format.
        idy = float(parts[-2].split(":")[-1])
        # Span covered by the alignment: column 4 minus column 3
        # (presumably end and start on the aligned sequence -- TODO confirm).
        aln_len = int(parts[3]) - int(parts[2])
        # Keep only alignments with identity >= 0.99 and span > 1,000,000
        # whose column 6 equals the requested chr_id.
        if idy >= 0.99 and aln_len > 1000000 and parts[5] == chr_id:
            # Column 1 must be a node of the graph; a KeyError here means the
            # alignment file and the graph do not match.
            components.add(belongs_to_component[parts[0]])

# The result (set of component ids, then its size) is reported on stderr.
sys.stderr.write("%s\t%d\n" % (components, len(components)))
