#  bedtools intersect -loj -a bubbles.bed -b CHM13.v2.0.gff3 > bubbles_liftoff.bed
import re

# Path of the `bedtools intersect -loj` output consumed by this script.
ANNOTATION_PATH = "gfa/bubbles_liftoff.bed"

# 0-based columns read from each tab-separated record.
# NOTE(review): the indices presumably correspond to a 14-column bubbles.bed
# followed by the 9 GFF3 columns -- confirm against the actual input layout.
NODES_COLUMN = 11          # comma-separated node identifiers
FEATURE_TYPE_COLUMN = 16   # only records whose value here is "gene" are used
ATTRIBUTES_COLUMN = 22     # attribute string searched for gene_name=...

# Stop at the next ';' OR at the end of the string, so a gene_name that is the
# last attribute (no trailing ';') is still captured.
gene_re = re.compile(r"gene_name=([^;]*)")


def collect_node_annotations(lines):
    """Build per-node gene and bubble annotations from intersect records.

    Args:
        lines: iterable of text lines (e.g. an open file). Each non-blank
            line is split on tab characters.

    Returns:
        A ``(node_gene_annot, node_bubble_annot)`` tuple of dicts. Both map a
        node identifier to a set: the first holds every ``gene_name`` value
        found in the attribute column, the second holds ``"chrom:start-end"``
        strings built from the first three columns of the record.

    Raises:
        IndexError: if a non-blank line has fewer columns than are accessed.
    """
    node_gene_annot = {}
    node_bubble_annot = {}

    for line in lines:
        # A blank line (typically a trailing one) carries no record.
        if not line.strip():
            continue

        # Split on tabs only: the attribute column may contain spaces, and
        # whitespace splitting would cut it short and shift later columns.
        fields = line.rstrip("\r\n").split("\t")

        if fields[FEATURE_TYPE_COLUMN] != "gene":
            continue

        genes = gene_re.findall(fields[ATTRIBUTES_COLUMN])
        bubble = fields[0] + ":" + fields[1] + "-" + fields[2]

        for node in fields[NODES_COLUMN].split(","):
            node_gene_annot.setdefault(node, set()).update(genes)
            node_bubble_annot.setdefault(node, set()).add(bubble)

    return node_gene_annot, node_bubble_annot


if __name__ == "__main__":
    # The context manager guarantees the input file is closed after parsing.
    with open(ANNOTATION_PATH, "r") as annot:
        node_gene_annot, node_bubble_annot = collect_node_annotations(annot)

    # Emits one "node bubble" pair per line. To list "node gene" pairs
    # instead, iterate node_gene_annot in the same way.
    for node, bubbles in node_bubble_annot.items():
        for bubble in bubbles:
            print(node, bubble)
