#!/usr/bin/env python3
import gzip

# Input locations, relative to the current working directory.
NODE_SIZES_PATH = "gfa/node_sizes.csv"
METHYL_PATH = "mg_methyl.tsv.gz"


def load_node_sizes(path):
    """Read a whitespace-separated ``node size`` table into a dict.

    Each non-blank line must hold exactly two whitespace-separated fields:
    a node name and an integer size. Blank lines are skipped.

    Raises:
        ValueError: if a non-blank line does not have exactly two fields,
            or if the size is not an integer.
    """
    sizes = {}
    with open(path) as handle:
        for line in handle:
            if not line.strip():
                # Tolerate blank / trailing empty lines instead of crashing
                # on the two-value unpack below.
                continue
            node, size = line.split()
            sizes[node] = int(size)
    return sizes


def allele_length(allele, node_sizes):
    """Return the summed size of every node named in *allele*.

    *allele* is a string of node names, each introduced by ``>`` or ``<``
    (e.g. ``">s1<s2"``). The marker character itself does not affect the
    result; only the node names are used.

    Raises:
        KeyError: if a node name is not present in *node_sizes*.
    """
    # Normalise both markers to one delimiter, then split. A leading marker
    # yields an empty first token, which is dropped rather than looked up.
    nodes = allele.replace(">", "<").split("<")
    return sum(node_sizes[node] for node in nodes if node)


def record_length(start, end, allele, node_sizes):
    """Return the length reported for one record.

    A ``*`` allele is reported as the negated interval length
    ``-(end - start)``; any other allele is the sum of its node sizes.
    NOTE(review): ``*`` presumably marks a deleted/missing allele - confirm
    against the producer of the input file.
    """
    if allele == "*":
        return -(end - start)
    return allele_length(allele, node_sizes)


def main():
    """Print ``chrom start end Allele length`` rows for every input record.

    Output is space-separated on stdout, one header line followed by one
    line per record of the gzipped input file.
    """
    print("chrom", "start", "end", "Allele", "length")
    node_sizes = load_node_sizes(NODE_SIZES_PATH)

    # Text mode decodes once at the I/O boundary instead of per line.
    with gzip.open(METHYL_PATH, "rt", encoding="ascii") as handle:
        handle.readline()  # discard the first line (treated as a header)
        for line in handle:
            fields = line.split()[:6]
            if not fields:
                # Skip blank lines rather than failing on fields[5].
                continue
            # Only columns 0, 1, 2 and 5 are used; a line with fewer than
            # six fields still raises IndexError, as malformed input should.
            allele = fields[5]
            chrom = fields[0]
            start = int(fields[1])
            end = int(fields[2])
            print(chrom, start, end, allele,
                  record_length(start, end, allele, node_sizes))


if __name__ == "__main__":
    main()
