import pysam
import gzip
import sys

# Base-complement lookup, built once; characters outside "actgACTG" have no
# entry and therefore pass through str.translate unchanged.
_COMPLEMENT_TABLE = str.maketrans("actgACTG", "tgacTGAC")


def revcomp(s):
    """Return the reverse complement of the nucleotide string *s*.

    Upper- and lower-case a/c/t/g are complemented with case preserved;
    any other character is kept as-is. The result is in reversed order.
    """
    reversed_seq = s[::-1]
    return reversed_seq.translate(_COMPLEMENT_TABLE)

def parse_allele(s):
    """Split an allele walk string into ``(orientation, node_name)`` tuples.

    The first node name is taken to start at index 1, and every later node
    name at the next occurrence of the letter "s". The character directly
    before each node name is recorded as its orientation, e.g.
    ``">s1<s2"`` -> ``[(">", "s1"), ("<", "s2")]``.

    NOTE: the scan relies on node names beginning with "s" and containing
    no further "s" characters.
    """
    steps = []
    start = 1
    while True:
        following = s.find("s", start + 1)
        # A following node is preceded by its one-character orientation
        # marker, which must not be included in the current node's name.
        name = s[start:] if following == -1 else s[start:following - 1]
        steps.append((s[start - 1], name))
        if following == -1:
            return steps
        start = following

# Module-level handle on the FASTA file of node sequences. concat_seq()
# fetches records from it by node name; it is closed at the end of the script.
nodes_fasta = pysam.FastaFile("gfa/graph_xaf.fa")

def concat_seq(node):
    """Return the sequence of one walk step, oriented as the step dictates.

    *node* is an ``(orientation, node_name)`` pair. The sequence is fetched
    from ``nodes_fasta`` by name and reverse-complemented when the
    orientation is "<"; for any other orientation it is returned unchanged.
    """
    sequence = nodes_fasta.fetch(node[1])
    return revcomp(sequence) if node[0] == "<" else sequence

# Convert every allele walk in the gzipped table into a FASTA record on
# stdout, and record which numeric record name belongs to which allele.
i = 0        # next FASTA record name (count of alleles written so far)
skipped = 0  # number of '*' alleles skipped so far
try:
    # Both files are managed by `with` so they are closed even if parsing or
    # a FASTA lookup raises part-way through the input.
    with open("data/qtl_alleles_index.tsv", "w") as allele_names_index, \
            gzip.open("data/qtl_alleles.tsv.gz") as svs:
        svs.readline()  # discard the first line of the table
        for sv in svs:
            fields = sv.decode().split()
            if not fields:
                # Blank line (e.g. trailing newline): nothing to parse.
                continue
            allele = fields[0]
            if allele == '*':
                # '*' carries no walk to expand; log it with the number of
                # alleles skipped before this one.
                sys.stderr.write(f"Skipping {allele} {skipped}\n")
                skipped += 1
                continue

            allele_name = str(i)
            allele_seq = "".join(map(concat_seq, parse_allele(allele)))
            i += 1

            # NOTE(review): the index is comma-separated although the file is
            # named .tsv; kept as-is because consumers may rely on it.
            allele_names_index.write(f"{allele_name},{allele}\n")
            print(">" + allele_name)
            print(allele_seq)
finally:
    # Release the FASTA handle on both the success and the failure path.
    nodes_fasta.close()
