# %%
import pandas as pd
import re

# `snakemake` is not defined in this file; it is expected to be supplied by
# the Snakemake `script:` runner. The first declared input is the CSV to read.
input_csv = snakemake.input[0]
df = pd.read_csv(input_csv)

# %%
# Missing annotations become empty strings so the later cells can compare
# against "" and call string methods on every value.
annotation_column = df['ANNOTATION']
df['ANNOTATION'] = annotation_column.fillna("")

# %%
# Impute rows without an annotation from the closest preceding annotated row.
# Empty strings are masked to missing so that `ffill` carries the last real
# annotation forward; rows before the first annotation have nothing to
# inherit and are restored to "".
imputed_annotations = (
    df['ANNOTATION']
    .where(df['ANNOTATION'] != "")
    .ffill()
    .fillna("")
)

# %%
# A node represented by two transposons carries both entries in one string,
# separated by ";". Turn each string into a list of entries ...
df['ANNOTATION_IMPUTE'] = imputed_annotations.str.split(";")
# ... then give every entry its own row (other columns are repeated) and
# renumber the rows 0..n-1.
df = df.explode('ANNOTATION_IMPUTE')
df = df.reset_index(drop=True)

# %%
# Each ANNOTATION_IMPUTE entry is expected to look like "name: color".
def _annotation_color(entry):
    """Return the text after the first ':' of *entry*, whitespace-stripped.

    Entries without a ':' (for example the empty string left on leading rows
    that had no annotation to inherit) yield "" instead of raising.
    """
    return entry.partition(":")[2].strip()


def _annotation_name(entry):
    """Return the text before the last ':' of *entry*, whitespace-stripped.

    An entry without a ':' is treated as a bare name and returned whole.
    """
    name, colon, _ = entry.rpartition(":")
    return (name if colon else entry).strip()


# extract color info (every row, including imputed ones)
df['COLOR'] = [_annotation_color(entry) for entry in df['ANNOTATION_IMPUTE']]

# extract transposon name
# Only rows that carried their own annotation get a name; rows whose
# annotation was imputed keep the empty string in ANNOTATION.
idx_with_anno = (df['ANNOTATION'] != "")
df.loc[idx_with_anno, 'ANNOTATION'] = [
    _annotation_name(entry) for entry in df.loc[idx_with_anno, 'ANNOTATION_IMPUTE']
]

# %%
# Write only the node id, transposon name and color columns to the first
# declared Snakemake output; the helper ANNOTATION_IMPUTE column and the row
# index are left out.
df.to_csv(
    snakemake.output[0],
    columns=['NODE_ID', 'ANNOTATION', 'COLOR'],
    index=False,
)

# %%