# %%
import pandas as pd
import numpy as np
from subfunctions import get_non_overlapping_total_length, read_and_parse_BED6

# %%
# pull the settings this script needs out of the workflow config
cfg = snakemake.config
age_filters = cfg["age_filters"]  # age thresholds; one output column is produced per value
te_classes = cfg["major_te_classes"]  # major TE classes; one output row is produced per class
classes_to_remove = cfg["entries_to_remove"]  # repeat classes to remove
size_filter = cfg["size_filter"]  # forwarded unchanged to the BED parser

# %%
# read total genome and flexible sizes
# the first line of the file holds two whitespace-separated integers:
# the flexible size first, the total genome size second
with open(snakemake.input.totalsizes, 'r') as infile:
    # split() without an argument tolerates tabs, repeated spaces and a trailing
    # space/newline, all of which made split(' ') yield empty tokens and crash int()
    total_flex_size, total_genome_size = (int(x) for x in infile.readline().split())

# the first rule parameter selects which of the two sizes is the denominator
# of the percentages computed below; anything other than "flexible" means genome-wide
total_size = (
    total_flex_size if str(snakemake.params[0]) == "flexible" else total_genome_size
)

# %%
# load the TE annotation (BED6) for this run
# NOTE(review): the classes to drop and the size filter are handed to the parser;
# presumably the filtering happens inside read_and_parse_BED6 - confirm in subfunctions
df_bed = read_and_parse_BED6(
    snakemake.input.bed,
    classes_to_remove,
    size_filter,
)

# %%
# label for this run: the value of the `i` wildcard when the rule defines one,
# otherwise "default". getattr with a fallback only covers the missing-wildcard
# case, unlike a bare `except:` which would also hide unrelated errors
# (including KeyboardInterrupt / SystemExit).
perm = str(getattr(snakemake.wildcards, "i", "default"))

# skeleton of the result table: one row per TE grouping (two aggregate rows
# followed by the configured major classes); the scalar `perm` is broadcast to
# every row. The per-age percentage columns are added afterwards.
df_percent = pd.DataFrame(
    {'te_class': ['All Transposons', 'All Transposons except Unknown'] + te_classes, 'perm': perm}
)

# add one percentage column per age threshold; the values are appended in the
# same order as the rows of df_percent (all, all except Unknown, then each class)
for age_filter in age_filters:
    # TEs strictly below the current age threshold
    younger = df_bed.query("age < @age_filter")

    # aggregate rows first: every TE, then every TE whose class is not 'Unknown'
    lengths = [get_non_overlapping_total_length(younger)]
    known_only = younger.query("te_class!='Unknown'")
    lengths.append(get_non_overlapping_total_length(known_only))

    # then one entry per configured major TE class
    for te_class in te_classes:
        class_subset = younger.query("te_class==@te_class")
        lengths.append(get_non_overlapping_total_length(class_subset))

    # express each length as a percentage of the chosen total size, rounded to 4 decimals
    percentages = np.array(lengths) / total_size * 100
    df_percent[f'age_{age_filter}'] = np.round(percentages, 4)

# %%
# write the percentage table to the rule's first declared output, without the index column
out_csv = snakemake.output[0]
df_percent.to_csv(out_csv, index=False)