# %%
import re
import pandas as pd
import natsort as ns

# %%
# Load the per-segment table and turn the `segment` column into an ordered
# categorical whose categories follow natural sort order of the distinct
# segment labels found in the file.
df_segments = pd.read_csv(snakemake.input[0]).assign(
    segment=lambda frame: pd.Categorical(
        frame['segment'],
        categories=ns.natsorted(frame['segment'].unique()),
        ordered=True,
    )
)

# %%
# Collapse the segment table to one row per species:
#   n_bases   - sum of `length` over the species' rows
#   rank      - mean of `rank` over the species' rows, cast to int
#   n_segment - count of non-null `segment` entries
# The result is ordered by ascending `rank`.
df_out = (
    df_segments
    .groupby("species")
    .agg(
        n_bases=("length", "sum"),
        rank=("rank", "mean"),
        n_segment=("segment", "count"),
    )
    .reset_index()
    .astype({"rank": int})
    .sort_values("rank")
)

# %%
# Write the per-species summary to the first Snakemake output path,
# leaving the DataFrame index out of the CSV.
df_out.to_csv(path_or_buf=snakemake.output[0], index=False)

# %%