# May need to change directories before running.

# %%
from pathlib import Path
# Repository root: two directory levels above this script's resolved location.
REPO_DIR = Path(__file__).resolve().parent.parent
# Directory that holds the per-sample `counts_<sample>.csv` input files.
IN_DIR = REPO_DIR / 'out' / 'repmask_fig'

# Sample identifiers; one counts file per identifier is read from IN_DIR.
sample_list = [
    'astCal',
    'aulStu',
    'mayZeb',
    'rhaChi',
    'troMau',
    'oreKar',
]


# %%
import pandas as pd
# Build one long-format table covering every sample.
#
# For each sample, `counts_<sample>.csv` is read from IN_DIR and its generic
# columns 'sample', 'sample_scaled' and 'control' (where present) are renamed
# to '<sample>', '<sample>_scaled' and '<sample>_control', so that the column
# name identifies the sample. The table is then melted: 'repeat_class' stays
# as the identifier column, every other column name goes into 'species' and
# its value into 'percent_genome'.
frames = []
for sample in sample_list:
    filepath = IN_DIR.joinpath(f'counts_{sample}.csv')
    # Raise explicitly instead of using `assert`, which is stripped when
    # Python runs with -O and carries no message about which file is missing.
    if not filepath.is_file():
        raise FileNotFoundError(
            f'Missing counts file for sample {sample!r}: {filepath}')
    renamed = pd.read_csv(filepath).rename(columns={
        'sample': sample,
        'sample_scaled': f'{sample}_scaled',
        'control': f'{sample}_control',
    })
    frames.append(
        pd.melt(renamed, id_vars='repeat_class',
                var_name='species', value_name='percent_genome')
    )

# DataFrame.append was removed in pandas 2.0 and copied the whole accumulated
# frame on every call; a single concat is the supported replacement. Row
# labels are left as-is (no ignore_index) to match the old append behaviour.
# With no samples there is nothing to concatenate, so df stays None as before.
df = pd.concat(frames) if frames else None

# %%
# Values of the `species` column to show in the figure.
# Alternative selections kept for reference:
# included_species = [f'{x}_scaled' for x in sample_list]
# included_species = [f'{x}_scaled' for x in sample_list[:-1]]
# Active selection: the plain per-sample column of every sample but the last.
included_species = sample_list[:-1]

import plotly.express as px

# Fixed bar colour for each repeat class.
repeat_class_colors = {
    'Unknown': '#C4C4C4',
    'Other': '#4D4D4D',
    'DNA': '#FFBAA9',
    'LINE': '#98ABE6',
    'LTR': '#65C465',
    'Retroposon': '#FF9500',
    'RC': '#FF70FF',
    'SINE': '#D2ADF7',
    'Total': '#fcba03',
}

# Keep rows for the selected species only, and leave out the "RC" class.
is_selected_species = df['species'].isin(included_species)
is_not_rc = df['repeat_class'] != "RC"
plot_data = df[is_selected_species & is_not_rc]

# Grouped bars: one group per species, one bar per repeat class.
fig = px.bar(
    plot_data,
    x='species',
    y='percent_genome',
    color='repeat_class',
    barmode='group',
    template='seaborn',
    color_discrete_map=repeat_class_colors,
)
fig.update_layout(
    title=f'Young TE signatures (perc_div < 2, est. after ~20mya)',
    xaxis_title="species",
    yaxis_title="percentage of genome size",
    legend_title="repeat class",
    # Fixed y-axis window; bars above 6 would be cut off.
    yaxis_range=[0, 6],
)
fig.show()
# fig.write_html('counts.html')

# %%
