import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import datetime

from Bio import SeqIO
import csv
import os
import pandas as pd
import numpy as np

from padmet.classes import PadmetSpec
from cobra.io.sbml import read_sbml_model


def parse_genome(genome_path):
    """Count the EC numbers annotated in a GenBank genome.

    Args:
        genome_path: path to the file handed to ``SeqIO.parse`` with the
            'genbank' format.

    Returns:
        tuple: (number of EC numbers counting duplicates,
        number of distinct EC numbers).
    """
    ec_numbers = []
    for record in SeqIO.parse(genome_path, 'genbank'):
        for feature in record.features:
            # Only features carrying an 'EC_number' qualifier contribute.
            if 'EC_number' in feature.qualifiers:
                ec_numbers.extend(feature.qualifiers['EC_number'])

    nb_redundant = len(ec_numbers)
    nb_unique = len(set(ec_numbers))
    return nb_redundant, nb_unique

def parse_padmet(padmet_path):
    """Compute EC and reaction counts for a padmet network.

    Args:
        padmet_path: path to the padmet file loaded with ``PadmetSpec``.

    Returns:
        list: [number of EC numbers (with duplicates), number of distinct
        EC numbers, number of reactions, number of reactions linked to at
        least one gene, number of non-spontaneous reactions without gene,
        number of reactions flagged 'SPONTANEOUS'].
    """
    padmetSpec = PadmetSpec(padmet_path)

    # Build the reaction list once; every statistic below derives from it.
    reactions = [node for node in padmetSpec.dicOfNode.values() if node.type == "reaction"]

    ecs = [ec for node in reactions if 'EC-NUMBER' in node.misc for ec in node.misc['EC-NUMBER']]

    reactions_with_genes = []
    reactions_without_genes = []
    for rxn_node in reactions:
        # A reaction has a gene association when one of its relations is of type "is_linked_to".
        has_gene = any(rlt.type == "is_linked_to" for rlt in padmetSpec.dicOfRelationIn[rxn_node.id])
        if has_gene:
            reactions_with_genes.append(rxn_node.id)
        elif 'SPONTANEOUS' not in rxn_node.misc:
            # Spontaneous reactions are reported separately, not as "without genes".
            reactions_without_genes.append(rxn_node.id)

    spontaneous_reactions = [node.id for node in reactions if 'SPONTANEOUS' in node.misc]

    return [len(ecs), len(set(ecs)), len(reactions), len(reactions_with_genes), len(reactions_without_genes), len(spontaneous_reactions)]

def parse_carveme_model(sbml_path):
    """Compute EC and reaction counts for a CarveMe SBML model.

    Args:
        sbml_path: path to the SBML file loaded with ``read_sbml_model``.

    Returns:
        list: [number of EC codes (with duplicates), number of distinct EC
        codes, number of reactions in the model, number of reactions with
        genes, number of reactions without genes, number of reactions whose
        gene list contains the id 'spontaneous'].
    """
    model = read_sbml_model(sbml_path)

    ec_codes = []
    with_genes_ids = []
    without_genes_ids = []
    spontaneous_ids = []

    for reaction in model.reactions:
        gene_ids = [gene.id for gene in reaction.genes]

        # The three categories are mutually exclusive.
        if not gene_ids:
            without_genes_ids.append(reaction.id)
        elif 'spontaneous' in gene_ids:
            spontaneous_ids.append(reaction.id)
        else:
            with_genes_ids.append(reaction.id)

        # The 'ec-code' annotation holds either a list of codes or a single string.
        if 'ec-code' in reaction.annotation:
            ec_annotation = reaction.annotation['ec-code']
            if isinstance(ec_annotation, list):
                ec_codes.extend(ec_annotation)
            elif isinstance(ec_annotation, str):
                ec_codes.append(ec_annotation)

    return [
        len(ec_codes),
        len(set(ec_codes)),
        len(model.reactions),
        len(with_genes_ids),
        len(without_genes_ids),
        len(spontaneous_ids),
    ]


def parse_modelseed_model(sbml_path, tsv_reactions_file, modelseed_mapping_file):
    """Compute EC and reaction counts for a ModelSEED model.

    EC numbers come from two sources: the 'enzyme' column of the reaction
    table ('|'-separated values) and the mapping file, queried with the
    part of each SBML reaction id located before the first '_'.

    Args:
        sbml_path: path to the SBML file loaded with ``read_sbml_model``.
        tsv_reactions_file: tab-separated reaction table with an 'enzyme' column.
        modelseed_mapping_file: tab-separated file with 'ModelSEED ID' and
            'External ID' columns.

    Returns:
        list: [number of EC numbers (with duplicates), number of distinct EC
        numbers, number of reactions in the model, number of reactions with
        genes, number of reactions without genes, ''] -- the last slot (the
        spontaneous reaction count) is left empty for this method.
    """
    mapping_df = pd.read_csv(modelseed_mapping_file, sep='\t')
    reaction_to_ec = mapping_df.set_index('ModelSEED ID')['External ID'].to_dict()

    model = read_sbml_model(sbml_path)
    reactions_with_genes = []
    reactions_without_genes = []
    for reaction in model.reactions:
        gene_ids = [gene.id for gene in reaction.genes]
        if not gene_ids:
            reactions_without_genes.append(reaction.id)
        elif 'spontaneous' not in gene_ids:
            reactions_with_genes.append(reaction.id)

    reactions_df = pd.read_csv(tsv_reactions_file, sep='\t').replace(np.nan, '')
    modelseed_ecs = [
        ec
        for enzyme_field in reactions_df['enzyme']
        if enzyme_field != ''
        for ec in enzyme_field.split('|')
    ]

    # Add the ECs that the mapping file associates with the model reactions.
    for reaction in model.reactions:
        base_id = reaction.id.split('_')[0]
        if base_id in reaction_to_ec:
            modelseed_ecs.append(reaction_to_ec[base_id])

    return [len(modelseed_ecs), len(set(modelseed_ecs)), len(model.reactions), len(reactions_with_genes), len(reactions_without_genes), '']

def parse_eggnog_mapper(eggnog_file):
    """Count the EC numbers listed in an eggNOG-mapper annotation table.

    Args:
        eggnog_file: path (or file-like object) of a tab-separated table
            with an 'EC' column holding comma-separated EC numbers. Lines
            starting with '#' are ignored.

    Returns:
        tuple: (number of EC numbers counting duplicates,
        number of distinct EC numbers).
    """
    # NOTE(review): the header line must not start with '#', otherwise it is
    # dropped as a comment and the 'EC' column is not found -- confirm the
    # input files are formatted that way.
    df_eggnog = pd.read_csv(eggnog_file, sep='\t', comment='#')
    df_eggnog = df_eggnog.replace(np.nan, '')
    eggnog_ecs = [ec for all_ec in df_eggnog['EC'].str.split(',') for ec in all_ec if ec != '']

    # The unique count must be computed on the same list as the redundant one.
    return len(eggnog_ecs), len(set(eggnog_ecs))


def parse_gapseq_model(gapseq_sbml, gapseq_reactions_file, gapseq_pathways_file):
    """Compute EC and reaction counts for a gapseq model.

    Args:
        gapseq_sbml: path to the SBML file loaded with ``read_sbml_model``.
        gapseq_reactions_file: tab-separated reaction table ('#' lines are
            comments) with 'dbhit', 'status' and 'rxn' columns.
        gapseq_pathways_file: kept for backward compatibility; pathways do
            not contribute to the returned statistics, so the file is not read.

    Returns:
        list: [number of EC codes (with duplicates), number of distinct EC
        codes, number of reactions in the model, number of reactions with
        genes, number of reactions without genes (exchange reactions
        excluded), number of spontaneous plus exchange reactions].
    """
    model = read_sbml_model(gapseq_sbml)
    reactions_with_genes = []
    reactions_without_genes = []
    all_reactions = []
    ecs = []
    for reaction in model.reactions:
        # Skip the reaction named 'biol' (the original test compared two
        # string literals and therefore never skipped anything).
        # NOTE(review): gapseq models usually name their biomass reaction
        # 'bio1'; confirm that 'biol' (letter l) is the intended identifier.
        if reaction.id == 'biol':
            continue
        gene_ids = [gene.id for gene in reaction.genes]
        if not gene_ids:
            reactions_without_genes.append(reaction.id)
        elif 'spontaneous' not in gene_ids:
            reactions_with_genes.append(reaction.id)
        if 'ec-code' in reaction.annotation:
            ec_annotation = reaction.annotation['ec-code']
            if isinstance(ec_annotation, list):
                ecs.extend(ec_annotation)
            else:
                ecs.append(ec_annotation)
        all_reactions.append(reaction.id)

    # Reactions whose id contains 'EX' are treated as exchanges; for the
    # others only the part of the id before the first '_' is kept.
    exchange_reactions = {rxn for rxn in all_reactions if 'EX' in rxn}
    enzymatic_ids = {rxn.split('_')[0] for rxn in all_reactions if 'EX' not in rxn}

    df_gapseq = pd.read_csv(gapseq_reactions_file, sep='\t', comment='#')
    # Replace missing values first so that an empty 'dbhit' cell cannot break the split below.
    df_gapseq = df_gapseq.replace(np.nan, '')
    # Keep the rows having at least one space-separated 'dbhit' id present in the model.
    in_model = df_gapseq['dbhit'].map(lambda dbhit: not enzymatic_ids.isdisjoint(dbhit.split(' '))).astype(bool)
    df_gapseq = df_gapseq[in_model]

    spontaneous_ids = set(df_gapseq.loc[df_gapseq['status'] == 'spontaneous', 'rxn'])
    spontaneous_reactions = spontaneous_ids.intersection(all_reactions)

    reactions_without_genes = set(reactions_without_genes) - exchange_reactions

    return [len(ecs), len(set(ecs)), len(model.reactions), len(reactions_with_genes), len(reactions_without_genes), len(spontaneous_reactions | exchange_reactions)]

_STAT_COLUMNS = ['Organisms', 'NB ECs (redundant)', 'NB ECs (unique)', 'NB total reactions', 'NB enzymatic reactions with genes', 'NB enzymatic reactions without genes', 'NB spontaneous reactions']


def _write_stat_table(output_path, rows):
    """Write the statistics header followed by `rows` to a tab-separated file."""
    # newline='' is what the csv module expects, to avoid doubled line endings.
    with open(output_path, 'w', newline='') as output_file:
        csvwriter = csv.writer(output_file, delimiter='\t')
        csvwriter.writerow(_STAT_COLUMNS)
        csvwriter.writerows(rows)


def compute_stat(carveme_folder, modelseed_folder, mapping_model_seed_file, gapseq_folder, aucome_folder):
    """Compute the statistics of every network and write one table per method.

    Four tab-separated files are written in the current working directory:
    'carveme_stat.tsv', 'modelseed_stat.tsv', 'gapseq_stat.tsv' and
    'aucome_final.tsv'. Folder entries are processed in sorted order so
    that the row order is reproducible.

    Args:
        carveme_folder: folder containing one SBML file per organism.
        modelseed_folder: folder with a 'sbml' sub-folder of model files and
            a 'tsv' sub-folder of '<name>-reactions.tsv' tables.
        mapping_model_seed_file: mapping file given to ``parse_modelseed_model``.
        gapseq_folder: folder with one sub-folder per organism holding
            '<name>.xml', '<name>-all-Reactions.tbl' and '<name>-all-Pathways.tbl'.
        aucome_folder: folder containing one padmet file per organism.
    """
    carveme_results = []
    for sbml_file in sorted(os.listdir(carveme_folder)):
        sbml_pathname = os.path.join(carveme_folder, sbml_file)
        organism = sbml_file.replace('.sbml', '')
        carveme_results.append([organism] + parse_carveme_model(sbml_pathname))
    _write_stat_table('carveme_stat.tsv', carveme_results)

    modelseed_results = []
    sbml_modelseed_folder = os.path.join(modelseed_folder, 'sbml')
    tsv_modelseed_folder = os.path.join(modelseed_folder, 'tsv')
    for sbml_file in sorted(os.listdir(sbml_modelseed_folder)):
        sbml_pathname = os.path.join(sbml_modelseed_folder, sbml_file)
        tsv_filename = sbml_file.replace('.xml', '') + '-reactions.tsv'
        tsv_pathname = os.path.join(tsv_modelseed_folder, tsv_filename)
        # The reaction table name is derived by stripping '.xml', so strip it
        # from the organism label too (previously only '.sbml' was removed).
        organism = sbml_file.replace('.xml', '').replace('.sbml', '')
        modelseed_results.append([organism] + parse_modelseed_model(sbml_pathname, tsv_pathname, mapping_model_seed_file))
    _write_stat_table('modelseed_stat.tsv', modelseed_results)

    gapseq_results = []
    for model_folder in sorted(os.listdir(gapseq_folder)):
        sbml_pathname = os.path.join(gapseq_folder, model_folder, model_folder+'.xml')
        reaction_file = sbml_pathname.replace('.xml', '-all-Reactions.tbl')
        pathway_file = sbml_pathname.replace('.xml', '-all-Pathways.tbl')
        gapseq_results.append([model_folder] + parse_gapseq_model(sbml_pathname, reaction_file, pathway_file))
    _write_stat_table('gapseq_stat.tsv', gapseq_results)

    final_aucome_results = []
    for padmet_file in sorted(os.listdir(aucome_folder)):
        padmet_pathname = os.path.join(aucome_folder, padmet_file)
        organism = padmet_file.replace('.padmet', '').replace('output_pathwaytools_', '')
        final_aucome_results.append([organism] + parse_padmet(padmet_pathname))
    _write_stat_table('aucome_final.tsv', final_aucome_results)

# Inputs handed to compute_stat: one folder of networks per reconstruction
# method, plus the mapping file used for the ModelSEED models.
carveme_folder = 'networks_carveme'
modelseed_folder = 'networks_modelseed'
mapping_model_seed_file = 'mapping_modelseed_ec.tsv'
gapseq_folder = 'networks_gapseq'
aucome_folder = 'networks_aucome'

# Files parsed further down to sum the CarveMe and gapseq run times.
carveme_time_file = 'time_carveme.txt'
gapseq_time_file = 'time_gapseq.txt'

# Folder receiving the figures and their underlying table.
output_folder = 'Figure_S4_output'
# exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
os.makedirs(output_folder, exist_ok=True)

# Writes the four '*_stat.tsv' / 'aucome_final.tsv' tables read below.
compute_stat(carveme_folder, modelseed_folder, mapping_model_seed_file, gapseq_folder, aucome_folder)

fontsize = 30

sns.set('paper', rc={'figure.figsize':(40,20), 'font.size': fontsize, 'axes.labelsize': fontsize,
        'axes.titlesize': fontsize, 'xtick.labelsize': fontsize, 'ytick.labelsize': fontsize, 'legend.fontsize': fontsize,
        'legend.title_fontsize': fontsize})
sns.set_style("white")
# Keep text as text (not paths) in the SVG output.
plt.rcParams['svg.fonttype'] = 'none'

# Statistics tables written by compute_stat and the method name shown for each.
dataframes = ['carveme_stat.tsv', 'aucome_final.tsv',
                'gapseq_stat.tsv', 'modelseed_stat.tsv']
methods = {'carveme_stat': 'CarveMe', 'aucome_final': 'AuCoMe',
            'gapseq_stat': 'gapseq', 'modelseed_stat': 'ModelSEED'}
# (column of the statistics table, label on the x axis), in plotting order.
plotted_columns = [
    ('NB total reactions', 'All reactions'),
    ('NB enzymatic reactions with genes', 'Reactions with genes'),
    ('NB enzymatic reactions without genes', 'Reactions without genes'),
    ('NB spontaneous reactions', 'Spontaneous reactions'),
    ('NB ECs (redundant)', 'ECs redundant'),
    ('NB ECs (unique)', 'ECs unique'),
]
values = []
object_numbered = []
method_used = []

# Reshape the per-method tables into one long table: one row per
# (organism, statistic) with the method as a third column.
for dataframe in dataframes:
    input_df = pd.read_csv(dataframe, sep='\t')
    method_name = methods[dataframe.replace('.tsv', '')]
    for column_name, label in plotted_columns:
        column_values = input_df[column_name].tolist()
        values.extend(column_values)
        object_numbered.extend([label]*len(column_values))
        method_used.extend([method_name]*len(column_values))

seaborn_df = pd.DataFrame({'values': values, 'object': object_numbered,'method': method_used})
sup_fig_4_tsv_path = os.path.join(output_folder, 'Figure_S4_boxplot_networks.tsv')
seaborn_df.to_csv(sup_fig_4_tsv_path, sep='\t')
g = sns.boxplot(x="object", y="values", hue="method", data=seaborn_df)
# Add line between hue position.
for tick_position in g.get_xticks():
    g.axvline(tick_position+0.5, color='k')
plt.ylabel('Number')
plt.xlabel('')
sup_fig_4_svg_path = os.path.join(output_folder, 'Figure_S4_boxplot_networks.svg')
plt.savefig(sup_fig_4_svg_path)

plt.clf()

# ModelSEED and AuCoMe run times are hard-coded sums of recorded durations.
modelseed_compute_time = datetime.timedelta(minutes=59, seconds=57) + datetime.timedelta(minutes=34, seconds=30)
aucome_pwt_run_time = datetime.timedelta(hours=0, minutes=0, seconds=12) + datetime.timedelta(hours=0, minutes=19, seconds=29)
aucome_final_run_time = datetime.timedelta(hours=0, minutes=0, seconds=12) + datetime.timedelta(hours=0, minutes=19, seconds=29) + datetime.timedelta(hours=2, minutes=5, seconds=41) \
                        + datetime.timedelta(hours=2, minutes=24, seconds=11) + datetime.timedelta(hours=0, minutes=3, seconds=15)

# CarveMe: each line is parsed as '<label> = <seconds>s'.
total_carveme_time = datetime.timedelta(hours=0, minutes=0, seconds=0)
with open(carveme_time_file, 'r') as time_file:
    for line in time_file.read().splitlines():
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline at the end of the file).
            continue
        str_time = float(line.split(' = ')[1].replace('s', ''))
        time_run = datetime.timedelta(hours=0, minutes=0, seconds=str_time)
        total_carveme_time = total_carveme_time + time_run

# gapseq: each line is parsed as 'real<TAB><minutes>m<seconds>s'.
total_gapseq_time = datetime.timedelta(hours=0, minutes=0, seconds=0)
# Use a distinct name for the handle so the path variable is not overwritten.
with open(gapseq_time_file, 'r') as gapseq_time_handle:
    for gapseq_line in gapseq_time_handle.read().splitlines():
        if not gapseq_line.strip():
            continue
        gapseq_str_time = gapseq_line.split('real\t')[1]
        minutes, seconds = gapseq_str_time.split('m')
        minutes = int(minutes)
        seconds = float(seconds.replace('s', ''))
        time_run = datetime.timedelta(hours=0, minutes=minutes, seconds=seconds)
        total_gapseq_time = total_gapseq_time + time_run

# One bar per method, in the same method order as the boxplot.
run_times = [
    ('CarveMe', total_carveme_time),
    ('AuCoMe', aucome_final_run_time),
    ('gapseq', total_gapseq_time),
    ('ModelSEED', modelseed_compute_time),
]
values = [run_time.total_seconds() for _, run_time in run_times]
object_numbered = ['Time'] * len(run_times)
method_used = [method_name for method_name, _ in run_times]

time_seaborn_df = pd.DataFrame({'values': values, 'object': object_numbered,'method': method_used})
fig, ax = plt.subplots(figsize=(20, 20))
g = sns.barplot(x="object", y="values", hue="method", data=time_seaborn_df, ax=ax)
# Run times span several orders of magnitude.
g.set_yscale("log")
plt.xlabel('')
sup_fig_4_time_path = os.path.join(output_folder, 'Figure_S4_barplot_time_networks.svg')
plt.savefig(sup_fig_4_time_path)
