import csv
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import json

from padmet.classes import PadmetSpec
from cobra.io.sbml import read_sbml_model

from math import sqrt

# Global plot styling: "poster" context with a 15x15 default figure size and
# thick lines, then a white style with the axes grid switched off.
sns.set(context='poster', rc={'figure.figsize': (15, 15), 'lines.linewidth': 10})
sns.set_style(style='white', rc={'axes.grid': False})

def parse_padmet(padmet_path):
    """Summarise the reaction nodes of a PADMet file.

    Args:
        padmet_path: Path handed to ``PadmetSpec`` to load the network.

    Returns:
        list: Six items, in this order:
            0. EC numbers of all reactions (list, duplicates kept), read from
               the ``'EC-NUMBER'`` entry of each reaction's ``misc``.
            1. The same EC numbers as a set.
            2. Number of reaction nodes.
            3. Number of reactions having at least one ``"is_linked_to"``
               relation in ``dicOfRelationIn`` (counted as "with genes").
            4. Number of reactions with no such relation and no
               ``'SPONTANEOUS'`` entry in ``misc``.
            5. Number of reactions with a ``'SPONTANEOUS'`` entry in ``misc``,
               whether or not they also have an ``"is_linked_to"`` relation.
    """
    padmetSpec = PadmetSpec(padmet_path)

    reactions = [node for node in padmetSpec.dicOfNode.values() if node.type == "reaction"]

    # Single pass over the reactions: collect ECs and update the three counters.
    ecs = []
    nb_with_genes = 0
    nb_without_genes = 0
    nb_spontaneous = 0
    for rxn_node in reactions:
        if 'EC-NUMBER' in rxn_node.misc:
            ecs.extend(rxn_node.misc['EC-NUMBER'])

        is_spontaneous = 'SPONTANEOUS' in rxn_node.misc
        if is_spontaneous:
            nb_spontaneous += 1

        if any(rlt.type == "is_linked_to" for rlt in padmetSpec.dicOfRelationIn[rxn_node.id]):
            nb_with_genes += 1
        elif not is_spontaneous:
            # Spontaneous reactions are not reported as "without genes".
            nb_without_genes += 1

    return [ecs, set(ecs), len(reactions), nb_with_genes, nb_without_genes, nb_spontaneous]

# Directory receiving every table and figure written by this script.
output_folder = 'Figure_S5_output'
# exist_ok=True removes the check-then-create race and makes reruns a no-op.
os.makedirs(output_folder, exist_ok=True)

# Lookup table keyed by the 'ModelSEED ID' column; the values come from the
# 'External ID' column (used further down as the EC number of a reaction).
df_modelseed_mapping = pd.read_csv(
    'mapping_modelseed_ec.tsv',
    sep='\t',
).set_index('ModelSEED ID')
modelseed_mapping_ecs = df_modelseed_mapping['External ID'].to_dict()

# all_ecs accumulates the ECs of every source; bigg_ref_ecs keeps the BiGG ones.
all_ecs = []
bigg_ref_ecs = []
# Get ECs from BiGG: every entry of 'jsons_bigg' is parsed as JSON and the
# 'ec-code' annotation of each reaction is collected.
bigg_folder = 'jsons_bigg'
for bigg_file_name in os.listdir(bigg_folder):
    bigg_model_path = os.path.join(bigg_folder, bigg_file_name)
    # JSON text is UTF-8; decoding explicitly avoids depending on the locale.
    with open(bigg_model_path, encoding='utf-8') as bigg_handle:
        bigg_model = json.load(bigg_handle)

    for reaction in bigg_model['reactions']:
        annotation = reaction['annotation']
        if 'ec-code' in annotation:
            reaction_ecs = annotation['ec-code']
            # A bare string would be split into single characters by extend();
            # wrap it so it is stored as one EC.
            if isinstance(reaction_ecs, str):
                reaction_ecs = [reaction_ecs]
            all_ecs.extend(reaction_ecs)
            bigg_ref_ecs.extend(reaction_ecs)
modelseed_ref_ecs = []
# Get ECs from ModelSEED: every entry of 'jsons_modelseed' is parsed as JSON
# and each model reaction is translated to an EC through modelseed_mapping_ecs.
modelseed_folder = 'jsons_modelseed'
for modelseed_file_name in os.listdir(modelseed_folder):
    modelseed_model_path = os.path.join(modelseed_folder, modelseed_file_name)
    # JSON text is UTF-8; decoding explicitly avoids depending on the locale.
    with open(modelseed_model_path, encoding='utf-8') as modelseed_handle:
        modelseed_model = json.load(modelseed_handle)

    for reaction in modelseed_model['modelreactions']:
        # Only the part of the ID before the first underscore is the mapping key.
        reaction_id = reaction['id'].split('_')[0]
        # Reactions absent from the mapping are skipped.
        if reaction_id in modelseed_mapping_ecs:
            ec = modelseed_mapping_ecs[reaction_id]
            all_ecs.append(ec)
            modelseed_ref_ecs.append(ec)

import pandas as pd
from bioservices import KEGG


kegg_ecs = []
# Enzymes associated with Escherichia coli K12 MG1655, read from the local
# 'kegg_ecs.txt' file. Disabled recipe that queries KEGG and writes that file:
#     s = KEGG()
#     s_results = s.link("eco", "enzyme")
#     with open('kegg_ecs.txt', 'w') as output_file:
#         for i in s_results.splitlines():
#             output_file.write(i+'\n')
# For each line, the EC is the text before the first tab with 'ec:' removed.
with open('kegg_ecs.txt', 'r') as input_file:
    kegg_file_ecs = [
        line.split('\t')[0].replace('ec:', '')
        for line in input_file.read().splitlines()
    ]
all_ecs.extend(kegg_file_ecs)
kegg_ecs.extend(kegg_file_ecs)


# Get ECs from EcoCyc: item 1 of the parse_padmet result is the set of EC
# numbers; every 'EC-' substring is stripped from them.
ecocyc_met = parse_padmet('ecocyc.padmet')
ecocyc_ecs = {ec.replace('EC-', '') for ec in ecocyc_met[1]}
all_ecs.extend(ecocyc_ecs)

from supervenn import supervenn
# One set of ECs per source, in the order KEGG, EcoCyc, ModelSEED, BiGG.
sets = [set(kegg_ecs), set(ecocyc_ecs), set(modelseed_ref_ecs), set(bigg_ref_ecs)]
all_ecs = set(all_ecs)
sup_fig_5_refence_ec_catalog_K12MG1655_tsv = os.path.join(output_folder, 'Figure_S5_refence_ec_catalog_K12MG1655.tsv')

# Test membership against the sets (constant time) instead of the raw lists.
kegg_set, ecocyc_set, modelseed_set, bigg_set = sets

# Presence/absence table: one row per EC of the union, one 0/1 column per source.
# newline='' lets the csv module write its own line endings unaltered.
with open(sup_fig_5_refence_ec_catalog_K12MG1655_tsv, 'w', newline='') as output_file:
    csvwriter = csv.writer(output_file, delimiter='\t')
    csvwriter.writerow(['EC', 'KEGG', 'EcoCyc', 'BiGG', 'ModelSEED'])
    for ec in sorted(all_ecs):
        # Column order must match the header: KEGG, EcoCyc, BiGG, ModelSEED.
        csvwriter.writerow([
            ec,
            int(ec in kegg_set),
            int(ec in ecocyc_set),
            int(ec in bigg_set),
            int(ec in modelseed_set),
        ])

# Supervenn diagram of the four EC sets; labels are listed in the same order
# as the entries of `sets`.
labels = ['KEGG', 'EcoCyc', 'ModelSEED', 'BiGG']
import matplotlib.pyplot as plt
plt.figure(figsize=(32, 16))
supervenn(
    sets,
    labels,
    chunks_ordering='size',
    reverse_sets_order=False,
    sets_ordering='size',
    color_cycle=['C3', 'C0', 'C2', 'C1'],
    min_width_for_annotation=10,
    fontsize=20,
)
sup_fig_5_ec_union_svg = os.path.join(
    output_folder,
    'Figure_S5_ec_union.svg',
)
plt.savefig(sup_fig_5_ec_union_svg)
# Clear the current figure once it has been written to disk.
plt.clf()

import venn
# Four-set Venn diagram of the same EC sets, saved as SVG.
venn_labels = venn.get_labels(
    sets,
    fill=['number', 'logic'],
)
fig, ax = venn.venn4(
    venn_labels,
    names=labels,
    colors=['C2', 'C1', 'C3', 'C0'],
)
sup_fig_5_ec_union_venn_svg = os.path.join(
    output_folder,
    'Figure_S5_5_ec_union_venn.svg',
)
plt.savefig(sup_fig_5_ec_union_venn_svg)
# Clear the current figure once it has been written to disk.
plt.clf()

# Report the number of ECs in the union of all sources.
ec_union_size = len(all_ecs)
print(f'Union of ECs: {ec_union_size}')
# Value noted by the original author: 1868 ECs
