from Bio import SeqIO
import csv
import os
import pandas as pd
import numpy as np
from cobra.io.sbml import read_sbml_model
from padmet.classes import PadmetSpec, PadmetRef
import datetime

# Recorded runtime of each tool, keyed by organism name and stored as
# datetime.timedelta so callers can use .total_seconds().

# gapseq runtimes, given as a number of seconds.
total_gapseq_times = {
    organism: datetime.timedelta(seconds=elapsed_seconds)
    for organism, elapsed_seconds in (
        ('Laccaria_bicolor', 76126),
        ('Neurospora_crassa', 31252),
        ('Schizosaccharomyces_pombe', 45598),
        ('Saccharomyces_cerevisiae_S288C', 45565),
        ('Rhizopus_oryzae', 49806),
    )
}

# ModelSEED runtimes, given as (hours, minutes).
modelseed_times = {
    organism: datetime.timedelta(hours=elapsed_hours, minutes=elapsed_minutes)
    for organism, (elapsed_hours, elapsed_minutes) in (
        ('Laccaria_bicolor', (3, 11)),
        ('Neurospora_crassa', (3, 6)),
        ('Schizosaccharomyces_pombe', (1, 10)),
        ('Saccharomyces_cerevisiae_S288C', (1, 41)),
        ('Rhizopus_oryzae', (1, 13)),
    )
}

# AuCoMe runtime: the same value (25 h 1 min 22 s) is recorded for every
# organism, so the table reuses the organism keys of the gapseq table.
aucome_time = dict.fromkeys(
    total_gapseq_times,
    datetime.timedelta(hours=25, minutes=1, seconds=22),
)

# Input locations, relative to the current working directory.
aucome_padmet_folder = 'networks_aucome'   # listed below to find one <organism>.padmet per organism
gapseq_model_folder = 'networks_gapseq'    # holds <organism>_3/<organism>-all-{Pathways,Reactions}.tbl
modelseed_folder = 'networks_modelseed'    # holds tsv/<organism>.gbk_genome_draftModel-reactions.tsv
metacyc_ref_file = 'metacyc_23.5.padmet'   # reference padmet opened with PadmetRef

# Every figure and the summary table are written into this folder.
output_folder = 'Figure_S7_output'
# exist_ok=True replaces the exists()/mkdir() pair: no check-then-create race
# and no error when the folder is already there.
os.makedirs(output_folder, exist_ok=True)

# Imported once here instead of on every loop iteration. These are the same
# packages the loop used before; matplotlib.pyplot replaces the former
# "import pylab as plt", which only rebound the same savefig/clf functions.
import warnings

import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib_venn import venn3


def _pathways_with_reactions(padmet):
    """Return the IDs of all pathways that directly contain a reaction.

    A pathway is kept when at least one reaction node is linked to it by an
    ``is_in_pathway`` relation. Superpathways containing only pathways have
    no such relation and are therefore left out.
    """
    pathway_ids = set()
    for node in padmet.dicOfNode.values():
        if node.type != "reaction":
            continue
        for rlt in padmet.dicOfRelationIn.get(node.id, []):
            if rlt.type == "is_in_pathway":
                pathway_ids.add(rlt.id_out)
    return pathway_ids


def _pathway_reactions(padmet, pwy_id):
    """Return the IDs of the reactions linked to ``pwy_id`` by ``is_in_pathway``."""
    return {rlt.id_in for rlt in padmet.dicOfRelationOut.get(pwy_id, []) if rlt.type == "is_in_pathway"}


def _count_with_unique(items):
    """Format a list as ``'<total> (<number of distinct values>)'``."""
    return '{} ({})'.format(len(items), len(set(items)))


def _bin_pathway_completeness(aucome_ratios, gapseq_ratios, bin_width=10):
    """Count pathways per completeness bin and per tool.

    Args:
        aucome_ratios: dict pathway ID -> completeness percentage (AuCoMe).
        gapseq_ratios: dict pathway ID -> completeness percentage (gapseq).
        bin_width: width of each bin; bins are ``(lower, lower + bin_width]``
            for ``lower`` in ``range(0, 100, bin_width)``.

    Returns:
        A list of ``[bin label, count, category]`` rows, seven per bin:
        pathways only known to gapseq, gapseq pathways that AuCoMe places in
        another bin (split by which tool has the higher completeness), the
        three symmetric AuCoMe categories, and pathways both tools place in
        this bin.
    """
    aucome_ids = set(aucome_ratios)
    gapseq_ids = set(gapseq_ratios)
    rows = []
    for lower in range(0, 100, bin_width):
        upper = lower + bin_width
        gapseq_bin = {pwy for pwy, ratio in gapseq_ratios.items() if lower < ratio <= upper}
        aucome_bin = {pwy for pwy, ratio in aucome_ratios.items() if lower < ratio <= upper}

        shared_pathways = gapseq_bin & aucome_bin
        gapseq_unique_pathway = gapseq_bin - aucome_ids
        aucome_unique_pathway = aucome_bin - gapseq_ids
        # Pathways known to both tools but falling into a different bin for the other tool.
        gapseq_other_bin = (gapseq_bin & aucome_ids) - aucome_bin
        aucome_other_bin = (aucome_bin & gapseq_ids) - gapseq_bin

        gapseq_aucome_sup = [pwy for pwy in gapseq_other_bin if gapseq_ratios[pwy] < aucome_ratios[pwy]]
        gapseq_aucome_inf = [pwy for pwy in gapseq_other_bin if gapseq_ratios[pwy] >= aucome_ratios[pwy]]
        aucome_gapseq_sup = [pwy for pwy in aucome_other_bin if aucome_ratios[pwy] < gapseq_ratios[pwy]]
        aucome_gapseq_inf = [pwy for pwy in aucome_other_bin if aucome_ratios[pwy] >= gapseq_ratios[pwy]]

        boundary = str(lower) + '_' + str(upper)
        rows.append([boundary, len(gapseq_unique_pathway), 'gapseq'])
        rows.append([boundary, len(gapseq_aucome_sup), 'gapseq (AuCoMe sup)'])
        rows.append([boundary, len(gapseq_aucome_inf), 'gapseq (AuCoMe inf)'])

        rows.append([boundary, len(aucome_unique_pathway), 'AuCoMe'])
        rows.append([boundary, len(aucome_gapseq_sup), 'AuCoMe (gapseq sup)'])
        rows.append([boundary, len(aucome_gapseq_inf), 'AuCoMe (gapseq inf)'])
        rows.append([boundary, len(shared_pathways), 'Intersection'])
    return rows


# Summary table: header first, then one row per organism.
run_data = []
run_data.append(['Organism', 'AuCoMe final EC', 'gapseq find EC', 'ModelSEED EC', 'AuCoMe final reactions', 'Gapseq find reactions', 'ModelSEED reactions', 'AuCoMe final pathways', 'Gapseq find pathways', 'ModelSEED pathways', 'Runtime AuCoMe', 'Runtime gapseq', 'Runtime modelseed'])

# The MetaCyc reference does not depend on the organism: load it and derive
# the superpathway set once instead of once per organism.
padmetRef = PadmetRef(metacyc_ref_file)
metacyc_pathways = _pathways_with_reactions(padmetRef)
all_metacyc_pathways = {node.id for node in padmetRef.dicOfNode.values() if node.type == "pathway"}
metacyc_super_pathways = all_metacyc_pathways - metacyc_pathways

# Plot theme shared by every figure produced below.
fontsize = 20
sns.set('paper', rc={'figure.figsize':(20,20), 'lines.linewidth': 20, 'font.size': 20.0, 'axes.labelsize': fontsize,
        'axes.titlesize': fontsize, 'xtick.labelsize': fontsize, 'ytick.labelsize': fontsize, 'legend.fontsize': fontsize,
        'legend.title_fontsize': fontsize})
sns.set_style("whitegrid")
plt.rcParams['svg.fonttype'] = 'none'

# Only *.padmet entries are organisms; sorting makes the row order of the
# summary table reproducible (os.listdir returns entries in arbitrary order).
padmet_suffix = '.padmet'
padmet_files = sorted(name for name in os.listdir(aucome_padmet_folder) if name.endswith(padmet_suffix))

for padmet_file in padmet_files:
    organism = padmet_file[:-len(padmet_suffix)]
    padmetSpec = PadmetSpec(os.path.join(aucome_padmet_folder, padmet_file))

    # --- AuCoMe: reactions, EC numbers and pathways of the organism network.
    all_rxns = [node for node in padmetSpec.dicOfNode.values() if node.type == "reaction"]
    aucome_rxn_ecs = [ec for node in all_rxns if 'EC-NUMBER' in node.misc for ec in node.misc['EC-NUMBER']]
    total_pwy_id = _pathways_with_reactions(padmetSpec)

    # Completeness (percent) = reactions of the pathway present in the
    # organism / reactions of the pathway in the MetaCyc reference.
    pathway_ratios = {}
    for pwy_id in total_pwy_id:
        ref_rxns = _pathway_reactions(padmetRef, pwy_id)
        if not ref_rxns:
            # Without reference reactions the ratio is undefined (this used to
            # raise ZeroDivisionError). The pathway is still counted in
            # total_pwy_id but gets no completeness value.
            warnings.warn('Pathway {} of {} has no reaction in {}: completeness not computed.'.format(
                pwy_id, organism, metacyc_ref_file))
            continue
        in_rxns = _pathway_reactions(padmetSpec, pwy_id)
        pathway_ratios[pwy_id] = (len(in_rxns)/len(ref_rxns))*100

    # --- gapseq: predicted pathways and the reactions found for them.
    gapseq_organism_folder = os.path.join(gapseq_model_folder, organism + '_3')
    gapseq_fungi_pathways_file = os.path.join(gapseq_organism_folder, organism+'-all-Pathways.tbl')
    df_gapseq_pathway = pd.read_csv(gapseq_fungi_pathways_file, sep='\t', comment='#')

    # .copy() so the column assignment below works on an independent frame
    # rather than on a slice (avoids SettingWithCopyWarning).
    df_gapseq_pathway = df_gapseq_pathway[df_gapseq_pathway['Prediction'] == True].copy()

    # regex=False: '|' must be removed literally, whatever the pandas default is.
    df_gapseq_pathway['ID'] = df_gapseq_pathway['ID'].str.replace('|', '', regex=False)
    gapseq_pathways = df_gapseq_pathway['ID'].tolist()

    df_gapseq_pathway.set_index('ID', inplace=True)
    gapseq_pathway_ratios = df_gapseq_pathway['Completeness'].to_dict()

    # fillna(''): an empty ReactionsFound cell is read as NaN and cannot be split.
    # NOTE(review): a reaction found in several pathways appears several times
    # in this list, and its length is what is reported as 'Gapseq find reactions'.
    pathway_rxns = [rxn for rxns in df_gapseq_pathway['ReactionsFound'].fillna('').str.split(' ') for rxn in rxns if rxn != '']

    gapseq_fungi_reactions_file = os.path.join(gapseq_organism_folder, organism+'-all-Reactions.tbl')
    df_gapseq_reaction = pd.read_csv(gapseq_fungi_reactions_file, sep='\t', comment='#')
    df_gapseq_reaction = df_gapseq_reaction.fillna('')
    df_gapseq_reaction.set_index('rxn', inplace=True)
    rxn_ecs = df_gapseq_reaction['ec'].to_dict()
    gapseq_ecs = []
    for rxn in pathway_rxns:
        if rxn in rxn_ecs:
            # Several EC numbers of one reaction are separated by '/'.
            gapseq_ecs.extend(ec for ec in rxn_ecs[rxn].split('/') if ec != '')

    # --- Figure: stacked histogram of pathway completeness, AuCoMe vs gapseq.
    df = pd.DataFrame(_bin_pathway_completeness(pathway_ratios, gapseq_pathway_ratios),
                      columns=['Pathways completeness (percent)', 'nb_set', 'Pathways in'])

    sns.histplot(df, x='Pathways completeness (percent)', hue='Pathways in', weights='nb_set',
                hue_order=['gapseq', 'AuCoMe', 'Intersection', 'gapseq (AuCoMe sup)', 'gapseq (AuCoMe inf)', 'AuCoMe (gapseq sup)', 'AuCoMe (gapseq inf)'],
                multiple='stack')
    completion_pathway_file = os.path.join(output_folder, 'completion_pathway_'+organism+'.svg')
    plt.savefig(completion_pathway_file)
    plt.clf()

    # --- ModelSEED: EC numbers and MetaCyc pathways listed in the reaction table.
    modelseed_reactions_file = os.path.join(modelseed_folder, 'tsv', organism+'.gbk_genome_draftModel-reactions.tsv')
    df_pathway_modelseed = pd.read_csv(modelseed_reactions_file, sep='\t')
    df_pathway_modelseed = df_pathway_modelseed.fillna('')

    modelseed_ecs = [ec for ecs in df_pathway_modelseed['enzyme'] if ecs != '' for ec in ecs.split('|')]
    modelseed_pathways = {pathway for pathways in df_pathway_modelseed['metacyc pathways'] if pathways != '' for pathway in pathways.split('|')}
    # Drop superpathways so the three tools are compared on the same kind of pathway.
    modelseed_pathways = modelseed_pathways - metacyc_super_pathways

    # --- Figure: Venn diagram of the pathways found by the three tools.
    venn3([modelseed_pathways, set(gapseq_pathways), set(total_pwy_id)], set_labels = ('ModelSeed', 'Gapseq', 'AuCoMe'))

    pathway_venn_file = os.path.join(output_folder, 'pathway_venn_'+organism+'.png')
    plt.savefig(pathway_venn_file)
    plt.clf()

    # EC columns are reported as 'total (distinct)'; runtimes in seconds.
    run_data.append([organism, _count_with_unique(aucome_rxn_ecs), _count_with_unique(gapseq_ecs),
                    _count_with_unique(modelseed_ecs), len(all_rxns), len(pathway_rxns),
                    len(df_pathway_modelseed), len(total_pwy_id), len(gapseq_pathways), len(modelseed_pathways),
                    aucome_time[organism].total_seconds(), total_gapseq_times[organism].total_seconds(), modelseed_times[organism].total_seconds()])

# Write the summary table (header row followed by one row per organism) as a
# tab-separated file in the output folder.
fungi_pathway_file = os.path.join(output_folder, 'fungi_stats.tsv')
# newline='' is required by the csv module: without it the writer's own '\r\n'
# line terminator is translated again on platforms that use '\r\n' newlines.
with open(fungi_pathway_file, 'w', newline='') as output_file:
    csvwriter = csv.writer(output_file, delimiter='\t')
    csvwriter.writerows(run_data)
