from Bio import SeqIO
import csv
import os
import pandas as pd
import numpy as np
from cobra.io.sbml import read_sbml_model
from padmet.classes import PadmetSpec, PadmetRef
import datetime
import matplotlib.pyplot as plt
import seaborn as sns

# Global seaborn theme used when the first figure is created: "poster" context
# with a 15x15 default figure size and thick lines, on a white background
# without grid lines.
sns.set(context='poster', rc={'figure.figsize': (15, 15), 'lines.linewidth': 10})
sns.set_style(style='white', rc={'axes.grid': False})

# Input files read by this script: the AuCoMe network of S. cerevisiae S288C
# (padmet), the MetaCyc reference (padmet), the gapseq pathway table and the
# list of YeastCyc pathways.
saccharomyces_aucome_file = os.path.join('networks_aucome', 'Saccharomyces_cerevisiae_S288C.padmet')
padmet_metacyc_ref_file = 'metacyc_23.5.padmet'
gapseq_pathway_file = os.path.join('networks_gapseq', 'Saccharomyces_cerevisiae_S288C_3', 'Saccharomyces_cerevisiae_S288C-all-Pathways.tbl')
yeast_cyc_ref_file = 'All-pathways-of-S.-cerevisiae-S288c.txt'

# Every table and figure produced below is written into this folder.
output_folder = 'Figures_S8_S9_output'
# exist_ok=True replaces the check-then-create pattern: no race between the
# existence test and the creation, and no error when the folder is already there.
os.makedirs(output_folder, exist_ok=True)

# Load the AuCoMe network.
padmetSpec = PadmetSpec(saccharomyces_aucome_file)

# Nodes of the network that are typed as reactions.
all_rxns = [node for node in padmetSpec.dicOfNode.values() if node.type == "reaction"]
# Flat list of the EC numbers carried by the reactions that have an
# 'EC-NUMBER' entry in their misc data.
aucome_rxn_ecs = [
    ec
    for rxn in all_rxns
    if 'EC-NUMBER' in rxn.misc
    for ec in rxn.misc['EC-NUMBER']
]

# Pathways are collected through the "is_in_pathway" relations of the
# reactions, so only pathways having at least one reaction are kept;
# superpathways containing only pathways never show up here.
total_pwy_id = set()
for rxn_node in all_rxns:
    pathways_ids = {
        rlt.id_out
        for rlt in padmetSpec.dicOfRelationIn[rxn_node.id]
        if rlt.type == "is_in_pathway"
    }
    total_pwy_id |= pathways_ids

# Load the MetaCyc reference used to know the full content of each pathway.
padmetRef = PadmetRef(padmet_metacyc_ref_file)

# For each pathway found in the AuCoMe network, compute the percentage of the
# pathway's reference reactions that are present in the network, and remember
# which reactions those are.
pathway_ratios = {}
pwy_rxns = {}
for pwy_id in total_pwy_id:
    spec_relations = padmetSpec.dicOfRelationOut.get(pwy_id, [])
    ref_relations = padmetRef.dicOfRelationOut.get(pwy_id, [])
    in_rxns = {rlt.id_in for rlt in spec_relations if rlt.type == "is_in_pathway"}
    pwy_all_rxns = {rlt.id_in for rlt in ref_relations if rlt.type == "is_in_pathway"}
    # NOTE(review): a pathway absent from the reference gives an empty
    # pwy_all_rxns and therefore a ZeroDivisionError on the next line.
    completeness = len(in_rxns) / len(pwy_all_rxns)
    pathway_ratios[pwy_id] = completeness * 100
    pwy_rxns[pwy_id] = in_rxns
# One row per pathway, indexed by pathway ID.
df_padmet = pd.DataFrame.from_dict(pathway_ratios, orient='index')

# Load the gapseq pathway table: tab-separated, lines starting with '#' are
# treated as comments.
df_gapseq_pathway = pd.read_csv(gapseq_pathway_file, sep='\t', comment='#')

# Keep only the rows whose 'Prediction' is True. The explicit copy makes the
# filtered frame independent, so the column assignment below does not trigger
# pandas' SettingWithCopyWarning.
df_gapseq_pathway = df_gapseq_pathway[df_gapseq_pathway['Prediction'] == True].copy()

# Remove the '|' characters from the IDs. regex=False states that '|' is a
# literal character: the default of `regex` differs between pandas versions and
# '|' is the alternation operator in a regular expression.
df_gapseq_pathway['ID'] = df_gapseq_pathway['ID'].str.replace('|', '', regex=False)
gapseq_pathways = set(df_gapseq_pathway['ID'].tolist())
df_gapseq_pathway.set_index('ID', inplace=True)
# Pathway ID -> value of the 'Completeness' column.
gapseq_pathway_ratios = df_gapseq_pathway['Completeness'].to_dict()

# Pathways found both in the AuCoMe network and in the gapseq predictions.
intersections = set(total_pwy_id).intersection(gapseq_pathways)

# Reference pathways, taken from YeastCyc:
# https://yeast.biocyc.org/group?id=:ALL-PATHWAYS&org-id=YEAST
df_yeast_cyc = pd.read_csv(yeast_cyc_ref_file)
yeast_cyc_pathways = set(df_yeast_cyc['Pathways'].tolist())

# Comparison of each tool with the YeastCyc reference:
#   true positives  = pathways shared with YeastCyc,
#   false positives = pathways of the tool that are not in YeastCyc,
#   false negatives = YeastCyc pathways the tool does not have.
gapseq_true_positives = len(yeast_cyc_pathways & gapseq_pathways)
aucome_true_positives = len(yeast_cyc_pathways & total_pwy_id)

gapseq_false_positives = len(gapseq_pathways.difference(yeast_cyc_pathways))
aucome_false_positives = len(total_pwy_id.difference(yeast_cyc_pathways))

gapseq_false_negatives = len(yeast_cyc_pathways.difference(gapseq_pathways))
aucome_false_negatives = len(yeast_cyc_pathways.difference(total_pwy_id))

# Write the confusion counts of both tools as a tab-separated table.
yeastcyc_comparison_tsv_file = os.path.join(output_folder, 'yeast_cyc_comparison.tsv')
# newline='' is required by the csv module for files handed to csv.writer:
# the writer emits its own line terminator, and without it platforms that
# translate newlines end up with an extra '\r' on every row.
with open(yeastcyc_comparison_tsv_file, 'w', newline='') as output_file:
    csvwriter = csv.writer(output_file, delimiter="\t")
    # The 'true_positivies' spelling is kept as-is: the same label is used for
    # the other figures of this script.
    csvwriter.writerows([
        ['confusion_matrix', 'values', 'tools'],
        ['true_positivies', gapseq_true_positives, 'gapseq'],
        ['false_positives', gapseq_false_positives, 'gapseq'],
        ['false_negatives', gapseq_false_negatives, 'gapseq'],
        ['true_positivies', aucome_true_positives, 'AuCoMe'],
        ['false_positives', aucome_false_positives, 'AuCoMe'],
        ['false_negatives', aucome_false_negatives, 'AuCoMe'],
    ])


# Read the comparison table back and draw one bar per confusion-matrix entry,
# coloured by tool.
yeastcyc_comparison_png_file = os.path.join(output_folder, 'comparison_yeastcyc.png')
df = pd.read_csv(yeastcyc_comparison_tsv_file, sep='\t')
sns.barplot(x="confusion_matrix", y="values", hue="tools", data=df)
plt.savefig(yeastcyc_comparison_png_file)
# Clear the current figure before the next plot is drawn.
plt.clf()

# Lower bounds of the completeness bins: 0, 10, ..., 100.
boundaries = np.arange(0, 110, 10)

# Rows have the form [bin label, count, tool, confusion-matrix entry].
# The '0' label starts with zero true/false positives and carries all the
# false negatives of the tool.
data_aucome = [
    ['0', 0, 'AuCoMe', 'true_positivies'],
    ['0', 0, 'AuCoMe', 'false_positives'],
    ['0', aucome_false_negatives, 'AuCoMe', 'false_negatives'],
]
data_gapseq = [
    ['0', 0, 'gapseq', 'true_positivies'],
    ['0', 0, 'gapseq', 'false_positives'],
    ['0', gapseq_false_negatives, 'gapseq', 'false_negatives'],
]

false_positives_total = set()

# Split the true/false positives of each tool by pathway completeness.
# Note that the *_true_positives / *_false_positives names are reused here and
# therefore no longer hold the overall totals after this loop.
for i in boundaries:
    # Each bin is the half-open interval [i, i + 10).
    i_max = i + 10
    boundary = '_'.join((str(i), str(i_max)))

    gap_boundary_pathways = {
        pathway for pathway, ratio in gapseq_pathway_ratios.items()
        if i <= ratio < i_max
    }
    padmet_boundary_pathways = {
        pathway for pathway, ratio in pathway_ratios.items()
        if i <= ratio < i_max
    }

    # YeastCyc pathways that AuCoMe has in this bin.
    aucome_reference_hits = yeast_cyc_pathways.intersection(padmet_boundary_pathways)

    gapseq_true_positives = len(yeast_cyc_pathways.intersection(gap_boundary_pathways))
    aucome_true_positives = len(aucome_reference_hits)
    gapseq_false_positives = len(gap_boundary_pathways.difference(yeast_cyc_pathways))
    aucome_false_positives = len(padmet_boundary_pathways.difference(yeast_cyc_pathways))

    # NOTE(review): looks like a leftover debugging trace for one pathway.
    print(['PWY-7340' in aucome_reference_hits], boundary)

    data_gapseq.extend([
        [boundary, gapseq_true_positives, 'gapseq', 'true_positivies'],
        [boundary, gapseq_false_positives, 'gapseq', 'false_positives'],
    ])
    data_aucome.extend([
        [boundary, aucome_true_positives, 'AuCoMe', 'true_positivies'],
        [boundary, aucome_false_positives, 'AuCoMe', 'false_positives'],
    ])

    # NOTE(review): despite its name, false_positives_total accumulates the
    # YeastCyc pathways found by AuCoMe (true positives) in the bins starting
    # at 70 and above - confirm the intent.
    if i_max > 70:
        false_positives_total |= aucome_reference_hits

# Figure S8: AuCoMe true/false positives and false negatives, stacked by
# pathway completeness bin.
df = pd.DataFrame(data_aucome, columns=['Pathways completeness', 'nb_set', 'Tools', 'confusion_matrix'])

# Same font size for labels, titles, ticks and legend.
fontsize = 20
theme_rc = {'figure.figsize': (20, 20), 'lines.linewidth': 20, 'font.size': 20.0}
theme_rc.update(dict.fromkeys(
    ['axes.labelsize', 'axes.titlesize', 'xtick.labelsize', 'ytick.labelsize',
     'legend.fontsize', 'legend.title_fontsize'],
    fontsize,
))
# NOTE(review): the previous plt.clf() clears the current figure without
# closing it, so 'figure.figsize' presumably does not resize that figure -
# confirm the output size is the intended one.
sns.set('paper', rc=theme_rc)
sns.set_style("whitegrid")

# 'nb_set' is used as weight so each bar height is the number of pathways.
ax = sns.histplot(data=df, x='confusion_matrix', hue='Pathways completeness', weights='nb_set', multiple='stack')
ax.set_ylim(0, 820)
# Keep text as text (not paths) in the SVG output.
plt.rcParams['svg.fonttype'] = 'none'
sup_fig_8_completion_svg_file = os.path.join(output_folder, 'Figure_S8_completion_pathway_aucome.svg')
plt.savefig(sup_fig_8_completion_svg_file)
plt.clf()

# gapseq counterpart of the completeness histogram: true/false positives and
# false negatives, stacked by pathway completeness bin.
df = pd.DataFrame(data_gapseq)
df.columns = ['Pathways completeness', 'nb_set', 'Tools', 'confusion_matrix']

# matplotlib.pyplot and seaborn are already imported at the top of the file,
# so the redundant mid-script re-imports were dropped.
fontsize = 20
sns.set('paper', rc={'figure.figsize':(20,20), 'lines.linewidth': 20, 'font.size': 20.0, 'axes.labelsize': fontsize,
        'axes.titlesize': fontsize, 'xtick.labelsize': fontsize, 'ytick.labelsize': fontsize, 'legend.fontsize': fontsize,
        'legend.title_fontsize': fontsize})
sns.set_style("whitegrid")

# 'nb_set' is used as weight so each bar height is the number of pathways.
ax = sns.histplot(data=df, x='confusion_matrix', weights='nb_set', hue='Pathways completeness', multiple='stack')
# Keep text as text (not paths) in the SVG output.
plt.rcParams['svg.fonttype'] = 'none'
# Fixed y-range of 0 to 820 for this histogram.
ax.set(ylim=(0, 820))
completion_pathway_gapseq_fig = os.path.join(output_folder, 'completion_pathway_gapseq.svg')
plt.savefig(completion_pathway_gapseq_fig)
plt.clf()

import matplotlib.pyplot as plt
from matplotlib_venn import venn3

def _pathways_in_range(ratios, lower=None, upper=None):
    """Return the IDs of *ratios* whose completeness lies in [lower, upper).

    A bound left to None is not applied, so calling it without bounds returns
    every ID of the dictionary.
    """
    return {
        pathway
        for pathway, ratio in ratios.items()
        if (lower is None or ratio >= lower) and (upper is None or ratio < upper)
    }


# Output files, one per Venn diagram.
venn_diagram_svg = os.path.join(output_folder, 'venn_diagram.svg')
sup_fig_9_venn_diagram_70_100_svg = os.path.join(output_folder, 'Figure_S9_venn_diagram_70_100.svg')
venn_diagram_70_100_gapseq_svg = os.path.join(output_folder, 'venn_diagram_70_100_gapseq.svg')
venn_diagram_50_70_svg = os.path.join(output_folder, 'venn_diagram_50_70.svg')
venn_diagram_50_70_gapseq_svg = os.path.join(output_folder, 'venn_diagram_50_70_gapseq.svg')
venn_diagram_50_svg = os.path.join(output_folder, 'venn_diagram_50.svg')
venn_diagram_50_gapseq = os.path.join(output_folder, 'venn_diagram_50_gapseq.svg')

# (output file, gapseq (lower, upper) bounds, AuCoMe (lower, upper) bounds).
# The "*_gapseq" variants keep all the gapseq pathways and only filter AuCoMe.
venn_specs = [
    (venn_diagram_svg, (None, None), (None, None)),
    (sup_fig_9_venn_diagram_70_100_svg, (70, None), (70, None)),
    (venn_diagram_70_100_gapseq_svg, (None, None), (70, None)),
    (venn_diagram_50_70_svg, (50, 70), (50, 70)),
    (venn_diagram_50_70_gapseq_svg, (None, None), (50, 70)),
    (venn_diagram_50_svg, (None, 50), (None, 50)),
    (venn_diagram_50_gapseq, (None, None), (None, 50)),
]

for venn_svg_path, gapseq_bounds, aucome_bounds in venn_specs:
    venn3(
        [
            set(yeast_cyc_pathways),
            _pathways_in_range(gapseq_pathway_ratios, *gapseq_bounds),
            _pathways_in_range(pathway_ratios, *aucome_bounds),
        ],
        ('YeastCyc', 'gapseq', 'AuCoMe'),
    )
    plt.savefig(venn_svg_path)
    # Clear the figure so the next diagram is drawn on empty axes.
    plt.clf()
