# -*- coding: utf-8 -*-
"""
Created on Mon Mar 13 11:25:55 2023

@author: pspea
"""

import csv

import numpy as np
import pandas as pd

# GSEA results table, loaded as {row_number: {column_name: value}}.
go_filename = 'C:/Gresham/tiny_projects/Project_Grace/Review_GB/insert_GSEA/STable 4_SupplementaryTable3_NEW.tsv.txt'
# Alternative input file:
# go_filename = 'C:/Gresham/tiny_projects/Project_Grace/Review_GB/insert_GSEA/STable 4_SupplementaryTable3_NEW_pvalue.txt'
go_dict = pd.read_table(go_filename).to_dict(orient='index')

# REVIGO table, keyed by its first column: {first_column_value: {column_name: value}}.
revigo_filename = 'C:/Gresham/tiny_projects/Project_Grace/Review_GB/Revigo_BP_OnScreenTable.tsv'
# Alternative input file:
# revigo_filename = 'C:/Gresham/tiny_projects/Project_Grace/Review_GB/Revigo_BP_OnScreenTable_pvalue.tsv'
df = pd.read_table(revigo_filename, index_col=0)
revigo_dict = df.to_dict(orient='index')

# For every GO ID of the GSEA table that is also a key of revigo_dict, record
# how many rows mention it and the enrichment score of each of those rows.
goid_counter = {}
goid_enrichment = {}

for row in go_dict.values():
    goid = row["ID"]
    if goid not in revigo_dict:
        continue

    # Optional extra filter (disabled): only count rows whose
    # row['qvalue'] is <= 0.05.

    goid_counter[goid] = goid_counter.get(goid, 0) + 1
    goid_enrichment.setdefault(goid, []).append(row['enrichmentScore'])
        
# GO IDs that pass the occurrence threshold (seen more than zero times).
goid_set = {term for term, hits in goid_counter.items() if hits > 0}

# Median enrichment score of each retained GO ID.
goid_median_enrichment = {
    term: np.median(scores)
    for term, scores in goid_enrichment.items()
    if term in goid_set
}

# (GO ID, median) pairs, highest median first; ties keep insertion order
# because sorted() is stable.
sorted_gme = sorted(
    goid_median_enrichment.items(),
    key=lambda pair: pair[1],
    reverse=True,
)

# 1-based rank of each GO ID in that ordering.
ordered_gme = {
    term: rank for rank, (term, _) in enumerate(sorted_gme, start=1)
}
        
# Write a CSV copy of the GSEA table in which every data row keeps input
# columns 2-13, is prefixed with a keep flag ('Yes'/'No') and the GO ID's rank
# from ordered_gme, and is suffixed with the reason it was dropped (if any).
#
# The csv module is used on both sides so that fields containing commas
# (including the 'Manually compressed, strain specific' note itself) are
# quoted instead of shifting columns, and so that the line ending never ends
# up inside the last copied column. csv.reader honours double-quoted fields,
# as pandas.read_table does by default. The context managers close both
# files even if a row raises.
go_out_filename = 'C:/Gresham/tiny_projects/Project_Grace/Review_GB/insert_GSEA/STable 4_SupplementaryTable3_revigo_all.csv'

# newline='' is what the csv module requires of its file objects; the writer
# emits its own '\r\n' row terminator.
with open(go_filename, newline='') as go_file, \
        open(go_out_filename, 'w', newline='') as go_out_file:
    reader = csv.reader(go_file, delimiter='\t')
    writer = csv.writer(go_out_file)

    for fields in reader:
        # Blank lines carry no ID column; skip them instead of crashing.
        if not fields:
            continue

        # Header row (any line mentioning 'qvalue'): copy all columns through.
        if any('qvalue' in field for field in fields):
            writer.writerow(fields)
            continue

        goid = fields[2]

        # Rank by median enrichment; blank when the GO ID was not ranked.
        order_num = ordered_gme.get(goid, '')

        if goid in goid_set:
            keep, details = 'Yes', ''
        elif goid not in revigo_dict:
            keep, details = 'No', 'Revigo compressed'
        else:
            keep, details = 'No', 'Manually compressed, strain specific'

        writer.writerow([keep, order_num, *fields[2:14], details])