# -*- coding: utf-8 -*-
"""
Created on Tue Nov 29 11:33:36 2022

@author: pspea
"""

import pandas as pd

# Directory that holds the per-strain outlier tables; the combined table is
# written to the same directory.
base_dir = 'C:/Gresham/tiny_projects/Project_Grace/supplemental_figures/'

strain_list = ['DGY1728','DGY1734','DGY1736','DGY1740','DGY1744','DGY1747','DGY1751']

# gene -> {'high': set of strains whose standardized residual is > 0,
#          'low':  set of strains whose standardized residual is < 0}
sig_outlier_dict = {}

for strain in strain_list:
    print(strain)
    filename = ('{}Supplemental_Fig2C_SigOutliers_{}.txt').format(base_dir, strain)

    # The first column of each table becomes the index and is used as the
    # gene key below.
    df = pd.read_table(filename, index_col=0)
    sig_dict = df.to_dict('index')

    for gene, row in sig_dict.items():
        # Every gene present in any table gets an entry, even if its residual
        # turns out to be neither positive nor negative.
        if gene not in sig_outlier_dict:
            sig_outlier_dict[gene] = {'high': set(), 'low': set()}

        residual = row['standardized_residuals']
        if residual > 0:
            sig_outlier_dict[gene]['high'].add(strain)
        elif residual < 0:
            sig_outlier_dict[gene]['low'].add(strain)

# Largest number of strains sharing a gene in each direction. It is not
# written to the output file; it is kept as a module-level name so it can be
# inspected after the script has run.
max_dict = {'high': 0, 'low': 0}

outfile_name = ('{}Supplemental_Fig2C_SigOutliers_combined.txt').format(base_dir)

# The context manager guarantees the file is flushed and closed even if a
# write fails part-way through.
with open(outfile_name, 'w') as outfile:
    for gene, calls in sig_outlier_dict.items():
        for each in ('high', 'low'):
            max_dict[each] = max(max_dict[each], len(calls[each]))

        # Sets have no stable iteration order for strings across interpreter
        # runs, so sort the strain names to make the output reproducible.
        high_strains = sorted(calls['high'])
        low_strains = sorted(calls['low'])

        # Columns: gene, high count, low count, high strains, low strains.
        outline = ('{gene}\t{high_ct}\t{low_ct}\t{high_list}\t{low_list}\n').format(
            gene=gene,
            high_ct=len(high_strains),
            low_ct=len(low_strains),
            high_list=', '.join(high_strains),
            low_list=', '.join(low_strains),
        )

        outfile.write(outline)


# Repeat the aggregation for the CNV outlier tables (e.g. Supplemental_Fig2C_SigOutliers_CNV_DGY1751.txt)

# Directory that holds the per-strain CNV outlier tables; the combined table
# is written to the same directory.
base_dir = 'C:/Gresham/tiny_projects/Project_Grace/supplemental_figures/'

strain_list = ['DGY1728','DGY1734','DGY1736','DGY1740','DGY1744','DGY1747','DGY1751']

# gene -> {'high': set of strains whose standardized residual is > 0,
#          'low':  set of strains whose standardized residual is < 0}
sig_outlier_dict = {}

for strain in strain_list:
    print(strain)
    filename = ('{}Supplemental_Fig2C_SigOutliers_CNV_{}.txt').format(base_dir, strain)

    # The first column of each table becomes the index and is used as the
    # gene key below.
    df = pd.read_table(filename, index_col=0)
    sig_dict = df.to_dict('index')

    for gene, row in sig_dict.items():
        # Every gene present in any table gets an entry, even if its residual
        # turns out to be neither positive nor negative.
        if gene not in sig_outlier_dict:
            sig_outlier_dict[gene] = {'high': set(), 'low': set()}

        residual = row['standardized_residuals']
        if residual > 0:
            sig_outlier_dict[gene]['high'].add(strain)
        elif residual < 0:
            sig_outlier_dict[gene]['low'].add(strain)

# Largest number of strains sharing a gene in each direction. It is not
# written to the output file; it is kept as a module-level name so it can be
# inspected after the script has run.
max_dict = {'high': 0, 'low': 0}

outfile_name = ('{}Supplemental_Fig2C_SigOutliers_CNV_combined.txt').format(base_dir)

# The context manager guarantees the file is flushed and closed even if a
# write fails part-way through.
with open(outfile_name, 'w') as outfile:
    for gene, calls in sig_outlier_dict.items():
        for each in ('high', 'low'):
            max_dict[each] = max(max_dict[each], len(calls[each]))

        # Sets have no stable iteration order for strings across interpreter
        # runs, so sort the strain names to make the output reproducible.
        high_strains = sorted(calls['high'])
        low_strains = sorted(calls['low'])

        # Columns: gene, high count, low count, high strains, low strains.
        outline = ('{gene}\t{high_ct}\t{low_ct}\t{high_list}\t{low_list}\n').format(
            gene=gene,
            high_ct=len(high_strains),
            low_ct=len(low_strains),
            high_list=', '.join(high_strains),
            low_list=', '.join(low_strains),
        )

        outfile.write(outline)
   