# -*- coding: utf-8 -*-
"""
Created on Wed Nov  2 12:05:39 2022

@author: pspea
"""
import pandas as pd
import numpy as np
from scipy.stats import fisher_exact
#import plotly.graph_objects as go

import plotly.io as pio
pio.renderers.default = "browser"


import plotly.graph_objects as go

# DAL80 gene set: tf_gene is the systematic name of the TF itself, which is
# also a member of target_gene_set.
# NOTE: the blocks further down reassign these same three names, so this
# definition is overridden before the analysis runs.
target_gene_set_name = 'DAL80'
tf_gene = 'YKR034W'
target_gene_set = {
    tf_gene,
    'YOR094W', 'YCR020C-A', 'YHR091C', 'YKR039W', 'YDL049C',
    'YDL210W', 'YDR042C', 'YDR127W', 'YGR019W', 'YLR082C',
    'YMR257C', 'YNR063W', 'YOL042W', 'YOL043C', 'YOR093C',
    'YEL063C', 'YIR028W',
}

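#BAS1 (NOTE: target_gene_set_name, tf_gene and target_gene_set are reassigned by each later block, so this definition is overridden before the analysis runs)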
target_gene_set_name = 'BAS1'
tf_gene = 'YKR099W'
target_gene_set = set(['YKR099W',
                        'YGR204W',
                        'YAR015W','YNL220W','YLR359W','YLR359W','YMR120C','YMR120C','YOR128C','YOR128C','YGR204W','YGR204W','YMR300C','YGL234W','YGL234W','YGR061C','YDR408C','YPL061W','YLR089C','YJR148W','YAL060W','YJR060W','YCL064C','YNR001C','YCR005C','YPL189C-A','YLR348C','YAL007C','YLR060W','YOR280C','YDR019C','YDR019C','YMR189W','YMR189W','YMR189W','YAL044C','YAL044C','YAL044C','YOL059W','YBR010W','YER055C','YCL030C','YCL030C','YIL116W','YIL116W','YDR399W','YOR267C','YMR186W','YFL014W','YPL240C','YDR225W','YER092W','YER092W','YJR016C','YLR355C','YKL217W','YAR018C','YJL134W','YLL007C','YOL126C','YER091C','YIL051C','YLR057W','YMR188C','YMR188C','YKR080W','YKR080W','YKR080W','YGL236C','YNL036W','YOR269W','YPR002W','YKL043W','YBR093C','YNCL0012C','YLR059C','YOL143C','YDL083C','YER131W','YLR162W-A','YDR041W','YOR184W','YLR058C','YLR058C','YLR058C','YDR409W','YKL218C','YGL184C','YAR042W','YGL186C','YKR079C','YBR265W','YKL216W','YLR420W','YBL005W-A','YBL005W-B','YBR012W-A','YBR012W-B','YCL020W','YDR034C-C','YDR034C-D','YDR098C-A','YDR170W-A','YDR210W-A','YDR239C','YDR261C-C','YDR261W-A','YDR316W-B','YDR365W-A','YDR444W','YER137C-A','YER138C','YER159C-A','YFL002W-A','YFL002W-B','YNL160W','YGR027W-A','YGR038C-A','YGR161C-C','YGR161W-A','YGR161W-B','YHR214C-C','YJR026W','YJR027W','YJR028W','YJR029W','YKL030W','YKL097C','YLR157C-A','YLR227W-A','YLR256W-A','YLR349W','YLR410W-A','YLR410W-B','YML039W','YML040W','YML045W','YML045W-A','YMR045C','YMR050C','YMR051C','YMR244W','YNL054W-A','YNL284C-A','YOL103W-A','YOL103W-B','YOR192C-A','YOR192C-B','YPL197C','YPL257W-A','YPL257W-B','YPR158W-A','YMR243C'])

# PUT3 gene set: tf_gene is the systematic name of the TF itself, which is
# also a member of target_gene_set. IDs that were listed more than once in the
# original literal appear once here; the resulting set is identical.
# NOTE: the blocks further down reassign these same three names, so this
# definition is overridden before the analysis runs.
target_gene_set_name = 'PUT3'
tf_gene = 'YKL015W'
target_gene_set = {
    tf_gene,
    'YHR208W', 'YJR148W', 'YGL009C', 'YCL018W', 'YOR306C', 'YLR142W',
    'YNR044W', 'YGL032C', 'YIL088C', 'YPR122W', 'YIL015W', 'YER155C',
    'YJL194W', 'YNL298W', 'YNR028W', 'YKL096W-A', 'YIR029W', 'YDL160C',
    'YJL196C', 'YPL101W', 'YHR176W', 'YNL256W', 'YBL016W', 'YDR309C',
    'YHR005C', 'YFL027C', 'YCL055W', 'YBL063W', 'YJL127C-B', 'YLR303W',
    'YOL064C', 'YDR461W', 'YNL145W', 'YLR042C', 'YNR053C', 'YIR035C',
    'YNL289W', 'YAR071W', 'YDL236W', 'YBL018C', 'YIL117C', 'YBL064C',
    'YHR037W', 'YLR084C', 'YJL173C', 'YOR138C', 'YIL123W', 'YLR452C',
    'YHR084W', 'YFL026W', 'YKL178C', 'YKL209C', 'YBR083W', 'YGL050W',
    'YER144C', 'YEL040W', 'YCL069W', 'YDR061W', 'YHR214C-C',
}
                       
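#IXR1 (NOTE: target_gene_set_name, tf_gene and target_gene_set are reassigned by each later block, so this definition is overridden before the analysis runs)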
target_gene_set_name = 'IXR1'
tf_gene = 'YKL032C'
target_gene_set = set(['YKL032C',
                       'YHR208W','YHR208W','YKL152C','YCL035C','YCL035C','YDR044W','YOR278W','YKR069W','YKR069W','YPR167C','YPR167C','YDR253C','YDR253C','YJR137C','YJR137C','YEL049W','YER070W','YER070W','YER070W','YER150W','YER011W','YIL011W','YOR009W',
                       'YMR056C','YCR107W','YCR107W','YMR072W','YMR072W','YJR108W','YBL015W','YBL015W','YDL203C','YDL203C','YNL220W','YGR204W','YGR204W','YGL234W','YGL234W','YMR303C','YMR303C','YMR083W','YMR083W','YBR145W','YBR145W','YMR009W','YMR009W','YJR105W','YJR105W','YGL032C','YBR132C','YBR132C','YIL158W','YBR108W','YPL158C','YMR169C','YOR374W','YOR374W','YER073W','YOR175C','YPL227C','YOR002W','YOR002W','YOL130W','YOL130W','YDR242W','YDR242W','YBR211C','YBR211C','YEL036C','YEL036C','YHR126C','YPR128C','YBR286W','YPL195W','YNL065W','YNL065W','YLL052C','YLR370C','YCR048W','YOL058W','YOL058W','YHR018C','YER069W','YMR062C','YMR042W','YMR042W','YML099C','YML099C','YDR173C','YBR164C','YHL047C','YDR380W','YBR249C','YGL202W','YGL202W','YPR145W','YGR124W','YPR093C','YOR377W','YOR377W','YLL042C','YPR185W','YLR431C','YJL178C','YDR022C','YLR312C','YNL223W','YBR039W','YFL010W-A','YJR001W','YJR001W','YEL064C','YPR122W','YPR122W','YLL063C','YLL063C','YGR224W','YGR224W','YDR046C','YDR046C','YHR208W','YML077W','YGR286C','YJL060W','YFR047C','YAL044W-A','YGL220W','YNL042W','YBL097W','YGR188C','YGR188C','YOR026W','YFL023W','YLR319C','YER048C','YKL007W','YOR125C','YPL178W','YNL161W','YNL161W','YPL215W','YGR174C','YAL021C','YAL021C','YLR110C','YCR002C','YDL220C','YAL038W','YAL038W','YBR160W','YBR160W','YFL009W','YGL155W','YLR103C','YCR094W','YJR057W','YJR057W','YDR301W','YDR254W','YGR207C','YLR346C','YBR135W','YNL298W','YNL298W','YGR108W','YPR120C','YPR120C','YGR167W','YMR012W','YFR014C','YLR433C','YLR433C','YLL050C','YPR105C','YLR201C','YER141W','YLR253W','YOR100C','YOR031W','YOL007C','YBR291C','YGR088W','YHR053C','YPR158W','YNL155W','YDL209C','YKL096W','YKL096W-A','YNL111C','YGR155W','YKR083C','YML070W','YIR023W','YIR023W','YJR150C','YJR150C','YML113W','YPL119C','YKR024C','YLR276C','YLR270W','YOR163W','YOL052C-A','YOR204W','YHR019C','YKL054C','YAL013W','YOR030W','YCR007C','YLR348C','YKL002W','YLR437C','YPL265W','YPL265W','YEL071W','YDR093W','YKR071C','YHR143W','YHR143W','YOL087C'
,'YGL061C','YHL016C','YHL016C','YHR068W','YDR359C','YEL030W','YEL030W','YBL001C','YLR228C','YGR007W','YGR007W','YPL095C','YAL003W','YPL037C','YPL037C','YPL046C','YMR312W','YBR247C','YPL236C','YOR246C','YHR032W','YHR032W','YDR414C','YHR007C','YHR007C','YGL012W','YGL012W','YML130C','YOR393W','YOR393W','YGR029W','YIL009C-A','YIL009C-A','YIL009W','YNL127W','YLR051C','YMR319C','YMR319C','YBR040W','YDR534C','YOR383C','YDR070C','YLR454W','YPL103C','YKR049C','YKR049C','YEL047C','YEL047C','YOR384W','YPL141C','YOR271C','YBL016W','YLR068W','YGL254W','YGL254W','YOL051W','YOL051W','YLR343W','YFR009W','YMR189W','YOR120W','YOR120W','YOR375C','YOR375C','YAL062W','YAL062W','YEL022W','YOR205C','YOR164C','YLR149C','YMR135C','YML094W','YDR507C','YEL011W','YEL011W','YCL040W','YCL040W','YML004C','YOR040W','YOR040W','YEL046C','YFL017C','YHR005C','YOL059W','YOL059W','YKL152C','YKL152C','YOR262W','YBR244W','YBR244W','YPL223C','YCL035C','YCL035C','YDR513W','YLR364W','YML048W','YML048W','YOR185C','YFR015C','YFR015C','YLR258W','YEL043W','YIR038C','YIR038C','YLR289W','YGL237C','YGL237C','YBL021C','YNL014W','YNL014W','YDL119C','YDL205C','YOR278W','YOR237W','YOR237W','YBR009C','YGR191W','YGR191W','YCL030C','YCL030C','YCR096C','YOR032C','YDR305C','YLR113W','YLR113W','YMR251W-A','YBR215W','YOL155C','YLR301W','YFL014W','YFL014W','YJL159W','YJL159W','YDR258C','YOR025W','YGR268C','YFR053C','YHR094C','YHR094C','YMR011W','YMR011W','YDR345C','YHR092C','YHR092C','YHR096C','YHR096C','YDR342C','YDR342C','YIR037W','YIR037W','YBL059W','YNL164C','YIL090W','YER078C','YER092W','YFR017C','YDR090C','YMR108W','YMR108W','YJR016C','YJR016C','YLR355C','YLR355C','YCL009C','YCL009C','YHR216W','YHR216W','YLR432W','YLR432W','YML056C','YGL192W','YJL082W','YDL181W','YGL150C','YMR163C','YLR095C','YPL209C','YPL209C','YBR011C','YLR247C','YFR038W','YFR055W','YFR055W','YER019W','YOL103W','YOL103W','YKL032C','YDR492W','YJL162C','YLL057C','YFR042W','YPL145C','YIL125W','YDR148C','YJL094C','YDL108W','YKL168C','YKL168C','YBR199W','YB
R199W','YOL025W','YOR181W','YDR062W','YOR171C','YOR171C','YLR260W','YLL049W','YBR204C','YPL054W','YGL009C','YGL009C','YNL104C','YNL104C','YOR108W','YOR108W','YJR070C','YHR156C','YMR298W','YLL007C','YFL018C','YFL018C','YNL147W','YPR073C','YNL268W','YNL268W','YGL086W','YJL030W','YPR051W','YCR019W','YBR298C','YBR298C','YLR244C','YCR039C','YKL093W','YMR043W','YMR043W','YJR135C','YOL111C','YNL142W','YGR264C','YKR069W','YKR069W','YFR030W','YKL001C','YPR167C','YPR167C','YIR017C','YJR010W','YJR010W','YDR253C','YJR137C','YJR137C','YOR232W','YDR296W','YBR262C','YFR011C','YLR332W','YGL035C','YGL035C','YER028C','YJR077C','YDR031W','YPL191C','YNL076W','YNL076W','YJR039W','YPL224C','YMR070W','YMR070W','YOL088C','YDL247W','YDL247W','YGL075C','YGL064C','YGL136C','YPR166C','YNL122C','YMR193W','YDR322W','YBR268W','YML009C','YJL096W','YDR237W','YKR052C','YFR045W','YML128C','YPR047W','YMR097C','YKL074C','YBR057C','YGR055W','YHL036W','YER042W','YER042W','YGL122C','YPL190C','YMR080C','YMR080C','YGR232W','YGL211W','YOR372C','YOL104C','YLR265C','YHR004C','YLR138W','YIL164C','YIL164C','YJL126W','YJL126W','YOL041C','YNL061W','YPR072W','YDL046W','YNL183C','YNL183C','YGL067W','YGL067W','YGR043C','YOR071C','YOR071C','YGR159C','YGR159C','YOL043C','YJL061W','YDL116W','YLR093C','YKL120W','YPL134C','YPL134C','YGL055W','YGL055W','YBR230C','YHL020C','YMR226C','YMR174C','YMR174C','YDR251W','YDR251W','YEL049W','YOR394W','YOR394W','YNL015W','YNL015W','YGR178C','YDL053C','YDR228C','YBR186W','YLR134W','YLR134W','YLR455W','YGL013C','YDR079W','YJL023C','YNL003C','YMR026C','YDL065C','YOR193W','YHR150W','YNL329C','YMR205C','YMR205C','YOR122C','YGL025C','YOL084W','YJL198W','YLR273C','YKL164C','YKL164C','YKL163W','YKL163W','YOL100W','YIL042C','YIL042C','YGL059W','YGL059W','YGL008C','YGL006W','YGL037C','YNR052C','YNR052C','YBL018C','YDR435C','YMR297W','YMR278W','YNR011C','YKL116C','YKL116C','YKL181W','YHL011C','YHL011C','YJL079C','YKR013W','YDL055C','YNL169C','YLL010C','YLL010C','YLR019W','YLR019W','YAL043C','YC
R079W','YCR079W','YKR093W','YLL013C','YNR062C','YNR063W','YGL063W','YLR165C','YJR111C','YNL010W','YJL166W','YIL121W','YIL121W','YBR043C','YGR258C','YER162C','YDR217C','YLR084C','YOR265W','YML030W','YOL010W','YBR005W','YDL135C','YLR387C','YAR007C','YOR217W','YKL038W','YKL038W','YLR039C','YHL027W','YHL027W','YMR139W','YPL089C','YKL132C','YEL072W','YGR044C','YFR022W','YFR022W','YNL248C','YOL005C','YKL180W','YGL135W','YOL127W','YFR032C-A','YGL076C','YPL198W','YER021W','YOR261C','YLR340W','YOR369C','YJL191W','YHL015W','YKL145W','YKL145W','YPL152W','YOR305W','YNL213C','YNL213C','YCR035C','YOR294W','YNL105W','YPL193W','YLR357W','YJR127C','YER125W','YER125W','YNL254C','YOR118W','YPL183W-A','YDL204W','YGR161C','YGR161C','YHR154W','YDR139C','YDR388W','YER043C','YLR180W','YLR180W','YDR502C','YDR502C','YPL274W','YPL274W','YHR083W','YER047C','YER047C','YOR213C','YPR129W','YOR367W','YGR279C','YDR469W','YGL056C','YGL233W','YIL109C','YDR077W','YDR077W','YMR059W','YER081W','YIL074C','YDL168W','YOR315W','YKL006C-A','YBL102W','YGL066W','YOR007C','YLR164W','YOR137C','YIL123W','YMR175W','YNL032W','YKR100C','YHR149C','YHR206W','YKL108W','YGR271W','YIL105C','YBR266C','YIL147C','YIL147C','YFR031C','YLR275W','YBR289W','YBR289W','YGR197C','YGR197C','YIL016W','YIL061C','YOR357C','YDR006C','YGR248W','YLL021W','YDR356W','YGR236C','YER150W','YHR152W','YHR152W','YER046W','YER115C','YHR139C','YOR313C','YCR018C','YPL033C','YLR119W','YLL024C','YER103W','YNL209W','YNL209W','YDR293C','YDR293C','YBR169C','YLR369W','YDR086C','YOR212W','YGR008C','YMR125W','YHR006W','YGL184C','YBR294W','YLR092W','YHR181W','YPL163C','YPL163C','YNL081C','YDR145W','YGR046W','YML072C','YKL027W','YER071C','YJL052W','YJR009C','YGR192C','YBR083W','YJR019C','YDR460W','YPR056W','YOR352W','YLR178C','YGR024C','YDL244W','YFL058W','YOR192C','YOR143C','YOR143C','YHR167W','YBR067C','YBR067C','YGL145W','YER011W','YER011W','YIL011W','YOR009W','YOR009W','YPR074C','YBR117C','YLR118C','YER175C','YOR045W','YNL088W','YLR234W','YBR162C','YGL096
W','YJL016W','YLL028W','YLL028W','YGR138C','YGR138C','YOR273C','YOR273C','YDR074W','YDR074W','YOR256C','YER090W','YHR106W','YHR106W','YMR218C','YGR209C','YGR209C','YKR079C','YDR453C','YML100W','YCR084C','YPL207W','YGL050W','YOR295W','YKL210W','YDL064W','YLL039C','YLL039C','YKR098C','YMR304W','YOR124C','YOR124C','YER151C','YJL048C','YKL010C','YOR075W','YKL035W','YPL186C','YLR168C','YMR271C','YPR152C','YKR042W','YKR042W','YKL099C','YHR196W','YGR094W','YGL258W','YDR247W','YDR247W','YNL212W','YGL227W','YHR060W','YGR020C','YOR270C','YOR270C','YMR077C','YKR020W','YML041C','YAL002W','YLR181C','YFL004W','YFL004W','YIL173W','YOR359W','YBR241C','YBR241C','YOR229W','YGR194C','YDR369C','YJL141C','YJL141C','YAR068W','YPL239W','YER024W','YER024W','YBL053W','YBL062W','YBL070C','YBR016W','YBR016W','YBR206W','YBR238C','YBR284W','YDR325W','YNL154C','YNL154C','YER123W','YCR013C','YCR043C','YCR051W','YCR102C','YDL023C','YDL085C-A','YDL144C','YDL177C','YDR210W','YDR341C','YDR415C','YDR476C','YDR491C','YEL067C','YER034W','YER152C','YMR040W','YFL012W','YFR006W','YFR018C','YFR054C','YGL114W','YGL117W','YGL177W','YGL185C','YGL199C','YGL242C','YNL160W','YGR051C','YGR122W','YGR153W','YGR201C','YHR029C','YHL017W','YDR451C','YHR020W','YHR078W','YHR095W','YHR125W','YHR138C','YHR138C','YHR210C','YIL067C','YMR152W','YGL161C','YIR042C','YJL009W','YJL193W','YJL213W','YJR018W','YJR096W','YJR096W','YJR154W','YLR200W','YKL066W','YKR012C','YKR018C','YKR040C','YKR075C','YKL196C','YLL044W','YLL053C','YLR101C','YLR152C','YLR171W','YLR217W','YLR334C','YLR339C','YLR428C','YLR460C','YML096W','YMR086C-A','YMR099C','YFR049W','YNL011C','YNL013C','YNL033W','YNL043C','YNL058C','YNL109W','YNL208W','YNL234W','YNL234W','YNL276C','YNR025C','YOL107W','YOR097C','YGR281W','YOR102W','YOR338W','YOR387C','YFR003C','YPL044C','YPL229W','YPL264C','YPR099C','YPR126C','YPR197C','YDR002W','YOR272W','YJR099W','YJR099W','YJL056C','YFL052W','YFL052W','YGL255W'])

# RGT1 gene set: tf_gene is the systematic name of the TF itself, which is
# also a member of target_gene_set. IDs that were listed more than once in the
# original literal appear once here; the resulting set is identical.
# NOTE: the blocks further down reassign these same three names, so this
# definition is overridden before the analysis runs.
target_gene_set_name = 'RGT1'
tf_gene = 'YKL038W'
target_gene_set = {
    tf_gene,
    'YIR019C', 'YHR094C', 'YMR011W', 'YDR345C', 'YHR092C',
    'YDL194W', 'YIL162W', 'YKR076W', 'YLR303W', 'YPL187W',
    'YGL089C', 'YKL178C', 'YOR132W', 'YKL177W', 'YKR075C',
}
                       
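#MSN4 (NOTE: target_gene_set_name, tf_gene and target_gene_set are reassigned by each later block, so this definition is overridden before the analysis runs)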
target_gene_set_name = 'MSN4'
tf_gene = 'YKL062W'
target_gene_set = set(['YKL062W',
                        'YMR169C','YMR169C','YGL156W','YGL156W','YPR192W','YLR312C','YIL124W','YIL124W','YPL014W','YGR088W','YGR088W','YGR088W','YGR088W','YGR088W','YGR088W','YML070W','YPL223C','YPL223C','YFL014W','YFL014W','YDR406W','YER037W','YOR161C','YHR087W','YDL204W','YDL204W','YMR175W','YHR139C','YHR139C','YBR126C','YHR022C','YHR140W',
                        'YOL086C','YCR010C','YDR085C','YCL025C','YBR151W','YDL192W','YDR380W','YMR119W','YNL275W','YKR066C','YLR110C','YDR134C','YAL038W','YJL158C','YMR199W','YOL007C','YPR030W','YGR088W','YML101C','YML101C','YPL177C','YKL096W-A','YML054C','YOL052C-A','YMR173W','YBR078W','YAL003W','YGR254W','YHR174W','YGR071C','YGL012W','YOR388C','YGR052W','YER145C','YMR307W','YPR184W','YDR309C','YMR135C','YER054C','YCL040W','YPR035W','YHR183W','YDR508C','YKL152C','YKR067W','YHR104W','YPR008W','YDL223C','YOR227W','YMR032W','YMR251W-A','YOL155C','YFL014W','YJL159W','YBR072W','YBR072W','YCR021C','YDR171W','YDR258C','YDR258C','YPL240C','YFR053C','YNL037C','YJL153C','YOR226C','YKR061W','YGL009C','YLR438C-A','YNL006W','YBR298C','YBR299W','YOR228C','YKL085W','YJR010W','YGR249W','YIL051C','YLR439W','YKR052C','YGR014W','YKL062W','YDR277C','YGR055W','YNL036W','YNL200C','YJL212C','YPL171C','YMR174C','YKR097W','YNL289W','YIL050W','YLR044C','YGR087C','YOR328W','YPL058C','YJL210W','YBR168W','YCR012W','YKL127W','YOL084W','YDR490C','YGL037C','YIL122W','YBR167C','YMR297W','YPR103W','YJL079C','YKR093W','YOR347C','YOR368W','YNR018W','YIL057C','YNL180C','YOR018W','YPR065W','YER169W','YPR102C','YLR162W-A','YLR281C','YHR087W','YMR305C','YIL123W','YNL007C','YNCA0003W','YIL166C','YJR104C','YHR008C','YMR016C','YOR247W','YAL005C','YAL005C','YER103W','YBR294W','YNL066W','YOL020W','YJL052W','YJR009C','YGR192C','YGR192C','YBR083W','YLR327C','YNL300W','YGL096W','YGR138C','YDR074W','YDR453C','YML100W','YML100W','YDR084C','YGR072W','YDR400W','YKR042W','YKL099C','YHL028W','YOR229W','YAL004W','YAL037C-B','YAR035W','YBL029C-A','YBL113W-A','YBR085C-A','YBR298C-A','YCL042W','YCR013C','YLL055W','YDR133C','YDR524C-B','YER188C-A','YNL160W','YNL160W','YMR241W','YHR033W','YHR086W-A','YHR213W-B','YJR115W','YKR041W','YLR154W-E','YLR154W-F','YLR157C-B','YLR162W','YLR279W','YLR280C','YML100W-A','YML101C-A','YMR135W-A','YMR173W-A','YMR206W','YMR307C-A','YNL144C','YNL179C','YNL193W','YNL194C','YOR343C','YML027W',
'YPR063C','YNL241C'])

# HAP4 gene set: tf_gene is the systematic name of the TF itself, which is
# also a member of target_gene_set.
# NOTE: the blocks further down reassign these same three names, so this
# definition is overridden before the analysis runs.
target_gene_set_name = 'HAP4'
tf_gene = 'YKL109W'
target_gene_set = {
    tf_gene,
    'YPR145W', 'YJR048W', 'YFL018C', 'YKL148C', 'YKL141W',
    'YBR158W', 'YBL099W', 'YLR295C', 'YPL271W', 'YDL004W',
    'YDR377W', 'YOL077W-A', 'YPR020W', 'YBR039W', 'YKL016C',
    'YBL045C', 'YLR038C', 'YGL191W', 'YGL187C', 'YNL052W',
    'YHR051W', 'YMR256C', 'YLR395C', 'YDL067C', 'YOR065W',
    'YHR143W', 'YNR067C', 'YDL066W', 'YDL181W', 'YPL270W',
    'YDL005C', 'YPL272C', 'YBL030C', 'YNL055C', 'YKL015W',
    'YDR529C', 'YEL024W', 'YGL028C', 'YIL104C', 'YGR104C',
    'YDR322C-A', 'YBL029C-A', 'YBL029W', 'YBL044W', 'YEL025C',
    'YGR017W', 'YHR050W-A', 'YLR297W', 'YOR064C', 'YNL054W-A',
}

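#ABF1 (NOTE: target_gene_set_name, tf_gene and target_gene_set are reassigned by each later block, so this definition is overridden before the analysis runs)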
target_gene_set_name = 'ABF1'
tf_gene = 'YKL112W'
target_gene_set = set(['YKL112W',
                        'YPL111W','YAL038W','YHR051W','YHR174W','YCR012W','YJL166W',
                        'YAL054C','YMR120C','YOL086C','YNR044W','YGL032C','YDR214W','YHL021C','YHR199C','YMR092C','YBR110W','YBR211C','YPR128C','YKL135C','YHR018C','YMR033W','YDR384C','YML081C-A','YKL016C','YIL015W','YDR361C','YIL033C','YAL060W','YPR176C','YGR282C','YOL077C','YDL037C','YJL111W','YLR390W-A','YAL041W','YDR168W','YBR029C','YCL064C','YLR330W','YMR198W','YBR109C','YKL190W','YBR155W','YJL062W-A','YDL145C','YHR116W','YKL096W','YIR023W','YNR038W','YKL078W','YLR348C','YHR115C','YIR010W','YPR017C','YFR044C','YKL172W','YBL001C','YBR078W','YGL222C','YDR385W','YJL196C','YFL048C','YNL313C','YGR254W','YGL012W','YAL042W','YKR096W','YKL060C','YDR339C','YBR101C','YPR104C','YJR093C','YLR342W','YAL053W','YIL134W','YCL011C','YGL195W','YDR283C','YAL044C','YOR205C','YDR309C','YNL255C','YCL040W','YKR030W','YDL022W','YOR262W','YDL035C','YLR293C','YDR454C','YDR295C','YIL116W','YDR158W','YGL194C','YOL155C','YLR259C','YOL068C','YGR268C','YNL037C','YLR355C','YGL192W','YJR138W','YLR095C','YPL242C','YLR096W','YNL322C','YNL322C','YJL174W','YKL008C','YJL062W','YMR296C','YGL009C','YJR070C','YFL018C','YKL143W','YDR234W','YKL029C','YNL307C','YOL126C','YOL076W','YJR137C','YER091C','YDR461W','YDR296W','YKL195W','YGR235C','YJR077C','YNL085W','YGL106W','YIL051C','YDR245W','YJL183W','YDR366C','YHR162W','YDR405W','YBL038W','YJL063C','YNR036C','YPL013C','YNL306W','YOL116W','YDR194C','YGR055W','YHR086W','YGR232W','YNL036W','YOL104C','YIL048W','YBR212W','YDR456W','YHR077C','YOR056C','YOR206W','YNR053C','YOR310C','YNL183C','YDL105W','YJR062C','YBL079W','YDL116W','YDL193W','YHL029C','YBR129C','YJL002C','YHR179W','YKR065C','YER178W','YBL080C','YPL159C','YAL055W','YGR231C','YHR215W','YDL106C','YBR092C','YNL267W','YKL163W','YBL105C','YML107C','YAL023C','YMR129W','YNL055C','YMR297W','YBL041W','YDL039C','YPR178W','YAL043C','YKR093W','YGR253C','YGR169C','YKL015W','YHR074W','YEL037C','YNCL0012C','YDR195W','YDR028C','YNL312W','YFL047W','YBR030W','YPR018W','YGL107C','YMR061W','YNL072W','YJL026W','YMR200W
','YJR063W','YBR154C','YDR404C','YKL144C','YIL018W','YHR200W','YOR261C','YOR116C','YLR340W','YOL039W','YOR117W','YNL213C','YBL014C','YPL012W','YCL031C','YLR162W-A','YGR056W','YNR037C','YDR233C','YMR060C','YPR129W','YBR080C','YNL287W','YDR363W-A','YKR029C','YOR057W','YBR263W','YLR058C','YDR422C','YBL007C','YNL243W','YBR077C','YBR266C','YIL147C','YFL017W-A','YLR025W','YDR011W','YNCE0011C','YNCL0042C','YNCL0043C','YNCL0041C','YNCI0001W','YJR104C','YDR356W','YGR059W','YNL189W','YDR312W','YBR283C','YLR452C','YHR064C','YDR410C','YFL026W','YDL130W-A','YIL162W','YNL244C','YJL176C','YGR129W','YMR005W','YBR069C','YJR116W','YML081W','YJL052W','YKL028W','YGR186W','YCR053W','YMR260C','YOR194C','YOR045W','YGR185C','YDL122W','YLR024C','YBR273C','YKL035W','YIL031W','YBL039C','YKL014C','YLR222C','YMR093W','YGR128C','YIL135C','YNL212W','YPR036W','YNL321W','YLR396C','YGL225W','YMR197C','YHL028W','YOR230W','YML007W','YHR161C','YAR009C','YBL028C','YBR012W-B','YBR238C','YBR284W','YPL087W','YDL023C','YDR034C-A','YDR098C-B','YDR194W-A','YDR261C-D','YDR316W-A','YDR365W-B','YDR444W','YER041W','YER137C-A','YER138C','YER138W-A','YFL002W-A','YGR027W-B','YGR038C-B','YGR151C','YGR161C-D','YGR161W-B','YGR259C','YGR234W','YHR078W','YIL060W','YIL134C-A','YJR027W','YJR029W','YKL030W','YKL196C','YLR035C-A','YLR157C-B','YLR162W','YLR227W-B','YLR317W','YLR349W','YML039W','YML040W','YML045W','YML045W-A','YMR045C','YMR046W-A','YMR050C','YMR051C','YMR085W','YHR090C','YNL054W-B','YNL284C-B','YOL036W','YOL103W-A','YOL103W-B','YPR028W','YOR192C-B','YPL257W-A','YPL257W-B','YPR136C','YPR158W-A','YPR158W-B','YNL339C','YDR326C','YGR270W'])

# ASH1 gene set: tf_gene is the systematic name of the TF itself, which is
# also a member of target_gene_set.
# NOTE: this is the last assignment of target_gene_set_name / tf_gene /
# target_gene_set in the file, so it is the definition the analysis below uses.
target_gene_set_name = 'ASH1'
tf_gene = 'YKL185W'
target_gene_set = {
    tf_gene,
    'YGL173C',
    'YDR226W', 'YDR524C', 'YBR068C', 'YGR217W', 'YHR166C', 'YDR054C',
    'YJL194W', 'YGR189C', 'YNR010W', 'YOL007C', 'YKL096W', 'YMR238W',
    'YJL196C', 'YLR300W', 'YIL019W', 'YMR306W', 'YOR168W', 'YLR243W',
    'YMR108W', 'YDL182W', 'YIL046W', 'YDR461W', 'YNR059W', 'YHR162W',
    'YGR220C', 'YMR122W-A', 'YDR383C', 'YLR285W', 'YOR310C', 'YNL289W',
    'YNR045W', 'YMR123W', 'YIL122W', 'YDR452W', 'YDL055C', 'YDR055W',
    'YOL114C', 'YDR527W', 'YDL135C', 'YHR177W', 'YMR121C', 'YBL027W',
    'YLR344W', 'YER021W', 'YDR447C', 'YLR141W', 'YMR305C', 'YDR077W',
    'YOL113W', 'YGR229C', 'YIL161W', 'YDR525W-A', 'YJL004C', 'YML100W',
    'YKR042W', 'YOR270C', 'YOL105C', 'YBR259W', 'YLR272C', 'YDR524C-B',
    'YDR524W-C', 'YGR054W', 'YDR451C', 'YKR041W', 'YML020W', 'YMR315W-A',
    'YNCK0019W', 'YGR281W', 'YPR063C', 'YKL214C', 'YOR087W',
}




# Load the copy-number table and index it as cn_dict[gene][strain]
# (DataFrame.to_dict('index') keys the outer dict by the first column).
# NOTE(review): the values look like relative read depth per gene and strain,
# judging by the file name -- confirm against the pipeline that wrote it.
copy_number_filename = ('C:/Gresham/tiny_projects/Project_Grace/relative_depth_DNA_corrected_v3.txt')
df = pd.read_table(copy_number_filename, index_col=0)
cn_dict = df.to_dict('index')

# Strains analysed below. The list used to be derived from the columns of the
# copy-number table (via the row for gene 'YKR039W') and then immediately
# overwritten with this fixed list; that dead lookup has been removed so the
# script no longer fails with KeyError when that gene is absent from the table.
strain_list = ['DGY1728','DGY1734','DGY1736','DGY1740','DGY1744','DGY1747','DGY1751']
# Record, for every target gene, the set of evolved strains whose observed
# ('Obs') DESeq table has a row for it.
# Every gene seen in at least one strain is kept for the heatmap; the former
# "present in every strain" filter is disabled. To restore it, keep only genes
# with len(gene_count_strain[gene]) == len(strain_list).
gene_count_strain = {}

istype = 'Obs'
for evo_strain in strain_list:
    # DGY1657 is the fixed first strain in every DESeq file name, so there is
    # no DGY1657-vs-DGY1657 table to read.
    if evo_strain == 'DGY1657':
        continue

    deseq_results_filename =  ('C:/Gresham/tiny_projects/Project_Grace/DESeq_{}_DGY1657_{}.txt').format(istype, evo_strain)
    # pandas opens and closes the file itself; the extra open() call that used
    # to sit here only leaked an unused file handle.
    df = pd.read_table(deseq_results_filename, index_col=0)
    deseq_results = df.to_dict('index')

    for gene in deseq_results:
        # Membership is tested first so that empty or non-string index labels
        # never reach the string check.
        if gene in target_gene_set and gene.startswith('Y'):
            gene_count_strain.setdefault(gene, set()).add(evo_strain)

complete_gene_list = set(gene_count_strain)

# Sorted so the heatmap column order is the same on every run (plain set
# iteration order of strings is not stable between interpreter sessions).
y_list = sorted(complete_gene_list)

# Heatmap matrix: one row per strain, one column per gene in y_list, holding
# the copy-number value from cn_dict rounded to the nearest integer.
exp = [
    [round(cn_dict[gene][strain]) for gene in y_list]
    for strain in strain_list
]

# Genes on the x axis, strains on the y axis; rendered with the default
# plotly renderer configured at the top of the file.
heatmap_trace = go.Heatmap(
    z=exp,
    x=y_list,
    y=strain_list,
    hoverongaps=False,
    colorscale='Reds',
)
fig = go.Figure(data=heatmap_trace)
fig.show()
# For each evolved strain, keep the observed ('Obs') DESeq row of the
# transcription-factor gene itself; strains whose table has no row for tf_gene
# get an empty dict.
obs_dict = {}

for evo_strain in strain_list:
    # DGY1657 is the fixed first strain in every DESeq file name, so there is
    # no DGY1657-vs-DGY1657 table to read.
    if evo_strain == 'DGY1657':
        continue

    deseq_results_filename =  ('C:/Gresham/tiny_projects/Project_Grace/DESeq_{}_DGY1657_{}.txt').format('Obs', evo_strain)
    # pandas opens and closes the file itself; the extra open() call that used
    # to sit here only leaked an unused file handle.
    df = pd.read_table(deseq_results_filename, index_col=0)
    deseq_results = df.to_dict('index')

    obs_dict[evo_strain] = deseq_results.get(tf_gene, {})

# Per-gene, per-strain DESeq statistics for the target genes, read from the
# tables of the selected run type:
#   log2fc_dict[gene][strain] -> 'log2FoldChange' column
#   sig_dict[gene][strain]    -> 'padj' column
log2fc_dict = {}
sig_dict = {}

# Run type used in the DESeq file names here and in the output CSV name later.
runtype = 'Exp'
istype = runtype

for evo_strain in strain_list:
    # DGY1657 is the fixed first strain in every DESeq file name, so there is
    # no DGY1657-vs-DGY1657 table to read.
    if evo_strain == 'DGY1657':
        continue

    deseq_results_filename =  ('C:/Gresham/tiny_projects/Project_Grace/DESeq_{}_DGY1657_{}.txt').format(istype, evo_strain)
    # pandas opens and closes the file itself; the extra open() call that used
    # to sit here only leaked an unused file handle.
    df = pd.read_table(deseq_results_filename, index_col=0)
    deseq_results = df.to_dict('index')

    for gene, row in deseq_results.items():
        if gene not in target_gene_set:
            continue
        # Values are stored unfiltered; significance thresholds are applied
        # by the code that consumes sig_dict.
        log2fc_dict.setdefault(gene, {})[evo_strain] = row['log2FoldChange']
        sig_dict.setdefault(gene, {})[evo_strain] = row['padj']

# Build one record per strain with three columns per target gene
# (<gene>_copyn, <gene>_log2FC, <gene>_padj) and write the table to CSV.
genes_dict = {}

for gene in log2fc_dict:
    gene_cn = ('{}_copyn').format(gene)
    gene_fc = ('{}_log2FC').format(gene)
    gene_p = ('{}_padj').format(gene)

    fc_by_strain = log2fc_dict[gene]
    padj_by_strain = sig_dict[gene]

    for strain in strain_list:
        record = genes_dict.setdefault(strain, {})

        # Copy number is required for every gene/strain pair; the expression
        # statistics fall back to NaN for strains without a value.
        record[gene_cn] = cn_dict[gene][strain]
        record[gene_fc] = fc_by_strain[strain] if strain in fc_by_strain else np.nan
        record[gene_p] = padj_by_strain[strain] if strain in padj_by_strain else np.nan

genes_df = pd.DataFrame.from_dict(genes_dict, orient='index')

# Despite its name, output_figure_name is the path of the CSV written below.
output_figure_name = ('C:/Gresham/tiny_projects/Project_Grace/Review_GB/TF_analysis/'
                      '{target_gene_set_name}_{runtype}_targeted_expression.csv').format(
                          target_gene_set_name = target_gene_set_name,
                          runtype = runtype)
genes_df.to_csv(path_or_buf=output_figure_name, na_rep = np.nan)

# Fisher's exact test: is a target gene being significant (padj <= 0.05)
# associated with the TF itself being significant in the same strain?
#
# NOTE: despite the 'cnv' / 'cnn' names, strains are split by the TF's observed
# p-value (<= 0.05 -> 'cnv' set, otherwise 'cnn' set), not by copy number. An
# earlier variant that split on TF copy number > 1 is no longer active.
tf_cnv_strain_set = set()
tf_cnn_strain_set = set()

tf_FET = {key: [] for key in ('cnv_sig', 'cnv_not', 'cnn_sig', 'cnn_not')}

# Strains without an observed TF row, or without a 'pvalue' field, are treated
# as p = 1 (not significant).
for strain in strain_list:
    tf_pvalue = obs_dict.get(strain, {}).get('pvalue', 1)
    if tf_pvalue <= 0.05:
        tf_cnv_strain_set.add(strain)
    else:
        tf_cnn_strain_set.add(strain)

# One entry is appended per (target gene, strain) pair; the TF gene itself is
# left out of the contingency table.
for gene in target_gene_set:
    if gene not in sig_dict:
        continue
    for strain, padj in sig_dict[gene].items():
        tf_label = 'cnv' if strain in tf_cnv_strain_set else 'cnn'
        gene_label = 'sig' if padj <= 0.05 else 'not'
        if gene != tf_gene:
            tf_FET['{}_{}'.format(tf_label, gene_label)].append(gene)

# Report the TF's copy-number value and observed DESeq row for each strain.
for strain in obs_dict:
    print(strain, cn_dict[tf_gene][strain], obs_dict[strain])

# 2x2 table: rows = TF significant / not, columns = target significant / not.
a, b, c, d = (len(tf_FET[key])
              for key in ('cnv_sig', 'cnv_not', 'cnn_sig', 'cnn_not'))
print(a, b, c, d)
results = fisher_exact([[a, b], [c, d]])
print(results)
print(tf_cnv_strain_set)


#define Jaccard Similarity function
def jaccard(list1, list2):
    """Return the Jaccard similarity |A & B| / |A | B| of two collections.

    Both arguments are reduced to sets first, so repeated elements count once.
    (The union was previously derived from the raw list lengths, which
    under-reported the similarity whenever an input contained duplicates.)

    Parameters
    ----------
    list1, list2 : iterable of hashable items

    Returns
    -------
    float
        A value in [0, 1]. Two empty inputs are treated as identical and give
        1.0 instead of raising ZeroDivisionError.
    """
    set1 = set(list1)
    set2 = set(list2)
    union = set1 | set2
    if not union:
        return 1.0
    return len(set1 & set2) / len(union)

# Example: Jaccard similarity between two strain lists. The returned value is
# neither stored nor printed, so it is only displayed when this statement is
# evaluated in an interactive session.
# NOTE: a and b are rebound here; they no longer hold the contingency counts
# computed for the Fisher's exact test above.
a, b = ['DGY1747','DGY1744','DGY1728'], ['DGY1728']

jaccard(a, b)
