# -*- coding: utf-8 -*-
"""
Created on Wed Mar 15 11:47:39 2023

@author: pspea
"""
import pandas as pd
import scipy.stats as stats

# Per-chromosome totals used as the background cell of the Fisher's exact
# table in test_ncrna (presumably chromosome lengths in nucleotides).
# NOTE(review): 1078177 matches the length usually quoted for S288C chrXII;
# chrXI is usually listed as ~666816 -- confirm against the reference used.
chromo_dict = {'chrV':576874,
               'chrXI':1078177} 

ncRNA_filename =  ('C:/Gresham/tiny_projects/Project_Grace/Review_GB/ncRNA/ncRNA.tsv')
# Parse inside a context manager so the file handle is closed as soon as
# pandas has read the table (the bare open() previously leaked the handle).
with open(ncRNA_filename) as ncRNA_file:
    df = pd.read_table(ncRNA_file, index_col=0)
# One entry per table row: {index value: {column name: value}}.
ncrna_results = df.to_dict('index')


def overlap_test(nc_start, nc_stop, cnv_start, cnv_stop):
    """Return True when the ncRNA interval and the CNV interval overlap.

    Four configurations count as an overlap, checked in this order:
    the ncRNA spans the CNV start, the ncRNA spans the CNV stop, the ncRNA
    lies inside the CNV, or the CNV lies inside the ncRNA. All comparisons
    are inclusive, so intervals that merely share an endpoint overlap.
    """
    return bool(
        # ncRNA straddles the left boundary of the CNV
        (nc_start <= cnv_start <= nc_stop)
        # ncRNA straddles the right boundary of the CNV
        or (nc_start <= cnv_stop <= nc_stop)
        # ncRNA entirely within the CNV
        or (cnv_start <= nc_start and nc_stop <= cnv_stop)
        # CNV entirely within the ncRNA
        or (nc_start <= cnv_start and cnv_stop <= nc_stop)
    )

def test_ncrna(cnv, chromo, trna_filter):
    """Count ncRNAs overlapping a CNV and run Fisher's exact test on the counts.

    Reads the module-level ``ncrna_results`` (one record per ncRNA) and
    ``chromo_dict`` (per-chromosome totals). Every record whose
    'chromosome' equals ``chromo`` is classified with ``overlap_test`` as a
    hit (overlaps the CNV) or a miss; the id and 'Systematic_name' of each
    hit are printed. The table ``[[hits, cnv_nt], [misses,
    chromo_dict[chromo]]]`` is then passed to ``scipy.stats.fisher_exact``
    and the result is printed.

    Args:
        cnv: Mapping with "start" and "stop" entries bounding the CNV.
        chromo: Chromosome name; matched against each record's 'chromosome'
            field and used as the key into ``chromo_dict``.
        trna_filter: When truthy, records whose 'rna_type' contains the
            substring 'trna' are skipped (counted as neither hit nor miss).

    Returns:
        The ``(statistic, p-value)`` pair from ``scipy.stats.fisher_exact``.

    Raises:
        KeyError: If ``chromo`` has no entry in ``chromo_dict``.
    """
    # The CNV bounds are loop-invariant, so read them once up front.
    cnv_start = cnv["start"]
    cnv_stop = cnv["stop"]
    cnv_nt = cnv_stop - cnv_start

    # Look this up before scanning so an unknown chromosome fails
    # immediately instead of after the hits have been printed.
    chromo_total = chromo_dict[chromo]

    cnv_hit = 0
    cnv_miss = 0

    for sgd_id, record in ncrna_results.items():
        if record['chromosome'] != chromo:
            continue

        # NOTE(review): the substring match is case-sensitive; if the table
        # spells the type as e.g. 'tRNA_gene' this filter removes nothing --
        # confirm against the values in the 'rna_type' column.
        if trna_filter and 'trna' in record['rna_type']:
            continue

        if overlap_test(record['start'], record['stop'], cnv_start, cnv_stop):
            cnv_hit += 1
            print(sgd_id, record['Systematic_name'])
        else:
            cnv_miss += 1

    # NOTE(review): the first column holds ncRNA counts while the second
    # holds cnv_nt and the chromo_dict total -- confirm this is the
    # intended contingency table.
    data = [[cnv_hit, cnv_nt], [cnv_miss, chromo_total]]
    stat, pval = stats.fisher_exact(data)
    print('FET_statistic p-value')
    print(stat, pval)

    # Also hand the result back so callers can collect it, not just read it.
    return stat, pval
    

# CNV intervals to test, as (chromosome, start, stop); the trailing comment
# on each row is the name of the CNV.
cnv_regions = [
    ('chrV', 431490, 449320),    # glc7_yer137c
    ('chrXI', 498246, 530596),   # ComQuad
    ('chrXI', 441064, 527730),   # Trip1
    ('chrXI', 396107, 514409),   # ComSup
    ('chrXI', 474768, 634236),   # Trip2
    ('chrXI', 436093, 505002),   # Sup
    ('chrXI', 485707, 613813),   # ComTrip
]

for chromo, region_start, region_stop in cnv_regions:
    cnv = {'start': region_start, "stop": region_stop}

    # Each region is tested twice: first with every ncRNA on the
    # chromosome, then again with the tRNA filter switched on.
    for use_trna_filter in (False, True):
        test_ncrna(cnv, chromo, use_trna_filter)