#!/usr/bin/env python
import numpy as np
from scipy.stats import fisher_exact
import os
# The input files read further down are opened by bare (relative) file name,
# so switch the working directory to the folder they are resolved against.
os.chdir('/data/reddylab/projects/GGR/results/integrative/enhancers_by_EP300_analysis/iter0/nGRE/')

def find_num_nGREs_in_file(inf):
    """Read the second whitespace-separated column of a text file as integers.

    Each non-blank line of ``inf`` is split on whitespace and its second
    field is converted with ``int``. Blank / whitespace-only lines (for
    example a trailing empty line at the end of the file) are skipped.

    Args:
        inf: Path of the text file to read.

    Returns:
        A 1-D integer ``numpy`` array with one entry per non-blank line, in
        file order. An empty file yields an empty integer array.

    Raises:
        IndexError: If a non-blank line has fewer than two fields.
        ValueError: If the second field of a line is not an integer literal.
    """
    with open(inf, 'r') as f:
        # ``str.split()`` with no argument already ignores leading/trailing
        # whitespace and returns [] for a blank line, which is filtered out.
        counts = [
            int(fields[1])
            for fields in (line.split() for line in f)
            if fields
        ]

    # Pin the dtype so an empty file still produces an integer array instead
    # of numpy's default float64 for an empty list.
    return np.array(counts, dtype=int)

# ---------------------------------------------------------------------------
# nGRE match analysis.
#
# For each enhancer class (activated / repressed / static) and each motif
# variant (IR0 / IR1 / IR2) a match array is loaded from disk. The script then
#   1. runs Fisher's exact test, activated vs. repressed, for each variant,
#   2. prints the percentage of each class matching any of the variants, and
#   3. prints pairwise Fisher's exact tests on those "any variant" arrays.
#
# print() is always called with a single pre-formatted string so the output
# is identical under Python 2 and Python 3.
# ---------------------------------------------------------------------------


def _with_without(matches):
    """Return ``[n_with, n_without]`` for one row of a 2x2 contingency table.

    ``n_with`` is the sum of ``matches`` and ``n_without`` is
    ``len(matches) - n_with``.
    """
    # NOTE(review): this is only a valid count of entries "with a match" if
    # every value in ``matches`` is 0 or 1 -- confirm for the raw input files.
    n_with = int(matches.sum())
    return [n_with, len(matches) - n_with]


def _fisher(first, second):
    """Run Fisher's exact test on the 2x2 table built from two match arrays.

    ``first`` supplies the first table row and ``second`` the second row.
    Returns the ``(odds_ratio, p_value)`` result of ``fisher_exact``.
    """
    return fisher_exact([_with_without(first), _with_without(second)])


def _any_variant(ir0, ir1, ir2):
    """Element-wise sum of three match arrays with values above 1 capped at 1."""
    return np.minimum(ir0 + ir1 + ir2, 1)


activated_IR0_nGRE_matches = find_num_nGREs_in_file('activated.IR0_nGRE_matches.txt')
activated_IR1_nGRE_matches = find_num_nGREs_in_file('activated.IR1_nGRE_matches.txt')
activated_IR2_nGRE_matches = find_num_nGREs_in_file('activated.IR2_nGRE_matches.txt')

repressed_IR0_nGRE_matches = find_num_nGREs_in_file('repressed.IR0_nGRE_matches.txt')
repressed_IR1_nGRE_matches = find_num_nGREs_in_file('repressed.IR1_nGRE_matches.txt')
repressed_IR2_nGRE_matches = find_num_nGREs_in_file('repressed.IR2_nGRE_matches.txt')

static_IR0_nGRE_matches = find_num_nGREs_in_file('static.IR0_nGRE_matches.txt')
static_IR1_nGRE_matches = find_num_nGREs_in_file('static.IR1_nGRE_matches.txt')
static_IR2_nGRE_matches = find_num_nGREs_in_file('static.IR2_nGRE_matches.txt')

# Per-variant tests, activated (first row) vs. repressed (second row).
# NOTE(review): these results were previously computed into a scratch
# variable that was overwritten before being used. They are kept here,
# keyed by variant, but still not printed so the script's output is
# unchanged -- decide whether they should be reported.
per_IR_fisher = {
    'IR0': _fisher(activated_IR0_nGRE_matches, repressed_IR0_nGRE_matches),
    'IR1': _fisher(activated_IR1_nGRE_matches, repressed_IR1_nGRE_matches),
    'IR2': _fisher(activated_IR2_nGRE_matches, repressed_IR2_nGRE_matches),
}

# Collapse the three variants into one capped array per enhancer class.
repressed_IRall_nGRE_matches = _any_variant(
    repressed_IR0_nGRE_matches,
    repressed_IR1_nGRE_matches,
    repressed_IR2_nGRE_matches,
)
activated_IRall_nGRE_matches = _any_variant(
    activated_IR0_nGRE_matches,
    activated_IR1_nGRE_matches,
    activated_IR2_nGRE_matches,
)
static_IRall_nGRE_matches = _any_variant(
    static_IR0_nGRE_matches,
    static_IR1_nGRE_matches,
    static_IR2_nGRE_matches,
)

for class_label, class_matches in (
    ('repressed', repressed_IRall_nGRE_matches),
    ('activated', activated_IRall_nGRE_matches),
    ('static', static_IRall_nGRE_matches),
):
    # float() forces true division under Python 2 as well.
    perc = 100 * float(class_matches.sum()) / len(class_matches)
    print("%0.2f%% of %s enhancers have either IR0, IR1, or IR2 nGREs"
          % (perc, class_label))
# Blank separator line between the percentages and the test results.
print("")

for first_label, first_matches, second_label, second_matches in (
    ('repressed', repressed_IRall_nGRE_matches,
     'activated', activated_IRall_nGRE_matches),
    ('activated', activated_IRall_nGRE_matches,
     'static', static_IRall_nGRE_matches),
    ('repressed', repressed_IRall_nGRE_matches,
     'static', static_IRall_nGRE_matches),
):
    odds_ratio, p_value = _fisher(first_matches, second_matches)
    print("Fisher's Exact Test, %s vs. %s, proportion w/ nGREs, OR = %0.2f, p = %0.2e"
          % (first_label, second_label, odds_ratio, p_value))