import glob
import pylab as PL
import scipy as SP
import scipy.stats as ST
from gfpvar.tools.common import *
from gfpvar.analysis.cyto.util import *
from gfpvar.analysis.cyto.fcs import read_plate_fcs_random_data
from gfpvar.analysis.img.util import read_combined_file
from gfpvar.analysis.mrna.util import *
from gfpvar.analysis.tecan.util import *
from gfpvar.tools.io import *
import pdb

# Names of the GFP summary statistics, on the log10 scale and on the raw scale.
TRAIT_NAMES = [
    "mean(log10(GFP))",
    "median(log10(GFP))",
    "var(log10(GFP))",
    "mean(GFP)",
    "median(GFP)",
    "var(GFP)",
]

# Deviation limit per trait: 0.2 by default, and five times that for any trait
# whose name contains "var" (allow more lenience for variance).
TRAIT_DEVIATION_LIMITS = {}
for t in TRAIT_NAMES:
    TRAIT_DEVIATION_LIMITS[t] = 0.2*(1 + 4.*t.count("var"))

# Load the annotated haploid screen table once, at import time. The functions
# below read `meta` and `slice` as module-level globals, so these names must
# stay as they are (even though `slice` shadows the builtin).
meta, slice, parentals, orfs, plates, header = read_cyto_tab(
    "%s/summary/cyto/Pilot_screen_BYxRM/paper_haploid_annotated.tab"%DATA_DIR,
    normalise=False,
    return_header=True,
    includes_all=False,
    includes_flag=True,
    log_linear=False
)

# Relabel plate "P4CD" before the suffix stripping below, so it does not end up
# as "P4" (presumably to exclude it from per-plate lookups - confirm).
meta[SP.where(meta[:,2] == "P4CD")[0], 2] = "Bad plate"

# Drop the last two characters of every plate label that contains "AB" or "CD"
# (a label such as "P1AB" would become "P1").
for i in range(len(meta)):
    if "AB" in meta[i,2] or "CD" in meta[i,2]:
        meta[i,2] = meta[i,2][:-2]



def get_screens_plate_data(min_cells=500, trait=3, screens=None, align=True):
    """Gather paired BY/RM trait values for each plate of the given screens.

    Reads the module-level `meta` and `slice` tables. For every plate
    "P1" .. "P11", rows whose flag (meta column 7) is "OK" and whose screen
    (meta column 1) is one of `screens` are split by parent (meta column 5,
    "BY" or "RM"). An ORF (meta column 4) is kept when it occurs for both
    parents and both have at least `min_cells` cells (slice column 6). If an
    ORF occurs more than once for a parent, its first matching row is used.

    Parameters:
        min_cells: minimum cell count required for both the BY and RM entry.
        trait: column of `slice` to extract (per the original notes,
            3 = mean GFP and 5 = var(GFP)).
        screens: iterable of screen names to pool; None (the default) means
            ["haploid-R1", "haploid-R2"].
        align: if True, the BY column is replaced by align_vals(BY, RM).
            align_vals is defined elsewhere; its semantics are not visible here.

    Returns:
        (values, used_orfs), two dicts keyed by plate name. values[p] is an
        (n_orfs, 2) array with BY in column 0 and RM in column 1, shifted so
        that each column's median equals the overall mean of the plate's
        values. used_orfs[p] is the array of ORF names in the same row order.
        Plates without any OK entry for one of the parents are omitted.
        Plates where no ORF passes the cell-count filter get an empty
        (0, 2) array and an empty ORF array.
    """
    if screens is None: # avoid a shared mutable default argument
        screens = ["haploid-R1", "haploid-R2"]
    values, used_orfs = {}, {}

    for p in ["P%d"%i for i in range(1,12)]: # for each plate
        vals, uorfs = [], [] # plate values and plate orfs

        # 1. Get indices corresponding to the plates and screens
        I1,I2 = SP.zeros(len(meta), bool), SP.zeros(len(meta), bool)
        for screen in screens:
            I1 = I1 | ((meta[:,1] == screen) & (meta[:,5] == "BY") & (meta[:,2] == p) & (meta[:,7] == "OK")) # Filter the replicate screens to values from this plate
            I2 = I2 | ((meta[:,1] == screen) & (meta[:,5] == "RM") & (meta[:,2] == p) & (meta[:,7] == "OK"))
        I1, I2 = SP.where(I1)[0], SP.where(I2)[0]

        if len(I1) == 0 or len(I2) == 0: # if there are none for this plate, skip the plate
            LOG.debug("Weird - no entries for screens %s, plate %s"%(str(screens), p))
            continue

        # 2. Retained ORFs are the ones that have designation OK and have data for both parents
        ok_orfs = list(set(meta[I1,4]) & set(meta[I2,4]))

        # 3. Calculate stats for the good ORFs.
        for o in ok_orfs: # for each OK orf
            i1,i2 = SP.where(meta[I1,4] == o)[0][0], SP.where(meta[I2,4] == o)[0][0] # first row of this ORF within each parent's subset
            c1,c2 = slice[I1[i1],6], slice[I2[i2],6] # get counts
            if min(c1,c2) < min_cells:
                continue # Do not consider ORFs that don't have enough cells observed
            vals.append([slice[I1[i1], trait], slice[I2[i2], trait]]) # Trait 3 = mean GFP; trait 5 = var(gfp)
            uorfs.append(o)

        # 4. done with creating indices and processing ORFs, store results
        if len(vals) == 0:
            # Nothing passed the cell-count filter. SP.array([]) would be 1-D, so the
            # median/mean below would emit NaN warnings and the column indexing used
            # for alignment would raise IndexError. Store a well-formed empty result.
            LOG.debug("No ORFs with at least %d cells for screens %s, plate %s"%(min_cells, str(screens), p))
            values[p], used_orfs[p] = SP.zeros([0,2], float), SP.array(uorfs)
            continue
        values[p], used_orfs[p] = SP.array(vals), SP.array(uorfs)
        values[p] = values[p] - SP.median(values[p], axis=0) + values[p].mean() # centre each column's median on the overall mean
        if align: values[p] = SP.array([align_vals(values[p][:,0], values[p][:,1]), values[p][:,1]]).T

    return values, used_orfs





def get_raw_microscopy_data(plates):
    """Load per-well microscopy values for the requested plates.

    Reads the tab-delimited file "<DATA_DIR>/summary/paper/table_S2-img.tab".
    In each row, column 1 is compared against str(plate), column 2 is the well
    name (a row letter followed by a 1-based column number) and columns 3
    onwards are the values, which are converted to float.

    Parameters:
        plates: iterable of plate identifiers; each is matched via str(plate).

    Returns:
        dict mapping each plate to a 16 x 24 x 20 float array indexed as
        [well row, well column, value]. Wells absent from the table stay NaN.
        Each table row is expected to supply values that fit the length-20
        last axis.
    """
    img = SP.loadtxt("%s/summary/paper/table_S2-img.tab"%DATA_DIR, delimiter="\t", dtype=object)
    data = {}
    for plate in plates: # read in all plates
        I = SP.where(img[:,1] == str(plate))[0]
        data[plate] = SP.zeros([16,24,20], float)*SP.nan # start with every well missing
        for i in I:
            well = img[i,2]
            r = ord(well[0]) - ord('A') # row letter -> 0-based row index
            c = int(well[1:]) - 1 # 1-based column number -> 0-based column index
            # Build a real list: on Python 3 map() returns a lazy iterator, which
            # cannot be assigned into an array slice.
            data[plate][r,c] = [float(v) for v in img[i][3:]]
    return data


def get_cyto_well_orf(screen):
    """Return a dict mapping well -> ORF for one screen.

    Reads "<DATA_DIR>/summary/cyto/Pilot_screen_BYxRM/clean_haploid.tab" via
    read_cyto_tab and walks its metadata rows, each of which must unpack into
    exactly seven fields. Rows whose second field equals `screen` contribute
    their fourth field (well) as key and fifth field (ORF) as value. When a
    well occurs more than once, the last row wins. The field meanings are
    inferred from the original variable names.
    """
    _header, records, _values, _everything = read_cyto_tab("%s/summary/cyto/Pilot_screen_BYxRM/clean_haploid.tab"%DATA_DIR)
    well_to_orf = {}
    for (_e, row_screen, _plate, well, orf, _parent, _ploidy) in records:
        if row_screen != screen:
            continue # row belongs to another screen
        well_to_orf[well] = orf
    return well_to_orf
#get_figure_S4_data()


def main():
    """Script entry point; currently a deliberate no-op.

    The function returns immediately. The calls that used to follow the
    leading `return` could never run and have been removed. They are listed
    here in case they need to be re-enabled:

        get_parent_plate_data(min_cells=500, trait=3, parent="RM")
        figure_S2c()
        figure_S2a(200, trait, parent)
            for trait in [3, 5] and parent in ("RM", "BY")
    """
    return


# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
