import time
from parts2014_gfpvar.tools.common import *
from parts2014_gfpvar.analysis.img.util import *


""" Read in image output file. Create and return an array of image objects, each associated with cell objects that will be further populated
@param filename .csv file with image data, usually named /path/to/data/DefaultOUT_Image.csv
@param experiment name of overall experiment
@param screen name of screen within experiment
@param plate plate for the image
@return [CellImage object] , where [image ID] = object for corresponding image
"""
def read_image_file(filename, experiment, screen, plate):
    """ Build the list of image objects described by an image table.
    @param filename comma-separated image file; must contain an "ImageNumber" column
    @param experiment experiment name handed to every cCellImage
    @param screen screen name handed to every cCellImage
    @param plate plate name handed to every cCellImage
    @return list in which position n holds the cCellImage for image number n; positions whose number does not occur in the file stay None
    """
    headers, table = read_csv(filename, ",")
    id_column = headers.index("ImageNumber")
    image_ids = [int(value) for value in table[:, id_column]]

    # one slot per possible image number, so the list can be indexed by image number directly
    images = [None] * (max(image_ids) + 1)
    for image_id, row in zip(image_ids, table):
        images[image_id] = cCellImage(experiment=experiment, screen=screen, plate=plate, headers=headers, image_data=row)

    return images


""" Read in nucleus objects and their measurements from the nucleus file.
@param filename .csv file with nucleus data, usually named DefaultOUT_1_Nuclei.csv
@param images list of CellImage objects that can be indexed by image ID
@param setname global name for the set of features extracted from this file; should reflect the contents of file
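@param index_col_name column name that indexes the cells in the image. For nucleus-related features, this is ObjectNumber. For others, this ought to be the parent that refers to a nucleus-based feature
@param index "cell" if the values in index_col_name are cell numbers that must be translated through the image's cell_index; any other value means they are checked against the image's nucleus_index and used directly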
@effect modifies CellImage objects in the images list to populate individual Cells with nucleus data
"""
def read_object_file(filename, images, setname="nucleus", index_col_name="ObjectNumber", index="nucleus"):
    """ Copy per-object measurements from a CSV file onto the matching cells.
    @param filename comma-separated object file; must contain "ImageNumber" and the index_col_name column
    @param images list of image objects that can be indexed by image number
    @param setname name of the feature set, passed on to each cell's update()
    @param index_col_name column whose value identifies the cell a row belongs to
    @param index "cell" to translate the identifier through the image's cell_index; otherwise the identifier is looked up in nucleus_index and used as is
    @return the images list that was passed in
    @effect calls update(setname, headers, row) on every cell that is tracked; rows for untracked objects are ignored
    """
    headers, table = read_csv(filename, ',')
    image_column = headers.index("ImageNumber")
    object_column = headers.index(index_col_name)
    lookup_by_cell = (index == "cell")

    for row in table:
        image_id, object_id = int(row[image_column]), int(row[object_column])
        image = images[image_id]

        if lookup_by_cell:
            if object_id not in image.cell_index:
                continue # cell is not tracked
            object_id = image.cell_index[object_id] # translate cell number into the index used by image.cells
        elif object_id not in image.nucleus_index:
            continue # nucleus is not tracked

        image.cells[object_id].update(setname, headers, row)

    return images


""" Read information about GFP objects, and store summary statistics for the cells in images.
@param filename input file with data on GFP objects
@param images list of CellImage objects that each have lists of cells as well
@effect modifies cells in the images list: adds to each tracked cell's feature list the median of every GFP object feature, as well as the number of objects, the largest object size, the brightest object's raw integrated intensity, and the variance of the rescaled integrated intensity
"""
def read_gfpobjects(filename, images):
    """ Attach per-cell summaries of GFP object measurements to the cells in images.
    @param filename comma-separated GFP object file; needs the columns ImageNumber, Parent_2_CellsFromNuclei, AreaShape_Area, Intensity_IntegratedIntensity_0_Raw_protein_GFP and Intensity_IntegratedIntensity_02_Rescaled_protein_GFP, and every value must be convertible to float
    @param images list of image objects indexed by image number; None entries are skipped
    @return the images list that was passed in
    @effect for every tracked cell stores gfpobj_<column>_median for each column of the file, plus gfpobj_count, gfpobj_largest, gfpobj_brightest and gfpobj_varintensity; cells without GFP objects get a count of 0 and nan for all other summaries
    GFP objects that belong to an image without an entry, or whose parent cell is not in that image's cell_index, are ignored - the same rule read_object_file applies to untracked objects.
    """
    headers, data = read_csv(filename, ",")
    col_parent = headers.index("Parent_2_CellsFromNuclei")
    col_img = headers.index("ImageNumber")

    # 0. temporary per-image, per-cell store for the raw GFP object rows; only summaries end up on the cells
    gfpobj = {}
    for i in range(1, len(images)):
        if images[i] is None: continue # image number that did not occur in the image file
        gfpobj[i] = dict((j, []) for j in range(1, len(images[i].cells)))

    # 1. sort the GFP object rows by image and parent cell
    for row in data:
        img, parent = int(row[col_img]), int(row[col_parent])
        if img not in gfpobj: continue # no image object to attach this row to
        cell_index = images[img].cell_index
        if parent not in cell_index: continue # parent cell is not tracked (presumably also covers objects without a parent - confirm)
        gfpobj[img][cell_index[parent]].append([float(value) for value in row])

    # 2. calculate summaries of each feature for each tracked cell in every image
    col_size = headers.index("AreaShape_Area")
    col_raw = headers.index("Intensity_IntegratedIntensity_0_Raw_protein_GFP")
    col_res = headers.index("Intensity_IntegratedIntensity_02_Rescaled_protein_GFP")
    for i, per_cell in gfpobj.items():
        tracked = set(images[i].nucleus_index.values()) # built once per image instead of scanning the values for every cell
        for j, rows in per_cell.items():
            if j not in tracked: continue # skip cells that are no longer tracked for whatever filtering reason
            objs = SP.array(rows) # array form makes the column-wise summaries easy
            features = images[i].cells[j].features
            count = len(objs)

            for h, header in enumerate(headers): # median of every column
                features["gfpobj_%s_median"%header] = SP.median(objs[:,h]) if count > 0 else SP.nan
            features["gfpobj_count"] = count

            if count > 0:
                features["gfpobj_largest"] = max(objs[:,col_size])
                features["gfpobj_brightest"] = max(objs[:,col_raw])
                # NOTE(review): var is not defined in this file; presumably it is supplied by the star imports - confirm
                features["gfpobj_varintensity"] = var(objs[:,col_res])
            else:
                features["gfpobj_largest"] = SP.nan
                features["gfpobj_brightest"] = SP.nan
                features["gfpobj_varintensity"] = SP.nan

    return images


""" Calculate log-ratio of total GFP intensity inside nucleus and outside
@param images list of CellImage objects with populated Cell objects
@effect updates each cell object with additional feature of log-ratio
"""
def calculate_gfp_nucleus_overlaps(images):
    """ Store the log10 ratio of GFP intensity inside vs outside the nucleus on every tracked cell.
    @param images list of image objects; position 0 is never visited and None entries are skipped
    @effect sets the feature "cell_nuclear-log10-gfp-ratio" on each cell listed in an image's nucleus_index to log10(inside) - log10(outside), using the rescaled integrated GFP intensity of the gfp-in-nucleus and gfp-outside-nucleus feature sets
    """
    intensity_feature = "Intensity_IntegratedIntensity_02_Rescaled_protein_GFP"
    inside_key = "gfp-in-nucleus_%s"%intensity_feature
    outside_key = "gfp-outside-nucleus_%s"%intensity_feature

    for image in images[1:]:
        if image is None: continue # image numbers without an image object have nothing to update
        for ci in image.nucleus_index.values():
            features = image.cells[ci].features
            # both inputs are read before the result is stored, so a missing input never leaves a half-computed ratio behind
            features["cell_nuclear-log10-gfp-ratio"] = SP.log10(features[inside_key]) - SP.log10(features[outside_key])


""" Combine CP output files (features in all channels and objects) in a given plate and experiment into cCellImage objects
@param outdir CellProfiler output directory
@param experiment name of the experiment every output image object is endowed with
@param screen screen within the experiment every output image object is endowed with
@param plate name of the plate within experiment each image object is endowed with
@return list of cCellImage objects with cells in each image having features from all CellProfiler output files
@requires files in outdir:  *Image*.csv, *1_Nuclei.csv, *2_CellsFromNuclei.csv, *21_GFP_in_nucleus.csv, *21_GFP_outside_nucleus.csv, *3_GFP_objects.csv
"""
def combine_cp_output_files(outdir, experiment, screen, plate):
    """ Merge all CellProfiler output files of one run into a list of image objects.
    @param outdir directory holding the CellProfiler .csv output files
    @param experiment experiment name given to every image object
    @param screen screen name given to every image object
    @param plate plate name given to every image object
    @return list of image objects whose cells carry the nucleus, cell, GFP in/outside nucleus and GFP object features
    For each required file the first glob match in outdir is used; a missing file raises IndexError.
    """
    def first_match(pattern):
        # first file in outdir that matches the glob pattern
        return glob.glob(("%s/"%outdir) + pattern)[0]

    started = time.time()
    LOG.debug("Creating combined CP output from %s"%outdir)

    # 1. one image object per row of the image file
    images = read_image_file(first_match("*Image*.csv"), experiment, screen, plate)

    # 2. nuclei come first: they define which cells exist
    read_object_file(first_match("*1_Nuclei.csv"), images, setname="nucleus", index_col_name="ObjectNumber")

    # 3. cell measurements, matched to nuclei through their parent column
    read_object_file(first_match("*2_CellsFromNuclei.csv"), images, setname="cell", index_col_name="Parent_2_ShrunkenNuclei")
    for image in images[1:]:
        if image is None:
            continue
        image.update_cellindex() # refresh the cell -> object mapping before anything is looked up by cell number

    # 4. GFP inside and outside the nucleus, followed by their log-ratio
    for pattern, setname in (("*21_GFP_in_nucleus.csv", "gfp-in-nucleus"), ("*21_GFP_outside_nucleus.csv", "gfp-outside-nucleus")):
        read_object_file(first_match(pattern), images, setname=setname, index_col_name="Parent_2_CellsFromNuclei", index="cell")
    calculate_gfp_nucleus_overlaps(images)

    # 5. summaries of the individual GFP objects
    read_gfpobjects(first_match("*3_GFP_objects.csv"), images)

    LOG.debug("Done combining, time=%.1f"%(time.time() - started))
    return images


""" Write a single output file for all images in given list to outfilename (in append mode)
@param images list of cCellImage objects
@param outfilename output file
@param max_cells how many maximum cells per image output (large number of cells can mean empty image with random segmentation)
@param min_cellsize minimum size of output cells in pixels
@param max_cellsize maximum size of output cells in pixels
@param write_header whether to write column names
"""
def write_combined_cp_output(images, outfilename, max_cells=400, min_cellsize=250, max_cellsize=3500, write_header=False):
    """ Append one tab-separated row per tracked cell of every image to outfilename.
    @param images list of image objects; None entries and images without cells are skipped
    @param outfilename output file, opened in append mode
    @param max_cells images with more cells than this are skipped entirely
    @param min_cellsize cells with cell_AreaShape_Area below this are skipped
    @param max_cellsize cells with cell_AreaShape_Area above this are skipped
    @param write_header whether to write the column names before the rows
    The feature columns are taken from the first tracked cell found in images. If no image has a tracked cell
    the columns cannot be determined and nothing is written, not even the header.
    """
    # 1. find the feature names; any image with a tracked cell will do, not just images[1]
    features = None
    for image in images:
        if (image is None) or (image.cells is None) or (not image.nucleus_index): continue
        first_cell = next(iter(image.nucleus_index.values()))
        features = list(image.cells[first_cell].features.keys())
        break
    if features is None: return

    # 2. write header and rows; the with-block closes the file even if a write fails
    with open(outfilename, 'a') as ofh:
        if write_header: ofh.write("Experiment\tScreen\tPlate\tWell\tImage_number\tCell_number\t%s\n"%("\t".join(features))) # create header

        for image in images:
            if (image is None) or (image.cells is None) or (len(image.cells) == 0) or len(image.cells) > max_cells: continue # skip empty images and ones with too many cells
            for ni in image.nucleus_index.values():
                cell = image.cells[ni]
                area = cell.features['cell_AreaShape_Area']
                if (area > max_cellsize) or (area < min_cellsize): continue # skip huge and tiny things
                row = "%s\t%s\t%s\t%s\t%d\t%d"%(image.experiment, image.screen, image.plate, image.well, image.image_number, cell.cell_number)
                row += "".join("\t%.3e"%(cell.features[f]) for f in features)
                ofh.write(row + "\n")


""" Combine results from multiple CellProfiler runs into a single output file
@param basedir directory that holds directories for all outputs. Files will be looked for in <basedir>/*/*.csv
@param outfilename output file to write combined data to
@param experiment experiment name
@param screen name of screen within experiment
@param plate plate name
"""
def combine_cp_batches(basedir, outfilename, experiment, screen, plate):
    """ Combine the output of every CellProfiler run below basedir into one file.
    @param basedir directory whose subdirectories hold the individual runs; a subdirectory counts as a run if it matches <basedir>/*/*Image*.csv
    @param outfilename output file the combined data is appended to; the header is written with the first run only
    @param experiment experiment name
    @param screen name of screen within experiment
    @param plate plate name
    """
    import os # local import: this file does not import os itself

    # collect each run directory once, in glob order, even if it holds several *Image*.csv files
    indirs = []
    for image_file in glob.glob("%s/*/*Image*.csv"%basedir):
        indir = os.path.dirname(image_file)
        if indir not in indirs:
            indirs.append(indir)

    for i, indir in enumerate(indirs):
        images = combine_cp_output_files(indir, experiment=experiment, screen=screen, plate=plate)
        write_combined_cp_output(images, outfilename, write_header=(i == 0))


def test():
    """ Manual smoke test: combine one hard-coded CellProfiler output directory and append the result to test.tab. """
    # combine_cp_output_files requires experiment, screen and plate; the screen argument was missing
    images = combine_cp_output_files("/Users/leopold/data/projects/gfpvar/img/training/sets/annotation/vacuole/out", "test-experiment", "test-screen", "test-plate")
    write_combined_cp_output(images, "test.tab")
