import glob
import os
import pdb
import socket
import sys

import numpy as np
import scipy as SP

from parts2014_gfpvar.tools.common import *

#BATCH_ANALYSIS_SCRIPT = "/home/lparts/prog/projects/gfpvar/analysis/img/run_cluster.py"
# Root of the project data tree; defaults to the shared cluster location.
DIR_BASE = "/home/morphology/shared/lparts/data/projects/gfpvar"
# The machine named "can1.local" keeps the data under a local user directory instead.
# The host name is asked from the OS directly, so importing this module spawns no subprocess.
if socket.gethostname().strip() == "can1.local": DIR_BASE = "/Users/leopold/data/projects/gfpvar"
# Locations derived from the data root.
DIR_SCAN_DATA = "%s/cluster"%DIR_BASE
DIR_CP_SCRIPTS = "%s/cp_scripts"%DIR_BASE
OUTDIR = "%s/img/cp_output"%DIR_BASE
# Remote location in user@host:path form (presumably an scp/rsync target - confirm against callers).
HOMEPATH = "lparts@bc2.ccbr.utoronto.ca:/home/morphology/shared/lparts/data/projects/gfpvar/img/cp_output/pilot_base"


class cCellImage:
    """ One image of a plate well together with the cell objects found in it.
    Built from a single row of a per-image table; the row is addressed through the column names in headers.
    """

    def __init__(self, experiment, screen, plate, image_data, headers):
        """ Parse one per-image row and create the (still feature-less) cells of the image.
        @param experiment name of experiment
        @param screen name of screen in experiment
        @param plate name of plate
        @param image_data list of values of one image row, aligned with headers
        @param headers list of column names; must contain PathName_0_Raw_Media, FileName_0_Raw_Media, ImageNumber and Count_1_Nuclei
        @raise ValueError if one of the required columns is missing from headers
        """
        self.experiment = experiment
        self.screen = screen
        self.plate = plate
        col_path = headers.index("PathName_0_Raw_Media")
        col_filename = headers.index("FileName_0_Raw_Media")
        col_imagen = headers.index("ImageNumber")
        col_nuclei = headers.index("Count_1_Nuclei")

        image_file = image_data[col_filename]
        self.image_number = int(image_data[col_imagen])
        # Path and file name are joined without a separator - presumably the path column ends with one; confirm.
        self.filename = image_data[col_path] + image_file
        # The file name starts with two 3-digit numbers: the plate row (1 -> 'A', 2 -> 'B', ...) and the plate column.
        row = chr(ord('A') + int(image_file[0:3]) - 1)
        col = int(image_file[3:6])
        self.well = "%s%d"%(row, col)
        self.id = "%s_%s_%s_%s_img-%d"%(experiment, plate, screen, self.well, self.image_number)

        # One cell per counted nucleus plus a placeholder at index 0, so that list index equals object number.
        # The count is parsed through float() first because it may be written as e.g. "12.0".
        n_nuclei = int(float(image_data[col_nuclei]))
        self.cells = [cCellObject(self, i) for i in range(n_nuclei + 1)] # list of cells for this image
        self.cell_index = {} # map of cell object number to index in cells array. Should be identity, but in case it breaks down, there's this index
        self.nucleus_index = {} # map of nucleus object number to index in cells array. Should be identity, but in case it breaks down, there's this index
        for c, cell in enumerate(self.cells):
            if c != 0: # index 0 is the placeholder, not a real nucleus
                self.nucleus_index[cell.cell_number] = c


    def update_cellindex(self):
        """ Populate the index of cell number -> index in the CellImage.cells list.
        Cells that carry a "cell_ObjectNumber" feature are registered in cell_index; cells without it are
        dropped from nucleus_index. Calling this repeatedly is safe: an entry that was already dropped is skipped.
        @raise KeyError if a cell has neither a "cell_ObjectNumber" nor a "nucleus_ObjectNumber" feature
        """
        for i, cell in enumerate(self.cells):
            if i == 0: continue # placeholder entry

            if "cell_ObjectNumber" in cell.features:
                self.cell_index[cell.features["cell_ObjectNumber"]] = i
            else:
                # Default of None keeps a second call from failing on the already removed entry
                self.nucleus_index.pop(cell.features["nucleus_ObjectNumber"], None)


class cCellObject:
    """ A single cell of an image, holding a flat map of named floating point features.
    Cell number 0 is a placeholder: update() never stores features for it.
    """

    def __init__(self, parent_image, cell_number):
        """ Create a cell without features.
        @param parent_image image object the cell belongs to; its id attribute is used to build the cell id
        @param cell_number number of the cell within the image
        """
        self.parent_image = parent_image
        # NOTE(review): despite the name this holds the image object, not its number. Kept unchanged
        # because code outside this file may rely on it - confirm the intent.
        self.parent_image_number = parent_image
        self.cell_number = cell_number
        self.id = "%s_cell-%d"%(self.parent_image.id, self.cell_number)
        self.features = {}


    def update(self, set, headers, data):
        """ Store the values of one feature set on this cell.
        @param set name of the feature set; used as prefix of the feature keys ("<set>_<header>")
        @param headers list of feature names
        @param data list of feature values, aligned with headers; every value must be convertible to float
        @raise ValueError if headers and data differ in length, or a value is not a number
        @effect adds one entry per header to self.features; does nothing for cell number 0
        """
        if self.cell_number == 0: return
        if len(headers) != len(data):
            # Fail loudly instead of stopping in an interactive debugger, which would hang unattended runs
            raise ValueError("Feature set '%s' of %s: %d headers but %d values"%(set, self.id, len(headers), len(data)))
        for header, value in zip(headers, data):
            self.features[set + "_" + header] = float(value) # only FP features


def read_combined_cp_matrix(screen, experiment="Pilot_screen_BYxRM", trainingset="training_small", pipeline="pilot_base_smoothfirst", n_meta_col=4):
    """ Read data matrix from combined CP output
    @param screen name of screen in experiment
    @param experiment name of experiment
    @param trainingset if not None, the order of features in given training set is used to order the features
    @param pipeline not used by this function; kept for interface compatibility
    @param n_meta_col number of metadata columns in the matrix
    @require metadata columns are the leading ones
    @return NxF data matrix, NxM metadata matrix, length-F list of data feature strings, length-M list of metadata feature strings
    @raise ValueError if a feature of the training set is missing from the combined file
    """
    LOG.debug("Reading combined CP matrix for screen %s"%screen)
    features, data0 = read_csv('%s/img/combined/%s/%s/combined_thin.tab'%(DATA_DIR, experiment, screen))

    data = np.zeros([data0.shape[0], data0.shape[1] - n_meta_col]) # first columns are metadata
    if trainingset is not None: # if trainingset given
        # The training set header is only read when a training set was requested, so trainingset=None works
        with open("%s/img/training/sets/%s/trainingset.tab"%(DATA_DIR, trainingset)) as ifh:
            original_features = next(ifh).strip().split('\t')[1:] # first header column is skipped
        for f, feature in enumerate(original_features): # put features in the same order as training data so classifiers can work
            data[:,f] = data0[:,features.index(feature)]
    else: # else just copy data over
        data[:,:] = data0[:,n_meta_col:]

    meta = data0[:,0:n_meta_col]
    meta_features = features[0:n_meta_col]
    # NOTE(review): the names below are in the column order of the combined file, even when the data columns
    # were reordered to follow the training set above - confirm that callers expect this.
    data_features = features[n_meta_col:]
    return data, meta, data_features, meta_features


def read_combined_file(cp_pipeline="pilot_base_smoothfirst", experiment="Pilot_screen_BYxRM", screen="haploid-R3", plate="P1"):
    """ Read the combined output from a tab file
    @param cp_pipeline name of the pipeline (directory level below img/combined)
    @param experiment name of experiment
    @param screen name of screen in experiment
    @param plate name of plate; the file read is plate_<plate>.tab
    @return all values as array of strings, the five leading (metadata) columns, the remaining columns as floats,
            the two GFP-related columns as floats, list of column names, list of the GFP-related column names
    @raise ValueError if a GFP-related column is missing from the header, or a data value is not a number
    """
    gfp_features = ["cell_Intensity_MeanIntensity_0_Raw_protein_GFP", "cell_AreaShape_Area"]
    with open("%s/img/combined/%s/%s/%s/plate_%s.tab"%(DATA_DIR, cp_pipeline, experiment, screen, plate), 'r') as ifh:
        headers = next(ifh).strip().split("\t")
        # Only lines with more than 300 whitespace-separated tokens are kept (presumably drops truncated rows - confirm)
        all_data = np.array([l.strip().split("\t") for l in ifh if len(l.split()) > 300])
    Ih = [headers.index(f) for f in gfp_features] # column indexes of the GFP-related features
    meta = all_data[:,0:5]
    data = np.array(all_data[:,5:], float)
    gfp_data = np.array(all_data[:,Ih], float)
    return all_data, meta, data, gfp_data, headers, gfp_features
    
def read_csv(filename, sep="\t"):
    """ Read a csv file into an array.
    @param filename file to read in
    @param sep token used to split individual lines
    @return headers - first line of the file separated by token, and array (of strings) of rest of lines
    @raise StopIteration if the file is empty
    """
    with open(filename, 'r') as ifh: # closed again even if reading fails
        headers = next(ifh).strip().split(sep)
        # NOTE(review): strip() also removes leading/trailing separators when sep is whitespace,
        # so empty fields at the ends of a line are lost - confirm this is intended.
        data = [l.strip().split(sep) for l in ifh]
    return headers, np.array(data)


def get_img_data(min_cell_pairs=20):
    """ Read parental means and variance estimates of the imaging data
    @param min_cell_pairs a variance estimate is only used if it is based on more than this many cell pairs
    @return means - map of parent ("BY", "RM") -> orf -> mean,
            Nx6 array of variances (rows where all six estimates are usable and none is NaN),
            length-N list of the three leading (metadata) columns of the corresponding variance rows
    """
    meandata = np.loadtxt("%s/summary/img/parental_means.tab"%DATA_DIR, delimiter="\t", dtype=object)
    vardata = np.loadtxt("%s/summary/img/variance_median.tab"%DATA_DIR, delimiter="\t", dtype=object)
    means, variances, meta = {"BY":{},"RM":{}}, [], []

    # 0. fill in parental means for each ORF from the file
    # ORF name and both means are taken from the same row of the means table, so they cannot get out of step
    for row in meandata:
        orf, m_by, m_rm = row[0], float(row[1]), float(row[2])
        means["BY"][orf], means["RM"][orf] = m_by, m_rm

    # 1. create arrays of variances and variance metadata
    J = [3,5,7,9,11,13] # indexes of interesting variables (the others are metadata and counts of the cell pairs used for  the estimates)
    for row in vardata[1:]: # first row is skipped (presumably a header - confirm)
        # Get six variance estimates if enough cell pairs used to estimate them; the count sits in the column right after each estimate
        v = [float(row[j]) for j in J if int(row[j + 1]) > min_cell_pairs]
        if (not np.isnan(v).any()) and (len(v) == len(J)): # store variances and metadata if all estimates good
            variances.append(v)
            meta.append(row[0:3])

    return means, np.array(variances), meta




def output_imaging_h2(means, vars, meta, outfilename="%s/summary/img/table_S3-img.tab"%DATA_DIR):
    """ Output imaging heritability data
    @param means map of parent->orf->mean GFP
    @param vars Nx6 array of variances (BY near, RM near, any near, BY far, RM far, any far. "near" = pairs of cells at most 80 pixels apart. "far" = pairs of cells 160-240 pixels apart).
    @param meta Nx3 array of ORF, plate, well
    @param outfilename file to write the table to
    @return none
    @effect creates outfilename, outputs means, variances, and heritabilities for individual ORFs; rows whose ORF lacks a BY or RM mean are left out
    """
    with open(outfilename, 'w') as ofh: # closed again even if a row cannot be formatted
        ofh.write("#Orf\tPlate\tWell")
        for trait in ["mean", "MSD(near)", "MSD(far)"]:
            # NOTE(review): the second column holds the RM value, yet is labelled "RMxBY" - confirm the label
            for cross in ["BYxBY", "RMxBY", "all"]:
                ofh.write("\t%s(log10GFP_%s)"%(trait, cross))
        ofh.write("\tHeritability(log10GFP_single_cell_mean)\n")
        for i in range(len(meta)):
            o = meta[i][0]
            if o not in means["BY"] or o not in means["RM"]: continue
            mean_by, mean_rm = means["BY"][o], means["RM"][o]
            ofh.write("%s\t%s\t%s"%(tuple(meta[i])))
            ofh.write("\t%.3f\t%.3f\t%.3f"%(mean_by, mean_rm, 0.5*(mean_by + mean_rm)))
            ofh.write("\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f"%(tuple(vars[i])))
            delta = mean_rm - mean_by
            v_by, v_rm = vars[i][3:5] # the "far" estimates of the two parents
            # Share of variance seen within BY, relative to half the squared parental difference plus the mean parental variance
            frac_clonal = v_by/((delta/2)**2 + 0.5*(v_by + v_rm))
            if frac_clonal > 1: frac_clonal = 1. # cap, so the reported value is never negative
            ofh.write("\t%.3f\n"%(1. - frac_clonal))
    


def get_raw_microscopy_data(plates):
    """ Read the combined files of several plates
    @param plates iterable of plate names, each passed on to read_combined_file
    @return map of plate -> list of the six items returned by read_combined_file, each converted to an array
    """
    result = {}
    for plate in plates: # read in all plates
        # A real list (not a lazy map object), so the items can be indexed and read more than once
        result[plate] = [np.array(item) for item in read_combined_file(plate=plate)]
    return result


def testu():
    """ Ad-hoc check: run read_cp_outputs on a fixed test directory. The result is not returned. """
    test_dir = "/home/lparts/data/projects/gfpvar/img/CP_output/pilot_base/test2"
    read_cp_outputs(test_dir)


def escape(f):
    """ Put a backslash in front of every space and round bracket of a string
    @param f string to escape (e.g. a file name)
    @return escaped copy of f
    """
    escaped = f
    for special in (" ", "(", ")"):
        escaped = escaped.replace(special, "\\" + special)
    return escaped


def main():
    """ Command line entry point. Dispatching is currently switched off by the leading return,
    so nothing below it is executed.
    """
    return
    command = sys.argv[1]

    if command == "combine_cp":
        combine_cp_outputs(sys.argv[2], sys.argv[3:])
    elif command == "create_pickle":
        read_cp_outputs(sys.argv[2], sys.argv[3])
    elif command == "organise":
        organise_images(sys.argv[2], sys.argv[3])


if __name__ == "__main__":
    main()
