import os
import glob
import scipy as SP
import pdb
from parts2014_gfpvar.tools.common import *
from parts2014_gfpvar.tools.io import get_array_data

# ALL INDICES HERE ARE 1-BASED (first row is row 1, not row 0) TO BE CONSISTENT WITH OTHER ANNOTATIONS


""" Return map of row,col->well annotation for the plate in given screen and project
The rows of the plate annotation file are:
#Row    Col     Type    Ploidy      SGD_name        Common_name     TAP_GFP_level   Localisation
1       1       BY      Haploid     nan     nan     nan     nan
1       2       RM      Haploid     nan     nan     nan     nan
1       3       BY      Haploid     YNL088W TOP2    5730    nucleus
1       4       RM      Haploid     YNL088W TOP2    5730    nucleus
1       5       BY      Haploid     YMR190C SGS1    not visualized  "cytoplasm,nucleus"
1       6       RM      Haploid     YMR190C SGS1    not visualized  "cytoplasm,nucleus"
...
The third column can also be "BY_RM_combined"
"""
def read_plateannot(plate, screen, project="Pilot_screen_BYxRM", format=384):
    """Read the annotation file of one plate into a (row, col) -> annotation map.

    The file read is DATA_DIR/cytometry/<project>/annotation/<screen>/<plate>.tab.
    Lines starting with "#" and blank lines are skipped; every other line must
    have exactly eight tab-separated fields.

    @param plate - plate name (file name without the ".tab" extension)
    @param screen - screen name (annotation subdirectory)
    @param project - project directory under DATA_DIR/cytometry
    @param format - unused; kept so that existing callers keep working
    @return map (int row, int col) -> (type, ploidy, sgd_name, common_name, tap_level, loc),
            all values strings, with "BY_RM_" in the type rewritten to "BY/RM "
    """
    filename = "%s/cytometry/%s/annotation/%s/%s.tab"%(DATA_DIR, project, screen, plate)
    res = {}
    # open() + with works on Python 2 and 3 and guarantees the handle is closed
    with open(filename, 'r') as ifh:
        for line in ifh:
            if line[0] == "#": continue
            fields = line.strip().split('\t')
            if fields == [""]: continue # blank line - nothing to annotate
            row, col, strain_type, ploidy, sgd_name, common_name, tap_level, loc = fields
            res[(int(row), int(col))] = (strain_type.replace("BY_RM_", "BY/RM "), ploidy, sgd_name, common_name, tap_level, loc)

    return res


""" Create annotation file for a random plate.
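@param location_data - map of (row, col) in the cherrypick plate -> (plate, row, col) of the strain in the full array annotation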
@param subplates - list of subplates that were used at each of the four 384 subplates for this plate; e.g. AAAA would mean the 96 colonies in the "A" location are used in 4 replicas
"""
def create_randomplate_plateannot(location_data, subplates="ABCD", screen="interesting_candidate", outfilename="annot", subplate_types=None, ploidy=None):
    """Write the annotation file of a plate assembled from cherrypicked 96-colony subplates.

    The file is written to
    DATA_DIR/cytometry/Pilot_screen_BYxRM/annotation/<screen>/<outfilename>;
    the directory is created if needed. One line is written for every
    position whose source well is present in location_data.

    @param location_data - map (row, col) of the cherrypick plate -> (plate, row, col) used to index the array annotation
    @param subplates - one letter per position of the output plate; 'C'/'D' read source rows shifted by one, 'B'/'D' read source columns shifted by one
    @param screen - annotation subdirectory to write into
    @param outfilename - name of the file to write
    @param subplate_types - Type column value for each position of `subplates`; None means "BY_RM_combined" for all four
    @param ploidy - Ploidy column value for each position of `subplates`; None means "Haploid" for all four
    """
    if subplate_types is None: subplate_types = ["BY_RM_combined"]*4
    if ploidy is None: ploidy = ["Haploid"]*4 # built per call instead of sharing one mutable default list
    # read row,col->gene id data
    array_data = get_array_data(skip_nan=False)
    # create annotation file based on data in the full array annotation
    outdir = "%s/cytometry/Pilot_screen_BYxRM/annotation/%s"%(DATA_DIR, screen)
    if not os.path.isdir(outdir): os.makedirs(outdir) # no shell involved, so unusual characters in the path are safe

    with open("%s/%s"%(outdir, outfilename), 'w') as ofh:
        ofh.write("#Row\tCol\tType\tPloidy\tSGD_name\tCommon_name\tTAP_GFP_level\tLocalisation\n")
        for i, subplate in enumerate(subplates): # position i of the output plate is filled from the subplate with this letter
            src_row_shift = int(subplate in ('C', 'D'))
            src_col_shift = int(subplate in ('B', 'D'))
            dst_row_shift = int(i > 1) # positions 2 and 3 land on the second row of each 2x2 group
            dst_col_shift = i % 2 # odd positions land on the second column of each 2x2 group
            for row in range(8):
                for col in range(12):
                    source = (2*row + 1 + src_row_shift, 2*col + 1 + src_col_shift) # row and column in cherrypick file
                    if source not in location_data: continue # well not filled
                    plate_array384, row_array384, col_array384 = location_data[source] # strain corresponding to the cherrypick data
                    ofh.write("%s\t%s\t%s\t%s"%(2*row + 1 + dst_row_shift, 2*col + 1 + dst_col_shift, subplate_types[i], ploidy[i]))
                    ofh.write("\t%s\t%s\t%s\t%s\n"%(tuple(array_data[plate_array384,row_array384,col_array384])))


""" Return row,col -> plate_array384,row_array384,col_array384 name mapping from cherrypick file with extra annotation
@param cherrypick_file path to tab-delimited file to read cherrypick data from
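@param extra_annotation True if file lines are tab-separated col, row, orf, location, where location is a whitespace-separated string whose first token is ignored and whose remaining tokens are integers (used by callers as plate384, row384, col384); False if col, row, orf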
@require first five lines of cherrypick_file can be discarded
@require column format as described under extra_annotation
@return map (row,col)->orf, if !extra_annotation; (row, col) -> (plate, row, col) if extra_annotation
"""
def read_cherrypick_data(cherrypick_file, extra_annotation=True):
    """Parse a tab-delimited cherrypick file into a (row, col) -> value map.

    The first five lines of the file are discarded and blank lines are
    skipped. Each remaining line is stripped and split on tabs.

    @param cherrypick_file - path of the file to read
    @param extra_annotation - if True, lines must have four fields
           (col, row, orf, location); the location field is split on
           whitespace, its first token dropped and the rest converted to int.
           If False, lines must have three fields (col, row, orf).
    @return map (int row, int col) -> list of ints if extra_annotation,
            else (int row, int col) -> orf string
    """
    with open(cherrypick_file, 'r') as ifh: # closes the handle, works on Python 2 and 3
        data_lines = ifh.readlines()[5:]

    cherrypick_data = {}
    for line in data_lines: # create map of (cherry row, cherry col) => value
        fields = line.strip().split("\t")
        if fields == [""]: continue # blank line
        if extra_annotation:
            c_col, c_row, _orf, plateloc = fields
            # a real list (not a map object) so callers can unpack and reuse it on Python 3 as well
            cherrypick_data[(int(c_row), int(c_col))] = [int(x) for x in plateloc.split()[1:]]
        else:
            c_col, c_row, orf = fields
            cherrypick_data[(int(c_row), int(c_col))] = orf
    return cherrypick_data


""" Create annotation files for the entire array based on previous data """ 
def create_gfparray_plateannot(screen, layout="combined", project="Pilot_screen_BYxRM", ploidy="Haploid"):
    """Write annotation files for array plates 1-11 of a genome-wide screen.

    Files go to DATA_DIR/cytometry/<project>/annotation/<screen>/, which is
    created if needed.

    layout == "combined": one file P<plate>.tab per plate; every well of the
    16x24 plate gets type "BY/RM_combined" and the array annotation at the
    same (plate, row, col). For plate 4 of screen "haploid-H2O2-R1", wells in
    even rows read the annotation of the neighbouring column of their column
    pair instead.

    layout containing "/": the layout is split on "/" into parent names. Two
    files per plate are written, P<plate>AB.tab and P<plate>CD.tab (prefixed
    with "Hap1-" when screen == "Hap1"). Odd columns get the first parent,
    even columns the second.

    Any other layout writes no files.

    @param screen - screen name (annotation subdirectory)
    @param layout - "combined" or "<parent1>/<parent2>"
    @param project - project directory under DATA_DIR/cytometry
    @param ploidy - value written to the Ploidy column of every well
    """
    header = "#Row\tCol\tType\tPloidy\tSGD_name\tCommon_name\tTAP_GFP_level\tLocalisation\n"
    array_data = get_array_data(skip_nan=False)
    outdir = "%s/cytometry/%s/annotation/%s"%(DATA_DIR, project, screen)
    if not os.path.isdir(outdir): os.makedirs(outdir)

    if layout == "combined":
        # NOTE(review): the type written here is "BY/RM_combined", whereas read_plateannot only rewrites "BY_RM_" - confirm which spelling downstream code expects
        for plate in range(1, 12):
            with open("%s/P%d.tab"%(outdir, plate), 'w') as ofh:
                ofh.write(header)
                for row in range(1,17):
                    for col in range(1,25):
                        arraycol = col
                        # TODO: fix this so that it is done elsewhere
                        if (plate == 4) and (screen == "haploid-H2O2-R1") and ((row % 2) == 0):
                            j, k = (col-1)//2, (col-1) % 2 # // keeps the index an integer on Python 3 as well
                            arraycol = 2*j + 2 - k # switch C,D plates
                        ofh.write("%d\t%d\tBY/RM_combined\t%s"%(row,col,ploidy))
                        ofh.write("\t%s\t%s\t%s\t%s\n"%(tuple(array_data[plate,row,arraycol])))

    elif layout.count("/") > 0: # parents separately. One plate of spots AB, (p1 A, p2 A in one row, p1 B, p2 B in next etc), other of CD.
        parents = layout.split("/")
        # pick the file name before opening, so that no empty un-prefixed file is left behind for the Hap1 screen
        prefix = "Hap1-" if screen == "Hap1" else ""
        for plate in range(1, 12): # for each array plate
            for i, wells in enumerate(["AB","CD"]): # for each subplate of AB spots or CD spots
                # one with-block per file: both the AB and the CD handle are closed
                with open("%s/%sP%d%s.tab"%(outdir, prefix, plate, wells), 'w') as ofh:
                    ofh.write(header)
                    for row in range(1,17): # for each row and column
                        for col in range(1, 25):
                            arrayrow = i + 1 + ((row - 1)//2)*2
                            arraycol = 1 + 2*((col-1)//2) + ((row - 1) % 2) # B,D get a +1 from the row
                            ofh.write("%d\t%d\t%s\t%s"%(row,col, parents[(col+1)%2], ploidy))
                            ofh.write("\t%s\t%s\t%s\t%s\n"%(tuple(array_data[plate,arrayrow,arraycol])))
            
        
""" Convenience method to create annotations for all genome-wide screens"""
def create_all_screen_annotations():
    """Call create_gfparray_plateannot once for every genome-wide screen.

    Ploidy is "Diploid" for screens whose name contains "diploid", otherwise
    "Haploid". The layout is "combined" except for the two screens listed in
    the lookup table below.
    """
    screen_names = ("haploid-H2O2-R1", "haploid-R1", "haploid-R2", "haploid-R3", "haploid-R4", "diploid-R1", "diploid-R2", "Hap1")
    # screens whose parents were measured separately
    special_layouts = {"haploid-R1": "BY/RM", "Hap1": "BY/BY-HAP1"}
    for screen_name in screen_names:
        screen_ploidy = "Diploid" if "diploid" in screen_name else "Haploid"
        screen_layout = special_layouts.get(screen_name, "combined")
        create_gfparray_plateannot(screen_name, screen_layout, ploidy=screen_ploidy)


""" Convenience method to create annotations for all interesting candidates"""
def create_all_interestingplate_annotations():
    """Create an annotation file for every interesting-candidate plate found on disk.

    Plates are discovered from the entries of
    DATA_DIR/cytometry/Pilot_screen_BYxRM/interesting_candidate and from
    stats*/stats-* files beneath it. The output name is the entry name plus
    ".tab" for direct entries; for stats files the first six characters are
    dropped and ".pickle" is replaced by ".tab". The subplate letters are the
    second dash-separated part of that name.
    """
    plateannotfile = "%s/candidates/2012-07-28_Cherrypicking-Leo_candidates-384.tab"%(DATA_DIR)
    datadir = "%s/cytometry/Pilot_screen_BYxRM/interesting_candidate"%DATA_DIR
    location_data = None # parsed once, on first use, and shared by all plates
    for platefile in glob.glob("%s/*"%datadir) + glob.glob("%s/stats*/stats-*"%datadir): # for all files, whether they are fcs or already processed stats
        # count only below datadir, so a DATA_DIR that itself contains "stats" does not confuse the test
        n_stats = platefile[len(datadir):].count("stats")
        if n_stats == 1: continue # (skip the directory that contains data)
        basename = platefile.split("/")[-1]
        if n_stats > 1: # if raw data not present any more, take the info from stats files that look like stats_prior-data.../stats-Candidates-AAAA-dilutions.pickle
            outfilename = basename[6:].replace(".pickle",".tab")
        else: # for files that look like Candidates-AAAA-dilutions
            outfilename = basename + ".tab"
        # subplates are the second dash-separated part; drop the extension first so that a name like Candidates-AAAA.tab also works
        subplates = os.path.splitext(outfilename)[0].split("-")[1]
        if location_data is None: location_data = read_cherrypick_data(plateannotfile)
        create_randomplate_plateannot(location_data, subplates=subplates, outfilename=outfilename)


""" Convenience method to create annotations for all interesting candidates"""
def create_all_reciprocal_annotations():
    """Create an annotation file for every reciprocal-candidate plate found on disk.

    Strain locations are read from
    DATA_DIR/candidates/2012-09-05_reciprocal_candidates.tab and placed at the
    "A" positions (odd row, odd column). One annotation file is then written
    per entry matching
    DATA_DIR/cytometry/Pilot_screen_BYxRM/reciprocal_candidate/Reciprocals-*.
    The ploidy of each of the four positions is parsed from the entry name.
    """
    candidatefile = "%s/candidates/2012-09-05_reciprocal_candidates.tab"%(DATA_DIR)
    location_data = {}
    with open(candidatefile, 'r') as ifh:
        for l in ifh: # a line is like ORF,common name,GFP plate,GFP row,GFP col,GFP name,cis LOD, trans peak,sorted,original array loc,rearray loc (primer order)
            if l[0:3] == "ORF": continue # header line
            d = l.strip().split("\t")
            if d == [""]: continue # blank line
            p384, r384, c384 = [int(x) for x in d[2:5]]
            row0, col0 = ord(d[9][0]) - ord('A'), int(d[9][1:]) - 1 # 0-based row and column
            location_data[(2*row0 + 1, 2*col0 + 1)] = (p384, r384, c384) # store for subplate "A"

    # 1. Create annotations for each plate. Assume the plate name has format Reciprocals(-[ploidy][subplate+])+  where ploidy in [Haploid, Diploid], subplate in ABCD
    for platefile in glob.glob("%s/cytometry/Pilot_screen_BYxRM/reciprocal_candidate/Reciprocals-*"%DATA_DIR):
        outfilename = platefile.split("/")[-1] + ".tab" # plate + tab
        subplates = "AAAA" # only 96 wells, fit so that they are in the A subplate
        ploidies = ["Haploid"]*4 # default ploidies
        for plate_ploidy in outfilename[0:-4].split("-")[1:]: # filenames look like Reciprocals-HaploidAD-DiploidBC.tab - get the HaploidAD-DiploidBC part
            ploidy = plate_ploidy[0:7] # ploidy is first 7 characters, subplates it corresponds to are the rest
            for p in plate_ploidy[7:]: # store the ploidy of all subplates
                ploidies[ord(p) - ord('A')] = ploidy
        create_randomplate_plateannot(location_data, subplates=subplates, outfilename=outfilename, ploidy=ploidies, screen="reciprocal_candidate")


def get_array_locs(loc_orfs):
    """Translate a location -> ORF map into a location -> array index map.

    For every ORF, the first entry along the last axis of the array
    annotation is compared with it, and the first match is kept.

    @param loc_orfs - map location -> ORF name
    @return map location -> list with the first matching index along each of
            the three leading axes (callers unpack it as plate, row, col)
    """
    array_data = get_array_data(skip_nan=False)

    def first_match(orf):
        # SP.where gives one index array per axis; keep only the first hit of each
        # NOTE(review): SP.where is a NumPy function reached through the scipy namespace - confirm the installed SciPy still exposes it
        hits = SP.where(array_data[:,:,:,0] == orf)
        return [axis_hits[0] for axis_hits in hits]

    return dict((loc, first_match(orf)) for loc, orf in loc_orfs.items())


""" Convenience function for creating annotations for all-h2 single plate screens """
def create_allh2plate_annotations():
    """Create the annotation files of the all-h2 single plate screens.

    Writes all-h2-<ploidy>-R<rep>.tab in the "all-h2" screen for replicates
    1 and 2 and for ploidies Haploid and Diploid; all four share the same
    strain layout.
    """
    plateannotfile = "%s/candidates/2012-12-19_all-h2_excelfmt.tab"%DATA_DIR
    # the layout does not depend on replicate or ploidy, so parse and look it up once
    array_locs = get_array_locs(read_cherrypick_data(plateannotfile, False))
    for rep in [1,2]:
        for ploi in ["Haploid", "Diploid"]:
            create_randomplate_plateannot(array_locs, outfilename="all-h2-%s-R%d.tab"%(ploi, rep), screen="all-h2", ploidy=[ploi]*4)


def create_fillin_plate_annotations():
    """Create the annotation files Haploid-fillin_P1..P3.tab of the "fillin" screen.

    Plate number n is laid out according to
    DATA_DIR/candidates/2013-02-22_repeat-<n>.tab; all wells are Haploid.
    """
    for plate_no in (1, 2, 3):
        cherrypick_path = "%s/candidates/2013-02-22_repeat-%d.tab"%(DATA_DIR, plate_no)
        plate_locs = get_array_locs(read_cherrypick_data(cherrypick_path, False))
        create_randomplate_plateannot(plate_locs, outfilename="Haploid-fillin_P%d.tab"%(plate_no), screen="fillin", ploidy=["Haploid"]*4)


def create_target_plate_annotations():
    """Write Targets-R1.tab and Targets-R2.tab in the "targets" annotation directory.

    The layout comes from
    DATA_DIR/candidates/2013-07-02_Hap1-Mkt1-Ira2-targets_cherrypick.txt. The
    strain type of a well depends on the parity of its row and column; all
    wells are Haploid. Both replicate files get the same content.
    """
    plateannotfile = "%s/candidates/2013-07-02_Hap1-Mkt1-Ira2-targets_cherrypick.txt"%DATA_DIR
    array_locs = get_array_locs(read_cherrypick_data(plateannotfile, False))
    array_data = get_array_data(skip_nan=False)
    subplate_types = [["BY-HAP1-MKT1-IRA2","BY-MKT1-IRA2"],["BY-HAP1","BY"]] # indexed by [row % 2][col % 2]
    outdir = "%s/cytometry/Pilot_screen_BYxRM/annotation/targets"%DATA_DIR
    if not os.path.isdir(outdir): os.makedirs(outdir) # create the directory like the other annotation writers do

    for replicate in (1,2):
        with open("%s/Targets-R%d.tab"%(outdir, replicate), 'w') as ofh:
            ofh.write("#Row\tCol\tType\tPloidy\tSGD_name\tCommon_name\tTAP_GFP_level\tLocalisation\n")
            for c_row in range(1,17):
                for c_col in range(1,25):
                    if (c_row, c_col) not in array_locs: continue # well not filled
                    ofh.write("%s\t%s\t%s\tHaploid"%(c_row, c_col, subplate_types[c_row%2][c_col%2])) # row and column of current strain in the plate
                    plate_array384, row_array384, col_array384 = array_locs[(c_row, c_col)] # strain corresponding to the cherrypick data
                    ofh.write("\t%s\t%s\t%s\t%s\n"%(tuple(array_data[plate_array384,row_array384,col_array384])))


def create_haploidR5RMswaps_plate_annotations():
    """Write the annotation files of the haploid-R5_RMswaps screen.

    For each array plate 1-11, two files are written, BYRM2-RM_P<plate>AB.tab
    and BYRM2-RM_P<plate>CD.tab. Odd columns are typed "BY/RM-MKT1-IRA2",
    even columns "RM"; all wells are Haploid.
    """
    array_data = get_array_data(skip_nan=False)
    outdir = "%s/cytometry/Pilot_screen_BYxRM/annotation/haploid-R5_RMswaps"%(DATA_DIR)
    if not os.path.isdir(outdir): os.makedirs(outdir)

    parents = ["BY/RM-MKT1-IRA2", "RM"]
    for plate in range(1, 12): # for each array plate
        for i, wells in enumerate(["AB","CD"]): # for each subplate of AB spots or CD spots
            # one with-block per file: both the AB and the CD handle are closed
            with open("%s/BYRM2-RM_P%d%s.tab"%(outdir, plate, wells), 'w') as ofh:
                ofh.write("#Row\tCol\tType\tPloidy\tSGD_name\tCommon_name\tTAP_GFP_level\tLocalisation\n")
                for row in range(1,17): # for each row and column
                    for col in range(1, 25):
                        arrayrow = i + 1 + ((row - 1)//2)*2 # // keeps the index an integer on Python 3 as well
                        arraycol = 1 + 2*((col-1)//2) + ((row - 1) % 2) # B,D get a +1 from the row
                        ofh.write("%d\t%d\t%s\tHaploid"%(row,col, parents[(col+1)%2]))
                        ofh.write("\t%s\t%s\t%s\t%s\n"%(tuple(array_data[plate,arrayrow,arraycol])))


def create_allh2_peakswap_plate_annotations():
    """Write the allele-swap annotation files of the all-h2 plate to the "swaps" directory.

    For each plate set in `alleles`, one file <plateset>-<plate>.tab is
    written per source subplate (A, B, C, D); the set "BYx3" gets a single
    file for "ABCD". In each file, the allele with index a is written at row
    offset a // 2 and column offset a % 2 of every 2x2 group. All wells are
    Haploid.
    """
    plateannotfile = "%s/candidates/2012-12-19_all-h2_excelfmt.tab"%DATA_DIR
    orfs = read_cherrypick_data(plateannotfile, False)
    array_locs = get_array_locs(orfs)

    array_data = get_array_data(skip_nan=False)
    outdir = "%s/cytometry/Pilot_screen_BYxRM/annotation/swaps"%(DATA_DIR)
    if not os.path.isdir(outdir): os.makedirs(outdir)

    alleles = {"RM": ["RM","RM-IRA2","RM-MKT1","RM-IRA2-MKT1"], "BY":["BY", "BY-MKT1", "BY-IRA2", "BY-IRA2-MKT1"], "BY-haploid-series":["BY", "BY-HAP1", "BY-IRA2-MKT1", "BY-HAP1-IRA2-MKT1"], "BYx3":["BY-HAP1-IRA2-MKT1"]*4}

    for plateset in alleles:
        plates = ["A","B","C","D"]
        if plateset == "BYx3": plates = ["ABCD"]
        for plate in plates:
            # source offsets within each 2x2 group; both are 0 for "ABCD" and are set per allele below
            plate_rowextra, plate_colextra = int(plate in "CD"), int(plate in "BD")
            with open("%s/%s-%s.tab"%(outdir, plateset, plate), 'w') as ofh:
                ofh.write("#Row\tCol\tType\tPloidy\tSGD_name\tCommon_name\tTAP_GFP_level\tLocalisation\n")
                for a, allele in enumerate(alleles[plateset]):
                    i, j = a//2, a%2 # destination offsets; // keeps them integers on Python 3 as well
                    if plate == "ABCD": plate_rowextra, plate_colextra = i, j # each position is filled from the subplate at the same position
                    for row in range(8): # for each row and column
                        for col in range(12):
                            source = (2*row + plate_rowextra + 1, 2*col + plate_colextra + 1)
                            if source not in array_locs: continue
                            ap, ar, ac = array_locs[source]
                            ofh.write("%d\t%d\t%s\tHaploid"%(2*row + i + 1, 2*col + j + 1, allele))
                            ofh.write("\t%s\t%s\t%s\t%s\n"%(tuple(array_data[ap,ar,ac])))


def create_GFPswap_plate_annotations():
    """Write GFP_Haploid-R1_4x.tab and GFP_Diploid-R1_4x.tab to the "swaps" directory.

    The layout is read from DATA_DIR/candidates/2013-05-05_RM-GFP_Amy-List.tab
    (first line skipped; first two tab-separated fields are well and gene).
    Each well whose gene starts with "Y" fills the corresponding 2x2 group of
    the output plate. The strain type depends on the parity of the output row
    and column.
    """
    plateannotfile = "%s/candidates/2013-05-05_RM-GFP_Amy-List.tab"%DATA_DIR
    with open(plateannotfile, 'r') as ifh:
        loc_raw_data = [x.strip().split("\t") for x in ifh.readlines()[1:]]
    plate_locs = {}
    for d in loc_raw_data: # create map of (plate row, plate col) => gene
        if len(d) < 2: continue # blank line or well without a gene field
        well, gene = d[0:2]
        if not gene.startswith("Y"): continue # if no ORF, skip - "Leoxx" annotation, or EMPTY; also safe for an empty field
        row = ord(well[0]) - ord('A') # row and col are 0-based
        col = int(well[1:3]) - 1
        for i in range(2):
            for j in range(2):
                plate_locs[(2*row + i + 1, 2*col + j + 1)] = gene

    array_locs = get_array_locs(plate_locs)
    array_data = get_array_data(skip_nan=False)
    subplate_types = [["RM-RMgfp","RM-BYgfp"], ["BY-RMgfp","BY-BYgfp"]] # indexed by [row % 2][col % 2]
    outdir = "%s/cytometry/Pilot_screen_BYxRM/annotation/swaps"%DATA_DIR
    if not os.path.isdir(outdir): os.makedirs(outdir) # create the directory like the other annotation writers do

    for ploidy in ("Haploid", "Diploid"):
        with open("%s/GFP_%s-R1_4x.tab"%(outdir, ploidy), 'w') as ofh:
            ofh.write("#Row\tCol\tType\tPloidy\tSGD_name\tCommon_name\tTAP_GFP_level\tLocalisation\n")
            for c_row in range(1,17):
                for c_col in range(1,25):
                    if (c_row, c_col) not in array_locs: continue # well not filled
                    ofh.write("%s\t%s\t%s\t%s"%(c_row, c_col, subplate_types[c_row%2][c_col%2], ploidy)) # row and column of current strain in the plate
                    plate_array384, row_array384, col_array384 = array_locs[(c_row, c_col)] # strain corresponding to the plate location
                    ofh.write("\t%s\t%s\t%s\t%s\n"%(tuple(array_data[plate_array384,row_array384,col_array384])))





def create_test_plateannot():
    """Create an annotation file for every test-screen plate directory found on disk.

    Plates are the entries matching
    DATA_DIR/cytometry/Pilot_screen_BYxRM/test-screen/P*. If the plate name
    contains "_", the part after the last "_" gives the subplate letters,
    otherwise "ABCD" is used. Names containing "Hap1" or "sepa" select
    alternating strain types; all others use the default types.
    """
    platefile = "%s/candidates/2012-07-28_Cherrypicking-Leo_candidates-384.tab"%(DATA_DIR)
    location_data = None # parsed once, on first use, and shared by all plates
    for testdir in glob.glob("%s/cytometry/Pilot_screen_BYxRM/test-screen/P*"%DATA_DIR):
        plate = testdir.split("/")[-1]
        outfilename = plate + ".tab"
        subplates = "ABCD"
        if plate.count("_") > 0:
            subplates = plate.split("_")[-1]
        subplate_types = None
        if plate.count("Hap1") > 0:
            subplate_types = ["BY","BY-HAP1"]*2
        elif plate.count("sepa") > 0:
            subplate_types = ["BY","RM"]*2
        if location_data is None: location_data = read_cherrypick_data(platefile)
        create_randomplate_plateannot(location_data, subplates=subplates, outfilename=outfilename, screen="test-screen", subplate_types=subplate_types)

if __name__ == "__main__":
    # Regenerate the Hap1 screen annotation only when this file is run as a
    # script; importing the module for its functions no longer rewrites files.
    create_gfparray_plateannot(screen="Hap1", layout="BY/BY-HAP1", ploidy="Haploid")
