import matplotlib.backends
# Set the module-level backend name to PDF ahead of the project imports below.
# NOTE(review): presumably meant as a backend switch; the documented API for
# that is matplotlib.use() - confirm this assignment has the intended effect.
matplotlib.backends.backend = "pdf"
from parts2014_gfpvar.analysis.cyto.fcs import *
from parts2014_gfpvar.analysis.array.util import *
from parts2014_gfpvar.analysis.array.infer import *
from parts2014_gfpvar.analysis.seq.util import *
from parts2014_gfpvar.analysis.seq.plot import *
from parts2014_gfpvar.analysis.qtl.repro import *
from parts2014_gfpvar.analysis.qtl.util import *
from parts2014_gfpvar.analysis.tecan.plot import *
from parts2014_gfpvar.analysis.img.plot import *
from parts2014_gfpvar.analysis.img.util import read_combined_file, get_raw_microscopy_data
from parts2014_gfpvar.tools.io import *
from parts2014_gfpvar.analysis.mrna.util import *

# Global matplotlib defaults (font sizes, default figure size) applied once at
# import time to every figure produced by this module.
# NOTE(review): 'backend' is 'ps' here while matplotlib.backends.backend is set
# to "pdf" at the top of the file - confirm which of the two is intended.
PLOT_PARAMS = {'text.fontsize':10, 'xtick.labelsize':8, 'ytick.labelsize':8, 
               'text.size':10, 'axes.titlesize':10, 'axes.labelsize':10, 
               'figure.figsize':(16,10), 'legend.fontsize':10, 'backend':'ps'}
PL.rcParams.update(PLOT_PARAMS)

# File formats in which the figure_* functions save their output.
EXTENSIONS = ["pdf"] # ["svg", "png", "pdf"]


""" Trait = 3 means linear mean. In general, 0=log mean, 1=log median, 2=log SD, 3=linear mean, 4=linear median, 5=linear SD, 6=cell count. All these are from the 10% slice"""
def get_parent_plate_data(min_cells=500, trait=3, parent="RM"):
    """Pair the two haploid replicate screens well by well for one parent.

    Reads ``table_S2-singles.tab`` and, for each plate P1..P11, matches the
    "haploid-R1" and "haploid-R2" rows of `parent` by ORF (column 4).

    min_cells -- minimum cell count (column 14) required in both replicates
    trait     -- trait index; the value is read from column 8+trait
    parent    -- parent label matched against column 5

    Returns three dicts keyed by plate name ("P1".."P11"):
    a float array with one [R1, R2] row per ORF (NaN pair when the ORF is not
    present exactly once in both replicates), a bool array that is True where
    both replicates have enough cells and "OK" in column 7, and the unique
    replicate 1 ORFs in the same row order.
    """
    table = SP.loadtxt("%s/paper/table_S2-singles.tab"%DATA_DIR, delimiter="\t", dtype=object)
    value_col = 8 + trait
    values, flags, used_orfs = {}, {}, {}

    for plate_no in range(1, 12):
        plate = "P%d"%plate_no
        # table rows of each replicate for this parent and plate
        rows_r1 = SP.where((table[:,1] == "haploid-R1") & (table[:,5] == parent) & (table[:,2] == plate))[0]
        rows_r2 = SP.where((table[:,1] == "haploid-R2") & (table[:,5] == parent) & (table[:,2] == plate))[0]
        orfs = SP.unique(table[rows_r1,4])

        pairs, passed = [], []
        for orf in orfs:
            hits1 = SP.where(table[rows_r1,4] == orf)[0]
            hits2 = SP.where(table[rows_r2,4] == orf)[0]
            if len(hits1) == 1 and len(hits2) == 1:
                a, b = rows_r1[hits1[0]], rows_r2[hits2[0]]
                pairs.append([table[a, value_col], table[b, value_col]])
                passed.append((int(table[a, 14]) >= min_cells) and (int(table[b, 14]) >= min_cells) and (table[a, 7] == "OK") and (table[b, 7] == "OK"))
            else:
                # ORF missing or duplicated in one of the replicates
                pairs.append([SP.nan, SP.nan])
                passed.append(False)

        values[plate] = SP.array(pairs, float)
        flags[plate] = SP.array(passed, bool)
        used_orfs[plate] = orfs
    return values, flags, used_orfs

""" """    
def get_cyto_plate_diffs(min_cells, trait):
    """Return per-plate RMxBY minus BYxBY trait differences for both replicates.

    Wells are kept only if their ORF passes the replicate quality flags that
    get_parent_plate_data computes for the "BY" parent.

    min_cells -- forwarded to get_parent_plate_data
    trait     -- trait index into the last axis of the plate values; for
                 trait >= 3 the difference is taken between log10 values

    Returns a dict mapping plate number (1..11) to [delta_rep1, delta_rep2],
    two 1-D arrays restricted to the flagged wells.
    """
    _, plate_flags, plate_orfs = get_parent_plate_data(min_cells, trait, "BY")
    data, meta = get_plate_screen_parent_vals("%s/paper/table_S2-singles.tab"%DATA_DIR)
    crosses = ["BYxBY", "RMxBY"]
    reps = ["Rep1","Rep2"]
    diffs = {}
    for plate in range(1,12):
        strplate = "P%d"%plate
        # ORF -> quality flag for this plate
        orf_flags = dict(zip(plate_orfs[strplate], plate_flags[strplate]))
        vals = get_combined_cytometry_plate_values(crosses, reps, plate, data) # this is Parents x Reps x rows x cols x traits

        # carry the per-ORF flags over to well positions on the 16x24 plate
        well_meta = meta["Rep1"]["BY"][plate]
        flags = SP.zeros([16,24], bool)
        for r in range(16):
            for c in range(24):
                if (r,c) not in well_meta: continue
                orf = well_meta[(r,c)][4]
                if orf in orf_flags: flags[r,c] = orf_flags[orf]

        byxby, rmxby = vals[0,:,:,:,trait], vals[1,:,:,:,trait]
        # unpacking splits along the replicate axis
        if trait >= 3:
            delta1, delta2 = SP.log10(rmxby) - SP.log10(byxby)
        else:
            delta1, delta2 = rmxby - byxby
        I = SP.where(flags.reshape(384))[0]
        diffs[plate] = [delta1.reshape(384)[I], delta2.reshape(384)[I]]
    return diffs


""" """
def get_cyto_similarities():
    """Return log10 |mean| of three column groups of ``table_S2-combined.tab``.

    The header entries of the selected columns are printed as a sanity check.
    NOTE(review): the original comment expected them to be
    [average(Difference_in_linear_10pct_mean), average(RMxBY_10pct_linear_SD),
    MSE(RMxBY_10pct_linear_mean)] - verify against the table header.

    Returns a float array with one row per column group and one column per
    data row of the table.
    """
    data = SP.loadtxt("%s/paper/table_S2-combined.tab"%DATA_DIR, delimiter="\t", dtype=object)
    column_groups = [[61],[13,39],[10,36]]
    # single-argument print(...) behaves the same on Python 2 and 3
    for group in column_groups: print(data[0][group])
    vals = []
    for row in data[1:]:
        # explicit lists instead of map(): SP.mean needs a sequence, not an iterator
        vals.append([SP.log10(abs(SP.mean([float(v) for v in row[group]]))) for group in column_groups])
    return SP.array(vals, float).T



""" """
def get_cyto_parent_sds():
    """Return log10 of columns 13 and 39 of ``table_S2-combined.tab``.

    The two header entries are printed as a sanity check; per the original
    comment they should be the two SD columns.

    Returns a float array with two rows (one per column) and one column per
    data row of the table.
    """
    data = SP.loadtxt("%s/paper/table_S2-combined.tab"%DATA_DIR, delimiter="\t", dtype=object)
    sd_columns = [13,39]
    # single-argument print(...) behaves the same on Python 2 and 3
    for i in sd_columns: print(data[0][i]) # should be the two SDs
    vals = []
    for row in data[1:]:
        vals.append([SP.log10(float(row[i])) for i in sd_columns])
    return SP.array(vals, float).T

def get_img_mean():
    """Load ``table_S2-img.tab`` into a [12 x 384] array indexed by plate and well.

    Column 1 holds the plate index, column 2 the well name (row letter followed
    by a 1-based column number) and column 3 the value. Wells that do not occur
    in the file stay NaN.
    """
    records = SP.loadtxt("%s/paper/table_S2-img.tab"%DATA_DIR, dtype=object)
    per_plate = SP.zeros([12,384], float)*SP.nan
    for idx in range(len(records)):
        plate = int(records[idx,1])
        well = records[idx,2]
        value = float(records[idx,3])
        # flatten "B7"-style names: 24 wells per lettered row
        row_offset = 24*(ord(well[0]) - ord('A'))
        per_plate[plate, row_offset + int(well[1:]) - 1] = value
    return per_plate
    

def get_figure_S4_cyto_img_data(cyto_screen="haploid-R2", min_cyto_cells=200, min_img_total_cells=50, min_img_pic_cells=10, cyto_mean_trait=3, cyto_var_trait=5, img_mean_trait=0, img_var_trait=7):
    """Collect per-plate cytometry traits and the imaging means for figure S4.

    Only cyto_mean_trait and cyto_var_trait are used; the remaining parameters
    are accepted but have no effect in this implementation.

    Returns (cyto_mean, cyto_var, img_mean, None): the first two are dicts
    mapping plate 1..10 to 384 values of the 'Rep1' / 'BY' cytometry data, the
    third is the array returned by get_img_mean(), and the last slot (image
    variance) is not populated.
    """
    cyto_data, meta = get_plate_screen_parent_vals("%s/paper/table_S2-singles.tab"%DATA_DIR)
    by_rep1 = cyto_data['Rep1']['BY']
    plates = range(1,11)
    cyto_mean = {plate: by_rep1[plate][:,:,cyto_mean_trait].reshape(384) for plate in plates}
    cyto_var = {plate: by_rep1[plate][:,:,cyto_var_trait].reshape(384) for plate in plates}
    return cyto_mean, cyto_var, get_img_mean(), None


def get_figure_S4_data(cyto_mean_trait=3):
    """Assemble plate-aligned mean measurements of all assays for figure S4.

    cyto_mean_trait -- cytometry trait index; it is now forwarded to
                       get_figure_S4_cyto_img_data (it used to be ignored).

    For plates 1..10 every well contributes one value per assay. Within a
    plate each assay is centred on its NaN-ignoring mean and shifted by +2 so
    that plates are comparable.

    Returns a list of five arrays: [cytometry, imaging, mRNA, Tecan, TAP].
    """
    cyto_imgplate, _, img_meanplate, _ = get_figure_S4_cyto_img_data(cyto_mean_trait=cyto_mean_trait) # plate->[384 values]
    # The original also re-read table_S2-singles.tab here without using the
    # result; get_figure_S4_cyto_img_data already reads it, so that is dropped.
    mrna_data = get_sample_mrna()["B2"] # orf->value
    tecan_data = combine_tecans(get_tecans(), calc_sds=False) # plate->parents->[16x24]
    clean_tecan_plates(tecan_data)
    tecan_data = tecan_data["BYs"]
    # NOTE(review): rows and columns are shifted to 0-based below, the plate
    # axis is not - confirm that indexing it with plate 1..10 is intended.
    tap_data = get_array_data(skip_nan=False, debug_skipped=False)[:,1:][:,:,1:] # now 0-indexed, [11x16x24x4]
    unusable_tap = ["low signal", "not visualized", "technical problem"]
    cyto_mean, means = [],[[],[],[],[]]

    for plate in range(1,11):
        cym, imm, mrm, tem, tam = [],[],[],[],[]
        for row in range(16):
            for col in range(24):
                orf = tap_data[plate][row][col][0]
                cym.append(cyto_imgplate[plate][24*row+col])
                imm.append(img_meanplate[plate][24*row+col])   # for imaging, take from plates
                if orf in mrna_data: mrm.append(SP.log10(mrna_data[orf])) # for mRNA, get the value from ORF, else nan
                else: mrm.append(SP.nan)
                tem.append(tecan_data[plate]["BY"][row][col]) # for tecan, take from plate
                v = tap_data[plate][row][col][2]
                if v not in unusable_tap:  tam.append(SP.log10(float(v)))
                else:  tam.append(SP.nan)  # for tap, take from array data
        for m,mu in enumerate([imm, mrm, tem, tam]): # for each type of assay, align plate to cytometry, extend total list
            means[m].extend(SP.array(mu) - ST.nanmean(mu) + 2)
        cyto_mean.extend(SP.array(cym) - ST.nanmean(cym) + 2)
    # a real list (not map()) so the result is the same on Python 2 and 3
    return [SP.array(series) for series in [cyto_mean] + means]



""" Figure S1 - gating and clustering of one cytometry plate """
def figure_S1(screen="haploid-R3", plate="P9", threshold=0.8):
    """Draw figure S1: gating and clustering of one cytometry plate.

    screen, plate -- select the FCS data and the precomputed cluster pickle
    threshold     -- minimum posterior probability for assigning a cell to a
                     cluster

    Panels: FSC-A/SSC-W gate, FSC-A/mCherry-A gate (both on 2000 randomly
    chosen events), mCherry-A vs SSC-W coloured by cluster, and mCherry-A
    histograms for the second budding cluster. The figure is saved once per
    entry of EXTENSIONS.
    """
    def outline_gate(left, bottom, right, top):
        # dashed red rectangle: bottom, top, left and right edge, in that order
        for xs, ys in [([left,right],[bottom,bottom]), ([left,right],[top,top]),
                       ([left,left],[bottom,top]), ([right,right],[bottom,top])]:
            PL.plot(xs, ys, 'r--', linewidth=3, alpha=0.5)

    data, cols, filters = read_plate_fcs_random_data(screen, plate, filters=[])

    # random subset used for the two gating panels
    subset = data[SP.random.choice(range(len(data)), 2000, replace=False)]
    fsc, ssc, rfp = cols.index("FSC-A"), cols.index("SSC-W"), cols.index("mCherry-A")
    PL.figure(figsize=(7,7))

    # panel 1: FSC / SSC gate
    PL.subplot(221)
    PL.plot(subset[:,fsc], subset[:,ssc], ".", markersize=18, alpha=0.04)
    outline_gate(4.25, 4.79, 4.91, 4.95)
    PL.xlim(4.2,5.3); PL.ylim(4.78,5.2)
    PL.xlabel("FSC-A"); PL.ylabel("SSC-W")

    # panel 2: FSC / RFP gate
    PL.subplot(222)
    PL.plot(subset[:,fsc], subset[:,rfp], ".", markersize=18, alpha=0.04)
    outline_gate(4.25, 2.45, 4.91, 4)
    PL.xlim(4.2,5.3); PL.ylim(1.8,4.2)
    PL.xlabel("FSC-A"); PL.ylabel("mCherry-A")

    # precomputed clustering of all events
    pickle_path = glob.glob("%s/cytometry/Pilot_screen_BYxRM/%s/stats*/cluster-gmm_plate-%s.pickle"%(DATA_DIR, screen,plate))[0]
    r = cl(pickle_path)
    gmm, rfp_gmms, means, sds, cols = r['budding_GMM'], r['rfp_GMMs'], r['means'], r['sds'], r['features']
    data = (data-means)/sds # normalise data to make the features directly comparable
    p = gmm.predict_proba(data[:,[fsc,ssc]]) # budded/unbudded posterior

    # panel 3: every budding cluster, split further on RFP
    PL.subplot(223)
    total_clustered = 0
    for c in range(2):
        members = data[SP.where(p[:,c] > threshold)[0]]
        p_rfp = rfp_gmms[c].predict_proba(members[:,1:2])
        for j in range(2):
            Ir = SP.where(p_rfp[:,j] > threshold)[0]
            PL.plot(members[:,rfp][Ir], members[:,ssc][Ir], ".", markersize=10, alpha=0.01)
            total_clustered += len(Ir)
        Ir = SP.where(p_rfp.max(axis=1) < threshold)[0] # not assigned to either RFP cluster
        PL.plot(members[:,rfp][Ir], members[:,ssc][Ir], "k.", markersize=5, alpha=0.01)
    PL.xlabel("mCherry-A; %d cells clustered out of %d"%(total_clustered, len(data))); PL.ylabel("SSC-W")

    # panel 4: RFP histograms for cluster 1 only (unbudded, per the original comment)
    PL.subplot(224)
    c = 1
    members = data[SP.where(p[:,c] > threshold)[0]]
    p_rfp = rfp_gmms[c].predict_proba(members[:,1:2])
    for j in range(2):
        Ir = SP.where(p_rfp[:,j] > threshold)[0]
        PL.hist(members[:,rfp][Ir], bins=SP.arange(-4,2, 0.25), log=True)
    PL.xlabel("mCherry-A (log10 scale)"); PL.ylabel("Frequency")

    for ext in EXTENSIONS:
        PL.savefig("%s/paper/figure_S1.%s"%(DATA_DIR, ext), dpi=300)


""" Reproducibility of means and variances across screens """
def figure_S2(min_cells=200, trait=3, parent="RM", lim=(2.5,5)):
    """Plot replicate 1 against replicate 2 for every plate (figure S2a/S2b).

    min_cells, trait, parent -- forwarded to get_parent_plate_data
    lim                      -- (low, high) limits used for both axes

    Wells failing the quality flags are drawn in red, the others in blue; the
    correlations shown are computed on the passing wells only. Panels are laid
    out in numeric plate order (a plain string sort would place P10 and P11
    before P2). Saved as figure_S2b for traits 2 and 5, otherwise figure_S2a.
    """
    plate_vals, plate_flags, plate_orfs = get_parent_plate_data(min_cells, trait, parent)

    PL.figure(figsize=(7.5,11.5))
    plates = sorted(plate_vals, key=lambda name: int(name[1:])) # "P10" -> 10
    for panel, p in enumerate(plates, 1):
        vals, flags = plate_vals[p], plate_flags[p]
        Igood = SP.where(flags)[0]
        Ibad = SP.where(~flags)[0]
        ax = PL.subplot(4,3,panel)
        PL.plot(vals[Ibad,0], vals[Ibad,1], "r.", markersize=15, alpha=0.2) # bad wells
        PL.plot(vals[Igood,0], vals[Igood,1], "b.", markersize=15, alpha=0.2) # good wells
        PL.xlabel("Plate %s R1 "%(p[1:]))
        if panel % 3 == 1: PL.ylabel("R2") # only on leftmost subplots
        PL.xlim(*lim); PL.ylim(*lim)
        PL.text(0.1,0.8, "r=%.2f\nrho=%.2f"%(SP.corrcoef(vals[Igood,0],vals[Igood,1])[0,1], ST.spearmanr(vals[Igood,0],vals[Igood,1])[0]), transform=ax.transAxes)

    panel_name = "figure_S2a" if trait not in [2,5] else "figure_S2b"
    for ext in EXTENSIONS:
        PL.savefig("%s/paper/%s.%s"%(DATA_DIR, panel_name, ext), dpi=300)
    

def plot_lim(x,y,xlim,ylim, alpha=0.2, markersize=12):
    """Scatter x against y with both coordinates clipped to +-xlim / +-ylim.

    Points outside the limits are drawn on the border instead of being lost.
    The clipping is done on copies, so the caller's arrays are left untouched
    (the previous version overwrote them in place).
    """
    x, y = SP.array(x), SP.array(y) # SP.array copies its input
    x[SP.where(x < -xlim)[0]] = -xlim
    x[SP.where(x > xlim)[0]] = xlim
    y[SP.where(y < -ylim)[0]] = -ylim
    y[SP.where(y > ylim)[0]] = ylim
    PL.plot(x, y, "b.", alpha=alpha, markersize=markersize)
    # small margin so that border points stay visible
    PL.xlim(-1.02*xlim, 1.02*xlim)
    PL.ylim(-1.02*ylim, 1.02*ylim)

    
""" Reproducibility of the change in protein levels """
def figure_S2c(min_cells=500, trait=4, plot_individual=False):
    """Plot the reproducibility of the change in protein levels (figure S2c).

    min_cells, trait -- forwarded to get_cyto_plate_diffs
    plot_individual  -- additionally draw one panel per plate in a separate
                        figure (that figure is not saved here)

    Pools the replicate 1 and replicate 2 differences of plates 1..11, plots
    them against each other, and prints and annotates their Pearson and
    Spearman correlation.
    NOTE(review): get_cyto_plate_diffs computes RMxBY - BYxBY while the axis
    label reads "BYxBY - RMxBY" - confirm the intended sign.
    """
    diffs, alldiffs = get_cyto_plate_diffs(min_cells, trait), [[],[]]
    if plot_individual: PL.figure(figsize=(7.5,11.5)); pi = 1

    for plate in range(1,12):
        alldiffs[0].extend(diffs[plate][0]); alldiffs[1].extend(diffs[plate][1])
        if plot_individual: PL.subplot(4,3,pi); pi += 1; plot_lim(diffs[plate][0], diffs[plate][1], 0.3,0.3)

    PL.figure(None, [3.5,3.5])
    plot_lim(SP.array(alldiffs[0]), SP.array(alldiffs[1]), 0.3,0.3, alpha=0.1)
    # correlations are computed once, on the unclipped values
    r = ST.pearsonr(alldiffs[0],alldiffs[1])[0]
    rho = ST.spearmanr(alldiffs[0],alldiffs[1])[0]
    print("%s %s"%(r, rho)) # same output as the former "print r, rho", valid on Python 2 and 3
    PL.text(0.05,0.85, "r=%.2f\nrho=%.2f"%(r, rho), transform=PL.gca().transAxes)
    PL.xlabel("BYxBY - RMxBY R1"); PL.ylabel("R2")
    for ext in EXTENSIONS: PL.savefig("%s/paper/figure_S2c.%s"%(DATA_DIR, ext), dpi=300)


def figure_S2d(p1="X2", p2="X3", min_cov=20):
    """Plot log10 mRNA values of two samples against each other (figure S2d).

    p1, p2  -- sample names in the dict returned by get_sample_mrna()
    min_cov -- genes need a value above this in both samples to be shown
    """
    covs = get_sample_mrna()
    pairs = []
    for orf in covs[p1]:
        if orf not in covs[p2]:
            continue
        if not ((covs[p1][orf] > min_cov) and (covs[p2][orf] > min_cov)):
            continue
        pairs.append([SP.log10(covs[p1][orf]+0.5), SP.log10(covs[p2][orf]+0.5)])
    rep1, rep2 = SP.array(pairs).T[0], SP.array(pairs).T[1]
    both = SP.array(pairs).T

    PL.figure(None, [3.5,3.5])
    PL.plot(rep1, rep2, ".", markersize=10, alpha=0.05)
    PL.xlabel("mRNA-seq replicate 1")
    PL.ylabel("mRNA-seq replicate 2")
    # correlations only over genes that are finite in both samples
    finite = SP.where(~(SP.isnan(both).any(axis=0)))[0]
    r = ST.pearsonr(rep1[finite], rep2[finite])[0]
    rho = ST.spearmanr(rep1[finite], rep2[finite])[0]
    PL.text(1.25, 3.5, "r=%.2f\nrho=%.2f"%(r, rho))
    for ext in EXTENSIONS:
        PL.savefig("%s/paper/figure_S2d.%s"%(DATA_DIR, ext), dpi=300)


def figure_S3ab():
    """Plot the parent-offspring difference against two variance measures.

    Uses the three rows returned by get_cyto_similarities(): row 0 on the
    y-axis of both panels, half of row 1 on the x-axis of the left panel and
    row 2 on the x-axis of the right panel. Saved as figure_S3ab.
    NOTE(review): the right x-label says "/2" but row 2 is plotted undivided -
    confirm which is intended.
    """
    sims = get_cyto_similarities()
    difference = sims[0]
    upper = 5
    PL.figure(None, [7.5,3.5])

    # left: cell-cell variance
    PL.subplot(121)
    PL.plot(sims[1]/2., difference, "b.", alpha=0.06, markersize=12)
    PL.plot([2,upper],[2,upper], 'r--')
    PL.xlim(2,5); PL.ylim(0,5)
    PL.xlabel("log10(cell-cell variance)/2")
    PL.ylabel("log10(|parent-offspring difference|)")

    # right: experiment-experiment variance
    PL.subplot(122)
    PL.plot(sims[2], difference, "b.", alpha=0.06, markersize=12)
    lower = 0
    PL.plot([lower,upper],[lower,upper], 'r--')
    PL.xlim(lower,upper); PL.ylim(lower,upper)
    PL.xlabel("log10(experiment-experiment variance)/2")
    PL.ylabel("log10(|parent-offspring difference|)")

    for ext in EXTENSIONS:
        PL.savefig("%s/paper/figure_S3ab.%s"%(DATA_DIR, ext), dpi=300)


def figure_S3c():
    """Plot the two rows of get_cyto_parent_sds() against each other.

    Row 0 is labelled BYxBY, row 1 RMxBY; the Spearman correlation is written
    into the panel and the result saved as figure_S3c.
    """
    sds = get_cyto_parent_sds()
    byxby, rmxby = sds[0], sds[1]
    PL.figure(None, [3.5,3.5])
    PL.plot(byxby, rmxby, "b.", markersize=12, alpha=0.1)
    PL.plot([4,10],[4,10], 'r--') # identity line
    PL.xlabel("log10(BYxBY cell-cell variance)")
    PL.ylabel("log10(RMxBY cell-cell variance)")
    rho = ST.spearmanr(byxby,rmxby)[0]
    PL.text(0.05,0.9, "rho=%.2f"%(rho), transform=PL.gca().transAxes)
    for ext in EXTENSIONS:
        PL.savefig("%s/paper/figure_S3c.%s"%(DATA_DIR, ext), dpi=300)
    

def figure_S3defghijk():
    """Plot histograms of the eight heritability estimates in ``table_S4.tab``.

    One panel per table column (3,5,4,6,7,9,8,10 in that order); the first
    four panels are labelled "raw", the last four "EB". Saved as
    figure_S3defghijk.
    """
    d = SP.loadtxt("%s/paper/table_S4.tab"%DATA_DIR, delimiter="\t", dtype=object)
    f = PL.figure(None, [7,10])
    scales = ["GFP","GFP","log10(GFP)","log10(GFP)","GFP","GFP","log10(GFP)","log10(GFP)"]
    priors = ["raw", "EB"]
    types = ["single cell level", "population average", "single cell level", "population average", "single cell level", "population average", "single cell level", "population average"]

    for k, c in enumerate([3,5,4,6,7,9,8,10]): # for all the different heritability estimates
        PL.subplot(4,2,k+1)
        PL.hist([float(x[c]) for x in d[1:]], range=(0,1), bins=10, alpha=0.6)
        # explicit floor division: a plain "/" yields a float index on Python 3
        PL.xlabel("%s $H^2$: %s, %s"%(priors[k//4], scales[k], types[k]))
        if c in [3,4,7,8]: PL.ylabel("# genes")
    f.subplots_adjust(hspace=0.4)
    for ext in EXTENSIONS: PL.savefig("%s/paper/figure_S3defghijk.%s"%(DATA_DIR, ext), dpi=300)


def plot_figure_S4e(min_cov=20):
    """Plot the BYxBY-RMxBY mRNA difference against the protein difference.

    min_cov -- genes need a value above this in all four mRNA samples
               (B2, B3, X2, X3)

    Draws into the current axes. Genes whose two B or two X samples disagree
    by more than one log2 unit after mean normalisation are left out of the
    plot and of the Spearman correlation.
    """
    covs = get_sample_mrna()
    cyto = SP.loadtxt("%s/paper/table_S2-combined.tab"%DATA_DIR, delimiter="\t", dtype=object)
    cval = {}
    for i in range(1, len(cyto)):
        cval[cyto[i][0]] = [SP.log10(float(cyto[i][9])), SP.log10(float(cyto[i][35]))]
    means = {s:ST.nanmean(covs[s].values()) for s in covs}
    log10corrfactor = SP.log10(means["X2"]*means["X3"]/(means["B2"]*means["B3"]))

    def lg10(sample, orf):
        return SP.log10(covs[sample][orf]+0.5)

    def centred_log2(sample, orf):
        return SP.log2(covs[sample][orf]+0.5) - SP.log2(means[sample])

    vals, discrepant = [], []
    for o in covs["B2"]:
        if not (o in covs["B3"] and o in covs["X2"] and o in covs["X3"] and o in cval):
            continue
        if not (covs["B2"][o] > min_cov and covs["B3"][o] > min_cov and covs["X2"][o] > min_cov and covs["X3"][o] > min_cov):
            continue
        vals.append([log10corrfactor + lg10("B2", o) + lg10("B3", o) - lg10("X2", o) - lg10("X3", o), cval[o][0] - cval[o][1]])
        # consistent only if both the B pair and the X pair agree within one log2 unit
        consistent = (abs(centred_log2("B2", o) - centred_log2("B3", o)) <= 1
                      and abs(centred_log2("X2", o) - centred_log2("X3", o)) <= 1)
        discrepant.append(not consistent)

    vals = SP.array(vals).T
    keep = SP.where(~SP.array(discrepant, bool))[0]
    PL.plot(vals[1][keep], vals[0][keep], "b.", alpha=0.05, markersize=15)
    PL.ylim(-0.8,0.8)
    PL.yticks(SP.arange(-0.6,0.61,0.3))
    PL.xlim(-0.4,0.4)
    PL.xticks(SP.arange(-0.4,0.41,0.2))
    PL.ylabel("BYxBY mRNA - RMxBY mRNA")
    PL.xlabel("BYxBY protein - RMxBY protein")
    PL.text(0.05,0.9, "rho=%.2f"%(ST.spearmanr(vals[1][keep],vals[0][keep])[0]), transform=PL.gca().transAxes)


def plot_figure_S4f():
    """Scatter |column 2| against |column 3| of ``table_S5.tab``.

    The columns are labelled cis and trans effect size. Draws into the current
    axes together with a dashed identity line.
    """
    table = SP.loadtxt("%s/paper/table_S5.tab"%DATA_DIR, skiprows=1, dtype=object, delimiter="\t")
    abs_cis = abs(SP.array(table[:,2], float))
    abs_trans = abs(SP.array(table[:,3], float))
    PL.plot(abs_cis, abs_trans, ".", markersize=12)
    # identity line up to the largest effect on either axis
    largest = max(abs_cis.max(), abs_trans.max())
    PL.plot([0,largest], [0,largest], 'k--', alpha=0.5)
    PL.xlabel("cis effect size")
    PL.ylabel("trans effect size")


""" Plot mean or variance comparison of microscopy and cytometry """
def figure_S4(cyto_mean_trait=3, cyto_var_trait=5):
    """Compare cytometry means with four other assays and add panels e and f.

    cyto_mean_trait -- forwarded to get_figure_S4_data
    cyto_var_trait  -- accepted but not used

    Panels 1-4 plot TAP, microscopy, Tecan and mRNA-seq against cytometry with
    the Spearman correlation over wells that are finite in both assays;
    panels 5 and 6 come from plot_figure_S4e and plot_figure_S4f. Saved as
    figure_S4.
    """
    cyto_mean, img_mean, mrna_mean, tecan_mean, tap_mean = get_figure_S4_data(cyto_mean_trait=cyto_mean_trait)
    assays = ["TAP","Microscopy", "Tecan", "mRNA-seq"]
    assay_means = [tap_mean, img_mean, tecan_mean, mrna_mean]

    PL.figure(figsize=(7.5,11))
    for panel, (assay, other_mean) in enumerate(zip(assays, assay_means), 1):
        PL.subplot(3,2,panel)
        PL.plot(cyto_mean, other_mean, ".", markersize=10, alpha=0.05)
        PL.xlabel("Cytometry log10(mean)")
        PL.ylabel("%s log10(mean)"%(assay))
        finite = SP.where((~SP.isnan(cyto_mean)) & (~SP.isnan(other_mean)))[0]
        rho = ST.spearmanr(cyto_mean[finite],other_mean[finite])[0]
        PL.text(0.05,0.9, "rho=%.2f"%(rho), transform=PL.gca().transAxes)

    next_panel = len(assay_means) + 1
    PL.subplot(3,2,next_panel)
    plot_figure_S4e()
    PL.subplot(3,2,next_panel+1)
    plot_figure_S4f()
    for ext in EXTENSIONS:
        PL.savefig("%s/paper/figure_S4.%s"%(DATA_DIR, ext), dpi=300)

    
def figure_S5():
    """Draw figure S5 with plot_triangle() and save it in every format of EXTENSIONS."""
    plot_triangle()
    for ext in EXTENSIONS:
        target = "%s/paper/figure_S5.%s"%(DATA_DIR, ext)
        PL.savefig(target, dpi=300)
    
    
def figure_S6():
    """Plot post-sort GFP histograms of four sorted populations per strain.

    For HXT2 and PDR5 the populations "mm", "m", "p" and "pp" are read from
    the post-sort FCS files; only events with column 3 below 4.82 are kept and
    column 0 is histogrammed. Saved as figure_S6.
    """
    pops = ["mm","m","p","pp"]
    colors = {"mm":"#082902", "m":"#145c05", "p":"#23b509", "pp":"#30ed0e"}
    legend_labels = ["GFP very low\n(0-5%)", "GFP low\n(5-10%)", "GFP high\n(90-95%)", "GFP very high\n(95-100%)"]

    PL.figure(None, [7.5,7.5])
    for panel, strain in enumerate(["HXT2", "PDR5"], 1):
        PL.subplot(2,1,panel)
        for pop in pops:
            events = read_fcs("%s/cytometry/post-sort/%s_%s.fcs"%(DATA_DIR, strain, pop), log=True)[1]
            kept = SP.where(events[:,3] < 4.82)[0]
            # "m" and "pp" are drawn more opaque than "mm" and "p"
            opacity = 0.3 + 0.4*(pop in ["m","pp"])
            PL.hist(events[kept,0], alpha=opacity, range=(1.5,4), bins=40, color=colors[pop])
            PL.ylabel("# of cells")
        PL.title(strain.capitalize() + "-GFP")
        PL.legend(legend_labels)
    PL.xlabel("GFP fluorescence (log10-scale)")
    for ext in EXTENSIONS:
        PL.savefig("%s/paper/figure_S6.%s"%(DATA_DIR, ext), dpi=300)

        
""" Reproducibility of 
a) arrays and sequencing - one chromosome; 
bc) technical replicates of sorting (pp vs p)
d) biological replicates of sorting 
"""
def figure_S7abc(orf="YAL044C", array_chrm="2"):
    """Stack panels a, b and c of figure S7 in one figure and save it.

    orf        -- ORF name shown in the panel titles
    array_chrm -- chromosome number (as a string) shown in all three panels

    Both arguments are now forwarded to the panel functions; previously they
    were accepted but ignored, so only the panel defaults were ever drawn.
    """
    PL.figure(None, [7.5,9])
    PL.subplot(311)
    figure_S7a(orf=orf, array_chrm=array_chrm)
    PL.subplot(312)
    figure_S7b(orf=orf, chrm=array_chrm)
    PL.subplot(313)
    figure_S7c(orf=orf, chrm=array_chrm)
    PL.subplots_adjust(hspace=0.4)
    for ext in EXTENSIONS: PL.savefig("%s/paper/figure_S7abc.%s"%(DATA_DIR, ext), dpi=300)

    
""" Reproducibility of 
a) arrays and sequencing - one chromosome; 
bc) technical replicates of sorting (pp vs p)
d) biological replicates of sorting 
"""
def figure_S7a(orf="YAL044C", array_chrm="2"):
    """Overlay array and sequencing signal of one sample along a chromosome.

    orf        -- ORF name, used in the title only
    array_chrm -- chromosome number as a string; translated to the sequencing
                  chromosome key through its roman numeral

    Draws into the current axes. The array signal is shifted so that the
    median of its inferred mean matches the median of the sequencing mean.
    """
    # get data for same array/sequencing sample
    roman = "0 I II III IV V VI VII VIII IX X XI XII XIII XIV XV XVI".split()
    chr_names = get_seq_chr_names() # looked up once instead of once per chromosome
    seq_chrm = [x for x in chr_names if chr_names[x] == roman[int(array_chrm)]][0]
    array_sample = "A19_plus"
    seq_sample = "Sample2_Plate_A1"
    arrayfile = glob.glob("%s/arrays/condensed/*/%s.tab"%(DATA_DIR, array_sample))[0]
    array_dataset = arrayfile.split("/")[-2]
    array_locs, array_data, _ = read_array_file(array_sample, array_dataset) # read data for the samples
    array_m, _, array_z = infer_mean(array_locs[array_chrm], array_data[array_chrm], threshold=0.8, return_all=True)
    I = SP.where(array_z[:,2] > 0.5)[0]
    seq_ml_mean, seq_mean, seq_post_params, seq_bad_loc, seq_locs, seq_coverage = get_sample_seq_data(seq_sample)[seq_chrm]

    offset = SP.median(seq_mean) - SP.median(array_m) # aligns the array track with the sequencing track
    PL.plot(seq_locs, seq_mean, linewidth=5, alpha=0.5)
    PL.plot(array_locs[array_chrm], array_m + offset, "g-", linewidth=5, alpha=0.5)
    PL.plot(seq_locs, seq_ml_mean, "b.", markersize=12, alpha=0.03)
    PL.plot(array_locs[array_chrm][I], array_data[array_chrm][I] + offset, "g.", markersize=12, alpha=0.1)
    PL.title("%s - chrm %s"%(orf, chr_names[seq_chrm]))
    PL.xlabel("Genomic coordinates")
    PL.ylabel("RM allele signal")
    PL.legend(["Sequencing (coverage %dx)"%(SP.median(seq_coverage)),"Array"], ncol=2, loc="upper left")
    PL.ylim(-0.05, 0.75)
    PL.xlim(0,820000)


""" Reproducibility of 
a) arrays and sequencing - one chromosome; 
bc) technical replicates of sorting (pp vs p)
d) biological replicates of sorting 
"""
def _plot_seq_sample_pair(orf, chrm, seq_sample1, seq_sample2, legend_templates):
    """Overlay the allele frequency tracks of two sequencing samples.

    Shared implementation of figure_S7b and figure_S7c. Each entry of
    legend_templates is filled with the median coverage of its sample.
    """
    roman = "0 I II III IV V VI VII VIII IX X XI XII XIII XIV XV XVI".split()
    chr_names = get_seq_chr_names() # looked up once instead of once per chromosome
    seq_chrm = [x for x in chr_names if chr_names[x] == roman[int(chrm)]][0]
    seq_ml_mean1, seq_mean1, seq_post_params1, seq_bad_loc1, seq_locs1, seq_coverage1 = get_sample_seq_data(seq_sample1)[seq_chrm]
    seq_ml_mean2, seq_mean2, seq_post_params2, seq_bad_loc2, seq_locs2, seq_coverage2 = get_sample_seq_data(seq_sample2)[seq_chrm]
    # lines: the seq_mean tracks; dots: the seq_ml_mean values
    PL.plot(seq_locs1, seq_mean1, 'b-', linewidth=5, alpha=0.5)
    PL.plot(seq_locs2, seq_mean2, 'g-', linewidth=5, alpha=0.5)
    PL.plot(seq_locs1, seq_ml_mean1, "b.", markersize=12, alpha=0.03)
    PL.plot(seq_locs2, seq_ml_mean2, "g.", markersize=12, alpha=0.03)
    PL.title("%s - chrm %s"%(orf, chr_names[seq_chrm]))
    PL.xlabel("Genomic coordinates")
    PL.ylabel("RM allele frequency")
    PL.legend([legend_templates[0]%(SP.median(seq_coverage1)), legend_templates[1]%(SP.median(seq_coverage2))], ncol=2, loc="upper left")
    PL.ylim(-0.05, 1.05)
    PL.xlim(0,820000)


def figure_S7b(orf="YAL044C", chrm="2"):
    """Figure S7b: technical replicates of sorting (GFP very high vs GFP high).

    orf  -- ORF name, used in the title only
    chrm -- chromosome number as a string

    Draws into the current axes.
    """
    _plot_seq_sample_pair(orf, chrm, "Sample2_Plate_A1", "Sample2_Plate_B1",
                          ["GFP very high (coverage %dx)", "GFP high (coverage %dx)"])


def figure_S7c(orf="YAL044C", chrm="2"):
    """Figure S7c: replicates R1 and R2 of sorting on one chromosome.

    orf  -- ORF name, used in the title only
    chrm -- chromosome number as a string

    Draws into the current axes.
    """
    _plot_seq_sample_pair(orf, chrm, "Sample2_Plate_A1", "Sample4_73",
                          ["R1 (coverage %dx)", "R2 (coverage %dx)"])
    

def figure_S7def():
    """Arrange panels d, e and f of figure S7 on a 2x2 grid and save the figure."""
    PL.figure(None, [7.5,7.5])
    panels = [(221, figure_S7d), (222, figure_S7e), (223, figure_S7f)]
    for position, draw_panel in panels:
        PL.subplot(position)
        draw_panel()
    for ext in EXTENSIONS:
        PL.savefig("%s/paper/figure_S7def.%s"%(DATA_DIR, ext), dpi=300)
    
    
""" 
Concordance of allele frequency differences at mapped QTLs. 
QTL allele frequency difference in replicate 1 (x-axis) and replicate 2 (y-axis) for all regions with at least 0.15 mean and 4 standard deviations allele frequency difference in replicate 1.
"""
def figure_S7d(separate=False):
    """Plot discovery against replication allele frequency differences.

    separate -- draw into a new figure and show it instead of using the
                current axes

    Reads columns 8..13 of ``table_S7.tab``; the first of them is used as the
    discovery value and the fourth as the replication value. QTLs whose
    replication has the same sign and is at least rep_cutoff in size are blue,
    all others red. Dashed lines mark +-af_cutoff.
    """
    af_cutoff = 0.2
    rep_cutoff = 0.1
    d = SP.loadtxt("%s/paper/table_S7.tab"%DATA_DIR, delimiter="\t", dtype=object)
    # nested list comprehension instead of map(): SP.array needs real lists on Python 3
    qtls = SP.array([[float(str(x).replace('"', '')) for x in a] for a in d[:,8:14]])
    af,rep = qtls[:,0], qtls[:,3]
    Isd1 = SP.where((af*rep > 0) & (abs(rep) >= rep_cutoff))[0]
    Isd0 = SP.where((af*rep <= 0) | (abs(rep) < rep_cutoff))[0]
    if separate: PL.figure(figsize=(7.5,7))
    PL.plot(af[Isd1], rep[Isd1], "b.", markersize=15, alpha=0.1)
    PL.plot(af[Isd0], rep[Isd0], "r.", markersize=15, alpha=0.1)
    lim = 0.8
    PL.plot([-lim, lim], [0,0], 'k-')
    PL.plot([-lim, lim], [-lim,lim], 'k-')
    # cutoff markers now follow af_cutoff instead of a repeated literal 0.2
    PL.plot([-af_cutoff,-af_cutoff],[-lim, lim], 'k--')
    PL.plot([af_cutoff,af_cutoff],[-lim, lim], 'k--')
    PL.xlim(-lim+0.1, lim-0.1); PL.ylim(-lim+0.1, lim-0.1)
    PL.xlabel("Discovery AFs")
    PL.ylabel("Replication AFs")
    if separate: PL.show()

    
""" 
QTL yield at different allele frequency difference cutoffs. 
Total number of mapped QTLs in first replicate of sorting and sequencing (y-axis) depending on the cutoff (x-axis).
The cutoff for calling a QTL (solid red line) is used in the analyses.
"""     
def figure_S7e(separate=False):
    """Plot the number of QTLs above a range of allele frequency cutoffs.

    separate -- draw into a new figure and show it instead of using the
                current axes

    Counts the rows of ``table_S7.tab`` whose absolute value in column 8
    exceeds each cutoff from 0.15 to 0.7; the dashed red line marks af_cutoff.
    """
    af_cutoff = 0.2
    d = SP.loadtxt("%s/paper/table_S7.tab"%DATA_DIR, delimiter="\t", dtype=object)
    # nested list comprehension instead of map(): SP.array needs real lists on Python 3
    qtls = SP.array([[float(str(x).replace('"', '')) for x in a] for a in d[:,8:14]])
    af = qtls[:,0]
    x = SP.arange(0.15,0.7, 0.01)
    y = [sum(abs(af) > f) for f in x]
    if separate: PL.figure(figsize=(7.5,7))
    PL.plot(x, y, linewidth=4, alpha=0.5)
    PL.plot([af_cutoff,af_cutoff], [0,350], 'r--', linewidth=3, alpha=0.8)
    PL.xlabel("AF difference")
    PL.ylabel("# QTLs")
    PL.xlim(0.15, 0.7)
    if separate: PL.show()
    
    
"""
QTL reproducibility at different allele frequency difference cutoffs. 
"""
def figure_S7f(separate=False, cutoffs=(0.05,0.1)):
    """Plot QTL reproducibility as a function of the discovery cutoff.

    separate -- draw into a new figure and show it instead of using the
                current axes
    cutoffs  -- replication cutoffs; one curve is drawn per value

    For every discovery cutoff between 0.15 and 0.67 one curve per entry of
    `cutoffs` gives the fraction of QTLs above it whose replication has the
    same sign and at least that size. A final curve shows one minus the mean
    of column 13 of ``table_S7.tab`` over the same QTLs. Rows with a NaN
    discovery or replication value are ignored.
    """
    qtl_cutoff = 0.2
    d = SP.loadtxt("%s/paper/table_S7.tab"%DATA_DIR, delimiter="\t", dtype=object)
    # nested list comprehension instead of map(): SP.array needs real lists on Python 3
    qtls = SP.array([[float(str(x).replace('"', '')) for x in a] for a in d[:,8:14]])
    af, rep, pv = qtls[:,0], qtls[:,3], qtls[:,5]
    I = SP.where((~SP.isnan(af)) & (~SP.isnan(rep)))[0]
    af, rep, pv = af[I], rep[I], pv[I]
    x,y = SP.arange(0.15,0.67, 0.01), []
    for c in cutoffs:
        y.append([1.*sum((abs(af) >= f) & (af*rep > 0) & (abs(rep) >= c))/sum(abs(af) >= f) for f in x])
    y.append([1.-pv[SP.where(abs(af) >= f)[0]].mean() for f in x])
    if separate: PL.figure(figsize=(7.5,7))
    for yval in y:
        PL.plot(x, yval, linewidth=4, alpha=0.5)
    PL.plot([qtl_cutoff,qtl_cutoff], [0,2], 'r--', linewidth=2, alpha=0.8)
    PL.xlabel("|pQTL signal|")
    PL.ylabel("Reproducibility | replication")
    PL.xlim(0.15, 0.6)
    PL.ylim(0, 1.05)
    if separate: PL.show()


def figure_S8():
    """Draw the pQTL/eQTL comparison as a small square figure and save it as figure_S8."""
    PL.figure(figsize=(3.5,3.5))
    plot_pqtl_eqtl_comparison(p_cutoff=1.1, val_cutoff=0.15)
    PL.xlim(-0.5, 0.7)
    for ext in EXTENSIONS:
        target = "%s/paper/figure_S8.%s"%(DATA_DIR, ext)
        PL.savefig(target, dpi=300)


def transform(i):
    """Map a running panel number to a subplot index on a grid with 3 columns.

    Panels are numbered 1, 2, 3, ... in groups of five. Each group fills one
    grid column from top to bottom, so panel 1 -> 1, 2 -> 4, ..., 6 -> 2.
    """
    zero_based = i - 1
    group = int(zero_based / 5)      # grid column
    within_group = int(zero_based % 5)  # grid row
    return 3*within_group + group + 1


def figure_S9():
    """Compare mRNA and protein signals at the HAP1, MKT1 and IRA2 peaks.

    Each peak fills one column of a 5x3 grid: the mRNA comparison, then for
    the cytometry and for the Tecan signal a comparison against the published
    mRNA signal and a scatter against the mRNA signal of this study. Saved as
    figure_S9.

    The Spearman correlation in each scatter is computed from the protein
    signal that is actually plotted. It used to be taken from the cytometry
    differences for the Tecan panels as well.
    """
    cyto_diffs, mrna_diffs, tecan_diffs, orfs, peak_orfs, Ipeak, mrna_data = get_mrna_comparison_data()
    # cytometry differences are the same for every peak; tecan_diffs is indexed by peak
    cyto_diff_map = {"HAP1":cyto_diffs, "MKT1":cyto_diffs, "IRA2":cyto_diffs}
    signals = ["Total (cytometry)", "Allele (tecan)"]
    PL.figure(None, [14,21])
    pi = 1
    for p in ["HAP1", "MKT1", "IRA2"]:
        PL.subplot(5,3,transform(pi)); pi += 1
        plot_mrna_signal(peaks=[p], tecan_delta=mrna_diffs, plate_orfs=orfs, mrna_data=mrna_data, peak_orfs=peak_orfs, xlim=1.)
        PL.xlabel("mRNA signal (this study)")
        PL.ylabel("mRNA signal (Smith and Kruglyak)")
        for s,protein_diffs in enumerate([cyto_diff_map, tecan_diffs]):
            # plot comparison signals; "tecan delta" is just x-axis
            PL.subplot(5,3,transform(pi)); pi += 1
            plot_mrna_signal(peaks=[p], tecan_delta=protein_diffs[p], plate_orfs=orfs, mrna_data=mrna_data, peak_orfs=peak_orfs)
            PL.xlabel("%s signal (this study)"%(signals[s]))
            PL.ylabel("Allele mRNA signal (Smith and Kruglyak)")
            ax = PL.subplot(5,3,transform(pi)); pi += 1
            # peak genes with both a protein and an mRNA value
            in_peak = SP.array(Ipeak[p]) & (~SP.isnan(protein_diffs[p])) & (~SP.isnan(mrna_diffs))
            Ip0 = SP.where(~in_peak)[0]
            Ip1 = SP.where(in_peak)[0]
            PL.plot(mrna_diffs[Ip0], protein_diffs[p][Ip0], ".", alpha=0.05, markersize=14)
            PL.plot(mrna_diffs[Ip1], protein_diffs[p][Ip1], "r.", alpha=0.3, markersize=10)
            PL.text(0.05, 0.9, "$\\rho=%.2f$"%(ST.spearmanr(protein_diffs[p][Ip1], mrna_diffs[Ip1])[0]), transform=ax.transAxes)
            PL.xlabel("Total mRNA signal (this study)")
            PL.ylabel("%s signal (this study)"%(signals[s]))
            PL.xlim(-1.5,1.5)
    for ext in EXTENSIONS: PL.savefig("%s/paper/figure_S9.%s"%(DATA_DIR, ext), dpi=300)


def main(run_all=False):
    """Generate the supplementary figures.

    run_all -- False (default) produces figures S2 and S3 only, exactly as
               before; True additionally produces figures S4 to S9.

    The S4-S9 calls used to sit behind an unconditional ``return`` together
    with a second copy of the S2/S3 calls, so they could never run.
    """
    figure_S2(trait=3, min_cells=500)
    figure_S2(trait=5, lim=(4,10), min_cells=500, parent="BY")
    figure_S2c()
    figure_S2d()
    figure_S3ab()
    figure_S3c()
    figure_S3defghijk()
    if not run_all:
        return
    # figure_S1() was commented out in the original list and stays disabled
    figure_S4()
    figure_S5()
    figure_S6()
    figure_S7abc()
    figure_S7def()
    figure_S8()
    figure_S9()

# Generate the figures only when this file is run as a script, not on import.
if __name__ == '__main__':
    main()

