import scipy as SP
from parts2014_gfpvar.tools.common import *
from parts2014_gfpvar.tools.io import *
from parts2014_gfpvar.analysis.qtl.util import read_mrna_data
from parts2014_gfpvar.analysis.tecan.util import *
import time
import pdb


def get_parental_meanvar():
    """Return per-ORF mean and variance of the BY and RM glucose samples.

    Reads ``<DATA_DIR>/mRNA/pbiol_parents.tab`` as a tab-delimited table of
    strings. Rows 1 and 2 of the table are taken as the per-column strain and
    condition labels; rows 5 onward are the data rows, with the ORF name in
    column 0 and numeric values in the remaining columns.

    Returns:
        Tuple ``(orfs, mu_by, mu_rm, var_by, var_rm)`` where ``orfs`` is the
        column-0 entries of the data rows and the other four are arrays with
        one entry per data row.
    """
    table = SP.loadtxt("%s/mRNA/pbiol_parents.tab"%DATA_DIR, delimiter="\t", dtype=object)
    strain, condition = table[1:3]

    # Column indices are found in the full table; the numeric matrix below
    # drops column 0, so every index is shifted down by one.
    in_glucose = (condition == "glucose")
    by_cols = SP.where((strain == "BY") & in_glucose)[0] - 1
    rm_cols = SP.where((strain == "RM") & in_glucose)[0] - 1

    body = table[5:]
    orfs = body[:, 0]
    values = SP.array(body[:, 1:], float)

    by_values = values[:, by_cols]
    rm_values = values[:, rm_cols]
    mu_by, mu_rm = by_values.mean(axis=1), rm_values.mean(axis=1)
    var_by, var_rm = by_values.var(axis=1), rm_values.var(axis=1)
    return orfs, mu_by, mu_rm, var_by, var_rm


def get_sample_mrna(median=False, recalc=False):
    """Return per-sample, per-ORF summary coverage from the pileup files.

    The result is cached at ``<DATA_DIR>/mrna/hiseq_<mean|median>.pickle``;
    when that file exists and ``recalc`` is false it is loaded via ``cl`` and
    returned without recomputation.

    Args:
        median: if true, summarise coverage within each gene by its median;
            otherwise by its mean.
        recalc: if true, ignore any cached result and recompute.

    Returns:
        Dict mapping sample id (the two characters following ``SampleR_`` in
        the pileup file name) to a dict mapping ORF to its summary coverage.
    """
    medstr = ("mean", "median")[median]
    cache_path = "%s/mrna/hiseq_%s.pickle"%(DATA_DIR, medstr)
    if not recalc and os.path.exists(cache_path):
        return cl(cache_path)

    # Map the integer position of each chromosome's Roman-numeral name back
    # to the key used in the pileup dictionaries.
    ROMAN = "0 I II III IV V VI VII VIII IX X XI XII XIII XIV XV XVI mitochondrion".split()
    chrnames = get_seq_chr_names()
    intnames = {ROMAN.index(chrnames[c]): c for c in chrnames}

    # Load every pileup and turn each chromosome's entries into arrays.
    sample_mrna = {}
    for path in glob.glob("%s/mrna/SampleR_??.pileup.pickle"%DATA_DIR):
        pileup = cl(path)
        for chrom in pileup:
            pileup[chrom] = [SP.array(entry) for entry in pileup[chrom]]
        sample_mrna[path.split('/')[-1][8:10]] = pileup

    gene_locs = read_gene_locs()
    orfs = list(gene_locs.keys())
    n_orfs = len(orfs)

    sample_gene_covs = {s: {} for s in sample_mrna}

    for o, orf in enumerate(orfs):
        # Progress report every 1000 ORFs.
        if (o % 1000) == 999:
            print("%d %d" % (o + 1, n_orfs))
        chrm, start, end = gene_locs[orf]
        for sample in sample_mrna:
            positions, coverage = sample_mrna[sample][intnames[chrm]]
            # Coverage at the positions inside [start, end], both ends inclusive.
            inside = coverage[SP.where((positions >= start) & (positions <= end))[0]]
            if median:
                sample_gene_covs[sample][orf] = SP.median(inside)
            else:
                sample_gene_covs[sample][orf] = inside.mean()

    cdm(sample_gene_covs, cache_path)
    return sample_gene_covs


def output_sample_mrna(outfilename="%s/mrna/sample_mrna.tab"%DATA_DIR):
    """Write the mean per-ORF coverage of the four samples as a tab-delimited table.

    The header line is ``#ORF`` followed by one ``Mean_coverage_<parent>_R<rep>``
    column per sample. Each later line holds an ORF name and the coverage of
    samples B2, B3, X2 and X3 (in that order) formatted with two decimals, or
    ``NA`` when the ORF is absent from a sample.

    Args:
        outfilename: path of the file to write; it is overwritten if present.

    Notes:
        The ORF list is taken from the first sample in the coverage dict. If
        the dict has no samples at all, only the header line is written.
    """
    rna = get_sample_mrna(median=False, recalc=False)
    samples = ["B2", "B3", "X2", "X3"]
    header = ["#ORF"]
    for parent in ["BYxBY", "RMxBY"]:
        for rep in '12':
            header.append("Mean_coverage_%s_R%s"%(parent, rep))

    # `with` guarantees the handle is closed even if a write fails part-way.
    with open(outfilename, 'w') as ofh:
        ofh.write("\t".join(header) + "\n")
        for orf in next(iter(rna.values()), {}):
            fields = [orf]
            for s in samples:
                # Every field, including the NA placeholder, is tab-separated
                # so that all rows have the same number of columns.
                if orf in rna[s]:
                    fields.append("%.2f"%(rna[s][orf]))
                else:
                    fields.append("NA")
            ofh.write("\t".join(fields) + "\n")


def get_mrna_comparison_data():
    """Collect BY-vs-RM differences from cytometry, RNA-seq coverage and Tecan screens.

    ORFs present in both the cytometry table and the B2 coverage sample are
    kept and ordered by decreasing absolute cytometry difference.

    Returns:
        Tuple ``(cyto_diffs, mrna_diffs, tecan_diffs, orfs, peak_orfs, Ipeak,
        mrna_data)`` where the first, second and fourth items are arrays in the
        sorted ORF order, ``tecan_diffs`` maps "HAP1"/"MKT1"/"IRA2" to arrays
        in the same order (NaN where the ORF has no Tecan value), ``peak_orfs``
        is the result of ``get_screen_target_orfs``, ``Ipeak`` maps each key
        of ``peak_orfs`` to a list of membership flags in the same order, and
        ``mrna_data`` is the result of ``read_mrna_data``.
    """
    # first, cytometry data: skip the header row, key on column 0
    cyto = SP.loadtxt("%s/paper/table_S2-combined.tab"%DATA_DIR, delimiter="\t", dtype=object)
    cval = {row[0]: [SP.log10(float(row[9])), SP.log10(float(row[35]))] for row in cyto[1:]}  # BY,RM

    # Smith & Kruglyak mRNA
    mrna_data = read_mrna_data()

    # This study RNAseq coverage; corrfactor rescales by the per-sample mean coverages
    covs = get_sample_mrna()
    mean_covs = {s: ST.nanmean(covs[s].values()) for s in covs}
    corrfactor = mean_covs["X2"]*mean_covs["X3"]/(mean_covs["B2"]*mean_covs["B3"])

    # Smith & Kruglyak eQTL
    peak_orfs = get_screen_target_orfs(eqtl_lod_cutoff=5)

    # Tecan screens
    vals = combine_tecans(get_tecans(), bad_cutoff=0.4, calc_sds=False)
    clean_tecan_plates(vals, cutoff=1.03)
    tecan_lists, tecan_orfs, _val_parents = get_val_lists(vals["BYs"], get_array_data())
    tecan_loc = {orf: pos for pos, orf in enumerate(tecan_orfs)}

    # overlapping ORFs
    b2, b3, x2, x3 = covs["B2"], covs["B3"], covs["X2"], covs["X3"]
    orfs = SP.array([o for o in cval if o in b2])

    # differences for both assays (BY-RM)
    cyto_diffs = SP.array([cval[o][0] - cval[o][1] for o in orfs], float)
    mrna_diffs = SP.array([SP.log10(corrfactor*b2[o]*b3[o]/x2[o]/x3[o]) for o in orfs], float)

    # order everything by decreasing absolute cytometry difference
    I = SP.argsort(abs(cyto_diffs))[::-1]
    sorted_orfs = orfs[I]

    # store whether each ORF is an eQTL peak target
    Ipeak = {p: [o in peak_orfs[p] for o in sorted_orfs] for p in peak_orfs}

    # NOTE(review): MKT1 and IRA2 both use list index 2 — confirm this is intended.
    tecan_diffs = {}
    for (p, i) in [("HAP1",1), ("MKT1",2), ("IRA2",2)]:
        diffs = SP.zeros(len(I))*SP.nan
        for pos, orf in enumerate(sorted_orfs):
            if orf in tecan_loc:
                loc = tecan_loc[orf]
                diffs[pos] = tecan_lists[0][loc] - tecan_lists[i][loc]  # BY-RM
        tecan_diffs[p] = diffs

    return cyto_diffs[I], mrna_diffs[I], tecan_diffs, sorted_orfs, peak_orfs, Ipeak, mrna_data
