import os
from parts2014_gfpvar.analysis.cyto.quantify import *
from parts2014_gfpvar.analysis.cyto.h2 import *
from parts2014_gfpvar.analysis.cyto.util import *
from parts2014_gfpvar.analysis.tecan.util import *
from parts2014_gfpvar.analysis.mrna.util import *
from parts2014_gfpvar.analysis.qtl.repro import *
from parts2014_gfpvar.analysis.qtl.call import *
from parts2014_gfpvar.analysis.img.combine_csv import *

# URL template for a g:Profiler query (organism=scerevisiae, PNG output).
# The single %s placeholder is filled with the query gene list; create_table_S3
# passes ORF names joined with "%0A" (a URL-encoded newline). The template is
# created with ordered_query=1; create_table_S3 rewrites that to
# ordered_query=0 for its unordered gene list.
GPROFILER_QUERY="http://biit.cs.ut.ee/gprofiler/index.cgi?organism=scerevisiae&query=%s&term=&significant=1&ordered_query=1&sort_by_structure=1&hierfiltering=&output=png&custbg_file=&custbg=&user_thr=1.00&max_set_size=0&prefix=ENTREZGENE_ACC&threshold_algo=analytical&domain_size_type=annotated&advanced_options_on=0"


def create_table_S2_cyto():
    """Write the cytometry tables table_S2-singles.tab and table_S2-combined.tab.

    Steps:
      1. Combine the haploid screens into a temporary matrix file ("tmp.tab" in
         the current working directory).
      2. Flag whether replicates are reproducible and write the per-replicate
         table (table_S2-singles.tab).
      3. Collapse the replicates into table_S2-combined.tab.

    The temporary matrix file is always removed, also when one of the steps
    raises.
    """
    rawfilename = "tmp.tab"
    outfilename = "%s/paper/table_S2-singles.tab"%DATA_DIR
    combined_file = "%s/paper/table_S2-combined.tab"%DATA_DIR
    # The same minimum cell count is used for the FSC-A slice filter and for the
    # replicate flagging. (Older comments here quoted 150 and 200 cells; the
    # value actually passed has been 250 in both places.)
    min_cells = 250

    try:
        # Combine data from all screens. Require at least `min_cells` cells for the
        # FSC-A slice, at most 0.5 standard deviation in log10 scale GFP level, and
        # only haploid screens.
        create_combined_mat(min_cells_slice=min_cells, max_gfp_sd=0.5, screens=("haploid-R1", "haploid-R2", "haploid-R3", "fillin"), outfilename=rawfilename, allowed_ploidies=["Haploid"], renames={"haploid-R1":"haploid-R2", "haploid-R3":"haploid-R1"})
        # Flag whether the replicates are reproducible. Look at linear median and
        # variance (traits 4,5), which have difference limits of 0.2 and 1 (see the
        # supporting figures of reproducibility to understand the scale).
        flag_replicates(infilename=rawfilename, traits=[4,5], cutoffs=[0.2,1], names=["mean","var"], min_cells=min_cells, debug=False, outfilename=outfilename)
        # Create the combined replicates file
        create_combined_cytometry_output_table(infilename=outfilename, outfilename=combined_file)
    finally:
        # Remove the intermediate file without going through a shell; tolerate the
        # case where it was never created because an earlier step failed.
        if os.path.exists(rawfilename):
            os.remove(rawfilename)


def create_table_S2_img():
    """Regenerate the image-based per-ORF table (table_S2-img.tab).

    Every stage is forced to recompute (overwrite/recalc flags are all True).
    """
    # Take the image analysis output and combine it all into one large file.
    output_cells_from_images(overwrite=True)
    # Combine data from many cells for one ORF and calculate statistics ...
    cell_data = combine_img_cell_data(recalc=True)
    orf_stats = calc_img_orfstats(cell_data, recalc_meanvar=True)
    # ... and output them.
    save_img_orfstats(*orf_stats, outfilename="%s/paper/table_S2-img.tab"%DATA_DIR)


def create_table_S2_mrna():
    """Write the per-sample mRNA table to <DATA_DIR>/paper/table_S2_mrna.tab."""
    mrna_table_path = "%s/paper/table_S2_mrna.tab"%(DATA_DIR)
    output_sample_mrna(outfilename=mrna_table_path)


def create_table_S3(cutoff=0.2, n_mrna_orf=100):
    """Write g:Profiler query URLs for differentially expressed ORFs to table_S3.html.

    Reads <DATA_DIR>/paper/table_S2-combined.tab, compares the RM and BY columns
    on a log10 scale, and writes five labelled query URLs (plain text, despite
    the .html extension) built from GPROFILER_QUERY.

    Parameters:
      cutoff      relative difference threshold; an ORF counts as lower when
                  log10(RM) - log10(BY) <= log10(1 - cutoff) and as higher when
                  it is >= log10(1 + cutoff). Meaningful for 0 <= cutoff < 1.
      n_mrna_orf  number of ORFs taken from each end of the mRNA ranking
                  returned by get_mrna_differences().
    """
    data = SP.loadtxt("%s/paper/table_S2-combined.tab"%DATA_DIR, delimiter="\t", dtype=object)
    # Union of all eQTL target ORF collections. Starting from an empty set makes
    # this well-defined when there are no targets at all.
    # NOTE(review): assumes the dict values are sets of ORF names (the previous
    # code combined them with `|`) -- confirm against get_screen_target_orfs.
    eqtl_peak_orfs = set().union(*get_screen_target_orfs(eqtl_lod_cutoff=5).values())
    i1, i2 = 9, 35 # indices for BY and RM values in the large table
    # Row 0 is skipped as the header; get ORF name, BY and RM expression values.
    orf = data[1:,0]
    by = SP.log10(SP.array(data[1:,i1], float))
    rm = SP.log10(SP.array(data[1:,i2], float))
    d = rm - by # calculate difference, and sort it
    I = SP.argsort(d)
    Imrna, mrna_orf = get_mrna_differences()

    # Thresholds are loop-invariant; compute them once.
    lower_limit, upper_limit = SP.log10(1-cutoff), SP.log10(1+cutoff)
    lowest, highest = I[0:300], I[::-1][0:300] # up to 300 candidates from each end of the ranking

    kids_lower_str = "%0A".join([orf[i] for i in lowest if d[i] <= lower_limit]) # ordered list of ORFs; up to 300 crosses where RMxBY < BYxBY
    kids_higher_str = "%0A".join([orf[i] for i in highest if d[i] >= upper_limit]) # ordered list of ORFs; up to 300 crosses where RMxBY > BYxBY
    # List of all candidate ORFs where |BYxBY - RMxBY| is large enough, and the ORFs
    # are not eQTL targets of IRA2, MKT1, HAP1 genes according to data from Smith et al, 2008.
    gene_str = "%0A".join([orf[i] for i in list(lowest) + list(highest)
                           if ((d[i] <= lower_limit) or (d[i] >= upper_limit)) and (orf[i] not in eqtl_peak_orfs)])
    kids_higher_mrna = "%0A".join([mrna_orf[i] for i in Imrna[0:n_mrna_orf]])
    kids_lower_mrna = "%0A".join([mrna_orf[i] for i in Imrna[::-1][0:n_mrna_orf]])

    # `with` guarantees the file is closed even if formatting or writing fails.
    with open("%s/paper/table_S3.html"%DATA_DIR, 'w') as ofh:
        ofh.write("RMxBY lower than BYxBY: %s\n\nRMxBY higher than BYxBY: %s\n\n"%(GPROFILER_QUERY%kids_lower_str, GPROFILER_QUERY%kids_higher_str))
        # The combined gene list is not ranked, so request an unordered query.
        ofh.write("RMxBY different from BYxBY, but not eQTL target: %s\n\n"%((GPROFILER_QUERY%gene_str).replace("ordered_query=1","ordered_query=0")))
        ofh.write("RMxBY mRNA lower than BYxBY: %s\n\nRMxBY mRNA higher than BYxBY: %s\n\n"%(GPROFILER_QUERY%kids_lower_mrna, GPROFILER_QUERY%kids_higher_mrna))


def get_mrna_differences(min_cov=20, max_replicate_log2_diff=1):
    """Rank ORFs by the mRNA difference between the "B" and "X" sample pairs.

    For every ORF present in all four samples B2, B3, X2 and X3 with coverage
    strictly above `min_cov` in each, the coverage is log2-transformed (with a
    0.5 pseudocount) and normalised by subtracting log2 of the sample's mean
    coverage. ORFs whose two replicates disagree by `max_replicate_log2_diff`
    or more (within B or within X) are dropped. The score of a kept ORF is
    0.5*((B2 + B3) - (X2 + X3)) on that normalised log2 scale.
    (Presumably B* are BYxBY and X* are RMxBY samples -- TODO confirm.)

    Parameters:
      min_cov                  exclusive lower bound on per-sample coverage.
      max_replicate_log2_diff  exclusive upper bound on the absolute log2
                               difference between the two replicates of a pair.

    Returns:
      (order, orfs): `order` is the argsort of the scores in ascending order
      (so the first indices have the lowest B-minus-X value), and `orfs` is the
      list of ORF names that `order` indexes into.
    """
    sample_gene_covs = get_sample_mrna()
    means = {}
    for s in sample_gene_covs: means[s] = ST.nanmean(list(sample_gene_covs[s].values()))
    samples = ("B2", "B3", "X2", "X3") # two B replicates followed by two X replicates
    vals, orfs = [], []
    for o in sample_gene_covs[samples[0]]:
        # Only ORFs measured in all four samples can be compared.
        if not all(o in sample_gene_covs[s] for s in samples[1:]):
            continue
        covs = [sample_gene_covs[s][o] for s in samples]
        if not all(c > min_cov for c in covs):
            continue
        # Normalised log2 coverage for each sample.
        m11, m12, m21, m22 = [SP.log2(c+0.5) - SP.log2(means[s]) for s, c in zip(samples, covs)]
        # Require the replicates within each pair to agree.
        if (abs(m11 - m12) < max_replicate_log2_diff) and (abs(m21 - m22) < max_replicate_log2_diff):
            vals.append(0.5*(m11+m12-m21-m22))
            orfs.append(o)
    return SP.argsort(vals), orfs


def create_table_S4(combined_file="%s/paper/table_S2-combined.tab"%DATA_DIR):
    """Write the heritabilities table (table_S4.tab) computed from `combined_file`.

    Note that the default input path is formatted from DATA_DIR once, when this
    function is defined.
    """
    heritability_table = "%s/paper/table_S4.tab"%DATA_DIR
    create_heritabilities_output_table(infilename=combined_file, outfilename=heritability_table)


def create_table_S5():
    """Write the GFP-swap effect decomposition to <DATA_DIR>/paper/table_S5.tab.

    One row is written per well (96 wells) whose total effect is not NaN. Each
    row holds the ORF name, the total, cis and trans effects (2 decimals), and
    then the measured value for every parent and both replicates (3 decimals),
    in the same parent/replicate order as the header columns.
    """
    n_wells = 96
    # The ORF list returned alongside the raw data is not used; the annotation
    # from get_gfpswap_annot() below supplies the names instead.
    data, counts, _ = get_gfpswap_data(debug=False, recalc=False)
    # The interaction term is returned by the decomposition but is not part of this table.
    data, cis, trans, _interact, total, p = get_gfpswap_decomposition(data, counts, cutoff=200)
    orfs = get_gfpswap_annot().reshape(n_wells)
    # Flatten each parent/replicate plate once instead of once per output row.
    flat_values = [data[parent][rep].reshape(n_wells) for parent in p for rep in [0,1]]

    # `with` guarantees the file is closed even if a write fails part-way.
    with open("%s/paper/table_S5.tab"%DATA_DIR, 'w') as ofh:
        ofh.write("#ORF      \tTotal_effect\tcis_effect\ttrans_effect")
        for parent in p:
            for rep in [1,2]: # replicates are labelled 1-based in the header
                ofh.write("\t%s_R%d"%(parent, rep))
        ofh.write("\n")

        # also output actual values
        for i in range(n_wells):
            if SP.isnan(total[i]): continue
            ofh.write("%s  \t%.2f\t%.2f\t%.2f"%(orfs[i], total[i], cis[i], trans[i]))
            for plate in flat_values:
                ofh.write("\t%.3f"%(plate[i]))
            ofh.write("\n")



def create_table_S6():
    """Write per-sample sequencing coverage tables.

    Reads <DATA_DIR>/seq/samples/meta.tab and writes two files:
      * <DATA_DIR>/seq/samples/coverages.txt -- sample name and coverage;
      * <DATA_DIR>/paper/table_S6.tab -- every original meta.tab line with a
        coverage column appended.

    Coverage is the median over the concatenated element 5 of every record in
    the sample's "<sample>_af_rec-0.90.pickle" file, formatted as an integer.
    Samples are reported as "NA" when their comment column (index 7) is
    "discard", their discovery set column (index 8) is "none", or their name
    contains "000".
    """
    from itertools import chain

    meta_path = "%s/seq/samples/meta.tab"%DATA_DIR
    meta = SP.loadtxt(meta_path, delimiter="\t", dtype=object)
    with open(meta_path, "r") as ifh:
        meta_lines = ifh.readlines()

    # Nested `with` blocks close both outputs even if a pickle fails to load.
    with open("%s/seq/samples/coverages.txt"%DATA_DIR, 'w') as ofh:
        with open("%s/paper/table_S6.tab"%DATA_DIR, 'w') as ofh_paper:
            ofh.write("#Sample\tCoverage\n")
            ofh_paper.write("%s\tCoverage\n"%(meta_lines[0].strip("\n").strip("\r")))
            for i, row in enumerate(meta):
                sample, comment, discovery_set = row[0], row[7], row[8]
                # Row i of the parsed table corresponds to raw line i+1; line 0 is
                # used as the header above.
                # NOTE(review): this relies on loadtxt skipping exactly that first
                # line (e.g. because it starts with '#') -- confirm for meta.tab.
                raw_line = meta_lines[i+1].strip("\n").strip("\r")
                if comment == "discard" or discovery_set == "none" or sample.count("000") > 0:
                    ofh.write("%s\tNA\n"%sample)
                    ofh_paper.write("%s\tNA\n"%raw_line)
                else:
                    records = cl("%s/seq/afs/%s_af_rec-0.90.pickle"%(DATA_DIR, sample))
                    # Concatenate all per-record coverage values in linear time.
                    coverage = SP.median(list(chain.from_iterable(v[5] for v in records.values())))
                    ofh.write("%s\t%d\n"%(sample, coverage))
                    ofh_paper.write("%s\t%d\n"%(raw_line, coverage))


def create_table_S7():
    """Call sequencing QTLs and write their replication statistics to table_S7.tab.

    The QTL calls for all samples are recomputed, the clean-set calls and the
    per-sample p-values are saved, and finally the replication statistics for
    the "all" sample set are written out.
    """
    call_all_seq_qtls(recalc=True)
    call_clean_set_seq_qtls(save=True)
    calc_all_sample_pval(save=True, recalc=False, repro_af_cutoff=0.1)

    table_path = "%s/paper/table_S7.tab"%DATA_DIR
    replication_options = dict(sample_set="all", af_stringent=0.15, sd_stringent=5, qtl_region_span=1000)
    output_qtl_replication_stats(outfilename=table_path, **replication_options)


def create_table_S8():
    """Write the combined Tecan measurements for the BY parent set to table_S8.tab.

    Measurements are loaded and combined for three parent sets (BYs, RMs, BYRM),
    but only the "BYs" set is written to the output table.
    """
    by_strains = ["BY", "BY-HAP1", "BY-MKT1-IRA2", "BY-HAP1-MKT1-IRA2"]
    rm_strains = ["RM", "RM-IRA2","RM-MKT1","RM-MKT1-IRA2"]
    byrm_strains = ["BY", "BY-HAP1-MKT1-IRA2", "RM-MKT1-IRA2", "RM"]

    # Each dict gets its own copy of the BY list so the two stay independent.
    output_parents = {"BYs": list(by_strains)}
    all_parents = {"BYs": list(by_strains),
           "RMs": rm_strains,
           "BYRM": byrm_strains}

    tecans = get_tecans(parents=all_parents)
    vals, sds = combine_tecans(tecans, bad_cutoff=0.4, calc_sds=True)
    # Return value is not used; presumably this cleans `vals` in place -- TODO confirm.
    clean_tecan_plates(vals, cutoff=1.03)
    # The third returned item (plate ORFs) is not needed for the output.
    val_lists, sd_lists, _plate_orfs = get_combined_val_lists(vals, sds, output_parents)

    table_path = "%s/paper/table_S8.tab"%(DATA_DIR)
    output_tecans(table_path, val_lists, sd_lists, output_parents)
    

def main():
    """Regenerate the supplementary tables in dependency order.

    create_table_S2_cyto() runs first because it writes table_S2-combined.tab,
    which create_table_S3() and (by default) create_table_S4() read.

    A leftover debugging shortcut used to run only create_table_S3() and then
    return, which left every other call below unreachable; the full pipeline
    is restored here.
    """
    create_table_S2_cyto()
    # NOTE(review): the image-based table was already disabled in the previous
    # version of this function; it is left disabled here. Re-enable if wanted.
    #create_table_S2_img()
    create_table_S2_mrna()
    create_table_S3()
    create_table_S4()
    create_table_S5()
    create_table_S6()
    create_table_S7()
    create_table_S8()


# Run the table generation only when executed as a script, not on import.
if __name__ == '__main__':
    main()
