import numpy as np

from statsmodels.nonparametric.smoothers_lowess import lowess

from empirical_model_bins import empirical_model_bins

def lowess_empirical_model_bins(counts, pseudocount=1, frac=0.8,
                                logging_function=np.log,
                                exponentiating_function=np.exp,
                                is_global=True):
    """
    Make a one-dimensional bin-level expected model by performing
    lowess regression in log-counts space, excluding the first third of the
    distance scales and only using the empirical geometric means there instead.

    Parameters
    ----------
    counts : Dict[str, np.ndarray] or np.ndarray
        The observed counts to fit the model to. When ``is_global`` is True
        this is a dict mapping region names to 2-D counts matrices; when
        ``is_global`` is False it is a single 2-D counts matrix.
    pseudocount : int
        The pseudocount to add to the counts before logging. It is subtracted
        again after exponentiating the fitted values.
    frac : float
        The lowess smoothing fraction parameter to use.
    logging_function : Callable
        Applied to ``counts + pseudocount`` to move into log space.
    exponentiating_function : Callable
        Applied to the joined model to move back out of log space. It is
        presumably meant to be the inverse of ``logging_function``; this is
        not checked.
    is_global : bool
        Selects how ``counts`` is interpreted (see above): True pools the
        off-diagonals of every region, False uses a single matrix.

    Returns
    -------
    List[float]
        The one-dimensional expected model. The ``i`` th element of the list
        corresponds to the expected value for interactions between loci
        separated by ``i`` bins. The length of this list will match the size of
        the largest region in the input counts dict (or the size of the single
        input matrix when ``is_global`` is False).
    """
    if is_global:
        # log counts, one matrix per region
        log_counts = {region: logging_function(counts[region] + pseudocount)
                      for region in counts}

        # pool the k-th off-diagonal of every region, for each distance k up
        # to the size of the largest region (smaller regions contribute an
        # empty diagonal once k exceeds their size)
        n_dists = max(len(matrix) for matrix in log_counts.values())
        offdiagonals = [np.concatenate([np.diag(matrix, k=dist)
                                        for matrix in log_counts.values()])
                        for dist in range(n_dists)]
    else:
        log_counts = logging_function(counts + pseudocount)

        # make offdiagonals
        n_dists = len(log_counts)
        offdiagonals = [np.diag(log_counts, k=dist)
                        for dist in range(n_dists)]

    # get empirical expected (log_counts is already logged, so no transform)
    empirical = empirical_model_bins(log_counts, log_transform=False,
                                     is_global=is_global)

    # don't try to fit the first third of the region
    key_index = n_dists // 3

    # collect the finite (distance, log count) observations at or beyond the
    # join point; these are the only points the lowess fit sees
    dist_chunks = []
    value_chunks = []
    for dist in range(key_index, n_dists):
        diagonal = np.asarray(offdiagonals[dist], dtype=float)
        finite_values = diagonal[np.isfinite(diagonal)]
        dist_chunks.append(np.full(finite_values.shape, dist, dtype=float))
        value_chunks.append(finite_values)
    fit_dists = np.concatenate(dist_chunks) if dist_chunks else np.empty(0)
    fit_values = np.concatenate(value_chunks) if value_chunks else np.empty(0)

    # do the lowess fit and index the result by distance; when a distance
    # occurs several times in the fit output, keep its first row
    fitted_by_dist = {}
    if fit_dists.size:
        fit = lowess(fit_values, fit_dists, frac=frac, it=3)
        unique_dists, first_rows = np.unique(fit[:, 0], return_index=True)
        fitted_by_dist = dict(zip(unique_dists.astype(int).tolist(),
                                  fit[first_rows, 1].tolist()))

    # construct the joined fit: empirical values up to and including the join
    # point, lowess values strictly above it, falling back to the empirical
    # value wherever the fit has no entry for that distance
    joined_fit = np.zeros(n_dists)
    for dist in range(n_dists):
        if dist > key_index and dist in fitted_by_dist:
            joined_fit[dist] = fitted_by_dist[dist]
        else:
            joined_fit[dist] = empirical[dist]

    # unlog
    return [exponentiating_function(x) - pseudocount for x in joined_fit]

