import numpy as np
from sklearn.neighbors import KernelDensity
from sklearn.grid_search import GridSearchCV
from statsmodels.distributions.empirical_distribution import ECDF
#from statsmodels.api.nonparametric import KDEUnivariate

#--------------------------------------------------------------------------
# cdf
#--------------------------------------------------------------------------
def getCDF(x):
  """Build the empirical CDF of the sample `x`.

  Args:
    x: the observations handed unchanged to statsmodels' `ECDF`.

  Returns:
    The `ECDF` object constructed with `side='right'` (see the statsmodels
    documentation for the interval convention that option selects).
  """
  return ECDF(x, side='right')

#--------------------------------------------------------------------------
# kde 
#--------------------------------------------------------------------------
class KDE(object):
  """Thin adapter that turns an estimator's log-densities into densities.

  The wrapped object only needs `score` and `score_samples` methods that
  return log-densities; `getKDE` passes in an already fitted
  `sklearn.neighbors.KernelDensity`.
  """

  def __init__(self, kde_sklearn):
    """Keep a reference to the estimator `kde_sklearn` (not copied)."""
    self.kde = kde_sklearn

  def pdf(self, val):
    """Return the density at a single point.

    Args:
      val: a scalar (1-d model), a length-d vector describing one
        d-dimensional point, or an array that is already 2-d.

    Returns:
      exp of the estimator's `score` for that input.  Per scikit-learn's
      documentation `score` is the *total* log-likelihood, so feeding
      several rows yields their joint density rather than one value per
      row; use `pdf_samples` for per-point values.
    """
    # Recent scikit-learn releases reject 0-d/1-d input.  Promoting to a
    # single row reproduces how older releases interpreted such input and
    # leaves arrays that are already 2-d untouched.
    point = np.atleast_2d(val)
    return np.exp(self.kde.score(point))

  def pdf_samples(self, grid):
    """Return the density at every point of `grid`.

    Args:
      grid: either a 1-d array-like of n scalar points, or a 2-d array laid
        out as (n_features, n_points), i.e. one point per column.

    Returns:
      exp of the estimator's `score_samples`, one value per point.
    """
    grid = np.asarray(grid)
    if grid.ndim == 1:
      # n scalar points -> n rows with a single feature each
      samples = grid[:, np.newaxis]
    else:
      # columns are points here, whereas the estimator is given rows
      samples = grid.T
    return np.exp(self.kde.score_samples(samples))

def getKDE(
  x,
  bandwidth=0.2,
  bwselect=False,
  rtol=1e-4,
  atol=1e-5,
  **kwargs
):
  """Fit a kernel density estimate to `x` and wrap it in a `KDE`.

  Args:
    x: 1-d array-like of n scalar observations, or a 2-d array laid out as
      (n_features, n_observations), i.e. one observation per column.
    bandwidth: kernel bandwidth used when `bwselect` is false.
    bwselect: when true, ignore `bandwidth` and pick one by a cross-validated
      grid search over 20 evenly spaced candidates in [0.1, 1.0] (`cv=20`,
      so presumably at least 20 observations are needed -- confirm against
      the scikit-learn version in use).  The chosen value is printed.
    rtol: forwarded to `KernelDensity` for the final fit.
    atol: forwarded to `KernelDensity` for the final fit.
    **kwargs: extra `KernelDensity` options (e.g. `kernel`); applied both to
      the bandwidth search and to the final estimator so the bandwidth is
      chosen for the model that is actually fitted.

  Returns:
    A `KDE` wrapping the fitted `KernelDensity`.
  """
  x = np.asarray(x)
  # The estimator is fitted on one observation per row.
  if x.ndim == 1:
    samples = x[:, np.newaxis]
  else:
    samples = x.T

  if bwselect:
    search = GridSearchCV(
      KernelDensity(**kwargs),
      {'bandwidth': np.linspace(0.1, 1.0, 20)},
      cv=20,
    )
    search.fit(samples)
    bandwidth = search.best_params_['bandwidth']
    # Single-argument call form prints the same text on Python 2 and 3;
    # the double space mirrors the output of the old print statement.
    print('sel bandwidth  {0}'.format(bandwidth))

  kde_skl = KernelDensity(
    bandwidth=bandwidth,
    rtol=rtol,
    atol=atol,
    **kwargs
  )
  kde_skl.fit(samples)

  return KDE(kde_skl)

class EmpiricalDist(object):
  """Histogram-based empirical distribution.

  The sample is binned with `np.histogram`; `freq` holds the fraction of
  observations in each bin and `bins` the bin edges (one more than bins).

  Args:
    vals: the observations.
    numBins: bin specification forwarded to `np.histogram`.
    intDirection: 'right' makes `cdf` accumulate mass from the bin holding
      the query up to the top of the range, 'left' from the bottom of the
      range up to and including that bin.
  """

  def __init__(
    self,
    vals,
    numBins,
    intDirection='right',
  ):
    assert intDirection in ['right', 'left']

    counts, self.bins = np.histogram(vals, numBins)
    # renormalize counts into per-bin probability mass
    self.freq = 1. * counts / counts.sum()
    # direction to integrate when computing the cdf
    self.intDirection = intDirection

  def _binIndex(self, val):
    """Return the bin index of `val`: -1 below the range, len(freq) above."""
    if val > self.bins[-1]:
      return len(self.freq)
    # Digitizing against the left edges only keeps val == max(vals) inside
    # the last bin; anything below the first edge comes out as -1.
    return np.digitize([val], self.bins[0:-1])[0] - 1

  def cdf(self, val):
    """Return the accumulated mass at `val` in the configured direction.

    The whole bin containing `val` is always included.  Queries outside the
    sampled range give 0 or 1 (up to rounding) instead of wrapping around
    to the last bin.
    """
    idx = self._binIndex(val)
    if self.intDirection == 'right':
      # below the range every bin lies to the right; above it none does
      return self.freq[max(idx, 0):].sum()
    # idx == -1 selects nothing, idx == len(freq) selects everything
    return self.freq[:idx + 1].sum()

  def pdf(self, val):
    """Return the probability mass of the bin containing `val`.

    This is the bin's share of the sample (not divided by the bin width);
    values outside the sampled range have mass 0.
    """
    idx = self._binIndex(val)
    if idx < 0 or idx >= len(self.freq):
      return 0.0
    return self.freq[idx]

