115 lines
3.1 KiB
Python
115 lines
3.1 KiB
Python
|
from statsmodels.compat.python import lzip
|
||
|
|
||
|
import numpy as np
|
||
|
|
||
|
from statsmodels.tools.validation import array_like
|
||
|
from . import kernels
|
||
|
|
||
|
|
||
|
#TODO: should this be a function?
|
||
|
class KDE:
|
||
|
"""
|
||
|
Kernel Density Estimator
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
x : array_like
|
||
|
N-dimensional array from which the density is to be estimated
|
||
|
kernel : Kernel Class
|
||
|
Should be a class from *
|
||
|
"""
|
||
|
#TODO: amend docs for Nd case?
|
||
|
def __init__(self, x, kernel=None):
|
||
|
x = array_like(x, "x", maxdim=2, contiguous=True)
|
||
|
if x.ndim == 1:
|
||
|
x = x[:,None]
|
||
|
|
||
|
nobs, n_series = x.shape
|
||
|
|
||
|
if kernel is None:
|
||
|
kernel = kernels.Gaussian() # no meaningful bandwidth yet
|
||
|
|
||
|
if n_series > 1:
|
||
|
if isinstance( kernel, kernels.CustomKernel ):
|
||
|
kernel = kernels.NdKernel(n_series, kernels = kernel)
|
||
|
|
||
|
self.kernel = kernel
|
||
|
self.n = n_series #TODO change attribute
|
||
|
self.x = x
|
||
|
|
||
|
def density(self, x):
|
||
|
return self.kernel.density(self.x, x)
|
||
|
|
||
|
def __call__(self, x, h="scott"):
|
||
|
return np.array([self.density(xx) for xx in x])
|
||
|
|
||
|
def evaluate(self, x, h="silverman"):
|
||
|
density = self.kernel.density
|
||
|
return np.array([density(xx) for xx in x])
|
||
|
|
||
|
|
||
|
if __name__ == "__main__":
|
||
|
from numpy import random
|
||
|
import matplotlib.pyplot as plt
|
||
|
import statsmodels.nonparametric.bandwidths as bw
|
||
|
from statsmodels.sandbox.nonparametric.testdata import kdetest
|
||
|
|
||
|
# 1-D case
|
||
|
random.seed(142)
|
||
|
x = random.standard_t(4.2, size = 50)
|
||
|
h = bw.bw_silverman(x)
|
||
|
#NOTE: try to do it with convolution
|
||
|
support = np.linspace(-10,10,512)
|
||
|
|
||
|
|
||
|
kern = kernels.Gaussian(h = h)
|
||
|
kde = KDE( x, kern)
|
||
|
print(kde.density(1.015469))
|
||
|
print(0.2034675)
|
||
|
Xs = np.arange(-10,10,0.1)
|
||
|
|
||
|
fig = plt.figure()
|
||
|
ax = fig.add_subplot(111)
|
||
|
ax.plot(Xs, kde(Xs), "-")
|
||
|
ax.set_ylim(-10, 10)
|
||
|
ax.set_ylim(0,0.4)
|
||
|
|
||
|
|
||
|
# 2-D case
|
||
|
x = lzip(kdetest.faithfulData["eruptions"], kdetest.faithfulData["waiting"])
|
||
|
x = np.array(x)
|
||
|
x = (x - x.mean(0))/x.std(0)
|
||
|
nobs = x.shape[0]
|
||
|
H = kdetest.Hpi
|
||
|
kern = kernels.NdKernel( 2 )
|
||
|
kde = KDE( x, kern )
|
||
|
print(kde.density( np.matrix( [1,2 ]))) #.T
|
||
|
plt.figure()
|
||
|
plt.plot(x[:,0], x[:,1], 'o')
|
||
|
|
||
|
|
||
|
n_grid = 50
|
||
|
xsp = np.linspace(x.min(0)[0], x.max(0)[0], n_grid)
|
||
|
ysp = np.linspace(x.min(0)[1], x.max(0)[1], n_grid)
|
||
|
# xsorted = np.sort(x)
|
||
|
# xlow = xsorted[nobs/4]
|
||
|
# xupp = xsorted[3*nobs/4]
|
||
|
# xsp = np.linspace(xlow[0], xupp[0], n_grid)
|
||
|
# ysp = np.linspace(xlow[1], xupp[1], n_grid)
|
||
|
xr, yr = np.meshgrid(xsp, ysp)
|
||
|
kde_vals = np.array([kde.density( np.matrix( [xi, yi ]) ) for xi, yi in
|
||
|
zip(xr.ravel(), yr.ravel())])
|
||
|
plt.contour(xsp, ysp, kde_vals.reshape(n_grid, n_grid))
|
||
|
|
||
|
plt.show()
|
||
|
|
||
|
|
||
|
# 5 D case
|
||
|
# random.seed(142)
|
||
|
# mu = [1.0, 4.0, 3.5, -2.4, 0.0]
|
||
|
# sigma = np.matrix(
|
||
|
# [[ 0.6 - 0.1*abs(i-j) if i != j else 1.0 for j in xrange(5)] for i in xrange(5)])
|
||
|
# x = random.multivariate_normal(mu, sigma, size = 100)
|
||
|
# kern = kernel.Gaussian()
|
||
|
# kde = KernelEstimate( x, kern )
|