AIM-PIbd-32-Kurbanova-A-A/aimenv/Lib/site-packages/statsmodels/nonparametric/tests/test_kde.py
2024-10-02 22:15:59 +04:00

401 lines
13 KiB
Python

import os
import numpy.testing as npt
import numpy as np
import pandas as pd
import pytest
from scipy import stats
from statsmodels.distributions.mixture_rvs import mixture_rvs
from statsmodels.nonparametric.kde import KDEUnivariate as KDE
import statsmodels.sandbox.nonparametric.kernels as kernels
import statsmodels.nonparametric.bandwidths as bandwidths
# get results from Stata
curdir = os.path.dirname(os.path.abspath(__file__))
rfname = os.path.join(curdir, 'results', 'results_kde.csv')
# print rfname
KDEResults = np.genfromtxt(open(rfname, 'rb'), delimiter=",", names=True)
rfname = os.path.join(curdir, 'results', 'results_kde_univ_weights.csv')
KDEWResults = np.genfromtxt(open(rfname, 'rb'), delimiter=",", names=True)
# get results from R
curdir = os.path.dirname(os.path.abspath(__file__))
rfname = os.path.join(curdir, 'results', 'results_kcde.csv')
# print rfname
KCDEResults = np.genfromtxt(open(rfname, 'rb'), delimiter=",", names=True)
# setup test data
np.random.seed(12345)
Xi = mixture_rvs([.25, .75], size=200, dist=[stats.norm, stats.norm],
kwargs=(dict(loc=-1, scale=.5), dict(loc=1, scale=.5)))
class TestKDEExceptions:
@classmethod
def setup_class(cls):
cls.kde = KDE(Xi)
cls.weights_200 = np.linspace(1, 100, 200)
cls.weights_100 = np.linspace(1, 100, 100)
def test_check_is_fit_exception(self):
with pytest.raises(ValueError):
self.kde.evaluate(0)
def test_non_weighted_fft_exception(self):
with pytest.raises(NotImplementedError):
self.kde.fit(kernel="gau", gridsize=50, weights=self.weights_200,
fft=True, bw="silverman")
def test_wrong_weight_length_exception(self):
with pytest.raises(ValueError):
self.kde.fit(kernel="gau", gridsize=50, weights=self.weights_100,
fft=False, bw="silverman")
def test_non_gaussian_fft_exception(self):
with pytest.raises(NotImplementedError):
self.kde.fit(kernel="epa", gridsize=50, fft=True, bw="silverman")
class CheckKDE:
decimal_density = 7
def test_density(self):
npt.assert_almost_equal(self.res1.density, self.res_density,
self.decimal_density)
def test_evaluate(self):
# disable test
# fails for Epan, Triangular and Biweight, only Gaussian is correct
# added it as test method to TestKDEGauss below
# inDomain is not vectorized
# kde_vals = self.res1.evaluate(self.res1.support)
kde_vals = [np.squeeze(self.res1.evaluate(xi)) for xi in self.res1.support]
kde_vals = np.squeeze(kde_vals) # kde_vals is a "column_list"
mask_valid = np.isfinite(kde_vals)
# TODO: nans at the boundaries
kde_vals[~mask_valid] = 0
npt.assert_almost_equal(kde_vals, self.res_density,
self.decimal_density)
class TestKDEGauss(CheckKDE):
@classmethod
def setup_class(cls):
res1 = KDE(Xi)
res1.fit(kernel="gau", fft=False, bw="silverman")
cls.res1 = res1
cls.res_density = KDEResults["gau_d"]
def test_evaluate(self):
# kde_vals = self.res1.evaluate(self.res1.support)
kde_vals = [self.res1.evaluate(xi) for xi in self.res1.support]
kde_vals = np.squeeze(kde_vals) # kde_vals is a "column_list"
mask_valid = np.isfinite(kde_vals)
# TODO: nans at the boundaries
kde_vals[~mask_valid] = 0
npt.assert_almost_equal(kde_vals, self.res_density,
self.decimal_density)
# The following tests are regression tests
# Values have been checked to be very close to R 'ks' package (Dec 2013)
def test_support_gridded(self):
kde = self.res1
support = KCDEResults['gau_support']
npt.assert_allclose(support, kde.support)
def test_cdf_gridded(self):
kde = self.res1
cdf = KCDEResults['gau_cdf']
npt.assert_allclose(cdf, kde.cdf)
def test_sf_gridded(self):
kde = self.res1
sf = KCDEResults['gau_sf']
npt.assert_allclose(sf, kde.sf)
def test_icdf_gridded(self):
kde = self.res1
icdf = KCDEResults['gau_icdf']
npt.assert_allclose(icdf, kde.icdf)
class TestKDEGaussPandas(TestKDEGauss):
@classmethod
def setup_class(cls):
res1 = KDE(pd.Series(Xi))
res1.fit(kernel="gau", fft=False, bw="silverman")
cls.res1 = res1
cls.res_density = KDEResults["gau_d"]
class TestKDEEpanechnikov(CheckKDE):
@classmethod
def setup_class(cls):
res1 = KDE(Xi)
res1.fit(kernel="epa", fft=False, bw="silverman")
cls.res1 = res1
cls.res_density = KDEResults["epa2_d"]
class TestKDETriangular(CheckKDE):
@classmethod
def setup_class(cls):
res1 = KDE(Xi)
res1.fit(kernel="tri", fft=False, bw="silverman")
cls.res1 = res1
cls.res_density = KDEResults["tri_d"]
class TestKDEBiweight(CheckKDE):
@classmethod
def setup_class(cls):
res1 = KDE(Xi)
res1.fit(kernel="biw", fft=False, bw="silverman")
cls.res1 = res1
cls.res_density = KDEResults["biw_d"]
# FIXME: enable/xfail/skip or delete
# NOTE: This is a knownfailure due to a definitional difference of Cosine kernel
# class TestKDECosine(CheckKDE):
# @classmethod
# def setup_class(cls):
# res1 = KDE(Xi)
# res1.fit(kernel="cos", fft=False, bw="silverman")
# cls.res1 = res1
# cls.res_density = KDEResults["cos_d"]
# weighted estimates taken from matlab so we can allow len(weights) != gridsize
class TestKdeWeights(CheckKDE):
@classmethod
def setup_class(cls):
res1 = KDE(Xi)
weights = np.linspace(1, 100, 200)
res1.fit(kernel="gau", gridsize=50, weights=weights, fft=False,
bw="silverman")
cls.res1 = res1
fname = os.path.join(curdir, 'results', 'results_kde_weights.csv')
cls.res_density = np.genfromtxt(open(fname, 'rb'), skip_header=1)
def test_evaluate(self):
# kde_vals = self.res1.evaluate(self.res1.support)
kde_vals = [self.res1.evaluate(xi) for xi in self.res1.support]
kde_vals = np.squeeze(kde_vals) # kde_vals is a "column_list"
mask_valid = np.isfinite(kde_vals)
# TODO: nans at the boundaries
kde_vals[~mask_valid] = 0
npt.assert_almost_equal(kde_vals, self.res_density,
self.decimal_density)
class TestKDEGaussFFT(CheckKDE):
@classmethod
def setup_class(cls):
cls.decimal_density = 2 # low accuracy because binning is different
res1 = KDE(Xi)
res1.fit(kernel="gau", fft=True, bw="silverman")
cls.res1 = res1
rfname2 = os.path.join(curdir, 'results', 'results_kde_fft.csv')
cls.res_density = np.genfromtxt(open(rfname2, 'rb'))
class CheckKDEWeights:
@classmethod
def setup_class(cls):
cls.x = x = KDEWResults['x']
weights = KDEWResults['weights']
res1 = KDE(x)
# default kernel was scott when reference values computed
res1.fit(kernel=cls.kernel_name, weights=weights, fft=False, bw="scott")
cls.res1 = res1
cls.res_density = KDEWResults[cls.res_kernel_name]
decimal_density = 7
@pytest.mark.xfail(reason="Not almost equal to 7 decimals",
raises=AssertionError, strict=True)
def test_density(self):
npt.assert_almost_equal(self.res1.density, self.res_density,
self.decimal_density)
def test_evaluate(self):
if self.kernel_name == 'cos':
pytest.skip("Cosine kernel fails against Stata")
kde_vals = [self.res1.evaluate(xi) for xi in self.x]
kde_vals = np.squeeze(kde_vals) # kde_vals is a "column_list"
npt.assert_almost_equal(kde_vals, self.res_density,
self.decimal_density)
def test_compare(self):
xx = self.res1.support
kde_vals = [np.squeeze(self.res1.evaluate(xi)) for xi in xx]
kde_vals = np.squeeze(kde_vals) # kde_vals is a "column_list"
mask_valid = np.isfinite(kde_vals)
# TODO: nans at the boundaries
kde_vals[~mask_valid] = 0
npt.assert_almost_equal(self.res1.density, kde_vals,
self.decimal_density)
# regression test, not compared to another package
nobs = len(self.res1.endog)
kern = self.res1.kernel
v = kern.density_var(kde_vals, nobs)
v_direct = kde_vals * kern.L2Norm / kern.h / nobs
npt.assert_allclose(v, v_direct, rtol=1e-10)
ci = kern.density_confint(kde_vals, nobs)
crit = 1.9599639845400545 # stats.norm.isf(0.05 / 2)
hw = kde_vals - ci[:, 0]
npt.assert_allclose(hw, crit * np.sqrt(v), rtol=1e-10)
hw = ci[:, 1] - kde_vals
npt.assert_allclose(hw, crit * np.sqrt(v), rtol=1e-10)
def test_kernel_constants(self):
kern = self.res1.kernel
nc = kern.norm_const
# trigger numerical integration
kern._norm_const = None
nc2 = kern.norm_const
npt.assert_allclose(nc, nc2, rtol=1e-10)
l2n = kern.L2Norm
# trigger numerical integration
kern._L2Norm = None
l2n2 = kern.L2Norm
npt.assert_allclose(l2n, l2n2, rtol=1e-10)
v = kern.kernel_var
# trigger numerical integration
kern._kernel_var = None
v2 = kern.kernel_var
npt.assert_allclose(v, v2, rtol=1e-10)
class TestKDEWGauss(CheckKDEWeights):
kernel_name = "gau"
res_kernel_name = "x_gau_wd"
class TestKDEWEpa(CheckKDEWeights):
kernel_name = "epa"
res_kernel_name = "x_epan2_wd"
class TestKDEWTri(CheckKDEWeights):
kernel_name = "tri"
res_kernel_name = "x_" + kernel_name + "_wd"
class TestKDEWBiw(CheckKDEWeights):
kernel_name = "biw"
res_kernel_name = "x_bi_wd"
class TestKDEWCos(CheckKDEWeights):
kernel_name = "cos"
res_kernel_name = "x_cos_wd"
class TestKDEWCos2(CheckKDEWeights):
kernel_name = "cos2"
res_kernel_name = "x_cos_wd"
class _TestKDEWRect(CheckKDEWeights):
# TODO in docstring but not in kernel_switch
kernel_name = "rect"
res_kernel_name = "x_rec_wd"
class _TestKDEWPar(CheckKDEWeights):
# TODO in docstring but not implemented in kernels
kernel_name = "par"
res_kernel_name = "x_par_wd"
class TestKdeRefit:
np.random.seed(12345)
data1 = np.random.randn(100) * 100
pdf = KDE(data1)
pdf.fit()
data2 = np.random.randn(100) * 100
pdf2 = KDE(data2)
pdf2.fit()
for attr in ['icdf', 'cdf', 'sf']:
npt.assert_(not np.allclose(getattr(pdf, attr)[:10],
getattr(pdf2, attr)[:10]))
class TestNormConstant:
def test_norm_constant_calculation(self):
custom_gauss = kernels.CustomKernel(lambda x: np.exp(-x ** 2 / 2.0))
gauss_true_const = 0.3989422804014327
npt.assert_almost_equal(gauss_true_const, custom_gauss.norm_const)
def test_kde_bw_positive():
# GH 6679
x = np.array([4.59511985, 4.59511985, 4.59511985, 4.59511985, 4.59511985,
4.59511985, 4.59511985, 4.59511985, 4.59511985, 4.59511985,
5.67332327, 6.19847872, 7.43189192])
kde = KDE(x)
kde.fit()
assert kde.bw > 0
def test_fit_self(reset_randomstate):
x = np.random.standard_normal(100)
kde = KDE(x)
assert isinstance(kde, KDE)
assert isinstance(kde.fit(), KDE)
class TestKDECustomBandwidth:
decimal_density = 7
@classmethod
def setup_class(cls):
cls.kde = KDE(Xi)
cls.weights_200 = np.linspace(1, 100, 200)
cls.weights_100 = np.linspace(1, 100, 100)
def test_check_is_fit_ok_with_custom_bandwidth(self):
def custom_bw(X, kern):
return np.std(X) * len(X)
kde = self.kde.fit(bw=custom_bw)
assert isinstance(kde, KDE)
def test_check_is_fit_ok_with_standard_custom_bandwidth(self):
# Note, we are passing the function, not the string - this is intended
kde = self.kde.fit(bw=bandwidths.bw_silverman)
s1 = kde.support.copy()
d1 = kde.density.copy()
kde = self.kde.fit(bw='silverman')
npt.assert_almost_equal(s1, kde.support, self.decimal_density)
npt.assert_almost_equal(d1, kde.density, self.decimal_density)
@pytest.mark.parametrize("fft", [True, False])
def test_check_is_fit_ok_with_float_bandwidth(self, fft):
# Note, we are passing the function, not the string - this is intended
kde = self.kde.fit(bw=bandwidths.bw_silverman, fft=fft)
s1 = kde.support.copy()
d1 = kde.density.copy()
kde = self.kde.fit(bw=kde.bw, fft=fft)
npt.assert_almost_equal(s1, kde.support, self.decimal_density)
npt.assert_almost_equal(d1, kde.density, self.decimal_density)