AIM-PIbd-32-Kurbanova-A-A/aimenv/Lib/site-packages/statsmodels/robust/tests/test_scale.py
2024-10-02 22:15:59 +04:00

311 lines
8.4 KiB
Python

"""
Test functions for statsmodels.robust.scale
"""
import numpy as np
from numpy.random import standard_normal
from numpy.testing import assert_almost_equal, assert_equal
import pytest
from scipy.stats import norm as Gaussian
import statsmodels.api as sm
import statsmodels.robust.scale as scale
from statsmodels.robust.scale import mad
# Example from Section 5.5, Venables & Ripley (2002)
# Number of decimal places passed as the ``decimal`` argument to
# ``assert_almost_equal`` throughout this module.
DECIMAL = 4
# TODO: Can replicate these tests using stackloss data and R if this
# data is a problem
class TestChem:
@classmethod
def setup_class(cls):
cls.chem = np.array(
[
2.20,
2.20,
2.4,
2.4,
2.5,
2.7,
2.8,
2.9,
3.03,
3.03,
3.10,
3.37,
3.4,
3.4,
3.4,
3.5,
3.6,
3.7,
3.7,
3.7,
3.7,
3.77,
5.28,
28.95,
]
)
def test_mean(self):
assert_almost_equal(np.mean(self.chem), 4.2804, DECIMAL)
def test_median(self):
assert_almost_equal(np.median(self.chem), 3.385, DECIMAL)
def test_mad(self):
assert_almost_equal(scale.mad(self.chem), 0.52632, DECIMAL)
def test_iqr(self):
assert_almost_equal(scale.iqr(self.chem), 0.68570, DECIMAL)
def test_qn(self):
assert_almost_equal(scale.qn_scale(self.chem), 0.73231, DECIMAL)
def test_huber_scale(self):
assert_almost_equal(scale.huber(self.chem)[0], 3.20549, DECIMAL)
def test_huber_location(self):
assert_almost_equal(scale.huber(self.chem)[1], 0.67365, DECIMAL)
def test_huber_huberT(self):
n = scale.norms.HuberT()
n.t = 1.5
h = scale.Huber(norm=n)
assert_almost_equal(
scale.huber(self.chem)[0], h(self.chem)[0], DECIMAL
)
assert_almost_equal(
scale.huber(self.chem)[1], h(self.chem)[1], DECIMAL
)
def test_huber_Hampel(self):
hh = scale.Huber(norm=scale.norms.Hampel())
assert_almost_equal(hh(self.chem)[0], 3.17434, DECIMAL)
assert_almost_equal(hh(self.chem)[1], 0.66782, DECIMAL)
class TestMad:
@classmethod
def setup_class(cls):
np.random.seed(54321)
cls.X = standard_normal((40, 10))
def test_mad(self):
m = scale.mad(self.X)
assert_equal(m.shape, (10,))
def test_mad_empty(self):
empty = np.empty(0)
assert np.isnan(scale.mad(empty))
empty = np.empty((10, 100, 0))
assert_equal(scale.mad(empty, axis=1), np.empty((10, 0)))
empty = np.empty((100, 100, 0, 0))
assert_equal(scale.mad(empty, axis=-1), np.empty((100, 100, 0)))
def test_mad_center(self):
n = scale.mad(self.X, center=0)
assert_equal(n.shape, (10,))
with pytest.raises(TypeError):
scale.mad(self.X, center=None)
assert_almost_equal(
scale.mad(self.X, center=1),
np.median(np.abs(self.X - 1), axis=0) / Gaussian.ppf(3 / 4.0),
DECIMAL,
)
class TestMadAxes:
@classmethod
def setup_class(cls):
np.random.seed(54321)
cls.X = standard_normal((40, 10, 30))
def test_axis0(self):
m = scale.mad(self.X, axis=0)
assert_equal(m.shape, (10, 30))
def test_axis1(self):
m = scale.mad(self.X, axis=1)
assert_equal(m.shape, (40, 30))
def test_axis2(self):
m = scale.mad(self.X, axis=2)
assert_equal(m.shape, (40, 10))
def test_axisneg1(self):
m = scale.mad(self.X, axis=-1)
assert_equal(m.shape, (40, 10))
class TestIqr:
@classmethod
def setup_class(cls):
np.random.seed(54321)
cls.X = standard_normal((40, 10))
def test_iqr(self):
m = scale.iqr(self.X)
assert_equal(m.shape, (10,))
def test_iqr_empty(self):
empty = np.empty(0)
assert np.isnan(scale.iqr(empty))
empty = np.empty((10, 100, 0))
assert_equal(scale.iqr(empty, axis=1), np.empty((10, 0)))
empty = np.empty((100, 100, 0, 0))
assert_equal(scale.iqr(empty, axis=-1), np.empty((100, 100, 0)))
empty = np.empty(shape=())
with pytest.raises(ValueError):
scale.iqr(empty)
class TestIqrAxes:
@classmethod
def setup_class(cls):
np.random.seed(54321)
cls.X = standard_normal((40, 10, 30))
def test_axis0(self):
m = scale.iqr(self.X, axis=0)
assert_equal(m.shape, (10, 30))
def test_axis1(self):
m = scale.iqr(self.X, axis=1)
assert_equal(m.shape, (40, 30))
def test_axis2(self):
m = scale.iqr(self.X, axis=2)
assert_equal(m.shape, (40, 10))
def test_axisneg1(self):
m = scale.iqr(self.X, axis=-1)
assert_equal(m.shape, (40, 10))
class TestQn:
    """Tests for ``scale.qn_scale``.

    Results are compared against the naive reference implementation
    ``scale._qn_naive`` and against hard-coded values taken from R's
    robustbase package.
    """

    @classmethod
    def setup_class(cls):
        # Seed once; the normal draw must come before the exponential draw
        # so both samples stay reproducible.
        np.random.seed(54321)
        cls.normal = standard_normal(size=40)
        cls.range = np.arange(0, 40)
        cls.exponential = np.random.exponential(size=40)
        cls.stackloss = sm.datasets.stackloss.load_pandas().data
        cls.sunspot = sm.datasets.sunspots.load_pandas().data.SUNACTIVITY

    def test_qn_naive(self):
        # qn_scale must agree with the naive implementation on every sample.
        for sample in (self.normal, self.range, self.exponential):
            assert_almost_equal(
                scale.qn_scale(sample), scale._qn_naive(sample), DECIMAL
            )

    def test_qn_robustbase(self):
        # from R's robustbase with finite.corr = FALSE
        assert_almost_equal(scale.qn_scale(self.range), 13.3148, DECIMAL)
        assert_almost_equal(
            scale.qn_scale(self.stackloss),
            np.array([8.87656, 8.87656, 2.21914, 4.43828]),
            DECIMAL,
        )
        # sunspot.year from datasets in R only goes up to 289
        assert_almost_equal(
            scale.qn_scale(self.sunspot[0:289]), 33.50901, DECIMAL
        )

    def test_qn_empty(self):
        # A 1-d empty input yields NaN.
        empty = np.empty(0)
        assert np.isnan(scale.qn_scale(empty))
        empty = np.empty((10, 100, 0))
        assert_equal(scale.qn_scale(empty, axis=1), np.empty((10, 0)))
        empty = np.empty((100, 100, 0, 0))
        assert_equal(scale.qn_scale(empty, axis=-1), np.empty((100, 100, 0)))
        # A 0-d array must be rejected.  Call qn_scale here (not iqr) so
        # that this test exercises the function it is named after.
        empty = np.empty(shape=())
        with pytest.raises(ValueError):
            scale.qn_scale(empty)
class TestQnAxes:
@classmethod
def setup_class(cls):
np.random.seed(54321)
cls.X = standard_normal((40, 10, 30))
def test_axis0(self):
m = scale.qn_scale(self.X, axis=0)
assert_equal(m.shape, (10, 30))
def test_axis1(self):
m = scale.qn_scale(self.X, axis=1)
assert_equal(m.shape, (40, 30))
def test_axis2(self):
m = scale.qn_scale(self.X, axis=2)
assert_equal(m.shape, (40, 10))
def test_axisneg1(self):
m = scale.qn_scale(self.X, axis=-1)
assert_equal(m.shape, (40, 10))
class TestHuber:
@classmethod
def setup_class(cls):
np.random.seed(54321)
cls.X = standard_normal((40, 10))
def test_huber_result_shape(self):
h = scale.Huber(maxiter=100)
m, s = h(self.X)
assert_equal(m.shape, (10,))
class TestHuberAxes:
    """Shape checks for ``scale.Huber`` along each axis of a 3-d array."""

    @classmethod
    def setup_class(cls):
        np.random.seed(54321)
        cls.X = standard_normal((40, 10, 30))
        # One shared estimator instance for all axis tests.
        cls.h = scale.Huber(maxiter=1000, tol=1.0e-05)

    def _first_shape(self, axis):
        # Shape of the first element of the pair returned by the estimator.
        first, _second = self.h(self.X, axis=axis)
        return first.shape

    def test_default(self):
        # Despite the name, axis=0 is passed explicitly here.
        assert_equal(self._first_shape(0), (10, 30))

    def test_axis1(self):
        assert_equal(self._first_shape(1), (40, 30))

    def test_axis2(self):
        assert_equal(self._first_shape(2), (40, 10))

    def test_axisneg1(self):
        # axis=-1 is expected to give the same shape as axis=2.
        assert_equal(self._first_shape(-1), (40, 10))
def test_mad_axis_none():
    """Check ``mad`` with ``axis=None`` against equivalent calls (GH 7027).

    The result with the default center, with a user-supplied callable
    center, and on the flattened array along axis 0 must all agree.
    """
    # GH 7027
    data = np.array([[0, 1, 2], [2, 3, 2]])

    def median_center(values):
        # Custom callable center that takes only the data argument.
        return np.median(values)

    with_default_center = mad(a=data, axis=None)
    with_custom_center = mad(a=data, axis=None, center=median_center)
    flattened = mad(a=data.ravel(), axis=0)

    np.testing.assert_allclose(with_default_center, with_custom_center)
    np.testing.assert_allclose(with_default_center, flattened)