248 lines
8.8 KiB
Python
248 lines
8.8 KiB
Python
|
import numpy as np
|
||
|
from numpy.testing import assert_almost_equal, assert_equal
|
||
|
import pytest
|
||
|
|
||
|
from statsmodels.datasets import elnino
|
||
|
from statsmodels.graphics.functional import (
|
||
|
banddepth,
|
||
|
fboxplot,
|
||
|
hdrboxplot,
|
||
|
rainbowplot,
|
||
|
)
|
||
|
|
||
|
try:
|
||
|
import matplotlib.pyplot as plt
|
||
|
except ImportError:
|
||
|
pass
|
||
|
|
||
|
|
||
|
# Load the El Nino data set once at import time; the hdrboxplot tests below
# all read the two module-level arrays defined here.
data = np.asarray(elnino.load().raw_data)
# Column 0 is used as the integer curve labels (the tests compare them with
# years such as 1982); the remaining columns are the curves themselves.
labels = data[:, 0].astype(int)
data = data[:, 1:]
|
||
|
|
||
|
|
||
|
@pytest.mark.matplotlib
def test_hdr_basic(close_figures):
    """Check the default ``hdrboxplot`` result on the El Nino curves.

    Verifies that no extra quantiles are produced, then compares the median
    curve, the stacked ``outliers`` / ``hdr_90`` / ``hdr_50`` curves and the
    labels of the detected outliers against reference values.  An
    ``OSError`` anywhere in the body is reported as an expected failure.
    """
    try:
        _, res = hdrboxplot(data, labels=labels, seed=12345)

        assert len(res.extra_quantiles) == 0

        expected_median = [
            24.247, 25.625, 25.964, 24.999, 23.648, 22.302,
            21.231, 20.366, 20.168, 20.434, 21.111, 22.299,
        ]
        assert_almost_equal(res.median, expected_median, decimal=2)

        stacked = np.vstack([res.outliers, res.hdr_90, res.hdr_50])
        expected_stacked = np.array([
            [24.36, 25.42, 25.40, 24.96, 24.21, 23.35,
             22.50, 21.89, 22.04, 22.88, 24.57, 25.89],
            [27.25, 28.23, 28.85, 28.82, 28.37, 27.43,
             25.73, 23.88, 22.26, 22.22, 22.21, 23.19],
            [23.70, 26.08, 27.17, 26.74, 26.77, 26.15,
             25.59, 24.95, 24.69, 24.64, 25.85, 27.08],
            [28.12, 28.82, 29.24, 28.45, 27.36, 25.19,
             23.61, 22.27, 21.31, 21.37, 21.60, 22.81],
            [25.48, 26.99, 27.51, 27.04, 26.23, 24.94,
             23.69, 22.72, 22.26, 22.64, 23.33, 24.44],
            [23.11, 24.50, 24.66, 23.44, 21.74, 20.58,
             19.68, 18.84, 18.76, 18.99, 19.66, 20.86],
            [24.84, 26.23, 26.67, 25.93, 24.87, 23.57,
             22.46, 21.45, 21.26, 21.57, 22.14, 23.41],
            [23.62, 25.10, 25.34, 24.22, 22.74, 21.52,
             20.40, 19.56, 19.63, 19.67, 20.37, 21.76],
        ])
        # Only a coarse comparison (decimal=0) is made for these curves.
        assert_almost_equal(stacked, expected_stacked, decimal=0)

        # A data row is selected only when every one of its values is found
        # among the values of ``res.outliers``.
        in_outliers = np.isin(data, res.outliers).reshape(data.shape)
        outlier_labels = labels[in_outliers.all(axis=1)]
        assert_equal([1982, 1983, 1997, 1998], outlier_labels)
        assert_equal(labels[res.outliers_idx], outlier_labels)
    except OSError:
        pytest.xfail('Multiprocess randomly crashes in Windows testing')
|
||
|
|
||
|
|
||
|
@pytest.mark.slow
@pytest.mark.matplotlib
def test_hdr_basic_brute(close_figures, reset_randomstate):
    """Check ``hdrboxplot`` with ``use_brute=True`` and ``ncomp=2``.

    No ``seed`` is passed to ``hdrboxplot`` here; the test requests the
    ``reset_randomstate`` fixture instead.  The median curve is compared with
    reference values and no extra quantiles are expected.  An ``OSError``
    anywhere in the body is reported as an expected failure.
    """
    try:
        _, res = hdrboxplot(data, ncomp=2, labels=labels, use_brute=True)
        assert len(res.extra_quantiles) == 0

        expected_median = [
            24.247, 25.625, 25.964, 24.999, 23.648, 22.302,
            21.231, 20.366, 20.168, 20.434, 21.111, 22.299,
        ]
        assert_almost_equal(res.median, expected_median, decimal=2)
    except OSError:
        pytest.xfail('Multiprocess randomly crashes in Windows testing')
|
||
|
|
||
|
|
||
|
@pytest.mark.slow
@pytest.mark.matplotlib
def test_hdr_plot(close_figures):
    """Smoke-test ``hdrboxplot`` drawing onto a caller-supplied axes.

    Nothing is asserted; the test only checks that plotting with list labels
    and ``threshold=1`` and then decorating the axes runs without error.  An
    ``OSError`` raised in that part is reported as an expected failure.
    """
    ax = plt.figure().add_subplot(111)

    try:
        hdrboxplot(
            data,
            labels=labels.tolist(),
            ax=ax,
            threshold=1,
            seed=12345,
        )

        ax.set_xlabel("Month of the year")
        ax.set_ylabel("Sea surface temperature (C)")
        # Tick positions -1, 2, 5, 8, 11; the first one gets an empty label.
        tick_positions = np.arange(-1, 12, 3)
        ax.set_xticks(tick_positions)
        ax.set_xticklabels(["", "Mar", "Jun", "Sep", "Dec"])
        ax.set_xlim([-0.2, 11.2])
    except OSError:
        pytest.xfail('Multiprocess randomly crashes in Windows testing')
|
||
|
|
||
|
|
||
|
@pytest.mark.slow
@pytest.mark.matplotlib
def test_hdr_alpha(close_figures):
    """Check ``extra_quantiles`` when a single ``alpha=[0.7]`` is requested.

    The two resulting curves are compared coarsely (``decimal=0``) with
    reference values.  An ``OSError`` anywhere in the body is reported as an
    expected failure.
    """
    try:
        _, res = hdrboxplot(data, alpha=[0.7], seed=12345)

        expected_extra = np.array([
            [25.1, 26.5, 27.0, 26.4, 25.4, 24.1,
             23.0, 22.0, 21.7, 22.1, 22.7, 23.8],
            [23.4, 24.8, 25.0, 23.9, 22.4, 21.1,
             20.0, 19.3, 19.2, 19.4, 20.1, 21.3],
        ])
        assert_almost_equal(res.extra_quantiles, expected_extra, decimal=0)
    except OSError:
        pytest.xfail('Multiprocess randomly crashes in Windows testing')
|
||
|
|
||
|
|
||
|
@pytest.mark.slow
@pytest.mark.matplotlib
def test_hdr_multiple_alpha(close_figures):
    """Check ``extra_quantiles`` when two alphas, ``[0.4, 0.92]``, are given.

    Four curves are expected and compared coarsely (``decimal=0``) with
    reference values.  An ``OSError`` anywhere in the body is reported as an
    expected failure.
    """
    try:
        _, res = hdrboxplot(data, alpha=[0.4, 0.92], seed=12345)

        expected_extra = np.array([
            [25.712, 27.052, 27.711, 27.200, 26.162, 24.833,
             23.639, 22.378, 22.250, 22.640, 23.472, 24.649],
            [22.973, 24.526, 24.608, 23.343, 21.908, 20.655,
             19.750, 19.046, 18.812, 18.989, 19.520, 20.685],
            [24.667, 26.033, 26.416, 25.584, 24.308, 22.849,
             21.684, 20.948, 20.483, 21.019, 21.751, 22.890],
            [23.873, 25.371, 25.667, 24.644, 23.177, 21.923,
             20.791, 20.015, 19.697, 19.951, 20.622, 21.858],
        ])
        assert_almost_equal(res.extra_quantiles, expected_extra, decimal=0)
    except OSError:
        pytest.xfail('Multiprocess randomly crashes in Windows testing')
|
||
|
|
||
|
|
||
|
@pytest.mark.slow
@pytest.mark.matplotlib
def test_hdr_threshold(close_figures):
    """Check the detected outliers when ``threshold=0.93`` is used.

    The labels of the rows flagged as outliers are compared with a reference
    list of years.  An ``OSError`` anywhere in the body is reported as an
    expected failure.
    """
    try:
        _, res = hdrboxplot(data, alpha=[0.8], threshold=0.93, seed=12345)

        # A data row is selected only when every one of its values is found
        # among the values of ``res.outliers``.
        in_outliers = np.isin(data, res.outliers).reshape(data.shape)
        outlier_labels = labels[in_outliers.all(axis=1)]
        assert_equal([1968, 1982, 1983, 1997, 1998], outlier_labels)
    except OSError:
        pytest.xfail('Multiprocess randomly crashes in Windows testing')
|
||
|
|
||
|
|
||
|
@pytest.mark.matplotlib
def test_hdr_bw(close_figures):
    """Check the median curve when ``hdrboxplot`` is called with ``bw='cv_ml'``.

    An ``OSError`` anywhere in the body is reported as an expected failure.
    """
    try:
        _, res = hdrboxplot(data, bw='cv_ml', seed=12345)

        expected_median = [
            24.25, 25.64, 25.99, 25.04, 23.71, 22.38,
            21.31, 20.44, 20.24, 20.51, 21.19, 22.38,
        ]
        assert_almost_equal(res.median, expected_median, decimal=2)
    except OSError:
        pytest.xfail('Multiprocess randomly crashes in Windows testing')
|
||
|
|
||
|
|
||
|
@pytest.mark.slow
@pytest.mark.matplotlib
def test_hdr_ncomp(close_figures):
    """Check the median curve when ``hdrboxplot`` is called with ``ncomp=3``.

    An ``OSError`` anywhere in the body is reported as an expected failure.
    """
    try:
        _, res = hdrboxplot(data, ncomp=3, seed=12345)

        expected_median = [
            24.33, 25.71, 26.04, 25.08, 23.74, 22.40,
            21.32, 20.45, 20.25, 20.53, 21.20, 22.39,
        ]
        assert_almost_equal(res.median, expected_median, decimal=2)
    except OSError:
        pytest.xfail('Multiprocess randomly crashes in Windows testing')
|
||
|
|
||
|
|
||
|
def test_banddepth_BD2():
    """Check ``banddepth(..., method='BD2')`` on four hand-built curves."""
    grid = np.arange(500) / 150.
    # Three of the four curves share the same phase-shifted argument.
    shifted = grid + np.pi/6

    curve_1 = 1 + 0.5 * np.sin(grid)
    curve_2 = 0.3 + np.sin(shifted)
    curve_3 = -0.5 + np.sin(shifted)
    curve_4 = -1 + 0.3 * np.cos(shifted)
    curves = np.asarray([curve_1, curve_2, curve_3, curve_4])

    computed = banddepth(curves, method='BD2')
    assert_almost_equal(computed, [0.5, 5./6, 5./6, 0.5])

    # Plot to visualize why we expect this output
    # fig = plt.figure()
    # ax = fig.add_subplot(111)
    # for ii, yy in enumerate([curve_1, curve_2, curve_3, curve_4]):
    #     ax.plot(grid, yy, label="y%s" % ii)

    # ax.legend()
    # plt.close(fig)
|
||
|
|
||
|
|
||
|
def test_banddepth_MBD():
    """Check ``banddepth(..., method='MBD')`` on four simple curves.

    The curves are three constants (0, 0.5 and -0.25) and one straight line
    running from -1 to 1 over the grid [0, 1].
    """
    grid = np.arange(5001) / 5000.

    curves = np.asarray([
        np.zeros(grid.shape),
        2 * grid - 1,
        np.full(grid.shape, 0.5),
        np.full(grid.shape, -0.25),
    ])

    computed = banddepth(curves, method='MBD')
    reference = [5./6, (2*(0.75-3./8)+3)/6, 3.5/6, (2*3./8+3)/6]
    assert_almost_equal(computed, reference, decimal=4)
|
||
|
|
||
|
|
||
|
@pytest.mark.matplotlib
def test_fboxplot_rainbowplot(close_figures):
    """Check ``fboxplot`` depth/outlier indices and smoke-test ``rainbowplot``.

    Both functions are exercised in one test because that is much faster
    than testing them separately: the depth returned by ``fboxplot`` is
    re-used as input to ``rainbowplot``.

    The curves are drawn from the global NumPy random generator with a fixed
    seed so that the hard-coded expected indices are reproducible.  The
    generator state that was active before the test is restored afterwards,
    so the fixed seed does not leak into tests that run later.
    """
    def harmfunc(t):
        """Test function, combination of a few harmonic terms."""
        # Indicator: 0 with p=0.9, 1 with p=0.1 - for creating outliers.
        # The order of the random draws below must not change, otherwise the
        # expected indices in this test no longer match.
        ci = int(np.random.random() > 0.9)
        a1i = np.random.random() * 0.05
        a2i = np.random.random() * 0.05
        b1i = (0.15 - 0.1) * np.random.random() + 0.1
        b2i = (0.15 - 0.1) * np.random.random() + 0.1

        func = (1 - ci) * (a1i * np.sin(t) + a2i * np.cos(t)) + \
            ci * (b1i * np.sin(t) + b2i * np.cos(t))

        return func

    saved_state = np.random.get_state()
    try:
        np.random.seed(1234567)
        # Some basic test data, Model 6 from Sun and Genton.
        t = np.linspace(0, 2 * np.pi, 250)
        data = [harmfunc(t) for _ in range(20)]

        # fboxplot test
        fig = plt.figure()
        ax = fig.add_subplot(111)
        _, depth, ix_depth, ix_outliers = fboxplot(data, wfactor=2, ax=ax)

        ix_expected = np.array([13, 4, 15, 19, 8, 6, 3, 16, 9, 7, 1, 5, 2,
                                12, 17, 11, 14, 10, 0, 18])
        assert_equal(ix_depth, ix_expected)
        ix_expected2 = np.array([2, 11, 17, 18])
        assert_equal(ix_outliers, ix_expected2)

        # rainbowplot test (re-uses depth variable); only checks that the
        # call runs, the returned figure is not inspected.
        xdata = np.arange(data[0].size)
        rainbowplot(data, xdata=xdata, depth=depth, cmap=plt.cm.rainbow)
    finally:
        # Undo the global re-seeding regardless of the test outcome.
        np.random.set_state(saved_state)
|