226 lines
5.8 KiB
226 lines
5.8 KiB
from statsmodels.compat.platform import PLATFORM_OSX
from statsmodels.regression.process_regression import (
ProcessMLE, GaussianCovariance)
import numpy as np
import pandas as pd
import pytest
import collections
import statsmodels.tools.numdiff as nd
from numpy.testing import assert_allclose, assert_equal
# Parameters for a test model, with or without additive
# noise.
def model1(noise):
    """
    Return the population parameters of the test model.

    Parameters
    ----------
    noise : bool
        If True, the model includes an additive white-noise component
        and a non-empty noise parameter vector is returned.  If False,
        the noise parameter vector is empty.

    Returns
    -------
    mn_par : ndarray
        Coefficients of the mean structure.
    sc_par : ndarray
        Coefficients of the scale structure.
    sm_par : ndarray
        Coefficients of the smoothing structure.
    no_par : ndarray
        Coefficients of the noise structure; has length zero when
        `noise` is False.
    """
    mn_par = np.r_[1, 0, -1, 0]
    sc_par = np.r_[1, 1]
    sm_par = np.r_[0.5, 0.1]

    # The noise parameters must only be dropped for the noise-free model;
    # assigning the empty array unconditionally would discard them.
    if noise:
        no_par = np.r_[0.25, 0.25]
    else:
        no_par = np.array([])

    return mn_par, sc_par, sm_par, no_par
def setup1(n, get_model, noise):
    """
    Simulate a data set from the process regression test model.

    Parameters
    ----------
    n : int
        Number of observations.  Groups have five observations each, so
        `n` is expected to be a multiple of 5; otherwise the group and
        time vectors (length ``5 * (n // 5)``) do not match the `n`-row
        design matrices.
    get_model : callable
        Called as ``get_model(noise)``; must return the tuple
        ``(mn_par, sc_par, sm_par, no_par)`` of population parameters.
    noise : bool
        If True, additive white noise is included in the response and a
        noise design matrix is returned.

    Returns
    -------
    y : ndarray
        Simulated response.
    x_mean, x_sc, x_sm : ndarray
        Design matrices for the mean, scale and smoothing structures.
    x_no : ndarray or None
        Design matrix for the noise structure, or None if `noise` is
        False.
    time : ndarray
        Time values 0, ..., 4 repeated for each group.
    groups : ndarray
        Group labels, each repeated five times.
    """
    mn_par, sc_par, sm_par, no_par = get_model(noise)

    groups = np.kron(np.arange(n // 5), np.ones(5))
    time = np.kron(np.ones(n // 5), np.arange(5))
    time_z = (time - time.mean()) / time.std()

    # In the scale, smoothing and noise designs the first column is an
    # intercept and the second column is the standardized time.
    x_mean = np.random.normal(size=(n, len(mn_par)))
    x_sc = np.random.normal(size=(n, len(sc_par)))
    x_sc[:, 0] = 1
    x_sc[:, 1] = time_z
    x_sm = np.random.normal(size=(n, len(sm_par)))
    x_sm[:, 0] = 1
    x_sm[:, 1] = time_z

    # The scale and smoothing linear predictors are exponentiated, so the
    # resulting values are positive.
    mn = np.dot(x_mean, mn_par)
    sc = np.exp(np.dot(x_sc, sc_par))
    sm = np.exp(np.dot(x_sm, sm_par))

    if noise:
        x_no = np.random.normal(size=(n, len(no_par)))
        x_no[:, 0] = 1
        x_no[:, 1] = time_z
        no = np.exp(np.dot(x_no, no_par))
    else:
        # Only the noise-free model goes without a noise design matrix.
        x_no = None

    y = mn.copy()

    gc = GaussianCovariance()

    # Map each group label to the row positions belonging to that group.
    ix = collections.defaultdict(list)
    for i, g in enumerate(groups):
        ix[g].append(i)

    # Add a correlated Gaussian term within each group, generated from
    # the Cholesky factor of the group's covariance matrix.
    for g, ii in ix.items():
        c = gc.get_cov(time[ii], sc[ii], sm[ii])
        r = np.linalg.cholesky(c)
        y[ii] += np.dot(r, np.random.normal(size=len(ii)))

    # Additive white noise
    if noise:
        y += no * np.random.normal(size=y.shape)

    return y, x_mean, x_sc, x_sm, x_no, time, groups
def run_arrays(n, get_model, noise):
    """
    Simulate data with `setup1` and fit a `ProcessMLE` model to it.

    The arguments are forwarded unchanged to `setup1`.  The return value
    is whatever the model's ``fit`` method returns.
    """
    (endog, exog, exog_scale, exog_smooth,
     exog_noise, time, groups) = setup1(n, get_model, noise)

    model = ProcessMLE(
        endog, exog, exog_scale, exog_smooth, exog_noise, time, groups)
    result = model.fit()
    return result
@pytest.mark.parametrize("noise", [False, True])
def test_arrays(noise):
    """Fit the model from arrays and check estimates, covariance, predict."""
    f = run_arrays(1000, model1, noise)
    mod = f.model

    f.summary()  # Smoke test

    # Compare the parameter estimates to population values.
    epar = np.concatenate(model1(noise))
    assert_allclose(f.params, epar, atol=0.3, rtol=0.3)

    # Test the fitted covariance matrix
    cv = f.covariance(mod.time[0:5], mod.exog_scale[0:5, :],
                      mod.exog_smooth[0:5, :])
    assert_allclose(cv, cv.T)  # Check symmetry
    # The matrix was just verified to be symmetric, so use the symmetric
    # eigenvalue routine: it always returns real eigenvalues, whereas the
    # general routine may return complex values that cannot be ordered.
    a = np.linalg.eigvalsh(cv)
    assert_equal(a > 0, True)  # Check positive definiteness

    # Test predict: the three ways of requesting fitted values must agree.
    yhat = f.predict()
    assert_equal(np.corrcoef(yhat, mod.endog)[0, 1] > 0.2, True)
    yhatm = f.predict(exog=mod.exog)
    assert_equal(yhat, yhatm)
    yhat0 = mod.predict(params=f.params, exog=mod.exog)
    assert_equal(yhat, yhat0)

    # Smoke test t-test
    f.t_test(np.eye(len(f.params)))
def run_formula(n, get_model, noise):
    """
    Simulate data with `setup1` and fit a `ProcessMLE` model via formulas.

    Parameters
    ----------
    n : int
        Number of observations, forwarded to `setup1`.
    get_model : callable
        Parameter factory, forwarded to `setup1`.  The mean design is
        expected to have four columns and the scale and smoothing
        designs two columns each, since those columns are copied into
        the data frame by position.
    noise : bool
        If True, the noise design columns are added to the data frame
        and a noise formula is passed to the model.

    Returns
    -------
    f : results instance
        The value returned by the model's ``fit`` method.
    df : DataFrame
        The data frame the model was built from.
    """
    y, x_mean, x_sc, x_sm, x_no, time, groups = setup1(n, get_model, noise)

    df = pd.DataFrame({
        "y": y,
        "x1": x_mean[:, 0],
        "x2": x_mean[:, 1],
        "x3": x_mean[:, 2],
        "x4": x_mean[:, 3],
        "xsc1": x_sc[:, 0],
        "xsc2": x_sc[:, 1],
        "xsm1": x_sm[:, 0],
        "xsm2": x_sm[:, 1],
        "time": time,
        "groups": groups,
    })

    if noise:
        df["xno1"] = x_no[:, 0]
        df["xno2"] = x_no[:, 1]

    # The designs already contain an explicit intercept column, so every
    # formula suppresses the implicit intercept with "0 +".
    mean_formula = "y ~ 0 + x1 + x2 + x3 + x4"
    scale_formula = "0 + xsc1 + xsc2"
    smooth_formula = "0 + xsm1 + xsm2"

    if noise:
        noise_formula = "0 + xno1 + xno2"
    else:
        noise_formula = None

    preg = ProcessMLE.from_formula(
        mean_formula,
        data=df,
        scale_formula=scale_formula,
        smooth_formula=smooth_formula,
        noise_formula=noise_formula,
        time="time",
        groups="groups")
    f = preg.fit()

    return f, df
@pytest.mark.parametrize("noise", [False, True])
def test_formulas(noise):
    """Fit the model from formulas and check estimates, covariance, predict."""
    f, df = run_formula(1000, model1, noise)
    mod = f.model

    f.summary()  # Smoke test

    # Compare the parameter estimates to population values.
    epar = np.concatenate(model1(noise))
    assert_allclose(f.params, epar, atol=0.1, rtol=1)

    # Test the fitted covariance matrix.  The scale and smoothing data
    # are passed as data frames whose column names match the variables
    # used in the scale and smoothing formulas.
    exog_scale = pd.DataFrame(mod.exog_scale[0:5, :],
                              columns=["xsc1", "xsc2"])
    exog_smooth = pd.DataFrame(mod.exog_smooth[0:5, :],
                               columns=["xsm1", "xsm2"])
    cv = f.covariance(mod.time[0:5], exog_scale, exog_smooth)
    assert_allclose(cv, cv.T)  # Check symmetry
    # The matrix was just verified to be symmetric, so use the symmetric
    # eigenvalue routine: it always returns real eigenvalues, whereas the
    # general routine may return complex values that cannot be ordered.
    a = np.linalg.eigvalsh(cv)
    assert_equal(a > 0, True)  # Check positive definiteness

    # Test predict: the three ways of requesting fitted values must agree.
    yhat = f.predict()
    assert_equal(np.corrcoef(yhat, mod.endog)[0, 1] > 0.2, True)
    yhatm = f.predict(exog=df)
    assert_allclose(yhat, yhatm, rtol=1e-11)
    yhat0 = mod.predict(params=f.params, exog=df)
    assert_allclose(yhat, yhat0, rtol=1e-11)

    # Smoke test t-test
    f.t_test(np.eye(len(f.params)))
# Test the score functions using numerical derivatives.
@pytest.mark.parametrize("noise", [False, True])
def test_score_numdiff(noise):
    """Compare the analytic score with a finite-difference gradient."""
    y, x_mean, x_sc, x_sm, x_no, time, groups = setup1(1000, model1, noise)
    model = ProcessMLE(y, x_mean, x_sc, x_sm, x_no, time, groups)

    # Total number of parameters: one per column of every design matrix
    # that is part of the model.
    designs = [x_mean, x_sc, x_sm]
    if noise:
        designs.append(x_no)
    n_params = sum(design.shape[1] for design in designs)

    # A tighter absolute tolerance is used on macOS.
    if PLATFORM_OSX:
        abs_tol = 2e-3
    else:
        abs_tol = 1e-2

    # Evaluate both gradients at five random perturbations of the
    # model's starting values.
    for _ in range(5):
        start = model._get_start()
        point = start + 0.1 * np.random.normal(size=n_params)
        analytic = model.score(point)
        numeric = nd.approx_fprime(point, model.loglike, epsilon=1e-7)
        assert_allclose(analytic, numeric, atol=abs_tol, rtol=1e-4)