114 lines
3.8 KiB
Python
114 lines
3.8 KiB
Python
import numpy as np
|
|
from numpy.testing import assert_allclose
|
|
import pandas as pd
|
|
import pytest
|
|
from statsmodels.genmod.qif import (QIF, QIFIndependence, QIFExchangeable,
|
|
QIFAutoregressive)
|
|
from statsmodels.tools.numdiff import approx_fprime
|
|
from statsmodels.genmod import families
|
|
|
|
|
|
@pytest.mark.parametrize("fam", [families.Gaussian(), families.Poisson(),
                                 families.Binomial()])
@pytest.mark.parametrize("cov_struct", [QIFIndependence(), QIFExchangeable(),
                                        QIFAutoregressive()])
def test_qif_numdiff(fam, cov_struct):
    """Check QIF analytic derivatives against numerical differentiation.

    For every family / covariance-structure pair, a QIF model is built on
    seeded random data (200 observations in groups of 4) and, at five
    random parameter points, two quantities returned by
    ``model.objective`` are compared with ``approx_fprime`` results:

    * element 4 against the numerical derivative of element 3 (the
      vector of estimating equations), and
    * element 1 against the numerical derivative of element 0 (the QIF
      value).
    """
    np.random.seed(234234)
    n = 200
    q = 4
    n_groups = n // q

    x = np.random.normal(size=(n, 3))

    # NOTE: the order of the np.random calls below determines the data
    # produced by the fixed seed, so it must not be rearranged.
    if isinstance(fam, families.Gaussian):
        # One draw per group, repeated for each of its q members, mixed
        # with an independent draw per observation.
        shared = np.repeat(np.random.normal(size=n_groups), q)
        noise = np.random.normal(size=n)
        y = x.sum(axis=1) + (np.sqrt(0.5)*shared + np.sqrt(1 - 0.5**2)*noise)
    elif isinstance(fam, families.Poisson):
        y = np.random.poisson(5, size=n)
    elif isinstance(fam, families.Binomial):
        y = np.random.randint(0, 2, size=n)

    # Consecutive blocks of q observations share a group label.
    g = np.repeat(np.arange(n_groups), q)

    model = QIF(y, x, groups=g, family=fam, cov_struct=cov_struct)

    def component(index):
        # Function of the parameters returning one element of the tuple
        # produced by model.objective.
        return lambda params: model.objective(params)[index]

    for _ in range(5):
        pt = np.random.normal(size=3)
        _, analytic_grad, _, _, analytic_jac = model.objective(pt)

        # Jacobian of the vector of estimating equations.
        numeric_jac = approx_fprime(pt, component(3), 1e-7)
        assert_allclose(analytic_jac, numeric_jac, 1e-4)

        # Gradient of the QIF.
        numeric_grad = approx_fprime(pt, component(0), 1e-7)
        assert_allclose(analytic_grad, numeric_grad, 1e-4)
|
|
|
|
|
|
@pytest.mark.parametrize("fam", [families.Gaussian(), families.Poisson(),
                                 families.Binomial()])
@pytest.mark.parametrize("cov_struct", [QIFIndependence(), QIFExchangeable(),
                                        QIFAutoregressive()])
def test_qif_fit(fam, cov_struct):
    """Fit a QIF model to simulated data and compare with the true values.

    The response is simulated from the given family using known
    coefficients (1000 observations in groups of 4, fixed seed).  The
    fitted parameters must match the generating coefficients within a
    loose tolerance, and ``summary()`` must run without raising.
    """
    np.random.seed(234234)

    n = 1000
    q = 4
    n_groups = n // q
    params = np.array([1, -0.5, 0.2])

    x = np.random.normal(size=(n, params.size))

    # Linear predictor under the generating coefficients; involves no
    # random draws, so computing it up front leaves the stream untouched.
    lin_pred = x.dot(params)

    # NOTE: the order of the np.random calls below determines the data
    # produced by the fixed seed, so it must not be rearranged.
    if isinstance(fam, families.Gaussian):
        # One draw per group, repeated for each of its q members, mixed
        # with an independent draw per observation.
        shared = np.repeat(np.random.normal(size=n_groups), q)
        noise = np.random.normal(size=n)
        y = lin_pred + (np.sqrt(0.5)*shared + np.sqrt(1 - 0.5**2)*noise)
    elif isinstance(fam, families.Poisson):
        y = np.random.poisson(np.exp(lin_pred))
    elif isinstance(fam, families.Binomial):
        prob = 1 / (1 + np.exp(-lin_pred))
        draws = np.random.uniform(0, 1, size=n)
        y = (draws < prob).astype(int)

    # Consecutive blocks of q observations share a group label.
    g = np.repeat(np.arange(n_groups), q)

    rslt = QIF(y, x, groups=g, family=fam, cov_struct=cov_struct).fit()

    # Loose comparison against the generating coefficients.
    assert_allclose(rslt.params, params, atol=0.05, rtol=0.05)

    # Smoke test: building the summary must not raise.
    rslt.summary()
|
|
|
|
|
|
@pytest.mark.parametrize("cov_struct", [QIFIndependence(), QIFExchangeable(),
                                        QIFAutoregressive()])
def test_formula(cov_struct):
    """Check that the array and formula interfaces of QIF agree.

    The same seeded random data (100 observations, 25 groups of 4) is
    fitted once through ``QIF(...)`` with arrays and once through
    ``QIF.from_formula`` with a DataFrame; parameter estimates and
    standard errors must match.  For covariance structures other than
    ``QIFIndependence`` the ``bic`` and ``aic`` attributes of the formula
    result are also accessed as a smoke test.
    """
    np.random.seed(3423)

    endog = np.random.normal(size=100)
    exog = np.random.normal(size=(100, 2))
    # Float-valued labels: four consecutive observations per group.
    groups = np.repeat(np.arange(25, dtype=float), 4)

    array_fit = QIF(endog, exog, groups=groups, cov_struct=cov_struct).fit()

    df = pd.DataFrame({
        "y": endog,
        "x1": exog[:, 0],
        "x2": exog[:, 1],
        "groups": groups,
    })
    formula_fit = QIF.from_formula(
        "y ~ 0 + x1 + x2",
        groups="groups",
        cov_struct=cov_struct,
        data=df,
    ).fit()

    for attr in ("params", "bse"):
        assert_allclose(getattr(array_fit, attr), getattr(formula_fit, attr))

    # The information criteria are not exercised for the independence
    # structure.
    if isinstance(cov_struct, QIFIndependence):
        return

    # Smoke test: accessing these attributes must not raise.
    _ = formula_fit.bic
    _ = formula_fit.aic
|