# Source: AIM-PIbd-32-Kurbanova-A-A/aimenv/Lib/site-packages/statsmodels/discrete/tests/test_discrete.py
# (2623 lines, 97 KiB, Python; repository snapshot dated 2024-10-02 22:15:59 +04:00)
"""
Tests for discrete models
Notes
-----
DECIMAL_3 is used because it seems that there is a loss of precision
in the Stata *.dta -> *.csv output, NOT the estimator for the Poisson
tests.
"""
# pylint: disable-msg=E1101
from statsmodels.compat.pandas import assert_index_equal
import os
import warnings
import numpy as np
from numpy.testing import (
assert_,
assert_allclose,
assert_almost_equal,
assert_array_equal,
assert_array_less,
assert_equal,
assert_raises,
)
import pandas as pd
import pytest
from scipy import stats
from scipy.stats import nbinom
import statsmodels.api as sm
from statsmodels.discrete.discrete_margins import _iscount, _isdummy
from statsmodels.discrete.discrete_model import (
CountModel,
GeneralizedPoisson,
Logit,
MNLogit,
NegativeBinomial,
NegativeBinomialP,
Poisson,
Probit,
)
import statsmodels.formula.api as smf
from statsmodels.tools.sm_exceptions import (
ConvergenceWarning,
PerfectSeparationError,
SpecificationWarning,
ValueWarning,
)
from .results.results_discrete import Anes, DiscreteL1, RandHIE, Spector
# cvxopt is an optional dependency: record whether it can be imported so the
# cvxopt-based test class can be skipped when it is missing.
try:
    import cvxopt  # noqa:F401
    has_cvxopt = True
except ImportError:
    has_cvxopt = False

# Number of decimal places handed to numpy.testing.assert_almost_equal
# by the checks in this module.
DECIMAL_14 = 14
DECIMAL_10 = 10
DECIMAL_9 = 9
DECIMAL_4 = 4
DECIMAL_3 = 3
DECIMAL_2 = 2
DECIMAL_1 = 1
DECIMAL_0 = 0
def load_anes96():
    """Load the anes96 dataset with ``endog`` and ``exog`` converted to ndarrays."""
    dataset = sm.datasets.anes96.load()
    for attr in ("endog", "exog"):
        setattr(dataset, attr, np.asarray(getattr(dataset, attr)))
    return dataset
def load_spector():
    """Load the spector dataset with ``endog`` and ``exog`` converted to ndarrays."""
    dataset = sm.datasets.spector.load()
    for attr in ("endog", "exog"):
        setattr(dataset, attr, np.asarray(getattr(dataset, attr)))
    return dataset
def load_randhie():
    """Load the randhie dataset as ndarrays; ``exog`` is cast to float."""
    dataset = sm.datasets.randhie.load()
    response = np.asarray(dataset.endog)
    dataset.endog = response
    regressors = np.asarray(dataset.exog, dtype=float)
    dataset.exog = regressors
    return dataset
def check_jac(self, res=None):
    """Cross-check score and Hessian building blocks of a fitted model.

    Parameters
    ----------
    self : object
        Test-case instance; ``self.res1`` is checked when ``res`` is None.
    res : results instance, optional
        Results to check instead of ``self.res1``.

    Notes
    -----
    Only the first check (summed ``score_obs`` equals ``score``) is run for
    ``NegativeBinomial`` and ``MNLogit`` models.
    """
    # moved from CheckModelResults
    results = self.res1 if res is None else res
    design = results.model.exog

    # basic cross check: per-observation scores must sum to the total score
    summed_obs_score = results.model.score_obs(results.params).sum(0)
    total_score = results.model.score(results.params)
    # Poisson has low precision ?
    assert_almost_equal(summed_obs_score, total_score, DECIMAL_9)

    if isinstance(results.model, (NegativeBinomial, MNLogit)):
        # skip the rest
        return

    # check score_factor
    # TODO: change when score_obs uses score_factor for DRYing
    obs_score = results.model.score_obs(results.params)
    factor = results.model.score_factor(results.params)
    if isinstance(factor, tuple):
        # tuple form: factor for the exog part plus extra score columns
        exog_factor, extra_cols = factor
        rebuilt_score = np.column_stack(
            (exog_factor[:, None] * design, extra_cols))
    else:
        rebuilt_score = factor[:, None] * design
    assert_allclose(rebuilt_score, obs_score, rtol=1e-10)

    # check hessian_factor
    hessian = results.model.hessian(results.params)
    factor = results.model.hessian_factor(results.params)
    if isinstance(factor, tuple):
        # tuple form: assemble the block matrix [[h00, h10'], [h10, h11]]
        f00, f10, f11 = factor
        block00 = (f00 * design.T).dot(design)
        block10 = np.atleast_2d(f10.T.dot(design))
        block11 = np.atleast_2d(f11.sum(0))
        upper = np.column_stack((block00, block10.T))
        lower = np.column_stack((block10, block11))
        rebuilt_hessian = np.vstack((upper, lower))
    else:
        rebuilt_hessian = (factor * design.T).dot(design)
    assert_allclose(rebuilt_hessian, hessian, rtol=1e-10)
def check_distr(res):
    """Check that predicted moments agree with the fitted distribution.

    The predicted mean is compared with the mean of both
    ``res.get_distribution()`` and ``res.model.get_distribution(res.params)``;
    the predicted variance is compared with the variance of the former only.
    """
    from_results = res.get_distribution()
    from_model = res.model.get_distribution(res.params)

    predicted_mean = res.predict()
    for dist_mean in (from_results.mean(), from_model.mean()):
        assert_allclose(predicted_mean, np.squeeze(dist_mean), rtol=1e-10)

    predicted_var = res.predict(which="var")
    assert_allclose(predicted_var, np.squeeze(from_results.var()),
                    rtol=1e-10)
class CheckModelMixin:
    # Checks on the Model object itself rather than on the Results.
    # The class this is mixed into has to provide:
    #     res1

    def test_fit_regularized_invalid_method(self):
        # GH#5224 an invalid "method" argument must raise ValueError
        fit_regularized = self.res1.model.fit_regularized
        expected_message = r'is not supported, use either'
        with pytest.raises(ValueError, match=expected_message):
            fit_regularized(method="foo")
class CheckModelResults(CheckModelMixin):
    """
    Compare fitted results ``res1`` against reference results ``res2``.

    res2 should be the test results from RModelWrap
    or the results as defined in model_results_data
    """

    def test_params(self):
        assert_almost_equal(self.res1.params, self.res2.params, DECIMAL_4)

    def test_conf_int(self):
        assert_allclose(self.res1.conf_int(), self.res2.conf_int, rtol=8e-5)

    def test_zstat(self):
        assert_almost_equal(self.res1.tvalues, self.res2.z, DECIMAL_4)

    def test_pvalues(self):
        assert_almost_equal(self.res1.pvalues, self.res2.pvalues, DECIMAL_4)

    def test_cov_params(self):
        if not hasattr(self.res2, "cov_params"):
            # pytest.skip raises the skip exception itself, so no ``raise``
            # statement is needed in front of it.
            pytest.skip("TODO: implement res2.cov_params")
        assert_almost_equal(self.res1.cov_params(),
                            self.res2.cov_params,
                            DECIMAL_4)

    def test_llf(self):
        assert_almost_equal(self.res1.llf, self.res2.llf, DECIMAL_4)

    def test_llnull(self):
        assert_almost_equal(self.res1.llnull, self.res2.llnull, DECIMAL_4)

    def test_llr(self):
        assert_almost_equal(self.res1.llr, self.res2.llr, DECIMAL_3)

    def test_llr_pvalue(self):
        assert_almost_equal(self.res1.llr_pvalue,
                            self.res2.llr_pvalue,
                            DECIMAL_4)

    @pytest.mark.xfail(reason="Test has not been implemented for this class.",
                       strict=True, raises=NotImplementedError)
    def test_normalized_cov_params(self):
        raise NotImplementedError

    def test_bse(self):
        assert_almost_equal(self.res1.bse, self.res2.bse, DECIMAL_4)

    def test_dof(self):
        assert_equal(self.res1.df_model, self.res2.df_model)
        assert_equal(self.res1.df_resid, self.res2.df_resid)

    def test_aic(self):
        assert_almost_equal(self.res1.aic, self.res2.aic, DECIMAL_3)

    def test_bic(self):
        assert_almost_equal(self.res1.bic, self.res2.bic, DECIMAL_3)

    def test_predict(self):
        predicted = self.res1.model.predict(self.res1.params)
        assert_almost_equal(predicted, self.res2.phat, DECIMAL_4)

    def test_predict_xb(self):
        linear_pred = self.res1.model.predict(self.res1.params,
                                              which="linear")
        assert_almost_equal(linear_pred, self.res2.yhat, DECIMAL_4)

    def test_loglikeobs(self):
        # basic cross check: per-observation loglike sums to llf
        llobssum = self.res1.model.loglikeobs(self.res1.params).sum()
        assert_almost_equal(llobssum, self.res1.llf, DECIMAL_14)

    def test_jac(self):
        check_jac(self)

    def test_summary_latex(self):
        # see #7747, last line of top table was dropped
        ltx = self.res1.summary().as_latex()
        n_lines = len(ltx.splitlines())
        if not isinstance(self.res1.model, MNLogit):
            # skip MNLogit which creates several params tables
            assert n_lines == 19 + np.size(self.res1.params)
        assert "Covariance Type:" in ltx

    def test_distr(self):
        check_distr(self.res1)
class CheckBinaryResults(CheckModelResults):
    """Additional result checks used by the binary-model test classes."""

    def test_pred_table(self):
        computed = self.res1.pred_table()
        assert_array_equal(computed, self.res2.pred_table)

    def test_resid_dev(self):
        computed, reference = self.res1.resid_dev, self.res2.resid_dev
        assert_almost_equal(computed, reference, DECIMAL_4)

    def test_resid_generalized(self):
        computed = self.res1.resid_generalized
        reference = self.res2.resid_generalized
        assert_almost_equal(computed, reference, DECIMAL_4)

    @pytest.mark.smoke
    def test_resid_response(self):
        # smoke test: only the attribute access is exercised
        self.res1.resid_response
class CheckMargEff:
    """
    Test marginal effects (margeff) and its options

    Each test calls ``self.res1.get_margeff`` with one option combination and
    compares ``margeff`` and ``margeff_se`` with the reference attributes
    ``margeff_<name>`` and ``margeff_<name>_se`` of ``self.res2``.
    """

    def _check_margeff(self, name, **margeff_kwargs):
        """Compare marginal effects and their standard errors with ``res2``.

        Parameters
        ----------
        name : str
            Suffix of the reference attributes; ``res2.margeff_<name>`` holds
            the effects and ``res2.margeff_<name>_se`` the standard errors.
        **margeff_kwargs
            Passed unchanged to ``self.res1.get_margeff``.

        Returns
        -------
        The object returned by ``get_margeff``, for further checks.
        """
        me = self.res1.get_margeff(**margeff_kwargs)
        assert_almost_equal(me.margeff,
                            getattr(self.res2, "margeff_" + name),
                            DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            getattr(self.res2, "margeff_" + name + "_se"),
                            DECIMAL_4)
        return me

    def test_nodummy_dydxoverall(self):
        me = self._check_margeff("nodummy_dydx")
        # the summary frame must report the same effects
        me_frame = me.summary_frame()
        eff = me_frame["dy/dx"].values
        assert_allclose(eff, me.margeff, rtol=1e-13)
        assert_equal(me_frame.shape, (me.margeff.size, 6))

    def test_nodummy_dydxmean(self):
        self._check_margeff("nodummy_dydxmean", at='mean')

    def test_nodummy_dydxmedian(self):
        self._check_margeff("nodummy_dydxmedian", at='median')

    def test_nodummy_dydxzero(self):
        # The standard errors used to be compared with the effects
        # (``margeff_nodummy_dydxzero``) instead of the ``_se`` reference.
        self._check_margeff("nodummy_dydxzero", at='zero')

    def test_nodummy_dyexoverall(self):
        self._check_margeff("nodummy_dyex", method='dyex')

    def test_nodummy_dyexmean(self):
        self._check_margeff("nodummy_dyexmean", at='mean', method='dyex')

    def test_nodummy_dyexmedian(self):
        self._check_margeff("nodummy_dyexmedian", at='median', method='dyex')

    def test_nodummy_dyexzero(self):
        self._check_margeff("nodummy_dyexzero", at='zero', method='dyex')

    def test_nodummy_eydxoverall(self):
        self._check_margeff("nodummy_eydx", method='eydx')

    def test_nodummy_eydxmean(self):
        self._check_margeff("nodummy_eydxmean", at='mean', method='eydx')

    def test_nodummy_eydxmedian(self):
        self._check_margeff("nodummy_eydxmedian", at='median', method='eydx')

    def test_nodummy_eydxzero(self):
        self._check_margeff("nodummy_eydxzero", at='zero', method='eydx')

    def test_nodummy_eyexoverall(self):
        self._check_margeff("nodummy_eyex", method='eyex')

    def test_nodummy_eyexmean(self):
        self._check_margeff("nodummy_eyexmean", at='mean', method='eyex')

    def test_nodummy_eyexmedian(self):
        self._check_margeff("nodummy_eyexmedian", at='median', method='eyex')

    def test_nodummy_eyexzero(self):
        self._check_margeff("nodummy_eyexzero", at='zero', method='eyex')

    def test_dummy_dydxoverall(self):
        self._check_margeff("dummy_dydx", dummy=True)

    def test_dummy_dydxmean(self):
        self._check_margeff("dummy_dydxmean", at='mean', dummy=True)

    def test_dummy_eydxoverall(self):
        self._check_margeff("dummy_eydx", method='eydx', dummy=True)

    def test_dummy_eydxmean(self):
        self._check_margeff("dummy_eydxmean", at='mean', method='eydx',
                            dummy=True)

    def test_count_dydxoverall(self):
        self._check_margeff("count_dydx", count=True)

    def test_count_dydxmean(self):
        self._check_margeff("count_dydxmean", count=True, at='mean')

    def test_count_dummy_dydxoverall(self):
        self._check_margeff("count_dummy_dydxoverall", count=True, dummy=True)

    def test_count_dummy_dydxmean(self):
        self._check_margeff("count_dummy_dydxmean", count=True, dummy=True,
                            at='mean')
class TestProbitNewton(CheckBinaryResults):
    """Probit on the spector data, fitted with ``method="newton"``."""

    @classmethod
    def setup_class(cls):
        spector = load_spector()
        spector.exog = sm.add_constant(spector.exog, prepend=False)
        model = Probit(spector.endog, spector.exog)
        cls.res1 = model.fit(method="newton", disp=0)
        cls.res2 = Spector.probit

    def test_init_kwargs(self):
        fitted_model = self.res1.model
        endog, exog = fitted_model.endog, fitted_model.exog
        unit_weights = np.ones(len(endog))
        with pytest.warns(ValueWarning, match="unknown kwargs"):
            # unsupported keyword
            Probit(endog, exog, weights=unit_weights)
class TestProbitBFGS(CheckBinaryResults):
    """Probit on the spector data, fitted with ``method="bfgs"``."""

    @classmethod
    def setup_class(cls):
        spector = load_spector()
        spector.exog = sm.add_constant(spector.exog, prepend=False)
        model = Probit(spector.endog, spector.exog)
        cls.res1 = model.fit(method="bfgs", disp=0)
        cls.res2 = Spector.probit
class TestProbitNM(CheckBinaryResults):
    """Probit on the spector data, fitted with ``method="nm"``."""

    @classmethod
    def setup_class(cls):
        spector = load_spector()
        spector.exog = sm.add_constant(spector.exog, prepend=False)
        cls.res2 = Spector.probit
        model = Probit(spector.endog, spector.exog)
        cls.res1 = model.fit(method="nm", disp=0, maxiter=500)
class TestProbitPowell(CheckBinaryResults):
    """Probit on the spector data, fitted with ``method="powell"``."""

    @classmethod
    def setup_class(cls):
        spector = load_spector()
        spector.exog = sm.add_constant(spector.exog, prepend=False)
        cls.res2 = Spector.probit
        model = Probit(spector.endog, spector.exog)
        cls.res1 = model.fit(method="powell", disp=0, ftol=1e-8)
class TestProbitCG(CheckBinaryResults):
    """Probit on the spector data, fitted with ``method="cg"``.

    A first fit on standardized regressors supplies the starting values for
    the fit on the original data.
    """

    @classmethod
    def setup_class(cls):
        spector = load_spector()
        spector.exog = sm.add_constant(spector.exog, prepend=False)
        reference = Spector.probit
        cls.res2 = reference

        # fmin_cg fails to converge on some machines - reparameterize
        from statsmodels.tools.transform_model import StandardizeTransform
        standardize = StandardizeTransform(spector.exog)
        standardized_exog = standardize(spector.exog)
        fit_standardized = Probit(spector.endog, standardized_exog).fit(
            method="cg", disp=0, maxiter=1000, gtol=1e-08)

        # map the estimates back to the scale of the original regressors
        start_params = standardize.transform_params(fit_standardized.params)
        assert_allclose(start_params, reference.params, rtol=1e-5, atol=1e-6)

        model = Probit(spector.endog, spector.exog)
        cls.res1 = model.fit(start_params=start_params, method="cg",
                             maxiter=1000, gtol=1e-05, disp=0)
        # started at the solution, so few function calls are expected
        assert_array_less(cls.res1.mle_retvals['fcalls'], 100)
class TestProbitNCG(CheckBinaryResults):
    """Probit on the spector data, fitted with ``method="ncg"``."""

    @classmethod
    def setup_class(cls):
        spector = load_spector()
        spector.exog = sm.add_constant(spector.exog, prepend=False)
        cls.res2 = Spector.probit
        model = Probit(spector.endog, spector.exog)
        # converges close enough but warnflag is 2 for precision loss,
        # hence warn_convergence=False
        cls.res1 = model.fit(method="ncg", disp=0, avextol=1e-8,
                             warn_convergence=False)
class TestProbitBasinhopping(CheckBinaryResults):
    """Probit on the spector data, fitted with ``method="basinhopping"``."""

    @classmethod
    def setup_class(cls):
        spector = load_spector()
        spector.exog = sm.add_constant(spector.exog, prepend=False)
        cls.res2 = Spector.probit
        model_fit = Probit(spector.endog, spector.exog).fit
        local_minimizer = {'method': 'L-BFGS-B', 'tol': 1e-8}
        # seed numpy's global RNG right before the fit
        np.random.seed(1)
        cls.res1 = model_fit(method="basinhopping", disp=0, niter=5,
                             minimizer=local_minimizer)
class TestProbitMinimizeDefault(CheckBinaryResults):
    """Probit on the spector data, ``method="minimize"`` without ``min_method``."""

    @classmethod
    def setup_class(cls):
        spector = load_spector()
        spector.exog = sm.add_constant(spector.exog, prepend=False)
        cls.res2 = Spector.probit
        fit_options = dict(method="minimize", disp=0, niter=5, tol=1e-8)
        cls.res1 = Probit(spector.endog, spector.exog).fit(**fit_options)
class TestProbitMinimizeDogleg(CheckBinaryResults):
    """Probit on the spector data, ``method="minimize"`` with ``min_method='dogleg'``."""

    @classmethod
    def setup_class(cls):
        spector = load_spector()
        spector.exog = sm.add_constant(spector.exog, prepend=False)
        cls.res2 = Spector.probit
        fit_options = dict(method="minimize", disp=0, niter=5, tol=1e-8,
                           min_method='dogleg')
        cls.res1 = Probit(spector.endog, spector.exog).fit(**fit_options)
class TestProbitMinimizeAdditionalOptions(CheckBinaryResults):
    """Probit via ``method="minimize"`` with Nelder-Mead and extra tolerances."""

    @classmethod
    def setup_class(cls):
        spector = load_spector()
        spector.exog = sm.add_constant(spector.exog, prepend=False)
        cls.res2 = Spector.probit
        fit_options = dict(method="minimize", disp=0, maxiter=500,
                           min_method='Nelder-Mead',
                           xatol=1e-4, fatol=1e-4)
        cls.res1 = Probit(spector.endog, spector.exog).fit(**fit_options)
class CheckLikelihoodModelL1:
    """
    Checks for results generated with L1 regularization.

    ``res1`` holds the fitted results and ``res2`` the reference values.
    """

    def test_params(self):
        fitted, reference = self.res1, self.res2
        assert_almost_equal(fitted.params, reference.params, DECIMAL_4)

    def test_conf_int(self):
        fitted, reference = self.res1, self.res2
        assert_almost_equal(fitted.conf_int(), reference.conf_int, DECIMAL_4)

    def test_bse(self):
        fitted, reference = self.res1, self.res2
        assert_almost_equal(fitted.bse, reference.bse, DECIMAL_4)

    def test_nnz_params(self):
        fitted, reference = self.res1, self.res2
        assert_almost_equal(fitted.nnz_params, reference.nnz_params,
                            DECIMAL_4)

    def test_aic(self):
        fitted, reference = self.res1, self.res2
        assert_almost_equal(fitted.aic, reference.aic, DECIMAL_3)

    def test_bic(self):
        fitted, reference = self.res1, self.res2
        assert_almost_equal(fitted.bic, reference.bic, DECIMAL_3)
class TestProbitL1(CheckLikelihoodModelL1):
    """L1-regularized Probit on the spector data."""

    @classmethod
    def setup_class(cls):
        spector = load_spector()
        spector.exog = sm.add_constant(spector.exog, prepend=True)
        # one penalty weight per column; not scaled by the number of rows
        penalty = np.array([0.1, 0.2, 0.3, 10])
        model = Probit(spector.endog, spector.exog)
        cls.res1 = model.fit_regularized(
            method="l1", alpha=penalty, disp=0, trim_mode='auto',
            auto_trim_tol=0.02, acc=1e-10, maxiter=1000)
        cls.res2 = DiscreteL1.probit

    def test_cov_params(self):
        computed = self.res1.cov_params()
        assert_almost_equal(computed, self.res2.cov_params, DECIMAL_4)
class TestMNLogitL1(CheckLikelihoodModelL1):
    """L1-regularized MNLogit on the anes96 data."""

    @classmethod
    def setup_class(cls):
        anes = load_anes96()
        exog_with_const = sm.add_constant(anes.exog, prepend=False)
        model = sm.MNLogit(anes.endog, exog_with_const)
        # penalty of 10 everywhere except the last row, which is unpenalized;
        # not scaled by the number of observations
        penalty = 10. * np.ones((model.J - 1, model.K))
        penalty[-1, :] = 0
        cls.res1 = model.fit_regularized(
            method='l1', alpha=penalty, trim_mode='auto', auto_trim_tol=0.02,
            acc=1e-10, disp=0)
        cls.res2 = DiscreteL1.mnlogit
class TestLogitL1(CheckLikelihoodModelL1):
    """L1-regularized Logit on the spector data (``trim_mode='size'``)."""

    @classmethod
    def setup_class(cls):
        spector = load_spector()
        spector.exog = sm.add_constant(spector.exog, prepend=True)
        # first column (the prepended constant) is left unpenalized;
        # not scaled by the number of rows
        cls.alpha = 3 * np.array([0., 1., 1., 1.])
        model = Logit(spector.endog, spector.exog)
        cls.res1 = model.fit_regularized(
            method="l1", alpha=cls.alpha, disp=0, trim_mode='size',
            size_trim_tol=1e-5, acc=1e-10, maxiter=1000)
        cls.res2 = DiscreteL1.logit

    def test_cov_params(self):
        computed = self.res1.cov_params()
        assert_almost_equal(computed, self.res2.cov_params, DECIMAL_4)
@pytest.mark.skipif(not has_cvxopt,
                    reason='Skipped test_cvxopt since cvxopt '
                           'is not available')
class TestCVXOPT:
    """Compare the cvxopt-based L1 solver with the default slsqp-based one."""

    @classmethod
    def setup_class(cls):
        if not has_cvxopt:
            pytest.skip('Skipped test_cvxopt since cvxopt is not available')
        spector = sm.datasets.spector.load()
        cls.data = spector
        spector.endog = np.asarray(spector.endog)
        spector.exog = sm.add_constant(np.asarray(spector.exog), prepend=True)

    def test_cvxopt_versus_slsqp(self):
        # Compares results from cvxopt to the standard slsqp
        # (penalty is not scaled by the number of observations)
        self.alpha = 3. * np.array([0, 1, 1, 1.])
        endog, exog = self.data.endog, self.data.exog

        res_slsqp = Logit(endog, exog).fit_regularized(
            method="l1", alpha=self.alpha, disp=0, acc=1e-10, maxiter=1000,
            trim_mode='auto')
        res_cvxopt = Logit(endog, exog).fit_regularized(
            method="l1_cvxopt_cp", alpha=self.alpha, disp=0, abstol=1e-10,
            trim_mode='auto', auto_trim_tol=0.01, maxiter=1000)

        assert_almost_equal(res_slsqp.params, res_cvxopt.params, DECIMAL_4)
class TestSweepAlphaL1:
    """Fit an L1-regularized Logit for several penalty vectors."""

    @classmethod
    def setup_class(cls):
        spector = load_spector()
        spector.exog = sm.add_constant(spector.exog, prepend=True)
        cls.model = Logit(spector.endog, spector.exog)
        # one penalty vector per row; not scaled by the number of rows in exog
        cls.alphas = np.array([
            [0.1, 0.1, 0.1, 0.1],
            [0.4, 0.4, 0.5, 0.5],
            [0.5, 0.5, 1, 1],
        ])
        cls.res1 = DiscreteL1.sweep

    def test_sweep_alpha(self):
        # alphas has three rows; each is checked against the matching
        # row of the reference parameters
        for row, penalty in enumerate(self.alphas):
            fitted = self.model.fit_regularized(
                method="l1", alpha=penalty, disp=0, acc=1e-10,
                trim_mode='off', maxiter=1000)
            assert_almost_equal(fitted.params, self.res1.params[row],
                                DECIMAL_4)
class CheckL1Compatability:
    """
    Tests compatibility between l1 and unregularized fits by setting alpha
    such that certain parameters should be effectively unregularized, and
    others should be ignored by the model.

    Subclasses provide ``res_reg``, ``res_unreg``, ``kvars`` (number of
    variables), ``m`` (number of unregularized parameters) and optionally
    ``k_extra``.
    """

    def test_params(self):
        n_free = self.m
        regularized = self.res_reg
        assert_almost_equal(self.res_unreg.params[:n_free],
                            regularized.params[:n_free], DECIMAL_4)
        # The penalized entries should be close to zero;
        # slicing up to the exog width handles the extra parameter of
        # NegativeBinomial
        n_exog = regularized.model.exog.shape[1]
        assert_almost_equal(0, regularized.params[n_free:n_exog], DECIMAL_4)

    def test_cov_params(self):
        n_free = self.m
        # The restricted cov_params should be equal
        cov_unreg = self.res_unreg.cov_params()[:n_free, :n_free]
        cov_reg = self.res_reg.cov_params()[:n_free, :n_free]
        assert_almost_equal(cov_unreg, cov_reg, DECIMAL_1)

    def test_df(self):
        for attr in ("df_model", "df_resid"):
            assert_equal(getattr(self.res_unreg, attr),
                         getattr(self.res_reg, attr))

    def test_t_test(self):
        n_free = self.m
        # handle extra parameter of NegativeBinomial
        n_reg_params = self.kvars + getattr(self, 'k_extra', 0)
        t_unreg = self.res_unreg.t_test(np.eye(len(self.res_unreg.params)))
        t_reg = self.res_reg.t_test(np.eye(n_reg_params))

        assert_almost_equal(t_unreg.effect[:n_free], t_reg.effect[:n_free],
                            DECIMAL_3)
        assert_almost_equal(t_unreg.sd[:n_free], t_reg.sd[:n_free],
                            DECIMAL_3)
        # first penalized parameter: sd is expected to be nan
        assert_almost_equal(np.nan, t_reg.sd[n_free])
        assert_allclose(t_unreg.tvalue[:n_free], t_reg.tvalue[:n_free],
                        atol=3e-3)
        assert_almost_equal(np.nan, t_reg.tvalue[n_free])

    def test_f_test(self):
        n_free = self.m
        # handle extra parameter of NegativeBinomial
        n_reg_params = self.kvars + getattr(self, 'k_extra', 0)
        restriction_unreg = np.eye(len(self.res_unreg.params))[:n_free]
        f_unreg = self.res_unreg.f_test(restriction_unreg)
        restriction_reg = np.eye(n_reg_params)[:n_free]
        f_reg = self.res_reg.f_test(restriction_reg)

        assert_allclose(f_unreg.fvalue, f_reg.fvalue, rtol=3e-5, atol=1e-3)
        assert_almost_equal(f_unreg.pvalue, f_reg.pvalue, DECIMAL_3)

    def test_bad_r_matrix(self):
        f_test = self.res_reg.f_test
        full_restriction = np.eye(self.kvars)
        with assert_raises(ValueError):
            f_test(full_restriction)
class TestPoissonL1Compatability(CheckL1Compatability):
    """Poisson on the randhie data: penalized columns versus dropped columns."""

    @classmethod
    def setup_class(cls):
        cls.kvars = 10  # Number of variables
        cls.m = 7  # Number of unregularized parameters
        randhie = load_randhie()
        n_obs = len(randhie.exog)
        exog = randhie.exog.view(float).reshape(n_obs, -1)
        exog = sm.add_constant(exog, prepend=True)

        # Drop some columns and do an unregularized fit
        unreg_model = sm.Poisson(randhie.endog, exog[:, :cls.m])
        cls.res_unreg = unreg_model.fit(method="newton", disp=False)

        # Do a regularized fit with alpha, effectively dropping the
        # columns after the first m
        penalty = 10 * len(randhie.endog) * np.ones(cls.kvars)
        penalty[:cls.m] = 0
        reg_model = sm.Poisson(randhie.endog, exog)
        cls.res_reg = reg_model.fit_regularized(
            method='l1', alpha=penalty, disp=False, acc=1e-10, maxiter=2000,
            trim_mode='auto')
class TestNegativeBinomialL1Compatability(CheckL1Compatability):
    """NegativeBinomial on standardized randhie regressors."""

    @classmethod
    def setup_class(cls):
        cls.kvars = 10  # Number of variables
        cls.m = 7  # Number of unregularized parameters
        randhie = load_randhie()
        n_obs = len(randhie.exog)
        raw_exog = randhie.exog.view(float).reshape(n_obs, -1)
        # standardize the columns before adding the constant
        standardized = (raw_exog - raw_exog.mean(0)) / raw_exog.std(0)
        exog = sm.add_constant(standardized, prepend=True)

        # Drop some columns and do an unregularized fit
        unreg_model = sm.NegativeBinomial(randhie.endog, exog[:, :cls.m])
        cls.res_unreg = unreg_model.fit(method="newton", disp=False)

        # Do a regularized fit with alpha, effectively dropping the
        # columns after the first m
        penalty = 10 * len(randhie.endog) * np.ones(cls.kvars + 1)
        penalty[:cls.m] = 0
        penalty[-1] = 0  # do not penalize alpha
        reg_model = sm.NegativeBinomial(randhie.endog, exog)
        cls.res_reg = reg_model.fit_regularized(
            method='l1', alpha=penalty, disp=False, acc=1e-10, maxiter=2000,
            trim_mode='auto')
        cls.k_extra = 1  # 1 extra parameter in nb2
class TestNegativeBinomialGeoL1Compatability(CheckL1Compatability):
    """NegativeBinomial with ``loglike_method='geometric'`` on the randhie data."""

    @classmethod
    def setup_class(cls):
        cls.kvars = 10  # Number of variables
        cls.m = 7  # Number of unregularized parameters
        randhie = load_randhie()
        n_obs = len(randhie.exog)
        exog = randhie.exog.view(float).reshape(n_obs, -1)
        exog = sm.add_constant(exog, prepend=True)

        # Drop some columns and do an unregularized fit
        unreg_model = sm.NegativeBinomial(randhie.endog, exog[:, :cls.m],
                                          loglike_method='geometric')
        cls.res_unreg = unreg_model.fit(method="newton", disp=False)

        # Do a regularized fit with alpha, effectively dropping the
        # last columns
        penalty = 10 * len(randhie.endog) * np.ones(cls.kvars)
        penalty[:cls.m] = 0
        reg_model = sm.NegativeBinomial(randhie.endog, exog,
                                        loglike_method='geometric')
        cls.res_reg = reg_model.fit_regularized(
            method='l1', alpha=penalty, disp=False, acc=1e-10, maxiter=2000,
            trim_mode='auto')
        assert_equal(reg_model.loglike_method, 'geometric')
class TestLogitL1Compatability(CheckL1Compatability):
    """Logit on the spector data: penalized last column versus dropped column."""

    @classmethod
    def setup_class(cls):
        cls.kvars = 4  # Number of variables
        cls.m = 3  # Number of unregularized parameters
        spector = load_spector()
        spector.exog = sm.add_constant(spector.exog, prepend=True)

        # Do a regularized fit with alpha, effectively dropping the last column
        penalty = np.array([0, 0, 0, 10])
        reg_model = Logit(spector.endog, spector.exog)
        cls.res_reg = reg_model.fit_regularized(
            method="l1", alpha=penalty, disp=0, acc=1e-15, maxiter=2000,
            trim_mode='auto')

        # Actually drop the last column and do an unregularized fit
        unreg_model = Logit(spector.endog, spector.exog[:, :cls.m])
        cls.res_unreg = unreg_model.fit(disp=0, tol=1e-15)
class TestMNLogitL1Compatability(CheckL1Compatability):
    """MNLogit on the spector data: penalized last column versus dropped column."""

    @classmethod
    def setup_class(cls):
        cls.kvars = 4  # Number of variables
        cls.m = 3  # Number of unregularized parameters
        spector = load_spector()
        spector.exog = sm.add_constant(spector.exog, prepend=True)

        penalty = np.array([0, 0, 0, 10])
        reg_model = MNLogit(spector.endog, spector.exog)
        cls.res_reg = reg_model.fit_regularized(
            method="l1", alpha=penalty, disp=0, acc=1e-15, maxiter=2000,
            trim_mode='auto')

        # Actually drop the last column and do an unregularized fit
        unreg_model = MNLogit(spector.endog, spector.exog[:, :cls.m])
        cls.res_unreg = unreg_model.fit(
            disp=0, gtol=1e-15, method='bfgs', maxiter=1000)

    def test_t_test(self):
        n_free = self.m
        t_unreg = self.res_unreg.t_test(np.eye(n_free))
        t_reg = self.res_reg.t_test(np.eye(self.kvars))

        assert_almost_equal(t_unreg.effect, t_reg.effect[:n_free], DECIMAL_3)
        assert_almost_equal(t_unreg.sd, t_reg.sd[:n_free], DECIMAL_3)
        # sd of the penalized parameter is expected to be nan
        assert_almost_equal(np.nan, t_reg.sd[n_free])
        assert_almost_equal(t_unreg.tvalue, t_reg.tvalue[:n_free], DECIMAL_3)

    @pytest.mark.skip("Skipped test_f_test for MNLogit")
    def test_f_test(self):
        pass
class TestProbitL1Compatability(CheckL1Compatability):
    """Probit on the spector data: penalized last column versus dropped column."""

    @classmethod
    def setup_class(cls):
        cls.kvars = 4  # Number of variables
        cls.m = 3  # Number of unregularized parameters
        spector = load_spector()
        spector.exog = sm.add_constant(spector.exog, prepend=True)

        penalty = np.array([0, 0, 0, 10])
        reg_model = Probit(spector.endog, spector.exog)
        cls.res_reg = reg_model.fit_regularized(
            method="l1", alpha=penalty, disp=0, acc=1e-15, maxiter=2000,
            trim_mode='auto')

        # Actually drop the last column and do an unregularized fit
        unreg_model = Probit(spector.endog, spector.exog[:, :cls.m])
        cls.res_unreg = unreg_model.fit(disp=0, tol=1e-15)
class CompareL1:
    """
    For checking results for l1 regularization.

    Assumes self.res1 and self.res2 are two legitimate models to be compared.
    """

    def test_basic_results(self):
        res1, res2 = self.res1, self.res2
        assert_almost_equal(res1.params, res2.params, DECIMAL_4)
        assert_almost_equal(res1.cov_params(), res2.cov_params(), DECIMAL_4)
        assert_almost_equal(res1.conf_int(), res2.conf_int(), DECIMAL_4)
        # pvalues used to be compared twice; once is enough
        assert_almost_equal(res1.pvalues, res2.pvalues, DECIMAL_4)
        assert_almost_equal(res1.pred_table(), res2.pred_table(), DECIMAL_4)
        assert_almost_equal(res1.bse, res2.bse, DECIMAL_4)
        assert_almost_equal(res1.llf, res2.llf, DECIMAL_4)
        assert_almost_equal(res1.aic, res2.aic, DECIMAL_4)
        assert_almost_equal(res1.bic, res2.bic, DECIMAL_4)
        # identity check: the flag must be the bool True, not just truthy
        assert_(res1.mle_retvals['converged'] is True)
class CompareL11D(CompareL1):
    """
    Check t and f tests. This only works for 1-d results
    """

    def test_tests(self):
        # identity restriction matrix: one row per parameter
        n_params = len(self.res1.params.ravel())
        restrictions = np.eye(n_params)

        t_pvalue_1 = self.res1.t_test(restrictions).pvalue
        t_pvalue_2 = self.res2.t_test(restrictions).pvalue
        assert_almost_equal(t_pvalue_1, t_pvalue_2, DECIMAL_4)

        f_pvalue_1 = self.res1.f_test(restrictions).pvalue
        f_pvalue_2 = self.res2.f_test(restrictions).pvalue
        assert_almost_equal(f_pvalue_1, f_pvalue_2, DECIMAL_4)
class TestL1AlphaZeroLogit(CompareL11D):
    # Compares l1 model with alpha = 0 to the unregularized model.

    @classmethod
    def setup_class(cls):
        spector = load_spector()
        spector.exog = sm.add_constant(spector.exog, prepend=True)
        endog, exog = spector.endog, spector.exog
        cls.res1 = Logit(endog, exog).fit_regularized(
            method="l1", alpha=0, disp=0, acc=1e-15, maxiter=1000,
            trim_mode='auto', auto_trim_tol=0.01)
        cls.res2 = Logit(endog, exog).fit(disp=0, tol=1e-15)

    def test_converged(self):
        # see #2857: with maxiter=1 the fit must be reported as not converged
        one_iteration = self.res1.model.fit_regularized(
            method="l1", alpha=0, disp=0, acc=1e-15, maxiter=1,
            trim_mode='auto', auto_trim_tol=0.01)
        assert_(one_iteration.mle_retvals['converged'] is False)
class TestL1AlphaZeroProbit(CompareL11D):
    # Compares l1 model with alpha = 0 to the unregularized model.

    @classmethod
    def setup_class(cls):
        spector = load_spector()
        spector.exog = sm.add_constant(spector.exog, prepend=True)
        endog, exog = spector.endog, spector.exog
        cls.res1 = Probit(endog, exog).fit_regularized(
            method="l1", alpha=0, disp=0, acc=1e-15, maxiter=1000,
            trim_mode='auto', auto_trim_tol=0.01)
        cls.res2 = Probit(endog, exog).fit(disp=0, tol=1e-15)
class TestL1AlphaZeroMNLogit(CompareL1):
    """MNLogit on anes96: l1 fit with alpha = 0 versus the unregularized fit."""

    @classmethod
    def setup_class(cls):
        anes = load_anes96()
        anes.exog = sm.add_constant(anes.exog, prepend=False)
        endog, exog = anes.endog, anes.exog
        cls.res1 = MNLogit(endog, exog).fit_regularized(
            method="l1", alpha=0, disp=0, acc=1e-15, maxiter=1000,
            trim_mode='auto', auto_trim_tol=0.01)
        cls.res2 = MNLogit(endog, exog).fit(
            disp=0, gtol=1e-15, method='bfgs', maxiter=1000)
class TestLogitNewton(CheckBinaryResults, CheckMargEff):
    """Logit on the spector data (constant appended), ``method="newton"``."""

    @classmethod
    def setup_class(cls):
        spector = load_spector()
        spector.exog = sm.add_constant(spector.exog, prepend=False)
        model = Logit(spector.endog, spector.exog)
        cls.res1 = model.fit(method="newton", disp=0)
        cls.res2 = Spector.logit

    def _check_atexog(self, name, **margeff_kwargs):
        # Compare get_margeff(**margeff_kwargs) with res2.margeff_<name>
        # and res2.margeff_<name>_se.
        me = self.res1.get_margeff(**margeff_kwargs)
        assert_almost_equal(me.margeff,
                            getattr(self.res2, "margeff_" + name),
                            DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            getattr(self.res2, "margeff_" + name + "_se"),
                            DECIMAL_4)

    def test_resid_pearson(self):
        computed = self.res1.resid_pearson
        assert_almost_equal(computed, self.res2.resid_pearson, 5)

    def test_nodummy_exog1(self):
        self._check_atexog("nodummy_atexog1", atexog={0: 2.0, 2: 1.})

    def test_nodummy_exog2(self):
        self._check_atexog("nodummy_atexog2", atexog={1: 21., 2: 0},
                           at='mean')

    def test_dummy_exog1(self):
        self._check_atexog("dummy_atexog1", atexog={0: 2.0, 2: 1.},
                           dummy=True)

    def test_dummy_exog2(self):
        self._check_atexog("dummy_atexog2", atexog={1: 21., 2: 0},
                           at='mean', dummy=True)

    def test_diagnostic(self):
        # Hosmer-Lemeshow
        # Stata 14: `estat gof, group(5) table`
        import statsmodels.stats.diagnostic_gen as dia

        # reference values
        n_groups = 5
        chi2 = 1.630883318257913
        pvalue = 0.6524
        df = 3

        fitted = self.res1.predict()
        observed = self.res1.model.endog
        counts = np.column_stack((observed, 1 - observed))
        expected = np.column_stack((fitted, 1 - fitted))

        # replicate splits in Stata estat gof
        group_sizes = [7, 6, 7, 6, 6]
        split_points = np.cumsum(group_sizes)[:-1]
        res = dia.test_chisquare_binning(counts, expected, sort_var=fitted,
                                         bins=split_points, df=None)

        assert_allclose(res.statistic, chi2, rtol=1e-11)
        assert_equal(res.df, df)
        assert_allclose(res.pvalue, pvalue, atol=6e-5)
        assert_equal(res.freqs.shape, (n_groups, 2))
        assert_equal(res.freqs.sum(1), group_sizes)
class TestLogitNewtonPrepend(CheckMargEff):
    """Logit on the Spector data, Newton fit, constant in the first column.

    Regression test for bug #3695. Because ``add_constant`` is called with
    ``prepend=True`` the constant is exog column 0, and the ``atexog`` keys
    below are positions in that layout.
    """

    @classmethod
    def setup_class(cls):
        """Fit the model once and attach the Spector logit reference results."""
        data = load_spector()
        data.exog = sm.add_constant(data.exog, prepend=True)
        model = Logit(data.endog, data.exog)
        cls.res1 = model.fit(method="newton", disp=0)
        cls.res2 = Spector.logit
        # parameter positions rotated by one place
        # NOTE(review): presumably read by the inherited CheckMargEff tests
        # to reorder results - confirm
        n_params = len(cls.res1.params)
        cls.slice = np.roll(np.arange(n_params), 1)

    def test_resid_pearson(self):
        """Pearson residuals agree with the reference values to 5 decimals."""
        actual = self.res1.resid_pearson
        desired = self.res2.resid_pearson
        assert_almost_equal(actual, desired, decimal=5)

    def test_nodummy_exog1(self):
        """Marginal effects with exog columns 1 and 3 fixed via ``atexog``."""
        fixed_columns = {1: 2.0, 3: 1.}
        me = self.res1.get_margeff(atexog=fixed_columns)
        ref = self.res2
        assert_almost_equal(me.margeff, ref.margeff_nodummy_atexog1,
                            decimal=DECIMAL_4)
        assert_almost_equal(me.margeff_se, ref.margeff_nodummy_atexog1_se,
                            decimal=DECIMAL_4)

    def test_nodummy_exog2(self):
        """Marginal effects at the mean with exog columns 2 and 3 fixed."""
        fixed_columns = {2: 21., 3: 0}
        me = self.res1.get_margeff(atexog=fixed_columns, at='mean')
        ref = self.res2
        assert_almost_equal(me.margeff, ref.margeff_nodummy_atexog2,
                            decimal=DECIMAL_4)
        assert_almost_equal(me.margeff_se, ref.margeff_nodummy_atexog2_se,
                            decimal=DECIMAL_4)

    def test_dummy_exog1(self):
        """Marginal effects with ``dummy=True`` and columns 1 and 3 fixed."""
        fixed_columns = {1: 2.0, 3: 1.}
        me = self.res1.get_margeff(atexog=fixed_columns, dummy=True)
        ref = self.res2
        assert_almost_equal(me.margeff, ref.margeff_dummy_atexog1,
                            decimal=DECIMAL_4)
        assert_almost_equal(me.margeff_se, ref.margeff_dummy_atexog1_se,
                            decimal=DECIMAL_4)

    def test_dummy_exog2(self):
        """Marginal effects at the mean, ``dummy=True``, columns 2 and 3 fixed."""
        fixed_columns = {2: 21., 3: 0}
        me = self.res1.get_margeff(atexog=fixed_columns, at='mean',
                                   dummy=True)
        ref = self.res2
        assert_almost_equal(me.margeff, ref.margeff_dummy_atexog2,
                            decimal=DECIMAL_4)
        assert_almost_equal(me.margeff_se, ref.margeff_dummy_atexog2_se,
                            decimal=DECIMAL_4)
class TestLogitBFGS(CheckBinaryResults, CheckMargEff):
    """Logit on the Spector data fitted with BFGS; checks are inherited."""

    @classmethod
    def setup_class(cls):
        """Fit with the constant appended and attach the reference results."""
        data = load_spector()
        data.exog = sm.add_constant(data.exog, prepend=False)
        cls.res2 = Spector.logit
        model = Logit(data.endog, data.exog)
        cls.res1 = model.fit(method="bfgs", disp=0)
class TestPoissonNewton(CheckModelResults):
    """Poisson on the RandHIE data fitted with Newton's method."""

    @classmethod
    def setup_class(cls):
        """Fit with the constant appended and attach the reference results."""
        data = load_randhie()
        exog = sm.add_constant(data.exog, prepend=False)
        model = Poisson(data.endog, exog)
        cls.res1 = model.fit(method='newton', disp=0)
        cls.res2 = RandHIE.poisson

    def test_margeff_overall(self):
        """Default marginal effects and their standard errors."""
        me = self.res1.get_margeff()
        ref = self.res2
        assert_almost_equal(me.margeff, ref.margeff_nodummy_overall,
                            decimal=DECIMAL_4)
        assert_almost_equal(me.margeff_se, ref.margeff_nodummy_overall_se,
                            decimal=DECIMAL_4)

    def test_margeff_dummy_overall(self):
        """Marginal effects and standard errors with ``dummy=True``."""
        me = self.res1.get_margeff(dummy=True)
        ref = self.res2
        assert_almost_equal(me.margeff, ref.margeff_dummy_overall,
                            decimal=DECIMAL_4)
        assert_almost_equal(me.margeff_se, ref.margeff_dummy_overall_se,
                            decimal=DECIMAL_4)

    def test_resid(self):
        """Residuals agree with the reference values to 2 decimals."""
        assert_almost_equal(self.res1.resid, self.res2.resid, decimal=2)

    def test_predict_prob(self):
        """Predicted probabilities match the stored R results."""
        path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            "results", "predict_prob_poisson.csv")
        probs_res = np.loadtxt(path, delimiter=",")
        # just check the first 100 obs. vs R to save memory
        probs = self.res1.predict_prob()[:100]
        assert_almost_equal(probs, probs_res, decimal=8)

    @pytest.mark.xfail(reason="res2.cov_params is a zero-dim array of None",
                       strict=True)
    def test_cov_params(self):
        """Run the inherited check, which is expected to fail for this data."""
        super().test_cov_params()
class CheckNegBinMixin:
    """Test methods shared by the TestNegativeBinomialXYZ classes."""

    @pytest.mark.xfail(reason="pvalues do not match, in some cases wrong size",
                       strict=True, raises=AssertionError)
    def test_pvalues(self):
        """Compare p-values with the reference; currently expected to fail."""
        actual = self.res1.pvalues
        desired = self.res2.pvalues
        assert_almost_equal(actual, desired, decimal=DECIMAL_4)
class TestNegativeBinomialNB2Newton(CheckNegBinMixin, CheckModelResults):
    """NegativeBinomial 'nb2' on the RandHIE data fitted with Newton."""

    @classmethod
    def setup_class(cls):
        """Fit with the constant appended and attach the NB2 reference."""
        data = load_randhie()
        exog = sm.add_constant(data.exog, prepend=False)
        model = NegativeBinomial(data.endog, exog, 'nb2')
        cls.res1 = model.fit(method='newton', disp=0)
        cls.res2 = RandHIE.negativebinomial_nb2_bfgs

    # NOTE: The bse is much closer in precision to Stata
    def test_bse(self):
        """Standard errors agree to DECIMAL_3."""
        assert_almost_equal(self.res1.bse, self.res2.bse, decimal=DECIMAL_3)

    def test_params(self):
        """Parameter estimates agree to DECIMAL_4."""
        assert_almost_equal(self.res1.params, self.res2.params,
                            decimal=DECIMAL_4)

    def test_alpha(self):
        """ln(alpha) and its standard error agree to DECIMAL_4."""
        self.res1.bse  # attaches alpha_std_err
        ref = self.res2
        assert_almost_equal(self.res1.lnalpha, ref.lnalpha,
                            decimal=DECIMAL_4)
        assert_almost_equal(self.res1.lnalpha_std_err, ref.lnalpha_std_err,
                            decimal=DECIMAL_4)

    def test_conf_int(self):
        """Confidence intervals agree to DECIMAL_3."""
        assert_almost_equal(self.res1.conf_int(), self.res2.conf_int,
                            decimal=DECIMAL_3)

    def test_zstat(self):  # Low precision because Z vs. t
        """P-values, without the last entry of res1, agree to DECIMAL_2."""
        assert_almost_equal(self.res1.pvalues[:-1], self.res2.pvalues,
                            decimal=DECIMAL_2)

    def test_fittedvalues(self):
        """First ten fitted values agree to DECIMAL_3."""
        assert_almost_equal(self.res1.fittedvalues[:10],
                            self.res2.fittedvalues[:10], decimal=DECIMAL_3)

    def test_predict(self):
        """First ten predictions equal exp of the reference fitted values."""
        desired = np.exp(self.res2.fittedvalues[:10])
        assert_almost_equal(self.res1.predict()[:10], desired,
                            decimal=DECIMAL_3)

    def test_predict_xb(self):
        """First ten linear predictions equal the reference fitted values."""
        linear = self.res1.predict(which="linear")[:10]
        assert_almost_equal(linear, self.res2.fittedvalues[:10],
                            decimal=DECIMAL_3)
class TestNegativeBinomialNB1Newton(CheckNegBinMixin, CheckModelResults):
    """NegativeBinomial 'nb1' on the RandHIE data fitted with Newton."""

    @classmethod
    def setup_class(cls):
        """Fit with the constant appended and attach the NB1 reference."""
        data = load_randhie()
        exog = sm.add_constant(data.exog, prepend=False)
        model = NegativeBinomial(data.endog, exog, 'nb1')
        cls.res1 = model.fit(method="newton", maxiter=100, disp=0)
        cls.res2 = RandHIE.negativebinomial_nb1_bfgs

    def test_zstat(self):
        """t-values agree with the reference z statistics to DECIMAL_1."""
        assert_almost_equal(self.res1.tvalues, self.res2.z,
                            decimal=DECIMAL_1)

    def test_lnalpha(self):
        """ln(alpha) agrees to 3 decimals, its standard error to DECIMAL_4."""
        self.res1.bse  # attaches alpha_std_err
        ref = self.res2
        assert_almost_equal(self.res1.lnalpha, ref.lnalpha, decimal=3)
        assert_almost_equal(self.res1.lnalpha_std_err, ref.lnalpha_std_err,
                            decimal=DECIMAL_4)

    def test_params(self):
        """Parameter estimates agree to DECIMAL_4."""
        assert_almost_equal(self.res1.params, self.res2.params,
                            decimal=DECIMAL_4)

    def test_conf_int(self):
        """Confidence intervals agree to DECIMAL_2."""
        # the bse for alpha is not high precision from the hessian
        # approximation
        assert_almost_equal(self.res1.conf_int(), self.res2.conf_int,
                            decimal=DECIMAL_2)

    @pytest.mark.xfail(reason="Test has not been implemented for this class.",
                       strict=True, raises=NotImplementedError)
    def test_predict(self):
        """Placeholder: raises, and is marked as a strict expected failure."""
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test has not been implemented for this class.",
                       strict=True, raises=NotImplementedError)
    def test_predict_xb(self):
        """Placeholder: raises, and is marked as a strict expected failure."""
        raise NotImplementedError
class TestNegativeBinomialNB2BFGS(CheckNegBinMixin, CheckModelResults):
    """NegativeBinomial 'nb2' on the RandHIE data fitted with BFGS."""

    @classmethod
    def setup_class(cls):
        """Fit with the constant appended and attach the NB2 reference."""
        data = load_randhie()
        exog = sm.add_constant(data.exog, prepend=False)
        model = NegativeBinomial(data.endog, exog, 'nb2')
        cls.res1 = model.fit(method='bfgs', disp=0, maxiter=1000)
        cls.res2 = RandHIE.negativebinomial_nb2_bfgs

    # NOTE: The bse is much closer in precision to Stata
    def test_bse(self):
        """Standard errors agree to DECIMAL_3."""
        assert_almost_equal(self.res1.bse, self.res2.bse, decimal=DECIMAL_3)

    def test_params(self):
        """Parameter estimates agree to DECIMAL_4."""
        assert_almost_equal(self.res1.params, self.res2.params,
                            decimal=DECIMAL_4)

    def test_alpha(self):
        """ln(alpha) and its standard error agree to DECIMAL_4."""
        self.res1.bse  # attaches alpha_std_err
        ref = self.res2
        assert_almost_equal(self.res1.lnalpha, ref.lnalpha,
                            decimal=DECIMAL_4)
        assert_almost_equal(self.res1.lnalpha_std_err, ref.lnalpha_std_err,
                            decimal=DECIMAL_4)

    def test_conf_int(self):
        """Confidence intervals agree to DECIMAL_3."""
        assert_almost_equal(self.res1.conf_int(), self.res2.conf_int,
                            decimal=DECIMAL_3)

    def test_zstat(self):  # Low precision because Z vs. t
        """P-values, without the last entry of res1, agree to DECIMAL_2."""
        assert_almost_equal(self.res1.pvalues[:-1], self.res2.pvalues,
                            decimal=DECIMAL_2)

    def test_fittedvalues(self):
        """First ten fitted values agree to DECIMAL_3."""
        assert_almost_equal(self.res1.fittedvalues[:10],
                            self.res2.fittedvalues[:10], decimal=DECIMAL_3)

    def test_predict(self):
        """First ten predictions equal exp of the reference fitted values."""
        desired = np.exp(self.res2.fittedvalues[:10])
        assert_almost_equal(self.res1.predict()[:10], desired,
                            decimal=DECIMAL_3)

    def test_predict_xb(self):
        """First ten linear predictions equal the reference fitted values."""
        linear = self.res1.predict(which="linear")[:10]
        assert_almost_equal(linear, self.res2.fittedvalues[:10],
                            decimal=DECIMAL_3)
class TestNegativeBinomialNB1BFGS(CheckNegBinMixin, CheckModelResults):
    """NegativeBinomial 'nb1' on the RandHIE data fitted with BFGS."""

    @classmethod
    def setup_class(cls):
        """Fit with the constant appended and attach the NB1 reference."""
        data = load_randhie()
        exog = sm.add_constant(data.exog, prepend=False)
        model = NegativeBinomial(data.endog, exog, 'nb1')
        cls.res1 = model.fit(method="bfgs", maxiter=100, disp=0)
        cls.res2 = RandHIE.negativebinomial_nb1_bfgs

    def test_zstat(self):
        """t-values agree with the reference z statistics to DECIMAL_1."""
        assert_almost_equal(self.res1.tvalues, self.res2.z,
                            decimal=DECIMAL_1)

    def test_lnalpha(self):
        """ln(alpha) agrees to 3 decimals, its standard error to DECIMAL_4."""
        self.res1.bse  # attaches alpha_std_err
        ref = self.res2
        assert_almost_equal(self.res1.lnalpha, ref.lnalpha, decimal=3)
        assert_almost_equal(self.res1.lnalpha_std_err, ref.lnalpha_std_err,
                            decimal=DECIMAL_4)

    def test_params(self):
        """Parameter estimates agree to DECIMAL_4."""
        assert_almost_equal(self.res1.params, self.res2.params,
                            decimal=DECIMAL_4)

    def test_conf_int(self):
        """Confidence intervals agree to DECIMAL_2."""
        # the bse for alpha is not high precision from the hessian
        # approximation
        assert_almost_equal(self.res1.conf_int(), self.res2.conf_int,
                            decimal=DECIMAL_2)

    @pytest.mark.xfail(reason="Test has not been implemented for this class.",
                       strict=True, raises=NotImplementedError)
    def test_predict(self):
        """Placeholder: raises, and is marked as a strict expected failure."""
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test has not been implemented for this class.",
                       strict=True, raises=NotImplementedError)
    def test_predict_xb(self):
        """Placeholder: raises, and is marked as a strict expected failure."""
        raise NotImplementedError
class TestNegativeBinomialGeometricBFGS(CheckNegBinMixin, CheckModelResults):
    """NegativeBinomial 'geometric' on the RandHIE data fitted with BFGS.

    Cannot find another implementation of the geometric to cross-check
    results. We only test fitted values because geometric has fewer
    parameters than nb1 and nb2, and we want to make sure that predict()
    np.dot(exog, params) works.
    """

    @classmethod
    def setup_class(cls):
        """Fit with the constant appended and attach the geometric reference."""
        data = load_randhie()
        exog = sm.add_constant(data.exog, prepend=False)
        model = NegativeBinomial(data.endog, exog, 'geometric')
        cls.res1 = model.fit(method='bfgs', disp=0)
        cls.res2 = RandHIE.negativebinomial_geometric_bfgs

    # the following are regression tests, could be inherited instead

    def test_aic(self):
        """AIC agrees to DECIMAL_3."""
        assert_almost_equal(self.res1.aic, self.res2.aic, decimal=DECIMAL_3)

    def test_bic(self):
        """BIC agrees to DECIMAL_3."""
        assert_almost_equal(self.res1.bic, self.res2.bic, decimal=DECIMAL_3)

    def test_conf_int(self):
        """Confidence intervals agree to DECIMAL_3."""
        assert_almost_equal(self.res1.conf_int(), self.res2.conf_int,
                            decimal=DECIMAL_3)

    def test_fittedvalues(self):
        """First ten fitted values agree to DECIMAL_3."""
        assert_almost_equal(self.res1.fittedvalues[:10],
                            self.res2.fittedvalues[:10], decimal=DECIMAL_3)

    def test_predict(self):
        """First ten predictions equal exp of the reference fitted values."""
        desired = np.exp(self.res2.fittedvalues[:10])
        assert_almost_equal(self.res1.predict()[:10], desired,
                            decimal=DECIMAL_3)

    def test_params(self):
        """Parameter estimates agree to DECIMAL_3."""
        assert_almost_equal(self.res1.params, self.res2.params,
                            decimal=DECIMAL_3)

    def test_predict_xb(self):
        """First ten linear predictions equal the reference fitted values."""
        linear = self.res1.predict(which="linear")[:10]
        assert_almost_equal(linear, self.res2.fittedvalues[:10],
                            decimal=DECIMAL_3)

    def test_zstat(self):  # Low precision because Z vs. t
        """t-values agree with the reference z statistics to DECIMAL_1."""
        assert_almost_equal(self.res1.tvalues, self.res2.z,
                            decimal=DECIMAL_1)

    def test_llf(self):
        """Log-likelihood agrees to DECIMAL_1."""
        assert_almost_equal(self.res1.llf, self.res2.llf, decimal=DECIMAL_1)

    def test_llr(self):
        """Likelihood-ratio statistic agrees to DECIMAL_2."""
        assert_almost_equal(self.res1.llr, self.res2.llr, decimal=DECIMAL_2)

    def test_bse(self):
        """Standard errors agree to DECIMAL_3."""
        assert_almost_equal(self.res1.bse, self.res2.bse, decimal=DECIMAL_3)
class CheckMNLogitBaseZero(CheckModelResults):
    """Checks shared by the MNLogit test classes.

    Subclasses provide ``res1`` (fitted results), ``res2`` (reference
    results) and ``data`` (the dataset used for the fit) as class attributes.
    """

    def test_margeff_overall(self):
        """Default marginal effects, and their layout in ``summary_frame``."""
        me = self.res1.get_margeff()
        ref = self.res2
        assert_almost_equal(me.margeff, ref.margeff_dydx_overall, decimal=6)
        assert_almost_equal(me.margeff_se, ref.margeff_dydx_overall_se,
                            decimal=6)
        # the frame's "dy/dx" column reshapes back to margeff in
        # column-major order
        me_frame = me.summary_frame()
        eff = me_frame["dy/dx"].values.reshape(me.margeff.shape, order="F")
        assert_allclose(eff, me.margeff, rtol=1e-13)
        assert_equal(me_frame.shape, (np.size(me.margeff), 6))

    def test_margeff_mean(self):
        """Marginal effects evaluated at the mean."""
        me = self.res1.get_margeff(at='mean')
        ref = self.res2
        assert_almost_equal(me.margeff, ref.margeff_dydx_mean, decimal=7)
        assert_almost_equal(me.margeff_se, ref.margeff_dydx_mean_se,
                            decimal=7)

    def test_margeff_dummy(self):
        """Marginal effects with ``dummy=True`` after adding 'vote' to exog."""
        data = self.data
        vote = data.data['vote']
        exog = sm.add_constant(np.column_stack((data.exog, vote)),
                               prepend=False)
        res = MNLogit(data.endog, exog).fit(method="newton", disp=0)
        ref = self.res2

        me = res.get_margeff(dummy=True)
        assert_almost_equal(me.margeff, ref.margeff_dydx_dummy_overall,
                            decimal=6)
        assert_almost_equal(me.margeff_se, ref.margeff_dydx_dummy_overall_se,
                            decimal=6)

        me = res.get_margeff(dummy=True, method="eydx")
        assert_almost_equal(me.margeff, ref.margeff_eydx_dummy_overall,
                            decimal=5)
        assert_almost_equal(me.margeff_se, ref.margeff_eydx_dummy_overall_se,
                            decimal=6)

    def test_j(self):
        """Model attribute ``J`` equals the reference value."""
        assert_equal(self.res1.model.J, self.res2.J)

    def test_k(self):
        """Model attribute ``K`` equals the reference value."""
        assert_equal(self.res1.model.K, self.res2.K)

    def test_endog_names(self):
        """Second item returned by ``_get_endog_name`` lists 'y=1'..'y=6'."""
        names = self.res1._get_endog_name(None, None)[1]
        assert_equal(names, ['y=1', 'y=2', 'y=3', 'y=4', 'y=5', 'y=6'])

    def test_pred_table(self):
        """Predicted choices and the prediction table match stored values."""
        # fitted results taken from gretl
        predicted_choice = [
            6, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 6, 0, 1, 6, 0, 0,
            1, 1, 6, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 6, 0, 0, 6, 6, 0, 0, 1,
            1, 6, 1, 6, 0, 0, 0, 1, 0, 1, 0, 0, 0, 6, 0, 0, 6, 0, 0, 0, 1,
            1, 0, 0, 6, 6, 6, 6, 1, 0, 5, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0,
            6, 0, 6, 6, 1, 0, 1, 1, 6, 5, 1, 0, 0, 0, 5, 0, 0, 6, 0, 1, 0,
            0, 0, 0, 0, 1, 1, 0, 6, 6, 6, 6, 5, 0, 1, 1, 0, 1, 0, 6, 6, 0,
            0, 0, 6, 0, 0, 0, 6, 6, 0, 5, 1, 0, 0, 0, 0, 6, 0, 5, 6, 6, 0,
            0, 0, 0, 6, 1, 0, 0, 1, 0, 1, 6, 1, 1, 1, 1, 1, 0, 0, 0, 6, 0,
            5, 1, 0, 6, 6, 6, 0, 0, 0, 0, 1, 6, 6, 0, 0, 0, 1, 1, 5, 6, 0,
            6, 1, 0, 0, 1, 6, 0, 0, 1, 0, 6, 6, 0, 5, 6, 6, 0, 0, 6, 1, 0,
            6, 0, 1, 0, 1, 6, 0, 1, 1, 1, 6, 0, 5, 0, 0, 6, 1, 0, 6, 5, 5,
            0, 6, 1, 1, 1, 0, 0, 6, 0, 0, 5, 0, 0, 6, 6, 6, 6, 6, 0, 1, 0,
            0, 6, 6, 0, 0, 1, 6, 0, 0, 6, 1, 6, 1, 1, 1, 0, 1, 6, 5, 0, 0,
            1, 5, 0, 1, 6, 6, 1, 0, 0, 1, 6, 1, 5, 6, 1, 0, 0, 1, 1, 0, 6,
            1, 6, 0, 1, 1, 5, 6, 6, 5, 1, 1, 1, 0, 6, 1, 6, 1, 0, 1, 0, 0,
            1, 5, 0, 1, 1, 0, 5, 6, 0, 5, 1, 1, 6, 5, 0, 6, 0, 0, 0, 0, 0,
            0, 1, 6, 1, 0, 5, 1, 0, 0, 1, 6, 0, 0, 6, 6, 6, 0, 2, 1, 6, 5,
            6, 1, 1, 0, 5, 1, 1, 1, 6, 1, 6, 6, 5, 6, 0, 1, 0, 1, 6, 0, 6,
            1, 6, 0, 0, 6, 1, 0, 6, 1, 0, 0, 0, 0, 6, 6, 6, 6, 5, 6, 6, 0,
            0, 6, 1, 1, 6, 0, 0, 6, 6, 0, 6, 6, 0, 0, 6, 0, 0, 6, 6, 6, 1,
            0, 6, 0, 0, 0, 6, 1, 1, 0, 1, 5, 0, 0, 5, 0, 0, 0, 1, 1, 6, 1,
            0, 0, 0, 6, 6, 1, 1, 6, 5, 5, 0, 6, 6, 0, 1, 1, 0, 6, 6, 0, 6,
            5, 5, 6, 5, 1, 0, 6, 0, 6, 1, 0, 1, 6, 6, 6, 1, 0, 6, 0, 5, 6,
            6, 5, 0, 5, 1, 0, 6, 0, 6, 1, 5, 5, 0, 1, 5, 5, 2, 6, 6, 6, 5,
            0, 0, 1, 6, 1, 0, 1, 6, 1, 0, 0, 1, 5, 6, 6, 0, 0, 0, 5, 6, 6,
            6, 1, 5, 6, 1, 0, 0, 6, 5, 0, 1, 1, 1, 6, 6, 0, 1, 0, 0, 0, 5,
            0, 0, 6, 1, 6, 0, 6, 1, 5, 5, 6, 5, 0, 0, 0, 0, 1, 1, 0, 5, 5,
            0, 0, 0, 0, 1, 0, 6, 6, 1, 1, 6, 6, 0, 5, 5, 0, 0, 0, 6, 6, 1,
            6, 0, 0, 5, 0, 1, 6, 5, 6, 6, 5, 5, 6, 6, 1, 0, 1, 6, 6, 1, 6,
            0, 6, 0, 6, 5, 0, 6, 6, 0, 5, 6, 0, 6, 6, 5, 0, 1, 6, 6, 1, 0,
            1, 0, 6, 6, 1, 0, 6, 6, 6, 0, 1, 6, 0, 1, 5, 1, 1, 5, 6, 6, 0,
            1, 6, 6, 1, 5, 0, 5, 0, 6, 0, 1, 6, 1, 0, 6, 1, 6, 0, 6, 1, 0,
            0, 0, 6, 6, 0, 1, 1, 6, 6, 6, 1, 6, 0, 5, 6, 0, 5, 6, 6, 5, 5,
            5, 6, 0, 6, 0, 0, 0, 5, 0, 6, 1, 2, 6, 6, 6, 5, 1, 6, 0, 6, 0,
            0, 0, 0, 6, 5, 0, 5, 1, 6, 5, 1, 6, 5, 1, 1, 0, 0, 6, 1, 1, 5,
            6, 6, 0, 5, 2, 5, 5, 0, 5, 5, 5, 6, 5, 6, 6, 5, 2, 6, 5, 6, 0,
            0, 6, 5, 0, 6, 0, 0, 6, 6, 6, 0, 5, 1, 1, 6, 6, 5, 2, 1, 6, 5,
            6, 0, 6, 6, 1, 1, 5, 1, 6, 6, 6, 0, 0, 6, 1, 0, 5, 5, 1, 5, 6,
            1, 6, 0, 1, 6, 5, 0, 0, 6, 1, 5, 1, 0, 6, 0, 6, 6, 5, 5, 6, 6,
            6, 6, 2, 6, 6, 6, 5, 5, 5, 0, 1, 0, 0, 0, 6, 6, 1, 0, 6, 6, 6,
            6, 6, 1, 0, 6, 1, 5, 5, 6, 6, 6, 6, 6, 5, 6, 1, 6, 2, 5, 5, 6,
            5, 6, 6, 5, 6, 6, 5, 5, 6, 1, 5, 1, 6, 0, 2, 5, 0, 5, 0, 2, 1,
            6, 0, 0, 6, 6, 1, 6, 0, 5, 5, 6, 6, 1, 6, 6, 6, 5, 6, 6, 1, 6,
            5, 6, 1, 1, 0, 6, 6, 5, 1, 0, 0, 6, 6, 5, 6, 0, 1, 6, 0, 5, 6,
            5, 2, 5, 2, 0, 0, 1, 6, 6, 1, 5, 6, 6, 0, 6, 6, 6, 6, 6, 5,
        ]
        assert_array_equal(self.res1.predict().argmax(1), predicted_choice)

        # the rows should add up for pred table
        assert_array_equal(self.res1.pred_table().sum(0),
                           np.bincount(predicted_choice))

        # note this is just a regression test, gretl does not have a
        # prediction table
        expected_table = [
            [126., 41., 2., 0., 0., 12., 19.],
            [77., 73., 3., 0., 0., 15., 12.],
            [37., 43., 2., 0., 0., 19., 7.],
            [12., 9., 1., 0., 0., 9., 6.],
            [19., 10., 2., 0., 0., 20., 43.],
            [22., 25., 1., 0., 0., 31., 71.],
            [9., 7., 1., 0., 0., 18., 140.],
        ]
        assert_array_equal(self.res1.pred_table(), expected_table)

    def test_resid(self):
        """``resid_misclassified`` equals the reference residuals exactly."""
        assert_array_equal(self.res1.resid_misclassified, self.res2.resid)

    @pytest.mark.xfail(reason="res2.cov_params is a zero-dim array of None",
                       strict=True)
    def test_cov_params(self):
        """Run the inherited check, which is expected to fail for this data."""
        super().test_cov_params()

    @pytest.mark.xfail(reason="Test has not been implemented for this class.",
                       strict=True, raises=NotImplementedError)
    def test_distr(self):
        """Run the inherited check; expected to raise NotImplementedError."""
        super().test_distr()
class TestMNLogitNewtonBaseZero(CheckMNLogitBaseZero):
    """MNLogit on the anes96 data fitted with Newton; checks are inherited."""

    @classmethod
    def setup_class(cls):
        """Fit with the constant appended and attach the reference results."""
        data = load_anes96()
        cls.data = data
        exog = sm.add_constant(data.exog, prepend=False)
        model = MNLogit(data.endog, exog)
        cls.res1 = model.fit(method="newton", disp=0)
        cls.res2 = Anes.mnlogit_basezero
class TestMNLogitLBFGSBaseZero(CheckMNLogitBaseZero):
    """MNLogit on the anes96 data fitted with L-BFGS; checks are inherited."""

    @classmethod
    def setup_class(cls):
        """Fit with tight L-BFGS settings and attach the reference results."""
        data = load_anes96()
        cls.data = data
        exog = sm.add_constant(data.exog, prepend=False)
        mymodel = MNLogit(data.endog, exog)
        # Looser settings that were tried, with the failures each produced:
        #   m=12, pgtol=1e-7, factr=1e3  -> 5 failures
        #   m=20, pgtol=1e-8, factr=1e2  -> 3 failures
        #   m=30, pgtol=1e-9, factr=1e1  -> 1 failure
        optimizer_options = dict(
            m=40,
            pgtol=1e-10,
            factr=5e0,
            loglike_and_score=mymodel.loglike_and_score,
        )
        cls.res1 = mymodel.fit(method="lbfgs", disp=0, maxiter=50000,
                               **optimizer_options)
        cls.res2 = Anes.mnlogit_basezero
def test_mnlogit_basinhopping():
    """Smoke tests for basinhopping and callback, see #8665."""

    def constant_callback(*args):
        return 1

    # regressor is drawn before the response, as both use the global RNG
    x = np.random.randint(0, 100, 1000)
    y = np.random.randint(0, 3, 1000)
    model = MNLogit(y, sm.add_constant(x))

    model.fit(method='basinhopping')
    model.fit(method='basinhopping', callback=constant_callback)
def test_perfect_prediction():
    """Logit on two iris classes: separation error, then warning behavior."""
    here = os.path.dirname(os.path.abspath(__file__))
    iris_dir = os.path.abspath(
        os.path.join(here, '..', '..', 'genmod', 'tests', 'results'))
    iris = np.genfromtxt(os.path.join(iris_dir, 'iris.csv'), delimiter=",",
                         skip_header=1)

    # last column is the response; drop all rows labelled 2
    y = iris[:, -1]
    X = iris[:, :-1]
    keep = y != 2
    X = X[keep]
    y = y[keep]
    X = sm.add_constant(X, prepend=True)

    mod = Logit(y, X)
    mod.raise_on_perfect_prediction = True
    with assert_raises(PerfectSeparationError):
        mod.fit(maxiter=1000)

    # turn off raise PerfectSeparationError
    mod.raise_on_perfect_prediction = False
    # this will raise if you set maxiter high enough with a singular matrix
    with pytest.warns(ConvergenceWarning):
        # should not raise but does warn
        res = mod.fit(disp=False, maxiter=50)
    assert_(not res.mle_retvals['converged'])

    # The following does not warn but message in summary()
    mod.fit(method="bfgs", disp=False, maxiter=50)
def test_poisson_predict():
    """GH 175: Poisson predict works without offset and exposure."""
    data = load_randhie()
    exog = sm.add_constant(data.exog, prepend=True)
    res = sm.Poisson(data.endog, exog).fit(method='newton', disp=0)

    in_sample = res.predict()
    assert_almost_equal(in_sample, res.predict(exog))

    # extra options: each (offset, exposure) pair scales the prediction
    cases = [
        (0, 1, 1),
        (0, 2, 2),
        (np.log(2), 1, 2),
    ]
    for offset, exposure, scale in cases:
        scaled = res.predict(exog, offset=offset, exposure=exposure)
        assert_almost_equal(scale * in_sample, scaled)
def test_poisson_newton():
    """GH 24: Newton from these start values warns and does not converge."""
    nobs = 10000
    np.random.seed(987689)
    x = sm.add_constant(np.random.randn(nobs, 3), prepend=True)
    y_count = np.random.poisson(np.exp(x.sum(1)))
    mod = sm.Poisson(y_count, x)

    start = -np.ones(4)
    # this is not thread-safe
    with pytest.warns(ConvergenceWarning):
        res = mod.fit(start_params=start, method='newton', disp=0)

    assert_(not res.mle_retvals['converged'])
def test_issue_339():
    """Make sure MNLogit summary works for J != K."""
    data = load_anes96()
    # leave out last exog column
    exog = sm.add_constant(data.exog[:, :-1], prepend=True)
    res1 = sm.MNLogit(data.endog, exog).fit(method="newton", disp=0)

    # strip the header from the test: the first nine lines are dropped
    summary_lines = res1.summary().as_text().split('\n')
    smry = "\n".join(summary_lines[9:])

    test_case_file = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                  'results', 'mn_logit_summary.txt')
    with open(test_case_file, encoding="utf-8") as fd:
        test_case = fd.read()
    # the stored text is compared without its final character
    np.testing.assert_equal(smry, test_case[:-1])

    # smoke test for summary2
    res1.summary2()  # see #3651
def test_issue_341():
    """MNLogit.predict gives shape (1, 7) for a 1-D or a 2-D single row."""
    data = load_anes96()
    # leave out last exog column
    exog = sm.add_constant(data.exog[:, :-1], prepend=True)
    res1 = sm.MNLogit(data.endog, exog).fit(method="newton", disp=0)

    x = exog[0]
    for row in (x, x[None]):
        np.testing.assert_equal(res1.predict(row).shape, (1, 7))
def test_negative_binomial_default_alpha_param():
    """Warnings raised by the NegativeBinomial family for ``alpha``/``link``."""
    not_set = 'Negative binomial dispersion parameter alpha not set'

    # alpha omitted: warns, with or without an explicit link
    with pytest.warns(UserWarning, match=not_set):
        sm.families.NegativeBinomial()
    with pytest.warns(UserWarning, match=not_set):
        sm.families.NegativeBinomial(
            link=sm.families.links.nbinom(alpha=1.0))

    # alpha given: any warning would be turned into an error here
    with warnings.catch_warnings():
        warnings.simplefilter("error")
        sm.families.NegativeBinomial(alpha=1.0)

    # alpha given together with the nbinom link: FutureWarning
    with pytest.warns(FutureWarning):
        sm.families.NegativeBinomial(
            link=sm.families.links.nbinom(alpha=1.0), alpha=1.0)
def test_iscount():
    """``_iscount`` reports columns 2 and 6 of a mixed random array."""
    nobs = 50
    X = np.random.random((nobs, 10))
    # integers in [1, 10): the two columns expected in the result
    X[:, 2] = np.random.randint(1, 10, size=nobs)
    X[:, 6] = np.random.randint(1, 10, size=nobs)
    # integers in {0, 1}: not expected in the result
    X[:, 4] = np.random.randint(0, 2, size=nobs)
    # integers in [-10, 10): not expected in the result
    # NOTE(review): presumably excluded because of the negative values -
    # confirm against _iscount
    X[:, 1] = np.random.randint(-10, 10, size=nobs)

    count_ind = _iscount(X)
    assert_equal(count_ind, [2, 6])
def test_isdummy():
    """``_isdummy`` reports columns 4 and 6 of a mixed random array."""
    nobs = 50
    X = np.random.random((nobs, 10))
    # integers in [1, 10): not expected in the result
    X[:, 2] = np.random.randint(1, 10, size=nobs)
    # integers in {0, 1}: the two columns expected in the result
    X[:, 6] = np.random.randint(0, 2, size=nobs)
    X[:, 4] = np.random.randint(0, 2, size=nobs)
    # integers in [-10, 10): not expected in the result
    X[:, 1] = np.random.randint(-10, 10, size=nobs)

    dummy_ind = _isdummy(X)
    assert_equal(dummy_ind, [4, 6])
def test_non_binary():
    """Logit and Probit raise ValueError for these two endog vectors."""
    X = np.random.randn(6, 2)

    # values 1 and 2
    y = [1, 2, 1, 2, 1, 2]
    with assert_raises(ValueError):
        Logit(y, X)

    # a 0.5 among zeros and ones
    y = [0, 1, 0, 0, 1, 0.5]
    with assert_raises(ValueError):
        Probit(y, X)
def test_mnlogit_factor():
    """MNLogit with string-labelled endog: array and formula fits agree."""
    dta = sm.datasets.anes96.load_pandas()
    # relabel the codes 0..6 as the letters 'A'..'G'
    labels = dict(zip(range(7), 'ABCDEFG'))
    dta['endog'] = dta.endog.replace(labels)
    exog = sm.add_constant(dta.exog, prepend=True)
    res = sm.MNLogit(dta.endog, exog).fit(disp=0)

    # smoke tests
    params = res.params
    res.summary()
    predicted = res.predict(exog.iloc[:5, :])

    # check endog is series with no name #8672
    endogn = dta['endog']
    endogn.name = None
    sm.MNLogit(endogn, exog)

    # with patsy
    formula = 'PID ~ ' + ' + '.join(dta.exog.columns)
    res2 = smf.mnlogit(formula, dta.data).fit(disp=0)
    params_f = res2.params
    res2.summary()
    assert_allclose(params_f, params, rtol=1e-10)

    predicted_f = res2.predict(dta.exog.iloc[:5, :])
    assert_allclose(predicted_f, predicted, rtol=1e-10)
def test_mnlogit_factor_categorical():
    """MNLogit gives the same params for string and categorical endog."""
    dta = sm.datasets.anes96.load_pandas()
    # relabel the codes 0..6 as the letters 'A'..'G'
    labels = dict(zip(range(7), 'ABCDEFG'))
    dta['endog'] = dta.endog.replace(labels)
    exog = sm.add_constant(dta.exog, prepend=True)
    res = sm.MNLogit(dta.endog, exog).fit(disp=0)

    # same labels, converted to the pandas 'category' dtype
    dta['endog'] = dta['endog'].astype('category')
    res_cat = sm.MNLogit(dta.endog, exog).fit(disp=0)

    assert_allclose(res.params, res_cat.params)
def test_formula_missing_exposure():
    """Exposure with a formula and missing data, and with a wrong length.

    See issue 2083.
    """
    df = pd.DataFrame({
        'Foo': [1, 2, 10, 149],
        'Bar': [1, 2, 3, np.nan],
        'constant': [1] * 4,
        'exposure': np.random.uniform(size=4),
        'x': [1, 3, 2, 1.5],
    })

    # should work
    mod1 = smf.poisson('Foo ~ Bar', data=df, exposure=df['exposure'])
    assert_(type(mod1.exposure) is np.ndarray, msg='Exposure is not ndarray')

    # make sure this raises: five exposure values for four rows
    exposure = pd.Series(np.random.uniform(size=5))
    # nan not relevant for ValueError for shape mismatch
    df.loc[3, 'Bar'] = 4
    with assert_raises(ValueError):
        sm.Poisson(df.Foo, df[['constant', 'Bar']], exposure=exposure)
def test_predict_with_exposure():
    """CountModel.predict with ``exog=None`` and exposure handling.

    Case where CountModel.predict is called with exog = None and exposure
    or offset not-None. See issue 3565. The data setup is copied from
    test_formula_missing_exposure.
    """
    # pandas comes from the module-level import; no local import is needed
    df = pd.DataFrame({
        'Foo': [1, 2, 10, 149],
        'Bar': [1, 2, 3, 4],
        'constant': [1] * 4,
        'exposure': [np.exp(1)] * 4,
        'x': [1, 3, 2, 1.5],
    })

    mod1 = CountModel.from_formula('Foo ~ Bar', data=df,
                                   exposure=df['exposure'])

    params = np.array([1, .4])
    pred = mod1.predict(params, which="linear")
    # No exposure is passed, so default to using mod1.exposure, which
    # should have been logged: log(exp(1)) adds 1 to the linear predictor
    X = df[['constant', 'Bar']].values  # mod1.exog
    expected = np.dot(X, params) + 1
    assert_allclose(pred, expected)

    # Regression check for the fix: an exposure passed explicitly must be
    # used, so exp(2) adds 2 instead of 1 to the linear predictor
    pred2 = mod1.predict(params, exposure=[np.exp(2)] * 4, which="linear")
    expected2 = expected + 1
    assert_allclose(pred2, expected2)
def test_binary_pred_table_zeros():
    """Prediction table for a constant-only fit with two ones in ten obs.

    See issue 2968.
    """
    nobs = 10
    y = np.zeros(nobs)
    y[[1, 3]] = 1

    # Logit first, then MNLogit; both must give the same table
    for model_class in (Logit, MNLogit):
        res = model_class(y, np.ones(nobs)).fit(disp=0)
        expected = np.array([[8., 0.], [2., 0.]])
        assert_equal(res.pred_table(), expected)
class TestGeneralizedPoisson_p2:
    """GeneralizedPoisson with ``p=2`` on the RandHIE data, Newton fit."""

    @classmethod
    def setup_class(cls):
        """Fit with the constant appended and attach the GP-2 reference."""
        data = load_randhie()
        data.exog = sm.add_constant(data.exog, prepend=False)
        model = GeneralizedPoisson(data.endog, data.exog, p=2)
        cls.res1 = model.fit(method='newton', disp=0)
        cls.res2 = RandHIE.generalizedpoisson_gp2

    def test_bse(self):
        """Standard errors agree within atol=1e-5."""
        assert_allclose(self.res1.bse, self.res2.bse, atol=1e-5)

    def test_params(self):
        """Parameter estimates agree within atol=1e-5."""
        assert_allclose(self.res1.params, self.res2.params, atol=1e-5)

    def test_alpha(self):
        """ln(alpha) and its standard error agree with the reference."""
        ref = self.res2
        assert_allclose(self.res1.lnalpha, ref.lnalpha)
        assert_allclose(self.res1.lnalpha_std_err, ref.lnalpha_std_err,
                        atol=1e-5)

    def test_conf_int(self):
        """Confidence intervals agree within atol=1e-3."""
        assert_allclose(self.res1.conf_int(), self.res2.conf_int, atol=1e-3)

    def test_aic(self):
        """AIC agrees with the reference."""
        assert_allclose(self.res1.aic, self.res2.aic)

    def test_bic(self):
        """BIC agrees with the reference."""
        assert_allclose(self.res1.bic, self.res2.bic)

    def test_df(self):
        """Model degrees of freedom equal the reference value."""
        assert_equal(self.res1.df_model, self.res2.df_model)

    def test_llf(self):
        """Log-likelihood agrees with the reference."""
        assert_allclose(self.res1.llf, self.res2.llf)

    def test_wald(self):
        """Wald test that drops the last two rows of the identity matrix."""
        n_params = len(self.res1.params)
        restrictions = np.eye(n_params)[:-2]
        result = self.res1.wald_test(restrictions, scalar=True)
        assert_allclose(result.statistic, self.res2.wald_statistic)
        assert_allclose(result.pvalue, self.res2.wald_pvalue, atol=1e-15)

    def test_t(self):
        """t_test on the identity matrix reproduces ``tvalues``."""
        unit_matrix = np.identity(self.res1.params.size)
        t_test = self.res1.t_test(unit_matrix)
        assert_allclose(self.res1.tvalues, t_test.tvalue)

    def test_jac(self):
        """Run the module-level ``check_jac`` on this instance."""
        check_jac(self)

    def test_distr(self):
        """Run the module-level ``check_distr`` on the fitted results."""
        check_distr(self.res1)
class TestGeneralizedPoisson_transparams:
    """GeneralizedPoisson with ``p=2`` on the RandHIE data, Newton fit."""

    @classmethod
    def setup_class(cls):
        """Fit with the constant appended and attach the GP-2 reference."""
        data = load_randhie()
        data.exog = sm.add_constant(data.exog, prepend=False)
        model = GeneralizedPoisson(data.endog, data.exog, p=2)
        cls.res1 = model.fit(method='newton', disp=0)
        cls.res2 = RandHIE.generalizedpoisson_gp2

    def test_bse(self):
        """Standard errors agree within atol=1e-5."""
        assert_allclose(self.res1.bse, self.res2.bse, atol=1e-5)

    def test_params(self):
        """Parameter estimates agree within atol=1e-5."""
        assert_allclose(self.res1.params, self.res2.params, atol=1e-5)

    def test_alpha(self):
        """ln(alpha) and its standard error agree with the reference."""
        ref = self.res2
        assert_allclose(self.res1.lnalpha, ref.lnalpha)
        assert_allclose(self.res1.lnalpha_std_err, ref.lnalpha_std_err,
                        atol=1e-5)

    def test_conf_int(self):
        """Confidence intervals agree within atol=1e-3."""
        assert_allclose(self.res1.conf_int(), self.res2.conf_int, atol=1e-3)

    def test_aic(self):
        """AIC agrees with the reference."""
        assert_allclose(self.res1.aic, self.res2.aic)

    def test_bic(self):
        """BIC agrees with the reference."""
        assert_allclose(self.res1.bic, self.res2.bic)

    def test_df(self):
        """Model degrees of freedom equal the reference value."""
        assert_equal(self.res1.df_model, self.res2.df_model)

    def test_llf(self):
        """Log-likelihood agrees with the reference."""
        assert_allclose(self.res1.llf, self.res2.llf)
class TestGeneralizedPoisson_p1:
    """GeneralizedPoisson with ``p=1`` on the RandHIE data, Newton fit.

    Several tests replace the last fitted parameter with 0 and compare the
    result with a plain Poisson model on the same data.
    """

    @classmethod
    def setup_class(cls):
        """Load the data, append the constant and fit the model."""
        cls.data = load_randhie()
        cls.data.exog = sm.add_constant(cls.data.exog, prepend=False)
        model = GeneralizedPoisson(cls.data.endog, cls.data.exog, p=1)
        cls.res1 = model.fit(method='newton', disp=0)

    def test_llf(self):
        """Log-likelihood with the last parameter at 0 equals Poisson's."""
        mean_params = self.res1.params[:-1]
        poisson = sm.Poisson(self.data.endog, self.data.exog)
        genpoisson = sm.GeneralizedPoisson(self.data.endog, self.data.exog,
                                           p=1)
        poisson_llf = poisson.loglike(mean_params)
        genpoisson_llf = genpoisson.loglike(list(mean_params) + [0])
        assert_allclose(genpoisson_llf, poisson_llf)

    def test_score(self):
        """Score, without its last entry, equals the Poisson score."""
        mean_params = self.res1.params[:-1]
        poisson = sm.Poisson(self.data.endog, self.data.exog)
        genpoisson = sm.GeneralizedPoisson(self.data.endog, self.data.exog,
                                           p=1)
        poisson_score = poisson.score(mean_params)
        genpoisson_score = genpoisson.score(list(mean_params) + [0])
        assert_allclose(genpoisson_score[:-1], poisson_score, atol=1e-9)

    def test_hessian(self):
        """Hessian, without last row and column, equals Poisson's."""
        mean_params = self.res1.params[:-1]
        poisson = sm.Poisson(self.data.endog, self.data.exog)
        genpoisson = sm.GeneralizedPoisson(self.data.endog, self.data.exog,
                                           p=1)
        poisson_hessian = poisson.hessian(mean_params)
        genpoisson_hessian = genpoisson.hessian(list(mean_params) + [0])
        assert_allclose(genpoisson_hessian[:-1, :-1], poisson_hessian,
                        atol=1e-10)

    def test_t(self):
        """t_test on the identity matrix reproduces ``tvalues``."""
        unit_matrix = np.identity(self.res1.params.size)
        t_test = self.res1.t_test(unit_matrix)
        assert_allclose(self.res1.tvalues, t_test.tvalue)

    def test_fit_regularized(self):
        """Regularized fits at three penalty scales; shrinkage regression."""
        model = self.res1.model

        # do not penalize constant and dispersion parameter
        alpha = np.ones(len(self.res1.params))
        alpha[-2:] = 0
        # the first prints currently a warning, irrelevant here
        res_reg1 = model.fit_regularized(alpha=alpha*0.01, disp=0)
        res_reg2 = model.fit_regularized(alpha=alpha*100, disp=0)
        res_reg3 = model.fit_regularized(alpha=alpha*1000, disp=0)

        # the smallest penalty stays close to the unpenalized fit
        assert_allclose(res_reg1.params, self.res1.params, atol=5e-5)
        assert_allclose(res_reg1.bse, self.res1.bse, atol=1e-5)

        # check shrinkage, regression numbers: mean squared value of all
        # parameters except the last two
        regression_numbers = [
            (self.res1, 0.016580955543320779),
            (res_reg1, 0.016580734975068664),
            (res_reg2, 0.010672558641545994),
            (res_reg3, 0.00035544919793048415),
        ]
        for fitted, mean_square in regression_numbers:
            assert_allclose((fitted.params[:-2]**2).mean(), mean_square,
                            rtol=1e-5)

    def test_init_kwds(self):
        """``p`` is among the model's init keywords and equals 1."""
        kwds = self.res1.model._get_init_kwds()
        assert_('p' in kwds)
        assert_equal(kwds['p'], 1)

    def test_distr(self):
        """Run the module-level ``check_distr`` on the fitted results."""
        check_distr(self.res1)
class TestGeneralizedPoisson_underdispersion:
    """GeneralizedPoisson (p=1) fitted to simulated data.

    The data are drawn from ``genpoisson_p`` with a negative last parameter
    (-0.05) and a fixed seed, then fitted with Nelder-Mead.
    """

    @classmethod
    def setup_class(cls):
        """Simulate the sample and fit the model once for all tests."""
        cls.expected_params = [1, -0.5, -0.05]
        np.random.seed(1234)
        nobs = 200
        # constant plus one regressor that is 2 in the first half, else 1
        exog = np.ones((nobs, 2))
        exog[:nobs//2, 1] = 2
        mu_true = np.exp(exog.dot(cls.expected_params[:-1]))
        cls.endog = sm.distributions.genpoisson_p.rvs(
            mu_true, cls.expected_params[-1], 1, size=len(mu_true))
        model_gp = sm.GeneralizedPoisson(cls.endog, exog, p=1)
        cls.res = model_gp.fit(method='nm', xtol=1e-6, maxiter=5000,
                               maxfun=5000, disp=0)

    def test_basic(self):
        """Sample moments, parameter recovery and convergence."""
        res = self.res
        endog = res.model.endog
        # check random data generation, regression test
        assert_allclose(endog.mean(), 1.42, rtol=1e-3)
        assert_allclose(endog.var(), 1.2836, rtol=1e-3)
        # check estimation
        assert_allclose(res.params, self.expected_params, atol=0.07,
                        rtol=0.1)
        assert_(res.mle_retvals['converged'] is True)
        assert_allclose(res.mle_retvals['fopt'], 1.418753161722015,
                        rtol=0.01)

    def test_newton(self):
        """Newton started at the Nelder-Mead estimate refines the score."""
        # check newton optimization with start_params
        res = self.res
        res2 = res.model.fit(start_params=res.params, method='newton',
                             disp=0)
        zeros = np.zeros(len(res2.params))
        assert_allclose(res.model.score(res.params), zeros, atol=0.01)
        assert_allclose(res.model.score(res2.params), zeros, atol=1e-10)
        assert_allclose(res.params, res2.params, atol=1e-4)

    def test_mean_var(self):
        """Predicted mean and implied variance are near the sample moments."""
        predicted_mean = self.res.predict().mean()
        assert_allclose(predicted_mean, self.endog.mean(),
                        atol=1e-1, rtol=1e-1)
        implied_var = predicted_mean * self.res._dispersion_factor.mean()
        assert_allclose(implied_var, self.endog.var(),
                        atol=2e-1, rtol=2e-1)

    def test_predict_prob(self):
        """Predicted probabilities match the pmf; chi-square regression."""
        res = self.res
        endog = res.model.endog
        freq = np.bincount(endog.astype(int))

        pr = res.predict(which='prob')
        pr2 = sm.distributions.genpoisson_p.pmf(
            np.arange(6)[:, None], res.predict(), res.params[-1], 1).T
        assert_allclose(pr, pr2, rtol=1e-10, atol=1e-10)

        expected = pr.sum(0)
        # add expected obs from right tail to last bin
        expected[-1] += pr.shape[0] - expected.sum()
        # scipy requires observed and expected add to the same at rtol=1e-8
        assert_allclose(freq.sum(), expected.sum(), rtol=1e-13)

        # ``stats`` is the module-level ``from scipy import stats``
        chi2 = stats.chisquare(freq, expected)
        # numbers are regression test, we should not reject
        assert_allclose(chi2[:], (0.5511787456691261, 0.9901293016678583),
                        rtol=0.01)

    def test_jac(self):
        """Run the module-level ``check_jac`` with these results."""
        check_jac(self, res=self.res)

    def test_distr(self):
        """Run the module-level ``check_distr`` on the fitted results."""
        check_distr(self.res)
class TestNegativeBinomialPNB2Newton(CheckNegBinMixin, CheckModelResults):
    """NegativeBinomialP with ``p=2`` on the RandHIE data, Newton fit."""

    @classmethod
    def setup_class(cls):
        """Fit with the constant appended and attach the NB2 reference."""
        data = load_randhie()
        exog = sm.add_constant(data.exog, prepend=False)
        model = NegativeBinomialP(data.endog, exog, p=2)
        cls.res1 = model.fit(method='newton', disp=0)
        cls.res2 = RandHIE.negativebinomial_nb2_bfgs

    # NOTE: The bse is much closer in precision to Stata
    def test_bse(self):
        """Standard errors agree within atol=rtol=1e-3."""
        assert_allclose(self.res1.bse, self.res2.bse, atol=1e-3, rtol=1e-3)

    def test_params(self):
        """Parameter estimates agree within atol=1e-7."""
        assert_allclose(self.res1.params, self.res2.params, atol=1e-7)

    def test_alpha(self):
        """ln(alpha) and its standard error agree with the reference."""
        self.res1.bse  # attaches alpha_std_err
        ref = self.res2
        assert_allclose(self.res1.lnalpha, ref.lnalpha)
        assert_allclose(self.res1.lnalpha_std_err, ref.lnalpha_std_err,
                        atol=1e-7)

    def test_conf_int(self):
        """Confidence intervals agree within atol=rtol=1e-3."""
        assert_allclose(self.res1.conf_int(), self.res2.conf_int,
                        atol=1e-3, rtol=1e-3)

    def test_zstat(self):  # Low precision because Z vs. t
        """P-values, without the last entry of res1, agree within 5e-3."""
        assert_allclose(self.res1.pvalues[:-1], self.res2.pvalues,
                        atol=5e-3, rtol=5e-3)

    def test_fittedvalues(self):
        """First ten fitted values agree with the reference."""
        assert_allclose(self.res1.fittedvalues[:10],
                        self.res2.fittedvalues[:10])

    def test_predict(self):
        """First ten predictions equal exp of the reference fitted values."""
        desired = np.exp(self.res2.fittedvalues[:10])
        assert_allclose(self.res1.predict()[:10], desired)

    def test_predict_xb(self):
        """First ten linear predictions equal the reference fitted values."""
        linear = self.res1.predict(which='linear')[:10]
        assert_allclose(linear, self.res2.fittedvalues[:10])
class TestNegativeBinomialPNB1Newton(CheckNegBinMixin, CheckModelResults):
    """NegativeBinomialP with ``p=1`` on the RandHIE data, Newton fit.

    ``res1`` is the statsmodels fit; ``res2`` holds the reference values
    from ``RandHIE.negativebinomial_nb1_bfgs``.
    """

    @classmethod
    def setup_class(cls):
        """Fit the model once and attach the reference results."""
        randhie = load_randhie()
        design = sm.add_constant(randhie.exog, prepend=False)
        model = NegativeBinomialP(randhie.endog, design, p=1)
        cls.res1 = model.fit(method="newton", maxiter=100, disp=0)
        cls.res2 = RandHIE.negativebinomial_nb1_bfgs

    def test_zstat(self):
        """t-values agree with the reference z statistics."""
        fitted, ref = self.res1, self.res2
        assert_allclose(fitted.tvalues, ref.z, atol=5e-3, rtol=5e-3)

    def test_lnalpha(self):
        """``lnalpha`` and its standard error agree with the reference."""
        self.res1.bse  # accessing bse attaches alpha_std_err
        fitted, ref = self.res1, self.res2
        assert_allclose(fitted.lnalpha, ref.lnalpha)
        assert_allclose(fitted.lnalpha_std_err, ref.lnalpha_std_err)

    def test_params(self):
        """Parameter estimates agree with the reference."""
        fitted, ref = self.res1, self.res2
        assert_allclose(fitted.params, ref.params)

    def test_conf_int(self):
        """Confidence intervals agree with the reference."""
        # the bse for alpha is not high precision from the hessian
        # approximation
        fitted, ref = self.res1, self.res2
        assert_allclose(fitted.conf_int(), ref.conf_int,
                        atol=1e-3, rtol=1e-3)

    def test_predict(self):
        """First ten predictions equal exp of the reference fitted values."""
        head = slice(None, 10)
        expected = np.exp(self.res2.fittedvalues[head])
        assert_allclose(self.res1.predict()[head], expected,
                        atol=1e-3, rtol=1e-3)

    def test_predict_xb(self):
        """First ten linear predictions equal the reference fitted values."""
        head = slice(None, 10)
        linear = self.res1.predict(which='linear')
        assert_allclose(linear[head], self.res2.fittedvalues[head],
                        atol=1e-3, rtol=1e-3)
class TestNegativeBinomialPNB2BFGS(CheckNegBinMixin, CheckModelResults):
    """NegativeBinomialP with ``p=2`` on the RandHIE data, BFGS fit.

    ``res1`` is the statsmodels fit; ``res2`` holds the reference values
    from ``RandHIE.negativebinomial_nb2_bfgs``.
    """

    @classmethod
    def setup_class(cls):
        """Fit the model once and attach the reference results."""
        randhie = load_randhie()
        design = sm.add_constant(randhie.exog, prepend=False)
        model = NegativeBinomialP(randhie.endog, design, p=2)
        cls.res1 = model.fit(method='bfgs', disp=0, maxiter=1000)
        cls.res2 = RandHIE.negativebinomial_nb2_bfgs

    # NOTE: the bse here is much closer in precision to Stata
    def test_bse(self):
        """Standard errors agree with the reference."""
        fitted, ref = self.res1, self.res2
        assert_allclose(fitted.bse, ref.bse, atol=1e-3, rtol=1e-3)

    def test_params(self):
        """Parameter estimates agree with the reference."""
        fitted, ref = self.res1, self.res2
        assert_allclose(fitted.params, ref.params, atol=1e-3, rtol=1e-3)

    def test_alpha(self):
        """``lnalpha`` and its standard error agree with the reference."""
        self.res1.bse  # accessing bse attaches alpha_std_err
        fitted, ref = self.res1, self.res2
        assert_allclose(fitted.lnalpha, ref.lnalpha,
                        atol=1e-5, rtol=1e-5)
        assert_allclose(fitted.lnalpha_std_err, ref.lnalpha_std_err,
                        atol=1e-5, rtol=1e-5)

    def test_conf_int(self):
        """Confidence intervals agree with the reference."""
        fitted, ref = self.res1, self.res2
        assert_allclose(fitted.conf_int(), ref.conf_int,
                        atol=1e-3, rtol=1e-3)

    def test_zstat(self):
        """p-values (excluding the last parameter) agree with the reference."""
        # Low precision because Z vs. t
        fitted, ref = self.res1, self.res2
        assert_allclose(fitted.pvalues[:-1], ref.pvalues,
                        atol=5e-3, rtol=5e-3)

    def test_fittedvalues(self):
        """First ten fitted values agree with the reference."""
        head = slice(None, 10)
        assert_allclose(self.res1.fittedvalues[head],
                        self.res2.fittedvalues[head],
                        atol=1e-4, rtol=1e-4)

    def test_predict(self):
        """First ten predictions equal exp of the reference fitted values."""
        head = slice(None, 10)
        expected = np.exp(self.res2.fittedvalues[head])
        assert_allclose(self.res1.predict()[head], expected,
                        atol=1e-3, rtol=1e-3)

    def test_predict_xb(self):
        """First ten linear predictions equal the reference fitted values."""
        head = slice(None, 10)
        linear = self.res1.predict(which='linear')
        assert_allclose(linear[head], self.res2.fittedvalues[head],
                        atol=1e-3, rtol=1e-3)
class TestNegativeBinomialPNB1BFGS(CheckNegBinMixin, CheckModelResults):
    """NegativeBinomialP with ``p=1`` on the RandHIE data, BFGS fit.

    ``res1`` is the statsmodels fit (BFGS, ``maxiter=100``); ``res2`` holds
    the reference values from ``RandHIE.negativebinomial_nb1_bfgs``.  The
    tolerances in this class are looser than in the Newton variant.
    """

    @classmethod
    def setup_class(cls):
        """Fit the model once and attach the reference results."""
        data = load_randhie()
        exog = sm.add_constant(data.exog, prepend=False)
        cls.res1 = NegativeBinomialP(data.endog, exog, p=1).fit(
            method="bfgs", maxiter=100, disp=0)
        cls.res2 = RandHIE.negativebinomial_nb1_bfgs

    def test_bse(self):
        """Standard errors agree with the reference."""
        assert_allclose(self.res1.bse, self.res2.bse,
                        atol=5e-3, rtol=5e-3)

    def test_aic(self):
        """AIC agrees with the reference."""
        assert_allclose(self.res1.aic, self.res2.aic,
                        atol=0.5, rtol=0.5)

    def test_bic(self):
        """BIC agrees with the reference."""
        assert_allclose(self.res1.bic, self.res2.bic,
                        atol=0.5, rtol=0.5)

    def test_llf(self):
        """Log-likelihood agrees with the reference."""
        assert_allclose(self.res1.llf, self.res2.llf,
                        atol=1e-3, rtol=1e-3)

    def test_llr(self):
        """Likelihood-ratio statistic agrees with the reference.

        This previously repeated the ``llf`` comparison of ``test_llf``, so
        ``llr`` itself was never checked for this fit.
        """
        # llr is a difference of two log-likelihoods, so the relative error
        # allowed on llf above is amplified here; use the same loose
        # relative tolerance as test_params / test_conf_int.
        # NOTE(review): tolerance chosen to match this class' other loose
        # checks -- tighten if the BFGS fit proves more precise.
        assert_allclose(self.res1.llr, self.res2.llr, rtol=5e-2)

    def test_zstat(self):
        """t-values agree with the reference z statistics."""
        assert_allclose(self.res1.tvalues, self.res2.z,
                        atol=0.5, rtol=0.5)

    def test_lnalpha(self):
        """``lnalpha`` and its standard error agree with the reference."""
        assert_allclose(self.res1.lnalpha, self.res2.lnalpha,
                        atol=1e-3, rtol=1e-3)
        assert_allclose(self.res1.lnalpha_std_err,
                        self.res2.lnalpha_std_err,
                        atol=1e-3, rtol=1e-3)

    def test_params(self):
        """Parameter estimates agree with the reference."""
        assert_allclose(self.res1.params, self.res2.params,
                        atol=5e-2, rtol=5e-2)

    def test_conf_int(self):
        """Confidence intervals agree with the reference."""
        # the bse for alpha is not high precision from the hessian
        # approximation
        assert_allclose(self.res1.conf_int(), self.res2.conf_int,
                        atol=5e-2, rtol=5e-2)

    def test_predict(self):
        """First ten predictions equal exp of the reference fitted values."""
        assert_allclose(self.res1.predict()[:10],
                        np.exp(self.res2.fittedvalues[:10]),
                        atol=5e-3, rtol=5e-3)

    def test_predict_xb(self):
        """First ten linear predictions equal the reference fitted values."""
        assert_allclose(self.res1.predict(which='linear')[:10],
                        self.res2.fittedvalues[:10],
                        atol=5e-3, rtol=5e-3)

    def test_init_kwds(self):
        """``p`` is reported by ``_get_init_kwds`` with the value used."""
        kwds = self.res1.model._get_init_kwds()
        assert_('p' in kwds)
        assert_equal(kwds['p'], 1)
class TestNegativeBinomialPL1Compatability(CheckL1Compatability):
    """L1-regularized vs. unregularized NegativeBinomialP on RandHIE.

    ``res_unreg`` is fitted on the first ``m`` standardized columns only;
    ``res_reg`` is fitted on all columns with a heavy L1 penalty on the
    columns that the unregularized fit leaves out.
    """

    @classmethod
    def setup_class(cls):
        """Build both fits that the shared L1 checks compare."""
        cls.kvars = 10  # Number of variables
        cls.m = 7  # Number of unregularized parameters

        data = load_randhie()
        data.endog = np.asarray(data.endog)
        data.exog = np.asarray(data.exog, dtype=float)

        # standardize the regressors, then prepend the constant
        nrows = len(data.exog)
        raw = data.exog.view(float).reshape(nrows, -1)
        standardized = (raw - raw.mean(0)) / raw.std(0)
        design = sm.add_constant(standardized, prepend=True)

        # Drop some columns and do an unregularized fit
        design_reduced = design[:, :cls.m]
        unreg_model = sm.NegativeBinomialP(data.endog, design_reduced)
        cls.res_unreg = unreg_model.fit(method="newton", disp=0)

        # Do a regularized fit with alpha, effectively dropping the last
        # column
        penalty = 10 * len(data.endog) * np.ones(cls.kvars + 1)
        penalty[:cls.m] = 0
        penalty[-1] = 0  # do not penalize alpha
        reg_model = sm.NegativeBinomialP(data.endog, design)
        cls.res_reg = reg_model.fit_regularized(
            method='l1',
            alpha=penalty,
            disp=False,
            acc=1e-10,
            maxiter=2000,
            trim_mode='auto',
        )

        cls.k_extra = 1  # 1 extra parameter in nb2
class TestNegativeBinomialPPredictProb:
    """Check ``predict(which='prob')`` of NegativeBinomialP against scipy.

    Both tests simulate negative binomial counts with a fixed seed and
    compare the predicted probabilities with ``nbinom.pmf`` evaluated at
    the fitted mean and the dispersion used for the simulation.
    """

    def test_predict_prob_p1(self):
        """Data simulated with ``size`` proportional to the mean."""
        np.random.seed(1234)
        beta = [1, -0.5]
        alpha = 0.05
        nobs = 200

        exog = np.ones((nobs, 2))
        exog[:nobs // 2, 1] = 2
        mu_true = np.exp(exog.dot(beta))
        n_true = 1. / alpha * mu_true
        p_true = n_true / (n_true + mu_true)
        endog = nbinom.rvs(n_true, p_true, size=len(mu_true))

        # NOTE(review): no ``p`` is passed here, so the model uses its
        # default -- confirm that this is intended for the "p1" case.
        res = sm.NegativeBinomialP(endog, exog).fit(disp=0)

        mu_hat = res.predict()
        n_hat = 1. / alpha * mu_hat
        p_hat = n_hat / (n_hat + mu_hat)

        probs = res.predict(which='prob')
        counts = np.arange(8)[:, None]
        assert_allclose(probs, nbinom.pmf(counts, n_hat, p_hat).T,
                        atol=1e-2, rtol=1e-2)

        # predicting for explicit exog rows reproduces the in-sample rows
        ends = [0, -1]
        probs_ex = res.predict(exog=exog[ends], which='prob')
        assert_allclose(probs_ex, probs[ends], rtol=1e-10, atol=1e-15)

    def test_predict_prob_p2(self):
        """Data simulated with constant ``size``; model fitted with p=2."""
        np.random.seed(1234)
        beta = [1, -0.5]
        alpha = 0.05
        nobs = 200

        exog = np.ones((nobs, 2))
        exog[:nobs // 2, 1] = 2
        mu_true = np.exp(exog.dot(beta))
        n_true = 1. / alpha
        p_true = n_true / (n_true + mu_true)
        endog = nbinom.rvs(n_true, p_true, size=len(mu_true))

        res = sm.NegativeBinomialP(endog, exog, p=2).fit(disp=0)

        mu_hat = res.predict()
        n_hat = 1. / alpha
        p_hat = n_hat / (n_hat + mu_hat)

        counts = np.arange(8)[:, None]
        assert_allclose(res.predict(which='prob'),
                        nbinom.pmf(counts, n_hat, p_hat).T,
                        atol=1e-2, rtol=1e-2)
class CheckNull:
    """Shared data and check for the null-model log-likelihood.

    Subclasses provide ``model``, ``start_params`` and ``res_null`` in
    their ``setup_class``.
    """

    @classmethod
    def _get_data(cls):
        """Return the small ``(endog, exog)`` data set used by subclasses."""
        x = np.array([ 20.,  25.,  30.,  35.,  40.,  45.,  50.])
        endog = np.array([ 469, 5516, 6854, 6837, 5952, 4066, 3242])
        const = np.ones(len(x))
        exog = np.column_stack((const, x))
        return endog, exog

    def test_llnull(self):
        """``llnull`` matches both the attached and the explicit null fit."""
        res = self.model.fit(start_params=self.start_params, disp=0)
        # request that the null-model results get attached as ``res_null``
        res._results._attach_nullmodel = True
        llnull = res.llnull
        null_attached = res.res_null
        assert_allclose(llnull, null_attached.llf, rtol=1e-6)

        null_explicit = self.res_null
        assert_allclose(llnull, null_explicit.llf, rtol=1e-6)
        # Note default convergence tolerance does not get lower rtol
        # from different starting values (using bfgs)
        assert_allclose(null_attached.params, null_explicit.params,
                        rtol=5e-5)
class TestPoissonNull(CheckNull):
    """Null-model check for ``Poisson``."""

    @classmethod
    def setup_class(cls):
        """Create the full model and fit the constant-only model."""
        y, x = cls._get_data()
        cls.model = Poisson(y, x)
        null_model = Poisson(y, x[:, 0])
        cls.res_null = null_model.fit(start_params=[8.5], disp=0)
        # use start params to avoid warnings
        cls.start_params = [8.5, 0]
class TestNegativeBinomialNB1Null(CheckNull):
    """Null-model check for ``NegativeBinomial`` with ``loglike_method='nb1'``."""

    @classmethod
    def setup_class(cls):
        """Create the full model and fit the constant-only model."""
        y, x = cls._get_data()
        method = 'nb1'
        cls.model = NegativeBinomial(y, x, loglike_method=method)
        cls.model_null = NegativeBinomial(y, x[:, 0], loglike_method=method)
        fit_kwds = dict(method='bfgs', gtol=1e-08, maxiter=300, disp=0)
        cls.res_null = cls.model_null.fit(start_params=[8, 1000], **fit_kwds)
        # for convergence with bfgs, I needed to round down alpha start_params
        cls.start_params = np.array([7.730452, 2.01633068e-02, 1763.0])
class TestNegativeBinomialNB2Null(CheckNull):
    """Null-model check for ``NegativeBinomial`` with ``loglike_method='nb2'``."""

    @classmethod
    def setup_class(cls):
        """Create the full model and fit the constant-only model."""
        y, x = cls._get_data()
        method = 'nb2'
        cls.model = NegativeBinomial(y, x, loglike_method=method)
        cls.model_null = NegativeBinomial(y, x[:, 0], loglike_method=method)
        fit_kwds = dict(method='bfgs', gtol=1e-06, maxiter=300, disp=0)
        cls.res_null = cls.model_null.fit(start_params=[8, 0.5], **fit_kwds)
        cls.start_params = np.array([8.07216448, 0.01087238, 0.44024134])
class TestNegativeBinomialNBP2Null(CheckNull):
    """Null-model check for ``NegativeBinomialP`` with ``p=2``."""

    @classmethod
    def setup_class(cls):
        """Create the full model and fit the constant-only model."""
        y, x = cls._get_data()
        cls.model = NegativeBinomialP(y, x, p=2)
        cls.model_null = NegativeBinomialP(y, x[:, 0], p=2)
        fit_kwds = dict(method='bfgs', gtol=1e-06, maxiter=300, disp=0)
        cls.res_null = cls.model_null.fit(start_params=[8, 1], **fit_kwds)
        cls.start_params = np.array([8.07216448, 0.01087238, 0.44024134])

    def test_start_null(self):
        """Null start params equal those of ``NegativeBinomial`` nb2."""
        model = self.model
        reference_model = NegativeBinomial(model.endog, model.exog,
                                           loglike_method='nb2')
        sp_reference = reference_model._get_start_params_null()
        sp_model = model._get_start_params_null()
        assert_allclose(sp_model, sp_reference, rtol=1e-12)
class TestNegativeBinomialNBP1Null(CheckNull):
    """Null-model check for ``NegativeBinomialP`` with ``p=1``."""

    @classmethod
    def setup_class(cls):
        """Create the full model and fit the constant-only model."""
        y, x = cls._get_data()
        cls.model = NegativeBinomialP(y, x, p=1.)
        cls.model_null = NegativeBinomialP(y, x[:, 0], p=1)
        fit_kwds = dict(method='bfgs', gtol=1e-06, maxiter=300, disp=0)
        cls.res_null = cls.model_null.fit(start_params=[8, 1], **fit_kwds)
        cls.start_params = np.array([7.730452, 2.01633068e-02, 1763.0])

    def test_start_null(self):
        """Null start params equal those of ``NegativeBinomial`` nb1."""
        model = self.model
        reference_model = NegativeBinomial(model.endog, model.exog,
                                           loglike_method='nb1')
        sp_reference = reference_model._get_start_params_null()
        sp_model = model._get_start_params_null()
        assert_allclose(sp_model, sp_reference, rtol=1e-12)
class TestGeneralizedPoissonNull(CheckNull):
    """Null-model check for ``GeneralizedPoisson`` with ``p=1.5``."""

    @classmethod
    def setup_class(cls):
        """Create the full model and fit the constant-only model."""
        y, x = cls._get_data()
        cls.model = GeneralizedPoisson(y, x, p=1.5)
        cls.model_null = GeneralizedPoisson(y, x[:, 0], p=1.5)
        fit_kwds = dict(method='bfgs', gtol=1e-08, maxiter=300, disp=0)
        cls.res_null = cls.model_null.fit(start_params=[8.4, 1], **fit_kwds)
        cls.start_params = np.array([6.91127148, 0.04501334, 0.88393736])
def test_null_options():
    """Check that ``set_null_options`` settings are used by ``llnull``.

    This is a "nice" case because we only check that the options are used
    correctly, not how well the null model converges.
    """
    nobs = 10
    # NOTE(review): exog has 20 rows although nobs is 10; only the first
    # nobs // 2 rows get a zero in the second column.
    exog = np.ones((20, 2))
    exog[:nobs // 2, 1] = 0
    mu = np.exp(exog.sum(1))
    endog = np.random.poisson(mu)  # Note no size=nobs in np.random

    res = Poisson(endog, exog).fit(start_params=np.log([1, 1]), disp=0)
    llnull0 = res.llnull
    # The null-model results are attached as ``res_null`` (see below); the
    # check previously looked for 'res_llnull', a name that is never set,
    # so it could not fail.
    assert_(hasattr(res, 'res_null') is False)

    res.set_null_options(attach_results=True)
    # default optimization
    lln = res.llnull  # access to trigger computation
    assert_allclose(res.res_null.mle_settings['start_params'],
                    np.log(endog.mean()), rtol=1e-10)
    assert_equal(res.res_null.mle_settings['optimizer'], 'bfgs')
    assert_allclose(lln, llnull0)

    # explicit start_params and optimizer for the null model
    res.set_null_options(attach_results=True, start_params=[0.5], method='nm')
    lln = res.llnull  # access to trigger computation
    assert_allclose(res.res_null.mle_settings['start_params'], [0.5],
                    rtol=1e-10)
    assert_equal(res.res_null.mle_settings['optimizer'], 'nm')

    res.summary()  # call to fill cache
    assert_('prsquared' in res._cache)
    assert_equal(res._cache['llnull'], lln)

    # check setting cache: a user-supplied llnull replaces the cached value
    # and drops the cached prsquared
    res.set_null_options(llnull=999)
    assert_('prsquared' not in res._cache)
    assert_equal(res._cache['llnull'], 999)
def test_optim_kwds_prelim():
    """Fit options for the preliminary fit are transmitted correctly.

    A Poisson fit with the preliminary options provides the expected start
    parameters; each count model below must report those start parameters
    and the ``optim_kwds_prelim`` it was given in ``mle_settings``.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    df = pd.read_csv(os.path.join(here, "results", "sm3533.csv"))

    features = ['pp']
    raw = df[features]
    X = (raw - raw.mean()) / raw.std()
    y = df['num'].values
    exog = sm.add_constant(X[features].copy())
    # offset=np.log(df['population'].values + 1)
    # offset currently not used
    offset = None

    # we use "nm", "bfgs" does not work for Poisson/exp with older scipy
    optim_kwds_prelim = dict(method='nm', maxiter=5000)
    res_poi = Poisson(y, exog, offset=offset).fit(disp=0,
                                                  **optim_kwds_prelim)

    nb_model = NegativeBinomial(y, exog, offset=offset)
    res = nb_model.fit(disp=0, optim_kwds_prelim=optim_kwds_prelim)
    settings = res.mle_settings
    assert_allclose(settings['start_params'][:-1], res_poi.params,
                    rtol=1e-4)
    assert_equal(settings['optim_kwds_prelim'], optim_kwds_prelim)
    assert_allclose(res.predict().mean(), y.mean(), rtol=0.1)

    # NBP with p=2 and GPP with p=1.5 also fail on older scipy with bfgs,
    # use nm instead
    optim_kwds_prelim = dict(method='nm', maxiter=5000)
    nbp_model = NegativeBinomialP(y, exog, offset=offset, p=2)
    res = nbp_model.fit(disp=0, optim_kwds_prelim=optim_kwds_prelim)
    settings = res.mle_settings
    assert_allclose(settings['start_params'][:-1], res_poi.params,
                    rtol=1e-4)
    assert_equal(settings['optim_kwds_prelim'], optim_kwds_prelim)
    assert_allclose(res.predict().mean(), y.mean(), rtol=0.1)

    # GPP with p=1.5 converges correctly,
    # GPP fails when p=2 even with good start_params
    gpp_model = GeneralizedPoisson(y, exog, offset=offset, p=1.5)
    res = gpp_model.fit(disp=0, maxiter=200,
                        optim_kwds_prelim=optim_kwds_prelim)
    settings = res.mle_settings
    assert_allclose(settings['start_params'][:-1], res_poi.params,
                    rtol=1e-4)
    assert_equal(settings['optim_kwds_prelim'], optim_kwds_prelim)
    # rough check that convergence makes sense
    assert_allclose(res.predict().mean(), y.mean(), rtol=0.1)
def test_unchanging_degrees_of_freedom():
    """``fit_regularized`` must not change ``model.df_model`` in place.

    See GH3734.  Warnings are turned into errors for the duration of the
    fits; the filter is scoped with ``warnings.catch_warnings`` so that
    the process-wide warning filters are restored afterwards.
    """
    data = load_randhie()
    params = np.array([-0.05654134, -0.21213734, 0.08783102, -0.02991825,
                       0.22902315, 0.06210253, 0.06799444, 0.08406794,
                       0.18530092, 1.36645186])
    reg_params = np.array([-0.04854, -0.15019404, 0.08363671, -0.03032834,
                           0.17592454, 0.06440753, 0.01584555, 0.,
                           0., 1.36984628])

    with warnings.catch_warnings():
        warnings.simplefilter('error')
        model = sm.NegativeBinomial(data.endog, data.exog,
                                    loglike_method='nb2')

        res1 = model.fit(start_params=params, disp=0)
        assert_equal(res1.df_model, 8)

        res2 = model.fit_regularized(alpha=100, start_params=reg_params,
                                     disp=0)
        # If res2.df_model == res1.df_model, then this test is invalid.
        assert_(res2.df_model != 8)

        res3 = model.fit(start_params=params, disp=0)
        # Test that the call to `fit_regularized` did not
        # modify model.df_model inplace.
        assert_equal(res3.df_model, res1.df_model)
        assert_equal(res3.df_resid, res1.df_resid)
def test_mnlogit_float_name():
    """A float-valued endog warns and its value shows up in the summary."""
    frame = pd.DataFrame({"A": [0., 1.1, 0, 0, 1.1], "B": [0, 1, 0, 1, 1]})
    expected_msg = 'endog contains values are that not int-like'
    with pytest.warns(SpecificationWarning, match=expected_msg):
        fitted = smf.mnlogit(formula="A ~ B", data=frame).fit()
    text = fitted.summary().as_text()
    assert 'A=1.1' in text
def test_cov_confint_pandas():
    """``cov_params`` and ``conf_int`` of MNLogit agree for pandas input.

    The standard errors implied by the confidence-interval width must
    match the diagonal of the covariance matrix, and both results must
    carry the same (multi-)index.
    """
    anes = sm.datasets.anes96.load_pandas()
    design = sm.add_constant(anes.exog, prepend=False)
    fitted = sm.MNLogit(anes.endog, design).fit(method="newton", disp=0)

    cov = fitted.cov_params()
    ci = fitted.conf_int()

    se_from_cov = np.sqrt(np.diag(cov))
    lower, upper = ci.iloc[:, 0], ci.iloc[:, 1]
    se_from_ci = (upper - lower) / (2 * stats.norm.ppf(0.975))
    assert_allclose(se_from_cov, se_from_ci)

    assert_index_equal(ci.index, cov.index)
    assert_index_equal(cov.index, cov.columns)
    assert isinstance(ci.index, pd.MultiIndex)
def test_mlogit_t_test():
    """GH669: ``t_test``/``f_test``/``wald_test`` work for MNLogit.

    The same model is fitted once on the data as loaded (``res1``) and once
    on plain arrays (``res2``); the tests must agree between the two and
    with the per-parameter statistics of each fit.
    """
    data = sm.datasets.anes96.load()
    exog = sm.add_constant(data.exog, prepend=False)
    res1 = sm.MNLogit(data.endog, exog).fit(disp=0)
    ones = np.ones(res1.cov_params().shape[0])
    t_pandas = res1.t_test(ones)
    f_pandas = res1.f_test(ones)

    # refit on plain arrays
    exog_arr = np.asarray(sm.add_constant(data.exog, prepend=False))
    endog_arr = np.asarray(data.endog)
    res2 = sm.MNLogit(endog_arr, exog_arr).fit(disp=0)
    t_array = res2.t_test(ones)
    f_array = res2.f_test(ones)

    assert_allclose(t_pandas.effect, t_array.effect)
    assert_allclose(f_pandas.statistic, f_array.statistic)

    # one restriction per parameter reproduces the tvalues table
    k_params = np.size(res2.params)
    tt = res1.t_test(np.eye(k_params))
    assert_allclose(tt.tvalue.reshape(6, 6, order="F"),
                    res1.tvalues.to_numpy())

    tt = res2.t_test(np.eye(k_params))
    assert_allclose(tt.tvalue.reshape(6, 6, order="F"), res2.tvalues)

    # a single-restriction wald test reproduces the first p-value
    wt = res1.wald_test(np.eye(k_params)[0], scalar=True)
    assert_allclose(wt.pvalue, res1.pvalues.to_numpy()[0, 0])

    # string restrictions
    tt = res1.t_test("y1_logpopul")
    wt = res1.wald_test("y1_logpopul", scalar=True)
    assert_allclose(tt.pvalue, wt.pvalue)

    wt = res1.wald_test("y1_logpopul, y2_logpopul", scalar=True)
    # regression test
    assert_allclose(wt.statistic, 5.68660562, rtol=1e-8)