""" Tests for discrete models Notes ----- DECIMAL_3 is used because it seems that there is a loss of precision in the Stata *.dta -> *.csv output, NOT the estimator for the Poisson tests. """ # pylint: disable-msg=E1101 from statsmodels.compat.pandas import assert_index_equal import os import warnings import numpy as np from numpy.testing import ( assert_, assert_allclose, assert_almost_equal, assert_array_equal, assert_array_less, assert_equal, assert_raises, ) import pandas as pd import pytest from scipy import stats from scipy.stats import nbinom import statsmodels.api as sm from statsmodels.discrete.discrete_margins import _iscount, _isdummy from statsmodels.discrete.discrete_model import ( CountModel, GeneralizedPoisson, Logit, MNLogit, NegativeBinomial, NegativeBinomialP, Poisson, Probit, ) import statsmodels.formula.api as smf from statsmodels.tools.sm_exceptions import ( ConvergenceWarning, PerfectSeparationError, SpecificationWarning, ValueWarning, ) from .results.results_discrete import Anes, DiscreteL1, RandHIE, Spector try: import cvxopt # noqa:F401 has_cvxopt = True except ImportError: has_cvxopt = False DECIMAL_14 = 14 DECIMAL_10 = 10 DECIMAL_9 = 9 DECIMAL_4 = 4 DECIMAL_3 = 3 DECIMAL_2 = 2 DECIMAL_1 = 1 DECIMAL_0 = 0 def load_anes96(): data = sm.datasets.anes96.load() data.endog = np.asarray(data.endog) data.exog = np.asarray(data.exog) return data def load_spector(): data = sm.datasets.spector.load() data.endog = np.asarray(data.endog) data.exog = np.asarray(data.exog) return data def load_randhie(): data = sm.datasets.randhie.load() data.endog = np.asarray(data.endog) data.exog = np.asarray(data.exog, dtype=float) return data def check_jac(self, res=None): # moved from CheckModelResults if res is None: res1 = self.res1 else: res1 = res exog = res1.model.exog # basic cross check jacsum = res1.model.score_obs(res1.params).sum(0) score = res1.model.score(res1.params) assert_almost_equal(jacsum, score, DECIMAL_9) # Poisson has low precision ? 
    if isinstance(res1.model, (NegativeBinomial, MNLogit)):
        # skip the rest
        return

    # check score_factor
    # TODO: change when score_obs uses score_factor for DRYing
    s1 = res1.model.score_obs(res1.params)
    sf = res1.model.score_factor(res1.params)
    if not isinstance(sf, tuple):
        s2 = sf[:, None] * exog
    else:
        sf0, sf1 = sf
        s2 = np.column_stack((sf0[:, None] * exog, sf1))
    assert_allclose(s2, s1, rtol=1e-10)

    # check hessian_factor
    h1 = res1.model.hessian(res1.params)
    hf = res1.model.hessian_factor(res1.params)
    if not isinstance(hf, tuple):
        h2 = (hf * exog.T).dot(exog)
    else:
        hf0, hf1, hf2 = hf
        h00 = (hf0 * exog.T).dot(exog)
        h10 = np.atleast_2d(hf1.T.dot(exog))
        h11 = np.atleast_2d(hf2.sum(0))
        h2 = np.vstack((np.column_stack((h00, h10.T)),
                        np.column_stack((h10, h11))))
    assert_allclose(h2, h1, rtol=1e-10)


def check_distr(res):
    distr = res.get_distribution()
    distr1 = res.model.get_distribution(res.params)
    m = res.predict()
    m2 = distr.mean()
    assert_allclose(m, np.squeeze(m2), rtol=1e-10)
    m2 = distr1.mean()
    assert_allclose(m, np.squeeze(m2), rtol=1e-10)

    v = res.predict(which="var")
    v2 = distr.var()
    assert_allclose(v, np.squeeze(v2), rtol=1e-10)


class CheckModelMixin:
    # Assertions about the Model object, as opposed to the Results
    # Assumes that the mixed-in class implements:
    #   res1

    def test_fit_regularized_invalid_method(self):
        # GH#5224 check we get ValueError when passing invalid "method" arg
        model = self.res1.model

        with pytest.raises(ValueError, match=r'is not supported, use either'):
            model.fit_regularized(method="foo")


class CheckModelResults(CheckModelMixin):
    """
    res2 should be the test results from RModelWrap
    or the results as defined in model_results_data
    """

    def test_params(self):
        assert_almost_equal(self.res1.params, self.res2.params, DECIMAL_4)

    def test_conf_int(self):
        assert_allclose(self.res1.conf_int(), self.res2.conf_int, rtol=8e-5)

    def test_zstat(self):
        assert_almost_equal(self.res1.tvalues, self.res2.z, DECIMAL_4)

    def test_pvalues(self):
        assert_almost_equal(self.res1.pvalues, self.res2.pvalues, DECIMAL_4)

    def test_cov_params(self):
        if not hasattr(self.res2, "cov_params"):
            pytest.skip("TODO: implement res2.cov_params")
        assert_almost_equal(self.res1.cov_params(),
                            self.res2.cov_params, DECIMAL_4)

    def test_llf(self):
        assert_almost_equal(self.res1.llf, self.res2.llf, DECIMAL_4)

    def test_llnull(self):
        assert_almost_equal(self.res1.llnull, self.res2.llnull, DECIMAL_4)

    def test_llr(self):
        assert_almost_equal(self.res1.llr, self.res2.llr, DECIMAL_3)

    def test_llr_pvalue(self):
        assert_almost_equal(self.res1.llr_pvalue,
                            self.res2.llr_pvalue, DECIMAL_4)

    @pytest.mark.xfail(reason="Test has not been implemented for this class.",
                       strict=True, raises=NotImplementedError)
    def test_normalized_cov_params(self):
        raise NotImplementedError

    def test_bse(self):
        assert_almost_equal(self.res1.bse, self.res2.bse, DECIMAL_4)

    def test_dof(self):
        assert_equal(self.res1.df_model, self.res2.df_model)
        assert_equal(self.res1.df_resid, self.res2.df_resid)

    def test_aic(self):
        assert_almost_equal(self.res1.aic, self.res2.aic, DECIMAL_3)

    def test_bic(self):
        assert_almost_equal(self.res1.bic, self.res2.bic, DECIMAL_3)

    def test_predict(self):
        assert_almost_equal(self.res1.model.predict(self.res1.params),
                            self.res2.phat, DECIMAL_4)

    def test_predict_xb(self):
        assert_almost_equal(self.res1.model.predict(self.res1.params,
                                                    which="linear"),
                            self.res2.yhat, DECIMAL_4)

    def test_loglikeobs(self):
        # basic cross check
        llobssum = self.res1.model.loglikeobs(self.res1.params).sum()
        assert_almost_equal(llobssum, self.res1.llf, DECIMAL_14)

    def test_jac(self):
        check_jac(self)

    def test_summary_latex(self):
        # see #7747, last line of top table was dropped
        summ = self.res1.summary()
        ltx = summ.as_latex()
        n_lines = len(ltx.splitlines())
        if not isinstance(self.res1.model, MNLogit):
            # skip MNLogit, which creates several params tables
            assert n_lines == 19 + np.size(self.res1.params)
        assert "Covariance Type:" in ltx

    def test_distr(self):
        check_distr(self.res1)


class CheckBinaryResults(CheckModelResults):
    def test_pred_table(self):
        assert_array_equal(self.res1.pred_table(), self.res2.pred_table)

    def test_resid_dev(self):
        assert_almost_equal(self.res1.resid_dev,
                            self.res2.resid_dev, DECIMAL_4)

    def test_resid_generalized(self):
        assert_almost_equal(self.res1.resid_generalized,
                            self.res2.resid_generalized, DECIMAL_4)

    @pytest.mark.smoke
    def test_resid_response(self):
        self.res1.resid_response


class CheckMargEff:
    """
    Test marginal effects (margeff) and its options
    """

    def test_nodummy_dydxoverall(self):
        me = self.res1.get_margeff()
        assert_almost_equal(me.margeff,
                            self.res2.margeff_nodummy_dydx, DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_nodummy_dydx_se, DECIMAL_4)

        me_frame = me.summary_frame()
        eff = me_frame["dy/dx"].values
        assert_allclose(eff, me.margeff, rtol=1e-13)
        assert_equal(me_frame.shape, (me.margeff.size, 6))

    def test_nodummy_dydxmean(self):
        me = self.res1.get_margeff(at='mean')
        assert_almost_equal(me.margeff,
                            self.res2.margeff_nodummy_dydxmean, DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_nodummy_dydxmean_se, DECIMAL_4)

    def test_nodummy_dydxmedian(self):
        me = self.res1.get_margeff(at='median')
        assert_almost_equal(me.margeff,
                            self.res2.margeff_nodummy_dydxmedian, DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_nodummy_dydxmedian_se,
                            DECIMAL_4)

    def test_nodummy_dydxzero(self):
        me = self.res1.get_margeff(at='zero')
        assert_almost_equal(me.margeff,
                            self.res2.margeff_nodummy_dydxzero, DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_nodummy_dydxzero_se, DECIMAL_4)

    def test_nodummy_dyexoverall(self):
        me = self.res1.get_margeff(method='dyex')
        assert_almost_equal(me.margeff,
                            self.res2.margeff_nodummy_dyex, DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_nodummy_dyex_se, DECIMAL_4)

    def test_nodummy_dyexmean(self):
        me = self.res1.get_margeff(at='mean', method='dyex')
        assert_almost_equal(me.margeff,
                            self.res2.margeff_nodummy_dyexmean, DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_nodummy_dyexmean_se, DECIMAL_4)

    def test_nodummy_dyexmedian(self):
        me = self.res1.get_margeff(at='median', method='dyex')
        assert_almost_equal(me.margeff,
                            self.res2.margeff_nodummy_dyexmedian, DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_nodummy_dyexmedian_se,
                            DECIMAL_4)

    def test_nodummy_dyexzero(self):
        me = self.res1.get_margeff(at='zero', method='dyex')
        assert_almost_equal(me.margeff,
                            self.res2.margeff_nodummy_dyexzero, DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_nodummy_dyexzero_se, DECIMAL_4)

    def test_nodummy_eydxoverall(self):
        me = self.res1.get_margeff(method='eydx')
        assert_almost_equal(me.margeff,
                            self.res2.margeff_nodummy_eydx, DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_nodummy_eydx_se, DECIMAL_4)

    def test_nodummy_eydxmean(self):
        me = self.res1.get_margeff(at='mean', method='eydx')
        assert_almost_equal(me.margeff,
                            self.res2.margeff_nodummy_eydxmean, DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_nodummy_eydxmean_se, DECIMAL_4)

    def test_nodummy_eydxmedian(self):
        me = self.res1.get_margeff(at='median', method='eydx')
        assert_almost_equal(me.margeff,
                            self.res2.margeff_nodummy_eydxmedian, DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_nodummy_eydxmedian_se,
                            DECIMAL_4)

    def test_nodummy_eydxzero(self):
        me = self.res1.get_margeff(at='zero', method='eydx')
        assert_almost_equal(me.margeff,
                            self.res2.margeff_nodummy_eydxzero, DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_nodummy_eydxzero_se, DECIMAL_4)

    def test_nodummy_eyexoverall(self):
        me = self.res1.get_margeff(method='eyex')
        assert_almost_equal(me.margeff,
                            self.res2.margeff_nodummy_eyex, DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_nodummy_eyex_se, DECIMAL_4)

    def test_nodummy_eyexmean(self):
        me = self.res1.get_margeff(at='mean', method='eyex')
        assert_almost_equal(me.margeff,
                            self.res2.margeff_nodummy_eyexmean, DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_nodummy_eyexmean_se, DECIMAL_4)

    def test_nodummy_eyexmedian(self):
        me = self.res1.get_margeff(at='median', method='eyex')
        assert_almost_equal(me.margeff,
                            self.res2.margeff_nodummy_eyexmedian, DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_nodummy_eyexmedian_se,
                            DECIMAL_4)

    def test_nodummy_eyexzero(self):
        me = self.res1.get_margeff(at='zero', method='eyex')
        assert_almost_equal(me.margeff,
                            self.res2.margeff_nodummy_eyexzero, DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_nodummy_eyexzero_se, DECIMAL_4)

    def test_dummy_dydxoverall(self):
        me = self.res1.get_margeff(dummy=True)
        assert_almost_equal(me.margeff,
                            self.res2.margeff_dummy_dydx, DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_dummy_dydx_se, DECIMAL_4)

    def test_dummy_dydxmean(self):
        me = self.res1.get_margeff(at='mean', dummy=True)
        assert_almost_equal(me.margeff,
                            self.res2.margeff_dummy_dydxmean, DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_dummy_dydxmean_se, DECIMAL_4)

    def test_dummy_eydxoverall(self):
        me = self.res1.get_margeff(method='eydx', dummy=True)
        assert_almost_equal(me.margeff,
                            self.res2.margeff_dummy_eydx, DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_dummy_eydx_se, DECIMAL_4)

    def test_dummy_eydxmean(self):
        me = self.res1.get_margeff(at='mean', method='eydx', dummy=True)
        assert_almost_equal(me.margeff,
                            self.res2.margeff_dummy_eydxmean, DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_dummy_eydxmean_se, DECIMAL_4)

    def test_count_dydxoverall(self):
        me = self.res1.get_margeff(count=True)
        assert_almost_equal(me.margeff,
                            self.res2.margeff_count_dydx, DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_count_dydx_se, DECIMAL_4)

    def test_count_dydxmean(self):
        me = self.res1.get_margeff(count=True, at='mean')
        assert_almost_equal(me.margeff,
                            self.res2.margeff_count_dydxmean, DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_count_dydxmean_se, DECIMAL_4)

    def test_count_dummy_dydxoverall(self):
        me = self.res1.get_margeff(count=True, dummy=True)
        assert_almost_equal(me.margeff,
                            self.res2.margeff_count_dummy_dydxoverall,
                            DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_count_dummy_dydxoverall_se,
                            DECIMAL_4)

    def test_count_dummy_dydxmean(self):
        me = self.res1.get_margeff(count=True, dummy=True, at='mean')
        assert_almost_equal(me.margeff,
                            self.res2.margeff_count_dummy_dydxmean, DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_count_dummy_dydxmean_se,
                            DECIMAL_4)


class TestProbitNewton(CheckBinaryResults):
    @classmethod
    def setup_class(cls):
        data = load_spector()
        data.exog = sm.add_constant(data.exog, prepend=False)
        cls.res1 = Probit(data.endog, data.exog).fit(method="newton", disp=0)
        res2 = Spector.probit
        cls.res2 = res2

    def test_init_kwargs(self):
        endog = self.res1.model.endog
        exog = self.res1.model.exog
        z = np.ones(len(endog))
        with pytest.warns(ValueWarning, match="unknown kwargs"):
            # unsupported keyword
            Probit(endog, exog, weights=z)


class TestProbitBFGS(CheckBinaryResults):
    @classmethod
    def setup_class(cls):
        data = load_spector()
        data.exog = sm.add_constant(data.exog, prepend=False)
        cls.res1 = Probit(data.endog, data.exog).fit(method="bfgs", disp=0)
        res2 = Spector.probit
        cls.res2 = res2


class TestProbitNM(CheckBinaryResults):
    @classmethod
    def setup_class(cls):
        data = load_spector()
        data.exog = sm.add_constant(data.exog, prepend=False)
        res2 = Spector.probit
        cls.res2 = res2
        cls.res1 = Probit(data.endog, data.exog).fit(method="nm", disp=0,
                                                     maxiter=500)


class TestProbitPowell(CheckBinaryResults):
    @classmethod
    def setup_class(cls):
        data = load_spector()
        data.exog = sm.add_constant(data.exog, prepend=False)
        res2 = Spector.probit
        cls.res2 = res2
        cls.res1 = Probit(data.endog, data.exog).fit(method="powell", disp=0,
                                                     ftol=1e-8)


class TestProbitCG(CheckBinaryResults):
    @classmethod
    def setup_class(cls):
        data = load_spector()
        data.exog = sm.add_constant(data.exog, prepend=False)
        res2 = Spector.probit
        cls.res2 = res2

        # fmin_cg fails to converge on some machines - reparameterize
        from statsmodels.tools.transform_model import StandardizeTransform
        transf = StandardizeTransform(data.exog)
        exog_st = transf(data.exog)
        res1_st = Probit(data.endog, exog_st).fit(method="cg", disp=0,
                                                  maxiter=1000, gtol=1e-08)
        start_params = transf.transform_params(res1_st.params)
        assert_allclose(start_params, res2.params, rtol=1e-5, atol=1e-6)

        cls.res1 = Probit(data.endog, data.exog).fit(
            start_params=start_params, method="cg", maxiter=1000,
            gtol=1e-05, disp=0)

        assert_array_less(cls.res1.mle_retvals['fcalls'], 100)


class TestProbitNCG(CheckBinaryResults):
    @classmethod
    def setup_class(cls):
        data = load_spector()
        data.exog = sm.add_constant(data.exog, prepend=False)
        res2 = Spector.probit
        cls.res2 = res2
        cls.res1 = Probit(data.endog, data.exog).fit(method="ncg", disp=0,
                                                     avextol=1e-8,
                                                     warn_convergence=False)
        # converges close enough but warnflag is 2 for precision loss


class TestProbitBasinhopping(CheckBinaryResults):
    @classmethod
    def setup_class(cls):
        data = load_spector()
        data.exog = sm.add_constant(data.exog, prepend=False)
        res2 = Spector.probit
        cls.res2 = res2
        fit = Probit(data.endog, data.exog).fit
        np.random.seed(1)
        cls.res1 = fit(method="basinhopping", disp=0, niter=5,
                       minimizer={'method': 'L-BFGS-B', 'tol': 1e-8})


class TestProbitMinimizeDefault(CheckBinaryResults):
    @classmethod
    def setup_class(cls):
        data = load_spector()
        data.exog = sm.add_constant(data.exog, prepend=False)
        res2 = Spector.probit
        cls.res2 = res2
        fit = Probit(data.endog, data.exog).fit
        cls.res1 = fit(method="minimize", disp=0, niter=5, tol=1e-8)


class TestProbitMinimizeDogleg(CheckBinaryResults):
    @classmethod
    def setup_class(cls):
        data = load_spector()
        data.exog = sm.add_constant(data.exog, prepend=False)
        res2 = Spector.probit
        cls.res2 = res2
        fit = Probit(data.endog, data.exog).fit
        cls.res1 = fit(method="minimize", disp=0, niter=5, tol=1e-8,
                       min_method='dogleg')


class TestProbitMinimizeAdditionalOptions(CheckBinaryResults):
    @classmethod
    def setup_class(cls):
        data = load_spector()
        data.exog = sm.add_constant(data.exog, prepend=False)
        res2 = Spector.probit
        cls.res2 = res2
        cls.res1 = Probit(data.endog, data.exog).fit(method="minimize",
                                                     disp=0, maxiter=500,
                                                     min_method='Nelder-Mead',
                                                     xatol=1e-4, fatol=1e-4)


class CheckLikelihoodModelL1:
    """
    For testing results generated with L1 regularization
    """

    def test_params(self):
        assert_almost_equal(self.res1.params, self.res2.params, DECIMAL_4)

    def test_conf_int(self):
        assert_almost_equal(
            self.res1.conf_int(), self.res2.conf_int, DECIMAL_4)

    def test_bse(self):
        assert_almost_equal(self.res1.bse, self.res2.bse, DECIMAL_4)

    def test_nnz_params(self):
        assert_almost_equal(
            self.res1.nnz_params, self.res2.nnz_params, DECIMAL_4)

    def test_aic(self):
        assert_almost_equal(
            self.res1.aic, self.res2.aic, DECIMAL_3)

    def test_bic(self):
        assert_almost_equal(
            self.res1.bic, self.res2.bic, DECIMAL_3)


class TestProbitL1(CheckLikelihoodModelL1):
    @classmethod
    def setup_class(cls):
        data = load_spector()
        data.exog = sm.add_constant(data.exog, prepend=True)
        alpha = np.array([0.1, 0.2, 0.3, 10])  # / data.exog.shape[0]
        cls.res1 = Probit(data.endog, data.exog).fit_regularized(
            method="l1", alpha=alpha, disp=0, trim_mode='auto',
            auto_trim_tol=0.02, acc=1e-10, maxiter=1000)
        res2 = DiscreteL1.probit
        cls.res2 = res2

    def test_cov_params(self):
        assert_almost_equal(
            self.res1.cov_params(), self.res2.cov_params, DECIMAL_4)


class TestMNLogitL1(CheckLikelihoodModelL1):
    @classmethod
    def setup_class(cls):
        anes_data = load_anes96()
        anes_exog = anes_data.exog
        anes_exog = sm.add_constant(anes_exog, prepend=False)
        mlogit_mod = sm.MNLogit(anes_data.endog, anes_exog)
        alpha = 10. * np.ones((mlogit_mod.J - 1,
                               mlogit_mod.K))  # / anes_exog.shape[0]
        alpha[-1, :] = 0
        cls.res1 = mlogit_mod.fit_regularized(
            method='l1', alpha=alpha, trim_mode='auto', auto_trim_tol=0.02,
            acc=1e-10, disp=0)
        res2 = DiscreteL1.mnlogit
        cls.res2 = res2


class TestLogitL1(CheckLikelihoodModelL1):
    @classmethod
    def setup_class(cls):
        data = load_spector()
        data.exog = sm.add_constant(data.exog, prepend=True)
        cls.alpha = 3 * np.array([0., 1., 1., 1.])  # / data.exog.shape[0]
        cls.res1 = Logit(data.endog, data.exog).fit_regularized(
            method="l1", alpha=cls.alpha, disp=0, trim_mode='size',
            size_trim_tol=1e-5, acc=1e-10, maxiter=1000)
        res2 = DiscreteL1.logit
        cls.res2 = res2

    def test_cov_params(self):
        assert_almost_equal(
            self.res1.cov_params(), self.res2.cov_params, DECIMAL_4)


@pytest.mark.skipif(not has_cvxopt, reason='Skipped test_cvxopt since cvxopt '
                                           'is not available')
class TestCVXOPT:
    @classmethod
    def setup_class(cls):
        if not has_cvxopt:
            pytest.skip('Skipped test_cvxopt since cvxopt is not available')
        cls.data = sm.datasets.spector.load()
        cls.data.endog = np.asarray(cls.data.endog)
        cls.data.exog = np.asarray(cls.data.exog)
        cls.data.exog = sm.add_constant(cls.data.exog, prepend=True)

    def test_cvxopt_versus_slsqp(self):
        # Compares results from cvxopt to the standard slsqp
        self.alpha = 3. * np.array([0, 1, 1, 1.])  # / self.data.endog.shape[0]
        res_slsqp = Logit(self.data.endog, self.data.exog).fit_regularized(
            method="l1", alpha=self.alpha, disp=0, acc=1e-10, maxiter=1000,
            trim_mode='auto')
        res_cvxopt = Logit(self.data.endog, self.data.exog).fit_regularized(
            method="l1_cvxopt_cp", alpha=self.alpha, disp=0, abstol=1e-10,
            trim_mode='auto', auto_trim_tol=0.01, maxiter=1000)
        assert_almost_equal(res_slsqp.params, res_cvxopt.params, DECIMAL_4)


class TestSweepAlphaL1:
    @classmethod
    def setup_class(cls):
        data = load_spector()
        data.exog = sm.add_constant(data.exog, prepend=True)
        cls.model = Logit(data.endog, data.exog)
        cls.alphas = np.array(
            [[0.1, 0.1, 0.1, 0.1],
             [0.4, 0.4, 0.5, 0.5],
             [0.5, 0.5, 1, 1]])  # / data.exog.shape[0]
        cls.res1 = DiscreteL1.sweep

    def test_sweep_alpha(self):
        for i in range(3):
            alpha = self.alphas[i, :]
            res2 = self.model.fit_regularized(
                method="l1", alpha=alpha, disp=0, acc=1e-10,
                trim_mode='off', maxiter=1000)
            assert_almost_equal(res2.params, self.res1.params[i], DECIMAL_4)


class CheckL1Compatability:
    """
    Tests compatibility between l1 and unregularized by setting alpha such
    that certain parameters should be effectively unregularized, and others
    should be ignored by the model.
    """

    def test_params(self):
        m = self.m
        assert_almost_equal(
            self.res_unreg.params[:m], self.res_reg.params[:m], DECIMAL_4)
        # The last entry should be close to zero
        # handle extra parameter of NegativeBinomial
        kvars = self.res_reg.model.exog.shape[1]
        assert_almost_equal(0, self.res_reg.params[m:kvars], DECIMAL_4)

    def test_cov_params(self):
        m = self.m
        # The restricted cov_params should be equal
        assert_almost_equal(
            self.res_unreg.cov_params()[:m, :m],
            self.res_reg.cov_params()[:m, :m],
            DECIMAL_1)

    def test_df(self):
        assert_equal(self.res_unreg.df_model, self.res_reg.df_model)
        assert_equal(self.res_unreg.df_resid, self.res_reg.df_resid)

    def test_t_test(self):
        m = self.m
        kvars = self.kvars
        # handle extra parameter of NegativeBinomial
        extra = getattr(self, 'k_extra', 0)
        t_unreg = self.res_unreg.t_test(np.eye(len(self.res_unreg.params)))
        t_reg = self.res_reg.t_test(np.eye(kvars + extra))
        assert_almost_equal(t_unreg.effect[:m], t_reg.effect[:m], DECIMAL_3)
        assert_almost_equal(t_unreg.sd[:m], t_reg.sd[:m], DECIMAL_3)
        assert_almost_equal(np.nan, t_reg.sd[m])
        assert_allclose(t_unreg.tvalue[:m], t_reg.tvalue[:m], atol=3e-3)
        assert_almost_equal(np.nan, t_reg.tvalue[m])

    def test_f_test(self):
        m = self.m
        kvars = self.kvars
        # handle extra parameter of NegativeBinomial
        extra = getattr(self, 'k_extra', 0)
        f_unreg = self.res_unreg.f_test(
            np.eye(len(self.res_unreg.params))[:m])
        f_reg = self.res_reg.f_test(np.eye(kvars + extra)[:m])
        assert_allclose(f_unreg.fvalue, f_reg.fvalue, rtol=3e-5, atol=1e-3)
        assert_almost_equal(f_unreg.pvalue, f_reg.pvalue, DECIMAL_3)

    def test_bad_r_matrix(self):
        kvars = self.kvars
        assert_raises(ValueError, self.res_reg.f_test, np.eye(kvars))


class TestPoissonL1Compatability(CheckL1Compatability):
    @classmethod
    def setup_class(cls):
        cls.kvars = 10  # Number of variables
        cls.m = 7  # Number of unregularized parameters
        rand_data = load_randhie()
        rand_exog = rand_data.exog.view(float).reshape(len(rand_data.exog), -1)
        rand_exog = sm.add_constant(rand_exog, prepend=True)
        # Drop some columns and do an unregularized fit
        exog_no_PSI = rand_exog[:, :cls.m]
        mod_unreg = sm.Poisson(rand_data.endog, exog_no_PSI)
        cls.res_unreg = mod_unreg.fit(method="newton", disp=False)
        # Do a regularized fit with alpha, effectively dropping the last column
        alpha = 10 * len(rand_data.endog) * np.ones(cls.kvars)
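        # alpha gives the per-parameter l1 penalty weight: the zeros assigned
        # to the first m entries below leave those coefficients effectively
        # unpenalized, while the large weights (10 * nobs) on the remaining
        # ones drive those coefficients to zero, mimicking the column drop
        # used for the unregularized fit above.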
        alpha[:cls.m] = 0
        cls.res_reg = sm.Poisson(rand_data.endog, rand_exog).fit_regularized(
            method='l1', alpha=alpha, disp=False, acc=1e-10, maxiter=2000,
            trim_mode='auto')


class TestNegativeBinomialL1Compatability(CheckL1Compatability):
    @classmethod
    def setup_class(cls):
        cls.kvars = 10  # Number of variables
        cls.m = 7  # Number of unregularized parameters
        rand_data = load_randhie()
        rand_exog = rand_data.exog.view(float).reshape(len(rand_data.exog), -1)
        rand_exog_st = (rand_exog - rand_exog.mean(0)) / rand_exog.std(0)
        rand_exog = sm.add_constant(rand_exog_st, prepend=True)
        # Drop some columns and do an unregularized fit
        exog_no_PSI = rand_exog[:, :cls.m]
        mod_unreg = sm.NegativeBinomial(rand_data.endog, exog_no_PSI)
        cls.res_unreg = mod_unreg.fit(method="newton", disp=False)
        # Do a regularized fit with alpha, effectively dropping the last column
        alpha = 10 * len(rand_data.endog) * np.ones(cls.kvars + 1)
        alpha[:cls.m] = 0
        alpha[-1] = 0  # do not penalize alpha
        mod_reg = sm.NegativeBinomial(rand_data.endog, rand_exog)
        cls.res_reg = mod_reg.fit_regularized(
            method='l1', alpha=alpha, disp=False, acc=1e-10, maxiter=2000,
            trim_mode='auto')
        cls.k_extra = 1  # 1 extra parameter in nb2


class TestNegativeBinomialGeoL1Compatability(CheckL1Compatability):
    @classmethod
    def setup_class(cls):
        cls.kvars = 10  # Number of variables
        cls.m = 7  # Number of unregularized parameters
        rand_data = load_randhie()
        rand_exog = rand_data.exog.view(float).reshape(len(rand_data.exog), -1)
        rand_exog = sm.add_constant(rand_exog, prepend=True)
        # Drop some columns and do an unregularized fit
        exog_no_PSI = rand_exog[:, :cls.m]
        mod_unreg = sm.NegativeBinomial(rand_data.endog, exog_no_PSI,
                                        loglike_method='geometric')
        cls.res_unreg = mod_unreg.fit(method="newton", disp=False)
        # Do a regularized fit with alpha, effectively dropping the last
        # columns
        alpha = 10 * len(rand_data.endog) * np.ones(cls.kvars)
        alpha[:cls.m] = 0
        mod_reg = sm.NegativeBinomial(rand_data.endog, rand_exog,
                                      loglike_method='geometric')
        cls.res_reg = mod_reg.fit_regularized(
            method='l1', alpha=alpha, disp=False, acc=1e-10, maxiter=2000,
            trim_mode='auto')

        assert_equal(mod_reg.loglike_method, 'geometric')


class TestLogitL1Compatability(CheckL1Compatability):
    @classmethod
    def setup_class(cls):
        cls.kvars = 4  # Number of variables
        cls.m = 3  # Number of unregularized parameters
        data = load_spector()
        data.exog = sm.add_constant(data.exog, prepend=True)
        # Do a regularized fit with alpha, effectively dropping the last column
        alpha = np.array([0, 0, 0, 10])
        cls.res_reg = Logit(data.endog, data.exog).fit_regularized(
            method="l1", alpha=alpha, disp=0, acc=1e-15, maxiter=2000,
            trim_mode='auto')
        # Actually drop the last column and do an unregularized fit
        exog_no_PSI = data.exog[:, :cls.m]
        cls.res_unreg = Logit(data.endog, exog_no_PSI).fit(disp=0, tol=1e-15)


class TestMNLogitL1Compatability(CheckL1Compatability):
    @classmethod
    def setup_class(cls):
        cls.kvars = 4  # Number of variables
        cls.m = 3  # Number of unregularized parameters
        data = load_spector()
        data.exog = sm.add_constant(data.exog, prepend=True)
        alpha = np.array([0, 0, 0, 10])
        cls.res_reg = MNLogit(data.endog, data.exog).fit_regularized(
            method="l1", alpha=alpha, disp=0, acc=1e-15, maxiter=2000,
            trim_mode='auto')
        # Actually drop the last column and do an unregularized fit
        exog_no_PSI = data.exog[:, :cls.m]
        cls.res_unreg = MNLogit(data.endog, exog_no_PSI).fit(
            disp=0, gtol=1e-15, method='bfgs', maxiter=1000)

    def test_t_test(self):
        m = self.m
        kvars = self.kvars
        t_unreg = self.res_unreg.t_test(np.eye(m))
        t_reg = self.res_reg.t_test(np.eye(kvars))
        assert_almost_equal(t_unreg.effect, t_reg.effect[:m], DECIMAL_3)
        assert_almost_equal(t_unreg.sd, t_reg.sd[:m], DECIMAL_3)
        assert_almost_equal(np.nan, t_reg.sd[m])
        assert_almost_equal(t_unreg.tvalue, t_reg.tvalue[:m], DECIMAL_3)

    @pytest.mark.skip("Skipped test_f_test for MNLogit")
    def test_f_test(self):
        pass


class TestProbitL1Compatability(CheckL1Compatability):
    @classmethod
    def setup_class(cls):
        cls.kvars = 4  # Number of variables
        cls.m = 3  # Number of unregularized parameters
        data = load_spector()
        data.exog = sm.add_constant(data.exog, prepend=True)
        alpha = np.array([0, 0, 0, 10])
        cls.res_reg = Probit(data.endog, data.exog).fit_regularized(
            method="l1", alpha=alpha, disp=0, acc=1e-15, maxiter=2000,
            trim_mode='auto')
        # Actually drop the last column and do an unregularized fit
        exog_no_PSI = data.exog[:, :cls.m]
        cls.res_unreg = Probit(data.endog, exog_no_PSI).fit(disp=0, tol=1e-15)


class CompareL1:
    """
    For checking results for l1 regularization.
    Assumes self.res1 and self.res2 are two legitimate models to be compared.
    """

    def test_basic_results(self):
        assert_almost_equal(self.res1.params, self.res2.params, DECIMAL_4)
        assert_almost_equal(self.res1.cov_params(), self.res2.cov_params(),
                            DECIMAL_4)
        assert_almost_equal(self.res1.conf_int(), self.res2.conf_int(),
                            DECIMAL_4)
        assert_almost_equal(self.res1.pvalues, self.res2.pvalues, DECIMAL_4)
        assert_almost_equal(self.res1.pred_table(), self.res2.pred_table(),
                            DECIMAL_4)
        assert_almost_equal(self.res1.bse, self.res2.bse, DECIMAL_4)
        assert_almost_equal(self.res1.llf, self.res2.llf, DECIMAL_4)
        assert_almost_equal(self.res1.aic, self.res2.aic, DECIMAL_4)
        assert_almost_equal(self.res1.bic, self.res2.bic, DECIMAL_4)

        assert_(self.res1.mle_retvals['converged'] is True)


class CompareL11D(CompareL1):
    """
    Check t and f tests. This only works for 1-d results
    """

    def test_tests(self):
        restrictmat = np.eye(len(self.res1.params.ravel()))
        assert_almost_equal(self.res1.t_test(restrictmat).pvalue,
                            self.res2.t_test(restrictmat).pvalue, DECIMAL_4)
        assert_almost_equal(self.res1.f_test(restrictmat).pvalue,
                            self.res2.f_test(restrictmat).pvalue, DECIMAL_4)


class TestL1AlphaZeroLogit(CompareL11D):
    # Compares l1 model with alpha = 0 to the unregularized model.
    @classmethod
    def setup_class(cls):
        data = load_spector()
        data.exog = sm.add_constant(data.exog, prepend=True)
        cls.res1 = Logit(data.endog, data.exog).fit_regularized(
            method="l1", alpha=0, disp=0, acc=1e-15, maxiter=1000,
            trim_mode='auto', auto_trim_tol=0.01)
        cls.res2 = Logit(data.endog, data.exog).fit(disp=0, tol=1e-15)

    def test_converged(self):
        res = self.res1.model.fit_regularized(
            method="l1", alpha=0, disp=0, acc=1e-15, maxiter=1,
            trim_mode='auto', auto_trim_tol=0.01)

        # see #2857
        assert_(res.mle_retvals['converged'] is False)


class TestL1AlphaZeroProbit(CompareL11D):
    # Compares l1 model with alpha = 0 to the unregularized model.
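    # With alpha = 0 the l1 penalty vanishes, so fit_regularized should
    # reproduce the plain fit up to optimizer tolerance; a disagreement here
    # would point at the l1 solver rather than the model.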
    @classmethod
    def setup_class(cls):
        data = load_spector()
        data.exog = sm.add_constant(data.exog, prepend=True)
        cls.res1 = Probit(data.endog, data.exog).fit_regularized(
            method="l1", alpha=0, disp=0, acc=1e-15, maxiter=1000,
            trim_mode='auto', auto_trim_tol=0.01)
        cls.res2 = Probit(data.endog, data.exog).fit(disp=0, tol=1e-15)


class TestL1AlphaZeroMNLogit(CompareL1):
    @classmethod
    def setup_class(cls):
        data = load_anes96()
        data.exog = sm.add_constant(data.exog, prepend=False)
        cls.res1 = MNLogit(data.endog, data.exog).fit_regularized(
            method="l1", alpha=0, disp=0, acc=1e-15, maxiter=1000,
            trim_mode='auto', auto_trim_tol=0.01)
        cls.res2 = MNLogit(data.endog, data.exog).fit(disp=0, gtol=1e-15,
                                                      method='bfgs',
                                                      maxiter=1000)


class TestLogitNewton(CheckBinaryResults, CheckMargEff):
    @classmethod
    def setup_class(cls):
        data = load_spector()
        data.exog = sm.add_constant(data.exog, prepend=False)
        cls.res1 = Logit(data.endog, data.exog).fit(method="newton", disp=0)
        res2 = Spector.logit
        cls.res2 = res2

    def test_resid_pearson(self):
        assert_almost_equal(self.res1.resid_pearson,
                            self.res2.resid_pearson, 5)

    def test_nodummy_exog1(self):
        me = self.res1.get_margeff(atexog={0: 2.0, 2: 1.})
        assert_almost_equal(me.margeff,
                            self.res2.margeff_nodummy_atexog1, DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_nodummy_atexog1_se, DECIMAL_4)

    def test_nodummy_exog2(self):
        me = self.res1.get_margeff(atexog={1: 21., 2: 0}, at='mean')
        assert_almost_equal(me.margeff,
                            self.res2.margeff_nodummy_atexog2, DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_nodummy_atexog2_se, DECIMAL_4)

    def test_dummy_exog1(self):
        me = self.res1.get_margeff(atexog={0: 2.0, 2: 1.}, dummy=True)
        assert_almost_equal(me.margeff,
                            self.res2.margeff_dummy_atexog1, DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_dummy_atexog1_se, DECIMAL_4)

    def test_dummy_exog2(self):
        me = self.res1.get_margeff(atexog={1: 21., 2: 0}, at='mean',
                                   dummy=True)
        assert_almost_equal(me.margeff,
                            self.res2.margeff_dummy_atexog2, DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_dummy_atexog2_se, DECIMAL_4)

    def test_diagnostic(self):
        # Hosmer-Lemeshow
        # Stata 14: `estat gof, group(5) table`
        n_groups = 5
        chi2 = 1.630883318257913
        pvalue = 0.6524
        df = 3

        import statsmodels.stats.diagnostic_gen as dia

        fitted = self.res1.predict()
        en = self.res1.model.endog
        counts = np.column_stack((en, 1 - en))
        expected = np.column_stack((fitted, 1 - fitted))
        # replicate splits in Stata estat gof
        group_sizes = [7, 6, 7, 6, 6]
        indices = np.cumsum(group_sizes)[:-1]
        res = dia.test_chisquare_binning(
            counts, expected, sort_var=fitted, bins=indices, df=None)
        assert_allclose(res.statistic, chi2, rtol=1e-11)
        assert_equal(res.df, df)
        assert_allclose(res.pvalue, pvalue, atol=6e-5)
        assert_equal(res.freqs.shape, (n_groups, 2))
        assert_equal(res.freqs.sum(1), group_sizes)


class TestLogitNewtonPrepend(CheckMargEff):
    # same as previous version but adjusted for add_constant prepend=True
    # bug #3695
    @classmethod
    def setup_class(cls):
        data = load_spector()
        data.exog = sm.add_constant(data.exog, prepend=True)
        cls.res1 = Logit(data.endog, data.exog).fit(method="newton", disp=0)
        res2 = Spector.logit
        cls.res2 = res2
        cls.slice = np.roll(np.arange(len(cls.res1.params)), 1)  # .astype(int)

    def test_resid_pearson(self):
        assert_almost_equal(self.res1.resid_pearson,
                            self.res2.resid_pearson, 5)

    def test_nodummy_exog1(self):
        me = self.res1.get_margeff(atexog={1: 2.0, 3: 1.})
        assert_almost_equal(me.margeff,
                            self.res2.margeff_nodummy_atexog1, DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_nodummy_atexog1_se, DECIMAL_4)

    def test_nodummy_exog2(self):
        me = self.res1.get_margeff(atexog={2: 21., 3: 0}, at='mean')
        assert_almost_equal(me.margeff,
                            self.res2.margeff_nodummy_atexog2, DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_nodummy_atexog2_se, DECIMAL_4)

    def test_dummy_exog1(self):
        me = self.res1.get_margeff(atexog={1: 2.0, 3: 1.}, dummy=True)
        assert_almost_equal(me.margeff,
                            self.res2.margeff_dummy_atexog1, DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_dummy_atexog1_se, DECIMAL_4)

    def test_dummy_exog2(self):
        me = self.res1.get_margeff(atexog={2: 21., 3: 0}, at='mean',
                                   dummy=True)
        assert_almost_equal(me.margeff,
                            self.res2.margeff_dummy_atexog2, DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_dummy_atexog2_se, DECIMAL_4)


class TestLogitBFGS(CheckBinaryResults, CheckMargEff):
    @classmethod
    def setup_class(cls):
        data = load_spector()
        data.exog = sm.add_constant(data.exog, prepend=False)
        res2 = Spector.logit
        cls.res2 = res2
        cls.res1 = Logit(data.endog, data.exog).fit(method="bfgs", disp=0)


class TestPoissonNewton(CheckModelResults):
    @classmethod
    def setup_class(cls):
        data = load_randhie()
        exog = sm.add_constant(data.exog, prepend=False)
        cls.res1 = Poisson(data.endog, exog).fit(method='newton', disp=0)
        res2 = RandHIE.poisson
        cls.res2 = res2

    def test_margeff_overall(self):
        me = self.res1.get_margeff()
        assert_almost_equal(me.margeff,
                            self.res2.margeff_nodummy_overall, DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_nodummy_overall_se, DECIMAL_4)

    def test_margeff_dummy_overall(self):
        me = self.res1.get_margeff(dummy=True)
        assert_almost_equal(me.margeff,
                            self.res2.margeff_dummy_overall, DECIMAL_4)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_dummy_overall_se, DECIMAL_4)

    def test_resid(self):
        assert_almost_equal(self.res1.resid, self.res2.resid, 2)

    def test_predict_prob(self):
        cur_dir = os.path.dirname(os.path.abspath(__file__))
        path = os.path.join(cur_dir, "results", "predict_prob_poisson.csv")
        probs_res = np.loadtxt(path, delimiter=",")

        # just check the first 100 obs. vs R to save memory
        probs = self.res1.predict_prob()[:100]
        assert_almost_equal(probs, probs_res, 8)

    @pytest.mark.xfail(reason="res2.cov_params is a zero-dim array of None",
                       strict=True)
    def test_cov_params(self):
        super().test_cov_params()


class CheckNegBinMixin:
    # Test methods shared by TestNegativeBinomialXYZ classes

    @pytest.mark.xfail(reason="pvalues do not match, in some cases "
                              "wrong size",
                       strict=True, raises=AssertionError)
    def test_pvalues(self):
        assert_almost_equal(self.res1.pvalues, self.res2.pvalues, DECIMAL_4)


class TestNegativeBinomialNB2Newton(CheckNegBinMixin, CheckModelResults):
    @classmethod
    def setup_class(cls):
        data = load_randhie()
        exog = sm.add_constant(data.exog, prepend=False)
        cls.res1 = NegativeBinomial(data.endog, exog, 'nb2').fit(
            method='newton', disp=0)
        res2 = RandHIE.negativebinomial_nb2_bfgs
        cls.res2 = res2

    # NOTE: the bse are much closer in precision to Stata
    def test_bse(self):
        assert_almost_equal(self.res1.bse, self.res2.bse, DECIMAL_3)

    def test_params(self):
        assert_almost_equal(self.res1.params, self.res2.params, DECIMAL_4)

    def test_alpha(self):
        self.res1.bse  # attaches alpha_std_err
        assert_almost_equal(self.res1.lnalpha, self.res2.lnalpha, DECIMAL_4)
        assert_almost_equal(self.res1.lnalpha_std_err,
                            self.res2.lnalpha_std_err, DECIMAL_4)

    def test_conf_int(self):
        assert_almost_equal(self.res1.conf_int(), self.res2.conf_int,
                            DECIMAL_3)

    def test_zstat(self):  # Low precision because Z vs. t
        assert_almost_equal(self.res1.pvalues[:-1], self.res2.pvalues,
                            DECIMAL_2)

    def test_fittedvalues(self):
        assert_almost_equal(self.res1.fittedvalues[:10],
                            self.res2.fittedvalues[:10], DECIMAL_3)

    def test_predict(self):
        assert_almost_equal(self.res1.predict()[:10],
                            np.exp(self.res2.fittedvalues[:10]), DECIMAL_3)

    def test_predict_xb(self):
        assert_almost_equal(self.res1.predict(which="linear")[:10],
                            self.res2.fittedvalues[:10], DECIMAL_3)


class TestNegativeBinomialNB1Newton(CheckNegBinMixin, CheckModelResults):
    @classmethod
    def setup_class(cls):
        data = load_randhie()
        exog = sm.add_constant(data.exog, prepend=False)
        model = NegativeBinomial(data.endog, exog, 'nb1')
        cls.res1 = model.fit(method="newton", maxiter=100, disp=0)
        res2 = RandHIE.negativebinomial_nb1_bfgs
        cls.res2 = res2

    def test_zstat(self):
        assert_almost_equal(self.res1.tvalues, self.res2.z, DECIMAL_1)

    def test_lnalpha(self):
        self.res1.bse  # attaches alpha_std_err
        assert_almost_equal(self.res1.lnalpha, self.res2.lnalpha, 3)
        assert_almost_equal(self.res1.lnalpha_std_err,
                            self.res2.lnalpha_std_err, DECIMAL_4)

    def test_params(self):
        assert_almost_equal(self.res1.params, self.res2.params, DECIMAL_4)

    def test_conf_int(self):
        # the bse for alpha is not high precision from the hessian
        # approximation
        assert_almost_equal(self.res1.conf_int(), self.res2.conf_int,
                            DECIMAL_2)

    @pytest.mark.xfail(reason="Test has not been implemented for this class.",
                       strict=True, raises=NotImplementedError)
    def test_predict(self):
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test has not been implemented for this class.",
                       strict=True, raises=NotImplementedError)
    def test_predict_xb(self):
        raise NotImplementedError


class TestNegativeBinomialNB2BFGS(CheckNegBinMixin, CheckModelResults):
    @classmethod
    def setup_class(cls):
        data = load_randhie()
        exog = sm.add_constant(data.exog, prepend=False)
        cls.res1 = NegativeBinomial(data.endog, exog, 'nb2').fit(
            method='bfgs', disp=0, maxiter=1000)
        res2 = RandHIE.negativebinomial_nb2_bfgs
        cls.res2 = res2

    # NOTE: the bse are much closer in precision to Stata
    def test_bse(self):
        assert_almost_equal(self.res1.bse, self.res2.bse, DECIMAL_3)

    def test_params(self):
        assert_almost_equal(self.res1.params, self.res2.params, DECIMAL_4)

    def test_alpha(self):
        self.res1.bse  # attaches alpha_std_err
        assert_almost_equal(self.res1.lnalpha, self.res2.lnalpha, DECIMAL_4)
        assert_almost_equal(self.res1.lnalpha_std_err,
                            self.res2.lnalpha_std_err, DECIMAL_4)

    def test_conf_int(self):
        assert_almost_equal(self.res1.conf_int(), self.res2.conf_int,
                            DECIMAL_3)

    def test_zstat(self):  # Low precision because Z vs. t
        assert_almost_equal(self.res1.pvalues[:-1], self.res2.pvalues,
                            DECIMAL_2)

    def test_fittedvalues(self):
        assert_almost_equal(self.res1.fittedvalues[:10],
                            self.res2.fittedvalues[:10], DECIMAL_3)

    def test_predict(self):
        assert_almost_equal(self.res1.predict()[:10],
                            np.exp(self.res2.fittedvalues[:10]), DECIMAL_3)

    def test_predict_xb(self):
        assert_almost_equal(self.res1.predict(which="linear")[:10],
                            self.res2.fittedvalues[:10], DECIMAL_3)


class TestNegativeBinomialNB1BFGS(CheckNegBinMixin, CheckModelResults):
    @classmethod
    def setup_class(cls):
        data = load_randhie()
        exog = sm.add_constant(data.exog, prepend=False)
        cls.res1 = NegativeBinomial(data.endog, exog, 'nb1').fit(
            method="bfgs", maxiter=100, disp=0)
        res2 = RandHIE.negativebinomial_nb1_bfgs
        cls.res2 = res2

    def test_zstat(self):
        assert_almost_equal(self.res1.tvalues, self.res2.z, DECIMAL_1)

    def test_lnalpha(self):
        self.res1.bse  # attaches alpha_std_err
        assert_almost_equal(self.res1.lnalpha, self.res2.lnalpha, 3)
        assert_almost_equal(self.res1.lnalpha_std_err,
                            self.res2.lnalpha_std_err, DECIMAL_4)

    def test_params(self):
        assert_almost_equal(self.res1.params, self.res2.params, DECIMAL_4)

    def test_conf_int(self):
        # the bse for alpha is not high precision from the hessian
        # approximation
        assert_almost_equal(self.res1.conf_int(), self.res2.conf_int,
                            DECIMAL_2)

    @pytest.mark.xfail(reason="Test has not been implemented for this class.",
                       strict=True, raises=NotImplementedError)
    def test_predict(self):
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test has not been implemented for this class.",
                       strict=True, raises=NotImplementedError)
    def test_predict_xb(self):
        raise NotImplementedError


class TestNegativeBinomialGeometricBFGS(CheckNegBinMixin, CheckModelResults):
    # Cannot find another implementation of the geometric to cross-check
    # results. We only test fitted values because geometric has fewer
    # parameters than nb1 and nb2, and we want to make sure that predict(),
    # i.e. np.dot(exog, params), works.
    @classmethod
    def setup_class(cls):
        data = load_randhie()
        exog = sm.add_constant(data.exog, prepend=False)
        mod = NegativeBinomial(data.endog, exog, 'geometric')
        cls.res1 = mod.fit(method='bfgs', disp=0)
        res2 = RandHIE.negativebinomial_geometric_bfgs
        cls.res2 = res2

    # the following are regression tests, could be inherited instead

    def test_aic(self):
        assert_almost_equal(self.res1.aic, self.res2.aic, DECIMAL_3)

    def test_bic(self):
        assert_almost_equal(self.res1.bic, self.res2.bic, DECIMAL_3)

    def test_conf_int(self):
        assert_almost_equal(self.res1.conf_int(), self.res2.conf_int,
                            DECIMAL_3)

    def test_fittedvalues(self):
        assert_almost_equal(self.res1.fittedvalues[:10],
                            self.res2.fittedvalues[:10], DECIMAL_3)

    def test_predict(self):
        assert_almost_equal(self.res1.predict()[:10],
                            np.exp(self.res2.fittedvalues[:10]), DECIMAL_3)

    def test_params(self):
        assert_almost_equal(self.res1.params, self.res2.params, DECIMAL_3)

    def test_predict_xb(self):
        assert_almost_equal(self.res1.predict(which="linear")[:10],
                            self.res2.fittedvalues[:10], DECIMAL_3)

    def test_zstat(self):  # Low precision because Z vs. t
        assert_almost_equal(self.res1.tvalues, self.res2.z, DECIMAL_1)

    def test_llf(self):
        assert_almost_equal(self.res1.llf, self.res2.llf, DECIMAL_1)

    def test_llr(self):
        assert_almost_equal(self.res1.llr, self.res2.llr, DECIMAL_2)

    def test_bse(self):
        assert_almost_equal(self.res1.bse, self.res2.bse, DECIMAL_3)


class CheckMNLogitBaseZero(CheckModelResults):
    def test_margeff_overall(self):
        me = self.res1.get_margeff()
        assert_almost_equal(me.margeff, self.res2.margeff_dydx_overall, 6)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_dydx_overall_se, 6)

        me_frame = me.summary_frame()
        eff = me_frame["dy/dx"].values.reshape(me.margeff.shape, order="F")
        assert_allclose(eff, me.margeff, rtol=1e-13)
        assert_equal(me_frame.shape, (np.size(me.margeff), 6))

    def test_margeff_mean(self):
        me = self.res1.get_margeff(at='mean')
        assert_almost_equal(me.margeff, self.res2.margeff_dydx_mean, 7)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_dydx_mean_se, 7)

    def test_margeff_dummy(self):
        data = self.data
        vote = data.data['vote']
        exog = np.column_stack((data.exog, vote))
        exog = sm.add_constant(exog, prepend=False)
        res = MNLogit(data.endog, exog).fit(method="newton", disp=0)

        me = res.get_margeff(dummy=True)
        assert_almost_equal(me.margeff,
                            self.res2.margeff_dydx_dummy_overall, 6)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_dydx_dummy_overall_se, 6)

        me = res.get_margeff(dummy=True, method="eydx")
        assert_almost_equal(me.margeff,
                            self.res2.margeff_eydx_dummy_overall, 5)
        assert_almost_equal(me.margeff_se,
                            self.res2.margeff_eydx_dummy_overall_se, 6)

    def test_j(self):
        assert_equal(self.res1.model.J, self.res2.J)

    def test_k(self):
        assert_equal(self.res1.model.K, self.res2.K)

    def test_endog_names(self):
        assert_equal(self.res1._get_endog_name(None, None)[1],
                     ['y=1', 'y=2', 'y=3', 'y=4', 'y=5', 'y=6'])

    def test_pred_table(self):
        # fitted results taken from gretl
        pred = [6, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 6, 0, 1, 6, 0,
                0, 1, 1, 6, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 6, 0, 0, 6, 6, 0,
                0, 1, 1, 6, 1, 6, 0, 0, 0, 1, 0, 1, 0, 0, 0, 6, 0, 0, 6, 0,
                0, 0, 1, 1, 0, 0, 6, 6, 6, 6, 1, 0, 5, 1, 1, 0, 0, 0, 1, 1,
                0, 0, 0, 0, 6, 0, 6, 6, 1, 0, 1, 1, 6, 5, 1, 0, 0, 0, 5, 0,
                0, 6, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 6, 6, 6, 6, 5, 0, 1, 1,
                0, 1, 0, 6, 6, 0, 0, 0, 6, 0, 0, 0, 6, 6, 0, 5, 1, 0, 0, 0,
                0, 6, 0, 5, 6, 6, 0, 0, 0, 0, 6, 1, 0, 0, 1, 0, 1, 6, 1, 1,
                1, 1, 1, 0, 0, 0, 6, 0, 5, 1, 0, 6, 6, 6, 0, 0, 0, 0, 1, 6,
                6, 0, 0, 0, 1, 1, 5, 6, 0, 6, 1, 0, 0, 1, 6, 0, 0, 1, 0, 6,
                6, 0, 5, 6, 6, 0, 0, 6, 1, 0, 6, 0, 1, 0, 1, 6, 0, 1, 1, 1,
                6, 0, 5, 0, 0, 6, 1, 0, 6, 5, 5, 0, 6, 1, 1, 1, 0, 0, 6, 0,
                0, 5, 0, 0, 6, 6, 6, 6, 6, 0, 1, 0, 0, 6, 6, 0, 0, 1, 6, 0,
                0, 6, 1, 6, 1, 1, 1, 0, 1, 6, 5, 0, 0, 1, 5, 0, 1, 6, 6, 1,
                0, 0, 1, 6, 1, 5, 6, 1, 0, 0, 1, 1, 0, 6, 1, 6, 0, 1, 1, 5,
                6, 6, 5, 1, 1, 1, 0, 6, 1, 6, 1, 0, 1, 0, 0, 1, 5, 0, 1, 1,
                0, 5, 6, 0, 5, 1, 1, 6, 5, 0, 6, 0, 0, 0, 0, 0, 0, 1, 6, 1,
                0, 5, 1, 0, 0, 1, 6, 0, 0, 6, 6, 6, 0, 2, 1, 6, 5, 6, 1, 1,
                0, 5, 1, 1, 1, 6, 1, 6, 6, 5, 6, 0, 1, 0, 1, 6, 0, 6, 1, 6,
                0, 0, 6, 1, 0, 6, 1, 0, 0, 0, 0, 6, 6, 6, 6, 5, 6, 6, 0, 0,
                6, 1, 1, 6, 0, 0, 6, 6, 0, 6, 6, 0, 0, 6, 0, 0, 6, 6, 6, 1,
                0, 6, 0, 0, 0, 6, 1, 1, 0, 1, 5, 0, 0, 5, 0, 0, 0, 1, 1, 6,
                1, 0, 0, 0, 6, 6, 1, 1, 6, 5, 5, 0, 6, 6, 0, 1, 1, 0, 6, 6,
                0, 6, 5, 5, 6, 5, 1, 0, 6, 0, 6, 1, 0, 1, 6, 6, 6, 1, 0, 6,
                0, 5, 6, 6, 5, 0, 5, 1, 0, 6, 0, 6, 1, 5, 5, 0, 1, 5, 5, 2,
                6, 6, 6, 5, 0, 0, 1, 6, 1, 0, 1, 6, 1, 0, 0, 1, 5, 6, 6, 0,
                0, 0, 5, 6, 6, 6, 1, 5, 6, 1, 0, 0, 6, 5, 0, 1, 1, 1, 6, 6,
                0, 1, 0, 0, 0, 5, 0, 0, 6, 1, 6, 0, 6, 1, 5, 5, 6, 5,
                0, 0, 0, 0, 1, 1, 0, 5, 5, 0, 0, 0, 0, 1, 0, 6, 6, 1, 1, 6,
                6, 0, 5, 5, 0, 0, 0, 6, 6, 1, 6, 0, 0, 5, 0, 1, 6, 5, 6, 6,
                5, 5, 6, 6, 1, 0, 1, 6, 6, 1, 6, 0, 6, 0, 6, 5, 0, 6, 6, 0,
                5, 6, 0, 6, 6, 5, 0, 1, 6, 6, 1, 0, 1, 0, 6, 6, 1, 0, 6, 6,
                6, 0, 1, 6, 0, 1, 5, 1, 1, 5, 6, 6, 0, 1, 6, 6, 1, 5, 0, 5,
                0, 6, 0, 1, 6, 1, 0, 6, 1, 6, 0, 6, 1, 0, 0, 0, 6, 6, 0, 1,
                1, 6, 6, 6, 1, 6, 0, 5, 6, 0, 5, 6, 6, 5, 5, 5, 6, 0, 6, 0,
                0, 0, 5, 0, 6, 1, 2, 6, 6, 6, 5, 1, 6, 0, 6, 0, 0, 0, 0, 6,
                5, 0, 5, 1, 6, 5, 1, 6, 5, 1, 1, 0, 0, 6, 1, 1, 5, 6, 6, 0,
                5, 2, 5, 5, 0, 5, 5, 5, 6, 5, 6, 6, 5, 2, 6, 5, 6, 0, 0, 6,
                5, 0, 6, 0, 0, 6, 6, 6, 0, 5, 1, 1, 6, 6, 5, 2, 1, 6, 5, 6,
                0, 6, 6, 1, 1, 5, 1, 6, 6, 6, 0, 0, 6, 1, 0, 5, 5, 1, 5, 6,
                1, 6, 0, 1, 6, 5, 0, 0, 6, 1, 5, 1, 0, 6, 0, 6, 6, 5, 5, 6,
                6, 6, 6, 2, 6, 6, 6, 5, 5, 5, 0, 1, 0, 0, 0, 6, 6, 1, 0, 6,
                6, 6, 6, 6, 1, 0, 6, 1, 5, 5, 6, 6, 6, 6, 6, 5, 6, 1, 6, 2,
                5, 5, 6, 5, 6, 6, 5, 6, 6, 5, 5, 6, 1, 5, 1, 6, 0, 2, 5, 0,
                5, 0, 2, 1, 6, 0, 0, 6, 6, 1, 6, 0, 5, 5, 6, 6, 1, 6, 6, 6,
                5, 6, 6, 1, 6, 5, 6, 1, 1, 0, 6, 6, 5, 1, 0, 0, 6, 6, 5, 6,
                0, 1, 6, 0, 5, 6, 5, 2, 5, 2, 0, 0, 1, 6, 6, 1, 5, 6, 6, 0,
                6, 6, 6, 6, 6, 5]
        assert_array_equal(self.res1.predict().argmax(1), pred)

        # the rows should add up for pred table
        assert_array_equal(self.res1.pred_table().sum(0), np.bincount(pred))

        # note this is just a regression test, gretl does not have a
        # prediction table
        pred = [[126., 41., 2., 0., 0., 12., 19.],
                [77., 73., 3., 0., 0., 15., 12.],
                [37., 43., 2., 0., 0., 19., 7.],
                [12., 9., 1., 0., 0., 9., 6.],
                [19., 10., 2., 0., 0., 20., 43.],
                [22., 25., 1., 0., 0., 31., 71.],
                [9., 7., 1., 0., 0., 18., 140.]]
        assert_array_equal(self.res1.pred_table(), pred)

    def test_resid(self):
        assert_array_equal(self.res1.resid_misclassified, self.res2.resid)

    @pytest.mark.xfail(reason="res2.cov_params is a zero-dim array of None",
                       strict=True)
    def test_cov_params(self):
        super().test_cov_params()

    @pytest.mark.xfail(reason="Test has not been implemented for this class.",
                       strict=True, raises=NotImplementedError)
    def test_distr(self):
        super().test_distr()


class TestMNLogitNewtonBaseZero(CheckMNLogitBaseZero):
    @classmethod
    def setup_class(cls):
        cls.data = data = load_anes96()
        exog = data.exog
        exog = sm.add_constant(exog, prepend=False)
        cls.res1 = MNLogit(data.endog, exog).fit(method="newton", disp=0)
        res2 = Anes.mnlogit_basezero
        cls.res2 = res2


class TestMNLogitLBFGSBaseZero(CheckMNLogitBaseZero):
    @classmethod
    def setup_class(cls):
        cls.data = data = load_anes96()
        exog = data.exog
        exog = sm.add_constant(exog, prepend=False)
        mymodel = MNLogit(data.endog, exog)
        cls.res1 = mymodel.fit(method="lbfgs", disp=0, maxiter=50000,
                               # m=12, pgtol=1e-7, factr=1e3,  # 5 failures
                               # m=20, pgtol=1e-8, factr=1e2,  # 3 failures
                               # m=30, pgtol=1e-9, factr=1e1,  # 1 failure
                               m=40, pgtol=1e-10, factr=5e0,
                               loglike_and_score=mymodel.loglike_and_score)
        res2 = Anes.mnlogit_basezero
        cls.res2 = res2


def test_mnlogit_basinhopping():
    def callb(*args):
        return 1

    x = np.random.randint(0, 100, 1000)
    y = np.random.randint(0, 3, 1000)
    model = MNLogit(y, sm.add_constant(x))
    # smoke tests for basinhopping and callback #8665
    model.fit(method='basinhopping')
    model.fit(method='basinhopping', callback=callb)


def test_perfect_prediction():
    cur_dir = os.path.dirname(os.path.abspath(__file__))
    iris_dir = os.path.join(cur_dir, '..', '..', 'genmod', 'tests', 'results')
    iris_dir = os.path.abspath(iris_dir)
    iris = np.genfromtxt(os.path.join(iris_dir, 'iris.csv'), delimiter=",",
                         skip_header=1)
    y = iris[:, -1]
    X = iris[:, :-1]
    X = X[y != 2]
    y = y[y != 2]
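    # Keeping only classes 0 and 1 of iris yields linearly separable data,
    # so the logit MLE does not exist (the coefficients diverge); this is
    # the perfect-prediction case exercised below.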
X = sm.add_constant(X, prepend=True) mod = Logit(y, X) mod.raise_on_perfect_prediction = True assert_raises(PerfectSeparationError, mod.fit, maxiter=1000) # turn off raise PerfectSeparationError mod.raise_on_perfect_prediction = False # this will raise if you set maxiter high enough with a singular matrix with pytest.warns(ConvergenceWarning): res = mod.fit(disp=False, maxiter=50) # should not raise but does warn assert_(not res.mle_retvals['converged']) # The following does not warn but message in summary() mod.fit(method="bfgs", disp=False, maxiter=50) def test_poisson_predict(): #GH: 175, make sure poisson predict works without offset and exposure data = load_randhie() exog = sm.add_constant(data.exog, prepend=True) res = sm.Poisson(data.endog, exog).fit(method='newton', disp=0) pred1 = res.predict() pred2 = res.predict(exog) assert_almost_equal(pred1, pred2) #exta options pred3 = res.predict(exog, offset=0, exposure=1) assert_almost_equal(pred1, pred3) pred3 = res.predict(exog, offset=0, exposure=2) assert_almost_equal(2*pred1, pred3) pred3 = res.predict(exog, offset=np.log(2), exposure=1) assert_almost_equal(2*pred1, pred3) def test_poisson_newton(): #GH: 24, Newton does not work well sometimes nobs = 10000 np.random.seed(987689) x = np.random.randn(nobs, 3) x = sm.add_constant(x, prepend=True) y_count = np.random.poisson(np.exp(x.sum(1))) mod = sm.Poisson(y_count, x) # this is not thread-safe with pytest.warns(ConvergenceWarning): res = mod.fit(start_params=-np.ones(4), method='newton', disp=0) assert_(not res.mle_retvals['converged']) def test_issue_339(): # make sure MNLogit summary works for J != K. data = load_anes96() exog = data.exog # leave out last exog column exog = exog[:,:-1] exog = sm.add_constant(exog, prepend=True) res1 = sm.MNLogit(data.endog, exog).fit(method="newton", disp=0) # strip the header from the test smry = "\n".join(res1.summary().as_text().split('\n')[9:]) cur_dir = os.path.dirname(os.path.abspath(__file__)) test_case_file = os.path.join(cur_dir, 'results', 'mn_logit_summary.txt') with open(test_case_file, encoding="utf-8") as fd: test_case = fd.read() np.testing.assert_equal(smry, test_case[:-1]) # smoke test for summary2 res1.summary2() # see #3651 def test_issue_341(): data = load_anes96() exog = data.exog # leave out last exog column exog = exog[:,:-1] exog = sm.add_constant(exog, prepend=True) res1 = sm.MNLogit(data.endog, exog).fit(method="newton", disp=0) x = exog[0] np.testing.assert_equal(res1.predict(x).shape, (1,7)) np.testing.assert_equal(res1.predict(x[None]).shape, (1,7)) def test_negative_binomial_default_alpha_param(): with pytest.warns(UserWarning, match='Negative binomial' ' dispersion parameter alpha not set'): sm.families.NegativeBinomial() with pytest.warns(UserWarning, match='Negative binomial' ' dispersion parameter alpha not set'): sm.families.NegativeBinomial(link=sm.families.links.nbinom(alpha=1.0)) with warnings.catch_warnings(): warnings.simplefilter("error") sm.families.NegativeBinomial(alpha=1.0) with pytest.warns(FutureWarning): sm.families.NegativeBinomial(link=sm.families.links.nbinom(alpha=1.0), alpha=1.0) def test_iscount(): X = np.random.random((50, 10)) X[:,2] = np.random.randint(1, 10, size=50) X[:,6] = np.random.randint(1, 10, size=50) X[:,4] = np.random.randint(0, 2, size=50) X[:,1] = np.random.randint(-10, 10, size=50) # not integers count_ind = _iscount(X) assert_equal(count_ind, [2, 6]) def test_isdummy(): X = np.random.random((50, 10)) X[:,2] = np.random.randint(1, 10, size=50) X[:,6] = np.random.randint(0, 2, 
size=50) X[:,4] = np.random.randint(0, 2, size=50) X[:,1] = np.random.randint(-10, 10, size=50) # not integers count_ind = _isdummy(X) assert_equal(count_ind, [4, 6]) def test_non_binary(): y = [1, 2, 1, 2, 1, 2] X = np.random.randn(6, 2) assert_raises(ValueError, Logit, y, X) y = [0, 1, 0, 0, 1, 0.5] assert_raises(ValueError, Probit, y, X) def test_mnlogit_factor(): dta = sm.datasets.anes96.load_pandas() dta['endog'] = dta.endog.replace(dict(zip(range(7), 'ABCDEFG'))) exog = sm.add_constant(dta.exog, prepend=True) mod = sm.MNLogit(dta.endog, exog) res = mod.fit(disp=0) # smoke tests params = res.params summary = res.summary() predicted = res.predict(exog.iloc[:5, :]) # check endog is series with no name #8672 endogn = dta['endog'] endogn.name = None mod = sm.MNLogit(endogn, exog) # with patsy mod = smf.mnlogit('PID ~ ' + ' + '.join(dta.exog.columns), dta.data) res2 = mod.fit(disp=0) params_f = res2.params summary = res2.summary() assert_allclose(params_f, params, rtol=1e-10) predicted_f = res2.predict(dta.exog.iloc[:5, :]) assert_allclose(predicted_f, predicted, rtol=1e-10) def test_mnlogit_factor_categorical(): dta = sm.datasets.anes96.load_pandas() dta['endog'] = dta.endog.replace(dict(zip(range(7), 'ABCDEFG'))) exog = sm.add_constant(dta.exog, prepend=True) mod = sm.MNLogit(dta.endog, exog) res = mod.fit(disp=0) dta['endog'] = dta['endog'].astype('category') mod = sm.MNLogit(dta.endog, exog) res_cat = mod.fit(disp=0) assert_allclose(res.params, res_cat.params) def test_formula_missing_exposure(): # see 2083 d = {'Foo': [1, 2, 10, 149], 'Bar': [1, 2, 3, np.nan], 'constant': [1] * 4, 'exposure' : np.random.uniform(size=4), 'x': [1, 3, 2, 1.5]} df = pd.DataFrame(d) # should work mod1 = smf.poisson('Foo ~ Bar', data=df, exposure=df['exposure']) assert_(type(mod1.exposure) is np.ndarray, msg='Exposure is not ndarray') # make sure this raises exposure = pd.Series(np.random.uniform(size=5)) df.loc[3, 'Bar'] = 4 # nan not relevant for ValueError for shape mismatch assert_raises(ValueError, sm.Poisson, df.Foo, df[['constant', 'Bar']], exposure=exposure) def test_predict_with_exposure(): # Case where CountModel.predict is called with exog = None and exposure # or offset not-None # See 3565 # Setup copied from test_formula_missing_exposure import pandas as pd d = {'Foo': [1, 2, 10, 149], 'Bar': [1, 2, 3, 4], 'constant': [1] * 4, 'exposure' : [np.exp(1)]*4, 'x': [1, 3, 2, 1.5]} df = pd.DataFrame(d) mod1 = CountModel.from_formula('Foo ~ Bar', data=df, exposure=df['exposure']) params = np.array([1, .4]) pred = mod1.predict(params, which="linear") # No exposure is passed, so default to using mod1.exposure, which # should have been logged X = df[['constant', 'Bar']].values # mod1.exog expected = np.dot(X, params) + 1 assert_allclose(pred, expected) # The above should have passed without the current patch. 
    # The next test would fail under the old code
    pred2 = mod1.predict(params, exposure=[np.exp(2)] * 4, which="linear")
    expected2 = expected + 1
    assert_allclose(pred2, expected2)


def test_binary_pred_table_zeros():
    # see GH#2968
    nobs = 10
    y = np.zeros(nobs)
    y[[1, 3]] = 1

    res = Logit(y, np.ones(nobs)).fit(disp=0)
    expected = np.array([[8., 0.], [2., 0.]])
    assert_equal(res.pred_table(), expected)

    res = MNLogit(y, np.ones(nobs)).fit(disp=0)
    expected = np.array([[8., 0.], [2., 0.]])
    assert_equal(res.pred_table(), expected)


class TestGeneralizedPoisson_p2:
    # Test Generalized Poisson model

    @classmethod
    def setup_class(cls):
        data = load_randhie()
        data.exog = sm.add_constant(data.exog, prepend=False)
        mod = GeneralizedPoisson(data.endog, data.exog, p=2)
        cls.res1 = mod.fit(method='newton', disp=0)
        res2 = RandHIE.generalizedpoisson_gp2
        cls.res2 = res2

    def test_bse(self):
        assert_allclose(self.res1.bse, self.res2.bse, atol=1e-5)

    def test_params(self):
        assert_allclose(self.res1.params, self.res2.params, atol=1e-5)

    def test_alpha(self):
        assert_allclose(self.res1.lnalpha, self.res2.lnalpha)
        assert_allclose(self.res1.lnalpha_std_err,
                        self.res2.lnalpha_std_err, atol=1e-5)

    def test_conf_int(self):
        assert_allclose(self.res1.conf_int(), self.res2.conf_int, atol=1e-3)

    def test_aic(self):
        assert_allclose(self.res1.aic, self.res2.aic)

    def test_bic(self):
        assert_allclose(self.res1.bic, self.res2.bic)

    def test_df(self):
        assert_equal(self.res1.df_model, self.res2.df_model)

    def test_llf(self):
        assert_allclose(self.res1.llf, self.res2.llf)

    def test_wald(self):
        result = self.res1.wald_test(np.eye(len(self.res1.params))[:-2],
                                     scalar=True)
        assert_allclose(result.statistic, self.res2.wald_statistic)
        assert_allclose(result.pvalue, self.res2.wald_pvalue, atol=1e-15)

    def test_t(self):
        unit_matrix = np.identity(self.res1.params.size)
        t_test = self.res1.t_test(unit_matrix)
        assert_allclose(self.res1.tvalues, t_test.tvalue)

    def test_jac(self):
        check_jac(self)

    def test_distr(self):
        check_distr(self.res1)


class TestGeneralizedPoisson_transparams:
    # Test Generalized Poisson model

    @classmethod
    def setup_class(cls):
        data = load_randhie()
        data.exog = sm.add_constant(data.exog, prepend=False)
        cls.res1 = GeneralizedPoisson(data.endog, data.exog, p=2).fit(
            method='newton', disp=0)
        res2 = RandHIE.generalizedpoisson_gp2
        cls.res2 = res2

    def test_bse(self):
        assert_allclose(self.res1.bse, self.res2.bse, atol=1e-5)

    def test_params(self):
        assert_allclose(self.res1.params, self.res2.params, atol=1e-5)

    def test_alpha(self):
        assert_allclose(self.res1.lnalpha, self.res2.lnalpha)
        assert_allclose(self.res1.lnalpha_std_err,
                        self.res2.lnalpha_std_err, atol=1e-5)

    def test_conf_int(self):
        assert_allclose(self.res1.conf_int(), self.res2.conf_int, atol=1e-3)

    def test_aic(self):
        assert_allclose(self.res1.aic, self.res2.aic)

    def test_bic(self):
        assert_allclose(self.res1.bic, self.res2.bic)

    def test_df(self):
        assert_equal(self.res1.df_model, self.res2.df_model)

    def test_llf(self):
        assert_allclose(self.res1.llf, self.res2.llf)


class TestGeneralizedPoisson_p1:
    # Test Generalized Poisson model

    @classmethod
    def setup_class(cls):
        cls.data = load_randhie()
        cls.data.exog = sm.add_constant(cls.data.exog, prepend=False)
        cls.res1 = GeneralizedPoisson(
            cls.data.endog, cls.data.exog, p=1).fit(method='newton', disp=0)

    def test_llf(self):
        poisson_llf = sm.Poisson(
            self.data.endog, self.data.exog).loglike(
            self.res1.params[:-1])
        genpoisson_llf = sm.GeneralizedPoisson(
            self.data.endog, self.data.exog, p=1).loglike(
            list(self.res1.params[:-1]) + [0])
        assert_allclose(genpoisson_llf, poisson_llf)

    def test_score(self):
        poisson_score = sm.Poisson(
            self.data.endog, self.data.exog).score(
            self.res1.params[:-1])
        genpoisson_score = sm.GeneralizedPoisson(
            self.data.endog, self.data.exog, p=1).score(
            list(self.res1.params[:-1]) + [0])
        assert_allclose(genpoisson_score[:-1], poisson_score, atol=1e-9)

    def test_hessian(self):
        poisson_hess = sm.Poisson(
            self.data.endog, self.data.exog).hessian(
            self.res1.params[:-1])
        genpoisson_hess = sm.GeneralizedPoisson(
            self.data.endog, self.data.exog, p=1).hessian(
            list(self.res1.params[:-1]) + [0])
        assert_allclose(genpoisson_hess[:-1, :-1], poisson_hess, atol=1e-10)

    def test_t(self):
        unit_matrix = np.identity(self.res1.params.size)
        t_test = self.res1.t_test(unit_matrix)
        assert_allclose(self.res1.tvalues, t_test.tvalue)

    def test_fit_regularized(self):
        model = self.res1.model

        # do not penalize constant and dispersion parameter
        alpha = np.ones(len(self.res1.params))
        alpha[-2:] = 0
        # the first fit currently prints a warning, irrelevant here
        res_reg1 = model.fit_regularized(alpha=alpha * 0.01, disp=0)
        res_reg2 = model.fit_regularized(alpha=alpha * 100, disp=0)
        res_reg3 = model.fit_regularized(alpha=alpha * 1000, disp=0)

        assert_allclose(res_reg1.params, self.res1.params, atol=5e-5)
        assert_allclose(res_reg1.bse, self.res1.bse, atol=1e-5)

        # check shrinkage, regression numbers
        assert_allclose((self.res1.params[:-2] ** 2).mean(),
                        0.016580955543320779, rtol=1e-5)
        assert_allclose((res_reg1.params[:-2] ** 2).mean(),
                        0.016580734975068664, rtol=1e-5)
        assert_allclose((res_reg2.params[:-2] ** 2).mean(),
                        0.010672558641545994, rtol=1e-5)
        assert_allclose((res_reg3.params[:-2] ** 2).mean(),
                        0.00035544919793048415, rtol=1e-5)

    def test_init_kwds(self):
        kwds = self.res1.model._get_init_kwds()
        assert_('p' in kwds)
        assert_equal(kwds['p'], 1)

    def test_distr(self):
        check_distr(self.res1)


class TestGeneralizedPoisson_underdispersion:

    @classmethod
    def setup_class(cls):
        cls.expected_params = [1, -0.5, -0.05]
        np.random.seed(1234)
        nobs = 200
        exog = np.ones((nobs, 2))
        exog[:nobs // 2, 1] = 2
        mu_true = np.exp(exog.dot(cls.expected_params[:-1]))
        cls.endog = sm.distributions.genpoisson_p.rvs(
            mu_true, cls.expected_params[-1], 1, size=len(mu_true))
        model_gp = sm.GeneralizedPoisson(cls.endog, exog, p=1)
        cls.res = model_gp.fit(method='nm', xtol=1e-6, maxiter=5000,
                               maxfun=5000, disp=0)

    def test_basic(self):
        res = self.res
        endog = res.model.endog
        # check random data generation, regression test
        assert_allclose(endog.mean(), 1.42, rtol=1e-3)
        assert_allclose(endog.var(), 1.2836, rtol=1e-3)

        # check estimation
        assert_allclose(res.params, self.expected_params, atol=0.07,
                        rtol=0.1)
        assert_(res.mle_retvals['converged'] is True)
        assert_allclose(res.mle_retvals['fopt'], 1.418753161722015,
                        rtol=0.01)

    def test_newton(self):
        # check newton optimization with start_params
        res = self.res
        res2 = res.model.fit(start_params=res.params, method='newton',
                             disp=0)
        assert_allclose(res.model.score(res.params),
                        np.zeros(len(res2.params)), atol=0.01)
        assert_allclose(res.model.score(res2.params),
                        np.zeros(len(res2.params)), atol=1e-10)
        assert_allclose(res.params, res2.params, atol=1e-4)

    def test_mean_var(self):
        assert_allclose(self.res.predict().mean(), self.endog.mean(),
                        atol=1e-1, rtol=1e-1)
        assert_allclose(
            self.res.predict().mean() * self.res._dispersion_factor.mean(),
            self.endog.var(), atol=2e-1, rtol=2e-1)

    def test_predict_prob(self):
        res = self.res
        endog = res.model.endog

        freq = np.bincount(endog.astype(int))
        pr = res.predict(which='prob')
        pr2 = sm.distributions.genpoisson_p.pmf(
            np.arange(6)[:, None], res.predict(), res.params[-1], 1).T
        assert_allclose(pr, pr2, rtol=1e-10, atol=1e-10)

        expected = pr.sum(0)
        # add expected obs from right tail to last bin
        expected[-1] += pr.shape[0] - expected.sum()
        # scipy requires that observed and expected sum to the same total
        # (at about rtol=1e-8)
        assert_allclose(freq.sum(), expected.sum(), rtol=1e-13)
        chi2 = stats.chisquare(freq, expected)
        # numbers are a regression test, we should not reject
        assert_allclose(chi2[:], (0.5511787456691261, 0.9901293016678583),
                        rtol=0.01)

    def test_jac(self):
        check_jac(self, res=self.res)

    def test_distr(self):
        check_distr(self.res)
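
# The underdispersion fixture above leans on the GP-P variance relation
# Var[y] = mu * (1 + alpha * mu**(p - 1))**2, which is the role of
# `_dispersion_factor` in `test_mean_var`.  A minimal sketch of that
# relation; `_gpp_variance_sketch` is a hypothetical helper, not part of
# the tested API:

def _gpp_variance_sketch(mu, alpha, p=1):
    # variance implied by the GP-P parameterization; alpha < 0 gives
    # underdispersion (variance below the mean), as in the fixture above
    return mu * (1 + alpha * mu ** (p - 1)) ** 2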

class TestNegativeBinomialPNB2Newton(CheckNegBinMixin, CheckModelResults):

    @classmethod
    def setup_class(cls):
        data = load_randhie()
        exog = sm.add_constant(data.exog, prepend=False)
        mod = NegativeBinomialP(data.endog, exog, p=2)
        cls.res1 = mod.fit(method='newton', disp=0)
        res2 = RandHIE.negativebinomial_nb2_bfgs
        cls.res2 = res2

    # NOTE: The bse is much closer in precision to Stata
    def test_bse(self):
        assert_allclose(self.res1.bse, self.res2.bse, atol=1e-3, rtol=1e-3)

    def test_params(self):
        assert_allclose(self.res1.params, self.res2.params, atol=1e-7)

    def test_alpha(self):
        self.res1.bse  # attaches alpha_std_err
        assert_allclose(self.res1.lnalpha, self.res2.lnalpha)
        assert_allclose(self.res1.lnalpha_std_err,
                        self.res2.lnalpha_std_err, atol=1e-7)

    def test_conf_int(self):
        assert_allclose(self.res1.conf_int(), self.res2.conf_int,
                        atol=1e-3, rtol=1e-3)

    def test_zstat(self):
        # Low precision because Z vs. t
        assert_allclose(self.res1.pvalues[:-1], self.res2.pvalues,
                        atol=5e-3, rtol=5e-3)

    def test_fittedvalues(self):
        assert_allclose(self.res1.fittedvalues[:10],
                        self.res2.fittedvalues[:10])

    def test_predict(self):
        assert_allclose(self.res1.predict()[:10],
                        np.exp(self.res2.fittedvalues[:10]))

    def test_predict_xb(self):
        assert_allclose(self.res1.predict(which='linear')[:10],
                        self.res2.fittedvalues[:10])


class TestNegativeBinomialPNB1Newton(CheckNegBinMixin, CheckModelResults):

    @classmethod
    def setup_class(cls):
        data = load_randhie()
        exog = sm.add_constant(data.exog, prepend=False)
        mod = NegativeBinomialP(data.endog, exog, p=1)
        cls.res1 = mod.fit(method="newton", maxiter=100, disp=0)
        res2 = RandHIE.negativebinomial_nb1_bfgs
        cls.res2 = res2

    def test_zstat(self):
        assert_allclose(self.res1.tvalues, self.res2.z, atol=5e-3,
                        rtol=5e-3)

    def test_lnalpha(self):
        self.res1.bse  # attaches alpha_std_err
        assert_allclose(self.res1.lnalpha, self.res2.lnalpha)
        assert_allclose(self.res1.lnalpha_std_err,
                        self.res2.lnalpha_std_err)

    def test_params(self):
        assert_allclose(self.res1.params, self.res2.params)

    def test_conf_int(self):
        # the bse for alpha is not high precision from the hessian
        # approximation
        assert_allclose(self.res1.conf_int(), self.res2.conf_int,
                        atol=1e-3, rtol=1e-3)

    def test_predict(self):
        assert_allclose(self.res1.predict()[:10],
                        np.exp(self.res2.fittedvalues[:10]),
                        atol=1e-3, rtol=1e-3)

    def test_predict_xb(self):
        assert_allclose(self.res1.predict(which='linear')[:10],
                        self.res2.fittedvalues[:10],
                        atol=1e-3, rtol=1e-3)
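
# The NB-P classes above share one parameterization of the conditional
# variance, Var[y] = mu + alpha * mu**p, so p=1 recovers NB1 and p=2
# recovers NB2.  A minimal sketch of that relation; `_nbp_variance_sketch`
# is a hypothetical helper, not part of the tested API:

def _nbp_variance_sketch(mu, alpha, p=2):
    # NB1 (p=1): variance linear in mu; NB2 (p=2): variance quadratic in mu
    return mu + alpha * mu ** p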

class TestNegativeBinomialPNB2BFGS(CheckNegBinMixin, CheckModelResults):

    @classmethod
    def setup_class(cls):
        data = load_randhie()
        exog = sm.add_constant(data.exog, prepend=False)
        cls.res1 = NegativeBinomialP(data.endog, exog, p=2).fit(
            method='bfgs', disp=0, maxiter=1000)
        res2 = RandHIE.negativebinomial_nb2_bfgs
        cls.res2 = res2

    # NOTE: The bse is much closer in precision to Stata
    def test_bse(self):
        assert_allclose(self.res1.bse, self.res2.bse, atol=1e-3, rtol=1e-3)

    def test_params(self):
        assert_allclose(self.res1.params, self.res2.params,
                        atol=1e-3, rtol=1e-3)

    def test_alpha(self):
        self.res1.bse  # attaches alpha_std_err
        assert_allclose(self.res1.lnalpha, self.res2.lnalpha,
                        atol=1e-5, rtol=1e-5)
        assert_allclose(self.res1.lnalpha_std_err,
                        self.res2.lnalpha_std_err, atol=1e-5, rtol=1e-5)

    def test_conf_int(self):
        assert_allclose(self.res1.conf_int(), self.res2.conf_int,
                        atol=1e-3, rtol=1e-3)

    def test_zstat(self):
        # Low precision because Z vs. t
        assert_allclose(self.res1.pvalues[:-1], self.res2.pvalues,
                        atol=5e-3, rtol=5e-3)

    def test_fittedvalues(self):
        assert_allclose(self.res1.fittedvalues[:10],
                        self.res2.fittedvalues[:10],
                        atol=1e-4, rtol=1e-4)

    def test_predict(self):
        assert_allclose(self.res1.predict()[:10],
                        np.exp(self.res2.fittedvalues[:10]),
                        atol=1e-3, rtol=1e-3)

    def test_predict_xb(self):
        assert_allclose(self.res1.predict(which='linear')[:10],
                        self.res2.fittedvalues[:10],
                        atol=1e-3, rtol=1e-3)


class TestNegativeBinomialPNB1BFGS(CheckNegBinMixin, CheckModelResults):

    @classmethod
    def setup_class(cls):
        data = load_randhie()
        exog = sm.add_constant(data.exog, prepend=False)
        cls.res1 = NegativeBinomialP(data.endog, exog, p=1).fit(
            method="bfgs", maxiter=100, disp=0)
        res2 = RandHIE.negativebinomial_nb1_bfgs
        cls.res2 = res2

    def test_bse(self):
        assert_allclose(self.res1.bse, self.res2.bse, atol=5e-3, rtol=5e-3)

    def test_aic(self):
        assert_allclose(self.res1.aic, self.res2.aic, atol=0.5, rtol=0.5)

    def test_bic(self):
        assert_allclose(self.res1.bic, self.res2.bic, atol=0.5, rtol=0.5)

    def test_llf(self):
        assert_allclose(self.res1.llf, self.res2.llf, atol=1e-3, rtol=1e-3)

    def test_llr(self):
        assert_allclose(self.res1.llf, self.res2.llf, atol=1e-3, rtol=1e-3)

    def test_zstat(self):
        assert_allclose(self.res1.tvalues, self.res2.z, atol=0.5, rtol=0.5)

    def test_lnalpha(self):
        assert_allclose(self.res1.lnalpha, self.res2.lnalpha,
                        atol=1e-3, rtol=1e-3)
        assert_allclose(self.res1.lnalpha_std_err,
                        self.res2.lnalpha_std_err, atol=1e-3, rtol=1e-3)

    def test_params(self):
        assert_allclose(self.res1.params, self.res2.params,
                        atol=5e-2, rtol=5e-2)

    def test_conf_int(self):
        # the bse for alpha is not high precision from the hessian
        # approximation
        assert_allclose(self.res1.conf_int(), self.res2.conf_int,
                        atol=5e-2, rtol=5e-2)

    def test_predict(self):
        assert_allclose(self.res1.predict()[:10],
                        np.exp(self.res2.fittedvalues[:10]),
                        atol=5e-3, rtol=5e-3)

    def test_predict_xb(self):
        assert_allclose(self.res1.predict(which='linear')[:10],
                        self.res2.fittedvalues[:10],
                        atol=5e-3, rtol=5e-3)

    def test_init_kwds(self):
        kwds = self.res1.model._get_init_kwds()
        assert_('p' in kwds)
        assert_equal(kwds['p'], 1)
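
# Several fixtures above touch `res.bse` before comparing `lnalpha_std_err`
# because computing the standard errors attaches the alpha standard error.
# The Stata reference values report ln(alpha), so the comparison rests on
# the first-order delta method, se(ln(alpha)) ~= se(alpha) / alpha.  A
# sketch of that transform; `_lnalpha_sketch` is a hypothetical helper and
# assumes alpha is the last parameter:

def _lnalpha_sketch(res):
    alpha, alpha_se = res.params[-1], res.bse[-1]
    # delta method for the log transform of the dispersion parameter
    return np.log(alpha), alpha_se / alpha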

class TestNegativeBinomialPL1Compatability(CheckL1Compatability):

    @classmethod
    def setup_class(cls):
        cls.kvars = 10  # Number of variables
        cls.m = 7  # Number of unregularized parameters
        rand_data = load_randhie()
        rand_data.endog = np.asarray(rand_data.endog)
        rand_data.exog = np.asarray(rand_data.exog, dtype=float)
        rand_exog = rand_data.exog.view(float).reshape(
            len(rand_data.exog), -1)
        rand_exog_st = (rand_exog - rand_exog.mean(0)) / rand_exog.std(0)
        rand_exog = sm.add_constant(rand_exog_st, prepend=True)

        # Drop some columns and do an unregularized fit
        exog_no_PSI = rand_exog[:, :cls.m]
        mod_unreg = sm.NegativeBinomialP(rand_data.endog, exog_no_PSI)
        cls.res_unreg = mod_unreg.fit(method="newton", disp=0)

        # Do a regularized fit with alpha, effectively dropping the last
        # column
        alpha = 10 * len(rand_data.endog) * np.ones(cls.kvars + 1)
        alpha[:cls.m] = 0
        alpha[-1] = 0  # do not penalize alpha
        mod_reg = sm.NegativeBinomialP(rand_data.endog, rand_exog)
        cls.res_reg = mod_reg.fit_regularized(
            method='l1', alpha=alpha, disp=False, acc=1e-10, maxiter=2000,
            trim_mode='auto')
        cls.k_extra = 1  # 1 extra parameter in nb2


class TestNegativeBinomialPPredictProb:

    def test_predict_prob_p1(self):
        expected_params = [1, -0.5]
        np.random.seed(1234)
        nobs = 200
        exog = np.ones((nobs, 2))
        exog[:nobs // 2, 1] = 2

        mu_true = np.exp(exog.dot(expected_params))
        alpha = 0.05
        size = 1. / alpha * mu_true
        prob = size / (size + mu_true)
        endog = nbinom.rvs(size, prob, size=len(mu_true))

        res = sm.NegativeBinomialP(endog, exog).fit(disp=0)

        mu = res.predict()
        size = 1. / alpha * mu
        prob = size / (size + mu)

        probs = res.predict(which='prob')
        assert_allclose(probs,
                        nbinom.pmf(np.arange(8)[:, None], size, prob).T,
                        atol=1e-2, rtol=1e-2)

        probs_ex = res.predict(exog=exog[[0, -1]], which='prob')
        assert_allclose(probs_ex, probs[[0, -1]], rtol=1e-10, atol=1e-15)

    def test_predict_prob_p2(self):
        expected_params = [1, -0.5]
        np.random.seed(1234)
        nobs = 200
        exog = np.ones((nobs, 2))
        exog[:nobs // 2, 1] = 2

        mu_true = np.exp(exog.dot(expected_params))
        alpha = 0.05
        size = 1. / alpha
        prob = size / (size + mu_true)
        endog = nbinom.rvs(size, prob, size=len(mu_true))

        res = sm.NegativeBinomialP(endog, exog, p=2).fit(disp=0)

        mu = res.predict()
        size = 1. / alpha
        prob = size / (size + mu)

        assert_allclose(res.predict(which='prob'),
                        nbinom.pmf(np.arange(8)[:, None], size, prob).T,
                        atol=1e-2, rtol=1e-2)
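
# The predict-prob tests above map the NB-P (mu, alpha) parameterization to
# scipy's nbinom (size, prob) via size = mu**(2 - p) / alpha and
# prob = size / (size + mu): p=1 gives size = mu / alpha, p=2 gives
# size = 1 / alpha.  A sketch of that mapping; `_nbp_to_nbinom_sketch` is a
# hypothetical helper, not part of the tested API:

def _nbp_to_nbinom_sketch(mu, alpha, p=2):
    # convert NB-P mean/dispersion to scipy.stats.nbinom arguments
    size = mu ** (2 - p) / alpha
    prob = size / (size + mu)
    return size, prob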

class CheckNull:

    @classmethod
    def _get_data(cls):
        x = np.array([20., 25., 30., 35., 40., 45., 50.])
        nobs = len(x)
        exog = np.column_stack((np.ones(nobs), x))
        endog = np.array([469, 5516, 6854, 6837, 5952, 4066, 3242])
        return endog, exog

    def test_llnull(self):
        res = self.model.fit(start_params=self.start_params, disp=0)
        res._results._attach_nullmodel = True
        llf0 = res.llnull
        res_null0 = res.res_null
        assert_allclose(llf0, res_null0.llf, rtol=1e-6)

        res_null1 = self.res_null
        assert_allclose(llf0, res_null1.llf, rtol=1e-6)
        # Note: the default convergence tolerance does not get a lower rtol
        # from different starting values (using bfgs)
        assert_allclose(res_null0.params, res_null1.params, rtol=5e-5)


class TestPoissonNull(CheckNull):

    @classmethod
    def setup_class(cls):
        endog, exog = cls._get_data()
        cls.model = Poisson(endog, exog)
        cls.res_null = Poisson(endog, exog[:, 0]).fit(start_params=[8.5],
                                                      disp=0)
        # use start params to avoid warnings
        cls.start_params = [8.5, 0]


class TestNegativeBinomialNB1Null(CheckNull):

    @classmethod
    def setup_class(cls):
        endog, exog = cls._get_data()
        cls.model = NegativeBinomial(endog, exog, loglike_method='nb1')
        cls.model_null = NegativeBinomial(endog, exog[:, 0],
                                          loglike_method='nb1')
        cls.res_null = cls.model_null.fit(start_params=[8, 1000],
                                          method='bfgs', gtol=1e-08,
                                          maxiter=300, disp=0)
        # for convergence with bfgs, I needed to round down alpha
        # start_params
        cls.start_params = np.array([7.730452, 2.01633068e-02, 1763.0])


class TestNegativeBinomialNB2Null(CheckNull):

    @classmethod
    def setup_class(cls):
        endog, exog = cls._get_data()
        cls.model = NegativeBinomial(endog, exog, loglike_method='nb2')
        cls.model_null = NegativeBinomial(endog, exog[:, 0],
                                          loglike_method='nb2')
        cls.res_null = cls.model_null.fit(start_params=[8, 0.5],
                                          method='bfgs', gtol=1e-06,
                                          maxiter=300, disp=0)
        cls.start_params = np.array([8.07216448, 0.01087238, 0.44024134])


class TestNegativeBinomialNBP2Null(CheckNull):

    @classmethod
    def setup_class(cls):
        endog, exog = cls._get_data()
        cls.model = NegativeBinomialP(endog, exog, p=2)
        cls.model_null = NegativeBinomialP(endog, exog[:, 0], p=2)
        cls.res_null = cls.model_null.fit(start_params=[8, 1],
                                          method='bfgs', gtol=1e-06,
                                          maxiter=300, disp=0)
        cls.start_params = np.array([8.07216448, 0.01087238, 0.44024134])

    def test_start_null(self):
        endog, exog = self.model.endog, self.model.exog
        model_nb2 = NegativeBinomial(endog, exog, loglike_method='nb2')
        sp1 = model_nb2._get_start_params_null()
        sp0 = self.model._get_start_params_null()
        assert_allclose(sp0, sp1, rtol=1e-12)


class TestNegativeBinomialNBP1Null(CheckNull):

    @classmethod
    def setup_class(cls):
        endog, exog = cls._get_data()
        cls.model = NegativeBinomialP(endog, exog, p=1.)
        cls.model_null = NegativeBinomialP(endog, exog[:, 0], p=1)
        cls.res_null = cls.model_null.fit(start_params=[8, 1],
                                          method='bfgs', gtol=1e-06,
                                          maxiter=300, disp=0)
        cls.start_params = np.array([7.730452, 2.01633068e-02, 1763.0])

    def test_start_null(self):
        endog, exog = self.model.endog, self.model.exog
        model_nb1 = NegativeBinomial(endog, exog, loglike_method='nb1')
        sp1 = model_nb1._get_start_params_null()
        sp0 = self.model._get_start_params_null()
        assert_allclose(sp0, sp1, rtol=1e-12)


class TestGeneralizedPoissonNull(CheckNull):

    @classmethod
    def setup_class(cls):
        endog, exog = cls._get_data()
        cls.model = GeneralizedPoisson(endog, exog, p=1.5)
        cls.model_null = GeneralizedPoisson(endog, exog[:, 0], p=1.5)
        cls.res_null = cls.model_null.fit(start_params=[8.4, 1],
                                          method='bfgs', gtol=1e-08,
                                          maxiter=300, disp=0)
        cls.start_params = np.array([6.91127148, 0.04501334, 0.88393736])


def test_null_options():
    # this is a "nice" case because we only check that options are used
    # correctly
    nobs = 10
    exog = np.ones((20, 2))
    exog[:nobs // 2, 1] = 0
    mu = np.exp(exog.sum(1))
    endog = np.random.poisson(mu)  # Note: no size=nobs in np.random

    res = Poisson(endog, exog).fit(start_params=np.log([1, 1]), disp=0)
    llnull0 = res.llnull
    assert_(hasattr(res, 'res_llnull') is False)
    res.set_null_options(attach_results=True)
    # default optimization
    lln = res.llnull  # access to trigger computation
    assert_allclose(res.res_null.mle_settings['start_params'],
                    np.log(endog.mean()), rtol=1e-10)
    assert_equal(res.res_null.mle_settings['optimizer'], 'bfgs')
    assert_allclose(lln, llnull0)

    res.set_null_options(attach_results=True, start_params=[0.5],
                         method='nm')
    lln = res.llnull  # access to trigger computation
    assert_allclose(res.res_null.mle_settings['start_params'], [0.5],
                    rtol=1e-10)
    assert_equal(res.res_null.mle_settings['optimizer'], 'nm')

    res.summary()  # call to fill cache
    assert_('prsquared' in res._cache)
    assert_equal(res._cache['llnull'], lln)

    # check setting cache
    res.set_null_options(llnull=999)
    assert_('prsquared' not in res._cache)
    assert_equal(res._cache['llnull'], 999)
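
# `llnull` is the log-likelihood of a constant-only model refit internally;
# the CheckNull fixtures above verify it against an explicitly fitted null
# model.  A minimal self-contained sketch of that equivalence on
# hypothetical data (`_llnull_sketch` is not one of the fixtures above):

def _llnull_sketch():
    y = np.array([3., 1., 4., 1., 5., 9., 2., 6.])
    exog = sm.add_constant(np.arange(len(y), dtype=float))
    res = Poisson(y, exog).fit(disp=0)
    # fitting on a constant reproduces the internally computed null llf
    res_null = Poisson(y, np.ones(len(y))).fit(disp=0)
    assert_allclose(res.llnull, res_null.llf, rtol=1e-6)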

def test_optim_kwds_prelim():
    # test that fit options for the preliminary fit are correctly
    # transmitted
    cur_dir = os.path.dirname(os.path.abspath(__file__))
    filepath = os.path.join(cur_dir, "results", "sm3533.csv")
    df = pd.read_csv(filepath)

    features = ['pp']
    X = (df[features] - df[features].mean()) / df[features].std()
    y = df['num'].values
    exog = sm.add_constant(X[features].copy())
    # offset = np.log(df['population'].values + 1)
    # offset currently not used
    offset = None

    # we use "nm", "bfgs" does not work for Poisson/exp with older scipy
    optim_kwds_prelim = dict(method='nm', maxiter=5000)
    model = Poisson(y, exog, offset=offset)
    res_poi = model.fit(disp=0, **optim_kwds_prelim)

    model = NegativeBinomial(y, exog, offset=offset)
    res = model.fit(disp=0, optim_kwds_prelim=optim_kwds_prelim)
    assert_allclose(res.mle_settings['start_params'][:-1], res_poi.params,
                    rtol=1e-4)
    assert_equal(res.mle_settings['optim_kwds_prelim'], optim_kwds_prelim)
    assert_allclose(res.predict().mean(), y.mean(), rtol=0.1)

    # NBP2 and GPP p=1.5 also fail on older scipy with bfgs, use nm instead
    optim_kwds_prelim = dict(method='nm', maxiter=5000)
    model = NegativeBinomialP(y, exog, offset=offset, p=2)
    res = model.fit(disp=0, optim_kwds_prelim=optim_kwds_prelim)
    assert_allclose(res.mle_settings['start_params'][:-1], res_poi.params,
                    rtol=1e-4)
    assert_equal(res.mle_settings['optim_kwds_prelim'], optim_kwds_prelim)
    assert_allclose(res.predict().mean(), y.mean(), rtol=0.1)

    # GPP with p=1.5 converges correctly,
    # GPP fails when p=2 even with good start_params
    model = GeneralizedPoisson(y, exog, offset=offset, p=1.5)
    res = model.fit(disp=0, maxiter=200,
                    optim_kwds_prelim=optim_kwds_prelim)
    assert_allclose(res.mle_settings['start_params'][:-1], res_poi.params,
                    rtol=1e-4)
    assert_equal(res.mle_settings['optim_kwds_prelim'], optim_kwds_prelim)
    # rough check that convergence makes sense
    assert_allclose(res.predict().mean(), y.mean(), rtol=0.1)
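
# `optim_kwds_prelim` above is forwarded to the preliminary Poisson fit
# whose parameters seed the count-model optimization.  A sketch of the
# equivalent manual seeding; this mirrors the pattern, not the internal
# implementation, and the appended 0.1 dispersion start is an assumption:

def _manual_prelim_sketch(y, exog):
    start = np.append(Poisson(y, exog).fit(disp=0).params, 0.1)
    return NegativeBinomial(y, exog).fit(start_params=start, disp=0)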

def test_unchanging_degrees_of_freedom():
    data = load_randhie()
    # see GH#3734
    warnings.simplefilter('error')
    model = sm.NegativeBinomial(data.endog, data.exog,
                                loglike_method='nb2')
    params = np.array([-0.05654134, -0.21213734, 0.08783102, -0.02991825,
                       0.22902315, 0.06210253, 0.06799444, 0.08406794,
                       0.18530092, 1.36645186])

    res1 = model.fit(start_params=params, disp=0)
    assert_equal(res1.df_model, 8)

    reg_params = np.array([-0.04854, -0.15019404, 0.08363671, -0.03032834,
                           0.17592454, 0.06440753, 0.01584555, 0., 0.,
                           1.36984628])

    res2 = model.fit_regularized(alpha=100, start_params=reg_params,
                                 disp=0)
    assert_(res2.df_model != 8)
    # If res2.df_model == res1.df_model, then this test is invalid.

    res3 = model.fit(start_params=params, disp=0)
    # Test that the call to `fit_regularized` did not modify
    # model.df_model inplace.
    assert_equal(res3.df_model, res1.df_model)
    assert_equal(res3.df_resid, res1.df_resid)


def test_mnlogit_float_name():
    df = pd.DataFrame({"A": [0., 1.1, 0, 0, 1.1], "B": [0, 1, 0, 1, 1]})
    with pytest.warns(SpecificationWarning,
                      match='endog contains values are that not int-like'):
        result = smf.mnlogit(formula="A ~ B", data=df).fit()
    summ = result.summary().as_text()
    assert 'A=1.1' in summ


def test_cov_confint_pandas():
    data = sm.datasets.anes96.load_pandas()
    exog = sm.add_constant(data.exog, prepend=False)
    res1 = sm.MNLogit(data.endog, exog).fit(method="newton", disp=0)
    cov = res1.cov_params()
    ci = res1.conf_int()
    se = np.sqrt(np.diag(cov))
    se2 = (ci.iloc[:, 1] - ci.iloc[:, 0]) / (2 * stats.norm.ppf(0.975))
    assert_allclose(se, se2)
    assert_index_equal(ci.index, cov.index)
    assert_index_equal(cov.index, cov.columns)
    assert isinstance(ci.index, pd.MultiIndex)


def test_mlogit_t_test():
    # GH#669, check that t_test works in a multivariate model
    data = sm.datasets.anes96.load()
    exog = sm.add_constant(data.exog, prepend=False)
    res1 = sm.MNLogit(data.endog, exog).fit(disp=0)

    r = np.ones(res1.cov_params().shape[0])
    t1 = res1.t_test(r)
    f1 = res1.f_test(r)

    exog = sm.add_constant(data.exog, prepend=False)
    endog, exog = np.asarray(data.endog), np.asarray(exog)
    res2 = sm.MNLogit(endog, exog).fit(disp=0)
    t2 = res2.t_test(r)
    f2 = res2.f_test(r)

    assert_allclose(t1.effect, t2.effect)
    assert_allclose(f1.statistic, f2.statistic)

    tt = res1.t_test(np.eye(np.size(res2.params)))
    assert_allclose(tt.tvalue.reshape(6, 6, order="F"),
                    res1.tvalues.to_numpy())
    tt = res2.t_test(np.eye(np.size(res2.params)))
    assert_allclose(tt.tvalue.reshape(6, 6, order="F"), res2.tvalues)

    wt = res1.wald_test(np.eye(np.size(res2.params))[0], scalar=True)
    assert_allclose(wt.pvalue, res1.pvalues.to_numpy()[0, 0])

    tt = res1.t_test("y1_logpopul")
    wt = res1.wald_test("y1_logpopul", scalar=True)
    assert_allclose(tt.pvalue, wt.pvalue)

    wt = res1.wald_test("y1_logpopul, y2_logpopul", scalar=True)
    # regression test
    assert_allclose(wt.statistic, 5.68660562, rtol=1e-8)
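
# MNLogit stacks one parameter column per non-reference outcome, so a
# t_test against an identity matrix returns a flat vector that maps back to
# the (k_exog, J - 1) table in Fortran (column-major) order, which is what
# the reshape(6, 6, order="F") calls above rely on.  A sketch of the round
# trip; `_mnlogit_tvalues_sketch` is a hypothetical helper:

def _mnlogit_tvalues_sketch(res):
    # flat t_test vector -> (k_exog, J - 1) table lining up with res.tvalues
    flat = res.t_test(np.eye(np.size(res.params))).tvalue
    return flat.reshape(np.shape(res.params), order="F")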