"""
|
|
Limited dependent variable and qualitative variables.
|
|
|
|
Includes binary outcomes, count data, (ordered) ordinal data and limited
|
|
dependent variables.
|
|
|
|
General References
|
|
--------------------
|
|
|
|
A.C. Cameron and P.K. Trivedi. `Regression Analysis of Count Data`.
|
|
Cambridge, 1998
|
|
|
|
G.S. Maddala. `Limited-Dependent and Qualitative Variables in Econometrics`.
|
|
Cambridge, 1983.
|
|
|
|
W. Greene. `Econometric Analysis`. Prentice Hall, 5th. edition. 2003.
|
|
"""
|
|
__all__ = ["Poisson", "Logit", "Probit", "MNLogit", "NegativeBinomial",
|
|
"GeneralizedPoisson", "NegativeBinomialP", "CountModel"]
|
|
|
|
from statsmodels.compat.pandas import Appender
|
|
|
|
import warnings
|
|
|
|
import numpy as np
|
|
from pandas import MultiIndex, get_dummies
|
|
from scipy import special, stats
|
|
from scipy.special import digamma, gammaln, loggamma, polygamma
|
|
from scipy.stats import nbinom
|
|
|
|
from statsmodels.base.data import handle_data # for mnlogit
|
|
from statsmodels.base.l1_slsqp import fit_l1_slsqp
|
|
import statsmodels.base.model as base
|
|
import statsmodels.base.wrapper as wrap
|
|
from statsmodels.base._constraints import fit_constrained_wrap
|
|
import statsmodels.base._parameter_inference as pinfer
|
|
from statsmodels.base import _prediction_inference as pred
|
|
from statsmodels.distributions import genpoisson_p
|
|
import statsmodels.regression.linear_model as lm
|
|
from statsmodels.tools import data as data_tools, tools
|
|
from statsmodels.tools.decorators import cache_readonly
|
|
from statsmodels.tools.numdiff import approx_fprime_cs
|
|
from statsmodels.tools.sm_exceptions import (
|
|
PerfectSeparationError,
|
|
PerfectSeparationWarning,
|
|
SpecificationWarning,
|
|
)
|
|
|
|
|
|
try:
|
|
import cvxopt # noqa:F401
|
|
have_cvxopt = True
|
|
except ImportError:
|
|
have_cvxopt = False
|
|
|
|
|
|
# TODO: When we eventually get user-settable precision, we need to change
|
|
# this
|
|
FLOAT_EPS = np.finfo(float).eps
|
|
|
|
# Limit for exponentials to avoid overflow
|
|
EXP_UPPER_LIMIT = np.log(np.finfo(np.float64).max) - 1.0
|
|
|
|
# TODO: add options for the parameter covariance/variance
|
|
# i.e., OIM, EIM, and BHHH; see Greene 21.4
|
|
|
|
_discrete_models_docs = """
|
|
"""
|
|
|
|
_discrete_results_docs = """
|
|
%(one_line_description)s
|
|
|
|
Parameters
|
|
----------
|
|
model : A DiscreteModel instance
|
|
params : array_like
|
|
The parameters of a fitted model.
|
|
hessian : array_like
|
|
The hessian of the fitted model.
|
|
scale : float
|
|
A scale parameter for the covariance matrix.
|
|
|
|
Attributes
|
|
----------
|
|
df_resid : float
|
|
See model definition.
|
|
df_model : float
|
|
See model definition.
|
|
llf : float
|
|
Value of the loglikelihood
|
|
%(extra_attr)s"""
|
|
|
|
_l1_results_attr = """ nnz_params : int
|
|
The number of nonzero parameters in the model. Train with
|
|
trim_params == True or else numerical error will distort this.
|
|
trimmed : bool array
|
|
trimmed[i] == True if the ith parameter was trimmed from the model."""
|
|
|
|
_get_start_params_null_docs = """
|
|
Compute one-step moment estimator for null (constant-only) model
|
|
|
|
This is a preliminary estimator used as start_params.
|
|
|
|
Returns
|
|
-------
|
|
params : ndarray
|
|
    parameter estimate based on one-step moment matching
|
|
|
|
"""
|
|
|
|
_check_rank_doc = """
|
|
check_rank : bool
|
|
Check exog rank to determine model degrees of freedom. Default is
|
|
True. Setting to False reduces model initialization time when
|
|
exog.shape[1] is large.
|
|
"""
|
|
|
|
|
|
# helper for MNLogit (will be generally useful later)
|
|
def _numpy_to_dummies(endog):
|
|
if endog.ndim == 2 and endog.dtype.kind not in ["S", "O"]:
|
|
endog_dummies = endog
|
|
ynames = range(endog.shape[1])
|
|
else:
|
|
dummies = get_dummies(endog, drop_first=False)
|
|
ynames = {i: dummies.columns[i] for i in range(dummies.shape[1])}
|
|
endog_dummies = np.asarray(dummies, dtype=float)
|
|
|
|
return endog_dummies, ynames
|
|
|
|
|
|
|
|
|
|
def _pandas_to_dummies(endog):
|
|
if endog.ndim == 2:
|
|
if endog.shape[1] == 1:
|
|
yname = endog.columns[0]
|
|
endog_dummies = get_dummies(endog.iloc[:, 0])
|
|
else: # assume already dummies
|
|
yname = 'y'
|
|
endog_dummies = endog
|
|
else:
|
|
yname = endog.name
|
|
if yname is None:
|
|
yname = 'y'
|
|
endog_dummies = get_dummies(endog)
|
|
ynames = endog_dummies.columns.tolist()
|
|
|
|
return endog_dummies, ynames, yname
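# Editor's illustrative note (not part of the original source): these two
# helpers expand a categorical response into an indicator matrix for MNLogit.
# Roughly:
#
#     >>> import numpy as np
#     >>> endog_dummies, ynames = _numpy_to_dummies(np.array([0, 2, 1, 2]))
#     >>> endog_dummies
#     array([[1., 0., 0.],
#            [0., 0., 1.],
#            [0., 1., 0.],
#            [0., 0., 1.]])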
|
|
|
|
|
|
def _validate_l1_method(method):
|
|
"""
|
|
As of 0.10.0, the supported values for `method` in `fit_regularized`
|
|
are "l1" and "l1_cvxopt_cp". If an invalid value is passed, raise
|
|
with a helpful error message
|
|
|
|
Parameters
|
|
----------
|
|
method : str
|
|
|
|
Raises
|
|
------
|
|
ValueError
|
|
"""
|
|
if method not in ['l1', 'l1_cvxopt_cp']:
|
|
raise ValueError('`method` = {method} is not supported, use either '
|
|
'"l1" or "l1_cvxopt_cp"'.format(method=method))
|
|
|
|
|
|
#### Private Model Classes ####
|
|
|
|
|
|
class DiscreteModel(base.LikelihoodModel):
|
|
"""
|
|
Abstract class for discrete choice models.
|
|
|
|
This class does not do anything itself but lays out the methods and
|
|
call signature expected of child classes in addition to those of
|
|
statsmodels.model.LikelihoodModel.
|
|
"""
|
|
|
|
def __init__(self, endog, exog, check_rank=True, **kwargs):
|
|
self._check_rank = check_rank
|
|
super().__init__(endog, exog, **kwargs)
|
|
self.raise_on_perfect_prediction = False # keep for backwards compat
|
|
self.k_extra = 0
|
|
|
|
def initialize(self):
|
|
"""
|
|
Initialize is called by
|
|
statsmodels.model.LikelihoodModel.__init__
|
|
and should contain any preprocessing that needs to be done for a model.
|
|
"""
|
|
if self._check_rank:
|
|
# assumes constant
|
|
rank = tools.matrix_rank(self.exog, method="qr")
|
|
else:
|
|
# If rank check is skipped, assume full
|
|
rank = self.exog.shape[1]
|
|
self.df_model = float(rank - 1)
|
|
self.df_resid = float(self.exog.shape[0] - rank)
|
|
|
|
def cdf(self, X):
|
|
"""
|
|
The cumulative distribution function of the model.
|
|
"""
|
|
raise NotImplementedError
|
|
|
|
def pdf(self, X):
|
|
"""
|
|
The probability density (mass) function of the model.
|
|
"""
|
|
raise NotImplementedError
|
|
|
|
def _check_perfect_pred(self, params, *args):
|
|
endog = self.endog
|
|
fittedvalues = self.predict(params)
|
|
if np.allclose(fittedvalues - endog, 0):
|
|
if self.raise_on_perfect_prediction:
|
|
# backwards compatibility for attr raise_on_perfect_prediction
|
|
msg = "Perfect separation detected, results not available"
|
|
raise PerfectSeparationError(msg)
|
|
else:
|
|
msg = ("Perfect separation or prediction detected, "
|
|
"parameter may not be identified")
|
|
warnings.warn(msg, category=PerfectSeparationWarning)
|
|
|
|
@Appender(base.LikelihoodModel.fit.__doc__)
|
|
def fit(self, start_params=None, method='newton', maxiter=35,
|
|
full_output=1, disp=1, callback=None, **kwargs):
|
|
"""
|
|
Fit the model using maximum likelihood.
|
|
|
|
The rest of the docstring is from
|
|
statsmodels.base.model.LikelihoodModel.fit
|
|
"""
|
|
if callback is None:
|
|
callback = self._check_perfect_pred
|
|
else:
|
|
pass # TODO: make a function factory to have multiple call-backs
|
|
|
|
mlefit = super().fit(start_params=start_params,
|
|
method=method,
|
|
maxiter=maxiter,
|
|
full_output=full_output,
|
|
disp=disp,
|
|
callback=callback,
|
|
**kwargs)
|
|
|
|
return mlefit # It is up to subclasses to wrap results
|
|
|
|
def fit_regularized(self, start_params=None, method='l1',
|
|
maxiter='defined_by_method', full_output=1, disp=True,
|
|
callback=None, alpha=0, trim_mode='auto',
|
|
auto_trim_tol=0.01, size_trim_tol=1e-4, qc_tol=0.03,
|
|
qc_verbose=False, **kwargs):
|
|
"""
|
|
Fit the model using a regularized maximum likelihood.
|
|
|
|
The regularization method AND the solver used is determined by the
|
|
argument method.
|
|
|
|
Parameters
|
|
----------
|
|
start_params : array_like, optional
|
|
Initial guess of the solution for the loglikelihood maximization.
|
|
The default is an array of zeros.
|
|
method : 'l1' or 'l1_cvxopt_cp'
|
|
See notes for details.
|
|
maxiter : {int, 'defined_by_method'}
|
|
Maximum number of iterations to perform.
|
|
If 'defined_by_method', then use method defaults (see notes).
|
|
full_output : bool
|
|
Set to True to have all available output in the Results object's
|
|
mle_retvals attribute. The output is dependent on the solver.
|
|
See LikelihoodModelResults notes section for more information.
|
|
disp : bool
|
|
Set to True to print convergence messages.
|
|
fargs : tuple
|
|
Extra arguments passed to the likelihood function, i.e.,
|
|
loglike(x,*args).
|
|
callback : callable callback(xk)
|
|
Called after each iteration, as callback(xk), where xk is the
|
|
current parameter vector.
|
|
retall : bool
|
|
Set to True to return list of solutions at each iteration.
|
|
Available in Results object's mle_retvals attribute.
|
|
alpha : non-negative scalar or numpy array (same size as parameters)
|
|
The weight multiplying the l1 penalty term.
|
|
        trim_mode : 'auto', 'size', or 'off'
|
|
If not 'off', trim (set to zero) parameters that would have been
|
|
zero if the solver reached the theoretical minimum.
|
|
            If 'auto', trim params using the optimality conditions in the Notes.
|
|
If 'size', trim params if they have very small absolute value.
|
|
        size_trim_tol : float or 'auto' (default = 1e-4)
|
|
Tolerance used when trim_mode == 'size'.
|
|
auto_trim_tol : float
|
|
Tolerance used when trim_mode == 'auto'.
|
|
qc_tol : float
|
|
            Print warning and do not allow auto trim when condition (ii)
            from the Notes section below is violated by this much.
|
|
qc_verbose : bool
|
|
If true, print out a full QC report upon failure.
|
|
**kwargs
|
|
Additional keyword arguments used when fitting the model.
|
|
|
|
Returns
|
|
-------
|
|
Results
|
|
A results instance.
|
|
|
|
Notes
|
|
-----
|
|
Using 'l1_cvxopt_cp' requires the cvxopt module.
|
|
|
|
Extra parameters are not penalized if alpha is given as a scalar.
|
|
An example is the shape parameter in NegativeBinomial `nb1` and `nb2`.
|
|
|
|
Optional arguments for the solvers (available in Results.mle_settings)::
|
|
|
|
'l1'
|
|
acc : float (default 1e-6)
|
|
Requested accuracy as used by slsqp
|
|
'l1_cvxopt_cp'
|
|
abstol : float
|
|
absolute accuracy (default: 1e-7).
|
|
reltol : float
|
|
relative accuracy (default: 1e-6).
|
|
feastol : float
|
|
tolerance for feasibility conditions (default: 1e-7).
|
|
refinement : int
|
|
number of iterative refinement steps when solving KKT
|
|
equations (default: 1).
|
|
|
|
Optimization methodology
|
|
|
|
With :math:`L` the negative log likelihood, we solve the convex but
|
|
non-smooth problem
|
|
|
|
.. math:: \\min_\\beta L(\\beta) + \\sum_k\\alpha_k |\\beta_k|
|
|
|
|
via the transformation to the smooth, convex, constrained problem
|
|
in twice as many variables (adding the "added variables" :math:`u_k`)
|
|
|
|
.. math:: \\min_{\\beta,u} L(\\beta) + \\sum_k\\alpha_k u_k,
|
|
|
|
subject to
|
|
|
|
.. math:: -u_k \\leq \\beta_k \\leq u_k.
|
|
|
|
With :math:`\\partial_k L` the derivative of :math:`L` in the
|
|
:math:`k^{th}` parameter direction, theory dictates that, at the
|
|
minimum, exactly one of two conditions holds:
|
|
|
|
(i) :math:`|\\partial_k L| = \\alpha_k` and :math:`\\beta_k \\neq 0`
|
|
(ii) :math:`|\\partial_k L| \\leq \\alpha_k` and :math:`\\beta_k = 0`
|
|
"""
|
|
_validate_l1_method(method)
|
|
# Set attributes based on method
|
|
cov_params_func = self.cov_params_func_l1
|
|
|
|
### Bundle up extra kwargs for the dictionary kwargs. These are
|
|
### passed through super(...).fit() as kwargs and unpacked at
|
|
### appropriate times
|
|
alpha = np.array(alpha)
|
|
assert alpha.min() >= 0
|
|
try:
|
|
kwargs['alpha'] = alpha
|
|
except TypeError:
|
|
kwargs = dict(alpha=alpha)
|
|
kwargs['alpha_rescaled'] = kwargs['alpha'] / float(self.endog.shape[0])
|
|
kwargs['trim_mode'] = trim_mode
|
|
kwargs['size_trim_tol'] = size_trim_tol
|
|
kwargs['auto_trim_tol'] = auto_trim_tol
|
|
kwargs['qc_tol'] = qc_tol
|
|
kwargs['qc_verbose'] = qc_verbose
|
|
|
|
### Define default keyword arguments to be passed to super(...).fit()
|
|
if maxiter == 'defined_by_method':
|
|
if method == 'l1':
|
|
maxiter = 1000
|
|
elif method == 'l1_cvxopt_cp':
|
|
maxiter = 70
|
|
|
|
## Parameters to pass to super(...).fit()
|
|
# For the 'extra' parameters, pass all that are available,
|
|
# even if we know (at this point) we will only use one.
|
|
extra_fit_funcs = {'l1': fit_l1_slsqp}
|
|
if have_cvxopt and method == 'l1_cvxopt_cp':
|
|
from statsmodels.base.l1_cvxopt import fit_l1_cvxopt_cp
|
|
extra_fit_funcs['l1_cvxopt_cp'] = fit_l1_cvxopt_cp
|
|
elif method.lower() == 'l1_cvxopt_cp':
|
|
raise ValueError("Cannot use l1_cvxopt_cp as cvxopt "
|
|
"was not found (install it, or use method='l1' instead)")
|
|
|
|
if callback is None:
|
|
callback = self._check_perfect_pred
|
|
else:
|
|
pass # make a function factory to have multiple call-backs
|
|
|
|
mlefit = super().fit(start_params=start_params,
|
|
method=method,
|
|
maxiter=maxiter,
|
|
full_output=full_output,
|
|
disp=disp,
|
|
callback=callback,
|
|
extra_fit_funcs=extra_fit_funcs,
|
|
cov_params_func=cov_params_func,
|
|
**kwargs)
|
|
|
|
return mlefit # up to subclasses to wrap results
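    # Editor's illustrative sketch (not part of the original source): the
    # penalized problem documented above is normally reached from a concrete
    # subclass, e.g. for a binary response ``y`` and design matrix ``X``
    # (both hypothetical here):
    #
    #     >>> import statsmodels.api as sm
    #     >>> res = sm.Logit(y, X).fit_regularized(method="l1", alpha=1.0,
    #     ...                                      trim_mode="auto", disp=0)
    #     >>> res.params      # coefficients meeting condition (ii) are zero
    #     >>> res.nnz_params  # count of parameters left untrimmed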
|
|
|
|
def cov_params_func_l1(self, likelihood_model, xopt, retvals):
|
|
"""
|
|
Computes cov_params on a reduced parameter space
|
|
corresponding to the nonzero parameters resulting from the
|
|
l1 regularized fit.
|
|
|
|
Returns a full cov_params matrix, with entries corresponding
|
|
to zero'd values set to np.nan.
|
|
"""
|
|
H = likelihood_model.hessian(xopt)
|
|
trimmed = retvals['trimmed']
|
|
nz_idx = np.nonzero(~trimmed)[0]
|
|
nnz_params = (~trimmed).sum()
|
|
if nnz_params > 0:
|
|
H_restricted = H[nz_idx[:, None], nz_idx]
|
|
# Covariance estimate for the nonzero params
|
|
H_restricted_inv = np.linalg.inv(-H_restricted)
|
|
else:
|
|
H_restricted_inv = np.zeros(0)
|
|
|
|
cov_params = np.nan * np.ones(H.shape)
|
|
cov_params[nz_idx[:, None], nz_idx] = H_restricted_inv
|
|
|
|
return cov_params
|
|
|
|
def predict(self, params, exog=None, which="mean", linear=None):
|
|
"""
|
|
Predict response variable of a model given exogenous variables.
|
|
"""
|
|
raise NotImplementedError
|
|
|
|
def _derivative_exog(self, params, exog=None, dummy_idx=None,
|
|
count_idx=None):
|
|
"""
|
|
This should implement the derivative of the non-linear function
|
|
"""
|
|
raise NotImplementedError
|
|
|
|
def _derivative_exog_helper(self, margeff, params, exog, dummy_idx,
|
|
count_idx, transform):
|
|
"""
|
|
Helper for _derivative_exog to wrap results appropriately
|
|
"""
|
|
from .discrete_margins import _get_count_effects, _get_dummy_effects
|
|
|
|
if count_idx is not None:
|
|
margeff = _get_count_effects(margeff, exog, count_idx, transform,
|
|
self, params)
|
|
if dummy_idx is not None:
|
|
margeff = _get_dummy_effects(margeff, exog, dummy_idx, transform,
|
|
self, params)
|
|
|
|
return margeff
|
|
|
|
|
|
class BinaryModel(DiscreteModel):
|
|
_continuous_ok = False
|
|
|
|
def __init__(self, endog, exog, offset=None, check_rank=True, **kwargs):
|
|
# unconditional check, requires no extra kwargs added by subclasses
|
|
self._check_kwargs(kwargs)
|
|
super().__init__(endog, exog, offset=offset, check_rank=check_rank,
|
|
**kwargs)
|
|
if not issubclass(self.__class__, MultinomialModel):
|
|
if not np.all((self.endog >= 0) & (self.endog <= 1)):
|
|
raise ValueError("endog must be in the unit interval.")
|
|
|
|
if offset is None:
|
|
delattr(self, 'offset')
|
|
|
|
if (not self._continuous_ok and
|
|
np.any(self.endog != np.round(self.endog))):
|
|
raise ValueError("endog must be binary, either 0 or 1")
|
|
|
|
def predict(self, params, exog=None, which="mean", linear=None,
|
|
offset=None):
|
|
"""
|
|
Predict response variable of a model given exogenous variables.
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
Fitted parameters of the model.
|
|
exog : array_like
|
|
1d or 2d array of exogenous values. If not supplied, the
|
|
whole exog attribute of the model is used.
|
|
which : {'mean', 'linear', 'var', 'prob'}, optional
|
|
Statistic to predict. Default is 'mean'.
|
|
|
|
- 'mean' returns the conditional expectation of endog E(y | x),
|
|
              i.e. the cdf evaluated at the linear predictor.
|
|
- 'linear' returns the linear predictor of the mean function.
|
|
- 'var' returns the estimated variance of endog implied by the
|
|
model.
|
|
|
|
            .. versionadded:: 0.14
|
|
|
|
``which`` replaces and extends the deprecated ``linear``
|
|
argument.
|
|
|
|
linear : bool
|
|
If True, returns the linear predicted values. If False or None,
|
|
then the statistic specified by ``which`` will be returned.
|
|
|
|
            .. deprecated:: 0.14

               The ``linear`` keyword is deprecated and will be removed,
               use ``which`` keyword instead.
|
|
|
|
Returns
|
|
-------
|
|
array
|
|
Fitted values at exog.
|
|
"""
|
|
if linear is not None:
|
|
msg = 'linear keyword is deprecated, use which="linear"'
|
|
warnings.warn(msg, FutureWarning)
|
|
if linear is True:
|
|
which = "linear"
|
|
|
|
# Use fit offset if appropriate
|
|
if offset is None and exog is None and hasattr(self, 'offset'):
|
|
offset = self.offset
|
|
elif offset is None:
|
|
offset = 0.
|
|
|
|
if exog is None:
|
|
exog = self.exog
|
|
|
|
linpred = np.dot(exog, params) + offset
|
|
|
|
if which == "mean":
|
|
return self.cdf(linpred)
|
|
elif which == "linear":
|
|
return linpred
|
|
if which == "var":
|
|
mu = self.cdf(linpred)
|
|
var_ = mu * (1 - mu)
|
|
return var_
|
|
else:
|
|
            raise ValueError('`which` must be "mean", "linear" or "var".')
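    # Editor's illustrative sketch: for a fitted binary model result ``res``
    # (hypothetical), the three supported statistics are
    #
    #     >>> res.model.predict(res.params, which="mean")    # P(y=1 | x)
    #     >>> res.model.predict(res.params, which="linear")  # x'beta
    #     >>> res.model.predict(res.params, which="var")     # mu * (1 - mu)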
|
|
|
|
@Appender(DiscreteModel.fit_regularized.__doc__)
|
|
def fit_regularized(self, start_params=None, method='l1',
|
|
maxiter='defined_by_method', full_output=1, disp=1, callback=None,
|
|
alpha=0, trim_mode='auto', auto_trim_tol=0.01, size_trim_tol=1e-4,
|
|
qc_tol=0.03, **kwargs):
|
|
|
|
_validate_l1_method(method)
|
|
|
|
bnryfit = super().fit_regularized(start_params=start_params,
|
|
method=method,
|
|
maxiter=maxiter,
|
|
full_output=full_output,
|
|
disp=disp,
|
|
callback=callback,
|
|
alpha=alpha,
|
|
trim_mode=trim_mode,
|
|
auto_trim_tol=auto_trim_tol,
|
|
size_trim_tol=size_trim_tol,
|
|
qc_tol=qc_tol,
|
|
**kwargs)
|
|
|
|
discretefit = L1BinaryResults(self, bnryfit)
|
|
return L1BinaryResultsWrapper(discretefit)
|
|
|
|
def fit_constrained(self, constraints, start_params=None, **fit_kwds):
|
|
|
|
res = fit_constrained_wrap(self, constraints, start_params=None,
|
|
**fit_kwds)
|
|
return res
|
|
|
|
fit_constrained.__doc__ = fit_constrained_wrap.__doc__
|
|
|
|
def _derivative_predict(self, params, exog=None, transform='dydx',
|
|
offset=None):
|
|
"""
|
|
For computing marginal effects standard errors.
|
|
|
|
This is used only in the case of discrete and count regressors to
|
|
get the variance-covariance of the marginal effects. It returns
|
|
[d F / d params] where F is the predict.
|
|
|
|
Transform can be 'dydx' or 'eydx'. Checking is done in margeff
|
|
computations for appropriate transform.
|
|
"""
|
|
if exog is None:
|
|
exog = self.exog
|
|
linpred = self.predict(params, exog, offset=offset, which="linear")
|
|
dF = self.pdf(linpred)[:,None] * exog
|
|
if 'ey' in transform:
|
|
dF /= self.predict(params, exog, offset=offset)[:,None]
|
|
return dF
|
|
|
|
def _derivative_exog(self, params, exog=None, transform='dydx',
|
|
dummy_idx=None, count_idx=None, offset=None):
|
|
"""
|
|
For computing marginal effects returns dF(XB) / dX where F(.) is
|
|
the predicted probabilities
|
|
|
|
transform can be 'dydx', 'dyex', 'eydx', or 'eyex'.
|
|
|
|
Not all of these make sense in the presence of discrete regressors,
|
|
but checks are done in the results in get_margeff.
|
|
"""
|
|
# Note: this form should be appropriate for
|
|
# group 1 probit, logit, logistic, cloglog, heckprob, xtprobit
|
|
if exog is None:
|
|
exog = self.exog
|
|
|
|
linpred = self.predict(params, exog, offset=offset, which="linear")
|
|
margeff = np.dot(self.pdf(linpred)[:,None],
|
|
params[None,:])
|
|
|
|
if 'ex' in transform:
|
|
margeff *= exog
|
|
if 'ey' in transform:
|
|
margeff /= self.predict(params, exog)[:, None]
|
|
|
|
return self._derivative_exog_helper(margeff, params, exog,
|
|
dummy_idx, count_idx, transform)
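    # Editor's note (illustrative): for a binary model the marginal effect of
    # a continuous regressor k at x is pdf(x'beta) * beta_k, which is what the
    # outer product above computes row by row.  Users typically obtain these
    # through the results API, e.g. with a hypothetical fitted result ``res``:
    #
    #     >>> margeff = res.get_margeff(at="overall")
    #     >>> margeff.summary()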
|
|
|
|
def _deriv_mean_dparams(self, params):
|
|
"""
|
|
Derivative of the expected endog with respect to the parameters.
|
|
|
|
Parameters
|
|
----------
|
|
params : ndarray
|
|
parameter at which score is evaluated
|
|
|
|
Returns
|
|
-------
|
|
The value of the derivative of the expected endog with respect
|
|
to the parameter vector.
|
|
"""
|
|
link = self.link
|
|
lin_pred = self.predict(params, which="linear")
|
|
idl = link.inverse_deriv(lin_pred)
|
|
dmat = self.exog * idl[:, None]
|
|
return dmat
|
|
|
|
def get_distribution(self, params, exog=None, offset=None):
|
|
"""Get frozen instance of distribution based on predicted parameters.
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the model.
|
|
exog : ndarray, optional
|
|
Explanatory variables for the main count model.
|
|
If ``exog`` is None, then the data from the model will be used.
|
|
offset : ndarray, optional
|
|
Offset is added to the linear predictor of the mean function with
|
|
coefficient equal to 1.
|
|
Default is zero if exog is not None, and the model offset if exog
|
|
is None.
|
|
exposure : ndarray, optional
|
|
Log(exposure) is added to the linear predictor of the mean
|
|
function with coefficient equal to 1. If exposure is specified,
|
|
then it will be logged by the method. The user does not need to
|
|
log it first.
|
|
            Default is one if exog is not None, and it is the model exposure
|
|
if exog is None.
|
|
|
|
Returns
|
|
-------
|
|
Instance of frozen scipy distribution.
|
|
"""
|
|
mu = self.predict(params, exog=exog, offset=offset)
|
|
# distr = stats.bernoulli(mu[:, None])
|
|
distr = stats.bernoulli(mu)
|
|
return distr
|
|
|
|
|
|
class MultinomialModel(BinaryModel):
|
|
|
|
def _handle_data(self, endog, exog, missing, hasconst, **kwargs):
|
|
if data_tools._is_using_ndarray_type(endog, None):
|
|
endog_dummies, ynames = _numpy_to_dummies(endog)
|
|
yname = 'y'
|
|
elif data_tools._is_using_pandas(endog, None):
|
|
endog_dummies, ynames, yname = _pandas_to_dummies(endog)
|
|
else:
|
|
endog = np.asarray(endog)
|
|
endog_dummies, ynames = _numpy_to_dummies(endog)
|
|
yname = 'y'
|
|
|
|
if not isinstance(ynames, dict):
|
|
ynames = dict(zip(range(endog_dummies.shape[1]), ynames))
|
|
|
|
self._ynames_map = ynames
|
|
data = handle_data(endog_dummies, exog, missing, hasconst, **kwargs)
|
|
data.ynames = yname # overwrite this to single endog name
|
|
data.orig_endog = endog
|
|
self.wendog = data.endog
|
|
|
|
# repeating from upstream...
|
|
for key in kwargs:
|
|
if key in ['design_info', 'formula']: # leave attached to data
|
|
continue
|
|
try:
|
|
setattr(self, key, data.__dict__.pop(key))
|
|
except KeyError:
|
|
pass
|
|
return data
|
|
|
|
def initialize(self):
|
|
"""
|
|
Preprocesses the data for MNLogit.
|
|
"""
|
|
super().initialize()
|
|
# This is also a "whiten" method in other models (eg regression)
|
|
self.endog = self.endog.argmax(1) # turn it into an array of col idx
|
|
self.J = self.wendog.shape[1]
|
|
self.K = self.exog.shape[1]
|
|
self.df_model *= (self.J-1) # for each J - 1 equation.
|
|
self.df_resid = self.exog.shape[0] - self.df_model - (self.J-1)
|
|
|
|
def predict(self, params, exog=None, which="mean", linear=None):
|
|
"""
|
|
Predict response variable of a model given exogenous variables.
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
2d array of fitted parameters of the model. Should be in the
|
|
order returned from the model.
|
|
exog : array_like
|
|
1d or 2d array of exogenous values. If not supplied, the
|
|
whole exog attribute of the model is used. If a 1d array is given
|
|
            it is assumed to be 1 row of exogenous variables. If you only have
|
|
one regressor and would like to do prediction, you must provide
|
|
a 2d array with shape[1] == 1.
|
|
which : {'mean', 'linear', 'var', 'prob'}, optional
|
|
Statistic to predict. Default is 'mean'.
|
|
|
|
- 'mean' returns the conditional expectation of endog E(y | x),
|
|
              i.e. the array of choice probabilities for each observation.
|
|
- 'linear' returns the linear predictor of the mean function.
|
|
- 'var' returns the estimated variance of endog implied by the
|
|
model.
|
|
|
|
            .. versionadded:: 0.14
|
|
|
|
``which`` replaces and extends the deprecated ``linear``
|
|
argument.
|
|
|
|
linear : bool
|
|
If True, returns the linear predicted values. If False or None,
|
|
then the statistic specified by ``which`` will be returned.
|
|
|
|
            .. deprecated:: 0.14

               The ``linear`` keyword is deprecated and will be removed,
               use ``which`` keyword instead.
|
|
|
|
Notes
|
|
-----
|
|
Column 0 is the base case, the rest conform to the rows of params
|
|
shifted up one for the base case.
|
|
"""
|
|
if linear is not None:
|
|
msg = 'linear keyword is deprecated, use which="linear"'
|
|
warnings.warn(msg, FutureWarning)
|
|
if linear is True:
|
|
which = "linear"
|
|
|
|
if exog is None: # do here to accommodate user-given exog
|
|
exog = self.exog
|
|
if exog.ndim == 1:
|
|
exog = exog[None]
|
|
|
|
pred = super().predict(params, exog, which=which)
|
|
if which == "linear":
|
|
pred = np.column_stack((np.zeros(len(exog)), pred))
|
|
return pred
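    # Editor's illustrative sketch: for MNLogit, ``which="mean"`` returns an
    # (nobs, J) array of choice probabilities that sum to one across columns;
    # ``which="linear"`` prepends a zero column for the base category.  With a
    # hypothetical fitted result ``res``:
    #
    #     >>> probs = res.model.predict(res.params)
    #     >>> probs.sum(1)    # approximately 1.0 for every row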
|
|
|
|
@Appender(DiscreteModel.fit.__doc__)
|
|
def fit(self, start_params=None, method='newton', maxiter=35,
|
|
full_output=1, disp=1, callback=None, **kwargs):
|
|
if start_params is None:
|
|
start_params = np.zeros(self.K * (self.J-1))
|
|
else:
|
|
start_params = np.asarray(start_params)
|
|
|
|
if callback is None:
|
|
# placeholder until check_perfect_pred
|
|
callback = lambda x, *args : None
|
|
# skip calling super to handle results from LikelihoodModel
|
|
mnfit = base.LikelihoodModel.fit(self, start_params = start_params,
|
|
method=method, maxiter=maxiter, full_output=full_output,
|
|
disp=disp, callback=callback, **kwargs)
|
|
mnfit.params = mnfit.params.reshape(self.K, -1, order='F')
|
|
mnfit = MultinomialResults(self, mnfit)
|
|
return MultinomialResultsWrapper(mnfit)
|
|
|
|
@Appender(DiscreteModel.fit_regularized.__doc__)
|
|
def fit_regularized(self, start_params=None, method='l1',
|
|
maxiter='defined_by_method', full_output=1, disp=1, callback=None,
|
|
alpha=0, trim_mode='auto', auto_trim_tol=0.01, size_trim_tol=1e-4,
|
|
qc_tol=0.03, **kwargs):
|
|
if start_params is None:
|
|
start_params = np.zeros(self.K * (self.J-1))
|
|
else:
|
|
start_params = np.asarray(start_params)
|
|
mnfit = DiscreteModel.fit_regularized(
|
|
self, start_params=start_params, method=method, maxiter=maxiter,
|
|
full_output=full_output, disp=disp, callback=callback,
|
|
alpha=alpha, trim_mode=trim_mode, auto_trim_tol=auto_trim_tol,
|
|
size_trim_tol=size_trim_tol, qc_tol=qc_tol, **kwargs)
|
|
mnfit.params = mnfit.params.reshape(self.K, -1, order='F')
|
|
mnfit = L1MultinomialResults(self, mnfit)
|
|
return L1MultinomialResultsWrapper(mnfit)
|
|
|
|
def _derivative_predict(self, params, exog=None, transform='dydx'):
|
|
"""
|
|
For computing marginal effects standard errors.
|
|
|
|
This is used only in the case of discrete and count regressors to
|
|
get the variance-covariance of the marginal effects. It returns
|
|
[d F / d params] where F is the predicted probabilities for each
|
|
choice. dFdparams is of shape nobs x (J*K) x (J-1)*K.
|
|
The zero derivatives for the base category are not included.
|
|
|
|
Transform can be 'dydx' or 'eydx'. Checking is done in margeff
|
|
computations for appropriate transform.
|
|
"""
|
|
if exog is None:
|
|
exog = self.exog
|
|
if params.ndim == 1: # will get flatted from approx_fprime
|
|
params = params.reshape(self.K, self.J-1, order='F')
|
|
|
|
eXB = np.exp(np.dot(exog, params))
|
|
sum_eXB = (1 + eXB.sum(1))[:,None]
|
|
J = int(self.J)
|
|
K = int(self.K)
|
|
repeat_eXB = np.repeat(eXB, J, axis=1)
|
|
X = np.tile(exog, J-1)
|
|
# this is the derivative wrt the base level
|
|
F0 = -repeat_eXB * X / sum_eXB ** 2
|
|
# this is the derivative wrt the other levels when
|
|
# dF_j / dParams_j (ie., own equation)
|
|
#NOTE: this computes too much, any easy way to cut down?
|
|
F1 = eXB.T[:,:,None]*X * (sum_eXB - repeat_eXB) / (sum_eXB**2)
|
|
F1 = F1.transpose((1,0,2)) # put the nobs index first
|
|
|
|
# other equation index
|
|
other_idx = ~np.kron(np.eye(J-1), np.ones(K)).astype(bool)
|
|
F1[:, other_idx] = (-eXB.T[:,:,None]*X*repeat_eXB / \
|
|
(sum_eXB**2)).transpose((1,0,2))[:, other_idx]
|
|
dFdX = np.concatenate((F0[:, None,:], F1), axis=1)
|
|
|
|
if 'ey' in transform:
|
|
dFdX /= self.predict(params, exog)[:, :, None]
|
|
return dFdX
|
|
|
|
def _derivative_exog(self, params, exog=None, transform='dydx',
|
|
dummy_idx=None, count_idx=None):
|
|
"""
|
|
For computing marginal effects returns dF(XB) / dX where F(.) is
|
|
the predicted probabilities
|
|
|
|
transform can be 'dydx', 'dyex', 'eydx', or 'eyex'.
|
|
|
|
Not all of these make sense in the presence of discrete regressors,
|
|
but checks are done in the results in get_margeff.
|
|
|
|
For Multinomial models the marginal effects are
|
|
|
|
P[j] * (params[j] - sum_k P[k]*params[k])
|
|
|
|
It is returned unshaped, so that each row contains each of the J
|
|
equations. This makes it easier to take derivatives of this for
|
|
standard errors. If you want average marginal effects you can do
|
|
        margeff.reshape(nobs, K, J, order='F').mean(0) and the marginal effects
|
|
for choice J are in column J
|
|
"""
|
|
J = int(self.J) # number of alternative choices
|
|
K = int(self.K) # number of variables
|
|
# Note: this form should be appropriate for
|
|
# group 1 probit, logit, logistic, cloglog, heckprob, xtprobit
|
|
if exog is None:
|
|
exog = self.exog
|
|
if params.ndim == 1: # will get flatted from approx_fprime
|
|
params = params.reshape(K, J-1, order='F')
|
|
|
|
zeroparams = np.c_[np.zeros(K), params] # add base in
|
|
|
|
cdf = self.cdf(np.dot(exog, params))
|
|
|
|
# TODO: meaningful interpretation for `iterm`?
|
|
iterm = np.array([cdf[:, [i]] * zeroparams[:, i]
|
|
for i in range(int(J))]).sum(0)
|
|
|
|
margeff = np.array([cdf[:, [j]] * (zeroparams[:, j] - iterm)
|
|
for j in range(J)])
|
|
|
|
# swap the axes to make sure margeff are in order nobs, K, J
|
|
margeff = np.transpose(margeff, (1, 2, 0))
|
|
|
|
if 'ex' in transform:
|
|
margeff *= exog
|
|
if 'ey' in transform:
|
|
margeff /= self.predict(params, exog)[:,None,:]
|
|
|
|
margeff = self._derivative_exog_helper(margeff, params, exog,
|
|
dummy_idx, count_idx, transform)
|
|
return margeff.reshape(len(exog), -1, order='F')
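    # Editor's note (illustrative): the marginal-effect formula quoted in the
    # docstring, P[j] * (params[j] - sum_k P[k] * params[k]), can be checked
    # for a single observation and one regressor with small arrays:
    #
    #     >>> import numpy as np
    #     >>> P = np.array([0.2, 0.5, 0.3])    # choice probabilities, J = 3
    #     >>> b = np.array([0.0, 1.0, -1.0])   # coefficients incl. base (0)
    #     >>> margeff = P * (b - P @ b)        # dP_j / dx for that regressor
    #     >>> margeff.sum()                    # effects sum to zero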
|
|
|
|
def get_distribution(self, params, exog=None, offset=None):
|
|
"""get frozen instance of distribution
|
|
"""
|
|
raise NotImplementedError
|
|
|
|
|
|
class CountModel(DiscreteModel):
|
|
def __init__(self, endog, exog, offset=None, exposure=None, missing='none',
|
|
check_rank=True, **kwargs):
|
|
self._check_kwargs(kwargs)
|
|
super().__init__(endog, exog, check_rank, missing=missing,
|
|
offset=offset, exposure=exposure, **kwargs)
|
|
if exposure is not None:
|
|
self.exposure = np.asarray(self.exposure)
|
|
self.exposure = np.log(self.exposure)
|
|
if offset is not None:
|
|
self.offset = np.asarray(self.offset)
|
|
self._check_inputs(self.offset, self.exposure, self.endog)
|
|
if offset is None:
|
|
delattr(self, 'offset')
|
|
if exposure is None:
|
|
delattr(self, 'exposure')
|
|
|
|
# promote dtype to float64 if needed
|
|
dt = np.promote_types(self.endog.dtype, np.float64)
|
|
self.endog = np.asarray(self.endog, dt)
|
|
dt = np.promote_types(self.exog.dtype, np.float64)
|
|
self.exog = np.asarray(self.exog, dt)
|
|
|
|
|
|
def _check_inputs(self, offset, exposure, endog):
|
|
if offset is not None and offset.shape[0] != endog.shape[0]:
|
|
raise ValueError("offset is not the same length as endog")
|
|
|
|
if exposure is not None and exposure.shape[0] != endog.shape[0]:
|
|
raise ValueError("exposure is not the same length as endog")
|
|
|
|
def _get_init_kwds(self):
|
|
# this is a temporary fixup because exposure has been transformed
|
|
# see #1609
|
|
kwds = super()._get_init_kwds()
|
|
if 'exposure' in kwds and kwds['exposure'] is not None:
|
|
kwds['exposure'] = np.exp(kwds['exposure'])
|
|
return kwds
|
|
|
|
def _get_predict_arrays(self, exog=None, offset=None, exposure=None):
|
|
|
|
# convert extras if not None
|
|
if exposure is not None:
|
|
exposure = np.log(np.asarray(exposure))
|
|
if offset is not None:
|
|
offset = np.asarray(offset)
|
|
|
|
# get defaults
|
|
if exog is None:
|
|
# prediction is in-sample
|
|
exog = self.exog
|
|
if exposure is None:
|
|
exposure = getattr(self, 'exposure', 0)
|
|
if offset is None:
|
|
offset = getattr(self, 'offset', 0)
|
|
else:
|
|
# user specified
|
|
exog = np.asarray(exog)
|
|
if exposure is None:
|
|
exposure = 0
|
|
if offset is None:
|
|
offset = 0
|
|
|
|
return exog, offset, exposure
|
|
|
|
def predict(self, params, exog=None, exposure=None, offset=None,
|
|
which='mean', linear=None):
|
|
"""
|
|
Predict response variable of a count model given exogenous variables
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
Model parameters
|
|
exog : array_like, optional
|
|
            Design / exogenous data. If exog is None, model exog is used.
|
|
exposure : array_like, optional
|
|
Log(exposure) is added to the linear prediction with
|
|
coefficient equal to 1. If exposure is not provided and exog
|
|
is None, uses the model's exposure if present. If not, uses
|
|
0 as the default value.
|
|
offset : array_like, optional
|
|
Offset is added to the linear prediction with coefficient
|
|
equal to 1. If offset is not provided and exog
|
|
is None, uses the model's offset if present. If not, uses
|
|
0 as the default value.
|
|
which : 'mean', 'linear', 'var', 'prob' (optional)
|
|
            Statistic to predict. Default is 'mean'.
|
|
|
|
- 'mean' returns the conditional expectation of endog E(y | x),
|
|
i.e. exp of linear predictor.
|
|
- 'linear' returns the linear predictor of the mean function.
|
|
- 'var' variance of endog implied by the likelihood model
|
|
- 'prob' predicted probabilities for counts.
|
|
|
|
            .. versionadded:: 0.14
|
|
|
|
``which`` replaces and extends the deprecated ``linear``
|
|
argument.
|
|
|
|
linear : bool
|
|
If True, returns the linear predicted values. If False or None,
|
|
then the statistic specified by ``which`` will be returned.
|
|
|
|
            .. deprecated:: 0.14

               The ``linear`` keyword is deprecated and will be removed,
               use ``which`` keyword instead.
|
|
|
|
|
|
Notes
|
|
-----
|
|
If exposure is specified, then it will be logged by the method.
|
|
The user does not need to log it first.
|
|
"""
|
|
if linear is not None:
|
|
msg = 'linear keyword is deprecated, use which="linear"'
|
|
warnings.warn(msg, FutureWarning)
|
|
if linear is True:
|
|
which = "linear"
|
|
|
|
# the following is copied from GLM predict (without family/link check)
|
|
# Use fit offset if appropriate
|
|
if offset is None and exog is None and hasattr(self, 'offset'):
|
|
offset = self.offset
|
|
elif offset is None:
|
|
offset = 0.
|
|
|
|
# Use fit exposure if appropriate
|
|
if exposure is None and exog is None and hasattr(self, 'exposure'):
|
|
# Already logged
|
|
exposure = self.exposure
|
|
elif exposure is None:
|
|
exposure = 0.
|
|
else:
|
|
exposure = np.log(exposure)
|
|
|
|
if exog is None:
|
|
exog = self.exog
|
|
|
|
fitted = np.dot(exog, params[:exog.shape[1]])
|
|
linpred = fitted + exposure + offset
|
|
if which == "mean":
|
|
return np.exp(linpred)
|
|
elif which.startswith("lin"):
|
|
return linpred
|
|
else:
|
|
            raise ValueError('keyword which has to be "mean" or "linear"')
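    # Editor's illustrative note: ``exposure`` is supplied on the original
    # scale and logged internally, so with a hypothetical fitted result
    # ``res`` and new data ``x_new``
    #
    #     >>> res.model.predict(res.params, exog=x_new, exposure=2.0)
    #
    # is exactly twice the prediction obtained with ``exposure=1.0``.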
|
|
|
|
def _derivative_predict(self, params, exog=None, transform='dydx'):
|
|
"""
|
|
For computing marginal effects standard errors.
|
|
|
|
This is used only in the case of discrete and count regressors to
|
|
get the variance-covariance of the marginal effects. It returns
|
|
[d F / d params] where F is the predict.
|
|
|
|
Transform can be 'dydx' or 'eydx'. Checking is done in margeff
|
|
computations for appropriate transform.
|
|
"""
|
|
if exog is None:
|
|
exog = self.exog
|
|
#NOTE: this handles offset and exposure
|
|
dF = self.predict(params, exog)[:,None] * exog
|
|
if 'ey' in transform:
|
|
dF /= self.predict(params, exog)[:,None]
|
|
return dF
|
|
|
|
def _derivative_exog(self, params, exog=None, transform="dydx",
|
|
dummy_idx=None, count_idx=None):
|
|
"""
|
|
For computing marginal effects. These are the marginal effects
|
|
d F(XB) / dX
|
|
For the Poisson model F(XB) is the predicted counts rather than
|
|
the probabilities.
|
|
|
|
transform can be 'dydx', 'dyex', 'eydx', or 'eyex'.
|
|
|
|
Not all of these make sense in the presence of discrete regressors,
|
|
but checks are done in the results in get_margeff.
|
|
"""
|
|
# group 3 poisson, nbreg, zip, zinb
|
|
if exog is None:
|
|
exog = self.exog
|
|
k_extra = getattr(self, 'k_extra', 0)
|
|
params_exog = params if k_extra == 0 else params[:-k_extra]
|
|
margeff = self.predict(params, exog)[:,None] * params_exog[None,:]
|
|
if 'ex' in transform:
|
|
margeff *= exog
|
|
if 'ey' in transform:
|
|
margeff /= self.predict(params, exog)[:,None]
|
|
|
|
return self._derivative_exog_helper(margeff, params, exog,
|
|
dummy_idx, count_idx, transform)
|
|
|
|
def _deriv_mean_dparams(self, params):
|
|
"""
|
|
Derivative of the expected endog with respect to the parameters.
|
|
|
|
Parameters
|
|
----------
|
|
params : ndarray
|
|
parameter at which score is evaluated
|
|
|
|
Returns
|
|
-------
|
|
The value of the derivative of the expected endog with respect
|
|
to the parameter vector.
|
|
"""
|
|
from statsmodels.genmod.families import links
|
|
link = links.Log()
|
|
lin_pred = self.predict(params, which="linear")
|
|
idl = link.inverse_deriv(lin_pred)
|
|
dmat = self.exog * idl[:, None]
|
|
if self.k_extra > 0:
|
|
dmat_extra = np.zeros((dmat.shape[0], self.k_extra))
|
|
dmat = np.column_stack((dmat, dmat_extra))
|
|
return dmat
|
|
|
|
|
|
@Appender(DiscreteModel.fit.__doc__)
|
|
def fit(self, start_params=None, method='newton', maxiter=35,
|
|
full_output=1, disp=1, callback=None, **kwargs):
|
|
cntfit = super().fit(start_params=start_params,
|
|
method=method,
|
|
maxiter=maxiter,
|
|
full_output=full_output,
|
|
disp=disp,
|
|
callback=callback,
|
|
**kwargs)
|
|
discretefit = CountResults(self, cntfit)
|
|
return CountResultsWrapper(discretefit)
|
|
|
|
@Appender(DiscreteModel.fit_regularized.__doc__)
|
|
def fit_regularized(self, start_params=None, method='l1',
|
|
maxiter='defined_by_method', full_output=1, disp=1, callback=None,
|
|
alpha=0, trim_mode='auto', auto_trim_tol=0.01, size_trim_tol=1e-4,
|
|
qc_tol=0.03, **kwargs):
|
|
|
|
_validate_l1_method(method)
|
|
|
|
cntfit = super().fit_regularized(start_params=start_params,
|
|
method=method,
|
|
maxiter=maxiter,
|
|
full_output=full_output,
|
|
disp=disp,
|
|
callback=callback,
|
|
alpha=alpha,
|
|
trim_mode=trim_mode, auto_trim_tol=auto_trim_tol,
|
|
size_trim_tol=size_trim_tol,
|
|
qc_tol=qc_tol,
|
|
**kwargs)
|
|
|
|
discretefit = L1CountResults(self, cntfit)
|
|
return L1CountResultsWrapper(discretefit)
|
|
|
|
|
|
# Public Model Classes
|
|
|
|
|
|
class Poisson(CountModel):
|
|
__doc__ = """
|
|
Poisson Model
|
|
|
|
%(params)s
|
|
%(extra_params)s
|
|
|
|
Attributes
|
|
----------
|
|
endog : ndarray
|
|
A reference to the endogenous response variable
|
|
exog : ndarray
|
|
A reference to the exogenous design.
|
|
""" % {'params': base._model_params_doc,
|
|
'extra_params':
|
|
"""offset : array_like
|
|
Offset is added to the linear prediction with coefficient equal to 1.
|
|
exposure : array_like
|
|
Log(exposure) is added to the linear prediction with coefficient
|
|
equal to 1.
|
|
""" + base._missing_param_doc + _check_rank_doc}
|
|
|
|
@cache_readonly
|
|
def family(self):
|
|
from statsmodels.genmod import families
|
|
return families.Poisson()
|
|
|
|
def cdf(self, X):
|
|
"""
|
|
Poisson model cumulative distribution function
|
|
|
|
Parameters
|
|
----------
|
|
X : array_like
|
|
`X` is the linear predictor of the model. See notes.
|
|
|
|
Returns
|
|
-------
|
|
The value of the Poisson CDF at each point.
|
|
|
|
Notes
|
|
-----
|
|
The CDF is defined as
|
|
|
|
.. math:: \\exp\\left(-\\lambda\\right)\\sum_{i=0}^{y}\\frac{\\lambda^{i}}{i!}
|
|
|
|
where :math:`\\lambda` assumes the loglinear model. I.e.,
|
|
|
|
.. math:: \\ln\\lambda_{i}=X\\beta
|
|
|
|
The parameter `X` is :math:`X\\beta` in the above formula.
|
|
"""
|
|
y = self.endog
|
|
return stats.poisson.cdf(y, np.exp(X))
|
|
|
|
def pdf(self, X):
|
|
"""
|
|
Poisson model probability mass function
|
|
|
|
Parameters
|
|
----------
|
|
X : array_like
|
|
`X` is the linear predictor of the model. See notes.
|
|
|
|
Returns
|
|
-------
|
|
pdf : ndarray
|
|
The value of the Poisson probability mass function, PMF, for each
|
|
point of X.
|
|
|
|
Notes
|
|
-----
|
|
The PMF is defined as
|
|
|
|
.. math:: \\frac{e^{-\\lambda_{i}}\\lambda_{i}^{y_{i}}}{y_{i}!}
|
|
|
|
where :math:`\\lambda` assumes the loglinear model. I.e.,
|
|
|
|
.. math:: \\ln\\lambda_{i}=x_{i}\\beta
|
|
|
|
The parameter `X` is :math:`x_{i}\\beta` in the above formula.
|
|
"""
|
|
y = self.endog
|
|
return np.exp(stats.poisson.logpmf(y, np.exp(X)))
|
|
|
|
def loglike(self, params):
|
|
"""
|
|
Loglikelihood of Poisson model
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the model.
|
|
|
|
Returns
|
|
-------
|
|
loglike : float
|
|
The log-likelihood function of the model evaluated at `params`.
|
|
See notes.
|
|
|
|
Notes
|
|
-----
|
|
.. math:: \\ln L=\\sum_{i=1}^{n}\\left[-\\lambda_{i}+y_{i}x_{i}^{\\prime}\\beta-\\ln y_{i}!\\right]
|
|
"""
|
|
offset = getattr(self, "offset", 0)
|
|
exposure = getattr(self, "exposure", 0)
|
|
XB = np.dot(self.exog, params) + offset + exposure
|
|
endog = self.endog
|
|
return np.sum(
|
|
-np.exp(np.clip(XB, None, EXP_UPPER_LIMIT))
|
|
+ endog * XB
|
|
- gammaln(endog + 1)
|
|
)
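    # Editor's note (illustrative): apart from the clipping of the linear
    # predictor for numerical stability, the sum above is equivalent to
    #
    #     >>> from scipy import stats
    #     >>> # ll = stats.poisson.logpmf(endog, np.exp(XB)).sum()
    #
    # which can serve as a quick cross-check of the closed-form expression.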
|
|
|
|
def loglikeobs(self, params):
|
|
"""
|
|
Loglikelihood for observations of Poisson model
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the model.
|
|
|
|
Returns
|
|
-------
|
|
loglike : array_like
|
|
The log likelihood for each observation of the model evaluated
|
|
at `params`. See Notes
|
|
|
|
Notes
|
|
-----
|
|
.. math:: \\ln L_{i}=\\left[-\\lambda_{i}+y_{i}x_{i}^{\\prime}\\beta-\\ln y_{i}!\\right]
|
|
|
|
for observations :math:`i=1,...,n`
|
|
"""
|
|
offset = getattr(self, "offset", 0)
|
|
exposure = getattr(self, "exposure", 0)
|
|
XB = np.dot(self.exog, params) + offset + exposure
|
|
endog = self.endog
|
|
#np.sum(stats.poisson.logpmf(endog, np.exp(XB)))
|
|
return -np.exp(XB) + endog*XB - gammaln(endog+1)
|
|
|
|
@Appender(_get_start_params_null_docs)
|
|
def _get_start_params_null(self):
|
|
offset = getattr(self, "offset", 0)
|
|
exposure = getattr(self, "exposure", 0)
|
|
const = (self.endog / np.exp(offset + exposure)).mean()
|
|
params = [np.log(const)]
|
|
return params
|
|
|
|
@Appender(DiscreteModel.fit.__doc__)
|
|
def fit(self, start_params=None, method='newton', maxiter=35,
|
|
full_output=1, disp=1, callback=None, **kwargs):
|
|
|
|
if start_params is None and self.data.const_idx is not None:
|
|
# k_params or k_exog not available?
|
|
start_params = 0.001 * np.ones(self.exog.shape[1])
|
|
start_params[self.data.const_idx] = self._get_start_params_null()[0]
|
|
|
|
kwds = {}
|
|
if kwargs.get('cov_type') is not None:
|
|
kwds['cov_type'] = kwargs.get('cov_type')
|
|
kwds['cov_kwds'] = kwargs.get('cov_kwds', {})
|
|
|
|
cntfit = super(CountModel, self).fit(start_params=start_params,
|
|
method=method,
|
|
maxiter=maxiter,
|
|
full_output=full_output,
|
|
disp=disp,
|
|
callback=callback,
|
|
**kwargs)
|
|
|
|
discretefit = PoissonResults(self, cntfit, **kwds)
|
|
return PoissonResultsWrapper(discretefit)
|
|
|
|
@Appender(DiscreteModel.fit_regularized.__doc__)
|
|
def fit_regularized(self, start_params=None, method='l1',
|
|
maxiter='defined_by_method', full_output=1, disp=1, callback=None,
|
|
alpha=0, trim_mode='auto', auto_trim_tol=0.01, size_trim_tol=1e-4,
|
|
qc_tol=0.03, **kwargs):
|
|
|
|
_validate_l1_method(method)
|
|
|
|
cntfit = super(CountModel, self).fit_regularized(
|
|
start_params=start_params, method=method, maxiter=maxiter,
|
|
full_output=full_output, disp=disp, callback=callback,
|
|
alpha=alpha, trim_mode=trim_mode, auto_trim_tol=auto_trim_tol,
|
|
size_trim_tol=size_trim_tol, qc_tol=qc_tol, **kwargs)
|
|
|
|
discretefit = L1PoissonResults(self, cntfit)
|
|
return L1PoissonResultsWrapper(discretefit)
|
|
|
|
def fit_constrained(self, constraints, start_params=None, **fit_kwds):
|
|
"""fit the model subject to linear equality constraints
|
|
|
|
The constraints are of the form `R params = q`
|
|
where R is the constraint_matrix and q is the vector of
|
|
constraint_values.
|
|
|
|
The estimation creates a new model with transformed design matrix,
|
|
exog, and converts the results back to the original parameterization.
|
|
|
|
Parameters
|
|
----------
|
|
constraints : formula expression or tuple
|
|
If it is a tuple, then the constraint needs to be given by two
|
|
arrays (constraint_matrix, constraint_value), i.e. (R, q).
|
|
Otherwise, the constraints can be given as strings or list of
|
|
strings.
|
|
see t_test for details
|
|
start_params : None or array_like
|
|
starting values for the optimization. `start_params` needs to be
|
|
given in the original parameter space and are internally
|
|
transformed.
|
|
**fit_kwds : keyword arguments
|
|
fit_kwds are used in the optimization of the transformed model.
|
|
|
|
Returns
|
|
-------
|
|
results : Results instance
|
|
"""
|
|
|
|
#constraints = (R, q)
|
|
# TODO: temporary trailing underscore to not overwrite the monkey
|
|
# patched version
|
|
# TODO: decide whether to move the imports
|
|
from patsy import DesignInfo
|
|
from statsmodels.base._constraints import (fit_constrained,
|
|
LinearConstraints)
|
|
|
|
# same pattern as in base.LikelihoodModel.t_test
|
|
lc = DesignInfo(self.exog_names).linear_constraint(constraints)
|
|
R, q = lc.coefs, lc.constants
|
|
|
|
# TODO: add start_params option, need access to tranformation
|
|
# fit_constrained needs to do the transformation
|
|
params, cov, res_constr = fit_constrained(self, R, q,
|
|
start_params=start_params,
|
|
fit_kwds=fit_kwds)
|
|
#create dummy results Instance, TODO: wire up properly
|
|
res = self.fit(maxiter=0, method='nm', disp=0,
|
|
warn_convergence=False) # we get a wrapper back
|
|
res.mle_retvals['fcall'] = res_constr.mle_retvals.get('fcall', np.nan)
|
|
res.mle_retvals['iterations'] = res_constr.mle_retvals.get(
|
|
'iterations', np.nan)
|
|
res.mle_retvals['converged'] = res_constr.mle_retvals['converged']
|
|
res._results.params = params
|
|
res._results.cov_params_default = cov
|
|
cov_type = fit_kwds.get('cov_type', 'nonrobust')
|
|
if cov_type != 'nonrobust':
|
|
res._results.normalized_cov_params = cov # assume scale=1
|
|
else:
|
|
res._results.normalized_cov_params = None
|
|
k_constr = len(q)
|
|
res._results.df_resid += k_constr
|
|
res._results.df_model -= k_constr
|
|
res._results.constraints = LinearConstraints.from_patsy(lc)
|
|
res._results.k_constr = k_constr
|
|
res._results.results_constrained = res_constr
|
|
return res
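    # Editor's illustrative sketch: constraints can be given as strings in
    # terms of the exog names, e.g. with hypothetical data ``y``, ``X`` whose
    # columns include "x1" and "x2":
    #
    #     >>> res = Poisson(y, X).fit_constrained("x1 + x2 = 0")
    #     >>> res.constraints    # LinearConstraints representation of R, q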
|
|
|
|
def score(self, params):
|
|
"""
|
|
Poisson model score (gradient) vector of the log-likelihood
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the model
|
|
|
|
Returns
|
|
-------
|
|
score : ndarray, 1-D
|
|
The score vector of the model, i.e. the first derivative of the
|
|
loglikelihood function, evaluated at `params`
|
|
|
|
Notes
|
|
-----
|
|
.. math:: \\frac{\\partial\\ln L}{\\partial\\beta}=\\sum_{i=1}^{n}\\left(y_{i}-\\lambda_{i}\\right)x_{i}
|
|
|
|
where the loglinear model is assumed
|
|
|
|
.. math:: \\ln\\lambda_{i}=x_{i}\\beta
|
|
"""
|
|
offset = getattr(self, "offset", 0)
|
|
exposure = getattr(self, "exposure", 0)
|
|
X = self.exog
|
|
L = np.exp(np.dot(X,params) + offset + exposure)
|
|
return np.dot(self.endog - L, X)
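    # Editor's note (illustrative): ``score`` is the column sum of
    # ``score_obs`` below; a quick numerical cross-check against the
    # log-likelihood is possible with statsmodels' finite differences, e.g.
    # for a hypothetical model instance ``mod`` and parameter vector ``p``:
    #
    #     >>> from statsmodels.tools.numdiff import approx_fprime
    #     >>> # np.allclose(mod.score(p), approx_fprime(p, mod.loglike), atol=1e-4)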
|
|
|
|
def score_obs(self, params):
|
|
"""
|
|
Poisson model Jacobian of the log-likelihood for each observation
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the model
|
|
|
|
Returns
|
|
-------
|
|
score : array_like
|
|
The score vector (nobs, k_vars) of the model evaluated at `params`
|
|
|
|
Notes
|
|
-----
|
|
.. math:: \\frac{\\partial\\ln L_{i}}{\\partial\\beta}=\\left(y_{i}-\\lambda_{i}\\right)x_{i}
|
|
|
|
for observations :math:`i=1,...,n`
|
|
|
|
where the loglinear model is assumed
|
|
|
|
.. math:: \\ln\\lambda_{i}=x_{i}\\beta
|
|
"""
|
|
offset = getattr(self, "offset", 0)
|
|
exposure = getattr(self, "exposure", 0)
|
|
X = self.exog
|
|
L = np.exp(np.dot(X,params) + offset + exposure)
|
|
return (self.endog - L)[:,None] * X
|
|
|
|
def score_factor(self, params):
|
|
"""
|
|
Poisson model score_factor for each observation
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the model
|
|
|
|
Returns
|
|
-------
|
|
score : array_like
|
|
The score factor (nobs, ) of the model evaluated at `params`
|
|
|
|
Notes
|
|
-----
|
|
.. math:: \\frac{\\partial\\ln L_{i}}{\\partial\\beta}=\\left(y_{i}-\\lambda_{i}\\right)
|
|
|
|
for observations :math:`i=1,...,n`
|
|
|
|
where the loglinear model is assumed
|
|
|
|
.. math:: \\ln\\lambda_{i}=x_{i}\\beta
|
|
"""
|
|
offset = getattr(self, "offset", 0)
|
|
exposure = getattr(self, "exposure", 0)
|
|
X = self.exog
|
|
L = np.exp(np.dot(X,params) + offset + exposure)
|
|
return (self.endog - L)
|
|
|
|
def hessian(self, params):
|
|
"""
|
|
Poisson model Hessian matrix of the loglikelihood
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the model
|
|
|
|
Returns
|
|
-------
|
|
hess : ndarray, (k_vars, k_vars)
|
|
The Hessian, second derivative of loglikelihood function,
|
|
evaluated at `params`
|
|
|
|
Notes
|
|
-----
|
|
.. math:: \\frac{\\partial^{2}\\ln L}{\\partial\\beta\\partial\\beta^{\\prime}}=-\\sum_{i=1}^{n}\\lambda_{i}x_{i}x_{i}^{\\prime}
|
|
|
|
where the loglinear model is assumed
|
|
|
|
.. math:: \\ln\\lambda_{i}=x_{i}\\beta
|
|
"""
|
|
offset = getattr(self, "offset", 0)
|
|
exposure = getattr(self, "exposure", 0)
|
|
X = self.exog
|
|
L = np.exp(np.dot(X,params) + exposure + offset)
|
|
return -np.dot(L*X.T, X)
|
|
|
|
def hessian_factor(self, params):
|
|
"""
|
|
Poisson model Hessian factor
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the model
|
|
|
|
Returns
|
|
-------
|
|
hess : ndarray, (nobs,)
|
|
The Hessian factor, second derivative of loglikelihood function
|
|
with respect to the linear predictor evaluated at `params`
|
|
|
|
Notes
|
|
-----
|
|
.. math:: \\frac{\\partial^{2}\\ln L}{\\partial\\beta\\partial\\beta^{\\prime}}=-\\sum_{i=1}^{n}\\lambda_{i}
|
|
|
|
where the loglinear model is assumed
|
|
|
|
.. math:: \\ln\\lambda_{i}=x_{i}\\beta
|
|
"""
|
|
offset = getattr(self, "offset", 0)
|
|
exposure = getattr(self, "exposure", 0)
|
|
X = self.exog
|
|
L = np.exp(np.dot(X,params) + exposure + offset)
|
|
return -L
|
|
|
|
def _deriv_score_obs_dendog(self, params, scale=None):
|
|
"""derivative of score_obs w.r.t. endog
|
|
|
|
Parameters
|
|
----------
|
|
params : ndarray
|
|
parameter at which score is evaluated
|
|
scale : None or float
|
|
If scale is None, then the default scale will be calculated.
|
|
Default scale is defined by `self.scaletype` and set in fit.
|
|
If scale is not None, then it is used as a fixed scale.
|
|
|
|
Returns
|
|
-------
|
|
derivative : ndarray_2d
|
|
The derivative of the score_obs with respect to endog. This
|
|
            is given by `score_factor0[:, None] * exog` where
|
|
`score_factor0` is the score_factor without the residual.
|
|
"""
|
|
return self.exog
|
|
|
|
def predict(self, params, exog=None, exposure=None, offset=None,
|
|
which='mean', linear=None, y_values=None):
|
|
"""
|
|
Predict response variable of a model given exogenous variables.
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
2d array of fitted parameters of the model. Should be in the
|
|
order returned from the model.
|
|
exog : array_like, optional
|
|
1d or 2d array of exogenous values. If not supplied, then the
|
|
exog attribute of the model is used. If a 1d array is given
|
|
            it is assumed to be 1 row of exogenous variables. If you only have
|
|
one regressor and would like to do prediction, you must provide
|
|
a 2d array with shape[1] == 1.
|
|
offset : array_like, optional
|
|
Offset is added to the linear predictor with coefficient equal
|
|
to 1.
|
|
Default is zero if exog is not None, and the model offset if exog
|
|
is None.
|
|
exposure : array_like, optional
|
|
Log(exposure) is added to the linear prediction with coefficient
|
|
equal to 1.
|
|
            Default is one if exog is not None, and is the model exposure
|
|
if exog is None.
|
|
which : 'mean', 'linear', 'var', 'prob' (optional)
|
|
            Statistic to predict. Default is 'mean'.
|
|
|
|
- 'mean' returns the conditional expectation of endog E(y | x),
|
|
i.e. exp of linear predictor.
|
|
- 'linear' returns the linear predictor of the mean function.
|
|
- 'var' returns the estimated variance of endog implied by the
|
|
model.
|
|
- 'prob' return probabilities for counts from 0 to max(endog) or
|
|
for y_values if those are provided.
|
|
|
|
            .. versionadded:: 0.14
|
|
|
|
``which`` replaces and extends the deprecated ``linear``
|
|
argument.
|
|
|
|
        linear : bool
            If True, returns the linear predicted values. If False or None,
            then the statistic specified by ``which`` will be returned.

            .. deprecated:: 0.14

               The ``linear`` keyword is deprecated and will be removed,
               use ``which`` keyword instead.
|
|
|
|
y_values : array_like
|
|
Values of the random variable endog at which pmf is evaluated.
|
|
Only used if ``which="prob"``
|
|
"""
|
|
# Note docstring is reused by other count models
|
|
|
|
if linear is not None:
|
|
msg = 'linear keyword is deprecated, use which="linear"'
|
|
warnings.warn(msg, FutureWarning)
|
|
if linear is True:
|
|
which = "linear"
|
|
|
|
if which.startswith("lin"):
|
|
which = "linear"
|
|
if which in ["mean", "linear"]:
|
|
return super().predict(params, exog=exog, exposure=exposure,
|
|
offset=offset,
|
|
which=which, linear=linear)
|
|
# TODO: add full set of which
|
|
elif which == "var":
|
|
mu = self.predict(params, exog=exog,
|
|
exposure=exposure, offset=offset,
|
|
)
|
|
return mu
|
|
elif which == "prob":
|
|
if y_values is not None:
|
|
y_values = np.atleast_2d(y_values)
|
|
else:
|
|
y_values = np.atleast_2d(
|
|
np.arange(0, np.max(self.endog) + 1))
|
|
mu = self.predict(params, exog=exog,
|
|
exposure=exposure, offset=offset,
|
|
)[:, None]
|
|
# uses broadcasting
|
|
return stats.poisson._pmf(y_values, mu)
|
|
else:
|
|
raise ValueError('Value of the `which` option is not recognized')
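    # Editor's illustrative sketch: ``which="prob"`` evaluates the Poisson pmf
    # on a grid of counts, one row per observation.  With a hypothetical
    # fitted result ``res``:
    #
    #     >>> import numpy as np
    #     >>> probs = res.model.predict(res.params, which="prob",
    #     ...                           y_values=np.arange(5))
    #     >>> probs.shape    # (nobs, 5); rows need not sum to one here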
|
|
|
|
def _prob_nonzero(self, mu, params=None):
|
|
"""Probability that count is not zero
|
|
|
|
internal use in Censored model, will be refactored or removed
|
|
"""
|
|
prob_nz = - np.expm1(-mu)
|
|
return prob_nz
|
|
|
|
def _var(self, mu, params=None):
|
|
"""variance implied by the distribution
|
|
|
|
internal use, will be refactored or removed
|
|
"""
|
|
return mu
|
|
|
|
def get_distribution(self, params, exog=None, exposure=None, offset=None):
|
|
"""Get frozen instance of distribution based on predicted parameters.
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the model.
|
|
exog : ndarray, optional
|
|
Explanatory variables for the main count model.
|
|
If ``exog`` is None, then the data from the model will be used.
|
|
offset : ndarray, optional
|
|
Offset is added to the linear predictor of the mean function with
|
|
coefficient equal to 1.
|
|
Default is zero if exog is not None, and the model offset if exog
|
|
is None.
|
|
exposure : ndarray, optional
|
|
Log(exposure) is added to the linear predictor of the mean
|
|
function with coefficient equal to 1. If exposure is specified,
|
|
then it will be logged by the method. The user does not need to
|
|
log it first.
|
|
            Default is one if exog is not None, and it is the model exposure
|
|
if exog is None.
|
|
|
|
Returns
|
|
-------
|
|
Instance of frozen scipy distribution subclass.
|
|
"""
|
|
mu = self.predict(params, exog=exog, exposure=exposure, offset=offset)
|
|
distr = stats.poisson(mu)
|
|
return distr
|
|
|
|
|
|
class GeneralizedPoisson(CountModel):
|
|
__doc__ = """
|
|
Generalized Poisson Model
|
|
|
|
%(params)s
|
|
%(extra_params)s
|
|
|
|
Attributes
|
|
----------
|
|
endog : ndarray
|
|
A reference to the endogenous response variable
|
|
exog : ndarray
|
|
A reference to the exogenous design.
|
|
""" % {'params': base._model_params_doc,
|
|
'extra_params':
|
|
"""
|
|
p : scalar
|
|
p denotes the parameterization for GP regression. p=1 for GP-1 and
|
|
p=2 for GP-2. Default is p=1.
|
|
offset : array_like
|
|
Offset is added to the linear prediction with coefficient equal to 1.
|
|
exposure : array_like
|
|
Log(exposure) is added to the linear prediction with coefficient
|
|
equal to 1.""" + base._missing_param_doc + _check_rank_doc}
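# Hypothetical usage sketch: ``GeneralizedPoisson(y, X, p=2)`` selects the
# GP-2 parameterization. Internally ``parameterization`` stores p - 1, and
# the implied variance is mu * (1 + alpha * mu**(p - 1))**2.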
|
|
|
|
def __init__(self, endog, exog, p=1, offset=None,
|
|
exposure=None, missing='none', check_rank=True, **kwargs):
|
|
super().__init__(endog,
|
|
exog,
|
|
offset=offset,
|
|
exposure=exposure,
|
|
missing=missing,
|
|
check_rank=check_rank,
|
|
**kwargs)
|
|
self.parameterization = p - 1
|
|
self.exog_names.append('alpha')
|
|
self.k_extra = 1
|
|
self._transparams = False
|
|
|
|
def _get_init_kwds(self):
|
|
kwds = super()._get_init_kwds()
|
|
kwds['p'] = self.parameterization + 1
|
|
return kwds
|
|
|
|
def _get_exogs(self):
|
|
return (self.exog, None)
|
|
|
|
def loglike(self, params):
|
|
"""
|
|
Loglikelihood of Generalized Poisson model
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the model.
|
|
|
|
Returns
|
|
-------
|
|
loglike : float
|
|
The log-likelihood function of the model evaluated at `params`.
|
|
See notes.
|
|
|
|
Notes
|
|
-----
|
|
.. math:: \\ln L=\\sum_{i=1}^{n}\\left[ln(\\mu_{i})+(y_{i}-1)*ln(\\mu_{i}+
|
|
\\alpha*\\mu_{i}^{p-1}*y_{i})-y_{i}*ln(1+\\alpha*\\mu_{i}^{p-1})-
|
|
ln(y_{i}!)-\\frac{\\mu_{i}+\\alpha*\\mu_{i}^{p-1}*y_{i}}{1+\\alpha*
|
|
\\mu_{i}^{p-1}}\\right]
|
|
"""
|
|
return np.sum(self.loglikeobs(params))
|
|
|
|
def loglikeobs(self, params):
|
|
"""
|
|
Loglikelihood for observations of Generalized Poisson model
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the model.
|
|
|
|
Returns
|
|
-------
|
|
loglike : ndarray
|
|
The log likelihood for each observation of the model evaluated
|
|
at `params`. See Notes
|
|
|
|
Notes
|
|
-----
|
|
.. math:: \\ln L=\\sum_{i=1}^{n}\\left[ln(\\mu_{i})+(y_{i}-1)*ln(\\mu_{i}+
|
|
\\alpha*\\mu_{i}^{p-1}*y_{i})-y_{i}*ln(1+\\alpha*\\mu_{i}^{p-1})-
|
|
ln(y_{i}!)-\\frac{\\mu_{i}+\\alpha*\\mu_{i}^{p-1}*y_{i}}{1+\\alpha*
|
|
\\mu_{i}^{p-1}}\\right]
|
|
|
|
for observations :math:`i=1,...,n`
|
|
"""
|
|
if self._transparams:
|
|
alpha = np.exp(params[-1])
|
|
else:
|
|
alpha = params[-1]
|
|
params = params[:-1]
|
|
p = self.parameterization
|
|
endog = self.endog
|
|
mu = self.predict(params)
|
|
mu_p = np.power(mu, p)
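# a1 = 1 + alpha * mu**p and a2 = mu + alpha * mu**p * y are the building
# blocks of the loglikelihood formula in the docstring above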
|
|
a1 = 1 + alpha * mu_p
|
|
a2 = mu + (a1 - 1) * endog
|
|
a1 = np.maximum(1e-20, a1)
|
|
a2 = np.maximum(1e-20, a2)
|
|
return (np.log(mu) + (endog - 1) * np.log(a2) - endog *
|
|
np.log(a1) - gammaln(endog + 1) - a2 / a1)
|
|
|
|
@Appender(_get_start_params_null_docs)
|
|
def _get_start_params_null(self):
|
|
offset = getattr(self, "offset", 0)
|
|
exposure = getattr(self, "exposure", 0)
|
|
|
|
const = (self.endog / np.exp(offset + exposure)).mean()
|
|
params = [np.log(const)]
|
|
mu = const * np.exp(offset + exposure)
|
|
resid = self.endog - mu
|
|
a = self._estimate_dispersion(mu, resid, df_resid=resid.shape[0] - 1)
|
|
params.append(a)
|
|
|
|
return np.array(params)
|
|
|
|
def _estimate_dispersion(self, mu, resid, df_resid=None):
|
|
q = self.parameterization
|
|
if df_resid is None:
|
|
df_resid = resid.shape[0]
|
|
a = ((np.abs(resid) / np.sqrt(mu) - 1) * mu**(-q)).sum() / df_resid
|
|
return a
|
|
|
|
|
|
@Appender(
|
|
"""
|
|
use_transparams : bool
|
|
This parameter enables an internal transformation to impose
non-negativity of alpha. True to enable. Default is False.
Setting use_transparams=True imposes the constraint of no
underdispersion (alpha > 0). If use_transparams=True and method is
"newton" or "ncg", the transformation is ignored.
|
|
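A hypothetical usage sketch (``y``, ``X`` are a count response and a
design matrix including a constant):

>>> res = GeneralizedPoisson(y, X, p=2).fit(use_transparams=True, disp=0)
>>> alpha_hat = res.params[-1]   # dispersion, reported on the original scale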
""")
|
|
@Appender(DiscreteModel.fit.__doc__)
|
|
def fit(self, start_params=None, method='bfgs', maxiter=35,
|
|
full_output=1, disp=1, callback=None, use_transparams=False,
|
|
cov_type='nonrobust', cov_kwds=None, use_t=None, optim_kwds_prelim=None,
|
|
**kwargs):
|
|
if use_transparams and method not in ['newton', 'ncg']:
|
|
self._transparams = True
|
|
else:
|
|
if use_transparams:
|
|
warnings.warn('Parameter "use_transparams" is ignored',
|
|
RuntimeWarning)
|
|
self._transparams = False
|
|
|
|
if start_params is None:
|
|
offset = getattr(self, "offset", 0) + getattr(self, "exposure", 0)
|
|
if np.size(offset) == 1 and offset == 0:
|
|
offset = None
|
|
kwds_prelim = {'disp': 0, 'skip_hessian': True,
|
|
'warn_convergence': False}
|
|
if optim_kwds_prelim is not None:
|
|
kwds_prelim.update(optim_kwds_prelim)
|
|
mod_poi = Poisson(self.endog, self.exog, offset=offset)
|
|
with warnings.catch_warnings():
|
|
warnings.simplefilter("always")
|
|
res_poi = mod_poi.fit(**kwds_prelim)
|
|
start_params = res_poi.params
|
|
a = self._estimate_dispersion(res_poi.predict(), res_poi.resid,
|
|
df_resid=res_poi.df_resid)
|
|
start_params = np.append(start_params, max(-0.1, a))
|
|
|
|
if callback is None:
|
|
# work around perfect separation callback #3895
|
|
callback = lambda *x: x
|
|
|
|
mlefit = super().fit(start_params=start_params,
|
|
maxiter=maxiter,
|
|
method=method,
|
|
disp=disp,
|
|
full_output=full_output,
|
|
callback=callback,
|
|
**kwargs)
|
|
if optim_kwds_prelim is not None:
|
|
mlefit.mle_settings["optim_kwds_prelim"] = optim_kwds_prelim
|
|
if use_transparams and method not in ["newton", "ncg"]:
|
|
self._transparams = False
|
|
mlefit._results.params[-1] = np.exp(mlefit._results.params[-1])
|
|
|
|
gpfit = GeneralizedPoissonResults(self, mlefit._results)
|
|
result = GeneralizedPoissonResultsWrapper(gpfit)
|
|
|
|
if cov_kwds is None:
|
|
cov_kwds = {}
|
|
|
|
result._get_robustcov_results(cov_type=cov_type,
|
|
use_self=True, use_t=use_t, **cov_kwds)
|
|
return result
|
|
|
|
@Appender(DiscreteModel.fit_regularized.__doc__)
|
|
def fit_regularized(self, start_params=None, method='l1',
|
|
maxiter='defined_by_method', full_output=1, disp=1, callback=None,
|
|
alpha=0, trim_mode='auto', auto_trim_tol=0.01, size_trim_tol=1e-4,
|
|
qc_tol=0.03, **kwargs):
|
|
|
|
_validate_l1_method(method)
|
|
|
|
if np.size(alpha) == 1 and alpha != 0:
|
|
k_params = self.exog.shape[1] + self.k_extra
|
|
alpha = alpha * np.ones(k_params)
|
|
alpha[-1] = 0
|
|
|
|
alpha_p = alpha[:-1] if (self.k_extra and np.size(alpha) > 1) else alpha
|
|
self._transparams = False
|
|
if start_params is None:
|
|
offset = getattr(self, "offset", 0) + getattr(self, "exposure", 0)
|
|
if np.size(offset) == 1 and offset == 0:
|
|
offset = None
|
|
mod_poi = Poisson(self.endog, self.exog, offset=offset)
|
|
with warnings.catch_warnings():
|
|
warnings.simplefilter("always")
|
|
start_params = mod_poi.fit_regularized(
|
|
start_params=start_params, method=method, maxiter=maxiter,
|
|
full_output=full_output, disp=0, callback=callback,
|
|
alpha=alpha_p, trim_mode=trim_mode,
|
|
auto_trim_tol=auto_trim_tol, size_trim_tol=size_trim_tol,
|
|
qc_tol=qc_tol, **kwargs).params
|
|
start_params = np.append(start_params, 0.1)
|
|
|
|
cntfit = super(CountModel, self).fit_regularized(
|
|
start_params=start_params, method=method, maxiter=maxiter,
|
|
full_output=full_output, disp=disp, callback=callback,
|
|
alpha=alpha, trim_mode=trim_mode, auto_trim_tol=auto_trim_tol,
|
|
size_trim_tol=size_trim_tol, qc_tol=qc_tol, **kwargs)
|
|
|
|
discretefit = L1GeneralizedPoissonResults(self, cntfit)
|
|
return L1GeneralizedPoissonResultsWrapper(discretefit)
|
|
|
|
def score_obs(self, params):
|
|
if self._transparams:
|
|
alpha = np.exp(params[-1])
|
|
else:
|
|
alpha = params[-1]
|
|
|
|
params = params[:-1]
|
|
p = self.parameterization
|
|
exog = self.exog
|
|
y = self.endog[:,None]
|
|
mu = self.predict(params)[:,None]
|
|
mu_p = np.power(mu, p)
|
|
a1 = 1 + alpha * mu_p
|
|
a2 = mu + alpha * mu_p * y
|
|
a3 = alpha * p * mu ** (p - 1)
|
|
a4 = a3 * y
|
|
dmudb = mu * exog
|
|
|
|
dalpha = (mu_p * (y * ((y - 1) / a2 - 2 / a1) + a2 / a1**2))
|
|
dparams = dmudb * (-a4 / a1 +
|
|
a3 * a2 / (a1 ** 2) +
|
|
(1 + a4) * ((y - 1) / a2 - 1 / a1) +
|
|
1 / mu)
|
|
|
|
return np.concatenate((dparams, np.atleast_2d(dalpha)),
|
|
axis=1)
|
|
|
|
def score(self, params):
|
|
score = np.sum(self.score_obs(params), axis=0)
|
|
if self._transparams:
|
|
# chain rule: params[-1] is ln(alpha), so scale the derivative
# w.r.t. alpha by alpha
score[-1] *= np.exp(params[-1])
|
|
return score
|
|
else:
|
|
return score
|
|
|
|
def score_factor(self, params, endog=None):
|
|
params = np.asarray(params)
|
|
if self._transparams:
|
|
alpha = np.exp(params[-1])
|
|
else:
|
|
alpha = params[-1]
|
|
|
|
params = params[:-1]
|
|
p = self.parameterization
|
|
y = self.endog if endog is None else endog
|
|
|
|
mu = self.predict(params)
|
|
mu_p = np.power(mu, p)
|
|
a1 = 1 + alpha * mu_p
|
|
a2 = mu + alpha * mu_p * y
|
|
a3 = alpha * p * mu ** (p - 1)
|
|
a4 = a3 * y
|
|
dmudb = mu
|
|
|
|
dalpha = (mu_p * (y * ((y - 1) / a2 - 2 / a1) + a2 / a1**2))
|
|
dparams = dmudb * (-a4 / a1 +
|
|
a3 * a2 / (a1 ** 2) +
|
|
(1 + a4) * ((y - 1) / a2 - 1 / a1) +
|
|
1 / mu)
|
|
|
|
return dparams, dalpha
|
|
|
|
def _score_p(self, params):
|
|
"""
|
|
Generalized Poisson model derivative of the log-likelihood w.r.t. the p-parameter
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the model
|
|
|
|
Returns
|
|
-------
|
|
dldp : float
|
|
dldp is the first derivative of the loglikelihood function with
|
|
respect to the p-parameter, evaluated at `params`.
|
|
"""
|
|
if self._transparams:
|
|
alpha = np.exp(params[-1])
|
|
else:
|
|
alpha = params[-1]
|
|
params = params[:-1]
|
|
p = self.parameterization
|
|
y = self.endog[:,None]
|
|
mu = self.predict(params)[:,None]
|
|
mu_p = np.power(mu, p)
|
|
a1 = 1 + alpha * mu_p
|
|
a2 = mu + alpha * mu_p * y
|
|
|
|
dp = np.sum(np.log(mu) * ((a2 - mu) * ((y - 1) / a2 - 2 / a1) +
|
|
(a1 - 1) * a2 / a1 ** 2))
|
|
return dp
|
|
|
|
def hessian(self, params):
|
|
"""
|
|
Generalized Poisson model Hessian matrix of the loglikelihood
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the model
|
|
|
|
Returns
|
|
-------
|
|
hess : ndarray, (k_vars, k_vars)
|
|
The Hessian, second derivative of loglikelihood function,
|
|
evaluated at `params`
|
|
"""
|
|
if self._transparams:
|
|
alpha = np.exp(params[-1])
|
|
else:
|
|
alpha = params[-1]
|
|
|
|
params = params[:-1]
|
|
p = self.parameterization
|
|
exog = self.exog
|
|
y = self.endog[:,None]
|
|
mu = self.predict(params)[:,None]
|
|
mu_p = np.power(mu, p)
|
|
a1 = 1 + alpha * mu_p
|
|
a2 = mu + alpha * mu_p * y
|
|
a3 = alpha * p * mu ** (p - 1)
|
|
a4 = a3 * y
|
|
a5 = p * mu ** (p - 1)
|
|
dmudb = mu * exog
|
|
|
|
# for dl/dparams dparams
|
|
dim = exog.shape[1]
|
|
hess_arr = np.empty((dim+1,dim+1))
|
|
|
|
for i in range(dim):
|
|
for j in range(i + 1):
|
|
hess_val = np.sum(mu * exog[:,i,None] * exog[:,j,None] *
|
|
(mu * (a3 * a4 / a1**2 -
|
|
2 * a3**2 * a2 / a1**3 +
|
|
2 * a3 * (a4 + 1) / a1**2 -
|
|
a4 * p / (mu * a1) +
|
|
a3 * p * a2 / (mu * a1**2) +
|
|
(y - 1) * a4 * (p - 1) / (a2 * mu) -
|
|
(y - 1) * (1 + a4)**2 / a2**2 -
|
|
a4 * (p - 1) / (a1 * mu)) +
|
|
((y - 1) * (1 + a4) / a2 -
|
|
(1 + a4) / a1)), axis=0)
|
|
hess_arr[i, j] = np.squeeze(hess_val)
|
|
tri_idx = np.triu_indices(dim, k=1)
|
|
hess_arr[tri_idx] = hess_arr.T[tri_idx]
|
|
|
|
# for dl/dparams dalpha
|
|
dldpda = np.sum((2 * a4 * mu_p / a1**2 -
|
|
2 * a3 * mu_p * a2 / a1**3 -
|
|
mu_p * y * (y - 1) * (1 + a4) / a2**2 +
|
|
mu_p * (1 + a4) / a1**2 +
|
|
a5 * y * (y - 1) / a2 -
|
|
2 * a5 * y / a1 +
|
|
a5 * a2 / a1**2) * dmudb,
|
|
axis=0)
|
|
|
|
hess_arr[-1,:-1] = dldpda
|
|
hess_arr[:-1,-1] = dldpda
|
|
|
|
# for dl/dalpha dalpha
|
|
dldada = mu_p**2 * (3 * y / a1**2 -
|
|
(y / a2)**2. * (y - 1) -
|
|
2 * a2 / a1**3)
|
|
|
|
hess_arr[-1,-1] = dldada.sum()
|
|
|
|
return hess_arr
|
|
|
|
def hessian_factor(self, params):
|
|
"""
|
|
Generalized Poisson model Hessian matrix of the loglikelihood
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the model
|
|
|
|
Returns
|
|
-------
|
|
hess : tuple of ndarray, each of shape (nobs,)
|
|
The Hessian factor, second derivative of loglikelihood function
|
|
with respect to linear predictor and dispersion parameter
|
|
evaluated at `params`
|
|
The first element is the second derivative w.r.t. the linear
predictor, the second element is the cross derivative, and the third
element is the second derivative w.r.t. the dispersion parameter.
|
|
|
|
"""
|
|
params = np.asarray(params)
|
|
if self._transparams:
|
|
alpha = np.exp(params[-1])
|
|
else:
|
|
alpha = params[-1]
|
|
|
|
params = params[:-1]
|
|
p = self.parameterization
|
|
y = self.endog
|
|
mu = self.predict(params)
|
|
mu_p = np.power(mu, p)
|
|
a1 = 1 + alpha * mu_p
|
|
a2 = mu + alpha * mu_p * y
|
|
a3 = alpha * p * mu ** (p - 1)
|
|
a4 = a3 * y
|
|
a5 = p * mu ** (p - 1)
|
|
dmudb = mu
|
|
|
|
dbb = mu * (
|
|
mu * (a3 * a4 / a1**2 -
|
|
2 * a3**2 * a2 / a1**3 +
|
|
2 * a3 * (a4 + 1) / a1**2 -
|
|
a4 * p / (mu * a1) +
|
|
a3 * p * a2 / (mu * a1**2) +
|
|
a4 / (mu * a1) -
|
|
a3 * a2 / (mu * a1**2) +
|
|
(y - 1) * a4 * (p - 1) / (a2 * mu) -
|
|
(y - 1) * (1 + a4)**2 / a2**2 -
|
|
a4 * (p - 1) / (a1 * mu) -
|
|
1 / mu**2) +
|
|
(-a4 / a1 +
|
|
a3 * a2 / a1**2 +
|
|
(y - 1) * (1 + a4) / a2 -
|
|
(1 + a4) / a1 +
|
|
1 / mu))
|
|
|
|
# for dl/dlinpred dalpha
|
|
dba = ((2 * a4 * mu_p / a1**2 -
|
|
2 * a3 * mu_p * a2 / a1**3 -
|
|
mu_p * y * (y - 1) * (1 + a4) / a2**2 +
|
|
mu_p * (1 + a4) / a1**2 +
|
|
a5 * y * (y - 1) / a2 -
|
|
2 * a5 * y / a1 +
|
|
a5 * a2 / a1**2) * dmudb)
|
|
|
|
# for dl/dalpha dalpha
|
|
daa = mu_p**2 * (3 * y / a1**2 -
|
|
(y / a2)**2. * (y - 1) -
|
|
2 * a2 / a1**3)
|
|
|
|
return dbb, dba, daa
|
|
|
|
@Appender(Poisson.predict.__doc__)
|
|
def predict(self, params, exog=None, exposure=None, offset=None,
|
|
which='mean', y_values=None):
|
|
|
|
if exog is None:
|
|
exog = self.exog
|
|
|
|
if exposure is None:
|
|
exposure = getattr(self, 'exposure', 0)
|
|
elif exposure != 0:
|
|
exposure = np.log(exposure)
|
|
|
|
if offset is None:
|
|
offset = getattr(self, 'offset', 0)
|
|
|
|
fitted = np.dot(exog, params[:exog.shape[1]])
|
|
linpred = fitted + exposure + offset
|
|
|
|
if which == 'mean':
|
|
return np.exp(linpred)
|
|
elif which == 'linear':
|
|
return linpred
|
|
elif which == 'var':
|
|
mean = np.exp(linpred)
|
|
alpha = params[-1]
|
|
pm1 = self.parameterization # `p - 1` in GPP
|
|
var_ = mean * (1 + alpha * mean**pm1)**2
|
|
return var_
|
|
elif which == 'prob':
|
|
if y_values is None:
|
|
y_values = np.atleast_2d(np.arange(0, np.max(self.endog)+1))
|
|
mu = self.predict(params, exog=exog, exposure=exposure,
|
|
offset=offset)[:, None]
|
|
return genpoisson_p.pmf(y_values, mu, params[-1],
|
|
self.parameterization + 1)
|
|
else:
|
|
raise ValueError('keyword \'which\' not recognized')
|
|
|
|
def _deriv_score_obs_dendog(self, params):
|
|
"""derivative of score_obs w.r.t. endog
|
|
|
|
Parameters
|
|
----------
|
|
params : ndarray
|
|
parameter at which score is evaluated
|
|
|
|
Returns
|
|
-------
|
|
derivative : ndarray_2d
|
|
The derivative of the score_obs with respect to endog.
|
|
"""
|
|
# code duplication with NegativeBinomialP
|
|
from statsmodels.tools.numdiff import _approx_fprime_cs_scalar
|
|
|
|
def f(y):
|
|
if y.ndim == 2 and y.shape[1] == 1:
|
|
y = y[:, 0]
|
|
sf = self.score_factor(params, endog=y)
|
|
return np.column_stack(sf)
|
|
|
|
dsf = _approx_fprime_cs_scalar(self.endog[:, None], f)
|
|
# deriv is 2d vector
|
|
d1 = dsf[:, :1] * self.exog
|
|
d2 = dsf[:, 1:2]
|
|
|
|
return np.column_stack((d1, d2))
|
|
|
|
def _var(self, mu, params=None):
|
|
"""variance implied by the distribution
|
|
|
|
internal use, will be refactored or removed
|
|
"""
|
|
alpha = params[-1]
|
|
pm1 = self.parameterization # `p-1` in GPP
|
|
var_ = mu * (1 + alpha * mu**pm1)**2
|
|
return var_
|
|
|
|
def _prob_nonzero(self, mu, params):
|
|
"""Probability that count is not zero
|
|
|
|
internal use in Censored model, will be refactored or removed
|
|
"""
|
|
alpha = params[-1]
|
|
pm1 = self.parameterization # p-1 in GPP
|
|
prob_zero = np.exp(- mu / (1 + alpha * mu**pm1))
|
|
prob_nz = 1 - prob_zero
|
|
return prob_nz
|
|
|
|
@Appender(Poisson.get_distribution.__doc__)
|
|
def get_distribution(self, params, exog=None, exposure=None, offset=None):
|
|
"""get frozen instance of distribution
|
|
"""
|
|
mu = self.predict(params, exog=exog, exposure=exposure, offset=offset)
|
|
p = self.parameterization + 1
|
|
# distr = genpoisson_p(mu[:, None], params[-1], p)
|
|
distr = genpoisson_p(mu, params[-1], p)
|
|
return distr
|
|
|
|
|
|
class Logit(BinaryModel):
|
|
__doc__ = """
|
|
Logit Model
|
|
|
|
{params}
|
|
offset : array_like
|
|
Offset is added to the linear prediction with coefficient equal to 1.
|
|
{extra_params}
|
|
|
|
Attributes
|
|
----------
|
|
endog : ndarray
|
|
A reference to the endogenous response variable
|
|
exog : ndarray
|
|
A reference to the exogenous design.
|
|
""".format(params=base._model_params_doc,
|
|
extra_params=base._missing_param_doc + _check_rank_doc)
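# Typical usage sketch (hypothetical binary ``y`` and design ``X`` with a
# constant column):
#   res = Logit(y, X).fit(disp=0)
#   res.predict(X[:5])   # fitted probabilities P(y=1 | x)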
|
|
|
|
_continuous_ok = True
|
|
|
|
@cache_readonly
|
|
def link(self):
|
|
from statsmodels.genmod.families import links
|
|
link = links.Logit()
|
|
return link
|
|
|
|
def cdf(self, X):
|
|
"""
|
|
The logistic cumulative distribution function
|
|
|
|
Parameters
|
|
----------
|
|
X : array_like
|
|
`X` is the linear predictor of the logit model. See notes.
|
|
|
|
Returns
|
|
-------
|
|
1/(1 + exp(-X))
|
|
|
|
Notes
|
|
-----
|
|
In the logit model,
|
|
|
|
.. math:: \\Lambda\\left(x^{\\prime}\\beta\\right)=
|
|
\\text{Prob}\\left(Y=1|x\\right)=
|
|
\\frac{e^{x^{\\prime}\\beta}}{1+e^{x^{\\prime}\\beta}}
|
|
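Examples
--------
A minimal sketch; ``cdf`` only uses the values passed in, so tiny
placeholder data is enough to construct the model:

>>> import numpy as np
>>> m = Logit(np.array([0., 1.]), np.ones((2, 1)))
>>> p_mid = m.cdf(0.0)                   # 0.5 at a zero linear predictor
>>> p = m.cdf(np.array([-2.0, 2.0]))     # symmetric around 0.5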
"""
|
|
X = np.asarray(X)
|
|
return 1/(1+np.exp(-X))
|
|
|
|
def pdf(self, X):
|
|
"""
|
|
The logistic probability density function
|
|
|
|
Parameters
|
|
----------
|
|
X : array_like
|
|
`X` is the linear predictor of the logit model. See notes.
|
|
|
|
Returns
|
|
-------
|
|
pdf : ndarray
|
|
The value of the Logit probability mass function, PMF, for each
|
|
point of X. ``np.exp(-x)/(1+np.exp(-X))**2``
|
|
|
|
Notes
|
|
-----
|
|
In the logit model,
|
|
|
|
.. math:: \\lambda\\left(x^{\\prime}\\beta\\right)=\\frac{e^{-x^{\\prime}\\beta}}{\\left(1+e^{-x^{\\prime}\\beta}\\right)^{2}}
|
|
"""
|
|
X = np.asarray(X)
|
|
return np.exp(-X)/(1+np.exp(-X))**2
|
|
|
|
@cache_readonly
|
|
def family(self):
|
|
from statsmodels.genmod import families
|
|
return families.Binomial()
|
|
|
|
def loglike(self, params):
|
|
"""
|
|
Log-likelihood of logit model.
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the logit model.
|
|
|
|
Returns
|
|
-------
|
|
loglike : float
|
|
The log-likelihood function of the model evaluated at `params`.
|
|
See notes.
|
|
|
|
Notes
|
|
-----
|
|
.. math::
|
|
|
|
\\ln L=\\sum_{i}\\ln\\Lambda
|
|
\\left(q_{i}x_{i}^{\\prime}\\beta\\right)
|
|
|
|
Where :math:`q=2y-1`. This simplification comes from the fact that the
|
|
logistic distribution is symmetric.
|
|
"""
|
|
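# q maps y in {0, 1} to {-1, +1}; by symmetry of the logistic cdf,
# log P(y_i | x_i) = log F(q_i * x_i'beta) for either outcome.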
q = 2*self.endog - 1
|
|
linpred = self.predict(params, which="linear")
|
|
return np.sum(np.log(self.cdf(q * linpred)))
|
|
|
|
def loglikeobs(self, params):
|
|
"""
|
|
Log-likelihood of logit model for each observation.
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the logit model.
|
|
|
|
Returns
|
|
-------
|
|
loglike : ndarray
|
|
The log likelihood for each observation of the model evaluated
|
|
at `params`. See Notes
|
|
|
|
Notes
|
|
-----
|
|
.. math::
|
|
|
|
\\ln L=\\sum_{i}\\ln\\Lambda
|
|
\\left(q_{i}x_{i}^{\\prime}\\beta\\right)
|
|
|
|
for observations :math:`i=1,...,n`
|
|
|
|
where :math:`q=2y-1`. This simplification comes from the fact that the
|
|
logistic distribution is symmetric.
|
|
"""
|
|
q = 2*self.endog - 1
|
|
linpred = self.predict(params, which="linear")
|
|
return np.log(self.cdf(q * linpred))
|
|
|
|
def score(self, params):
|
|
"""
|
|
Logit model score (gradient) vector of the log-likelihood
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the model
|
|
|
|
Returns
|
|
-------
|
|
score : ndarray, 1-D
|
|
The score vector of the model, i.e. the first derivative of the
|
|
loglikelihood function, evaluated at `params`
|
|
|
|
Notes
|
|
-----
|
|
.. math:: \\frac{\\partial\\ln L}{\\partial\\beta}=\\sum_{i=1}^{n}\\left(y_{i}-\\Lambda_{i}\\right)x_{i}
|
|
"""
|
|
|
|
y = self.endog
|
|
X = self.exog
|
|
fitted = self.predict(params)
|
|
return np.dot(y - fitted, X)
|
|
|
|
def score_obs(self, params):
|
|
"""
|
|
Logit model Jacobian of the log-likelihood for each observation
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the model
|
|
|
|
Returns
|
|
-------
|
|
jac : array_like
|
|
The derivative of the loglikelihood for each observation evaluated
|
|
at `params`.
|
|
|
|
Notes
|
|
-----
|
|
.. math:: \\frac{\\partial\\ln L_{i}}{\\partial\\beta}=\\left(y_{i}-\\Lambda_{i}\\right)x_{i}
|
|
|
|
for observations :math:`i=1,...,n`
|
|
"""
|
|
|
|
y = self.endog
|
|
X = self.exog
|
|
fitted = self.predict(params)
|
|
return (y - fitted)[:,None] * X
|
|
|
|
def score_factor(self, params):
|
|
"""
|
|
Logit model derivative of the log-likelihood with respect to linpred.
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the model
|
|
|
|
Returns
|
|
-------
|
|
score_factor : array_like
|
|
The derivative of the loglikelihood for each observation evaluated
|
|
at `params`.
|
|
|
|
Notes
|
|
-----
|
|
.. math:: \\frac{\\partial\\ln L_{i}}{\\partial x_{i}^{\\prime}\\beta}=\\left(y_{i}-\\Lambda_{i}\\right)

for observations :math:`i=1,...,n`

where :math:`\\Lambda_{i}=\\Lambda\\left(x_{i}^{\\prime}\\beta\\right)` is the
logistic cdf evaluated at the linear predictor.
|
|
"""
|
|
y = self.endog
|
|
fitted = self.predict(params)
|
|
return (y - fitted)
|
|
|
|
def hessian(self, params):
|
|
"""
|
|
Logit model Hessian matrix of the log-likelihood
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the model
|
|
|
|
Returns
|
|
-------
|
|
hess : ndarray, (k_vars, k_vars)
|
|
The Hessian, second derivative of loglikelihood function,
|
|
evaluated at `params`
|
|
|
|
Notes
|
|
-----
|
|
.. math:: \\frac{\\partial^{2}\\ln L}{\\partial\\beta\\partial\\beta^{\\prime}}=-\\sum_{i}\\Lambda_{i}\\left(1-\\Lambda_{i}\\right)x_{i}x_{i}^{\\prime}
|
|
"""
|
|
X = self.exog
|
|
L = self.predict(params)
|
|
return -np.dot(L*(1-L)*X.T,X)
|
|
|
|
def hessian_factor(self, params):
|
|
"""
|
|
Logit model Hessian factor
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the model
|
|
|
|
Returns
|
|
-------
|
|
hess : ndarray, (nobs,)
|
|
The Hessian factor, second derivative of loglikelihood function
|
|
with respect to the linear predictor evaluated at `params`
|
|
"""
|
|
L = self.predict(params)
|
|
return -L * (1 - L)
|
|
|
|
@Appender(DiscreteModel.fit.__doc__)
|
|
def fit(self, start_params=None, method='newton', maxiter=35,
|
|
full_output=1, disp=1, callback=None, **kwargs):
|
|
bnryfit = super().fit(start_params=start_params,
|
|
method=method,
|
|
maxiter=maxiter,
|
|
full_output=full_output,
|
|
disp=disp,
|
|
callback=callback,
|
|
**kwargs)
|
|
|
|
discretefit = LogitResults(self, bnryfit)
|
|
return BinaryResultsWrapper(discretefit)
|
|
|
|
def _deriv_score_obs_dendog(self, params):
|
|
"""derivative of score_obs w.r.t. endog
|
|
|
|
Parameters
|
|
----------
|
|
params : ndarray
|
|
parameter at which score is evaluated
|
|
|
|
Returns
|
|
-------
|
|
derivative : ndarray_2d
|
|
The derivative of the score_obs with respect to endog. This
|
|
is given by `score_factor0[:, None] * exog` where
|
|
`score_factor0` is the score_factor without the residual.
|
|
"""
|
|
return self.exog
|
|
|
|
|
|
class Probit(BinaryModel):
|
|
__doc__ = """
|
|
Probit Model
|
|
|
|
{params}
|
|
offset : array_like
|
|
Offset is added to the linear prediction with coefficient equal to 1.
|
|
{extra_params}
|
|
|
|
Attributes
|
|
----------
|
|
endog : ndarray
|
|
A reference to the endogenous response variable
|
|
exog : ndarray
|
|
A reference to the exogenous design.
|
|
""".format(params=base._model_params_doc,
|
|
extra_params=base._missing_param_doc + _check_rank_doc)
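# Typical usage sketch (hypothetical binary ``y``, design ``X`` with constant):
#   res = Probit(y, X).fit(disp=0)
#   res.get_margeff().summary()   # average marginal effects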
|
|
|
|
@cache_readonly
|
|
def link(self):
|
|
from statsmodels.genmod.families import links
|
|
link = links.Probit()
|
|
return link
|
|
|
|
def cdf(self, X):
|
|
"""
|
|
Probit (Normal) cumulative distribution function
|
|
|
|
Parameters
|
|
----------
|
|
X : array_like
|
|
The linear predictor of the model (XB).
|
|
|
|
Returns
|
|
-------
|
|
cdf : ndarray
|
|
The cdf evaluated at `X`.
|
|
|
|
Notes
|
|
-----
|
|
This function is just an alias for scipy.stats.norm.cdf
|
|
"""
|
|
return stats.norm._cdf(X)
|
|
|
|
def pdf(self, X):
|
|
"""
|
|
Probit (Normal) probability density function
|
|
|
|
Parameters
|
|
----------
|
|
X : array_like
|
|
The linear predictor of the model (XB).
|
|
|
|
Returns
|
|
-------
|
|
pdf : ndarray
|
|
The value of the normal density function for each point of X.
|
|
|
|
Notes
|
|
-----
|
|
This function is just an alias for scipy.stats.norm.pdf
|
|
"""
|
|
X = np.asarray(X)
|
|
return stats.norm._pdf(X)
|
|
|
|
|
|
def loglike(self, params):
|
|
"""
|
|
Log-likelihood of probit model (i.e., the normal distribution).
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the model.
|
|
|
|
Returns
|
|
-------
|
|
loglike : float
|
|
The log-likelihood function of the model evaluated at `params`.
|
|
See notes.
|
|
|
|
Notes
|
|
-----
|
|
.. math:: \\ln L=\\sum_{i}\\ln\\Phi\\left(q_{i}x_{i}^{\\prime}\\beta\\right)
|
|
|
|
Where :math:`q=2y-1`. This simplification comes from the fact that the
|
|
normal distribution is symmetric.
|
|
"""
|
|
|
|
q = 2*self.endog - 1
|
|
linpred = self.predict(params, which="linear")
|
|
return np.sum(np.log(np.clip(self.cdf(q * linpred), FLOAT_EPS, 1)))
|
|
|
|
def loglikeobs(self, params):
|
|
"""
|
|
Log-likelihood of probit model for each observation
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the model.
|
|
|
|
Returns
|
|
-------
|
|
loglike : array_like
|
|
The log likelihood for each observation of the model evaluated
|
|
at `params`. See Notes
|
|
|
|
Notes
|
|
-----
|
|
.. math:: \\ln L_{i}=\\ln\\Phi\\left(q_{i}x_{i}^{\\prime}\\beta\\right)
|
|
|
|
for observations :math:`i=1,...,n`
|
|
|
|
where :math:`q=2y-1`. This simplification comes from the fact that the
|
|
normal distribution is symmetric.
|
|
"""
|
|
|
|
q = 2*self.endog - 1
|
|
linpred = self.predict(params, which="linear")
|
|
return np.log(np.clip(self.cdf(q*linpred), FLOAT_EPS, 1))
|
|
|
|
|
|
def score(self, params):
|
|
"""
|
|
Probit model score (gradient) vector
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the model
|
|
|
|
Returns
|
|
-------
|
|
score : ndarray, 1-D
|
|
The score vector of the model, i.e. the first derivative of the
|
|
loglikelihood function, evaluated at `params`
|
|
|
|
Notes
|
|
-----
|
|
.. math:: \\frac{\\partial\\ln L}{\\partial\\beta}=\\sum_{i=1}^{n}\\left[\\frac{q_{i}\\phi\\left(q_{i}x_{i}^{\\prime}\\beta\\right)}{\\Phi\\left(q_{i}x_{i}^{\\prime}\\beta\\right)}\\right]x_{i}
|
|
|
|
Where :math:`q=2y-1`. This simplification comes from the fact that the
|
|
normal distribution is symmetric.
|
|
"""
|
|
y = self.endog
|
|
X = self.exog
|
|
XB = self.predict(params, which="linear")
|
|
q = 2*y - 1
|
|
# clip to get rid of invalid divide complaint
|
|
L = q*self.pdf(q*XB)/np.clip(self.cdf(q*XB), FLOAT_EPS, 1 - FLOAT_EPS)
|
|
return np.dot(L,X)
|
|
|
|
def score_obs(self, params):
|
|
"""
|
|
Probit model Jacobian for each observation
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the model
|
|
|
|
Returns
|
|
-------
|
|
jac : array_like
|
|
The derivative of the loglikelihood for each observation evaluated
|
|
at `params`.
|
|
|
|
Notes
|
|
-----
|
|
.. math:: \\frac{\\partial\\ln L_{i}}{\\partial\\beta}=\\left[\\frac{q_{i}\\phi\\left(q_{i}x_{i}^{\\prime}\\beta\\right)}{\\Phi\\left(q_{i}x_{i}^{\\prime}\\beta\\right)}\\right]x_{i}
|
|
|
|
for observations :math:`i=1,...,n`
|
|
|
|
Where :math:`q=2y-1`. This simplification comes from the fact that the
|
|
normal distribution is symmetric.
|
|
"""
|
|
y = self.endog
|
|
X = self.exog
|
|
XB = self.predict(params, which="linear")
|
|
q = 2*y - 1
|
|
# clip to get rid of invalid divide complaint
|
|
L = q*self.pdf(q*XB)/np.clip(self.cdf(q*XB), FLOAT_EPS, 1 - FLOAT_EPS)
|
|
return L[:,None] * X
|
|
|
|
def score_factor(self, params):
|
|
"""
|
|
Probit model derivative of the log-likelihood with respect to linpred.
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the model
|
|
|
|
Returns
|
|
-------
|
|
score_factor : array_like (nobs,)
|
|
The derivative of the loglikelihood function for each observation
|
|
with respect to linear predictor evaluated at `params`
|
|
|
|
Notes
|
|
-----
|
|
.. math:: \\frac{\\partial\\ln L_{i}}{\\partial\\beta}=\\left[\\frac{q_{i}\\phi\\left(q_{i}x_{i}^{\\prime}\\beta\\right)}{\\Phi\\left(q_{i}x_{i}^{\\prime}\\beta\\right)}\\right]x_{i}
|
|
|
|
for observations :math:`i=1,...,n`
|
|
|
|
Where :math:`q=2y-1`. This simplification comes from the fact that the
|
|
normal distribution is symmetric.
|
|
"""
|
|
y = self.endog
|
|
XB = self.predict(params, which="linear")
|
|
q = 2*y - 1
|
|
# clip to get rid of invalid divide complaint
|
|
L = q*self.pdf(q*XB)/np.clip(self.cdf(q*XB), FLOAT_EPS, 1 - FLOAT_EPS)
|
|
return L
|
|
|
|
|
|
def hessian(self, params):
|
|
"""
|
|
Probit model Hessian matrix of the log-likelihood
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the model
|
|
|
|
Returns
|
|
-------
|
|
hess : ndarray, (k_vars, k_vars)
|
|
The Hessian, second derivative of loglikelihood function,
|
|
evaluated at `params`
|
|
|
|
Notes
|
|
-----
|
|
.. math:: \\frac{\\partial^{2}\\ln L}{\\partial\\beta\\partial\\beta^{\\prime}}=-\\lambda_{i}\\left(\\lambda_{i}+x_{i}^{\\prime}\\beta\\right)x_{i}x_{i}^{\\prime}
|
|
|
|
where
|
|
|
|
.. math:: \\lambda_{i}=\\frac{q_{i}\\phi\\left(q_{i}x_{i}^{\\prime}\\beta\\right)}{\\Phi\\left(q_{i}x_{i}^{\\prime}\\beta\\right)}
|
|
|
|
and :math:`q=2y-1`
|
|
"""
|
|
X = self.exog
|
|
XB = self.predict(params, which="linear")
|
|
q = 2*self.endog - 1
|
|
L = q*self.pdf(q*XB)/self.cdf(q*XB)
|
|
return np.dot(-L*(L+XB)*X.T,X)
|
|
|
|
def hessian_factor(self, params):
|
|
"""
|
|
Probit model Hessian factor of the log-likelihood
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the model
|
|
|
|
Returns
|
|
-------
|
|
hess : ndarray, (nobs,)
|
|
The Hessian factor, second derivative of loglikelihood function
|
|
with respect to linear predictor evaluated at `params`
|
|
|
|
Notes
|
|
-----
|
|
.. math:: \\frac{\\partial^{2}\\ln L}{\\partial\\beta\\partial\\beta^{\\prime}}=-\\lambda_{i}\\left(\\lambda_{i}+x_{i}^{\\prime}\\beta\\right)x_{i}x_{i}^{\\prime}
|
|
|
|
where
|
|
|
|
.. math:: \\lambda_{i}=\\frac{q_{i}\\phi\\left(q_{i}x_{i}^{\\prime}\\beta\\right)}{\\Phi\\left(q_{i}x_{i}^{\\prime}\\beta\\right)}
|
|
|
|
and :math:`q=2y-1`
|
|
"""
|
|
XB = self.predict(params, which="linear")
|
|
q = 2 * self.endog - 1
|
|
L = q * self.pdf(q * XB) / self.cdf(q * XB)
|
|
return -L * (L + XB)
|
|
|
|
@Appender(DiscreteModel.fit.__doc__)
|
|
def fit(self, start_params=None, method='newton', maxiter=35,
|
|
full_output=1, disp=1, callback=None, **kwargs):
|
|
bnryfit = super().fit(start_params=start_params,
|
|
method=method,
|
|
maxiter=maxiter,
|
|
full_output=full_output,
|
|
disp=disp,
|
|
callback=callback,
|
|
**kwargs)
|
|
discretefit = ProbitResults(self, bnryfit)
|
|
return BinaryResultsWrapper(discretefit)
|
|
|
|
def _deriv_score_obs_dendog(self, params):
|
|
"""derivative of score_obs w.r.t. endog
|
|
|
|
Parameters
|
|
----------
|
|
params : ndarray
|
|
parameter at which score is evaluated
|
|
|
|
Returns
|
|
-------
|
|
derivative : ndarray_2d
|
|
The derivative of the score_obs with respect to endog. This
|
|
is given by `score_factor0[:, None] * exog` where
|
|
`score_factor0` is the score_factor without the residual.
|
|
"""
|
|
|
|
linpred = self.predict(params, which="linear")
|
|
|
|
pdf_ = self.pdf(linpred)
|
|
# clip to get rid of invalid divide complaint
|
|
cdf_ = np.clip(self.cdf(linpred), FLOAT_EPS, 1 - FLOAT_EPS)
|
|
deriv = pdf_ / cdf_ / (1 - cdf_) # deriv factor
|
|
return deriv[:, None] * self.exog
|
|
|
|
|
|
class MNLogit(MultinomialModel):
|
|
__doc__ = """
|
|
Multinomial Logit Model
|
|
|
|
Parameters
|
|
----------
|
|
endog : array_like
|
|
`endog` is an 1-d vector of the endogenous response. `endog` can
|
|
contain strings, ints, or floats or may be a pandas Categorical Series.
|
|
Note that if it contains strings, every distinct string will be a
|
|
category. No stripping of whitespace is done.
|
|
exog : array_like
|
|
A nobs x k array where `nobs` is the number of observations and `k`
|
|
is the number of regressors. An intercept is not included by default
|
|
and should be added by the user. See `statsmodels.tools.add_constant`.
|
|
{extra_params}
|
|
|
|
Attributes
|
|
----------
|
|
endog : ndarray
|
|
A reference to the endogenous response variable
|
|
exog : ndarray
|
|
A reference to the exogenous design.
|
|
J : float
|
|
The number of choices for the endogenous variable. Note that this
|
|
is zero-indexed.
|
|
K : float
|
|
The actual number of parameters for the exogenous design. Includes
|
|
the constant if the design has one.
|
|
names : dict
|
|
A dictionary mapping the column number in `wendog` to the variables
|
|
in `endog`.
|
|
wendog : ndarray
|
|
An n x j array where j is the number of unique categories in `endog`.
|
|
Each column of j is a dummy variable indicating the category of
|
|
each observation. See `names` for a dictionary mapping each column to
|
|
its category.
|
|
|
|
Notes
|
|
-----
|
|
See developer notes for further information on `MNLogit` internals.
|
|
""".format(extra_params=base._missing_param_doc + _check_rank_doc)
|
|
|
|
def __init__(self, endog, exog, check_rank=True, **kwargs):
|
|
super().__init__(endog, exog, check_rank=check_rank, **kwargs)
|
|
|
|
# Override cov_names since multivariate model
|
|
yname = self.endog_names
|
|
ynames = self._ynames_map
|
|
ynames = MultinomialResults._maybe_convert_ynames_int(ynames)
|
|
# use range below to ensure sortedness
|
|
ynames = [ynames[key] for key in range(int(self.J))]
|
|
idx = MultiIndex.from_product((ynames[1:], self.data.xnames),
|
|
names=(yname, None))
|
|
self.data.cov_names = idx
|
|
|
|
def pdf(self, eXB):
|
|
"""
|
|
NotImplemented
|
|
"""
|
|
raise NotImplementedError
|
|
|
|
def cdf(self, X):
|
|
"""
|
|
Multinomial logit cumulative distribution function.
|
|
|
|
Parameters
|
|
----------
|
|
X : ndarray
|
|
The linear predictor of the model XB.
|
|
|
|
Returns
|
|
-------
|
|
cdf : ndarray
|
|
The cdf evaluated at `X`.
|
|
|
|
Notes
|
|
-----
|
|
In the multinomial logit model.
|
|
.. math:: \\frac{\\exp\\left(\\beta_{j}^{\\prime}x_{i}\\right)}{\\sum_{k=0}^{J}\\exp\\left(\\beta_{k}^{\\prime}x_{i}\\right)}
|
|
"""
|
|
eXB = np.column_stack((np.ones(len(X)), np.exp(X)))
|
|
return eXB/eXB.sum(1)[:,None]
|
|
|
|
def loglike(self, params):
|
|
"""
|
|
Log-likelihood of the multinomial logit model.
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the multinomial logit model.
|
|
|
|
Returns
|
|
-------
|
|
loglike : float
|
|
The log-likelihood function of the model evaluated at `params`.
|
|
See notes.
|
|
|
|
Notes
|
|
-----
|
|
.. math::
|
|
|
|
\\ln L=\\sum_{i=1}^{n}\\sum_{j=0}^{J}d_{ij}\\ln
|
|
\\left(\\frac{\\exp\\left(\\beta_{j}^{\\prime}x_{i}\\right)}
|
|
{\\sum_{k=0}^{J}
|
|
\\exp\\left(\\beta_{k}^{\\prime}x_{i}\\right)}\\right)
|
|
|
|
where :math:`d_{ij}=1` if individual `i` chose alternative `j` and 0
|
|
if not.
|
|
"""
|
|
params = params.reshape(self.K, -1, order='F')
|
|
d = self.wendog
|
|
logprob = np.log(self.cdf(np.dot(self.exog,params)))
|
|
return np.sum(d * logprob)
|
|
|
|
def loglikeobs(self, params):
|
|
"""
|
|
Log-likelihood of the multinomial logit model for each observation.
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the multinomial logit model.
|
|
|
|
Returns
|
|
-------
|
|
loglike : array_like
|
|
The log likelihood for each observation of the model evaluated
|
|
at `params`. See Notes
|
|
|
|
Notes
|
|
-----
|
|
.. math::
|
|
|
|
\\ln L_{i}=\\sum_{j=0}^{J}d_{ij}\\ln
|
|
\\left(\\frac{\\exp\\left(\\beta_{j}^{\\prime}x_{i}\\right)}
|
|
{\\sum_{k=0}^{J}
|
|
\\exp\\left(\\beta_{k}^{\\prime}x_{i}\\right)}\\right)
|
|
|
|
for observations :math:`i=1,...,n`
|
|
|
|
where :math:`d_{ij}=1` if individual `i` chose alternative `j` and 0
|
|
if not.
|
|
"""
|
|
params = params.reshape(self.K, -1, order='F')
|
|
d = self.wendog
|
|
logprob = np.log(self.cdf(np.dot(self.exog,params)))
|
|
return d * logprob
|
|
|
|
def score(self, params):
|
|
"""
|
|
Score matrix for multinomial logit model log-likelihood
|
|
|
|
Parameters
|
|
----------
|
|
params : ndarray
|
|
The parameters of the multinomial logit model.
|
|
|
|
Returns
|
|
-------
|
|
score : ndarray, (K * (J-1),)
|
|
The 2-d score vector, i.e. the first derivative of the
|
|
loglikelihood function, of the multinomial logit model evaluated at
|
|
`params`.
|
|
|
|
Notes
|
|
-----
|
|
.. math:: \\frac{\\partial\\ln L}{\\partial\\beta_{j}}=\\sum_{i}\\left(d_{ij}-\\frac{\\exp\\left(\\beta_{j}^{\\prime}x_{i}\\right)}{\\sum_{k=0}^{J}\\exp\\left(\\beta_{k}^{\\prime}x_{i}\\right)}\\right)x_{i}
|
|
|
|
for :math:`j=1,...,J`
|
|
|
|
In the multinomial model the score matrix is K x J-1 but is returned
|
|
as a flattened array to work with the solvers.
|
|
"""
|
|
params = params.reshape(self.K, -1, order='F')
|
|
firstterm = self.wendog[:,1:] - self.cdf(np.dot(self.exog,
|
|
params))[:,1:]
|
|
#NOTE: might need to switch terms if params is reshaped
|
|
return np.dot(firstterm.T, self.exog).flatten()
|
|
|
|
def loglike_and_score(self, params):
|
|
"""
|
|
Returns log likelihood and score, efficiently reusing calculations.
|
|
|
|
Note that both of these returned quantities will need to be negated
|
|
before being minimized by the maximum likelihood fitting machinery.
|
|
"""
|
|
params = params.reshape(self.K, -1, order='F')
|
|
cdf_dot_exog_params = self.cdf(np.dot(self.exog, params))
|
|
loglike_value = np.sum(self.wendog * np.log(cdf_dot_exog_params))
|
|
firstterm = self.wendog[:, 1:] - cdf_dot_exog_params[:, 1:]
|
|
score_array = np.dot(firstterm.T, self.exog).flatten()
|
|
return loglike_value, score_array
|
|
|
|
def score_obs(self, params):
|
|
"""
|
|
Jacobian matrix for multinomial logit model log-likelihood
|
|
|
|
Parameters
|
|
----------
|
|
params : ndarray
|
|
The parameters of the multinomial logit model.
|
|
|
|
Returns
|
|
-------
|
|
jac : array_like
|
|
The derivative of the loglikelihood for each observation evaluated
|
|
at `params` .
|
|
|
|
Notes
|
|
-----
|
|
.. math:: \\frac{\\partial\\ln L_{i}}{\\partial\\beta_{j}}=\\left(d_{ij}-\\frac{\\exp\\left(\\beta_{j}^{\\prime}x_{i}\\right)}{\\sum_{k=0}^{J}\\exp\\left(\\beta_{k}^{\\prime}x_{i}\\right)}\\right)x_{i}
|
|
|
|
for :math:`j=1,...,J`, for observations :math:`i=1,...,n`
|
|
|
|
In the multinomial model the score vector is K x (J-1) but is returned
|
|
as a flattened array. The Jacobian has the observations in rows and
|
|
the flattened array of derivatives in columns.
|
|
"""
|
|
params = params.reshape(self.K, -1, order='F')
|
|
firstterm = self.wendog[:,1:] - self.cdf(np.dot(self.exog,
|
|
params))[:,1:]
|
|
#NOTE: might need to switch terms if params is reshaped
|
|
return (firstterm[:,:,None] * self.exog[:,None,:]).reshape(self.exog.shape[0], -1)
|
|
|
|
def hessian(self, params):
|
|
"""
|
|
Multinomial logit Hessian matrix of the log-likelihood
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the model
|
|
|
|
Returns
|
|
-------
|
|
hess : ndarray, ((J-1)*K, (J-1)*K)
|
|
The Hessian, second derivative of loglikelihood function with
|
|
respect to the flattened parameters, evaluated at `params`
|
|
|
|
Notes
|
|
-----
|
|
.. math:: \\frac{\\partial^{2}\\ln L}{\\partial\\beta_{j}\\partial\\beta_{l}}=-\\sum_{i=1}^{n}\\frac{\\exp\\left(\\beta_{j}^{\\prime}x_{i}\\right)}{\\sum_{k=0}^{J}\\exp\\left(\\beta_{k}^{\\prime}x_{i}\\right)}\\left[\\boldsymbol{1}\\left(j=l\\right)-\\frac{\\exp\\left(\\beta_{l}^{\\prime}x_{i}\\right)}{\\sum_{k=0}^{J}\\exp\\left(\\beta_{k}^{\\prime}x_{i}\\right)}\\right]x_{i}x_{l}^{\\prime}
|
|
|
|
where
|
|
:math:`\\boldsymbol{1}\\left(j=l\\right)` equals 1 if `j` = `l` and 0
|
|
otherwise.
|
|
|
|
The actual Hessian matrix has (J-1)**2 blocks, each K x K. Our Hessian
|
|
is reshaped to be square ((J-1)*K, (J-1)*K) so that the solvers can use it.
|
|
|
|
This implementation does not take advantage of the symmetry of
|
|
the Hessian and could probably be refactored for speed.
|
|
"""
|
|
params = params.reshape(self.K, -1, order='F')
|
|
X = self.exog
|
|
pr = self.cdf(np.dot(X,params))
|
|
partials = []
|
|
J = self.J
|
|
K = self.K
|
|
for i in range(J-1):
|
|
for j in range(J-1): # this loop assumes we drop the first col.
|
|
if i == j:
|
|
partials.append(\
|
|
-np.dot(((pr[:,i+1]*(1-pr[:,j+1]))[:,None]*X).T,X))
|
|
else:
|
|
partials.append(-np.dot(((pr[:,i+1]*-pr[:,j+1])[:,None]*X).T,X))
|
|
H = np.array(partials)
|
|
# the developer's notes on multinomial should clear this math up
|
|
H = np.transpose(H.reshape(J-1, J-1, K, K), (0, 2, 1, 3)).reshape((J-1)*K, (J-1)*K)
|
|
return H
|
|
|
|
|
|
#TODO: Weibull can be replaced by a survival analysis function
|
|
# like Stata's streg (the Cox model as well)
|
|
#class Weibull(DiscreteModel):
|
|
# """
|
|
# Binary choice Weibull model
|
|
#
|
|
# Notes
|
|
# ------
|
|
# This is unfinished and untested.
|
|
# """
|
|
##TODO: add analytic hessian for Weibull
|
|
# def initialize(self):
|
|
# pass
|
|
#
|
|
# def cdf(self, X):
|
|
# """
|
|
# Gumbel (Log Weibull) cumulative distribution function
|
|
# """
|
|
## return np.exp(-np.exp(-X))
|
|
# return stats.gumbel_r.cdf(X)
|
|
# # these two are equivalent.
|
|
# # Greene table and discussion is incorrect.
|
|
#
|
|
# def pdf(self, X):
|
|
# """
|
|
# Gumbel (Log Weibull) probability density function
|
|
# """
|
|
# return stats.gumbel_r.pdf(X)
|
|
#
|
|
# def loglike(self, params):
|
|
# """
|
|
# Loglikelihood of Weibull distribution
|
|
# """
|
|
# X = self.exog
|
|
# cdf = self.cdf(np.dot(X,params))
|
|
# y = self.endog
|
|
# return np.sum(y*np.log(cdf) + (1-y)*np.log(1-cdf))
|
|
#
|
|
# def score(self, params):
|
|
# y = self.endog
|
|
# X = self.exog
|
|
# F = self.cdf(np.dot(X,params))
|
|
# f = self.pdf(np.dot(X,params))
|
|
# term = (y*f/F + (1 - y)*-f/(1-F))
|
|
# return np.dot(term,X)
|
|
#
|
|
# def hessian(self, params):
|
|
# hess = nd.Jacobian(self.score)
|
|
# return hess(params)
|
|
#
|
|
# def fit(self, start_params=None, method='newton', maxiter=35, tol=1e-08):
|
|
## The example had problems with all zero start values, Hessian = 0
|
|
# if start_params is None:
|
|
# start_params = OLS(self.endog, self.exog).fit().params
|
|
# mlefit = super(Weibull, self).fit(start_params=start_params,
|
|
# method=method, maxiter=maxiter, tol=tol)
|
|
# return mlefit
|
|
#
|
|
|
|
|
|
class NegativeBinomial(CountModel):
|
|
__doc__ = """
|
|
Negative Binomial Model
|
|
|
|
%(params)s
|
|
%(extra_params)s
|
|
|
|
Attributes
|
|
----------
|
|
endog : ndarray
|
|
A reference to the endogenous response variable
|
|
exog : ndarray
|
|
A reference to the exogenous design.
|
|
|
|
References
|
|
----------
|
|
Greene, W. 2008. "Functional forms for the negative binomial model
|
|
for count data". Economics Letters. Volume 99, Number 3, pp.585-590.
|
|
Hilbe, J.M. 2011. "Negative binomial regression". Cambridge University
|
|
Press.
|
|
""" % {'params': base._model_params_doc,
|
|
'extra_params':
|
|
"""loglike_method : str
|
|
Log-likelihood type. 'nb2','nb1', or 'geometric'.
|
|
Fitted value :math:`\\mu`
|
|
Heterogeneity parameter :math:`\\alpha`
|
|
|
|
- nb2: Variance equal to :math:`\\mu + \\alpha\\mu^2` (most common)
|
|
- nb1: Variance equal to :math:`\\mu + \\alpha\\mu`
|
|
- geometric: Variance equal to :math:`\\mu + \\mu^2`
|
|
offset : array_like
|
|
Offset is added to the linear prediction with coefficient equal to 1.
|
|
exposure : array_like
|
|
Log(exposure) is added to the linear prediction with coefficient
|
|
equal to 1.
|
|
""" + base._missing_param_doc + _check_rank_doc}
|
|
|
|
def __init__(self, endog, exog, loglike_method='nb2', offset=None,
|
|
exposure=None, missing='none', check_rank=True, **kwargs):
|
|
super().__init__(endog,
|
|
exog,
|
|
offset=offset,
|
|
exposure=exposure,
|
|
missing=missing,
|
|
check_rank=check_rank,
|
|
**kwargs)
|
|
self.loglike_method = loglike_method
|
|
self._initialize()
|
|
if loglike_method in ['nb2', 'nb1']:
|
|
self.exog_names.append('alpha')
|
|
self.k_extra = 1
|
|
else:
|
|
self.k_extra = 0
|
|
# store keys for extras if we need to recreate model instance
|
|
# we need to append keys that do not go to super
|
|
self._init_keys.append('loglike_method')
|
|
|
|
def _initialize(self):
|
|
if self.loglike_method == 'nb2':
|
|
self.hessian = self._hessian_nb2
|
|
self.score = self._score_nbin
|
|
self.loglikeobs = self._ll_nb2
|
|
self._transparams = True # transform lnalpha -> alpha in fit
|
|
elif self.loglike_method == 'nb1':
|
|
self.hessian = self._hessian_nb1
|
|
self.score = self._score_nb1
|
|
self.loglikeobs = self._ll_nb1
|
|
self._transparams = True # transform lnalpha -> alpha in fit
|
|
elif self.loglike_method == 'geometric':
|
|
self.hessian = self._hessian_geom
|
|
self.score = self._score_geom
|
|
self.loglikeobs = self._ll_geometric
|
|
else:
|
|
raise ValueError('Likelihood type must be "nb1", "nb2" '
|
|
'or "geometric"')
|
|
|
|
# Workaround to pickle instance methods
|
|
def __getstate__(self):
|
|
odict = self.__dict__.copy() # copy the dict since we change it
|
|
del odict['hessian']
|
|
del odict['score']
|
|
del odict['loglikeobs']
|
|
return odict
|
|
|
|
def __setstate__(self, indict):
|
|
self.__dict__.update(indict)
|
|
self._initialize()
|
|
|
|
def _ll_nbin(self, params, alpha, Q=0):
|
|
if np.any(np.iscomplex(params)) or np.iscomplex(alpha):
|
|
gamma_ln = loggamma
|
|
else:
|
|
gamma_ln = gammaln
|
|
endog = self.endog
|
|
mu = self.predict(params)
|
|
size = 1/alpha * mu**Q
|
|
prob = size/(size+mu)
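# size and prob correspond to scipy.stats.nbinom's (n, p) parameters;
# the implied mean is size * (1 - prob) / prob = mu.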
|
|
coeff = (gamma_ln(size+endog) - gamma_ln(endog+1) -
|
|
gamma_ln(size))
|
|
llf = coeff + size*np.log(prob) + endog*np.log(1-prob)
|
|
return llf
|
|
|
|
def _ll_nb2(self, params):
|
|
if self._transparams: # got lnalpha during fit
|
|
alpha = np.exp(params[-1])
|
|
else:
|
|
alpha = params[-1]
|
|
return self._ll_nbin(params[:-1], alpha, Q=0)
|
|
|
|
def _ll_nb1(self, params):
|
|
if self._transparams: # got lnalpha during fit
|
|
alpha = np.exp(params[-1])
|
|
else:
|
|
alpha = params[-1]
|
|
return self._ll_nbin(params[:-1], alpha, Q=1)
|
|
|
|
def _ll_geometric(self, params):
|
|
# the geometric model is NB2 with the dispersion alpha fixed at 1 (lnalpha = 0)
|
|
return self._ll_nbin(params, 1, 0)
|
|
|
|
def loglike(self, params):
|
|
r"""
|
|
Loglikelihood for negative binomial model
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the model. If `loglike_method` is nb1 or
|
|
nb2, then the ancillary parameter is expected to be the
|
|
last element.
|
|
|
|
Returns
|
|
-------
|
|
llf : float
|
|
The loglikelihood value at `params`
|
|
|
|
Notes
|
|
-----
|
|
Following notation in Greene (2008), with negative binomial
|
|
heterogeneity parameter :math:`\alpha`:
|
|
|
|
.. math::
|
|
|
|
\lambda_i &= exp(X\beta) \\
|
|
\theta &= 1 / \alpha \\
|
|
g_i &= \theta \lambda_i^Q \\
|
|
w_i &= g_i/(g_i + \lambda_i) \\
|
|
r_i &= \theta / (\theta+\lambda_i) \\
|
|
ln \mathcal{L}_i &= ln \Gamma(y_i+g_i) - ln \Gamma(1+y_i) - ln \Gamma(g_i) + g_i ln(w_i) + y_i ln(1-w_i)
|
|
|
|
where :math:`Q=0` for NB2 and geometric and :math:`Q=1` for NB1.
|
|
For the geometric, :math:`\alpha=0` as well.
|
|
"""
|
|
llf = np.sum(self.loglikeobs(params))
|
|
return llf
|
|
|
|
def _score_geom(self, params):
|
|
exog = self.exog
|
|
y = self.endog[:, None]
|
|
mu = self.predict(params)[:, None]
|
|
dparams = exog * (y-mu)/(mu+1)
|
|
return dparams.sum(0)
|
|
|
|
def _score_nbin(self, params, Q=0):
|
|
"""
|
|
Score vector for NB2 model
|
|
"""
|
|
if self._transparams: # lnalpha came in during fit
|
|
alpha = np.exp(params[-1])
|
|
else:
|
|
alpha = params[-1]
|
|
params = params[:-1]
|
|
exog = self.exog
|
|
y = self.endog[:,None]
|
|
mu = self.predict(params)[:,None]
|
|
a1 = 1/alpha * mu**Q
|
|
prob = a1 / (a1 + mu) # a1 aka "size" in _ll_nbin
|
|
if Q == 1: # nb1
|
|
# Q == 1 --> a1 = mu / alpha --> prob = 1 / (alpha + 1)
|
|
dgpart = digamma(y + a1) - digamma(a1)
|
|
dparams = exog * a1 * (np.log(prob) +
|
|
dgpart)
|
|
dalpha = ((alpha * (y - mu * np.log(prob) -
|
|
mu*(dgpart + 1)) -
|
|
mu * (np.log(prob) +
|
|
dgpart))/
|
|
(alpha**2*(alpha + 1))).sum()
|
|
|
|
elif Q == 0: # nb2
|
|
dgpart = digamma(y + a1) - digamma(a1)
|
|
dparams = exog*a1 * (y-mu)/(mu+a1)
|
|
da1 = -alpha**-2
|
|
dalpha = (dgpart + np.log(a1)
|
|
- np.log(a1+mu) - (y-mu)/(a1+mu)).sum() * da1
|
|
|
|
#multiply above by constant outside sum to reduce rounding error
|
|
if self._transparams:
|
|
return np.r_[dparams.sum(0), dalpha*alpha]
|
|
else:
|
|
return np.r_[dparams.sum(0), dalpha]
|
|
|
|
def _score_nb1(self, params):
|
|
return self._score_nbin(params, Q=1)
|
|
|
|
def _hessian_geom(self, params):
|
|
exog = self.exog
|
|
y = self.endog[:,None]
|
|
mu = self.predict(params)[:,None]
|
|
|
|
# for dl/dparams dparams
|
|
dim = exog.shape[1]
|
|
hess_arr = np.empty((dim, dim))
|
|
const_arr = mu*(1+y)/(mu+1)**2
|
|
for i in range(dim):
|
|
for j in range(dim):
|
|
if j > i:
|
|
continue
|
|
hess_arr[i,j] = np.squeeze(
|
|
np.sum(-exog[:,i,None] * exog[:,j,None] * const_arr,
|
|
axis=0
|
|
)
|
|
)
|
|
tri_idx = np.triu_indices(dim, k=1)
|
|
hess_arr[tri_idx] = hess_arr.T[tri_idx]
|
|
return hess_arr
|
|
|
|
|
|
def _hessian_nb1(self, params):
|
|
"""
|
|
Hessian of NB1 model.
|
|
"""
|
|
if self._transparams: # lnalpha came in during fit
|
|
alpha = np.exp(params[-1])
|
|
else:
|
|
alpha = params[-1]
|
|
|
|
params = params[:-1]
|
|
exog = self.exog
|
|
y = self.endog[:,None]
|
|
mu = self.predict(params)[:,None]
|
|
|
|
a1 = mu/alpha
|
|
dgpart = digamma(y + a1) - digamma(a1)
|
|
prob = 1 / (1 + alpha) # equiv: a1 / (a1 + mu)
|
|
|
|
# for dl/dparams dparams
|
|
dim = exog.shape[1]
|
|
hess_arr = np.empty((dim+1,dim+1))
|
|
#const_arr = a1*mu*(a1+y)/(mu+a1)**2
|
|
# not all of dparams
|
|
dparams = exog / alpha * (np.log(prob) +
|
|
dgpart)
|
|
|
|
dmudb = exog*mu
|
|
xmu_alpha = exog * a1
|
|
trigamma = (special.polygamma(1, a1 + y) -
|
|
special.polygamma(1, a1))
|
|
for i in range(dim):
|
|
for j in range(dim):
|
|
if j > i:
|
|
continue
|
|
hess_arr[i,j] = np.squeeze(
|
|
np.sum(
|
|
dparams[:,i,None] * dmudb[:,j,None] +
|
|
xmu_alpha[:,i,None] * xmu_alpha[:,j,None] * trigamma,
|
|
axis=0
|
|
)
|
|
)
|
|
tri_idx = np.triu_indices(dim, k=1)
|
|
hess_arr[tri_idx] = hess_arr.T[tri_idx]
|
|
|
|
# for dl/dparams dalpha
|
|
# da1 = -alpha**-2
|
|
dldpda = np.sum(-a1 * dparams + exog * a1 *
|
|
(-trigamma*mu/alpha**2 - prob), axis=0)
|
|
|
|
hess_arr[-1,:-1] = dldpda
|
|
hess_arr[:-1,-1] = dldpda
|
|
|
|
log_alpha = np.log(prob)
|
|
alpha3 = alpha**3
|
|
alpha2 = alpha**2
|
|
mu2 = mu**2
|
|
dada = ((alpha3*mu*(2*log_alpha + 2*dgpart + 3) -
|
|
2*alpha3*y +
|
|
4*alpha2*mu*(log_alpha + dgpart) +
|
|
alpha2 * (2*mu - y) +
|
|
2*alpha*mu2*trigamma + mu2 * trigamma + alpha2 * mu2 * trigamma +
|
|
2*alpha*mu*(log_alpha + dgpart)
|
|
)/(alpha**4*(alpha2 + 2*alpha + 1)))
|
|
hess_arr[-1,-1] = dada.sum()
|
|
|
|
return hess_arr
|
|
|
|
def _hessian_nb2(self, params):
|
|
"""
|
|
Hessian of NB2 model.
|
|
"""
|
|
if self._transparams: # lnalpha came in during fit
|
|
alpha = np.exp(params[-1])
|
|
else:
|
|
alpha = params[-1]
|
|
a1 = 1/alpha
|
|
params = params[:-1]
|
|
|
|
exog = self.exog
|
|
y = self.endog[:,None]
|
|
mu = self.predict(params)[:,None]
|
|
prob = a1 / (a1 + mu)
|
|
dgpart = digamma(a1 + y) - digamma(a1)
|
|
|
|
# for dl/dparams dparams
|
|
dim = exog.shape[1]
|
|
hess_arr = np.empty((dim+1,dim+1))
|
|
const_arr = a1*mu*(a1+y)/(mu+a1)**2
|
|
for i in range(dim):
|
|
for j in range(dim):
|
|
if j > i:
|
|
continue
|
|
hess_arr[i,j] = np.sum(-exog[:,i,None] * exog[:,j,None] *
|
|
const_arr, axis=0).squeeze()
|
|
tri_idx = np.triu_indices(dim, k=1)
|
|
hess_arr[tri_idx] = hess_arr.T[tri_idx]
|
|
|
|
# for dl/dparams dalpha
|
|
da1 = -alpha**-2
|
|
dldpda = -np.sum(mu*exog*(y-mu)*a1**2/(mu+a1)**2 , axis=0)
|
|
hess_arr[-1,:-1] = dldpda
|
|
hess_arr[:-1,-1] = dldpda
|
|
|
|
# for dl/dalpha dalpha
|
|
#NOTE: polygamma(1,x) is the trigamma function
|
|
da2 = 2*alpha**-3
|
|
dalpha = da1 * (dgpart +
|
|
np.log(prob) - (y - mu)/(a1+mu))
|
|
dada = (da2 * dalpha/da1 + da1**2 * (special.polygamma(1, a1+y) -
|
|
special.polygamma(1, a1) + 1/a1 - 1/(a1 + mu) +
|
|
(y - mu)/(mu + a1)**2)).sum()
|
|
hess_arr[-1,-1] = dada
|
|
|
|
return hess_arr
|
|
|
|
#TODO: replace this with analytic where is it used?
|
|
def score_obs(self, params):
|
|
sc = approx_fprime_cs(params, self.loglikeobs)
|
|
return sc
|
|
|
|
@Appender(Poisson.predict.__doc__)
|
|
def predict(self, params, exog=None, exposure=None, offset=None,
|
|
which='mean', linear=None, y_values=None):
|
|
|
|
if linear is not None:
|
|
msg = 'linear keyword is deprecated, use which="linear"'
|
|
warnings.warn(msg, FutureWarning)
|
|
if linear is True:
|
|
which = "linear"
|
|
|
|
# avoid duplicate computation for get-distribution
|
|
if which == "prob":
|
|
distr = self.get_distribution(
|
|
params,
|
|
exog=exog,
|
|
exposure=exposure,
|
|
offset=offset
|
|
)
|
|
if y_values is None:
|
|
y_values = np.arange(0, np.max(self.endog) + 1)
|
|
else:
|
|
y_values = np.asarray(y_values)
|
|
|
|
assert y_values.ndim == 1
|
|
y_values = y_values[..., None]
|
|
return distr.pmf(y_values).T
|
|
|
|
exog, offset, exposure = self._get_predict_arrays(
|
|
exog=exog,
|
|
offset=offset,
|
|
exposure=exposure
|
|
)
|
|
|
|
fitted = np.dot(exog, params[:exog.shape[1]])
|
|
linpred = fitted + exposure + offset
|
|
if which == "mean":
|
|
return np.exp(linpred)
|
|
elif which.startswith("lin"):
|
|
return linpred
|
|
elif which == "var":
|
|
mu = np.exp(linpred)
|
|
if self.loglike_method == 'geometric':
|
|
var_ = mu * (1 + mu)
|
|
else:
|
|
if self.loglike_method == 'nb2':
|
|
p = 2
|
|
elif self.loglike_method == 'nb1':
|
|
p = 1
|
|
alpha = params[-1]
|
|
var_ = mu * (1 + alpha * mu**(p - 1))
|
|
return var_
|
|
else:
|
|
raise ValueError('keyword "which" has to be "mean", "linear", "var" or "prob"')
|
|
|
|
@Appender(_get_start_params_null_docs)
|
|
def _get_start_params_null(self):
|
|
offset = getattr(self, "offset", 0)
|
|
exposure = getattr(self, "exposure", 0)
|
|
const = (self.endog / np.exp(offset + exposure)).mean()
|
|
params = [np.log(const)]
|
|
mu = const * np.exp(offset + exposure)
|
|
resid = self.endog - mu
|
|
a = self._estimate_dispersion(mu, resid, df_resid=resid.shape[0] - 1)
|
|
params.append(a)
|
|
return np.array(params)
|
|
|
|
def _estimate_dispersion(self, mu, resid, df_resid=None):
|
|
if df_resid is None:
|
|
df_resid = resid.shape[0]
|
|
if self.loglike_method == 'nb2':
|
|
#params.append(np.linalg.pinv(mu[:,None]).dot(resid**2 / mu - 1))
|
|
a = ((resid**2 / mu - 1) / mu).sum() / df_resid
|
|
else: #self.loglike_method == 'nb1':
|
|
a = (resid**2 / mu - 1).sum() / df_resid
|
|
return a
|
|
|
|
def fit(self, start_params=None, method='bfgs', maxiter=35,
|
|
full_output=1, disp=1, callback=None,
|
|
cov_type='nonrobust', cov_kwds=None, use_t=None,
|
|
optim_kwds_prelim=None, **kwargs):
|
|
|
|
# Note: do not let super handle robust covariance because it has
|
|
# transformed params
|
|
self._transparams = False # always define attribute
|
|
if self.loglike_method.startswith('nb') and method not in ['newton',
|
|
'ncg']:
|
|
self._transparams = True # in case same Model instance is refit
|
|
elif self.loglike_method.startswith('nb'): # method is newton/ncg
|
|
self._transparams = False # because we need to step in alpha space
|
|
|
|
if start_params is None:
|
|
# Use poisson fit as first guess.
|
|
#TODO, Warning: this assumes exposure is logged
|
|
offset = getattr(self, "offset", 0) + getattr(self, "exposure", 0)
|
|
if np.size(offset) == 1 and offset == 0:
|
|
offset = None
|
|
kwds_prelim = {'disp': 0, 'skip_hessian': True, 'warn_convergence': False}
|
|
if optim_kwds_prelim is not None:
|
|
kwds_prelim.update(optim_kwds_prelim)
|
|
mod_poi = Poisson(self.endog, self.exog, offset=offset)
|
|
with warnings.catch_warnings():
|
|
warnings.simplefilter("always")
|
|
res_poi = mod_poi.fit(**kwds_prelim)
|
|
start_params = res_poi.params
|
|
if self.loglike_method.startswith('nb'):
|
|
a = self._estimate_dispersion(res_poi.predict(), res_poi.resid,
|
|
df_resid=res_poi.df_resid)
|
|
start_params = np.append(start_params, max(0.05, a))
|
|
else:
|
|
if self._transparams is True:
|
|
# transform user provided start_params dispersion, see #3918
|
|
start_params = np.array(start_params, copy=True)
|
|
start_params[-1] = np.log(start_params[-1])
|
|
|
|
if callback is None:
|
|
# work around perfect separation callback #3895
|
|
callback = lambda *x: x
|
|
|
|
mlefit = super().fit(start_params=start_params,
|
|
maxiter=maxiter, method=method, disp=disp,
|
|
full_output=full_output, callback=callback,
|
|
**kwargs)
|
|
if optim_kwds_prelim is not None:
|
|
mlefit.mle_settings["optim_kwds_prelim"] = optim_kwds_prelim
|
|
# TODO: Fix NBin _check_perfect_pred
|
|
if self.loglike_method.startswith('nb'):
|
|
# mlefit is a wrapped counts results
|
|
self._transparams = False # do not need to transform anymore now
|
|
# change from lnalpha to alpha
|
|
if method not in ["newton", "ncg"]:
|
|
mlefit._results.params[-1] = np.exp(mlefit._results.params[-1])
|
|
|
|
nbinfit = NegativeBinomialResults(self, mlefit._results)
|
|
result = NegativeBinomialResultsWrapper(nbinfit)
|
|
else:
|
|
result = mlefit
|
|
|
|
if cov_kwds is None:
|
|
cov_kwds = {} #TODO: make this unnecessary ?
|
|
result._get_robustcov_results(cov_type=cov_type, use_self=True, use_t=use_t, **cov_kwds)
|
|
return result
|
|
|
|
|
|
def fit_regularized(self, start_params=None, method='l1',
|
|
maxiter='defined_by_method', full_output=1, disp=1, callback=None,
|
|
alpha=0, trim_mode='auto', auto_trim_tol=0.01, size_trim_tol=1e-4,
|
|
qc_tol=0.03, **kwargs):
|
|
|
|
_validate_l1_method(method)
|
|
|
|
if self.loglike_method.startswith('nb') and (np.size(alpha) == 1 and
|
|
alpha != 0):
|
|
# do not penalize alpha if alpha is scalar
|
|
k_params = self.exog.shape[1] + self.k_extra
|
|
alpha = alpha * np.ones(k_params)
|
|
alpha[-1] = 0
|
|
|
|
# alpha for regularized poisson to get starting values
|
|
alpha_p = alpha[:-1] if (self.k_extra and np.size(alpha) > 1) else alpha
|
|
|
|
self._transparams = False
|
|
if start_params is None:
|
|
# Use poisson fit as first guess.
|
|
#TODO, Warning: this assumes exposure is logged
|
|
offset = getattr(self, "offset", 0) + getattr(self, "exposure", 0)
|
|
if np.size(offset) == 1 and offset == 0:
|
|
offset = None
|
|
mod_poi = Poisson(self.endog, self.exog, offset=offset)
|
|
with warnings.catch_warnings():
|
|
warnings.simplefilter("always")
|
|
start_params = mod_poi.fit_regularized(
|
|
start_params=start_params, method=method, maxiter=maxiter,
|
|
full_output=full_output, disp=0, callback=callback,
|
|
alpha=alpha_p, trim_mode=trim_mode,
|
|
auto_trim_tol=auto_trim_tol, size_trim_tol=size_trim_tol,
|
|
qc_tol=qc_tol, **kwargs).params
|
|
if self.loglike_method.startswith('nb'):
|
|
start_params = np.append(start_params, 0.1)
|
|
|
|
cntfit = super(CountModel, self).fit_regularized(
|
|
start_params=start_params, method=method, maxiter=maxiter,
|
|
full_output=full_output, disp=disp, callback=callback,
|
|
alpha=alpha, trim_mode=trim_mode, auto_trim_tol=auto_trim_tol,
|
|
size_trim_tol=size_trim_tol, qc_tol=qc_tol, **kwargs)
|
|
|
|
discretefit = L1NegativeBinomialResults(self, cntfit)
|
|
return L1NegativeBinomialResultsWrapper(discretefit)
|
|
|
|
@Appender(Poisson.get_distribution.__doc__)
|
|
def get_distribution(self, params, exog=None, exposure=None, offset=None):
|
|
"""get frozen instance of distribution
|
|
"""
|
|
mu = self.predict(params, exog=exog, exposure=exposure, offset=offset)
|
|
if self.loglike_method == 'geometric':
|
|
# distr = stats.geom(1 / (1 + mu[:, None]), loc=-1)
|
|
distr = stats.geom(1 / (1 + mu), loc=-1)
|
|
else:
|
|
if self.loglike_method == 'nb2':
|
|
p = 2
|
|
elif self.loglike_method == 'nb1':
|
|
p = 1
|
|
|
|
alpha = params[-1]
|
|
q = 2 - p
|
|
size = 1. / alpha * mu**q
|
|
prob = size / (size + mu)
|
|
# distr = nbinom(size[:, None], prob[:, None])
|
|
distr = nbinom(size, prob)
|
|
|
|
return distr
|
|
|
|
|
|
class NegativeBinomialP(CountModel):
|
|
__doc__ = """
|
|
Generalized Negative Binomial (NB-P) Model
|
|
|
|
%(params)s
|
|
%(extra_params)s
|
|
|
|
Attributes
|
|
----------
|
|
endog : ndarray
|
|
A reference to the endogenous response variable
|
|
exog : ndarray
|
|
A reference to the exogenous design.
|
|
p : scalar
|
|
P denotes parameterizations for NB-P regression. p=1 for NB-1 and
p=2 for NB-2. Default is p=2.
""" % {'params': base._model_params_doc,
|
|
'extra_params':
|
|
"""p : scalar
|
|
P denotes parameterizations for NB regression. p=1 for NB-1 and
|
|
p=2 for NB-2. Default is p=2.
|
|
offset : array_like
|
|
Offset is added to the linear prediction with coefficient equal to 1.
|
|
exposure : array_like
|
|
Log(exposure) is added to the linear prediction with coefficient
|
|
equal to 1.
|
|
""" + base._missing_param_doc + _check_rank_doc}
|
|
|
|
def __init__(self, endog, exog, p=2, offset=None,
|
|
exposure=None, missing='none', check_rank=True,
|
|
**kwargs):
|
|
super().__init__(endog,
|
|
exog,
|
|
offset=offset,
|
|
exposure=exposure,
|
|
missing=missing,
|
|
check_rank=check_rank,
|
|
**kwargs)
|
|
self.parameterization = p
|
|
self.exog_names.append('alpha')
|
|
self.k_extra = 1
|
|
self._transparams = False
|
|
|
|
def _get_init_kwds(self):
|
|
kwds = super()._get_init_kwds()
|
|
kwds['p'] = self.parameterization
|
|
return kwds
|
|
|
|
def _get_exogs(self):
|
|
return (self.exog, None)
|
|
|
|
def loglike(self, params):
|
|
"""
|
|
Loglikelihood of Generalized Negative Binomial (NB-P) model
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the model.
|
|
|
|
Returns
|
|
-------
|
|
loglike : float
|
|
The log-likelihood function of the model evaluated at `params`.
|
|
See notes.
|
|
"""
|
|
return np.sum(self.loglikeobs(params))
|
|
|
|
def loglikeobs(self, params):
|
|
"""
|
|
Loglikelihood for observations of Generalized Negative Binomial (NB-P) model
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the model.
|
|
|
|
Returns
|
|
-------
|
|
loglike : ndarray
|
|
The log likelihood for each observation of the model evaluated
|
|
at `params`. See Notes
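
Notes
-----
Writing ``a1 = mu**(2 - p) / alpha`` (editor's note: reconstructed from the
implementation below), each observation contributes

    gammaln(y + a1) - gammaln(y + 1) - gammaln(a1)
    + a1 * log(a1) + y * log(mu) - (y + a1) * log(mu + a1)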
|
|
"""
|
|
if self._transparams:
|
|
alpha = np.exp(params[-1])
|
|
else:
|
|
alpha = params[-1]
|
|
|
|
params = params[:-1]
|
|
p = self.parameterization
|
|
y = self.endog
|
|
|
|
mu = self.predict(params)
|
|
mu_p = mu**(2 - p)
|
|
a1 = mu_p / alpha
|
|
a2 = mu + a1
|
|
|
|
llf = (gammaln(y + a1) - gammaln(y + 1) - gammaln(a1) +
|
|
a1 * np.log(a1) + y * np.log(mu) -
|
|
(y + a1) * np.log(a2))
|
|
|
|
return llf
|
|
|
|
def score_obs(self, params):
|
|
"""
|
|
Generalized Negative Binomial (NB-P) model score (gradient) vector
of the log-likelihood for each observation.
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the model
|
|
|
|
Returns
|
|
-------
|
|
score : ndarray, 1-D
|
|
The score vector of the model, i.e. the first derivative of the
|
|
loglikelihood function, evaluated at `params`
|
|
"""
|
|
if self._transparams:
|
|
alpha = np.exp(params[-1])
|
|
else:
|
|
alpha = params[-1]
|
|
|
|
params = params[:-1]
|
|
p = 2 - self.parameterization
|
|
y = self.endog
|
|
|
|
mu = self.predict(params)
|
|
mu_p = mu**p
|
|
a1 = mu_p / alpha
|
|
a2 = mu + a1
|
|
a3 = y + a1
|
|
a4 = p * a1 / mu
|
|
|
|
dgpart = digamma(a3) - digamma(a1)
|
|
dgterm = dgpart + np.log(a1 / a2) + 1 - a3 / a2
|
|
# TODO: better name/interpretation for dgterm?
|
|
|
|
dparams = (a4 * dgterm -
|
|
a3 / a2 +
|
|
y / mu)
|
|
dparams = (self.exog.T * mu * dparams).T
|
|
dalpha = -a1 / alpha * dgterm
|
|
|
|
return np.concatenate((dparams, np.atleast_2d(dalpha).T),
|
|
axis=1)
|
|
|
|
def score(self, params):
|
|
"""
|
|
Generalized Negative Binomial (NB-P) model score (gradient) vector of the log-likelihood
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the model
|
|
|
|
Returns
|
|
-------
|
|
score : ndarray, 1-D
|
|
The score vector of the model, i.e. the first derivative of the
|
|
loglikelihood function, evaluated at `params`
|
|
"""
|
|
score = np.sum(self.score_obs(params), axis=0)
|
|
if self._transparams:
|
|
# chain rule: optimization is in log(alpha) space when _transparams
# is True, so dL/dln(alpha) = alpha * dL/dalpha
score[-1] *= np.exp(params[-1])
return score
|
|
else:
|
|
return score
|
|
|
|
def score_factor(self, params, endog=None):
|
|
"""
|
|
Generalized Negative Binomial (NB-P) model score (gradient) vector
of the log-likelihood for each observation.
|
|
Parameters
|
|
----------
|
|
params : array-like
|
|
The parameters of the model
|
|
|
|
Returns
|
|
-------
|
|
score : ndarray, 1-D
|
|
The score vector of the model, i.e. the first derivative of the
|
|
loglikelihood function, evaluated at `params`
|
|
"""
|
|
params = np.asarray(params)
|
|
if self._transparams:
|
|
alpha = np.exp(params[-1])
|
|
else:
|
|
alpha = params[-1]
|
|
|
|
params = params[:-1]
|
|
p = 2 - self.parameterization
|
|
y = self.endog if endog is None else endog
|
|
|
|
mu = self.predict(params)
|
|
mu_p = mu**p
|
|
a1 = mu_p / alpha
|
|
a2 = mu + a1
|
|
a3 = y + a1
|
|
a4 = p * a1 / mu
|
|
|
|
dgpart = digamma(a3) - digamma(a1)
|
|
|
|
dparams = ((a4 * dgpart -
|
|
a3 / a2) +
|
|
y / mu + a4 * (1 - a3 / a2 + np.log(a1 / a2)))
|
|
dparams = (mu * dparams).T
|
|
dalpha = (-a1 / alpha * (dgpart +
|
|
np.log(a1 / a2) +
|
|
1 - a3 / a2))
|
|
|
|
return dparams, dalpha
|
|
|
|
def hessian(self, params):
|
|
"""
|
|
Generalized Negative Binomial (NB-P) model hessian matrix
of the log-likelihood.
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
The parameters of the model
|
|
|
|
Returns
|
|
-------
|
|
hessian : ndarray, 2-D
|
|
The hessian matrix of the model.
|
|
"""
|
|
if self._transparams:
|
|
alpha = np.exp(params[-1])
|
|
else:
|
|
alpha = params[-1]
|
|
params = params[:-1]
|
|
|
|
p = 2 - self.parameterization
|
|
y = self.endog
|
|
exog = self.exog
|
|
mu = self.predict(params)
|
|
|
|
mu_p = mu**p
|
|
a1 = mu_p / alpha
|
|
a2 = mu + a1
|
|
a3 = y + a1
|
|
a4 = p * a1 / mu
|
|
|
|
prob = a1 / a2
|
|
lprob = np.log(prob)
|
|
dgpart = digamma(a3) - digamma(a1)
|
|
pgpart = polygamma(1, a3) - polygamma(1, a1)
|
|
|
|
dim = exog.shape[1]
|
|
hess_arr = np.zeros((dim + 1, dim + 1))
|
|
|
|
coeff = mu**2 * (((1 + a4)**2 * a3 / a2**2 -
|
|
a3 / a2 * (p - 1) * a4 / mu -
|
|
y / mu**2 -
|
|
2 * a4 * (1 + a4) / a2 +
|
|
p * a4 / mu * (lprob + dgpart + 2) -
|
|
a4 / mu * (lprob + dgpart + 1) +
|
|
a4**2 * pgpart) +
|
|
(-(1 + a4) * a3 / a2 +
|
|
y / mu +
|
|
a4 * (lprob + dgpart + 1)) / mu)
|
|
|
|
for i in range(dim):
|
|
hess_arr[i, :-1] = np.sum(self.exog[:, :].T * self.exog[:, i] * coeff, axis=1)
|
|
|
|
|
|
hess_arr[-1,:-1] = (self.exog[:, :].T * mu * a1 *
|
|
((1 + a4) * (1 - a3 / a2) / a2 -
|
|
p * (lprob + dgpart + 2) / mu +
|
|
p / mu * (a3 + p * a1) / a2 -
|
|
a4 * pgpart) / alpha).sum(axis=1)
|
|
|
|
|
|
da2 = (a1 * (2 * lprob +
|
|
2 * dgpart + 3 -
|
|
2 * a3 / a2
|
|
+ a1 * pgpart
|
|
- 2 * prob +
|
|
prob * a3 / a2) / alpha**2)
|
|
|
|
hess_arr[-1, -1] = da2.sum()
|
|
|
|
tri_idx = np.triu_indices(dim + 1, k=1)
|
|
hess_arr[tri_idx] = hess_arr.T[tri_idx]
|
|
|
|
return hess_arr
|
|
|
|
def hessian_factor(self, params):
|
|
"""
|
|
Generalized Negative Binomial (NB-P) model hessian matrix
of the log-likelihood.
|
|
Parameters
|
|
----------
|
|
params : array-like
|
|
The parameters of the model
|
|
|
|
Returns
|
|
-------
|
|
hessian : ndarray, 2-D
|
|
The hessian matrix of the model.
|
|
"""
|
|
params = np.asarray(params)
|
|
if self._transparams:
|
|
alpha = np.exp(params[-1])
|
|
else:
|
|
alpha = params[-1]
|
|
params = params[:-1]
|
|
|
|
p = 2 - self.parameterization
|
|
y = self.endog
|
|
mu = self.predict(params)
|
|
|
|
mu_p = mu**p
|
|
a1 = mu_p / alpha
|
|
a2 = mu + a1
|
|
a3 = y + a1
|
|
a4 = p * a1 / mu
|
|
a5 = a4 * p / mu
|
|
|
|
dgpart = digamma(a3) - digamma(a1)
|
|
|
|
coeff = mu**2 * (((1 + a4)**2 * a3 / a2**2 -
|
|
a3 * (a5 - a4 / mu) / a2 -
|
|
y / mu**2 -
|
|
2 * a4 * (1 + a4) / a2 +
|
|
a5 * (np.log(a1) - np.log(a2) + dgpart + 2) -
|
|
a4 * (np.log(a1) - np.log(a2) + dgpart + 1) / mu -
|
|
a4**2 * (polygamma(1, a1) - polygamma(1, a3))) +
|
|
(-(1 + a4) * a3 / a2 +
|
|
y / mu +
|
|
a4 * (np.log(a1) - np.log(a2) + dgpart + 1)) / mu)
|
|
|
|
hfbb = coeff
|
|
|
|
hfba = (mu * a1 *
|
|
((1 + a4) * (1 - a3 / a2) / a2 -
|
|
p * (np.log(a1 / a2) + dgpart + 2) / mu +
|
|
p * (a3 / mu + a4) / a2 +
|
|
a4 * (polygamma(1, a1) - polygamma(1, a3))) / alpha)
|
|
|
|
hfaa = (a1 * (2 * np.log(a1 / a2) +
|
|
2 * dgpart + 3 -
|
|
2 * a3 / a2 - a1 * polygamma(1, a1) +
|
|
a1 * polygamma(1, a3) - 2 * a1 / a2 +
|
|
a1 * a3 / a2**2) / alpha**2)
|
|
|
|
return hfbb, hfba, hfaa
|
|
|
|
@Appender(_get_start_params_null_docs)
|
|
def _get_start_params_null(self):
|
|
offset = getattr(self, "offset", 0)
|
|
exposure = getattr(self, "exposure", 0)
|
|
|
|
const = (self.endog / np.exp(offset + exposure)).mean()
|
|
params = [np.log(const)]
|
|
mu = const * np.exp(offset + exposure)
|
|
resid = self.endog - mu
|
|
a = self._estimate_dispersion(mu, resid, df_resid=resid.shape[0] - 1)
|
|
params.append(a)
|
|
|
|
return np.array(params)
|
|
|
|
def _estimate_dispersion(self, mu, resid, df_resid=None):
|
|
q = self.parameterization - 1
|
|
if df_resid is None:
|
|
df_resid = resid.shape[0]
|
|
a = ((resid**2 / mu - 1) * mu**(-q)).sum() / df_resid
|
|
return a
|
|
|
|
@Appender(DiscreteModel.fit.__doc__)
|
|
def fit(self, start_params=None, method='bfgs', maxiter=35,
|
|
full_output=1, disp=1, callback=None, use_transparams=False,
|
|
cov_type='nonrobust', cov_kwds=None, use_t=None,
|
|
optim_kwds_prelim=None, **kwargs):
|
|
# TODO: Fix doc string
|
|
"""
|
|
use_transparams : bool
|
|
If True, an internal transformation is used to impose
non-negativity on the dispersion parameter. Default is False.
use_transparams=True imposes the constraint alpha > 0, ruling out
underdispersion. If use_transparams=True and method is "newton" or
"ncg", the transformation is ignored.
"""
|
|
if use_transparams and method not in ['newton', 'ncg']:
|
|
self._transparams = True
|
|
else:
|
|
if use_transparams:
|
|
warnings.warn('Parameter "use_transparams" is ignored',
|
|
RuntimeWarning)
|
|
self._transparams = False
|
|
if start_params is None:
|
|
offset = getattr(self, "offset", 0) + getattr(self, "exposure", 0)
|
|
if np.size(offset) == 1 and offset == 0:
|
|
offset = None
|
|
kwds_prelim = {'disp': 0, 'skip_hessian': True, 'warn_convergence': False}
|
|
if optim_kwds_prelim is not None:
|
|
kwds_prelim.update(optim_kwds_prelim)
|
|
mod_poi = Poisson(self.endog, self.exog, offset=offset)
|
|
with warnings.catch_warnings():
|
|
warnings.simplefilter("always")
|
|
res_poi = mod_poi.fit(**kwds_prelim)
|
|
start_params = res_poi.params
|
|
a = self._estimate_dispersion(res_poi.predict(), res_poi.resid,
|
|
df_resid=res_poi.df_resid)
|
|
start_params = np.append(start_params, max(0.05, a))
|
|
|
|
if callback is None:
|
|
# work around perfect separation callback #3895
|
|
callback = lambda *x: x
|
|
|
|
mlefit = super().fit(start_params=start_params,
|
|
maxiter=maxiter, method=method, disp=disp,
|
|
full_output=full_output, callback=callback,
|
|
**kwargs)
|
|
if optim_kwds_prelim is not None:
|
|
mlefit.mle_settings["optim_kwds_prelim"] = optim_kwds_prelim
|
|
if use_transparams and method not in ["newton", "ncg"]:
|
|
self._transparams = False
|
|
mlefit._results.params[-1] = np.exp(mlefit._results.params[-1])
|
|
|
|
nbinfit = NegativeBinomialPResults(self, mlefit._results)
|
|
result = NegativeBinomialPResultsWrapper(nbinfit)
|
|
|
|
if cov_kwds is None:
|
|
cov_kwds = {}
|
|
result._get_robustcov_results(cov_type=cov_type,
|
|
use_self=True, use_t=use_t, **cov_kwds)
|
|
return result
|
|
|
|
@Appender(DiscreteModel.fit_regularized.__doc__)
|
|
def fit_regularized(self, start_params=None, method='l1',
|
|
maxiter='defined_by_method', full_output=1, disp=1, callback=None,
|
|
alpha=0, trim_mode='auto', auto_trim_tol=0.01, size_trim_tol=1e-4,
|
|
qc_tol=0.03, **kwargs):
|
|
|
|
_validate_l1_method(method)
|
|
|
|
if np.size(alpha) == 1 and alpha != 0:
|
|
k_params = self.exog.shape[1] + self.k_extra
|
|
alpha = alpha * np.ones(k_params)
|
|
alpha[-1] = 0
|
|
|
|
alpha_p = alpha[:-1] if (self.k_extra and np.size(alpha) > 1) else alpha
|
|
|
|
self._transparams = False
|
|
if start_params is None:
|
|
offset = getattr(self, "offset", 0) + getattr(self, "exposure", 0)
|
|
if np.size(offset) == 1 and offset == 0:
|
|
offset = None
|
|
mod_poi = Poisson(self.endog, self.exog, offset=offset)
|
|
with warnings.catch_warnings():
|
|
warnings.simplefilter("always")
|
|
start_params = mod_poi.fit_regularized(
|
|
start_params=start_params, method=method, maxiter=maxiter,
|
|
full_output=full_output, disp=0, callback=callback,
|
|
alpha=alpha_p, trim_mode=trim_mode,
|
|
auto_trim_tol=auto_trim_tol, size_trim_tol=size_trim_tol,
|
|
qc_tol=qc_tol, **kwargs).params
|
|
start_params = np.append(start_params, 0.1)
|
|
|
|
cntfit = super(CountModel, self).fit_regularized(
|
|
start_params=start_params, method=method, maxiter=maxiter,
|
|
full_output=full_output, disp=disp, callback=callback,
|
|
alpha=alpha, trim_mode=trim_mode, auto_trim_tol=auto_trim_tol,
|
|
size_trim_tol=size_trim_tol, qc_tol=qc_tol, **kwargs)
|
|
|
|
discretefit = L1NegativeBinomialResults(self, cntfit)
|
|
|
|
return L1NegativeBinomialResultsWrapper(discretefit)
|
|
|
|
@Appender(Poisson.predict.__doc__)
|
|
def predict(self, params, exog=None, exposure=None, offset=None,
|
|
which='mean', y_values=None):
|
|
|
|
if exog is None:
|
|
exog = self.exog
|
|
|
|
if exposure is None:
|
|
exposure = getattr(self, 'exposure', 0)
|
|
elif exposure != 0:
|
|
exposure = np.log(exposure)
|
|
|
|
if offset is None:
|
|
offset = getattr(self, 'offset', 0)
|
|
|
|
fitted = np.dot(exog, params[:exog.shape[1]])
|
|
linpred = fitted + exposure + offset
|
|
|
|
if which == 'mean':
|
|
return np.exp(linpred)
|
|
elif which == 'linear':
|
|
return linpred
|
|
elif which == 'var':
|
|
mean = np.exp(linpred)
|
|
alpha = params[-1]
|
|
p = self.parameterization # no `-1` as in GPP
|
|
var_ = mean * (1 + alpha * mean**(p - 1))
|
|
return var_
|
|
elif which == 'prob':
|
|
if y_values is None:
|
|
y_values = np.atleast_2d(np.arange(0, np.max(self.endog)+1))
|
|
|
|
mu = self.predict(params, exog, exposure, offset)
|
|
size, prob = self.convert_params(params, mu)
|
|
return nbinom.pmf(y_values, size[:, None], prob[:, None])
|
|
else:
|
|
raise ValueError('keyword "which" = %s not recognized' % which)
|
|
|
|
def convert_params(self, params, mu):
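# Editor's note: this maps the NB-P mean/dispersion parameterization onto
# scipy.stats.nbinom's (n, p) arguments. With power parameter P
# (self.parameterization), size = mu**(2 - P) / alpha and
# prob = size / (size + mu), so the implied mean size * (1 - prob) / prob
# equals mu.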
|
|
alpha = params[-1]
|
|
p = 2 - self.parameterization
|
|
|
|
size = 1. / alpha * mu**p
|
|
prob = size / (size + mu)
|
|
|
|
return (size, prob)
|
|
|
|
def _deriv_score_obs_dendog(self, params):
|
|
"""derivative of score_obs w.r.t. endog
|
|
|
|
Parameters
|
|
----------
|
|
params : ndarray
|
|
parameter at which score is evaluated
|
|
|
|
Returns
|
|
-------
|
|
derivative : ndarray_2d
|
|
The derivative of the score_obs with respect to endog.
|
|
"""
|
|
from statsmodels.tools.numdiff import _approx_fprime_cs_scalar
|
|
|
|
def f(y):
|
|
if y.ndim == 2 and y.shape[1] == 1:
|
|
y = y[:, 0]
|
|
sf = self.score_factor(params, endog=y)
|
|
return np.column_stack(sf)
|
|
|
|
dsf = _approx_fprime_cs_scalar(self.endog[:, None], f)
|
|
# deriv is 2d vector
|
|
d1 = dsf[:, :1] * self.exog
|
|
d2 = dsf[:, 1:2]
|
|
|
|
return np.column_stack((d1, d2))
|
|
|
|
def _var(self, mu, params=None):
|
|
"""variance implied by the distribution
|
|
|
|
internal use, will be refactored or removed
|
|
"""
|
|
alpha = params[-1]
|
|
p = self.parameterization # no `-1` as in GPP
|
|
var_ = mu * (1 + alpha * mu**(p - 1))
|
|
return var_
|
|
|
|
def _prob_nonzero(self, mu, params):
|
|
"""Probability that count is not zero
|
|
|
|
internal use in Censored model, will be refactored or removed
|
|
"""
|
|
alpha = params[-1]
|
|
p = self.parameterization
|
|
# P(y > 0) = 1 - (1 + alpha * mu**(p-1))**(-mu**(2-p) / alpha) for NB-P;
# the exponent reduces to -1/alpha only in the NB-2 case (p=2)
prob_nz = 1 - (1 + alpha * mu**(p - 1))**(- mu**(2 - p) / alpha)
return prob_nz
|
|
|
|
@Appender(Poisson.get_distribution.__doc__)
|
|
def get_distribution(self, params, exog=None, exposure=None, offset=None):
|
|
"""get frozen instance of distribution
|
|
"""
|
|
mu = self.predict(params, exog=exog, exposure=exposure, offset=offset)
|
|
size, prob = self.convert_params(params, mu)
|
|
# distr = nbinom(size[:, None], prob[:, None])
|
|
distr = nbinom(size, prob)
|
|
return distr
|
|
|
|
|
|
### Results Class ###
|
|
|
|
class DiscreteResults(base.LikelihoodModelResults):
|
|
__doc__ = _discrete_results_docs % {"one_line_description" :
|
|
"A results class for the discrete dependent variable models.",
|
|
"extra_attr" : ""}
|
|
|
|
def __init__(self, model, mlefit, cov_type='nonrobust', cov_kwds=None,
|
|
use_t=None):
|
|
#super(DiscreteResults, self).__init__(model, params,
|
|
# np.linalg.inv(-hessian), scale=1.)
|
|
self.model = model
|
|
self.method = "MLE"
|
|
self.df_model = model.df_model
|
|
self.df_resid = model.df_resid
|
|
self._cache = {}
|
|
self.nobs = model.exog.shape[0]
|
|
self.__dict__.update(mlefit.__dict__)
|
|
self.converged = mlefit.mle_retvals["converged"]
|
|
|
|
if not hasattr(self, 'cov_type'):
|
|
# do this only if super, i.e. mlefit did not already add cov_type
|
|
# robust covariance
|
|
if use_t is not None:
|
|
self.use_t = use_t
|
|
if cov_type == 'nonrobust':
|
|
self.cov_type = 'nonrobust'
|
|
self.cov_kwds = {'description' : 'Standard Errors assume that the ' +
|
|
'covariance matrix of the errors is correctly ' +
|
|
'specified.'}
|
|
else:
|
|
if cov_kwds is None:
|
|
cov_kwds = {}
|
|
from statsmodels.base.covtype import get_robustcov_results
|
|
get_robustcov_results(self, cov_type=cov_type, use_self=True,
|
|
**cov_kwds)
|
|
|
|
|
|
def __getstate__(self):
|
|
# remove unpicklable methods
|
|
mle_settings = getattr(self, 'mle_settings', None)
|
|
if mle_settings is not None:
|
|
if 'callback' in mle_settings:
|
|
mle_settings['callback'] = None
|
|
if 'cov_params_func' in mle_settings:
|
|
mle_settings['cov_params_func'] = None
|
|
return self.__dict__
|
|
|
|
@cache_readonly
|
|
def prsquared(self):
|
|
"""
|
|
McFadden's pseudo-R-squared. `1 - (llf / llnull)`
|
|
"""
|
|
return 1 - self.llf/self.llnull
|
|
|
|
@cache_readonly
|
|
def llr(self):
|
|
"""
|
|
Likelihood ratio chi-squared statistic; `-2*(llnull - llf)`
|
|
"""
|
|
return -2*(self.llnull - self.llf)
|
|
|
|
@cache_readonly
|
|
def llr_pvalue(self):
|
|
"""
|
|
The chi-squared probability of getting a log-likelihood ratio
|
|
statistic greater than llr. llr has a chi-squared distribution
|
|
with degrees of freedom `df_model`.
|
|
"""
|
|
return stats.distributions.chi2.sf(self.llr, self.df_model)
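# Editor's usage sketch (``res`` is any fitted discrete results instance):
# the likelihood-ratio test of the model against the constant-only model
# is ``res.llr`` with p-value ``res.llr_pvalue``; McFadden's pseudo
# R-squared is ``res.prsquared``.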
|
|
|
|
def set_null_options(self, llnull=None, attach_results=True, **kwargs):
|
|
"""
|
|
Set the fit options for the Null (constant-only) model.
|
|
|
|
This resets the cache for related attributes which is potentially
|
|
fragile. This only sets the option, the null model is estimated
|
|
when llnull is accessed, if llnull is not yet in cache.
|
|
|
|
Parameters
|
|
----------
|
|
llnull : {None, float}
|
|
If llnull is not None, then the value will be directly assigned to
|
|
the cached attribute "llnull".
|
|
attach_results : bool
|
|
Sets an internal flag whether the results instance of the null
|
|
model should be attached. By default without calling this method,
|
|
the null model results are not attached and only the loglikelihood
value llnull is stored.
|
|
**kwargs
|
|
Additional keyword arguments used as fit keyword arguments for the
|
|
null model. The override and model default values.
|
|
|
|
Notes
|
|
-----
|
|
Modifies attributes of this instance, and so has no return.
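
Examples
--------
Sketch (editor's note; the keyword values are illustrative)::

    res.set_null_options(llnull=-1234.5)       # use a precomputed value
    res.set_null_options(attach_results=True)  # keep the null results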
|
|
"""
|
|
# reset cache, note we need to add here anything that depends on
|
|
# llnull or the null model. If something is missing, then the attribute
# might be incorrect.
|
|
self._cache.pop('llnull', None)
|
|
self._cache.pop('llr', None)
|
|
self._cache.pop('llr_pvalue', None)
|
|
self._cache.pop('prsquared', None)
|
|
if hasattr(self, 'res_null'):
|
|
del self.res_null
|
|
|
|
if llnull is not None:
|
|
self._cache['llnull'] = llnull
|
|
self._attach_nullmodel = attach_results
|
|
self._optim_kwds_null = kwargs
|
|
|
|
@cache_readonly
|
|
def llnull(self):
|
|
"""
|
|
Value of the constant-only loglikelihood
|
|
"""
|
|
model = self.model
|
|
kwds = model._get_init_kwds().copy()
|
|
for key in getattr(model, '_null_drop_keys', []):
|
|
del kwds[key]
|
|
# TODO: what parameters to pass to fit?
|
|
mod_null = model.__class__(model.endog, np.ones(self.nobs), **kwds)
|
|
# TODO: consider catching and warning on convergence failure?
|
|
# in the meantime, try hard to converge. see
|
|
# TestPoissonConstrained1a.test_smoke
|
|
|
|
optim_kwds = getattr(self, '_optim_kwds_null', {}).copy()
|
|
|
|
if 'start_params' in optim_kwds:
|
|
# user provided
|
|
sp_null = optim_kwds.pop('start_params')
|
|
elif hasattr(model, '_get_start_params_null'):
|
|
# get moment estimates if available
|
|
sp_null = model._get_start_params_null()
|
|
else:
|
|
sp_null = None
|
|
|
|
opt_kwds = dict(method='bfgs', warn_convergence=False, maxiter=10000,
|
|
disp=0)
|
|
opt_kwds.update(optim_kwds)
|
|
|
|
if optim_kwds:
|
|
res_null = mod_null.fit(start_params=sp_null, **opt_kwds)
|
|
else:
|
|
# this should give a reasonably robust default across versions
res_null = mod_null.fit(start_params=sp_null, method='nm',
|
|
warn_convergence=False,
|
|
maxiter=10000, disp=0)
|
|
res_null = mod_null.fit(start_params=res_null.params, method='bfgs',
|
|
warn_convergence=False,
|
|
maxiter=10000, disp=0)
|
|
|
|
if getattr(self, '_attach_nullmodel', False) is not False:
|
|
self.res_null = res_null
|
|
|
|
return res_null.llf
|
|
|
|
@cache_readonly
|
|
def fittedvalues(self):
|
|
"""
|
|
Linear predictor XB.
|
|
"""
|
|
return np.dot(self.model.exog, self.params[:self.model.exog.shape[1]])
|
|
|
|
@cache_readonly
|
|
def resid_response(self):
|
|
"""
|
|
Response residuals. The response residuals are defined as
`endog - fittedvalues`
|
|
"""
|
|
return self.model.endog - self.predict()
|
|
|
|
@cache_readonly
|
|
def resid_pearson(self):
|
|
"""
|
|
Pearson residuals defined as response residuals divided by standard
|
|
deviation implied by the model.
|
|
"""
|
|
var_ = self.predict(which="var")
|
|
return self.resid_response / np.sqrt(var_)
|
|
|
|
@cache_readonly
|
|
def aic(self):
|
|
"""
|
|
Akaike information criterion. `-2*(llf - p)` where `p` is the number
of estimated parameters, including the intercept and any extra
parameters such as the dispersion parameter alpha.
"""
|
|
k_extra = getattr(self.model, 'k_extra', 0)
|
|
return -2*(self.llf - (self.df_model + 1 + k_extra))
|
|
|
|
@cache_readonly
|
|
def bic(self):
|
|
"""
|
|
Bayesian information criterion. `-2*llf + ln(nobs)*p` where `p` is the
number of estimated parameters, including the intercept and any extra
parameters such as the dispersion parameter alpha.
"""
|
|
k_extra = getattr(self.model, 'k_extra', 0)
|
|
return -2*self.llf + np.log(self.nobs)*(self.df_model + 1 + k_extra)
|
|
|
|
@cache_readonly
|
|
def im_ratio(self):
|
|
return pinfer.im_ratio(self)
|
|
|
|
def info_criteria(self, crit, dk_params=0):
|
|
"""Return an information criterion for the model.
|
|
|
|
Parameters
|
|
----------
|
|
crit : string
|
|
One of 'aic', 'bic', 'tic' or 'gbic'.
|
|
dk_params : int or float
|
|
Correction to the number of parameters used in the information
|
|
criterion.
|
|
|
|
Returns
|
|
-------
|
|
Value of information criterion.
|
|
|
|
Notes
|
|
-----
|
|
'tic' is the Takeuchi information criterion and 'gbic' a generalized
BIC; both are computed from the score and hessian of the fitted model.
|
|
References
|
|
----------
|
|
Burnham KP, Anderson KR (2002). Model Selection and Multimodel
|
|
Inference; Springer New York.
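
Examples
--------
Sketch (editor's note; ``res`` is any fitted discrete results instance)::

    res.info_criteria("aic")
    res.info_criteria("bic")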
|
|
"""
|
|
crit = crit.lower()
|
|
k_extra = getattr(self.model, 'k_extra', 0)
|
|
k_params = self.df_model + 1 + k_extra + dk_params
|
|
|
|
if crit == "aic":
|
|
return -2 * self.llf + 2 * k_params
|
|
elif crit == "bic":
|
|
nobs = self.df_model + self.df_resid + 1
|
|
bic = -2*self.llf + k_params*np.log(nobs)
|
|
return bic
|
|
elif crit == "tic":
|
|
return pinfer.tic(self)
|
|
elif crit == "gbic":
|
|
return pinfer.gbic(self)
|
|
else:
|
|
raise ValueError("Name of information criterion not recognized.")
|
|
|
|
def score_test(self, exog_extra=None, params_constrained=None,
|
|
hypothesis='joint', cov_type=None, cov_kwds=None,
|
|
k_constraints=None, observed=True):
|
|
|
|
res = pinfer.score_test(self, exog_extra=exog_extra,
|
|
params_constrained=params_constrained,
|
|
hypothesis=hypothesis,
|
|
cov_type=cov_type, cov_kwds=cov_kwds,
|
|
k_constraints=k_constraints,
|
|
observed=observed)
|
|
return res
|
|
|
|
score_test.__doc__ = pinfer.score_test.__doc__
|
|
|
|
def get_prediction(self, exog=None,
|
|
transform=True, which="mean", linear=None,
|
|
row_labels=None, average=False,
|
|
agg_weights=None, y_values=None,
|
|
**kwargs):
|
|
"""
|
|
Compute prediction results when endpoint transformation is valid.
|
|
|
|
Parameters
|
|
----------
|
|
exog : array_like, optional
|
|
The values for which you want to predict.
|
|
transform : bool, optional
|
|
If the model was fit via a formula, do you want to pass
|
|
exog through the formula. Default is True. E.g., if you fit
|
|
a model y ~ log(x1) + log(x2), and transform is True, then
|
|
you can pass a data structure that contains x1 and x2 in
|
|
their original form. Otherwise, you'd need to log the data
|
|
first.
|
|
which : str
|
|
Which statistic is to be predicted. Default is "mean".
|
|
The available statistics and options depend on the model.
|
|
see the model.predict docstring
|
|
linear : bool
|
|
Linear has been replaced by the `which` keyword and will be
|
|
deprecated.
|
|
If linear is True, then `which` is ignored and the linear
|
|
prediction is returned.
|
|
row_labels : list of str or None
|
|
If row_labels are provided, then they will replace the generated
labels.
|
|
average : bool
|
|
If average is True, then the mean prediction is computed, that is,
|
|
predictions are computed for individual exog and then the average
|
|
over observation is used.
|
|
If average is False, then the results are the predictions for all
|
|
observations, i.e. same length as ``exog``.
|
|
agg_weights : ndarray, optional
|
|
Aggregation weights, only used if average is True.
|
|
The weights are not normalized.
|
|
y_values : None or nd_array
|
|
Some predictive statistics like which="prob" are computed at
|
|
values of the response variable. If y_values is not None, then
|
|
it will be used instead of the default set of y_values.
|
|
|
|
**Warning:** ``which="prob"`` for count models currently computes
|
|
the pmf for all y=k up to max(endog). This can be a large array if
|
|
the observed endog values are large.
|
|
This will likely change so that the set of y_values will be chosen
|
|
to limit the array size.
|
|
**kwargs :
|
|
Some models can take additional keyword arguments, such as offset,
|
|
exposure or additional exog in multi-part models like zero inflated
|
|
models.
|
|
See the predict method of the model for the details.
|
|
|
|
Returns
|
|
-------
|
|
prediction_results : PredictionResults
|
|
The prediction results instance contains prediction and prediction
|
|
variance and can on demand calculate confidence intervals and
|
|
summary dataframe for the prediction.
|
|
|
|
Notes
|
|
-----
|
|
Status: new in 0.14, experimental
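
Examples
--------
Sketch (editor's note; ``res`` is a fitted count-model results instance)::

    pred = res.get_prediction(which="mean")
    pred.summary_frame(alpha=0.05)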
|
|
"""
|
|
|
|
if linear is True:
|
|
# compatibility with old keyword
|
|
which = "linear"
|
|
|
|
pred_kwds = kwargs
|
|
# y_values is explicit so we can add it to the docstring
|
|
if y_values is not None:
|
|
pred_kwds["y_values"] = y_values
|
|
|
|
res = pred.get_prediction(
|
|
self,
|
|
exog=exog,
|
|
which=which,
|
|
transform=transform,
|
|
row_labels=row_labels,
|
|
average=average,
|
|
agg_weights=agg_weights,
|
|
pred_kwds=pred_kwds
|
|
)
|
|
return res
|
|
|
|
def get_distribution(self, exog=None, transform=True, **kwargs):
|
|
|
|
exog, _ = self._transform_predict_exog(exog, transform=transform)
|
|
if exog is not None:
|
|
exog = np.asarray(exog)
|
|
distr = self.model.get_distribution(self.params,
|
|
exog=exog,
|
|
**kwargs
|
|
)
|
|
return distr
|
|
|
|
def _get_endog_name(self, yname, yname_list):
|
|
if yname is None:
|
|
yname = self.model.endog_names
|
|
if yname_list is None:
|
|
yname_list = self.model.endog_names
|
|
return yname, yname_list
|
|
|
|
def get_margeff(self, at='overall', method='dydx', atexog=None,
|
|
dummy=False, count=False):
|
|
"""Get marginal effects of the fitted model.
|
|
|
|
Parameters
|
|
----------
|
|
at : str, optional
|
|
Options are:
|
|
|
|
- 'overall', The average of the marginal effects at each
|
|
observation.
|
|
- 'mean', The marginal effects at the mean of each regressor.
|
|
- 'median', The marginal effects at the median of each regressor.
|
|
- 'zero', The marginal effects at zero for each regressor.
|
|
- 'all', The marginal effects at each observation. If `at` is all
|
|
only margeff will be available from the returned object.
|
|
|
|
Note that if `exog` is specified, then marginal effects for all
|
|
variables not specified by `exog` are calculated using the `at`
|
|
option.
|
|
method : str, optional
|
|
Options are:
|
|
|
|
- 'dydx' - dy/dx - No transformation is made and marginal effects
|
|
are returned. This is the default.
|
|
- 'eyex' - estimate elasticities of variables in `exog` --
|
|
d(lny)/d(lnx)
|
|
- 'dyex' - estimate semi-elasticity -- dy/d(lnx)
|
|
- 'eydx' - estimate semi-elasticity -- d(lny)/dx
|
|
|
|
Note that transformations are done after each observation is
calculated. Semi-elasticities for binary variables are computed
|
|
using the midpoint method. 'dyex' and 'eyex' do not make sense
|
|
for discrete variables. For interpretations of these methods
|
|
see notes below.
|
|
atexog : array_like, optional
|
|
Optionally, you can provide the exogenous variables over which to
|
|
get the marginal effects. This should be a dictionary with the
zero-indexed column number as the key and the value at which to hold
that variable as the dictionary value.
Default is None for all independent variables less the constant.
|
|
dummy : bool, optional
|
|
If False, treats binary variables (if present) as continuous. This
|
|
is the default. Else if True, treats binary variables as
|
|
changing from 0 to 1. Note that any variable that is either 0 or 1
|
|
is treated as binary. Each binary variable is treated separately
|
|
for now.
|
|
count : bool, optional
|
|
If False, treats count variables (if present) as continuous. This
|
|
is the default. Else if True, the marginal effect is the
|
|
change in probabilities when each observation is increased by one.
|
|
|
|
Returns
|
|
-------
|
|
DiscreteMargins : marginal effects instance
|
|
Returns an object that holds the marginal effects, standard
|
|
errors, confidence intervals, etc. See
|
|
`statsmodels.discrete.discrete_margins.DiscreteMargins` for more
|
|
information.
|
|
|
|
Notes
|
|
-----
|
|
Interpretations of methods:
|
|
|
|
- 'dydx' - change in `endog` for a change in `exog`.
|
|
- 'eyex' - proportional change in `endog` for a proportional change
|
|
in `exog`.
|
|
- 'dyex' - change in `endog` for a proportional change in `exog`.
|
|
- 'eydx' - proportional change in `endog` for a change in `exog`.
|
|
|
|
When using after Poisson, returns the expected number of events per
|
|
period, assuming that the model is loglinear.
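
Examples
--------
Sketch (editor's note; ``res`` is a fitted Logit, Probit or Poisson
results instance)::

    marg = res.get_margeff(at="overall", method="dydx")
    print(marg.summary())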
|
|
"""
|
|
if getattr(self.model, "offset", None) is not None:
|
|
raise NotImplementedError("Margins with offset are not available.")
|
|
from statsmodels.discrete.discrete_margins import DiscreteMargins
|
|
return DiscreteMargins(self, (at, method, atexog, dummy, count))
|
|
|
|
def get_influence(self):
|
|
"""
|
|
Get an instance of MLEInfluence with influence and outlier measures
|
|
|
|
Returns
|
|
-------
|
|
infl : MLEInfluence instance
|
|
The instance has methods to calculate the main influence and
|
|
outlier measures as attributes.
|
|
|
|
See Also
|
|
--------
|
|
statsmodels.stats.outliers_influence.MLEInfluence
|
|
"""
|
|
from statsmodels.stats.outliers_influence import MLEInfluence
|
|
return MLEInfluence(self)
|
|
|
|
def summary(self, yname=None, xname=None, title=None, alpha=.05,
|
|
yname_list=None):
|
|
"""
|
|
Summarize the Regression Results.
|
|
|
|
Parameters
|
|
----------
|
|
yname : str, optional
|
|
The name of the endog variable in the tables. The default is `y`.
|
|
xname : list[str], optional
|
|
The names for the exogenous variables, default is "var_xx".
|
|
Must match the number of parameters in the model.
|
|
title : str, optional
|
|
Title for the top table. If not None, then this replaces the
|
|
default title.
|
|
alpha : float
|
|
The significance level for the confidence intervals.
|
|
|
|
Returns
|
|
-------
|
|
Summary
|
|
Class that holds the summary tables and text, which can be printed
|
|
or converted to various output formats.
|
|
|
|
See Also
|
|
--------
|
|
statsmodels.iolib.summary.Summary : Class that hold summary results.
|
|
"""
|
|
|
|
top_left = [('Dep. Variable:', None),
|
|
('Model:', [self.model.__class__.__name__]),
|
|
('Method:', [self.method]),
|
|
('Date:', None),
|
|
('Time:', None),
|
|
('converged:', ["%s" % self.mle_retvals['converged']]),
|
|
]
|
|
|
|
top_right = [('No. Observations:', None),
|
|
('Df Residuals:', None),
|
|
('Df Model:', None),
|
|
('Pseudo R-squ.:', ["%#6.4g" % self.prsquared]),
|
|
('Log-Likelihood:', None),
|
|
('LL-Null:', ["%#8.5g" % self.llnull]),
|
|
('LLR p-value:', ["%#6.4g" % self.llr_pvalue])
|
|
]
|
|
|
|
if hasattr(self, 'cov_type'):
|
|
top_left.append(('Covariance Type:', [self.cov_type]))
|
|
|
|
if title is None:
|
|
title = self.model.__class__.__name__ + ' ' + "Regression Results"
|
|
|
|
# boiler plate
|
|
from statsmodels.iolib.summary import Summary
|
|
smry = Summary()
|
|
yname, yname_list = self._get_endog_name(yname, yname_list)
|
|
|
|
# for top of table
|
|
smry.add_table_2cols(self, gleft=top_left, gright=top_right,
|
|
yname=yname, xname=xname, title=title)
|
|
|
|
# for parameters, etc
|
|
smry.add_table_params(self, yname=yname_list, xname=xname, alpha=alpha,
|
|
use_t=self.use_t)
|
|
|
|
if hasattr(self, 'constraints'):
|
|
smry.add_extra_txt(['Model has been estimated subject to linear '
|
|
'equality constraints.'])
|
|
|
|
return smry
|
|
|
|
def summary2(self, yname=None, xname=None, title=None, alpha=.05,
|
|
float_format="%.4f"):
|
|
"""
|
|
Experimental function to summarize regression results.
|
|
|
|
Parameters
|
|
----------
|
|
yname : str
|
|
Name of the dependent variable (optional).
|
|
xname : list[str], optional
|
|
List of strings of length equal to the number of parameters
|
|
Names of the independent variables (optional).
|
|
title : str, optional
|
|
Title for the top table. If not None, then this replaces the
|
|
default title.
|
|
alpha : float
|
|
The significance level for the confidence intervals.
|
|
float_format : str
|
|
The print format for floats in parameters summary.
|
|
|
|
Returns
|
|
-------
|
|
Summary
|
|
Instance that contains the summary tables and text, which can be
|
|
printed or converted to various output formats.
|
|
|
|
See Also
|
|
--------
|
|
statsmodels.iolib.summary2.Summary : Class that holds summary results.
|
|
"""
|
|
from statsmodels.iolib import summary2
|
|
smry = summary2.Summary()
|
|
smry.add_base(results=self, alpha=alpha, float_format=float_format,
|
|
xname=xname, yname=yname, title=title)
|
|
|
|
if hasattr(self, 'constraints'):
|
|
smry.add_text('Model has been estimated subject to linear '
|
|
'equality constraints.')
|
|
|
|
return smry
|
|
|
|
|
|
class CountResults(DiscreteResults):
|
|
__doc__ = _discrete_results_docs % {
|
|
"one_line_description": "A results class for count data",
|
|
"extra_attr": ""}
|
|
|
|
@cache_readonly
|
|
def resid(self):
|
|
"""
|
|
Residuals
|
|
|
|
Notes
|
|
-----
|
|
The residuals for Count models are defined as
|
|
|
|
.. math:: y - p
|
|
|
|
where :math:`p = \\exp(X\\beta)`. Any exposure and offset variables
|
|
are also handled.
|
|
"""
|
|
return self.model.endog - self.predict()
|
|
|
|
def get_diagnostic(self, y_max=None):
|
|
"""
|
|
Get instance of class with specification and diagnostic methods.
|
|
|
|
experimental, API of Diagnostic classes will change
|
|
|
|
Returns
|
|
-------
|
|
CountDiagnostic instance
|
|
The instance has methods to perform specification and diagnostic
|
|
tests and plots.
|
|
See Also
|
|
--------
|
|
statsmodels.discrete.diagnostic.CountDiagnostic
"""
|
|
from statsmodels.discrete.diagnostic import CountDiagnostic
|
|
return CountDiagnostic(self, y_max=y_max)
|
|
|
|
|
|
class NegativeBinomialResults(CountResults):
|
|
__doc__ = _discrete_results_docs % {
|
|
"one_line_description": "A results class for NegativeBinomial 1 and 2",
|
|
"extra_attr": ""}
|
|
|
|
@cache_readonly
|
|
def lnalpha(self):
|
|
"""Natural log of alpha"""
|
|
return np.log(self.params[-1])
|
|
|
|
@cache_readonly
|
|
def lnalpha_std_err(self):
|
|
"""Standard error of ln(alpha), computed via the delta method (bse / alpha)"""
return self.bse[-1] / self.params[-1]
|
|
|
|
@cache_readonly
|
|
def aic(self):
|
|
# + 1 because we estimate alpha
|
|
k_extra = getattr(self.model, 'k_extra', 0)
|
|
return -2*(self.llf - (self.df_model + self.k_constant + k_extra))
|
|
|
|
@cache_readonly
|
|
def bic(self):
|
|
# + 1 because we estimate alpha
|
|
k_extra = getattr(self.model, 'k_extra', 0)
|
|
return -2*self.llf + np.log(self.nobs)*(self.df_model +
|
|
self.k_constant + k_extra)
|
|
|
|
|
|
class NegativeBinomialPResults(NegativeBinomialResults):
|
|
__doc__ = _discrete_results_docs % {
|
|
"one_line_description": "A results class for NegativeBinomialP",
|
|
"extra_attr": ""}
|
|
|
|
|
|
class GeneralizedPoissonResults(NegativeBinomialResults):
|
|
__doc__ = _discrete_results_docs % {
|
|
"one_line_description": "A results class for Generalized Poisson",
|
|
"extra_attr": ""}
|
|
|
|
@cache_readonly
|
|
def _dispersion_factor(self):
|
|
p = getattr(self.model, 'parameterization', 0)
|
|
mu = self.predict()
|
|
return (1 + self.params[-1] * mu**p)**2
|
|
|
|
|
|
class L1CountResults(DiscreteResults):
|
|
__doc__ = _discrete_results_docs % {"one_line_description" :
|
|
"A results class for count data fit by l1 regularization",
|
|
"extra_attr" : _l1_results_attr}
|
|
|
|
def __init__(self, model, cntfit):
|
|
super().__init__(model, cntfit)
|
|
# self.trimmed is a boolean array indicating whether the corresponding
# entry in params has been zeroed out (trimmed).
self.trimmed = cntfit.mle_retvals['trimmed']
|
|
self.nnz_params = (~self.trimmed).sum()
|
|
|
|
# Set degrees of freedom. In doing so,
|
|
# adjust for extra parameter in NegativeBinomial nb1 and nb2
|
|
# extra parameter is not included in df_model
|
|
k_extra = getattr(self.model, 'k_extra', 0)
|
|
|
|
self.df_model = self.nnz_params - 1 - k_extra
|
|
self.df_resid = float(self.model.endog.shape[0] - self.nnz_params) + k_extra
|
|
|
|
|
|
class PoissonResults(CountResults):
|
|
|
|
def predict_prob(self, n=None, exog=None, exposure=None, offset=None,
|
|
transform=True):
|
|
"""
|
|
Return predicted probability of each count level for each observation
|
|
|
|
Parameters
|
|
----------
|
|
n : array_like or int
|
|
The counts for which you want the probabilities. If n is None
|
|
then the probabilities for each count from 0 to max(y) are
|
|
given.
|
|
|
|
Returns
|
|
-------
|
|
ndarray
|
|
A nobs x len(`n`) array of probabilities, with columns indexed by the
count `n`. If n is None, then column 0 is the probability that each
observation is 0, column 1 is the probability that each
|
|
observation is 1, etc.
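
Examples
--------
Sketch (editor's note; ``res`` is a fitted Poisson results instance)::

    res.predict_prob()             # probabilities for counts 0..max(endog)
    res.predict_prob(n=[0, 1, 2])  # probabilities for counts 0, 1 and 2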
|
|
"""
|
|
if n is not None:
|
|
counts = np.atleast_2d(n)
|
|
else:
|
|
counts = np.atleast_2d(np.arange(0, np.max(self.model.endog)+1))
|
|
mu = self.predict(exog=exog, exposure=exposure, offset=offset,
|
|
transform=transform, which="mean")[:,None]
|
|
# uses broadcasting
|
|
return stats.poisson.pmf(counts, mu)
|
|
|
|
@property
|
|
def resid_pearson(self):
|
|
"""
|
|
Pearson residuals
|
|
|
|
Notes
|
|
-----
|
|
Pearson residuals are defined to be
|
|
|
|
.. math:: r_j = \\frac{(y - M_jp_j)}{\\sqrt{M_jp_j(1-p_j)}}
|
|
|
|
where :math:`p_j=cdf(X\\beta)` and :math:`M_j` is the total number of
|
|
observations sharing the covariate pattern :math:`j`.
|
|
|
|
For now :math:`M_j` is always set to 1.
|
|
"""
|
|
# Pearson residuals
|
|
p = self.predict() # fittedvalues is still linear
|
|
return (self.model.endog - p)/np.sqrt(p)
|
|
|
|
def get_influence(self):
|
|
"""
|
|
Get an instance of MLEInfluence with influence and outlier measures
|
|
|
|
Returns
|
|
-------
|
|
infl : MLEInfluence instance
|
|
The instance has methods to calculate the main influence and
|
|
outlier measures as attributes.
|
|
|
|
See Also
|
|
--------
|
|
statsmodels.stats.outliers_influence.MLEInfluence
|
|
"""
|
|
from statsmodels.stats.outliers_influence import MLEInfluence
|
|
return MLEInfluence(self)
|
|
|
|
def get_diagnostic(self, y_max=None):
|
|
"""
|
|
Get instance of class with specification and diagnostic methods
|
|
|
|
experimental, API of Diagnostic classes will change
|
|
|
|
Returns
|
|
-------
|
|
PoissonDiagnostic instance
|
|
The instance has methods to perform specification and diagnostic
|
|
tests and plots.
|
|
See Also
|
|
--------
|
|
statsmodels.discrete.diagnostic.PoissonDiagnostic
"""
|
|
from statsmodels.discrete.diagnostic import (
|
|
PoissonDiagnostic)
|
|
return PoissonDiagnostic(self, y_max=y_max)
|
|
|
|
|
|
class L1PoissonResults(L1CountResults, PoissonResults):
|
|
pass
|
|
|
|
class L1NegativeBinomialResults(L1CountResults, NegativeBinomialResults):
|
|
pass
|
|
|
|
class L1GeneralizedPoissonResults(L1CountResults, GeneralizedPoissonResults):
|
|
pass
|
|
|
|
class OrderedResults(DiscreteResults):
|
|
__doc__ = _discrete_results_docs % {"one_line_description" : "A results class for ordered discrete data." , "extra_attr" : ""}
|
|
pass
|
|
|
|
class BinaryResults(DiscreteResults):
|
|
__doc__ = _discrete_results_docs % {"one_line_description" : "A results class for binary data", "extra_attr" : ""}
|
|
|
|
def pred_table(self, threshold=.5):
|
|
"""
|
|
Prediction table
|
|
|
|
Parameters
|
|
----------
|
|
threshold : scalar
|
|
Number between 0 and 1. Threshold above which a prediction is
|
|
considered 1 and below which a prediction is considered 0.
|
|
|
|
Notes
|
|
-----
|
|
pred_table[i,j] refers to the number of times "i" was observed and
|
|
the model predicted "j". Correct predictions are along the diagonal.
|
|
"""
|
|
model = self.model
|
|
actual = model.endog
|
|
pred = np.array(self.predict() > threshold, dtype=float)
|
|
bins = np.array([0, 0.5, 1])
|
|
return np.histogram2d(actual, pred, bins=bins)[0]
|
|
|
|
@Appender(DiscreteResults.summary.__doc__)
|
|
def summary(self, yname=None, xname=None, title=None, alpha=.05,
|
|
yname_list=None):
|
|
smry = super().summary(yname, xname, title, alpha,
|
|
yname_list)
|
|
fittedvalues = self.model.cdf(self.fittedvalues)
|
|
absprederror = np.abs(self.model.endog - fittedvalues)
|
|
predclose_sum = (absprederror < 1e-4).sum()
|
|
predclose_frac = predclose_sum / len(fittedvalues)
|
|
|
|
# add warnings/notes
|
|
etext = []
|
|
if predclose_sum == len(fittedvalues): # TODO: nobs?
|
|
wstr = "Complete Separation: The results show that there is"
|
|
wstr += "complete separation or perfect prediction.\n"
|
|
wstr += "In this case the Maximum Likelihood Estimator does "
|
|
wstr += "not exist and the parameters\n"
|
|
wstr += "are not identified."
|
|
etext.append(wstr)
|
|
elif predclose_frac > 0.1: # TODO: get better diagnosis
|
|
wstr = "Possibly complete quasi-separation: A fraction "
|
|
wstr += "%4.2f of observations can be\n" % predclose_frac
|
|
wstr += "perfectly predicted. This might indicate that there "
|
|
wstr += "is complete\nquasi-separation. In this case some "
|
|
wstr += "parameters will not be identified."
|
|
etext.append(wstr)
|
|
if etext:
|
|
smry.add_extra_txt(etext)
|
|
return smry
|
|
|
|
@cache_readonly
|
|
def resid_dev(self):
|
|
"""
|
|
Deviance residuals
|
|
|
|
Notes
|
|
-----
|
|
Deviance residuals are defined
|
|
|
|
.. math:: d_j = \\pm\\left(2\\left[Y_j\\ln\\left(\\frac{Y_j}{M_jp_j}\\right) + (M_j - Y_j)\\ln\\left(\\frac{M_j-Y_j}{M_j(1-p_j)} \\right) \\right] \\right)^{1/2}
|
|
where
|
|
|
|
:math:`p_j = cdf(X\\beta)` and :math:`M_j` is the total number of
|
|
observations sharing the covariate pattern :math:`j`.
|
|
|
|
For now :math:`M_j` is always set to 1.
|
|
"""
|
|
#These are the deviance residuals
|
|
#model = self.model
|
|
endog = self.model.endog
|
|
#exog = model.exog
|
|
# M = # of individuals that share a covariate pattern
|
|
# so M[i] = 2 for i = two share a covariate pattern
|
|
M = 1
|
|
p = self.predict()
|
|
#Y_0 = np.where(exog == 0)
|
|
#Y_M = np.where(exog == M)
|
|
#NOTE: Common covariate patterns are not yet handled
|
|
res = -(1-endog)*np.sqrt(2*M*np.abs(np.log(1-p))) + \
|
|
endog*np.sqrt(2*M*np.abs(np.log(p)))
|
|
return res
|
|
|
|
@cache_readonly
|
|
def resid_pearson(self):
|
|
"""
|
|
Pearson residuals
|
|
|
|
Notes
|
|
-----
|
|
Pearson residuals are defined to be
|
|
|
|
.. math:: r_j = \\frac{(y - M_jp_j)}{\\sqrt{M_jp_j(1-p_j)}}
|
|
|
|
where :math:`p_j=cdf(X\\beta)` and :math:`M_j` is the total number of
|
|
observations sharing the covariate pattern :math:`j`.
|
|
|
|
For now :math:`M_j` is always set to 1.
|
|
"""
|
|
# Pearson residuals
|
|
#model = self.model
|
|
endog = self.model.endog
|
|
#exog = model.exog
|
|
# M = # of individuals that share a covariate pattern
|
|
# so M[i] = 2 for i = two share a covariate pattern
|
|
# use unique row pattern?
|
|
M = 1
|
|
p = self.predict()
|
|
return (endog - M*p)/np.sqrt(M*p*(1-p))
|
|
|
|
@cache_readonly
|
|
def resid_response(self):
|
|
"""
|
|
The response residuals
|
|
|
|
Notes
|
|
-----
|
|
Response residuals are defined to be
|
|
|
|
.. math:: y - p
|
|
|
|
where :math:`p=cdf(X\\beta)`.
|
|
"""
|
|
return self.model.endog - self.predict()
|
|
|
|
|
|
class LogitResults(BinaryResults):
|
|
__doc__ = _discrete_results_docs % {
|
|
"one_line_description": "A results class for Logit Model",
|
|
"extra_attr": ""}
|
|
|
|
@cache_readonly
|
|
def resid_generalized(self):
|
|
"""
|
|
Generalized residuals
|
|
|
|
Notes
|
|
-----
|
|
The generalized residuals for the Logit model are defined
|
|
|
|
.. math:: y - p
|
|
|
|
where :math:`p=cdf(X\\beta)`. This is the same as the `resid_response`
|
|
for the Logit model.
|
|
"""
|
|
# Generalized residuals
|
|
return self.model.endog - self.predict()
|
|
|
|
def get_influence(self):
|
|
"""
|
|
Get an instance of MLEInfluence with influence and outlier measures
|
|
|
|
Returns
|
|
-------
|
|
infl : MLEInfluence instance
|
|
The instance has methods to calculate the main influence and
|
|
outlier measures as attributes.
|
|
|
|
See Also
|
|
--------
|
|
statsmodels.stats.outliers_influence.MLEInfluence
|
|
"""
|
|
from statsmodels.stats.outliers_influence import MLEInfluence
|
|
return MLEInfluence(self)
|
|
|
|
|
|
class ProbitResults(BinaryResults):
|
|
__doc__ = _discrete_results_docs % {
|
|
"one_line_description": "A results class for Probit Model",
|
|
"extra_attr": ""}
|
|
|
|
@cache_readonly
|
|
def resid_generalized(self):
|
|
"""
|
|
Generalized residuals
|
|
|
|
Notes
|
|
-----
|
|
The generalized residuals for the Probit model are defined
|
|
|
|
.. math:: y\\frac{\\phi(X\\beta)}{\\Phi(X\\beta)}-(1-y)\\frac{\\phi(X\\beta)}{1-\\Phi(X\\beta)}
|
|
"""
|
|
# generalized residuals
|
|
model = self.model
|
|
endog = model.endog
|
|
XB = self.predict(which="linear")
|
|
pdf = model.pdf(XB)
|
|
cdf = model.cdf(XB)
|
|
return endog * pdf/cdf - (1-endog)*pdf/(1-cdf)
|
|
|
|
class L1BinaryResults(BinaryResults):
|
|
__doc__ = _discrete_results_docs % {"one_line_description" :
|
|
"Results instance for binary data fit by l1 regularization",
|
|
"extra_attr" : _l1_results_attr}
|
|
def __init__(self, model, bnryfit):
|
|
super().__init__(model, bnryfit)
|
|
# self.trimmed is a boolean array indicating whether the corresponding
# entry in params has been zeroed out (trimmed).
self.trimmed = bnryfit.mle_retvals['trimmed']
|
|
self.nnz_params = (~self.trimmed).sum()
|
|
self.df_model = self.nnz_params - 1
|
|
self.df_resid = float(self.model.endog.shape[0] - self.nnz_params)
|
|
|
|
|
|
class MultinomialResults(DiscreteResults):
|
|
__doc__ = _discrete_results_docs % {"one_line_description" :
|
|
"A results class for multinomial data", "extra_attr" : ""}
|
|
|
|
def __init__(self, model, mlefit):
|
|
super().__init__(model, mlefit)
|
|
self.J = model.J
|
|
self.K = model.K
|
|
|
|
@staticmethod
|
|
def _maybe_convert_ynames_int(ynames):
|
|
# see if they're integers
|
|
issue_warning = False
|
|
msg = ('endog contains values that are not int-like. Using string '
'representation of the values. Use integer-valued endog to '
'suppress this warning.')
|
|
for i in ynames:
|
|
try:
|
|
if ynames[i] % 1 == 0:
|
|
ynames[i] = str(int(ynames[i]))
|
|
else:
|
|
issue_warning = True
|
|
ynames[i] = str(ynames[i])
|
|
except TypeError:
|
|
ynames[i] = str(ynames[i])
|
|
if issue_warning:
|
|
warnings.warn(msg, SpecificationWarning)
|
|
|
|
return ynames
|
|
|
|
def _get_endog_name(self, yname, yname_list, all=False):
|
|
"""
|
|
If all is False, the first variable name is dropped
|
|
"""
|
|
model = self.model
|
|
if yname is None:
|
|
yname = model.endog_names
|
|
if yname_list is None:
|
|
ynames = model._ynames_map
|
|
ynames = self._maybe_convert_ynames_int(ynames)
|
|
# use range below to ensure sortedness
|
|
ynames = [ynames[key] for key in range(int(model.J))]
|
|
ynames = ['='.join([yname, name]) for name in ynames]
|
|
if not all:
|
|
yname_list = ynames[1:] # assumes first variable is dropped
|
|
else:
|
|
yname_list = ynames
|
|
return yname, yname_list
|
|
|
|
def pred_table(self):
|
|
"""
|
|
Returns the J x J prediction table.
|
|
|
|
Notes
|
|
-----
|
|
pred_table[i,j] refers to the number of times "i" was observed and
|
|
the model predicted "j". Correct predictions are along the diagonal.
|
|
"""
|
|
ju = self.model.J - 1 # highest index
|
|
# these are the actual, predicted indices
|
|
#idx = lzip(self.model.endog, self.predict().argmax(1))
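# Editor's note: the bin edges 0, 0.5, 1.5, ..., ju-0.5, ju place each
# integer outcome 0..ju in its own bin of the 2-D histogram.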
|
|
bins = np.concatenate(([0], np.linspace(0.5, ju - 0.5, ju), [ju]))
|
|
return np.histogram2d(self.model.endog, self.predict().argmax(1),
|
|
bins=bins)[0]
|
|
|
|
@cache_readonly
|
|
def bse(self):
|
|
bse = np.sqrt(np.diag(self.cov_params()))
|
|
return bse.reshape(self.params.shape, order='F')
|
|
|
|
@cache_readonly
|
|
def aic(self):
|
|
return -2*(self.llf - (self.df_model+self.model.J-1))
|
|
|
|
@cache_readonly
|
|
def bic(self):
|
|
return -2*self.llf + np.log(self.nobs)*(self.df_model+self.model.J-1)
|
|
|
|
def conf_int(self, alpha=.05, cols=None):
|
|
confint = super(DiscreteResults, self).conf_int(alpha=alpha,
|
|
cols=cols)
|
|
return confint.transpose(2,0,1)
|
|
|
|
def get_prediction(self):
|
|
"""Not implemented for Multinomial
|
|
"""
|
|
raise NotImplementedError
|
|
|
|
def margeff(self):
|
|
raise NotImplementedError("Use get_margeff instead")
|
|
|
|
@cache_readonly
|
|
def resid_misclassified(self):
|
|
"""
|
|
Residuals indicating which observations are misclassified.
|
|
|
|
Notes
|
|
-----
|
|
The residuals for the multinomial model are defined as
|
|
|
|
.. math:: argmax(y_i) \\neq argmax(p_i)
|
|
|
|
where :math:`argmax(y_i)` is the index of the category for the
|
|
endogenous variable and :math:`argmax(p_i)` is the index of the
|
|
predicted probabilities for each category. That is, the residual
|
|
is a binary indicator that is 0 if the category with the highest
|
|
predicted probability is the same as that of the observed variable
|
|
and 1 otherwise.
|
|
"""
|
|
# it's 0 or 1 - 0 for correct prediction and 1 for a missed one
|
|
return (self.model.wendog.argmax(1) !=
|
|
self.predict().argmax(1)).astype(float)

    def summary2(self, alpha=0.05, float_format="%.4f"):
        """Experimental function to summarize regression results

        Parameters
        ----------
        alpha : float
            significance level for the confidence intervals
        float_format : str
            print format for floats in parameters summary

        Returns
        -------
        smry : Summary instance
            this holds the summary tables and text, which can be printed or
            converted to various output formats.

        See Also
        --------
        statsmodels.iolib.summary2.Summary : class to hold summary results
        """

        from statsmodels.iolib import summary2
        smry = summary2.Summary()
        smry.add_dict(summary2.summary_model(self))
        # One data frame per value of endog
        eqn = self.params.shape[1]
        confint = self.conf_int(alpha)
        for i in range(eqn):
            coefs = summary2.summary_params((self, self.params[:, i],
                                             self.bse[:, i],
                                             self.tvalues[:, i],
                                             self.pvalues[:, i],
                                             confint[i]),
                                            alpha=alpha)
            # Header must show value of endog
            level_str = self.model.endog_names + ' = ' + str(i)
            coefs[level_str] = coefs.index
            coefs = coefs.iloc[:, [-1, 0, 1, 2, 3, 4, 5]]
            smry.add_df(coefs, index=False, header=True,
                        float_format=float_format)
        smry.add_title(results=self)
        return smry
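
    # Usage sketch (illustrative; `df` is a hypothetical DataFrame whose
    # 'choice' column holds integer-coded outcome categories):
    #
    #     import statsmodels.api as sm
    #     exog = sm.add_constant(df[["income", "age"]])
    #     res = sm.MNLogit(df["choice"], exog).fit()
    #     print(res.summary2(alpha=0.10, float_format="%.3f"))
    #
    # One coefficient table is produced per non-reference outcome level.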


class L1MultinomialResults(MultinomialResults):
    __doc__ = _discrete_results_docs % {"one_line_description" :
            "A results class for multinomial data fit by l1 regularization",
            "extra_attr" : _l1_results_attr}

    def __init__(self, model, mlefit):
        super().__init__(model, mlefit)
        # self.trimmed is a boolean array telling whether or not each entry
        # in params has been zeroed out.
        self.trimmed = mlefit.mle_retvals['trimmed']
        self.nnz_params = (~self.trimmed).sum()

        # Note: J-1 constants
        self.df_model = self.nnz_params - (self.model.J - 1)
        self.df_resid = float(self.model.endog.shape[0] - self.nnz_params)
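        # Degrees-of-freedom accounting: nnz_params counts the nonzero
        # coefficients remaining across all J - 1 equations after trimming;
        # subtracting the J - 1 intercepts keeps df_model on the same
        # convention as the unpenalized results, and df_resid is nobs minus
        # all nonzero parameters.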


#### Results Wrappers ####

class OrderedResultsWrapper(lm.RegressionResultsWrapper):
    pass


wrap.populate_wrapper(OrderedResultsWrapper, OrderedResults)
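
# Each results class above is paired with a thin ResultsWrapper subclass and
# registered with wrap.populate_wrapper.  The wrapper is what model.fit()
# hands back to the user: it re-attaches pandas row/column labels to array
# outputs when the model was built from pandas inputs, while delegating all
# computation to the underlying results instance.
#
# Illustrative sketch (assumes `y` and `X` are pandas objects):
#
#     res = Poisson(y, X).fit()   # returns a PoissonResultsWrapper
#     res.params                  # Series indexed by X's column names
#     res._results                # the unwrapped PoissonResults object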


class CountResultsWrapper(lm.RegressionResultsWrapper):
    pass


wrap.populate_wrapper(CountResultsWrapper, CountResults)


class NegativeBinomialResultsWrapper(lm.RegressionResultsWrapper):
    pass


wrap.populate_wrapper(NegativeBinomialResultsWrapper,
                      NegativeBinomialResults)


class NegativeBinomialPResultsWrapper(lm.RegressionResultsWrapper):
    pass


wrap.populate_wrapper(NegativeBinomialPResultsWrapper,
                      NegativeBinomialPResults)


class GeneralizedPoissonResultsWrapper(lm.RegressionResultsWrapper):
    pass


wrap.populate_wrapper(GeneralizedPoissonResultsWrapper,
                      GeneralizedPoissonResults)


class PoissonResultsWrapper(lm.RegressionResultsWrapper):
    pass


wrap.populate_wrapper(PoissonResultsWrapper, PoissonResults)


class L1CountResultsWrapper(lm.RegressionResultsWrapper):
    pass


class L1PoissonResultsWrapper(lm.RegressionResultsWrapper):
    pass


wrap.populate_wrapper(L1PoissonResultsWrapper, L1PoissonResults)


class L1NegativeBinomialResultsWrapper(lm.RegressionResultsWrapper):
    pass


wrap.populate_wrapper(L1NegativeBinomialResultsWrapper,
                      L1NegativeBinomialResults)


class L1GeneralizedPoissonResultsWrapper(lm.RegressionResultsWrapper):
    pass


wrap.populate_wrapper(L1GeneralizedPoissonResultsWrapper,
                      L1GeneralizedPoissonResults)


class BinaryResultsWrapper(lm.RegressionResultsWrapper):
    _attrs = {"resid_dev": "rows",
              "resid_generalized": "rows",
              "resid_pearson": "rows",
              "resid_response": "rows"
              }
    _wrap_attrs = wrap.union_dicts(lm.RegressionResultsWrapper._wrap_attrs,
                                   _attrs)
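    # The "rows" tag marks each of these residual attributes as a 1-d array
    # aligned with the observations, so the wrapper returns it as a pandas
    # Series carrying the original row index when the model was built from
    # pandas data.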


wrap.populate_wrapper(BinaryResultsWrapper, BinaryResults)


class L1BinaryResultsWrapper(lm.RegressionResultsWrapper):
    pass


wrap.populate_wrapper(L1BinaryResultsWrapper, L1BinaryResults)


class MultinomialResultsWrapper(lm.RegressionResultsWrapper):
    _attrs = {"resid_misclassified": "rows"}
    _wrap_attrs = wrap.union_dicts(lm.RegressionResultsWrapper._wrap_attrs,
                                   _attrs)
    _methods = {'conf_int': 'multivariate_confint'}
    _wrap_methods = wrap.union_dicts(lm.RegressionResultsWrapper._wrap_methods,
                                     _methods)
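    # conf_int on MultinomialResults returns a 3-d array (one (K, 2) block
    # per equation, see conf_int above), so it is mapped to the
    # 'multivariate_confint' wrapping rather than the default treatment used
    # for 1-d parameter vectors.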


wrap.populate_wrapper(MultinomialResultsWrapper, MultinomialResults)


class L1MultinomialResultsWrapper(lm.RegressionResultsWrapper):
    pass


wrap.populate_wrapper(L1MultinomialResultsWrapper, L1MultinomialResults)