__all__ = ["TruncatedLFPoisson", "TruncatedLFNegativeBinomialP",
           "HurdleCountModel"]

import warnings
import numpy as np
import statsmodels.base.model as base
import statsmodels.base.wrapper as wrap
import statsmodels.regression.linear_model as lm
from statsmodels.distributions.discrete import (
    truncatedpoisson,
    truncatednegbin,
    )
from statsmodels.discrete.discrete_model import (
    DiscreteModel,
    CountModel,
    CountResults,
    L1CountResults,
    Poisson,
    NegativeBinomialP,
    GeneralizedPoisson,
    _discrete_results_docs,
    )
from statsmodels.tools.numdiff import approx_hess
from statsmodels.tools.decorators import cache_readonly
from statsmodels.tools.sm_exceptions import ConvergenceWarning
from copy import deepcopy


class TruncatedLFGeneric(CountModel):
    __doc__ = """
    Generic Truncated model for count data

    .. versionadded:: 0.14.0

    %(params)s
    %(extra_params)s

    Attributes
    ----------
    endog : array
        A reference to the endogenous response variable
    exog : array
        A reference to the exogenous design.
    truncation : int, optional
        Truncation parameter specify truncation point out of the support
        of the distribution. pmf(k) = 0 for k <= truncation
    """ % {'params': base._model_params_doc,
           'extra_params':
           """offset : array_like
        Offset is added to the linear prediction with coefficient equal to 1.
    exposure : array_like
        Log(exposure) is added to the linear prediction with coefficient
        equal to 1.

    """ + base._missing_param_doc}

    def __init__(self, endog, exog, truncation=0, offset=None,
                 exposure=None, missing='none', **kwargs):
        """Build the model and discard observations in the truncated region.

        After the parent class has processed the data, every observation
        with ``endog <= truncation`` is removed from ``endog``, ``exog`` and,
        when they were supplied, from ``offset`` and ``exposure``.
        """
        super().__init__(endog, exog, offset=offset, exposure=exposure,
                         missing=missing, **kwargs)

        # only counts strictly above the truncation point are kept
        keep = self.endog > truncation
        self.exog, self.endog = self.exog[keep], self.endog[keep]
        for name, supplied in (("offset", offset), ("exposure", exposure)):
            if supplied is not None:
                setattr(self, name, getattr(self, name)[keep])

        # The truncation point is stored under two names: ``trunc`` is read
        # by the likelihood and score code, ``truncation`` is registered as
        # an init key because it is needed for recreating the model.
        self.trunc = self.truncation = truncation
        # df_resid cannot be set correctly here, not enough information
        self._init_keys.extend(['truncation'])
        self._null_drop_keys = []

    def loglike(self, params):
        """
        Total log-likelihood of the Generic Truncated model

        Parameters
        ----------
        params : array-like
            The parameters of the model.

        Returns
        -------
        loglike : float
            Sum over all observations of the values returned by
            `loglikeobs` evaluated at `params`.
        """
        llf_obs = self.loglikeobs(params)
        return np.sum(llf_obs)

    def loglikeobs(self, params):
        """
        Per-observation log-likelihood of the Generic Truncated model

        The value is the log-likelihood of the base model minus the log of
        the probability that the base distribution puts outside of the
        truncated region ``0, ..., trunc``.

        Parameters
        ----------
        params : array-like
            The parameters of the model.

        Returns
        -------
        loglike : ndarray (nobs,)
            The log likelihood for each observation of the model evaluated
            at `params`.
        """
        # probability of the base distribution on the truncated counts
        y_truncated = np.arange(self.trunc + 1)
        prob_truncated = self.predict(
            params, which="prob-base", y_values=y_truncated).sum(-1)

        # log(1 - prob_truncated); the log is only evaluated where its
        # argument is positive so that no warnings are emitted:
        #   prob == 1 -> -inf,  prob > 1 -> nan
        log_prob_kept = np.full_like(prob_truncated, -np.inf)
        log_prob_kept[prob_truncated > 1] = np.nan
        below_one = prob_truncated < 1
        log_prob_kept[below_one] = np.log(1 - prob_truncated[below_one])

        return self.model_main.loglikeobs(params) - log_prob_kept

    def score_obs(self, params):
        """
        Generic Truncated model score (gradient) for each observation

        Parameters
        ----------
        params : array-like
            The parameters of the model

        Returns
        -------
        score : ndarray, 2-D
            One row per observation with the first derivative of that
            observation's log-likelihood, evaluated at `params`
        """
        score_main = self.model_main.score_obs(params)

        # The stored ``self.exposure`` is treated as log-exposure elsewhere
        # in this class (``fit`` adds it to the offset).  Passing it on as
        # ``exposure=`` to a new base model would log-transform it a second
        # time, so it is folded into the offset here, as ``fit`` does.
        offset = getattr(self, "offset", 0) + getattr(self, "exposure", 0)
        if np.size(offset) == 1 and offset == 0:
            offset = None

        pmf = np.zeros_like(self.endog, dtype=np.float64)
        # TODO: can we rewrite to following without creating new models
        score_trunc = np.zeros_like(score_main, dtype=np.float64)
        for i in range(self.trunc + 1):
            # base model in which every observation has the truncated count i
            model = self.model_main.__class__(
                np.ones_like(self.endog) * i,
                self.exog,
                offset=offset,
                )
            pmf_i = np.exp(model.loglikeobs(params))
            # d pmf_i / d params = pmf_i * d log(pmf_i) / d params
            score_trunc += (model.score_obs(params).T * pmf_i).T
            pmf += pmf_i

        # derivative of  llf_main - log(1 - pmf)
        dparams = score_main + (score_trunc.T / (1 - pmf)).T

        return dparams

    def score(self, params):
        """
        Generic Truncated model score (gradient) vector of the log-likelihood

        Parameters
        ----------
        params : array-like
            The parameters of the model

        Returns
        -------
        score : ndarray, 1-D
            Column sums of `score_obs`, i.e. the first derivative of the
            loglikelihood function, evaluated at `params`
        """
        score_by_obs = self.score_obs(params)
        return score_by_obs.sum(0)

    def fit(self, start_params=None, method='bfgs', maxiter=35,
            full_output=1, disp=1, callback=None,
            cov_type='nonrobust', cov_kwds=None, use_t=None, **kwargs):
        # No docstring here: ``fit.__doc__`` is assigned from
        # DiscreteModel.fit directly after this definition.
        if start_params is None:
            # Starting values are the estimates of the untruncated base
            # model on the same data; offset and exposure are combined into
            # a single offset for that auxiliary model.
            offset = getattr(self, "offset", 0) + getattr(self, "exposure", 0)
            if np.size(offset) == 1 and offset == 0:
                offset = None
            model = self.model_main.__class__(self.endog, self.exog,
                                              offset=offset)
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=ConvergenceWarning)
                start_params = model.fit(disp=0).params

        # Todo: check how we can do this in __init__
        k_params = self.df_model + 1 + self.k_extra
        self.df_resid = self.endog.shape[0] - k_params

        if callback is None:
            # Default stays a no-op callback, as before.
            # NOTE(review): presumably this is meant to bypass a default
            # callback of the parent class -- confirm.
            def callback(x):
                return x

        # A user supplied callback is now forwarded instead of being
        # silently replaced by the no-op.
        mlefit = super().fit(
            start_params=start_params,
            method=method,
            maxiter=maxiter,
            disp=disp,
            full_output=full_output,
            callback=callback,
            **kwargs
            )

        zipfit = self.result_class(self, mlefit._results)
        result = self.result_class_wrapper(zipfit)

        if cov_kwds is None:
            cov_kwds = {}

        # update the covariance of the result in place (use_self=True)
        result._get_robustcov_results(cov_type=cov_type,
                                      use_self=True, use_t=use_t, **cov_kwds)
        return result

    fit.__doc__ = DiscreteModel.fit.__doc__

    def fit_regularized(
            self, start_params=None, method='l1',
            maxiter='defined_by_method', full_output=1, disp=1, callback=None,
            alpha=0, trim_mode='auto', auto_trim_tol=0.01, size_trim_tol=1e-4,
            qc_tol=0.03, **kwargs):
        # No docstring here: ``fit_regularized.__doc__`` is assigned from
        # DiscreteModel.fit_regularized directly after this definition.

        if np.size(alpha) == 1 and alpha != 0:
            # A scalar penalty is expanded to one weight per parameter.  The
            # parameter vector holds the exog coefficients followed by
            # ``k_extra`` extra parameters, so the weight vector needs the
            # same length.  Extra (e.g. dispersion) parameters are left
            # unpenalized.
            # NOTE(review): leaving the extra parameters unpenalized follows
            # the convention of the untruncated base models -- confirm.
            k_params = self.exog.shape[1] + self.k_extra
            alpha = alpha * np.ones(k_params)
            if self.k_extra:
                alpha[-self.k_extra:] = 0

        alpha_p = alpha
        if start_params is None:
            # starting values: penalized fit of the untruncated base model
            offset = getattr(self, "offset", 0) + getattr(self, "exposure", 0)
            if np.size(offset) == 1 and offset == 0:
                offset = None
            model = self.model_main.__class__(self.endog, self.exog,
                                              offset=offset)
            start_params = model.fit_regularized(
                start_params=start_params, method=method, maxiter=maxiter,
                full_output=full_output, disp=0, callback=callback,
                alpha=alpha_p, trim_mode=trim_mode,
                auto_trim_tol=auto_trim_tol,
                size_trim_tol=size_trim_tol, qc_tol=qc_tol, **kwargs).params
        # CountModel is skipped in the method resolution on purpose: the
        # call goes to the next class after CountModel.
        cntfit = super(CountModel, self).fit_regularized(
                start_params=start_params, method=method, maxiter=maxiter,
                full_output=full_output, disp=disp, callback=callback,
                alpha=alpha, trim_mode=trim_mode, auto_trim_tol=auto_trim_tol,
                size_trim_tol=size_trim_tol, qc_tol=qc_tol, **kwargs)

        if method in ['l1', 'l1_cvxopt_cp']:
            discretefit = self.result_class_reg(self, cntfit)
        else:
            raise TypeError(
                    "argument method == %s, which is not handled" % method)

        return self.result_class_reg_wrapper(discretefit)

    fit_regularized.__doc__ = DiscreteModel.fit_regularized.__doc__

    def hessian(self, params):
        """
        Generic Truncated model Hessian matrix of the loglikelihood

        The Hessian is obtained by numerical differentiation of `loglike`.

        Parameters
        ----------
        params : array-like
            The parameters of the model

        Returns
        -------
        hess : ndarray, (k_vars, k_vars)
            The Hessian, second derivative of loglikelihood function,
            evaluated at `params`
        """
        objective = self.loglike
        return approx_hess(params, objective)

    def predict(self, params, exog=None, exposure=None, offset=None,
                which='mean', y_values=None):
        """
        Predict response variable or other statistic given exogenous variables.

        Parameters
        ----------
        params : array_like
            The parameters of the model.
        exog : ndarray, optional
            Explanatory variables for the main count model.
            If ``exog`` is None, then the data from the model will be used.
        offset : ndarray, optional
            Offset is added to the linear predictor of the mean function with
            coefficient equal to 1.
            Default is zero if exog is not None, and the model offset if exog
            is None.
        exposure : ndarray, optional
            Log(exposure) is added to the linear predictor with coefficient
            equal to 1. If exposure is specified, then it will be logged by
            the method. The user does not need to log it first.
            Default is one if exog is not None, and it is the model exposure
            if exog is None.
        which : str (optional)
            Statistic to predict. Default is 'mean'.

            - 'mean' : the conditional expectation of endog E(y | x)
            - 'mean-main' : mean parameter of truncated count model.
              Note, this is not the mean of the truncated distribution.
            - 'linear' : the linear predictor of the truncated count model.
            - 'var' : returns the estimated variance of endog implied by the
              model.
            - 'prob-trunc' : probability of truncation. This is the
              probability that the untruncated base distribution assigns to
              the counts 0, ..., truncation, i.e. the probability of a zero
              count if ``truncation`` is 0.
            - 'prob' : probabilities of each count from 0 to max(endog), or
              for y_values if those are provided. This is a multivariate
              return (2-dim when predicting for several observations).
              The probabilities in the truncated region are zero.
            - 'prob-base' : probabilities for untruncated base distribution.
              The probabilities are for each count from 0 to max(endog), or
              for y_values if those are provided. This is a multivariate
              return (2-dim when predicting for several observations).

        y_values : array_like
            Values of the random variable endog at which pmf is evaluated.
            Only used if ``which`` is "prob" or "prob-base".

        Returns
        -------
        predicted values

        Raises
        ------
        ValueError
            If ``which`` is not one of the options listed above, if
            ``which="mean"`` and the truncation point is smaller than -1, or
            if ``which="prob"`` and the model has more than one extra
            parameter.

        Notes
        -----
        If exposure is specified, then it will be logged by the method.
        The user does not need to log it first.
        """
        exog, offset, exposure = self._get_predict_arrays(
            exog=exog,
            offset=offset,
            exposure=exposure
            )

        # only the leading coefficients belong to exog, extra parameters
        # (if any) are at the end of params
        fitted = np.dot(exog, params[:exog.shape[1]])
        linpred = fitted + exposure + offset

        if which == 'mean':
            mu = np.exp(linpred)
            if self.truncation == 0:
                prob_main = self.model_main._prob_nonzero(mu, params)
                return mu / prob_main
            elif self.truncation == -1:
                # nothing is truncated, mean of the base distribution
                return mu
            elif self.truncation > 0:
                probs = self._predict_probs_tregion(
                    params, exog, exposure, offset)
                prob_tregion = probs.sum(1)
                mean_tregion = (np.arange(self.truncation + 1) * probs).sum(1)
                mean = (mu - mean_tregion) / (1 - prob_tregion)
                return mean
            else:
                raise ValueError("unsupported self.truncation")
        elif which == 'linear':
            return linpred
        elif which == 'mean-main':
            return np.exp(linpred)
        elif which == 'prob':
            if y_values is not None:
                counts = np.atleast_2d(y_values)
            else:
                counts = np.atleast_2d(np.arange(0, np.max(self.endog)+1))
            mu = np.exp(linpred)[:, None]
            if self.k_extra == 0:
                # poisson, no extra params
                probs = self.model_dist.pmf(counts, mu, self.trunc)
            elif self.k_extra == 1:
                p = self.model_main.parameterization
                probs = self.model_dist.pmf(counts, mu, params[-1],
                                            p, self.trunc)
            else:
                raise ValueError("k_extra is not 0 or 1")
            return probs
        elif which == 'prob-trunc':
            if self.truncation < 0:
                # no count is truncated
                return np.zeros_like(linpred, dtype=np.float64)
            probs = self._predict_probs_tregion(
                params, exog, exposure, offset)
            return probs.sum(1)
        elif which == 'prob-base':
            if y_values is not None:
                counts = np.asarray(y_values)
            else:
                counts = np.arange(0, np.max(self.endog)+1)

            # exposure is on the log scale here, the base model logs it again
            probs = self.model_main.predict(
                params, exog=exog, exposure=np.exp(exposure),
                offset=offset, which="prob", y_values=counts)
            return probs
        elif which == 'var':
            mu = np.exp(linpred)
            probs = self._predict_probs_tregion(
                params, exog, exposure, offset)
            prob_tregion = probs.sum(1)
            mean_tregion = (np.arange(self.truncation + 1) * probs).sum(1)
            mean = (mu - mean_tregion) / (1 - prob_tregion)
            mnc2_tregion = (np.arange(self.truncation + 1)**2 *
                            probs).sum(1)
            vm = self.model_main._var(mu, params)
            # uncentered 2nd moment
            mnc2 = (mu**2 + vm - mnc2_tregion) / (1 - prob_tregion)
            v = mnc2 - mean**2
            return v
        else:
            raise ValueError(
                "argument which == %s not handled" % which)

    def _predict_probs_tregion(self, params, exog, exposure, offset):
        """Base model probabilities of the counts 0, ..., truncation.

        ``exog``, ``exposure`` and ``offset`` are the arrays used by
        ``predict`` for the linear predictor; ``exposure`` is therefore on
        the log scale and is exponentiated before it is handed to the base
        model.  The return has one column per truncated count.
        """
        counts = np.atleast_2d(np.arange(0, self.truncation + 1))
        return self.model_main.predict(
            params, exog=exog, exposure=np.exp(exposure),
            offset=offset, which="prob", y_values=counts)


class TruncatedLFPoisson(TruncatedLFGeneric):
    __doc__ = """
    Truncated Poisson model for count data

    .. versionadded:: 0.14.0

    %(params)s
    %(extra_params)s

    Attributes
    ----------
    endog : array
        A reference to the endogenous response variable
    exog : array
        A reference to the exogenous design.
    truncation : int, optional
        Truncation parameter specify truncation point out of the support
        of the distribution. pmf(k) = 0 for k <= truncation
    """ % {'params': base._model_params_doc,
           'extra_params':
           """offset : array_like
        Offset is added to the linear prediction with coefficient equal to 1.
    exposure : array_like
        Log(exposure) is added to the linear prediction with coefficient
        equal to 1.

    """ + base._missing_param_doc}

    def __init__(self, endog, exog, offset=None, exposure=None,
                 truncation=0, missing='none', **kwargs):
        super().__init__(
            endog,
            exog,
            offset=offset,
            exposure=exposure,
            truncation=truncation,
            missing=missing,
            **kwargs
            )
        # The stored ``self.exposure`` is treated as log-exposure by the
        # generic class (``fit`` adds it to the offset).  The base model
        # gets the exposure on its original scale, otherwise it would be
        # log-transformed a second time.
        log_exposure = getattr(self, "exposure", None)
        exposure_main = None if log_exposure is None else np.exp(log_exposure)
        # untruncated base model on the data left after truncation
        self.model_main = Poisson(self.endog, self.exog,
                                  exposure=exposure_main,
                                  offset=getattr(self, "offset", None),
                                  )
        self.model_dist = truncatedpoisson

        self.result_class = TruncatedLFPoissonResults
        self.result_class_wrapper = TruncatedLFGenericResultsWrapper
        self.result_class_reg = L1TruncatedLFGenericResults
        self.result_class_reg_wrapper = L1TruncatedLFGenericResultsWrapper

    def _predict_mom_trunc0(self, params, mu):
        """Predict mean and variance of zero-truncated distribution.

        experimental api, will likely be replaced by other methods

        Parameters
        ----------
        params : array_like
            The model parameters. Not used by the Poisson version, which
            has no extra parameters.
        mu : array_like
            Array of mean predictions for main model.

        Returns
        -------
        m : array_like
            Predicted conditional mean of the zero-truncated distribution.
        var_ : array_like
            Predicted conditional variance of the zero-truncated
            distribution.
        """
        w = (1 - np.exp(-mu))  # prob of no truncation, 1 - P(y=0)
        m = mu / w
        var_ = m - (1 - w) * m**2
        return m, var_


class TruncatedLFNegativeBinomialP(TruncatedLFGeneric):
    __doc__ = """
    Truncated Generalized Negative Binomial model for count data

    .. versionadded:: 0.14.0

    %(params)s
    %(extra_params)s

    Attributes
    ----------
    endog : array
        A reference to the endogenous response variable
    exog : array
        A reference to the exogenous design.
    truncation : int, optional
        Truncation parameter specify truncation point out of the support
        of the distribution. pmf(k) = 0 for k <= truncation
    """ % {'params': base._model_params_doc,
           'extra_params':
           """offset : array_like
        Offset is added to the linear prediction with coefficient equal to 1.
    exposure : array_like
        Log(exposure) is added to the linear prediction with coefficient
        equal to 1.

    """ + base._missing_param_doc}

    def __init__(self, endog, exog, offset=None, exposure=None,
                 truncation=0, p=2, missing='none', **kwargs):
        super().__init__(
            endog,
            exog,
            offset=offset,
            exposure=exposure,
            truncation=truncation,
            missing=missing,
            **kwargs
            )
        # The stored ``self.exposure`` is treated as log-exposure by the
        # generic class (``fit`` adds it to the offset).  The base model
        # gets the exposure on its original scale, otherwise it would be
        # log-transformed a second time.
        log_exposure = getattr(self, "exposure", None)
        exposure_main = None if log_exposure is None else np.exp(log_exposure)
        # untruncated base model on the data left after truncation
        self.model_main = NegativeBinomialP(
            self.endog,
            self.exog,
            exposure=exposure_main,
            offset=getattr(self, "offset", None),
            p=p
            )
        # the extra parameter(s) of the base model are appended to the
        # parameter names of this model
        self.k_extra = self.model_main.k_extra
        self.exog_names.extend(self.model_main.exog_names[-self.k_extra:])
        self.model_dist = truncatednegbin

        self.result_class = TruncatedNegativeBinomialResults
        self.result_class_wrapper = TruncatedLFGenericResultsWrapper
        self.result_class_reg = L1TruncatedLFGenericResults
        self.result_class_reg_wrapper = L1TruncatedLFGenericResultsWrapper

    def _predict_mom_trunc0(self, params, mu):
        """Predict mean and variance of zero-truncated distribution.

        experimental api, will likely be replaced by other methods

        Parameters
        ----------
        params : array_like
            The model parameters. This is only used to extract extra params
            like dispersion parameter.
        mu : array_like
            Array of mean predictions for main model.

        Returns
        -------
        m : array_like
            Predicted conditional mean of the zero-truncated distribution.
        var_ : array_like
            Predicted conditional variance of the zero-truncated
            distribution.
        """
        # note: prob_zero and vm are distribution specific, rest is generic
        # when mean of base model is mu
        alpha = params[-1]
        p = self.model_main.parameterization
        # A negative binomial with mean mu and variance mu + alpha * mu**p
        # has size parameter mu**(2 - p) / alpha and
        # P(y=0) = (1 + mu / size)**(-size).  For p = 2 the exponent reduces
        # to -1 / alpha.
        size = mu**(2 - p) / alpha
        prob_zero = (1 + alpha * mu**(p-1))**(-size)
        w = 1 - prob_zero  # prob of no truncation, 1 - P(y=0)
        m = mu / w
        vm = mu * (1 + alpha * mu**(p-1))  # variance of NBP
        # uncentered 2nd moment is vm + mu**2
        mnc2 = (mu**2 + vm) / w  # uses mnc2_tregion = 0
        var_ = mnc2 - m**2
        return m, var_


class TruncatedLFGeneralizedPoisson(TruncatedLFGeneric):
    __doc__ = """
    Truncated Generalized Poisson model for count data

    .. versionadded:: 0.14.0

    %(params)s
    %(extra_params)s

    Attributes
    ----------
    endog : array
        A reference to the endogenous response variable
    exog : array
        A reference to the exogenous design.
    truncation : int, optional
        Truncation parameter specify truncation point out of the support
        of the distribution. pmf(k) = 0 for k <= truncation
    """ % {'params': base._model_params_doc,
           'extra_params':
           """offset : array_like
        Offset is added to the linear prediction with coefficient equal to 1.
    exposure : array_like
        Log(exposure) is added to the linear prediction with coefficient
        equal to 1.

    """ + base._missing_param_doc}

    def __init__(self, endog, exog, offset=None, exposure=None,
                 truncation=0, p=2, missing='none', **kwargs):
        super().__init__(
            endog,
            exog,
            offset=offset,
            exposure=exposure,
            truncation=truncation,
            missing=missing,
            **kwargs
            )
        # The stored ``self.exposure`` is treated as log-exposure by the
        # generic class (``fit`` adds it to the offset).  The base model
        # gets the exposure on its original scale, otherwise it would be
        # log-transformed a second time.
        log_exposure = getattr(self, "exposure", None)
        exposure_main = None if log_exposure is None else np.exp(log_exposure)
        # untruncated base model on the data left after truncation
        self.model_main = GeneralizedPoisson(
            self.endog,
            self.exog,
            exposure=exposure_main,
            offset=getattr(self, "offset", None),
            p=p
            )
        # the extra parameter(s) of the base model are appended to the
        # parameter names of this model
        self.k_extra = self.model_main.k_extra
        self.exog_names.extend(self.model_main.exog_names[-self.k_extra:])
        # No truncated distribution is attached, so ``predict`` with
        # which="prob", which calls ``self.model_dist.pmf``, cannot be
        # evaluated for this model.
        self.model_dist = None
        self.result_class = TruncatedNegativeBinomialResults

        self.result_class_wrapper = TruncatedLFGenericResultsWrapper
        self.result_class_reg = L1TruncatedLFGenericResults
        self.result_class_reg_wrapper = L1TruncatedLFGenericResultsWrapper


class _RCensoredGeneric(CountModel):
    __doc__ = """
    Generic right Censored model for count data

    %(params)s
    %(extra_params)s

    Attributes
    ----------
    endog : array
        A reference to the endogenous response variable
    exog : array
        A reference to the exogenous design.
    """ % {'params': base._model_params_doc,
           'extra_params':
           """offset : array_like
        Offset is added to the linear prediction with coefficient equal to 1.
    exposure : array_like
        Log(exposure) is added to the linear prediction with coefficient
        equal to 1.

    """ + base._missing_param_doc}

    def __init__(self, endog, exog, offset=None, exposure=None,
                 missing='none', **kwargs):
        """Set up the model and record which observations are zero.

        ``zero_idx`` and ``nonzero_idx`` hold the positions of the zero and
        of the nonzero counts; they are read by ``loglikeobs`` and
        ``score_obs``.
        """
        super().__init__(
            endog,
            exog,
            offset=offset,
            exposure=exposure,
            missing=missing,
            **kwargs
            )
        # The positions are taken from the endog stored by the parent class
        # rather than from the raw argument, so that they index the same
        # array as the likelihood of ``model_main`` (which is built from
        # ``self.endog``), whatever the parent did to the raw input.
        self.zero_idx = np.nonzero(self.endog == 0)[0]
        self.nonzero_idx = np.nonzero(self.endog)[0]

    def loglike(self, params):
        """
        Total log-likelihood of the Generic Censored model

        Parameters
        ----------
        params : array-like
            The parameters of the model.

        Returns
        -------
        loglike : float
            Sum over all observations of the values returned by
            `loglikeobs` evaluated at `params`.
        """
        llf_obs = self.loglikeobs(params)
        return np.sum(llf_obs)

    def loglikeobs(self, params):
        """
        Loglikelihood for observations of Generic Censored model

        ``model_main`` is evaluated with all counts set to zero, so its
        per-observation log-likelihood is log P(y = 0).  Observations with
        a zero count contribute log P(y = 0), all other observations
        contribute log(1 - P(y = 0)).

        Parameters
        ----------
        params : array-like
            The parameters of the model.

        Returns
        -------
        loglike : ndarray (nobs,)
            The log likelihood for each observation of the model evaluated
            at `params`, in the order of the observations.
        """
        llf_main = self.model_main.loglikeobs(params)

        # Fill the result in place so that row i belongs to observation i.
        # Concatenating the zero block and the nonzero block would return
        # the same values but in a permuted order.
        llf = np.array(llf_main, dtype=np.float64)
        pos = self.nonzero_idx
        llf[pos] = np.log(1 - np.exp(llf_main[pos]))

        return llf

    def score_obs(self, params):
        """
        Generic Censored model score (gradient) for each observation

        Parameters
        ----------
        params : array-like
            The parameters of the model

        Returns
        -------
        score : ndarray, 2-D
            One row per observation, in the order of the observations, with
            the first derivative of that observation's log-likelihood,
            evaluated at `params`
        """
        score_main = self.model_main.score_obs(params)
        llf_main = self.model_main.loglikeobs(params)

        # Rows of zero counts keep the score of log P(y = 0).  Rows of
        # nonzero counts get the derivative of log(1 - P(y = 0)), which is
        # -P0 / (1 - P0) times the score of log P0.  The result is filled in
        # place so that row i belongs to observation i.
        score = np.array(score_main, dtype=np.float64)
        pos = self.nonzero_idx
        prob_zero = np.exp(llf_main[pos])
        score[pos] = (score_main[pos].T * -prob_zero / (1 - prob_zero)).T

        return score

    def score(self, params):
        """
        Generic Censored model score (gradient) vector of the log-likelihood

        Parameters
        ----------
        params : array-like
            The parameters of the model

        Returns
        -------
        score : ndarray, 1-D
            Column sums of `score_obs`, i.e. the first derivative of the
            loglikelihood function, evaluated at `params`
        """
        score_by_obs = self.score_obs(params)
        return score_by_obs.sum(0)

    def fit(self, start_params=None, method='bfgs', maxiter=35,
            full_output=1, disp=1, callback=None,
            cov_type='nonrobust', cov_kwds=None, use_t=None, **kwargs):
        # No docstring here: ``fit.__doc__`` is assigned from
        # DiscreteModel.fit directly after this definition.
        if start_params is None:
            # Starting values are the estimates of the base model on the
            # same data; offset and exposure are combined into a single
            # offset for that auxiliary model.
            offset = getattr(self, "offset", 0) + getattr(self, "exposure", 0)
            if np.size(offset) == 1 and offset == 0:
                offset = None
            model = self.model_main.__class__(self.endog, self.exog,
                                              offset=offset)
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=ConvergenceWarning)
                start_params = model.fit(disp=0).params

        if callback is None:
            # Default stays a no-op callback, as before.
            # NOTE(review): presumably this is meant to bypass a default
            # callback of the parent class -- confirm.
            def callback(x):
                return x

        # A user supplied callback is now forwarded instead of being
        # silently replaced by the no-op.
        mlefit = super().fit(
            start_params=start_params,
            method=method,
            maxiter=maxiter,
            disp=disp,
            full_output=full_output,
            callback=callback,
            **kwargs
            )

        zipfit = self.result_class(self, mlefit._results)
        result = self.result_class_wrapper(zipfit)

        if cov_kwds is None:
            cov_kwds = {}

        # update the covariance of the result in place (use_self=True)
        result._get_robustcov_results(cov_type=cov_type,
                                      use_self=True, use_t=use_t, **cov_kwds)
        return result

    fit.__doc__ = DiscreteModel.fit.__doc__

    def fit_regularized(
            self, start_params=None, method='l1',
            maxiter='defined_by_method', full_output=1, disp=1, callback=None,
            alpha=0, trim_mode='auto', auto_trim_tol=0.01, size_trim_tol=1e-4,
            qc_tol=0.03, **kwargs):
        # No docstring here: ``fit_regularized.__doc__`` is assigned from
        # DiscreteModel.fit_regularized directly after this definition.

        # a nonzero scalar penalty becomes one weight per exog column
        if np.size(alpha) == 1 and alpha != 0:
            alpha = alpha * np.ones(self.exog.shape[1])

        # options shared by the auxiliary fit and the final fit
        l1_options = dict(
            method=method, maxiter=maxiter, full_output=full_output,
            callback=callback, trim_mode=trim_mode,
            auto_trim_tol=auto_trim_tol, size_trim_tol=size_trim_tol,
            qc_tol=qc_tol)

        if start_params is None:
            # starting values: penalized fit of the base model, with offset
            # and exposure combined into a single offset
            combined = (getattr(self, "offset", 0) +
                        getattr(self, "exposure", 0))
            if np.size(combined) == 1 and combined == 0:
                combined = None
            base_model = self.model_main.__class__(self.endog, self.exog,
                                                   offset=combined)
            start_params = base_model.fit_regularized(
                start_params=None, disp=0, alpha=alpha,
                **l1_options, **kwargs).params

        # CountModel is skipped in the method resolution on purpose: the
        # call goes to the next class after CountModel.
        penalized = super(CountModel, self).fit_regularized(
            start_params=start_params, disp=disp, alpha=alpha,
            **l1_options, **kwargs)

        if method not in ('l1', 'l1_cvxopt_cp'):
            raise TypeError(
                    "argument method == %s, which is not handled" % method)

        wrapped = self.result_class_reg(self, penalized)
        return self.result_class_reg_wrapper(wrapped)

    fit_regularized.__doc__ = DiscreteModel.fit_regularized.__doc__

    def hessian(self, params):
        """
        Generic Censored model Hessian matrix of the loglikelihood

        The Hessian is obtained by numerical differentiation of `loglike`.

        Parameters
        ----------
        params : array-like
            The parameters of the model

        Returns
        -------
        hess : ndarray, (k_vars, k_vars)
            The Hessian, second derivative of loglikelihood function,
            evaluated at `params`
        """
        objective = self.loglike
        return approx_hess(params, objective)


class _RCensoredPoisson(_RCensoredGeneric):
    __doc__ = """
    Censored Poisson model for count data

    %(params)s
    %(extra_params)s

    Attributes
    ----------
    endog : array
        A reference to the endogenous response variable
    exog : array
        A reference to the exogenous design.
    """ % {'params': base._model_params_doc,
           'extra_params':
           """offset : array_like
        Offset is added to the linear prediction with coefficient equal to 1.
    exposure : array_like
        Log(exposure) is added to the linear prediction with coefficient
        equal to 1.

    """ + base._missing_param_doc}

    def __init__(self, endog, exog, offset=None,
                 exposure=None, missing='none', **kwargs):
        """Create the censored model with a Poisson base model."""
        super().__init__(endog, exog, offset=offset, exposure=exposure,
                         missing=missing, **kwargs)

        # The base model is evaluated at a count of zero for every
        # observation, so that its likelihood is P(y = 0).
        # NOTE(review): offset and exposure are not forwarded to the base
        # model -- confirm that this is intended.
        all_zero = np.zeros_like(self.endog)
        self.model_main = Poisson(all_zero, self.exog)
        self.model_dist = None

        # results classes are the ones of the truncated models
        self.result_class = TruncatedLFGenericResults
        self.result_class_wrapper = TruncatedLFGenericResultsWrapper
        self.result_class_reg = L1TruncatedLFGenericResults
        self.result_class_reg_wrapper = L1TruncatedLFGenericResultsWrapper


class _RCensoredGeneralizedPoisson(_RCensoredGeneric):
    __doc__ = """
    Censored Generalized Poisson model for count data

    %(params)s
    %(extra_params)s

    Attributes
    ----------
    endog : array
        A reference to the endogenous response variable
    exog : array
        A reference to the exogenous design.
    """ % {'params': base._model_params_doc,
           'extra_params':
           """offset : array_like
        Offset is added to the linear prediction with coefficient equal to 1.
    exposure : array_like
        Log(exposure) is added to the linear prediction with coefficient
        equal to 1.

    """ + base._missing_param_doc}

    def __init__(self, endog, exog, offset=None, p=2,
                 exposure=None, missing='none', **kwargs):
        """Create the censored model with a Generalized Poisson base model.

        ``p`` is the parameterization option that is handed to
        ``GeneralizedPoisson``.
        """
        super().__init__(
            endog, exog, offset=offset, exposure=exposure,
            missing=missing, **kwargs)

        # The base model is evaluated at a count of zero for every
        # observation, so that its likelihood is P(y = 0).  ``p`` is
        # forwarded; it was accepted but ignored before, so the base model
        # always used its own default.
        # NOTE(review): offset and exposure are not forwarded to the base
        # model -- confirm that this is intended.
        self.model_main = GeneralizedPoisson(
            np.zeros_like(self.endog), self.exog, p=p)
        self.model_dist = None
        self.result_class = TruncatedLFGenericResults
        self.result_class_wrapper = TruncatedLFGenericResultsWrapper
        self.result_class_reg = L1TruncatedLFGenericResults
        self.result_class_reg_wrapper = L1TruncatedLFGenericResultsWrapper


class _RCensoredNegativeBinomialP(_RCensoredGeneric):
    __doc__ = """
    Censored Negative Binomial model for count data

    %(params)s
    %(extra_params)s

    Attributes
    ----------
    endog : array
        A reference to the endogenous response variable
    exog : array
        A reference to the exogenous design.
    """ % {'params': base._model_params_doc,
           'extra_params':
           """offset : array_like
        Offset is added to the linear prediction with coefficient equal to 1.
    exposure : array_like
        Log(exposure) is added to the linear prediction with coefficient
        equal to 1.

    """ + base._missing_param_doc}

    def __init__(self, endog, exog, offset=None, p=2,
                 exposure=None, missing='none', **kwargs):
        # Data handling is left to the parent class.
        super().__init__(endog, exog, offset=offset, exposure=exposure,
                         missing=missing, **kwargs)

        # Main model: NegativeBinomialP with the requested ``p``, built on
        # an all-zero endog shaped like ours.
        zero_endog = np.zeros_like(self.endog)
        self.model_main = NegativeBinomialP(zero_endog, self.exog, p=p)
        self.model_dist = None

        # Results classes (plain and regularized) and their wrappers.
        self.result_class = TruncatedLFGenericResults
        self.result_class_wrapper = TruncatedLFGenericResultsWrapper
        self.result_class_reg = L1TruncatedLFGenericResults
        self.result_class_reg_wrapper = L1TruncatedLFGenericResultsWrapper


class _RCensored(_RCensoredGeneric):
    __doc__ = """
    Censored model for count data

    %(params)s
    %(extra_params)s

    Attributes
    ----------
    endog : array
        A reference to the endogenous response variable
    exog : array
        A reference to the exogenous design.
    """ % {'params': base._model_params_doc,
           'extra_params':
           """offset : array_like
        Offset is added to the linear prediction with coefficient equal to 1.
    exposure : array_like
        Log(exposure) is added to the linear prediction with coefficient
        equal to 1.

    """ + base._missing_param_doc}

    def __init__(self, endog, exog, model=Poisson,
                 distribution=truncatedpoisson, offset=None,
                 exposure=None, missing='none', **kwargs):
        super().__init__(endog, exog, offset=offset, exposure=exposure,
                         missing=missing, **kwargs)

        # ``model`` is called as ``model(endog, exog)`` with an all-zero
        # endog shaped like ours.
        main = model(np.zeros_like(self.endog), self.exog)
        self.model_main = main
        self.model_dist = distribution

        # Take over ``k_extra`` from the main model and append the names of
        # its trailing extra parameters to our own ``exog_names``.
        n_extra = main.k_extra
        self.k_extra = n_extra
        if n_extra > 0:
            extra_names = main.exog_names[-n_extra:]
            self.exog_names.extend(extra_names)

        # Results classes (plain and regularized) and their wrappers.
        self.result_class = TruncatedLFGenericResults
        self.result_class_wrapper = TruncatedLFGenericResultsWrapper
        self.result_class_reg = L1TruncatedLFGenericResults
        self.result_class_reg_wrapper = L1TruncatedLFGenericResultsWrapper

    def _prob_nonzero(self, mu, params):
        """Return the probability of a nonzero count.

        Delegates to ``model_main._prob_nonzero(mu, params)``.
        For internal use in the censored model; may be refactored or removed.
        """
        return self.model_main._prob_nonzero(mu, params)


class HurdleCountModel(CountModel):
    __doc__ = """
    Hurdle model for count data

    .. versionadded:: 0.14.0

    %(params)s
    %(extra_params)s

    Attributes
    ----------
    endog : array
        A reference to the endogenous response variable
    exog : array
        A reference to the exogenous design.
    dist : string
        Log-likelihood type of count model family. 'poisson' or 'negbin'
    zerodist : string
        Log-likelihood type of zero hurdle model family. 'poisson', 'negbin'
    p : scalar
        Define parameterization for count model.
        Used when dist='negbin'.
    pzero : scalar
        Define parameterization for zero hurdle model family.
        Used when zerodist='negbin'.
    """ % {'params': base._model_params_doc,
           'extra_params':
           """offset : array_like
        Offset is added to the linear prediction with coefficient equal to 1.
    exposure : array_like
        Log(exposure) is added to the linear prediction with coefficient
        equal to 1.

    Notes
    -----
    The parameters in the NegativeBinomial zero model are not identified if
    the predicted mean is constant. If there is no or only little variation in
    the predicted mean, then convergence might fail, hessian might not be
    invertible or parameter estimates will have large standard errors.

    References
    ----------
    not yet

    """ + base._missing_param_doc}

    def __init__(self, endog, exog, offset=None,
                 dist="poisson", zerodist="poisson",
                 p=2, pzero=2,
                 exposure=None, missing='none', **kwargs):

        if (offset is not None) or (exposure is not None):
            msg = "Offset and exposure are not yet implemented"
            raise NotImplementedError(msg)
        super().__init__(
            endog,
            exog,
            offset=offset,
            exposure=exposure,
            missing=missing,
            **kwargs
            )
        # Number of extra (non-mean) parameters of the zero model (1) and of
        # the truncated count model (2); incremented in ``_initialize``.
        self.k_extra1 = 0
        self.k_extra2 = 0

        self._initialize(dist, zerodist, p, pzero)
        self.result_class = HurdleCountResults
        self.result_class_wrapper = HurdleCountResultsWrapper
        self.result_class_reg = L1HurdleCountResults
        self.result_class_reg_wrapper = L1HurdleCountResultsWrapper

    def _initialize(self, dist, zerodist, p, pzero):
        """Create the zero hurdle model and the truncated count model.

        Parameters
        ----------
        dist : {"poisson", "negbin"}
            Family of the truncated count model, stored as ``model2``.
        zerodist : {"poisson", "negbin"}
            Family of the zero hurdle model, stored as ``model1``.
        p : scalar
            Parameterization of the count model, used if ``dist="negbin"``.
        pzero : scalar
            Parameterization of the zero model, used if
            ``zerodist="negbin"``.

        Raises
        ------
        NotImplementedError
            If ``dist`` or ``zerodist`` is not one of the supported names.
        """
        if (dist not in ["poisson", "negbin"] or
                zerodist not in ["poisson", "negbin"]):
            raise NotImplementedError('dist and zerodist must be "poisson",'
                                      '"negbin"')

        if zerodist == "poisson":
            self.model1 = _RCensored(self.endog, self.exog, model=Poisson)
        elif zerodist == "negbin":
            # ``_RCensored`` calls ``model(endog, exog)``; wrap the class so
            # that ``pzero`` actually reaches NegativeBinomialP instead of
            # being silently ignored.
            def zero_model(endog, exog):
                return NegativeBinomialP(endog, exog, p=pzero)

            self.model1 = _RCensored(self.endog, self.exog,
                                     model=zero_model)
            self.k_extra1 += 1

        if dist == "poisson":
            self.model2 = TruncatedLFPoisson(self.endog, self.exog)
        elif dist == "negbin":
            self.model2 = TruncatedLFNegativeBinomialP(self.endog, self.exog,
                                                       p=p)
            self.k_extra2 += 1

    def _split_params(self, params):
        """Split stacked parameters into zero-model and count-model parts.

        The zero-model parameters come first; both parts share the same
        number of mean parameters and each has its own extra parameters.
        """
        k_zero = int((len(params) - self.k_extra1 - self.k_extra2) / 2
                     ) + self.k_extra1
        return params[:k_zero], params[k_zero:]

    def loglike(self, params):
        """
        Loglikelihood of Generic Hurdle model

        Parameters
        ----------
        params : array-like
            The parameters of the model, zero model parameters first,
            followed by the parameters of the truncated count model.

        Returns
        -------
        loglike : float
            The log-likelihood function of the model evaluated at `params`.

        Notes
        -----
        The log-likelihood is the sum of the log-likelihood of the zero
        hurdle model and the log-likelihood of the truncated count model,
        each evaluated at its own part of `params`.
        """
        params_zero, params_main = self._split_params(params)
        return (self.model1.loglike(params_zero) +
                self.model2.loglike(params_main))

    def fit(self, start_params=None, method='bfgs', maxiter=35,
            full_output=1, disp=1, callback=None,
            cov_type='nonrobust', cov_kwds=None, use_t=None, **kwargs):
        # No docstring here on purpose: ``fit.__doc__`` is assigned from
        # ``DiscreteModel.fit`` right after this definition.

        if cov_type != "nonrobust":
            raise ValueError("robust cov_type currently not supported")

        # The two parts are estimated separately.
        # NOTE(review): the same ``start_params`` is handed to both submodels
        # and the user supplied ``callback`` is replaced by a no-op --
        # confirm that this is intended.
        results1 = self.model1.fit(
            start_params=start_params,
            method=method, maxiter=maxiter, disp=disp,
            full_output=full_output, callback=lambda x: x,
            **kwargs
            )

        results2 = self.model2.fit(
            start_params=start_params,
            method=method, maxiter=maxiter, disp=disp,
            full_output=full_output, callback=lambda x: x,
            **kwargs
            )

        # Use a copy of the zero model results as container for the
        # combined fit and attach this model to it.
        result = deepcopy(results1)
        result._results.model = self
        result.mle_retvals['converged'] = [results1.mle_retvals['converged'],
                                           results2.mle_retvals['converged']]
        # stacked parameters: zero model first, then count model
        result._results.params = np.append(results1._results.params,
                                           results2._results.params)
        # TODO: the following should be in __init__ or initialize
        result._results.df_model += results2._results.df_model
        # this looks wrong attr does not exist, always 0
        self.k_extra1 += getattr(results1._results, "k_extra", 0)
        self.k_extra2 += getattr(results2._results, "k_extra", 0)
        self.k_extra = (self.k_extra1 + self.k_extra2 + 1)
        # zero model names get a "zm_" prefix to keep them distinct
        xnames1 = ["zm_" + name for name in self.model1.exog_names]
        self.exog_names[:] = xnames1 + self.model2.exog_names

        # fix up cov_params,
        # we could use normalized cov_params directly, unless it's not used
        from scipy.linalg import block_diag
        result._results.normalized_cov_params = None
        try:
            cov1 = results1._results.cov_params()
            cov2 = results2._results.cov_params()
            # the two parts are fit independently, so the joint covariance
            # is assembled as a block diagonal matrix
            result._results.normalized_cov_params = block_diag(cov1, cov2)
        except ValueError as e:
            if "need covariance" not in str(e):
                # could be some other problem
                raise

        modelfit = self.result_class(self, result._results, results1, results2)
        result = self.result_class_wrapper(modelfit)

        return result

    fit.__doc__ = DiscreteModel.fit.__doc__

    def predict(self, params, exog=None, exposure=None,
                offset=None, which='mean', y_values=None):
        """
        Predict response variable or other statistic given exogenous variables.

        Parameters
        ----------
        params : array_like
            The parameters of the model, zero model parameters first,
            followed by the parameters of the truncated count model.
        exog : ndarray, optional
            Explanatory variables. The same ``exog`` is used for the zero
            hurdle model and for the truncated count model.
            If ``exog`` is None, then the data from the model will be used.
        offset : ndarray, optional
            Offset is added to the linear predictor of the mean function with
            coefficient equal to 1.
            Default is zero if exog is not None, and the model offset if exog
            is None.
        exposure : ndarray, optional
            Log(exposure) is added to the linear predictor with coefficient
            equal to 1. If exposure is specified, then it will be logged by
            the method. The user does not need to log it first.
            Default is one if exog is not None, and it is the model exposure
            if exog is None.
        which : str (optional)
            Statistic to predict. Default is 'mean'. The option is case
            insensitive.

            - 'mean' : the conditional expectation of endog E(y | x)
            - 'mean-main' : mean parameter of truncated count model.
              Note, this is not the mean of the truncated distribution.
            - 'linear' : the linear predictor of the truncated count model.
            - 'var' : returns the estimated variance of endog implied by the
              model.
            - 'prob-main' : probability of selecting the main model which is
              the probability of observing a nonzero count P(y > 0 | x).
            - 'prob-zero' : probability of observing a zero count. P(y=0 | x).
              This is equal to ``1 - prob-main``
            - 'prob-trunc' : probability of truncation of the truncated count
              model. This is the probability of observing a zero count implied
              by the truncation model.
            - 'mean-nonzero' : expected value conditional on having observation
              larger than zero, E(y | X, y>0)
            - 'prob' : probabilities of each count from 0 to max(endog), or
              for y_values if those are provided. This is a multivariate
              return (2-dim when predicting for several observations).

        y_values : array_like
            Values of the random variable endog at which pmf is evaluated.
            Only used if ``which="prob"``

        Returns
        -------
        predicted values

        Raises
        ------
        ValueError
            If ``which`` is not one of the available options.

        Notes
        -----
        'prob-zero' / 'prob-trunc' is the ratio of probabilities of observing
        a zero count between hurdle model and the truncated count model.
        If this ratio is larger than one, then the hurdle model has an inflated
        number of zeros compared to the count model. If it is smaller than one,
        then the number of zeros is deflated.
        """
        which = which.lower()  # make it case insensitive
        exog, offset, exposure = self._get_predict_arrays(
            exog=exog,
            offset=offset,
            exposure=exposure
            )

        params_zero, params_main = self._split_params(params)

        # only the leading mean parameters enter the linear predictor, extra
        # parameters of the count model are at the end of ``params_main``
        lin_pred = (np.dot(exog, params_main[:self.exog.shape[1]]) +
                    exposure + offset)

        # this currently is mean_main, offset, exposure for zero part ?
        mu1 = self.model1.predict(params_zero, exog=exog)
        # prob that count model applies y>0 from zero model predict
        prob_main = self.model1.model_main._prob_nonzero(mu1, params_zero)
        prob_zero = (1 - prob_main)

        # mean parameter of the count model and its probability of a
        # nonzero count, i.e. of not being truncated
        mu2 = np.exp(lin_pred)
        prob_ntrunc = self.model2.model_main._prob_nonzero(mu2, params_main)

        if which == 'mean':
            return prob_main * mu2 / prob_ntrunc
        elif which == 'mean-main':
            return mu2
        elif which == 'linear':
            return lin_pred
        elif which == 'mean-nonzero':
            return mu2 / prob_ntrunc
        elif which == 'prob-zero':
            return prob_zero
        elif which == 'prob-main':
            return prob_main
        elif which == 'prob-trunc':
            return 1 - prob_ntrunc
        elif which == 'var':
            # generic computation using results from submodels
            mt, vt = self.model2._predict_mom_trunc0(params_main, mu2)
            var_ = prob_main * vt + prob_main * (1 - prob_main) * mt**2
            return var_
        elif which == 'prob':
            # ``exposure`` is exponentiated before it is passed on to the
            # count model's predict
            probs_main = self.model2.predict(
                params_main, exog, np.exp(exposure), offset, which="prob",
                y_values=y_values)
            # rescale the truncated pmf by P(y > 0) and put P(y = 0) from
            # the zero model in the first column
            probs_main *= prob_main[:, None]
            probs_main[:, 0] = prob_zero
            return probs_main
        else:
            raise ValueError('which = %s is not available' % which)


class TruncatedLFGenericResults(CountResults):
    # Docstring is generated from the shared discrete results template.
    __doc__ = _discrete_results_docs % dict(
        one_line_description="A results class for Generic Truncated",
        extra_attr="",
    )


class TruncatedLFPoissonResults(TruncatedLFGenericResults):
    __doc__ = _discrete_results_docs % {
        "one_line_description": "A results class for Truncated Poisson",
        "extra_attr": ""}

    @cache_readonly
    def _dispersion_factor(self):
        """Dispersion factor ``1 - mu / (exp(mu) - 1)`` per observation.

        ``mu`` is the exponential of the linear prediction.

        Raises
        ------
        NotImplementedError
            If the truncation point of the model, ``model.trunc``, is not
            zero.
        """
        if self.model.trunc != 0:
            msg = "dispersion is only available for zero-truncation"
            raise NotImplementedError(msg)

        mu = np.exp(self.predict(which='linear'))

        # expm1 avoids the loss of precision of ``exp(mu) - 1`` for small mu
        return 1 - mu / np.expm1(mu)


class TruncatedNegativeBinomialResults(TruncatedLFGenericResults):
    __doc__ = _discrete_results_docs % {
        "one_line_description":
            "A results class for Truncated Negative Binomial",
        "extra_attr": ""}

    @cache_readonly
    def _dispersion_factor(self):
        """Dispersion factor per observation.

        Computed as ``1 - alpha * m / (exp(m) - 1)`` with
        ``m = mu**(p - 1)``, where ``mu`` is the exponential of the linear
        prediction, ``alpha`` is the last parameter and ``p`` is the
        parameterization of the main model.

        Raises
        ------
        NotImplementedError
            If the truncation point of the model, ``model.trunc``, is not
            zero.
        """
        if self.model.trunc != 0:
            msg = "dispersion is only available for zero-truncation"
            raise NotImplementedError(msg)

        alpha = self.params[-1]
        p = self.model.model_main.parameterization
        mu = np.exp(self.predict(which='linear'))
        mu_p = mu**(p - 1)

        # NOTE(review): the expression is kept as in the original code;
        # confirm it against a reference for the truncated NB-P model.
        # expm1 avoids the loss of precision of ``exp(m) - 1`` for small m.
        return 1 - alpha * mu_p / np.expm1(mu_p)


class L1TruncatedLFGenericResults(L1CountResults, TruncatedLFGenericResults):
    """Results class combining L1CountResults and TruncatedLFGenericResults.

    Used as ``result_class_reg`` by models in this module.
    """


class TruncatedLFGenericResultsWrapper(lm.RegressionResultsWrapper):
    """Results wrapper that is populated for TruncatedLFGenericResults."""


# Register the wrapper for its results class.
wrap.populate_wrapper(
    TruncatedLFGenericResultsWrapper,
    TruncatedLFGenericResults,
)


class L1TruncatedLFGenericResultsWrapper(lm.RegressionResultsWrapper):
    """Results wrapper that is populated for L1TruncatedLFGenericResults."""


# Register the wrapper for its results class.
wrap.populate_wrapper(
    L1TruncatedLFGenericResultsWrapper,
    L1TruncatedLFGenericResults,
)


class HurdleCountResults(CountResults):
    __doc__ = _discrete_results_docs % dict(
        one_line_description="A results class for Hurdle model",
        extra_attr="",
    )

    def __init__(self, model, mlefit, results_zero, results_count,
                 cov_type='nonrobust', cov_kwds=None, use_t=None):
        super().__init__(model, mlefit, cov_type=cov_type,
                         cov_kwds=cov_kwds, use_t=use_t)
        # Keep the results of both parts of the hurdle model.
        self.results_zero = results_zero
        self.results_count = results_count
        # TODO: this is to fix df_resid, should be automatic but is not
        nobs = self.model.endog.shape[0]
        k_params = len(self.params)
        self.df_resid = nobs - k_params

    @cache_readonly
    def llnull(self):
        """Sum of the ``llnull`` values of the zero and count results."""
        llnull_zero = self.results_zero._results.llnull
        llnull_count = self.results_count._results.llnull
        return llnull_zero + llnull_count

    @cache_readonly
    def bse(self):
        """``bse`` of the zero results followed by ``bse`` of the count results."""
        bse_zero = self.results_zero.bse
        bse_count = self.results_count.bse
        return np.append(bse_zero, bse_count)


class L1HurdleCountResults(L1CountResults, HurdleCountResults):
    """Results class combining L1CountResults and HurdleCountResults.

    Used as ``result_class_reg`` by HurdleCountModel.
    """


class HurdleCountResultsWrapper(lm.RegressionResultsWrapper):
    """Results wrapper that is populated for HurdleCountResults."""


# Register the wrapper for its results class.
wrap.populate_wrapper(
    HurdleCountResultsWrapper,
    HurdleCountResults,
)


class L1HurdleCountResultsWrapper(lm.RegressionResultsWrapper):
    """Results wrapper that is populated for L1HurdleCountResults."""


# Register the wrapper for its results class.
wrap.populate_wrapper(
    L1HurdleCountResultsWrapper,
    L1HurdleCountResults,
)