"""
Test functions for models.GLM
"""
import os
import warnings

import numpy as np
from numpy.testing import (
    assert_,
    assert_allclose,
    assert_almost_equal,
    assert_array_less,
    assert_equal,
    assert_raises,
)
import pandas as pd
from pandas.testing import assert_series_equal
import pytest
from scipy import stats

import statsmodels.api as sm
from statsmodels.compat.scipy import SP_LT_17
from statsmodels.datasets import cpunish, longley
from statsmodels.discrete import discrete_model as discrete
from statsmodels.genmod.generalized_linear_model import GLM, SET_USE_BIC_LLF
from statsmodels.tools.numdiff import (
    approx_fprime,
    approx_fprime_cs,
    approx_hess,
    approx_hess_cs,
)
from statsmodels.tools.sm_exceptions import (
    DomainWarning,
    PerfectSeparationWarning,
    ValueWarning,
)
from statsmodels.tools.tools import add_constant

# Test Precisions
DECIMAL_4 = 4
DECIMAL_3 = 3
DECIMAL_2 = 2
DECIMAL_1 = 1
DECIMAL_0 = 0

pdf_output = False

if pdf_output:
    from matplotlib.backends.backend_pdf import PdfPages
    pdf = PdfPages("test_glm.pdf")
else:
    pdf = None


def close_or_save(pdf, fig):
    if pdf_output:
        pdf.savefig(fig)


def teardown_module():
    if pdf_output:
        pdf.close()


@pytest.fixture(scope="module")
def iris():
    cur_dir = os.path.dirname(os.path.abspath(__file__))
    return np.genfromtxt(os.path.join(cur_dir, 'results', 'iris.csv'),
                         delimiter=",", skip_header=1)


class CheckModelResultsMixin:
    '''
    res2 should be either the results from RModelWrap
    or the results as defined in model_results_data
    '''

    decimal_params = DECIMAL_4
    def test_params(self):
        assert_almost_equal(self.res1.params, self.res2.params,
                self.decimal_params)

    decimal_bse = DECIMAL_4
    def test_standard_errors(self):
        assert_allclose(self.res1.bse, self.res2.bse,
                        atol=10**(-self.decimal_bse), rtol=1e-5)

    decimal_resids = DECIMAL_4
    def test_residuals(self):
        # fix incorrect numbers in resid_working results
        # residuals for Poisson are also tested in test_glm_weights.py
        import copy

        # new numpy would have copy method
        resid2 = copy.copy(self.res2.resids)
        resid2[:, 2] *= self.res1.family.link.deriv(self.res1.mu)**2

        atol = 10**(-self.decimal_resids)
        resid_a = self.res1.resid_anscombe_unscaled
        resids = np.column_stack((self.res1.resid_pearson,
                self.res1.resid_deviance, self.res1.resid_working,
                resid_a, self.res1.resid_response))
        assert_allclose(resids, resid2, rtol=1e-6, atol=atol)

    decimal_aic_R = DECIMAL_4

    def test_aic_R(self):
        # R includes the estimation of the scale as a lost dof
        # Does not with Gamma though
        if self.res1.scale != 1:
            dof = 2
        else:
            dof = 0
        if isinstance(self.res1.model.family, (sm.families.NegativeBinomial)):
            llf = self.res1.model.family.loglike(self.res1.model.endog,
                                                 self.res1.mu,
                                                 self.res1.model.var_weights,
                                                 self.res1.model.freq_weights,
                                                 scale=1)
            aic = (-2*llf+2*(self.res1.df_model+1))
        else:
            aic = self.res1.aic
        assert_almost_equal(aic+dof, self.res2.aic_R,
                self.decimal_aic_R)

    decimal_aic_Stata = DECIMAL_4
    def test_aic_Stata(self):
        # Stata uses the below llf for aic definition for these families
        if isinstance(self.res1.model.family, (sm.families.Gamma,
                                               sm.families.InverseGaussian,
                                               sm.families.NegativeBinomial)):
            llf = self.res1.model.family.loglike(self.res1.model.endog,
                                                 self.res1.mu,
                                                 self.res1.model.var_weights,
                                                 self.res1.model.freq_weights,
                                                 scale=1)
            aic = (-2*llf+2*(self.res1.df_model+1))/self.res1.nobs
        else:
            aic = self.res1.aic/self.res1.nobs
        assert_almost_equal(aic, self.res2.aic_Stata, self.decimal_aic_Stata)

    decimal_deviance = DECIMAL_4
    def test_deviance(self):
        assert_almost_equal(self.res1.deviance, self.res2.deviance,
                self.decimal_deviance)

    decimal_scale = DECIMAL_4
    def test_scale(self):
        assert_almost_equal(self.res1.scale, self.res2.scale,
                self.decimal_scale)

    decimal_loglike = DECIMAL_4
    def test_loglike(self):
        # Stata uses the below llf for these families
        # We differ with R for them
        if isinstance(self.res1.model.family, (sm.families.Gamma,
                                               sm.families.InverseGaussian,
                                               sm.families.NegativeBinomial)):
            llf = self.res1.model.family.loglike(self.res1.model.endog,
                                                 self.res1.mu,
                                                 self.res1.model.var_weights,
                                                 self.res1.model.freq_weights,
                                                 scale=1)
        else:
            llf = self.res1.llf
        assert_almost_equal(llf, self.res2.llf, self.decimal_loglike)

    decimal_null_deviance = DECIMAL_4
    def test_null_deviance(self):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", DomainWarning)

            assert_almost_equal(self.res1.null_deviance,
                                self.res2.null_deviance,
                                self.decimal_null_deviance)

    decimal_bic = DECIMAL_4
    def test_bic(self):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            assert_almost_equal(self.res1.bic,
                                self.res2.bic_Stata,
                                self.decimal_bic)

    def test_degrees(self):
        assert_equal(self.res1.model.df_resid,self.res2.df_resid)

    decimal_fittedvalues = DECIMAL_4
    def test_fittedvalues(self):
        assert_almost_equal(self.res1.fittedvalues, self.res2.fittedvalues,
                self.decimal_fittedvalues)

    def test_tpvalues(self):
        # test comparing tvalues and pvalues with normal implementation
        # make sure they use normal distribution (inherited in results class)
        params = self.res1.params
        tvalues = params / self.res1.bse
        pvalues = stats.norm.sf(np.abs(tvalues)) * 2
        half_width = stats.norm.isf(0.025) * self.res1.bse
        conf_int = np.column_stack((params - half_width, params + half_width))
        if isinstance(tvalues, pd.Series):
            assert_series_equal(self.res1.tvalues, tvalues)
        else:
            assert_almost_equal(self.res1.tvalues, tvalues)
        assert_almost_equal(self.res1.pvalues, pvalues)
        assert_almost_equal(self.res1.conf_int(), conf_int)

    def test_pearson_chi2(self):
        if hasattr(self.res2, 'pearson_chi2'):
            assert_allclose(self.res1.pearson_chi2, self.res2.pearson_chi2,
                            atol=1e-6, rtol=1e-6)

    def test_prsquared(self):
        if hasattr(self.res2, 'prsquared'):
            assert_allclose(self.res1.pseudo_rsquared(kind="mcf"),
                            self.res2.prsquared, rtol=0.05)

        if hasattr(self.res2, 'prsquared_cox_snell'):
            assert_allclose(float(self.res1.pseudo_rsquared(kind="cs")),
                            self.res2.prsquared_cox_snell, rtol=0.05)

    @pytest.mark.smoke
    def test_summary(self):
        self.res1.summary()

    @pytest.mark.smoke
    def test_summary2(self):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", DomainWarning)
            self.res1.summary2()

    def test_get_distribution(self):
        res1 = self.res1
        if not hasattr(res1.model.family, "get_distribution"):
            # only Tweedie has not get_distribution
            pytest.skip("get_distribution not available")

        if isinstance(res1.model.family, sm.families.NegativeBinomial):
            res_scale = 1  # QMLE scale can differ from 1
        else:
            res_scale = res1.scale

        distr = res1.model.family.get_distribution(res1.fittedvalues,
                                                   res_scale)
        var_endog = res1.model.family.variance(res1.fittedvalues) * res_scale
        m, v = distr.stats()
        assert_allclose(res1.fittedvalues, m, rtol=1e-13)
        assert_allclose(var_endog, v, rtol=1e-13)
        # check model method
        distr2 = res1.model.get_distribution(res1.params, res_scale)
        for k in distr2.kwds:
            assert_allclose(distr.kwds[k], distr2.kwds[k], rtol=1e-13)

        # compare var with predict
        var_ = res1.predict(which="var_unscaled")
        assert_allclose(var_ * res_scale, var_endog, rtol=1e-13)

        # check get_distribution of results instance
        if getattr(self, "has_edispersion", False):
            with pytest.warns(UserWarning, match="using scale=1"):
                distr3 = res1.get_distribution()
        else:
            distr3 = res1.get_distribution()
        for k in distr2.kwds:
            assert_allclose(distr3.kwds[k], distr2.kwds[k], rtol=1e-13)


class CheckComparisonMixin:

    def test_compare_discrete(self):
        res1 = self.res1
        resd = self.resd

        assert_allclose(res1.llf, resd.llf, rtol=1e-10)
        score_obs1 = res1.model.score_obs(res1.params * 0.98)
        score_obsd = resd.model.score_obs(resd.params * 0.98)
        assert_allclose(score_obs1, score_obsd, rtol=1e-10)

        # score
        score1 = res1.model.score(res1.params * 0.98)
        assert_allclose(score1, score_obs1.sum(0), atol=1e-20)
        score0 = res1.model.score(res1.params)
        assert_allclose(score0, np.zeros(score_obs1.shape[1]), atol=5e-7)

        hessian1 = res1.model.hessian(res1.params * 0.98, observed=False)
        hessiand = resd.model.hessian(resd.params * 0.98)
        assert_allclose(hessian1, hessiand, rtol=1e-10)

        hessian1 = res1.model.hessian(res1.params * 0.98, observed=True)
        hessiand = resd.model.hessian(resd.params * 0.98)
        assert_allclose(hessian1, hessiand, rtol=1e-9)

    def test_score_test(self):
        res1 = self.res1
        # fake example, should be zero, k_constraint should be 0
        st, pv, df = res1.model.score_test(res1.params, k_constraints=1)
        assert_allclose(st, 0, atol=1e-20)
        assert_allclose(pv, 1, atol=1e-10)
        assert_equal(df, 1)

        st, pv, df = res1.model.score_test(res1.params, k_constraints=0)
        assert_allclose(st, 0, atol=1e-20)
        assert_(np.isnan(pv), msg=repr(pv))
        assert_equal(df, 0)

        # TODO: no verified numbers largely SMOKE test
        exog_extra = res1.model.exog[:,1]**2
        st, pv, df = res1.model.score_test(res1.params, exog_extra=exog_extra)
        assert_array_less(0.1, st)
        assert_array_less(0.1, pv)
        assert_equal(df, 1)

    def test_get_prediction(self):
        pred1 = self.res1.get_prediction()  # GLM
        predd = self.resd.get_prediction()  # discrete class
        assert_allclose(predd.predicted, pred1.predicted_mean, rtol=1e-11)
        assert_allclose(predd.se, pred1.se_mean, rtol=1e-6)
        assert_allclose(predd.summary_frame().values,
                        pred1.summary_frame().values, rtol=1e-6)

        pred1 = self.res1.get_prediction(which="mean")  # GLM
        predd = self.resd.get_prediction()  # discrete class
        assert_allclose(predd.predicted, pred1.predicted, rtol=1e-11)
        assert_allclose(predd.se, pred1.se, rtol=1e-6)
        assert_allclose(predd.summary_frame().values,
                        pred1.summary_frame().values, rtol=1e-6)


class TestGlmGaussian(CheckModelResultsMixin):
    @classmethod
    def setup_class(cls):
        '''
        Test Gaussian family with canonical identity link
        '''
        # Test Precisions
        cls.decimal_resids = DECIMAL_3
        cls.decimal_params = DECIMAL_2
        cls.decimal_bic = DECIMAL_0
        cls.decimal_bse = DECIMAL_3

        from statsmodels.datasets.longley import load
        cls.data = load()
        cls.data.endog = np.require(cls.data.endog, requirements="W")
        cls.data.exog = np.require(cls.data.exog, requirements="W")
        cls.data.exog = add_constant(cls.data.exog, prepend=False)
        cls.res1 = GLM(cls.data.endog, cls.data.exog,
                        family=sm.families.Gaussian()).fit()
        from .results.results_glm import Longley
        cls.res2 = Longley()


    def test_compare_OLS(self):
        res1 = self.res1
        # OLS does not define score_obs
        from statsmodels.regression.linear_model import OLS
        resd = OLS(self.data.endog, self.data.exog).fit(use_t=False)
        self.resd = resd  # attach to access from the outside

        assert_allclose(res1.llf, resd.llf, rtol=1e-10)
        score_obs1 = res1.model.score_obs(res1.params, scale=None)
        score_obsd = resd.resid[:, None] / resd.scale * resd.model.exog
        # low precision because of badly scaled exog
        assert_allclose(score_obs1, score_obsd, rtol=1e-8)

        score_obs1 = res1.model.score_obs(res1.params, scale=1)
        score_obsd = resd.resid[:, None] * resd.model.exog
        assert_allclose(score_obs1, score_obsd, rtol=1e-8)

        hess_obs1 = res1.model.hessian(res1.params, scale=None)
        hess_obsd = -1. / resd.scale * resd.model.exog.T.dot(resd.model.exog)
        # low precision because of badly scaled exog
        assert_allclose(hess_obs1, hess_obsd, rtol=1e-8)

        pred1 = res1.get_prediction()  # GLM
        predd = resd.get_prediction()  # discrete class
        assert_allclose(predd.predicted, pred1.predicted_mean, rtol=1e-11)
        assert_allclose(predd.se, pred1.se_mean, rtol=1e-6)
        assert_allclose(predd.summary_frame().values[:, :4],
                        pred1.summary_frame().values, rtol=1e-6)

        pred1 = self.res1.get_prediction(which="mean")  # GLM
        predd = self.resd.get_prediction()  # discrete class
        assert_allclose(predd.predicted, pred1.predicted, rtol=1e-11)
        assert_allclose(predd.se, pred1.se, rtol=1e-6)
        assert_allclose(predd.summary_frame().values[:, :4],
                        pred1.summary_frame().values, rtol=1e-6)

# FIXME: enable or delete
#    def setup_method(self):
#        if skipR:
#            raise SkipTest, "Rpy not installed."
#        Gauss = r.gaussian
#        self.res2 = RModel(self.data.endog, self.data.exog, r.glm, family=Gauss)
#        self.res2.resids = np.array(self.res2.resid)[:,None]*np.ones((1,5))
#        self.res2.null_deviance = 185008826 # taken from R. Rpy bug?


class TestGlmGaussianGradient(TestGlmGaussian):
    @classmethod
    def setup_class(cls):
        '''
        Test Gaussian family with canonical identity link
        '''
        # Test Precisions
        cls.decimal_resids = DECIMAL_3
        cls.decimal_params = DECIMAL_2
        cls.decimal_bic = DECIMAL_0
        cls.decimal_bse = DECIMAL_2

        from statsmodels.datasets.longley import load
        cls.data = load()
        cls.data.endog = np.require(cls.data.endog, requirements="W")
        cls.data.exog = np.require(cls.data.exog, requirements="W")
        cls.data.exog = add_constant(cls.data.exog, prepend=False)
        cls.res1 = GLM(cls.data.endog, cls.data.exog,
                       family=sm.families.Gaussian()).fit(method='newton')
        from .results.results_glm import Longley
        cls.res2 = Longley()


class TestGaussianLog(CheckModelResultsMixin):
    @classmethod
    def setup_class(cls):
        # Test Precision
        cls.decimal_aic_R = DECIMAL_0
        cls.decimal_aic_Stata = DECIMAL_2
        cls.decimal_loglike = DECIMAL_0
        cls.decimal_null_deviance = DECIMAL_1

        nobs = 100
        x = np.arange(nobs)
        np.random.seed(54321)
#        y = 1.0 - .02*x - .001*x**2 + 0.001 * np.random.randn(nobs)
        cls.X = np.c_[np.ones((nobs,1)),x,x**2]
        cls.lny = np.exp(-(-1.0 + 0.02*x + 0.0001*x**2)) +\
                        0.001 * np.random.randn(nobs)

        GaussLog_Model = GLM(cls.lny, cls.X,
                             family=sm.families.Gaussian(sm.families.links.Log()))
        cls.res1 = GaussLog_Model.fit()
        from .results.results_glm import GaussianLog
        cls.res2 = GaussianLog()

# FIXME: enable or delete
#    def setup(cls):
#        if skipR:
#            raise SkipTest, "Rpy not installed"
#        GaussLogLink = r.gaussian(link = "log")
#        GaussLog_Res_R = RModel(cls.lny, cls.X, r.glm, family=GaussLogLink)
#        cls.res2 = GaussLog_Res_R

class TestGaussianInverse(CheckModelResultsMixin):
    @classmethod
    def setup_class(cls):
        # Test Precisions
        cls.decimal_bic = DECIMAL_1
        cls.decimal_aic_R = DECIMAL_1
        cls.decimal_aic_Stata = DECIMAL_3
        cls.decimal_loglike = DECIMAL_1
        cls.decimal_resids = DECIMAL_3

        nobs = 100
        x = np.arange(nobs)
        np.random.seed(54321)
        y = 1.0 + 2.0 * x + x**2 + 0.1 * np.random.randn(nobs)
        cls.X = np.c_[np.ones((nobs,1)),x,x**2]
        cls.y_inv = (1. + .02*x + .001*x**2)**-1 + .001 * np.random.randn(nobs)
        InverseLink_Model = GLM(cls.y_inv, cls.X,
                family=sm.families.Gaussian(sm.families.links.InversePower()))
        InverseLink_Res = InverseLink_Model.fit()
        cls.res1 = InverseLink_Res
        from .results.results_glm import GaussianInverse
        cls.res2 = GaussianInverse()

# FIXME: enable or delete
#    def setup(cls):
#        if skipR:
#            raise SkipTest, "Rpy not installed."
#        InverseLink = r.gaussian(link = "inverse")
#        InverseLink_Res_R = RModel(cls.y_inv, cls.X, r.glm, family=InverseLink)
#        cls.res2 = InverseLink_Res_R

class TestGlmBinomial(CheckModelResultsMixin):
    @classmethod
    def setup_class(cls):
        '''
        Test Binomial family with canonical logit link using star98 dataset.
        '''
        cls.decimal_resids = DECIMAL_1
        cls.decimal_bic = DECIMAL_2

        from statsmodels.datasets.star98 import load

        from .results.results_glm import Star98
        data = load()
        data.endog = np.require(data.endog, requirements="W")
        data.exog = np.require(data.exog, requirements="W")
        data.exog = add_constant(data.exog, prepend=False)
        cls.res1 = GLM(data.endog, data.exog,
                       family=sm.families.Binomial()).fit()
        # NOTE: if you want to replicate with RModel
        # res2 = RModel(data.endog[:,0]/trials, data.exog, r.glm,
        #        family=r.binomial, weights=trials)

        cls.res2 = Star98()

    def test_endog_dtype(self):
        from statsmodels.datasets.star98 import load
        data = load()
        data.exog = add_constant(data.exog, prepend=False)
        endog = data.endog.astype(int)
        res2 = GLM(endog, data.exog, family=sm.families.Binomial()).fit()
        assert_allclose(res2.params, self.res1.params)
        endog = data.endog.astype(np.double)
        res3 = GLM(endog, data.exog, family=sm.families.Binomial()).fit()
        assert_allclose(res3.params, self.res1.params)

    def test_invalid_endog(self, reset_randomstate):
        # GH2733 inspired check
        endog = np.random.randint(0, 100, size=(1000, 3))
        exog = np.random.standard_normal((1000, 2))
        with pytest.raises(ValueError, match='endog has more than 2 columns'):
            GLM(endog, exog, family=sm.families.Binomial())

    def test_invalid_endog_formula(self, reset_randomstate):
        # GH2733
        n = 200
        exog = np.random.normal(size=(n, 2))
        endog = np.random.randint(0, 3, size=n).astype(str)
        # formula interface
        data = pd.DataFrame({"y": endog, "x1": exog[:, 0], "x2": exog[:, 1]})
        with pytest.raises(ValueError, match='array with multiple columns'):
            sm.GLM.from_formula("y ~ x1 + x2", data,
                                family=sm.families.Binomial())

    def test_get_distribution_binom_count(self):
        # test for binomial counts with n_trials > 1
        res1 = self.res1
        res_scale = 1  # QMLE scale can differ from 1

        mu_prob = res1.fittedvalues
        n = res1.model.n_trials
        distr = res1.model.family.get_distribution(mu_prob, res_scale,
                                                   n_trials=n)
        var_endog = res1.model.family.variance(mu_prob) * res_scale
        m, v = distr.stats()
        assert_allclose(mu_prob * n, m, rtol=1e-13)
        assert_allclose(var_endog * n, v, rtol=1e-13)

        # check model method
        distr2 = res1.model.get_distribution(res1.params, res_scale,
                                             n_trials=n)
        for k in distr2.kwds:
            assert_allclose(distr.kwds[k], distr2.kwds[k], rtol=1e-13)


# FIXME: enable/xfail/skip or delete
# TODO:
# Non-Canonical Links for the Binomial family require the algorithm to be
# slightly changed
# class TestGlmBinomialLog(CheckModelResultsMixin):
#    pass

# class TestGlmBinomialLogit(CheckModelResultsMixin):
#    pass

# class TestGlmBinomialProbit(CheckModelResultsMixin):
#    pass

# class TestGlmBinomialCloglog(CheckModelResultsMixin):
#    pass

# class TestGlmBinomialPower(CheckModelResultsMixin):
#    pass

# class TestGlmBinomialLoglog(CheckModelResultsMixin):
#    pass

# class TestGlmBinomialLogc(CheckModelResultsMixin):
# TODO: need include logc link
#    pass


class TestGlmBernoulli(CheckModelResultsMixin, CheckComparisonMixin):
    @classmethod
    def setup_class(cls):
        from .results.results_glm import Lbw
        cls.res2 = Lbw()
        cls.res1 = GLM(cls.res2.endog, cls.res2.exog,
                       family=sm.families.Binomial()).fit()

        modd = discrete.Logit(cls.res2.endog, cls.res2.exog)
        cls.resd = modd.fit(start_params=cls.res1.params * 0.9, disp=False)

    def test_score_r(self):
        res1 = self.res1
        res2 = self.res2
        st, pv, df = res1.model.score_test(res1.params,
                                           exog_extra=res1.model.exog[:, 1]**2)
        st_res = 0.2837680293459376  # (-0.5326988167303712)**2
        assert_allclose(st, st_res, rtol=1e-4)

        st, pv, df = res1.model.score_test(res1.params,
                                          exog_extra=res1.model.exog[:, 0]**2)
        st_res = 0.6713492821514992  # (-0.8193590679009413)**2
        assert_allclose(st, st_res, rtol=1e-4)

        select = list(range(9))
        select.pop(7)

        res1b = GLM(res2.endog, res2.exog.iloc[:, select],
                    family=sm.families.Binomial()).fit()
        tres = res1b.model.score_test(res1b.params,
                                      exog_extra=res1.model.exog[:, -2])
        tres = np.asarray(tres[:2]).ravel()
        tres_r = (2.7864148487452, 0.0950667)
        assert_allclose(tres, tres_r, rtol=1e-4)

        cmd_r = """\
        data = read.csv("...statsmodels\\statsmodels\\genmod\\tests\\results\\stata_lbw_glm.csv")

        data["race_black"] = data["race"] == "black"
        data["race_other"] = data["race"] == "other"
        mod = glm(low ~ age + lwt + race_black + race_other + smoke + ptl + ht + ui, family=binomial, data=data)
        options(digits=16)
        anova(mod, test="Rao")

        library(statmod)
        s = glm.scoretest(mod, data["age"]**2)
        s**2
        s = glm.scoretest(mod, data["lwt"]**2)
        s**2
        """

# class TestGlmBernoulliIdentity(CheckModelResultsMixin):
#    pass

# class TestGlmBernoulliLog(CheckModelResultsMixin):
#    pass

# class TestGlmBernoulliProbit(CheckModelResultsMixin):
#    pass

# class TestGlmBernoulliCloglog(CheckModelResultsMixin):
#    pass

# class TestGlmBernoulliPower(CheckModelResultsMixin):
#    pass

# class TestGlmBernoulliLoglog(CheckModelResultsMixin):
#    pass

# class test_glm_bernoulli_logc(CheckModelResultsMixin):
#    pass


class TestGlmGamma(CheckModelResultsMixin):

    @classmethod
    def setup_class(cls):
        '''
        Tests Gamma family with canonical inverse link (power -1)
        '''
        # Test Precisions
        cls.decimal_aic_R = -1 #TODO: off by about 1, we are right with Stata
        cls.decimal_resids = DECIMAL_2

        from statsmodels.datasets.scotland import load

        from .results.results_glm import Scotvote
        data = load()
        data.exog = add_constant(data.exog, prepend=False)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            res1 = GLM(data.endog, data.exog,
                       family=sm.families.Gamma()).fit()
        cls.res1 = res1
#        res2 = RModel(data.endog, data.exog, r.glm, family=r.Gamma)
        res2 = Scotvote()
        res2.aic_R += 2 # R does not count degree of freedom for scale with gamma
        cls.res2 = res2


class TestGlmGammaLog(CheckModelResultsMixin):
    @classmethod
    def setup_class(cls):
        # Test Precisions
        cls.decimal_resids = DECIMAL_3
        cls.decimal_aic_R = DECIMAL_0
        cls.decimal_fittedvalues = DECIMAL_3

        from .results.results_glm import CancerLog
        res2 = CancerLog()
        cls.res1 = GLM(res2.endog, res2.exog,
            family=sm.families.Gamma(link=sm.families.links.Log())).fit()
        cls.res2 = res2

# FIXME: enable or delete
#    def setup(cls):
#        if skipR:
#            raise SkipTest, "Rpy not installed."
#        cls.res2 = RModel(cls.data.endog, cls.data.exog, r.glm,
#            family=r.Gamma(link="log"))
#        cls.res2.null_deviance = 27.92207137420696 # From R (bug in rpy)
#        cls.res2.bic = -154.1582089453923 # from Stata


class TestGlmGammaIdentity(CheckModelResultsMixin):
    @classmethod
    def setup_class(cls):
        # Test Precisions
        cls.decimal_resids = -100 #TODO Very off from Stata?
        cls.decimal_params = DECIMAL_2
        cls.decimal_aic_R = DECIMAL_0
        cls.decimal_loglike = DECIMAL_1

        from .results.results_glm import CancerIdentity
        res2 = CancerIdentity()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            fam = sm.families.Gamma(link=sm.families.links.Identity())
            cls.res1 = GLM(res2.endog, res2.exog, family=fam).fit()
        cls.res2 = res2

# FIXME: enable or delete
#    def setup(cls):
#        if skipR:
#            raise SkipTest, "Rpy not installed."
#        cls.res2 = RModel(cls.data.endog, cls.data.exog, r.glm,
#            family=r.Gamma(link="identity"))
#        cls.res2.null_deviance = 27.92207137420696 # from R, Rpy bug

class TestGlmPoisson(CheckModelResultsMixin, CheckComparisonMixin):
    @classmethod
    def setup_class(cls):
        '''
        Tests Poisson family with canonical log link.

        Test results were obtained by R.
        '''
        from .results.results_glm import Cpunish
        cls.data = cpunish.load()
        cls.data.endog = np.require(cls.data.endog, requirements="W")
        cls.data.exog = np.require(cls.data.exog, requirements="W")
        cls.data.exog[:, 3] = np.log(cls.data.exog[:, 3])
        cls.data.exog = add_constant(cls.data.exog, prepend=False)
        cls.res1 = GLM(cls.data.endog, cls.data.exog,
                       family=sm.families.Poisson()).fit()
        cls.res2 = Cpunish()
        # compare with discrete, start close to save time
        modd = discrete.Poisson(cls.data.endog, cls.data.exog)
        cls.resd = modd.fit(start_params=cls.res1.params * 0.9, disp=False)

#class TestGlmPoissonIdentity(CheckModelResultsMixin):
#    pass

#class TestGlmPoissonPower(CheckModelResultsMixin):
#    pass


class TestGlmInvgauss(CheckModelResultsMixin):
    @classmethod
    def setup_class(cls):
        '''
        Tests the Inverse Gaussian family in GLM.

        Notes
        -----
        Used the rndivgx.ado file provided by Hardin and Hilbe to
        generate the data.  Results are read from model_results, which
        were obtained by running R_ig.s
        '''
        # Test Precisions
        cls.decimal_aic_R = DECIMAL_0
        cls.decimal_loglike = DECIMAL_0

        from .results.results_glm import InvGauss
        res2 = InvGauss()
        res1 = GLM(res2.endog, res2.exog,
                   family=sm.families.InverseGaussian()).fit()
        cls.res1 = res1
        cls.res2 = res2

    def test_get_distribution(self):
        res1 = self.res1
        distr = res1.model.family.get_distribution(res1.fittedvalues,
                                                   res1.scale)
        var_endog = res1.model.family.variance(res1.fittedvalues) * res1.scale
        m, v = distr.stats()
        assert_allclose(res1.fittedvalues, m, rtol=1e-13)
        assert_allclose(var_endog, v, rtol=1e-13)


class TestGlmInvgaussLog(CheckModelResultsMixin):
    @classmethod
    def setup_class(cls):
        # Test Precisions
        cls.decimal_aic_R = -10 # Big difference vs R.
        cls.decimal_resids = DECIMAL_3

        from .results.results_glm import InvGaussLog
        res2 = InvGaussLog()
        cls.res1 = GLM(res2.endog, res2.exog,
            family=sm.families.InverseGaussian(
                link=sm.families.links.Log())).fit()
        cls.res2 = res2

# FIXME: enable or delete
#    def setup(cls):
#        if skipR:
#            raise SkipTest, "Rpy not installed."
#        cls.res2 = RModel(cls.data.endog, cls.data.exog, r.glm,
#            family=r.inverse_gaussian(link="log"))
#        cls.res2.null_deviance = 335.1539777981053 # from R, Rpy bug
#        cls.res2.llf = -12162.72308 # from Stata, R's has big rounding diff


class TestGlmInvgaussIdentity(CheckModelResultsMixin):
    @classmethod
    def setup_class(cls):
        # Test Precisions
        cls.decimal_aic_R = -10 #TODO: Big difference vs R
        cls.decimal_fittedvalues = DECIMAL_3
        cls.decimal_params = DECIMAL_3

        from .results.results_glm import Medpar1
        data = Medpar1()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            cls.res1 = GLM(data.endog, data.exog,
                            family=sm.families.InverseGaussian(
                                link=sm.families.links.Identity())).fit()
        from .results.results_glm import InvGaussIdentity
        cls.res2 = InvGaussIdentity()

# FIXME: enable or delete
#    def setup(cls):
#        if skipR:
#            raise SkipTest, "Rpy not installed."
#        cls.res2 = RModel(cls.data.endog, cls.data.exog, r.glm,
#            family=r.inverse_gaussian(link="identity"))
#        cls.res2.null_deviance = 335.1539777981053 # from R, Rpy bug
#        cls.res2.llf = -12163.25545    # from Stata, big diff with R


class TestGlmNegbinomial(CheckModelResultsMixin):
    @classmethod
    def setup_class(cls):
        '''
        Test Negative Binomial family with log link
        '''
        # Test Precision
        cls.decimal_resid = DECIMAL_1
        cls.decimal_params = DECIMAL_3
        cls.decimal_resids = -1 # 1 % mismatch at 0
        cls.decimal_fittedvalues = DECIMAL_1

        from statsmodels.datasets.committee import load
        cls.data = load()
        cls.data.endog = np.require(cls.data.endog, requirements="W")
        cls.data.exog = np.require(cls.data.exog, requirements="W")
        cls.data.exog[:,2] = np.log(cls.data.exog[:,2])
        interaction = cls.data.exog[:,2]*cls.data.exog[:,1]
        cls.data.exog = np.column_stack((cls.data.exog,interaction))
        cls.data.exog = add_constant(cls.data.exog, prepend=False)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=DomainWarning)
            with pytest.warns(UserWarning):
                fam = sm.families.NegativeBinomial()

        cls.res1 = GLM(cls.data.endog, cls.data.exog,
                family=fam).fit(scale='x2')
        from .results.results_glm import Committee
        res2 = Committee()
        res2.aic_R += 2 # They do not count a degree of freedom for the scale
        cls.res2 = res2
        cls.has_edispersion = True

# FIXME: enable or delete
#    def setup_method(self):
#        if skipR:
#            raise SkipTest, "Rpy not installed"
#        r.library('MASS')  # this does not work when done in rmodelwrap?
#        self.res2 = RModel(self.data.endog, self.data.exog, r.glm,
#                family=r.negative_binomial(1))
#        self.res2.null_deviance = 27.8110469364343

# FIXME: enable/xfail/skip or delete
#class TestGlmNegbinomial_log(CheckModelResultsMixin):
#    pass

# FIXME: enable/xfail/skip or delete
#class TestGlmNegbinomial_power(CheckModelResultsMixin):
#    pass

# FIXME: enable/xfail/skip or delete
#class TestGlmNegbinomial_nbinom(CheckModelResultsMixin):
#    pass


class TestGlmPoissonOffset(CheckModelResultsMixin):
    @classmethod
    def setup_class(cls):
        from .results.results_glm import Cpunish_offset
        cls.decimal_params = DECIMAL_4
        cls.decimal_bse = DECIMAL_4
        cls.decimal_aic_R = 3
        data = cpunish.load()
        data.endog = np.asarray(data.endog)
        data.exog = np.asarray(data.exog)
        data.exog[:, 3] = np.log(data.exog[:, 3])
        data.exog = add_constant(data.exog, prepend=True)
        exposure = [100] * len(data.endog)
        cls.data = data
        cls.exposure = exposure
        cls.res1 = GLM(data.endog, data.exog, family=sm.families.Poisson(),
                       exposure=exposure).fit()
        cls.res2 = Cpunish_offset()

    def test_missing(self):
        # make sure offset is dropped correctly
        endog = self.data.endog.copy()
        endog[[2,4,6,8]] = np.nan
        mod = GLM(endog, self.data.exog, family=sm.families.Poisson(),
                    exposure=self.exposure, missing='drop')
        assert_equal(mod.exposure.shape[0], 13)

    def test_offset_exposure(self):
        # exposure=x and offset=log(x) should have the same effect
        np.random.seed(382304)
        endog = np.random.randint(0, 10, 100)
        exog = np.random.normal(size=(100,3))
        exposure = np.random.uniform(1, 2, 100)
        offset = np.random.uniform(1, 2, 100)
        mod1 = GLM(endog, exog, family=sm.families.Poisson(),
                   offset=offset, exposure=exposure).fit()
        offset2 = offset + np.log(exposure)
        mod2 = GLM(endog, exog, family=sm.families.Poisson(),
                   offset=offset2).fit()
        assert_almost_equal(mod1.params, mod2.params)
        assert_allclose(mod1.null, mod2.null, rtol=1e-10)

        # test recreating model
        mod1_ = mod1.model
        kwds = mod1_._get_init_kwds()
        assert_allclose(kwds['exposure'], exposure, rtol=1e-14)
        assert_allclose(kwds['offset'], mod1_.offset, rtol=1e-14)
        mod3 = mod1_.__class__(mod1_.endog, mod1_.exog, **kwds)
        assert_allclose(mod3.exposure, mod1_.exposure, rtol=1e-14)
        assert_allclose(mod3.offset, mod1_.offset, rtol=1e-14)

        # test fit_regularized exposure, see #4605
        resr1 = mod1.model.fit_regularized()
        resr2 = mod2.model.fit_regularized()
        assert_allclose(resr1.params, resr2.params, rtol=1e-10)


    def test_predict(self):
        np.random.seed(382304)
        endog = np.random.randint(0, 10, 100)
        exog = np.random.normal(size=(100,3))
        exposure = np.random.uniform(1, 2, 100)
        mod1 = GLM(endog, exog, family=sm.families.Poisson(),
                   exposure=exposure).fit()
        exog1 = np.random.normal(size=(10,3))
        exposure1 = np.random.uniform(1, 2, 10)

        # Doubling exposure time should double expected response
        pred1 = mod1.predict(exog=exog1, exposure=exposure1)
        pred2 = mod1.predict(exog=exog1, exposure=2*exposure1)
        assert_almost_equal(pred2, 2*pred1)

        # Check exposure defaults
        pred3 = mod1.predict()
        pred4 = mod1.predict(exposure=exposure)
        pred5 = mod1.predict(exog=exog, exposure=exposure)
        assert_almost_equal(pred3, pred4)
        assert_almost_equal(pred4, pred5)

        # Check offset defaults
        offset = np.random.uniform(1, 2, 100)
        mod2 = GLM(endog, exog, offset=offset,
                   family=sm.families.Poisson()).fit()
        pred1 = mod2.predict()
        pred2 = mod2.predict(which="mean", offset=offset)
        pred3 = mod2.predict(exog=exog, which="mean", offset=offset)
        assert_almost_equal(pred1, pred2)
        assert_almost_equal(pred2, pred3)

        # Check that offset shifts the linear predictor
        mod3 = GLM(endog, exog, family=sm.families.Poisson()).fit()
        offset = np.random.uniform(1, 2, 10)
        with pytest.warns(FutureWarning):
            # deprecation warning for linear keyword
            pred1 = mod3.predict(exog=exog1, offset=offset, linear=True)
        pred2 = mod3.predict(exog=exog1, offset=2*offset, which="linear")
        assert_almost_equal(pred2, pred1+offset)

        # Passing exposure as a pandas series should not effect output type
        assert isinstance(
            mod1.predict(exog=exog1, exposure=pd.Series(exposure1)),
            np.ndarray
        )


def test_perfect_pred(iris):
    y = iris[:, -1]
    X = iris[:, :-1]
    X = X[y != 2]
    y = y[y != 2]
    X = add_constant(X, prepend=True)
    glm = GLM(y, X, family=sm.families.Binomial())

    with pytest.warns(PerfectSeparationWarning):
        glm.fit()


def test_score_test_ols():
    # nicer example than Longley
    from statsmodels.regression.linear_model import OLS
    np.random.seed(5)
    nobs = 100
    sige = 0.5
    x = np.random.uniform(0, 1, size=(nobs, 5))
    x[:, 0] = 1
    beta = 1. / np.arange(1., x.shape[1] + 1)
    y = x.dot(beta) + sige * np.random.randn(nobs)

    res_ols = OLS(y, x).fit()
    res_olsc = OLS(y, x[:, :-2]).fit()
    co = res_ols.compare_lm_test(res_olsc, demean=False)

    res_glm = GLM(y, x[:, :-2], family=sm.families.Gaussian()).fit()
    co2 = res_glm.model.score_test(res_glm.params, exog_extra=x[:, -2:])
    # difference in df_resid versus nobs in scale see #1786
    assert_allclose(co[0] * 97 / 100., co2[0], rtol=1e-13)


def test_attribute_writable_resettable():
    # Regression test for mutables and class constructors.
    data = sm.datasets.longley.load()
    endog, exog = data.endog, data.exog
    glm_model = sm.GLM(endog, exog)
    assert_equal(glm_model.family.link.power, 1.0)
    glm_model.family.link.power = 2.
    assert_equal(glm_model.family.link.power, 2.0)
    glm_model2 = sm.GLM(endog, exog)
    assert_equal(glm_model2.family.link.power, 1.0)


class TestStartParams(CheckModelResultsMixin):
    @classmethod
    def setup_class(cls):
        '''
        Test Gaussian family with canonical identity link
        '''
        # Test Precisions
        cls.decimal_resids = DECIMAL_3
        cls.decimal_params = DECIMAL_2
        cls.decimal_bic = DECIMAL_0
        cls.decimal_bse = DECIMAL_3

        from statsmodels.datasets.longley import load
        cls.data = load()
        cls.data.exog = add_constant(cls.data.exog, prepend=False)
        params = sm.OLS(cls.data.endog, cls.data.exog).fit().params
        cls.res1 = GLM(cls.data.endog, cls.data.exog,
                        family=sm.families.Gaussian()).fit(start_params=params)
        from .results.results_glm import Longley
        cls.res2 = Longley()


def test_glm_start_params():
    # see 1604
    y2 = np.array('0 1 0 0 0 1'.split(), int)
    wt = np.array([50,1,50,1,5,10])
    y2 = np.repeat(y2, wt)
    x2 = np.repeat([0,0,0.001,100,-1,-1], wt)
    mod = sm.GLM(y2, sm.add_constant(x2), family=sm.families.Binomial())
    res = mod.fit(start_params=[-4, -5])
    np.testing.assert_almost_equal(res.params, [-4.60305022, -5.29634545], 6)


def test_loglike_no_opt():
    # see 1728

    y = np.asarray([0, 1, 0, 0, 1, 1, 0, 1, 1, 1])
    x = np.arange(10, dtype=np.float64)

    def llf(params):
        lin_pred = params[0] + params[1]*x
        pr = 1 / (1 + np.exp(-lin_pred))
        return np.sum(y*np.log(pr) + (1-y)*np.log(1-pr))

    for params in [0,0], [0,1], [0.5,0.5]:
        mod = sm.GLM(y, sm.add_constant(x), family=sm.families.Binomial())
        res = mod.fit(start_params=params, maxiter=0)
        like = llf(params)
        assert_almost_equal(like, res.llf)


def test_formula_missing_exposure():
    # see 2083
    import statsmodels.formula.api as smf

    d = {'Foo': [1, 2, 10, 149], 'Bar': [1, 2, 3, np.nan],
         'constant': [1] * 4, 'exposure': np.random.uniform(size=4),
         'x': [1, 3, 2, 1.5]}
    df = pd.DataFrame(d)

    family = sm.families.Gaussian(link=sm.families.links.Log())

    mod = smf.glm("Foo ~ Bar", data=df, exposure=df.exposure,
                  family=family)
    assert_(type(mod.exposure) is np.ndarray, msg='Exposure is not ndarray')

    exposure = pd.Series(np.random.uniform(size=5))
    df.loc[3, 'Bar'] = 4   # nan not relevant for Valueerror for shape mismatch
    assert_raises(ValueError, smf.glm, "Foo ~ Bar", data=df,
                  exposure=exposure, family=family)
    assert_raises(ValueError, GLM, df.Foo, df[['constant', 'Bar']],
                  exposure=exposure, family=family)


@pytest.mark.matplotlib
def test_plots(close_figures):

    np.random.seed(378)
    n = 200
    exog = np.random.normal(size=(n, 2))
    lin_pred = exog[:, 0] + exog[:, 1]**2
    prob = 1 / (1 + np.exp(-lin_pred))
    endog = 1 * (np.random.uniform(size=n) < prob)

    model = sm.GLM(endog, exog, family=sm.families.Binomial())
    result = model.fit()

    import pandas as pd

    from statsmodels.graphics.regressionplots import add_lowess

    # array interface
    for j in 0,1:
        fig = result.plot_added_variable(j)
        add_lowess(fig.axes[0], frac=0.5)
        close_or_save(pdf, fig)
        fig = result.plot_partial_residuals(j)
        add_lowess(fig.axes[0], frac=0.5)
        close_or_save(pdf, fig)
        fig = result.plot_ceres_residuals(j)
        add_lowess(fig.axes[0], frac=0.5)
        close_or_save(pdf, fig)

    # formula interface
    data = pd.DataFrame({"y": endog, "x1": exog[:, 0], "x2": exog[:, 1]})
    model = sm.GLM.from_formula("y ~ x1 + x2", data, family=sm.families.Binomial())
    result = model.fit()
    for j in 0,1:
        xname = ["x1", "x2"][j]
        fig = result.plot_added_variable(xname)
        add_lowess(fig.axes[0], frac=0.5)
        close_or_save(pdf, fig)
        fig = result.plot_partial_residuals(xname)
        add_lowess(fig.axes[0], frac=0.5)
        close_or_save(pdf, fig)
        fig = result.plot_ceres_residuals(xname)
        add_lowess(fig.axes[0], frac=0.5)
        close_or_save(pdf, fig)

def gen_endog(lin_pred, family_class, link, binom_version=0):

    np.random.seed(872)

    fam = sm.families

    mu = link().inverse(lin_pred)

    if family_class == fam.Binomial:
        if binom_version == 0:
            endog = 1*(np.random.uniform(size=len(lin_pred)) < mu)
        else:
            endog = np.empty((len(lin_pred), 2))
            n = 10
            endog[:, 0] = (np.random.uniform(size=(len(lin_pred), n)) < mu[:, None]).sum(1)
            endog[:, 1] = n - endog[:, 0]
    elif family_class == fam.Poisson:
        endog = np.random.poisson(mu)
    elif family_class == fam.Gamma:
        endog = np.random.gamma(2, mu)
    elif family_class == fam.Gaussian:
        endog = mu + 2 * np.random.normal(size=len(lin_pred))
    elif family_class == fam.NegativeBinomial:
        from scipy.stats.distributions import nbinom
        endog = nbinom.rvs(mu, 0.5)
    elif family_class == fam.InverseGaussian:
        from scipy.stats.distributions import invgauss
        endog = invgauss.rvs(mu, scale=20)
    else:
        raise ValueError

    return endog


@pytest.mark.smoke
def test_summary():
    np.random.seed(4323)

    n = 100
    exog = np.random.normal(size=(n, 2))
    exog[:, 0] = 1
    endog = np.random.normal(size=n)

    for method in ["irls", "cg"]:
        fa = sm.families.Gaussian()
        model = sm.GLM(endog, exog, family=fa)
        rslt = model.fit(method=method)
        s = rslt.summary()


def check_score_hessian(results):
    # compare models core and hessian with numerical derivatives

    params = results.params
    # avoid checking score at MLE, score close to zero
    sc = results.model.score(params * 0.98, scale=1)
    # cs currently (0.9) does not work for all families
    llfunc = lambda x: results.model.loglike(x, scale=1)  # noqa
    sc2 = approx_fprime(params * 0.98, llfunc)
    assert_allclose(sc, sc2, rtol=1e-4, atol=1e-4)

    hess = results.model.hessian(params, scale=1)
    hess2 = approx_hess(params, llfunc)
    assert_allclose(hess, hess2, rtol=1e-4)
    scfunc = lambda x: results.model.score(x, scale=1)  # noqa
    hess3 = approx_fprime(params, scfunc)
    assert_allclose(hess, hess3, rtol=1e-4)


def test_gradient_irls():
    # Compare the results when using gradient optimization and IRLS.

    # TODO: Find working examples for inverse_squared link

    np.random.seed(87342)

    fam = sm.families
    lnk = sm.families.links
    families = [(fam.Binomial, [lnk.Logit, lnk.Probit, lnk.CLogLog, lnk.Log, lnk.Cauchy]),
                (fam.Poisson, [lnk.Log, lnk.Identity, lnk.Sqrt]),
                (fam.Gamma, [lnk.Log, lnk.Identity, lnk.InversePower]),
                (fam.Gaussian, [lnk.Identity, lnk.Log, lnk.InversePower]),
                (fam.InverseGaussian, [lnk.Log, lnk.Identity, lnk.InversePower, lnk.InverseSquared]),
                (fam.NegativeBinomial, [lnk.Log, lnk.InversePower, lnk.InverseSquared, lnk.Identity])]

    n = 100
    p = 3
    exog = np.random.normal(size=(n, p))
    exog[:, 0] = 1

    skip_one = False
    for family_class, family_links in families:
        for link in family_links:
            for binom_version in 0,1:

                if family_class != fam.Binomial and binom_version == 1:
                    continue

                if (family_class, link) == (fam.Poisson, lnk.Identity):
                    lin_pred = 20 + exog.sum(1)
                elif (family_class, link) == (fam.Binomial, lnk.Log):
                    lin_pred = -1 + exog.sum(1) / 8
                elif (family_class, link) == (fam.Poisson, lnk.Sqrt):
                    lin_pred = 2 + exog.sum(1)
                elif (family_class, link) == (fam.InverseGaussian, lnk.Log):
                    #skip_zero = True
                    lin_pred = -1 + exog.sum(1)
                elif (family_class, link) == (fam.InverseGaussian, lnk.Identity):
                    lin_pred = 20 + 5*exog.sum(1)
                    lin_pred = np.clip(lin_pred, 1e-4, np.inf)
                elif (family_class, link) == (fam.InverseGaussian, lnk.InverseSquared):
                    lin_pred = 0.5 + exog.sum(1) / 5
                    continue # skip due to non-convergence
                elif (family_class, link) == (fam.InverseGaussian, lnk.InversePower):
                    lin_pred = 1 + exog.sum(1) / 5
                elif (family_class, link) == (fam.NegativeBinomial, lnk.Identity):
                    lin_pred = 20 + 5*exog.sum(1)
                    lin_pred = np.clip(lin_pred, 1e-4, np.inf)
                elif (family_class, link) == (fam.NegativeBinomial, lnk.InverseSquared):
                    lin_pred = 0.1 + np.random.uniform(size=exog.shape[0])
                    continue # skip due to non-convergence
                elif (family_class, link) == (fam.NegativeBinomial, lnk.InversePower):
                    lin_pred = 1 + exog.sum(1) / 5

                elif (family_class, link) == (fam.Gaussian, lnk.InversePower):
                    # adding skip because of convergence failure
                    skip_one = True
                # the following fails with Identity link, because endog < 0
                # elif family_class == fam.Gamma:
                #     lin_pred = 0.5 * exog.sum(1) + np.random.uniform(size=exog.shape[0])
                else:
                    lin_pred = np.random.uniform(size=exog.shape[0])

                endog = gen_endog(lin_pred, family_class, link, binom_version)

                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    mod_irls = sm.GLM(endog, exog, family=family_class(link=link()))
                rslt_irls = mod_irls.fit(method="IRLS")

                if not (family_class, link) in [(fam.Poisson, lnk.Sqrt),
                                                (fam.Gamma, lnk.InversePower),
                                                (fam.InverseGaussian, lnk.Identity)
                                                ]:
                    check_score_hessian(rslt_irls)

                # Try with and without starting values.
                for max_start_irls, start_params in (0, rslt_irls.params), (3, None):
                    # TODO: skip convergence failures for now
                    if max_start_irls > 0 and skip_one:
                        continue
                    with warnings.catch_warnings():
                        warnings.simplefilter("ignore")
                        mod_gradient = sm.GLM(endog, exog, family=family_class(link=link()))
                    rslt_gradient = mod_gradient.fit(max_start_irls=max_start_irls,
                                                     start_params=start_params,
                                                     method="newton", maxiter=300)

                    assert_allclose(rslt_gradient.params,
                                    rslt_irls.params, rtol=1e-6, atol=5e-5)

                    assert_allclose(rslt_gradient.llf, rslt_irls.llf,
                                    rtol=1e-6, atol=1e-6)

                    assert_allclose(rslt_gradient.scale, rslt_irls.scale,
                                    rtol=1e-6, atol=1e-6)

                    # Get the standard errors using expected information.
                    gradient_bse = rslt_gradient.bse
                    ehess = mod_gradient.hessian(rslt_gradient.params, observed=False)
                    gradient_bse = np.sqrt(-np.diag(np.linalg.inv(ehess)))
                    assert_allclose(gradient_bse, rslt_irls.bse, rtol=1e-6, atol=5e-5)
                    # rslt_irls.bse corresponds to observed=True
                    assert_allclose(rslt_gradient.bse, rslt_irls.bse, rtol=0.2, atol=5e-5)

                    rslt_gradient_eim = mod_gradient.fit(max_start_irls=0,
                                                         cov_type='eim',
                                                         start_params=rslt_gradient.params,
                                                         method="newton", maxiter=300)
                    assert_allclose(rslt_gradient_eim.bse, rslt_irls.bse, rtol=5e-5, atol=0)


def test_gradient_irls_eim():
    # Compare the results when using eime gradient optimization and IRLS.

    # TODO: Find working examples for inverse_squared link

    np.random.seed(87342)

    fam = sm.families
    lnk = sm.families.links
    families = [(fam.Binomial, [lnk.Logit, lnk.Probit, lnk.CLogLog, lnk.Log,
                                lnk.Cauchy]),
                (fam.Poisson, [lnk.Log, lnk.Identity, lnk.Sqrt]),
                (fam.Gamma, [lnk.Log, lnk.Identity, lnk.InversePower]),
                (fam.Gaussian, [lnk.Identity, lnk.Log, lnk.InversePower]),
                (fam.InverseGaussian, [lnk.Log, lnk.Identity,
                                       lnk.InversePower,
                                       lnk.InverseSquared]),
                (fam.NegativeBinomial, [lnk.Log, lnk.InversePower,
                                        lnk.InverseSquared, lnk.Identity])]

    n = 100
    p = 3
    exog = np.random.normal(size=(n, p))
    exog[:, 0] = 1

    skip_one = False
    for family_class, family_links in families:
        for link in family_links:
            for binom_version in 0, 1:

                if family_class != fam.Binomial and binom_version == 1:
                    continue

                if (family_class, link) == (fam.Poisson, lnk.Identity):
                    lin_pred = 20 + exog.sum(1)
                elif (family_class, link) == (fam.Binomial, lnk.Log):
                    lin_pred = -1 + exog.sum(1) / 8
                elif (family_class, link) == (fam.Poisson, lnk.Sqrt):
                    lin_pred = 2 + exog.sum(1)
                elif (family_class, link) == (fam.InverseGaussian, lnk.Log):
                    # skip_zero = True
                    lin_pred = -1 + exog.sum(1)
                elif (family_class, link) == (fam.InverseGaussian,
                                              lnk.Identity):
                    lin_pred = 20 + 5*exog.sum(1)
                    lin_pred = np.clip(lin_pred, 1e-4, np.inf)
                elif (family_class, link) == (fam.InverseGaussian,
                                              lnk.InverseSquared):
                    lin_pred = 0.5 + exog.sum(1) / 5
                    continue  # skip due to non-convergence
                elif (family_class, link) == (fam.InverseGaussian,
                                              lnk.InversePower):
                    lin_pred = 1 + exog.sum(1) / 5
                elif (family_class, link) == (fam.NegativeBinomial,
                                              lnk.Identity):
                    lin_pred = 20 + 5*exog.sum(1)
                    lin_pred = np.clip(lin_pred, 1e-4, np.inf)
                elif (family_class, link) == (fam.NegativeBinomial,
                                              lnk.InverseSquared):
                    lin_pred = 0.1 + np.random.uniform(size=exog.shape[0])
                    continue  # skip due to non-convergence
                elif (family_class, link) == (fam.NegativeBinomial,
                                              lnk.InversePower):
                    lin_pred = 1 + exog.sum(1) / 5

                elif (family_class, link) == (fam.Gaussian, lnk.InversePower):
                    # adding skip because of convergence failure
                    skip_one = True
                else:
                    lin_pred = np.random.uniform(size=exog.shape[0])

                endog = gen_endog(lin_pred, family_class, link, binom_version)

                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    mod_irls = sm.GLM(endog, exog,
                                      family=family_class(link=link()))
                rslt_irls = mod_irls.fit(method="IRLS")

                # Try with and without starting values.
                for max_start_irls, start_params in ((0, rslt_irls.params),
                                                     (3, None)):
                    # TODO: skip convergence failures for now
                    if max_start_irls > 0 and skip_one:
                        continue
                    with warnings.catch_warnings():
                        warnings.simplefilter("ignore")
                        mod_gradient = sm.GLM(endog, exog,
                                              family=family_class(link=link()))
                    rslt_gradient = mod_gradient.fit(
                            max_start_irls=max_start_irls,
                            start_params=start_params,
                            method="newton",
                            optim_hessian='eim'
                    )

                    assert_allclose(rslt_gradient.params, rslt_irls.params,
                                    rtol=1e-6, atol=5e-5)

                    assert_allclose(rslt_gradient.llf, rslt_irls.llf,
                                    rtol=1e-6, atol=1e-6)

                    assert_allclose(rslt_gradient.scale, rslt_irls.scale,
                                    rtol=1e-6, atol=1e-6)

                    # Get the standard errors using expected information.
                    ehess = mod_gradient.hessian(rslt_gradient.params,
                                                 observed=False)
                    gradient_bse = np.sqrt(-np.diag(np.linalg.inv(ehess)))

                    assert_allclose(gradient_bse, rslt_irls.bse, rtol=1e-6,
                                    atol=5e-5)


def test_glm_irls_method():
    nobs, k_vars = 50, 4
    np.random.seed(987126)
    x = np.random.randn(nobs, k_vars - 1)
    exog = add_constant(x, has_constant='add')
    y = exog.sum(1) + np.random.randn(nobs)

    mod = GLM(y, exog)
    res1 = mod.fit()
    res2 = mod.fit(wls_method='pinv', attach_wls=True)
    res3 = mod.fit(wls_method='qr', attach_wls=True)
    # fit_gradient does not attach mle_settings
    res_g1 = mod.fit(start_params=res1.params, method='bfgs')

    for r in [res1, res2, res3]:
        assert_equal(r.mle_settings['optimizer'], 'IRLS')
        assert_equal(r.method, 'IRLS')

    assert_equal(res1.mle_settings['wls_method'], 'lstsq')
    assert_equal(res2.mle_settings['wls_method'], 'pinv')
    assert_equal(res3.mle_settings['wls_method'], 'qr')

    assert_(hasattr(res2.results_wls.model, 'pinv_wexog'))
    assert_(hasattr(res3.results_wls.model, 'exog_Q'))

    # fit_gradient currently does not attach mle_settings
    assert_equal(res_g1.method, 'bfgs')


class CheckWtdDuplicationMixin:
    decimal_params = DECIMAL_4

    @classmethod
    def setup_class(cls):
        cls.data = cpunish.load()
        cls.data.endog = np.asarray(cls.data.endog)
        cls.data.exog = np.asarray(cls.data.exog)
        cls.endog = cls.data.endog
        cls.exog = cls.data.exog
        np.random.seed(1234)
        cls.weight = np.random.randint(5, 100, len(cls.endog))
        cls.endog_big = np.repeat(cls.endog, cls.weight)
        cls.exog_big = np.repeat(cls.exog, cls.weight, axis=0)

    def test_params(self):
        assert_allclose(self.res1.params, self.res2.params,  atol=1e-6,
                        rtol=1e-6)

    decimal_bse = DECIMAL_4

    def test_standard_errors(self):
        assert_allclose(self.res1.bse, self.res2.bse, rtol=1e-5, atol=1e-6)

    decimal_resids = DECIMAL_4

    # TODO: This does not work... Arrays are of different shape.
    # Perhaps we use self.res1.model.family.resid_XXX()?
    """
    def test_residuals(self):
        resids1 = np.column_stack((self.res1.resid_pearson,
                                   self.res1.resid_deviance,
                                   self.res1.resid_working,
                                   self.res1.resid_anscombe,
                                   self.res1.resid_response))
        resids2 = np.column_stack((self.res1.resid_pearson,
                                   self.res2.resid_deviance,
                                   self.res2.resid_working,
                                   self.res2.resid_anscombe,
                                   self.res2.resid_response))
        assert_allclose(resids1, resids2, self.decimal_resids)
    """

    def test_aic(self):
        # R includes the estimation of the scale as a lost dof
        # Does not with Gamma though
        assert_allclose(self.res1.aic, self.res2.aic,  atol=1e-6, rtol=1e-6)

    def test_deviance(self):
        assert_allclose(self.res1.deviance, self.res2.deviance,  atol=1e-6,
                        rtol=1e-6)

    def test_scale(self):
        assert_allclose(self.res1.scale, self.res2.scale, atol=1e-6, rtol=1e-6)

    def test_loglike(self):
        # Stata uses the below llf for these families
        # We differ with R for them
        assert_allclose(self.res1.llf, self.res2.llf, 1e-6)

    decimal_null_deviance = DECIMAL_4

    def test_null_deviance(self):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", DomainWarning)

            assert_allclose(self.res1.null_deviance,
                            self.res2.null_deviance,
                            atol=1e-6,
                            rtol=1e-6)

    decimal_bic = DECIMAL_4

    def test_bic(self):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            assert_allclose(self.res1.bic, self.res2.bic,  atol=1e-6, rtol=1e-6)

    decimal_fittedvalues = DECIMAL_4

    def test_fittedvalues(self):
        res2_fitted = self.res2.predict(self.res1.model.exog)
        assert_allclose(self.res1.fittedvalues, res2_fitted, atol=1e-5,
                        rtol=1e-5)

    decimal_tpvalues = DECIMAL_4

    def test_tpvalues(self):
        # test comparing tvalues and pvalues with normal implementation
        # make sure they use normal distribution (inherited in results class)
        assert_allclose(self.res1.tvalues, self.res2.tvalues, atol=1e-6,
                        rtol=2e-4)
        assert_allclose(self.res1.pvalues, self.res2.pvalues, atol=1e-6,
                        rtol=1e-6)
        assert_allclose(self.res1.conf_int(), self.res2.conf_int(), atol=1e-6,
                        rtol=1e-6)


class TestWtdGlmPoisson(CheckWtdDuplicationMixin):

    @classmethod
    def setup_class(cls):
        '''
        Tests Poisson family with canonical log link.
        '''
        super().setup_class()
        cls.endog = np.asarray(cls.endog)
        cls.exog = np.asarray(cls.exog)

        cls.res1 = GLM(cls.endog, cls.exog,
                        freq_weights=cls.weight,
                        family=sm.families.Poisson()).fit()
        cls.res2 = GLM(cls.endog_big, cls.exog_big,
                        family=sm.families.Poisson()).fit()


class TestWtdGlmPoissonNewton(CheckWtdDuplicationMixin):
    @classmethod
    def setup_class(cls):
        '''
        Tests Poisson family with canonical log link.
        '''
        super().setup_class()

        start_params = np.array([1.82794424e-04, -4.76785037e-02,
                                 -9.48249717e-02, -2.92293226e-04,
                                 2.63728909e+00, -2.05934384e+01])

        fit_kwds = dict(method='newton')
        cls.res1 = GLM(cls.endog, cls.exog,
                        freq_weights=cls.weight,
                        family=sm.families.Poisson()).fit(**fit_kwds)
        fit_kwds = dict(method='newton', start_params=start_params)
        cls.res2 = GLM(cls.endog_big, cls.exog_big,
                        family=sm.families.Poisson()).fit(**fit_kwds)


class TestWtdGlmPoissonHC0(CheckWtdDuplicationMixin):
    @classmethod
    def setup_class(cls):

        '''
        Tests Poisson family with canonical log link.
        '''
        super().setup_class()

        start_params = np.array([1.82794424e-04, -4.76785037e-02,
                                 -9.48249717e-02, -2.92293226e-04,
                                 2.63728909e+00, -2.05934384e+01])

        fit_kwds = dict(cov_type='HC0')
        cls.res1 = GLM(cls.endog, cls.exog,
                        freq_weights=cls.weight,
                        family=sm.families.Poisson()).fit(**fit_kwds)
        fit_kwds = dict(cov_type='HC0', start_params=start_params)
        cls.res2 = GLM(cls.endog_big, cls.exog_big,
                        family=sm.families.Poisson()).fit(**fit_kwds)


class TestWtdGlmPoissonClu(CheckWtdDuplicationMixin):
    @classmethod
    def setup_class(cls):

        '''
        Tests Poisson family with canonical log link.
        '''
        super().setup_class()

        start_params = np.array([1.82794424e-04, -4.76785037e-02,
                                 -9.48249717e-02, -2.92293226e-04,
                                 2.63728909e+00, -2.05934384e+01])

        gid = np.arange(1, len(cls.endog) + 1) // 2
        fit_kwds = dict(cov_type='cluster', cov_kwds={'groups': gid, 'use_correction':False})

        import warnings
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            cls.res1 = GLM(cls.endog, cls.exog,
                            freq_weights=cls.weight,
                            family=sm.families.Poisson()).fit(**fit_kwds)
            gidr = np.repeat(gid, cls.weight)
            fit_kwds = dict(cov_type='cluster', cov_kwds={'groups': gidr, 'use_correction':False})
            cls.res2 = GLM(cls.endog_big, cls.exog_big,
                            family=sm.families.Poisson()).fit(start_params=start_params,
                                                              **fit_kwds)


class TestWtdGlmBinomial(CheckWtdDuplicationMixin):
    @classmethod
    def setup_class(cls):

        '''
        Tests Binomial family with canonical logit link.
        '''
        super().setup_class()
        cls.endog = cls.endog / 100
        cls.endog_big = cls.endog_big / 100
        cls.res1 = GLM(cls.endog, cls.exog,
                       freq_weights=cls.weight,
                       family=sm.families.Binomial()).fit()
        cls.res2 = GLM(cls.endog_big, cls.exog_big,
                       family=sm.families.Binomial()).fit()


class TestWtdGlmNegativeBinomial(CheckWtdDuplicationMixin):
    @classmethod
    def setup_class(cls):

        '''
        Tests Negative Binomial family with canonical link
        g(p) = log(p/(p + 1/alpha))
        '''
        super().setup_class()
        alpha = 1.

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=DomainWarning)
            family_link = sm.families.NegativeBinomial(
                link=sm.families.links.NegativeBinomial(alpha=alpha),
                alpha=alpha)
            cls.res1 = GLM(cls.endog, cls.exog,
                           freq_weights=cls.weight,
                           family=family_link).fit()
            cls.res2 = GLM(cls.endog_big, cls.exog_big,
                           family=family_link).fit()


class TestWtdGlmGamma(CheckWtdDuplicationMixin):
    @classmethod
    def setup_class(cls):

        '''
        Tests Gamma family with log link.
        '''
        super().setup_class()
        family_link = sm.families.Gamma(sm.families.links.Log())
        cls.res1 = GLM(cls.endog, cls.exog,
                       freq_weights=cls.weight,
                       family=family_link).fit()
        cls.res2 = GLM(cls.endog_big, cls.exog_big,
                       family=family_link).fit()


class TestWtdGlmGaussian(CheckWtdDuplicationMixin):
    @classmethod
    def setup_class(cls):
        '''
        Tests Gaussian family with log link.
        '''
        super().setup_class()
        family_link = sm.families.Gaussian(sm.families.links.Log())
        cls.res1 = GLM(cls.endog, cls.exog,
                       freq_weights=cls.weight,
                       family=family_link).fit()
        cls.res2 = GLM(cls.endog_big, cls.exog_big,
                       family=family_link).fit()


class TestWtdGlmInverseGaussian(CheckWtdDuplicationMixin):
    @classmethod
    def setup_class(cls):
        '''
        Tests InverseGaussian family with log link.
        '''
        super().setup_class()
        family_link = sm.families.InverseGaussian(sm.families.links.Log())
        cls.res1 = GLM(cls.endog, cls.exog,
                       freq_weights=cls.weight,
                       family=family_link).fit()
        cls.res2 = GLM(cls.endog_big, cls.exog_big,
                       family=family_link).fit()


class TestWtdGlmGammaNewton(CheckWtdDuplicationMixin):
    @classmethod
    def setup_class(cls):
        '''
        Tests Gamma family with log link.
        '''
        super().setup_class()
        family_link = sm.families.Gamma(sm.families.links.Log())
        cls.res1 = GLM(cls.endog, cls.exog,
                       freq_weights=cls.weight,
                       family=family_link
                       ).fit(method='newton')
        cls.res2 = GLM(cls.endog_big, cls.exog_big,
                       family=family_link
                       ).fit(method='newton')

    def test_init_kwargs(self):
        family_link = sm.families.Gamma(sm.families.links.Log())

        with pytest.warns(ValueWarning, match="unknown kwargs"):
            GLM(self.endog, self.exog, family=family_link,
                weights=self.weight,  # incorrect keyword
                )


class TestWtdGlmGammaScale_X2(CheckWtdDuplicationMixin):
    @classmethod
    def setup_class(cls):
        '''
        Tests Gamma family with log link.
        '''
        super().setup_class()
        family_link = sm.families.Gamma(sm.families.links.Log())
        cls.res1 = GLM(cls.endog, cls.exog,
                       freq_weights=cls.weight,
                       family=family_link,
                       ).fit(scale='X2')
        cls.res2 = GLM(cls.endog_big, cls.exog_big,
                       family=family_link,
                       ).fit(scale='X2')


class TestWtdGlmGammaScale_dev(CheckWtdDuplicationMixin):
    @classmethod
    def setup_class(cls):
        '''
        Tests Gamma family with log link.
        '''
        super().setup_class()
        family_link = sm.families.Gamma(sm.families.links.Log())
        cls.res1 = GLM(cls.endog, cls.exog,
                       freq_weights=cls.weight,
                       family=family_link,
                       ).fit(scale='dev')
        cls.res2 = GLM(cls.endog_big, cls.exog_big,
                       family=family_link,
                       ).fit(scale='dev')

    def test_missing(self):
        endog = self.data.endog.copy()
        exog = self.data.exog.copy()
        exog[0, 0] = np.nan
        endog[[2, 4, 6, 8]] = np.nan
        freq_weights = self.weight
        mod_misisng = GLM(endog, exog, family=self.res1.model.family,
                          freq_weights=freq_weights, missing='drop')
        assert_equal(mod_misisng.freq_weights.shape[0],
                     mod_misisng.endog.shape[0])
        assert_equal(mod_misisng.freq_weights.shape[0],
                     mod_misisng.exog.shape[0])
        keep_idx = np.array([1,  3,  5,  7,  9, 10, 11, 12, 13, 14, 15, 16])
        assert_equal(mod_misisng.freq_weights, self.weight[keep_idx])


class TestWtdTweedieLog(CheckWtdDuplicationMixin):
    @classmethod
    def setup_class(cls):
        '''
        Tests Tweedie family with log link and var_power=1.
        '''
        super().setup_class()
        family_link = sm.families.Tweedie(link=sm.families.links.Log(),
                                          var_power=1)
        cls.res1 = GLM(cls.endog, cls.exog,
                        freq_weights=cls.weight,
                        family=family_link).fit()
        cls.res2 = GLM(cls.endog_big, cls.exog_big,
                        family=family_link).fit()


class TestWtdTweediePower2(CheckWtdDuplicationMixin):
    @classmethod
    def setup_class(cls):
        '''
        Tests Tweedie family with Power(1) link and var_power=2.
        '''
        cls.data = cpunish.load_pandas()
        cls.endog = cls.data.endog
        cls.exog = cls.data.exog[['INCOME', 'SOUTH']]
        np.random.seed(1234)
        cls.weight = np.random.randint(5, 100, len(cls.endog))
        cls.endog_big = np.repeat(cls.endog.values, cls.weight)
        cls.exog_big = np.repeat(cls.exog.values, cls.weight, axis=0)
        link = sm.families.links.Power()
        family_link = sm.families.Tweedie(link=link, var_power=2)
        cls.res1 = GLM(cls.endog, cls.exog,
                       freq_weights=cls.weight,
                       family=family_link).fit()
        cls.res2 = GLM(cls.endog_big, cls.exog_big,
                       family=family_link).fit()


class TestWtdTweediePower15(CheckWtdDuplicationMixin):
    @classmethod
    def setup_class(cls):
        '''
        Tests Tweedie family with Power(0.5) link and var_power=1.5.
        '''
        super().setup_class()
        family_link = sm.families.Tweedie(link=sm.families.links.Power(0.5),
                                          var_power=1.5)
        cls.res1 = GLM(cls.endog, cls.exog,
                        freq_weights=cls.weight,
                        family=family_link).fit()
        cls.res2 = GLM(cls.endog_big, cls.exog_big,
                        family=family_link).fit()


def test_wtd_patsy_missing():
    import pandas as pd
    data = cpunish.load()
    data.endog = np.require(data.endog, requirements="W")
    data.exog = np.require(data.exog, requirements="W")
    data.exog[0, 0] = np.nan
    data.endog[[2, 4, 6, 8]] = np.nan
    data.pandas = pd.DataFrame(data.exog, columns=data.exog_name)
    data.pandas['EXECUTIONS'] = data.endog
    weights = np.arange(1, len(data.endog)+1)
    formula = """EXECUTIONS ~ INCOME + PERPOVERTY + PERBLACK + VC100k96 +
                 SOUTH + DEGREE"""
    mod_misisng = GLM.from_formula(formula, data=data.pandas,
                                   freq_weights=weights)
    assert_equal(mod_misisng.freq_weights.shape[0],
                 mod_misisng.endog.shape[0])
    assert_equal(mod_misisng.freq_weights.shape[0],
                 mod_misisng.exog.shape[0])
    assert_equal(mod_misisng.freq_weights.shape[0], 12)
    keep_weights = np.array([2,  4,  6,  8, 10, 11, 12, 13, 14, 15, 16, 17])
    assert_equal(mod_misisng.freq_weights, keep_weights)


class CheckTweedie:
    def test_resid(self):
        idx1 = len(self.res1.resid_response) - 1
        idx2 = len(self.res2.resid_response) - 1
        assert_allclose(np.concatenate((self.res1.resid_response[:17],
                                        [self.res1.resid_response[idx1]])),
                        np.concatenate((self.res2.resid_response[:17],
                                        [self.res2.resid_response[idx2]])),
                        rtol=1e-5, atol=1e-5)
        assert_allclose(np.concatenate((self.res1.resid_pearson[:17],
                                        [self.res1.resid_pearson[idx1]])),
                        np.concatenate((self.res2.resid_pearson[:17],
                                        [self.res2.resid_pearson[idx2]])),
                        rtol=1e-5, atol=1e-5)
        assert_allclose(np.concatenate((self.res1.resid_deviance[:17],
                                        [self.res1.resid_deviance[idx1]])),
                        np.concatenate((self.res2.resid_deviance[:17],
                                        [self.res2.resid_deviance[idx2]])),
                        rtol=1e-5, atol=1e-5)

        assert_allclose(np.concatenate((self.res1.resid_working[:17],
                                        [self.res1.resid_working[idx1]])),
                        np.concatenate((self.res2.resid_working[:17],
                                        [self.res2.resid_working[idx2]])),
                        rtol=1e-5, atol=1e-5)


    def test_bse(self):
        assert_allclose(self.res1.bse, self.res2.bse, atol=1e-6, rtol=1e6)

    def test_params(self):
        assert_allclose(self.res1.params, self.res2.params, atol=1e-5,
                        rtol=1e-5)

    def test_deviance(self):
        assert_allclose(self.res1.deviance, self.res2.deviance, atol=1e-6,
                        rtol=1e-6)

    def test_df(self):
        assert_equal(self.res1.df_model, self.res2.df_model)
        assert_equal(self.res1.df_resid, self.res2.df_resid)

    def test_fittedvalues(self):
        idx1 = len(self.res1.fittedvalues) - 1
        idx2 = len(self.res2.resid_response) - 1
        assert_allclose(np.concatenate((self.res1.fittedvalues[:17],
                                        [self.res1.fittedvalues[idx1]])),
                        np.concatenate((self.res2.fittedvalues[:17],
                                        [self.res2.fittedvalues[idx2]])),
                        atol=1e-4, rtol=1e-4)

    def test_summary(self):
        self.res1.summary()
        self.res1.summary2()


class TestTweediePower15(CheckTweedie):
    @classmethod
    def setup_class(cls):
        from .results.results_glm import CpunishTweediePower15
        cls.data = cpunish.load_pandas()
        cls.exog = cls.data.exog[['INCOME', 'SOUTH']]
        cls.endog = cls.data.endog
        family_link = sm.families.Tweedie(link=sm.families.links.Power(1),
                                          var_power=1.5)
        cls.res1 = sm.GLM(endog=cls.data.endog,
                          exog=cls.data.exog[['INCOME', 'SOUTH']],
                          family=family_link).fit()
        cls.res2 = CpunishTweediePower15()


class TestTweediePower2(CheckTweedie):
    @classmethod
    def setup_class(cls):
        from .results.results_glm import CpunishTweediePower2
        cls.data = cpunish.load_pandas()
        cls.exog = cls.data.exog[['INCOME', 'SOUTH']]
        cls.endog = cls.data.endog
        family_link = sm.families.Tweedie(link=sm.families.links.Power(1),
                                          var_power=2.)
        cls.res1 = sm.GLM(endog=cls.data.endog,
                          exog=cls.data.exog[['INCOME', 'SOUTH']],
                          family=family_link).fit()
        cls.res2 = CpunishTweediePower2()


class TestTweedieLog1(CheckTweedie):
    @classmethod
    def setup_class(cls):
        from .results.results_glm import CpunishTweedieLog1
        cls.data = cpunish.load_pandas()
        cls.exog = cls.data.exog[['INCOME', 'SOUTH']]
        cls.endog = cls.data.endog
        family_link = sm.families.Tweedie(link=sm.families.links.Log(),
                                          var_power=1.)
        cls.res1 = sm.GLM(endog=cls.data.endog,
                          exog=cls.data.exog[['INCOME', 'SOUTH']],
                          family=family_link).fit()
        cls.res2 = CpunishTweedieLog1()


class TestTweedieLog15Fair(CheckTweedie):
    @classmethod
    def setup_class(cls):
        from statsmodels.datasets.fair import load_pandas

        from .results.results_glm import FairTweedieLog15
        data = load_pandas()
        family_link = sm.families.Tweedie(link=sm.families.links.Log(),
                                          var_power=1.5)
        cls.res1 = sm.GLM(endog=data.endog,
                          exog=data.exog[['rate_marriage', 'age',
                                          'yrs_married']],
                          family=family_link).fit()
        cls.res2 = FairTweedieLog15()


class CheckTweedieSpecial:
    def test_mu(self):
        assert_allclose(self.res1.mu, self.res2.mu, rtol=1e-5, atol=1e-5)

    def test_resid(self):
        assert_allclose(self.res1.resid_response, self.res2.resid_response,
                        rtol=1e-5, atol=1e-5)
        assert_allclose(self.res1.resid_pearson, self.res2.resid_pearson,
                        rtol=1e-5, atol=1e-5)
        assert_allclose(self.res1.resid_deviance, self.res2.resid_deviance,
                        rtol=1e-5, atol=1e-5)
        assert_allclose(self.res1.resid_working, self.res2.resid_working,
                        rtol=1e-5, atol=1e-5)
        assert_allclose(self.res1.resid_anscombe_unscaled,
                        self.res2.resid_anscombe_unscaled,
                        rtol=1e-5, atol=1e-5)


class TestTweedieSpecialLog0(CheckTweedieSpecial):
    @classmethod
    def setup_class(cls):
        cls.data = cpunish.load_pandas()
        cls.exog = cls.data.exog[['INCOME', 'SOUTH']]
        cls.endog = cls.data.endog
        family1 = sm.families.Gaussian(link=sm.families.links.Log())
        cls.res1 = sm.GLM(endog=cls.data.endog,
                          exog=cls.data.exog[['INCOME', 'SOUTH']],
                          family=family1).fit()
        family2 = sm.families.Tweedie(link=sm.families.links.Log(),
                                      var_power=0)
        cls.res2 = sm.GLM(endog=cls.data.endog,
                          exog=cls.data.exog[['INCOME', 'SOUTH']],
                          family=family2).fit()


class TestTweedieSpecialLog1(CheckTweedieSpecial):
    @classmethod
    def setup_class(cls):
        cls.data = cpunish.load_pandas()
        cls.exog = cls.data.exog[['INCOME', 'SOUTH']]
        cls.endog = cls.data.endog
        family1 = sm.families.Poisson(link=sm.families.links.Log())
        cls.res1 = sm.GLM(endog=cls.data.endog,
                          exog=cls.data.exog[['INCOME', 'SOUTH']],
                          family=family1).fit()
        family2 = sm.families.Tweedie(link=sm.families.links.Log(),
                                      var_power=1)
        cls.res2 = sm.GLM(endog=cls.data.endog,
                          exog=cls.data.exog[['INCOME', 'SOUTH']],
                          family=family2).fit()


class TestTweedieSpecialLog2(CheckTweedieSpecial):
    @classmethod
    def setup_class(cls):
        cls.data = cpunish.load_pandas()
        cls.exog = cls.data.exog[['INCOME', 'SOUTH']]
        cls.endog = cls.data.endog
        family1 = sm.families.Gamma(link=sm.families.links.Log())
        cls.res1 = sm.GLM(endog=cls.data.endog,
                          exog=cls.data.exog[['INCOME', 'SOUTH']],
                          family=family1).fit()
        family2 = sm.families.Tweedie(link=sm.families.links.Log(),
                                      var_power=2)
        cls.res2 = sm.GLM(endog=cls.data.endog,
                          exog=cls.data.exog[['INCOME', 'SOUTH']],
                          family=family2).fit()


class TestTweedieSpecialLog3(CheckTweedieSpecial):
    @classmethod
    def setup_class(cls):
        cls.data = cpunish.load_pandas()
        cls.exog = cls.data.exog[['INCOME', 'SOUTH']]
        cls.endog = cls.data.endog
        family1 = sm.families.InverseGaussian(link=sm.families.links.Log())
        cls.res1 = sm.GLM(endog=cls.data.endog,
                          exog=cls.data.exog[['INCOME', 'SOUTH']],
                          family=family1).fit()
        family2 = sm.families.Tweedie(link=sm.families.links.Log(),
                                      var_power=3)
        cls.res2 = sm.GLM(endog=cls.data.endog,
                          exog=cls.data.exog[['INCOME', 'SOUTH']],
                          family=family2).fit()

def gen_tweedie(p):

    np.random.seed(3242)
    n = 500
    x = np.random.normal(size=(n, 4))
    lpr = np.dot(x, np.r_[1, -1, 0, 0.5])
    mu = np.exp(lpr)
    lam = 10 * mu**(2 - p) / (2 - p)
    alp = (2 - p) / (p - 1)
    bet = 10 * mu**(1 - p) / (p - 1)

    # Generate Tweedie values using commpound Poisson distribution
    y = np.empty(n)
    N = np.random.poisson(lam)
    for i in range(n):
        y[i] = np.random.gamma(alp, 1 / bet[i], N[i]).sum()

    return y, x

@pytest.mark.filterwarnings("ignore:GLM ridge optimization")
def test_tweedie_EQL():
    # All tests below are regression tests, but the results
    # are very close to the population values.

    p = 1.5
    y, x = gen_tweedie(p)

    # Un-regularized fit using gradients
    fam = sm.families.Tweedie(var_power=p, eql=True)
    model1 = sm.GLM(y, x, family=fam)
    result1 = model1.fit(method="newton")
    assert_allclose(result1.params,
       np.array([1.00350497, -0.99656954, 0.00802702, 0.50713209]),
       rtol=1e-5, atol=1e-5)

    # Un-regularized fit using IRLS
    model1x = sm.GLM(y, x, family=fam)
    result1x = model1x.fit(method="irls")
    assert_allclose(result1.params, result1x.params)
    assert_allclose(result1.bse, result1x.bse, rtol=1e-2)

    # Lasso fit using coordinate-wise descent
    # TODO: The search gets trapped in an infinite oscillation, so use
    # a slack convergence tolerance.
    model2 = sm.GLM(y, x, family=fam)
    result2 = model2.fit_regularized(L1_wt=1, alpha=0.07, maxiter=200,
                   cnvrg_tol=0.01)

    rtol, atol = 1e-2, 1e-4
    assert_allclose(result2.params,
        np.array([0.976831, -0.952854, 0., 0.470171]),
        rtol=rtol, atol=atol)

    # Series of ridge fits using gradients
    ev = (np.array([1.001778, -0.99388, 0.00797, 0.506183]),
          np.array([0.98586638, -0.96953481, 0.00749983, 0.4975267]),
          np.array([0.206429, -0.164547, 0.000235, 0.102489]))
    for j, alpha in enumerate([0.05, 0.5, 0.7]):
        model3 = sm.GLM(y, x, family=fam)
        result3 = model3.fit_regularized(L1_wt=0, alpha=alpha)
        assert_allclose(result3.params, ev[j], rtol=rtol, atol=atol)
        result4 = model3.fit_regularized(L1_wt=0, alpha=alpha * np.ones(x.shape[1]))
        assert_allclose(result4.params, result3.params, rtol=rtol, atol=atol)
        alpha = alpha * np.ones(x.shape[1])
        alpha[0] = 0
        result5 = model3.fit_regularized(L1_wt=0, alpha=alpha)
        assert not np.allclose(result5.params, result4.params)

def test_tweedie_elastic_net():
    # Check that the coefficients vanish one-by-one
    # when using the elastic net.

    p = 1.5 # Tweedie variance exponent
    y, x = gen_tweedie(p)

    # Un-regularized fit using gradients
    fam = sm.families.Tweedie(var_power=p, eql=True)
    model1 = sm.GLM(y, x, family=fam)

    nnz = []
    for alpha in np.linspace(0, 10, 20):
        result1 = model1.fit_regularized(L1_wt=0.5, alpha=alpha)
        nnz.append((np.abs(result1.params) > 0).sum())
    nnz = np.unique(nnz)
    assert len(nnz) == 5


def test_tweedie_EQL_poisson_limit():
    # Test the limiting Poisson case of the Nelder/Pregibon/Tweedie
    # EQL.

    np.random.seed(3242)
    n = 500

    x = np.random.normal(size=(n, 3))
    x[:, 0] = 1
    lpr = 4 + x[:, 1:].sum(1)
    mn = np.exp(lpr)
    y = np.random.poisson(mn)

    for scale in 1.0, 'x2', 'dev':

        # Un-regularized fit using gradients not IRLS
        fam = sm.families.Tweedie(var_power=1, eql=True)
        model1 = sm.GLM(y, x, family=fam)
        result1 = model1.fit(method="newton", scale=scale)

        # Poisson GLM
        model2 = sm.GLM(y, x, family=sm.families.Poisson())
        result2 = model2.fit(method="newton", scale=scale)

        assert_allclose(result1.params, result2.params, atol=1e-6, rtol=1e-6)
        assert_allclose(result1.bse, result2.bse, 1e-6, 1e-6)


def test_tweedie_EQL_upper_limit():
    # Test the limiting case of the Nelder/Pregibon/Tweedie
    # EQL with var = mean^2.  These are tests against population
    # values so accuracy is not high.

    np.random.seed(3242)
    n = 500

    x = np.random.normal(size=(n, 3))
    x[:, 0] = 1
    lpr = 4 + x[:, 1:].sum(1)
    mn = np.exp(lpr)
    y = np.random.poisson(mn)

    for scale in 'x2', 'dev', 1.0:

        # Un-regularized fit using gradients not IRLS
        fam = sm.families.Tweedie(var_power=2, eql=True)
        model1 = sm.GLM(y, x, family=fam)
        result1 = model1.fit(method="newton", scale=scale)
        assert_allclose(result1.params, np.r_[4, 1, 1], atol=1e-3, rtol=1e-1)


def testTweediePowerEstimate():
    # Test the Pearson estimate of the Tweedie variance and scale parameters.
    #
    # Ideally, this would match the following R code, but I cannot make it work...
    #
    # setwd('c:/workspace')
    # data <- read.csv('cpunish.csv', sep=",")
    #
    # library(tweedie)
    #
    # y <- c(1.00113835e+05,   6.89668315e+03,   6.15726842e+03,
    #        1.41718806e+03,   5.11776456e+02,   2.55369154e+02,
    #        1.07147443e+01,   3.56874698e+00,   4.06797842e-02,
    #        7.06996731e-05,   2.10165106e-07,   4.34276938e-08,
    #        1.56354040e-09,   0.00000000e+00,   0.00000000e+00,
    #        0.00000000e+00,   0.00000000e+00)
    #
    # data$NewY <- y
    #
    # out <- tweedie.profile( NewY ~ INCOME + SOUTH - 1,
    #                         p.vec=c(1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8,
    #                                 1.9), link.power=0,
    #                         data=data,do.plot = TRUE)
    data = cpunish.load_pandas()
    y = [1.00113835e+05,   6.89668315e+03,   6.15726842e+03,
         1.41718806e+03,   5.11776456e+02,   2.55369154e+02,
         1.07147443e+01,   3.56874698e+00,   4.06797842e-02,
         7.06996731e-05,   2.10165106e-07,   4.34276938e-08,
         1.56354040e-09,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00]
    model1 = sm.GLM(y, data.exog[['INCOME', 'SOUTH']],
                    family=sm.families.Tweedie(link=sm.families.links.Log(),
                                               var_power=1.5))
    res1 = model1.fit()
    model2 = sm.GLM((y - res1.mu) ** 2,
                    np.column_stack((np.ones(len(res1.mu)), np.log(res1.mu))),
                    family=sm.families.Gamma(sm.families.links.Log()))
    res2 = model2.fit()
    # Sample may be too small for this...
    # assert_allclose(res1.scale, np.exp(res2.params[0]), rtol=0.25)
    p = model1.estimate_tweedie_power(res1.mu)
    assert_allclose(p, res2.params[1], rtol=0.25)

def test_glm_lasso_6431():

    # Based on issue #6431
    # Fails with newton-cg as optimizer
    np.random.seed(123)

    from statsmodels.regression.linear_model import OLS

    n = 50
    x = np.ones((n, 2))
    x[:, 1] = np.arange(0, n)
    y = 1000 + x[:, 1] + np.random.normal(0, 1, n)

    params = np.r_[999.82244338, 1.0077889]

    for method in "bfgs", None:
        for fun in [OLS, GLM]:

            # Changing L1_wtValue from 0 to 1e-9 changes
            # the algorithm from scipy gradient optimization
            # to statsmodels coordinate descent
            for L1_wtValue in [0, 1e-9]:
                model = fun(y, x)
                if fun == OLS:
                    fit = model.fit_regularized(alpha=0, L1_wt=L1_wtValue)
                else:
                    fit = model._fit_ridge(alpha=0, start_params=None, method=method)
                assert_allclose(params, fit.params, atol=1e-6, rtol=1e-6)

class TestRegularized:

    def test_regularized(self):

        import os

        from .results import glmnet_r_results

        for dtype in "binomial", "poisson":

            cur_dir = os.path.dirname(os.path.abspath(__file__))
            data = np.loadtxt(os.path.join(cur_dir, "results", "enet_%s.csv" % dtype),
                              delimiter=",")

            endog = data[:, 0]
            exog = data[:, 1:]

            fam = {"binomial" : sm.families.Binomial,
                   "poisson" : sm.families.Poisson}[dtype]

            for j in range(9):

                vn = "rslt_%s_%d" % (dtype, j)
                r_result = getattr(glmnet_r_results, vn)
                L1_wt = r_result[0]
                alpha = r_result[1]
                params = r_result[2:]

                model = GLM(endog, exog, family=fam())
                sm_result = model.fit_regularized(L1_wt=L1_wt, alpha=alpha)

                # Agreement is OK, see below for further check
                assert_allclose(params, sm_result.params, atol=1e-2, rtol=0.3)

                # The penalized log-likelihood that we are maximizing.
                def plf(params):
                    llf = model.loglike(params) / len(endog)
                    llf = llf - alpha * ((1 - L1_wt)*np.sum(params**2) / 2 + L1_wt*np.sum(np.abs(params)))
                    return llf

                # Confirm that we are doing better than glmnet.
                llf_r = plf(params)
                llf_sm = plf(sm_result.params)
                assert_equal(np.sign(llf_sm - llf_r), 1)


class TestConvergence:
    @classmethod
    def setup_class(cls):
        '''
        Test Binomial family with canonical logit link using star98 dataset.
        '''
        from statsmodels.datasets.star98 import load
        data = load()
        data.exog = add_constant(data.exog, prepend=False)
        cls.model = GLM(data.endog, data.exog,
                         family=sm.families.Binomial())

    def _when_converged(self, atol=1e-8, rtol=0, tol_criterion='deviance'):
        for i, dev in enumerate(self.res.fit_history[tol_criterion]):
            orig = self.res.fit_history[tol_criterion][i]
            new = self.res.fit_history[tol_criterion][i + 1]
            if np.allclose(orig, new, atol=atol, rtol=rtol):
                return i
        raise ValueError('CONVERGENCE CHECK: It seems this doens\'t converge!')

    def test_convergence_atol_only(self):
        atol = 1e-8
        rtol = 0
        self.res = self.model.fit(atol=atol, rtol=rtol)
        expected_iterations = self._when_converged(atol=atol, rtol=rtol)
        actual_iterations = self.res.fit_history['iteration']
        # Note the first value is the list is np.inf. The second value
        # is the initial guess based off of start_params or the
        # estimate thereof. The third value (index = 2) is the actual "first
        # iteration"
        assert_equal(expected_iterations, actual_iterations)
        assert_equal(len(self.res.fit_history['deviance']) - 2,
                     actual_iterations)

    def test_convergence_rtol_only(self):
        atol = 0
        rtol = 1e-8
        self.res = self.model.fit(atol=atol, rtol=rtol)
        expected_iterations = self._when_converged(atol=atol, rtol=rtol)
        actual_iterations = self.res.fit_history['iteration']
        # Note the first value is the list is np.inf. The second value
        # is the initial guess based off of start_params or the
        # estimate thereof. The third value (index = 2) is the actual "first
        # iteration"
        assert_equal(expected_iterations, actual_iterations)
        assert_equal(len(self.res.fit_history['deviance']) - 2,
                     actual_iterations)

    def test_convergence_atol_rtol(self):
        atol = 1e-8
        rtol = 1e-8
        self.res = self.model.fit(atol=atol, rtol=rtol)
        expected_iterations = self._when_converged(atol=atol, rtol=rtol)
        actual_iterations = self.res.fit_history['iteration']
        # Note the first value is the list is np.inf. The second value
        # is the initial guess based off of start_params or the
        # estimate thereof. The third value (index = 2) is the actual "first
        # iteration"
        assert_equal(expected_iterations, actual_iterations)
        assert_equal(len(self.res.fit_history['deviance']) - 2,
                     actual_iterations)

    def test_convergence_atol_only_params(self):
        atol = 1e-8
        rtol = 0
        self.res = self.model.fit(atol=atol, rtol=rtol, tol_criterion='params')
        expected_iterations = self._when_converged(atol=atol, rtol=rtol,
                                                   tol_criterion='params')
        actual_iterations = self.res.fit_history['iteration']
        # Note the first value is the list is np.inf. The second value
        # is the initial guess based off of start_params or the
        # estimate thereof. The third value (index = 2) is the actual "first
        # iteration"
        assert_equal(expected_iterations, actual_iterations)
        assert_equal(len(self.res.fit_history['deviance']) - 2,
                     actual_iterations)

    def test_convergence_rtol_only_params(self):
        atol = 0
        rtol = 1e-8
        self.res = self.model.fit(atol=atol, rtol=rtol, tol_criterion='params')
        expected_iterations = self._when_converged(atol=atol, rtol=rtol,
                                                   tol_criterion='params')
        actual_iterations = self.res.fit_history['iteration']
        # Note the first value is the list is np.inf. The second value
        # is the initial guess based off of start_params or the
        # estimate thereof. The third value (index = 2) is the actual "first
        # iteration"
        assert_equal(expected_iterations, actual_iterations)
        assert_equal(len(self.res.fit_history['deviance']) - 2,
                     actual_iterations)

    def test_convergence_atol_rtol_params(self):
        atol = 1e-8
        rtol = 1e-8
        self.res = self.model.fit(atol=atol, rtol=rtol, tol_criterion='params')
        expected_iterations = self._when_converged(atol=atol, rtol=rtol,
                                                   tol_criterion='params')
        actual_iterations = self.res.fit_history['iteration']
        # Note the first value is the list is np.inf. The second value
        # is the initial guess based off of start_params or the
        # estimate thereof. The third value (index = 2) is the actual "first
        # iteration"
        assert_equal(expected_iterations, actual_iterations)
        assert_equal(len(self.res.fit_history['deviance']) - 2,
                     actual_iterations)


def test_poisson_deviance():
    # see #3355 missing term in deviance if resid_response.sum() != 0
    np.random.seed(123987)
    nobs, k_vars = 50, 3-1
    x = sm.add_constant(np.random.randn(nobs, k_vars))

    mu_true = np.exp(x.sum(1))
    y = np.random.poisson(mu_true, size=nobs)

    mod = sm.GLM(y, x[:, :], family=sm.genmod.families.Poisson())
    res = mod.fit()

    d_i = res.resid_deviance
    d = res.deviance
    lr = (mod.family.loglike(y, y+1e-20) -
          mod.family.loglike(y, res.fittedvalues)) * 2

    assert_allclose(d, (d_i**2).sum(), rtol=1e-12)
    assert_allclose(d, lr, rtol=1e-12)

    # case without constant, resid_response.sum() != 0
    mod_nc = sm.GLM(y, x[:, 1:], family=sm.genmod.families.Poisson())
    res_nc = mod_nc.fit()

    d_i = res_nc.resid_deviance
    d = res_nc.deviance
    lr = (mod.family.loglike(y, y+1e-20) -
          mod.family.loglike(y, res_nc.fittedvalues)) * 2

    assert_allclose(d, (d_i**2).sum(), rtol=1e-12)
    assert_allclose(d, lr, rtol=1e-12)


def test_non_invertible_hessian_fails_summary():
    # Test when the hessian fails the summary is still available.
    data = cpunish.load_pandas()

    data.endog[:] = 1
    with warnings.catch_warnings():
        # we filter DomainWarning, the convergence problems
        # and warnings in summary
        warnings.simplefilter("ignore")
        mod = sm.GLM(data.endog, data.exog, family=sm.families.Gamma())
        res = mod.fit(maxiter=1, method='bfgs', max_start_irls=0)
        res.summary()


def test_int_scale():
    # GH-6627, make sure it works with int scale
    data = longley.load()
    mod = GLM(data.endog, data.exog, family=sm.families.Gaussian())
    res = mod.fit(scale=1)
    assert isinstance(res.params, pd.Series)
    assert res.scale.dtype == np.float64


@pytest.mark.parametrize("dtype", [np.int8, np.int16, np.int32, np.int64])
def test_int_exog(dtype):
    # GH-6627, make use of floats internally
    count1, n1, count2, n2 = 60, 51477.5, 30, 54308.7
    y = [count1, count2]
    x = np.asarray([[1, 1], [1, 0]]).astype(dtype)
    exposure = np.asarray([n1, n2])
    mod = GLM(y, x, exposure=exposure, family=sm.families.Poisson())
    res = mod.fit(method='bfgs', max_start_irls=0)
    assert isinstance(res.params, np.ndarray)


def test_glm_bic(iris):
    X = np.c_[np.ones(100), iris[50:, :4]]
    y = np.array(iris)[50:, 4].astype(np.int32)
    y -= 1
    SET_USE_BIC_LLF(True)
    model = GLM(y, X, family=sm.families.Binomial()).fit()
    # 34.9244 is what glm() of R yields
    assert_almost_equal(model.bic, 34.9244, decimal=3)
    assert_almost_equal(model.bic_llf, 34.9244, decimal=3)
    SET_USE_BIC_LLF(False)
    assert_almost_equal(model.bic, model.bic_deviance, decimal=3)
    SET_USE_BIC_LLF(None)


def test_glm_bic_warning(iris):
    X = np.c_[np.ones(100), iris[50:, :4]]
    y = np.array(iris)[50:, 4].astype(np.int32)
    y -= 1
    model = GLM(y, X, family=sm.families.Binomial()).fit()
    with pytest.warns(FutureWarning, match="The bic"):
        assert isinstance(model.bic, float)


def test_output_exposure_null(reset_randomstate):
    # GH 6953

    x0 = [np.sin(i / 20) + 2 for i in range(1000)]
    rs = np.random.RandomState(0)
    # Variable exposures for each observation
    exposure = rs.randint(100, 200, size=1000)
    y = [np.sum(rs.poisson(x, size=e)) for x, e in zip(x0, exposure)]
    x = add_constant(x0)

    model = GLM(
        endog=y, exog=x, exposure=exposure, family=sm.families.Poisson()
    ).fit()
    null_model = GLM(
        endog=y, exog=x[:, 0], exposure=exposure, family=sm.families.Poisson()
    ).fit()
    null_model_without_exposure = GLM(
        endog=y, exog=x[:, 0], family=sm.families.Poisson()
    ).fit()
    assert_allclose(model.llnull, null_model.llf)
    # Check that they are different
    assert np.abs(null_model_without_exposure.llf - model.llnull) > 1


def test_qaic():

    # Example from documentation of R package MuMIn
    import patsy
    ldose = np.concatenate((np.arange(6), np.arange(6)))
    sex = ["M"]*6 + ["F"]*6
    numdead = [10, 4, 9, 12, 18, 20, 0, 2, 6, 10, 12, 16]
    df = pd.DataFrame({"ldose": ldose, "sex": sex, "numdead": numdead})
    df["numalive"] = 20 - df["numdead"]
    df["SF"] = df["numdead"]

    y = df[["numalive", "numdead"]].values
    x = patsy.dmatrix("sex*ldose", data=df, return_type='dataframe')
    m = GLM(y, x, family=sm.families.Binomial())
    r = m.fit()
    scale = 2.412699
    qaic = r.info_criteria(crit="qaic", scale=scale)

    # R gives 31.13266 because it uses a df that is 1 greater,
    # presumably because they count the scale parameter in df.
    # This won't matter when comparing models by differencing
    # QAICs.
    # Binomial doesn't have a scale parameter, so adding +1 is not correct.
    assert_allclose(qaic, 29.13266, rtol=1e-5, atol=1e-5)
    qaic1 = r.info_criteria(crit="qaic", scale=scale, dk_params=1)
    assert_allclose(qaic1, 31.13266, rtol=1e-5, atol=1e-5)


def test_tweedie_score():

    np.random.seed(3242)
    n = 500
    x = np.random.normal(size=(n, 4))
    lpr = np.dot(x, np.r_[1, -1, 0, 0.5])
    mu = np.exp(lpr)

    p0 = 1.5
    lam = 10 * mu**(2 - p0) / (2 - p0)
    alp = (2 - p0) / (p0 - 1)
    bet = 10 * mu**(1 - p0) / (p0 - 1)
    y = np.empty(n)
    N = np.random.poisson(lam)
    for i in range(n):
        y[i] = np.random.gamma(alp, 1 / bet[i], N[i]).sum()

    for eql in [True, False]:
        for p in [1, 1.5, 2]:
            if eql is False and SP_LT_17:
                pytest.skip('skip, scipy too old, no bessel_wright')

            fam = sm.families.Tweedie(var_power=p, eql=eql)
            model = GLM(y, x, family=fam)
            result = model.fit()

            pa = result.params + 0.2*np.random.normal(size=result.params.size)

            ngrad = approx_fprime_cs(pa, lambda x: model.loglike(x, scale=1))
            agrad = model.score(pa, scale=1)
            assert_allclose(ngrad, agrad, atol=1e-8, rtol=1e-8)

            nhess = approx_hess_cs(pa, lambda x: model.loglike(x, scale=1))
            ahess = model.hessian(pa, scale=1)
            assert_allclose(nhess, ahess, atol=5e-8, rtol=5e-8)