AIM-PIbd-32-Kurbanova-A-A/aimenv/Lib/site-packages/statsmodels/discrete/tests/test_conditional.py

import numpy as np
from statsmodels.discrete.conditional_models import (
      ConditionalLogit, ConditionalPoisson, ConditionalMNLogit)
from statsmodels.tools.numdiff import approx_fprime
from numpy.testing import assert_allclose
import pandas as pd


def test_logit_1d():
    """Check ConditionalLogit with a single covariate.

    The analytic gradients (partial-likelihood denominator and
    log-likelihood) are compared with numerical derivatives, and the fitted
    coefficient and standard error are compared with values from Stata.
    """
    endog = np.r_[0, 1, 0, 1, 0, 1, 0, 1, 1, 1]
    group_ids = np.r_[0, 0, 0, 1, 1, 1, 2, 2, 2, 2]
    exog = np.r_[0, 1, 0, 0, 1, 1, 0, 0, 1, 0][:, None]

    model = ConditionalLogit(endog, exog, groups=group_ids)
    trial_values = (-1, 0, 1, 2)

    def denom_group0(p):
        # Denominator of the partial likelihood for group 0.
        return model._denom(0, p)

    # Analytic versus numeric gradient of the denominator.
    for value in trial_values:
        params = np.r_[value]
        analytic = model._denom_grad(0, params)[1]
        numeric = approx_fprime(params, denom_group0).squeeze()
        assert_allclose(analytic, numeric)

    # Numeric gradient of the log-likelihood versus the score function.
    for value in trial_values:
        params = np.r_[value]
        numeric = approx_fprime(params, model.loglike).squeeze()
        assert_allclose(numeric, model.score(params), rtol=1e-4)

    fitted = model.fit()

    # Reference values from Stata
    expected_params = np.r_[0.9272407]
    expected_bse = np.r_[1.295155]
    assert_allclose(fitted.params, expected_params, rtol=1e-5)
    assert_allclose(fitted.bse, expected_bse, rtol=1e-5)


def test_logit_2d():
    """Check ConditionalLogit with two covariates.

    The analytic gradients are compared with numerical derivatives, the
    fitted estimates with values from Stata, and ``summary`` is called as a
    smoke test.
    """
    endog = np.r_[0, 1, 0, 1, 0, 1, 0, 1, 1, 1]
    group_ids = np.r_[0, 0, 0, 1, 1, 1, 2, 2, 2, 2]

    col0 = np.r_[0, 1, 0, 0, 1, 1, 0, 0, 1, 0]
    col1 = np.r_[0, 0, 1, 0, 0, 1, 0, 1, 1, 1]
    exog = np.column_stack((col0, col1)).astype(float)

    model = ConditionalLogit(endog, exog, groups=group_ids)
    trial_values = (-1, 0, 1, 2)

    def denom_group0(p):
        # Denominator of the partial likelihood for group 0.
        return model._denom(0, p)

    # Analytic versus numeric gradient of the denominator.
    for value in trial_values:
        params = np.r_[value, -1.5*value]
        analytic = model._denom_grad(0, params)[1]
        numeric = approx_fprime(params, denom_group0)
        assert_allclose(analytic, numeric, rtol=1e-5)

    # Numeric gradient of the log-likelihood versus the score function.
    for value in trial_values:
        params = np.r_[-0.5*value, 0.5*value]
        numeric = approx_fprime(params, model.loglike)
        assert_allclose(numeric, model.score(params), rtol=1e-4)

    fitted = model.fit()

    # Reference values from Stata
    expected_params = np.r_[1.011074, 1.236758]
    expected_bse = np.r_[1.420784, 1.361738]
    assert_allclose(fitted.params, expected_params, rtol=1e-3)
    assert_allclose(fitted.bse, expected_bse, rtol=1e-5)

    # Smoke test
    fitted.summary()


def test_formula():
    """Check that the array and formula interfaces give the same fit.

    Runs once for ConditionalLogit and once for ConditionalPoisson.  The
    random state is re-seeded on every pass, so both model classes see the
    same simulated data.
    """
    for model_cls in (ConditionalLogit, ConditionalPoisson):

        np.random.seed(34234)
        nobs = 200
        endog = np.random.randint(0, 2, size=nobs)
        cov1 = np.random.normal(size=nobs)
        cov2 = np.random.normal(size=nobs)
        group_ids = np.random.randint(0, 25, size=nobs)

        # Array interface
        exog = np.column_stack((cov1, cov2))
        from_arrays = model_cls(endog, exog, groups=group_ids).fit()

        # Formula interface on the same data
        frame = pd.DataFrame(
            {"y": endog, "x1": cov1, "x2": cov2, "g": group_ids})
        formula_model = model_cls.from_formula(
            "y ~ 0 + x1 + x2", groups="g", data=frame)
        from_formula = formula_model.fit()

        assert_allclose(from_arrays.params, from_formula.params, rtol=1e-5)
        assert_allclose(from_arrays.bse, from_formula.bse, rtol=1e-5)
        assert_allclose(
            from_arrays.cov_params(), from_formula.cov_params(), rtol=1e-5)
        assert_allclose(from_arrays.tvalues, from_formula.tvalues, rtol=1e-5)


def test_poisson_1d():
    """Check ConditionalPoisson with a single covariate.

    The score function is compared with a numerical derivative of the
    log-likelihood, and the fitted estimates with values from Stata.
    """
    counts = np.r_[3, 1, 1, 4, 5, 2, 0, 1, 6, 2]
    group_ids = np.r_[0, 0, 0, 0, 1, 1, 1, 1, 1, 1]
    exog = np.r_[0, 1, 0, 0, 1, 1, 0, 0, 1, 0][:, None]

    model = ConditionalPoisson(counts, exog, groups=group_ids)

    # Numeric gradient of the log-likelihood versus the score function.
    for value in (-1, 0, 1, 2):
        params = np.r_[value]
        numeric = approx_fprime(params, model.loglike).squeeze()
        assert_allclose(numeric, model.score(params), rtol=1e-4)

    fitted = model.fit()

    # Reference values from Stata
    expected_params = np.r_[0.6466272]
    expected_bse = np.r_[0.4170918]
    assert_allclose(fitted.params, expected_params, rtol=1e-4)
    assert_allclose(fitted.bse, expected_bse, rtol=1e-5)


def test_poisson_2d():
    """Check ConditionalPoisson with two covariates.

    The score function is compared with a numerical derivative of the
    log-likelihood, the fitted estimates with values from Stata, and
    ``summary`` is called as a smoke test.
    """
    counts = np.r_[3, 1, 4, 8, 2, 5, 4, 7, 2, 6]
    group_ids = np.r_[0, 0, 0, 1, 1, 1, 2, 2, 2, 2]

    col0 = np.r_[0, 1, 0, 0, 1, 1, 0, 0, 1, 0]
    col1 = np.r_[2, 1, 0, 0, 1, 2, 3, 2, 0, 1]
    exog = np.column_stack((col0, col1)).astype(float)

    model = ConditionalPoisson(counts, exog, groups=group_ids)

    # Numeric gradient of the log-likelihood versus the score function.
    for value in (-1, 0, 1, 2):
        params = np.r_[-0.5*value, 0.5*value]
        numeric = approx_fprime(params, model.loglike)
        assert_allclose(numeric, model.score(params), rtol=1e-4)

    fitted = model.fit()

    # Reference values from Stata
    expected_params = np.r_[-.9478957, -.0134279]
    expected_bse = np.r_[.3874942, .1686712]
    assert_allclose(fitted.params, expected_params, rtol=1e-3)
    assert_allclose(fitted.bse, expected_bse, rtol=1e-5)

    # Smoke test
    fitted.summary()


def test_lasso_logistic():
    """Check ``fit_regularized`` for ConditionalLogit on simulated data.

    Three things are verified: a zero penalty reproduces the ordinary fit,
    an L1 penalty reproduces stored regression values, and the formula
    interface gives the same penalized estimates as the array interface.
    """
    np.random.seed(3423948)

    nobs = 200
    per_group = nobs // 10

    # Ten equally sized groups, each with its own random intercept.
    groups = np.repeat(np.arange(10), per_group).astype(float)
    group_effects = np.repeat(np.random.normal(size=10), per_group)

    x = np.random.normal(size=(nobs, 4))
    true_params = np.r_[0, 0, 1, 0]
    lin_pred = np.dot(x, true_params) + group_effects

    prob = 1 / (1 + np.exp(-lin_pred))
    y = (np.random.uniform(size=nobs) < prob).astype(int)

    unpenalized = ConditionalLogit(y, x, groups=groups).fit()

    # Should be the same as the unpenalized fit
    zero_penalty = ConditionalLogit(y, x, groups=groups).fit_regularized(
        L1_wt=0, alpha=0)
    assert_allclose(unpenalized.params, zero_penalty.params, rtol=1e-3)

    lasso = ConditionalLogit(y, x, groups=groups).fit_regularized(
        L1_wt=1, alpha=0.05)

    # Regression test
    assert_allclose(lasso.params, np.r_[0, 0, 0.55235152, 0], rtol=1e-4)

    # Test with formula
    columns = {"y": y, "groups": groups}
    for idx in range(4):
        columns["x%d" % (idx + 1)] = x[:, idx]
    frame = pd.DataFrame(columns)
    formula_model = ConditionalLogit.from_formula(
        "y ~ 0 + x1 + x2 + x3 + x4", groups="groups", data=frame)
    lasso_formula = formula_model.fit_regularized(L1_wt=1, alpha=0.05)
    assert_allclose(lasso.params, lasso_formula.params)


def test_lasso_poisson():
    """Check ``fit_regularized`` for ConditionalPoisson on simulated data.

    Three things are verified: a zero penalty reproduces the ordinary fit,
    an L1 penalty reproduces stored regression values, and the formula
    interface gives the same penalized estimates as the array interface.
    """
    np.random.seed(342394)

    nobs = 200
    per_group = nobs // 10

    # Ten equally sized groups, each with its own random intercept.
    groups = np.repeat(np.arange(10), per_group).astype(float)
    group_effects = np.repeat(np.random.normal(size=10), per_group)

    x = np.random.normal(size=(nobs, 4))
    true_params = np.r_[0, 0, 1, 0]
    lin_pred = np.dot(x, true_params) + group_effects

    y = np.random.poisson(np.exp(lin_pred))

    unpenalized = ConditionalPoisson(y, x, groups=groups).fit()

    # Should be the same as the unpenalized fit
    zero_penalty = ConditionalPoisson(y, x, groups=groups).fit_regularized(
        L1_wt=0, alpha=0)
    assert_allclose(unpenalized.params, zero_penalty.params, rtol=1e-3)

    lasso = ConditionalPoisson(y, x, groups=groups).fit_regularized(
        L1_wt=1, alpha=0.2)

    # Regression test
    assert_allclose(lasso.params, np.r_[0, 0, 0.91697508, 0], rtol=1e-4)

    # Test with formula
    columns = {"y": y, "groups": groups}
    for idx in range(4):
        columns["x%d" % (idx + 1)] = x[:, idx]
    frame = pd.DataFrame(columns)
    formula_model = ConditionalPoisson.from_formula(
        "y ~ 0 + x1 + x2 + x3 + x4", groups="groups", data=frame)
    lasso_formula = formula_model.fit_regularized(L1_wt=1, alpha=0.2)
    assert_allclose(lasso.params, lasso_formula.params)


def gen_mnlogit(n):
    """Simulate grouped three-category outcome data.

    The global NumPy random state is seeded with 235 on every call, so
    repeated calls return identical data and leave the global generator in
    the same state.

    Parameters
    ----------
    n : int
        Number of observations.  The group labels have length
        ``5 * (n // 5)``, so `n` has to be a multiple of 5 for the columns
        to have matching lengths.

    Returns
    -------
    DataFrame
        Columns ``y`` (category 0, 1 or 2, stored as float), ``x1`` and
        ``x2`` (normal covariates) and ``g`` (group label, stored as float).
    """
    np.random.seed(235)

    # Labels 0, ..., n//5 - 1, repeated five times in sequence.
    group_labels = np.tile(np.arange(n // 5, dtype=float), 5)

    x1 = np.random.normal(size=n)
    x2 = np.random.normal(size=n)
    exog = np.column_stack((x1, x2))

    # Category probabilities from a softmax of the linear predictors.
    coefs = np.array([[0, 1, -1], [0, 2, -1]])
    weights = np.exp(np.dot(exog, coefs))
    probs = weights / weights.sum(axis=1, keepdims=True)
    cum_probs = np.cumsum(probs, axis=1)

    # Inverse-CDF draw: the first matching threshold decides the category,
    # and anything at or above the second threshold falls in category 2.
    u = np.random.uniform(size=n)
    y = np.select(
        [u < cum_probs[:, 0], u < cum_probs[:, 1]],
        [0.0, 1.0],
        default=2.0)

    return pd.DataFrame({"y": y, "x1": x1,
                         "x2": x2, "g": group_labels})


def test_conditional_mnlogit_grad():
    """Compare the ConditionalMNLogit score with numeric gradients.

    The evaluation points are drawn from the global random state, which
    ``gen_mnlogit`` has just seeded, so the test is deterministic.
    """
    data = gen_mnlogit(90)
    model = ConditionalMNLogit.from_formula(
        "y ~ 0 + x1 + x2", groups="g", data=data)

    # Five random parameter vectors of length 4
    n_trials = 5
    for _ in range(n_trials):
        point = np.random.normal(size=4)
        analytic = model.score(point)
        numeric = approx_fprime(point, model.loglike)
        assert_allclose(analytic, numeric, rtol=1e-5, atol=1e-3)


def test_conditional_mnlogit_2d():
    """Regression test for ConditionalMNLogit with two covariates."""
    data = gen_mnlogit(90)
    fitted = ConditionalMNLogit.from_formula(
        "y ~ 0 + x1 + x2", groups="g", data=data).fit()

    # Stored regression values
    expected_params = np.array([
        [0.75592035, -1.58565494],
        [1.82919869, -1.32594231],
    ])
    expected_bse = np.array([
        [0.68099698, 0.70142727],
        [0.65190315, 0.59653771],
    ])

    assert_allclose(fitted.params, expected_params, rtol=1e-5, atol=1e-5)
    assert_allclose(fitted.bse, expected_bse, rtol=1e-5, atol=1e-5)


def test_conditional_mnlogit_3d():
    """Regression test for ConditionalMNLogit with three covariates.

    The third covariate is drawn from the global random state left behind
    by ``gen_mnlogit``, so the stored values depend on that draw order.
    """
    data = gen_mnlogit(90)
    data["x3"] = np.random.normal(size=len(data))
    fitted = ConditionalMNLogit.from_formula(
        "y ~ 0 + x1 + x2 + x3", groups="g", data=data).fit()

    # Stored regression values
    expected_params = np.array([
        [0.729629, -1.633673],
        [1.879019, -1.327163],
        [-0.114124, -0.109378],
    ])
    expected_bse = np.array([
        [0.682965, 0.60472],
        [0.672947, 0.42401],
        [0.722631, 0.33663],
    ])

    assert_allclose(fitted.params, expected_params, atol=1e-5, rtol=1e-5)
    assert_allclose(fitted.bse, expected_bse, atol=1e-5, rtol=1e-5)

    # Smoke test
    fitted.summary()
lab 1 is done 2024-10-02 22:15:59 +04:00			`import numpy as np`
			`from statsmodels.discrete.conditional_models import (`
			`ConditionalLogit, ConditionalPoisson, ConditionalMNLogit)`
			`from statsmodels.tools.numdiff import approx_fprime`
			`from numpy.testing import assert_allclose`
			`import pandas as pd`


			`def test_logit_1d():`

			`y = np.r_[0, 1, 0, 1, 0, 1, 0, 1, 1, 1]`
			`g = np.r_[0, 0, 0, 1, 1, 1, 2, 2, 2, 2]`

			`x = np.r_[0, 1, 0, 0, 1, 1, 0, 0, 1, 0]`
			`x = x[:, None]`

			`model = ConditionalLogit(y, x, groups=g)`

			`# Check the gradient for the denominator of the partial likelihood`
			`for x in -1, 0, 1, 2:`
			`params = np.r_[x, ]`
			`_, grad = model._denom_grad(0, params)`
			`ngrad = approx_fprime(params, lambda x: model._denom(0, x)).squeeze()`
			`assert_allclose(grad, ngrad)`

			`# Check the gradient for the loglikelihood`
			`for x in -1, 0, 1, 2:`
			`grad = approx_fprime(np.r_[x, ], model.loglike).squeeze()`
			`score = model.score(np.r_[x, ])`
			`assert_allclose(grad, score, rtol=1e-4)`

			`result = model.fit()`

			`# From Stata`
			`assert_allclose(result.params, np.r_[0.9272407], rtol=1e-5)`
			`assert_allclose(result.bse, np.r_[1.295155], rtol=1e-5)`


			`def test_logit_2d():`

			`y = np.r_[0, 1, 0, 1, 0, 1, 0, 1, 1, 1]`
			`g = np.r_[0, 0, 0, 1, 1, 1, 2, 2, 2, 2]`

			`x1 = np.r_[0, 1, 0, 0, 1, 1, 0, 0, 1, 0]`
			`x2 = np.r_[0, 0, 1, 0, 0, 1, 0, 1, 1, 1]`
			`x = np.empty((10, 2))`
			`x[:, 0] = x1`
			`x[:, 1] = x2`

			`model = ConditionalLogit(y, x, groups=g)`

			`# Check the gradient for the denominator of the partial likelihood`
			`for x in -1, 0, 1, 2:`
			`params = np.r_[x, -1.5*x]`
			`_, grad = model._denom_grad(0, params)`
			`ngrad = approx_fprime(params, lambda x: model._denom(0, x))`
			`assert_allclose(grad, ngrad, rtol=1e-5)`

			`# Check the gradient for the loglikelihood`
			`for x in -1, 0, 1, 2:`
			`params = np.r_[-0.5*x, 0.5*x]`
			`grad = approx_fprime(params, model.loglike)`
			`score = model.score(params)`
			`assert_allclose(grad, score, rtol=1e-4)`

			`result = model.fit()`

			`# From Stata`
			`assert_allclose(result.params, np.r_[1.011074, 1.236758], rtol=1e-3)`
			`assert_allclose(result.bse, np.r_[1.420784, 1.361738], rtol=1e-5)`

			`result.summary()`


			`def test_formula():`

			`for j in 0, 1:`

			`np.random.seed(34234)`
			`n = 200`
			`y = np.random.randint(0, 2, size=n)`
			`x1 = np.random.normal(size=n)`
			`x2 = np.random.normal(size=n)`
			`g = np.random.randint(0, 25, size=n)`

			`x = np.hstack((x1[:, None], x2[:, None]))`
			`if j == 0:`
			`model1 = ConditionalLogit(y, x, groups=g)`
			`else:`
			`model1 = ConditionalPoisson(y, x, groups=g)`
			`result1 = model1.fit()`

			`df = pd.DataFrame({"y": y, "x1": x1, "x2": x2, "g": g})`
			`if j == 0:`
			`model2 = ConditionalLogit.from_formula(`
			`"y ~ 0 + x1 + x2", groups="g", data=df)`
			`else:`
			`model2 = ConditionalPoisson.from_formula(`
			`"y ~ 0 + x1 + x2", groups="g", data=df)`
			`result2 = model2.fit()`

			`assert_allclose(result1.params, result2.params, rtol=1e-5)`
			`assert_allclose(result1.bse, result2.bse, rtol=1e-5)`
			`assert_allclose(result1.cov_params(), result2.cov_params(), rtol=1e-5)`
			`assert_allclose(result1.tvalues, result2.tvalues, rtol=1e-5)`


			`def test_poisson_1d():`

			`y = np.r_[3, 1, 1, 4, 5, 2, 0, 1, 6, 2]`
			`g = np.r_[0, 0, 0, 0, 1, 1, 1, 1, 1, 1]`

			`x = np.r_[0, 1, 0, 0, 1, 1, 0, 0, 1, 0]`
			`x = x[:, None]`

			`model = ConditionalPoisson(y, x, groups=g)`

			`# Check the gradient for the loglikelihood`
			`for x in -1, 0, 1, 2:`
			`grad = approx_fprime(np.r_[x, ], model.loglike).squeeze()`
			`score = model.score(np.r_[x, ])`
			`assert_allclose(grad, score, rtol=1e-4)`

			`result = model.fit()`

			`# From Stata`
			`assert_allclose(result.params, np.r_[0.6466272], rtol=1e-4)`
			`assert_allclose(result.bse, np.r_[0.4170918], rtol=1e-5)`


			`def test_poisson_2d():`

			`y = np.r_[3, 1, 4, 8, 2, 5, 4, 7, 2, 6]`
			`g = np.r_[0, 0, 0, 1, 1, 1, 2, 2, 2, 2]`

			`x1 = np.r_[0, 1, 0, 0, 1, 1, 0, 0, 1, 0]`
			`x2 = np.r_[2, 1, 0, 0, 1, 2, 3, 2, 0, 1]`
			`x = np.empty((10, 2))`
			`x[:, 0] = x1`
			`x[:, 1] = x2`

			`model = ConditionalPoisson(y, x, groups=g)`

			`# Check the gradient for the loglikelihood`
			`for x in -1, 0, 1, 2:`
			`params = np.r_[-0.5*x, 0.5*x]`
			`grad = approx_fprime(params, model.loglike)`
			`score = model.score(params)`
			`assert_allclose(grad, score, rtol=1e-4)`

			`result = model.fit()`

			`# From Stata`
			`assert_allclose(result.params, np.r_[-.9478957, -.0134279], rtol=1e-3)`
			`assert_allclose(result.bse, np.r_[.3874942, .1686712], rtol=1e-5)`

			`result.summary()`


			`def test_lasso_logistic():`

			`np.random.seed(3423948)`

			`n = 200`
			`groups = np.arange(10)`
			`groups = np.kron(groups, np.ones(n // 10))`
			`group_effects = np.random.normal(size=10)`
			`group_effects = np.kron(group_effects, np.ones(n // 10))`

			`x = np.random.normal(size=(n, 4))`
			`params = np.r_[0, 0, 1, 0]`
			`lin_pred = np.dot(x, params) + group_effects`

			`mean = 1 / (1 + np.exp(-lin_pred))`
			`y = (np.random.uniform(size=n) < mean).astype(int)`

			`model0 = ConditionalLogit(y, x, groups=groups)`
			`result0 = model0.fit()`

			`# Should be the same as model0`
			`model1 = ConditionalLogit(y, x, groups=groups)`
			`result1 = model1.fit_regularized(L1_wt=0, alpha=0)`

			`assert_allclose(result0.params, result1.params, rtol=1e-3)`

			`model2 = ConditionalLogit(y, x, groups=groups)`
			`result2 = model2.fit_regularized(L1_wt=1, alpha=0.05)`

			`# Regression test`
			`assert_allclose(result2.params, np.r_[0, 0, 0.55235152, 0], rtol=1e-4)`

			`# Test with formula`
			`df = pd.DataFrame({"y": y, "x1": x[:, 0], "x2": x[:, 1], "x3": x[:, 2],`
			`"x4": x[:, 3], "groups": groups})`
			`fml = "y ~ 0 + x1 + x2 + x3 + x4"`
			`model3 = ConditionalLogit.from_formula(fml, groups="groups", data=df)`
			`result3 = model3.fit_regularized(L1_wt=1, alpha=0.05)`
			`assert_allclose(result2.params, result3.params)`


			`def test_lasso_poisson():`

			`np.random.seed(342394)`

			`n = 200`
			`groups = np.arange(10)`
			`groups = np.kron(groups, np.ones(n // 10))`
			`group_effects = np.random.normal(size=10)`
			`group_effects = np.kron(group_effects, np.ones(n // 10))`

			`x = np.random.normal(size=(n, 4))`
			`params = np.r_[0, 0, 1, 0]`
			`lin_pred = np.dot(x, params) + group_effects`

			`mean = np.exp(lin_pred)`
			`y = np.random.poisson(mean)`

			`model0 = ConditionalPoisson(y, x, groups=groups)`
			`result0 = model0.fit()`

			`# Should be the same as model0`
			`model1 = ConditionalPoisson(y, x, groups=groups)`
			`result1 = model1.fit_regularized(L1_wt=0, alpha=0)`

			`assert_allclose(result0.params, result1.params, rtol=1e-3)`

			`model2 = ConditionalPoisson(y, x, groups=groups)`
			`result2 = model2.fit_regularized(L1_wt=1, alpha=0.2)`

			`# Regression test`
			`assert_allclose(result2.params, np.r_[0, 0, 0.91697508, 0], rtol=1e-4)`

			`# Test with formula`
			`df = pd.DataFrame({"y": y, "x1": x[:, 0], "x2": x[:, 1], "x3": x[:, 2],`
			`"x4": x[:, 3], "groups": groups})`
			`fml = "y ~ 0 + x1 + x2 + x3 + x4"`
			`model3 = ConditionalPoisson.from_formula(fml, groups="groups", data=df)`
			`result3 = model3.fit_regularized(L1_wt=1, alpha=0.2)`
			`assert_allclose(result2.params, result3.params)`


			`def gen_mnlogit(n):`

			`np.random.seed(235)`

			`g = np.kron(np.ones(5), np.arange(n//5))`
			`x1 = np.random.normal(size=n)`
			`x2 = np.random.normal(size=n)`
			`xm = np.concatenate((x1[:, None], x2[:, None]), axis=1)`
			`pa = np.array([[0, 1, -1], [0, 2, -1]])`
			`lpr = np.dot(xm, pa)`
			`pr = np.exp(lpr)`
			`pr /= pr.sum(1)[:, None]`
			`cpr = pr.cumsum(1)`
			`y = 2 * np.ones(n)`
			`u = np.random.uniform(size=n)`
			`y[u < cpr[:, 2]] = 2`
			`y[u < cpr[:, 1]] = 1`
			`y[u < cpr[:, 0]] = 0`

			`df = pd.DataFrame({"y": y, "x1": x1,`
			`"x2": x2, "g": g})`
			`return df`


			`def test_conditional_mnlogit_grad():`

			`df = gen_mnlogit(90)`
			`model = ConditionalMNLogit.from_formula(`
			`"y ~ 0 + x1 + x2", groups="g", data=df)`

			`# Compare the gradients to numeric gradients`
			`for _ in range(5):`
			`za = np.random.normal(size=4)`
			`grad = model.score(za)`
			`ngrad = approx_fprime(za, model.loglike)`
			`assert_allclose(grad, ngrad, rtol=1e-5, atol=1e-3)`


			`def test_conditional_mnlogit_2d():`

			`df = gen_mnlogit(90)`
			`model = ConditionalMNLogit.from_formula(`
			`"y ~ 0 + x1 + x2", groups="g", data=df)`
			`result = model.fit()`

			`# Regression tests`
			`assert_allclose(`
			`result.params,`
			`np.asarray([[0.75592035, -1.58565494],`
			`[1.82919869, -1.32594231]]),`
			`rtol=1e-5, atol=1e-5)`
			`assert_allclose(`
			`result.bse,`
			`np.asarray([[0.68099698, 0.70142727],`
			`[0.65190315, 0.59653771]]),`
			`rtol=1e-5, atol=1e-5)`


			`def test_conditional_mnlogit_3d():`

			`df = gen_mnlogit(90)`
			`df["x3"] = np.random.normal(size=df.shape[0])`
			`model = ConditionalMNLogit.from_formula(`
			`"y ~ 0 + x1 + x2 + x3", groups="g", data=df)`
			`result = model.fit()`

			`# Regression tests`
			`assert_allclose(`
			`result.params,`
			`np.asarray([[ 0.729629, -1.633673],`
			`[ 1.879019, -1.327163],`
			`[-0.114124, -0.109378]]),`
			`atol=1e-5, rtol=1e-5)`

			`assert_allclose(`
			`result.bse,`
			`np.asarray([[0.682965, 0.60472],`
			`[0.672947, 0.42401],`
			`[0.722631, 0.33663]]),`
			`atol=1e-5, rtol=1e-5)`

			`# Smoke test`
			`result.summary()`