AIM-PIbd-32-Kurbanova-A-A/aimenv/Lib/site-packages/statsmodels/distributions/tests/test_discrete.py

542 lines
19 KiB
Python
Raw Normal View History

2024-10-02 22:15:59 +04:00
import numpy as np
from numpy.testing import assert_allclose, assert_equal
from scipy import stats
from scipy.stats import poisson, nbinom
from statsmodels.tools.tools import Bunch
from statsmodels.distributions.discrete import (
genpoisson_p,
truncatedpoisson,
truncatednegbin,
zipoisson,
zinegbin,
zigenpoisson,
DiscretizedCount,
DiscretizedModel
)
class TestGenpoisson_p:
    """Test the Generalized Poisson distribution against scipy's Poisson.

    Every case calls ``genpoisson_p`` with a third argument of zero (1e-25 in
    ``test_pmf_p5``) and expects the result to agree with ``poisson`` for the
    same point and mean. The last argument (1, 2 or 5) matches the ``p1`` /
    ``p2`` / ``p5`` suffix of the test name.
    """

    def test_pmf_p1(self):
        """pmf at 1 with mean 1 and last argument 1."""
        reference = poisson.pmf(1, 1)
        candidate = genpoisson_p.pmf(1, 1, 0, 1)
        assert_allclose(reference, candidate, rtol=1e-15)

    def test_pmf_p2(self):
        """pmf at 2 with mean 2 and last argument 2."""
        reference = poisson.pmf(2, 2)
        candidate = genpoisson_p.pmf(2, 2, 0, 2)
        assert_allclose(reference, candidate, rtol=1e-15)

    def test_pmf_p5(self):
        """pmf at 10 with mean 2, last argument 5 and a tiny third argument."""
        reference = poisson.pmf(10, 2)
        # 1e-25 instead of an exact 0, hence the looser tolerance
        candidate = genpoisson_p.pmf(10, 2, 1e-25, 5)
        assert_allclose(reference, candidate, rtol=1e-12)

    def test_logpmf_p1(self):
        """logpmf at 5 with mean 2 and last argument 1."""
        reference = poisson.logpmf(5, 2)
        candidate = genpoisson_p.logpmf(5, 2, 0, 1)
        assert_allclose(reference, candidate, rtol=1e-15)

    def test_logpmf_p2(self):
        """logpmf at 6 with mean 1 and last argument 2."""
        reference = poisson.logpmf(6, 1)
        candidate = genpoisson_p.logpmf(6, 1, 0, 2)
        assert_allclose(reference, candidate, rtol=1e-15)
class TestTruncatedPoisson:
    """
    Test Truncated Poisson distribution

    The reference value is scipy's Poisson (log-)pmf renormalized by the
    Poisson mass strictly above the last ``truncatedpoisson`` argument.
    """

    def test_pmf_zero(self):
        """pmf at 2 with mean 2 and last argument 0."""
        reference = poisson.pmf(2, 2) / poisson.sf(0, 2)
        assert_allclose(reference, truncatedpoisson.pmf(2, 2, 0), rtol=1e-7)

    def test_logpmf_zero(self):
        """logpmf at 2 with mean 2 and last argument 0."""
        log_upper_mass = np.log(poisson.sf(0, 2))
        reference = poisson.logpmf(2, 2) - log_upper_mass
        assert_allclose(
            reference, truncatedpoisson.logpmf(2, 2, 0), rtol=1e-7
        )

    def test_pmf(self):
        """pmf at 4 with mean 6 and last argument 2."""
        # the upper mass is written as 1 - cdf here rather than sf
        reference = poisson.pmf(4, 6) / (1 - poisson.cdf(2, 6))
        assert_allclose(reference, truncatedpoisson.pmf(4, 6, 2), rtol=1e-7)

    def test_logpmf(self):
        """logpmf at 4 with mean 6 and last argument 2."""
        log_upper_mass = np.log(poisson.sf(2, 6))
        reference = poisson.logpmf(4, 6) - log_upper_mass
        assert_allclose(
            reference, truncatedpoisson.logpmf(4, 6, 2), rtol=1e-7
        )
class TestZIPoisson:
    """Test the zero-inflated Poisson distribution against scipy's Poisson.

    Most cases pass 0 as the last ``zipoisson`` argument and expect agreement
    with ``poisson`` to tight tolerance. ``test_pmf`` and ``test_logpmf``
    pass 0.1 instead and only require agreement within a loose 5e-2
    tolerance.
    """

    def test_pmf_zero(self):
        """pmf at 3 with mean 2 and last argument 0."""
        poisson_pmf = poisson.pmf(3, 2)
        zipoisson_pmf = zipoisson.pmf(3, 2, 0)
        assert_allclose(poisson_pmf, zipoisson_pmf, rtol=1e-12)

    def test_logpmf_zero(self):
        """logpmf at 5 with mean 1 and last argument 0."""
        poisson_logpmf = poisson.logpmf(5, 1)
        zipoisson_logpmf = zipoisson.logpmf(5, 1, 0)
        assert_allclose(poisson_logpmf, zipoisson_logpmf, rtol=1e-12)

    def test_pmf(self):
        """pmf at 2 with mean 2 and last argument 0.1 (loose tolerance)."""
        poisson_pmf = poisson.pmf(2, 2)
        zipoisson_pmf = zipoisson.pmf(2, 2, 0.1)
        assert_allclose(poisson_pmf, zipoisson_pmf, rtol=5e-2, atol=5e-2)

    def test_logpmf(self):
        """logpmf at 7 with mean 3 and last argument 0.1 (loose tolerance)."""
        poisson_logpmf = poisson.logpmf(7, 3)
        zipoisson_logpmf = zipoisson.logpmf(7, 3, 0.1)
        assert_allclose(poisson_logpmf, zipoisson_logpmf, rtol=5e-2, atol=5e-2)

    def test_cdf_zero(self):
        """cdf at 3 with mean 2 and last argument 0."""
        poisson_cdf = poisson.cdf(3, 2)
        zipoisson_cdf = zipoisson.cdf(3, 2, 0)
        assert_allclose(poisson_cdf, zipoisson_cdf, rtol=1e-12)

    def test_ppf_zero(self):
        """ppf at several probability levels with mean 1 and last argument 0."""
        # ppf takes probabilities, so the levels must lie in [0, 1].  An
        # out-of-range level such as 5 makes both distributions return nan,
        # and assert_allclose treats nan == nan as a pass, so nothing would
        # actually be compared.
        q = np.array([0.1, 0.5, 0.9])
        poisson_ppf = poisson.ppf(q, 1)
        zipoisson_ppf = zipoisson.ppf(q, 1, 0)
        assert_allclose(poisson_ppf, zipoisson_ppf, rtol=1e-12)

    def test_mean_var(self):
        """mean and var for a scalar mean and for an array of means."""
        poisson_mean, poisson_var = poisson.mean(12), poisson.var(12)
        zipoisson_mean = zipoisson.mean(12, 0)
        zipoisson_var = zipoisson.var(12, 0)
        assert_allclose(poisson_mean, zipoisson_mean, rtol=1e-10)
        assert_allclose(poisson_var, zipoisson_var, rtol=1e-10)

        # same check with an array of means
        m = np.array([1, 5, 10])
        poisson_mean, poisson_var = poisson.mean(m), poisson.var(m)
        zipoisson_mean = zipoisson.mean(m, 0)
        zipoisson_var = zipoisson.var(m, 0.0)
        assert_allclose(poisson_mean, zipoisson_mean, rtol=1e-10)
        assert_allclose(poisson_var, zipoisson_var, rtol=1e-10)

    def test_moments(self):
        """Moments of order 0, 1 and 2 with mean 12 and last argument 0."""
        poisson_m1, poisson_m2 = poisson.moment(1, 12), poisson.moment(2, 12)
        zip_m0 = zipoisson.moment(0, 12, 0)
        zip_m1 = zipoisson.moment(1, 12, 0)
        zip_m2 = zipoisson.moment(2, 12, 0)
        # the zeroth moment is expected to be 1
        assert_allclose(1, zip_m0, rtol=1e-10)
        assert_allclose(poisson_m1, zip_m1, rtol=1e-10)
        assert_allclose(poisson_m2, zip_m2, rtol=1e-10)
class TestZIGeneralizedPoisson:
    """Test zero-inflated generalized Poisson against ``genpoisson_p``.

    ``zigenpoisson`` is called with the same leading arguments as
    ``genpoisson_p`` plus one extra trailing argument (0 or 0.1).
    """

    def test_pmf_zero(self):
        """pmf with trailing argument 0 matches genpoisson_p tightly."""
        reference = genpoisson_p.pmf(3, 2, 1, 1)
        assert_allclose(
            reference, zigenpoisson.pmf(3, 2, 1, 1, 0), rtol=1e-12
        )

    def test_logpmf_zero(self):
        """logpmf with trailing argument 0 matches genpoisson_p tightly."""
        reference = genpoisson_p.logpmf(7, 3, 1, 1)
        assert_allclose(
            reference, zigenpoisson.logpmf(7, 3, 1, 1, 0), rtol=1e-12
        )

    def test_pmf(self):
        """pmf with trailing argument 0.1 agrees within 5e-2."""
        reference = genpoisson_p.pmf(3, 2, 2, 2)
        assert_allclose(
            reference,
            zigenpoisson.pmf(3, 2, 2, 2, 0.1),
            rtol=5e-2,
            atol=5e-2,
        )

    def test_logpmf(self):
        """logpmf with trailing argument 0.1 agrees within 5e-2."""
        reference = genpoisson_p.logpmf(2, 3, 0, 2)
        assert_allclose(
            reference,
            zigenpoisson.logpmf(2, 3, 0, 2, 0.1),
            rtol=5e-2,
            atol=5e-2,
        )

    def test_mean_var(self):
        """mean and var for an array of means match scipy's Poisson."""
        # compare with Poisson special case
        means = np.array([1, 5, 10])
        expected_mean = poisson.mean(means)
        expected_var = poisson.var(means)
        assert_allclose(
            expected_mean, zigenpoisson.mean(means, 0, 1, 0), rtol=1e-10
        )
        assert_allclose(
            expected_var, zigenpoisson.var(means, 0.0, 1, 0), rtol=1e-10
        )
class TestZiNBP:
    """Test the zero-inflated negative binomial against scipy's ``nbinom``.

    ``zinegbin.convert_params`` is applied to three leading ``zinegbin``
    arguments and its result is used as the ``(n, p)`` pair for ``nbinom``.
    ``zinegbin`` methods take one further trailing argument, which is 0 in
    the cases that require tight agreement.
    """

    def test_pmf_p2(self):
        """pmf at 100, trailing argument 0.01, agreement within 1e-5."""
        size, prob = zinegbin.convert_params(30, 0.1, 2)
        assert_allclose(
            nbinom.pmf(100, size, prob),
            zinegbin.pmf(100, 30, 0.1, 2, 0.01),
            rtol=1e-5,
            atol=1e-5,
        )

    def test_logpmf_p2(self):
        """logpmf at 200, trailing argument 0.01, agreement within 1e-2."""
        size, prob = zinegbin.convert_params(10, 1, 2)
        assert_allclose(
            nbinom.logpmf(200, size, prob),
            zinegbin.logpmf(200, 10, 1, 2, 0.01),
            rtol=1e-2,
            atol=1e-2,
        )

    def test_cdf_p2(self):
        """cdf at 10 with trailing argument 0."""
        size, prob = zinegbin.convert_params(30, 0.1, 2)
        assert_allclose(
            nbinom.cdf(10, size, prob),
            zinegbin.cdf(10, 30, 0.1, 2, 0),
            rtol=1e-12,
            atol=1e-12,
        )

    def test_ppf_p2(self):
        """ppf at probability 0.27 with trailing argument 0."""
        size, prob = zinegbin.convert_params(100, 1, 2)
        assert_allclose(
            nbinom.ppf(0.27, size, prob),
            zinegbin.ppf(0.27, 100, 1, 2, 0),
            rtol=1e-12,
            atol=1e-12,
        )

    # NOTE(review): "mran" looks like a typo for "mean"; the name is kept so
    # existing test IDs stay stable.
    def test_mran_var_p2(self):
        """mean and var with trailing argument 0."""
        size, prob = zinegbin.convert_params(7, 1, 2)
        expected_mean = nbinom.mean(size, prob)
        expected_var = nbinom.var(size, prob)
        assert_allclose(expected_mean, zinegbin.mean(7, 1, 2, 0), rtol=1e-10)
        assert_allclose(expected_var, zinegbin.var(7, 1, 2, 0), rtol=1e-10)

    def test_moments_p2(self):
        """Moments of order 0, 1 and 2 with trailing argument 0."""
        size, prob = zinegbin.convert_params(7, 1, 2)
        # the zeroth moment is expected to be 1
        assert_allclose(1, zinegbin.moment(0, 7, 1, 2, 0), rtol=1e-10)
        for order in (1, 2):
            assert_allclose(
                nbinom.moment(order, size, prob),
                zinegbin.moment(order, 7, 1, 2, 0),
                rtol=1e-10,
            )

    def test_pmf(self):
        """pmf at 2 with trailing argument 0.5: twice it equals nbinom's."""
        # NOTE(review): convert_params gets 1 as its third argument while
        # zinegbin.pmf gets 2 in the same position; presumably harmless
        # because the first argument is 1 -- confirm.
        size, prob = zinegbin.convert_params(1, 0.9, 1)
        reference = nbinom.pmf(2, size, prob)
        inflated = zinegbin.pmf(2, 1, 0.9, 2, 0.5)
        assert_allclose(reference, inflated * 2, rtol=1e-7)

    def test_logpmf(self):
        """logpmf at 2, trailing argument 0.005, agreement within 1e-2."""
        size, prob = zinegbin.convert_params(5, 1, 1)
        assert_allclose(
            nbinom.logpmf(2, size, prob),
            zinegbin.logpmf(2, 5, 1, 1, 0.005),
            rtol=1e-2,
            atol=1e-2,
        )

    def test_cdf(self):
        """cdf at 2 with trailing argument 0."""
        size, prob = zinegbin.convert_params(1, 0.9, 1)
        assert_allclose(
            nbinom.cdf(2, size, prob),
            zinegbin.cdf(2, 1, 0.9, 2, 0),
            rtol=1e-12,
            atol=1e-12,
        )

    def test_ppf(self):
        """ppf at probability 0.71 with trailing argument 0."""
        size, prob = zinegbin.convert_params(5, 1, 1)
        assert_allclose(
            nbinom.ppf(0.71, size, prob),
            zinegbin.ppf(0.71, 5, 1, 1, 0),
            rtol=1e-12,
            atol=1e-12,
        )

    def test_convert(self):
        """convert_params reproduces hard-coded (n, p) reference values."""
        cases = [
            ((25, 0.85, 2), (1.1764705882352942, 0.04494382022471911)),
            ((7, 0.17, 1), (41.17647058823529, 0.8547008547008547)),
        ]
        for inputs, (n_true, p_true) in cases:
            n, p = zinegbin.convert_params(*inputs)
            assert_allclose(n, n_true, rtol=1e-12, atol=1e-12)
            assert_allclose(p, p_true, rtol=1e-12, atol=1e-12)

    def test_mean_var(self):
        """mean and var for a scalar and for an array first argument."""
        for m in [9, np.array([1, 5, 10])]:
            size, prob = zinegbin.convert_params(m, 1, 1)
            expected_mean = nbinom.mean(size, prob)
            expected_var = nbinom.var(size, prob)
            assert_allclose(
                expected_mean, zinegbin.mean(m, 1, 1, 0), rtol=1e-10
            )
            assert_allclose(
                expected_var, zinegbin.var(m, 1, 1, 0), rtol=1e-10
            )

    def test_moments(self):
        """Moments of order 0, 1 and 2 with trailing argument 0."""
        size, prob = zinegbin.convert_params(9, 1, 1)
        # the zeroth moment is expected to be 1
        assert_allclose(1, zinegbin.moment(0, 9, 1, 1, 0), rtol=1e-10)
        for order in (1, 2):
            assert_allclose(
                nbinom.moment(order, size, prob),
                zinegbin.moment(order, 9, 1, 1, 0),
                rtol=1e-10,
            )
class CheckDiscretized():
    """Shared checks for discretized continuous distributions.

    Subclasses provide the class attributes ``d_offset``, ``ddistr``,
    ``paramg``, ``paramd``, ``shapes`` and ``start_params`` that
    ``test_basic`` reads.
    """

    def convert_params(self, params):
        """Return ``params`` as a list with a 0 placed before its last entry.

        Parameters
        ----------
        params : object with a ``tolist`` method
            Parameter values, e.g. a 1-d numpy array.

        Returns
        -------
        list
            The values of ``params`` with an extra 0 before the last one.
        """
        values = params.tolist()
        return values[:-1] + [0] + values[-1:]

    def test_basic(self):
        """Check pmf/cdf/sf, sampling, fitting and ppf/isf round trips."""
        base = self.ddistr
        args_cont = self.paramg
        args_disc = self.paramd

        # fixed seed: the rvs draws below feed chi-square tests
        np.random.seed(987146)

        discrete = DiscretizedCount(base, self.d_offset)
        assert discrete.shapes == self.shapes

        # private pmf/cdf against differences of the continuous cdf
        support = np.arange(5)
        pmf_raw = discrete._pmf(support, *args_disc)
        cdf_cont = base.cdf(support, *args_cont)
        pmf_expected = np.diff(cdf_cont)
        assert_allclose(
            pmf_raw[: len(pmf_expected)], pmf_expected, rtol=1e-13
        )
        cdf_raw = discrete._cdf(support, *args_disc)
        assert_allclose(
            cdf_raw[: len(cdf_cont) - 1], cdf_cont[1:], rtol=1e-13
        )

        # check that scipy dispatch methods work
        assert_allclose(
            discrete.pmf(support, *args_disc), pmf_raw, rtol=1e-13
        )
        assert_allclose(
            discrete.cdf(support, *args_disc), cdf_raw, rtol=1e-13
        )
        assert_allclose(
            discrete.sf(support, *args_disc), 1 - cdf_raw, rtol=1e-13
        )

        nobs = 2000
        sample = discrete.rvs(*args_disc, size=nobs)  # , random_state=987146)
        # check that we got a non-trivial rvs
        assert len(sample) == nobs
        assert sample.var() > 0.001

        # fit the model and compare predicted probabilities with
        # differences of the continuous sf and cdf at the estimates
        mod = DiscretizedModel(sample, distr=discrete)
        res = mod.fit(start_params=self.start_params)
        probs = mod.predict(res.params, which="probs")
        fitted_args = self.convert_params(res.params)
        grid = np.arange(21)
        assert_allclose(
            probs, -np.diff(base.sf(grid, *fitted_args)), rtol=1e-13
        )
        # using cdf limits precision to computation around 1
        assert_allclose(
            probs,
            np.diff(base.cdf(grid, *fitted_args)),
            rtol=1e-13,
            atol=1e-15,
        )

        # chi-square test of observed counts against fitted probabilities
        counts = np.bincount(sample.astype(int))
        # truncate at last observed
        n_bins = len(counts)
        if n_bins > 10:
            # reduce low count bins for heavy tailed distributions
            n_bins = 10
            counts[n_bins - 1] += counts[n_bins:].sum()
            counts = counts[:n_bins]
        probs = mod.predict(res.params, which="probs", k_max=n_bins)
        # put the remaining probability mass into the last bin
        probs[n_bins - 1] += 1 - probs[:n_bins].sum()
        gof = stats.chisquare(counts, probs[:n_bins] * nobs)
        assert gof.pvalue > 0.01

        # estimated distribution methods rvs, ppf
        # frozen distribution with estimated parameters
        # Todo results method
        frozen = mod.get_distr(res.params)
        nobs_rvs = 500
        draws = frozen.rvs(size=nobs_rvs)
        counts = np.bincount(draws)
        probs = mod.predict(res.params, which="probs", k_max=nobs_rvs)
        n_bins = len(counts)
        probs[n_bins - 1] += 1 - probs[:n_bins].sum()
        gof = stats.chisquare(counts, probs[:n_bins] * nobs_rvs)
        assert gof.pvalue > 0.01

        # round trip cdf-ppf
        shifted_up = np.arange(-1, 5) + 1e-6
        expected_up = np.array([-1., 1., 2., 3., 4., 5.])
        assert_equal(frozen.ppf(frozen.cdf(shifted_up)), expected_up)

        # probabilities just below the cdf values, floored at 0
        just_below = np.maximum(frozen.cdf(np.arange(-1, 5)) - 1e-6, 0)
        assert_equal(frozen.ppf(just_below), np.arange(-1, 5))

        assert_equal(frozen.ppf(frozen.cdf(np.arange(5))), np.arange(0, 5))

        # same round trip through the survival function
        assert_equal(frozen.isf(1 - frozen.cdf(shifted_up)), expected_up)
class TestDiscretizedGamma(CheckDiscretized):
    """Run the shared discretized-distribution checks with ``stats.gamma``."""

    @classmethod
    def setup_class(cls):
        """Set the fixture attributes that ``test_basic`` reads."""
        cls.ddistr = stats.gamma
        cls.d_offset = 0
        cls.shapes = "a, s"
        # include constant so we can use args
        cls.paramg = (5, 0, 0.5)
        cls.paramd = (5, 0.5)
        cls.start_params = (1, 0.5)
class TestDiscretizedExponential(CheckDiscretized):
    """Run the shared discretized-distribution checks with ``stats.expon``."""

    @classmethod
    def setup_class(cls):
        """Set the fixture attributes that ``test_basic`` reads."""
        cls.d_offset = 0
        cls.ddistr = stats.expon
        cls.paramg = (0, 5)  # include constant so we can use args
        cls.paramd = (5,)
        cls.shapes = "s"
        # one-element tuple: the trailing comma is required, ``(0.5)`` is
        # just the float 0.5 and not a sequence like the sibling fixtures
        cls.start_params = (0.5,)
class TestDiscretizedLomax(CheckDiscretized):
    """Run the shared discretized-distribution checks with ``stats.lomax``."""

    @classmethod
    def setup_class(cls):
        """Set the fixture attributes that ``test_basic`` reads."""
        # instead of pareto to avoid p(y=0) = 0
        cls.ddistr = stats.lomax
        cls.d_offset = 0
        cls.shapes = "c, s"
        # include constant so we can use args
        cls.paramg = (2, 0, 1.5)
        cls.paramd = (2, 1.5)
        cls.start_params = (0.5, 0.5)
class TestDiscretizedBurr12(CheckDiscretized):
    """Run the shared discretized-distribution checks with ``stats.burr12``."""

    @classmethod
    def setup_class(cls):
        """Set the fixture attributes that ``test_basic`` reads."""
        # should be lomax as special case of burr12
        cls.ddistr = stats.burr12
        cls.d_offset = 0
        cls.shapes = "c, d, s"
        # paramg is paramd with an extra 0 before the last entry
        cls.paramg = (2, 1, 0, 1.5)
        cls.paramd = (2, 1, 1.5)
        cls.start_params = (0.5, 1, 0.5)
class TestDiscretizedGammaEx():
    """Discretized gamma model fitted to a published frequency table."""

    # strike outbreaks example from Ch... 2012
    def test_all(self):
        """Fit the model and compare with the article and regression values."""
        # expand frequencies to observations, (no freq_weights yet)
        freq = [46, 76, 24, 9, 1]
        y = np.repeat(np.arange(5), freq)
        nobs = len(y)

        # results from article table 7
        published = Bunch(
            params=[3.52636, 0.425617],
            llf=-187.469,
            chi2=1.701208,  # chisquare test
            df_model=0,
            p=0.4272,  # p-value for chi2
            aic=378.938,
            probs=[46.48, 73.72, 27.88, 6.5, 1.42])

        mod = DiscretizedModel(y, distr=DiscretizedCount(stats.gamma))
        res = mod.fit(start_params=[1, 1])

        assert_allclose(res.params, published.params, rtol=1e-5)
        assert_allclose(res.llf, published.llf, atol=6e-3)
        assert_allclose(res.aic, published.aic, atol=6e-3)
        assert_equal(res.df_model, published.df_model)

        # predicted cell counts; the last cell absorbs the remaining mass
        probs = mod.predict(res.params, which="probs")
        probs_trunc = probs[:len(published.probs)]
        probs_trunc[-1] += 1 - probs_trunc.sum()
        expected_counts = probs_trunc * nobs
        assert_allclose(expected_counts, published.probs, atol=6e-2)
        assert_allclose(np.sum(freq), (probs_trunc * nobs).sum(), rtol=1e-10)

        res_chi2 = stats.chisquare(
            freq, probs_trunc * nobs, ddof=len(res.params)
        )
        # regression test, numbers from running test
        # close but not identical to article
        assert_allclose(res_chi2.statistic, 1.70409356, rtol=1e-7)
        assert_allclose(res_chi2.pvalue, 0.42654100, rtol=1e-7)

        # smoke test for summary
        res.summary()

        # seed before bootstrapping so the loose comparison is reproducible
        np.random.seed(987146)
        res_boots = res.bootstrap()
        # only loose check, small default n_rep=100, agreement at around 3%
        assert_allclose(res.params, res_boots[0], rtol=0.05)
        assert_allclose(res.bse, res_boots[1], rtol=0.05)
class TestGeometric():
    """Discretized exponential compared with scipy's shifted geometric."""

    def test_all(self):
        """Compare pmf, cdf, sf, ppf and isf of the two frozen distributions."""
        p_geom = 0.6
        scale_dexpon = -1 / np.log(1-p_geom)
        dgeo = stats.geom(p_geom, loc=-1)
        dpg = DiscretizedCount(stats.expon)(scale_dexpon)

        xi = np.arange(6)
        assert_allclose(dpg.pmf(xi), dgeo.pmf(xi), rtol=1e-10)

        cdf1 = dgeo.cdf(xi)
        assert_allclose(dpg.cdf(xi), cdf1, rtol=1e-10)

        sf1 = dgeo.sf(xi)
        assert_allclose(dpg.sf(xi), sf1, rtol=1e-10)

        # ppf exactly at, slightly below and slightly above the cdf values
        for shift in (0.0, -1e-8, 1e-8):
            levels = cdf1 + shift
            expected = dgeo.ppf(levels)
            assert_equal(dpg.ppf(levels), expected)

        # the scipy value is computed but not used as the reference
        dgeo.ppf(0)  # incorrect in scipy < 1.5.0
        assert_equal(dpg.ppf(0), -1)

        # isf exactly at, slightly below and slightly above the sf values
        for shift in (0.0, -1e-8, 1e-8):
            levels = sf1 + shift
            expected = dgeo.isf(levels)
            assert_equal(dpg.isf(levels), expected)

        assert_equal(dpg.isf(0), dgeo.isf(0))  # inf

        # the scipy value is computed but not used as the reference
        dgeo.isf(1)  # currently incorrect in scipy
        assert_equal(dpg.isf(1), -1)
class TestTruncatedNBP:
    """
    Test Truncated Negative Binomial distribution

    ``truncatednegbin.convert_params`` is applied to three leading arguments
    and its result is used as the ``(n, p)`` pair for scipy's ``nbinom``.
    The reference is the ``nbinom`` (log-)pmf renormalized by the ``nbinom``
    mass strictly above the last ``truncatednegbin`` argument.
    """

    def test_pmf_zero(self):
        """pmf at 1 with last argument 0."""
        size, prob = truncatednegbin.convert_params(5, 0.1, 2)
        reference = nbinom.pmf(1, size, prob) / nbinom.sf(0, size, prob)
        assert_allclose(
            reference, truncatednegbin.pmf(1, 5, 0.1, 2, 0), rtol=1e-5
        )

    def test_logpmf_zero(self):
        """logpmf at 1 with last argument 0, agreement within 1e-2."""
        size, prob = truncatednegbin.convert_params(5, 1, 2)
        log_upper_mass = np.log(nbinom.sf(0, size, prob))
        reference = nbinom.logpmf(1, size, prob) - log_upper_mass
        assert_allclose(
            reference,
            truncatednegbin.logpmf(1, 5, 1, 2, 0),
            rtol=1e-2,
            atol=1e-2,
        )

    def test_pmf(self):
        """pmf just above the last argument 5, and exactly at it."""
        size, prob = truncatednegbin.convert_params(2, 0.5, 2)
        reference = nbinom.pmf(6, size, prob) / nbinom.sf(5, size, prob)
        assert_allclose(
            reference, truncatednegbin.pmf(6, 2, 0.5, 2, 5), rtol=1e-7
        )
        # the point equal to the last argument carries no mass
        assert_equal(truncatednegbin.pmf(5, 2, 0.5, 2, 5), 0)

    def test_logpmf(self):
        """logpmf just above the last argument 5, and exactly at it."""
        size, prob = truncatednegbin.convert_params(5, 0.1, 2)
        log_upper_mass = np.log(nbinom.sf(5, size, prob))
        reference = nbinom.logpmf(6, size, prob) - log_upper_mass
        assert_allclose(
            reference, truncatednegbin.logpmf(6, 5, 0.1, 2, 5), rtol=1e-7
        )
        # zero mass at the last argument means a logpmf of -inf
        assert np.isneginf(truncatednegbin.logpmf(5, 5, 0.1, 2, 5))