# AIM-PIbd-32-Kurbanova-A-A/aimenv/Lib/site-packages/statsmodels/stats/tests/test_multivariate.py
# 2024-10-02 22:15:59 +04:00

# 316 lines
# 12 KiB
# Python

"""
Created on Sun Nov 5 14:48:19 2017
Author: Josef Perktold
"""
import numpy as np
from numpy.testing import assert_allclose, assert_equal #noqa
from statsmodels.stats import weightstats
import statsmodels.stats.multivariate as smmv # pytest cannot import test_xxx
from statsmodels.stats.multivariate import confint_mvmean_fromstats
from statsmodels.tools.testing import Holder
def test_mv_mean():
    """Check ``test_mvmean`` and the multivariate mean confidence intervals.

    Three things are verified on a fixed 24 x 4 data set:

    * the one-sample test of columns 1 and 2 against the value 21,
    * the same test applied to the paired differences between the rows
      flagged with 1 and the rows flagged with 0 in the last column,
    * that ``confint_mvmean_fromstats`` and ``confint_mvmean`` with an
      identity transformation reproduce ``DescrStatsW.tconfint_mean``.
    """
    # names = ['id', 'mpg1', 'mpg2', 'add']
    data = np.asarray([[1.0, 24.0, 23.5, 1.0],
                       [2.0, 25.0, 24.5, 1.0],
                       [3.0, 21.0, 20.5, 1.0],
                       [4.0, 22.0, 20.5, 1.0],
                       [5.0, 23.0, 22.5, 1.0],
                       [6.0, 18.0, 16.5, 1.0],
                       [7.0, 17.0, 16.5, 1.0],
                       [8.0, 28.0, 27.5, 1.0],
                       [9.0, 24.0, 23.5, 1.0],
                       [10.0, 27.0, 25.5, 1.0],
                       [11.0, 21.0, 20.5, 1.0],
                       [12.0, 23.0, 22.5, 1.0],
                       [1.0, 20.0, 19.0, 0.0],
                       [2.0, 23.0, 22.0, 0.0],
                       [3.0, 21.0, 20.0, 0.0],
                       [4.0, 25.0, 24.0, 0.0],
                       [5.0, 18.0, 17.0, 0.0],
                       [6.0, 17.0, 16.0, 0.0],
                       [7.0, 18.0, 17.0, 0.0],
                       [8.0, 24.0, 23.0, 0.0],
                       [9.0, 20.0, 19.0, 0.0],
                       [10.0, 24.0, 22.0, 0.0],
                       [11.0, 23.0, 22.0, 0.0],
                       [12.0, 19.0, 18.0, 0.0]])
    mpg = data[:, 1:3]

    # --- one-sample test against the hypothesized mean (21, 21)
    expected = Holder(p_F=1.25062334808e-09,
                      df_r=22,
                      df_m=2,
                      F=59.91609589041116,
                      T2=125.2791095890415)
    res_one = smmv.test_mvmean(mpg, [21, 21])

    assert_allclose(res_one.statistic, expected.F, rtol=1e-10)
    assert_allclose(res_one.pvalue, expected.p_F, rtol=1e-10)
    assert_allclose(res_one.t2, expected.T2, rtol=1e-10)
    assert_equal(res_one.df, [expected.df_m, expected.df_r])

    # --- diff of paired sample, tested against zero
    flagged = data[:, -1] == 1
    paired_diff = mpg[flagged] - mpg[~flagged]
    res_paired = smmv.test_mvmean(paired_diff, [0, 0])

    # result Stata hotelling (F and p_F are only available rounded)
    expected = Holder(T2=9.698067632850247,
                      df=10,
                      k=2,
                      N=12,
                      F=4.4082126,  # not in return List
                      p_F=0.0424)  # not in return List
    assert_allclose(res_paired.statistic, expected.F, atol=5e-7)
    assert_allclose(res_paired.pvalue, expected.p_F, atol=5e-4)
    assert_allclose(res_paired.t2, expected.T2, rtol=1e-10)
    assert_equal(res_paired.df, [expected.k, expected.df])

    # mvtest means diff1 diff2, zero (full precision reference values)
    expected = Holder(p_F=.0423949782937231,
                      df_r=10,
                      df_m=2,
                      F=4.408212560386478,
                      T2=9.69806763285025)
    assert_allclose(res_paired.statistic, expected.F, rtol=1e-12)
    assert_allclose(res_paired.pvalue, expected.p_F, rtol=1e-12)
    assert_allclose(res_paired.t2, expected.T2, rtol=1e-12)
    assert_equal(res_paired.df, [expected.df_m, expected.df_r])

    # --- confidence intervals: identity transformation should reproduce
    # the per-column t intervals of DescrStatsW
    descr = weightstats.DescrStatsW(data)
    ci_t = descr.tconfint_mean(alpha=0.05)
    nobs = data.shape[0]
    identity = np.eye(4)

    ci_from_var = confint_mvmean_fromstats(
        descr.mean, np.diag(descr.var), nobs,
        lin_transf=identity, alpha=0.05)
    ci_from_cov = confint_mvmean_fromstats(
        descr.mean, descr.cov, nobs,
        lin_transf=identity, alpha=0.05)

    # only the first two returned items (lower, upper) are compared
    assert_allclose(ci_from_var[:2], ci_t, rtol=1e-13)
    assert_allclose(ci_from_cov[:2], ci_t, rtol=1e-13)

    # test from data
    ci_from_data = smmv.confint_mvmean(data, lin_transf=identity, alpha=0.05)
    assert_allclose(ci_from_data, ci_from_cov, rtol=1e-13)
def test_mvmean_2indep():
    """Check ``test_mvmean_2indep`` on two fixed 24 x 4 samples.

    The statistic, p-value, T2 value and degrees of freedom are compared
    with stored reference numbers (the original variable name suggests
    they were produced with Stata -- TODO confirm).
    """
    sample1 = np.asarray([[1.0, 24.0, 23.5, 1.0],
                          [2.0, 25.0, 24.5, 1.0],
                          [3.0, 21.0, 20.5, 1.0],
                          [4.0, 22.0, 20.5, 1.0],
                          [5.0, 23.0, 22.5, 1.0],
                          [6.0, 18.0, 16.5, 1.0],
                          [7.0, 17.0, 16.5, 1.0],
                          [8.0, 28.0, 27.5, 1.0],
                          [9.0, 24.0, 23.5, 1.0],
                          [10.0, 27.0, 25.5, 1.0],
                          [11.0, 21.0, 20.5, 1.0],
                          [12.0, 23.0, 22.5, 1.0],
                          [1.0, 20.0, 19.0, 0.0],
                          [2.0, 23.0, 22.0, 0.0],
                          [3.0, 21.0, 20.0, 0.0],
                          [4.0, 25.0, 24.0, 0.0],
                          [5.0, 18.0, 17.0, 0.0],
                          [6.0, 17.0, 16.0, 0.0],
                          [7.0, 18.0, 17.0, 0.0],
                          [8.0, 24.0, 23.0, 0.0],
                          [9.0, 20.0, 19.0, 0.0],
                          [10.0, 24.0, 22.0, 0.0],
                          [11.0, 23.0, 22.0, 0.0],
                          [12.0, 19.0, 18.0, 0.0]])

    sample2 = np.asarray([[1.1, 24.1, 23.4, 1.1],
                          [1.9, 25.2, 24.3, 1.2],
                          [3.2, 20.9, 20.2, 1.3],
                          [4.1, 21.8, 20.6, 0.9],
                          [5.2, 23.0, 22.7, 0.8],
                          [6.3, 18.1, 16.8, 0.7],
                          [7.1, 17.2, 16.5, 1.0],
                          [7.8, 28.3, 27.4, 1.1],
                          [9.5, 23.9, 23.3, 1.2],
                          [10.1, 26.8, 25.2, 1.3],
                          [10.5, 26.7, 20.6, 0.9],
                          [12.1, 23.0, 22.7, 0.8],
                          [1.1, 20.1, 19.0, 0.7],
                          [1.8, 23.2, 22.0, 0.1],
                          [3.2, 21.3, 20.3, 0.2],
                          [4.3, 24.9, 24.2, 0.3],
                          [5.5, 17.9, 17.1, 0.0],
                          [5.5, 17.8, 16.0, 0.6],
                          [7.1, 17.7, 16.7, 0.0],
                          [7.7, 24.0, 22.8, 0.5],
                          [9.1, 20.1, 18.9, 0.0],
                          [10.2, 24.2, 22.3, 0.3],
                          [11.3, 23.3, 22.2, 0.0],
                          [11.7, 18.8, 18.1, 0.1]])

    # stored reference values for the comparison of the two samples
    expected = Holder(p_F=0.6686659171701677,
                      df_r=43,
                      df_m=4,
                      F=0.594263378678938,
                      T2=2.5428944576028973)

    result = smmv.test_mvmean_2indep(sample1, sample2)

    assert_allclose(result.statistic, expected.F, rtol=1e-10)
    assert_allclose(result.pvalue, expected.p_F, rtol=1e-10)
    assert_allclose(result.t2, expected.T2, rtol=1e-10)
    assert_equal(result.df, [expected.df_m, expected.df_r])
def test_confint_simult():
    """Check simultaneous confidence intervals from summary statistics.

    Verifies ``confint_mvmean_fromstats(..., simult=True)`` in three ways:

    * lower and upper bounds for the identity transformation agree with
      the rounded numbers of a textbook example,
    * computing all intervals at once agrees with computing them one row
      of the transformation at a time,
    * a single linear contrast, and several stacked contrasts (original,
      sign flipped, multiplied by 2), give mutually consistent intervals.
    """
    # example from book for simultaneous confint
    m = [526.29, 54.69, 25.13]
    cov = [[5808.06, 597.84, 222.03],
           [597.84, 126.05, 23.39],
           [222.03, 23.39, 23.11]]
    nobs = 87

    res_ci = confint_mvmean_fromstats(m, cov, nobs, lin_transf=np.eye(3),
                                      simult=True)

    # same intervals, but one transformation row per call
    cii = [confint_mvmean_fromstats(
        m, cov, nobs, lin_transf=np.eye(3)[i], simult=True)[:2]
        for i in range(3)]
    cii = np.array(cii).squeeze()

    # these might use rounded numbers in intermediate computation
    res_ci_book = np.array([[503.06, 550.12], [51.22, 58.16], [23.65, 26.61]])

    assert_allclose(res_ci[0], res_ci_book[:, 0], rtol=1e-3)  # low
    # upper bounds: compare the second returned item with the second book
    # column (previously the lower-bound check was repeated here)
    assert_allclose(res_ci[1], res_ci_book[:, 1], rtol=1e-3)  # upp

    assert_allclose(res_ci[0], cii[:, 0], rtol=1e-13)
    assert_allclose(res_ci[1], cii[:, 1], rtol=1e-13)

    # single contrast: second minus third mean, expected 29.56 +/- 3.12
    res_constr = confint_mvmean_fromstats(m, cov, nobs, lin_transf=[0, 1, -1],
                                          simult=True)

    assert_allclose(res_constr[0], 29.56 - 3.12, rtol=1e-3)
    assert_allclose(res_constr[1], 29.56 + 3.12, rtol=1e-3)

    # TODO: this assumes separate constraints,
    #       but we want multiplicity correction
    # test if several constraints or transformations work
    # original, flipping sign, multiply by 2
    lt = [[0, 1, -1], [0, -1, 1], [0, 2, -2]]
    res_constr2 = confint_mvmean_fromstats(m, cov, nobs, lin_transf=lt,
                                           simult=True)

    # flipping the sign swaps and negates the bounds; scaling by 2 scales them
    lows = res_constr[0], - res_constr[1], 2 * res_constr[0]
    upps = res_constr[1], - res_constr[0], 2 * res_constr[1]
    # TODO: check return dimensions
    lows = np.asarray(lows).squeeze()
    upps = np.asarray(upps).squeeze()
    assert_allclose(res_constr2[0], lows, rtol=1e-13)
    assert_allclose(res_constr2[1], upps, rtol=1e-13)
class TestCovStructure:
    """Tests for hypothesis tests about the structure of a covariance matrix.

    All tests share one 3 x 3 covariance matrix and a sample size of 25,
    and compare the returned statistic and p-value with reference values
    that the inline comments attribute to Stata 14.
    """

    @classmethod
    def setup_class(cls):
        # computed from data with ``cov = np.cov(dta1, rowvar=0, ddof=1)``
        cls.nobs = 25
        cls.cov = np.array(
            [[28.965925000000002, 17.215358333333327, 2.6945666666666654],
             [17.215358333333327, 21.452852666666672, 6.044527833333332],
             [2.6945666666666654, 6.044527833333332, 13.599042333333331]])

    def test_spherical(self):
        """Compare ``test_cov_spherical`` with the reference values."""
        # from Stata 14, df = 5
        expected_stat = 21.53275509455011
        expected_pvalue = 0.0006422366870356

        statistic, pvalue = smmv.test_cov_spherical(self.cov, self.nobs)

        assert_allclose(statistic, expected_stat, rtol=1e-7)
        assert_allclose(pvalue, expected_pvalue, rtol=1e-6)

    def test_diagonal(self):
        """Compare ``test_cov_diagonal`` with the reference values."""
        # from Stata 14, df = 3
        expected_stat = 17.91025335733012
        expected_pvalue = 0.0004589987613319

        statistic, pvalue = smmv.test_cov_diagonal(self.cov, self.nobs)

        assert_allclose(statistic, expected_stat, rtol=1e-8)
        assert_allclose(pvalue, expected_pvalue, rtol=1e-7)

    def test_blockdiagonal(self):
        """Compare ``test_cov_blockdiagonal`` with the reference values."""
        # from Stata 14, df = 2
        expected_stat = 3.518477474111563
        expected_pvalue = 0.1721758850671037

        # blocks are specified by their lengths: a 2 x 2 block followed
        # by a 1 x 1 block
        # cov_blocks = cov[:2, :2], cov[-1:, -1:]
        # stat, pv = smmv.test_cov_blockdiagonal(cov, nobs, cov_blocks)
        block_sizes = [2, 1]
        statistic, pvalue = smmv.test_cov_blockdiagonal(
            self.cov, self.nobs, block_sizes)

        assert_allclose(statistic, expected_stat, rtol=1e-7)
        assert_allclose(pvalue, expected_pvalue, rtol=1e-6)

    def test_covmat(self):
        """Compare ``test_cov`` against a given null covariance matrix."""
        # from Stata 14, df = 6
        expected_stat = 5.481422374989864
        expected_pvalue = 0.4837049015162541

        # hypothesized covariance matrix
        null_cov = np.array([[30, 15, 0], [15, 20, 0], [0, 0, 10]])
        statistic, pvalue = smmv.test_cov(self.cov, self.nobs, null_cov)

        assert_allclose(statistic, expected_stat, rtol=1e-7)
        assert_allclose(pvalue, expected_pvalue, rtol=1e-6)
def test_cov_oneway():
    """Check ``test_cov_oneway`` for two 4 x 4 covariance matrices.

    Both the tuple-unpacked result and the named attributes
    (``statistic_f``, ``pvalue_f``, ``df_f``, ``statistic_chi2``,
    ``pvalue_chi2``, ``df_chi2``) are compared with reference values.
    """
    # from Stata 14
    # chi-square version
    expected_chi2 = 13.55075120374669
    expected_p_chi2 = .1944866419800838
    expected_df_chi2 = 10
    # F version (names ending in ``_Box`` in the reference output)
    expected_f = 1.354282822767436
    expected_p_f = .1949865290585139
    expected_df_f = (10, 18377.68924302788)  # (df_m_Box, df_r_Box)

    group_sizes = [32, 32]
    cov_first = np.array(
        [[5.192540322580645, 4.545362903225806, 6.522177419354839, 5.25],
         [4.545362903225806, 13.184475806451612, 6.76008064516129,
          6.266129032258064],
         [6.522177419354839, 6.76008064516129, 28.673387096774192,
          14.46774193548387],
         [5.25, 6.266129032258064, 14.46774193548387, 16.64516129032258]])
    cov_second = np.array(
        [[9.13608870967742, 7.549395161290322, 4.86391129032258,
          4.151209677419355],
         [7.549395161290322, 18.60383064516129, 10.224798387096774,
          5.445564516129032],
         [4.86391129032258, 10.224798387096774, 30.039314516129032,
          13.493951612903226],
         [4.151209677419355, 5.445564516129032, 13.493951612903226,
          27.995967741935484]])

    result = smmv.test_cov_oneway([cov_first, cov_second], group_sizes)

    # the result unpacks into two items, checked against the F version
    f_stat, f_pvalue = result
    assert_allclose(f_stat, expected_f, rtol=1e-10)
    assert_allclose(f_pvalue, expected_p_f, rtol=1e-6)

    # named attributes, F version
    assert_allclose(result.statistic_f, expected_f, rtol=1e-10)
    assert_allclose(result.pvalue_f, expected_p_f, rtol=1e-6)
    assert_allclose(result.df_f, expected_df_f, rtol=1e-13)

    # named attributes, chi-square version
    assert_allclose(result.statistic_chi2, expected_chi2, rtol=1e-10)
    assert_allclose(result.pvalue_chi2, expected_p_chi2, rtol=1e-6)
    assert_equal(result.df_chi2, expected_df_chi2)