AIM-PIbd-32-Kurbanova-A-A/aimenv/Lib/site-packages/statsmodels/regression/tests/test_robustcov.py
2024-10-02 22:15:59 +04:00

996 lines
32 KiB
Python

"""Testing OLS robust covariance matrices against STATA
Created on Mon Oct 28 15:25:14 2013
Author: Josef Perktold
"""
import numpy as np
from numpy.testing import (
assert_allclose,
assert_equal,
assert_raises,
assert_warns,
)
import pytest
from scipy import stats
from statsmodels.datasets import macrodata
from statsmodels.regression.linear_model import OLS, WLS
import statsmodels.stats.sandwich_covariance as sw
from statsmodels.tools.sm_exceptions import InvalidTestWarning
from statsmodels.tools.tools import add_constant
from .results import (
results_grunfeld_ols_robust_cluster as res2,
results_macro_ols_robust as res,
)
# TODO: implement test_hac_simple
class CheckOLSRobust:
    """Shared checks for regression results with a robust covariance matrix.

    Concrete test classes must provide the attributes ``res1`` (fitted
    statsmodels results), ``res2`` (reference results), ``bse_robust``,
    ``cov_robust`` and the boolean ``small``.  The optional attributes
    ``rtol`` (default ``1e-10``) and ``skip_f`` (default ``False``) adjust
    the comparisons.
    """

    def test_basic(self):
        """Compare params, robust bse and robust cov with the reference."""
        expected = self.res2
        rtol = getattr(self, "rtol", 1e-10)
        assert_allclose(self.res1.params, expected.params, rtol=rtol)
        assert_allclose(self.bse_robust, expected.bse, rtol=rtol)
        assert_allclose(self.cov_robust, expected.cov, rtol=rtol)

    @pytest.mark.smoke
    def test_t_test_summary(self):
        """Smoke test: ``summary`` of a t_test with the robust covariance."""
        res1 = self.res1
        mat = np.eye(len(res1.params))
        # TODO: if the t_test call is expensive, possibly make it a fixture?
        tt = res1.t_test(mat, cov_p=self.cov_robust)
        tt.summary()

    @pytest.mark.smoke
    def test_t_test_summary_frame(self):
        """Smoke test: ``summary_frame`` of a t_test with the robust cov."""
        res1 = self.res1
        mat = np.eye(len(res1.params))
        tt = res1.t_test(mat, cov_p=self.cov_robust)
        tt.summary_frame()

    @pytest.mark.smoke
    def test_f_test_summary(self):
        """Smoke test: ``summary`` of an f_test with the robust covariance."""
        res1 = self.res1
        mat = np.eye(len(res1.params))
        # the last row of the identity is dropped from the restriction matrix
        ft = res1.f_test(mat[:-1], cov_p=self.cov_robust)
        ft.summary()

    def test_tests(self):  # TODO: break into well-scoped tests
        """Compare t_test and f_test results with the reference results."""
        # Note: differences between small (t-distribution, ddof) and large
        # (normal). F statistic has no ddof correction in large, but uses
        # F distribution (?)
        res1 = self.res1
        res2 = self.res2
        rtol = getattr(self, "rtol", 1e-10)

        mat = np.eye(len(res1.params))
        tt = res1.t_test(mat, cov_p=self.cov_robust)
        # has 'effect', 'pvalue', 'sd', 'tvalue'
        # TODO confint missing
        assert_allclose(tt.effect, res2.params, rtol=rtol)
        assert_allclose(tt.sd, res2.bse, rtol=rtol)
        assert_allclose(tt.tvalue, res2.tvalues, rtol=rtol)
        if self.small:
            assert_allclose(tt.pvalue, res2.pvalues, rtol=5 * rtol)
        else:
            # reference p-values are compared with normal-based p-values
            pval = stats.norm.sf(np.abs(tt.tvalue)) * 2
            assert_allclose(pval, res2.pvalues, rtol=5 * rtol, atol=1e-25)

        ft = res1.f_test(mat[:-1], cov_p=self.cov_robust)
        if self.small:
            # 'df_denom', 'df_num', 'fvalue', 'pvalue'
            assert_allclose(ft.fvalue, res2.F, rtol=rtol)
            # f-pvalue is not directly available in Stata results,
            # but is in ivreg2
            if hasattr(res2, "Fp"):
                assert_allclose(ft.pvalue, res2.Fp, rtol=rtol)
        elif not getattr(self, "skip_f", False):
            # rescale our F value by df_resid / nobs before comparing
            dof_corr = res1.df_resid * 1.0 / res1.nobs
            assert_allclose(ft.fvalue * dof_corr, res2.F, rtol=rtol)

        if hasattr(res2, "df_r"):
            assert_equal(ft.df_num, res2.df_m)
            assert_equal(ft.df_denom, res2.df_r)
        else:
            # ivreg2
            assert_equal(ft.df_num, res2.Fdf1)
            assert_equal(ft.df_denom, res2.Fdf2)
class TestOLSRobust1(CheckOLSRobust):
    """OLS on the macro data with HC1 errors, checked against ``results_hc0``."""

    # compare with regress robust
    @classmethod
    def setup_class(cls):
        """Fit the OLS model once for the class and store it as ``res1``."""
        macro = macrodata.load_pandas().data
        growth_gdp = 400 * np.diff(np.log(macro["realgdp"].values))
        growth_inv = 400 * np.diff(np.log(macro["realinv"].values))
        # drop the last observation so the length matches the differenced series
        realint = macro["realint"][:-1].values
        regressors = add_constant(np.c_[growth_gdp, realint], prepend=False)
        cls.res1 = OLS(growth_inv, regressors).fit()

    def setup_method(self):
        """Select the HC1 standard errors and covariance for the checks."""
        fitted = self.res1
        self.small = True
        self.res2 = res.results_hc0
        self.bse_robust = fitted.HC1_se
        self.cov_robust = fitted.cov_HC1

    def test_qr_equiv(self):
        """HC0 standard errors agree between the default and the QR fit."""
        # GH8157
        qr_results = self.res1.model.fit(method="qr")
        assert_allclose(self.res1.HC0_se, qr_results.HC0_se)
class TestOLSRobust2(TestOLSRobust1):
    """HC1 errors checked against ``results_ivhc0_small``."""

    # compare with ivreg robust small
    def setup_method(self):
        """Select the HC1 standard errors and covariance for the checks."""
        fitted = self.res1
        self.small = True
        self.res2 = res.results_ivhc0_small
        self.bse_robust = fitted.HC1_se
        self.cov_robust = fitted.cov_HC1
class TestOLSRobust3(TestOLSRobust1):
    """HC0 errors checked against ``results_ivhc0_large``."""

    # compare with ivreg robust (not small)
    def setup_method(self):
        """Select the HC0 standard errors and covariance for the checks."""
        fitted = self.res1
        self.small = False
        self.res2 = res.results_ivhc0_large
        self.bse_robust = fitted.HC0_se
        self.cov_robust = fitted.cov_HC0
class TestOLSRobustHacSmall(TestOLSRobust1):
    """HAC covariance (4 lags, with correction) vs ``results_ivhac4_small``."""

    # compare with ivreg robust small
    def setup_method(self):
        """Compute the HAC covariance with the standalone sandwich function."""
        hac_cov = sw.cov_hac_simple(self.res1, nlags=4, use_correction=True)
        self.bse_robust = sw.se_cov(hac_cov)
        self.cov_robust = hac_cov
        self.small = True
        self.res2 = res.results_ivhac4_small
class TestOLSRobustHacLarge(TestOLSRobust1):
    """HAC covariance (4 lags, no correction) vs ``results_ivhac4_large``."""

    # compare with ivreg robust (not small)
    def setup_method(self):
        """Compute the HAC covariance with the standalone sandwich function."""
        hac_cov = sw.cov_hac_simple(self.res1, nlags=4, use_correction=False)
        self.bse_robust = sw.se_cov(hac_cov)
        self.cov_robust = hac_cov
        self.small = False
        self.res2 = res.results_ivhac4_large
class CheckOLSRobustNewMixin:
    """Checks for results that use the robust covariance as default covariance.

    In addition to the attributes read here from ``res1`` and ``res2``, the
    concrete class must set ``bse_robust``/``cov_robust`` (taken from the
    results instance) and ``bse_robust2``/``cov_robust2`` (computed
    independently).  Optional attributes: ``rtol`` (default ``1e-10``) and
    ``skip_f`` (default ``False``).
    """

    def test_compare(self):
        """Both ways of computing the robust cov and bse must agree."""
        rtol = getattr(self, "rtol", 1e-10)
        assert_allclose(self.cov_robust, self.cov_robust2, rtol=rtol)
        assert_allclose(self.bse_robust, self.bse_robust2, rtol=rtol)

    def test_fvalue(self):
        """Compare ``fvalue`` (and ``f_pvalue`` if available) with reference."""
        if getattr(self, "skip_f", False):
            # pytest.skip raises on its own, no ``raise`` statement needed
            pytest.skip("TODO: document why this test is skipped")

        rtol = getattr(self, "rtol", 1e-10)
        assert_allclose(self.res1.fvalue, self.res2.F, rtol=rtol)
        if hasattr(self.res2, "Fp"):
            # only available with ivreg2
            assert_allclose(self.res1.f_pvalue, self.res2.Fp, rtol=rtol)

    def test_confint(self):
        """Compare confidence intervals and implied critical values."""
        rtol = getattr(self, "rtol", 1e-10)
        ci1 = self.res1.conf_int()
        ci2 = self.res2.params_table[:, 4:6]
        assert_allclose(ci1, ci2, rtol=rtol)

        # check critical value: half-width of the interval in units of bse.
        # The reference value has to come from the reference interval and
        # reference bse, otherwise the comparison is a tautology.
        crit1 = np.diff(ci1, 1).ravel() / 2 / self.res1.bse
        crit2 = np.diff(ci2, 1).ravel() / 2 / self.res2.bse
        # derived from differences of interval bounds, so allow some slack
        assert_allclose(crit1, crit2, rtol=5 * rtol)

    def test_ttest(self):
        """Compare t_test results, including conf_int, with the reference."""
        res1 = self.res1
        res2 = self.res2
        rtol = getattr(self, "rtol", 1e-10)
        # NOTE(review): this reads the "rtol" attribute, not "rtolh", so the
        # tighter 1e-12 only applies when the class does not define ``rtol``.
        # Left unchanged because reading "rtolh" would tighten the tolerance
        # for several classes - confirm against the reference precision.
        rtolh = getattr(self, "rtol", 1e-12)

        mat = np.eye(len(res1.params))
        tt = res1.t_test(mat, cov_p=self.cov_robust)
        # has 'effect', 'pvalue', 'sd', 'tvalue'
        # TODO confint missing
        assert_allclose(tt.effect, res2.params, rtol=rtolh)
        assert_allclose(tt.sd, res2.bse, rtol=rtol)
        assert_allclose(tt.tvalue, res2.tvalues, rtol=rtolh)
        assert_allclose(tt.pvalue, res2.pvalues, rtol=5 * rtol)

        ci1 = tt.conf_int()
        ci2 = self.res2.params_table[:, 4:6]
        assert_allclose(ci1, ci2, rtol=rtol)

    def test_scale(self):
        """Compare scale and rsquared, and check internal consistency."""
        res1 = self.res1
        res2 = self.res2
        rtol = 1e-5
        # Note we always use df_resid for scale
        # Stata uses nobs or df_resid for rmse, not always available in Stata
        skip = False
        if hasattr(res2, "rss"):
            scale = res2.rss / (res2.N - res2.df_m - 1)
        elif hasattr(res2, "rmse"):
            scale = res2.rmse**2
        else:
            skip = True

        if isinstance(res1.model, WLS):
            # Stata uses different scaling and using unweighted resid for rmse
            skip = True

        if not skip:
            assert_allclose(res1.scale, scale, rtol=rtol)

        if res2.vcetype != "Newey-West":
            # no rsquared in Stata
            r2 = res2.r2 if hasattr(res2, "r2") else res2.r2c
            assert_allclose(res1.rsquared, r2, rtol=rtol, err_msg=str(skip))

        # consistency checks, not against Stata
        df_resid = res1.nobs - res1.df_model - 1
        assert_equal(res1.df_resid, df_resid)
        # variance of resid_pearson is 1, with ddof, and loc=0
        psum = (res1.resid_pearson**2).sum()
        assert_allclose(psum, df_resid, rtol=1e-13)

    @pytest.mark.smoke
    def test_summary(self):
        """Smoke test: ``summary`` runs with the robust covariance."""
        self.res1.summary()
class TestOLSRobust2SmallNew(TestOLSRobust1, CheckOLSRobustNewMixin):
    """HC1 results from ``get_robustcov_results`` vs ``results_ivhc0_small``."""

    # compare with ivreg robust small
    def setup_method(self):
        """Replace ``res1`` by the HC1 results created with ``use_t=True``."""
        res_ols = self.res1.get_robustcov_results("HC1", use_t=True)
        # keep the results the robust instance was created from as res3
        self.res3 = self.res1
        self.res1 = res_ols
        self.bse_robust = res_ols.bse
        self.cov_robust = res_ols.cov_params()
        self.bse_robust2 = res_ols.HC1_se
        self.cov_robust2 = res_ols.cov_HC1
        self.small = True
        self.res2 = res.results_ivhc0_small

    def test_compare(self):
        """Nested compare methods warn; the LR test matches Stata values."""
        import warnings

        # check that we get a warning using the nested compare methods
        res1 = self.res1
        endog = res1.model.endog
        exog = res1.model.exog[:, [0, 2]]  # drop one variable
        res_ols2 = OLS(endog, exog).fit()

        # results from Stata
        r_pval = 0.0307306938402991
        r_chi2 = 4.667944083588736
        r_df = 1

        with pytest.warns(InvalidTestWarning):
            res1.compare_lr_test(res_ols2)

        # the warning was checked above, silence it for the value comparison
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            chi2, pval, df = res1.compare_lr_test(res_ols2)
        assert_allclose(chi2, r_chi2, rtol=1e-11)
        assert_allclose(pval, r_pval, rtol=1e-11)
        assert_equal(df, r_df)

        with pytest.warns(InvalidTestWarning):
            res1.compare_f_test(res_ols2)
        # fva, pval, df = res1.compare_f_test(res_ols2)
class TestOLSRobustHACSmallNew(TestOLSRobust1, CheckOLSRobustNewMixin):
    """HAC results from ``get_robustcov_results`` vs ``results_ivhac4_small``."""

    # compare with ivreg robust small
    def setup_method(self):
        """Replace ``res1`` by HAC results and recompute the cov standalone."""
        robust = self.res1.get_robustcov_results(
            "HAC", maxlags=4, use_correction=True, use_t=True
        )
        # keep the results the robust instance was created from as res3
        self.res3 = self.res1
        self.res1 = robust
        self.bse_robust = robust.bse
        self.cov_robust = robust.cov_params()

        # same covariance computed with the standalone sandwich function
        cov_direct = sw.cov_hac_simple(robust, nlags=4, use_correction=True)
        self.bse_robust2 = sw.se_cov(cov_direct)
        self.cov_robust2 = cov_direct

        self.small = True
        self.res2 = res.results_ivhac4_small
class TestOLSRobust2LargeNew(TestOLSRobust1, CheckOLSRobustNewMixin):
    """HC0 results with ``use_t`` switched off vs ``results_ivhc0_large``."""

    def setup_method(self):
        """Replace ``res1`` by the HC0 results and disable ``use_t`` on it."""
        robust = self.res1.get_robustcov_results("HC0")
        robust.use_t = False
        # keep the results the robust instance was created from as res3
        self.res3 = self.res1
        self.res1 = robust
        self.bse_robust = robust.bse
        self.cov_robust = robust.cov_params()
        self.bse_robust2 = robust.HC0_se
        self.cov_robust2 = robust.cov_HC0
        self.small = False
        self.res2 = res.results_ivhc0_large

    @pytest.mark.skip(reason="not refactored yet for `large`")
    def test_fvalue(self):
        """Skipped override of the mixin's F-value check."""
        super().test_fvalue()

    @pytest.mark.skip(reason="not refactored yet for `large`")
    def test_confint(self):
        """Skipped override of the mixin's confidence interval check."""
        super().test_confint()
#######################################################
# cluster robust standard errors
#######################################################
class CheckOLSRobustCluster(CheckOLSRobust):
    """Base class: OLS on the first 200 Grunfeld rows plus group/time ids."""

    # compare with regress robust
    @classmethod
    def setup_class(cls):
        """Fit the OLS model and build ``groups``, ``time`` and ``tidx``."""
        from statsmodels.datasets import grunfeld

        dataset = grunfeld.data.load_pandas()
        # Stata example/data seems to miss last firm
        endog = dataset.endog[:200]
        exog_all = dataset.exog[:200]
        design = add_constant(exog_all[["value", "capital"]], prepend=False)
        # asserts do not work for pandas
        cls.res1 = OLS(endog, design).fit()

        # integer codes of the firm names serve as cluster labels
        firm_bytes = np.asarray(exog_all[["firm"]], "S20")
        _, firm_id = np.unique(firm_bytes, return_inverse=True)
        cls.groups = firm_id

        # time indicator in range(max Ti)
        year = np.require(exog_all[["year"]], requirements="W")
        year -= year.min()
        cls.time = np.squeeze(year).astype(int)

        # nw_panel function requires interval bounds
        cls.tidx = [(start, start + 20) for start in range(0, 200, 20)]
class TestOLSRobustCluster2(CheckOLSRobustCluster, CheckOLSRobustNewMixin):
    """One-way cluster cov from ``get_robustcov_results`` vs ``results_cluster``."""

    # compare with `reg cluster`
    def setup_method(self):
        """Replace ``res1`` by cluster-robust results, recompute cov standalone."""
        robust = self.res1.get_robustcov_results(
            "cluster", groups=self.groups, use_correction=True, use_t=True
        )
        # keep the results the robust instance was created from as res3
        self.res3 = self.res1
        self.res1 = robust
        self.bse_robust = robust.bse
        self.cov_robust = robust.cov_params()

        # same covariance computed with the standalone sandwich function
        cov_direct = sw.cov_cluster(robust, self.groups, use_correction=True)
        self.bse_robust2 = sw.se_cov(cov_direct)
        self.cov_robust2 = cov_direct

        self.small = True
        self.res2 = res2.results_cluster
        self.rtol = 1e-6
        self.rtolh = 1e-10
class TestOLSRobustCluster2Input(
    CheckOLSRobustCluster, CheckOLSRobustNewMixin
):
    """Cluster-robust results with the groups given as a one-column DataFrame.

    Reference values are ``res2.results_cluster``.
    """

    # compare with `reg cluster`
    def setup_method(self):
        """Create cluster-robust results from a (nobs, 1) DataFrame of groups."""
        import pandas as pd

        fat_array = self.groups.reshape(-1, 1)
        fat_groups = pd.DataFrame(fat_array)
        res_ols = self.res1.get_robustcov_results(
            "cluster", groups=fat_groups, use_correction=True, use_t=True
        )
        # keep the results the robust instance was created from as res3
        self.res3 = self.res1
        self.res1 = res_ols
        self.bse_robust = res_ols.bse
        self.cov_robust = res_ols.cov_params()

        # standalone function gets the original 1-dim group labels
        cov1 = sw.cov_cluster(self.res1, self.groups, use_correction=True)
        se1 = sw.se_cov(cov1)
        self.bse_robust2 = se1
        self.cov_robust2 = cov1

        self.small = True
        self.res2 = res2.results_cluster
        self.rtol = 1e-6
        self.rtolh = 1e-10

    def test_too_many_groups(self):
        """Three columns of group labels raise a ValueError."""
        long_groups = self.groups.reshape(-1, 1)
        groups3 = np.hstack((long_groups, long_groups, long_groups))
        with assert_raises(ValueError):
            self.res1.get_robustcov_results(
                "cluster",
                groups=groups3,
                use_correction=True,
                use_t=True,
            )

    def test_2way_dataframe(self):
        """Smoke test: a two-column DataFrame of group labels is accepted."""
        import pandas as pd

        long_groups = self.groups.reshape(-1, 1)
        groups2 = pd.DataFrame(np.hstack((long_groups, long_groups)))
        # only checks that the call runs; the result is not inspected
        self.res1.get_robustcov_results(
            "cluster", groups=groups2, use_correction=True, use_t=True
        )
class TestOLSRobustCluster2Fit(CheckOLSRobustCluster, CheckOLSRobustNewMixin):
    """Cluster-robust results requested through ``fit(cov_type="cluster")``.

    Reference values are ``res2.results_cluster``.
    """

    # copy, past uses fit method
    # compare with `reg cluster`
    def setup_method(self):
        """Refit the model with a cluster cov_type and recompute cov standalone."""
        cluster_options = dict(
            groups=self.groups, use_correction=True, use_t=True
        )
        robust = self.res1.model.fit(
            cov_type="cluster",
            cov_kwds=cluster_options,
        )
        # keep the class-level results available as res3
        self.res3 = self.res1
        self.res1 = robust
        self.bse_robust = robust.bse
        self.cov_robust = robust.cov_params()

        # same covariance computed with the standalone sandwich function
        cov_direct = sw.cov_cluster(robust, self.groups, use_correction=True)
        self.bse_robust2 = sw.se_cov(cov_direct)
        self.cov_robust2 = cov_direct

        self.small = True
        self.res2 = res2.results_cluster
        self.rtol = 1e-6
        self.rtolh = 1e-10

    def test_basic_inference(self):
        """params, bse, pvalues and conf_int of the results match reference."""
        actual = self.res1
        expected = self.res2
        rtol = 1e-7
        assert_allclose(actual.params, expected.params, rtol=1e-8)
        assert_allclose(actual.bse, expected.bse, rtol=rtol)
        assert_allclose(
            actual.pvalues, expected.pvalues, rtol=rtol, atol=1e-20
        )
        expected_ci = expected.params_table[:, 4:6]
        assert_allclose(actual.conf_int(), expected_ci, rtol=5e-7, atol=1e-20)
class TestOLSRobustCluster2Large(
    CheckOLSRobustCluster, CheckOLSRobustNewMixin
):
    """Cluster-robust results without small sample correction and ``use_t``.

    Reference values are ``res2.results_cluster_large``.
    """

    # compare with `reg cluster`
    def setup_method(self):
        """Replace ``res1`` by cluster-robust results, recompute cov standalone."""
        res_ols = self.res1.get_robustcov_results(
            "cluster",
            groups=self.groups,
            use_correction=False,
            use_t=False,
            df_correction=True,
        )
        # keep the results the robust instance was created from as res3
        self.res3 = self.res1
        self.res1 = res_ols
        self.bse_robust = res_ols.bse
        self.cov_robust = res_ols.cov_params()

        # same covariance computed with the standalone sandwich function
        cov1 = sw.cov_cluster(self.res1, self.groups, use_correction=False)
        se1 = sw.se_cov(cov1)
        self.bse_robust2 = se1
        self.cov_robust2 = cov1

        self.small = False
        self.res2 = res2.results_cluster_large
        self.skip_f = True
        self.rtol = 1e-6
        self.rtolh = 1e-10

    # Named ``test_fvalue`` so that it overrides the mixin's test, matching
    # the override in TestOLSRobustCluster2LargeFit.
    @pytest.mark.skip(reason="GH#1189 issuecomment-29141741")
    def test_fvalue(self):
        """Skipped override of the mixin's F-value check."""
        super().test_fvalue()
class TestOLSRobustCluster2LargeFit(
    CheckOLSRobustCluster, CheckOLSRobustNewMixin
):
    """Cluster cov via ``fit`` of a new model, no correction and no ``use_t``.

    Reference values are ``res2.results_cluster_large``.
    """

    # compare with `reg cluster`
    def setup_method(self):
        """Fit a fresh OLS model with a cluster cov_type, recompute the cov."""
        # a new model is built from the stored arrays instead of reusing
        # self.res1.model
        fresh_model = OLS(self.res1.model.endog, self.res1.model.exog)
        cluster_options = dict(
            groups=self.groups,
            use_correction=False,
            use_t=False,
            df_correction=True,
        )
        robust = fresh_model.fit(cov_type="cluster", cov_kwds=cluster_options)
        # keep the class-level results available as res3
        self.res3 = self.res1
        self.res1 = robust
        self.bse_robust = robust.bse
        self.cov_robust = robust.cov_params()

        # same covariance computed with the standalone sandwich function
        cov_direct = sw.cov_cluster(robust, self.groups, use_correction=False)
        self.bse_robust2 = sw.se_cov(cov_direct)
        self.cov_robust2 = cov_direct

        self.small = False
        self.res2 = res2.results_cluster_large
        self.skip_f = True
        self.rtol = 1e-6
        self.rtolh = 1e-10

    @pytest.mark.skip(reason="GH#1189 issuecomment-29141741")
    def test_fvalue(self):
        """Skipped override of the mixin's F-value check."""
        super().test_fvalue()
class TestOLSRobustClusterGS(CheckOLSRobustCluster, CheckOLSRobustNewMixin):
    """"nw-groupsum" cov from ``get_robustcov_results``, 4 lags.

    Reference values are ``res2.results_nw_groupsum4``.
    """

    def setup_method(self):
        """Replace ``res1`` by nw-groupsum results, recompute cov standalone."""
        robust = self.res1.get_robustcov_results(
            "nw-groupsum",
            time=self.time,
            maxlags=4,
            use_correction=False,
            use_t=True,
        )
        # keep the results the robust instance was created from as res3
        self.res3 = self.res1
        self.res1 = robust
        self.bse_robust = robust.bse
        self.cov_robust = robust.cov_params()

        # same covariance computed with the standalone sandwich function
        cov_direct = sw.cov_nw_groupsum(
            robust, 4, self.time, use_correction=False
        )
        self.bse_robust2 = sw.se_cov(cov_direct)
        self.cov_robust2 = cov_direct

        self.small = True
        self.res2 = res2.results_nw_groupsum4
        self.skip_f = True
        self.rtol = 1e-6
        self.rtolh = 1e-10
class TestOLSRobustClusterGSFit(CheckOLSRobustCluster, CheckOLSRobustNewMixin):
    """"nw-groupsum" cov requested through ``fit``, 4 lags.

    Reference values are ``res2.results_nw_groupsum4``.
    """

    def setup_method(self):
        """Refit with cov_type "nw-groupsum" and recompute the cov standalone."""
        groupsum_options = dict(
            time=self.time, maxlags=4, use_correction=False, use_t=True
        )
        robust = self.res1.model.fit(
            cov_type="nw-groupsum",
            cov_kwds=groupsum_options,
        )
        # keep the class-level results available as res3
        self.res3 = self.res1
        self.res1 = robust
        self.bse_robust = robust.bse
        self.cov_robust = robust.cov_params()

        # same covariance computed with the standalone sandwich function
        cov_direct = sw.cov_nw_groupsum(
            robust, 4, self.time, use_correction=False
        )
        self.bse_robust2 = sw.se_cov(cov_direct)
        self.cov_robust2 = cov_direct

        self.small = True
        self.res2 = res2.results_nw_groupsum4
        self.skip_f = True
        self.rtol = 1e-6
        self.rtolh = 1e-10
class TestOLSRobustClusterNWP(CheckOLSRobustCluster, CheckOLSRobustNewMixin):
    """"nw-panel" cov from ``get_robustcov_results``, 4 lags, time index given.

    Reference values are ``res2.results_nw_panel4``.
    """

    def setup_method(self):
        """Replace ``res1`` by nw-panel results and recompute cov standalone."""
        robust = self.res1.get_robustcov_results(
            "nw-panel",
            time=self.time,
            maxlags=4,
            use_correction="hac",
            use_t=True,
            df_correction=False,
        )
        # keep the results the robust instance was created from as res3
        self.res3 = self.res1
        self.res1 = robust
        self.bse_robust = robust.bse
        self.cov_robust = robust.cov_params()

        # standalone function takes the interval bounds instead of time
        cov_direct = sw.cov_nw_panel(robust, 4, self.tidx)
        self.bse_robust2 = sw.se_cov(cov_direct)
        self.cov_robust2 = cov_direct

        self.small = True
        self.res2 = res2.results_nw_panel4
        self.skip_f = True
        self.rtol = 1e-6
        self.rtolh = 1e-10

    def test_keyword(self):
        """cov_type "hac-panel" gives the same bse as "nw-panel"."""
        # check corrected keyword
        alias_results = self.res1.get_robustcov_results(
            "hac-panel",
            time=self.time,
            maxlags=4,
            use_correction="hac",
            use_t=True,
            df_correction=False,
        )
        assert_allclose(alias_results.bse, self.res1.bse, rtol=1e-12)
class TestOLSRobustClusterNWPGroupsFit(
    CheckOLSRobustCluster, CheckOLSRobustNewMixin
):
    """"nw-panel" cov requested through ``fit`` with ``groups`` instead of time.

    Reference values are ``res2.results_nw_panel4``.
    """

    def setup_method(self):
        """Refit with cov_type "nw-panel" and recompute the cov standalone."""
        panel_options = dict(
            groups=self.groups,
            maxlags=4,
            use_correction="hac",
            use_t=True,
            df_correction=False,
        )
        robust = self.res1.model.fit(
            cov_type="nw-panel",
            cov_kwds=panel_options,
        )
        # keep the class-level results available as res3
        self.res3 = self.res1
        self.res1 = robust
        self.bse_robust = robust.bse
        self.cov_robust = robust.cov_params()

        # standalone function takes the interval bounds of the groups
        cov_direct = sw.cov_nw_panel(robust, 4, self.tidx)
        self.bse_robust2 = sw.se_cov(cov_direct)
        self.cov_robust2 = cov_direct

        self.small = True
        self.res2 = res2.results_nw_panel4
        self.skip_f = True
        self.rtol = 1e-6
        self.rtolh = 1e-10
# TODO: low precision/agreement
class TestOLSRobustCluster2G(CheckOLSRobustCluster, CheckOLSRobustNewMixin):
    """Two-way cluster cov (groups and time) with small sample correction.

    Reference values are ``res2.results_cluster_2groups_small``.
    """

    def setup_method(self):
        """Replace ``res1`` by two-way cluster results, recompute the cov."""
        robust = self.res1.get_robustcov_results(
            "cluster",
            groups=(self.groups, self.time),
            use_correction=True,
            use_t=True,
        )
        # keep the results the robust instance was created from as res3
        self.res3 = self.res1
        self.res1 = robust
        self.bse_robust = robust.bse
        self.cov_robust = robust.cov_params()

        # the standalone function returns a tuple, the first item is used
        two_way = sw.cov_cluster_2groups(
            robust, self.groups, group2=self.time, use_correction=True
        )
        cov_direct = two_way[0]
        self.bse_robust2 = sw.se_cov(cov_direct)
        self.cov_robust2 = cov_direct

        self.small = True
        self.res2 = res2.results_cluster_2groups_small
        # only f_pvalue and confint for constant differ >rtol=0.05
        self.rtol = 0.35
        self.rtolh = 1e-10
class TestOLSRobustCluster2GLarge(
    CheckOLSRobustCluster, CheckOLSRobustNewMixin
):
    """Two-way cluster cov (groups and time) without correction or ``use_t``.

    Reference values are ``res2.results_cluster_2groups_large``.
    """

    def setup_method(self):
        """Replace ``res1`` by two-way cluster results, recompute the cov."""
        robust = self.res1.get_robustcov_results(
            "cluster",
            groups=(self.groups, self.time),
            use_correction=False,  # True,
            use_t=False,
        )
        # keep the results the robust instance was created from as res3
        self.res3 = self.res1
        self.res1 = robust
        self.bse_robust = robust.bse
        self.cov_robust = robust.cov_params()

        # the standalone function returns a tuple, the first item is used
        two_way = sw.cov_cluster_2groups(
            robust, self.groups, group2=self.time, use_correction=False
        )
        cov_direct = two_way[0]
        self.bse_robust2 = sw.se_cov(cov_direct)
        self.cov_robust2 = cov_direct

        self.small = False
        self.res2 = res2.results_cluster_2groups_large
        self.skip_f = True
        self.rtol = 1e-7
        self.rtolh = 1e-10
######################################
# WLS
######################################
class CheckWLSRobustCluster(CheckOLSRobust):
    """Base class: WLS on the first 200 Grunfeld rows plus group/time ids."""

    # compare with regress robust
    @classmethod
    def setup_class(cls):
        """Fit the WLS model and build ``groups``, ``time`` and ``tidx``."""
        from statsmodels.datasets import grunfeld

        dataset = grunfeld.data.load_pandas()
        # Stata example/data seems to miss last firm
        endog = dataset.endog[:200]
        exog_all = dataset.exog[:200]
        design = add_constant(exog_all[["value", "capital"]], prepend=False)
        # asserts do not work for pandas
        inverse_value = 1 / exog_all["value"]
        cls.res1 = WLS(endog, design, weights=inverse_value).fit()

        # integer codes of the firm names serve as cluster labels
        firm_bytes = np.asarray(exog_all[["firm"]], "S20")
        _, firm_id = np.unique(firm_bytes, return_inverse=True)
        cls.groups = firm_id

        # time indicator in range(max Ti)
        year = np.require(exog_all[["year"]], requirements="W")
        year -= year.min()
        cls.time = np.squeeze(year).astype(int)

        # nw_panel function requires interval bounds
        cls.tidx = [(start, start + 20) for start in range(0, 200, 20)]
# not available yet for WLS
class TestWLSRobustCluster2(CheckWLSRobustCluster, CheckOLSRobustNewMixin):
    """WLS with one-way cluster cov vs ``results_cluster_wls_small``."""

    # compare with `reg cluster`
    def setup_method(self):
        """Replace ``res1`` by cluster-robust results, recompute cov standalone."""
        robust = self.res1.get_robustcov_results(
            "cluster", groups=self.groups, use_correction=True, use_t=True
        )
        # keep the results the robust instance was created from as res3
        self.res3 = self.res1
        self.res1 = robust
        self.bse_robust = robust.bse
        self.cov_robust = robust.cov_params()

        # same covariance computed with the standalone sandwich function
        cov_direct = sw.cov_cluster(robust, self.groups, use_correction=True)
        self.bse_robust2 = sw.se_cov(cov_direct)
        self.cov_robust2 = cov_direct

        self.small = True
        self.res2 = res2.results_cluster_wls_small
        self.rtol = 1e-6
        self.rtolh = 1e-10
# not available yet for WLS
class TestWLSRobustCluster2Large(
    CheckWLSRobustCluster, CheckOLSRobustNewMixin
):
    """WLS with cluster cov, no correction and no ``use_t``.

    Reference values are ``res2.results_cluster_wls_large``.
    """

    # compare with `reg cluster`
    def setup_method(self):
        """Replace ``res1`` by cluster-robust results, recompute cov standalone."""
        robust = self.res1.get_robustcov_results(
            "cluster",
            groups=self.groups,
            use_correction=False,
            use_t=False,
            df_correction=True,
        )
        # keep the results the robust instance was created from as res3
        self.res3 = self.res1
        self.res1 = robust
        self.bse_robust = robust.bse
        self.cov_robust = robust.cov_params()

        # same covariance computed with the standalone sandwich function
        cov_direct = sw.cov_cluster(robust, self.groups, use_correction=False)
        self.bse_robust2 = sw.se_cov(cov_direct)
        self.cov_robust2 = cov_direct

        self.small = False
        self.res2 = res2.results_cluster_wls_large
        self.skip_f = True
        self.rtol = 1e-6
        self.rtolh = 1e-10
class TestWLSRobustSmall(CheckWLSRobustCluster, CheckOLSRobustNewMixin):
    """WLS with HC1 cov and ``use_t`` vs ``results_hc1_wls_small``."""

    def setup_method(self):
        """Replace ``res1`` by the HC1 results; second cov is ``cov_HC1``."""
        robust = self.res1.get_robustcov_results("HC1", use_t=True)
        # keep the results the robust instance was created from as res3
        self.res3 = self.res1
        self.res1 = robust
        self.bse_robust = robust.bse
        self.cov_robust = robust.cov_params()

        # TODO: check standalone function
        # here the comparison covariance is the results attribute itself
        hc1_cov = robust.cov_HC1
        self.bse_robust2 = sw.se_cov(hc1_cov)
        self.cov_robust2 = hc1_cov

        self.small = True
        self.res2 = res2.results_hc1_wls_small
        self.skip_f = True
        self.rtol = 1e-6
        self.rtolh = 1e-10
class TestWLSOLSRobustSmall:
    """WLS results must agree with OLS on data scaled by sqrt of the weights."""

    @classmethod
    def setup_class(cls):
        """Fit WLS and the equivalent OLS on the first 200 Grunfeld rows."""
        from statsmodels.datasets import grunfeld

        dataset = grunfeld.data.load_pandas()
        # Stata example/data seems to miss last firm
        endog = dataset.endog[:200]
        exog_all = dataset.exog[:200]
        design = add_constant(exog_all[["value", "capital"]], prepend=False)
        # asserts do not work for pandas
        cls.res_wls = WLS(endog, design, weights=1 / exog_all["value"]).fit()

        # OLS on endog and exog multiplied by the square root of the weights
        w_sqrt = 1 / np.sqrt(np.asarray(exog_all["value"]))
        scaled_endog = endog * w_sqrt
        scaled_exog = np.asarray(design) * w_sqrt[:, None]
        cls.res_ols = OLS(scaled_endog, scaled_exog).fit()

        firm_bytes = np.asarray(exog_all[["firm"]], "S20")
        _, firm_id = np.unique(firm_bytes, return_inverse=True)
        cls.groups = firm_id

        # time indicator in range(max Ti)
        year = np.require(exog_all[["year"]], requirements="W")
        year -= year.min()
        cls.time = np.squeeze(year).astype(int)

        # nw_panel function requires interval bounds
        cls.tidx = [(start, start + 20) for start in range(0, 200, 20)]

    def test_all(self):
        """HC0-HC3 results with ``use_t`` agree between OLS and WLS."""
        all_cov = [
            (name, dict(use_t=True)) for name in ("HC0", "HC1", "HC2", "HC3")
        ]
        for cov_type, kwds in all_cov:
            from_ols = self.res_ols.get_robustcov_results(cov_type, **kwds)
            from_wls = self.res_wls.get_robustcov_results(cov_type, **kwds)
            assert_allclose(from_ols.params, from_wls.params, rtol=1e-13)
            assert_allclose(
                from_ols.cov_params(), from_wls.cov_params(), rtol=1e-13
            )
            assert_allclose(from_ols.bse, from_wls.bse, rtol=1e-13)
            assert_allclose(from_ols.pvalues, from_wls.pvalues, rtol=1e-13)

            restrictions = np.eye(len(from_ols.params))
            f_ols = from_ols.f_test(restrictions)
            f_wls = from_wls.f_test(restrictions)
            assert_allclose(f_ols.fvalue, f_wls.fvalue, rtol=1e-12)
            assert_allclose(f_ols.pvalue, f_wls.pvalue, rtol=5e-11)

    def test_fixed_scale(self):
        """cov_type "fixed_scale": OLS/WLS agree and cov is the normalized cov."""
        cov_type = "fixed_scale"
        kwds = {}

        def check_normalized_cov(results):
            # cov_params equals normalized_cov_params, and a t_test that is
            # given normalized_cov_params reproduces bse, pvalues and tvalues
            tt = results.t_test(
                np.eye(len(results.params)),
                cov_p=results.normalized_cov_params,
            )
            assert_allclose(
                results.cov_params(),
                results.normalized_cov_params,
                rtol=1e-13,
            )
            assert_allclose(results.bse, tt.sd, rtol=1e-13)
            assert_allclose(results.pvalues, tt.pvalue, rtol=1e-13)
            assert_allclose(results.tvalues, tt.tvalue, rtol=1e-13)

        from_ols = self.res_ols.get_robustcov_results(cov_type, **kwds)
        from_wls = self.res_wls.get_robustcov_results(cov_type, **kwds)
        assert_allclose(from_ols.params, from_wls.params, rtol=1e-13)
        assert_allclose(
            from_ols.cov_params(), from_wls.cov_params(), rtol=1e-13
        )
        assert_allclose(from_ols.bse, from_wls.bse, rtol=1e-13)
        assert_allclose(from_ols.pvalues, from_wls.pvalues, rtol=1e-12)
        check_normalized_cov(from_wls)

        # using cov_type in fit
        wls_model = self.res_wls.model
        new_model = WLS(
            wls_model.endog, wls_model.exog, weights=wls_model.weights
        )
        from_fit = new_model.fit(cov_type=cov_type, cov_kwds=kwds)
        check_normalized_cov(from_fit)
def test_cov_type_fixed_scale():
    """Check WLS standard errors with estimated and with fixed scale.

    With the default covariance the scale is estimated, so rescaling the
    weights must not change ``bse``.  With ``cov_type="fixed scale"`` the
    standard errors change with the weights or with the ``scale`` keyword.
    """
    # this is a unit test from scipy curvefit for `absolute_sigma` keyword
    xdata = np.array([0, 1, 2, 3, 4, 5])
    ydata = np.array([1, 1, 5, 7, 8, 12])
    sigma = np.array([1, 2, 1, 2, 1, 2])

    xdata = np.column_stack((xdata, np.ones(len(xdata))))
    weights = 1.0 / sigma**2

    fit = WLS(ydata, xdata, weights=weights).fit()
    assert_allclose(fit.bse, [0.20659803, 0.57204404], rtol=1e-3)

    # estimated scale: a common factor in the weights cancels in cov_params
    # (this replaces an exact duplicate of the check above)
    fit = WLS(ydata, xdata, weights=weights / 9.0).fit()
    assert_allclose(fit.bse, [0.20659803, 0.57204404], rtol=1e-3)

    fit = WLS(ydata, xdata, weights=weights).fit(cov_type="fixed scale")
    assert_allclose(fit.bse, [0.30714756, 0.85045308], rtol=1e-3)

    # fixed scale: dividing the weights by 9 triples the standard errors
    fit = WLS(ydata, xdata, weights=weights / 9.0).fit(cov_type="fixed scale")
    assert_allclose(fit.bse, [3 * 0.30714756, 3 * 0.85045308], rtol=1e-3)

    # same effect through the ``scale`` option
    fit = WLS(ydata, xdata, weights=weights).fit(
        cov_type="fixed scale", cov_kwds={"scale": 9}
    )
    assert_allclose(fit.bse, [3 * 0.30714756, 3 * 0.85045308], rtol=1e-3)
@pytest.mark.parametrize(
    "cov_info",
    [
        ("nonrobust", {}),
        ("HC0", {}),
        ("HC1", {}),
        ("HC2", {}),
        ("HC3", {}),
        ("HAC", {"maxlags": 7}),
        ("cluster", {"groups": (np.arange(500) % 27)}),
    ],
)
def test_qr_equiv(cov_info):
    """Standard errors agree between the default fit and ``method="qr"``.

    ``cov_info`` is a ``(cov_type, cov_kwds)`` pair passed on to ``fit``.
    """
    cov_type = cov_info[0]
    cov_kwds = cov_info[1]

    # seeded generator, draws are made in the order exog, then noise
    rng = np.random.RandomState(123498)
    exog = rng.standard_normal((500, 3))
    coefs = np.ones(3)
    noise = rng.standard_normal(500)
    endog = exog @ coefs + noise

    model = OLS(endog, exog)
    fit_options = dict(cov_type=cov_type, cov_kwds=cov_kwds)
    default_fit = model.fit(**fit_options)
    qr_fit = model.fit(method="qr", **fit_options)
    assert_allclose(default_fit.bse, qr_fit.bse)