996 lines
32 KiB
Python
996 lines
32 KiB
Python
"""Testing OLS robust covariance matrices against STATA
|
|
|
|
Created on Mon Oct 28 15:25:14 2013
|
|
|
|
Author: Josef Perktold
|
|
"""
|
|
|
|
import numpy as np
|
|
from numpy.testing import (
|
|
assert_allclose,
|
|
assert_equal,
|
|
assert_raises,
|
|
assert_warns,
|
|
)
|
|
import pytest
|
|
from scipy import stats
|
|
|
|
from statsmodels.datasets import macrodata
|
|
from statsmodels.regression.linear_model import OLS, WLS
|
|
import statsmodels.stats.sandwich_covariance as sw
|
|
from statsmodels.tools.sm_exceptions import InvalidTestWarning
|
|
from statsmodels.tools.tools import add_constant
|
|
|
|
from .results import (
|
|
results_grunfeld_ols_robust_cluster as res2,
|
|
results_macro_ols_robust as res,
|
|
)
|
|
|
|
# TODO: implement test_hac_simple
|
|
|
|
|
|
class CheckOLSRobust:
    """Shared assertions comparing robust covariance results with Stata output.

    Concrete test classes supply the attributes read here:

    - ``res1``: fitted statsmodels results instance,
    - ``res2``: holder of the reference values (``params``, ``bse``, ``cov``,
      ``tvalues``, ``pvalues``, ``F``, ...),
    - ``bse_robust`` / ``cov_robust``: robust standard errors and covariance
      matrix under test,
    - ``small``: selects the small-sample or large-sample branch of the
      inference checks,
    - optionally ``rtol`` (default ``1e-10``) and ``skip_f``.
    """

    def test_basic(self):
        """Parameters, robust standard errors and covariance match ``res2``."""
        res1 = self.res1
        res2 = self.res2
        rtol = getattr(self, "rtol", 1e-10)
        assert_allclose(res1.params, res2.params, rtol=rtol)
        assert_allclose(self.bse_robust, res2.bse, rtol=rtol)
        assert_allclose(self.cov_robust, res2.cov, rtol=rtol)

    @pytest.mark.smoke
    def test_t_test_summary(self):
        """``summary()`` of a t-test with the robust covariance runs."""
        res1 = self.res1
        mat = np.eye(len(res1.params))
        # TODO: if the t_test call is expensive, possibly make it a fixture?
        tt = res1.t_test(mat, cov_p=self.cov_robust)

        tt.summary()

    @pytest.mark.smoke
    def test_t_test_summary_frame(self):
        """``summary_frame()`` of a t-test with the robust covariance runs."""
        res1 = self.res1
        mat = np.eye(len(res1.params))
        tt = res1.t_test(mat, cov_p=self.cov_robust)

        tt.summary_frame()

    @pytest.mark.smoke
    def test_f_test_summary(self):
        """``summary()`` of an F-test with the robust covariance runs."""
        res1 = self.res1
        mat = np.eye(len(res1.params))
        # the last row of the identity is dropped from the restrictions
        ft = res1.f_test(mat[:-1], cov_p=self.cov_robust)

        ft.summary()

    def test_tests(self):  # TODO: break into well-scoped tests
        """t-test and F-test based on ``cov_robust`` agree with ``res2``."""
        # Note: differences between small (t-distribution, ddof) and large
        # (normal)
        # F statistic has no ddof correction in large, but uses F
        # distribution (?)
        res1 = self.res1
        res2 = self.res2
        rtol = getattr(self, "rtol", 1e-10)
        mat = np.eye(len(res1.params))
        tt = res1.t_test(mat, cov_p=self.cov_robust)
        # has 'effect', 'pvalue', 'sd', 'tvalue'
        # TODO confint missing
        assert_allclose(tt.effect, res2.params, rtol=rtol)
        assert_allclose(tt.sd, res2.bse, rtol=rtol)
        assert_allclose(tt.tvalue, res2.tvalues, rtol=rtol)
        if self.small:
            assert_allclose(tt.pvalue, res2.pvalues, rtol=5 * rtol)
        else:
            # large sample: recompute two-sided p-values from the normal
            # distribution instead of using the ones attached to ``tt``
            pval = stats.norm.sf(np.abs(tt.tvalue)) * 2
            assert_allclose(pval, res2.pvalues, rtol=5 * rtol, atol=1e-25)

        ft = res1.f_test(mat[:-1], cov_p=self.cov_robust)
        if self.small:
            # 'df_denom', 'df_num', 'fvalue', 'pvalue'
            assert_allclose(ft.fvalue, res2.F, rtol=rtol)
            # f-pvalue is not directly available in Stata results, but is in
            # ivreg2
            if hasattr(res2, "Fp"):
                assert_allclose(ft.pvalue, res2.Fp, rtol=rtol)
        elif not getattr(self, "skip_f", False):
            # rescale by df_resid / nobs before comparing with the reference F
            dof_corr = res1.df_resid * 1.0 / res1.nobs
            assert_allclose(ft.fvalue * dof_corr, res2.F, rtol=rtol)

        if hasattr(res2, "df_r"):
            assert_equal(ft.df_num, res2.df_m)
            assert_equal(ft.df_denom, res2.df_r)
        else:
            # ivreg2
            assert_equal(ft.df_num, res2.Fdf1)
            assert_equal(ft.df_denom, res2.Fdf2)
|
|
|
|
|
|
class TestOLSRobust1(CheckOLSRobust):
    """HC1 results of an OLS fit on the macro data, compared with regress robust."""

    @classmethod
    def setup_class(cls):
        # One OLS fit shared by all tests of this class and its subclasses.
        macro = macrodata.load_pandas().data

        def scaled_log_diff(column):
            # 400 times the first difference of the logged series
            return 400 * np.diff(np.log(macro[column].values))

        regressors = np.column_stack(
            (scaled_log_diff("realgdp"), macro["realint"][:-1].values)
        )
        design = add_constant(regressors, prepend=False)

        cls.res1 = OLS(scaled_log_diff("realinv"), design).fit()

    def setup_method(self):
        fitted = self.res1
        self.small = True
        self.res2 = res.results_hc0
        self.cov_robust = fitted.cov_HC1
        self.bse_robust = fitted.HC1_se

    def test_qr_equiv(self):
        # GH8157: HC0 standard errors must not depend on the fit method
        qr_results = self.res1.model.fit(method="qr")
        assert_allclose(self.res1.HC0_se, qr_results.HC0_se)
|
|
|
|
|
|
class TestOLSRobust2(TestOLSRobust1):
    """HC1 results compared with ivreg robust small."""

    def setup_method(self):
        fitted = self.res1
        self.res2 = res.results_ivhc0_small
        self.small = True

        self.cov_robust = fitted.cov_HC1
        self.bse_robust = fitted.HC1_se
|
|
|
|
|
|
class TestOLSRobust3(TestOLSRobust1):
    """HC0 results compared with ivreg robust (not small)."""

    def setup_method(self):
        fitted = self.res1
        self.res2 = res.results_ivhc0_large
        self.small = False

        self.cov_robust = fitted.cov_HC0
        self.bse_robust = fitted.HC0_se
|
|
|
|
|
|
class TestOLSRobustHacSmall(TestOLSRobust1):
    """HAC (4 lags, with correction) compared with ivreg robust small."""

    def setup_method(self):
        hac_cov = sw.cov_hac_simple(self.res1, nlags=4, use_correction=True)
        self.cov_robust = hac_cov
        self.bse_robust = sw.se_cov(hac_cov)

        self.res2 = res.results_ivhac4_small
        self.small = True
|
|
|
|
|
|
class TestOLSRobustHacLarge(TestOLSRobust1):
    """HAC (4 lags, no correction) compared with ivreg robust (not small)."""

    def setup_method(self):
        hac_cov = sw.cov_hac_simple(self.res1, nlags=4, use_correction=False)
        self.cov_robust = hac_cov
        self.bse_robust = sw.se_cov(hac_cov)

        self.res2 = res.results_ivhac4_large
        self.small = False
|
|
|
|
|
|
class CheckOLSRobustNewMixin:
    """Checks for results that use the robust covariance as default covariance.

    In addition to the attributes of the basic checks, the concrete class
    provides ``bse_robust2`` / ``cov_robust2`` (a second computation of the
    robust standard errors and covariance) and a reference ``res2`` with a
    ``params_table`` whose columns 4 and 5 hold the confidence limits.
    """

    def test_compare(self):
        """Both computations of the robust covariance agree."""
        rtol = getattr(self, "rtol", 1e-10)
        assert_allclose(self.cov_robust, self.cov_robust2, rtol=rtol)
        assert_allclose(self.bse_robust, self.bse_robust2, rtol=rtol)

    def test_fvalue(self):
        """``fvalue`` / ``f_pvalue`` match the reference unless ``skip_f``."""
        if getattr(self, "skip_f", False):
            # pytest.skip raises by itself, no explicit ``raise`` needed
            pytest.skip("TODO: document why this test is skipped")

        rtol = getattr(self, "rtol", 1e-10)
        assert_allclose(self.res1.fvalue, self.res2.F, rtol=rtol)
        if hasattr(self.res2, "Fp"):
            # only available with ivreg2
            assert_allclose(self.res1.f_pvalue, self.res2.Fp, rtol=rtol)

    def test_confint(self):
        """Confidence intervals and implied critical values match ``res2``."""
        rtol = getattr(self, "rtol", 1e-10)
        ci1 = self.res1.conf_int()
        ci2 = self.res2.params_table[:, 4:6]
        assert_allclose(ci1, ci2, rtol=rtol)

        # check critical value: half-width of each interval in units of its
        # standard error, once from the model and once from the reference.
        # (Previously both sides were computed from ``ci1`` with ``rtol=12``,
        # which could never fail.)
        crit1 = np.diff(ci1, 1).ravel() / 2 / self.res1.bse
        crit2 = np.diff(ci2, 1).ravel() / 2 / self.res2.bse
        # differencing the limits can amplify their relative error, so the
        # tolerance is looser than for the limits themselves
        assert_allclose(crit1, crit2, rtol=10 * rtol)

    def test_ttest(self):
        """t-test with the robust covariance reproduces the reference table."""
        res1 = self.res1
        res2 = self.res2
        rtol = getattr(self, "rtol", 1e-10)
        # NOTE(review): this looks up ``rtol`` and not ``rtolh``, so the
        # 1e-12 default is only used when the class defines no ``rtol``.
        # Reading ``rtolh`` instead would tighten the params/tvalue checks
        # for classes that define both; confirm they still pass before
        # changing it.
        rtolh = getattr(self, "rtol", 1e-12)

        mat = np.eye(len(res1.params))
        tt = res1.t_test(mat, cov_p=self.cov_robust)
        # has 'effect', 'pvalue', 'sd', 'tvalue'
        # TODO confint missing
        assert_allclose(tt.effect, res2.params, rtol=rtolh)
        assert_allclose(tt.sd, res2.bse, rtol=rtol)
        assert_allclose(tt.tvalue, res2.tvalues, rtol=rtolh)
        assert_allclose(tt.pvalue, res2.pvalues, rtol=5 * rtol)
        ci1 = tt.conf_int()
        ci2 = self.res2.params_table[:, 4:6]
        assert_allclose(ci1, ci2, rtol=rtol)

    def test_scale(self):
        """Scale, rsquared and residual degrees of freedom are consistent."""
        res1 = self.res1
        res2 = self.res2
        rtol = 1e-5
        # Note we always use df_resid for scale
        # Stata uses nobs or df_resid for rmse, not always available in Stata
        # assert_allclose(res1.scale, res2.rmse**2 * res2.N / (res2.N - res2.df_m - 1), rtol=rtol)
        skip = False
        if hasattr(res2, "rss"):
            scale = res2.rss / (res2.N - res2.df_m - 1)
        elif hasattr(res2, "rmse"):
            scale = res2.rmse**2
        else:
            # the reference provides nothing to derive the scale from
            skip = True

        if isinstance(res1.model, WLS):
            # Stata uses different scaling and using unweighted resid for rmse
            skip = True

        if not skip:
            assert_allclose(res1.scale, scale, rtol=rtol)

        if res2.vcetype != "Newey-West":
            # no rsquared in Stata
            r2 = res2.r2 if hasattr(res2, "r2") else res2.r2c
            assert_allclose(res1.rsquared, r2, rtol=rtol, err_msg=str(skip))

        # consistency checks, not against Stata
        df_resid = res1.nobs - res1.df_model - 1
        assert_equal(res1.df_resid, df_resid)
        # variance of resid_pearson is 1, with ddof, and loc=0
        psum = (res1.resid_pearson**2).sum()
        assert_allclose(psum, df_resid, rtol=1e-13)

    @pytest.mark.smoke
    def test_summary(self):
        """``summary()`` of the robust results runs."""
        self.res1.summary()
|
|
|
|
|
|
class TestOLSRobust2SmallNew(TestOLSRobust1, CheckOLSRobustNewMixin):
    """HC1 as default covariance with ``use_t=True``, vs ivreg robust small."""

    def setup_method(self):
        robust = self.res1.get_robustcov_results("HC1", use_t=True)
        # keep the original fit reachable, then shadow ``res1`` on the
        # instance with the robust results
        self.res3 = self.res1
        self.res1 = robust

        self.small = True
        self.res2 = res.results_ivhc0_small

        self.bse_robust = robust.bse
        self.cov_robust = robust.cov_params()
        self.bse_robust2 = robust.HC1_se
        self.cov_robust2 = robust.cov_HC1

    def test_compare(self):
        # check that we get a warning using the nested compare methods
        import warnings

        robust = self.res1
        model = robust.model
        # restricted model: drop one variable
        restricted = OLS(model.endog, model.exog[:, [0, 2]]).fit()

        # results from Stata
        expected_chi2 = 4.667944083588736
        expected_pval = 0.0307306938402991
        expected_df = 1

        assert_warns(InvalidTestWarning, robust.compare_lr_test, restricted)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            chi2, pval, df = robust.compare_lr_test(restricted)
        assert_allclose(chi2, expected_chi2, rtol=1e-11)
        assert_allclose(pval, expected_pval, rtol=1e-11)
        assert_equal(df, expected_df)

        # the returned values of compare_f_test are not checked here
        assert_warns(InvalidTestWarning, robust.compare_f_test, restricted)
|
|
|
|
|
|
class TestOLSRobustHACSmallNew(TestOLSRobust1, CheckOLSRobustNewMixin):
    """HAC (4 lags) as default covariance, compared with ivreg robust small."""

    def setup_method(self):
        hac_kwds = dict(maxlags=4, use_correction=True, use_t=True)
        robust = self.res1.get_robustcov_results("HAC", **hac_kwds)
        # keep the original fit reachable, then shadow ``res1``
        self.res3 = self.res1
        self.res1 = robust

        self.small = True
        self.res2 = res.results_ivhac4_small

        self.bse_robust = robust.bse
        self.cov_robust = robust.cov_params()

        # second computation with the standalone function
        hac_cov = sw.cov_hac_simple(robust, nlags=4, use_correction=True)
        self.cov_robust2 = hac_cov
        self.bse_robust2 = sw.se_cov(hac_cov)
|
|
|
|
|
|
class TestOLSRobust2LargeNew(TestOLSRobust1, CheckOLSRobustNewMixin):
    """HC0 as default covariance with ``use_t`` switched off.

    The reference is ``res.results_ivhc0_large``.
    """

    def setup_method(self):
        robust = self.res1.get_robustcov_results("HC0")
        robust.use_t = False
        # keep the original fit reachable, then shadow ``res1``
        self.res3 = self.res1
        self.res1 = robust

        self.small = False
        self.res2 = res.results_ivhc0_large

        self.bse_robust = robust.bse
        self.cov_robust = robust.cov_params()
        self.bse_robust2 = robust.HC0_se
        self.cov_robust2 = robust.cov_HC0

    @pytest.mark.skip(reason="not refactored yet for `large`")
    def test_fvalue(self):
        super().test_fvalue()

    @pytest.mark.skip(reason="not refactored yet for `large`")
    def test_confint(self):
        super().test_confint()
|
|
|
|
|
|
#######################################################
|
|
# cluster robust standard errors
|
|
#######################################################
|
|
|
|
|
|
class CheckOLSRobustCluster(CheckOLSRobust):
    """Base class providing an OLS fit on the Grunfeld data plus group/time ids."""

    @classmethod
    def setup_class(cls):
        from statsmodels.datasets import grunfeld

        data = grunfeld.data.load_pandas()
        # Stata example/data seems to miss last firm
        endog = data.endog[:200]
        exog_all = data.exog[:200]
        design = add_constant(exog_all[["value", "capital"]], prepend=False)
        # asserts do not work for pandas
        cls.res1 = OLS(endog, design).fit()

        # integer codes of the firm names serve as cluster groups
        firm_labels = np.asarray(exog_all[["firm"]], "S20")
        _, firm_codes = np.unique(firm_labels, return_inverse=True)
        cls.groups = firm_codes

        # time indicator in range(max Ti)
        year = np.require(exog_all[["year"]], requirements="W")
        year -= year.min()
        cls.time = np.squeeze(year).astype(int)

        # nw_panel function requires interval bounds
        cls.tidx = [(start, start + 20) for start in range(0, 200, 20)]
|
|
|
|
|
|
class TestOLSRobustCluster2(CheckOLSRobustCluster, CheckOLSRobustNewMixin):
    """One-way cluster covariance (with correction, ``use_t``) vs `reg cluster`."""

    def setup_method(self):
        self.rtol = 1e-6
        self.rtolh = 1e-10
        self.small = True
        self.res2 = res2.results_cluster

        robust = self.res1.get_robustcov_results(
            "cluster", groups=self.groups, use_correction=True, use_t=True
        )
        # keep the original fit reachable, then shadow ``res1``
        self.res3 = self.res1
        self.res1 = robust
        self.bse_robust = robust.bse
        self.cov_robust = robust.cov_params()

        # second computation with the standalone function
        cluster_cov = sw.cov_cluster(robust, self.groups, use_correction=True)
        self.cov_robust2 = cluster_cov
        self.bse_robust2 = sw.se_cov(cluster_cov)
|
|
|
|
|
|
class TestOLSRobustCluster2Input(
    CheckOLSRobustCluster, CheckOLSRobustNewMixin
):
    """Cluster covariance with ``groups`` given as a one-column DataFrame."""

    def setup_method(self):
        import pandas as pd

        self.rtol = 1e-6
        self.rtolh = 1e-10
        self.small = True
        self.res2 = res2.results_cluster

        groups_frame = pd.DataFrame(self.groups.reshape(-1, 1))
        robust = self.res1.get_robustcov_results(
            "cluster", groups=groups_frame, use_correction=True, use_t=True
        )
        # keep the original fit reachable, then shadow ``res1``
        self.res3 = self.res1
        self.res1 = robust
        self.bse_robust = robust.bse
        self.cov_robust = robust.cov_params()

        # second computation uses the plain group array
        cluster_cov = sw.cov_cluster(robust, self.groups, use_correction=True)
        self.cov_robust2 = cluster_cov
        self.bse_robust2 = sw.se_cov(cluster_cov)

    def test_too_many_groups(self):
        # three group columns must be rejected with a ValueError
        column = self.groups.reshape(-1, 1)
        three_columns = np.hstack([column] * 3)
        with assert_raises(ValueError):
            self.res1.get_robustcov_results(
                "cluster",
                groups=three_columns,
                use_correction=True,
                use_t=True,
            )

    def test_2way_dataframe(self):
        # two group columns in a DataFrame are accepted; only checks that
        # the call runs
        import pandas as pd

        column = self.groups.reshape(-1, 1)
        two_columns = pd.DataFrame(np.hstack([column] * 2))
        self.res1.get_robustcov_results(
            "cluster", groups=two_columns, use_correction=True, use_t=True
        )
|
|
|
|
|
|
class TestOLSRobustCluster2Fit(CheckOLSRobustCluster, CheckOLSRobustNewMixin):
    """Cluster covariance requested through ``fit``, compared with `reg cluster`."""

    def setup_method(self):
        self.rtol = 1e-6
        self.rtolh = 1e-10
        self.small = True
        self.res2 = res2.results_cluster

        cluster_kwds = dict(groups=self.groups, use_correction=True, use_t=True)
        robust = self.res1.model.fit(cov_type="cluster", cov_kwds=cluster_kwds)
        # keep the original fit reachable, then shadow ``res1``
        self.res3 = self.res1
        self.res1 = robust
        self.bse_robust = robust.bse
        self.cov_robust = robust.cov_params()

        # second computation with the standalone function
        cluster_cov = sw.cov_cluster(robust, self.groups, use_correction=True)
        self.cov_robust2 = cluster_cov
        self.bse_robust2 = sw.se_cov(cluster_cov)

    def test_basic_inference(self):
        # default inference attributes of the robust fit vs the reference
        fitted = self.res1
        reference = self.res2
        rtol = 1e-7
        assert_allclose(fitted.params, reference.params, rtol=1e-8)
        assert_allclose(fitted.bse, reference.bse, rtol=rtol)
        assert_allclose(
            fitted.pvalues, reference.pvalues, rtol=rtol, atol=1e-20
        )
        expected_ci = reference.params_table[:, 4:6]
        assert_allclose(fitted.conf_int(), expected_ci, rtol=5e-7, atol=1e-20)
|
|
|
|
|
|
class TestOLSRobustCluster2Large(
    CheckOLSRobustCluster, CheckOLSRobustNewMixin
):
    """Cluster covariance without small-sample correction and ``use_t=False``.

    The reference is ``res2.results_cluster_large``.
    """

    def setup_method(self):
        res_ols = self.res1.get_robustcov_results(
            "cluster",
            groups=self.groups,
            use_correction=False,
            use_t=False,
            df_correction=True,
        )
        # keep the original fit reachable, then shadow ``res1`` on the
        # instance with the robust results
        self.res3 = self.res1
        self.res1 = res_ols
        self.bse_robust = res_ols.bse
        self.cov_robust = res_ols.cov_params()
        # second computation with the standalone function
        cov1 = sw.cov_cluster(self.res1, self.groups, use_correction=False)
        se1 = sw.se_cov(cov1)
        self.bse_robust2 = se1
        self.cov_robust2 = cov1
        self.small = False
        self.res2 = res2.results_cluster_large

        self.skip_f = True
        self.rtol = 1e-6
        self.rtolh = 1e-10

    # Named ``test_fvalue`` so that it overrides the inherited check, in line
    # with the ``fit``-based variant of this class. It was ``test_f_value``
    # before, which added a second skipped test instead of overriding.
    @pytest.mark.skip(reason="GH#1189 issuecomment-29141741")
    def test_fvalue(self):
        super().test_fvalue()
|
|
|
|
|
|
class TestOLSRobustCluster2LargeFit(
    CheckOLSRobustCluster, CheckOLSRobustNewMixin
):
    """Large-sample cluster covariance requested through ``fit`` on a new model."""

    def setup_method(self):
        self.skip_f = True
        self.rtol = 1e-6
        self.rtolh = 1e-10
        self.small = False
        self.res2 = res2.results_cluster_large

        # a fresh model built from the arrays of the shared fit is used
        # instead of ``self.res1.model``
        fresh_model = OLS(self.res1.model.endog, self.res1.model.exog)
        cluster_kwds = dict(
            groups=self.groups,
            use_correction=False,
            use_t=False,
            df_correction=True,
        )
        robust = fresh_model.fit(cov_type="cluster", cov_kwds=cluster_kwds)
        # keep the original fit reachable, then shadow ``res1``
        self.res3 = self.res1
        self.res1 = robust
        self.bse_robust = robust.bse
        self.cov_robust = robust.cov_params()

        # second computation with the standalone function
        cluster_cov = sw.cov_cluster(robust, self.groups, use_correction=False)
        self.cov_robust2 = cluster_cov
        self.bse_robust2 = sw.se_cov(cluster_cov)

    @pytest.mark.skip(reason="GH#1189 issuecomment-29141741")
    def test_fvalue(self):
        super().test_fvalue()
|
|
|
|
|
|
class TestOLSRobustClusterGS(CheckOLSRobustCluster, CheckOLSRobustNewMixin):
    """``nw-groupsum`` covariance (4 lags) vs ``res2.results_nw_groupsum4``."""

    def setup_method(self):
        self.skip_f = True
        self.rtol = 1e-6
        self.rtolh = 1e-10
        self.small = True
        self.res2 = res2.results_nw_groupsum4

        groupsum_kwds = dict(
            time=self.time,
            maxlags=4,
            use_correction=False,
            use_t=True,
        )
        robust = self.res1.get_robustcov_results("nw-groupsum", **groupsum_kwds)
        # keep the original fit reachable, then shadow ``res1``
        self.res3 = self.res1
        self.res1 = robust
        self.bse_robust = robust.bse
        self.cov_robust = robust.cov_params()

        # second computation with the standalone function
        groupsum_cov = sw.cov_nw_groupsum(
            robust, 4, self.time, use_correction=False
        )
        self.cov_robust2 = groupsum_cov
        self.bse_robust2 = sw.se_cov(groupsum_cov)
|
|
|
|
|
|
class TestOLSRobustClusterGSFit(CheckOLSRobustCluster, CheckOLSRobustNewMixin):
    """``nw-groupsum`` covariance (4 lags) requested through ``fit``."""

    def setup_method(self):
        self.skip_f = True
        self.rtol = 1e-6
        self.rtolh = 1e-10
        self.small = True
        self.res2 = res2.results_nw_groupsum4

        groupsum_kwds = dict(
            time=self.time, maxlags=4, use_correction=False, use_t=True
        )
        robust = self.res1.model.fit(
            cov_type="nw-groupsum", cov_kwds=groupsum_kwds
        )
        # keep the original fit reachable, then shadow ``res1``
        self.res3 = self.res1
        self.res1 = robust
        self.bse_robust = robust.bse
        self.cov_robust = robust.cov_params()

        # second computation with the standalone function
        groupsum_cov = sw.cov_nw_groupsum(
            robust, 4, self.time, use_correction=False
        )
        self.cov_robust2 = groupsum_cov
        self.bse_robust2 = sw.se_cov(groupsum_cov)
|
|
|
|
|
|
class TestOLSRobustClusterNWP(CheckOLSRobustCluster, CheckOLSRobustNewMixin):
    """``nw-panel`` covariance (4 lags) vs ``res2.results_nw_panel4``."""

    def setup_method(self):
        self.skip_f = True
        self.rtol = 1e-6
        self.rtolh = 1e-10
        self.small = True
        self.res2 = res2.results_nw_panel4

        panel_kwds = dict(
            time=self.time,
            maxlags=4,
            use_correction="hac",
            use_t=True,
            df_correction=False,
        )
        robust = self.res1.get_robustcov_results("nw-panel", **panel_kwds)
        # keep the original fit reachable, then shadow ``res1``
        self.res3 = self.res1
        self.res1 = robust
        self.bse_robust = robust.bse
        self.cov_robust = robust.cov_params()

        # second computation with the standalone function and the
        # interval bounds
        panel_cov = sw.cov_nw_panel(robust, 4, self.tidx)
        self.cov_robust2 = panel_cov
        self.bse_robust2 = sw.se_cov(panel_cov)

    def test_keyword(self):
        # check corrected keyword: "hac-panel" gives the same standard errors
        panel_kwds = dict(
            time=self.time,
            maxlags=4,
            use_correction="hac",
            use_t=True,
            df_correction=False,
        )
        renamed = self.res1.get_robustcov_results("hac-panel", **panel_kwds)
        assert_allclose(renamed.bse, self.res1.bse, rtol=1e-12)
|
|
|
|
|
|
class TestOLSRobustClusterNWPGroupsFit(
    CheckOLSRobustCluster, CheckOLSRobustNewMixin
):
    """``nw-panel`` covariance through ``fit`` with ``groups`` instead of ``time``."""

    def setup_method(self):
        self.skip_f = True
        self.rtol = 1e-6
        self.rtolh = 1e-10
        self.small = True
        self.res2 = res2.results_nw_panel4

        panel_kwds = dict(
            groups=self.groups,
            maxlags=4,
            use_correction="hac",
            use_t=True,
            df_correction=False,
        )
        robust = self.res1.model.fit(cov_type="nw-panel", cov_kwds=panel_kwds)
        # keep the original fit reachable, then shadow ``res1``
        self.res3 = self.res1
        self.res1 = robust
        self.bse_robust = robust.bse
        self.cov_robust = robust.cov_params()

        # second computation with the standalone function and the
        # interval bounds
        panel_cov = sw.cov_nw_panel(robust, 4, self.tidx)
        self.cov_robust2 = panel_cov
        self.bse_robust2 = sw.se_cov(panel_cov)
|
|
|
|
|
|
# TODO: low precision/agreement
|
|
class TestOLSRobustCluster2G(CheckOLSRobustCluster, CheckOLSRobustNewMixin):
    """Two-way cluster covariance (groups and time), with correction."""

    def setup_method(self):
        # only f_pvalue and confint for constant differ >rtol=0.05
        self.rtol = 0.35
        self.rtolh = 1e-10
        self.small = True
        self.res2 = res2.results_cluster_2groups_small

        robust = self.res1.get_robustcov_results(
            "cluster",
            groups=(self.groups, self.time),
            use_correction=True,
            use_t=True,
        )
        # keep the original fit reachable, then shadow ``res1``
        self.res3 = self.res1
        self.res1 = robust
        self.bse_robust = robust.bse
        self.cov_robust = robust.cov_params()

        # second computation: first element returned by the standalone
        # function
        two_way = sw.cov_cluster_2groups(
            robust, self.groups, group2=self.time, use_correction=True
        )
        self.cov_robust2 = two_way[0]
        self.bse_robust2 = sw.se_cov(two_way[0])
|
|
|
|
|
|
class TestOLSRobustCluster2GLarge(
    CheckOLSRobustCluster, CheckOLSRobustNewMixin
):
    """Two-way cluster covariance without correction and ``use_t=False``."""

    def setup_method(self):
        self.skip_f = True
        self.rtol = 1e-7
        self.rtolh = 1e-10
        self.small = False
        self.res2 = res2.results_cluster_2groups_large

        robust = self.res1.get_robustcov_results(
            "cluster",
            groups=(self.groups, self.time),
            use_correction=False,
            use_t=False,
        )
        # keep the original fit reachable, then shadow ``res1``
        self.res3 = self.res1
        self.res1 = robust
        self.bse_robust = robust.bse
        self.cov_robust = robust.cov_params()

        # second computation: first element returned by the standalone
        # function
        two_way = sw.cov_cluster_2groups(
            robust, self.groups, group2=self.time, use_correction=False
        )
        self.cov_robust2 = two_way[0]
        self.bse_robust2 = sw.se_cov(two_way[0])
|
|
|
|
|
|
######################################
|
|
# WLS
|
|
######################################
|
|
|
|
|
|
class CheckWLSRobustCluster(CheckOLSRobust):
    """Base class providing a WLS fit on the Grunfeld data plus group/time ids."""

    @classmethod
    def setup_class(cls):
        from statsmodels.datasets import grunfeld

        data = grunfeld.data.load_pandas()
        # Stata example/data seems to miss last firm
        endog = data.endog[:200]
        exog_all = data.exog[:200]
        design = add_constant(exog_all[["value", "capital"]], prepend=False)
        # asserts do not work for pandas
        inverse_value = 1 / exog_all["value"]
        cls.res1 = WLS(endog, design, weights=inverse_value).fit()

        # integer codes of the firm names serve as cluster groups
        firm_labels = np.asarray(exog_all[["firm"]], "S20")
        _, firm_codes = np.unique(firm_labels, return_inverse=True)
        cls.groups = firm_codes

        # time indicator in range(max Ti)
        year = np.require(exog_all[["year"]], requirements="W")
        year -= year.min()
        cls.time = np.squeeze(year).astype(int)

        # nw_panel function requires interval bounds
        cls.tidx = [(start, start + 20) for start in range(0, 200, 20)]
|
|
|
|
|
|
# not available yet for WLS
|
|
class TestWLSRobustCluster2(CheckWLSRobustCluster, CheckOLSRobustNewMixin):
    """WLS cluster covariance (with correction, ``use_t``) vs `reg cluster`."""

    def setup_method(self):
        self.rtol = 1e-6
        self.rtolh = 1e-10
        self.small = True
        self.res2 = res2.results_cluster_wls_small

        robust = self.res1.get_robustcov_results(
            "cluster", groups=self.groups, use_correction=True, use_t=True
        )
        # keep the original fit reachable, then shadow ``res1``
        self.res3 = self.res1
        self.res1 = robust
        self.bse_robust = robust.bse
        self.cov_robust = robust.cov_params()

        # second computation with the standalone function
        cluster_cov = sw.cov_cluster(robust, self.groups, use_correction=True)
        self.cov_robust2 = cluster_cov
        self.bse_robust2 = sw.se_cov(cluster_cov)
|
|
|
|
|
|
# not available yet for WLS
|
|
class TestWLSRobustCluster2Large(
    CheckWLSRobustCluster, CheckOLSRobustNewMixin
):
    """WLS cluster covariance without correction and ``use_t=False``."""

    def setup_method(self):
        self.skip_f = True
        self.rtol = 1e-6
        self.rtolh = 1e-10
        self.small = False
        self.res2 = res2.results_cluster_wls_large

        robust = self.res1.get_robustcov_results(
            "cluster",
            groups=self.groups,
            use_correction=False,
            use_t=False,
            df_correction=True,
        )
        # keep the original fit reachable, then shadow ``res1``
        self.res3 = self.res1
        self.res1 = robust
        self.bse_robust = robust.bse
        self.cov_robust = robust.cov_params()

        # second computation with the standalone function
        cluster_cov = sw.cov_cluster(robust, self.groups, use_correction=False)
        self.cov_robust2 = cluster_cov
        self.bse_robust2 = sw.se_cov(cluster_cov)
|
|
|
|
|
|
class TestWLSRobustSmall(CheckWLSRobustCluster, CheckOLSRobustNewMixin):
    """WLS with HC1 as default covariance vs ``res2.results_hc1_wls_small``."""

    def setup_method(self):
        self.skip_f = True
        self.rtol = 1e-6
        self.rtolh = 1e-10
        self.small = True
        self.res2 = res2.results_hc1_wls_small

        robust = self.res1.get_robustcov_results("HC1", use_t=True)
        # keep the original fit reachable, then shadow ``res1``
        self.res3 = self.res1
        self.res1 = robust
        self.bse_robust = robust.bse
        self.cov_robust = robust.cov_params()

        # TODO: check standalone function
        # cov1 = sw.cov_cluster(self.res1, self.groups, use_correction=False)
        hc1_cov = robust.cov_HC1
        self.cov_robust2 = hc1_cov
        self.bse_robust2 = sw.se_cov(hc1_cov)
|
|
|
|
|
|
class TestWLSOLSRobustSmall:
    """WLS and OLS on explicitly reweighted data give the same robust results."""

    @classmethod
    def setup_class(cls):
        from statsmodels.datasets import grunfeld

        data = grunfeld.data.load_pandas()
        # Stata example/data seems to miss last firm
        endog = data.endog[:200]
        exog_all = data.exog[:200]
        design = add_constant(exog_all[["value", "capital"]], prepend=False)
        # asserts do not work for pandas
        cls.res_wls = WLS(endog, design, weights=1 / exog_all["value"]).fit()

        # the same regression as OLS on data scaled by the square root of
        # the weights
        root_weights = 1 / np.sqrt(np.asarray(exog_all["value"]))
        scaled_endog = endog * root_weights
        scaled_exog = np.asarray(design) * root_weights[:, None]
        cls.res_ols = OLS(scaled_endog, scaled_exog).fit()

        firm_labels = np.asarray(exog_all[["firm"]], "S20")
        _, firm_codes = np.unique(firm_labels, return_inverse=True)
        cls.groups = firm_codes

        # time indicator in range(max Ti)
        year = np.require(exog_all[["year"]], requirements="W")
        year -= year.min()
        cls.time = np.squeeze(year).astype(int)

        # nw_panel function requires interval bounds
        cls.tidx = [(start, start + 20) for start in range(0, 200, 20)]

    def test_all(self):
        # every HC variant is requested with the same keywords
        kwds = dict(use_t=True)

        for cov_type in ("HC0", "HC1", "HC2", "HC3"):
            from_ols = self.res_ols.get_robustcov_results(cov_type, **kwds)
            from_wls = self.res_wls.get_robustcov_results(cov_type, **kwds)
            assert_allclose(from_ols.params, from_wls.params, rtol=1e-13)
            assert_allclose(
                from_ols.cov_params(), from_wls.cov_params(), rtol=1e-13
            )
            assert_allclose(from_ols.bse, from_wls.bse, rtol=1e-13)
            assert_allclose(from_ols.pvalues, from_wls.pvalues, rtol=1e-13)

            restrictions = np.eye(len(from_ols.params))
            f_ols = from_ols.f_test(restrictions)
            f_wls = from_wls.f_test(restrictions)
            assert_allclose(f_ols.fvalue, f_wls.fvalue, rtol=1e-12)
            assert_allclose(f_ols.pvalue, f_wls.pvalue, rtol=5e-11)

    def test_fixed_scale(self):
        cov_type = "fixed_scale"
        kwds = {}

        def check_uses_normalized_cov(results):
            # covariance and inference equal those of a t-test that is
            # given normalized_cov_params explicitly
            tt = results.t_test(
                np.eye(len(results.params)),
                cov_p=results.normalized_cov_params,
            )
            assert_allclose(
                results.cov_params(), results.normalized_cov_params, rtol=1e-13
            )
            assert_allclose(results.bse, tt.sd, rtol=1e-13)
            assert_allclose(results.pvalues, tt.pvalue, rtol=1e-13)
            assert_allclose(results.tvalues, tt.tvalue, rtol=1e-13)

        from_ols = self.res_ols.get_robustcov_results(cov_type, **kwds)
        from_wls = self.res_wls.get_robustcov_results(cov_type, **kwds)
        assert_allclose(from_ols.params, from_wls.params, rtol=1e-13)
        assert_allclose(
            from_ols.cov_params(), from_wls.cov_params(), rtol=1e-13
        )
        assert_allclose(from_ols.bse, from_wls.bse, rtol=1e-13)
        assert_allclose(from_ols.pvalues, from_wls.pvalues, rtol=1e-12)

        check_uses_normalized_cov(from_wls)

        # using cov_type in fit
        wls_model = self.res_wls.model
        refit_model = WLS(
            wls_model.endog, wls_model.exog, weights=wls_model.weights
        )
        refit = refit_model.fit(cov_type=cov_type, cov_kwds=kwds)
        check_uses_normalized_cov(refit)
|
|
|
|
|
|
def test_cov_type_fixed_scale():
    """Standard errors of WLS with the default and ``"fixed scale"`` covariance.

    The expected values are the hard-coded ones of the original test; the
    rescaled cases use 3 times the fixed-scale standard errors.
    """
    # this is a unit test from scipy curvefit for `absolute_sigma` keyword
    xdata = np.array([0, 1, 2, 3, 4, 5])
    ydata = np.array([1, 1, 5, 7, 8, 12])
    sigma = np.array([1, 2, 1, 2, 1, 2])

    xdata = np.column_stack((xdata, np.ones(len(xdata))))
    weights = 1.0 / sigma**2

    # default covariance (this fit/assert pair used to be repeated verbatim)
    res = WLS(ydata, xdata, weights=weights).fit()
    assert_allclose(res.bse, [0.20659803, 0.57204404], rtol=1e-3)

    res = WLS(ydata, xdata, weights=weights).fit(cov_type="fixed scale")
    assert_allclose(res.bse, [0.30714756, 0.85045308], rtol=1e-3)

    # dividing the weights by 9 triples the standard errors ...
    res = WLS(ydata, xdata, weights=weights / 9.0).fit(cov_type="fixed scale")
    assert_allclose(res.bse, [3 * 0.30714756, 3 * 0.85045308], rtol=1e-3)

    # ... and so does passing scale=9 with the original weights
    res = WLS(ydata, xdata, weights=weights).fit(
        cov_type="fixed scale", cov_kwds={"scale": 9}
    )
    assert_allclose(res.bse, [3 * 0.30714756, 3 * 0.85045308], rtol=1e-3)
|
|
|
|
|
|
@pytest.mark.parametrize(
    "cov_info",
    [
        ("nonrobust", {}),
        ("HC0", {}),
        ("HC1", {}),
        ("HC2", {}),
        ("HC3", {}),
        ("HAC", {"maxlags": 7}),
        ("cluster", {"groups": (np.arange(500) % 27)}),
    ],
)
def test_qr_equiv(cov_info):
    """Standard errors agree between the default fit and ``method="qr"``."""
    cov_type, cov_kwds = cov_info
    nobs, nvars = 500, 3

    # seeded generator; regressors are drawn before the noise
    rng = np.random.RandomState(123498)
    exog = rng.standard_normal((nobs, nvars))
    endog = exog @ np.ones(nvars) + rng.standard_normal(nobs)

    model = OLS(endog, exog)
    fit_kwds = dict(cov_type=cov_type, cov_kwds=cov_kwds)
    default_fit = model.fit(**fit_kwds)
    qr_fit = model.fit(method="qr", **fit_kwds)
    assert_allclose(default_fit.bse, qr_fit.bse)
|