1220 lines
45 KiB
Python
1220 lines
45 KiB
Python
|
"""
|
|||
|
Created on Wed Mar 18 10:33:38 2020
|
|||
|
|
|||
|
Author: Josef Perktold
|
|||
|
License: BSD-3
|
|||
|
|
|||
|
"""
|
|||
|
|
|||
|
import numpy as np
|
|||
|
from scipy import stats
|
|||
|
from scipy.special import ncfdtrinc
|
|||
|
|
|||
|
# functions that use scipy.special instead of boost based function in stats
|
|||
|
from statsmodels.stats.power import ncf_cdf, ncf_ppf
|
|||
|
|
|||
|
from statsmodels.stats.robust_compare import TrimmedMean, scale_transform
|
|||
|
from statsmodels.tools.testing import Holder
|
|||
|
from statsmodels.stats.base import HolderTuple
|
|||
|
|
|||
|
|
|||
|
def effectsize_oneway(means, vars_, nobs, use_var="unequal", ddof_between=0):
|
|||
|
"""
|
|||
|
Effect size corresponding to Cohen's f = nc / nobs for oneway anova
|
|||
|
|
|||
|
This contains adjustment for Welch and Brown-Forsythe Anova so that
|
|||
|
effect size can be used with FTestAnovaPower.
|
|||
|
|
|||
|
Parameters
|
|||
|
----------
|
|||
|
means : array_like
|
|||
|
Mean of samples to be compared
|
|||
|
vars_ : float or array_like
|
|||
|
Residual (within) variance of each sample or pooled
|
|||
|
If ``vars_`` is scalar, then it is interpreted as pooled variance that
|
|||
|
is the same for all samples, ``use_var`` will be ignored.
|
|||
|
Otherwise, the variances are used depending on the ``use_var`` keyword.
|
|||
|
nobs : int or array_like
|
|||
|
Number of observations for the samples.
|
|||
|
If nobs is scalar, then it is assumed that all samples have the same
|
|||
|
number ``nobs`` of observation, i.e. a balanced sample case.
|
|||
|
Otherwise, statistics will be weighted corresponding to nobs.
|
|||
|
Only relative sizes are relevant, any proportional change to nobs does
|
|||
|
not change the effect size.
|
|||
|
use_var : {"unequal", "equal", "bf"}
|
|||
|
If ``use_var`` is "unequal", then the variances can differ across
|
|||
|
samples and the effect size for Welch anova will be computed.
|
|||
|
ddof_between : int
|
|||
|
Degrees of freedom correction for the weighted between sum of squares.
|
|||
|
The denominator is ``nobs_total - ddof_between``
|
|||
|
This can be used to match differences across reference literature.
|
|||
|
|
|||
|
Returns
|
|||
|
-------
|
|||
|
f2 : float
|
|||
|
Effect size corresponding to squared Cohen's f, which is also equal
|
|||
|
to the noncentrality divided by total number of observations.
|
|||
|
|
|||
|
Notes
|
|||
|
-----
|
|||
|
This currently handles the following cases for oneway anova
|
|||
|
|
|||
|
- balanced sample with homoscedastic variances
|
|||
|
- samples with different number of observations and with homoscedastic
|
|||
|
variances
|
|||
|
- samples with different number of observations and with heteroskedastic
|
|||
|
variances. This corresponds to Welch anova
|
|||
|
|
|||
|
In the case of "unequal" and "bf" methods for unequal variances, the
|
|||
|
effect sizes do not directly correspond to the test statistic in Anova.
|
|||
|
Both have correction terms dropped or added, so the effect sizes match up
|
|||
|
with using FTestAnovaPower.
|
|||
|
If all variances are equal, then all three methods result in the same
|
|||
|
effect size. If variances are unequal, then the three methods produce
|
|||
|
small differences in effect size.
|
|||
|
|
|||
|
Note, the effect size and power computation for BF Anova was not found in
|
|||
|
the literature. The correction terms were added so that FTestAnovaPower
|
|||
|
provides a good approximation to the power.
|
|||
|
|
|||
|
Status: experimental
|
|||
|
We might add additional returns, if those are needed to support power
|
|||
|
and sample size applications.
|
|||
|
|
|||
|
Examples
|
|||
|
--------
|
|||
|
The following shows how to compute effect size and power for each of the
|
|||
|
three anova methods. The null hypothesis is that the means are equal which
|
|||
|
corresponds to a zero effect size. Under the alternative, means differ
|
|||
|
with two sample means at a distance delta from the mean. We assume the
|
|||
|
variance is the same under the null and alternative hypothesis.
|
|||
|
|
|||
|
``nobs`` for the samples defines the fraction of observations in the
|
|||
|
samples. ``nobs`` in the power method defines the total sample size.
|
|||
|
|
|||
|
In simulations, the computed power for standard anova,
|
|||
|
i.e.``use_var="equal"`` overestimates the simulated power by a few percent.
|
|||
|
The equal variance assumption does not hold in this example.
|
|||
|
|
|||
|
>>> from statsmodels.stats.oneway import effectsize_oneway
|
|||
|
>>> from statsmodels.stats.power import FTestAnovaPower
|
|||
|
>>>
|
|||
|
>>> nobs = np.array([10, 12, 13, 15])
|
|||
|
>>> delta = 0.5
|
|||
|
>>> means_alt = np.array([-1, 0, 0, 1]) * delta
|
|||
|
>>> vars_ = np.arange(1, len(means_alt) + 1)
|
|||
|
>>>
|
|||
|
>>> f2_alt = effectsize_oneway(means_alt, vars_, nobs, use_var="equal")
|
|||
|
>>> f2_alt
|
|||
|
0.04581300813008131
|
|||
|
>>>
|
|||
|
>>> kwds = {'effect_size': np.sqrt(f2_alt), 'nobs': 100, 'alpha': 0.05,
|
|||
|
... 'k_groups': 4}
|
|||
|
>>> power = FTestAnovaPower().power(**kwds)
|
|||
|
>>> power
|
|||
|
0.39165892158983273
|
|||
|
>>>
|
|||
|
>>> f2_alt = effectsize_oneway(means_alt, vars_, nobs, use_var="unequal")
|
|||
|
>>> f2_alt
|
|||
|
0.060640138408304504
|
|||
|
>>>
|
|||
|
>>> kwds['effect_size'] = np.sqrt(f2_alt)
|
|||
|
>>> power = FTestAnovaPower().power(**kwds)
|
|||
|
>>> power
|
|||
|
0.5047366512800622
|
|||
|
>>>
|
|||
|
>>> f2_alt = effectsize_oneway(means_alt, vars_, nobs, use_var="bf")
|
|||
|
>>> f2_alt
|
|||
|
0.04391324307956788
|
|||
|
>>>
|
|||
|
>>> kwds['effect_size'] = np.sqrt(f2_alt)
|
|||
|
>>> power = FTestAnovaPower().power(**kwds)
|
|||
|
>>> power
|
|||
|
0.3765792117047725
|
|||
|
|
|||
|
"""
|
|||
|
# the code here is largely a copy of onway_generic with adjustments
|
|||
|
|
|||
|
means = np.asarray(means)
|
|||
|
n_groups = means.shape[0]
|
|||
|
|
|||
|
if np.size(nobs) == 1:
|
|||
|
nobs = np.ones(n_groups) * nobs
|
|||
|
|
|||
|
nobs_t = nobs.sum()
|
|||
|
|
|||
|
if use_var == "equal":
|
|||
|
if np.size(vars_) == 1:
|
|||
|
var_resid = vars_
|
|||
|
else:
|
|||
|
vars_ = np.asarray(vars_)
|
|||
|
var_resid = ((nobs - 1) * vars_).sum() / (nobs_t - n_groups)
|
|||
|
|
|||
|
vars_ = var_resid # scalar, if broadcasting works
|
|||
|
|
|||
|
weights = nobs / vars_
|
|||
|
|
|||
|
w_total = weights.sum()
|
|||
|
w_rel = weights / w_total
|
|||
|
# meanw_t = (weights * means).sum() / w_total
|
|||
|
meanw_t = w_rel @ means
|
|||
|
|
|||
|
f2 = np.dot(weights, (means - meanw_t)**2) / (nobs_t - ddof_between)
|
|||
|
|
|||
|
if use_var.lower() == "bf":
|
|||
|
weights = nobs
|
|||
|
w_total = weights.sum()
|
|||
|
w_rel = weights / w_total
|
|||
|
meanw_t = w_rel @ means
|
|||
|
# TODO: reuse general case with weights
|
|||
|
tmp = ((1. - nobs / nobs_t) * vars_).sum()
|
|||
|
statistic = 1. * (nobs * (means - meanw_t)**2).sum()
|
|||
|
statistic /= tmp
|
|||
|
f2 = statistic * (1. - nobs / nobs_t).sum() / nobs_t
|
|||
|
# correction factor for df_num in BFM
|
|||
|
df_num2 = n_groups - 1
|
|||
|
df_num = tmp**2 / ((vars_**2).sum() +
|
|||
|
(nobs / nobs_t * vars_).sum()**2 -
|
|||
|
2 * (nobs / nobs_t * vars_**2).sum())
|
|||
|
f2 *= df_num / df_num2
|
|||
|
|
|||
|
return f2
|
|||
|
|
|||
|
|
|||
|
def convert_effectsize_fsqu(f2=None, eta2=None):
|
|||
|
"""Convert squared effect sizes in f family
|
|||
|
|
|||
|
f2 is signal to noise ratio, var_explained / var_residual
|
|||
|
|
|||
|
eta2 is proportion of explained variance, var_explained / var_total
|
|||
|
|
|||
|
uses the relationship:
|
|||
|
f2 = eta2 / (1 - eta2)
|
|||
|
|
|||
|
Parameters
|
|||
|
----------
|
|||
|
f2 : None or float
|
|||
|
Squared Cohen's F effect size. If f2 is not None, then eta2 will be
|
|||
|
computed.
|
|||
|
eta2 : None or float
|
|||
|
Squared eta effect size. If f2 is None and eta2 is not None, then f2 is
|
|||
|
computed.
|
|||
|
|
|||
|
Returns
|
|||
|
-------
|
|||
|
res : Holder instance
|
|||
|
An instance of the Holder class with f2 and eta2 as attributes.
|
|||
|
|
|||
|
"""
|
|||
|
if f2 is not None:
|
|||
|
eta2 = 1 / (1 + 1 / f2)
|
|||
|
|
|||
|
elif eta2 is not None:
|
|||
|
f2 = eta2 / (1 - eta2)
|
|||
|
|
|||
|
res = Holder(f2=f2, eta2=eta2)
|
|||
|
return res
|
|||
|
|
|||
|
|
|||
|
def _fstat2effectsize(f_stat, df):
|
|||
|
"""Compute anova effect size from F-statistic
|
|||
|
|
|||
|
This might be combined with convert_effectsize_fsqu
|
|||
|
|
|||
|
Parameters
|
|||
|
----------
|
|||
|
f_stat : array_like
|
|||
|
Test statistic of an F-test
|
|||
|
df : tuple
|
|||
|
degrees of freedom ``df = (df1, df2)`` where
|
|||
|
- df1 : numerator degrees of freedom, number of constraints
|
|||
|
- df2 : denominator degrees of freedom, df_resid
|
|||
|
|
|||
|
Returns
|
|||
|
-------
|
|||
|
res : Holder instance
|
|||
|
This instance contains effect size measures f2, eta2, omega2 and eps2
|
|||
|
as attributes.
|
|||
|
|
|||
|
Notes
|
|||
|
-----
|
|||
|
This uses the following definitions:
|
|||
|
|
|||
|
- f2 = f_stat * df1 / df2
|
|||
|
- eta2 = f2 / (f2 + 1)
|
|||
|
- omega2 = (f2 - df1 / df2) / (f2 + 2)
|
|||
|
- eps2 = (f2 - df1 / df2) / (f2 + 1)
|
|||
|
|
|||
|
This differs from effect size measures in other function which define
|
|||
|
``f2 = f_stat * df1 / nobs``
|
|||
|
or an equivalent expression for power computation. The noncentrality
|
|||
|
index for the hypothesis test is in those cases given by
|
|||
|
``nc = f_stat * df1``.
|
|||
|
|
|||
|
Currently omega2 and eps2 are computed in two different ways. Those
|
|||
|
values agree for regular cases but can show different behavior in corner
|
|||
|
cases (e.g. zero division).
|
|||
|
|
|||
|
"""
|
|||
|
df1, df2 = df
|
|||
|
f2 = f_stat * df1 / df2
|
|||
|
eta2 = f2 / (f2 + 1)
|
|||
|
omega2_ = (f_stat - 1) / (f_stat + (df2 + 1) / df1)
|
|||
|
omega2 = (f2 - df1 / df2) / (f2 + 1 + 1 / df2) # rewrite
|
|||
|
eps2_ = (f_stat - 1) / (f_stat + df2 / df1)
|
|||
|
eps2 = (f2 - df1 / df2) / (f2 + 1) # rewrite
|
|||
|
return Holder(f2=f2, eta2=eta2, omega2=omega2, eps2=eps2, eps2_=eps2_,
|
|||
|
omega2_=omega2_)
|
|||
|
|
|||
|
|
|||
|
# conversion functions for Wellek's equivalence effect size
|
|||
|
# these are mainly to compare with literature
|
|||
|
|
|||
|
def wellek_to_f2(eps, n_groups):
|
|||
|
"""Convert Wellek's effect size (sqrt) to Cohen's f-squared
|
|||
|
|
|||
|
This computes the following effect size :
|
|||
|
|
|||
|
f2 = 1 / n_groups * eps**2
|
|||
|
|
|||
|
Parameters
|
|||
|
----------
|
|||
|
eps : float or ndarray
|
|||
|
Wellek's effect size used in anova equivalence test
|
|||
|
n_groups : int
|
|||
|
Number of groups in oneway comparison
|
|||
|
|
|||
|
Returns
|
|||
|
-------
|
|||
|
f2 : effect size Cohen's f-squared
|
|||
|
|
|||
|
"""
|
|||
|
f2 = 1 / n_groups * eps**2
|
|||
|
return f2
|
|||
|
|
|||
|
|
|||
|
def f2_to_wellek(f2, n_groups):
|
|||
|
"""Convert Cohen's f-squared to Wellek's effect size (sqrt)
|
|||
|
|
|||
|
This computes the following effect size :
|
|||
|
|
|||
|
eps = sqrt(n_groups * f2)
|
|||
|
|
|||
|
Parameters
|
|||
|
----------
|
|||
|
f2 : float or ndarray
|
|||
|
Effect size Cohen's f-squared
|
|||
|
n_groups : int
|
|||
|
Number of groups in oneway comparison
|
|||
|
|
|||
|
Returns
|
|||
|
-------
|
|||
|
eps : float or ndarray
|
|||
|
Wellek's effect size used in anova equivalence test
|
|||
|
"""
|
|||
|
eps = np.sqrt(n_groups * f2)
|
|||
|
return eps
|
|||
|
|
|||
|
|
|||
|
def fstat_to_wellek(f_stat, n_groups, nobs_mean):
|
|||
|
"""Convert F statistic to wellek's effect size eps squared
|
|||
|
|
|||
|
This computes the following effect size :
|
|||
|
|
|||
|
es = f_stat * (n_groups - 1) / nobs_mean
|
|||
|
|
|||
|
Parameters
|
|||
|
----------
|
|||
|
f_stat : float or ndarray
|
|||
|
Test statistic of an F-test.
|
|||
|
n_groups : int
|
|||
|
Number of groups in oneway comparison
|
|||
|
nobs_mean : float or ndarray
|
|||
|
Average number of observations across groups.
|
|||
|
|
|||
|
Returns
|
|||
|
-------
|
|||
|
eps : float or ndarray
|
|||
|
Wellek's effect size used in anova equivalence test
|
|||
|
|
|||
|
"""
|
|||
|
es = f_stat * (n_groups - 1) / nobs_mean
|
|||
|
return es
|
|||
|
|
|||
|
|
|||
|
def confint_noncentrality(f_stat, df, alpha=0.05,
|
|||
|
alternative="two-sided"):
|
|||
|
"""
|
|||
|
Confidence interval for noncentrality parameter in F-test
|
|||
|
|
|||
|
This does not yet handle non-negativity constraint on nc.
|
|||
|
Currently only two-sided alternative is supported.
|
|||
|
|
|||
|
Parameters
|
|||
|
----------
|
|||
|
f_stat : float
|
|||
|
df : tuple
|
|||
|
degrees of freedom ``df = (df1, df2)`` where
|
|||
|
|
|||
|
- df1 : numerator degrees of freedom, number of constraints
|
|||
|
- df2 : denominator degrees of freedom, df_resid
|
|||
|
|
|||
|
alpha : float, default 0.05
|
|||
|
alternative : {"two-sided"}
|
|||
|
Other alternatives have not been implements.
|
|||
|
|
|||
|
Returns
|
|||
|
-------
|
|||
|
float
|
|||
|
The end point of the confidence interval.
|
|||
|
|
|||
|
Notes
|
|||
|
-----
|
|||
|
The algorithm inverts the cdf of the noncentral F distribution with
|
|||
|
respect to the noncentrality parameters.
|
|||
|
See Steiger 2004 and references cited in it.
|
|||
|
|
|||
|
References
|
|||
|
----------
|
|||
|
.. [1] Steiger, James H. 2004. “Beyond the F Test: Effect Size Confidence
|
|||
|
Intervals and Tests of Close Fit in the Analysis of Variance and
|
|||
|
Contrast Analysis.” Psychological Methods 9 (2): 164–82.
|
|||
|
https://doi.org/10.1037/1082-989X.9.2.164.
|
|||
|
|
|||
|
See Also
|
|||
|
--------
|
|||
|
confint_effectsize_oneway
|
|||
|
"""
|
|||
|
|
|||
|
df1, df2 = df
|
|||
|
if alternative in ["two-sided", "2s", "ts"]:
|
|||
|
alpha1s = alpha / 2
|
|||
|
ci = ncfdtrinc(df1, df2, [1 - alpha1s, alpha1s], f_stat)
|
|||
|
else:
|
|||
|
raise NotImplementedError
|
|||
|
|
|||
|
return ci
|
|||
|
|
|||
|
|
|||
|
def confint_effectsize_oneway(f_stat, df, alpha=0.05, nobs=None):
|
|||
|
"""
|
|||
|
Confidence interval for effect size in oneway anova for F distribution
|
|||
|
|
|||
|
This does not yet handle non-negativity constraint on nc.
|
|||
|
Currently only two-sided alternative is supported.
|
|||
|
|
|||
|
Parameters
|
|||
|
----------
|
|||
|
f_stat : float
|
|||
|
df : tuple
|
|||
|
degrees of freedom ``df = (df1, df2)`` where
|
|||
|
|
|||
|
- df1 : numerator degrees of freedom, number of constraints
|
|||
|
- df2 : denominator degrees of freedom, df_resid
|
|||
|
|
|||
|
alpha : float, default 0.05
|
|||
|
nobs : int, default None
|
|||
|
|
|||
|
Returns
|
|||
|
-------
|
|||
|
Holder
|
|||
|
Class with effect size and confidence attributes
|
|||
|
|
|||
|
Notes
|
|||
|
-----
|
|||
|
The confidence interval for the noncentrality parameter is obtained by
|
|||
|
inverting the cdf of the noncentral F distribution. Confidence intervals
|
|||
|
for other effect sizes are computed by endpoint transformation.
|
|||
|
|
|||
|
|
|||
|
R package ``effectsize`` does not compute the confidence intervals in the
|
|||
|
same way. Their confidence intervals can be replicated with
|
|||
|
|
|||
|
>>> ci_nc = confint_noncentrality(f_stat, df1, df2, alpha=0.1)
|
|||
|
>>> ci_es = smo._fstat2effectsize(ci_nc / df1, df1, df2)
|
|||
|
|
|||
|
See Also
|
|||
|
--------
|
|||
|
confint_noncentrality
|
|||
|
"""
|
|||
|
|
|||
|
df1, df2 = df
|
|||
|
if nobs is None:
|
|||
|
nobs = df1 + df2 + 1
|
|||
|
ci_nc = confint_noncentrality(f_stat, df, alpha=alpha)
|
|||
|
|
|||
|
ci_f2 = ci_nc / nobs
|
|||
|
ci_res = convert_effectsize_fsqu(f2=ci_f2)
|
|||
|
ci_res.ci_omega2 = (ci_f2 - df1 / df2) / (ci_f2 + 1 + 1 / df2)
|
|||
|
ci_res.ci_nc = ci_nc
|
|||
|
ci_res.ci_f = np.sqrt(ci_res.f2)
|
|||
|
ci_res.ci_eta = np.sqrt(ci_res.eta2)
|
|||
|
ci_res.ci_f_corrected = np.sqrt(ci_res.f2 * (df1 + 1) / df1)
|
|||
|
|
|||
|
return ci_res
|
|||
|
|
|||
|
|
|||
|
def anova_generic(means, variances, nobs, use_var="unequal",
|
|||
|
welch_correction=True, info=None):
|
|||
|
"""
|
|||
|
Oneway Anova based on summary statistics
|
|||
|
|
|||
|
Parameters
|
|||
|
----------
|
|||
|
means : array_like
|
|||
|
Mean of samples to be compared
|
|||
|
variances : float or array_like
|
|||
|
Residual (within) variance of each sample or pooled.
|
|||
|
If ``variances`` is scalar, then it is interpreted as pooled variance
|
|||
|
that is the same for all samples, ``use_var`` will be ignored.
|
|||
|
Otherwise, the variances are used depending on the ``use_var`` keyword.
|
|||
|
nobs : int or array_like
|
|||
|
Number of observations for the samples.
|
|||
|
If nobs is scalar, then it is assumed that all samples have the same
|
|||
|
number ``nobs`` of observation, i.e. a balanced sample case.
|
|||
|
Otherwise, statistics will be weighted corresponding to nobs.
|
|||
|
Only relative sizes are relevant, any proportional change to nobs does
|
|||
|
not change the effect size.
|
|||
|
use_var : {"unequal", "equal", "bf"}
|
|||
|
If ``use_var`` is "unequal", then the variances can differ across
|
|||
|
samples and the effect size for Welch anova will be computed.
|
|||
|
welch_correction : bool
|
|||
|
If this is false, then the Welch correction to the test statistic is
|
|||
|
not included. This allows the computation of an effect size measure
|
|||
|
that corresponds more closely to Cohen's f.
|
|||
|
info : not used yet
|
|||
|
|
|||
|
Returns
|
|||
|
-------
|
|||
|
res : results instance
|
|||
|
This includes `statistic` and `pvalue`.
|
|||
|
|
|||
|
"""
|
|||
|
options = {"use_var": use_var,
|
|||
|
"welch_correction": welch_correction
|
|||
|
}
|
|||
|
if means.ndim != 1:
|
|||
|
raise ValueError('data (means, ...) has to be one-dimensional')
|
|||
|
nobs_t = nobs.sum()
|
|||
|
n_groups = len(means)
|
|||
|
# mean_t = (nobs * means).sum() / nobs_t
|
|||
|
if use_var == "unequal":
|
|||
|
weights = nobs / variances
|
|||
|
else:
|
|||
|
weights = nobs
|
|||
|
|
|||
|
w_total = weights.sum()
|
|||
|
w_rel = weights / w_total
|
|||
|
# meanw_t = (weights * means).sum() / w_total
|
|||
|
meanw_t = w_rel @ means
|
|||
|
|
|||
|
statistic = np.dot(weights, (means - meanw_t)**2) / (n_groups - 1.)
|
|||
|
df_num = n_groups - 1.
|
|||
|
|
|||
|
if use_var == "unequal":
|
|||
|
tmp = ((1 - w_rel)**2 / (nobs - 1)).sum() / (n_groups**2 - 1)
|
|||
|
if welch_correction:
|
|||
|
statistic /= 1 + 2 * (n_groups - 2) * tmp
|
|||
|
df_denom = 1. / (3. * tmp)
|
|||
|
|
|||
|
elif use_var == "equal":
|
|||
|
# variance of group demeaned total sample, pooled var_resid
|
|||
|
tmp = ((nobs - 1) * variances).sum() / (nobs_t - n_groups)
|
|||
|
statistic /= tmp
|
|||
|
df_denom = nobs_t - n_groups
|
|||
|
|
|||
|
elif use_var == "bf":
|
|||
|
tmp = ((1. - nobs / nobs_t) * variances).sum()
|
|||
|
statistic = 1. * (nobs * (means - meanw_t)**2).sum()
|
|||
|
statistic /= tmp
|
|||
|
|
|||
|
df_num2 = n_groups - 1
|
|||
|
df_denom = tmp**2 / ((1. - nobs / nobs_t) ** 2 *
|
|||
|
variances ** 2 / (nobs - 1)).sum()
|
|||
|
df_num = tmp**2 / ((variances ** 2).sum() +
|
|||
|
(nobs / nobs_t * variances).sum() ** 2 -
|
|||
|
2 * (nobs / nobs_t * variances ** 2).sum())
|
|||
|
pval2 = stats.f.sf(statistic, df_num2, df_denom)
|
|||
|
options["df2"] = (df_num2, df_denom)
|
|||
|
options["df_num2"] = df_num2
|
|||
|
options["pvalue2"] = pval2
|
|||
|
|
|||
|
else:
|
|||
|
raise ValueError('use_var is to be one of "unequal", "equal" or "bf"')
|
|||
|
|
|||
|
pval = stats.f.sf(statistic, df_num, df_denom)
|
|||
|
res = HolderTuple(statistic=statistic,
|
|||
|
pvalue=pval,
|
|||
|
df=(df_num, df_denom),
|
|||
|
df_num=df_num,
|
|||
|
df_denom=df_denom,
|
|||
|
nobs_t=nobs_t,
|
|||
|
n_groups=n_groups,
|
|||
|
means=means,
|
|||
|
nobs=nobs,
|
|||
|
vars_=variances,
|
|||
|
**options
|
|||
|
)
|
|||
|
return res
|
|||
|
|
|||
|
|
|||
|
def anova_oneway(data, groups=None, use_var="unequal", welch_correction=True,
|
|||
|
trim_frac=0):
|
|||
|
"""Oneway Anova
|
|||
|
|
|||
|
This implements standard anova, Welch and Brown-Forsythe, and trimmed
|
|||
|
(Yuen) variants of those.
|
|||
|
|
|||
|
Parameters
|
|||
|
----------
|
|||
|
data : tuple of array_like or DataFrame or Series
|
|||
|
Data for k independent samples, with k >= 2.
|
|||
|
The data can be provided as a tuple or list of arrays or in long
|
|||
|
format with outcome observations in ``data`` and group membership in
|
|||
|
``groups``.
|
|||
|
groups : ndarray or Series
|
|||
|
If data is in long format, then groups is needed as indicator to which
|
|||
|
group or sample and observations belongs.
|
|||
|
use_var : {"unequal", "equal" or "bf"}
|
|||
|
`use_var` specified how to treat heteroscedasticity, unequal variance,
|
|||
|
across samples. Three approaches are available
|
|||
|
|
|||
|
"unequal" : Variances are not assumed to be equal across samples.
|
|||
|
Heteroscedasticity is taken into account with Welch Anova and
|
|||
|
Satterthwaite-Welch degrees of freedom.
|
|||
|
This is the default.
|
|||
|
"equal" : Variances are assumed to be equal across samples.
|
|||
|
This is the standard Anova.
|
|||
|
"bf" : Variances are not assumed to be equal across samples.
|
|||
|
The method is Browne-Forsythe (1971) for testing equality of means
|
|||
|
with the corrected degrees of freedom by Merothra. The original BF
|
|||
|
degrees of freedom are available as additional attributes in the
|
|||
|
results instance, ``df_denom2`` and ``p_value2``.
|
|||
|
|
|||
|
welch_correction : bool
|
|||
|
If this is false, then the Welch correction to the test statistic is
|
|||
|
not included. This allows the computation of an effect size measure
|
|||
|
that corresponds more closely to Cohen's f.
|
|||
|
trim_frac : float in [0, 0.5)
|
|||
|
Optional trimming for Anova with trimmed mean and winsorized variances.
|
|||
|
With the default trim_frac equal to zero, the oneway Anova statistics
|
|||
|
are computed without trimming. If `trim_frac` is larger than zero,
|
|||
|
then the largest and smallest observations in each sample are trimmed.
|
|||
|
The number of trimmed observations is the fraction of number of
|
|||
|
observations in the sample truncated to the next lower integer.
|
|||
|
`trim_frac` has to be smaller than 0.5, however, if the fraction is
|
|||
|
so large that there are not enough observations left over, then `nan`
|
|||
|
will be returned.
|
|||
|
|
|||
|
Returns
|
|||
|
-------
|
|||
|
res : results instance
|
|||
|
The returned HolderTuple instance has the following main attributes
|
|||
|
and some additional information in other attributes.
|
|||
|
|
|||
|
statistic : float
|
|||
|
Test statistic for k-sample mean comparison which is approximately
|
|||
|
F-distributed.
|
|||
|
pvalue : float
|
|||
|
If ``use_var="bf"``, then the p-value is based on corrected
|
|||
|
degrees of freedom following Mehrotra 1997.
|
|||
|
pvalue2 : float
|
|||
|
This is the p-value based on degrees of freedom as in
|
|||
|
Brown-Forsythe 1974 and is only available if ``use_var="bf"``.
|
|||
|
df = (df_denom, df_num) : tuple of floats
|
|||
|
Degreeds of freedom for the F-distribution depend on ``use_var``.
|
|||
|
If ``use_var="bf"``, then `df_denom` is for Mehrotra p-values
|
|||
|
`df_denom2` is available for Brown-Forsythe 1974 p-values.
|
|||
|
`df_num` is the same numerator degrees of freedom for both
|
|||
|
p-values.
|
|||
|
|
|||
|
Notes
|
|||
|
-----
|
|||
|
Welch's anova is correctly sized (not liberal or conservative) in smaller
|
|||
|
samples if the distribution of the samples is not very far away from the
|
|||
|
normal distribution. The test can become liberal if the data is strongly
|
|||
|
skewed. Welch's Anova can also be correctly sized for discrete
|
|||
|
distributions with finite support, like Lickert scale data.
|
|||
|
The trimmed version is robust to many non-normal distributions, it stays
|
|||
|
correctly sized in many cases, and is more powerful in some cases with
|
|||
|
skewness or heavy tails.
|
|||
|
|
|||
|
Trimming is currently based on the integer part of ``nobs * trim_frac``.
|
|||
|
The default might change to including fractional observations as in the
|
|||
|
original articles by Yuen.
|
|||
|
|
|||
|
|
|||
|
See Also
|
|||
|
--------
|
|||
|
anova_generic
|
|||
|
|
|||
|
References
|
|||
|
----------
|
|||
|
Brown, Morton B., and Alan B. Forsythe. 1974. “The Small Sample Behavior
|
|||
|
of Some Statistics Which Test the Equality of Several Means.”
|
|||
|
Technometrics 16 (1) (February 1): 129–132. doi:10.2307/1267501.
|
|||
|
|
|||
|
Mehrotra, Devan V. 1997. “Improving the Brown-Forsythe Solution to the
|
|||
|
Generalized Behrens-Fisher Problem.” Communications in Statistics -
|
|||
|
Simulation and Computation 26 (3): 1139–1145.
|
|||
|
doi:10.1080/03610919708813431.
|
|||
|
"""
|
|||
|
if groups is not None:
|
|||
|
uniques = np.unique(groups)
|
|||
|
data = [data[groups == uni] for uni in uniques]
|
|||
|
else:
|
|||
|
# uniques = None # not used yet, add to info?
|
|||
|
pass
|
|||
|
args = list(map(np.asarray, data))
|
|||
|
if any([x.ndim != 1 for x in args]):
|
|||
|
raise ValueError('data arrays have to be one-dimensional')
|
|||
|
|
|||
|
nobs = np.array([len(x) for x in args], float)
|
|||
|
# n_groups = len(args) # not used
|
|||
|
# means = np.array([np.mean(x, axis=0) for x in args], float)
|
|||
|
# vars_ = np.array([np.var(x, ddof=1, axis=0) for x in args], float)
|
|||
|
|
|||
|
if trim_frac == 0:
|
|||
|
means = np.array([x.mean() for x in args])
|
|||
|
vars_ = np.array([x.var(ddof=1) for x in args])
|
|||
|
else:
|
|||
|
tms = [TrimmedMean(x, trim_frac) for x in args]
|
|||
|
means = np.array([tm.mean_trimmed for tm in tms])
|
|||
|
# R doesn't use uncorrected var_winsorized
|
|||
|
# vars_ = np.array([tm.var_winsorized for tm in tms])
|
|||
|
vars_ = np.array([tm.var_winsorized * (tm.nobs - 1) /
|
|||
|
(tm.nobs_reduced - 1) for tm in tms])
|
|||
|
# nobs_original = nobs # store just in case
|
|||
|
nobs = np.array([tm.nobs_reduced for tm in tms])
|
|||
|
|
|||
|
res = anova_generic(means, vars_, nobs, use_var=use_var,
|
|||
|
welch_correction=welch_correction)
|
|||
|
|
|||
|
return res
|
|||
|
|
|||
|
|
|||
|
def equivalence_oneway_generic(f_stat, n_groups, nobs, equiv_margin, df,
|
|||
|
alpha=0.05, margin_type="f2"):
|
|||
|
"""Equivalence test for oneway anova (Wellek and extensions)
|
|||
|
|
|||
|
This is an helper function when summary statistics are available.
|
|||
|
Use `equivalence_oneway` instead.
|
|||
|
|
|||
|
The null hypothesis is that the means differ by more than `equiv_margin`
|
|||
|
in the anova distance measure.
|
|||
|
If the Null is rejected, then the data supports that means are equivalent,
|
|||
|
i.e. within a given distance.
|
|||
|
|
|||
|
Parameters
|
|||
|
----------
|
|||
|
f_stat : float
|
|||
|
F-statistic
|
|||
|
n_groups : int
|
|||
|
Number of groups in oneway comparison.
|
|||
|
nobs : ndarray
|
|||
|
Array of number of observations in groups.
|
|||
|
equiv_margin : float
|
|||
|
Equivalence margin in terms of effect size. Effect size can be chosen
|
|||
|
with `margin_type`. default is squared Cohen's f.
|
|||
|
df : tuple
|
|||
|
degrees of freedom ``df = (df1, df2)`` where
|
|||
|
|
|||
|
- df1 : numerator degrees of freedom, number of constraints
|
|||
|
- df2 : denominator degrees of freedom, df_resid
|
|||
|
|
|||
|
alpha : float in (0, 1)
|
|||
|
Significance level for the hypothesis test.
|
|||
|
margin_type : "f2" or "wellek"
|
|||
|
Type of effect size used for equivalence margin.
|
|||
|
|
|||
|
Returns
|
|||
|
-------
|
|||
|
results : instance of HolderTuple class
|
|||
|
The two main attributes are test statistic `statistic` and p-value
|
|||
|
`pvalue`.
|
|||
|
|
|||
|
Notes
|
|||
|
-----
|
|||
|
Equivalence in this function is defined in terms of a squared distance
|
|||
|
measure similar to Mahalanobis distance.
|
|||
|
Alternative definitions for the oneway case are based on maximum difference
|
|||
|
between pairs of means or similar pairwise distances.
|
|||
|
|
|||
|
The equivalence margin is used for the noncentrality parameter in the
|
|||
|
noncentral F distribution for the test statistic. In samples with unequal
|
|||
|
variances estimated using Welch or Brown-Forsythe Anova, the f-statistic
|
|||
|
depends on the unequal variances and corrections to the test statistic.
|
|||
|
This means that the equivalence margins are not fully comparable across
|
|||
|
methods for treating unequal variances.
|
|||
|
|
|||
|
References
|
|||
|
----------
|
|||
|
Wellek, Stefan. 2010. Testing Statistical Hypotheses of Equivalence and
|
|||
|
Noninferiority. 2nd ed. Boca Raton: CRC Press.
|
|||
|
|
|||
|
Cribbie, Robert A., Chantal A. Arpin-Cribbie, and Jamie A. Gruman. 2009.
|
|||
|
“Tests of Equivalence for One-Way Independent Groups Designs.” The Journal
|
|||
|
of Experimental Education 78 (1): 1–13.
|
|||
|
https://doi.org/10.1080/00220970903224552.
|
|||
|
|
|||
|
Jan, Show-Li, and Gwowen Shieh. 2019. “On the Extended Welch Test for
|
|||
|
Assessing Equivalence of Standardized Means.” Statistics in
|
|||
|
Biopharmaceutical Research 0 (0): 1–8.
|
|||
|
https://doi.org/10.1080/19466315.2019.1654915.
|
|||
|
|
|||
|
"""
|
|||
|
nobs_t = nobs.sum()
|
|||
|
nobs_mean = nobs_t / n_groups
|
|||
|
|
|||
|
if margin_type == "wellek":
|
|||
|
nc_null = nobs_mean * equiv_margin**2
|
|||
|
es = f_stat * (n_groups - 1) / nobs_mean
|
|||
|
type_effectsize = "Wellek's psi_squared"
|
|||
|
elif margin_type in ["f2", "fsqu", "fsquared"]:
|
|||
|
nc_null = nobs_t * equiv_margin
|
|||
|
es = f_stat / nobs_t
|
|||
|
type_effectsize = "Cohen's f_squared"
|
|||
|
else:
|
|||
|
raise ValueError('`margin_type` should be "f2" or "wellek"')
|
|||
|
crit_f = ncf_ppf(alpha, df[0], df[1], nc_null)
|
|||
|
|
|||
|
if margin_type == "wellek":
|
|||
|
# TODO: do we need a sqrt
|
|||
|
crit_es = crit_f * (n_groups - 1) / nobs_mean
|
|||
|
elif margin_type in ["f2", "fsqu", "fsquared"]:
|
|||
|
crit_es = crit_f / nobs_t
|
|||
|
|
|||
|
reject = (es < crit_es)
|
|||
|
|
|||
|
pv = ncf_cdf(f_stat, df[0], df[1], nc_null)
|
|||
|
pwr = ncf_cdf(crit_f, df[0], df[1], 1e-13) # scipy, cannot be 0
|
|||
|
res = HolderTuple(statistic=f_stat,
|
|||
|
pvalue=pv,
|
|||
|
effectsize=es, # match es type to margin_type
|
|||
|
crit_f=crit_f,
|
|||
|
crit_es=crit_es,
|
|||
|
reject=reject,
|
|||
|
power_zero=pwr,
|
|||
|
df=df,
|
|||
|
f_stat=f_stat,
|
|||
|
type_effectsize=type_effectsize
|
|||
|
)
|
|||
|
return res
|
|||
|
|
|||
|
|
|||
|
def equivalence_oneway(data, equiv_margin, groups=None, use_var="unequal",
|
|||
|
welch_correction=True, trim_frac=0, margin_type="f2"):
|
|||
|
"""equivalence test for oneway anova (Wellek's Anova)
|
|||
|
|
|||
|
The null hypothesis is that the means differ by more than `equiv_margin`
|
|||
|
in the anova distance measure.
|
|||
|
If the Null is rejected, then the data supports that means are equivalent,
|
|||
|
i.e. within a given distance.
|
|||
|
|
|||
|
Parameters
|
|||
|
----------
|
|||
|
data : tuple of array_like or DataFrame or Series
|
|||
|
Data for k independent samples, with k >= 2.
|
|||
|
The data can be provided as a tuple or list of arrays or in long
|
|||
|
format with outcome observations in ``data`` and group membership in
|
|||
|
``groups``.
|
|||
|
equiv_margin : float
|
|||
|
Equivalence margin in terms of effect size. Effect size can be chosen
|
|||
|
with `margin_type`. default is squared Cohen's f.
|
|||
|
groups : ndarray or Series
|
|||
|
If data is in long format, then groups is needed as indicator to which
|
|||
|
group or sample and observations belongs.
|
|||
|
use_var : {"unequal", "equal" or "bf"}
|
|||
|
`use_var` specified how to treat heteroscedasticity, unequal variance,
|
|||
|
across samples. Three approaches are available
|
|||
|
|
|||
|
"unequal" : Variances are not assumed to be equal across samples.
|
|||
|
Heteroscedasticity is taken into account with Welch Anova and
|
|||
|
Satterthwaite-Welch degrees of freedom.
|
|||
|
This is the default.
|
|||
|
"equal" : Variances are assumed to be equal across samples.
|
|||
|
This is the standard Anova.
|
|||
|
"bf: Variances are not assumed to be equal across samples.
|
|||
|
The method is Browne-Forsythe (1971) for testing equality of means
|
|||
|
with the corrected degrees of freedom by Merothra. The original BF
|
|||
|
degrees of freedom are available as additional attributes in the
|
|||
|
results instance, ``df_denom2`` and ``p_value2``.
|
|||
|
|
|||
|
welch_correction : bool
|
|||
|
If this is false, then the Welch correction to the test statistic is
|
|||
|
not included. This allows the computation of an effect size measure
|
|||
|
that corresponds more closely to Cohen's f.
|
|||
|
trim_frac : float in [0, 0.5)
|
|||
|
Optional trimming for Anova with trimmed mean and winsorized variances.
|
|||
|
With the default trim_frac equal to zero, the oneway Anova statistics
|
|||
|
are computed without trimming. If `trim_frac` is larger than zero,
|
|||
|
then the largest and smallest observations in each sample are trimmed.
|
|||
|
The number of trimmed observations is the fraction of number of
|
|||
|
observations in the sample truncated to the next lower integer.
|
|||
|
`trim_frac` has to be smaller than 0.5, however, if the fraction is
|
|||
|
so large that there are not enough observations left over, then `nan`
|
|||
|
will be returned.
|
|||
|
margin_type : "f2" or "wellek"
|
|||
|
Type of effect size used for equivalence margin, either squared
|
|||
|
Cohen's f or Wellek's psi. Default is "f2".
|
|||
|
|
|||
|
Returns
|
|||
|
-------
|
|||
|
results : instance of HolderTuple class
|
|||
|
The two main attributes are test statistic `statistic` and p-value
|
|||
|
`pvalue`.
|
|||
|
|
|||
|
See Also
|
|||
|
--------
|
|||
|
anova_oneway
|
|||
|
equivalence_scale_oneway
|
|||
|
"""
|
|||
|
|
|||
|
# use anova to compute summary statistics and f-statistic
|
|||
|
res0 = anova_oneway(data, groups=groups, use_var=use_var,
|
|||
|
welch_correction=welch_correction,
|
|||
|
trim_frac=trim_frac)
|
|||
|
f_stat = res0.statistic
|
|||
|
res = equivalence_oneway_generic(f_stat, res0.n_groups, res0.nobs_t,
|
|||
|
equiv_margin, res0.df, alpha=0.05,
|
|||
|
margin_type=margin_type)
|
|||
|
|
|||
|
return res
|
|||
|
|
|||
|
|
|||
|
def _power_equivalence_oneway_emp(f_stat, n_groups, nobs, eps, df, alpha=0.05):
|
|||
|
"""Empirical power of oneway equivalence test
|
|||
|
|
|||
|
This only returns post-hoc, empirical power.
|
|||
|
|
|||
|
Warning: eps is currently effect size margin as defined as in Wellek, and
|
|||
|
not the signal to noise ratio (Cohen's f family).
|
|||
|
|
|||
|
Parameters
|
|||
|
----------
|
|||
|
f_stat : float
|
|||
|
F-statistic from oneway anova, used to compute empirical effect size
|
|||
|
n_groups : int
|
|||
|
Number of groups in oneway comparison.
|
|||
|
nobs : ndarray
|
|||
|
Array of number of observations in groups.
|
|||
|
eps : float
|
|||
|
Equivalence margin in terms of effect size given by Wellek's psi.
|
|||
|
df : tuple
|
|||
|
Degrees of freedom for F distribution.
|
|||
|
alpha : float in (0, 1)
|
|||
|
Significance level for the hypothesis test.
|
|||
|
|
|||
|
Returns
|
|||
|
-------
|
|||
|
pow : float
|
|||
|
Ex-post, post-hoc or empirical power at f-statistic of the equivalence
|
|||
|
test.
|
|||
|
"""
|
|||
|
|
|||
|
res = equivalence_oneway_generic(f_stat, n_groups, nobs, eps, df,
|
|||
|
alpha=alpha, margin_type="wellek")
|
|||
|
|
|||
|
nobs_mean = nobs.sum() / n_groups
|
|||
|
fn = f_stat # post-hoc power, empirical power at estimate
|
|||
|
esn = fn * (n_groups - 1) / nobs_mean # Wellek psi
|
|||
|
pow_ = ncf_cdf(res.crit_f, df[0], df[1], nobs_mean * esn)
|
|||
|
|
|||
|
return pow_
|
|||
|
|
|||
|
|
|||
|
def power_equivalence_oneway(f2_alt, equiv_margin, nobs_t, n_groups=None,
|
|||
|
df=None, alpha=0.05, margin_type="f2"):
|
|||
|
"""
|
|||
|
Power of oneway equivalence test
|
|||
|
|
|||
|
Parameters
|
|||
|
----------
|
|||
|
f2_alt : float
|
|||
|
Effect size, squared Cohen's f, under the alternative.
|
|||
|
equiv_margin : float
|
|||
|
Equivalence margin in terms of effect size. Effect size can be chosen
|
|||
|
with `margin_type`. default is squared Cohen's f.
|
|||
|
nobs_t : ndarray
|
|||
|
Total number of observations summed over all groups.
|
|||
|
n_groups : int
|
|||
|
Number of groups in oneway comparison. If margin_type is "wellek",
|
|||
|
then either ``n_groups`` or ``df`` has to be given.
|
|||
|
df : tuple
|
|||
|
Degrees of freedom for F distribution,
|
|||
|
``df = (n_groups - 1, nobs_t - n_groups)``
|
|||
|
alpha : float in (0, 1)
|
|||
|
Significance level for the hypothesis test.
|
|||
|
margin_type : "f2" or "wellek"
|
|||
|
Type of effect size used for equivalence margin, either squared
|
|||
|
Cohen's f or Wellek's psi. Default is "f2".
|
|||
|
|
|||
|
Returns
|
|||
|
-------
|
|||
|
pow_alt : float
|
|||
|
Power of the equivalence test at given equivalence effect size under
|
|||
|
the alternative.
|
|||
|
"""
|
|||
|
|
|||
|
# one of n_groups or df has to be specified
|
|||
|
if df is None:
|
|||
|
if n_groups is None:
|
|||
|
raise ValueError("either df or n_groups has to be provided")
|
|||
|
df = (n_groups - 1, nobs_t - n_groups)
|
|||
|
|
|||
|
# esn = fn * (n_groups - 1) / nobs_mean # Wellek psi
|
|||
|
|
|||
|
# fix for scipy, ncf does not allow nc == 0, fixed in scipy master
|
|||
|
if f2_alt == 0:
|
|||
|
f2_alt = 1e-13
|
|||
|
# effect size, critical value at margin
|
|||
|
# f2_null = equiv_margin
|
|||
|
if margin_type in ["f2", "fsqu", "fsquared"]:
|
|||
|
f2_null = equiv_margin
|
|||
|
elif margin_type == "wellek":
|
|||
|
if n_groups is None:
|
|||
|
raise ValueError("If margin_type is wellek, then n_groups has "
|
|||
|
"to be provided")
|
|||
|
# f2_null = (n_groups - 1) * n_groups / nobs_t * equiv_margin**2
|
|||
|
nobs_mean = nobs_t / n_groups
|
|||
|
f2_null = nobs_mean * equiv_margin**2 / nobs_t
|
|||
|
f2_alt = nobs_mean * f2_alt**2 / nobs_t
|
|||
|
else:
|
|||
|
raise ValueError('`margin_type` should be "f2" or "wellek"')
|
|||
|
|
|||
|
crit_f_margin = ncf_ppf(alpha, df[0], df[1], nobs_t * f2_null)
|
|||
|
pwr_alt = ncf_cdf(crit_f_margin, df[0], df[1], nobs_t * f2_alt)
|
|||
|
return pwr_alt
|
|||
|
|
|||
|
|
|||
|
def simulate_power_equivalence_oneway(means, nobs, equiv_margin, vars_=None,
|
|||
|
k_mc=1000, trim_frac=0,
|
|||
|
options_var=None, margin_type="f2"
|
|||
|
): # , anova_options=None): #TODO
|
|||
|
"""Simulate Power for oneway equivalence test (Wellek's Anova)
|
|||
|
|
|||
|
This function is experimental and written to evaluate asymptotic power
|
|||
|
function. This function will change without backwards compatibility
|
|||
|
constraints. The only part that is stable is `pvalue` attribute in results.
|
|||
|
|
|||
|
Effect size for equivalence margin
|
|||
|
|
|||
|
"""
|
|||
|
if options_var is None:
|
|||
|
options_var = ["unequal", "equal", "bf"]
|
|||
|
if vars_ is not None:
|
|||
|
stds = np.sqrt(vars_)
|
|||
|
else:
|
|||
|
stds = np.ones(len(means))
|
|||
|
|
|||
|
nobs_mean = nobs.mean()
|
|||
|
n_groups = len(nobs)
|
|||
|
res_mc = []
|
|||
|
f_mc = []
|
|||
|
reject_mc = []
|
|||
|
other_mc = []
|
|||
|
for _ in range(k_mc):
|
|||
|
y0, y1, y2, y3 = (m + std * np.random.randn(n)
|
|||
|
for (n, m, std) in zip(nobs, means, stds))
|
|||
|
|
|||
|
res_i = []
|
|||
|
f_i = []
|
|||
|
reject_i = []
|
|||
|
other_i = []
|
|||
|
for uv in options_var:
|
|||
|
# for welch in options_welch:
|
|||
|
# res1 = sma.anova_generic(means, vars_, nobs, use_var=uv,
|
|||
|
# welch_correction=welch)
|
|||
|
res0 = anova_oneway([y0, y1, y2, y3], use_var=uv,
|
|||
|
trim_frac=trim_frac)
|
|||
|
f_stat = res0.statistic
|
|||
|
res1 = equivalence_oneway_generic(f_stat, n_groups, nobs.sum(),
|
|||
|
equiv_margin, res0.df,
|
|||
|
alpha=0.05,
|
|||
|
margin_type=margin_type)
|
|||
|
res_i.append(res1.pvalue)
|
|||
|
es_wellek = f_stat * (n_groups - 1) / nobs_mean
|
|||
|
f_i.append(es_wellek)
|
|||
|
reject_i.append(res1.reject)
|
|||
|
other_i.extend([res1.crit_f, res1.crit_es, res1.power_zero])
|
|||
|
res_mc.append(res_i)
|
|||
|
f_mc.append(f_i)
|
|||
|
reject_mc.append(reject_i)
|
|||
|
other_mc.append(other_i)
|
|||
|
|
|||
|
f_mc = np.asarray(f_mc)
|
|||
|
other_mc = np.asarray(other_mc)
|
|||
|
res_mc = np.asarray(res_mc)
|
|||
|
reject_mc = np.asarray(reject_mc)
|
|||
|
res = Holder(f_stat=f_mc,
|
|||
|
other=other_mc,
|
|||
|
pvalue=res_mc,
|
|||
|
reject=reject_mc
|
|||
|
)
|
|||
|
return res
|
|||
|
|
|||
|
|
|||
|
def test_scale_oneway(data, method="bf", center="median", transform="abs",
|
|||
|
trim_frac_mean=0.1, trim_frac_anova=0.0):
|
|||
|
"""Oneway Anova test for equal scale, variance or dispersion
|
|||
|
|
|||
|
This hypothesis test performs a oneway anova test on transformed data and
|
|||
|
includes Levene and Brown-Forsythe tests for equal variances as special
|
|||
|
cases.
|
|||
|
|
|||
|
Parameters
|
|||
|
----------
|
|||
|
data : tuple of array_like or DataFrame or Series
|
|||
|
Data for k independent samples, with k >= 2. The data can be provided
|
|||
|
as a tuple or list of arrays or in long format with outcome
|
|||
|
observations in ``data`` and group membership in ``groups``.
|
|||
|
method : {"unequal", "equal" or "bf"}
|
|||
|
How to treat heteroscedasticity across samples. This is used as
|
|||
|
`use_var` option in `anova_oneway` and refers to the variance of the
|
|||
|
transformed data, i.e. assumption is on 4th moment if squares are used
|
|||
|
as transform.
|
|||
|
Three approaches are available:
|
|||
|
|
|||
|
"unequal" : Variances are not assumed to be equal across samples.
|
|||
|
Heteroscedasticity is taken into account with Welch Anova and
|
|||
|
Satterthwaite-Welch degrees of freedom.
|
|||
|
This is the default.
|
|||
|
"equal" : Variances are assumed to be equal across samples.
|
|||
|
This is the standard Anova.
|
|||
|
"bf" : Variances are not assumed to be equal across samples.
|
|||
|
The method is Browne-Forsythe (1971) for testing equality of means
|
|||
|
with the corrected degrees of freedom by Merothra. The original BF
|
|||
|
degrees of freedom are available as additional attributes in the
|
|||
|
results instance, ``df_denom2`` and ``p_value2``.
|
|||
|
|
|||
|
center : "median", "mean", "trimmed" or float
|
|||
|
Statistic used for centering observations. If a float, then this
|
|||
|
value is used to center. Default is median.
|
|||
|
transform : "abs", "square" or callable
|
|||
|
Transformation for the centered observations. If a callable, then this
|
|||
|
function is called on the centered data.
|
|||
|
Default is absolute value.
|
|||
|
trim_frac_mean=0.1 : float in [0, 0.5)
|
|||
|
Trim fraction for the trimmed mean when `center` is "trimmed"
|
|||
|
trim_frac_anova : float in [0, 0.5)
|
|||
|
Optional trimming for Anova with trimmed mean and Winsorized variances.
|
|||
|
With the default trim_frac equal to zero, the oneway Anova statistics
|
|||
|
are computed without trimming. If `trim_frac` is larger than zero,
|
|||
|
then the largest and smallest observations in each sample are trimmed.
|
|||
|
see ``trim_frac`` option in `anova_oneway`
|
|||
|
|
|||
|
Returns
|
|||
|
-------
|
|||
|
res : results instance
|
|||
|
The returned HolderTuple instance has the following main attributes
|
|||
|
and some additional information in other attributes.
|
|||
|
|
|||
|
statistic : float
|
|||
|
Test statistic for k-sample mean comparison which is approximately
|
|||
|
F-distributed.
|
|||
|
pvalue : float
|
|||
|
If ``method="bf"``, then the p-value is based on corrected
|
|||
|
degrees of freedom following Mehrotra 1997.
|
|||
|
pvalue2 : float
|
|||
|
This is the p-value based on degrees of freedom as in
|
|||
|
Brown-Forsythe 1974 and is only available if ``method="bf"``.
|
|||
|
df : (df_denom, df_num)
|
|||
|
Tuple containing degrees of freedom for the F-distribution depend
|
|||
|
on ``method``. If ``method="bf"``, then `df_denom` is for Mehrotra
|
|||
|
p-values `df_denom2` is available for Brown-Forsythe 1974 p-values.
|
|||
|
`df_num` is the same numerator degrees of freedom for both
|
|||
|
p-values.
|
|||
|
|
|||
|
See Also
|
|||
|
--------
|
|||
|
anova_oneway
|
|||
|
scale_transform
|
|||
|
"""
|
|||
|
|
|||
|
data = map(np.asarray, data)
|
|||
|
xxd = [scale_transform(x, center=center, transform=transform,
|
|||
|
trim_frac=trim_frac_mean) for x in data]
|
|||
|
|
|||
|
res = anova_oneway(xxd, groups=None, use_var=method,
|
|||
|
welch_correction=True, trim_frac=trim_frac_anova)
|
|||
|
res.data_transformed = xxd
|
|||
|
return res
|
|||
|
|
|||
|
|
|||
|
def equivalence_scale_oneway(data, equiv_margin, method='bf', center='median',
|
|||
|
transform='abs', trim_frac_mean=0.,
|
|||
|
trim_frac_anova=0.):
|
|||
|
"""Oneway Anova test for equivalence of scale, variance or dispersion
|
|||
|
|
|||
|
This hypothesis test performs a oneway equivalence anova test on
|
|||
|
transformed data.
|
|||
|
|
|||
|
Note, the interpretation of the equivalence margin `equiv_margin` will
|
|||
|
depend on the transformation of the data. Transformations like
|
|||
|
absolute deviation are not scaled to correspond to the variance under
|
|||
|
normal distribution.
|
|||
|
|
|||
|
Parameters
|
|||
|
----------
|
|||
|
data : tuple of array_like or DataFrame or Series
|
|||
|
Data for k independent samples, with k >= 2. The data can be provided
|
|||
|
as a tuple or list of arrays or in long format with outcome
|
|||
|
observations in ``data`` and group membership in ``groups``.
|
|||
|
equiv_margin : float
|
|||
|
Equivalence margin in terms of effect size. Effect size can be chosen
|
|||
|
with `margin_type`. default is squared Cohen's f.
|
|||
|
method : {"unequal", "equal" or "bf"}
|
|||
|
How to treat heteroscedasticity across samples. This is used as
|
|||
|
`use_var` option in `anova_oneway` and refers to the variance of the
|
|||
|
transformed data, i.e. assumption is on 4th moment if squares are used
|
|||
|
as transform.
|
|||
|
Three approaches are available:
|
|||
|
|
|||
|
"unequal" : Variances are not assumed to be equal across samples.
|
|||
|
Heteroscedasticity is taken into account with Welch Anova and
|
|||
|
Satterthwaite-Welch degrees of freedom.
|
|||
|
This is the default.
|
|||
|
"equal" : Variances are assumed to be equal across samples.
|
|||
|
This is the standard Anova.
|
|||
|
"bf" : Variances are not assumed to be equal across samples.
|
|||
|
The method is Browne-Forsythe (1971) for testing equality of means
|
|||
|
with the corrected degrees of freedom by Merothra. The original BF
|
|||
|
degrees of freedom are available as additional attributes in the
|
|||
|
results instance, ``df_denom2`` and ``p_value2``.
|
|||
|
center : "median", "mean", "trimmed" or float
|
|||
|
Statistic used for centering observations. If a float, then this
|
|||
|
value is used to center. Default is median.
|
|||
|
transform : "abs", "square" or callable
|
|||
|
Transformation for the centered observations. If a callable, then this
|
|||
|
function is called on the centered data.
|
|||
|
Default is absolute value.
|
|||
|
trim_frac_mean : float in [0, 0.5)
|
|||
|
Trim fraction for the trimmed mean when `center` is "trimmed"
|
|||
|
trim_frac_anova : float in [0, 0.5)
|
|||
|
Optional trimming for Anova with trimmed mean and Winsorized variances.
|
|||
|
With the default trim_frac equal to zero, the oneway Anova statistics
|
|||
|
are computed without trimming. If `trim_frac` is larger than zero,
|
|||
|
then the largest and smallest observations in each sample are trimmed.
|
|||
|
see ``trim_frac`` option in `anova_oneway`
|
|||
|
|
|||
|
Returns
|
|||
|
-------
|
|||
|
results : instance of HolderTuple class
|
|||
|
The two main attributes are test statistic `statistic` and p-value
|
|||
|
`pvalue`.
|
|||
|
|
|||
|
See Also
|
|||
|
--------
|
|||
|
anova_oneway
|
|||
|
scale_transform
|
|||
|
equivalence_oneway
|
|||
|
"""
|
|||
|
data = map(np.asarray, data)
|
|||
|
xxd = [scale_transform(x, center=center, transform=transform,
|
|||
|
trim_frac=trim_frac_mean) for x in data]
|
|||
|
|
|||
|
res = equivalence_oneway(xxd, equiv_margin, use_var=method,
|
|||
|
welch_correction=True, trim_frac=trim_frac_anova)
|
|||
|
res.x_transformed = xxd
|
|||
|
return res
|