"""
|
||
Tests and Confidence Intervals for Binomial Proportions
|
||
|
||
Created on Fri Mar 01 00:23:07 2013
|
||
|
||
Author: Josef Perktold
|
||
License: BSD-3
|
||
"""
|
||
|
||
from statsmodels.compat.python import lzip
|
||
from typing import Callable
|
||
import numpy as np
|
||
import pandas as pd
|
||
from scipy import optimize, stats
|
||
|
||
from statsmodels.stats.base import AllPairsResults, HolderTuple
|
||
from statsmodels.stats.weightstats import _zstat_generic2
|
||
from statsmodels.tools.sm_exceptions import HypothesisTestWarning
|
||
from statsmodels.tools.testing import Holder
|
||
from statsmodels.tools.validation import array_like
|
||
|
||
FLOAT_INFO = np.finfo(float)
|
||
|
||
|
||
def _bound_proportion_confint(
    func: Callable[[float], float], qi: float, lower: bool = True
) -> float:
    """
    Try hard to find a bound different from eps/1 - eps in proportion_confint

    Parameters
    ----------
    func : callable
        Callable function to use as the objective of the search
    qi : float
        The empirical success rate
    lower : bool
        Whether to find a lower bound for the left side of the CI

    Returns
    -------
    float
        The coarse bound
    """
    default = FLOAT_INFO.eps if lower else 1.0 - FLOAT_INFO.eps

    def step(v):
        return v / 8 if lower else v + (1.0 - v) / 8

    x = step(qi)
    w = func(x)
    cnt = 1
    while w > 0 and cnt < 10:
        x = step(x)
        w = func(x)
        cnt += 1
    return x if cnt < 10 else default


def _bisection_search_conservative(
    func: Callable[[float], float], lb: float, ub: float, steps: int = 27
) -> tuple[float, float]:
    """
    Private function used as a fallback by proportion_confint

    Used when brentq returns a non-conservative bound for the CI

    Parameters
    ----------
    func : callable
        Callable function to use as the objective of the search
    lb : float
        Lower bound
    ub : float
        Upper bound
    steps : int
        Number of steps to use in the bisection

    Returns
    -------
    est : float
        The estimated value. Will always produce a negative value of func
    func_val : float
        The value of the function at the estimate
    """
    upper = func(ub)
    lower = func(lb)
    best = upper if upper < 0 else lower
    best_pt = ub if upper < 0 else lb
    if np.sign(lower) == np.sign(upper):
        raise ValueError("problem with signs")
    mp = (ub + lb) / 2
    mid = func(mp)
    if (mid < 0) and (mid > best):
        best = mid
        best_pt = mp
    for _ in range(steps):
        if np.sign(mid) == np.sign(upper):
            ub = mp
            upper = mid
        else:
            lb = mp
        mp = (ub + lb) / 2
        mid = func(mp)
        if (mid < 0) and (mid > best):
            best = mid
            best_pt = mp
    return best_pt, best


def proportion_confint(count, nobs, alpha: float = 0.05, method="normal"):
    """
    Confidence interval for a binomial proportion

    Parameters
    ----------
    count : {int or float, array_like}
        number of successes, can be pandas Series or DataFrame. Arrays
        must contain integer values if method is "binom_test".
    nobs : {int or float, array_like}
        total number of trials. Arrays must contain integer values if method
        is "binom_test".
    alpha : float
        Significance level, default 0.05. Must be in (0, 1).
    method : {"normal", "agresti_coull", "beta", "wilson", "jeffreys", "binom_test"}
        default: "normal"
        method to use for confidence interval. Supported methods:

        - `normal` : asymptotic normal approximation
        - `agresti_coull` : Agresti-Coull interval
        - `beta` : Clopper-Pearson interval based on Beta distribution
        - `wilson` : Wilson Score interval
        - `jeffreys` : Jeffreys Bayesian Interval
        - `binom_test` : Numerical inversion of binom_test

    Returns
    -------
    ci_low, ci_upp : {float, ndarray, Series, DataFrame}
        lower and upper confidence level with coverage (approximately) 1-alpha.
        When a pandas object is returned, then the index is taken from `count`.

    Notes
    -----
    Beta, the Clopper-Pearson exact interval, has coverage of at least
    1-alpha, but is in general conservative. Most of the other methods have
    average coverage equal to 1-alpha, but will have smaller coverage in
    some cases.

    The "beta" and "jeffreys" intervals are central, i.e. they use alpha/2
    in each tail, and alpha is not adjusted at the boundaries. In the extreme
    case when `count` is zero or equal to `nobs`, the coverage will be only
    1 - alpha/2 in the case of "beta".

    The confidence intervals are clipped to be in the [0, 1] interval in the
    case of "normal" and "agresti_coull".

    Method "binom_test" directly inverts the binomial test in scipy.stats,
    which has discrete steps.

    TODO: binom_test intervals raise an exception in small samples if one
    interval bound is close to zero or one.

    References
    ----------
    .. [*] https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval

    .. [*] Brown, Lawrence D.; Cai, T. Tony; DasGupta, Anirban (2001).
       "Interval Estimation for a Binomial Proportion", Statistical
       Science 16 (2): 101–133. doi:10.1214/ss/1009213286.
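
    Examples
    --------
    A minimal illustration; the counts are made up, and the exact interval
    values depend on the chosen method:

    >>> from statsmodels.stats.proportion import proportion_confint
    >>> ci_low, ci_upp = proportion_confint(7, 50, alpha=0.05, method="wilson")
    >>> 0 <= ci_low < 7 / 50 < ci_upp <= 1
    True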
    """
    is_scalar = np.isscalar(count) and np.isscalar(nobs)
    is_pandas = isinstance(count, (pd.Series, pd.DataFrame))
    count_a = array_like(count, "count", optional=False, ndim=None)
    nobs_a = array_like(nobs, "nobs", optional=False, ndim=None)

    def _check(x: np.ndarray, name: str) -> np.ndarray:
        if np.issubdtype(x.dtype, np.integer):
            return x
        y = x.astype(np.int64, casting="unsafe")
        if np.any(y != x):
            raise ValueError(
                f"{name} must have an integral dtype. Found data with "
                f"dtype {x.dtype}"
            )
        return y

    if method == "binom_test":
        count_a = _check(np.asarray(count_a), "count")
        nobs_a = _check(np.asarray(nobs_a), "nobs")

    q_ = count_a / nobs_a
    alpha_2 = 0.5 * alpha

    if method == "normal":
        std_ = np.sqrt(q_ * (1 - q_) / nobs_a)
        dist = stats.norm.isf(alpha / 2.0) * std_
        ci_low = q_ - dist
        ci_upp = q_ + dist
    elif method == "binom_test":
        # inverting the binomial test
        def func_factory(count: int, nobs: int) -> Callable[[float], float]:
            if hasattr(stats, "binomtest"):

                def func(qi):
                    return stats.binomtest(count, nobs, p=qi).pvalue - alpha

            else:
                # Remove after min SciPy >= 1.7
                def func(qi):
                    return stats.binom_test(count, nobs, p=qi) - alpha

            return func

        bcast = np.broadcast(count_a, nobs_a)
        ci_low = np.zeros(bcast.shape)
        ci_upp = np.zeros(bcast.shape)
        index = bcast.index
        for c, n in bcast:
            # Enforce symmetry
            reverse = False
            _q = q_.flat[index]
            if c > n // 2:
                c = n - c
                reverse = True
                _q = 1 - _q
            func = func_factory(c, n)
            if c == 0:
                ci_low.flat[index] = 0.0
            else:
                lower_bnd = _bound_proportion_confint(func, _q, lower=True)
                val, _z = optimize.brentq(
                    func, lower_bnd, _q, full_output=True
                )
                if func(val) > 0:
                    power = 10
                    new_lb = val - (val - lower_bnd) / 2**power
                    while func(new_lb) > 0 and power >= 0:
                        power -= 1
                        new_lb = val - (val - lower_bnd) / 2**power
                    val, _ = _bisection_search_conservative(func, new_lb, _q)
                ci_low.flat[index] = val
            if c == n:
                ci_upp.flat[index] = 1.0
            else:
                upper_bnd = _bound_proportion_confint(func, _q, lower=False)
                val, _z = optimize.brentq(
                    func, _q, upper_bnd, full_output=True
                )
                if func(val) > 0:
                    power = 10
                    new_ub = val + (upper_bnd - val) / 2**power
                    while func(new_ub) > 0 and power >= 0:
                        power -= 1
                        new_ub = val + (upper_bnd - val) / 2**power
                    val, _ = _bisection_search_conservative(func, _q, new_ub)
                ci_upp.flat[index] = val
            if reverse:
                temp = ci_upp.flat[index]
                ci_upp.flat[index] = 1 - ci_low.flat[index]
                ci_low.flat[index] = 1 - temp
            index = bcast.index
    elif method == "beta":
        ci_low = stats.beta.ppf(alpha_2, count_a, nobs_a - count_a + 1)
        ci_upp = stats.beta.isf(alpha_2, count_a + 1, nobs_a - count_a)

        if np.ndim(ci_low) > 0:
            ci_low.flat[q_.flat == 0] = 0
            ci_upp.flat[q_.flat == 1] = 1
        else:
            ci_low = 0 if q_ == 0 else ci_low
            ci_upp = 1 if q_ == 1 else ci_upp
    elif method == "agresti_coull":
        crit = stats.norm.isf(alpha / 2.0)
        nobs_c = nobs_a + crit**2
        q_c = (count_a + crit**2 / 2.0) / nobs_c
        std_c = np.sqrt(q_c * (1.0 - q_c) / nobs_c)
        dist = crit * std_c
        ci_low = q_c - dist
        ci_upp = q_c + dist
    elif method == "wilson":
        crit = stats.norm.isf(alpha / 2.0)
        crit2 = crit**2
        denom = 1 + crit2 / nobs_a
        center = (q_ + crit2 / (2 * nobs_a)) / denom
        dist = crit * np.sqrt(
            q_ * (1.0 - q_) / nobs_a + crit2 / (4.0 * nobs_a**2)
        )
        dist /= denom
        ci_low = center - dist
        ci_upp = center + dist
    # method is adjusted to be more forgiving of misspellings
    elif method[:4] == "jeff":
        ci_low, ci_upp = stats.beta.interval(
            1 - alpha, count_a + 0.5, nobs_a - count_a + 0.5
        )
    else:
        raise NotImplementedError(f"method {method} is not available")
    if method in ["normal", "agresti_coull"]:
        ci_low = np.clip(ci_low, 0, 1)
        ci_upp = np.clip(ci_upp, 0, 1)
    if is_pandas:
        container = pd.Series if isinstance(count, pd.Series) else pd.DataFrame
        ci_low = container(ci_low, index=count.index)
        ci_upp = container(ci_upp, index=count.index)
    if is_scalar:
        return float(ci_low), float(ci_upp)
    return ci_low, ci_upp


def multinomial_proportions_confint(counts, alpha=0.05, method='goodman'):
    """
    Confidence intervals for multinomial proportions.

    Parameters
    ----------
    counts : array_like of int, 1-D
        Number of observations in each category.
    alpha : float in (0, 1), optional
        Significance level, defaults to 0.05.
    method : {'goodman', 'sison-glaz'}, optional
        Method to use to compute the confidence intervals; available methods
        are:

        - `goodman`: based on a chi-squared approximation, valid if all
          values in `counts` are greater than or equal to 5 [2]_
        - `sison-glaz`: less conservative than `goodman`, but only valid if
          `counts` has 7 or more categories (``len(counts) >= 7``) [3]_

    Returns
    -------
    confint : ndarray, 2-D
        Array of [lower, upper] confidence levels for each category, such that
        overall coverage is (approximately) `1-alpha`.

    Raises
    ------
    ValueError
        If `alpha` is not in `(0, 1)` (bounds excluded), or if any of the
        values in `counts` are negative.
    NotImplementedError
        If `method` is not known.
    Exception
        When ``method == 'sison-glaz'``, if for some reason `c` cannot be
        computed; this signals a bug and should be reported.

    Notes
    -----
    The `goodman` method [2]_ is based on approximating a statistic based on
    the multinomial as a chi-squared random variable. The usual recommendation
    is that this is valid if all the values in `counts` are greater than or
    equal to 5. There is no condition on the number of categories for this
    method.

    The `sison-glaz` method [3]_ approximates the multinomial probabilities,
    and evaluates that with a maximum-likelihood estimator. The first
    approximation is an Edgeworth expansion that converges when the number of
    categories goes to infinity, and the maximum-likelihood estimator converges
    when the number of observations (``sum(counts)``) goes to infinity. In
    their paper, Sison & Glaz demonstrate their method with at least 7
    categories, so ``len(counts) >= 7`` with all values in `counts` at or
    above 5 can be used as a rule of thumb for the validity of this method.
    This method is less conservative than the `goodman` method (i.e. it will
    yield confidence intervals closer to the desired significance level), but
    produces confidence intervals of uniform width over all categories (except
    when the intervals reach 0 or 1, in which case they are truncated), which
    makes it most useful when proportions are of similar magnitude.

    Aside from the original sources ([1]_, [2]_, and [3]_), the implementation
    uses the formulas (though not the code) presented in [4]_ and [5]_.

    References
    ----------
    .. [1] Levin, Bruce, "A representation for multinomial cumulative
           distribution functions," The Annals of Statistics, Vol. 9, No. 5,
           1981, pp. 1123-1126.

    .. [2] Goodman, L.A., "On simultaneous confidence intervals for multinomial
           proportions," Technometrics, Vol. 7, No. 2, 1965, pp. 247-254.

    .. [3] Sison, Cristina P., and Joseph Glaz, "Simultaneous Confidence
           Intervals and Sample Size Determination for Multinomial
           Proportions," Journal of the American Statistical Association,
           Vol. 90, No. 429, 1995, pp. 366-369.

    .. [4] May, Warren L., and William D. Johnson, "A SAS® macro for
           constructing simultaneous confidence intervals for multinomial
           proportions," Computer methods and programs in Biomedicine, Vol. 53,
           No. 3, 1997, pp. 153-162.

    .. [5] May, Warren L., and William D. Johnson, "Constructing two-sided
           simultaneous confidence intervals for multinomial proportions for
           small counts in a large number of cells," Journal of Statistical
           Software, Vol. 5, No. 6, 2000, pp. 1-24.
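
    Examples
    --------
    A small sketch with illustrative counts (all at least 5, so `goodman`
    applies):

    >>> import numpy as np
    >>> from statsmodels.stats.proportion import multinomial_proportions_confint
    >>> counts = np.array([20, 34, 25, 21])
    >>> ci = multinomial_proportions_confint(counts, alpha=0.05)
    >>> ci.shape
    (4, 2)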
    """
    if alpha <= 0 or alpha >= 1:
        raise ValueError('alpha must be in (0, 1), bounds excluded')
    counts = np.array(counts, dtype=float)
    if (counts < 0).any():
        raise ValueError('counts must be >= 0')

    n = counts.sum()
    k = len(counts)
    proportions = counts / n
    if method == 'goodman':
        chi2 = stats.chi2.ppf(1 - alpha / k, 1)
        delta = chi2 ** 2 + (4 * n * proportions * chi2 * (1 - proportions))
        region = ((2 * n * proportions + chi2 +
                   np.array([- np.sqrt(delta), np.sqrt(delta)])) /
                  (2 * (chi2 + n))).T
    elif method[:5] == 'sison':  # We accept any name starting with 'sison'
        # Define a few functions we'll use a lot.
        def poisson_interval(interval, p):
            """
            Compute P(b <= Z <= a) where Z ~ Poisson(p) and
            `interval = (b, a)`.
            """
            b, a = interval
            prob = stats.poisson.cdf(a, p) - stats.poisson.cdf(b - 1, p)
            return prob

        def truncated_poisson_factorial_moment(interval, r, p):
            """
            Compute mu_r, the r-th factorial moment of a Poisson random
            variable of parameter `p` truncated to `interval = (b, a)`.
            """
            b, a = interval
            return p ** r * (1 - ((poisson_interval((a - r + 1, a), p) -
                                   poisson_interval((b - r, b - 1), p)) /
                                  poisson_interval((b, a), p)))

        def edgeworth(intervals):
            """
            Compute the Edgeworth expansion term of Sison & Glaz's formula
            (1) (approximated probability for multinomial proportions in a
            given box).
            """
            # Compute means and central moments of the truncated Poisson
            # variables.
            mu_r1, mu_r2, mu_r3, mu_r4 = (
                np.array([truncated_poisson_factorial_moment(interval, r, p)
                          for (interval, p) in zip(intervals, counts)])
                for r in range(1, 5)
            )
            mu = mu_r1
            mu2 = mu_r2 + mu - mu ** 2
            mu3 = mu_r3 + mu_r2 * (3 - 3 * mu) + mu - 3 * mu ** 2 + 2 * mu ** 3
            mu4 = (mu_r4 + mu_r3 * (6 - 4 * mu) +
                   mu_r2 * (7 - 12 * mu + 6 * mu ** 2) +
                   mu - 4 * mu ** 2 + 6 * mu ** 3 - 3 * mu ** 4)

            # Compute expansion factors, gamma_1 and gamma_2.
            g1 = mu3.sum() / mu2.sum() ** 1.5
            g2 = (mu4.sum() - 3 * (mu2 ** 2).sum()) / mu2.sum() ** 2

            # Compute the expansion itself.
            x = (n - mu.sum()) / np.sqrt(mu2.sum())
            phi = np.exp(- x ** 2 / 2) / np.sqrt(2 * np.pi)
            H3 = x ** 3 - 3 * x
            H4 = x ** 4 - 6 * x ** 2 + 3
            H6 = x ** 6 - 15 * x ** 4 + 45 * x ** 2 - 15
            f = phi * (1 + g1 * H3 / 6 + g2 * H4 / 24 + g1 ** 2 * H6 / 72)
            return f / np.sqrt(mu2.sum())

        def approximated_multinomial_interval(intervals):
            """
            Compute approximated probability for Multinomial(n, proportions)
            to be in `intervals` (Sison & Glaz's formula (1)).
            """
            return np.exp(
                np.sum(np.log([poisson_interval(interval, p)
                               for (interval, p) in zip(intervals, counts)])) +
                np.log(edgeworth(intervals)) -
                np.log(stats.poisson._pmf(n, n))
            )

        def nu(c):
            """
            Compute interval coverage for a given `c` (Sison & Glaz's
            formula (7)).
            """
            return approximated_multinomial_interval(
                [(np.maximum(count - c, 0), np.minimum(count + c, n))
                 for count in counts])

        # Find the value of `c` that will give us the confidence intervals
        # (solving nu(c) <= 1 - alpha < nu(c + 1).
        c = 1.0
        nuc = nu(c)
        nucp1 = nu(c + 1)
        while not (nuc <= (1 - alpha) < nucp1):
            if c > n:
                raise Exception("Couldn't find a value for `c` that "
                                "solves nu(c) <= 1 - alpha < nu(c + 1)")
            c += 1
            nuc = nucp1
            nucp1 = nu(c + 1)

        # Compute gamma and the corresponding confidence intervals.
        g = (1 - alpha - nuc) / (nucp1 - nuc)
        ci_lower = np.maximum(proportions - c / n, 0)
        ci_upper = np.minimum(proportions + (c + 2 * g) / n, 1)
        region = np.array([ci_lower, ci_upper]).T
    else:
        raise NotImplementedError('method "%s" is not available' % method)
    return region


def samplesize_confint_proportion(proportion, half_length, alpha=0.05,
                                  method='normal'):
    """
    Find sample size to get desired confidence interval length

    Parameters
    ----------
    proportion : float in (0, 1)
        proportion or quantile
    half_length : float in (0, 1)
        desired half length of the confidence interval
    alpha : float in (0, 1)
        significance level, default 0.05,
        coverage of the two-sided interval is (approximately) ``1 - alpha``
    method : str in ['normal']
        method to use for confidence interval,
        currently only normal approximation

    Returns
    -------
    n : float
        sample size to get the desired half length of the confidence interval

    Notes
    -----
    This is mainly to store the formula.
    Possible application: number of replications in bootstrap samples
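
    Examples
    --------
    Sample size for a half length of 0.05 at an anticipated proportion of
    0.5 (illustrative; round up to the next integer in practice):

    >>> from statsmodels.stats.proportion import samplesize_confint_proportion
    >>> n = samplesize_confint_proportion(0.5, 0.05)
    >>> 380 < n < 390
    True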
    """
    q_ = proportion
    if method == 'normal':
        n = q_ * (1 - q_) / (half_length / stats.norm.isf(alpha / 2.))**2
    else:
        raise NotImplementedError('only "normal" is available')

    return n


def proportion_effectsize(prop1, prop2, method='normal'):
    """
    Effect size for a test comparing two proportions

    for use in power function

    Parameters
    ----------
    prop1, prop2 : float or array_like
        The proportion value(s).

    Returns
    -------
    es : float or ndarray
        effect size for (transformed) prop1 - prop2

    Notes
    -----
    only method='normal' is implemented to match pwr.p2.test
    see http://www.statmethods.net/stats/power.html

    Effect size for `normal` is defined as ::

        2 * (arcsin(sqrt(prop1)) - arcsin(sqrt(prop2)))

    Other conversions to normality could also be used, but this still needs
    to be checked.

    Examples
    --------
    >>> import statsmodels.api as sm
    >>> sm.stats.proportion_effectsize(0.5, 0.4)
    0.20135792079033088
    >>> sm.stats.proportion_effectsize([0.3, 0.4, 0.5], 0.4)
    array([-0.21015893,  0.        ,  0.20135792])

    """
    if method != 'normal':
        raise ValueError('only "normal" is implemented')

    es = 2 * (np.arcsin(np.sqrt(prop1)) - np.arcsin(np.sqrt(prop2)))
    return es


def std_prop(prop, nobs):
    """
    Standard error for the estimate of a proportion

    This is just ``np.sqrt(p * (1. - p) / nobs)``

    Parameters
    ----------
    prop : array_like
        proportion
    nobs : int, array_like
        number of observations

    Returns
    -------
    std : array_like
        standard error for a proportion of nobs independent observations
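
    Examples
    --------
    >>> from statsmodels.stats.proportion import std_prop
    >>> round(float(std_prop(0.5, 100)), 6)
    0.05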
    """
    return np.sqrt(prop * (1. - prop) / nobs)


def _std_diff_prop(p1, p2, ratio=1):
    return np.sqrt(p1 * (1 - p1) + p2 * (1 - p2) / ratio)


def _power_ztost(mean_low, var_low, mean_upp, var_upp, mean_alt, var_alt,
                 alpha=0.05, discrete=True, dist='norm', nobs=None,
                 continuity=0, critval_continuity=0):
    """
    Generic statistical power function for normal based equivalence test

    This includes options to adjust the normal approximation and can use
    the binomial to evaluate the probability of the rejection region

    see power_ztost_prob for a description of the options
    """
    # TODO: refactor structure, separate norm and binom better
    if not isinstance(continuity, tuple):
        continuity = (continuity, continuity)
    crit = stats.norm.isf(alpha)
    k_low = mean_low + np.sqrt(var_low) * crit
    k_upp = mean_upp - np.sqrt(var_upp) * crit
    if discrete or dist == 'binom':
        k_low = np.ceil(k_low * nobs + 0.5 * critval_continuity)
        k_upp = np.trunc(k_upp * nobs - 0.5 * critval_continuity)
        if dist == 'norm':
            # need proportions, not counts
            k_low = (k_low) * 1. / nobs  # -1 to match PASS
            k_upp = k_upp * 1. / nobs
    if np.any(k_low > k_upp):  # TODO: vectorize
        import warnings
        warnings.warn("no overlap, power is zero", HypothesisTestWarning)
    std_alt = np.sqrt(var_alt)
    z_low = (k_low - mean_alt - continuity[0] * 0.5 / nobs) / std_alt
    z_upp = (k_upp - mean_alt + continuity[1] * 0.5 / nobs) / std_alt
    if dist == 'norm':
        power = stats.norm.cdf(z_upp) - stats.norm.cdf(z_low)
    elif dist == 'binom':
        power = (stats.binom.cdf(k_upp, nobs, mean_alt) -
                 stats.binom.cdf(k_low - 1, nobs, mean_alt))
    return power, (k_low, k_upp, z_low, z_upp)


def binom_tost(count, nobs, low, upp):
    """
    Exact TOST test for one proportion using binomial distribution

    Parameters
    ----------
    count : {int, array_like}
        the number of successes in nobs trials.
    nobs : int
        the number of trials or observations.
    low, upp : floats
        lower and upper limit of equivalence region

    Returns
    -------
    pvalue : float
        p-value of equivalence test
    pval_low, pval_upp : floats
        p-values of lower and upper one-sided tests
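
    Examples
    --------
    Equivalence of 52 successes in 100 trials to 0.5 within the margins
    (0.4, 0.6) (illustrative numbers):

    >>> from statsmodels.stats.proportion import binom_tost
    >>> pvalue, pval_low, pval_upp = binom_tost(52, 100, 0.4, 0.6)
    >>> pvalue == max(pval_low, pval_upp)
    True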
    """
    # binom_test_stat only returns pval
    tt1 = binom_test(count, nobs, alternative='larger', prop=low)
    tt2 = binom_test(count, nobs, alternative='smaller', prop=upp)
    return np.maximum(tt1, tt2), tt1, tt2


def binom_tost_reject_interval(low, upp, nobs, alpha=0.05):
    """
    Rejection region for binomial TOST

    The interval includes the end points,
    `reject` if and only if `r_low <= x <= r_upp`.

    The interval might be empty with `r_upp < r_low`.

    Parameters
    ----------
    low, upp : floats
        lower and upper limit of equivalence region
    nobs : int
        the number of trials or observations.
    alpha : float in (0, 1)
        significance level of each of the two one-sided tests, default 0.05

    Returns
    -------
    x_low, x_upp : float
        lower and upper bound of rejection region

    """
    x_low = stats.binom.isf(alpha, nobs, low) + 1
    x_upp = stats.binom.ppf(alpha, nobs, upp) - 1
    return x_low, x_upp


def binom_test_reject_interval(value, nobs, alpha=0.05,
                               alternative='two-sided'):
    """
    Rejection region for binomial test for one sample proportion

    The interval includes the end points of the rejection region.

    Parameters
    ----------
    value : float
        proportion under the Null hypothesis
    nobs : int
        the number of trials or observations.
    alpha : float in (0, 1)
        significance level of the test; for the two-sided alternative,
        alpha / 2 is used in each tail
    alternative : str in ['two-sided', 'smaller', 'larger']
        alternative hypothesis, which can be two-sided or either one of the
        one-sided tests.

    Returns
    -------
    x_low, x_upp : int
        lower and upper bound of rejection region
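
    Examples
    --------
    Two-sided rejection region for a null proportion of 0.5 with 30 trials
    (illustrative; counts strictly between the bounds do not reject):

    >>> from statsmodels.stats.proportion import binom_test_reject_interval
    >>> x_low, x_upp = binom_test_reject_interval(0.5, 30)
    >>> x_low < 15 < x_upp
    True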
    """
    if alternative in ['2s', 'two-sided']:
        alternative = '2s'  # normalize alternative name
        alpha = alpha / 2

    if alternative in ['2s', 'smaller']:
        x_low = stats.binom.ppf(alpha, nobs, value) - 1
    else:
        x_low = 0
    if alternative in ['2s', 'larger']:
        x_upp = stats.binom.isf(alpha, nobs, value) + 1
    else:
        x_upp = nobs

    return int(x_low), int(x_upp)


def binom_test(count, nobs, prop=0.5, alternative='two-sided'):
    """
    Perform a test that the probability of success is p.

    This is an exact test of the null hypothesis that the probability of
    success in a Bernoulli experiment is `p`; the alternative can be
    two-sided or one-sided.

    Parameters
    ----------
    count : {int, array_like}
        the number of successes in nobs trials.
    nobs : int
        the number of trials or observations.
    prop : float, optional
        The probability of success under the null hypothesis,
        `0 <= prop <= 1`. The default value is `prop = 0.5`
    alternative : str in ['two-sided', 'smaller', 'larger']
        alternative hypothesis, which can be two-sided or either one of the
        one-sided tests.

    Returns
    -------
    p-value : float
        The p-value of the hypothesis test

    Notes
    -----
    This uses scipy.stats.binom_test for the two-sided alternative.
    """

    if np.any(prop > 1.0) or np.any(prop < 0.0):
        raise ValueError("p must be in range [0,1]")
    if alternative in ['2s', 'two-sided']:
        try:
            pval = stats.binomtest(count, n=nobs, p=prop).pvalue
        except AttributeError:
            # Remove after min SciPy >= 1.7
            pval = stats.binom_test(count, n=nobs, p=prop)
    elif alternative in ['l', 'larger']:
        pval = stats.binom.sf(count - 1, nobs, prop)
    elif alternative in ['s', 'smaller']:
        pval = stats.binom.cdf(count, nobs, prop)
    else:
        raise ValueError('alternative not recognized\n'
                         'should be two-sided, larger or smaller')
    return pval


def power_binom_tost(low, upp, nobs, p_alt=None, alpha=0.05):
    """
    Power of the exact binomial TOST for one proportion.

    If `p_alt` is None, the proportion under the alternative defaults to
    the midpoint of the equivalence region, ``0.5 * (low + upp)``.
    """
    if p_alt is None:
        p_alt = 0.5 * (low + upp)
    x_low, x_upp = binom_tost_reject_interval(low, upp, nobs, alpha=alpha)
    power = (stats.binom.cdf(x_upp, nobs, p_alt) -
             stats.binom.cdf(x_low - 1, nobs, p_alt))
    return power


def power_ztost_prop(low, upp, nobs, p_alt, alpha=0.05, dist='norm',
                     variance_prop=None, discrete=True, continuity=0,
                     critval_continuity=0):
    """
    Power of proportions equivalence test based on normal distribution

    Parameters
    ----------
    low, upp : floats
        lower and upper limit of equivalence region
    nobs : int
        number of observations
    p_alt : float in (0,1)
        proportion under the alternative
    alpha : float in (0,1)
        significance level of the test
    dist : str in ['norm', 'binom']
        This defines the distribution to evaluate the power of the test. The
        critical values of the TOST test are always based on the normal
        approximation, but the distribution for the power can be either the
        normal (default) or the binomial (exact) distribution.
    variance_prop : None or float in (0,1)
        If this is None, then the variances for the two one sided tests are
        based on the proportions equal to the equivalence limits.
        If variance_prop is given, then it is used to calculate the variance
        for the TOST statistics. If this is based on a sample, then the
        estimated proportion can be used.
    discrete : bool
        If true, then the critical values of the rejection region are
        converted to integers. If dist is "binom", this is automatically
        assumed. If discrete is false, then the TOST critical values are
        used as floating point numbers, and the power is calculated based
        on the rejection region that is not discretized.
    continuity : bool or float
        adjust the rejection region for the normal power probability. This
        has an effect only if ``dist='norm'``.
    critval_continuity : bool or float
        If this is non-zero, then the critical values of the TOST rejection
        region are adjusted before converting to integers. This affects both
        distributions, ``dist='norm'`` and ``dist='binom'``.

    Returns
    -------
    power : float
        statistical power of the equivalence test.
    (k_low, k_upp, z_low, z_upp) : tuple of floats
        critical limits in intermediate steps
        temporary return, will be changed

    Notes
    -----
    In small samples the power for the ``discrete`` version has a sawtooth
    pattern as a function of the number of observations. As a consequence,
    small changes in the number of observations or in the normal
    approximation can have a large effect on the power.

    ``continuity`` and ``critval_continuity`` are added to match some results
    of PASS, and are mainly to investigate the sensitivity of the ztost power
    to small changes in the rejection region. From my interpretation of the
    equations in the SAS manual, both are zero in SAS.

    The function works vectorized.

    **verification:**

    The ``dist='binom'`` results match PASS.
    The ``dist='norm'`` results look reasonable, but no benchmark is
    available.

    References
    ----------
    SAS Manual: Chapter 68: The Power Procedure, Computational Resources
    PASS Chapter 110: Equivalence Tests for One Proportion.
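
    Examples
    --------
    A rough power sketch for margins (0.4, 0.6) at p_alt = 0.5 with 100
    observations (illustrative values):

    >>> from statsmodels.stats.proportion import power_ztost_prop
    >>> power, _ = power_ztost_prop(0.4, 0.6, 100, 0.5)
    >>> bool(0 <= power <= 1)
    True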
    """
    mean_low = low
    var_low = std_prop(low, nobs)**2
    mean_upp = upp
    var_upp = std_prop(upp, nobs)**2
    mean_alt = p_alt
    var_alt = std_prop(p_alt, nobs)**2
    if variance_prop is not None:
        var_low = var_upp = std_prop(variance_prop, nobs)**2
    power = _power_ztost(mean_low, var_low, mean_upp, var_upp, mean_alt,
                         var_alt, alpha=alpha, discrete=discrete, dist=dist,
                         nobs=nobs, continuity=continuity,
                         critval_continuity=critval_continuity)
    return np.maximum(power[0], 0), power[1:]


def _table_proportion(count, nobs):
    """
    Create a k by 2 contingency table for proportions

    helper function for proportions_chisquare

    Parameters
    ----------
    count : {int, array_like}
        the number of successes in nobs trials.
    nobs : int
        the number of trials or observations.

    Returns
    -------
    table : ndarray
        (k, 2) contingency table
    expected : ndarray
        (k, 2) table of counts expected under independence with the
        given margins
    n_rows : int
        number of rows k in the contingency table

    Notes
    -----
    recent scipy has more elaborate contingency table functions

    """
    count = np.asarray(count)
    dt = np.promote_types(count.dtype, np.float64)
    count = np.asarray(count, dtype=dt)
    table = np.column_stack((count, nobs - count))
    expected = table.sum(0) * table.sum(1)[:, None] * 1. / table.sum()
    n_rows = table.shape[0]
    return table, expected, n_rows


def proportions_ztest(count, nobs, value=None, alternative='two-sided',
                      prop_var=False):
    """
    Test for proportions based on normal (z) test

    Parameters
    ----------
    count : {int, array_like}
        the number of successes in nobs trials. If this is array_like, then
        the assumption is that this represents the number of successes for
        each independent sample
    nobs : {int, array_like}
        the number of trials or observations, with the same length as
        count.
    value : float, array_like or None, optional
        This is the value of the null hypothesis equal to the proportion in
        the case of a one sample test. In the case of a two-sample test, the
        null hypothesis is that prop[0] - prop[1] = value, where prop is the
        proportion in the two samples. If not provided value = 0 and the null
        is prop[0] = prop[1]
    alternative : str in ['two-sided', 'smaller', 'larger']
        The alternative hypothesis can be either two-sided or one of the one-
        sided tests, smaller means that the alternative hypothesis is
        ``prop < value`` and larger means ``prop > value``. In the two sample
        test, smaller means that the alternative hypothesis is ``p1 < p2``
        and larger means ``p1 > p2`` where ``p1`` is the proportion of the
        first sample and ``p2`` of the second one.
    prop_var : False or float in (0, 1)
        If prop_var is false, then the variance of the proportion estimate is
        calculated based on the sample proportion. Alternatively, a proportion
        can be specified to calculate this variance. Common use case is to
        use the proportion under the Null hypothesis to specify the variance
        of the proportion estimate.

    Returns
    -------
    zstat : float
        test statistic for the z-test
    p-value : float
        p-value for the z-test

    Examples
    --------
    >>> count = 5
    >>> nobs = 83
    >>> value = .05
    >>> stat, pval = proportions_ztest(count, nobs, value)
    >>> print('{0:0.3f}'.format(pval))
    0.695

    >>> import numpy as np
    >>> from statsmodels.stats.proportion import proportions_ztest
    >>> count = np.array([5, 12])
    >>> nobs = np.array([83, 99])
    >>> stat, pval = proportions_ztest(count, nobs)
    >>> print('{0:0.3f}'.format(pval))
    0.159

    Notes
    -----
    This uses a simple normal test for proportions. It should be the same as
    running the mean z-test on the data encoded 1 for event and 0 for no event
    so that the sum corresponds to the count.

    In the one and two sample cases with two-sided alternative, this test
    produces the same p-value as ``proportions_chisquare``, since the
    chisquare is the distribution of the square of a standard normal
    distribution.
    """
    # TODO: verify that this really holds
    # TODO: add continuity correction or other improvements for small samples
    # TODO: change options similar to proportion_ztost ?

    count = np.asarray(count)
    nobs = np.asarray(nobs)

    if nobs.size == 1:
        nobs = nobs * np.ones_like(count)

    prop = count * 1. / nobs
    k_sample = np.size(prop)
    if value is None:
        if k_sample == 1:
            raise ValueError('value must be provided for a 1-sample test')
        value = 0
    if k_sample == 1:
        diff = prop - value
    elif k_sample == 2:
        diff = prop[0] - prop[1] - value
    else:
        msg = 'more than two samples are not implemented yet'
        raise NotImplementedError(msg)

    p_pooled = np.sum(count) * 1. / np.sum(nobs)

    nobs_fact = np.sum(1. / nobs)
    if prop_var:
        p_pooled = prop_var
    var_ = p_pooled * (1 - p_pooled) * nobs_fact
    std_diff = np.sqrt(var_)
    # _zstat_generic2 is imported at module level
    return _zstat_generic2(diff, std_diff, alternative)


def proportions_ztost(count, nobs, low, upp, prop_var='sample'):
    """
    Equivalence test based on normal distribution

    Parameters
    ----------
    count : {int, array_like}
        the number of successes in nobs trials. If this is array_like, then
        the assumption is that this represents the number of successes for
        each independent sample
    nobs : int
        the number of trials or observations, with the same length as
        count.
    low, upp : float
        equivalence interval low < prop1 - prop2 < upp
    prop_var : str or float in (0, 1)
        prop_var determines which proportion is used for the calculation
        of the standard deviation of the proportion estimate
        The available options for string are 'sample' (default), 'null' and
        'limits'. If prop_var is a float, then it is used directly.

    Returns
    -------
    pvalue : float
        pvalue of the non-equivalence test
    t1, pv1 : tuple of floats
        test statistic and pvalue for lower threshold test
    t2, pv2 : tuple of floats
        test statistic and pvalue for upper threshold test

    Notes
    -----
    checked only for 1 sample case
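
    Examples
    --------
    One-sample equivalence sketch with margins (0.4, 0.6) (illustrative
    counts):

    >>> from statsmodels.stats.proportion import proportions_ztost
    >>> pvalue, t1, t2 = proportions_ztost(52, 100, 0.4, 0.6)
    >>> pvalue == max(t1[1], t2[1])
    True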
    """
    if prop_var == 'limits':
        prop_var_low = low
        prop_var_upp = upp
    elif prop_var == 'sample':
        prop_var_low = prop_var_upp = False  # ztest uses sample
    elif prop_var == 'null':
        prop_var_low = prop_var_upp = 0.5 * (low + upp)
    elif np.isreal(prop_var):
        prop_var_low = prop_var_upp = prop_var
    else:
        raise ValueError("prop_var option not recognized")

    tt1 = proportions_ztest(count, nobs, alternative='larger',
                            prop_var=prop_var_low, value=low)
    tt2 = proportions_ztest(count, nobs, alternative='smaller',
                            prop_var=prop_var_upp, value=upp)
    return np.maximum(tt1[1], tt2[1]), tt1, tt2


def proportions_chisquare(count, nobs, value=None):
    """
    Test for proportions based on chisquare test

    Parameters
    ----------
    count : {int, array_like}
        the number of successes in nobs trials. If this is array_like, then
        the assumption is that this represents the number of successes for
        each independent sample
    nobs : int
        the number of trials or observations, with the same length as
        count.
    value : None or float or array_like
        proportion(s) to test against under the null hypothesis. If value
        is None, then the null hypothesis is that all samples have the same
        proportion.

    Returns
    -------
    chi2stat : float
        test statistic for the chisquare test
    p-value : float
        p-value for the chisquare test
    (table, expected)
        table is a (k, 2) contingency table, ``expected`` is the
        corresponding table of counts that are expected under independence
        with given margins

    Notes
    -----
    Recent versions of scipy.stats have a chisquare test for independence in
    contingency tables.

    This function provides a similar interface to chisquare tests as
    ``prop.test`` in R, however without the option for Yates continuity
    correction.

    count can be the count for the number of events for a single proportion,
    or the counts for several independent proportions. If value is given, then
    all proportions are jointly tested against this value. If value is not
    given and count and nobs are not scalar, then the null hypothesis is
    that all samples have the same proportion.
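
    Examples
    --------
    A two-sample sketch with illustrative counts:

    >>> import numpy as np
    >>> from statsmodels.stats.proportion import proportions_chisquare
    >>> chi2stat, pval, (table, expected) = proportions_chisquare(
    ...     np.array([20, 25]), np.array([50, 50]))
    >>> table.shape
    (2, 2)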
    """
    nobs = np.atleast_1d(nobs)
    table, expected, n_rows = _table_proportion(count, nobs)
    if value is not None:
        expected = np.column_stack((nobs * value, nobs * (1 - value)))
        ddof = n_rows - 1
    else:
        ddof = n_rows

    chi2stat, pval = stats.chisquare(table.ravel(), expected.ravel(),
                                     ddof=ddof)
    return chi2stat, pval, (table, expected)


def proportions_chisquare_allpairs(count, nobs, multitest_method='hs'):
    """
    Chisquare test of proportions for all pairs of k samples

    Performs a chisquare test for proportions for all pairwise comparisons.
    The alternative is two-sided

    Parameters
    ----------
    count : {int, array_like}
        the number of successes in nobs trials.
    nobs : int
        the number of trials or observations.
    multitest_method : str
        This chooses the method for the multiple testing p-value correction,
        that is used as default in the results.
        It can be any method that is available in ``multipletesting``.
        The default is Holm-Sidak 'hs'.

    Returns
    -------
    result : AllPairsResults instance
        The returned results instance has several statistics, such as
        p-values, attached, and additional methods for using a non-default
        ``multitest_method``.

    Notes
    -----
    Yates continuity correction is not available.
    """
    all_pairs = lzip(*np.triu_indices(len(count), 1))
    pvals = [proportions_chisquare(count[list(pair)], nobs[list(pair)])[1]
             for pair in all_pairs]
    return AllPairsResults(pvals, all_pairs, multitest_method=multitest_method)


def proportions_chisquare_pairscontrol(count, nobs, value=None,
                                       multitest_method='hs',
                                       alternative='two-sided'):
    """
    Chisquare test of proportions for pairs of k samples compared to control

    Performs a chisquare test for proportions for pairwise comparisons with a
    control (Dunnett's test). The control is assumed to be the first element
    of ``count`` and ``nobs``. The alternative is two-sided, larger or
    smaller.

    Parameters
    ----------
    count : {int, array_like}
        the number of successes in nobs trials.
    nobs : int
        the number of trials or observations.
    multitest_method : str
        This chooses the method for the multiple testing p-value correction,
        that is used as default in the results.
        It can be any method that is available in ``multipletesting``.
        The default is Holm-Sidak 'hs'.
    alternative : str in ['two-sided', 'smaller', 'larger']
        alternative hypothesis, which can be two-sided or either one of the
        one-sided tests.

    Returns
    -------
    result : AllPairsResults instance
        The returned results instance has several statistics, such as
        p-values, attached, and additional methods for using a non-default
        ``multitest_method``.

    Notes
    -----
    Yates continuity correction is not available.

    ``value`` and ``alternative`` options are not yet implemented.

    """
    if (value is not None) or (alternative not in ['two-sided', '2s']):
        raise NotImplementedError
    all_pairs = [(0, k) for k in range(1, len(count))]
    pvals = [proportions_chisquare(count[list(pair)], nobs[list(pair)])[1]
             for pair in all_pairs]
    return AllPairsResults(pvals, all_pairs, multitest_method=multitest_method)


def confint_proportions_2indep(count1, nobs1, count2, nobs2, method=None,
                               compare='diff', alpha=0.05, correction=True):
    """
    Confidence intervals for comparing two independent proportions.

    This assumes that we have two independent binomial samples.

    Parameters
    ----------
    count1, nobs1 : float
        Count and sample size for first sample.
    count2, nobs2 : float
        Count and sample size for the second sample.
    method : str
        Method for computing confidence interval. If method is None, then a
        default method is used. The default might change as more methods are
        added.

        diff:

        - 'wald'
        - 'agresti-caffo'
        - 'newcomb' (default)
        - 'score'

        ratio:

        - 'log'
        - 'log-adjusted' (default)
        - 'score'

        odds-ratio:

        - 'logit'
        - 'logit-adjusted' (default)
        - 'score'

    compare : str in ['diff', 'ratio', 'odds-ratio']
        If compare is diff, then the confidence interval is for
        diff = p1 - p2.
        If compare is ratio, then the confidence interval is for the risk
        ratio defined by ratio = p1 / p2.
        If compare is odds-ratio, then the confidence interval is for the
        odds-ratio defined by or = p1 / (1 - p1) / (p2 / (1 - p2)).
    alpha : float
        Significance level for the confidence interval, default is 0.05.
        The nominal coverage probability is 1 - alpha.
    correction : bool
        If correction is True (default), then the Miettinen and Nurminen
        small sample correction to the variance nobs / (nobs - 1) is used.
        Applies only if method='score'.

    Returns
    -------
    low, upp
        lower and upper confidence bounds

    See Also
    --------
    test_proportions_2indep
    tost_proportions_2indep

    Notes
    -----
    Status: experimental, API and defaults might still change.
    More ``methods`` will be added.

    References
    ----------
    .. [1] Fagerland, Morten W., Stian Lydersen, and Petter Laake. 2015.
       “Recommended Confidence Intervals for Two Independent Binomial
       Proportions.” Statistical Methods in Medical Research 24 (2): 224–54.
       https://doi.org/10.1177/0962280211415469.
    .. [2] Koopman, P. A. R. 1984. “Confidence Intervals for the Ratio of Two
       Binomial Proportions.” Biometrics 40 (2): 513–17.
       https://doi.org/10.2307/2531405.
    .. [3] Miettinen, Olli, and Markku Nurminen. "Comparative analysis of two
       rates." Statistics in Medicine 4, no. 2 (1985): 213-226.
    .. [4] Newcombe, Robert G. 1998. “Interval Estimation for the Difference
       between Independent Proportions: Comparison of Eleven Methods.”
       Statistics in Medicine 17 (8): 873–90.
       https://doi.org/10.1002/(SICI)1097-0258(19980430)17:8<873::AID-
       SIM779>3.0.CO;2-I.
    .. [5] Newcombe, Robert G., and Markku M. Nurminen. 2011. “In Defence of
       Score Intervals for Proportions and Their Differences.” Communications
       in Statistics - Theory and Methods 40 (7): 1271–82.
       https://doi.org/10.1080/03610920903576580.
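
    Examples
    --------
    Newcombe interval for a difference in proportions (illustrative counts):

    >>> from statsmodels.stats.proportion import confint_proportions_2indep
    >>> low, upp = confint_proportions_2indep(7, 34, 1, 34, compare='diff')
    >>> low < 7 / 34 - 1 / 34 < upp
    True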
    """
    method_default = {'diff': 'newcomb',
                      'ratio': 'log-adjusted',
                      'odds-ratio': 'logit-adjusted'}
    # normalize compare name
    if compare.lower() == 'or':
        compare = 'odds-ratio'
    if method is None:
        method = method_default[compare]

    method = method.lower()
    if method.startswith('agr'):
        method = 'agresti-caffo'

    p1 = count1 / nobs1
    p2 = count2 / nobs2
    diff = p1 - p2
    addone = 1 if method == 'agresti-caffo' else 0

    if compare == 'diff':
        if method in ['wald', 'agresti-caffo']:
            count1_, nobs1_ = count1 + addone, nobs1 + 2 * addone
            count2_, nobs2_ = count2 + addone, nobs2 + 2 * addone
            p1_ = count1_ / nobs1_
            p2_ = count2_ / nobs2_
            diff_ = p1_ - p2_
            var = p1_ * (1 - p1_) / nobs1_ + p2_ * (1 - p2_) / nobs2_
            z = stats.norm.isf(alpha / 2)
            d_wald = z * np.sqrt(var)
            low = diff_ - d_wald
            upp = diff_ + d_wald

        elif method.startswith('newcomb'):
            low1, upp1 = proportion_confint(count1, nobs1,
                                            method='wilson', alpha=alpha)
            low2, upp2 = proportion_confint(count2, nobs2,
                                            method='wilson', alpha=alpha)
            d_low = np.sqrt((p1 - low1)**2 + (upp2 - p2)**2)
            d_upp = np.sqrt((p2 - low2)**2 + (upp1 - p1)**2)
            low = diff - d_low
            upp = diff + d_upp

        elif method == "score":
            low, upp = _score_confint_inversion(count1, nobs1, count2, nobs2,
                                                compare=compare, alpha=alpha,
                                                correction=correction)

        else:
            raise ValueError('method not recognized')

    elif compare == 'ratio':
        # ratio = p1 / p2
        if method in ['log', 'log-adjusted']:
            addhalf = 0.5 if method == 'log-adjusted' else 0
            count1_, nobs1_ = count1 + addhalf, nobs1 + addhalf
            count2_, nobs2_ = count2 + addhalf, nobs2 + addhalf
            p1_ = count1_ / nobs1_
            p2_ = count2_ / nobs2_
            ratio_ = p1_ / p2_
            var = (1 / count1_) - 1 / nobs1_ + 1 / count2_ - 1 / nobs2_
            z = stats.norm.isf(alpha / 2)
            d_log = z * np.sqrt(var)
            low = np.exp(np.log(ratio_) - d_log)
            upp = np.exp(np.log(ratio_) + d_log)

        elif method == 'score':
            res = _confint_riskratio_koopman(count1, nobs1, count2, nobs2,
                                             alpha=alpha,
                                             correction=correction)
            low, upp = res.confint

        else:
            raise ValueError('method not recognized')

    elif compare == 'odds-ratio':
        # odds_ratio = p1 / (1 - p1) / p2 * (1 - p2)
        if method in ['logit', 'logit-adjusted', 'logit-smoothed']:
            if method in ['logit-smoothed']:
                adjusted = _shrink_prob(count1, nobs1, count2, nobs2,
                                        shrink_factor=2, return_corr=False)[0]
                count1_, nobs1_, count2_, nobs2_ = adjusted

            else:
                addhalf = 0.5 if method == 'logit-adjusted' else 0
                count1_, nobs1_ = count1 + addhalf, nobs1 + 2 * addhalf
                count2_, nobs2_ = count2 + addhalf, nobs2 + 2 * addhalf
            p1_ = count1_ / nobs1_
            p2_ = count2_ / nobs2_
            odds_ratio_ = p1_ / (1 - p1_) / p2_ * (1 - p2_)
            var = (1 / count1_ + 1 / (nobs1_ - count1_) +
                   1 / count2_ + 1 / (nobs2_ - count2_))
            z = stats.norm.isf(alpha / 2)
            d_log = z * np.sqrt(var)
            low = np.exp(np.log(odds_ratio_) - d_log)
            upp = np.exp(np.log(odds_ratio_) + d_log)

        elif method == "score":
            low, upp = _score_confint_inversion(count1, nobs1, count2, nobs2,
                                                compare=compare, alpha=alpha,
                                                correction=correction)

        else:
            raise ValueError('method not recognized')

    else:
        raise ValueError('compare not recognized')

    return low, upp


def _shrink_prob(count1, nobs1, count2, nobs2, shrink_factor=2,
                 return_corr=True):
    """
    Shrink observed counts towards independence

    Helper function for 'logit-smoothed' inference for the odds-ratio of two
    independent proportions.

    Parameters
    ----------
    count1, nobs1 : float or int
        count and sample size for first sample
    count2, nobs2 : float or int
        count and sample size for the second sample
    shrink_factor : float
        This corresponds to the number of observations that are added in
        total, proportional to the probabilities under independence.
    return_corr : bool
        If true, then only the correction term is returned.
        If false, then the corrected counts, i.e. original counts plus
        correction term, are returned.

    Returns
    -------
    count1_corr, nobs1_corr, count2_corr, nobs2_corr : float
        correction or corrected counts
    prob_indep : ndarray
        TODO/Warning : this will change most likely
        probabilities under independence, only returned if return_corr is
        false.

    """
    vectorized = any(np.size(i) > 1 for i in [count1, nobs1, count2, nobs2])
    if vectorized:
        raise ValueError("function is not vectorized")
    nobs_col = np.array([count1 + count2, nobs1 - count1 + nobs2 - count2])
    nobs_row = np.array([nobs1, nobs2])
    nobs = nobs1 + nobs2
    prob_indep = (nobs_col * nobs_row[:, None]) / nobs**2
    corr = shrink_factor * prob_indep
    if return_corr:
        return (corr[0, 0], corr[0].sum(), corr[1, 0], corr[1].sum())
    else:
        return (count1 + corr[0, 0], nobs1 + corr[0].sum(),
                count2 + corr[1, 0], nobs2 + corr[1].sum()), prob_indep


def score_test_proportions_2indep(count1, nobs1, count2, nobs2, value=None,
                                  compare='diff', alternative='two-sided',
                                  correction=True, return_results=True):
    """
    Score test for two independent proportions

    This uses the constrained estimate of the proportions to compute
    the variance under the Null hypothesis.

    Parameters
    ----------
    count1, nobs1 : float or int
        count and sample size for first sample
    count2, nobs2 : float or int
        count and sample size for the second sample
    value : float
        diff, ratio or odds-ratio under the null hypothesis. If value is
        None, then equality of proportions under the Null is assumed,
        i.e. value=0 for 'diff' or value=1 for either ratio or odds-ratio.
    compare : str in ['diff', 'ratio', 'odds-ratio']
        If compare is diff, then the test is for diff = p1 - p2.
        If compare is ratio, then the test is for the risk ratio
        defined by ratio = p1 / p2.
        If compare is odds-ratio, then the test is for the
        odds-ratio defined by or = p1 / (1 - p1) / (p2 / (1 - p2)).
    alternative : str in ['two-sided', 'smaller', 'larger']
        alternative hypothesis, which can be two-sided or either one of the
        one-sided tests.
    correction : bool
        If correction is True (default), then the Miettinen and Nurminen
        small sample correction to the variance nobs / (nobs - 1) is used.
    return_results : bool
        If true, then a results instance with extra information is returned,
        otherwise a tuple with statistic and pvalue is returned.

    Returns
    -------
    results : results instance or tuple
        If return_results is True, then a results instance with the
        information in attributes is returned.
        If return_results is False, then only ``statistic`` and ``pvalue``
        are returned.

        statistic : float
            test statistic asymptotically normal distributed N(0, 1)
        pvalue : float
            p-value based on normal distribution
        other attributes :
            additional information about the hypothesis test

    Notes
    -----
    Status: experimental, the type or extra information in the return might
    change.
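
    Examples
    --------
    Score test of equality of two proportions (illustrative counts):

    >>> from statsmodels.stats.proportion import score_test_proportions_2indep
    >>> res = score_test_proportions_2indep(7, 34, 1, 34, compare='diff')
    >>> bool(0 <= res.pvalue <= 1)
    True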
    """

    value_default = 0 if compare == 'diff' else 1
    if value is None:
        # TODO: odds ratio does not work if value=1
        value = value_default

    nobs = nobs1 + nobs2
    count = count1 + count2
    p1 = count1 / nobs1
    p2 = count2 / nobs2
    if value == value_default:
        # use pooled estimator if equality test
        # shortcut, but required for odds ratio
        prop0 = prop1 = count / nobs
    # this uses index 0 from Miettinen and Nurminen 1985
    count0, nobs0 = count2, nobs2
    p0 = p2

    if compare == 'diff':
        diff = value  # hypothesis value

        if diff != 0:
            tmp3 = nobs
            tmp2 = (nobs1 + 2 * nobs0) * diff - nobs - count
            tmp1 = (count0 * diff - nobs - 2 * count0) * diff + count
            tmp0 = count0 * diff * (1 - diff)
            q = ((tmp2 / (3 * tmp3))**3 - tmp1 * tmp2 / (6 * tmp3**2) +
                 tmp0 / (2 * tmp3))
            p = np.sign(q) * np.sqrt((tmp2 / (3 * tmp3))**2 -
                                     tmp1 / (3 * tmp3))
            a = (np.pi + np.arccos(q / p**3)) / 3

            prop0 = 2 * p * np.cos(a) - tmp2 / (3 * tmp3)
            prop1 = prop0 + diff

        var = prop1 * (1 - prop1) / nobs1 + prop0 * (1 - prop0) / nobs0
        if correction:
            var *= nobs / (nobs - 1)

        diff_stat = (p1 - p0 - diff)

    elif compare == 'ratio':
        # risk ratio
        ratio = value

        if ratio != 1:
            a = nobs * ratio
            b = -(nobs1 * ratio + count1 + nobs2 + count0 * ratio)
            c = count
            prop0 = (-b - np.sqrt(b**2 - 4 * a * c)) / (2 * a)
            prop1 = prop0 * ratio

        var = (prop1 * (1 - prop1) / nobs1 +
               ratio**2 * prop0 * (1 - prop0) / nobs0)
        if correction:
            var *= nobs / (nobs - 1)

        # NCSS looks incorrect for var, but it is what should be reported
        # diff_stat = (p1 / p0 - ratio)  # NCSS/PASS
        diff_stat = (p1 - ratio * p0)  # Miettinen Nurminen

    elif compare in ['or', 'odds-ratio']:
        # odds ratio
        oratio = value

        if oratio != 1:
            # Note: the constrained estimator does not handle odds-ratio = 1
            a = nobs0 * (oratio - 1)
            b = nobs1 * oratio + nobs0 - count * (oratio - 1)
            c = -count
            prop0 = (-b + np.sqrt(b**2 - 4 * a * c)) / (2 * a)
            prop1 = prop0 * oratio / (1 + prop0 * (oratio - 1))

        # try to avoid 0 and 1 proportions,
        # those raise Zero Division Runtime Warnings
        eps = 1e-10
        prop0 = np.clip(prop0, eps, 1 - eps)
        prop1 = np.clip(prop1, eps, 1 - eps)

        var = (1 / (prop1 * (1 - prop1) * nobs1) +
               1 / (prop0 * (1 - prop0) * nobs0))
        if correction:
            var *= nobs / (nobs - 1)

        diff_stat = ((p1 - prop1) / (prop1 * (1 - prop1)) -
                     (p0 - prop0) / (prop0 * (1 - prop0)))

    statistic, pvalue = _zstat_generic2(diff_stat, np.sqrt(var),
                                        alternative=alternative)

    if return_results:
        res = HolderTuple(statistic=statistic,
                          pvalue=pvalue,
                          compare=compare,
                          method='score',
                          variance=var,
                          alternative=alternative,
                          prop1_null=prop1,
                          prop2_null=prop0,
                          )
        return res
    else:
        return statistic, pvalue


def test_proportions_2indep(count1, nobs1, count2, nobs2, value=None,
                            method=None, compare='diff',
                            alternative='two-sided', correction=True,
                            return_results=True):
    """
    Hypothesis test for comparing two independent proportions

    This assumes that we have two independent binomial samples.

    The Null and alternative hypothesis are

    for compare = 'diff'

    - H0: prop1 - prop2 - value = 0
    - H1: prop1 - prop2 - value != 0 if alternative = 'two-sided'
    - H1: prop1 - prop2 - value > 0 if alternative = 'larger'
    - H1: prop1 - prop2 - value < 0 if alternative = 'smaller'

    for compare = 'ratio'

    - H0: prop1 / prop2 - value = 0
    - H1: prop1 / prop2 - value != 0 if alternative = 'two-sided'
    - H1: prop1 / prop2 - value > 0 if alternative = 'larger'
    - H1: prop1 / prop2 - value < 0 if alternative = 'smaller'

    for compare = 'odds-ratio'

    - H0: or - value = 0
    - H1: or - value != 0 if alternative = 'two-sided'
    - H1: or - value > 0 if alternative = 'larger'
    - H1: or - value < 0 if alternative = 'smaller'

    where odds-ratio or = prop1 / (1 - prop1) / (prop2 / (1 - prop2))

    Parameters
    ----------
    count1 : int
        Count for first sample.
    nobs1 : int
        Sample size for first sample.
    count2 : int
        Count for the second sample.
    nobs2 : int
        Sample size for the second sample.
    value : float
        Value of the difference, risk ratio or odds ratio of 2 independent
        proportions under the null hypothesis.
        Default is equal proportions, 0 for diff and 1 for risk-ratio and for
        odds-ratio.
    method : str
        Method for computing the hypothesis test. If method is None, then a
        default method is used. The default might change as more methods are
        added.

        diff:

        - 'wald'
        - 'agresti-caffo'
        - 'score': if correction is True, then this uses the degrees of
          freedom correction ``nobs / (nobs - 1)`` as in
          Miettinen and Nurminen 1985

        ratio:

        - 'log': wald test using log transformation
        - 'log-adjusted': wald test using log transformation,
          adds 0.5 to counts
        - 'score': if correction is True, then this uses the degrees of
          freedom correction ``nobs / (nobs - 1)`` as in
          Miettinen and Nurminen 1985

        odds-ratio:

        - 'logit': wald test using logit transformation
        - 'logit-adjusted': wald test using logit transformation,
          adds 0.5 to counts
        - 'logit-smoothed': wald test using logit transformation, biases
          cell counts towards independence by adding two observations in
          total.
        - 'score': if correction is True, then this uses the degrees of
          freedom correction ``nobs / (nobs - 1)`` as in
          Miettinen and Nurminen 1985

    compare : {'diff', 'ratio', 'odds-ratio'}
        If compare is `diff`, then the hypothesis test is for the risk
        difference diff = p1 - p2.
        If compare is `ratio`, then the hypothesis test is for the
        risk ratio defined by ratio = p1 / p2.
        If compare is `odds-ratio`, then the hypothesis test is for the
        odds-ratio defined by or = p1 / (1 - p1) / (p2 / (1 - p2)).
    alternative : {'two-sided', 'smaller', 'larger'}
        alternative hypothesis, which can be two-sided or either one of the
        one-sided tests.
    correction : bool
        If correction is True (default), then the Miettinen and Nurminen
        small sample correction to the variance nobs / (nobs - 1) is used.
        Applies only if method='score'.
    return_results : bool
        If true, then a results instance with extra information is returned,
        otherwise a tuple with statistic and pvalue is returned.

    Returns
    -------
    results : results instance or tuple
        If return_results is True, then a results instance with the
        information in attributes is returned.
        If return_results is False, then only ``statistic`` and ``pvalue``
        are returned.

        statistic : float
            test statistic asymptotically normal distributed N(0, 1)
        pvalue : float
            p-value based on normal distribution
        other attributes :
            additional information about the hypothesis test

    See Also
    --------
    tost_proportions_2indep
    confint_proportions_2indep

    Notes
    -----
    Status: experimental, API and defaults might still change.
    More ``methods`` will be added.

    The current default methods are

    - 'diff': 'agresti-caffo',
    - 'ratio': 'log-adjusted',
    - 'odds-ratio': 'logit-adjusted'
|
||
|
||
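
    Examples
    --------
    Minimal illustration; the counts used here are made-up numbers, not
    taken from a reference:

    >>> from statsmodels.stats.proportion import test_proportions_2indep
    >>> # risk difference with the default method ('agresti-caffo')
    >>> res = test_proportions_2indep(24, 80, 12, 80, compare='diff')
    >>> stat, pv = res.statistic, res.pvalue
    >>> # score test for the risk ratio with null value ratio = 1.5
    >>> res = test_proportions_2indep(24, 80, 12, 80, value=1.5,
    ...                               compare='ratio', method='score')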
"""
    method_default = {'diff': 'agresti-caffo',
                      'ratio': 'log-adjusted',
                      'odds-ratio': 'logit-adjusted'}
    # normalize compare name
    if compare.lower() == 'or':
        compare = 'odds-ratio'
    if method is None:
        method = method_default[compare]

    method = method.lower()
    if method.startswith('agr'):
        method = 'agresti-caffo'

    if value is None:
        # TODO: odds ratio does not work if value=1 for score test
        value = 0 if compare == 'diff' else 1

    count1, nobs1, count2, nobs2 = map(np.asarray,
                                       [count1, nobs1, count2, nobs2])

    p1 = count1 / nobs1
    p2 = count2 / nobs2
    diff = p1 - p2
    ratio = p1 / p2
    odds_ratio = p1 / (1 - p1) / p2 * (1 - p2)
    res = None

    if compare == 'diff':
        if method in ['wald', 'agresti-caffo']:
            addone = 1 if method == 'agresti-caffo' else 0
            count1_, nobs1_ = count1 + addone, nobs1 + 2 * addone
            count2_, nobs2_ = count2 + addone, nobs2 + 2 * addone
            p1_ = count1_ / nobs1_
            p2_ = count2_ / nobs2_
            diff_stat = p1_ - p2_ - value
            var = p1_ * (1 - p1_) / nobs1_ + p2_ * (1 - p2_) / nobs2_
            statistic = diff_stat / np.sqrt(var)
            distr = 'normal'

        elif method.startswith('newcomb'):
            msg = 'newcomb not available for hypothesis test'
            raise NotImplementedError(msg)

        elif method == 'score':
            # Note: the score branch is the same call for all compare options
            res = score_test_proportions_2indep(count1, nobs1, count2, nobs2,
                                                value=value, compare=compare,
                                                alternative=alternative,
                                                correction=correction,
                                                return_results=return_results)
            if return_results is False:
                statistic, pvalue = res[:2]
            distr = 'normal'
            # TODO/Note: score_test_proportions_2indep returns the statistic
            # and not diff_stat
            diff_stat = None
        else:
            raise ValueError('method not recognized')

    elif compare == 'ratio':
        if method in ['log', 'log-adjusted']:
            addhalf = 0.5 if method == 'log-adjusted' else 0
            count1_, nobs1_ = count1 + addhalf, nobs1 + addhalf
            count2_, nobs2_ = count2 + addhalf, nobs2 + addhalf
            p1_ = count1_ / nobs1_
            p2_ = count2_ / nobs2_
            ratio_ = p1_ / p2_
            var = (1 / count1_) - 1 / nobs1_ + 1 / count2_ - 1 / nobs2_
            diff_stat = np.log(ratio_) - np.log(value)
            statistic = diff_stat / np.sqrt(var)
            distr = 'normal'

        elif method == 'score':
            res = score_test_proportions_2indep(count1, nobs1, count2, nobs2,
                                                value=value, compare=compare,
                                                alternative=alternative,
                                                correction=correction,
                                                return_results=return_results)
            if return_results is False:
                statistic, pvalue = res[:2]
            distr = 'normal'
            diff_stat = None

        else:
            raise ValueError('method not recognized')

    elif compare == "odds-ratio":

        if method in ['logit', 'logit-adjusted', 'logit-smoothed']:
            if method in ['logit-smoothed']:
                adjusted = _shrink_prob(count1, nobs1, count2, nobs2,
                                        shrink_factor=2, return_corr=False)[0]
                count1_, nobs1_, count2_, nobs2_ = adjusted

            else:
                addhalf = 0.5 if method == 'logit-adjusted' else 0
                count1_, nobs1_ = count1 + addhalf, nobs1 + 2 * addhalf
                count2_, nobs2_ = count2 + addhalf, nobs2 + 2 * addhalf
            p1_ = count1_ / nobs1_
            p2_ = count2_ / nobs2_
            odds_ratio_ = p1_ / (1 - p1_) / p2_ * (1 - p2_)
            var = (1 / count1_ + 1 / (nobs1_ - count1_) +
                   1 / count2_ + 1 / (nobs2_ - count2_))

            diff_stat = np.log(odds_ratio_) - np.log(value)
            statistic = diff_stat / np.sqrt(var)
            distr = 'normal'

        elif method == 'score':
            res = score_test_proportions_2indep(count1, nobs1, count2, nobs2,
                                                value=value, compare=compare,
                                                alternative=alternative,
                                                correction=correction,
                                                return_results=return_results)
            if return_results is False:
                statistic, pvalue = res[:2]
            distr = 'normal'
            diff_stat = None
        else:
            raise ValueError('method "%s" not recognized' % method)

    else:
        raise ValueError('compare "%s" not recognized' % compare)

    if distr == 'normal' and diff_stat is not None:
        statistic, pvalue = _zstat_generic2(diff_stat, np.sqrt(var),
                                            alternative=alternative)

    if return_results:
        if res is None:
            res = HolderTuple(statistic=statistic,
                              pvalue=pvalue,
                              compare=compare,
                              method=method,
                              diff=diff,
                              ratio=ratio,
                              odds_ratio=odds_ratio,
                              variance=var,
                              alternative=alternative,
                              value=value,
                              )
        else:
            # we already have a return result from the score test;
            # add the missing attributes
            res.diff = diff
            res.ratio = ratio
            res.odds_ratio = odds_ratio
            res.value = value
        return res
    else:
        return statistic, pvalue


def tost_proportions_2indep(count1, nobs1, count2, nobs2, low, upp,
                            method=None, compare='diff', correction=True):
    """
    Equivalence test based on two one-sided `test_proportions_2indep`

    This assumes that we have two independent binomial samples.

    The null and alternative hypotheses for equivalence testing are

    for compare = 'diff'

    - H0: prop1 - prop2 <= low or upp <= prop1 - prop2
    - H1: low < prop1 - prop2 < upp

    for compare = 'ratio'

    - H0: prop1 / prop2 <= low or upp <= prop1 / prop2
    - H1: low < prop1 / prop2 < upp

    for compare = 'odds-ratio'

    - H0: or <= low or upp <= or
    - H1: low < or < upp

    where odds-ratio or = prop1 / (1 - prop1) / (prop2 / (1 - prop2))

    Parameters
    ----------
    count1, nobs1 :
        count and sample size for first sample
    count2, nobs2 :
        count and sample size for the second sample
    low, upp :
        equivalence margins for diff, risk ratio or odds ratio
    method : string
        method for computing the hypothesis test. If method is None, then a
        default method is used. The default might change as more methods are
        added.

        diff:

        - 'wald'
        - 'agresti-caffo'
        - 'score': if correction is True, then this uses the degrees of
          freedom correction ``nobs / (nobs - 1)`` as in
          Miettinen and Nurminen 1985.

        ratio:

        - 'log': wald test using log transformation
        - 'log-adjusted': wald test using log transformation,
          adds 0.5 to counts
        - 'score': if correction is True, then this uses the degrees of
          freedom correction ``nobs / (nobs - 1)`` as in
          Miettinen and Nurminen 1985.

        odds-ratio:

        - 'logit': wald test using logit transformation
        - 'logit-adjusted': wald test using logit transformation,
          adds 0.5 to counts
        - 'logit-smoothed': wald test using logit transformation, biases
          cell counts towards independence by adding two observations in
          total.
        - 'score': if correction is True, then this uses the degrees of
          freedom correction ``nobs / (nobs - 1)`` as in
          Miettinen and Nurminen 1985

    compare : {'diff', 'ratio', 'odds-ratio'}
        If compare is `diff`, then the hypothesis test is for
        diff = p1 - p2.
        If compare is `ratio`, then the hypothesis test is for the
        risk ratio defined by ratio = p1 / p2.
        If compare is `odds-ratio`, then the hypothesis test is for the
        odds-ratio defined by or = p1 / (1 - p1) / (p2 / (1 - p2)).
    correction : bool
        If correction is True (default), then the Miettinen and Nurminen
        small sample correction to the variance nobs / (nobs - 1) is used.
        Applies only if method='score'.

    Returns
    -------
    res : HolderTuple
        Results instance. The p-value is the max of the p-values of the two
        one-sided tests, and the statistic is taken from the same one-sided
        test. The attributes ``results_larger`` and ``results_smaller``
        contain the results instances of the one-sided hypothesis tests at
        the lower and at the upper margin, respectively.

    See Also
    --------
    test_proportions_2indep
    confint_proportions_2indep

    Notes
    -----
    Status: experimental, API and defaults might still change.

    The TOST equivalence test delegates to `test_proportions_2indep` and has
    the same method and comparison options.
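
    Examples
    --------
    Illustrative sketch with made-up counts, testing equivalence of the
    risk difference within the margins (-0.2, 0.2):

    >>> from statsmodels.stats.proportion import tost_proportions_2indep
    >>> res = tost_proportions_2indep(24, 80, 20, 80, -0.2, 0.2,
    ...                               compare='diff')
    >>> pv = res.pvalue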
"""

    tt1 = test_proportions_2indep(count1, nobs1, count2, nobs2, value=low,
                                  method=method, compare=compare,
                                  alternative='larger',
                                  correction=correction,
                                  return_results=True)
    tt2 = test_proportions_2indep(count1, nobs1, count2, nobs2, value=upp,
                                  method=method, compare=compare,
                                  alternative='smaller',
                                  correction=correction,
                                  return_results=True)

    # idx_max = 1 if t1.pvalue < t2.pvalue else 0
    idx_max = np.asarray(tt1.pvalue < tt2.pvalue, int)
    statistic = np.choose(idx_max, [tt1.statistic, tt2.statistic])
    pvalue = np.choose(idx_max, [tt1.pvalue, tt2.pvalue])

    res = HolderTuple(statistic=statistic,
                      pvalue=pvalue,
                      compare=compare,
                      method=method,
                      results_larger=tt1,
                      results_smaller=tt2,
                      title="Equivalence test for 2 independent proportions"
                      )

    return res


def _std_2prop_power(diff, p2, ratio=1, alpha=0.05, value=0):
    """
    Compute standard error under null and alternative for 2 proportions

    helper function for power and sample size computation
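
    Examples
    --------
    Illustrative sketch (arbitrary numbers): standard errors for a baseline
    proportion of 0.3 and an alternative difference of 0.1 with equal
    sample sizes

    >>> p_pooled, std_null, std_alt = _std_2prop_power(0.1, 0.3, ratio=1)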
"""
    if value != 0:
        msg = 'non-zero diff under null, value, is not yet implemented'
        raise NotImplementedError(msg)

    nobs_ratio = ratio
    p1 = p2 + diff
    # The following contains currently redundant variables that will
    # be useful for different options for the null variance
    p_pooled = (p1 + p2 * ratio) / (1 + ratio)
    # probabilities for the variance of the null statistic
    p1_vnull, p2_vnull = p_pooled, p_pooled
    p2_alt = p2
    p1_alt = p2_alt + diff

    std_null = _std_diff_prop(p1_vnull, p2_vnull, ratio=nobs_ratio)
    std_alt = _std_diff_prop(p1_alt, p2_alt, ratio=nobs_ratio)
    return p_pooled, std_null, std_alt


def power_proportions_2indep(diff, prop2, nobs1, ratio=1, alpha=0.05,
                             value=0, alternative='two-sided',
                             return_results=True):
    """
    Power for ztest that two independent proportions are equal

    This assumes that the variance is based on the pooled proportion
    under the null and the non-pooled variance under the alternative.

    Parameters
    ----------
    diff : float
        difference between proportion 1 and 2 under the alternative
    prop2 : float
        proportion for the reference case, prop2; the proportion for the
        first case is computed as p1 = p2 + diff
    nobs1 : float or int
        number of observations in sample 1
    ratio : float
        sample size ratio, nobs2 = ratio * nobs1
    alpha : float in interval (0,1)
        Significance level, e.g. 0.05, is the probability of a type I
        error, that is wrong rejections if the Null Hypothesis is true.
    value : float
        currently only `value=0`, i.e. equality testing, is supported
    alternative : string, 'two-sided' (default), 'larger', 'smaller'
        Alternative hypothesis, whether the power is calculated for a
        two-sided (default) or one-sided test. The one-sided test can be
        either 'larger' or 'smaller'.
    return_results : bool
        If true, then a results instance with extra information is returned,
        otherwise only the computed power is returned.

    Returns
    -------
    results : results instance or float
        If return_results is True, then a results instance with the
        information in attributes is returned.
        If return_results is False, then only the power is returned.

    power : float
        Power of the test, e.g. 0.8, is one minus the probability of a
        type II error. Power is the probability that the test correctly
        rejects the Null Hypothesis if the Alternative Hypothesis is true.

    Other attributes in the results instance include:

    p_pooled
        pooled proportion, used for std_null
    std_null
        standard error of the difference under the null hypothesis (without
        sqrt(nobs1))
    std_alt
        standard error of the difference under the alternative hypothesis
        (without sqrt(nobs1))
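
    Examples
    --------
    Illustrative sketch with arbitrary numbers: power to detect an increase
    of 0.1 over a baseline proportion of 0.3 with 100 observations in each
    sample

    >>> from statsmodels.stats.proportion import power_proportions_2indep
    >>> res = power_proportions_2indep(0.1, 0.3, 100)
    >>> pow_ = res.power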
"""
    # TODO: avoid possible circular import, check if needed
    from statsmodels.stats.power import normal_power_het

    p_pooled, std_null, std_alt = _std_2prop_power(diff, prop2, ratio=ratio,
                                                   alpha=alpha, value=value)

    pow_ = normal_power_het(diff, nobs1, alpha, std_null=std_null,
                            std_alternative=std_alt,
                            alternative=alternative)

    if return_results:
        res = Holder(power=pow_,
                     p_pooled=p_pooled,
                     std_null=std_null,
                     std_alt=std_alt,
                     nobs1=nobs1,
                     nobs2=ratio * nobs1,
                     nobs_ratio=ratio,
                     alpha=alpha,
                     )
        return res
    else:
        return pow_


def samplesize_proportions_2indep_onetail(diff, prop2, power, ratio=1,
                                          alpha=0.05, value=0,
                                          alternative='two-sided'):
    """
    Required sample size assuming normal distribution based on one tail

    This uses an explicit computation for the sample size that is required
    to achieve a given power corresponding to the appropriate tails of the
    normal distribution. This ignores the far tail in a two-sided test
    which is negligible in the common case when alternative and null are
    far apart.

    Parameters
    ----------
    diff : float
        Difference between proportion 1 and 2 under the alternative.
    prop2 : float
        Proportion for the reference case, prop2; the proportion for the
        first case is computed as p1 = p2 + diff.
    power : float
        Power for which sample size is computed.
    ratio : float
        Sample size ratio, nobs2 = ratio * nobs1.
    alpha : float in interval (0,1)
        Significance level, e.g. 0.05, is the probability of a type I
        error, that is wrong rejections if the Null Hypothesis is true.
    value : float
        Currently only `value=0`, i.e. equality testing, is supported.
    alternative : string, 'two-sided' (default), 'larger', 'smaller'
        Alternative hypothesis whether the power is calculated for a
        two-sided (default) or one-sided test. In the case of a one-sided
        alternative, it is assumed that the test is in the appropriate tail.

    Returns
    -------
    nobs1 : float
        Number of observations in sample 1.
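
    Examples
    --------
    Illustrative sketch with arbitrary numbers: sample size needed for 80%
    power to detect a difference of 0.1 over a baseline proportion of 0.3

    >>> from statsmodels.stats.proportion import (
    ...     samplesize_proportions_2indep_onetail)
    >>> nobs1 = samplesize_proportions_2indep_onetail(0.1, 0.3, 0.8)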
"""
    # TODO: avoid possible circular import, check if needed
    from statsmodels.stats.power import normal_sample_size_one_tail

    if alternative in ['two-sided', '2s']:
        alpha = alpha / 2

    _, std_null, std_alt = _std_2prop_power(diff, prop2, ratio=ratio,
                                            alpha=alpha, value=value)

    nobs = normal_sample_size_one_tail(diff, power, alpha, std_null=std_null,
                                       std_alternative=std_alt)
    return nobs


def _score_confint_inversion(count1, nobs1, count2, nobs2, compare='diff',
                             alpha=0.05, correction=True):
    """
    Compute score confidence interval by inverting score test

    Parameters
    ----------
    count1, nobs1 :
        Count and sample size for first sample.
    count2, nobs2 :
        Count and sample size for the second sample.
    compare : {'diff', 'ratio', 'odds-ratio'}
        If compare is `diff`, then the confidence interval is for
        diff = p1 - p2.
        If compare is `ratio`, then the confidence interval is for the
        risk ratio defined by ratio = p1 / p2.
        If compare is `odds-ratio`, then the confidence interval is for the
        odds-ratio defined by or = p1 / (1 - p1) / (p2 / (1 - p2)).
    alpha : float in interval (0,1)
        Significance level, e.g. 0.05, is the probability of a type I
        error, that is wrong rejections if the Null Hypothesis is true.
    correction : bool
        If correction is True (default), then the Miettinen and Nurminen
        small sample correction to the variance nobs / (nobs - 1) is used.
        Applies only if method='score'.

    Returns
    -------
    low : float
        Lower confidence bound.
    upp : float
        Upper confidence bound.
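
    Examples
    --------
    Illustrative sketch with made-up counts: score confidence interval for
    the risk difference

    >>> low, upp = _score_confint_inversion(24, 80, 12, 80, compare='diff')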
"""

    def func(v):
        r = test_proportions_2indep(count1, nobs1, count2, nobs2,
                                    value=v, compare=compare, method='score',
                                    correction=correction,
                                    alternative="two-sided")
        return r.pvalue - alpha

    rt0 = test_proportions_2indep(count1, nobs1, count2, nobs2,
                                  value=0, compare=compare, method='score',
                                  correction=correction,
                                  alternative="two-sided")

    # use default method to get starting values
    # this will not work if score confint becomes default
    # maybe use "wald" as alias that works for all compare statistics
    use_method = {"diff": "wald", "ratio": "log", "odds-ratio": "logit"}
    rci0 = confint_proportions_2indep(count1, nobs1, count2, nobs2,
                                      method=use_method[compare],
                                      compare=compare, alpha=alpha)

    # Note: diff might be negative
    ub = rci0[1] + np.abs(rci0[1]) * 0.5
    lb = rci0[0] - np.abs(rci0[0]) * 0.25
    if compare == 'diff':
        param = rt0.diff
        # 1 might not be the correct upper bound because
        # rootfinding is for the `diff` and not for a probability.
        ub = min(ub, 0.99999)
    elif compare == 'ratio':
        param = rt0.ratio
        ub *= 2  # add more buffer
    if compare == 'odds-ratio':
        param = rt0.odds_ratio

    # root finding for confint bounds
    upp = optimize.brentq(func, param, ub)
    low = optimize.brentq(func, lb, param)
    return low, upp


def _confint_riskratio_koopman(count1, nobs1, count2, nobs2, alpha=0.05,
                               correction=True):
    """
    Score confidence interval for the ratio of proportions, Koopman/Nam

    signature not consistent with other functions

    When correction is True, then the small sample correction
    nobs / (nobs - 1) by Miettinen/Nurminen is used.
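
    Examples
    --------
    Illustrative sketch with made-up counts; the interval is in the
    ``confint`` attribute of the returned instance

    >>> res = _confint_riskratio_koopman(24, 80, 12, 80)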
"""
    # The names below follow Nam
    x0, x1, n0, n1 = count2, count1, nobs2, nobs1
    x = x0 + x1
    n = n0 + n1
    z = stats.norm.isf(alpha / 2)**2
    if correction:
        # Miettinen/Nurminen small sample correction
        z *= n / (n - 1)
    # z = stats.chi2.isf(alpha, 1)
    # equ 6 in Nam 1995
    a1 = n0 * (n0 * n * x1 + n1 * (n0 + x1) * z)
    a2 = - n0 * (n0 * n1 * x + 2 * n * x0 * x1 + n1 * (n0 + x0 + 2 * x1) * z)
    a3 = 2 * n0 * n1 * x0 * x + n * x0 * x0 * x1 + n0 * n1 * x * z
    a4 = - n1 * x0 * x0 * x

    p_roots_ = np.sort(np.roots([a1, a2, a3, a4]))
    p_roots = p_roots_[:2][::-1]

    # equ 5
    ci = (1 - (n1 - x1) * (1 - p_roots) / (x0 + n1 - n * p_roots)) / p_roots

    res = Holder()
    res.confint = ci
    res._p_roots = p_roots_  # for unit tests, can be dropped
    return res


def _confint_riskratio_paired_nam(table, alpha=0.05):
    """
    Confidence interval for marginal risk ratio for matched pairs

    needs the full table

                 success  fail   marginal
    success      x11      x10    x1.
    fail         x01      x00    x0.
    marginal     x.1      x.0    n

    The confidence interval is for the ratio p1 / p0 where
    p1 = x1. / n and
    p0 = x.1 / n

    Todo: rename p1 to pa and p2 to pb, so we have a, b for treatment and
    0, 1 for success/failure

    current namings follow Nam 2009

    status
    testing: compared to the example in Nam 2009, the internal polynomial
    coefficients in the calculation correspond at around 4 decimals, but
    the confidence interval agrees only at 2 decimals
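
    Examples
    --------
    Illustrative sketch; the 2x2 table contains made-up counts in the
    order [[x11, x10], [x01, x00]]

    >>> import numpy as np
    >>> table = np.array([[10, 5], [3, 12]])
    >>> res = _confint_riskratio_paired_nam(table)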
"""
    x11, x10, x01, x00 = np.ravel(table)
    n = np.sum(table)  # nobs
    p10, p01 = x10 / n, x01 / n
    p1 = (x11 + x10) / n
    p0 = (x11 + x01) / n
    q00 = 1 - x00 / n

    z2 = stats.norm.isf(alpha / 2)**2
    # z = stats.chi2.isf(alpha, 1)
    # before equ 3 in Nam 2009

    g1 = (n * p0 + z2 / 2) * p0
    g2 = - (2 * n * p1 * p0 + z2 * q00)
    g3 = (n * p1 + z2 / 2) * p1

    a0 = g1**2 - (z2 * p0 / 2)**2
    a1 = 2 * g1 * g2
    a2 = g2**2 + 2 * g1 * g3 + z2**2 * (p1 * p0 - 2 * p10 * p01) / 2
    a3 = 2 * g2 * g3
    a4 = g3**2 - (z2 * p1 / 2)**2

    p_roots = np.sort(np.roots([a0, a1, a2, a3, a4]))
    # p_roots = np.sort(np.roots([1, a1 / a0, a2 / a0, a3 / a0, a4 / a0]))

    ci = [p_roots.min(), p_roots.max()]
    res = Holder()
    res.confint = ci
    res.p = p1, p0
    res._p_roots = p_roots  # for unit tests, can be dropped
    return res