""" Tests and Confidence Intervals for Binomial Proportions Created on Fri Mar 01 00:23:07 2013 Author: Josef Perktold License: BSD-3 """ from statsmodels.compat.python import lzip from typing import Callable import numpy as np import pandas as pd from scipy import optimize, stats from statsmodels.stats.base import AllPairsResults, HolderTuple from statsmodels.stats.weightstats import _zstat_generic2 from statsmodels.tools.sm_exceptions import HypothesisTestWarning from statsmodels.tools.testing import Holder from statsmodels.tools.validation import array_like FLOAT_INFO = np.finfo(float) def _bound_proportion_confint( func: Callable[[float], float], qi: float, lower: bool = True ) -> float: """ Try hard to find a bound different from eps/1 - eps in proportion_confint Parameters ---------- func : callable Callable function to use as the objective of the search qi : float The empirical success rate lower : bool Whether to fund a lower bound for the left side of the CI Returns ------- float The coarse bound """ default = FLOAT_INFO.eps if lower else 1.0 - FLOAT_INFO.eps def step(v): return v / 8 if lower else v + (1.0 - v) / 8 x = step(qi) w = func(x) cnt = 1 while w > 0 and cnt < 10: x = step(x) w = func(x) cnt += 1 return x if cnt < 10 else default def _bisection_search_conservative( func: Callable[[float], float], lb: float, ub: float, steps: int = 27 ) -> tuple[float, float]: """ Private function used as a fallback by proportion_confint Used when brentq returns a non-conservative bound for the CI Parameters ---------- func : callable Callable function to use as the objective of the search lb : float Lower bound ub : float Upper bound steps : int Number of steps to use in the bisection Returns ------- est : float The estimated value. 
Will always produce a negative value of func func_val : float The value of the function at the estimate """ upper = func(ub) lower = func(lb) best = upper if upper < 0 else lower best_pt = ub if upper < 0 else lb if np.sign(lower) == np.sign(upper): raise ValueError("problem with signs") mp = (ub + lb) / 2 mid = func(mp) if (mid < 0) and (mid > best): best = mid best_pt = mp for _ in range(steps): if np.sign(mid) == np.sign(upper): ub = mp upper = mid else: lb = mp mp = (ub + lb) / 2 mid = func(mp) if (mid < 0) and (mid > best): best = mid best_pt = mp return best_pt, best def proportion_confint(count, nobs, alpha:float=0.05, method="normal"): """ Confidence interval for a binomial proportion Parameters ---------- count : {int or float, array_like} number of successes, can be pandas Series or DataFrame. Arrays must contain integer values if method is "binom_test". nobs : {int or float, array_like} total number of trials. Arrays must contain integer values if method is "binom_test". alpha : float Significance level, default 0.05. Must be in (0, 1) method : {"normal", "agresti_coull", "beta", "wilson", "binom_test"} default: "normal" method to use for confidence interval. Supported methods: - `normal` : asymptotic normal approximation - `agresti_coull` : Agresti-Coull interval - `beta` : Clopper-Pearson interval based on Beta distribution - `wilson` : Wilson Score interval - `jeffreys` : Jeffreys Bayesian Interval - `binom_test` : Numerical inversion of binom_test Returns ------- ci_low, ci_upp : {float, ndarray, Series DataFrame} lower and upper confidence level with coverage (approximately) 1-alpha. When a pandas object is returned, then the index is taken from `count`. Notes ----- Beta, the Clopper-Pearson exact interval has coverage at least 1-alpha, but is in general conservative. Most of the other methods have average coverage equal to 1-alpha, but will have smaller coverage in some cases. 
The "beta" and "jeffreys" interval are central, they use alpha/2 in each tail, and alpha is not adjusted at the boundaries. In the extreme case when `count` is zero or equal to `nobs`, then the coverage will be only 1 - alpha/2 in the case of "beta". The confidence intervals are clipped to be in the [0, 1] interval in the case of "normal" and "agresti_coull". Method "binom_test" directly inverts the binomial test in scipy.stats. which has discrete steps. TODO: binom_test intervals raise an exception in small samples if one interval bound is close to zero or one. References ---------- .. [*] https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval .. [*] Brown, Lawrence D.; Cai, T. Tony; DasGupta, Anirban (2001). "Interval Estimation for a Binomial Proportion", Statistical Science 16 (2): 101–133. doi:10.1214/ss/1009213286. """ is_scalar = np.isscalar(count) and np.isscalar(nobs) is_pandas = isinstance(count, (pd.Series, pd.DataFrame)) count_a = array_like(count, "count", optional=False, ndim=None) nobs_a = array_like(nobs, "nobs", optional=False, ndim=None) def _check(x: np.ndarray, name: str) -> np.ndarray: if np.issubdtype(x.dtype, np.integer): return x y = x.astype(np.int64, casting="unsafe") if np.any(y != x): raise ValueError( f"{name} must have an integral dtype. 
Found data with " f"dtype {x.dtype}" ) return y if method == "binom_test": count_a = _check(np.asarray(count_a), "count") nobs_a = _check(np.asarray(nobs_a), "count") q_ = count_a / nobs_a alpha_2 = 0.5 * alpha if method == "normal": std_ = np.sqrt(q_ * (1 - q_) / nobs_a) dist = stats.norm.isf(alpha / 2.0) * std_ ci_low = q_ - dist ci_upp = q_ + dist elif method == "binom_test": # inverting the binomial test def func_factory(count: int, nobs: int) -> Callable[[float], float]: if hasattr(stats, "binomtest"): def func(qi): return stats.binomtest(count, nobs, p=qi).pvalue - alpha else: # Remove after min SciPy >= 1.7 def func(qi): return stats.binom_test(count, nobs, p=qi) - alpha return func bcast = np.broadcast(count_a, nobs_a) ci_low = np.zeros(bcast.shape) ci_upp = np.zeros(bcast.shape) index = bcast.index for c, n in bcast: # Enforce symmetry reverse = False _q = q_.flat[index] if c > n // 2: c = n - c reverse = True _q = 1 - _q func = func_factory(c, n) if c == 0: ci_low.flat[index] = 0.0 else: lower_bnd = _bound_proportion_confint(func, _q, lower=True) val, _z = optimize.brentq( func, lower_bnd, _q, full_output=True ) if func(val) > 0: power = 10 new_lb = val - (val - lower_bnd) / 2**power while func(new_lb) > 0 and power >= 0: power -= 1 new_lb = val - (val - lower_bnd) / 2**power val, _ = _bisection_search_conservative(func, new_lb, _q) ci_low.flat[index] = val if c == n: ci_upp.flat[index] = 1.0 else: upper_bnd = _bound_proportion_confint(func, _q, lower=False) val, _z = optimize.brentq( func, _q, upper_bnd, full_output=True ) if func(val) > 0: power = 10 new_ub = val + (upper_bnd - val) / 2**power while func(new_ub) > 0 and power >= 0: power -= 1 new_ub = val - (upper_bnd - val) / 2**power val, _ = _bisection_search_conservative(func, _q, new_ub) ci_upp.flat[index] = val if reverse: temp = ci_upp.flat[index] ci_upp.flat[index] = 1 - ci_low.flat[index] ci_low.flat[index] = 1 - temp index = bcast.index elif method == "beta": ci_low = stats.beta.ppf(alpha_2, 
count_a, nobs_a - count_a + 1) ci_upp = stats.beta.isf(alpha_2, count_a + 1, nobs_a - count_a) if np.ndim(ci_low) > 0: ci_low.flat[q_.flat == 0] = 0 ci_upp.flat[q_.flat == 1] = 1 else: ci_low = 0 if q_ == 0 else ci_low ci_upp = 1 if q_ == 1 else ci_upp elif method == "agresti_coull": crit = stats.norm.isf(alpha / 2.0) nobs_c = nobs_a + crit**2 q_c = (count_a + crit**2 / 2.0) / nobs_c std_c = np.sqrt(q_c * (1.0 - q_c) / nobs_c) dist = crit * std_c ci_low = q_c - dist ci_upp = q_c + dist elif method == "wilson": crit = stats.norm.isf(alpha / 2.0) crit2 = crit**2 denom = 1 + crit2 / nobs_a center = (q_ + crit2 / (2 * nobs_a)) / denom dist = crit * np.sqrt( q_ * (1.0 - q_) / nobs_a + crit2 / (4.0 * nobs_a**2) ) dist /= denom ci_low = center - dist ci_upp = center + dist # method adjusted to be more forgiving of misspellings or incorrect option name elif method[:4] == "jeff": ci_low, ci_upp = stats.beta.interval( 1 - alpha, count_a + 0.5, nobs_a - count_a + 0.5 ) else: raise NotImplementedError(f"method {method} is not available") if method in ["normal", "agresti_coull"]: ci_low = np.clip(ci_low, 0, 1) ci_upp = np.clip(ci_upp, 0, 1) if is_pandas: container = pd.Series if isinstance(count, pd.Series) else pd.DataFrame ci_low = container(ci_low, index=count.index) ci_upp = container(ci_upp, index=count.index) if is_scalar: return float(ci_low), float(ci_upp) return ci_low, ci_upp def multinomial_proportions_confint(counts, alpha=0.05, method='goodman'): """ Confidence intervals for multinomial proportions. Parameters ---------- counts : array_like of int, 1-D Number of observations in each category. alpha : float in (0, 1), optional Significance level, defaults to 0.05. 
method : {'goodman', 'sison-glaz'}, optional Method to use to compute the confidence intervals; available methods are: - `goodman`: based on a chi-squared approximation, valid if all values in `counts` are greater or equal to 5 [2]_ - `sison-glaz`: less conservative than `goodman`, but only valid if `counts` has 7 or more categories (``len(counts) >= 7``) [3]_ Returns ------- confint : ndarray, 2-D Array of [lower, upper] confidence levels for each category, such that overall coverage is (approximately) `1-alpha`. Raises ------ ValueError If `alpha` is not in `(0, 1)` (bounds excluded), or if the values in `counts` are not all positive or null. NotImplementedError If `method` is not kown. Exception When ``method == 'sison-glaz'``, if for some reason `c` cannot be computed; this signals a bug and should be reported. Notes ----- The `goodman` method [2]_ is based on approximating a statistic based on the multinomial as a chi-squared random variable. The usual recommendation is that this is valid if all the values in `counts` are greater than or equal to 5. There is no condition on the number of categories for this method. The `sison-glaz` method [3]_ approximates the multinomial probabilities, and evaluates that with a maximum-likelihood estimator. The first approximation is an Edgeworth expansion that converges when the number of categories goes to infinity, and the maximum-likelihood estimator converges when the number of observations (``sum(counts)``) goes to infinity. In their paper, Sison & Glaz demo their method with at least 7 categories, so ``len(counts) >= 7`` with all values in `counts` at or above 5 can be used as a rule of thumb for the validity of this method. This method is less conservative than the `goodman` method (i.e. 
it will yield confidence intervals closer to the desired significance level), but produces confidence intervals of uniform width over all categories (except when the intervals reach 0 or 1, in which case they are truncated), which makes it most useful when proportions are of similar magnitude. Aside from the original sources ([1]_, [2]_, and [3]_), the implementation uses the formulas (though not the code) presented in [4]_ and [5]_. References ---------- .. [1] Levin, Bruce, "A representation for multinomial cumulative distribution functions," The Annals of Statistics, Vol. 9, No. 5, 1981, pp. 1123-1126. .. [2] Goodman, L.A., "On simultaneous confidence intervals for multinomial proportions," Technometrics, Vol. 7, No. 2, 1965, pp. 247-254. .. [3] Sison, Cristina P., and Joseph Glaz, "Simultaneous Confidence Intervals and Sample Size Determination for Multinomial Proportions," Journal of the American Statistical Association, Vol. 90, No. 429, 1995, pp. 366-369. .. [4] May, Warren L., and William D. Johnson, "A SAS® macro for constructing simultaneous confidence intervals for multinomial proportions," Computer methods and programs in Biomedicine, Vol. 53, No. 3, 1997, pp. 153-162. .. [5] May, Warren L., and William D. Johnson, "Constructing two-sided simultaneous confidence intervals for multinomial proportions for small counts in a large number of cells," Journal of Statistical Software, Vol. 5, No. 6, 2000, pp. 1-24. 
""" if alpha <= 0 or alpha >= 1: raise ValueError('alpha must be in (0, 1), bounds excluded') counts = np.array(counts, dtype=float) if (counts < 0).any(): raise ValueError('counts must be >= 0') n = counts.sum() k = len(counts) proportions = counts / n if method == 'goodman': chi2 = stats.chi2.ppf(1 - alpha / k, 1) delta = chi2 ** 2 + (4 * n * proportions * chi2 * (1 - proportions)) region = ((2 * n * proportions + chi2 + np.array([- np.sqrt(delta), np.sqrt(delta)])) / (2 * (chi2 + n))).T elif method[:5] == 'sison': # We accept any name starting with 'sison' # Define a few functions we'll use a lot. def poisson_interval(interval, p): """ Compute P(b <= Z <= a) where Z ~ Poisson(p) and `interval = (b, a)`. """ b, a = interval prob = stats.poisson.cdf(a, p) - stats.poisson.cdf(b - 1, p) return prob def truncated_poisson_factorial_moment(interval, r, p): """ Compute mu_r, the r-th factorial moment of a poisson random variable of parameter `p` truncated to `interval = (b, a)`. """ b, a = interval return p ** r * (1 - ((poisson_interval((a - r + 1, a), p) - poisson_interval((b - r, b - 1), p)) / poisson_interval((b, a), p))) def edgeworth(intervals): """ Compute the Edgeworth expansion term of Sison & Glaz's formula (1) (approximated probability for multinomial proportions in a given box). """ # Compute means and central moments of the truncated poisson # variables. mu_r1, mu_r2, mu_r3, mu_r4 = ( np.array([truncated_poisson_factorial_moment(interval, r, p) for (interval, p) in zip(intervals, counts)]) for r in range(1, 5) ) mu = mu_r1 mu2 = mu_r2 + mu - mu ** 2 mu3 = mu_r3 + mu_r2 * (3 - 3 * mu) + mu - 3 * mu ** 2 + 2 * mu ** 3 mu4 = (mu_r4 + mu_r3 * (6 - 4 * mu) + mu_r2 * (7 - 12 * mu + 6 * mu ** 2) + mu - 4 * mu ** 2 + 6 * mu ** 3 - 3 * mu ** 4) # Compute expansion factors, gamma_1 and gamma_2. g1 = mu3.sum() / mu2.sum() ** 1.5 g2 = (mu4.sum() - 3 * (mu2 ** 2).sum()) / mu2.sum() ** 2 # Compute the expansion itself. 
x = (n - mu.sum()) / np.sqrt(mu2.sum()) phi = np.exp(- x ** 2 / 2) / np.sqrt(2 * np.pi) H3 = x ** 3 - 3 * x H4 = x ** 4 - 6 * x ** 2 + 3 H6 = x ** 6 - 15 * x ** 4 + 45 * x ** 2 - 15 f = phi * (1 + g1 * H3 / 6 + g2 * H4 / 24 + g1 ** 2 * H6 / 72) return f / np.sqrt(mu2.sum()) def approximated_multinomial_interval(intervals): """ Compute approximated probability for Multinomial(n, proportions) to be in `intervals` (Sison & Glaz's formula (1)). """ return np.exp( np.sum(np.log([poisson_interval(interval, p) for (interval, p) in zip(intervals, counts)])) + np.log(edgeworth(intervals)) - np.log(stats.poisson._pmf(n, n)) ) def nu(c): """ Compute interval coverage for a given `c` (Sison & Glaz's formula (7)). """ return approximated_multinomial_interval( [(np.maximum(count - c, 0), np.minimum(count + c, n)) for count in counts]) # Find the value of `c` that will give us the confidence intervals # (solving nu(c) <= 1 - alpha < nu(c + 1). c = 1.0 nuc = nu(c) nucp1 = nu(c + 1) while not (nuc <= (1 - alpha) < nucp1): if c > n: raise Exception("Couldn't find a value for `c` that " "solves nu(c) <= 1 - alpha < nu(c + 1)") c += 1 nuc = nucp1 nucp1 = nu(c + 1) # Compute gamma and the corresponding confidence intervals. 
g = (1 - alpha - nuc) / (nucp1 - nuc) ci_lower = np.maximum(proportions - c / n, 0) ci_upper = np.minimum(proportions + (c + 2 * g) / n, 1) region = np.array([ci_lower, ci_upper]).T else: raise NotImplementedError('method "%s" is not available' % method) return region def samplesize_confint_proportion(proportion, half_length, alpha=0.05, method='normal'): """ Find sample size to get desired confidence interval length Parameters ---------- proportion : float in (0, 1) proportion or quantile half_length : float in (0, 1) desired half length of the confidence interval alpha : float in (0, 1) significance level, default 0.05, coverage of the two-sided interval is (approximately) ``1 - alpha`` method : str in ['normal'] method to use for confidence interval, currently only normal approximation Returns ------- n : float sample size to get the desired half length of the confidence interval Notes ----- this is mainly to store the formula. possible application: number of replications in bootstrap samples """ q_ = proportion if method == 'normal': n = q_ * (1 - q_) / (half_length / stats.norm.isf(alpha / 2.))**2 else: raise NotImplementedError('only "normal" is available') return n def proportion_effectsize(prop1, prop2, method='normal'): """ Effect size for a test comparing two proportions for use in power function Parameters ---------- prop1, prop2 : float or array_like The proportion value(s). Returns ------- es : float or ndarray effect size for (transformed) prop1 - prop2 Notes ----- only method='normal' is implemented to match pwr.p2.test see http://www.statmethods.net/stats/power.html Effect size for `normal` is defined as :: 2 * (arcsin(sqrt(prop1)) - arcsin(sqrt(prop2))) I think other conversions to normality can be used, but I need to check. Examples -------- >>> import statsmodels.api as sm >>> sm.stats.proportion_effectsize(0.5, 0.4) 0.20135792079033088 >>> sm.stats.proportion_effectsize([0.3, 0.4, 0.5], 0.4) array([-0.21015893, 0. 
, 0.20135792]) """ if method != 'normal': raise ValueError('only "normal" is implemented') es = 2 * (np.arcsin(np.sqrt(prop1)) - np.arcsin(np.sqrt(prop2))) return es def std_prop(prop, nobs): """ Standard error for the estimate of a proportion This is just ``np.sqrt(p * (1. - p) / nobs)`` Parameters ---------- prop : array_like proportion nobs : int, array_like number of observations Returns ------- std : array_like standard error for a proportion of nobs independent observations """ return np.sqrt(prop * (1. - prop) / nobs) def _std_diff_prop(p1, p2, ratio=1): return np.sqrt(p1 * (1 - p1) + p2 * (1 - p2) / ratio) def _power_ztost(mean_low, var_low, mean_upp, var_upp, mean_alt, var_alt, alpha=0.05, discrete=True, dist='norm', nobs=None, continuity=0, critval_continuity=0): """ Generic statistical power function for normal based equivalence test This includes options to adjust the normal approximation and can use the binomial to evaluate the probability of the rejection region see power_ztost_prob for a description of the options """ # TODO: refactor structure, separate norm and binom better if not isinstance(continuity, tuple): continuity = (continuity, continuity) crit = stats.norm.isf(alpha) k_low = mean_low + np.sqrt(var_low) * crit k_upp = mean_upp - np.sqrt(var_upp) * crit if discrete or dist == 'binom': k_low = np.ceil(k_low * nobs + 0.5 * critval_continuity) k_upp = np.trunc(k_upp * nobs - 0.5 * critval_continuity) if dist == 'norm': #need proportion k_low = (k_low) * 1. / nobs #-1 to match PASS k_upp = k_upp * 1. 
/ nobs # else: # if dist == 'binom': # #need counts # k_low *= nobs # k_upp *= nobs #print mean_low, np.sqrt(var_low), crit, var_low #print mean_upp, np.sqrt(var_upp), crit, var_upp if np.any(k_low > k_upp): #vectorize import warnings warnings.warn("no overlap, power is zero", HypothesisTestWarning) std_alt = np.sqrt(var_alt) z_low = (k_low - mean_alt - continuity[0] * 0.5 / nobs) / std_alt z_upp = (k_upp - mean_alt + continuity[1] * 0.5 / nobs) / std_alt if dist == 'norm': power = stats.norm.cdf(z_upp) - stats.norm.cdf(z_low) elif dist == 'binom': power = (stats.binom.cdf(k_upp, nobs, mean_alt) - stats.binom.cdf(k_low-1, nobs, mean_alt)) return power, (k_low, k_upp, z_low, z_upp) def binom_tost(count, nobs, low, upp): """ Exact TOST test for one proportion using binomial distribution Parameters ---------- count : {int, array_like} the number of successes in nobs trials. nobs : int the number of trials or observations. low, upp : floats lower and upper limit of equivalence region Returns ------- pvalue : float p-value of equivalence test pval_low, pval_upp : floats p-values of lower and upper one-sided tests """ # binom_test_stat only returns pval tt1 = binom_test(count, nobs, alternative='larger', prop=low) tt2 = binom_test(count, nobs, alternative='smaller', prop=upp) return np.maximum(tt1, tt2), tt1, tt2, def binom_tost_reject_interval(low, upp, nobs, alpha=0.05): """ Rejection region for binomial TOST The interval includes the end points, `reject` if and only if `r_low <= x <= r_upp`. The interval might be empty with `r_upp < r_low`. Parameters ---------- low, upp : floats lower and upper limit of equivalence region nobs : int the number of trials or observations. 
Returns ------- x_low, x_upp : float lower and upper bound of rejection region """ x_low = stats.binom.isf(alpha, nobs, low) + 1 x_upp = stats.binom.ppf(alpha, nobs, upp) - 1 return x_low, x_upp def binom_test_reject_interval(value, nobs, alpha=0.05, alternative='two-sided'): """ Rejection region for binomial test for one sample proportion The interval includes the end points of the rejection region. Parameters ---------- value : float proportion under the Null hypothesis nobs : int the number of trials or observations. Returns ------- x_low, x_upp : int lower and upper bound of rejection region """ if alternative in ['2s', 'two-sided']: alternative = '2s' # normalize alternative name alpha = alpha / 2 if alternative in ['2s', 'smaller']: x_low = stats.binom.ppf(alpha, nobs, value) - 1 else: x_low = 0 if alternative in ['2s', 'larger']: x_upp = stats.binom.isf(alpha, nobs, value) + 1 else : x_upp = nobs return int(x_low), int(x_upp) def binom_test(count, nobs, prop=0.5, alternative='two-sided'): """ Perform a test that the probability of success is p. This is an exact, two-sided test of the null hypothesis that the probability of success in a Bernoulli experiment is `p`. Parameters ---------- count : {int, array_like} the number of successes in nobs trials. nobs : int the number of trials or observations. prop : float, optional The probability of success under the null hypothesis, `0 <= prop <= 1`. The default value is `prop = 0.5` alternative : str in ['two-sided', 'smaller', 'larger'] alternative hypothesis, which can be two-sided or either one of the one-sided tests. Returns ------- p-value : float The p-value of the hypothesis test Notes ----- This uses scipy.stats.binom_test for the two-sided alternative. 
""" if np.any(prop > 1.0) or np.any(prop < 0.0): raise ValueError("p must be in range [0,1]") if alternative in ['2s', 'two-sided']: try: pval = stats.binomtest(count, n=nobs, p=prop).pvalue except AttributeError: # Remove after min SciPy >= 1.7 pval = stats.binom_test(count, n=nobs, p=prop) elif alternative in ['l', 'larger']: pval = stats.binom.sf(count-1, nobs, prop) elif alternative in ['s', 'smaller']: pval = stats.binom.cdf(count, nobs, prop) else: raise ValueError('alternative not recognized\n' 'should be two-sided, larger or smaller') return pval def power_binom_tost(low, upp, nobs, p_alt=None, alpha=0.05): if p_alt is None: p_alt = 0.5 * (low + upp) x_low, x_upp = binom_tost_reject_interval(low, upp, nobs, alpha=alpha) power = (stats.binom.cdf(x_upp, nobs, p_alt) - stats.binom.cdf(x_low-1, nobs, p_alt)) return power def power_ztost_prop(low, upp, nobs, p_alt, alpha=0.05, dist='norm', variance_prop=None, discrete=True, continuity=0, critval_continuity=0): """ Power of proportions equivalence test based on normal distribution Parameters ---------- low, upp : floats lower and upper limit of equivalence region nobs : int number of observations p_alt : float in (0,1) proportion under the alternative alpha : float in (0,1) significance level of the test dist : str in ['norm', 'binom'] This defines the distribution to evaluate the power of the test. The critical values of the TOST test are always based on the normal approximation, but the distribution for the power can be either the normal (default) or the binomial (exact) distribution. variance_prop : None or float in (0,1) If this is None, then the variances for the two one sided tests are based on the proportions equal to the equivalence limits. If variance_prop is given, then it is used to calculate the variance for the TOST statistics. If this is based on an sample, then the estimated proportion can be used. discrete : bool If true, then the critical values of the rejection region are converted to integers. 
If dist is "binom", this is automatically assumed. If discrete is false, then the TOST critical values are used as floating point numbers, and the power is calculated based on the rejection region that is not discretized. continuity : bool or float adjust the rejection region for the normal power probability. This has and effect only if ``dist='norm'`` critval_continuity : bool or float If this is non-zero, then the critical values of the tost rejection region are adjusted before converting to integers. This affects both distributions, ``dist='norm'`` and ``dist='binom'``. Returns ------- power : float statistical power of the equivalence test. (k_low, k_upp, z_low, z_upp) : tuple of floats critical limits in intermediate steps temporary return, will be changed Notes ----- In small samples the power for the ``discrete`` version, has a sawtooth pattern as a function of the number of observations. As a consequence, small changes in the number of observations or in the normal approximation can have a large effect on the power. ``continuity`` and ``critval_continuity`` are added to match some results of PASS, and are mainly to investigate the sensitivity of the ztost power to small changes in the rejection region. From my interpretation of the equations in the SAS manual, both are zero in SAS. works vectorized **verification:** The ``dist='binom'`` results match PASS, The ``dist='norm'`` results look reasonable, but no benchmark is available. References ---------- SAS Manual: Chapter 68: The Power Procedure, Computational Resources PASS Chapter 110: Equivalence Tests for One Proportion. 
""" mean_low = low var_low = std_prop(low, nobs)**2 mean_upp = upp var_upp = std_prop(upp, nobs)**2 mean_alt = p_alt var_alt = std_prop(p_alt, nobs)**2 if variance_prop is not None: var_low = var_upp = std_prop(variance_prop, nobs)**2 power = _power_ztost(mean_low, var_low, mean_upp, var_upp, mean_alt, var_alt, alpha=alpha, discrete=discrete, dist=dist, nobs=nobs, continuity=continuity, critval_continuity=critval_continuity) return np.maximum(power[0], 0), power[1:] def _table_proportion(count, nobs): """ Create a k by 2 contingency table for proportion helper function for proportions_chisquare Parameters ---------- count : {int, array_like} the number of successes in nobs trials. nobs : int the number of trials or observations. Returns ------- table : ndarray (k, 2) contingency table Notes ----- recent scipy has more elaborate contingency table functions """ count = np.asarray(count) dt = np.promote_types(count.dtype, np.float64) count = np.asarray(count, dtype=dt) table = np.column_stack((count, nobs - count)) expected = table.sum(0) * table.sum(1)[:, None] * 1. / table.sum() n_rows = table.shape[0] return table, expected, n_rows def proportions_ztest(count, nobs, value=None, alternative='two-sided', prop_var=False): """ Test for proportions based on normal (z) test Parameters ---------- count : {int, array_like} the number of successes in nobs trials. If this is array_like, then the assumption is that this represents the number of successes for each independent sample nobs : {int, array_like} the number of trials or observations, with the same length as count. value : float, array_like or None, optional This is the value of the null hypothesis equal to the proportion in the case of a one sample test. In the case of a two-sample test, the null hypothesis is that prop[0] - prop[1] = value, where prop is the proportion in the two samples. 
If not provided value = 0 and the null is prop[0] = prop[1] alternative : str in ['two-sided', 'smaller', 'larger'] The alternative hypothesis can be either two-sided or one of the one- sided tests, smaller means that the alternative hypothesis is ``prop < value`` and larger means ``prop > value``. In the two sample test, smaller means that the alternative hypothesis is ``p1 < p2`` and larger means ``p1 > p2`` where ``p1`` is the proportion of the first sample and ``p2`` of the second one. prop_var : False or float in (0, 1) If prop_var is false, then the variance of the proportion estimate is calculated based on the sample proportion. Alternatively, a proportion can be specified to calculate this variance. Common use case is to use the proportion under the Null hypothesis to specify the variance of the proportion estimate. Returns ------- zstat : float test statistic for the z-test p-value : float p-value for the z-test Examples -------- >>> count = 5 >>> nobs = 83 >>> value = .05 >>> stat, pval = proportions_ztest(count, nobs, value) >>> print('{0:0.3f}'.format(pval)) 0.695 >>> import numpy as np >>> from statsmodels.stats.proportion import proportions_ztest >>> count = np.array([5, 12]) >>> nobs = np.array([83, 99]) >>> stat, pval = proportions_ztest(count, nobs) >>> print('{0:0.3f}'.format(pval)) 0.159 Notes ----- This uses a simple normal test for proportions. It should be the same as running the mean z-test on the data encoded 1 for event and 0 for no event so that the sum corresponds to the count. In the one and two sample cases with two-sided alternative, this test produces the same p-value as ``proportions_chisquare``, since the chisquare is the distribution of the square of a standard normal distribution. """ # TODO: verify that this really holds # TODO: add continuity correction or other improvements for small samples # TODO: change options similar to propotion_ztost ? 
count = np.asarray(count) nobs = np.asarray(nobs) if nobs.size == 1: nobs = nobs * np.ones_like(count) prop = count * 1. / nobs k_sample = np.size(prop) if value is None: if k_sample == 1: raise ValueError('value must be provided for a 1-sample test') value = 0 if k_sample == 1: diff = prop - value elif k_sample == 2: diff = prop[0] - prop[1] - value else: msg = 'more than two samples are not implemented yet' raise NotImplementedError(msg) p_pooled = np.sum(count) * 1. / np.sum(nobs) nobs_fact = np.sum(1. / nobs) if prop_var: p_pooled = prop_var var_ = p_pooled * (1 - p_pooled) * nobs_fact std_diff = np.sqrt(var_) from statsmodels.stats.weightstats import _zstat_generic2 return _zstat_generic2(diff, std_diff, alternative) def proportions_ztost(count, nobs, low, upp, prop_var='sample'): """ Equivalence test based on normal distribution Parameters ---------- count : {int, array_like} the number of successes in nobs trials. If this is array_like, then the assumption is that this represents the number of successes for each independent sample nobs : int the number of trials or observations, with the same length as count. low, upp : float equivalence interval low < prop1 - prop2 < upp prop_var : str or float in (0, 1) prop_var determines which proportion is used for the calculation of the standard deviation of the proportion estimate The available options for string are 'sample' (default), 'null' and 'limits'. If prop_var is a float, then it is used directly. 
Returns ------- pvalue : float pvalue of the non-equivalence test t1, pv1 : tuple of floats test statistic and pvalue for lower threshold test t2, pv2 : tuple of floats test statistic and pvalue for upper threshold test Notes ----- checked only for 1 sample case """ if prop_var == 'limits': prop_var_low = low prop_var_upp = upp elif prop_var == 'sample': prop_var_low = prop_var_upp = False #ztest uses sample elif prop_var == 'null': prop_var_low = prop_var_upp = 0.5 * (low + upp) elif np.isreal(prop_var): prop_var_low = prop_var_upp = prop_var tt1 = proportions_ztest(count, nobs, alternative='larger', prop_var=prop_var_low, value=low) tt2 = proportions_ztest(count, nobs, alternative='smaller', prop_var=prop_var_upp, value=upp) return np.maximum(tt1[1], tt2[1]), tt1, tt2, def proportions_chisquare(count, nobs, value=None): """ Test for proportions based on chisquare test Parameters ---------- count : {int, array_like} the number of successes in nobs trials. If this is array_like, then the assumption is that this represents the number of successes for each independent sample nobs : int the number of trials or observations, with the same length as count. value : None or float or array_like Returns ------- chi2stat : float test statistic for the chisquare test p-value : float p-value for the chisquare test (table, expected) table is a (k, 2) contingency table, ``expected`` is the corresponding table of counts that are expected under independence with given margins Notes ----- Recent version of scipy.stats have a chisquare test for independence in contingency tables. This function provides a similar interface to chisquare tests as ``prop.test`` in R, however without the option for Yates continuity correction. count can be the count for the number of events for a single proportion, or the counts for several independent proportions. If value is given, then all proportions are jointly tested against this value. 
def proportions_chisquare_allpairs(count, nobs, multitest_method='hs'):
    """
    Chisquare test of proportions for all pairs of k samples

    Performs a chisquare test for proportions for all pairwise comparisons.
    The alternative is two-sided

    Parameters
    ----------
    count : array_like
        the number of successes in nobs trials, one entry per sample.
    nobs : array_like
        the number of trials or observations, one entry per sample.
    multitest_method : str
        This chooses the method for the multiple testing p-value correction,
        that is used as default in the results.
        It can be any method that is available in ``multipletesting``.
        The default is Holm-Sidak 'hs'.

    Returns
    -------
    result : AllPairsResults instance
        The returned results instance has several statistics, such as
        p-values, attached, and additional methods for using a non-default
        ``multitest_method``.

    Notes
    -----
    Yates continuity correction is not available.
    """
    # indexing with a list of positions needs ndarrays; plain lists would
    # raise a TypeError
    count = np.asarray(count)
    nobs = np.asarray(nobs)
    all_pairs = list(zip(*np.triu_indices(len(count), 1)))
    pvals = [proportions_chisquare(count[list(pair)], nobs[list(pair)])[1]
             for pair in all_pairs]
    return AllPairsResults(pvals, all_pairs, multitest_method=multitest_method)


def proportions_chisquare_pairscontrol(count, nobs, value=None,
                                       multitest_method='hs',
                                       alternative='two-sided'):
    """
    Chisquare test of proportions for pairs of k samples compared to control

    Performs a chisquare test for proportions for pairwise comparisons with
    a control (Dunnet's test). The control is assumed to be the first
    element of ``count`` and ``nobs``.

    Parameters
    ----------
    count : array_like
        the number of successes in nobs trials, control sample first.
    nobs : array_like
        the number of trials or observations, control sample first.
    value : None
        Not yet implemented, must be None.
    multitest_method : str
        This chooses the method for the multiple testing p-value correction,
        that is used as default in the results.
        It can be any method that is available in ``multipletesting``.
        The default is Holm-Sidak 'hs'.
    alternative : str in ['two-sided', '2s']
        Only the two-sided alternative is implemented.

    Returns
    -------
    result : AllPairsResults instance
        The returned results instance has several statistics, such as
        p-values, attached, and additional methods for using a non-default
        ``multitest_method``.

    Raises
    ------
    NotImplementedError
        If ``value`` is not None or ``alternative`` is one-sided.

    Notes
    -----
    Yates continuity correction is not available.

    ``value`` and one-sided ``alternative`` options are not yet implemented.
    """
    if (value is not None) or (alternative not in ['two-sided', '2s']):
        raise NotImplementedError(
            "only value=None and a two-sided alternative are implemented")

    # indexing with a list of positions needs ndarrays
    count = np.asarray(count)
    nobs = np.asarray(nobs)
    # every sample is paired with the control at position 0
    all_pairs = [(0, k) for k in range(1, len(count))]
    pvals = [proportions_chisquare(count[list(pair)], nobs[list(pair)])[1]
             for pair in all_pairs]
    return AllPairsResults(pvals, all_pairs, multitest_method=multitest_method)
def confint_proportions_2indep(count1, nobs1, count2, nobs2, method=None,
                               compare='diff', alpha=0.05, correction=True):
    """
    Confidence intervals for comparing two independent proportions.

    This assumes that we have two independent binomial samples.

    Parameters
    ----------
    count1, nobs1 : float
        Count and sample size for first sample.
    count2, nobs2 : float
        Count and sample size for the second sample.
    method : str
        Method for computing confidence interval. If method is None, then a
        default method is used. The default might change as more methods are
        added.

        diff:
         - 'wald',
         - 'agresti-caffo'
         - 'newcomb' (default)
         - 'score'

        ratio:
         - 'log'
         - 'log-adjusted' (default)
         - 'score'

        odds-ratio:
         - 'logit'
         - 'logit-adjusted' (default)
         - 'logit-smoothed'
         - 'score'

    compare : string in ['diff', 'ratio', 'odds-ratio']
        If compare is diff, then the confidence interval is for diff = p1 - p2.
        If compare is ratio, then the confidence interval is for the risk ratio
        defined by ratio = p1 / p2.
        If compare is odds-ratio, then the confidence interval is for the
        odds-ratio defined by or = p1 / (1 - p1) / (p2 / (1 - p2)).
        'or' is accepted as alias for 'odds-ratio'.
    alpha : float
        Significance level for the confidence interval, default is 0.05.
        The nominal coverage probability is 1 - alpha.
    correction : bool
        Passed on to the 'score' methods, ignored by all other methods.

    Returns
    -------
    low, upp

    See Also
    --------
    test_proportions_2indep
    tost_proportions_2indep

    Notes
    -----
    Status: experimental, API and defaults might still change.
        more ``methods`` will be added.

    References
    ----------
    .. [1] Fagerland, Morten W., Stian Lydersen, and Petter Laake. 2015.
       "Recommended Confidence Intervals for Two Independent Binomial
       Proportions." Statistical Methods in Medical Research 24 (2): 224-54.
       https://doi.org/10.1177/0962280211415469.
    .. [2] Koopman, P. A. R. 1984. "Confidence Intervals for the Ratio of Two
       Binomial Proportions." Biometrics 40 (2): 513-17.
       https://doi.org/10.2307/2531405.
    .. [3] Miettinen, Olli, and Markku Nurminen. "Comparative analysis of two
       rates." Statistics in medicine 4, no. 2 (1985): 213-226.
    .. [4] Newcombe, Robert G. 1998. "Interval Estimation for the Difference
       between Independent Proportions: Comparison of Eleven Methods."
       Statistics in Medicine 17 (8): 873-90.
       https://doi.org/10.1002/(SICI)1097-0258(19980430)17:8<873::AID-
       SIM779>3.0.CO;2-I.
    .. [5] Newcombe, Robert G., and Markku M. Nurminen. 2011. "In Defence of
       Score Intervals for Proportions and Their Differences." Communications
       in Statistics - Theory and Methods 40 (7): 1271-82.
       https://doi.org/10.1080/03610920903576580.
    """
    defaults = {'diff': 'newcomb',
                'ratio': 'log-adjusted',
                'odds-ratio': 'logit-adjusted'}
    # normalize compare name
    if compare.lower() == 'or':
        compare = 'odds-ratio'
    method = (defaults[compare] if method is None else method).lower()
    if method.startswith('agr'):
        method = 'agresti-caffo'

    prop1 = count1 / nobs1
    prop2 = count2 / nobs2
    prop_diff = prop1 - prop2

    def log_scale_interval(estimate, var_log):
        # symmetric interval on the log scale, mapped back with exp
        half = stats.norm.isf(alpha / 2) * np.sqrt(var_log)
        center = np.log(estimate)
        return np.exp(center - half), np.exp(center + half)

    if compare == 'diff':
        if method in ('wald', 'agresti-caffo'):
            # agresti-caffo adds one success and one failure per sample
            shift = 1 if method == 'agresti-caffo' else 0
            k1, n1 = count1 + shift, nobs1 + 2 * shift
            k2, n2 = count2 + shift, nobs2 + 2 * shift
            q1 = k1 / n1
            q2 = k2 / n2
            center = q1 - q2
            var = q1 * (1 - q1) / n1 + q2 * (1 - q2) / n2
            half = stats.norm.isf(alpha / 2) * np.sqrt(var)
            return center - half, center + half

        if method.startswith('newcomb'):
            # combine the Wilson intervals of the two samples
            low1, upp1 = proportion_confint(count1, nobs1,
                                            method='wilson', alpha=alpha)
            low2, upp2 = proportion_confint(count2, nobs2,
                                            method='wilson', alpha=alpha)
            below = np.sqrt((prop1 - low1)**2 + (upp2 - prop2)**2)
            above = np.sqrt((prop2 - low2)**2 + (upp1 - prop1)**2)
            return prop_diff - below, prop_diff + above

        if method == "score":
            low, upp = _score_confint_inversion(count1, nobs1, count2, nobs2,
                                                compare=compare,
                                                alpha=alpha,
                                                correction=correction)
            return low, upp

        raise ValueError('method not recognized')

    if compare == 'ratio':
        # ratio = p1 / p2
        if method in ('log', 'log-adjusted'):
            half_obs = 0.5 if method == 'log-adjusted' else 0
            k1, n1 = count1 + half_obs, nobs1 + half_obs
            k2, n2 = count2 + half_obs, nobs2 + half_obs
            q1 = k1 / n1
            q2 = k2 / n2
            ratio_adj = q1 / q2
            var = (1 / k1) - 1 / n1 + 1 / k2 - 1 / n2
            return log_scale_interval(ratio_adj, var)

        if method == 'score':
            res = _confint_riskratio_koopman(count1, nobs1, count2, nobs2,
                                             alpha=alpha,
                                             correction=correction)
            low, upp = res.confint
            return low, upp

        raise ValueError('method not recognized')

    if compare == 'odds-ratio':
        # odds_ratio = p1 / (1 - p1) / p2 * (1 - p2)
        if method in ('logit', 'logit-adjusted', 'logit-smoothed'):
            if method == 'logit-smoothed':
                k1, n1, k2, n2 = _shrink_prob(count1, nobs1, count2, nobs2,
                                              shrink_factor=2,
                                              return_corr=False)[0]
            else:
                half_obs = 0.5 if method == 'logit-adjusted' else 0
                k1, n1 = count1 + half_obs, nobs1 + 2 * half_obs
                k2, n2 = count2 + half_obs, nobs2 + 2 * half_obs
            q1 = k1 / n1
            q2 = k2 / n2
            oratio_adj = q1 / (1 - q1) / q2 * (1 - q2)
            var = (1 / k1 + 1 / (n1 - k1) +
                   1 / k2 + 1 / (n2 - k2))
            return log_scale_interval(oratio_adj, var)

        if method == "score":
            low, upp = _score_confint_inversion(count1, nobs1, count2, nobs2,
                                                compare=compare,
                                                alpha=alpha,
                                                correction=correction)
            return low, upp

        raise ValueError('method not recognized')

    raise ValueError('compare not recognized')
def _shrink_prob(count1, nobs1, count2, nobs2, shrink_factor=2,
                 return_corr=True):
    """
    Shrink observed counts towards independence

    Helper function for 'logit-smoothed' inference for the odds-ratio of two
    independent proportions.

    Parameters
    ----------
    count1, nobs1 : float or int
        count and sample size for first sample
    count2, nobs2 : float or int
        count and sample size for the second sample
    shrink_factor : float
        This corresponds to the number of observations that are added in total
        proportional to the probabilities under independence.
    return_corr : bool
        If true, then only the correction terms are returned.
        If false, then the corrected counts, i.e. original counts plus
        correction term, are returned together with ``prob_indep``.

    Returns
    -------
    count1_corr, nobs1_corr, count2_corr, nobs2_corr : float
        correction or corrected counts. If ``return_corr`` is false, these
        four values form the first element of a 2-tuple.
    prob_indep :
        TODO/Warning : this will change most likely
        probabilities under independence, only returned as second element
        if return_corr is false.

    Raises
    ------
    ValueError
        If any of the count or nobs arguments has more than one element.
    """
    for arg in (count1, nobs1, count2, nobs2):
        if np.size(arg) > 1:
            raise ValueError("function is not vectorized")

    total = nobs1 + nobs2
    # margins of the 2x2 table: columns are (success, failure), rows samples
    col_margin = np.array([count1 + count2,
                           nobs1 - count1 + nobs2 - count2])
    row_margin = np.array([nobs1, nobs2])
    prob_indep = (col_margin * row_margin[:, None]) / total**2
    corr = shrink_factor * prob_indep

    add_count1, add_nobs1 = corr[0, 0], corr[0].sum()
    add_count2, add_nobs2 = corr[1, 0], corr[1].sum()
    if return_corr:
        return (add_count1, add_nobs1, add_count2, add_nobs2)

    corrected = (count1 + add_count1, nobs1 + add_nobs1,
                 count2 + add_count2, nobs2 + add_nobs2)
    return corrected, prob_indep
def score_test_proportions_2indep(count1, nobs1, count2, nobs2, value=None,
                                  compare='diff', alternative='two-sided',
                                  correction=True, return_results=True):
    """
    Score test for two independent proportions

    This uses the constrained estimate of the proportions to compute
    the variance under the Null hypothesis.

    Parameters
    ----------
    count1, nobs1 :
        count and sample size for first sample
    count2, nobs2 :
        count and sample size for the second sample
    value : float
        diff, ratio or odds-ratio under the null hypothesis. If value is None,
        then equality of proportions under the Null is assumed,
        i.e. value=0 for 'diff' or value=1 for either rate or odds-ratio.
    compare : string in ['diff', 'ratio', 'odds-ratio']
        If compare is diff, then the hypothesis test is for diff = p1 - p2.
        If compare is ratio, then the hypothesis test is for the risk ratio
        defined by ratio = p1 / p2.
        If compare is odds-ratio, then the hypothesis test is for the
        odds-ratio defined by or = p1 / (1 - p1) / (p2 / (1 - p2)).
        'or' is accepted as alias for 'odds-ratio'.
    alternative : str
        Alternative hypothesis, passed on to the normal-distribution based
        computation of the test statistic and p-value.
    correction : bool
        If True (default), then the variance is multiplied by the small
        sample factor ``nobs / (nobs - 1)``.
    return_results : bool
        If true, then a results instance with extra information is returned,
        otherwise a tuple with statistic and pvalue is returned.

    Returns
    -------
    results : results instance or tuple
        If return_results is True, then a results instance with the
        information in attributes is returned.
        If return_results is False, then only ``statistic`` and ``pvalue``
        are returned.

        statistic : float
            test statistic asymptotically normal distributed N(0, 1)
        pvalue : float
            p-value based on normal distribution
        other attributes :
            additional information about the hypothesis test

    Raises
    ------
    ValueError
        If ``compare`` is not one of the supported options.

    Notes
    -----
    Status: experimental, the type or extra information in the return might
    change.
    """
    if compare not in ('diff', 'ratio', 'or', 'odds-ratio'):
        # without this check an unknown option falls through all branches
        # and fails later on unbound local variables
        raise ValueError('compare "%s" not recognized' % compare)

    value_default = 0 if compare == 'diff' else 1
    if value is None:
        # TODO: odds ratio does not work if value=1
        value = value_default

    nobs = nobs1 + nobs2
    count = count1 + count2

    p1 = count1 / nobs1
    p2 = count2 / nobs2
    if value == value_default:
        # use pooled estimator if equality test
        # shortcut, but required for odds ratio
        prop0 = prop1 = count / nobs
    # this uses index 0 from Miettinen Nurminen 1985
    count0, nobs0 = count2, nobs2
    p0 = p2

    if compare == 'diff':
        diff = value  # hypothesis value

        if diff != 0:
            # constrained estimate as closed form root of a cubic equation
            tmp3 = nobs
            tmp2 = (nobs1 + 2 * nobs0) * diff - nobs - count
            tmp1 = (count0 * diff - nobs - 2 * count0) * diff + count
            tmp0 = count0 * diff * (1 - diff)
            q = ((tmp2 / (3 * tmp3))**3 - tmp1 * tmp2 / (6 * tmp3**2) +
                 tmp0 / (2 * tmp3))
            p = np.sign(q) * np.sqrt((tmp2 / (3 * tmp3))**2 -
                                     tmp1 / (3 * tmp3))
            a = (np.pi + np.arccos(q / p**3)) / 3

            prop0 = 2 * p * np.cos(a) - tmp2 / (3 * tmp3)
            prop1 = prop0 + diff

        var = prop1 * (1 - prop1) / nobs1 + prop0 * (1 - prop0) / nobs0
        if correction:
            var *= nobs / (nobs - 1)

        diff_stat = (p1 - p0 - diff)

    elif compare == 'ratio':
        # risk ratio
        ratio = value

        if ratio != 1:
            # constrained estimate as root of a quadratic equation
            a = nobs * ratio
            b = -(nobs1 * ratio + count1 + nobs2 + count0 * ratio)
            c = count
            prop0 = (-b - np.sqrt(b**2 - 4 * a * c)) / (2 * a)
            prop1 = prop0 * ratio

        var = (prop1 * (1 - prop1) / nobs1 +
               ratio**2 * prop0 * (1 - prop0) / nobs0)
        if correction:
            var *= nobs / (nobs - 1)

        # NCSS looks incorrect for var, but it is what should be reported
        # diff_stat = (p1 / p0 - ratio)   # NCSS/PASS
        diff_stat = (p1 - ratio * p0)  # Miettinen Nurminen

    else:
        # odds ratio, compare is 'or' or 'odds-ratio'
        oratio = value

        if oratio != 1:
            # Note the constraint estimator does not handle odds-ratio = 1
            a = nobs0 * (oratio - 1)
            b = nobs1 * oratio + nobs0 - count * (oratio - 1)
            c = -count
            prop0 = (-b + np.sqrt(b**2 - 4 * a * c)) / (2 * a)
            prop1 = prop0 * oratio / (1 + prop0 * (oratio - 1))

        # try to avoid 0 and 1 proportions,
        # those raise Zero Division Runtime Warnings
        eps = 1e-10
        prop0 = np.clip(prop0, eps, 1 - eps)
        prop1 = np.clip(prop1, eps, 1 - eps)

        var = (1 / (prop1 * (1 - prop1) * nobs1) +
               1 / (prop0 * (1 - prop0) * nobs0))
        if correction:
            var *= nobs / (nobs - 1)

        diff_stat = ((p1 - prop1) / (prop1 * (1 - prop1)) -
                     (p0 - prop0) / (prop0 * (1 - prop0)))

    statistic, pvalue = _zstat_generic2(diff_stat, np.sqrt(var),
                                        alternative=alternative)

    if return_results:
        res = HolderTuple(statistic=statistic,
                          pvalue=pvalue,
                          compare=compare,
                          method='score',
                          variance=var,
                          alternative=alternative,
                          prop1_null=prop1,
                          prop2_null=prop0,
                          )
        return res
    else:
        return statistic, pvalue
def test_proportions_2indep(count1, nobs1, count2, nobs2, value=None,
                            method=None, compare='diff',
                            alternative='two-sided', correction=True,
                            return_results=True):
    """
    Hypothesis test for comparing two independent proportions

    This assumes that we have two independent binomial samples.

    The Null and alternative hypothesis are

    for compare = 'diff'

    - H0: prop1 - prop2 - value = 0
    - H1: prop1 - prop2 - value != 0  if alternative = 'two-sided'
    - H1: prop1 - prop2 - value > 0   if alternative = 'larger'
    - H1: prop1 - prop2 - value < 0   if alternative = 'smaller'

    for compare = 'ratio'

    - H0: prop1 / prop2 - value = 0
    - H1: prop1 / prop2 - value != 0  if alternative = 'two-sided'
    - H1: prop1 / prop2 - value > 0   if alternative = 'larger'
    - H1: prop1 / prop2 - value < 0   if alternative = 'smaller'

    for compare = 'odds-ratio'

    - H0: or - value = 0
    - H1: or - value != 0  if alternative = 'two-sided'
    - H1: or - value > 0   if alternative = 'larger'
    - H1: or - value < 0   if alternative = 'smaller'

    where odds-ratio or = prop1 / (1 - prop1) / (prop2 / (1 - prop2))

    Parameters
    ----------
    count1 : int
        Count for first sample.
    nobs1 : int
        Sample size for first sample.
    count2 : int
        Count for the second sample.
    nobs2 : int
        Sample size for the second sample.
    value : float
        Value of the difference, risk ratio or odds ratio of 2 independent
        proportions under the null hypothesis.
        Default is equal proportions, 0 for diff and 1 for risk-ratio and for
        odds-ratio.
    method : string
        Method for computing the hypothesis test. If method is None, then a
        default method is used. The default might change as more methods are
        added.

        diff:

        - 'wald',
        - 'agresti-caffo'
        - 'score' if correction is True, then this uses the degrees of freedom
           correction ``nobs / (nobs - 1)`` as in Miettinen Nurminen 1985

        ratio:

        - 'log': wald test using log transformation
        - 'log-adjusted': wald test using log transformation,
           adds 0.5 to counts
        - 'score': if correction is True, then this uses the degrees of freedom
           correction ``nobs / (nobs - 1)`` as in Miettinen Nurminen 1985

        odds-ratio:

        - 'logit': wald test using logit transformation
        - 'logit-adjusted': wald test using logit transformation,
           adds 0.5 to counts
        - 'logit-smoothed': wald test using logit transformation, biases
           cell counts towards independence by adding two observations in
           total.
        - 'score' if correction is True, then this uses the degrees of freedom
           correction ``nobs / (nobs - 1)`` as in Miettinen Nurminen 1985

    compare : {'diff', 'ratio', 'odds-ratio'}
        If compare is `diff`, then the hypothesis test is for the risk
        difference diff = p1 - p2.
        If compare is `ratio`, then the hypothesis test is for the
        risk ratio defined by ratio = p1 / p2.
        If compare is `odds-ratio`, then the hypothesis test is for the
        odds-ratio defined by or = p1 / (1 - p1) / (p2 / (1 - p2)).
        'or' is accepted as alias for 'odds-ratio'.
    alternative : {'two-sided', 'smaller', 'larger'}
        alternative hypothesis, which can be two-sided or either one of the
        one-sided tests.
    correction : bool
        If correction is True (default), then the Miettinen and Nurminen
        small sample correction to the variance nobs / (nobs - 1) is used.
        Applies only if method='score'.
    return_results : bool
        If true, then a results instance with extra information is returned,
        otherwise a tuple with statistic and pvalue is returned.

    Returns
    -------
    results : results instance or tuple
        If return_results is True, then a results instance with the
        information in attributes is returned.
        If return_results is False, then only ``statistic`` and ``pvalue``
        are returned.

        statistic : float
            test statistic asymptotically normal distributed N(0, 1)
        pvalue : float
            p-value based on normal distribution
        other attributes :
            additional information about the hypothesis test

    Raises
    ------
    ValueError
        If ``compare`` or ``method`` is not recognized.
    NotImplementedError
        If ``method`` is 'newcomb' for ``compare='diff'``.

    See Also
    --------
    tost_proportions_2indep
    confint_proportions_2indep

    Notes
    -----
    Status: experimental, API and defaults might still change.
        More ``methods`` will be added.

    The current default methods are

    - 'diff': 'agresti-caffo',
    - 'ratio': 'log-adjusted',
    - 'odds-ratio': 'logit-adjusted'
    """
    method_default = {'diff': 'agresti-caffo',
                      'ratio': 'log-adjusted',
                      'odds-ratio': 'logit-adjusted'}
    # normalize compare name
    if compare.lower() == 'or':
        compare = 'odds-ratio'
    if compare not in method_default:
        # fail with the intended error instead of a KeyError from the
        # default-method lookup
        raise ValueError('compare "%s" not recognized' % compare)
    if method is None:
        method = method_default[compare]

    method = method.lower()
    if method.startswith('agr'):
        method = 'agresti-caffo'

    if value is None:
        # TODO: odds ratio does not work if value=1 for score test
        value = 0 if compare == 'diff' else 1

    count1, nobs1, count2, nobs2 = map(np.asarray,
                                       [count1, nobs1, count2, nobs2])

    p1 = count1 / nobs1
    p2 = count2 / nobs2
    diff = p1 - p2
    ratio = p1 / p2
    odds_ratio = p1 / (1 - p1) / p2 * (1 - p2)

    if method == 'score':
        # the score test is the same call for all compare options
        res = score_test_proportions_2indep(count1, nobs1, count2, nobs2,
                                            value=value, compare=compare,
                                            alternative=alternative,
                                            correction=correction,
                                            return_results=return_results)
        if not return_results:
            statistic, pvalue = res[:2]
            return statistic, pvalue
        # add the sample estimates that the score test does not attach
        res.diff = diff
        res.ratio = ratio
        res.odds_ratio = odds_ratio
        res.value = value
        return res

    if compare == 'diff':
        if method in ['wald', 'agresti-caffo']:
            addone = 1 if method == 'agresti-caffo' else 0
            count1_, nobs1_ = count1 + addone, nobs1 + 2 * addone
            count2_, nobs2_ = count2 + addone, nobs2 + 2 * addone
            p1_ = count1_ / nobs1_
            p2_ = count2_ / nobs2_
            diff_stat = p1_ - p2_ - value
            var = p1_ * (1 - p1_) / nobs1_ + p2_ * (1 - p2_) / nobs2_
        elif method.startswith('newcomb'):
            msg = 'newcomb not available for hypothesis test'
            raise NotImplementedError(msg)
        else:
            raise ValueError('method not recognized')

    elif compare == 'ratio':
        if method in ['log', 'log-adjusted']:
            addhalf = 0.5 if method == 'log-adjusted' else 0
            count1_, nobs1_ = count1 + addhalf, nobs1 + addhalf
            count2_, nobs2_ = count2 + addhalf, nobs2 + addhalf
            p1_ = count1_ / nobs1_
            p2_ = count2_ / nobs2_
            ratio_ = p1_ / p2_
            var = (1 / count1_) - 1 / nobs1_ + 1 / count2_ - 1 / nobs2_
            diff_stat = np.log(ratio_) - np.log(value)
        else:
            raise ValueError('method not recognized')

    else:
        # compare == 'odds-ratio'
        if method in ['logit', 'logit-adjusted', 'logit-smoothed']:
            if method in ['logit-smoothed']:
                adjusted = _shrink_prob(count1, nobs1, count2, nobs2,
                                        shrink_factor=2, return_corr=False)[0]
                count1_, nobs1_, count2_, nobs2_ = adjusted
            else:
                addhalf = 0.5 if method == 'logit-adjusted' else 0
                count1_, nobs1_ = count1 + addhalf, nobs1 + 2 * addhalf
                count2_, nobs2_ = count2 + addhalf, nobs2 + 2 * addhalf
            p1_ = count1_ / nobs1_
            p2_ = count2_ / nobs2_
            odds_ratio_ = p1_ / (1 - p1_) / p2_ * (1 - p2_)
            var = (1 / count1_ + 1 / (nobs1_ - count1_) +
                   1 / count2_ + 1 / (nobs2_ - count2_))
            diff_stat = np.log(odds_ratio_) - np.log(value)
        else:
            raise ValueError('method "%s" not recognized' % method)

    # all wald-type methods use the normal distribution
    statistic, pvalue = _zstat_generic2(diff_stat, np.sqrt(var),
                                        alternative=alternative)

    if not return_results:
        return statistic, pvalue

    return HolderTuple(statistic=statistic,
                       pvalue=pvalue,
                       compare=compare,
                       method=method,
                       diff=diff,
                       ratio=ratio,
                       odds_ratio=odds_ratio,
                       variance=var,
                       alternative=alternative,
                       value=value,
                       )


# library function, not a unit test: keep pytest from collecting it when it
# is imported into a test module
test_proportions_2indep.__test__ = False
def tost_proportions_2indep(count1, nobs1, count2, nobs2, low, upp,
                            method=None, compare='diff', correction=True):
    """
    Equivalence test based on two one-sided `test_proportions_2indep`

    This assumes that we have two independent binomial samples.

    The Null and alternative hypothesis for equivalence testing are

    for compare = 'diff'

    - H0: prop1 - prop2 <= low or upp <= prop1 - prop2
    - H1: low < prop1 - prop2 < upp

    for compare = 'ratio'

    - H0: prop1 / prop2 <= low or upp <= prop1 / prop2
    - H1: low < prop1 / prop2 < upp

    for compare = 'odds-ratio'

    - H0: or <= low or upp <= or
    - H1: low < or < upp

    where odds-ratio or = prop1 / (1 - prop1) / (prop2 / (1 - prop2))

    Parameters
    ----------
    count1, nobs1 :
        count and sample size for first sample
    count2, nobs2 :
        count and sample size for the second sample
    low, upp :
        equivalence margin for diff, risk ratio or odds ratio
    method : string
        method for computing the hypothesis test. If method is None, then a
        default method is used. The default might change as more methods are
        added.

        diff:
         - 'wald',
         - 'agresti-caffo'
         - 'score' if correction is True, then this uses the degrees of freedom
           correction ``nobs / (nobs - 1)`` as in Miettinen Nurminen 1985.

        ratio:
         - 'log': wald test using log transformation
         - 'log-adjusted': wald test using log transformation,
            adds 0.5 to counts
         - 'score' if correction is True, then this uses the degrees of freedom
           correction ``nobs / (nobs - 1)`` as in Miettinen Nurminen 1985.

        odds-ratio:
         - 'logit': wald test using logit transformation
         - 'logit-adjusted': wald test using logit transformation,
            adds 0.5 to counts
         - 'logit-smoothed': wald test using logit transformation, biases
            cell counts towards independence by adding two observations in
            total.
         - 'score' if correction is True, then this uses the degrees of freedom
            correction ``nobs / (nobs - 1)`` as in Miettinen Nurminen 1985

    compare : string in ['diff', 'ratio', 'odds-ratio']
        If compare is `diff`, then the hypothesis test is for
        diff = p1 - p2.
        If compare is `ratio`, then the hypothesis test is for the
        risk ratio defined by ratio = p1 / p2.
        If compare is `odds-ratio`, then the hypothesis test is for the
        odds-ratio defined by or = p1 / (1 - p1) / (p2 / (1 - p2)).
    correction : bool
        If correction is True (default), then the Miettinen and Nurminen
        small sample correction to the variance nobs / (nobs - 1) is used.
        Applies only if method='score'.

    Returns
    -------
    res : HolderTuple
        Results instance with attributes

        statistic, pvalue :
            taken from the one-sided test that has the larger p-value
        results_larger :
            results instance for one-sided hypothesis at the lower margin
        results_smaller :
            results instance for one-sided hypothesis at the upper margin
        compare, method, title :
            options used and a description of the test

    See Also
    --------
    test_proportions_2indep
    confint_proportions_2indep

    Notes
    -----
    Status: experimental, API and defaults might still change.

    The TOST equivalence test delegates to `test_proportions_2indep` and has
    the same method and comparison options.
    """
    shared = dict(method=method, compare=compare, correction=correction,
                  return_results=True)
    res_low = test_proportions_2indep(count1, nobs1, count2, nobs2,
                                      value=low, alternative='larger',
                                      **shared)
    res_upp = test_proportions_2indep(count1, nobs1, count2, nobs2,
                                      value=upp, alternative='smaller',
                                      **shared)

    # index 1 picks the upper margin test where its p-value is the larger one
    pick_upp = np.asarray(res_low.pvalue < res_upp.pvalue, int)
    statistic = np.choose(pick_upp, [res_low.statistic, res_upp.statistic])
    pvalue = np.choose(pick_upp, [res_low.pvalue, res_upp.pvalue])

    return HolderTuple(statistic=statistic,
                       pvalue=pvalue,
                       compare=compare,
                       method=method,
                       results_larger=res_low,
                       results_smaller=res_upp,
                       title="Equivalence test for 2 independent proportions"
                       )


def _std_2prop_power(diff, p2, ratio=1, alpha=0.05, value=0):
    """
    Compute standard error under null and alternative for 2 proportions

    helper function for power and sample size computation

    Parameters
    ----------
    diff : float
        Difference p1 - p2 under the alternative.
    p2 : float
        Proportion of the second (reference) sample.
    ratio : float
        Sample size ratio, used as weight of ``p2`` in the pooled proportion
        and passed on to ``_std_diff_prop``.
    alpha : float
        Not used in the computation.
    value : float
        Difference under the null, only ``value=0`` is implemented.

    Returns
    -------
    p_pooled, std_null, std_alt
        Pooled proportion, and the standard deviations returned by
        ``_std_diff_prop`` for the pooled (null) and the unpooled
        (alternative) proportions.
    """
    if value != 0:
        raise NotImplementedError(
            'non-zero diff under null, value, is not yet implemented')

    p1 = p2 + diff
    # the null variance currently always uses the pooled proportion
    p_pooled = (p1 + p2 * ratio) / (1 + ratio)

    std_null = _std_diff_prop(p_pooled, p_pooled, ratio=ratio)
    std_alt = _std_diff_prop(p1, p2, ratio=ratio)
    return p_pooled, std_null, std_alt
def power_proportions_2indep(diff, prop2, nobs1, ratio=1, alpha=0.05,
                             value=0, alternative='two-sided',
                             return_results=True):
    """
    Power for ztest that two independent proportions are equal

    This assumes that the variance is based on the pooled proportion
    under the null and the non-pooled variance under the alternative

    Parameters
    ----------
    diff : float
        difference between proportion 1 and 2 under the alternative
    prop2 : float
        proportion for the reference case, prop2, proportions for the
        first case will be computed using p2 and diff
        p1 = p2 + diff
    nobs1 : float or int
        number of observations in sample 1
    ratio : float
        sample size ratio, nobs2 = ratio * nobs1
    alpha : float in interval (0,1)
        Significance level, e.g. 0.05, is the probability of a type I
        error, that is wrong rejections if the Null Hypothesis is true.
    value : float
        currently only `value=0`, i.e. equality testing, is supported
    alternative : string, 'two-sided' (default), 'larger', 'smaller'
        Alternative hypothesis whether the power is calculated for a
        two-sided (default) or one sided test. The one-sided test can be
        either 'larger', 'smaller'.
    return_results : bool
        If true, then a results instance with extra information is returned,
        otherwise only the computed power is returned.

    Returns
    -------
    results : results instance or float
        If return_results is True, then a results instance with the
        information in attributes is returned.
        If return_results is False, then only the power is returned.

        power : float
            Power of the test, e.g. 0.8, is one minus the probability of a
            type II error. Power is the probability that the test correctly
            rejects the Null Hypothesis if the Alternative Hypothesis is true.

        Other attributes in results instance include :

        p_pooled
            pooled proportion, used for std_null
        std_null
            standard error of difference under the null hypothesis (without
            sqrt(nobs1))
        std_alt
            standard error of difference under the alternative hypothesis
            (without sqrt(nobs1))
        nobs1, nobs2, nobs_ratio, alpha
            sample sizes and options used in the computation
    """
    # TODO: avoid possible circular import, check if needed
    from statsmodels.stats.power import normal_power_het

    p_pooled, std_null, std_alt = _std_2prop_power(
        diff, prop2, ratio=ratio, alpha=alpha, value=value)

    power = normal_power_het(diff, nobs1, alpha,
                             std_null=std_null,
                             std_alternative=std_alt,
                             alternative=alternative)

    if not return_results:
        return power

    return Holder(power=power,
                  p_pooled=p_pooled,
                  std_null=std_null,
                  std_alt=std_alt,
                  nobs1=nobs1,
                  nobs2=ratio * nobs1,
                  nobs_ratio=ratio,
                  alpha=alpha,
                  )
def samplesize_proportions_2indep_onetail(diff, prop2, power, ratio=1,
                                          alpha=0.05, value=0,
                                          alternative='two-sided'):
    """
    Required sample size assuming normal distribution based on one tail

    This uses an explicit computation for the sample size that is required
    to achieve a given power corresponding to the appropriate tails of the
    normal distribution. This ignores the far tail in a two-sided test
    which is negligible in the common case when alternative and null are
    far apart.

    Parameters
    ----------
    diff : float
        Difference between proportion 1 and 2 under the alternative
    prop2 : float
        proportion for the reference case, prop2, proportions for the
        first case will be computing using p2 and diff
        p1 = p2 + diff
    power : float
        Power for which sample size is computed.
    ratio : float
        Sample size ratio, nobs2 = ratio * nobs1
    alpha : float in interval (0,1)
        Significance level, e.g. 0.05, is the probability of a type I
        error, that is wrong rejections if the Null Hypothesis is true.
    value : float
        Currently only `value=0`, i.e. equality testing, is supported
    alternative : string, 'two-sided' (default), 'larger', 'smaller'
        Alternative hypothesis whether the power is calculated for a
        two-sided (default) or one sided test. In the case of a one-sided
        alternative, it is assumed that the test is in the appropriate tail.

    Returns
    -------
    nobs1 : float
        Number of observations in sample 1.
    """
    # TODO: avoid possible circular import, check if needed
    from statsmodels.stats.power import normal_sample_size_one_tail

    # a two-sided test spends only half of alpha in the relevant tail
    two_sided = alternative in ['two-sided', '2s']
    tail_alpha = alpha / 2 if two_sided else alpha

    std_null, std_alt = _std_2prop_power(diff, prop2, ratio=ratio,
                                         alpha=tail_alpha, value=value)[1:]

    return normal_sample_size_one_tail(diff, power, tail_alpha,
                                       std_null=std_null,
                                       std_alternative=std_alt)
def _score_confint_inversion(count1, nobs1, count2, nobs2, compare='diff',
                             alpha=0.05, correction=True):
    """
    Compute score confidence interval by inverting score test

    Parameters
    ----------
    count1, nobs1 :
        Count and sample size for first sample.
    count2, nobs2 :
        Count and sample size for the second sample.
    compare : string in ['diff', 'ratio', 'odds-ratio']
        If compare is `diff`, then the confidence interval is for
        diff = p1 - p2.
        If compare is `ratio`, then the confidence interval is for the
        risk ratio defined by ratio = p1 / p2.
        If compare is `odds-ratio`, then the confidence interval is for the
        odds-ratio defined by or = p1 / (1 - p1) / (p2 / (1 - p2)).
    alpha : float in interval (0,1)
        Significance level, e.g. 0.05, is the probability of a type I
        error, that is wrong rejections if the Null Hypothesis is true.
    correction : bool
        If correction is True (default), then the Miettinen and Nurminen
        small sample correction to the variance nobs / (nobs - 1) is used.
        Applies only if method='score'.

    Returns
    -------
    low : float
        Lower confidence bound.
    upp : float
        Upper confidence bound.

    Raises
    ------
    ValueError
        If ``compare`` is not one of the three supported options.
    """
    # use wald-type methods to get starting values
    # this will not work if score confint becomes default
    # maybe use "wald" as alias that works for all compare statistics
    use_method = {"diff": "wald", "ratio": "log", "odds-ratio": "logit"}
    if compare not in use_method:
        raise ValueError('compare "%s" not recognized' % compare)

    def func(v):
        # zero where the two-sided score test p-value equals alpha
        r = test_proportions_2indep(count1, nobs1, count2, nobs2,
                                    value=v, compare=compare, method='score',
                                    correction=correction,
                                    alternative="two-sided")
        return r.pvalue - alpha

    # sample proportions, computed directly so that no score test has to be
    # evaluated at an arbitrary, possibly invalid null value
    p1 = np.asarray(count1) / np.asarray(nobs1)
    p2 = np.asarray(count2) / np.asarray(nobs2)

    rci0 = confint_proportions_2indep(count1, nobs1, count2, nobs2,
                                      method=use_method[compare],
                                      compare=compare, alpha=alpha)

    # widen the wald-type interval to bracket the roots
    # Note diff might be negative
    ub = rci0[1] + np.abs(rci0[1]) * 0.5
    lb = rci0[0] - np.abs(rci0[0]) * 0.25
    if compare == 'diff':
        param = p1 - p2
        # 1 might not be the correct upper bound because
        # rootfinding is for the `diff` and not for a probability.
        ub = min(ub, 0.99999)
        # same for the lower end, a difference cannot be below -1
        lb = max(lb, -0.99999)
    elif compare == 'ratio':
        param = p1 / p2
        ub *= 2  # add more buffer
    else:
        param = p1 / (1 - p1) / p2 * (1 - p2)

    # root finding for confint bounds, one root on each side of the estimate
    upp = optimize.brentq(func, param, ub)
    low = optimize.brentq(func, lb, param)
    return low, upp


def _confint_riskratio_koopman(count1, nobs1, count2, nobs2, alpha=0.05,
                               correction=True):
    """
    Score confidence interval for ratio of proportions, Koopman/Nam

    signature not consistent with other functions

    When correction is True, then the small sample correction nobs / (nobs - 1)
    by Miettinen/Nurminen is used.

    Parameters
    ----------
    count1, nobs1 :
        Count and sample size for first sample.
    count2, nobs2 :
        Count and sample size for the second sample.
    alpha : float
        Significance level for the confidence interval.
    correction : bool
        Whether to apply the small sample correction.

    Returns
    -------
    res : Holder
        Instance with attribute ``confint`` holding the two confidence
        limits and ``_p_roots`` holding all sorted roots of the cubic.
    """
    # The names below follow Nam
    x0, x1, n0, n1 = count2, count1, nobs2, nobs1
    x = x0 + x1
    n = n0 + n1
    # squared critical value of the normal distribution
    z = stats.norm.isf(alpha / 2)**2
    if correction:
        # Mietinnen/Nurminen small sample correction
        z = z * (n / (n - 1))
    # z = stats.chi2.isf(alpha, 1)
    # equ 6 in Nam 1995, coefficients of the cubic polynomial
    a1 = n0 * (n0 * n * x1 + n1 * (n0 + x1) * z)
    a2 = - n0 * (n0 * n1 * x + 2 * n * x0 * x1 + n1 * (n0 + x0 + 2 * x1) * z)
    a3 = 2 * n0 * n1 * x0 * x + n * x0 * x0 * x1 + n0 * n1 * x * z
    a4 = - n1 * x0 * x0 * x

    p_roots_ = np.sort(np.roots([a1, a2, a3, a4]))
    # keep the two smallest roots, the larger of them first
    p_roots = p_roots_[:2][::-1]

    # equ 5
    ci = (1 - (n1 - x1) * (1 - p_roots) / (x0 + n1 - n * p_roots)) / p_roots

    res = Holder()
    res.confint = ci
    res._p_roots = p_roots_  # for unit tests, can be dropped
    return res
/ n and p0 - x.1 / n Todo: rename p1 to pa and p2 to pb, so we have a, b for treatment and 0, 1 for success/failure current namings follow Nam 2009 status testing: compared to example in Nam 2009 internal polynomial coefficients in calculation correspond at around 4 decimals confidence interval agrees only at 2 decimals """ x11, x10, x01, x00 = np.ravel(table) n = np.sum(table) # nobs p10, p01 = x10 / n, x01 / n p1 = (x11 + x10) / n p0 = (x11 + x01) / n q00 = 1 - x00 / n z2 = stats.norm.isf(alpha / 2)**2 # z = stats.chi2.isf(alpha, 1) # before equ 3 in Nam 2009 g1 = (n * p0 + z2 / 2) * p0 g2 = - (2 * n * p1 * p0 + z2 * q00) g3 = (n * p1 + z2 / 2) * p1 a0 = g1**2 - (z2 * p0 / 2)**2 a1 = 2 * g1 * g2 a2 = g2**2 + 2 * g1 * g3 + z2**2 * (p1 * p0 - 2 * p10 * p01) / 2 a3 = 2 * g2 * g3 a4 = g3**2 - (z2 * p1 / 2)**2 p_roots = np.sort(np.roots([a0, a1, a2, a3, a4])) # p_roots = np.sort(np.roots([1, a1 / a0, a2 / a0, a3 / a0, a4 / a0])) ci = [p_roots.min(), p_roots.max()] res = Holder() res.confint = ci res.p = p1, p0 res._p_roots = p_roots # for unit tests, can be dropped return res