"""Tests and descriptive statistics with weights Created on 2010-09-18 Author: josef-pktd License: BSD (3-clause) References ---------- SPSS manual SAS manual This follows in large parts the SPSS manual, which is largely the same as the SAS manual with different, simpler notation. Freq, Weight in SAS seems redundant since they always show up as product, SPSS has only weights. Notes ----- This has potential problems with ddof, I started to follow numpy with ddof=0 by default and users can change it, but this might still mess up the t-tests, since the estimates for the standard deviation will be based on the ddof that the user chooses. - fixed ddof for the meandiff ttest, now matches scipy.stats.ttest_ind Note: scipy has now a separate, pooled variance option in ttest, but I have not compared yet. """ import numpy as np from scipy import stats from statsmodels.tools.decorators import cache_readonly class DescrStatsW: """ Descriptive statistics and tests with weights for case weights Assumes that the data is 1d or 2d with (nobs, nvars) observations in rows, variables in columns, and that the same weight applies to each column. If degrees of freedom correction is used, then weights should add up to the number of observations. ttest also assumes that the sum of weights corresponds to the sample size. This is essentially the same as replicating each observations by its weight, if the weights are integers, often called case or frequency weights. Parameters ---------- data : array_like, 1-D or 2-D dataset weights : None or 1-D ndarray weights for each observation, with same length as zero axis of data ddof : int default ddof=0, degrees of freedom correction used for second moments, var, std, cov, corrcoef. However, statistical tests are independent of `ddof`, based on the standard formulas. Examples -------- >>> import numpy as np >>> np.random.seed(0) >>> x1_2d = 1.0 + np.random.randn(20, 3) >>> w1 = np.random.randint(1, 4, 20) >>> d1 = DescrStatsW(x1_2d, weights=w1) >>> d1.mean array([ 1.42739844, 1.23174284, 1.083753 ]) >>> d1.var array([ 0.94855633, 0.52074626, 1.12309325]) >>> d1.std_mean array([ 0.14682676, 0.10878944, 0.15976497]) >>> tstat, pval, df = d1.ttest_mean(0) >>> tstat; pval; df array([ 9.72165021, 11.32226471, 6.78342055]) array([ 1.58414212e-12, 1.26536887e-14, 2.37623126e-08]) 44.0 >>> tstat, pval, df = d1.ttest_mean([0, 1, 1]) >>> tstat; pval; df array([ 9.72165021, 2.13019609, 0.52422632]) array([ 1.58414212e-12, 3.87842808e-02, 6.02752170e-01]) 44.0 # if weights are integers, then asrepeats can be used >>> x1r = d1.asrepeats() >>> x1r.shape ... >>> stats.ttest_1samp(x1r, [0, 1, 1]) ... """ def __init__(self, data, weights=None, ddof=0): self.data = np.asarray(data) if weights is None: self.weights = np.ones(self.data.shape[0]) else: self.weights = np.asarray(weights).astype(float) # TODO: why squeeze? if len(self.weights.shape) > 1 and len(self.weights) > 1: self.weights = self.weights.squeeze() self.ddof = ddof @cache_readonly def sum_weights(self): """Sum of weights""" return self.weights.sum(0) @cache_readonly def nobs(self): """alias for number of observations/cases, equal to sum of weights """ return self.sum_weights @cache_readonly def sum(self): """weighted sum of data""" return np.dot(self.data.T, self.weights) @cache_readonly def mean(self): """weighted mean of data""" return self.sum / self.sum_weights @cache_readonly def demeaned(self): """data with weighted mean subtracted""" return self.data - self.mean @cache_readonly def sumsquares(self): """weighted sum of squares of demeaned data""" return np.dot((self.demeaned ** 2).T, self.weights) # need memoize instead of cache decorator def var_ddof(self, ddof=0): """variance of data given ddof Parameters ---------- ddof : int, float degrees of freedom correction, independent of attribute ddof Returns ------- var : float, ndarray variance with denominator ``sum_weights - ddof`` """ return self.sumsquares / (self.sum_weights - ddof) def std_ddof(self, ddof=0): """standard deviation of data with given ddof Parameters ---------- ddof : int, float degrees of freedom correction, independent of attribute ddof Returns ------- std : float, ndarray standard deviation with denominator ``sum_weights - ddof`` """ return np.sqrt(self.var_ddof(ddof=ddof)) @cache_readonly def var(self): """variance with default degrees of freedom correction """ return self.sumsquares / (self.sum_weights - self.ddof) @cache_readonly def _var(self): """variance without degrees of freedom correction used for statistical tests with controlled ddof """ return self.sumsquares / self.sum_weights @cache_readonly def std(self): """standard deviation with default degrees of freedom correction """ return np.sqrt(self.var) @cache_readonly def cov(self): """weighted covariance of data if data is 2 dimensional assumes variables in columns and observations in rows uses default ddof """ cov_ = np.dot(self.weights * self.demeaned.T, self.demeaned) cov_ /= self.sum_weights - self.ddof return cov_ @cache_readonly def corrcoef(self): """weighted correlation with default ddof assumes variables in columns and observations in rows """ return self.cov / self.std / self.std[:, None] @cache_readonly def std_mean(self): """standard deviation of weighted mean """ std = self.std if self.ddof != 0: # ddof correction, (need copy of std) std = std * np.sqrt( (self.sum_weights - self.ddof) / self.sum_weights ) return std / np.sqrt(self.sum_weights - 1) def quantile(self, probs, return_pandas=True): """ Compute quantiles for a weighted sample. Parameters ---------- probs : array_like A vector of probability points at which to calculate the quantiles. Each element of `probs` should fall in [0, 1]. return_pandas : bool If True, return value is a Pandas DataFrame or Series. Otherwise returns a ndarray. Returns ------- quantiles : Series, DataFrame, or ndarray If `return_pandas` = True, returns one of the following: * data are 1d, `return_pandas` = True: a Series indexed by the probability points. * data are 2d, `return_pandas` = True: a DataFrame with the probability points as row index and the variables as column index. If `return_pandas` = False, returns an ndarray containing the same values as the Series/DataFrame. Notes ----- To compute the quantiles, first, the weights are summed over exact ties yielding distinct data values y_1 < y_2 < ..., and corresponding weights w_1, w_2, .... Let s_j denote the sum of the first j weights, and let W denote the sum of all the weights. For a probability point p, if pW falls strictly between s_j and s_{j+1} then the estimated quantile is y_{j+1}. If pW = s_j then the estimated quantile is (y_j + y_{j+1})/2. If pW < p_1 then the estimated quantile is y_1. References ---------- SAS documentation for weighted quantiles: https://support.sas.com/documentation/cdl/en/procstat/63104/HTML/default/viewer.htm#procstat_univariate_sect028.htm """ import pandas as pd probs = np.asarray(probs) probs = np.atleast_1d(probs) if self.data.ndim == 1: rslt = self._quantile(self.data, probs) if return_pandas: rslt = pd.Series(rslt, index=probs) else: rslt = [] for vec in self.data.T: rslt.append(self._quantile(vec, probs)) rslt = np.column_stack(rslt) if return_pandas: columns = ["col%d" % (j + 1) for j in range(rslt.shape[1])] rslt = pd.DataFrame(data=rslt, columns=columns, index=probs) if return_pandas: rslt.index.name = "p" return rslt def _quantile(self, vec, probs): # Helper function to calculate weighted quantiles for one column. # Follows definition from SAS documentation. # Returns ndarray import pandas as pd # Aggregate over ties df = pd.DataFrame(index=np.arange(len(self.weights))) df["weights"] = self.weights df["vec"] = vec dfg = df.groupby("vec").agg("sum") weights = dfg.values[:, 0] values = np.asarray(dfg.index) cweights = np.cumsum(weights) totwt = cweights[-1] targets = probs * totwt ii = np.searchsorted(cweights, targets) rslt = values[ii] # Exact hits jj = np.flatnonzero(np.abs(targets - cweights[ii]) < 1e-10) jj = jj[ii[jj] < len(cweights) - 1] rslt[jj] = (values[ii[jj]] + values[ii[jj] + 1]) / 2 return rslt def tconfint_mean(self, alpha=0.05, alternative="two-sided"): """two-sided confidence interval for weighted mean of data If the data is 2d, then these are separate confidence intervals for each column. Parameters ---------- alpha : float significance level for the confidence interval, coverage is ``1-alpha`` alternative : str This specifies the alternative hypothesis for the test that corresponds to the confidence interval. The alternative hypothesis, H1, has to be one of the following 'two-sided': H1: mean not equal to value (default) 'larger' : H1: mean larger than value 'smaller' : H1: mean smaller than value Returns ------- lower, upper : floats or ndarrays lower and upper bound of confidence interval Notes ----- In a previous version, statsmodels 0.4, alpha was the confidence level, e.g. 0.95 """ # TODO: add asymmetric dof = self.sum_weights - 1 ci = _tconfint_generic( self.mean, self.std_mean, dof, alpha, alternative ) return ci def zconfint_mean(self, alpha=0.05, alternative="two-sided"): """two-sided confidence interval for weighted mean of data Confidence interval is based on normal distribution. If the data is 2d, then these are separate confidence intervals for each column. Parameters ---------- alpha : float significance level for the confidence interval, coverage is ``1-alpha`` alternative : str This specifies the alternative hypothesis for the test that corresponds to the confidence interval. The alternative hypothesis, H1, has to be one of the following 'two-sided': H1: mean not equal to value (default) 'larger' : H1: mean larger than value 'smaller' : H1: mean smaller than value Returns ------- lower, upper : floats or ndarrays lower and upper bound of confidence interval Notes ----- In a previous version, statsmodels 0.4, alpha was the confidence level, e.g. 0.95 """ return _zconfint_generic(self.mean, self.std_mean, alpha, alternative) def ttest_mean(self, value=0, alternative="two-sided"): """ttest of Null hypothesis that mean is equal to value. The alternative hypothesis H1 is defined by the following - 'two-sided': H1: mean not equal to value - 'larger' : H1: mean larger than value - 'smaller' : H1: mean smaller than value Parameters ---------- value : float or array the hypothesized value for the mean alternative : str The alternative hypothesis, H1, has to be one of the following: - 'two-sided': H1: mean not equal to value (default) - 'larger' : H1: mean larger than value - 'smaller' : H1: mean smaller than value Returns ------- tstat : float test statistic pvalue : float pvalue of the t-test df : int or float """ # TODO: check direction with R, smaller=less, larger=greater tstat = (self.mean - value) / self.std_mean dof = self.sum_weights - 1 # TODO: use outsourced if alternative == "two-sided": pvalue = stats.t.sf(np.abs(tstat), dof) * 2 elif alternative == "larger": pvalue = stats.t.sf(tstat, dof) elif alternative == "smaller": pvalue = stats.t.cdf(tstat, dof) else: raise ValueError("alternative not recognized") return tstat, pvalue, dof def ttost_mean(self, low, upp): """test of (non-)equivalence of one sample TOST: two one-sided t tests null hypothesis: m < low or m > upp alternative hypothesis: low < m < upp where m is the expected value of the sample (mean of the population). If the pvalue is smaller than a threshold, say 0.05, then we reject the hypothesis that the expected value of the sample (mean of the population) is outside of the interval given by thresholds low and upp. Parameters ---------- low, upp : float equivalence interval low < mean < upp Returns ------- pvalue : float pvalue of the non-equivalence test t1, pv1, df1 : tuple test statistic, pvalue and degrees of freedom for lower threshold test t2, pv2, df2 : tuple test statistic, pvalue and degrees of freedom for upper threshold test """ t1, pv1, df1 = self.ttest_mean(low, alternative="larger") t2, pv2, df2 = self.ttest_mean(upp, alternative="smaller") return np.maximum(pv1, pv2), (t1, pv1, df1), (t2, pv2, df2) def ztest_mean(self, value=0, alternative="two-sided"): """z-test of Null hypothesis that mean is equal to value. The alternative hypothesis H1 is defined by the following 'two-sided': H1: mean not equal to value 'larger' : H1: mean larger than value 'smaller' : H1: mean smaller than value Parameters ---------- value : float or array the hypothesized value for the mean alternative : str The alternative hypothesis, H1, has to be one of the following 'two-sided': H1: mean not equal to value (default) 'larger' : H1: mean larger than value 'smaller' : H1: mean smaller than value Returns ------- tstat : float test statistic pvalue : float pvalue of the t-test Notes ----- This uses the same degrees of freedom correction as the t-test in the calculation of the standard error of the mean, i.e it uses `(sum_weights - 1)` instead of `sum_weights` in the denominator. See Examples below for the difference. Examples -------- z-test on a proportion, with 20 observations, 15 of those are our event >>> import statsmodels.api as sm >>> x1 = [0, 1] >>> w1 = [5, 15] >>> d1 = sm.stats.DescrStatsW(x1, w1) >>> d1.ztest_mean(0.5) (2.5166114784235836, 0.011848940928347452) This differs from the proportions_ztest because of the degrees of freedom correction: >>> sm.stats.proportions_ztest(15, 20.0, value=0.5) (2.5819888974716112, 0.009823274507519247). We can replicate the results from ``proportions_ztest`` if we increase the weights to have artificially one more observation: >>> sm.stats.DescrStatsW(x1, np.array(w1)*21./20).ztest_mean(0.5) (2.5819888974716116, 0.0098232745075192366) """ tstat = (self.mean - value) / self.std_mean # TODO: use outsourced if alternative == "two-sided": pvalue = stats.norm.sf(np.abs(tstat)) * 2 elif alternative == "larger": pvalue = stats.norm.sf(tstat) elif alternative == "smaller": pvalue = stats.norm.cdf(tstat) return tstat, pvalue def ztost_mean(self, low, upp): """test of (non-)equivalence of one sample, based on z-test TOST: two one-sided z-tests null hypothesis: m < low or m > upp alternative hypothesis: low < m < upp where m is the expected value of the sample (mean of the population). If the pvalue is smaller than a threshold, say 0.05, then we reject the hypothesis that the expected value of the sample (mean of the population) is outside of the interval given by thresholds low and upp. Parameters ---------- low, upp : float equivalence interval low < mean < upp Returns ------- pvalue : float pvalue of the non-equivalence test t1, pv1 : tuple test statistic and p-value for lower threshold test t2, pv2 : tuple test statistic and p-value for upper threshold test """ t1, pv1 = self.ztest_mean(low, alternative="larger") t2, pv2 = self.ztest_mean(upp, alternative="smaller") return np.maximum(pv1, pv2), (t1, pv1), (t2, pv2) def get_compare(self, other, weights=None): """return an instance of CompareMeans with self and other Parameters ---------- other : array_like or instance of DescrStatsW If array_like then this creates an instance of DescrStatsW with the given weights. weights : None or array weights are only used if other is not an instance of DescrStatsW Returns ------- cm : instance of CompareMeans the instance has self attached as d1 and other as d2. See Also -------- CompareMeans """ if not isinstance(other, self.__class__): d2 = DescrStatsW(other, weights) else: d2 = other return CompareMeans(self, d2) def asrepeats(self): """get array that has repeats given by floor(weights) observations with weight=0 are dropped """ w_int = np.floor(self.weights).astype(int) return np.repeat(self.data, w_int, axis=0) def _tstat_generic(value1, value2, std_diff, dof, alternative, diff=0): """generic ttest based on summary statistic The test statistic is : tstat = (value1 - value2 - diff) / std_diff and is assumed to be t-distributed with ``dof`` degrees of freedom. Parameters ---------- value1 : float or ndarray Value, for example mean, of the first sample. value2 : float or ndarray Value, for example mean, of the second sample. std_diff : float or ndarray Standard error of the difference value1 - value2 dof : int or float Degrees of freedom alternative : str The alternative hypothesis, H1, has to be one of the following * 'two-sided' : H1: ``value1 - value2 - diff`` not equal to 0. * 'larger' : H1: ``value1 - value2 - diff > 0`` * 'smaller' : H1: ``value1 - value2 - diff < 0`` diff : float value of difference ``value1 - value2`` under the null hypothesis Returns ------- tstat : float or ndarray Test statistic. pvalue : float or ndarray P-value of the hypothesis test assuming that the test statistic is t-distributed with ``df`` degrees of freedom. """ tstat = (value1 - value2 - diff) / std_diff if alternative in ["two-sided", "2-sided", "2s"]: pvalue = stats.t.sf(np.abs(tstat), dof) * 2 elif alternative in ["larger", "l"]: pvalue = stats.t.sf(tstat, dof) elif alternative in ["smaller", "s"]: pvalue = stats.t.cdf(tstat, dof) else: raise ValueError("invalid alternative") return tstat, pvalue def _tconfint_generic(mean, std_mean, dof, alpha, alternative): """generic t-confint based on summary statistic Parameters ---------- mean : float or ndarray Value, for example mean, of the first sample. std_mean : float or ndarray Standard error of the difference value1 - value2 dof : int or float Degrees of freedom alpha : float Significance level for the confidence interval, coverage is ``1-alpha``. alternative : str The alternative hypothesis, H1, has to be one of the following * 'two-sided' : H1: ``value1 - value2 - diff`` not equal to 0. * 'larger' : H1: ``value1 - value2 - diff > 0`` * 'smaller' : H1: ``value1 - value2 - diff < 0`` Returns ------- lower : float or ndarray Lower confidence limit. This is -inf for the one-sided alternative "smaller". upper : float or ndarray Upper confidence limit. This is inf for the one-sided alternative "larger". """ if alternative in ["two-sided", "2-sided", "2s"]: tcrit = stats.t.ppf(1 - alpha / 2.0, dof) lower = mean - tcrit * std_mean upper = mean + tcrit * std_mean elif alternative in ["larger", "l"]: tcrit = stats.t.ppf(alpha, dof) lower = mean + tcrit * std_mean upper = np.inf elif alternative in ["smaller", "s"]: tcrit = stats.t.ppf(1 - alpha, dof) lower = -np.inf upper = mean + tcrit * std_mean else: raise ValueError("invalid alternative") return lower, upper def _zstat_generic(value1, value2, std_diff, alternative, diff=0): """generic (normal) z-test based on summary statistic The test statistic is : tstat = (value1 - value2 - diff) / std_diff and is assumed to be normally distributed. Parameters ---------- value1 : float or ndarray Value, for example mean, of the first sample. value2 : float or ndarray Value, for example mean, of the second sample. std_diff : float or ndarray Standard error of the difference value1 - value2 alternative : str The alternative hypothesis, H1, has to be one of the following * 'two-sided' : H1: ``value1 - value2 - diff`` not equal to 0. * 'larger' : H1: ``value1 - value2 - diff > 0`` * 'smaller' : H1: ``value1 - value2 - diff < 0`` diff : float value of difference ``value1 - value2`` under the null hypothesis Returns ------- tstat : float or ndarray Test statistic. pvalue : float or ndarray P-value of the hypothesis test assuming that the test statistic is t-distributed with ``df`` degrees of freedom. """ zstat = (value1 - value2 - diff) / std_diff if alternative in ["two-sided", "2-sided", "2s"]: pvalue = stats.norm.sf(np.abs(zstat)) * 2 elif alternative in ["larger", "l"]: pvalue = stats.norm.sf(zstat) elif alternative in ["smaller", "s"]: pvalue = stats.norm.cdf(zstat) else: raise ValueError("invalid alternative") return zstat, pvalue def _zstat_generic2(value, std, alternative): """generic (normal) z-test based on summary statistic The test statistic is : zstat = value / std and is assumed to be normally distributed with standard deviation ``std``. Parameters ---------- value : float or ndarray Value of a sample statistic, for example mean. value2 : float or ndarray Value, for example mean, of the second sample. std : float or ndarray Standard error of the sample statistic value. alternative : str The alternative hypothesis, H1, has to be one of the following * 'two-sided' : H1: ``value1 - value2 - diff`` not equal to 0. * 'larger' : H1: ``value1 - value2 - diff > 0`` * 'smaller' : H1: ``value1 - value2 - diff < 0`` Returns ------- zstat : float or ndarray Test statistic. pvalue : float or ndarray P-value of the hypothesis test assuming that the test statistic is normally distributed. """ zstat = value / std if alternative in ["two-sided", "2-sided", "2s"]: pvalue = stats.norm.sf(np.abs(zstat)) * 2 elif alternative in ["larger", "l"]: pvalue = stats.norm.sf(zstat) elif alternative in ["smaller", "s"]: pvalue = stats.norm.cdf(zstat) else: raise ValueError("invalid alternative") return zstat, pvalue def _zconfint_generic(mean, std_mean, alpha, alternative): """generic normal-confint based on summary statistic Parameters ---------- mean : float or ndarray Value, for example mean, of the first sample. std_mean : float or ndarray Standard error of the difference value1 - value2 alpha : float Significance level for the confidence interval, coverage is ``1-alpha`` alternative : str The alternative hypothesis, H1, has to be one of the following * 'two-sided' : H1: ``value1 - value2 - diff`` not equal to 0. * 'larger' : H1: ``value1 - value2 - diff > 0`` * 'smaller' : H1: ``value1 - value2 - diff < 0`` Returns ------- lower : float or ndarray Lower confidence limit. This is -inf for the one-sided alternative "smaller". upper : float or ndarray Upper confidence limit. This is inf for the one-sided alternative "larger". """ if alternative in ["two-sided", "2-sided", "2s"]: zcrit = stats.norm.ppf(1 - alpha / 2.0) lower = mean - zcrit * std_mean upper = mean + zcrit * std_mean elif alternative in ["larger", "l"]: zcrit = stats.norm.ppf(alpha) lower = mean + zcrit * std_mean upper = np.inf elif alternative in ["smaller", "s"]: zcrit = stats.norm.ppf(1 - alpha) lower = -np.inf upper = mean + zcrit * std_mean else: raise ValueError("invalid alternative") return lower, upper class CompareMeans: """class for two sample comparison The tests and the confidence interval work for multi-endpoint comparison: If d1 and d2 have the same number of rows, then each column of the data in d1 is compared with the corresponding column in d2. Parameters ---------- d1, d2 : instances of DescrStatsW Notes ----- The result for the statistical tests and the confidence interval are independent of the user specified ddof. TODO: Extend to any number of groups or write a version that works in that case, like in SAS and SPSS. """ def __init__(self, d1, d2): """assume d1, d2 hold the relevant attributes """ self.d1 = d1 self.d2 = d2 # assume nobs is available # if not hasattr(self.d1, 'nobs'): # d1.nobs1 = d1.sum_weights.astype(float) #float just to make sure # self.nobs2 = d2.sum_weights.astype(float) @classmethod def from_data( cls, data1, data2, weights1=None, weights2=None, ddof1=0, ddof2=0 ): """construct a CompareMeans object from data Parameters ---------- data1, data2 : array_like, 1-D or 2-D compared datasets weights1, weights2 : None or 1-D ndarray weights for each observation of data1 and data2 respectively, with same length as zero axis of corresponding dataset. ddof1, ddof2 : int default ddof1=0, ddof2=0, degrees of freedom for data1, data2 respectively. Returns ------- A CompareMeans instance. """ return cls( DescrStatsW(data1, weights=weights1, ddof=ddof1), DescrStatsW(data2, weights=weights2, ddof=ddof2), ) def summary(self, use_t=True, alpha=0.05, usevar="pooled", value=0): """summarize the results of the hypothesis test Parameters ---------- use_t : bool, optional if use_t is True, then t test results are returned if use_t is False, then z test results are returned alpha : float significance level for the confidence interval, coverage is ``1-alpha`` usevar : str, 'pooled' or 'unequal' If ``pooled``, then the standard deviation of the samples is assumed to be the same. If ``unequal``, then the variance of Welch ttest will be used, and the degrees of freedom are those of Satterthwaite if ``use_t`` is True. value : float difference between the means under the Null hypothesis. Returns ------- smry : SimpleTable """ d1 = self.d1 d2 = self.d2 confint_percents = 100 - alpha * 100 if use_t: tstat, pvalue, _ = self.ttest_ind(usevar=usevar, value=value) lower, upper = self.tconfint_diff(alpha=alpha, usevar=usevar) else: tstat, pvalue = self.ztest_ind(usevar=usevar, value=value) lower, upper = self.zconfint_diff(alpha=alpha, usevar=usevar) if usevar == "pooled": std_err = self.std_meandiff_pooledvar else: std_err = self.std_meandiff_separatevar std_err = np.atleast_1d(std_err) tstat = np.atleast_1d(tstat) pvalue = np.atleast_1d(pvalue) lower = np.atleast_1d(lower) upper = np.atleast_1d(upper) conf_int = np.column_stack((lower, upper)) params = np.atleast_1d(d1.mean - d2.mean - value) title = "Test for equality of means" yname = "y" # not used in params_frame xname = ["subset #%d" % (ii + 1) for ii in range(tstat.shape[0])] from statsmodels.iolib.summary import summary_params return summary_params( (None, params, std_err, tstat, pvalue, conf_int), alpha=alpha, use_t=use_t, yname=yname, xname=xname, title=title, ) @cache_readonly def std_meandiff_separatevar(self): # this uses ``_var`` to use ddof=0 for formula d1 = self.d1 d2 = self.d2 return np.sqrt(d1._var / (d1.nobs - 1) + d2._var / (d2.nobs - 1)) @cache_readonly def std_meandiff_pooledvar(self): """variance assuming equal variance in both data sets """ # this uses ``_var`` to use ddof=0 for formula d1 = self.d1 d2 = self.d2 # could make var_pooled into attribute var_pooled = ( (d1.sumsquares + d2.sumsquares) / # (d1.nobs - d1.ddof + d2.nobs - d2.ddof)) (d1.nobs - 1 + d2.nobs - 1) ) return np.sqrt(var_pooled * (1.0 / d1.nobs + 1.0 / d2.nobs)) def dof_satt(self): """degrees of freedom of Satterthwaite for unequal variance """ d1 = self.d1 d2 = self.d2 # this follows blindly the SPSS manual # except I use ``_var`` which has ddof=0 sem1 = d1._var / (d1.nobs - 1) sem2 = d2._var / (d2.nobs - 1) semsum = sem1 + sem2 z1 = (sem1 / semsum) ** 2 / (d1.nobs - 1) z2 = (sem2 / semsum) ** 2 / (d2.nobs - 1) dof = 1.0 / (z1 + z2) return dof def ttest_ind(self, alternative="two-sided", usevar="pooled", value=0): """ttest for the null hypothesis of identical means this should also be the same as onewaygls, except for ddof differences Parameters ---------- x1 : array_like, 1-D or 2-D first of the two independent samples, see notes for 2-D case x2 : array_like, 1-D or 2-D second of the two independent samples, see notes for 2-D case alternative : str The alternative hypothesis, H1, has to be one of the following 'two-sided': H1: difference in means not equal to value (default) 'larger' : H1: difference in means larger than value 'smaller' : H1: difference in means smaller than value usevar : str, 'pooled' or 'unequal' If ``pooled``, then the standard deviation of the samples is assumed to be the same. If ``unequal``, then Welch ttest with Satterthwait degrees of freedom is used value : float difference between the means under the Null hypothesis. Returns ------- tstat : float test statistic pvalue : float pvalue of the t-test df : int or float degrees of freedom used in the t-test Notes ----- The result is independent of the user specified ddof. """ d1 = self.d1 d2 = self.d2 if usevar == "pooled": stdm = self.std_meandiff_pooledvar dof = d1.nobs - 1 + d2.nobs - 1 elif usevar == "unequal": stdm = self.std_meandiff_separatevar dof = self.dof_satt() else: raise ValueError('usevar can only be "pooled" or "unequal"') tstat, pval = _tstat_generic( d1.mean, d2.mean, stdm, dof, alternative, diff=value ) return tstat, pval, dof def ztest_ind(self, alternative="two-sided", usevar="pooled", value=0): """z-test for the null hypothesis of identical means Parameters ---------- x1 : array_like, 1-D or 2-D first of the two independent samples, see notes for 2-D case x2 : array_like, 1-D or 2-D second of the two independent samples, see notes for 2-D case alternative : str The alternative hypothesis, H1, has to be one of the following 'two-sided': H1: difference in means not equal to value (default) 'larger' : H1: difference in means larger than value 'smaller' : H1: difference in means smaller than value usevar : str, 'pooled' or 'unequal' If ``pooled``, then the standard deviation of the samples is assumed to be the same. If ``unequal``, then the standard deviations of the samples may be different. value : float difference between the means under the Null hypothesis. Returns ------- tstat : float test statistic pvalue : float pvalue of the z-test """ d1 = self.d1 d2 = self.d2 if usevar == "pooled": stdm = self.std_meandiff_pooledvar elif usevar == "unequal": stdm = self.std_meandiff_separatevar else: raise ValueError('usevar can only be "pooled" or "unequal"') tstat, pval = _zstat_generic( d1.mean, d2.mean, stdm, alternative, diff=value ) return tstat, pval def tconfint_diff( self, alpha=0.05, alternative="two-sided", usevar="pooled" ): """confidence interval for the difference in means Parameters ---------- alpha : float significance level for the confidence interval, coverage is ``1-alpha`` alternative : str This specifies the alternative hypothesis for the test that corresponds to the confidence interval. The alternative hypothesis, H1, has to be one of the following : 'two-sided': H1: difference in means not equal to value (default) 'larger' : H1: difference in means larger than value 'smaller' : H1: difference in means smaller than value usevar : str, 'pooled' or 'unequal' If ``pooled``, then the standard deviation of the samples is assumed to be the same. If ``unequal``, then Welch ttest with Satterthwait degrees of freedom is used Returns ------- lower, upper : floats lower and upper limits of the confidence interval Notes ----- The result is independent of the user specified ddof. """ d1 = self.d1 d2 = self.d2 diff = d1.mean - d2.mean if usevar == "pooled": std_diff = self.std_meandiff_pooledvar dof = d1.nobs - 1 + d2.nobs - 1 elif usevar == "unequal": std_diff = self.std_meandiff_separatevar dof = self.dof_satt() else: raise ValueError('usevar can only be "pooled" or "unequal"') res = _tconfint_generic( diff, std_diff, dof, alpha=alpha, alternative=alternative ) return res def zconfint_diff( self, alpha=0.05, alternative="two-sided", usevar="pooled" ): """confidence interval for the difference in means Parameters ---------- alpha : float significance level for the confidence interval, coverage is ``1-alpha`` alternative : str This specifies the alternative hypothesis for the test that corresponds to the confidence interval. The alternative hypothesis, H1, has to be one of the following : 'two-sided': H1: difference in means not equal to value (default) 'larger' : H1: difference in means larger than value 'smaller' : H1: difference in means smaller than value usevar : str, 'pooled' or 'unequal' If ``pooled``, then the standard deviation of the samples is assumed to be the same. If ``unequal``, then Welch ttest with Satterthwait degrees of freedom is used Returns ------- lower, upper : floats lower and upper limits of the confidence interval Notes ----- The result is independent of the user specified ddof. """ d1 = self.d1 d2 = self.d2 diff = d1.mean - d2.mean if usevar == "pooled": std_diff = self.std_meandiff_pooledvar elif usevar == "unequal": std_diff = self.std_meandiff_separatevar else: raise ValueError('usevar can only be "pooled" or "unequal"') res = _zconfint_generic( diff, std_diff, alpha=alpha, alternative=alternative ) return res def ttost_ind(self, low, upp, usevar="pooled"): """ test of equivalence for two independent samples, base on t-test Parameters ---------- low, upp : float equivalence interval low < m1 - m2 < upp usevar : str, 'pooled' or 'unequal' If ``pooled``, then the standard deviation of the samples is assumed to be the same. If ``unequal``, then Welch ttest with Satterthwait degrees of freedom is used Returns ------- pvalue : float pvalue of the non-equivalence test t1, pv1 : tuple of floats test statistic and pvalue for lower threshold test t2, pv2 : tuple of floats test statistic and pvalue for upper threshold test """ tt1 = self.ttest_ind(alternative="larger", usevar=usevar, value=low) tt2 = self.ttest_ind(alternative="smaller", usevar=usevar, value=upp) # TODO: remove tuple return, use same as for function tost_ind return np.maximum(tt1[1], tt2[1]), (tt1, tt2) def ztost_ind(self, low, upp, usevar="pooled"): """ test of equivalence for two independent samples, based on z-test Parameters ---------- low, upp : float equivalence interval low < m1 - m2 < upp usevar : str, 'pooled' or 'unequal' If ``pooled``, then the standard deviation of the samples is assumed to be the same. If ``unequal``, then Welch ttest with Satterthwait degrees of freedom is used Returns ------- pvalue : float pvalue of the non-equivalence test t1, pv1 : tuple of floats test statistic and pvalue for lower threshold test t2, pv2 : tuple of floats test statistic and pvalue for upper threshold test """ tt1 = self.ztest_ind(alternative="larger", usevar=usevar, value=low) tt2 = self.ztest_ind(alternative="smaller", usevar=usevar, value=upp) # TODO: remove tuple return, use same as for function tost_ind return np.maximum(tt1[1], tt2[1]), tt1, tt2 # tost.__doc__ = tost_ind.__doc__ # does not work for 2d, does not take weights into account ## def test_equal_var(self): ## """Levene test for independence ## ## """ ## d1 = self.d1 ## d2 = self.d2 ## #rewrite this, for now just use scipy.stats ## return stats.levene(d1.data, d2.data) def ttest_ind( x1, x2, alternative="two-sided", usevar="pooled", weights=(None, None), value=0, ): """ttest independent sample Convenience function that uses the classes and throws away the intermediate results, compared to scipy stats: drops axis option, adds alternative, usevar, and weights option. Parameters ---------- x1 : array_like, 1-D or 2-D first of the two independent samples, see notes for 2-D case x2 : array_like, 1-D or 2-D second of the two independent samples, see notes for 2-D case alternative : str The alternative hypothesis, H1, has to be one of the following * 'two-sided' (default): H1: difference in means not equal to value * 'larger' : H1: difference in means larger than value * 'smaller' : H1: difference in means smaller than value usevar : str, 'pooled' or 'unequal' If ``pooled``, then the standard deviation of the samples is assumed to be the same. If ``unequal``, then Welch ttest with Satterthwait degrees of freedom is used weights : tuple of None or ndarrays Case weights for the two samples. For details on weights see ``DescrStatsW`` value : float difference between the means under the Null hypothesis. Returns ------- tstat : float test statistic pvalue : float pvalue of the t-test df : int or float degrees of freedom used in the t-test """ cm = CompareMeans( DescrStatsW(x1, weights=weights[0], ddof=0), DescrStatsW(x2, weights=weights[1], ddof=0), ) tstat, pval, dof = cm.ttest_ind( alternative=alternative, usevar=usevar, value=value ) return tstat, pval, dof def ttost_ind( x1, x2, low, upp, usevar="pooled", weights=(None, None), transform=None ): """test of (non-)equivalence for two independent samples TOST: two one-sided t tests null hypothesis: m1 - m2 < low or m1 - m2 > upp alternative hypothesis: low < m1 - m2 < upp where m1, m2 are the means, expected values of the two samples. If the pvalue is smaller than a threshold, say 0.05, then we reject the hypothesis that the difference between the two samples is larger than the the thresholds given by low and upp. Parameters ---------- x1 : array_like, 1-D or 2-D first of the two independent samples, see notes for 2-D case x2 : array_like, 1-D or 2-D second of the two independent samples, see notes for 2-D case low, upp : float equivalence interval low < m1 - m2 < upp usevar : str, 'pooled' or 'unequal' If ``pooled``, then the standard deviation of the samples is assumed to be the same. If ``unequal``, then Welch ttest with Satterthwait degrees of freedom is used weights : tuple of None or ndarrays Case weights for the two samples. For details on weights see ``DescrStatsW`` transform : None or function If None (default), then the data is not transformed. Given a function, sample data and thresholds are transformed. If transform is log, then the equivalence interval is in ratio: low < m1 / m2 < upp Returns ------- pvalue : float pvalue of the non-equivalence test t1, pv1 : tuple of floats test statistic and pvalue for lower threshold test t2, pv2 : tuple of floats test statistic and pvalue for upper threshold test Notes ----- The test rejects if the 2*alpha confidence interval for the difference is contained in the ``(low, upp)`` interval. This test works also for multi-endpoint comparisons: If d1 and d2 have the same number of columns, then each column of the data in d1 is compared with the corresponding column in d2. This is the same as comparing each of the corresponding columns separately. Currently no multi-comparison correction is used. The raw p-values reported here can be correction with the functions in ``multitest``. """ if transform: if transform is np.log: # avoid hstack in special case x1 = transform(x1) x2 = transform(x2) else: # for transforms like rankdata that will need both datasets # concatenate works for stacking 1d and 2d arrays xx = transform(np.concatenate((x1, x2), 0)) x1 = xx[: len(x1)] x2 = xx[len(x1) :] low = transform(low) upp = transform(upp) cm = CompareMeans( DescrStatsW(x1, weights=weights[0], ddof=0), DescrStatsW(x2, weights=weights[1], ddof=0), ) pval, res = cm.ttost_ind(low, upp, usevar=usevar) return pval, res[0], res[1] def ttost_paired(x1, x2, low, upp, transform=None, weights=None): """test of (non-)equivalence for two dependent, paired sample TOST: two one-sided t tests null hypothesis: md < low or md > upp alternative hypothesis: low < md < upp where md is the mean, expected value of the difference x1 - x2 If the pvalue is smaller than a threshold,say 0.05, then we reject the hypothesis that the difference between the two samples is larger than the the thresholds given by low and upp. Parameters ---------- x1 : array_like first of the two independent samples x2 : array_like second of the two independent samples low, upp : float equivalence interval low < mean of difference < upp weights : None or ndarray case weights for the two samples. For details on weights see ``DescrStatsW`` transform : None or function If None (default), then the data is not transformed. Given a function sample data and thresholds are transformed. If transform is log the the equivalence interval is in ratio: low < x1 / x2 < upp Returns ------- pvalue : float pvalue of the non-equivalence test t1, pv1, df1 : tuple test statistic, pvalue and degrees of freedom for lower threshold test t2, pv2, df2 : tuple test statistic, pvalue and degrees of freedom for upper threshold test """ if transform: if transform is np.log: # avoid hstack in special case x1 = transform(x1) x2 = transform(x2) else: # for transforms like rankdata that will need both datasets # concatenate works for stacking 1d and 2d arrays xx = transform(np.concatenate((x1, x2), 0)) x1 = xx[: len(x1)] x2 = xx[len(x1) :] low = transform(low) upp = transform(upp) dd = DescrStatsW(x1 - x2, weights=weights, ddof=0) t1, pv1, df1 = dd.ttest_mean(low, alternative="larger") t2, pv2, df2 = dd.ttest_mean(upp, alternative="smaller") return np.maximum(pv1, pv2), (t1, pv1, df1), (t2, pv2, df2) def ztest( x1, x2=None, value=0, alternative="two-sided", usevar="pooled", ddof=1.0 ): """test for mean based on normal distribution, one or two samples In the case of two samples, the samples are assumed to be independent. Parameters ---------- x1 : array_like, 1-D or 2-D first of the two independent samples x2 : array_like, 1-D or 2-D second of the two independent samples value : float In the one sample case, value is the mean of x1 under the Null hypothesis. In the two sample case, value is the difference between mean of x1 and mean of x2 under the Null hypothesis. The test statistic is `x1_mean - x2_mean - value`. alternative : str The alternative hypothesis, H1, has to be one of the following 'two-sided': H1: difference in means not equal to value (default) 'larger' : H1: difference in means larger than value 'smaller' : H1: difference in means smaller than value usevar : str, 'pooled' or 'unequal' If ``pooled``, then the standard deviation of the samples is assumed to be the same. If ``unequal``, then the standard deviation of the sample is assumed to be different. ddof : int Degrees of freedom use in the calculation of the variance of the mean estimate. In the case of comparing means this is one, however it can be adjusted for testing other statistics (proportion, correlation) Returns ------- tstat : float test statistic pvalue : float pvalue of the t-test Notes ----- usevar can be pooled or unequal in two sample case """ # TODO: this should delegate to CompareMeans like ttest_ind # However that does not implement ddof # usevar can be pooled or unequal if usevar not in {"pooled", "unequal"}: raise NotImplementedError('usevar can only be "pooled" or "unequal"') x1 = np.asarray(x1) nobs1 = x1.shape[0] x1_mean = x1.mean(0) x1_var = x1.var(0) if x2 is not None: x2 = np.asarray(x2) nobs2 = x2.shape[0] x2_mean = x2.mean(0) x2_var = x2.var(0) if usevar == "pooled": var = nobs1 * x1_var + nobs2 * x2_var var /= nobs1 + nobs2 - 2 * ddof var *= 1.0 / nobs1 + 1.0 / nobs2 elif usevar == "unequal": var = x1_var / (nobs1 - ddof) + x2_var / (nobs2 - ddof) else: var = x1_var / (nobs1 - ddof) x2_mean = 0 std_diff = np.sqrt(var) # stat = x1_mean - x2_mean - value return _zstat_generic(x1_mean, x2_mean, std_diff, alternative, diff=value) def zconfint( x1, x2=None, value=0, alpha=0.05, alternative="two-sided", usevar="pooled", ddof=1.0, ): """confidence interval based on normal distribution z-test Parameters ---------- x1 : array_like, 1-D or 2-D first of the two independent samples, see notes for 2-D case x2 : array_like, 1-D or 2-D second of the two independent samples, see notes for 2-D case value : float In the one sample case, value is the mean of x1 under the Null hypothesis. In the two sample case, value is the difference between mean of x1 and mean of x2 under the Null hypothesis. The test statistic is `x1_mean - x2_mean - value`. usevar : str, 'pooled' Currently, only 'pooled' is implemented. If ``pooled``, then the standard deviation of the samples is assumed to be the same. see CompareMeans.ztest_ind for different options. ddof : int Degrees of freedom use in the calculation of the variance of the mean estimate. In the case of comparing means this is one, however it can be adjusted for testing other statistics (proportion, correlation) Notes ----- checked only for 1 sample case usevar not implemented, is always pooled in two sample case ``value`` shifts the confidence interval so it is centered at `x1_mean - x2_mean - value` See Also -------- ztest CompareMeans """ # usevar is not used, always pooled # mostly duplicate code from ztest if usevar != "pooled": raise NotImplementedError('only usevar="pooled" is implemented') x1 = np.asarray(x1) nobs1 = x1.shape[0] x1_mean = x1.mean(0) x1_var = x1.var(0) if x2 is not None: x2 = np.asarray(x2) nobs2 = x2.shape[0] x2_mean = x2.mean(0) x2_var = x2.var(0) var_pooled = nobs1 * x1_var + nobs2 * x2_var var_pooled /= nobs1 + nobs2 - 2 * ddof var_pooled *= 1.0 / nobs1 + 1.0 / nobs2 else: var_pooled = x1_var / (nobs1 - ddof) x2_mean = 0 std_diff = np.sqrt(var_pooled) ci = _zconfint_generic( x1_mean - x2_mean - value, std_diff, alpha, alternative ) return ci def ztost(x1, low, upp, x2=None, usevar="pooled", ddof=1.0): """Equivalence test based on normal distribution Parameters ---------- x1 : array_like one sample or first sample for 2 independent samples low, upp : float equivalence interval low < m1 - m2 < upp x1 : array_like or None second sample for 2 independent samples test. If None, then a one-sample test is performed. usevar : str, 'pooled' If `pooled`, then the standard deviation of the samples is assumed to be the same. Only `pooled` is currently implemented. Returns ------- pvalue : float pvalue of the non-equivalence test t1, pv1 : tuple of floats test statistic and pvalue for lower threshold test t2, pv2 : tuple of floats test statistic and pvalue for upper threshold test Notes ----- checked only for 1 sample case """ tt1 = ztest( x1, x2, alternative="larger", usevar=usevar, value=low, ddof=ddof ) tt2 = ztest( x1, x2, alternative="smaller", usevar=usevar, value=upp, ddof=ddof ) return ( np.maximum(tt1[1], tt2[1]), tt1, tt2, )