638 lines
20 KiB
Python
638 lines
20 KiB
Python
'''runstest

Formulas for the mean and variance of runs are taken from the SAS manual,
NPAR tests, which is also the source of the idea for runstest_1samp and
runstest_2samp.

The description in the NIST handbook and dataplot does not explain their
expected values or variance.

Note:
There are (at least) two definitions of runs used in the literature. The
classical definition, which is also used here, is that runs are sequences of
identical observations separated by observations with different realizations.
The second definition allows for overlapping runs, or runs where counting a
run is also started after a run of a fixed length of the same kind.


TODO
* add one-sided tests where possible or where it makes sense

'''
|
|
|
|
import numpy as np
|
|
from scipy import stats
|
|
from scipy.special import comb
|
|
import warnings
|
|
from statsmodels.tools.validation import array_like
|
|
|
|
class Runs:
    '''class for runs in a binary sequence

    Parameters
    ----------
    x : array_like, 1d
        data array. Runs of the value 1 and runs of the value 0 are counted
        separately, so the data is expected to be a 0/1 indicator.

    Attributes
    ----------
    x : ndarray
        the data converted to an array
    runstart : ndarray
        index at which each run starts, followed by ``len(x)``
    runs : ndarray
        length of each run, in order of appearance
    runs_sign : ndarray
        value of the observations in each run
    runs_pos, runs_neg : ndarray
        lengths of the runs of ones and of zeros, respectively
    runs_freqs : ndarray
        ``runs_freqs[k]`` is the number of runs of length k
    n_runs : int
        total number of runs
    n_pos : int
        number of observations equal to 1

    Notes
    -----
    This was written as a more general class for runs. This has some redundant
    calculations when only the runs_test is used.

    TODO: make it lazy

    The runs test could be generalized to more than 1d if there is a use case
    for it.

    This should be extended once I figure out what the distribution of runs
    of any length k is.

    The exact distribution for the runs test is also available but not yet
    verified.

    '''

    def __init__(self, x):
        # Convert once and use the array for everything below, so that plain
        # Python sequences work as well as ndarrays.
        self.x = x = np.asarray(x)

        # Padding with -inf / +inf forces a change at both ends of the data,
        # so the nonzero differences mark the start of every run plus the
        # position one past the last observation.
        self.runstart = runstart = np.nonzero(
            np.diff(np.r_[[-np.inf], x, [np.inf]]))[0]
        self.runs = runs = np.diff(runstart)
        self.runs_sign = runs_sign = x[runstart[:-1]]
        self.runs_pos = runs[runs_sign == 1]
        self.runs_neg = runs[runs_sign == 0]
        self.runs_freqs = np.bincount(runs)
        self.n_runs = len(self.runs)
        self.n_pos = (x == 1).sum()

    def runs_test(self, correction=True):
        '''basic version of runs test

        Parameters
        ----------
        correction : bool
            Following the SAS manual, for samplesize below 50, the test
            statistic is corrected by 0.5. This can be turned off with
            correction=False, and was included to match R, tseries, which
            does not use any correction.

        Returns
        -------
        z : float
            test statistic, (number of runs - expected number of runs)
            divided by the standard deviation of the number of runs, with
            the optional continuity correction applied to the numerator
        pval : float
            two-sided pvalue based on normal distribution

        Notes
        -----
        With the correction, the deviation of the number of runs from its
        expected value is shrunk by 0.5 towards zero; deviations that are not
        larger than 0.5 in absolute value give a test statistic of zero.

        '''
        self.npo = npo = (self.runs_pos).sum()
        self.nne = nne = (self.runs_neg).sum()

        n = npo + nne
        npn = npo * nne
        rmean = 2. * npn / n + 1
        rvar = 2. * npn * (2.*npn - n) / n**2. / (n-1.)
        rstd = np.sqrt(rvar)
        rdemean = self.n_runs - rmean
        if n >= 50 or not correction:
            z = rdemean
        else:
            # continuity correction: move the deviation 0.5 towards zero,
            # and never across zero
            if rdemean > 0.5:
                z = rdemean - 0.5
            elif rdemean < -0.5:
                z = rdemean + 0.5
            else:
                z = 0.

        z /= rstd
        pval = 2 * stats.norm.sf(np.abs(z))
        return z, pval
|
|
|
|
def runstest_1samp(x, cutoff='mean', correction=True):
    '''runs test on data dichotomized at a cutoff value

    The data is converted to a 0/1 indicator, one for observations at or
    above the cutoff and zero otherwise, and the runs test is applied to
    that indicator sequence.

    Parameters
    ----------
    x : array_like
        data, numeric
    cutoff : {'mean', 'median'} or number
        This specifies the cutoff to split the data into large and small
        values. The strings select the sample mean or the sample median of
        ``x``, anything else is converted with ``float``.
    correction : bool
        Following the SAS manual, for samplesize below 50, the test
        statistic is corrected by 0.5. This can be turned off with
        correction=False, and was included to match R, tseries, which
        does not use any correction.

    Returns
    -------
    z_stat : float
        test statistic, asymptotically normally distributed
    p-value : float
        p-value, reject the null hypothesis if it is below a type 1 error
        level, alpha.

    '''
    x = array_like(x, "x")

    # resolve the cutoff specification to a number
    if cutoff == 'mean':
        threshold = np.mean(x)
    elif cutoff == 'median':
        threshold = np.median(x)
    else:
        threshold = float(cutoff)

    is_large = x >= threshold
    runs = Runs(is_large.astype(int))
    return runs.runs_test(correction=correction)
|
|
|
|
def runstest_2samp(x, y=None, groups=None, correction=True):
    '''Wald-Wolfowitz runstest for two samples

    This tests whether two samples come from the same distribution.

    Parameters
    ----------
    x : array_like
        data, numeric, contains either one group, if y is also given, or
        both groups, if additionally a group indicator is provided
    y : array_like (optional)
        data, numeric. If y is given, then ``groups`` is ignored.
    groups : array_like
        group labels or indicator if the data for both groups is given in a
        single 1-dimensional array, x. There have to be exactly two distinct
        labels. If group labels are not [0,1], then the smaller label (in
        the sort order of ``np.unique``) is treated as group 0 and the
        larger label as group 1.
    correction : bool
        Following the SAS manual, for samplesize below 50, the test
        statistic is corrected by 0.5. This can be turned off with
        correction=False, and was included to match R, tseries, which
        does not use any correction.

    Returns
    -------
    z_stat : float
        test statistic, asymptotically normally distributed
    p-value : float
        p-value, reject the null hypothesis if it is below a type 1 error
        level, alpha.

    Raises
    ------
    ValueError
        If neither y nor groups is given, or if groups does not contain
        exactly two distinct labels.

    Warns
    -----
    UserWarning
        If there are ties in the pooled data.

    Notes
    -----
    Wald-Wolfowitz runs test.

    If there are ties, then the test statistic and p-value that is
    reported, is based on the higher p-value between sorting all tied
    observations of the same group


    This test is intended for continuous distributions
    SAS has treatment for ties, but not clear, and sounds more complicated
    (minimum and maximum possible runs prevent use of argsort)
    (maybe it's not so difficult, idea: add small positive noise to first
    one, run test, then to the other, run test, take max(?) p-value - DONE
    This gives not the minimum and maximum of the number of runs, but should
    be close. Not true, this is close to minimum but far away from maximum.
    maximum number of runs would use alternating groups in the ties.)
    Maybe adding random noise would be the better approach.

    SAS has exact distribution for sample size <=30, does not look standard
    but should be easy to add.

    currently two-sided test only

    This has not been verified against a reference implementation. In a short
    Monte Carlo simulation where both samples are normally distributed, the
    test seems to be correctly sized for larger number of observations (30 or
    larger), but conservative (i.e. reject less often than nominal) with a
    sample size of 10 in each group.

    See Also
    --------
    runstest_1samp
    Runs
    RunsProb

    '''
    x = np.asarray(x)
    if y is not None:
        y = np.asarray(y)
        groups = np.concatenate((np.zeros(len(x)), np.ones(len(y))))
        # note reassigning x
        x = np.concatenate((x, y))
        gruni = np.arange(2)
    elif groups is not None:
        # lists and other sequences need to support mask comparison and
        # fancy indexing below
        groups = np.asarray(groups)
        gruni = np.unique(groups)
        if gruni.size != 2:  # pylint: disable=E1103
            raise ValueError('not exactly two groups specified')
    else:
        raise ValueError('either y or groups is necessary')

    # Runs only distinguishes the values 0 and 1, so arbitrary labels are
    # mapped to a 0/1 indicator first.
    indicator = (groups == gruni[1]).astype(int)

    xargsort = np.argsort(x)
    # check for ties
    x_sorted = x[xargsort]
    x_diff = np.diff(x_sorted)  # used for detecting and handling ties
    if x_diff.min() == 0:
        warnings.warn('ties detected', UserWarning, stacklevel=2)

        # Shift by half of the smallest positive gap, so that ties are
        # broken without changing the order of distinct values. If every
        # value is identical there is no gap and any positive shift works.
        x_gaps = x_diff[x_diff > 0]
        eps = x_gaps.min() / 2. if x_gaps.size else 1.

        results = []
        for shifted_group in (0, 1):
            # float copy: does not change the original, and allows adding
            # a fractional shift to integer data
            xx = x.astype(float)
            xx[indicator == shifted_group] += eps
            xindicator = indicator[np.argsort(xx)]
            results.append(Runs(xindicator).runs_test(correction=correction))

        (z0, p0), (z1, p1) = results
        # report the less significant of the two tie-breaking orders
        idx = np.argmax([p0, p1])
        return [z0, z1][idx], [p0, p1][idx]

    else:
        xindicator = indicator[xargsort]
        return Runs(xindicator).runs_test(correction=correction)
|
|
|
|
|
|
class TotalRunsProb:
    '''exact probability distribution of the total number of runs

    This is the distribution used by the (Wald-Wolfowitz) runs test. The
    random variable is the total number of runs in a sample that has
    (n0, n1) observations of groups 0 and 1.

    Parameters
    ----------
    n0 : int
        number of observations in group 0
    n1 : int
        number of observations in group 1

    Notes
    -----
    Written as a class so that temporary calculations can be stored, but
    that probably does not matter much.

    Formulas taken from SAS manual for one-sided significance level.

    Could be converted to a full univariate distribution, subclassing
    scipy.stats.distributions.

    *Status*
    Not verified yet except for mean.

    '''

    def __init__(self, n0, n1):
        self.n0 = n0
        self.n1 = n1
        total = n0 + n1
        self.n = total
        # number of distinct arrangements of the two groups
        self.comball = comb(total, n1)

    def runs_prob_even(self, r):
        '''probability of an even number of runs r'''
        # an even number of runs splits into r/2 runs for each group
        half = r // 2 - 1
        ways0 = comb(self.n0 - 1, half)
        ways1 = comb(self.n1 - 1, half)
        return ways0 * ways1 * 2. / self.comball

    def runs_prob_odd(self, r):
        '''probability of an odd number of runs r'''
        # with an odd number of runs one group has one run more than the
        # other; both assignments are added up
        k = (r + 1) // 2
        more0 = comb(self.n0 - 1, k - 1)
        less1 = comb(self.n1 - 1, k - 2)
        less0 = comb(self.n0 - 1, k - 2)
        more1 = comb(self.n1 - 1, k - 1)
        return (more0 * less1 + less0 * more1) / self.comball

    def pdf(self, r):
        '''probability of observing r runs, r can be an array of counts'''
        r = np.asarray(r)
        odd_mask = np.mod(r, 2) > 0
        even_mask = ~odd_mask
        prob = np.zeros(r.shape)
        prob[odd_mask] = self.runs_prob_odd(r[odd_mask])
        prob[even_mask] = self.runs_prob_even(r[even_mask])
        return prob

    def cdf(self, r):
        '''probability of observing at most r runs, r is a scalar'''
        # the smallest possible number of runs is 2
        support = np.arange(2, r+1)
        even_total = self.runs_prob_even(support[::2]).sum()
        odd_total = self.runs_prob_odd(support[1::2]).sum()
        return even_total + odd_total
|
|
|
|
|
|
class RunsProb:
    '''distribution of success runs of length k or more (classical definition)

    The underlying process is assumed to be a sequence of Bernoulli trials
    of a given length n.

    Notes
    -----
    It is not yet clear how to interpret or use the distribution for runs
    of length k or more.

    Muselli also has longest success run, and waiting time distribution
    negative binomial of order k and geometric of order k

    need to compare with Godpole

    need a MonteCarlo function to do some quick tests before doing more

    '''

    def pdf(self, x, k, n, p):
        '''distribution of success runs of length k or more

        Parameters
        ----------
        x : float
            count of runs of length k or more
        k : int
            length of runs
        n : int
            total number of observations or trials
        p : float
            probability of success in each Bernoulli trial

        Returns
        -------
        pdf : ndarray
            probability that x runs of length k or more are observed. The
            sum over the terms is taken along the first axis of a column
            of terms, so the result has one element.

        Notes
        -----
        not yet vectorized

        References
        ----------
        Muselli 1996, theorem 3
        '''
        q = 1 - p
        # summation index as a column, from x up to the largest feasible count
        upper = (n + 1) // (k + 1) + 1
        m = np.arange(x, upper)[:, None]

        # alternating-sign weight of each term in the sum
        weights = (-1)**(m - x) * comb(m, x) * p**(m * k) * q**(m - 1)
        placements = comb(n - m * k, m - 1) + q * comb(n - m * k, m)
        terms = weights * placements
        return terms.sum(0)

    def pdf_nb(self, x, k, n, p):
        '''placeholder, not implemented, returns None'''
        #y = np.arange(m-1, n-mk+1
|
|
|
|
'''
|
|
>>> [np.sum([RunsProb().pdf(xi, k, 16, 10/16.) for xi in range(0,16)]) for k in range(16)]
|
|
[0.99999332193894064, 0.99999999999999367, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
|
|
>>> [(np.arange(0,16) * [RunsProb().pdf(xi, k, 16, 10/16.) for xi in range(0,16)]).sum() for k in range(16)]
|
|
[6.9998931510341809, 4.1406249999999929, 2.4414062500000075, 1.4343261718749996, 0.83923339843749856, 0.48875808715820324, 0.28312206268310569, 0.1629814505577086, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
|
|
>>> np.array([(np.arange(0,16) * [RunsProb().pdf(xi, k, 16, 10/16.) for xi in range(0,16)]).sum() for k in range(16)])/11
|
|
array([ 0.63635392, 0.37642045, 0.22194602, 0.13039329, 0.07629395,
|
|
0.04443255, 0.02573837, 0.0148165 , 0. , 0. ,
|
|
0. , 0. , 0. , 0. , 0. , 0. ])
|
|
>>> np.diff([(np.arange(0,16) * [RunsProb().pdf(xi, k, 16, 10/16.) for xi in range(0,16)]).sum() for k in range(16)][::-1])
|
|
array([ 0. , 0. , 0. , 0. , 0. ,
|
|
0. , 0. , 0.16298145, 0.12014061, 0.20563602,
|
|
0.35047531, 0.59509277, 1.00708008, 1.69921875, 2.85926815])
|
|
'''
|
|
|
|
|
|
|
|
def median_test_ksample(x, groups):
    '''chisquare test for equality of median/location

    This tests whether all groups have the same fraction of observations
    above the median.

    Parameters
    ----------
    x : array_like
        data values stacked for all groups
    groups : array_like
        group labels or indicator, one label for each element of x

    Returns
    -------
    chisquare_result : tuple
        (stat, pvalue) as returned by ``scipy.stats.chisquare``, the test
        statistic and the pvalue from the chisquare distribution with
        (number of groups - 1) degrees of freedom
    table : ndarray, (2, k)
        observed counts, first row is the number of observations that are
        not larger than the pooled median, second row is the number of
        observations that are larger, one column per group
    expected : ndarray, (2, k)
        expected counts under the null hypothesis, same layout as table

    Warns
    -----
    UserWarning
        If any expected cell count is below 5.

    Notes
    -----
    The returns currently include some test output, table and expected.

    '''
    x = np.asarray(x)
    # a list of labels would compare to a single False instead of a mask
    groups = np.asarray(groups)
    gruni = np.unique(groups)
    xli = [x[groups == group] for group in gruni]
    xmedian = np.median(x)
    counts_larger = np.array([(xg > xmedian).sum() for xg in xli])
    counts = np.array([len(xg) for xg in xli])
    counts_smaller = counts - counts_larger
    nobs = counts.sum()
    n_larger = (x > xmedian).sum()
    n_smaller = nobs - n_larger
    table = np.vstack((counts_smaller, counts_larger))

    #the following should be replaced by chisquare_contingency table
    expected = np.vstack((counts * 1. / nobs * n_smaller,
                          counts * 1. / nobs * n_larger))

    if (expected < 5).any():
        warnings.warn('There are cells with less than 5 expected '
                      'observations. The chisquare distribution might not '
                      'be a good approximation for the true distribution.',
                      UserWarning, stacklevel=2)

    # chisquare uses (number of cells - 1 - ddof) degrees of freedom. The
    # 2 x k table has 2 * k cells and the test of independence has k - 1
    # degrees of freedom, which requires ddof = k.
    ddof = len(gruni)
    return (stats.chisquare(table.ravel(), expected.ravel(), ddof=ddof),
            table, expected)
|
|
|
|
|
|
|
|
|
|
def cochrans_q(x):
    '''Cochran's Q test for identical effect of k treatments

    Cochran's Q is a k-sample extension of the McNemar test. If there are only
    two treatments, then Cochran's Q test and McNemar test are equivalent.

    The null hypothesis is that the probability of success is the same for
    each treatment. The alternative is that at least two treatments have a
    different probability of success.

    Parameters
    ----------
    x : array_like, 2d (N,k)
        data with N cases and k variables. The largest distinct value in x
        is counted as success.

    Returns
    -------
    q_stat : float
        test statistic
    pvalue : float
        pvalue from the chisquare distribution with k-1 degrees of freedom

    Notes
    -----
    Deprecated, a FutureWarning is issued on every call.

    In Wikipedia terminology, rows are blocks and columns are treatments.
    The number of rows N, should be large for the chisquare distribution to be
    a good approximation.

    References
    ----------
    https://en.wikipedia.org/wiki/Cochran_test
    SAS Manual for NPAR TESTS

    '''
    warnings.warn("Deprecated, use stats.cochrans_q instead", FutureWarning)

    x = np.asarray(x)
    labels = np.unique(x)
    n_cases, k = x.shape

    # the largest label marks a success
    is_success = x == labels[-1]
    row_hits = is_success.sum(1, float)
    col_hits = is_success.sum(0, float)
    row_total = row_hits.sum()
    col_total = col_hits.sum()
    assert row_total == col_total  #just a calculation check

    # formula as in the SAS manual; Wikipedia uses a different, but
    # equivalent expression
    numerator = (k-1) * (k * np.sum(col_hits**2) - col_total**2)
    denominator = k * row_total - np.sum(row_hits**2)
    q_stat = numerator / denominator

    pvalue = stats.chi2.sf(q_stat, k-1)
    return q_stat, pvalue
|
|
|
|
def mcnemar(x, y=None, exact=True, correction=True):
    '''McNemar test

    Parameters
    ----------
    x, y : array_like
        two paired data samples. If y is None, then x can be a 2 by 2
        contingency table. x and y can have more than one dimension, then
        the results are calculated under the assumption that axis zero
        contains the observation for the samples.
    exact : bool
        If exact is true, then the binomial distribution will be used.
        If exact is false, then the chisquare distribution will be used, which
        is the approximation to the distribution of the test statistic for
        large sample sizes.
    correction : bool
        If true, then a continuity correction is used for the chisquare
        distribution (if exact is false.)

    Returns
    -------
    stat : float or int, array
        The test statistic is the chisquare statistic if exact is false. If the
        exact binomial distribution is used, then this contains the min(n1, n2),
        where n1, n2 are cases that are zero in one sample but one in the other
        sample.
    pvalue : float or array
        p-value of the null hypothesis of equal effects.

    Raises
    ------
    ValueError
        If y is None and x is a square table that is not 2 by 2.

    Notes
    -----
    Deprecated, a FutureWarning is issued on every call.

    This is a special case of Cochran's Q test. The results when the chisquare
    distribution is used are identical, except for continuity correction.

    '''
    warnings.warn("Deprecated, use stats.TableSymmetry instead", FutureWarning)

    x = np.asarray(x)
    is_table = y is None and x.shape[0] == x.shape[1]
    if is_table:
        if x.shape[0] != 2:
            raise ValueError('table needs to be 2 by 2')
        # discordant pairs are the off-diagonal cells
        n1 = x[1, 0]
        n2 = x[0, 1]
    else:
        # no check here whether x and y are binary, the counts are those
        # of a paired sign test
        n1 = np.sum(x < y, 0)
        n2 = np.sum(x > y, 0)

    if not exact:
        shift = int(correction)  # 1 with continuity correction, else 0
        stat = (np.abs(n1 - n2) - shift)**2 / (1. * (n1 + n2))
        return stat, stats.chi2.sf(stat, 1)

    stat = np.minimum(n1, n2)
    # binom is symmetric with p=0.5, so the lower tail is doubled
    doubled_tail = stats.binom.cdf(stat, n1 + n2, 0.5) * 2
    # limit to 1 if n1==n2
    return stat, np.minimum(doubled_tail, 1)
|
|
|
|
|
|
def symmetry_bowker(table):
    '''Test for symmetry of a (k, k) square contingency table

    This is an extension of the McNemar test to test the Null hypothesis
    that the contingency table is symmetric around the main diagonal, that is

    n_{i, j} = n_{j, i}  for all i, j

    Parameters
    ----------
    table : array_like, 2d, (k, k)
        a square contingency table that contains the count for k categories
        in rows and columns.

    Returns
    -------
    statistic : float
        chisquare test statistic
    p-value : float
        p-value of the test statistic based on chisquare distribution
    df : float
        degrees of freedom of the chisquare distribution, k * (k - 1) / 2

    Raises
    ------
    ValueError
        If the table is not square.

    Notes
    -----
    Deprecated, a FutureWarning is issued on every call.

    Implementation is based on the SAS documentation, R includes it in
    `mcnemar.test` if the table is not 2 by 2.

    The pvalue is based on the chisquare distribution which requires that the
    sample size is not very small to be a good approximation of the true
    distribution. For 2x2 contingency tables exact distribution can be
    obtained with `mcnemar`

    See Also
    --------
    mcnemar

    '''
    warnings.warn("Deprecated, use stats.TableSymmetry instead", FutureWarning)

    table = np.asarray(table)
    n_rows, n_cols = table.shape
    if n_rows != n_cols:
        raise ValueError('table needs to be square')
    k = n_rows

    # Indexing the transpose with the upper triangle indices pairs every
    # cell above the diagonal with its mirror cell below the diagonal.
    above_diag = np.triu_indices(k, 1)
    lower = table.T[above_diag]
    upper = table[above_diag]

    # the tiny constant avoids division by zero if both mirror cells are empty
    squared_diff = (lower - upper)**2
    stat = (squared_diff / (lower + upper + 1e-20)).sum()
    df = k * (k-1) / 2.
    pval = stats.chi2.sf(stat, df)

    return stat, pval, df
|
|
|
|
|
|
if __name__ == '__main__':
    # small demonstration that prints the results of the tests in this module

    # binary example sequence, used as data and as group indicator
    x1 = np.array([1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1])

    runs_result = Runs(x1).runs_test()
    print(runs_result)

    one_sample_result = runstest_1samp(x1, cutoff='mean')
    print(one_sample_result)

    # strictly decreasing data without ties, grouped by the example sequence
    two_sample_result = runstest_2samp(np.arange(16, 0, -1), groups=x1)
    print(two_sample_result)

    # exact probability of at most 11 runs with 7 and 9 observations
    print(TotalRunsProb(7, 9).cdf(11))

    # random data examples
    print(median_test_ksample(np.random.randn(100),
                              np.random.randint(0, 2, 100)))
    print(cochrans_q(np.random.randint(0, 2, (100, 8))))
|