960 lines
28 KiB
Python
960 lines
28 KiB
Python
|
'''functions to work with contrasts for multiple tests
|
||
|
|
||
|
contrast matrices for comparing all pairs, all levels to reference level, ...
|
||
|
extension to 2-way groups in progress
|
||
|
|
||
|
TwoWay: class for bringing two-way analysis together and try out
|
||
|
various helper functions
|
||
|
|
||
|
|
||
|
Idea for second part
|
||
|
- get all transformation matrices to move in between different full rank
|
||
|
parameterizations
|
||
|
- standardize to one parameterization to get all interesting effects.
|
||
|
|
||
|
- multivariate normal distribution
|
||
|
- exploit or expand what we have in LikelihoodResults, cov_params, f_test,
|
||
|
t_test, example: resols_dropf_full.cov_params(C2)
|
||
|
- connect to new multiple comparison for contrast matrices, based on
|
||
|
multivariate normal or t distribution (Hothorn, Bretz, Westfall)
|
||
|
|
||
|
'''
|
||
|
|
||
|
|
||
|
|
||
|
from numpy.testing import assert_equal
|
||
|
|
||
|
import numpy as np
|
||
|
|
||
|
#next 3 functions copied from multicomp.py
|
||
|
|
||
|
def contrast_allpairs(nm):
|
||
|
'''contrast or restriction matrix for all pairs of nm variables
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
nm : int
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
contr : ndarray, 2d, (nm*(nm-1)/2, nm)
|
||
|
contrast matrix for all pairwise comparisons
|
||
|
|
||
|
'''
|
||
|
contr = []
|
||
|
for i in range(nm):
|
||
|
for j in range(i+1, nm):
|
||
|
contr_row = np.zeros(nm)
|
||
|
contr_row[i] = 1
|
||
|
contr_row[j] = -1
|
||
|
contr.append(contr_row)
|
||
|
return np.array(contr)
|
||
|
|
||
|
def contrast_all_one(nm):
|
||
|
'''contrast or restriction matrix for all against first comparison
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
nm : int
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
contr : ndarray, 2d, (nm-1, nm)
|
||
|
contrast matrix for all against first comparisons
|
||
|
|
||
|
'''
|
||
|
contr = np.column_stack((np.ones(nm-1), -np.eye(nm-1)))
|
||
|
return contr
|
||
|
|
||
|
def contrast_diff_mean(nm):
|
||
|
'''contrast or restriction matrix for all against mean comparison
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
nm : int
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
contr : ndarray, 2d, (nm-1, nm)
|
||
|
contrast matrix for all against mean comparisons
|
||
|
|
||
|
'''
|
||
|
return np.eye(nm) - np.ones((nm,nm))/nm
|
||
|
|
||
|
def signstr(x, noplus=False):
|
||
|
if x in [-1,0,1]:
|
||
|
if not noplus:
|
||
|
return '+' if np.sign(x)>=0 else '-'
|
||
|
else:
|
||
|
return '' if np.sign(x)>=0 else '-'
|
||
|
else:
|
||
|
return str(x)
|
||
|
|
||
|
|
||
|
def contrast_labels(contrasts, names, reverse=False):
|
||
|
if reverse:
|
||
|
sl = slice(None, None, -1)
|
||
|
else:
|
||
|
sl = slice(None)
|
||
|
labels = [''.join([f'{signstr(c, noplus=True)}{v}'
|
||
|
for c,v in zip(row, names)[sl] if c != 0])
|
||
|
for row in contrasts]
|
||
|
return labels
|
||
|
|
||
|
def contrast_product(names1, names2, intgroup1=None, intgroup2=None, pairs=False):
|
||
|
'''build contrast matrices for products of two categorical variables
|
||
|
|
||
|
this is an experimental script and should be converted to a class
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
names1, names2 : lists of strings
|
||
|
contains the list of level labels for each categorical variable
|
||
|
intgroup1, intgroup2 : ndarrays TODO: this part not tested, finished yet
|
||
|
categorical variable
|
||
|
|
||
|
|
||
|
Notes
|
||
|
-----
|
||
|
This creates a full rank matrix. It does not do all pairwise comparisons,
|
||
|
parameterization is using contrast_all_one to get differences with first
|
||
|
level.
|
||
|
|
||
|
? does contrast_all_pairs work as a plugin to get all pairs ?
|
||
|
|
||
|
'''
|
||
|
|
||
|
n1 = len(names1)
|
||
|
n2 = len(names2)
|
||
|
names_prod = [f'{i}_{j}' for i in names1 for j in names2]
|
||
|
ee1 = np.zeros((1,n1))
|
||
|
ee1[0,0] = 1
|
||
|
if not pairs:
|
||
|
dd = np.r_[ee1, -contrast_all_one(n1)]
|
||
|
else:
|
||
|
dd = np.r_[ee1, -contrast_allpairs(n1)]
|
||
|
|
||
|
contrast_prod = np.kron(dd[1:], np.eye(n2))
|
||
|
names_contrast_prod0 = contrast_labels(contrast_prod, names_prod, reverse=True)
|
||
|
names_contrast_prod = [''.join([f'{signstr(c, noplus=True)}{v}'
|
||
|
for c,v in zip(row, names_prod)[::-1] if c != 0])
|
||
|
for row in contrast_prod]
|
||
|
|
||
|
ee2 = np.zeros((1,n2))
|
||
|
ee2[0,0] = 1
|
||
|
#dd2 = np.r_[ee2, -contrast_all_one(n2)]
|
||
|
if not pairs:
|
||
|
dd2 = np.r_[ee2, -contrast_all_one(n2)]
|
||
|
else:
|
||
|
dd2 = np.r_[ee2, -contrast_allpairs(n2)]
|
||
|
|
||
|
contrast_prod2 = np.kron(np.eye(n1), dd2[1:])
|
||
|
names_contrast_prod2 = [''.join([f'{signstr(c, noplus=True)}{v}'
|
||
|
for c,v in zip(row, names_prod)[::-1] if c != 0])
|
||
|
for row in contrast_prod2]
|
||
|
|
||
|
if (intgroup1 is not None) and (intgroup1 is not None):
|
||
|
d1, _ = dummy_1d(intgroup1)
|
||
|
d2, _ = dummy_1d(intgroup2)
|
||
|
dummy = dummy_product(d1, d2)
|
||
|
else:
|
||
|
dummy = None
|
||
|
|
||
|
return (names_prod, contrast_prod, names_contrast_prod,
|
||
|
contrast_prod2, names_contrast_prod2, dummy)
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
def dummy_1d(x, varname=None):
|
||
|
'''dummy variable for id integer groups
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
x : ndarray, 1d
|
||
|
categorical variable, requires integers if varname is None
|
||
|
varname : str
|
||
|
name of the variable used in labels for category levels
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
dummy : ndarray, 2d
|
||
|
array of dummy variables, one column for each level of the
|
||
|
category (full set)
|
||
|
labels : list[str]
|
||
|
labels for the columns, i.e. levels of each category
|
||
|
|
||
|
|
||
|
Notes
|
||
|
-----
|
||
|
use tools.categorical instead for more more options
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
statsmodels.tools.categorical
|
||
|
|
||
|
Examples
|
||
|
--------
|
||
|
>>> x = np.array(['F', 'F', 'M', 'M', 'F', 'F', 'M', 'M', 'F', 'F', 'M', 'M'],
|
||
|
dtype='|S1')
|
||
|
>>> dummy_1d(x, varname='gender')
|
||
|
(array([[1, 0],
|
||
|
[1, 0],
|
||
|
[0, 1],
|
||
|
[0, 1],
|
||
|
[1, 0],
|
||
|
[1, 0],
|
||
|
[0, 1],
|
||
|
[0, 1],
|
||
|
[1, 0],
|
||
|
[1, 0],
|
||
|
[0, 1],
|
||
|
[0, 1]]), ['gender_F', 'gender_M'])
|
||
|
|
||
|
'''
|
||
|
if varname is None: #assumes integer
|
||
|
labels = ['level_%d' % i for i in range(x.max() + 1)]
|
||
|
return (x[:,None]==np.arange(x.max()+1)).astype(int), labels
|
||
|
else:
|
||
|
grouplabels = np.unique(x)
|
||
|
labels = [varname + '_%s' % str(i) for i in grouplabels]
|
||
|
return (x[:,None]==grouplabels).astype(int), labels
|
||
|
|
||
|
|
||
|
def dummy_product(d1, d2, method='full'):
|
||
|
'''dummy variable from product of two dummy variables
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
d1, d2 : ndarray
|
||
|
two dummy variables, assumes full set for methods 'drop-last'
|
||
|
and 'drop-first'
|
||
|
method : {'full', 'drop-last', 'drop-first'}
|
||
|
'full' returns the full product, encoding of intersection of
|
||
|
categories.
|
||
|
The drop methods provide a difference dummy encoding:
|
||
|
(constant, main effects, interaction effects). The first or last columns
|
||
|
of the dummy variable (i.e. levels) are dropped to get full rank
|
||
|
dummy matrix.
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
dummy : ndarray
|
||
|
dummy variable for product, see method
|
||
|
|
||
|
'''
|
||
|
|
||
|
if method == 'full':
|
||
|
dd = (d1[:,:,None]*d2[:,None,:]).reshape(d1.shape[0],-1)
|
||
|
elif method == 'drop-last': #same as SAS transreg
|
||
|
d12rl = dummy_product(d1[:,:-1], d2[:,:-1])
|
||
|
dd = np.column_stack((np.ones(d1.shape[0], int), d1[:,:-1], d2[:,:-1],d12rl))
|
||
|
#Note: dtype int should preserve dtype of d1 and d2
|
||
|
elif method == 'drop-first':
|
||
|
d12r = dummy_product(d1[:,1:], d2[:,1:])
|
||
|
dd = np.column_stack((np.ones(d1.shape[0], int), d1[:,1:], d2[:,1:],d12r))
|
||
|
else:
|
||
|
raise ValueError('method not recognized')
|
||
|
|
||
|
return dd
|
||
|
|
||
|
def dummy_limits(d):
|
||
|
'''start and endpoints of groups in a sorted dummy variable array
|
||
|
|
||
|
helper function for nested categories
|
||
|
|
||
|
Examples
|
||
|
--------
|
||
|
>>> d1 = np.array([[1, 0, 0],
|
||
|
[1, 0, 0],
|
||
|
[1, 0, 0],
|
||
|
[1, 0, 0],
|
||
|
[0, 1, 0],
|
||
|
[0, 1, 0],
|
||
|
[0, 1, 0],
|
||
|
[0, 1, 0],
|
||
|
[0, 0, 1],
|
||
|
[0, 0, 1],
|
||
|
[0, 0, 1],
|
||
|
[0, 0, 1]])
|
||
|
>>> dummy_limits(d1)
|
||
|
(array([0, 4, 8]), array([ 4, 8, 12]))
|
||
|
|
||
|
get group slices from an array
|
||
|
|
||
|
>>> [np.arange(d1.shape[0])[b:e] for b,e in zip(*dummy_limits(d1))]
|
||
|
[array([0, 1, 2, 3]), array([4, 5, 6, 7]), array([ 8, 9, 10, 11])]
|
||
|
>>> [np.arange(d1.shape[0])[b:e] for b,e in zip(*dummy_limits(d1))]
|
||
|
[array([0, 1, 2, 3]), array([4, 5, 6, 7]), array([ 8, 9, 10, 11])]
|
||
|
'''
|
||
|
nobs, nvars = d.shape
|
||
|
start1, col1 = np.nonzero(np.diff(d,axis=0)==1)
|
||
|
end1, col1_ = np.nonzero(np.diff(d,axis=0)==-1)
|
||
|
cc = np.arange(nvars)
|
||
|
#print(cc, np.r_[[0], col1], np.r_[col1_, [nvars-1]]
|
||
|
if ((not (np.r_[[0], col1] == cc).all())
|
||
|
or (not (np.r_[col1_, [nvars-1]] == cc).all())):
|
||
|
raise ValueError('dummy variable is not sorted')
|
||
|
|
||
|
start = np.r_[[0], start1+1]
|
||
|
end = np.r_[end1+1, [nobs]]
|
||
|
return start, end
|
||
|
|
||
|
|
||
|
|
||
|
def dummy_nested(d1, d2, method='full'):
|
||
|
'''unfinished and incomplete mainly copy past dummy_product
|
||
|
dummy variable from product of two dummy variables
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
d1, d2 : ndarray
|
||
|
two dummy variables, d2 is assumed to be nested in d1
|
||
|
Assumes full set for methods 'drop-last' and 'drop-first'.
|
||
|
method : {'full', 'drop-last', 'drop-first'}
|
||
|
'full' returns the full product, which in this case is d2.
|
||
|
The drop methods provide an effects encoding:
|
||
|
(constant, main effects, subgroup effects). The first or last columns
|
||
|
of the dummy variable (i.e. levels) are dropped to get full rank
|
||
|
encoding.
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
dummy : ndarray
|
||
|
dummy variable for product, see method
|
||
|
|
||
|
'''
|
||
|
if method == 'full':
|
||
|
return d2
|
||
|
|
||
|
start1, end1 = dummy_limits(d1)
|
||
|
start2, end2 = dummy_limits(d2)
|
||
|
first = np.in1d(start2, start1)
|
||
|
last = np.in1d(end2, end1)
|
||
|
equal = (first == last)
|
||
|
col_dropf = ~first*~equal
|
||
|
col_dropl = ~last*~equal
|
||
|
|
||
|
|
||
|
if method == 'drop-last':
|
||
|
d12rl = dummy_product(d1[:,:-1], d2[:,:-1])
|
||
|
dd = np.column_stack((np.ones(d1.shape[0], int), d1[:,:-1], d2[:,col_dropl]))
|
||
|
#Note: dtype int should preserve dtype of d1 and d2
|
||
|
elif method == 'drop-first':
|
||
|
d12r = dummy_product(d1[:,1:], d2[:,1:])
|
||
|
dd = np.column_stack((np.ones(d1.shape[0], int), d1[:,1:], d2[:,col_dropf]))
|
||
|
else:
|
||
|
raise ValueError('method not recognized')
|
||
|
|
||
|
return dd, col_dropf, col_dropl
|
||
|
|
||
|
|
||
|
class DummyTransform:
|
||
|
'''Conversion between full rank dummy encodings
|
||
|
|
||
|
|
||
|
y = X b + u
|
||
|
b = C a
|
||
|
a = C^{-1} b
|
||
|
|
||
|
y = X C a + u
|
||
|
|
||
|
define Z = X C, then
|
||
|
|
||
|
y = Z a + u
|
||
|
|
||
|
contrasts:
|
||
|
|
||
|
R_b b = r
|
||
|
|
||
|
R_a a = R_b C a = r
|
||
|
|
||
|
where R_a = R_b C
|
||
|
|
||
|
Here C is the transform matrix, with dot_left and dot_right as the main
|
||
|
methods, and the same for the inverse transform matrix, C^{-1}
|
||
|
|
||
|
Note:
|
||
|
- The class was mainly written to keep left and right straight.
|
||
|
- No checking is done.
|
||
|
- not sure yet if method names make sense
|
||
|
|
||
|
|
||
|
'''
|
||
|
|
||
|
def __init__(self, d1, d2):
|
||
|
'''C such that d1 C = d2, with d1 = X, d2 = Z
|
||
|
|
||
|
should be (x, z) in arguments ?
|
||
|
'''
|
||
|
self.transf_matrix = np.linalg.lstsq(d1, d2, rcond=-1)[0]
|
||
|
self.invtransf_matrix = np.linalg.lstsq(d2, d1, rcond=-1)[0]
|
||
|
|
||
|
def dot_left(self, a):
|
||
|
''' b = C a
|
||
|
'''
|
||
|
return np.dot(self.transf_matrix, a)
|
||
|
|
||
|
def dot_right(self, x):
|
||
|
''' z = x C
|
||
|
'''
|
||
|
return np.dot(x, self.transf_matrix)
|
||
|
|
||
|
def inv_dot_left(self, b):
|
||
|
''' a = C^{-1} b
|
||
|
'''
|
||
|
return np.dot(self.invtransf_matrix, b)
|
||
|
|
||
|
def inv_dot_right(self, z):
|
||
|
''' x = z C^{-1}
|
||
|
'''
|
||
|
return np.dot(z, self.invtransf_matrix)
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
def groupmean_d(x, d):
|
||
|
'''groupmeans using dummy variables
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
x : array_like, ndim
|
||
|
data array, tested for 1,2 and 3 dimensions
|
||
|
d : ndarray, 1d
|
||
|
dummy variable, needs to have the same length
|
||
|
as x in axis 0.
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
groupmeans : ndarray, ndim-1
|
||
|
means for each group along axis 0, the levels
|
||
|
of the groups are the last axis
|
||
|
|
||
|
Notes
|
||
|
-----
|
||
|
This will be memory intensive if there are many levels
|
||
|
in the categorical variable, i.e. many columns in the
|
||
|
dummy variable. In this case it is recommended to use
|
||
|
a more efficient version.
|
||
|
|
||
|
'''
|
||
|
x = np.asarray(x)
|
||
|
## if x.ndim == 1:
|
||
|
## nvars = 1
|
||
|
## else:
|
||
|
nvars = x.ndim + 1
|
||
|
sli = [slice(None)] + [None]*(nvars-2) + [slice(None)]
|
||
|
return (x[...,None] * d[sli]).sum(0)*1./d.sum(0)
|
||
|
|
||
|
|
||
|
|
||
|
class TwoWay:
|
||
|
'''a wrapper class for two way anova type of analysis with OLS
|
||
|
|
||
|
|
||
|
currently mainly to bring things together
|
||
|
|
||
|
Notes
|
||
|
-----
|
||
|
unclear: adding multiple test might assume block design or orthogonality
|
||
|
|
||
|
This estimates the full dummy version with OLS.
|
||
|
The drop first dummy representation can be recovered through the
|
||
|
transform method.
|
||
|
|
||
|
TODO: add more methods, tests, pairwise, multiple, marginal effects
|
||
|
try out what can be added for userfriendly access.
|
||
|
|
||
|
missing: ANOVA table
|
||
|
|
||
|
'''
|
||
|
def __init__(self, endog, factor1, factor2, varnames=None):
|
||
|
self.nobs = factor1.shape[0]
|
||
|
if varnames is None:
|
||
|
vname1 = 'a'
|
||
|
vname2 = 'b'
|
||
|
else:
|
||
|
vname1, vname1 = varnames
|
||
|
|
||
|
self.d1, self.d1_labels = d1, d1_labels = dummy_1d(factor1, vname1)
|
||
|
self.d2, self.d2_labels = d2, d2_labels = dummy_1d(factor2, vname2)
|
||
|
self.nlevel1 = nlevel1 = d1.shape[1]
|
||
|
self.nlevel2 = nlevel2 = d2.shape[1]
|
||
|
|
||
|
|
||
|
#get product dummies
|
||
|
res = contrast_product(d1_labels, d2_labels)
|
||
|
prodlab, C1, C1lab, C2, C2lab, _ = res
|
||
|
self.prod_label, self.C1, self.C1_label, self.C2, self.C2_label, _ = res
|
||
|
dp_full = dummy_product(d1, d2, method='full')
|
||
|
dp_dropf = dummy_product(d1, d2, method='drop-first')
|
||
|
self.transform = DummyTransform(dp_full, dp_dropf)
|
||
|
|
||
|
#estimate the model
|
||
|
self.nvars = dp_full.shape[1]
|
||
|
self.exog = dp_full
|
||
|
self.resols = sm.OLS(endog, dp_full).fit()
|
||
|
self.params = self.resols.params
|
||
|
|
||
|
#get transformed parameters, (constant, main, interaction effect)
|
||
|
self.params_dropf = self.transform.inv_dot_left(self.params)
|
||
|
self.start_interaction = 1 + (nlevel1 - 1) + (nlevel2 - 1)
|
||
|
self.n_interaction = self.nvars - self.start_interaction
|
||
|
|
||
|
#convert to cached property
|
||
|
def r_nointer(self):
|
||
|
'''contrast/restriction matrix for no interaction
|
||
|
'''
|
||
|
nia = self.n_interaction
|
||
|
R_nointer = np.hstack((np.zeros((nia, self.nvars-nia)), np.eye(nia)))
|
||
|
#inter_direct = resols_full_dropf.tval[-nia:]
|
||
|
R_nointer_transf = self.transform.inv_dot_right(R_nointer)
|
||
|
self.R_nointer_transf = R_nointer_transf
|
||
|
return R_nointer_transf
|
||
|
|
||
|
def ttest_interaction(self):
|
||
|
'''ttests for no-interaction terms are zero
|
||
|
'''
|
||
|
#use self.r_nointer instead
|
||
|
nia = self.n_interaction
|
||
|
R_nointer = np.hstack((np.zeros((nia, self.nvars-nia)), np.eye(nia)))
|
||
|
#inter_direct = resols_full_dropf.tval[-nia:]
|
||
|
R_nointer_transf = self.transform.inv_dot_right(R_nointer)
|
||
|
self.R_nointer_transf = R_nointer_transf
|
||
|
t_res = self.resols.t_test(R_nointer_transf)
|
||
|
return t_res
|
||
|
|
||
|
def ftest_interaction(self):
|
||
|
'''ttests for no-interaction terms are zero
|
||
|
'''
|
||
|
R_nointer_transf = self.r_nointer()
|
||
|
return self.resols.f_test(R_nointer_transf)
|
||
|
|
||
|
def ttest_conditional_effect(self, factorind):
|
||
|
if factorind == 1:
|
||
|
return self.resols.t_test(self.C1), self.C1_label
|
||
|
else:
|
||
|
return self.resols.t_test(self.C2), self.C2_label
|
||
|
|
||
|
def summary_coeff(self):
|
||
|
from statsmodels.iolib import SimpleTable
|
||
|
params_arr = self.params.reshape(self.nlevel1, self.nlevel2)
|
||
|
stubs = self.d1_labels
|
||
|
headers = self.d2_labels
|
||
|
title = 'Estimated Coefficients by factors'
|
||
|
table_fmt = dict(
|
||
|
data_fmts = ["%#10.4g"]*self.nlevel2)
|
||
|
return SimpleTable(params_arr, headers, stubs, title=title,
|
||
|
txt_fmt=table_fmt)
|
||
|
|
||
|
|
||
|
# --------------- tests
|
||
|
# TODO: several tests still missing, several are in the example with print
|
||
|
|
||
|
class TestContrastTools:
|
||
|
|
||
|
def __init__(self):
|
||
|
self.v1name = ['a0', 'a1', 'a2']
|
||
|
self.v2name = ['b0', 'b1']
|
||
|
self.d1 = np.array([[1, 0, 0],
|
||
|
[1, 0, 0],
|
||
|
[1, 0, 0],
|
||
|
[1, 0, 0],
|
||
|
[0, 1, 0],
|
||
|
[0, 1, 0],
|
||
|
[0, 1, 0],
|
||
|
[0, 1, 0],
|
||
|
[0, 0, 1],
|
||
|
[0, 0, 1],
|
||
|
[0, 0, 1],
|
||
|
[0, 0, 1]])
|
||
|
|
||
|
def test_dummy_1d(self):
|
||
|
x = np.array(['F', 'F', 'M', 'M', 'F', 'F', 'M', 'M', 'F', 'F', 'M', 'M'],
|
||
|
dtype='|S1')
|
||
|
d, labels = (np.array([[1, 0],
|
||
|
[1, 0],
|
||
|
[0, 1],
|
||
|
[0, 1],
|
||
|
[1, 0],
|
||
|
[1, 0],
|
||
|
[0, 1],
|
||
|
[0, 1],
|
||
|
[1, 0],
|
||
|
[1, 0],
|
||
|
[0, 1],
|
||
|
[0, 1]]), ['gender_F', 'gender_M'])
|
||
|
res_d, res_labels = dummy_1d(x, varname='gender')
|
||
|
assert_equal(res_d, d)
|
||
|
assert_equal(res_labels, labels)
|
||
|
|
||
|
def test_contrast_product(self):
|
||
|
res_cp = contrast_product(self.v1name, self.v2name)
|
||
|
res_t = [0]*6
|
||
|
res_t[0] = ['a0_b0', 'a0_b1', 'a1_b0', 'a1_b1', 'a2_b0', 'a2_b1']
|
||
|
res_t[1] = np.array([[-1., 0., 1., 0., 0., 0.],
|
||
|
[ 0., -1., 0., 1., 0., 0.],
|
||
|
[-1., 0., 0., 0., 1., 0.],
|
||
|
[ 0., -1., 0., 0., 0., 1.]])
|
||
|
res_t[2] = ['a1_b0-a0_b0', 'a1_b1-a0_b1', 'a2_b0-a0_b0', 'a2_b1-a0_b1']
|
||
|
res_t[3] = np.array([[-1., 1., 0., 0., 0., 0.],
|
||
|
[ 0., 0., -1., 1., 0., 0.],
|
||
|
[ 0., 0., 0., 0., -1., 1.]])
|
||
|
res_t[4] = ['a0_b1-a0_b0', 'a1_b1-a1_b0', 'a2_b1-a2_b0']
|
||
|
for ii in range(5):
|
||
|
np.testing.assert_equal(res_cp[ii], res_t[ii], err_msg=str(ii))
|
||
|
|
||
|
def test_dummy_limits(self):
|
||
|
b,e = dummy_limits(self.d1)
|
||
|
assert_equal(b, np.array([0, 4, 8]))
|
||
|
assert_equal(e, np.array([ 4, 8, 12]))
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
if __name__ == '__main__':
|
||
|
tt = TestContrastTools()
|
||
|
tt.test_contrast_product()
|
||
|
tt.test_dummy_1d()
|
||
|
tt.test_dummy_limits()
|
||
|
|
||
|
import statsmodels.api as sm
|
||
|
|
||
|
examples = ['small', 'large', None][1]
|
||
|
|
||
|
v1name = ['a0', 'a1', 'a2']
|
||
|
v2name = ['b0', 'b1']
|
||
|
res_cp = contrast_product(v1name, v2name)
|
||
|
print(res_cp)
|
||
|
|
||
|
y = np.arange(12)
|
||
|
x1 = np.arange(12)//4
|
||
|
x2 = np.arange(12)//2 % 2
|
||
|
|
||
|
if 'small' in examples:
|
||
|
d1, d1_labels = dummy_1d(x1)
|
||
|
d2, d2_labels = dummy_1d(x2)
|
||
|
|
||
|
|
||
|
if 'large' in examples:
|
||
|
x1 = np.repeat(x1, 5, axis=0)
|
||
|
x2 = np.repeat(x2, 5, axis=0)
|
||
|
|
||
|
nobs = x1.shape[0]
|
||
|
d1, d1_labels = dummy_1d(x1)
|
||
|
d2, d2_labels = dummy_1d(x2)
|
||
|
|
||
|
dd_full = dummy_product(d1, d2, method='full')
|
||
|
dd_dropl = dummy_product(d1, d2, method='drop-last')
|
||
|
dd_dropf = dummy_product(d1, d2, method='drop-first')
|
||
|
|
||
|
#Note: full parameterization of dummies is orthogonal
|
||
|
#np.eye(6)*10 in "large" example
|
||
|
print((np.dot(dd_full.T, dd_full) == np.diag(dd_full.sum(0))).all())
|
||
|
|
||
|
#check that transforms work
|
||
|
#generate 3 data sets with the 3 different parameterizations
|
||
|
|
||
|
effect_size = [1., 0.01][1]
|
||
|
noise_scale = [0.001, 0.1][0]
|
||
|
noise = noise_scale * np.random.randn(nobs)
|
||
|
beta = effect_size * np.arange(1,7)
|
||
|
ydata_full = (dd_full * beta).sum(1) + noise
|
||
|
ydata_dropl = (dd_dropl * beta).sum(1) + noise
|
||
|
ydata_dropf = (dd_dropf * beta).sum(1) + noise
|
||
|
|
||
|
resols_full_full = sm.OLS(ydata_full, dd_full).fit()
|
||
|
resols_full_dropf = sm.OLS(ydata_full, dd_dropf).fit()
|
||
|
params_f_f = resols_full_full.params
|
||
|
params_f_df = resols_full_dropf.params
|
||
|
|
||
|
resols_dropf_full = sm.OLS(ydata_dropf, dd_full).fit()
|
||
|
resols_dropf_dropf = sm.OLS(ydata_dropf, dd_dropf).fit()
|
||
|
params_df_f = resols_dropf_full.params
|
||
|
params_df_df = resols_dropf_dropf.params
|
||
|
|
||
|
|
||
|
tr_of = np.linalg.lstsq(dd_dropf, dd_full, rcond=-1)[0]
|
||
|
tr_fo = np.linalg.lstsq(dd_full, dd_dropf, rcond=-1)[0]
|
||
|
print(np.dot(tr_fo, params_df_df) - params_df_f)
|
||
|
print(np.dot(tr_of, params_f_f) - params_f_df)
|
||
|
|
||
|
transf_f_df = DummyTransform(dd_full, dd_dropf)
|
||
|
print(np.max(np.abs(dd_full - transf_f_df.inv_dot_right(dd_dropf))))
|
||
|
print(np.max(np.abs(dd_dropf - transf_f_df.dot_right(dd_full))))
|
||
|
print(np.max(np.abs(params_df_df
|
||
|
- transf_f_df.inv_dot_left(params_df_f))))
|
||
|
np.max(np.abs(params_f_df
|
||
|
- transf_f_df.inv_dot_left(params_f_f)))
|
||
|
|
||
|
prodlab, C1, C1lab, C2, C2lab,_ = contrast_product(v1name, v2name)
|
||
|
|
||
|
print('\ntvalues for no effect of factor 1')
|
||
|
print('each test is conditional on a level of factor 2')
|
||
|
print(C1lab)
|
||
|
print(resols_dropf_full.t_test(C1).tvalue)
|
||
|
|
||
|
print('\ntvalues for no effect of factor 2')
|
||
|
print('each test is conditional on a level of factor 1')
|
||
|
print(C2lab)
|
||
|
print(resols_dropf_full.t_test(C2).tvalue)
|
||
|
|
||
|
#covariance matrix of restrictions C2, note: orthogonal
|
||
|
resols_dropf_full.cov_params(C2)
|
||
|
|
||
|
#testing for no interaction effect
|
||
|
R_noint = np.hstack((np.zeros((2,4)), np.eye(2)))
|
||
|
inter_direct = resols_full_dropf.tvalues[-2:]
|
||
|
inter_transf = resols_full_full.t_test(transf_f_df.inv_dot_right(R_noint)).tvalue
|
||
|
print(np.max(np.abs(inter_direct - inter_transf)))
|
||
|
|
||
|
#now with class version
|
||
|
tw = TwoWay(ydata_dropf, x1, x2)
|
||
|
print(tw.ttest_interaction().tvalue)
|
||
|
print(tw.ttest_interaction().pvalue)
|
||
|
print(tw.ftest_interaction().fvalue)
|
||
|
print(tw.ftest_interaction().pvalue)
|
||
|
print(tw.ttest_conditional_effect(1)[0].tvalue)
|
||
|
print(tw.ttest_conditional_effect(2)[0].tvalue)
|
||
|
print(tw.summary_coeff())
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
''' documentation for early examples while developing - some have changed already
|
||
|
>>> y = np.arange(12)
|
||
|
>>> y
|
||
|
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
|
||
|
>>> x1 = np.arange(12)//4
|
||
|
>>> x1
|
||
|
array([0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2])
|
||
|
>>> x2 = np.arange(12)//2%2
|
||
|
>>> x2
|
||
|
array([0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1])
|
||
|
|
||
|
>>> d1 = dummy_1d(x1)
|
||
|
>>> d1
|
||
|
array([[1, 0, 0],
|
||
|
[1, 0, 0],
|
||
|
[1, 0, 0],
|
||
|
[1, 0, 0],
|
||
|
[0, 1, 0],
|
||
|
[0, 1, 0],
|
||
|
[0, 1, 0],
|
||
|
[0, 1, 0],
|
||
|
[0, 0, 1],
|
||
|
[0, 0, 1],
|
||
|
[0, 0, 1],
|
||
|
[0, 0, 1]])
|
||
|
|
||
|
>>> d2 = dummy_1d(x2)
|
||
|
>>> d2
|
||
|
array([[1, 0],
|
||
|
[1, 0],
|
||
|
[0, 1],
|
||
|
[0, 1],
|
||
|
[1, 0],
|
||
|
[1, 0],
|
||
|
[0, 1],
|
||
|
[0, 1],
|
||
|
[1, 0],
|
||
|
[1, 0],
|
||
|
[0, 1],
|
||
|
[0, 1]])
|
||
|
|
||
|
>>> d12 = dummy_product(d1, d2)
|
||
|
>>> d12
|
||
|
array([[1, 0, 0, 0, 0, 0],
|
||
|
[1, 0, 0, 0, 0, 0],
|
||
|
[0, 1, 0, 0, 0, 0],
|
||
|
[0, 1, 0, 0, 0, 0],
|
||
|
[0, 0, 1, 0, 0, 0],
|
||
|
[0, 0, 1, 0, 0, 0],
|
||
|
[0, 0, 0, 1, 0, 0],
|
||
|
[0, 0, 0, 1, 0, 0],
|
||
|
[0, 0, 0, 0, 1, 0],
|
||
|
[0, 0, 0, 0, 1, 0],
|
||
|
[0, 0, 0, 0, 0, 1],
|
||
|
[0, 0, 0, 0, 0, 1]])
|
||
|
|
||
|
|
||
|
>>> d12rl = dummy_product(d1[:,:-1], d2[:,:-1])
|
||
|
>>> np.column_stack((np.ones(d1.shape[0]), d1[:,:-1], d2[:,:-1],d12rl))
|
||
|
array([[ 1., 1., 0., 1., 1., 0.],
|
||
|
[ 1., 1., 0., 1., 1., 0.],
|
||
|
[ 1., 1., 0., 0., 0., 0.],
|
||
|
[ 1., 1., 0., 0., 0., 0.],
|
||
|
[ 1., 0., 1., 1., 0., 1.],
|
||
|
[ 1., 0., 1., 1., 0., 1.],
|
||
|
[ 1., 0., 1., 0., 0., 0.],
|
||
|
[ 1., 0., 1., 0., 0., 0.],
|
||
|
[ 1., 0., 0., 1., 0., 0.],
|
||
|
[ 1., 0., 0., 1., 0., 0.],
|
||
|
[ 1., 0., 0., 0., 0., 0.],
|
||
|
[ 1., 0., 0., 0., 0., 0.]])
|
||
|
'''
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
#nprod = ['%s_%s' % (i,j) for i in ['a0', 'a1', 'a2'] for j in ['b0', 'b1']]
|
||
|
#>>> [''.join(['%s%s' % (signstr(c),v) for c,v in zip(row, nprod) if c != 0])
|
||
|
# for row in np.kron(dd[1:], np.eye(2))]
|
||
|
|
||
|
|
||
|
|
||
|
'''
|
||
|
>>> nprod = ['%s_%s' % (i,j) for i in ['a0', 'a1', 'a2'] for j in ['b0', 'b1']]
|
||
|
>>> nprod
|
||
|
['a0_b0', 'a0_b1', 'a1_b0', 'a1_b1', 'a2_b0', 'a2_b1']
|
||
|
>>> [''.join(['%s%s' % (signstr(c),v) for c,v in zip(row, nprod) if c != 0]) for row in np.kron(dd[1:], np.eye(2))]
|
||
|
['-a0b0+a1b0', '-a0b1+a1b1', '-a0b0+a2b0', '-a0b1+a2b1']
|
||
|
>>> [''.join(['%s%s' % (signstr(c),v) for c,v in zip(row, nprod)[::-1] if c != 0]) for row in np.kron(dd[1:], np.eye(2))]
|
||
|
['+a1_b0-a0_b0', '+a1_b1-a0_b1', '+a2_b0-a0_b0', '+a2_b1-a0_b1']
|
||
|
|
||
|
>>> np.r_[[[1,0,0,0,0]],contrast_all_one(5)]
|
||
|
array([[ 1., 0., 0., 0., 0.],
|
||
|
[ 1., -1., 0., 0., 0.],
|
||
|
[ 1., 0., -1., 0., 0.],
|
||
|
[ 1., 0., 0., -1., 0.],
|
||
|
[ 1., 0., 0., 0., -1.]])
|
||
|
|
||
|
>>> idxprod = [(i,j) for i in range(3) for j in range(2)]
|
||
|
>>> idxprod
|
||
|
[(0, 0), (0, 1), (1, 0), (1, 1), (2, 0), (2, 1)]
|
||
|
>>> np.array(idxprod).reshape(2,3,2,order='F')[:,:,0]
|
||
|
array([[0, 1, 2],
|
||
|
[0, 1, 2]])
|
||
|
>>> np.array(idxprod).reshape(2,3,2,order='F')[:,:,1]
|
||
|
array([[0, 0, 0],
|
||
|
[1, 1, 1]])
|
||
|
>>> dd3_ = np.r_[[[0,0,0]],contrast_all_one(3)]
|
||
|
|
||
|
|
||
|
|
||
|
pairwise contrasts and reparameterization
|
||
|
|
||
|
dd = np.r_[[[1,0,0,0,0]],-contrast_all_one(5)]
|
||
|
>>> dd
|
||
|
array([[ 1., 0., 0., 0., 0.],
|
||
|
[-1., 1., 0., 0., 0.],
|
||
|
[-1., 0., 1., 0., 0.],
|
||
|
[-1., 0., 0., 1., 0.],
|
||
|
[-1., 0., 0., 0., 1.]])
|
||
|
>>> np.dot(dd.T, np.arange(5))
|
||
|
array([-10., 1., 2., 3., 4.])
|
||
|
>>> np.round(np.linalg.inv(dd.T)).astype(int)
|
||
|
array([[1, 1, 1, 1, 1],
|
||
|
[0, 1, 0, 0, 0],
|
||
|
[0, 0, 1, 0, 0],
|
||
|
[0, 0, 0, 1, 0],
|
||
|
[0, 0, 0, 0, 1]])
|
||
|
>>> np.round(np.linalg.inv(dd)).astype(int)
|
||
|
array([[1, 0, 0, 0, 0],
|
||
|
[1, 1, 0, 0, 0],
|
||
|
[1, 0, 1, 0, 0],
|
||
|
[1, 0, 0, 1, 0],
|
||
|
[1, 0, 0, 0, 1]])
|
||
|
>>> dd
|
||
|
array([[ 1., 0., 0., 0., 0.],
|
||
|
[-1., 1., 0., 0., 0.],
|
||
|
[-1., 0., 1., 0., 0.],
|
||
|
[-1., 0., 0., 1., 0.],
|
||
|
[-1., 0., 0., 0., 1.]])
|
||
|
>>> ddinv=np.round(np.linalg.inv(dd.T)).astype(int)
|
||
|
>>> np.dot(ddinv, np.arange(5))
|
||
|
array([10, 1, 2, 3, 4])
|
||
|
>>> np.dot(dd, np.arange(5))
|
||
|
array([ 0., 1., 2., 3., 4.])
|
||
|
>>> np.dot(dd, 5+np.arange(5))
|
||
|
array([ 5., 1., 2., 3., 4.])
|
||
|
>>> ddinv2 = np.round(np.linalg.inv(dd)).astype(int)
|
||
|
>>> np.dot(ddinv2, np.arange(5))
|
||
|
array([0, 1, 2, 3, 4])
|
||
|
>>> np.dot(ddinv2, 5+np.arange(5))
|
||
|
array([ 5, 11, 12, 13, 14])
|
||
|
>>> np.dot(ddinv2, [5, 0, 0 , 1, 2])
|
||
|
array([5, 5, 5, 6, 7])
|
||
|
>>> np.dot(ddinv2, dd)
|
||
|
array([[ 1., 0., 0., 0., 0.],
|
||
|
[ 0., 1., 0., 0., 0.],
|
||
|
[ 0., 0., 1., 0., 0.],
|
||
|
[ 0., 0., 0., 1., 0.],
|
||
|
[ 0., 0., 0., 0., 1.]])
|
||
|
|
||
|
|
||
|
|
||
|
>>> dd3 = -np.r_[[[1,0,0]],contrast_all_one(3)]
|
||
|
>>> dd2 = -np.r_[[[1,0]],contrast_all_one(2)]
|
||
|
>>> np.kron(np.eye(3), dd2)
|
||
|
array([[-1., 0., 0., 0., 0., 0.],
|
||
|
[-1., 1., 0., 0., 0., 0.],
|
||
|
[ 0., 0., -1., 0., 0., 0.],
|
||
|
[ 0., 0., -1., 1., 0., 0.],
|
||
|
[ 0., 0., 0., 0., -1., 0.],
|
||
|
[ 0., 0., 0., 0., -1., 1.]])
|
||
|
>>> dd2
|
||
|
array([[-1., 0.],
|
||
|
[-1., 1.]])
|
||
|
>>> np.kron(np.eye(3), dd2[1:])
|
||
|
array([[-1., 1., 0., 0., 0., 0.],
|
||
|
[ 0., 0., -1., 1., 0., 0.],
|
||
|
[ 0., 0., 0., 0., -1., 1.]])
|
||
|
>>> np.kron(dd[1:], np.eye(2))
|
||
|
array([[-1., 0., 1., 0., 0., 0.],
|
||
|
[ 0., -1., 0., 1., 0., 0.],
|
||
|
[-1., 0., 0., 0., 1., 0.],
|
||
|
[ 0., -1., 0., 0., 0., 1.]])
|
||
|
|
||
|
|
||
|
|
||
|
d_ = np.r_[[[1,0,0,0,0]],contrast_all_one(5)]
|
||
|
>>> d_
|
||
|
array([[ 1., 0., 0., 0., 0.],
|
||
|
[ 1., -1., 0., 0., 0.],
|
||
|
[ 1., 0., -1., 0., 0.],
|
||
|
[ 1., 0., 0., -1., 0.],
|
||
|
[ 1., 0., 0., 0., -1.]])
|
||
|
>>> np.round(np.linalg.pinv(d_)).astype(int)
|
||
|
array([[ 1, 0, 0, 0, 0],
|
||
|
[ 1, -1, 0, 0, 0],
|
||
|
[ 1, 0, -1, 0, 0],
|
||
|
[ 1, 0, 0, -1, 0],
|
||
|
[ 1, 0, 0, 0, -1]])
|
||
|
>>> np.linalg.inv(d_).astype(int)
|
||
|
array([[ 1, 0, 0, 0, 0],
|
||
|
[ 1, -1, 0, 0, 0],
|
||
|
[ 1, 0, -1, 0, 0],
|
||
|
[ 1, 0, 0, -1, 0],
|
||
|
[ 1, 0, 0, 0, -1]])
|
||
|
|
||
|
|
||
|
group means
|
||
|
|
||
|
>>> sli = [slice(None)] + [None]*(3-2) + [slice(None)]
|
||
|
>>> (np.column_stack((y, x1, x2))[...,None] * d1[sli]).sum(0)*1./d1.sum(0)
|
||
|
array([[ 1.5, 5.5, 9.5],
|
||
|
[ 0. , 1. , 2. ],
|
||
|
[ 0.5, 0.5, 0.5]])
|
||
|
|
||
|
>>> [(z[:,None] * d1).sum(0)*1./d1.sum(0) for z in np.column_stack((y, x1, x2)).T]
|
||
|
[array([ 1.5, 5.5, 9.5]), array([ 0., 1., 2.]), array([ 0.5, 0.5, 0.5])]
|
||
|
>>>
|
||
|
|
||
|
'''
|