r"""
Bayesian inference for generalized linear mixed models.

Currently only families without additional scale or shape parameters
are supported (binomial and Poisson).

Two estimation approaches are supported: Laplace approximation
('maximum a posteriori'), and variational Bayes (mean field
approximation to the posterior distribution).

All realizations of random effects are modeled to be mutually
independent in this implementation.

The `exog_vc` matrix is the design matrix for the random effects.
Every column of `exog_vc` corresponds to an independent realization of
a random effect. These random effects have mean zero and an unknown
standard deviation. The standard deviation parameters are constrained
to be equal within subsets of the columns. When not using formulas,
these subsets are specified through the parameter `ident`. `ident`
must have the same length as the number of columns of `exog_vc`, and
two columns whose `ident` values are equal have the same standard
deviation. When formulas are used, the columns of `exog_vc` derived
from a common formula are constrained to have the same standard
deviation.
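
For example, given hypothetical arrays `y` (response), `x` (fixed
effects design) and `z` (a three-column random effects design), the
following `ident` gives the first two columns of `z` a shared standard
deviation and the third column its own:

>>> import numpy as np
>>> ident = np.array([0, 0, 1])
>>> model = PoissonBayesMixedGLM(y, x, exog_vc=z, ident=ident)
>>> result = model.fit_vb()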

In many applications, `exog_vc` will be sparse. A sparse matrix may
be passed when constructing a model class. If a dense matrix is
passed, it will be converted internally to a sparse matrix. There
currently is no way to avoid creating a temporary dense version of
`exog_vc` when using formulas.

Model and parameterization
--------------------------
The joint density of data and parameters factors as:

.. math::

    p(y | vc, fep) p(vc | vcp) p(vcp) p(fep)

The terms :math:`p(vcp)` and :math:`p(fep)` are prior distributions
that are taken to be Gaussian (the :math:`vcp` parameters are log
standard deviations so the standard deviations have log-normal
distributions). The random effects distribution :math:`p(vc | vcp)`
is independent Gaussian (random effect realizations are independent
within and between values of the `ident` array). The model
:math:`p(y | vc, fep)` depends on the specific GLM being fit.
"""

import warnings

import numpy as np
import pandas as pd
import patsy
from scipy import sparse
from scipy.optimize import minimize

import statsmodels.base.model as base
from statsmodels.genmod import families
from statsmodels.iolib import summary2

# 10-point Gauss-Legendre quadrature (weight, node) pairs on [-1, 1];
# scaled to [-rng, rng] where used below.
glw = [
    [0.2955242247147529, -0.1488743389816312],
    [0.2955242247147529, 0.1488743389816312],
    [0.2692667193099963, -0.4333953941292472],
    [0.2692667193099963, 0.4333953941292472],
    [0.2190863625159820, -0.6794095682990244],
    [0.2190863625159820, 0.6794095682990244],
    [0.1494513491505806, -0.8650633666889845],
    [0.1494513491505806, 0.8650633666889845],
    [0.0666713443086881, -0.9739065285171717],
    [0.0666713443086881, 0.9739065285171717],
]
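
# As an illustrative check (not executed here), the rule above
# approximates a standard normal expectation E[h(Z)] by integrating
# over [-rng, rng], exactly as done in `vb_elbo_base` below.  For the
# smooth test function h(z) = cos(z):
#
#     rng = 5
#     approx = rng * sum(w * np.cos(rng * x) * np.exp(-(rng * x)**2 / 2)
#                        for w, x in glw) / np.sqrt(2 * np.pi)
#
# approx is roughly E[cos(Z)] = exp(-0.5) ~= 0.607.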

_init_doc = r"""
    Generalized Linear Mixed Model with Bayesian estimation

    The class implements the Laplace approximation to the posterior
    distribution (`fit_map`) and a variational Bayes approximation to
    the posterior (`fit_vb`).  See the two fit method docstrings for
    more information about the fitting approaches.

    Parameters
    ----------
    endog : array_like
        Vector of response values.
    exog : array_like
        Array of covariates for the fixed effects part of the mean
        structure.
    exog_vc : array_like
        Array of covariates for the random part of the model.  A
        scipy.sparse array may be provided, or else the passed
        array will be converted to sparse internally.
    ident : array_like
        Array of integer labels showing which random terms (columns
        of `exog_vc`) have a common variance.
    vcp_p : float
        Prior standard deviation for variance component parameters
        (the prior standard deviation of log(s) is vcp_p, where s is
        the standard deviation of a random effect).
    fe_p : float
        Prior standard deviation for fixed effects parameters.
    family : statsmodels.genmod.families instance
        The GLM family.
    fep_names : list[str]
        The names of the fixed effects parameters (corresponding to
        columns of exog).  If None, default names are constructed.
    vcp_names : list[str]
        The names of the variance component parameters (corresponding
        to distinct labels in ident).  If None, default names are
        constructed.
    vc_names : list[str]
        The names of the random effect realizations.

    Returns
    -------
    BayesMixedGLMResults object

    Notes
    -----
    There are three types of values in the posterior distribution:
    fixed effects parameters (fep), corresponding to the columns of
    `exog`, random effects realizations (vc), corresponding to the
    columns of `exog_vc`, and the standard deviations of the random
    effects realizations (vcp), corresponding to the unique integer
    labels in `ident`.

    All random effects are modeled as being independent Gaussian
    values (given the variance structure parameters).  Every column of
    `exog_vc` has a distinct realized random effect that is used to
    form the linear predictors.  The elements of `ident` determine the
    distinct variance structure parameters.  Two random effect
    realizations that have the same value in `ident` have the same
    variance.  When fitting with a formula, `ident` is constructed
    internally (each element of `vc_formulas` yields a distinct label
    in `ident`).

    The random effect standard deviation parameters (`vcp`) have
    log-normal prior distributions with mean 0 and standard deviation
    `vcp_p`.

    Note that for some families, e.g. Binomial, the posterior mode may
    be difficult to find numerically if `vcp_p` is set to too large a
    value.  Setting `vcp_p` to 0.5 seems to work well.

    The prior for the fixed effects parameters is Gaussian with mean 0
    and standard deviation `fe_p`.  It is recommended that quantitative
    covariates be standardized.

    Examples
    --------{example}

    References
    ----------
    Introduction to generalized linear mixed models:
    https://stats.idre.ucla.edu/other/mult-pkg/introduction-to-generalized-linear-mixed-models

    SAS documentation:
    https://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug_intromix_a0000000215.htm

    An assessment of estimation methods for generalized linear mixed
    models with binary outcomes:
    https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3866838/
    """

# The code in the example should be identical to what appears in
# the test_doc_examples unit test
_logit_example = """
    A binomial (logistic) random effects model with random intercepts
    for villages and random slopes for each year within each village:

    >>> random = {"a": '0 + C(Village)', "b": '0 + C(Village)*year_cen'}
    >>> model = BinomialBayesMixedGLM.from_formula(
                    'y ~ year_cen', random, data)
    >>> result = model.fit_vb()
"""

# The code in the example should be identical to what appears in
# the test_doc_examples unit test
_poisson_example = """
    A Poisson random effects model with random intercepts for villages
    and random slopes for each year within each village:

    >>> random = {"a": '0 + C(Village)', "b": '0 + C(Village)*year_cen'}
    >>> model = PoissonBayesMixedGLM.from_formula(
                    'y ~ year_cen', random, data)
    >>> result = model.fit_vb()
"""


class _BayesMixedGLM(base.Model):
    def __init__(self,
                 endog,
                 exog,
                 exog_vc=None,
                 ident=None,
                 family=None,
                 vcp_p=1,
                 fe_p=2,
                 fep_names=None,
                 vcp_names=None,
                 vc_names=None,
                 **kwargs):

        if exog.ndim == 1:
            if isinstance(exog, np.ndarray):
                exog = exog[:, None]
            else:
                exog = pd.DataFrame(exog)

        if exog.ndim != 2:
            msg = "'exog' must be one- or two-dimensional"
            raise ValueError(msg)

        if exog_vc.ndim == 1:
            if isinstance(exog_vc, np.ndarray):
                exog_vc = exog_vc[:, None]
            else:
                exog_vc = pd.DataFrame(exog_vc)

        if exog_vc.ndim != 2:
            msg = "'exog_vc' must be one- or two-dimensional"
            raise ValueError(msg)

        ident = np.asarray(ident)
        if ident.ndim != 1:
            msg = "ident must be a one-dimensional array"
            raise ValueError(msg)

        if len(ident) != exog_vc.shape[1]:
            msg = "len(ident) should match the number of columns of exog_vc"
            raise ValueError(msg)

        if not np.issubdtype(ident.dtype, np.integer):
            msg = "ident must have an integer dtype"
            raise ValueError(msg)

        # Get the fixed effects parameter names
        if fep_names is None:
            if hasattr(exog, "columns"):
                fep_names = exog.columns.tolist()
            else:
                fep_names = ["FE_%d" % (k + 1) for k in range(exog.shape[1])]

        # Get the variance parameter names
        if vcp_names is None:
            vcp_names = ["VC_%d" % (k + 1) for k in range(int(max(ident)) + 1)]
        else:
            if len(vcp_names) != len(set(ident)):
                msg = ("The length of vcp_names should match the number "
                       "of distinct values in ident")
                raise ValueError(msg)

        if not sparse.issparse(exog_vc):
            exog_vc = sparse.csr_matrix(exog_vc)

        ident = ident.astype(int)
        vcp_p = float(vcp_p)
        fe_p = float(fe_p)

        # Number of fixed effects parameters
        if exog is None:
            k_fep = 0
        else:
            k_fep = exog.shape[1]

        # Number of variance component structure parameters and
        # variance component realizations.
        if exog_vc is None:
            k_vc = 0
            k_vcp = 0
        else:
            k_vc = exog_vc.shape[1]
            k_vcp = max(ident) + 1

        # power might be better but not available in older scipy
        exog_vc2 = exog_vc.multiply(exog_vc)

        super().__init__(endog, exog, **kwargs)

        self.exog_vc = exog_vc
        self.exog_vc2 = exog_vc2
        self.ident = ident
        self.family = family
        self.k_fep = k_fep
        self.k_vc = k_vc
        self.k_vcp = k_vcp
        self.fep_names = fep_names
        self.vcp_names = vcp_names
        self.vc_names = vc_names
        self.fe_p = fe_p
        self.vcp_p = vcp_p
        self.names = fep_names + vcp_names
        if vc_names is not None:
            self.names += vc_names
    def _unpack(self, vec):

        ii = 0

        # Fixed effects parameters
        fep = vec[:ii + self.k_fep]
        ii += self.k_fep

        # Variance component structure parameters (standard
        # deviations).  These are on the log scale.  The standard
        # deviation for random effect j is exp(vcp[ident[j]]).
        vcp = vec[ii:ii + self.k_vcp]
        ii += self.k_vcp

        # Random effect realizations
        vc = vec[ii:]

        return fep, vcp, vc

    def logposterior(self, params):
        """
        The overall log-density: log p(y, fe, vc, vcp).

        This differs by an additive constant from the log posterior
        log p(fe, vc, vcp | y).
        """

        fep, vcp, vc = self._unpack(params)

        # Contributions from p(y | x, vc)
        lp = 0
        if self.k_fep > 0:
            lp += np.dot(self.exog, fep)
        if self.k_vc > 0:
            lp += self.exog_vc.dot(vc)

        mu = self.family.link.inverse(lp)
        ll = self.family.loglike(self.endog, mu)

        if self.k_vc > 0:

            # Contributions from p(vc | vcp)
            vcp0 = vcp[self.ident]
            s = np.exp(vcp0)
            ll -= 0.5 * np.sum(vc**2 / s**2) + np.sum(vcp0)

            # Contributions from p(vcp)
            ll -= 0.5 * np.sum(vcp**2 / self.vcp_p**2)

        # Contributions from p(fep)
        if self.k_fep > 0:
            ll -= 0.5 * np.sum(fep**2 / self.fe_p**2)

        return ll

    def logposterior_grad(self, params):
        """
        The gradient of the log posterior.
        """

        fep, vcp, vc = self._unpack(params)

        lp = 0
        if self.k_fep > 0:
            lp += np.dot(self.exog, fep)
        if self.k_vc > 0:
            lp += self.exog_vc.dot(vc)

        mu = self.family.link.inverse(lp)
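
        # For a GLM with link g and variance function V, the derivative
        # of the log-likelihood with respect to the linear predictor is
        # (y - mu) / (g'(mu) * V(mu)); this "score factor" is computed
        # below.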
        score_factor = (self.endog - mu) / self.family.link.deriv(mu)
        score_factor /= self.family.variance(mu)

        te = [None, None, None]

        # Contributions from p(y | x, z, vc)
        if self.k_fep > 0:
            te[0] = np.dot(score_factor, self.exog)
        if self.k_vc > 0:
            te[2] = self.exog_vc.transpose().dot(score_factor)

        if self.k_vc > 0:
            # Contributions from p(vc | vcp)
            # vcp0 = vcp[self.ident]
            # s = np.exp(vcp0)
            # ll -= 0.5 * np.sum(vc**2 / s**2) + np.sum(vcp0)
            vcp0 = vcp[self.ident]
            s = np.exp(vcp0)
            u = vc**2 / s**2 - 1
            te[1] = np.bincount(self.ident, weights=u)
            te[2] -= vc / s**2

            # Contributions from p(vcp)
            # ll -= 0.5 * np.sum(vcp**2 / self.vcp_p**2)
            te[1] -= vcp / self.vcp_p**2

        # Contributions from p(fep)
        if self.k_fep > 0:
            te[0] -= fep / self.fe_p**2

        te = [x for x in te if x is not None]

        return np.concatenate(te)

    def _get_start(self):
        start_fep = np.zeros(self.k_fep)
        start_vcp = np.ones(self.k_vcp)
        start_vc = np.random.normal(size=self.k_vc)
        start = np.concatenate((start_fep, start_vcp, start_vc))
        return start

    @classmethod
    def from_formula(cls,
                     formula,
                     vc_formulas,
                     data,
                     family=None,
                     vcp_p=1,
                     fe_p=2):
        """
        Construct a BayesMixedGLM using a formula.

        Parameters
        ----------
        formula : str
            Formula for the endog and fixed effects terms (use ~ to
            separate dependent and independent expressions).
        vc_formulas : dictionary
            vc_formulas[name] is a one-sided formula that creates one
            collection of random effects with a common variance
            parameter.  If using categorical (factor) variables to
            produce variance components, note that generally `0 + ...`
            should be used so that an intercept is not included.
        data : data frame
            The data to which the formulas are applied.
        family : genmod.families instance
            A GLM family.
        vcp_p : float
            The prior standard deviation for the logarithms of the
            standard deviations of the random effects.
        fe_p : float
            The prior standard deviation for the fixed effects parameters.
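
        Examples
        --------
        A sketch, assuming a hypothetical data frame ``data`` with a
        response ``y``, a covariate ``x`` and a grouping factor ``g``
        (one random intercept per level of ``g``):

        >>> vcf = {"group": "0 + C(g)"}
        >>> model = BinomialBayesMixedGLM.from_formula("y ~ x", vcf, data)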
        """

        ident = []
        exog_vc = []
        vcp_names = []
        j = 0
        for na, fml in vc_formulas.items():
            mat = patsy.dmatrix(fml, data, return_type='dataframe')
            exog_vc.append(mat)
            vcp_names.append(na)
            ident.append(j * np.ones(mat.shape[1], dtype=np.int_))
            j += 1
        exog_vc = pd.concat(exog_vc, axis=1)
        vc_names = exog_vc.columns.tolist()

        ident = np.concatenate(ident)

        model = super().from_formula(
            formula,
            data=data,
            family=family,
            subset=None,
            exog_vc=exog_vc,
            ident=ident,
            vc_names=vc_names,
            vcp_names=vcp_names,
            fe_p=fe_p,
            vcp_p=vcp_p)

        return model

    def fit(self, method="BFGS", minim_opts=None):
        """
        fit is equivalent to fit_map.

        See fit_map for parameter information.

        Use `fit_vb` to fit the model using variational Bayes.
        """
        return self.fit_map(method, minim_opts)

    def fit_map(self, method="BFGS", minim_opts=None, scale_fe=False):
        """
        Construct the Laplace approximation to the posterior distribution.

        Parameters
        ----------
        method : str
            Optimization method for finding the posterior mode.
        minim_opts : dict
            Options passed to scipy.minimize.
        scale_fe : bool
            If True, the columns of the fixed effects design matrix
            are centered and scaled to unit variance before fitting
            the model.  The results are back-transformed so that they
            are presented on the original scale.

        Returns
        -------
        BayesMixedGLMResults instance.
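
        Examples
        --------
        A sketch of typical use (assuming `model` is an instance of
        BinomialBayesMixedGLM or PoissonBayesMixedGLM):

        >>> result = model.fit_map(minim_opts={"maxiter": 500})
        >>> print(result.summary())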
        """

        if scale_fe:
            mn = self.exog.mean(0)
            sc = self.exog.std(0)
            self._exog_save = self.exog
            self.exog = self.exog.copy()
            ixs = np.flatnonzero(sc > 1e-8)
            self.exog[:, ixs] -= mn[ixs]
            self.exog[:, ixs] /= sc[ixs]

        def fun(params):
            return -self.logposterior(params)

        def grad(params):
            return -self.logposterior_grad(params)

        start = self._get_start()

        r = minimize(fun, start, method=method, jac=grad, options=minim_opts)
        if not r.success:
            msg = ("Laplace fitting did not converge, |gradient|=%.6f" %
                   np.sqrt(np.sum(r.jac**2)))
            warnings.warn(msg)

        from statsmodels.tools.numdiff import approx_fprime
        hess = approx_fprime(r.x, grad)
        cov = np.linalg.inv(hess)

        params = r.x

        if scale_fe:
            self.exog = self._exog_save
            del self._exog_save
            params[ixs] /= sc[ixs]
            # Use np.ix_ so the division updates `cov` in place;
            # chained fancy indexing would only modify a copy.
            cov[np.ix_(ixs, ixs)] /= np.outer(sc[ixs], sc[ixs])

        return BayesMixedGLMResults(self, params, cov, optim_retvals=r)

    def predict(self, params, exog=None, linear=False):
        """
        Return the fitted mean structure.

        Parameters
        ----------
        params : array_like
            The parameter vector, may be the full parameter vector, or may
            be truncated to include only the mean parameters.
        exog : array_like
            The design matrix for the mean structure.  If omitted, use the
            model's design matrix.
        linear : bool
            If True, return the linear predictor without passing through
            the link function.

        Returns
        -------
        A 1-dimensional array of predicted values
        """

        if exog is None:
            exog = self.exog

        q = exog.shape[1]
        pr = np.dot(exog, params[0:q])

        if not linear:
            pr = self.family.link.inverse(pr)

        return pr


class _VariationalBayesMixedGLM:
    """
    A mixin providing generic (not family-specific) methods for
    variational Bayes mean field fitting.
    """

    # Integration range (from -rng to +rng).  The integrals are with
    # respect to a standard Gaussian distribution so (-5, 5) will be
    # sufficient in many cases.
    rng = 5

    verbose = False

    # Returns the mean and variance of the linear predictor under the
    # given distribution parameters.
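    # Because the mean field posterior treats all parameters as
    # independent Gaussians, the variance of the linear predictor is a
    # weighted sum of elementwise variances, with squared covariates
    # as weights.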
    def _lp_stats(self, fep_mean, fep_sd, vc_mean, vc_sd):

        tm = np.dot(self.exog, fep_mean)
        tv = np.dot(self.exog**2, fep_sd**2)
        tm += self.exog_vc.dot(vc_mean)
        tv += self.exog_vc2.dot(vc_sd**2)

        return tm, tv

    def vb_elbo_base(self, h, tm, fep_mean, vcp_mean, vc_mean, fep_sd, vcp_sd,
                     vc_sd):
        """
        Returns the evidence lower bound (ELBO) for the model.

        This function calculates the family-specific ELBO function
        based on information provided from a subclass.

        Parameters
        ----------
        h : function mapping 1d vector to 1d vector
            The contribution of the model to the ELBO function can be
            expressed as y_i*lp_i + Eh_i(z), where y_i and lp_i are
            the response and linear predictor for observation i, and z
            is a standard normal random variable.  This formulation
            can be achieved for any GLM with a canonical link
            function.
        tm : array_like
            The posterior mean of the linear predictor, as returned by
            `_lp_stats`.
        """

        # p(y | vc) contributions
        iv = 0
        for w in glw:
            z = self.rng * w[1]
            iv += w[0] * h(z) * np.exp(-z**2 / 2)
        iv /= np.sqrt(2 * np.pi)
        iv *= self.rng
        iv += self.endog * tm
        iv = iv.sum()

        # p(vc | vcp) * p(vcp) * p(fep) contributions
        iv += self._elbo_common(fep_mean, fep_sd, vcp_mean, vcp_sd, vc_mean,
                                vc_sd)

        # The entropy of the Gaussian mean field approximation
        # contributes the sum of the log standard deviations.
        r = (iv + np.sum(np.log(fep_sd)) + np.sum(np.log(vcp_sd)) + np.sum(
            np.log(vc_sd)))

        return r

    def vb_elbo_grad_base(self, h, tm, tv, fep_mean, vcp_mean, vc_mean,
                          fep_sd, vcp_sd, vc_sd):
        """
        Return the gradient of the ELBO function.

        See vb_elbo_base for parameters.
        """

        fep_mean_grad = 0.
        fep_sd_grad = 0.
        vcp_mean_grad = 0.
        vcp_sd_grad = 0.
        vc_mean_grad = 0.
        vc_sd_grad = 0.

        # p(y | vc) contributions
        for w in glw:
            z = self.rng * w[1]
            u = h(z) * np.exp(-z**2 / 2) / np.sqrt(2 * np.pi)
            r = u / np.sqrt(tv)
            fep_mean_grad += w[0] * np.dot(u, self.exog)
            vc_mean_grad += w[0] * self.exog_vc.transpose().dot(u)
            fep_sd_grad += w[0] * z * np.dot(r, self.exog**2 * fep_sd)
            v = self.exog_vc2.multiply(vc_sd).transpose().dot(r)
            v = np.squeeze(np.asarray(v))
            vc_sd_grad += w[0] * z * v

        fep_mean_grad *= self.rng
        vc_mean_grad *= self.rng
        fep_sd_grad *= self.rng
        vc_sd_grad *= self.rng
        fep_mean_grad += np.dot(self.endog, self.exog)
        vc_mean_grad += self.exog_vc.transpose().dot(self.endog)

        (fep_mean_grad_i, fep_sd_grad_i, vcp_mean_grad_i, vcp_sd_grad_i,
         vc_mean_grad_i, vc_sd_grad_i) = self._elbo_grad_common(
             fep_mean, fep_sd, vcp_mean, vcp_sd, vc_mean, vc_sd)

        fep_mean_grad += fep_mean_grad_i
        fep_sd_grad += fep_sd_grad_i
        vcp_mean_grad += vcp_mean_grad_i
        vcp_sd_grad += vcp_sd_grad_i
        vc_mean_grad += vc_mean_grad_i
        vc_sd_grad += vc_sd_grad_i

        # Gradient of the entropy term.
        fep_sd_grad += 1 / fep_sd
        vcp_sd_grad += 1 / vcp_sd
        vc_sd_grad += 1 / vc_sd

        mean_grad = np.concatenate((fep_mean_grad, vcp_mean_grad,
                                    vc_mean_grad))
        sd_grad = np.concatenate((fep_sd_grad, vcp_sd_grad, vc_sd_grad))

        if self.verbose:
            print(
                "|G|=%f" % np.sqrt(np.sum(mean_grad**2) + np.sum(sd_grad**2)))

        return mean_grad, sd_grad
def fit_vb(self,
|
||
|
mean=None,
|
||
|
sd=None,
|
||
|
fit_method="BFGS",
|
||
|
minim_opts=None,
|
||
|
scale_fe=False,
|
||
|
verbose=False):
|
||
|
"""
|
||
|
Fit a model using the variational Bayes mean field approximation.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
mean : array_like
|
||
|
Starting value for VB mean vector
|
||
|
sd : array_like
|
||
|
Starting value for VB standard deviation vector
|
||
|
fit_method : str
|
||
|
Algorithm for scipy.minimize
|
||
|
minim_opts : dict
|
||
|
Options passed to scipy.minimize
|
||
|
scale_fe : bool
|
||
|
If true, the columns of the fixed effects design matrix
|
||
|
are centered and scaled to unit variance before fitting
|
||
|
the model. The results are back-transformed so that the
|
||
|
results are presented on the original scale.
|
||
|
verbose : bool
|
||
|
If True, print the gradient norm to the screen each time
|
||
|
it is calculated.
|
||
|
|
||
|
Notes
|
||
|
-----
|
||
|
The goal is to find a factored Gaussian approximation
|
||
|
q1*q2*... to the posterior distribution, approximately
|
||
|
minimizing the KL divergence from the factored approximation
|
||
|
to the actual posterior. The KL divergence, or ELBO function
|
||
|
has the form
|
||
|
|
||
|
E* log p(y, fe, vcp, vc) - E* log q
|
||
|
|
||
|
where E* is expectation with respect to the product of qj.
|
||
|
|
||
|
References
|
||
|
----------
|
||
|
Blei, Kucukelbir, McAuliffe (2017). Variational Inference: A
|
||
|
review for Statisticians
|
||
|
https://arxiv.org/pdf/1601.00670.pdf
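
        Examples
        --------
        A sketch of typical use (assuming `model` was created by one
        of the `from_formula` constructors):

        >>> result = model.fit_vb(scale_fe=True)
        >>> print(result.summary())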
        """

        self.verbose = verbose

        if scale_fe:
            mn = self.exog.mean(0)
            sc = self.exog.std(0)
            self._exog_save = self.exog
            self.exog = self.exog.copy()
            ixs = np.flatnonzero(sc > 1e-8)
            self.exog[:, ixs] -= mn[ixs]
            self.exog[:, ixs] /= sc[ixs]

        n = self.k_fep + self.k_vcp + self.k_vc
        ml = n
        if mean is None:
            m = np.zeros(n)
        else:
            if len(mean) != ml:
                raise ValueError(
                    "mean has incorrect length, %d != %d" % (len(mean), ml))
            m = mean.copy()
        if sd is None:
            s = -0.5 + 0.1 * np.random.normal(size=n)
        else:
            if len(sd) != ml:
                raise ValueError(
                    "sd has incorrect length, %d != %d" % (len(sd), ml))

            # s is parametrized on the log-scale internally when
            # optimizing the ELBO function (this is transparent to the
            # caller)
            s = np.log(sd)

        # Do not allow the variance parameter starting mean values to
        # be too small.
        i1, i2 = self.k_fep, self.k_fep + self.k_vcp
        m[i1:i2] = np.where(m[i1:i2] < -1, -1, m[i1:i2])

        # Do not allow the posterior standard deviation starting values
        # to be too small.
        s = np.where(s < -1, -1, s)

        def elbo(x):
            n = len(x) // 2
            return -self.vb_elbo(x[:n], np.exp(x[n:]))

        def elbo_grad(x):
            n = len(x) // 2
            gm, gs = self.vb_elbo_grad(x[:n], np.exp(x[n:]))
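            # Chain rule: the optimization is over log(sd), so scale
            # the sd gradient by sd = exp(x[n:]).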
            gs *= np.exp(x[n:])
            return -np.concatenate((gm, gs))

        start = np.concatenate((m, s))
        mm = minimize(
            elbo, start, jac=elbo_grad, method=fit_method, options=minim_opts)
        if not mm.success:
            warnings.warn("VB fitting did not converge")

        n = len(mm.x) // 2
        params = mm.x[0:n]
        va = np.exp(2 * mm.x[n:])

        if scale_fe:
            self.exog = self._exog_save
            del self._exog_save
            params[ixs] /= sc[ixs]
            va[ixs] /= sc[ixs]**2

        return BayesMixedGLMResults(self, params, va, mm)

    # Handle terms in the ELBO that are common to all models.
    def _elbo_common(self, fep_mean, fep_sd, vcp_mean, vcp_sd, vc_mean,
                     vc_sd):

        iv = 0

        # p(vc | vcp) contributions
        m = vcp_mean[self.ident]
        s = vcp_sd[self.ident]
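
        # If vcp ~ N(m, s**2), then E[exp(-2 * vcp)] = exp(2 * (s**2 - m))
        # (a log-normal moment); this factor appears below and in
        # _elbo_grad_common.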
        iv -= np.sum((vc_mean**2 + vc_sd**2) * np.exp(2 * (s**2 - m))) / 2
        iv -= np.sum(m)

        # p(vcp) contributions
        iv -= 0.5 * (vcp_mean**2 + vcp_sd**2).sum() / self.vcp_p**2

        # p(fep) contributions
        iv -= 0.5 * (fep_mean**2 + fep_sd**2).sum() / self.fe_p**2

        return iv

    def _elbo_grad_common(self, fep_mean, fep_sd, vcp_mean, vcp_sd, vc_mean,
                          vc_sd):

        # p(vc | vcp) contributions
        m = vcp_mean[self.ident]
        s = vcp_sd[self.ident]
        u = vc_mean**2 + vc_sd**2
        ve = np.exp(2 * (s**2 - m))
        dm = u * ve - 1
        ds = -2 * u * ve * s
        vcp_mean_grad = np.bincount(self.ident, weights=dm)
        vcp_sd_grad = np.bincount(self.ident, weights=ds)

        vc_mean_grad = -vc_mean * ve
        vc_sd_grad = -vc_sd * ve

        # p(vcp) contributions
        vcp_mean_grad -= vcp_mean / self.vcp_p**2
        vcp_sd_grad -= vcp_sd / self.vcp_p**2

        # p(fep) contributions
        fep_mean_grad = -fep_mean / self.fe_p**2
        fep_sd_grad = -fep_sd / self.fe_p**2

        return (fep_mean_grad, fep_sd_grad, vcp_mean_grad, vcp_sd_grad,
                vc_mean_grad, vc_sd_grad)


class BayesMixedGLMResults:
    """
    Class to hold results from a Bayesian estimation of a Mixed GLM model.

    Attributes
    ----------
    fe_mean : array_like
        Posterior mean of the fixed effects coefficients.
    fe_sd : array_like
        Posterior standard deviation of the fixed effects coefficients.
    vcp_mean : array_like
        Posterior mean of the logged variance component standard
        deviations.
    vcp_sd : array_like
        Posterior standard deviation of the logged variance component
        standard deviations.
    vc_mean : array_like
        Posterior mean of the random coefficients.
    vc_sd : array_like
        Posterior standard deviation of the random coefficients.
    """

    def __init__(self, model, params, cov_params, optim_retvals=None):

        self.model = model
        self.params = params
        self._cov_params = cov_params
        self.optim_retvals = optim_retvals

        self.fe_mean, self.vcp_mean, self.vc_mean = model._unpack(params)

        if cov_params.ndim == 2:
            cp = np.diag(cov_params)
        else:
            cp = cov_params
        self.fe_sd, self.vcp_sd, self.vc_sd = model._unpack(cp)
        self.fe_sd = np.sqrt(self.fe_sd)
        self.vcp_sd = np.sqrt(self.vcp_sd)
        self.vc_sd = np.sqrt(self.vc_sd)

    def cov_params(self):

        if hasattr(self.model.data, "frame"):
            # Return the covariance matrix as a dataframe or series
            na = (self.model.fep_names + self.model.vcp_names +
                  self.model.vc_names)
            if self._cov_params.ndim == 2:
                return pd.DataFrame(self._cov_params, index=na, columns=na)
            else:
                return pd.Series(self._cov_params, index=na)

        # Return the covariance matrix as a ndarray
        return self._cov_params

    def summary(self):

        df = pd.DataFrame()
        m = self.model.k_fep + self.model.k_vcp
        df["Type"] = (["M"] * self.model.k_fep +
                      ["V"] * self.model.k_vcp)

        df["Post. Mean"] = self.params[0:m]

        if self._cov_params.ndim == 2:
            v = np.diag(self._cov_params)[0:m]
            df["Post. SD"] = np.sqrt(v)
        else:
            df["Post. SD"] = np.sqrt(self._cov_params[0:m])

        # Convert variance parameters to natural scale
        df["SD"] = np.exp(df["Post. Mean"])
        df["SD (LB)"] = np.exp(df["Post. Mean"] - 2 * df["Post. SD"])
        df["SD (UB)"] = np.exp(df["Post. Mean"] + 2 * df["Post. SD"])
        df["SD"] = ["%.3f" % x for x in df.SD]
        df["SD (LB)"] = ["%.3f" % x for x in df["SD (LB)"]]
        df["SD (UB)"] = ["%.3f" % x for x in df["SD (UB)"]]
        df.loc[df.index < self.model.k_fep, "SD"] = ""
        df.loc[df.index < self.model.k_fep, "SD (LB)"] = ""
        df.loc[df.index < self.model.k_fep, "SD (UB)"] = ""

        df.index = self.model.fep_names + self.model.vcp_names

        summ = summary2.Summary()
        summ.add_title(self.model.family.__class__.__name__ +
                       " Mixed GLM Results")
        summ.add_df(df)

        summ.add_text("Parameter types are mean structure (M) and "
                      "variance structure (V)")
        summ.add_text("Variance parameters are modeled as log "
                      "standard deviations")

        return summ

    def random_effects(self, term=None):
        """
        Posterior mean and standard deviation of random effects.

        Parameters
        ----------
        term : str or None
            If None, results for all random effects are returned.  If
            a string, returns results for one set of random effects.
            The value of `term` is a variance component name, i.e. an
            element of `vcp_names` (when using formulas, a key of the
            `vc_formulas` dictionary).

        Returns
        -------
        Data frame of posterior means and posterior standard
        deviations of random effects.
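
        Examples
        --------
        A sketch, assuming the model was constructed via `from_formula`
        with ``vc_formulas={"a": "0 + C(g)"}``:

        >>> re = result.random_effects("a")
        >>> re.head()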
        """

        z = self.vc_mean
        s = self.vc_sd
        na = self.model.vc_names

        if term is not None:
            termix = self.model.vcp_names.index(term)
            ii = np.flatnonzero(self.model.ident == termix)
            z = z[ii]
            s = s[ii]
            na = [na[i] for i in ii]

        x = pd.DataFrame({"Mean": z, "SD": s})

        if na is not None:
            x.index = na

        return x

    def predict(self, exog=None, linear=False):
        """
        Return predicted values for the mean structure.

        Parameters
        ----------
        exog : array_like
            The design matrix for the mean structure.  If None,
            use the model's design matrix.
        linear : bool
            If True, returns the linear predictor, otherwise
            transform the linear predictor using the link function.

        Returns
        -------
        A one-dimensional array of fitted values.
        """

        return self.model.predict(self.params, exog, linear)


class BinomialBayesMixedGLM(_VariationalBayesMixedGLM, _BayesMixedGLM):

    __doc__ = _init_doc.format(example=_logit_example)

    def __init__(self,
                 endog,
                 exog,
                 exog_vc,
                 ident,
                 vcp_p=1,
                 fe_p=2,
                 fep_names=None,
                 vcp_names=None,
                 vc_names=None):

        super().__init__(
            endog,
            exog,
            exog_vc=exog_vc,
            ident=ident,
            vcp_p=vcp_p,
            fe_p=fe_p,
            family=families.Binomial(),
            fep_names=fep_names,
            vcp_names=vcp_names,
            vc_names=vc_names)

        if not np.all(np.unique(endog) == np.r_[0, 1]):
            msg = "endog values must be 0 and 1, and not all identical"
            raise ValueError(msg)

    @classmethod
    def from_formula(cls, formula, vc_formulas, data, vcp_p=1, fe_p=2):

        fam = families.Binomial()
        x = _BayesMixedGLM.from_formula(
            formula, vc_formulas, data, family=fam, vcp_p=vcp_p, fe_p=fe_p)

        # Copy over to the intended class structure
        mod = BinomialBayesMixedGLM(
            x.endog,
            x.exog,
            exog_vc=x.exog_vc,
            ident=x.ident,
            vcp_p=x.vcp_p,
            fe_p=x.fe_p,
            fep_names=x.fep_names,
            vcp_names=x.vcp_names,
            vc_names=x.vc_names)
        mod.data = x.data

        return mod

    def vb_elbo(self, vb_mean, vb_sd):
        """
        Returns the evidence lower bound (ELBO) for the model.
        """

        fep_mean, vcp_mean, vc_mean = self._unpack(vb_mean)
        fep_sd, vcp_sd, vc_sd = self._unpack(vb_sd)
        tm, tv = self._lp_stats(fep_mean, fep_sd, vc_mean, vc_sd)
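
        # For the logistic model the per-observation log-likelihood is
        # y_i * eta_i - log(1 + exp(eta_i)), so h(z) below gives the
        # second term evaluated along the Gaussian posterior of the
        # linear predictor.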
        def h(z):
            return -np.log(1 + np.exp(tm + np.sqrt(tv) * z))

        return self.vb_elbo_base(h, tm, fep_mean, vcp_mean, vc_mean, fep_sd,
                                 vcp_sd, vc_sd)

    def vb_elbo_grad(self, vb_mean, vb_sd):
        """
        Returns the gradient of the model's evidence lower bound (ELBO).
        """

        fep_mean, vcp_mean, vc_mean = self._unpack(vb_mean)
        fep_sd, vcp_sd, vc_sd = self._unpack(vb_sd)
        tm, tv = self._lp_stats(fep_mean, fep_sd, vc_mean, vc_sd)

        def h(z):
            u = tm + np.sqrt(tv) * z
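            # The derivative of -log(1 + exp(u)) with respect to u is
            # -expit(u); evaluate it in a numerically stable way, using
            # exp(-u) for positive u and exp(u) otherwise.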
            x = np.zeros_like(u)
            ii = np.flatnonzero(u > 0)
            uu = u[ii]
            x[ii] = 1 / (1 + np.exp(-uu))
            ii = np.flatnonzero(u <= 0)
            uu = u[ii]
            x[ii] = np.exp(uu) / (1 + np.exp(uu))
            return -x

        return self.vb_elbo_grad_base(h, tm, tv, fep_mean, vcp_mean, vc_mean,
                                      fep_sd, vcp_sd, vc_sd)


class PoissonBayesMixedGLM(_VariationalBayesMixedGLM, _BayesMixedGLM):

    __doc__ = _init_doc.format(example=_poisson_example)

    def __init__(self,
                 endog,
                 exog,
                 exog_vc,
                 ident,
                 vcp_p=1,
                 fe_p=2,
                 fep_names=None,
                 vcp_names=None,
                 vc_names=None):

        super().__init__(
            endog=endog,
            exog=exog,
            exog_vc=exog_vc,
            ident=ident,
            vcp_p=vcp_p,
            fe_p=fe_p,
            family=families.Poisson(),
            fep_names=fep_names,
            vcp_names=vcp_names,
            vc_names=vc_names)

    @classmethod
    def from_formula(cls,
                     formula,
                     vc_formulas,
                     data,
                     vcp_p=1,
                     fe_p=2,
                     vcp_names=None,
                     vc_names=None):

        fam = families.Poisson()
        x = _BayesMixedGLM.from_formula(
            formula,
            vc_formulas,
            data,
            family=fam,
            vcp_p=vcp_p,
            fe_p=fe_p)

        # Copy over to the intended class structure
        mod = PoissonBayesMixedGLM(
            endog=x.endog,
            exog=x.exog,
            exog_vc=x.exog_vc,
            ident=x.ident,
            vcp_p=x.vcp_p,
            fe_p=x.fe_p,
            fep_names=x.fep_names,
            vcp_names=x.vcp_names,
            vc_names=x.vc_names)
        mod.data = x.data

        return mod

    def vb_elbo(self, vb_mean, vb_sd):
        """
        Returns the evidence lower bound (ELBO) for the model.
        """

        fep_mean, vcp_mean, vc_mean = self._unpack(vb_mean)
        fep_sd, vcp_sd, vc_sd = self._unpack(vb_sd)
        tm, tv = self._lp_stats(fep_mean, fep_sd, vc_mean, vc_sd)
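
        # For the Poisson model the per-observation log-likelihood is
        # y_i * eta_i - exp(eta_i) (up to a constant), so h(z) below
        # gives the second term.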
        def h(z):
            return -np.exp(tm + np.sqrt(tv) * z)

        return self.vb_elbo_base(h, tm, fep_mean, vcp_mean, vc_mean, fep_sd,
                                 vcp_sd, vc_sd)

    def vb_elbo_grad(self, vb_mean, vb_sd):
        """
        Returns the gradient of the model's evidence lower bound (ELBO).
        """

        fep_mean, vcp_mean, vc_mean = self._unpack(vb_mean)
        fep_sd, vcp_sd, vc_sd = self._unpack(vb_sd)
        tm, tv = self._lp_stats(fep_mean, fep_sd, vc_mean, vc_sd)

        def h(z):
            return -np.exp(tm + np.sqrt(tv) * z)

        return self.vb_elbo_grad_base(h, tm, tv, fep_mean, vcp_mean, vc_mean,
                                      fep_sd, vcp_sd, vc_sd)