"""
|
|
Created on Fri Dec 19 11:29:18 2014
|
|
|
|
Author: Josef Perktold
|
|
License: BSD-3
|
|
|
|
"""
|
|
|
|
import numpy as np
|
|
from scipy import stats
|
|
import pandas as pd
|
|
|
|
|
|
# this is similar to ContrastResults after t_test, partially copied, adjusted
|
|
class PredictionResultsBase:
    """Base class for get_prediction results
    """

    def __init__(self, predicted, var_pred, func=None, deriv=None,
                 df=None, dist=None, row_labels=None, **kwds):
        self.predicted = predicted
        self.var_pred = var_pred
        self.func = func
        self.deriv = deriv
        self.df = df
        self.row_labels = row_labels
        self.__dict__.update(kwds)

        if dist is None or dist == 'norm':
            self.dist = stats.norm
            self.dist_args = ()
        elif dist == 't':
            self.dist = stats.t
            self.dist_args = (self.df,)
        else:
            self.dist = dist
            self.dist_args = ()

    @property
    def se(self):
        return np.sqrt(self.var_pred)

    @property
    def tvalues(self):
        return self.predicted / self.se

    def t_test(self, value=0, alternative='two-sided'):
        '''z- or t-test for the hypothesis that the mean is equal to value.

        Parameters
        ----------
        value : array_like
            Value of the mean under the null hypothesis.
        alternative : str
            One of 'two-sided', 'larger', 'smaller'.

        Returns
        -------
        stat : ndarray
            Test statistic.
        pvalue : ndarray
            P-value of the hypothesis test. The distribution is given by
            the `dist` attribute of the instance, specified in `__init__`;
            the default, if not specified, is the normal distribution.
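
        Examples
        --------
        A minimal, self-contained sketch; the numbers below are made up
        purely for illustration.

        >>> import numpy as np
        >>> pred = PredictionResultsBase(np.array([1.2, 0.5]),
        ...                              np.array([0.04, 0.09]))
        >>> stat, pvalue = pred.t_test(value=1, alternative='larger')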
        '''
        # assumes symmetric distribution
        stat = (self.predicted - value) / self.se

        if alternative in ['two-sided', '2-sided', '2s']:
            pvalue = self.dist.sf(np.abs(stat), *self.dist_args)*2
        elif alternative in ['larger', 'l']:
            pvalue = self.dist.sf(stat, *self.dist_args)
        elif alternative in ['smaller', 's']:
            pvalue = self.dist.cdf(stat, *self.dist_args)
        else:
            raise ValueError('invalid alternative')
        return stat, pvalue

    def _conf_int_generic(self, center, se, alpha, dist_args=None):
        """internal function to avoid code duplication
        """
        if dist_args is None:
            dist_args = ()

        q = self.dist.ppf(1 - alpha / 2., *dist_args)
        lower = center - q * se
        upper = center + q * se
        ci = np.column_stack((lower, upper))
        # if we want to stack at a new last axis, for lower.ndim > 1
        # np.concatenate((lower[..., None], upper[..., None]), axis=-1)
        return ci

    def conf_int(self, *, alpha=0.05, **kwds):
        """Confidence interval for the predicted value.

        Parameters
        ----------
        alpha : float, optional
            The significance level for the confidence interval, i.e., the
            default ``alpha = .05`` returns a 95% confidence interval.
        kwds : extra keyword arguments
            Ignored in the base class; only kept for a signature consistent
            with the subclasses.

        Returns
        -------
        ci : ndarray, (n_predictions, 2)
            The array has the lower and the upper limit of the confidence
            interval in the columns.
        """

        ci = self._conf_int_generic(self.predicted, self.se, alpha,
                                    dist_args=self.dist_args)
        return ci

    def summary_frame(self, alpha=0.05):
        """Summary frame

        Parameters
        ----------
        alpha : float, optional
            The significance level for the confidence interval, i.e., the
            default ``alpha = .05`` returns a 95% confidence interval.

        Returns
        -------
        pandas DataFrame with columns
        'predicted', 'se', 'ci_lower', 'ci_upper'.
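
        Examples
        --------
        A minimal sketch, reusing the made-up numbers from the example in
        ``t_test`` above (purely illustrative):

        >>> import numpy as np
        >>> pred = PredictionResultsBase(np.array([1.2, 0.5]),
        ...                              np.array([0.04, 0.09]),
        ...                              row_labels=["a", "b"])
        >>> frame = pred.summary_frame(alpha=0.1)
        >>> list(frame.columns)
        ['predicted', 'se', 'ci_lower', 'ci_upper']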
"""
|
|
ci = self.conf_int(alpha=alpha)
|
|
to_include = {}
|
|
to_include['predicted'] = self.predicted
|
|
to_include['se'] = self.se
|
|
to_include['ci_lower'] = ci[:, 0]
|
|
to_include['ci_upper'] = ci[:, 1]
|
|
|
|
self.table = to_include
|
|
# pandas dict does not handle 2d_array
|
|
# data = np.column_stack(list(to_include.values()))
|
|
# names = ....
|
|
res = pd.DataFrame(to_include, index=self.row_labels,
|
|
columns=to_include.keys())
|
|
return res
|
|
|
|
|
|


class PredictionResultsMonotonic(PredictionResultsBase):

    def __init__(self, predicted, var_pred, linpred=None, linpred_se=None,
                 func=None, deriv=None, df=None, dist=None, row_labels=None):
        # TODO: is var_resid used? drop from arguments?
        self.predicted = predicted
        self.var_pred = var_pred
        self.linpred = linpred
        self.linpred_se = linpred_se
        self.func = func
        self.deriv = deriv
        self.df = df
        self.row_labels = row_labels

        if dist is None or dist == 'norm':
            self.dist = stats.norm
            self.dist_args = ()
        elif dist == 't':
            self.dist = stats.t
            self.dist_args = (self.df,)
        else:
            self.dist = dist
            self.dist_args = ()

    def _conf_int_generic(self, center, se, alpha, dist_args=None):
        """internal function to avoid code duplication
        """
        if dist_args is None:
            dist_args = ()

        q = self.dist.ppf(1 - alpha / 2., *dist_args)
        lower = center - q * se
        upper = center + q * se
        ci = np.column_stack((lower, upper))
        # if we want to stack at a new last axis, for lower.ndim > 1
        # np.concatenate((lower[..., None], upper[..., None]), axis=-1)
        return ci

    def conf_int(self, method='endpoint', alpha=0.05, **kwds):
        """Confidence interval for the predicted value.

        This is currently only available for t and z tests.

        Parameters
        ----------
        method : {"endpoint", "delta"}
            Method for the confidence interval.
            If method is "endpoint", then the confidence interval of the
            linear predictor is transformed by the prediction function.
            If method is "delta", then the delta-method is used. The
            confidence interval in this case might reach outside the range
            of the prediction, for example probabilities larger than one or
            smaller than zero.
        alpha : float, optional
            The significance level for the confidence interval, i.e., the
            default ``alpha = .05`` returns a 95% confidence interval.
        kwds : extra keyword arguments
            Currently ignored, only for compatibility, consistent signature.

        Returns
        -------
        ci : ndarray, (n_predictions, 2)
            The array has the lower and the upper limit of the confidence
            interval in the columns.
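
        Examples
        --------
        A self-contained sketch with simulated count data (illustrative
        only); a discrete Poisson results instance typically reaches this
        class through ``get_prediction``.

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> rng = np.random.default_rng(123)
        >>> x = sm.add_constant(rng.normal(size=50))
        >>> y = rng.poisson(np.exp(x @ np.array([0.5, 0.2])))
        >>> res = sm.Poisson(y, x).fit()
        >>> pred = res.get_prediction(which="mean")
        >>> ci_endpoint = pred.conf_int(method="endpoint")
        >>> ci_delta = pred.conf_int(method="delta")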
"""
|
|
tmp = np.linspace(0, 1, 6)
|
|
# TODO: drop check?
|
|
is_linear = (self.func(tmp) == tmp).all()
|
|
if method == 'endpoint' and not is_linear:
|
|
ci_linear = self._conf_int_generic(self.linpred, self.linpred_se,
|
|
alpha,
|
|
dist_args=self.dist_args)
|
|
ci = self.func(ci_linear)
|
|
elif method == 'delta' or is_linear:
|
|
ci = self._conf_int_generic(self.predicted, self.se, alpha,
|
|
dist_args=self.dist_args)
|
|
|
|
return ci
|
|
|
|
|
|


class PredictionResultsDelta(PredictionResultsBase):
    """Prediction results based on delta method
    """

    def __init__(self, results_delta, **kwds):

        predicted = results_delta.predicted()
        var_pred = results_delta.var()

        super().__init__(predicted, var_pred, **kwds)


class PredictionResultsMean(PredictionResultsBase):
    """Prediction results for GLM.

    This results class is used for backwards compatibility for
    `get_prediction` with GLM. The new PredictionResults classes dropped
    the `_mean` suffix in the attribute names.
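
    Examples
    --------
    A self-contained sketch with simulated data (illustrative only); per
    the note above, ``get_prediction`` on GLM results returns an instance
    of this class, whose summary frame uses the ``mean``-prefixed columns.

    >>> import numpy as np
    >>> import statsmodels.api as sm
    >>> rng = np.random.default_rng(123)
    >>> x = sm.add_constant(rng.normal(size=50))
    >>> y = rng.poisson(np.exp(x @ np.array([0.5, 0.2])))
    >>> res = sm.GLM(y, x, family=sm.families.Poisson()).fit()
    >>> pred = res.get_prediction(x)
    >>> frame = pred.summary_frame()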
"""
|
|
|
|
def __init__(self, predicted_mean, var_pred_mean, var_resid=None,
|
|
df=None, dist=None, row_labels=None, linpred=None, link=None):
|
|
# TODO: is var_resid used? drop from arguments?
|
|
self.predicted = predicted_mean
|
|
self.var_pred = var_pred_mean
|
|
self.df = df
|
|
self.var_resid = var_resid
|
|
self.row_labels = row_labels
|
|
self.linpred = linpred
|
|
self.link = link
|
|
|
|
if dist is None or dist == 'norm':
|
|
self.dist = stats.norm
|
|
self.dist_args = ()
|
|
elif dist == 't':
|
|
self.dist = stats.t
|
|
self.dist_args = (self.df,)
|
|
else:
|
|
self.dist = dist
|
|
self.dist_args = ()
|
|
|
|
@property
|
|
def predicted_mean(self):
|
|
# alias for backwards compatibility
|
|
return self.predicted
|
|
|
|
@property
|
|
def var_pred_mean(self):
|
|
# alias for backwards compatibility
|
|
return self.var_pred
|
|
|
|
@property
|
|
def se_mean(self):
|
|
# alias for backwards compatibility
|
|
return self.se
|
|
|
|

    def conf_int(self, method='endpoint', alpha=0.05, **kwds):
        """Confidence interval for the predicted value.

        This is currently only available for t and z tests.

        Parameters
        ----------
        method : {"endpoint", "delta"}
            Method for the confidence interval.
            If method is "endpoint", then the confidence interval of the
            linear predictor is transformed by the prediction function.
            If method is "delta", then the delta-method is used. The
            confidence interval in this case might reach outside the range
            of the prediction, for example probabilities larger than one or
            smaller than zero.
        alpha : float, optional
            The significance level for the confidence interval, i.e., the
            default ``alpha = .05`` returns a 95% confidence interval.
        kwds : extra keyword arguments
            Currently ignored, only for compatibility, consistent signature.

        Returns
        -------
        ci : ndarray, (n_predictions, 2)
            The array has the lower and the upper limit of the confidence
            interval in the columns.
        """
        tmp = np.linspace(0, 1, 6)
        is_linear = (self.link.inverse(tmp) == tmp).all()
        if method == 'endpoint' and not is_linear:
            ci_linear = self.linpred.conf_int(alpha=alpha, obs=False)
            ci = self.link.inverse(ci_linear)
        elif method == 'delta' or is_linear:
            se = self.se_mean
            q = self.dist.ppf(1 - alpha / 2., *self.dist_args)
            lower = self.predicted_mean - q * se
            upper = self.predicted_mean + q * se
            ci = np.column_stack((lower, upper))
            # if we want to stack at a new last axis, for lower.ndim > 1
            # np.concatenate((lower[..., None], upper[..., None]), axis=-1)
        else:
            raise ValueError('method has to be "endpoint" or "delta"')

        return ci

    def summary_frame(self, alpha=0.05):
        """Summary frame

        Parameters
        ----------
        alpha : float, optional
            The significance level for the confidence interval, i.e., the
            default ``alpha = .05`` returns a 95% confidence interval.

        Returns
        -------
        pandas DataFrame with columns
        'mean', 'mean_se', 'mean_ci_lower', 'mean_ci_upper'.
        """
        # TODO: finish and cleanup
        ci_mean = self.conf_int(alpha=alpha)
        to_include = {}
        to_include['mean'] = self.predicted_mean
        to_include['mean_se'] = self.se_mean
        to_include['mean_ci_lower'] = ci_mean[:, 0]
        to_include['mean_ci_upper'] = ci_mean[:, 1]

        self.table = to_include
        # pandas dict does not handle 2d_array
        # data = np.column_stack(list(to_include.values()))
        # names = ....
        res = pd.DataFrame(to_include, index=self.row_labels,
                           columns=to_include.keys())
        return res


def _get_exog_predict(self, exog=None, transform=True, row_labels=None):
    """Prepare or transform exog for prediction

    Parameters
    ----------
    exog : array_like, optional
        The values for which you want to predict.
    transform : bool, optional
        If the model was fit via a formula, do you want to pass
        exog through the formula. Default is True. E.g., if you fit
        a model y ~ log(x1) + log(x2), and transform is True, then
        you can pass a data structure that contains x1 and x2 in
        their original form. Otherwise, you'd need to log the data
        first.
    row_labels : list of str or None
        If row_labels are provided, then they will replace the generated
        labels.

    Returns
    -------
    exog : ndarray
        Prediction exog.
    row_labels : list of str
        Labels or pandas index for the rows of the prediction.
    """

    # prepare exog and row_labels, based on base Results.predict
    if transform and hasattr(self.model, 'formula') and exog is not None:
        from patsy import dmatrix
        if isinstance(exog, pd.Series):
            exog = pd.DataFrame(exog)
        exog = dmatrix(self.model.data.design_info, exog)

    if exog is not None:
        if row_labels is None:
            row_labels = getattr(exog, 'index', None)
            if callable(row_labels):
                row_labels = None

        exog = np.asarray(exog)
        if exog.ndim == 1 and (self.model.exog.ndim == 1 or
                               self.model.exog.shape[1] == 1):
            exog = exog[:, None]
        exog = np.atleast_2d(exog)  # needed in count model shape[1]
    else:
        exog = self.model.exog

        if row_labels is None:
            row_labels = getattr(self.model.data, 'row_labels', None)
    return exog, row_labels


def get_prediction_glm(self, exog=None, transform=True,
                       row_labels=None, linpred=None, link=None,
                       pred_kwds=None):
    """
    Compute prediction results for GLM compatible models.

    Parameters
    ----------
    exog : array_like, optional
        The values for which you want to predict.
    transform : bool, optional
        If the model was fit via a formula, do you want to pass
        exog through the formula. Default is True. E.g., if you fit
        a model y ~ log(x1) + log(x2), and transform is True, then
        you can pass a data structure that contains x1 and x2 in
        their original form. Otherwise, you'd need to log the data
        first.
    row_labels : list of str or None
        If row_labels are provided, then they will replace the generated
        labels.
    linpred : linear prediction instance
        Instance of linear prediction results used for confidence intervals
        based on the endpoint transformation.
    link : instance of link function
        If no link function is provided, then `model.family.link` is used.
    pred_kwds : dict
        Some models can take additional keyword arguments, such as offset or
        additional exog in multi-part models. See the predict method of the
        model for the details.

    Returns
    -------
    prediction_results : generalized_linear_model.PredictionResults
        The prediction results instance contains prediction and prediction
        variance and can on demand calculate confidence intervals and
        summary tables for the prediction of the mean and of new
        observations.
    """

    # prepare exog and row_labels, based on base Results.predict
    exog, row_labels = _get_exog_predict(
        self,
        exog=exog,
        transform=transform,
        row_labels=row_labels,
        )

    if pred_kwds is None:
        pred_kwds = {}

    predicted_mean = self.model.predict(self.params, exog, **pred_kwds)

    covb = self.cov_params()

    link_deriv = self.model.family.link.inverse_deriv(linpred.predicted_mean)
    var_pred_mean = link_deriv**2 * (exog * np.dot(covb, exog.T).T).sum(1)
    var_resid = self.scale  # self.mse_resid / weights

    # TODO: check that we have correct scale, Refactor scale #???
    # special case for now:
    if self.cov_type == 'fixed scale':
        var_resid = self.cov_kwds['scale']

    dist = ['norm', 't'][self.use_t]
    return PredictionResultsMean(
        predicted_mean, var_pred_mean, var_resid,
        df=self.df_resid, dist=dist,
        row_labels=row_labels, linpred=linpred, link=link)


def get_prediction_linear(self, exog=None, transform=True,
                          row_labels=None, pred_kwds=None, index=None):
    """
    Compute prediction results for the linear prediction.

    Parameters
    ----------
    exog : array_like, optional
        The values for which you want to predict.
    transform : bool, optional
        If the model was fit via a formula, do you want to pass
        exog through the formula. Default is True. E.g., if you fit
        a model y ~ log(x1) + log(x2), and transform is True, then
        you can pass a data structure that contains x1 and x2 in
        their original form. Otherwise, you'd need to log the data
        first.
    row_labels : list of str or None
        If row_labels are provided, then they will replace the generated
        labels.
    pred_kwds : dict, optional
        Some models can take additional keyword arguments, such as offset or
        additional exog in multi-part models.
        See the predict method of the model for the details.
    index : slice or array-like of int
        Is used to select rows and columns of cov_params, if the prediction
        function only depends on a subset of parameters.

    Returns
    -------
    prediction_results : PredictionResults
        The prediction results instance contains prediction and prediction
        variance and can on demand calculate confidence intervals and
        summary tables for the prediction.
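
    Examples
    --------
    A self-contained sketch with simulated data (illustrative only); this
    path is normally reached via ``results.get_prediction(which="linear")``.

    >>> import numpy as np
    >>> import statsmodels.api as sm
    >>> rng = np.random.default_rng(123)
    >>> x = sm.add_constant(rng.normal(size=50))
    >>> y = rng.poisson(np.exp(x @ np.array([0.5, 0.2])))
    >>> res = sm.Poisson(y, x).fit()
    >>> pred = res.get_prediction(which="linear")
    >>> frame = pred.summary_frame()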
"""
|
|
|
|
# prepare exog and row_labels, based on base Results.predict
|
|
exog, row_labels = _get_exog_predict(
|
|
self,
|
|
exog=exog,
|
|
transform=transform,
|
|
row_labels=row_labels,
|
|
)
|
|
|
|
if pred_kwds is None:
|
|
pred_kwds = {}
|
|
|
|
k1 = exog.shape[1]
|
|
if len(self.params > k1):
|
|
# TODO: we allow endpoint transformation only for the first link
|
|
index = np.arange(k1)
|
|
else:
|
|
index = None
|
|
# get linear prediction and standard errors
|
|
covb = self.cov_params(column=index)
|
|
var_pred = (exog * np.dot(covb, exog.T).T).sum(1)
|
|
pred_kwds_linear = pred_kwds.copy()
|
|
pred_kwds_linear["which"] = "linear"
|
|
predicted = self.model.predict(self.params, exog, **pred_kwds_linear)
|
|
|
|
dist = ['norm', 't'][self.use_t]
|
|
res = PredictionResultsBase(predicted, var_pred,
|
|
df=self.df_resid, dist=dist,
|
|
row_labels=row_labels
|
|
)
|
|
return res
|
|
|
|
|
|


def get_prediction_monotonic(self, exog=None, transform=True,
                             row_labels=None, link=None,
                             pred_kwds=None, index=None):
    """
    Compute prediction results when an endpoint transformation is valid.

    Parameters
    ----------
    exog : array_like, optional
        The values for which you want to predict.
    transform : bool, optional
        If the model was fit via a formula, do you want to pass
        exog through the formula. Default is True. E.g., if you fit
        a model y ~ log(x1) + log(x2), and transform is True, then
        you can pass a data structure that contains x1 and x2 in
        their original form. Otherwise, you'd need to log the data
        first.
    row_labels : list of str or None
        If row_labels are provided, then they will replace the generated
        labels.
    link : instance of link function
        If no link function is provided, then ``model.family.link`` is
        used.
    pred_kwds : dict, optional
        Some models can take additional keyword arguments, such as offset or
        additional exog in multi-part models.
        See the predict method of the model for the details.
    index : slice or array-like of int
        Is used to select rows and columns of cov_params, if the prediction
        function only depends on a subset of parameters.

    Returns
    -------
    prediction_results : PredictionResults
        The prediction results instance contains prediction and prediction
        variance and can on demand calculate confidence intervals and
        summary tables for the prediction.
    """

    # prepare exog and row_labels, based on base Results.predict
    exog, row_labels = _get_exog_predict(
        self,
        exog=exog,
        transform=transform,
        row_labels=row_labels,
        )

    if pred_kwds is None:
        pred_kwds = {}

    if link is None:
        link = self.model.family.link

    func_deriv = link.inverse_deriv

    # get linear prediction and standard errors
    covb = self.cov_params(column=index)
    linpred_var = (exog * np.dot(covb, exog.T).T).sum(1)
    pred_kwds_linear = pred_kwds.copy()
    pred_kwds_linear["which"] = "linear"
    linpred = self.model.predict(self.params, exog, **pred_kwds_linear)

    predicted = self.model.predict(self.params, exog, **pred_kwds)
    link_deriv = func_deriv(linpred)
    var_pred = link_deriv**2 * linpred_var

    dist = ['norm', 't'][self.use_t]
    res = PredictionResultsMonotonic(predicted, var_pred,
                                     df=self.df_resid, dist=dist,
                                     row_labels=row_labels, linpred=linpred,
                                     linpred_se=np.sqrt(linpred_var),
                                     func=link.inverse, deriv=func_deriv)
    return res


def get_prediction_delta(
        self,
        exog=None,
        which="mean",
        average=False,
        agg_weights=None,
        transform=True,
        row_labels=None,
        pred_kwds=None
        ):
    """
    Compute prediction results using the delta method.

    Parameters
    ----------
    exog : array_like, optional
        The values for which you want to predict.
    which : str
        The statistic that is predicted. Which statistics are available
        depends on the model's ``predict`` method.
    average : bool
        If average is True, then the mean prediction is computed, that is,
        predictions are computed for individual exog and then the mean over
        observations is used.
        If average is False, then the results are the predictions for all
        observations, i.e. same length as ``exog``.
    agg_weights : ndarray, optional
        Aggregation weights, only used if average is True.
        The weights are not normalized.
    transform : bool, optional
        If the model was fit via a formula, do you want to pass
        exog through the formula. Default is True. E.g., if you fit
        a model y ~ log(x1) + log(x2), and transform is True, then
        you can pass a data structure that contains x1 and x2 in
        their original form. Otherwise, you'd need to log the data
        first.
    row_labels : list of str or None
        If row_labels are provided, then they will replace the generated
        labels.
    pred_kwds : dict, optional
        Some models can take additional keyword arguments, such as offset or
        additional exog in multi-part models.
        See the predict method of the model for the details.

    Returns
    -------
    prediction_results : generalized_linear_model.PredictionResults
        The prediction results instance contains prediction and prediction
        variance and can on demand calculate confidence intervals and
        summary tables for the prediction of the mean and of new
        observations.
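
    Examples
    --------
    A self-contained sketch with simulated data (illustrative only); this
    function is usually reached via ``get_prediction`` with ``average=True``
    or with a statistic that has no endpoint transformation, rather than
    being called directly as done here.

    >>> import numpy as np
    >>> import statsmodels.api as sm
    >>> rng = np.random.default_rng(123)
    >>> x = sm.add_constant(rng.normal(size=50))
    >>> y = rng.poisson(np.exp(x @ np.array([0.5, 0.2])))
    >>> res = sm.Poisson(y, x).fit()
    >>> pred = get_prediction_delta(res, which="mean", average=True,
    ...                             agg_weights=np.ones(len(y)),
    ...                             pred_kwds={})
    >>> ci = pred.conf_int()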
"""
|
|
|
|
# prepare exog and row_labels, based on base Results.predict
|
|
exog, row_labels = _get_exog_predict(
|
|
self,
|
|
exog=exog,
|
|
transform=transform,
|
|
row_labels=row_labels,
|
|
)
|
|
if agg_weights is None:
|
|
agg_weights = np.array(1.)
|
|
|
|
def f_pred(p):
|
|
"""Prediction function as function of params
|
|
"""
|
|
pred = self.model.predict(p, exog, which=which, **pred_kwds)
|
|
if average:
|
|
# using `.T` which should work if aggweights is 1-dim
|
|
pred = (pred.T * agg_weights.T).mean(-1).T
|
|
return pred
|
|
|
|
nlpm = self._get_wald_nonlinear(f_pred)
|
|
# TODO: currently returns NonlinearDeltaCov
|
|
res = PredictionResultsDelta(nlpm)
|
|
return res
|
|
|
|
|
|


def get_prediction(self, exog=None, transform=True, which="mean",
                   row_labels=None, average=False, agg_weights=None,
                   pred_kwds=None):
    """
    Compute prediction results for the statistic specified by `which`.

    Parameters
    ----------
    exog : array_like, optional
        The values for which you want to predict.
    transform : bool, optional
        If the model was fit via a formula, do you want to pass
        exog through the formula. Default is True. E.g., if you fit
        a model y ~ log(x1) + log(x2), and transform is True, then
        you can pass a data structure that contains x1 and x2 in
        their original form. Otherwise, you'd need to log the data
        first.
    which : str
        Which statistic is to be predicted. Default is "mean".
        The available statistics and options depend on the model;
        see the model.predict docstring. The `linear` keyword of earlier
        versions has been replaced by `which`; use ``which="linear"`` for
        the linear prediction.
    row_labels : list of str or None
        If row_labels are provided, then they will replace the generated
        labels.
    average : bool
        If average is True, then the mean prediction is computed, that is,
        predictions are computed for individual exog and then the average
        over observations is used.
        If average is False, then the results are the predictions for all
        observations, i.e. same length as ``exog``.
    agg_weights : ndarray, optional
        Aggregation weights, only used if average is True.
        The weights are not normalized.
    pred_kwds : dict, optional
        Some models can take additional keyword arguments, such as offset,
        exposure or additional exog in multi-part models like zero inflated
        models.
        See the predict method of the model for the details.

    Returns
    -------
    prediction_results : PredictionResults
        The prediction results instance contains prediction and prediction
        variance and can on demand calculate confidence intervals and a
        summary dataframe for the prediction.

    Notes
    -----
    Status: new in 0.14, experimental
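
    Examples
    --------
    A self-contained sketch with simulated data (illustrative only): the
    sample-average predicted mean with a delta-method confidence interval.

    >>> import numpy as np
    >>> import statsmodels.api as sm
    >>> rng = np.random.default_rng(123)
    >>> x = sm.add_constant(rng.normal(size=50))
    >>> y = rng.poisson(np.exp(x @ np.array([0.5, 0.2])))
    >>> res = sm.Poisson(y, x).fit()
    >>> pred = res.get_prediction(which="mean", average=True)
    >>> ci = pred.conf_int()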
"""
|
|
use_endpoint = getattr(self.model, "_use_endpoint", True)
|
|
|
|
if which == "linear":
|
|
res = get_prediction_linear(
|
|
self,
|
|
exog=exog,
|
|
transform=transform,
|
|
row_labels=row_labels,
|
|
pred_kwds=pred_kwds,
|
|
)
|
|
|
|
elif (which == "mean")and (use_endpoint is True) and (average is False):
|
|
# endpoint transformation
|
|
k1 = self.model.exog.shape[1]
|
|
if len(self.params > k1):
|
|
# TODO: we allow endpoint transformation only for the first link
|
|
index = np.arange(k1)
|
|
else:
|
|
index = None
|
|
|
|
pred_kwds["which"] = which
|
|
# TODO: add link or ilink to all link based models (except zi
|
|
link = getattr(self.model, "link", None)
|
|
if link is None:
|
|
# GLM
|
|
if hasattr(self.model, "family"):
|
|
link = getattr(self.model.family, "link", None)
|
|
if link is None:
|
|
# defaulting to log link for count models
|
|
import warnings
|
|
warnings.warn("using default log-link in get_prediction")
|
|
from statsmodels.genmod.families import links
|
|
link = links.Log()
|
|
res = get_prediction_monotonic(
|
|
self,
|
|
exog=exog,
|
|
transform=transform,
|
|
row_labels=row_labels,
|
|
link=link,
|
|
pred_kwds=pred_kwds,
|
|
index=index,
|
|
)
|
|
|
|
else:
|
|
# which is not mean or linear, or we need averaging
|
|
res = get_prediction_delta(
|
|
self,
|
|
exog=exog,
|
|
which=which,
|
|
average=average,
|
|
agg_weights=agg_weights,
|
|
pred_kwds=pred_kwds,
|
|
)
|
|
|
|
return res
|
|
|
|
|
|


def params_transform_univariate(params, cov_params, link=None, transform=None,
                                row_labels=None):
    """
    Results for univariate, nonlinear, monotonically transformed parameters.

    This provides transformed values, standard errors and confidence
    intervals for transformations of parameters, for example in calculating
    rates with `exp(params)` in the case of Poisson or other models with an
    exponential mean function.
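
    Examples
    --------
    A self-contained sketch with simulated data (illustrative only):
    transformed parameters ``exp(params)`` (rates) for a Poisson
    regression, with delta-method standard errors.

    >>> import numpy as np
    >>> import statsmodels.api as sm
    >>> rng = np.random.default_rng(123)
    >>> x = sm.add_constant(rng.normal(size=50))
    >>> y = rng.poisson(np.exp(x @ np.array([0.5, 0.2])))
    >>> res = sm.Poisson(y, x).fit()
    >>> rates = params_transform_univariate(res.params, res.cov_params())
    >>> frame = rates.summary_frame()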
"""
|
|
|
|
from statsmodels.genmod.families import links
|
|
if link is None and transform is None:
|
|
link = links.Log()
|
|
|
|
if row_labels is None and hasattr(params, 'index'):
|
|
row_labels = params.index
|
|
|
|
params = np.asarray(params)
|
|
|
|
predicted_mean = link.inverse(params)
|
|
link_deriv = link.inverse_deriv(params)
|
|
var_pred_mean = link_deriv**2 * np.diag(cov_params)
|
|
# TODO: do we want covariance also, or just var/se
|
|
|
|
dist = stats.norm
|
|
|
|
# TODO: need ci for linear prediction, method of `lin_pred
|
|
linpred = PredictionResultsMean(
|
|
params, np.diag(cov_params), dist=dist,
|
|
row_labels=row_labels, link=links.Identity())
|
|
|
|
res = PredictionResultsMean(
|
|
predicted_mean, var_pred_mean, dist=dist,
|
|
row_labels=row_labels, linpred=linpred, link=link)
|
|
|
|
return res
|