# statsmodels/tsa/statespace/news.py
"""
News for state space models
Author: Chad Fulton
License: BSD-3
"""
from statsmodels.compat.pandas import FUTURE_STACK
import numpy as np
import pandas as pd
from statsmodels.iolib.summary import Summary
from statsmodels.iolib.table import SimpleTable
from statsmodels.iolib.tableformatting import fmt_params
class NewsResults:
"""
Impacts of data revisions and news on estimates of variables of interest
Parameters
----------
news_results : SimpleNamespace instance
Results from `KalmanSmoother.news`.
model : MLEResults
The results object associated with the model from which the NewsResults
was generated.
updated : MLEResults
The results object associated with the model containing the updated
dataset.
previous : MLEResults
The results object associated with the model containing the previous
dataset.
impacted_variable : str, list, array, or slice, optional
Observation variable label or slice of labels specifying particular
impacted variables to display in output. The impacted variable(s)
describe the variables that were *affected* by the news. If you do not
know the labels for the variables, check the `endog_names` attribute of
the model instance.
tolerance : float, optional
The numerical threshold for determining zero impact. Default is that
any impact less than 1e-10 is assumed to be zero.
row_labels : iterable
Row labels (often dates) for the impacts of the revisions and news.
Attributes
----------
total_impacts : pd.DataFrame
Updates to forecasts of impacted variables from both news and data
revisions, E[y^i | post] - E[y^i | previous].
update_impacts : pd.DataFrame
Updates to forecasts of impacted variables from the news,
E[y^i | post] - E[y^i | revisions] where y^i are the impacted variables
of interest.
revision_impacts : pd.DataFrame
Updates to forecasts of impacted variables from all data revisions,
E[y^i | revisions] - E[y^i | previous].
news : pd.DataFrame
The unexpected component of the updated data,
E[y^u | post] - E[y^u | revisions] where y^u are the updated variables.
weights : pd.DataFrame
Weights describing the effect of news on variables of interest.
revisions : pd.DataFrame
The revisions between the current and previously observed data, for
revisions for which detailed impacts were computed.
revisions_all : pd.DataFrame
The revisions between the current and previously observed data,
y^r_{revised} - y^r_{previous} where y^r are the revised variables.
revision_weights : pd.DataFrame
Weights describing the effect of revisions on variables of interest,
for revisions for which detailed impacts were computed.
revision_weights_all : pd.DataFrame
Weights describing the effect of revisions on variables of interest,
including rows of NaNs for the revisions for which detailed impacts
were not computed.
update_forecasts : pd.DataFrame
Forecasts based on the previous dataset of the variables that were
updated, E[y^u | previous].
update_realized : pd.DataFrame
Actual observed data associated with the variables that were
updated, y^u
revisions_details_start : int
Integer index of first period in which detailed revision impacts were
computed.
revision_detailed_impacts : pd.DataFrame
Updates to forecasts of impacted variables from data revisions with
detailed impacts, E[y^i | revisions] - E[y^i | grouped revisions].
revision_grouped_impacts : pd.DataFrame
Updates to forecasts of impacted variables from data revisions that
were grouped together, E[y^i | grouped revisions] - E[y^i | previous].
revised_prev : pd.DataFrame
Previously observed data associated with the variables that were
revised, for revisions for which detailed impacts were computed.
revised_prev_all : pd.DataFrame
Previously observed data associated with the variables that were
revised, y^r_{previous}
revised : pd.DataFrame
Currently observed data associated with the variables that were
revised, for revisions for which detailed impacts were computed.
revised_all : pd.DataFrame
Currently observed data associated with the variables that were
revised, y^r_{revised}
prev_impacted_forecasts : pd.DataFrame
Previous forecast of the variables of interest, E[y^i | previous].
post_impacted_forecasts : pd.DataFrame
Forecast of the variables of interest after taking into account both
revisions and updates, E[y^i | post].
revisions_iloc : pd.DataFrame
The integer locations of the data revisions in the dataset.
revisions_ix : pd.DataFrame
The label-based locations of the data revisions in the dataset.
revisions_iloc_detailed : pd.DataFrame
The integer locations of the data revisions in the dataset for which
detailed impacts were computed.
revisions_ix_detailed : pd.DataFrame
The label-based locations of the data revisions in the dataset for
which detailed impacts were computed.
updates_iloc : pd.DataFrame
The integer locations of the updated data points.
updates_ix : pd.DataFrame
The label-based locations of updated data points.
state_index : array_like
Index of state variables used to compute impacts.
References
----------
.. [1] Bańbura, Marta, and Michele Modugno.
"Maximum likelihood estimation of factor models on datasets with
arbitrary pattern of missing data."
Journal of Applied Econometrics 29, no. 1 (2014): 133-160.
.. [2] Bańbura, Marta, Domenico Giannone, and Lucrezia Reichlin.
"Nowcasting."
The Oxford Handbook of Economic Forecasting. July 8, 2011.
.. [3] Bańbura, Marta, Domenico Giannone, Michele Modugno, and Lucrezia
Reichlin.
"Now-casting and the real-time data flow."
In Handbook of economic forecasting, vol. 2, pp. 195-237.
Elsevier, 2013.
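Examples
--------
A minimal sketch of how a `NewsResults` instance is typically obtained
through the state space results API rather than constructed directly.
The names `endog_previous` and `endog_updated` are illustrative
placeholders for two vintages of the same (pandas-indexed) dataset,
and the impact dates passed to `news` are likewise only examples:
>>> import statsmodels.api as sm
>>> mod = sm.tsa.SARIMAX(endog_previous, order=(1, 0, 0))
>>> res_previous = mod.fit()
>>> res_updated = res_previous.apply(endog_updated)
>>> news = res_updated.news(res_previous, start='2021-06', end='2021-08')
>>> news.total_impacts
>>> print(news.summary())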
"""
def __init__(self, news_results, model, updated, previous,
impacted_variable=None, tolerance=1e-10, row_labels=None):
# Note: `model` will be the same as one of `revised` or `previous`, but
# we need to save it as self.model so that the `predict_dates`, which
# were generated by the `_get_prediction_index` call, will be available
# for use by the base wrapping code.
self.model = model
self.updated = updated
self.previous = previous
self.news_results = news_results
self._impacted_variable = impacted_variable
self._tolerance = tolerance
self.row_labels = row_labels
self.params = [] # required for `summary` to work
self.endog_names = self.updated.model.endog_names
self.k_endog = len(self.endog_names)
self.n_revisions = len(self.news_results.revisions_ix)
self.n_revisions_detailed = len(self.news_results.revisions_details)
self.n_revisions_grouped = len(self.news_results.revisions_grouped)
index = self.updated.model._index
columns = np.atleast_1d(self.endog_names)
# E[y^i | post]
self.post_impacted_forecasts = pd.DataFrame(
news_results.post_impacted_forecasts.T,
index=self.row_labels, columns=columns).rename_axis(
index='impact date', columns='impacted variable')
# E[y^i | previous]
self.prev_impacted_forecasts = pd.DataFrame(
news_results.prev_impacted_forecasts.T,
index=self.row_labels, columns=columns).rename_axis(
index='impact date', columns='impacted variable')
# E[y^i | post] - E[y^i | revisions]
self.update_impacts = pd.DataFrame(
news_results.update_impacts,
index=self.row_labels, columns=columns).rename_axis(
index='impact date', columns='impacted variable')
# E[y^i | revisions] - E[y^i | grouped revisions]
self.revision_detailed_impacts = pd.DataFrame(
news_results.revision_detailed_impacts,
index=self.row_labels,
columns=columns,
dtype=float,
).rename_axis(index="impact date", columns="impacted variable")
# E[y^i | revisions] - E[y^i | previous]
self.revision_impacts = pd.DataFrame(
news_results.revision_impacts,
index=self.row_labels,
columns=columns,
dtype=float,
).rename_axis(index="impact date", columns="impacted variable")
# E[y^i | grouped revisions] - E[y^i | previous]
self.revision_grouped_impacts = (
self.revision_impacts
- self.revision_detailed_impacts.fillna(0))
if self.n_revisions_grouped == 0:
self.revision_grouped_impacts.loc[:] = 0
# E[y^i | post] - E[y^i | previous]
self.total_impacts = (self.post_impacted_forecasts -
self.prev_impacted_forecasts)
# Indices of revisions and updates
self.revisions_details_start = news_results.revisions_details_start
self.revisions_iloc = pd.DataFrame(
list(zip(*news_results.revisions_ix)),
index=['revision date', 'revised variable']).T
iloc = self.revisions_iloc
if len(iloc) > 0:
self.revisions_ix = pd.DataFrame({
'revision date': index[iloc['revision date']],
'revised variable': columns[iloc['revised variable']]})
else:
self.revisions_ix = iloc.copy()
mask = iloc['revision date'] >= self.revisions_details_start
self.revisions_iloc_detailed = self.revisions_iloc[mask]
self.revisions_ix_detailed = self.revisions_ix[mask]
self.updates_iloc = pd.DataFrame(
list(zip(*news_results.updates_ix)),
index=['update date', 'updated variable']).T
iloc = self.updates_iloc
if len(iloc) > 0:
self.updates_ix = pd.DataFrame({
'update date': index[iloc['update date']],
'updated variable': columns[iloc['updated variable']]})
else:
self.updates_ix = iloc.copy()
# Index of the state variables used
self.state_index = news_results.state_index
# Wrap forecasts and forecasts errors
r_ix_all = pd.MultiIndex.from_arrays([
self.revisions_ix['revision date'],
self.revisions_ix['revised variable']])
r_ix = pd.MultiIndex.from_arrays([
self.revisions_ix_detailed['revision date'],
self.revisions_ix_detailed['revised variable']])
u_ix = pd.MultiIndex.from_arrays([
self.updates_ix['update date'],
self.updates_ix['updated variable']])
# E[y^u | post] - E[y^u | revisions]
if news_results.news is None:
self.news = pd.Series([], index=u_ix, name='news',
dtype=model.params.dtype)
else:
self.news = pd.Series(news_results.news, index=u_ix, name='news')
# Revisions to data (y^r_{revised} - y^r_{previous})
if news_results.revisions_all is None:
self.revisions_all = pd.Series([], index=r_ix_all, name='revision',
dtype=model.params.dtype)
else:
self.revisions_all = pd.Series(news_results.revisions_all,
index=r_ix_all, name='revision')
# Revisions to data (y^r_{revised} - y^r_{previous}) for which detailed
# impacts were computed
if news_results.revisions is None:
self.revisions = pd.Series([], index=r_ix, name='revision',
dtype=model.params.dtype)
else:
self.revisions = pd.Series(news_results.revisions,
index=r_ix, name='revision')
# E[y^u | revised]
if news_results.update_forecasts is None:
self.update_forecasts = pd.Series([], index=u_ix,
dtype=model.params.dtype)
else:
self.update_forecasts = pd.Series(
news_results.update_forecasts, index=u_ix)
# y^r_{revised}
if news_results.revised_all is None:
self.revised_all = pd.Series([], index=r_ix_all,
dtype=model.params.dtype,
name='revised')
else:
self.revised_all = pd.Series(news_results.revised_all,
index=r_ix_all, name='revised')
# y^r_{revised} for which detailed impacts were computed
if news_results.revised is None:
self.revised = pd.Series([], index=r_ix, dtype=model.params.dtype,
name='revised')
else:
self.revised = pd.Series(news_results.revised, index=r_ix,
name='revised')
# y^r_{previous}
if news_results.revised_prev_all is None:
self.revised_prev_all = pd.Series([], index=r_ix_all,
dtype=model.params.dtype)
else:
self.revised_prev_all = pd.Series(
news_results.revised_prev_all, index=r_ix_all)
# y^r_{previous} for which detailed impacts were computed
if news_results.revised_prev is None:
self.revised_prev = pd.Series([], index=r_ix,
dtype=model.params.dtype)
else:
self.revised_prev = pd.Series(
news_results.revised_prev, index=r_ix)
# y^u
if news_results.update_realized is None:
self.update_realized = pd.Series([], index=u_ix,
dtype=model.params.dtype)
else:
self.update_realized = pd.Series(
news_results.update_realized, index=u_ix)
cols = pd.MultiIndex.from_product([self.row_labels, columns])
# reshaped version of gain matrix E[y A'] E[A A']^{-1}
if len(self.updates_iloc):
weights = news_results.gain.reshape(
len(cols), len(u_ix))
else:
weights = np.zeros((len(cols), len(u_ix)))
self.weights = pd.DataFrame(weights, index=cols, columns=u_ix).T
self.weights.columns.names = ['impact date', 'impacted variable']
# reshaped version of revision_weights
if self.n_revisions_detailed > 0:
revision_weights = news_results.revision_weights.reshape(
len(cols), len(r_ix))
else:
revision_weights = np.zeros((len(cols), len(r_ix)))
self.revision_weights = pd.DataFrame(
revision_weights, index=cols, columns=r_ix).T
self.revision_weights.columns.names = [
'impact date', 'impacted variable']
self.revision_weights_all = self.revision_weights.reindex(
self.revised_all.index)
@property
def impacted_variable(self):
return self._impacted_variable
@impacted_variable.setter
def impacted_variable(self, value):
self._impacted_variable = value
@property
def tolerance(self):
return self._tolerance
@tolerance.setter
def tolerance(self, value):
self._tolerance = value
@property
def data_revisions(self):
"""
Revisions to data points that existed in the previous dataset
Returns
-------
data_revisions : pd.DataFrame
Index is a MultiIndex consisting of `revision date` and
`revised variable`. The columns are:
- `observed (prev)`: the value of the data as it was observed
in the previous dataset.
- `revised`: the revised value of the data, as it is observed
in the new dataset
- `detailed impacts computed`: whether or not detailed impacts have
been computed in these NewsResults for this revision
See also
--------
data_updates
"""
# Save revisions data
data = pd.concat([
self.revised_all.rename('revised'),
self.revised_prev_all.rename('observed (prev)')
], axis=1).sort_index()
data['detailed impacts computed'] = (
self.revised_all.index.isin(self.revised.index))
return data
@property
def data_updates(self):
"""
Updated data; new entries that did not exist in the previous dataset
Returns
-------
data_updates : pd.DataFrame
Index is a MultiIndex consisting of `update date` and
`updated variable`. The columns are:
- `forecast (prev)`: the previous forecast of the new entry,
based on the information available in the previous dataset
(recall that for these updated data points, the previous dataset
had no observed value for them at all)
- `observed`: the value of the new entry, as it is observed in the
new dataset
See also
--------
data_revisions
"""
data = pd.concat([
self.update_realized.rename('observed'),
self.update_forecasts.rename('forecast (prev)')
], axis=1).sort_index()
return data
@property
def details_by_impact(self):
"""
Details of forecast revisions from news, organized by impacts first
Returns
-------
details : pd.DataFrame
Index is a MultiIndex consisting of:
- `impact date`: the date of the impact on the variable of interest
- `impacted variable`: the variable that is being impacted
- `update date`: the date of the data update, that results in
`news` that impacts the forecast of variables of interest
- `updated variable`: the variable being updated, that results in
`news` that impacts the forecast of variables of interest
The columns are:
- `forecast (prev)`: the previous forecast of the new entry,
based on the information available in the previous dataset
- `observed`: the value of the new entry, as it is observed in the
new dataset
- `news`: the news associated with the update (this is just the
forecast error: `observed` - `forecast (prev)`)
- `weight`: the weight describing how the `news` affects the
forecast of the variable of interest
- `impact`: the impact of the `news` on the forecast of the
variable of interest
Notes
-----
This table decomposes updated forecasts of variables of interest from
the `news` associated with each updated datapoint from the new data
release.
This table does not summarize the impacts or show the effect of
revisions. That information can be found in the `impacts` or
`revision_details_by_impact` tables.
This form of the details table is organized so that the impacted
dates / variables are first in the index. This is convenient for
slicing by impacted variables / dates to view the details of data
updates for a particular variable or date.
However, since the `forecast (prev)` and `observed` columns have a lot
of duplication, printing the entire table gives a result that is less
easy to parse than that produced by the `details_by_update` property.
`details_by_update` contains the same information but is organized to
be more convenient for displaying the entire table of detailed updates.
At the same time, `details_by_update` is less convenient for
subsetting.
See Also
--------
details_by_update
revision_details_by_update
impacts
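Examples
--------
Continuing the sketch from the class-level example (`news` is a
`NewsResults` instance; the label 'realgdp' and the date '2021-06' are
illustrative):
>>> details = news.details_by_impact
>>> details.loc['2021-06']                     # a single impact date
>>> details.loc[(slice(None), 'realgdp'), :]   # a single impacted variable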
"""
s = self.weights.stack(level=[0, 1], **FUTURE_STACK)
df = s.rename('weight').to_frame()
if len(self.updates_iloc):
df['forecast (prev)'] = self.update_forecasts
df['observed'] = self.update_realized
df['news'] = self.news
df['impact'] = df['news'] * df['weight']
else:
df['forecast (prev)'] = []
df['observed'] = []
df['news'] = []
df['impact'] = []
df = df[['observed', 'forecast (prev)', 'news', 'weight', 'impact']]
df = df.reorder_levels([2, 3, 0, 1]).sort_index()
if self.impacted_variable is not None and len(df) > 0:
df = df.loc[np.s_[:, self.impacted_variable], :]
mask = np.abs(df['impact']) > self.tolerance
return df[mask]
@property
def _revision_grouped_impacts(self):
s = self.revision_grouped_impacts.stack(**FUTURE_STACK)
df = s.rename('impact').to_frame()
df = df.reindex(['revision date', 'revised variable', 'impact'],
axis=1)
if self.revisions_details_start > 0:
df['revision date'] = (
self.updated.model._index[self.revisions_details_start - 1])
df['revised variable'] = 'all prior revisions'
df = (df.set_index(['revision date', 'revised variable'], append=True)
.reorder_levels([2, 3, 0, 1]))
return df
@property
def revision_details_by_impact(self):
"""
Details of forecast revisions from revised data, organized by impacts
Returns
-------
details : pd.DataFrame
Index is a MultiIndex consisting of:
- `impact date`: the date of the impact on the variable of interest
- `impacted variable`: the variable that is being impacted
- `revision date`: the date of the data revision, that results in
`revision` that impacts the forecast of variables of interest
- `revised variable`: the variable being revised, that results in
`revision` that impacts the forecast of variables of interest
The columns are:
- `observed (prev)`: the previous value of the observation, as it
was given in the previous dataset
- `revised`: the value of the revised entry, as it is observed in
the new dataset
- `revision`: the revision (this is `revised` - `observed (prev)`)
- `weight`: the weight describing how the `revision` affects the
forecast of the variable of interest
- `impact`: the impact of the `revision` on the forecast of the
variable of interest
Notes
-----
This table decomposes updated forecasts of variables of interest from
the `revision` associated with each revised datapoint from the new data
release.
This table does not summarize the impacts or show the effect of
new datapoints. That information can be found in the
`impacts` or `details_by_impact` tables.
Grouped impacts are shown in this table, with a "revision date" equal
to the last period prior to which detailed revisions were computed and
with "revised variable" set to the string "all prior revisions". For
these rows, all columns except "impact" will be set to NaNs.
This form of the details table is organized so that the impacted
dates / variables are first in the index. This is convenient for
slicing by impacted variables / dates to view the details of data
updates for a particular variable or date.
However, since the `observed (prev)` and `revised` columns have a lot
of duplication, printing the entire table gives a result that is less
easy to parse than that produced by the `revision_details_by_update`
property. `revision_details_by_update` contains the same information
but is organized to be more convenient for displaying the entire table
of detailed revisions. At the same time, `revision_details_by_update`
is less convenient for subsetting.
See Also
--------
revision_details_by_update
details_by_impact
impacts
"""
weights = self.revision_weights.stack(level=[0, 1], **FUTURE_STACK)
df = pd.concat([
self.revised.reindex(weights.index),
self.revised_prev.rename('observed (prev)').reindex(weights.index),
self.revisions.reindex(weights.index),
weights.rename('weight'),
(self.revisions.reindex(weights.index) * weights).rename('impact'),
], axis=1)
if self.n_revisions_grouped > 0:
df = pd.concat([df, self._revision_grouped_impacts])
# Explicitly set names for compatibility with pandas=1.2.5
df.index = df.index.set_names(
['revision date', 'revised variable',
'impact date', 'impacted variable'])
df = df.reorder_levels([2, 3, 0, 1]).sort_index()
if self.impacted_variable is not None and len(df) > 0:
df = df.loc[np.s_[:, self.impacted_variable], :]
mask = np.abs(df['impact']) > self.tolerance
return df[mask]
@property
def details_by_update(self):
"""
Details of forecast revisions from news, organized by updates first
Returns
-------
details : pd.DataFrame
Index is a MultiIndex consisting of:
- `update date`: the date of the data update, that results in
`news` that impacts the forecast of variables of interest
- `updated variable`: the variable being updated, that results in
`news` that impacts the forecast of variables of interest
- `forecast (prev)`: the previous forecast of the new entry,
based on the information available in the previous dataset
- `observed`: the value of the new entry, as it is observed in the
new dataset
- `impact date`: the date of the impact on the variable of interest
- `impacted variable`: the variable that is being impacted
The columns are:
- `news`: the news associated with the update (this is just the
forecast error: `observed` - `forecast (prev)`)
- `weight`: the weight describing how the `news` affects the
forecast of the variable of interest
- `impact`: the impact of the `news` on the forecast of the
variable of interest
Notes
-----
This table decomposes updated forecasts of variables of interest from
the `news` associated with each updated datapoint from the new data
release.
This table does not summarize the impacts or show the effect of
revisions. That information can be found in the `impacts` table.
This form of the details table is organized so that the updated
dates / variables are first in the index, and in this table the index
also contains the forecasts and observed values of the updates. This is
convenient for displaying the entire table of detailed updates because
it allows sparsifying duplicate entries.
However, since it includes forecasts and observed values in the index
of the table, it is not convenient for subsetting by the variable of
interest. Instead, the `details_by_impact` property is organized to
make slicing by impacted variables / dates easy. This allows, for
example, viewing the details of data updates on a particular variable
or date of interest.
See Also
--------
details_by_impact
impacts
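Examples
--------
Continuing the sketch from the class-level example (`news` is a
`NewsResults` instance):
>>> news.details_by_update
>>> # Summing the `impact` column over the update levels approximately
>>> # reproduces `update_impacts` (tiny impacts are masked by `tolerance`)
>>> by_impact = ['impact date', 'impacted variable']
>>> news.details_by_update.groupby(by_impact)['impact'].sum()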
"""
s = self.weights.stack(level=[0, 1], **FUTURE_STACK)
df = s.rename('weight').to_frame()
if len(self.updates_iloc):
df['forecast (prev)'] = self.update_forecasts
df['observed'] = self.update_realized
df['news'] = self.news
df['impact'] = df['news'] * df['weight']
else:
df['forecast (prev)'] = []
df['observed'] = []
df['news'] = []
df['impact'] = []
df = df[['forecast (prev)', 'observed', 'news',
'weight', 'impact']]
df = df.reset_index()
keys = ['update date', 'updated variable', 'observed',
'forecast (prev)', 'impact date', 'impacted variable']
df.index = pd.MultiIndex.from_arrays([df[key] for key in keys])
details = df.drop(keys, axis=1).sort_index()
if self.impacted_variable is not None and len(df) > 0:
details = details.loc[
np.s_[:, :, :, :, :, self.impacted_variable], :]
mask = np.abs(details['impact']) > self.tolerance
return details[mask]
@property
def revision_details_by_update(self):
"""
Details of forecast revisions from revisions, organized by updates
Returns
-------
details : pd.DataFrame
Index is a MultiIndex consisting of:
- `revision date`: the date of the data revision, that results in
`revision` that impacts the forecast of variables of interest
- `revised variable`: the variable being revised, that results in
`revision` that impacts the forecast of variables of interest
- `observed (prev)`: the previous value of the observation, as it
was given in the previous dataset
- `revised`: the value of the revised entry, as it is observed in
the new dataset
- `impact date`: the date of the impact on the variable of interest
- `impacted variable`: the variable that is being impacted
The columns are:
- `revision`: the revision (this is `revised` - `observed (prev)`)
- `weight`: the weight describing how the `revision` affects the
forecast of the variable of interest
- `impact`: the impact of the `revision` on the forecast of the
variable of interest
Notes
-----
This table decomposes updated forecasts of variables of interest from
the `revision` associated with each revised datapoint from the new data
release.
This table does not summarize the impacts or show the effect of
new datapoints, see `details_by_update` instead.
Grouped impacts are shown in this table, with a "revision date" equal
to the last period prior to which detailed revisions were computed and
with "revised variable" set to the string "all prior revisions". For
these rows, all columns except "impact" will be set to NaNs.
This form of the details table is organized so that the revision
dates / variables are first in the index, and in this table the index
also contains the previously observed and revised values. This is
convenient for displaying the entire table of detailed revisions
because it allows sparsifying duplicate entries.
However, since it includes previous observations and revisions in the
index of the table, it is not convenient for subsetting by the variable
of interest. Instead, the `revision_details_by_impact` property is
organized to make slicing by impacted variables / dates easy. This
allows, for example, viewing the details of data revisions on a
particular variable or date of interest.
See Also
--------
details_by_impact
impacts
"""
weights = self.revision_weights.stack(level=[0, 1], **FUTURE_STACK)
df = pd.concat([
self.revised_prev.rename('observed (prev)').reindex(weights.index),
self.revised.reindex(weights.index),
self.revisions.reindex(weights.index),
weights.rename('weight'),
(self.revisions.reindex(weights.index) * weights).rename('impact'),
], axis=1)
if self.n_revisions_grouped > 0:
df = pd.concat([df, self._revision_grouped_impacts])
# Explicitly set names for compatibility with pandas=1.2.5
df.index = df.index.set_names(
['revision date', 'revised variable',
'impact date', 'impacted variable'])
details = (df.set_index(['observed (prev)', 'revised'], append=True)
.reorder_levels([
'revision date', 'revised variable', 'revised',
'observed (prev)', 'impact date',
'impacted variable'])
.sort_index())
if self.impacted_variable is not None and len(df) > 0:
details = details.loc[
np.s_[:, :, :, :, :, self.impacted_variable], :]
mask = np.abs(details['impact']) > self.tolerance
return details[mask]
@property
def impacts(self):
"""
Impacts from news and revisions on all dates / variables of interest
Returns
-------
impacts : pd.DataFrame
Index is a MultiIndex consisting of:
- `impact date`: the date of the impact on the variable of interest
- `impacted variable`: the variable that is being impacted
The columns are:
- `estimate (prev)`: the previous estimate / forecast of the
date / variable of interest.
- `impact of revisions`: the impact of all data revisions on
the estimate of the date / variable of interest.
- `impact of news`: the impact of all news on the estimate of
the date / variable of interest.
- `total impact`: the total impact of both revisions and news on
the estimate of the date / variable of interest.
- `estimate (new)`: the new estimate / forecast of the
date / variable of interest after taking into account the effects
of the revisions and news.
Notes
-----
This table decomposes updated forecasts of variables of interest into
the overall effect from revisions and news.
This table does not break down the detail by the updated
dates / variables. That information can be found in the
`details_by_impact` or `details_by_update` tables.
See Also
--------
details_by_impact
details_by_update
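Examples
--------
Continuing the sketch from the class-level example (`news` is a
`NewsResults` instance; 'realgdp' is an illustrative variable label):
>>> news.impacts
>>> news.impacts.loc[(slice(None), 'realgdp'), :]   # one impacted variable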
"""
# Summary of impacts
impacts = pd.concat([
self.prev_impacted_forecasts.unstack().rename('estimate (prev)'),
self.revision_impacts.unstack().rename('impact of revisions'),
self.update_impacts.unstack().rename('impact of news'),
self.post_impacted_forecasts.unstack().rename('estimate (new)')],
axis=1)
impacts['impact of revisions'] = (
impacts['impact of revisions'].astype(float).fillna(0))
impacts['impact of news'] = (
impacts['impact of news'].astype(float).fillna(0))
impacts['total impact'] = (impacts['impact of revisions'] +
impacts['impact of news'])
impacts = impacts.reorder_levels([1, 0]).sort_index()
impacts.index.names = ['impact date', 'impacted variable']
impacts = impacts[['estimate (prev)', 'impact of revisions',
'impact of news', 'total impact', 'estimate (new)']]
if self.impacted_variable is not None:
impacts = impacts.loc[np.s_[:, self.impacted_variable], :]
tmp = np.abs(impacts[['impact of revisions', 'impact of news']])
mask = (tmp > self.tolerance).any(axis=1)
return impacts[mask]
def summary_impacts(self, impact_date=None, impacted_variable=None,
groupby='impact date', show_revisions_columns=None,
sparsify=True, float_format='%.2f'):
"""
Create summary table with detailed impacts from news; by date, variable
Parameters
----------
impact_date : int, str, datetime, list, array, or slice, optional
Observation index label or slice of labels specifying particular
impact periods to display. The impact date(s) describe the periods
in which impacted variables were *affected* by the news. If this
argument is given, the output table will only show this impact date
or dates. Note that this argument is passed to the Pandas `loc`
accessor, and so it should correspond to the labels of the model's
index. If the model was created with data in a list or numpy array,
then these labels will be zero-indexed observation integers.
impacted_variable : str, list, array, or slice, optional
Observation variable label or slice of labels specifying particular
impacted variables to display. The impacted variable(s) describe
the variables that were *affected* by the news. If you do not know
the labels for the variables, check the `endog_names` attribute of
the model instance.
groupby : {impact date, impacted variable}
The primary variable for grouping results in the impacts table. The
default is to group by impact date.
show_revisions_columns : bool, optional
If set to False, the impacts table will not show the impacts from
data revisions or the total impacts. Default is to show the
revisions and totals columns if any revisions were made and
otherwise to hide them.
sparsify : bool, optional, default True
Set to False for the table to include every one of the multiindex
keys at each row.
float_format : str, optional
Formatter format string syntax for converting numbers to strings.
Default is '%.2f'.
Returns
-------
impacts_table : SimpleTable
Table describing total impacts from both revisions and news. See
the documentation for the `impacts` attribute for more details
about the index and columns.
See Also
--------
impacts
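Examples
--------
Continuing the sketch from the class-level example (`news` is a
`NewsResults` instance; the date label is illustrative):
>>> print(news.summary_impacts(groupby='impacted variable'))
>>> print(news.summary_impacts(impact_date='2021-06',
...                            float_format='%.3f'))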
"""
# Squeeze for univariate models
if impacted_variable is None and self.k_endog == 1:
impacted_variable = self.endog_names[0]
# Default is to only show the revisions columns if there were any
# revisions (otherwise it would just be a column of zeros)
if show_revisions_columns is None:
show_revisions_columns = self.n_revisions > 0
# Select only the variables / dates of interest
s = list(np.s_[:, :])
if impact_date is not None:
s[0] = np.s_[impact_date]
if impacted_variable is not None:
s[1] = np.s_[impacted_variable]
s = tuple(s)
impacts = self.impacts.loc[s, :]
# Make the first index level the groupby level
groupby = groupby.lower()
if groupby in ['impacted variable', 'impacted_variable']:
impacts.index = impacts.index.swaplevel(1, 0)
elif groupby not in ['impact date', 'impact_date']:
raise ValueError('Invalid groupby for impacts table. Valid options'
' are "impact date" or "impacted variable".'
f'Got "{groupby}".')
impacts = impacts.sort_index()
# Drop the non-groupby level if there's only one value
tmp_index = impacts.index.remove_unused_levels()
k_vars = len(tmp_index.levels[1])
removed_level = None
if sparsify and k_vars == 1:
name = tmp_index.names[1]
value = tmp_index.levels[1][0]
removed_level = f'{name} = {value}'
impacts.index = tmp_index.droplevel(1)
try:
impacts = impacts.map(
lambda num: '' if pd.isnull(num) else float_format % num)
except AttributeError:
impacts = impacts.applymap(
lambda num: '' if pd.isnull(num) else float_format % num)
impacts = impacts.reset_index()
try:
impacts.iloc[:, 0] = impacts.iloc[:, 0].map(str)
except AttributeError:
impacts.iloc[:, 0] = impacts.iloc[:, 0].applymap(str)
else:
impacts = impacts.reset_index()
try:
impacts.iloc[:, :2] = impacts.iloc[:, :2].map(str)
impacts.iloc[:, 2:] = impacts.iloc[:, 2:].map(
lambda num: '' if pd.isnull(num) else float_format % num)
except AttributeError:
impacts.iloc[:, :2] = impacts.iloc[:, :2].applymap(str)
impacts.iloc[:, 2:] = impacts.iloc[:, 2:].applymap(
lambda num: '' if pd.isnull(num) else float_format % num)
# Sparsify the groupby column
if sparsify and groupby in impacts:
mask = impacts[groupby] == impacts[groupby].shift(1)
tmp = impacts.loc[mask, groupby]
if len(tmp) > 0:
impacts.loc[mask, groupby] = ''
# Drop revisions and totals columns if applicable
if not show_revisions_columns:
impacts.drop(['impact of revisions', 'total impact'], axis=1,
inplace=True)
params_data = impacts.values
params_header = impacts.columns.tolist()
params_stubs = None
title = 'Impacts'
if removed_level is not None:
join = 'on' if groupby == 'date' else 'for'
title += f' {join} [{removed_level}]'
impacts_table = SimpleTable(
params_data, params_header, params_stubs,
txt_fmt=fmt_params, title=title)
return impacts_table
def summary_details(self, source='news', impact_date=None,
impacted_variable=None, update_date=None,
updated_variable=None, groupby='update date',
sparsify=True, float_format='%.2f',
multiple_tables=False):
"""
Create summary table with detailed impacts; by date, variable
Parameters
----------
source : {news, revisions}
The source of impacts to summarize. Default is "news".
impact_date : int, str, datetime, list, array, or slice, optional
Observation index label or slice of labels specifying particular
impact periods to display. The impact date(s) describe the periods
in which impacted variables were *affected* by the news. If this
argument is given, the output table will only show this impact date
or dates. Note that this argument is passed to the Pandas `loc`
accessor, and so it should correspond to the labels of the model's
index. If the model was created with data in a list or numpy array,
then these labels will be zero-indexed observation integers.
impacted_variable : str, list, array, or slice, optional
Observation variable label or slice of labels specifying particular
impacted variables to display. The impacted variable(s) describe
the variables that were *affected* by the news. If you do not know
the labels for the variables, check the `endog_names` attribute of
the model instance.
update_date : int, str, datetime, list, array, or slice, optional
Observation index label or slice of labels specifying particular
updated periods to display. The updated date(s) describe the
periods in which the new data points were available that generated
the news. See the note on `impact_date` for details about what
these labels are.
updated_variable : str, list, array, or slice, optional
Observation variable label or slice of labels specifying particular
updated variables to display. The updated variable(s) describe the
variables that were *updated*, which generated the news. If you do not know the
labels for the variables, check the `endog_names` attribute of the
model instance.
groupby : {update date, updated variable, impact date, impacted variable}
The primary variable for grouping results in the details table. The
default is to group by update date.
sparsify : bool, optional, default True
Set to False for the table to include every one of the multiindex
keys at each row.
float_format : str, optional
Formatter format string syntax for converting numbers to strings.
Default is '%.2f'.
multiple_tables : bool, optional
If set to True, this function will return a list of tables, one
table for each of the unique `groupby` levels. Default is False,
in which case this function returns a single table.
Returns
-------
details_table : SimpleTable or list of SimpleTable
Table or list of tables describing how the news from each update
(i.e. news from a particular variable / date) translates into
changes to the forecasts of each impacted variable / date.
This table contains information about the updates and about the
impacts. Updates are newly observed datapoints that were not
available in the previous results set. Each update leads to news,
and the news may cause changes in the forecasts of the impacted
variables. The amount that a particular piece of news (from an
update to some variable at some date) impacts a variable at some
date depends on weights that can be computed from the model
results.
The data contained in this table that refer to updates are:
- `update date` : The date at which a new datapoint was added.
- `updated variable` : The variable for which a new datapoint was
added.
- `forecast (prev)` : The value that had been forecast by the
previous model for the given updated variable and date.
- `observed` : The observed value of the new datapoint.
- `news` : The news is the difference between the observed value
and the previously forecast value for a given updated variable
and date.
The data contained in this table that refer to impacts are:
- `impact date` : A date associated with an impact.
- `impacted variable` : A variable that was impacted by the news.
- `weight` : The weight of news from a given `update date` and
`updated variable` on a given `impacted variable` at a given
`impact date`.
- `impact` : The revision to the smoothed estimate / forecast of
the impacted variable at the impact date based specifically on
the news generated by the `updated variable` at the
`update date`.
See Also
--------
details_by_impact
details_by_update
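Examples
--------
Continuing the sketch from the class-level example (`news` is a
`NewsResults` instance; 'realgdp' is an illustrative variable label):
>>> print(news.summary_details(impacted_variable='realgdp'))
>>> for table in news.summary_details(source='revisions',
...                                   multiple_tables=True):
...     print(table)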
"""
# Squeeze for univariate models
if self.k_endog == 1:
if impacted_variable is None:
impacted_variable = self.endog_names[0]
if updated_variable is None:
updated_variable = self.endog_names[0]
# Select only the variables / dates of interest
s = list(np.s_[:, :, :, :, :, :])
if impact_date is not None:
s[0] = np.s_[impact_date]
if impacted_variable is not None:
s[1] = np.s_[impacted_variable]
if update_date is not None:
s[2] = np.s_[update_date]
if updated_variable is not None:
s[3] = np.s_[updated_variable]
s = tuple(s)
if source == 'news':
details = self.details_by_impact.loc[s, :]
columns = {
'current': 'observed',
'prev': 'forecast (prev)',
'update date': 'update date',
'updated variable': 'updated variable',
'news': 'news',
}
elif source == 'revisions':
details = self.revision_details_by_impact.loc[s, :]
columns = {
'current': 'revised',
'prev': 'observed (prev)',
'update date': 'revision date',
'updated variable': 'revised variable',
'news': 'revision',
}
else:
raise ValueError(f'Invalid `source`: {source}. Must be "news" or'
' "revisions".')
# Make the first index level the groupby level
groupby = groupby.lower().replace('_', ' ')
groupby_overall = 'impact'
levels_order = [0, 1, 2, 3]
if groupby == 'update date':
levels_order = [2, 3, 0, 1]
groupby_overall = 'update'
elif groupby == 'updated variable':
levels_order = [3, 2, 1, 0]
groupby_overall = 'update'
elif groupby == 'impacted variable':
levels_order = [1, 0, 3, 2]
elif groupby != 'impact date':
raise ValueError('Invalid groupby for details table. Valid options'
' are "update date", "updated variable",'
' "impact date",or "impacted variable".'
f' Got "{groupby}".')
details.index = (details.index.reorder_levels(levels_order)
.remove_unused_levels())
details = details.sort_index()
# If our overall group-by is `update`, move forecast (prev) and
# observed into the index
base_levels = [0, 1, 2, 3]
if groupby_overall == 'update':
details.set_index([columns['current'], columns['prev']],
append=True, inplace=True)
details.index = details.index.reorder_levels([0, 1, 4, 5, 2, 3])
base_levels = [0, 1, 4, 5]
# Drop the non-groupby levels if there's only one value
tmp_index = details.index.remove_unused_levels()
n_levels = len(tmp_index.levels)
k_level_values = [len(tmp_index.levels[i]) for i in range(n_levels)]
removed_levels = []
if sparsify:
for i in sorted(base_levels)[::-1][:-1]:
if k_level_values[i] == 1:
name = tmp_index.names[i]
value = tmp_index.levels[i][0]
can_drop = (
(name == columns['update date']
and update_date is not None) or
(name == columns['updated variable']
and updated_variable is not None) or
(name == 'impact date'
and impact_date is not None) or
(name == 'impacted variable'
and (impacted_variable is not None or
self.impacted_variable is not None)))
if can_drop or not multiple_tables:
removed_levels.insert(0, f'{name} = {value}')
details.index = tmp_index = tmp_index.droplevel(i)
# Move everything to columns
details = details.reset_index()
# Function for formatting numbers
def str_format(num, mark_ones=False, mark_zeroes=False):
if pd.isnull(num):
out = ''
elif mark_ones and np.abs(1 - num) < self.tolerance:
out = '1.0'
elif mark_zeroes and np.abs(num) < self.tolerance:
out = '0'
else:
out = float_format % num
return out
# Function to create the table
def create_table(details, removed_levels):
# Convert everything to strings
for key in [columns['current'], columns['prev'], columns['news'],
'weight', 'impact']:
if key in details:
args = (
# mark_ones
True if key in ['weight'] else False,
# mark_zeroes
True if key in ['weight', 'impact'] else False)
details[key] = details[key].apply(str_format, args=args)
for key in [columns['update date'], 'impact date']:
if key in details:
details[key] = details[key].apply(str)
# Sparsify index columns
if sparsify:
sparsify_cols = [columns['update date'],
columns['updated variable'], 'impact date',
'impacted variable']
data_cols = [columns['current'], columns['prev']]
if groupby_overall == 'update':
# Put data columns first, since we need to do an additional
# check based on the other columns before sparsifying
sparsify_cols = data_cols + sparsify_cols
for key in sparsify_cols:
if key in details:
mask = details[key] == details[key].shift(1)
if key in data_cols:
if columns['update date'] in details:
tmp = details[columns['update date']]
mask &= tmp == tmp.shift(1)
if columns['updated variable'] in details:
tmp = details[columns['updated variable']]
mask &= tmp == tmp.shift(1)
details.loc[mask, key] = ''
params_data = details.values
params_header = [str(x) for x in details.columns.tolist()]
params_stubs = None
title = f"Details of {source}"
if len(removed_levels):
title += ' for [' + ', '.join(removed_levels) + ']'
return SimpleTable(params_data, params_header, params_stubs,
txt_fmt=fmt_params, title=title)
if multiple_tables:
details_table = []
for item in details[columns[groupby]].unique():
mask = details[columns[groupby]] == item
item_details = details[mask].drop(columns[groupby], axis=1)
item_removed_levels = (
[f'{columns[groupby]} = {item}'] + removed_levels)
details_table.append(create_table(item_details,
item_removed_levels))
else:
details_table = create_table(details, removed_levels)
return details_table
def summary_revisions(self, sparsify=True):
"""
Create summary table showing revisions to the previous results' data
Parameters
----------
sparsify : bool, optional, default True
Set to False for the table to include every one of the multiindex
keys at each row.
Returns
-------
revisions_table : SimpleTable
Table showing revisions to the previous results' data. Columns are:
- `revision date` : date associated with a revised data point
- `revised variable` : variable that was revised at `revision date`
- `observed (prev)` : the observed value prior to the revision
- `revision` : the revision, i.e. the new value minus the
`observed (prev)` value
- `detailed impacts computed` : whether detailed impacts were
computed for this revision
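Examples
--------
Continuing the sketch from the class-level example (`news` is a
`NewsResults` instance):
>>> print(news.summary_revisions(sparsify=False))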
"""
data = pd.merge(
self.data_revisions, self.revisions_all, left_index=True,
right_index=True).sort_index().reset_index()
data = data[['revision date', 'revised variable', 'observed (prev)',
'revision', 'detailed impacts computed']]
try:
data[['revision date', 'revised variable']] = (
data[['revision date', 'revised variable']].map(str))
data.iloc[:, 2:-1] = data.iloc[:, 2:-1].map(
lambda num: '' if pd.isnull(num) else '%.2f' % num)
except AttributeError:
data[['revision date', 'revised variable']] = (
data[['revision date', 'revised variable']].applymap(str))
data.iloc[:, 2:-1] = data.iloc[:, 2:-1].applymap(
lambda num: '' if pd.isnull(num) else '%.2f' % num)
# Sparsify the date column
if sparsify:
mask = data['revision date'] == data['revision date'].shift(1)
data.loc[mask, 'revision date'] = ''
params_data = data.values
params_header = data.columns.tolist()
params_stubs = None
title = 'Revisions to dataset:'
revisions_table = SimpleTable(
params_data, params_header, params_stubs,
txt_fmt=fmt_params, title=title)
return revisions_table
def summary_news(self, sparsify=True):
"""
Create summary table showing news from new data since previous results
Parameters
----------
sparsify : bool, optional, default True
Set to False for the table to include every one of the multiindex
keys at each row.
Returns
-------
updates_table : SimpleTable
Table showing new datapoints that were not in the previous results'
data. Columns are:
- `update date` : date associated with a new data point.
- `updated variable` : variable for which new data was added at
`update date`.
- `forecast (prev)` : the forecast value for the updated variable
at the update date in the previous results object (i.e. prior to
the data being available).
- `observed` : the observed value of the new datapoint.
See Also
--------
data_updates
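Examples
--------
Continuing the sketch from the class-level example (`news` is a
`NewsResults` instance):
>>> print(news.summary_news())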
"""
data = pd.merge(
self.data_updates, self.news, left_index=True,
right_index=True).sort_index().reset_index()
try:
data[['update date', 'updated variable']] = (
data[['update date', 'updated variable']].map(str))
data.iloc[:, 2:] = data.iloc[:, 2:].map(
lambda num: '' if pd.isnull(num) else '%.2f' % num)
except AttributeError:
data[['update date', 'updated variable']] = (
data[['update date', 'updated variable']].applymap(str))
data.iloc[:, 2:] = data.iloc[:, 2:].applymap(
lambda num: '' if pd.isnull(num) else '%.2f' % num)
# Sparsify the date column
if sparsify:
mask = data['update date'] == data['update date'].shift(1)
data.loc[mask, 'update date'] = ''
params_data = data.values
params_header = data.columns.tolist()
params_stubs = None
title = 'News from updated observations:'
updates_table = SimpleTable(
params_data, params_header, params_stubs,
txt_fmt=fmt_params, title=title)
return updates_table
def summary(self, impact_date=None, impacted_variable=None,
update_date=None, updated_variable=None,
revision_date=None, revised_variable=None,
impacts_groupby='impact date', details_groupby='update date',
show_revisions_columns=None, sparsify=True,
include_details_tables=None, include_revisions_tables=False,
float_format='%.2f'):
"""
Create summary tables describing news and impacts
Parameters
----------
impact_date : int, str, datetime, list, array, or slice, optional
Observation index label or slice of labels specifying particular
impact periods to display. The impact date(s) describe the periods
in which impacted variables were *affected* by the news. If this
argument is given, the impact and details tables will only show
this impact date or dates. Note that this argument is passed to the
Pandas `loc` accessor, and so it should correspond to the labels of
the model's index. If the model was created with data in a list or
numpy array, then these labels will be zero-indexed observation
integers.
impacted_variable : str, list, array, or slice, optional
Observation variable label or slice of labels specifying particular
impacted variables to display. The impacted variable(s) describe
the variables that were *affected* by the news. If you do not know
the labels for the variables, check the `endog_names` attribute of
the model instance.
update_date : int, str, datetime, list, array, or slice, optional
Observation index label or slice of labels specifying particular
updated periods to display. The updated date(s) describe the
periods in which the new data points were available that generated
the news. See the note on `impact_date` for details about what
these labels are.
updated_variable : str, list, array, or slice, optional
Observation variable label or slice of labels specifying particular
updated variables to display. The updated variable(s) describe the
variables that were newly added in the updated dataset and which
generated the news. If you do not know the labels for the
variables, check the `endog_names` attribute of the model instance.
revision_date : int, str, datetime, list, array, or slice, optional
Observation index label or slice of labels specifying particular
revision periods to display. The revision date(s) describe the
periods in which the data points were revised. See the note on
`impact_date` for details about what these labels are.
revised_variable : str, list, array, or slice, optional
Observation variable label or slice of labels specifying particular
revised variables to display. The revised variable(s) describe the
variables that were *revised*. If you do not know the labels for
the variables, check the `endog_names` attribute of the model
instance.
impacts_groupby : {impact date, impacted variable}
The primary variable for grouping results in the impacts table. The
default is to group by impact date.
details_groupby : str
One of "update date", "updated date", "impact date", or
"impacted date". The primary variable for grouping results in the
details table. Only used if the details tables are included. The
default is to group by update date.
show_revisions_columns : bool, optional
If set to False, the impacts table will not show the impacts from
data revisions or the total impacts. Default is to show the
revisions and totals columns if any revisions were made and
otherwise to hide them.
sparsify : bool, optional, default True
Set to False for the table to include every one of the multiindex
keys at each row.
include_details_tables : bool, optional
If set to True, the summary will show tables describing the details
of how news from specific updates translate into specific impacts.
These tables can be very long, particularly in cases where there
were many updates and in multivariate models. The default is to
show detailed tables only for univariate models.
include_revisions_tables : bool, optional
If set to True, the summary will show tables describing the
revisions and updates that lead to impacts on variables of
interest.
float_format : str, optional
Formatter format string syntax for converting numbers to strings.
Default is '%.2f'.
Returns
-------
summary_tables : Summary
Summary tables describing news and impacts. Basic tables include:
- A table with general information about the sample.
- A table describing the impacts of revisions and news.
- Tables describing revisions in the dataset since the previous
results set (unless `include_revisions_tables=False`).
In univariate models or if `include_details_tables=True`, one or
more tables will additionally be included describing the details
of how news from specific updates translate into specific impacts.
See Also
--------
summary_impacts
summary_details
summary_revisions
summary_news
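Examples
--------
Continuing the sketch from the class-level example (`news` is a
`NewsResults` instance; 'realgdp' is an illustrative variable label):
>>> print(news.summary())
>>> print(news.summary(impacted_variable='realgdp',
...                    include_revisions_tables=True))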
"""
# Default for include_details_tables
if include_details_tables is None:
include_details_tables = (self.k_endog == 1)
# Model specification results
model = self.model.model
title = 'News'
def get_sample(model):
if model._index_dates:
mask = ~np.isnan(model.endog).all(axis=1)
ix = model._index[mask]
d = ix[0]
sample = ['%s' % d]
d = ix[-1]
sample += ['- ' + '%s' % d]
else:
sample = [str(0), ' - ' + str(model.nobs)]
return sample
previous_sample = get_sample(self.previous.model)
revised_sample = get_sample(self.updated.model)
# Standardize the model name as a list of str
model_name = model.__class__.__name__
# Top summary table
top_left = [('Model:', [model_name]),
('Date:', None),
('Time:', None)]
if self.state_index is not None:
k_states_used = len(self.state_index)
if k_states_used != self.model.model.k_states:
top_left.append(('# of included states:', [k_states_used]))
top_right = [
('Original sample:', [previous_sample[0]]),
('', [previous_sample[1]]),
('Update through:', [revised_sample[1][2:]]),
('# of revisions:', [len(self.revisions_ix)]),
('# of new datapoints:', [len(self.updates_ix)])]
summary = Summary()
self.model.endog_names = self.model.model.endog_names
summary.add_table_2cols(self, gleft=top_left, gright=top_right,
title=title)
table_ix = 1
# Impact table
summary.tables.insert(table_ix, self.summary_impacts(
impact_date=impact_date, impacted_variable=impacted_variable,
groupby=impacts_groupby,
show_revisions_columns=show_revisions_columns, sparsify=sparsify,
float_format=float_format))
table_ix += 1
# News table
if len(self.updates_iloc) > 0:
summary.tables.insert(
table_ix, self.summary_news(sparsify=sparsify))
table_ix += 1
# Detail tables
multiple_tables = (self.k_endog > 1)
details_tables = self.summary_details(
source='news',
impact_date=impact_date, impacted_variable=impacted_variable,
update_date=update_date, updated_variable=updated_variable,
groupby=details_groupby, sparsify=sparsify,
float_format=float_format, multiple_tables=multiple_tables)
if not multiple_tables:
details_tables = [details_tables]
if include_details_tables:
for table in details_tables:
summary.tables.insert(table_ix, table)
table_ix += 1
# Revisions
if include_revisions_tables and self.n_revisions > 0:
summary.tables.insert(
table_ix, self.summary_revisions(sparsify=sparsify))
table_ix += 1
# Revision detail tables
revision_details_tables = self.summary_details(
source='revisions',
impact_date=impact_date, impacted_variable=impacted_variable,
update_date=revision_date, updated_variable=revised_variable,
groupby=details_groupby, sparsify=sparsify,
float_format=float_format, multiple_tables=multiple_tables)
if not multiple_tables:
revision_details_tables = [revision_details_tables]
if include_details_tables:
for table in revision_details_tables:
summary.tables.insert(table_ix, table)
table_ix += 1
return summary
def get_details(self, include_revisions=True, include_updates=True):
details = []
if include_updates:
details.append(self.details_by_impact.rename(
columns={'forecast (prev)': 'previous'}))
if include_revisions:
tmp = self.revision_details_by_impact.rename_axis(
index={'revision date': 'update date',
'revised variable': 'updated variable'})
tmp = tmp.rename(columns={'revised': 'observed',
'observed (prev)': 'previous',
'revision': 'news'})
details.append(tmp)
if not (include_updates or include_revisions):
details.append(self.details_by_impact.rename(
columns={'forecast (prev)': 'previous'}).iloc[:0])
return pd.concat(details)
def get_impacts(self, groupby=None, include_revisions=True,
include_updates=True):
details = self.get_details(include_revisions=include_revisions,
include_updates=include_updates)
impacts = details['impact'].unstack(['impact date',
'impacted variable'])
if groupby is not None:
impacts = (impacts.unstack('update date')
.groupby(groupby).sum(min_count=1)
.stack('update date')
.swaplevel()
.sort_index())
return impacts
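# Illustrative sketch (in comments only, not executed): with `news` a
# NewsResults instance constructed as in the class docstring example,
# `get_details` and `get_impacts` return the combined news / revision
# decompositions as plain DataFrames rather than summary tables:
#
#     details = news.get_details(include_revisions=True)
#     impacts = news.get_impacts()
#
# Passing `groupby` to `get_impacts` aggregates the rows of the impacts
# table; for example, a dict mapping each updated variable to a coarser
# group label (an assumed usage, shown purely as an illustration) sums
# the impacts within each group.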