1069 lines
45 KiB
Python
1069 lines
45 KiB
Python
"""
|
|
SARIMAX specification class.
|
|
|
|
Author: Chad Fulton
|
|
License: BSD-3
|
|
"""
|
|
import numpy as np
|
|
import pandas as pd
|
|
|
|
from statsmodels.tools.data import _is_using_pandas
|
|
from statsmodels.tsa.base.tsa_model import TimeSeriesModel
|
|
from statsmodels.tsa.statespace.tools import (
|
|
is_invertible, constrain_stationary_univariate as constrain,
|
|
unconstrain_stationary_univariate as unconstrain,
|
|
prepare_exog, prepare_trend_spec, prepare_trend_data)
|
|
|
|
from statsmodels.tsa.arima.tools import standardize_lag_order, validate_basic
|
|
|
|
|
|
class SARIMAXSpecification:
|
|
"""
|
|
SARIMAX specification.
|
|
|
|
Parameters
|
|
----------
|
|
endog : array_like, optional
|
|
The observed time-series process :math:`y`.
|
|
exog : array_like, optional
|
|
Array of exogenous regressors.
|
|
order : tuple, optional
|
|
The (p,d,q) order of the model for the autoregressive, differences, and
|
|
moving average components. d is always an integer, while p and q may
|
|
either be integers or lists of integers. May not be used in combination
|
|
with the arguments `ar_order`, `diff`, or `ma_order`.
|
|
seasonal_order : tuple, optional
|
|
The (P,D,Q,s) order of the seasonal component of the model for the
|
|
AR parameters, differences, MA parameters, and periodicity. Default
|
|
is (0, 0, 0, 0). D and s are always integers, while P and Q
|
|
may either be integers or lists of positive integers. May not be used
|
|
in combination with the arguments `seasonal_ar_order`, `seasonal_diff`,
|
|
or `seasonal_ma_order`.
|
|
ar_order : int or list of int
|
|
The autoregressive order of the model. May be an integer, in which case
|
|
all autoregressive lags up to and including it will be included.
|
|
Alternatively, may be a list of integers specifying which lag orders
|
|
are included. May not be used in combination with `order`.
|
|
diff : int
|
|
The order of integration of the model. May not be used in combination
|
|
with `order`.
|
|
ma_order : int or list of int
|
|
The moving average order of the model. May be an integer or
|
|
list of integers. See the documentation for `ar_order` for details.
|
|
May not be used in combination with `order`.
|
|
seasonal_ar_order : int or list of int
|
|
The seasonal autoregressive order of the model. May be an integer or
|
|
list of integers. See the documentation for `ar_order` for examples.
|
|
Note that if `seasonal_periods = 4` and `seasonal_ar_order = 2`, then
|
|
this implies that the overall model will include lags 4 and 8.
|
|
May not be used in combination with `seasonal_order`.
|
|
seasonal_diff : int
|
|
The order of seasonal integration of the model. May not be used in
|
|
combination with `seasonal_order`.
|
|
seasonal_ma_order : int or list of int
|
|
The moving average order of the model. May be an integer or
|
|
list of integers. See the documentation for `ar_order` and
|
|
`seasonal_ar_order` for additional details. May not be used in
|
|
combination with `seasonal_order`.
|
|
seasonal_periods : int
|
|
Number of periods in a season. May not be used in combination with
|
|
`seasonal_order`.
|
|
enforce_stationarity : bool, optional
|
|
Whether or not to require the autoregressive parameters to correspond
|
|
to a stationarity process. This is only possible in estimation by
|
|
numerical maximum likelihood.
|
|
enforce_invertibility : bool, optional
|
|
Whether or not to require the moving average parameters to correspond
|
|
to an invertible process. This is only possible in estimation by
|
|
numerical maximum likelihood.
|
|
concentrate_scale : bool, optional
|
|
Whether or not to concentrate the scale (variance of the error term)
|
|
out of the likelihood. This reduces the number of parameters by one.
|
|
This is only applicable when considering estimation by numerical
|
|
maximum likelihood.
|
|
dates : array_like of datetime, optional
|
|
If no index is given by `endog` or `exog`, an array-like object of
|
|
datetime objects can be provided.
|
|
freq : str, optional
|
|
If no index is given by `endog` or `exog`, the frequency of the
|
|
time-series may be specified here as a Pandas offset or offset string.
|
|
missing : str
|
|
Available options are 'none', 'drop', and 'raise'. If 'none', no nan
|
|
checking is done. If 'drop', any observations with nans are dropped.
|
|
If 'raise', an error is raised. Default is 'none'.
|
|
|
|
Attributes
|
|
----------
|
|
order : tuple, optional
|
|
The (p,d,q) order of the model for the autoregressive, differences, and
|
|
moving average components. d is always an integer, while p and q may
|
|
either be integers or lists of integers.
|
|
seasonal_order : tuple, optional
|
|
The (P,D,Q,s) order of the seasonal component of the model for the
|
|
AR parameters, differences, MA parameters, and periodicity. Default
|
|
is (0, 0, 0, 0). D and s are always integers, while P and Q
|
|
may either be integers or lists of positive integers.
|
|
ar_order : int or list of int
|
|
The autoregressive order of the model. May be an integer, in which case
|
|
all autoregressive lags up to and including it will be included. For
|
|
example, if `ar_order = 3`, then the model will include lags 1, 2,
|
|
and 3. Alternatively, may be a list of integers specifying exactly
|
|
which lag orders are included. For example, if `ar_order = [1, 3]`,
|
|
then the model will include lags 1 and 3 but will exclude lag 2.
|
|
diff : int
|
|
The order of integration of the model.
|
|
ma_order : int or list of int
|
|
The moving average order of the model. May be an integer or
|
|
list of integers. See the documentation for `ar_order` for examples.
|
|
seasonal_ar_order : int or list of int
|
|
The seasonal autoregressive order of the model. May be an integer or
|
|
list of integers. See the documentation for `ar_order` for examples.
|
|
Note that if `seasonal_periods = 4` and `seasonal_ar_order = 2`, then
|
|
this implies that the overall model will include lags 4 and 8.
|
|
seasonal_diff : int
|
|
The order of seasonal integration of the model.
|
|
seasonal_ma_order : int or list of int
|
|
The moving average order of the model. May be an integer or
|
|
list of integers. See the documentation for `ar_order` and
|
|
`seasonal_ar_order` for additional details.
|
|
seasonal_periods : int
|
|
Number of periods in a season.
|
|
trend : str{'n','c','t','ct'} or iterable, optional
|
|
Parameter controlling the deterministic trend polynomial :math:`A(t)`.
|
|
Can be specified as a string where 'c' indicates a constant (i.e. a
|
|
degree zero component of the trend polynomial), 't' indicates a
|
|
linear trend with time, and 'ct' is both. Can also be specified as an
|
|
iterable defining the polynomial as in `numpy.poly1d`, where
|
|
`[1,1,0,1]` would denote :math:`a + bt + ct^3`. Default is to not
|
|
include a trend component.
|
|
ar_lags : list of int
|
|
List of included autoregressive lags. If `ar_order` is a list, then
|
|
`ar_lags == ar_order`. If `ar_lags = [1, 2]`, then the overall model
|
|
will include the 1st and 2nd autoregressive lags.
|
|
ma_lags : list of int
|
|
List of included moving average lags. If `ma_order` is a list, then
|
|
`ma_lags == ma_order`. If `ma_lags = [1, 2]`, then the overall model
|
|
will include the 1st and 2nd moving average lags.
|
|
seasonal_ar_lags : list of int
|
|
List of included seasonal autoregressive lags. If `seasonal_ar_order`
|
|
is a list, then `seasonal_ar_lags == seasonal_ar_order`. If
|
|
`seasonal_periods = 4` and `seasonal_ar_lags = [1, 2]`, then the
|
|
overall model will include the 4th and 8th autoregressive lags.
|
|
seasonal_ma_lags : list of int
|
|
List of included seasonal moving average lags. If `seasonal_ma_order`
|
|
is a list, then `seasonal_ma_lags == seasonal_ma_order`. See the
|
|
documentation to `seasonal_ar_lags` for examples.
|
|
max_ar_order : int
|
|
Largest included autoregressive lag.
|
|
max_ma_order : int
|
|
Largest included moving average lag.
|
|
max_seasonal_ar_order : int
|
|
Largest included seasonal autoregressive lag.
|
|
max_seasonal_ma_order : int
|
|
Largest included seasonal moving average lag.
|
|
max_reduced_ar_order : int
|
|
Largest lag in the reduced autoregressive polynomial. Equal to
|
|
`max_ar_order + max_seasonal_ar_order * seasonal_periods`.
|
|
max_reduced_ma_order : int
|
|
Largest lag in the reduced moving average polynomial. Equal to
|
|
`max_ma_order + max_seasonal_ma_order * seasonal_periods`.
|
|
enforce_stationarity : bool
|
|
Whether or not to transform the AR parameters to enforce stationarity
|
|
in the autoregressive component of the model. This is only possible
|
|
in estimation by numerical maximum likelihood.
|
|
enforce_invertibility : bool
|
|
Whether or not to transform the MA parameters to enforce invertibility
|
|
in the moving average component of the model. This is only possible
|
|
in estimation by numerical maximum likelihood.
|
|
concentrate_scale : bool
|
|
Whether or not to concentrate the variance (scale term) out of the
|
|
log-likelihood function. This is only applicable when considering
|
|
estimation by numerical maximum likelihood.
|
|
is_ar_consecutive
|
|
is_ma_consecutive
|
|
is_integrated
|
|
is_seasonal
|
|
k_exog_params
|
|
k_ar_params
|
|
k_ma_params
|
|
k_seasonal_ar_params
|
|
k_seasonal_ma_params
|
|
k_params
|
|
exog_names
|
|
ar_names
|
|
ma_names
|
|
seasonal_ar_names
|
|
seasonal_ma_names
|
|
param_names
|
|
|
|
Examples
|
|
--------
|
|
>>> SARIMAXSpecification(order=(1, 0, 2))
|
|
SARIMAXSpecification(endog=y, order=(1, 0, 2))
|
|
|
|
>>> spec = SARIMAXSpecification(ar_order=1, ma_order=2)
|
|
SARIMAXSpecification(endog=y, order=(1, 0, 2))
|
|
|
|
>>> spec = SARIMAXSpecification(ar_order=1, seasonal_order=(1, 0, 0, 4))
|
|
SARIMAXSpecification(endog=y, order=(1, 0, 0), seasonal_order=(1, 0, 0, 4))
|
|
"""
|
|
|
|
def __init__(self, endog=None, exog=None, order=None,
|
|
seasonal_order=None, ar_order=None, diff=None, ma_order=None,
|
|
seasonal_ar_order=None, seasonal_diff=None,
|
|
seasonal_ma_order=None, seasonal_periods=None, trend=None,
|
|
enforce_stationarity=None, enforce_invertibility=None,
|
|
concentrate_scale=None, trend_offset=1, dates=None, freq=None,
|
|
missing='none', validate_specification=True):
|
|
|
|
# Basic parameters
|
|
self.enforce_stationarity = enforce_stationarity
|
|
self.enforce_invertibility = enforce_invertibility
|
|
self.concentrate_scale = concentrate_scale
|
|
self.trend_offset = trend_offset
|
|
|
|
# Validate that we were not given conflicting specifications
|
|
has_order = order is not None
|
|
has_specific_order = (ar_order is not None or diff is not None or
|
|
ma_order is not None)
|
|
has_seasonal_order = seasonal_order is not None
|
|
has_specific_seasonal_order = (seasonal_ar_order is not None or
|
|
seasonal_diff is not None or
|
|
seasonal_ma_order is not None or
|
|
seasonal_periods is not None)
|
|
if has_order and has_specific_order:
|
|
raise ValueError('Cannot specify both `order` and either of'
|
|
' `ar_order` or `ma_order`.')
|
|
if has_seasonal_order and has_specific_seasonal_order:
|
|
raise ValueError('Cannot specify both `seasonal_order` and any of'
|
|
' `seasonal_ar_order`, `seasonal_ma_order`,'
|
|
' or `seasonal_periods`.')
|
|
|
|
# Compute `order`
|
|
if has_specific_order:
|
|
ar_order = 0 if ar_order is None else ar_order
|
|
diff = 0 if diff is None else diff
|
|
ma_order = 0 if ma_order is None else ma_order
|
|
order = (ar_order, diff, ma_order)
|
|
elif not has_order:
|
|
order = (0, 0, 0)
|
|
|
|
# Compute `seasonal_order`
|
|
if has_specific_seasonal_order:
|
|
seasonal_ar_order = (
|
|
0 if seasonal_ar_order is None else seasonal_ar_order)
|
|
seasonal_diff = 0 if seasonal_diff is None else seasonal_diff
|
|
seasonal_ma_order = (
|
|
0 if seasonal_ma_order is None else seasonal_ma_order)
|
|
seasonal_periods = (
|
|
0 if seasonal_periods is None else seasonal_periods)
|
|
seasonal_order = (seasonal_ar_order, seasonal_diff,
|
|
seasonal_ma_order, seasonal_periods)
|
|
elif not has_seasonal_order:
|
|
seasonal_order = (0, 0, 0, 0)
|
|
|
|
# Validate shapes of `order`, `seasonal_order`
|
|
if len(order) != 3:
|
|
raise ValueError('`order` argument must be an iterable with three'
|
|
' elements.')
|
|
if len(seasonal_order) != 4:
|
|
raise ValueError('`seasonal_order` argument must be an iterable'
|
|
' with four elements.')
|
|
|
|
# Validate differencing parameters
|
|
if validate_specification:
|
|
if order[1] < 0:
|
|
raise ValueError('Cannot specify negative differencing.')
|
|
if order[1] != int(order[1]):
|
|
raise ValueError('Cannot specify fractional differencing.')
|
|
if seasonal_order[1] < 0:
|
|
raise ValueError('Cannot specify negative seasonal'
|
|
' differencing.')
|
|
if seasonal_order[1] != int(seasonal_order[1]):
|
|
raise ValueError('Cannot specify fractional seasonal'
|
|
' differencing.')
|
|
if seasonal_order[3] < 0:
|
|
raise ValueError('Cannot specify negative seasonal'
|
|
' periodicity.')
|
|
|
|
# Standardize to integers or lists of integers
|
|
order = (
|
|
standardize_lag_order(order[0], 'AR'),
|
|
int(order[1]),
|
|
standardize_lag_order(order[2], 'MA'))
|
|
seasonal_order = (
|
|
standardize_lag_order(seasonal_order[0], 'seasonal AR'),
|
|
int(seasonal_order[1]),
|
|
standardize_lag_order(seasonal_order[2], 'seasonal MA'),
|
|
int(seasonal_order[3]))
|
|
|
|
# Validate seasonals
|
|
if validate_specification:
|
|
if seasonal_order[3] == 1:
|
|
raise ValueError('Seasonal periodicity must be greater'
|
|
' than 1.')
|
|
if ((seasonal_order[0] != 0 or seasonal_order[1] != 0 or
|
|
seasonal_order[2] != 0) and seasonal_order[3] == 0):
|
|
raise ValueError('Must include nonzero seasonal periodicity if'
|
|
' including seasonal AR, MA, or'
|
|
' differencing.')
|
|
|
|
# Basic order
|
|
self.order = order
|
|
self.ar_order, self.diff, self.ma_order = order
|
|
|
|
self.seasonal_order = seasonal_order
|
|
(self.seasonal_ar_order, self.seasonal_diff, self.seasonal_ma_order,
|
|
self.seasonal_periods) = seasonal_order
|
|
|
|
# Lists of included lags
|
|
if isinstance(self.ar_order, list):
|
|
self.ar_lags = self.ar_order
|
|
else:
|
|
self.ar_lags = np.arange(1, self.ar_order + 1).tolist()
|
|
if isinstance(self.ma_order, list):
|
|
self.ma_lags = self.ma_order
|
|
else:
|
|
self.ma_lags = np.arange(1, self.ma_order + 1).tolist()
|
|
|
|
if isinstance(self.seasonal_ar_order, list):
|
|
self.seasonal_ar_lags = self.seasonal_ar_order
|
|
else:
|
|
self.seasonal_ar_lags = (
|
|
np.arange(1, self.seasonal_ar_order + 1).tolist())
|
|
if isinstance(self.seasonal_ma_order, list):
|
|
self.seasonal_ma_lags = self.seasonal_ma_order
|
|
else:
|
|
self.seasonal_ma_lags = (
|
|
np.arange(1, self.seasonal_ma_order + 1).tolist())
|
|
|
|
# Maximum lag orders
|
|
self.max_ar_order = self.ar_lags[-1] if self.ar_lags else 0
|
|
self.max_ma_order = self.ma_lags[-1] if self.ma_lags else 0
|
|
|
|
self.max_seasonal_ar_order = (
|
|
self.seasonal_ar_lags[-1] if self.seasonal_ar_lags else 0)
|
|
self.max_seasonal_ma_order = (
|
|
self.seasonal_ma_lags[-1] if self.seasonal_ma_lags else 0)
|
|
|
|
self.max_reduced_ar_order = (
|
|
self.max_ar_order +
|
|
self.max_seasonal_ar_order * self.seasonal_periods)
|
|
self.max_reduced_ma_order = (
|
|
self.max_ma_order +
|
|
self.max_seasonal_ma_order * self.seasonal_periods)
|
|
|
|
# Check that we don't have duplicate AR or MA lags from the seasonal
|
|
# component
|
|
ar_lags = set(self.ar_lags)
|
|
seasonal_ar_lags = set(np.array(self.seasonal_ar_lags)
|
|
* self.seasonal_periods)
|
|
duplicate_ar_lags = ar_lags.intersection(seasonal_ar_lags)
|
|
if validate_specification and len(duplicate_ar_lags) > 0:
|
|
raise ValueError('Invalid model: autoregressive lag(s) %s are'
|
|
' in both the seasonal and non-seasonal'
|
|
' autoregressive components.'
|
|
% duplicate_ar_lags)
|
|
|
|
ma_lags = set(self.ma_lags)
|
|
seasonal_ma_lags = set(np.array(self.seasonal_ma_lags)
|
|
* self.seasonal_periods)
|
|
duplicate_ma_lags = ma_lags.intersection(seasonal_ma_lags)
|
|
if validate_specification and len(duplicate_ma_lags) > 0:
|
|
raise ValueError('Invalid model: moving average lag(s) %s are'
|
|
' in both the seasonal and non-seasonal'
|
|
' moving average components.'
|
|
% duplicate_ma_lags)
|
|
|
|
# Handle trend
|
|
self.trend = trend
|
|
self.trend_poly, _ = prepare_trend_spec(trend)
|
|
|
|
# Check for a constant column in the provided exog
|
|
exog_is_pandas = _is_using_pandas(exog, None)
|
|
if (validate_specification and exog is not None and
|
|
len(self.trend_poly) > 0 and self.trend_poly[0] == 1):
|
|
# Figure out if we have any constant columns
|
|
x = np.asanyarray(exog)
|
|
ptp0 = np.ptp(x, axis=0)
|
|
col_is_const = ptp0 == 0
|
|
nz_const = col_is_const & (x[0] != 0)
|
|
col_const = nz_const
|
|
|
|
# If we already have a constant column, raise an error
|
|
if np.any(col_const):
|
|
raise ValueError('A constant trend was included in the model'
|
|
' specification, but the `exog` data already'
|
|
' contains a column of constants.')
|
|
|
|
# This contains the included exponents of the trend polynomial,
|
|
# where e.g. the constant term has exponent 0, a linear trend has
|
|
# exponent 1, etc.
|
|
self.trend_terms = np.where(self.trend_poly == 1)[0]
|
|
# Trend order is either the degree of the trend polynomial, if all
|
|
# exponents are included, or a list of included exponents. Here we need
|
|
# to make a distinction between a degree zero polynomial (i.e. a
|
|
# constant) and the zero polynomial (i.e. not even a constant). The
|
|
# former has `trend_order = 0`, while the latter has
|
|
# `trend_order = None`.
|
|
self.k_trend = len(self.trend_terms)
|
|
if len(self.trend_terms) == 0:
|
|
self.trend_order = None
|
|
self.trend_degree = None
|
|
elif np.all(self.trend_terms == np.arange(len(self.trend_terms))):
|
|
self.trend_order = self.trend_terms[-1]
|
|
self.trend_degree = self.trend_terms[-1]
|
|
else:
|
|
self.trend_order = self.trend_terms
|
|
self.trend_degree = self.trend_terms[-1]
|
|
|
|
# Handle endog / exog
|
|
# Standardize exog
|
|
self.k_exog, exog = prepare_exog(exog)
|
|
|
|
# Standardize endog (including creating a faux endog if necessary)
|
|
faux_endog = endog is None
|
|
if endog is None:
|
|
endog = [] if exog is None else np.zeros(len(exog)) * np.nan
|
|
|
|
# Add trend data into exog
|
|
nobs = len(endog) if exog is None else len(exog)
|
|
if self.trend_order is not None:
|
|
# Add in the data
|
|
trend_data = self.construct_trend_data(nobs, trend_offset)
|
|
if exog is None:
|
|
exog = trend_data
|
|
elif exog_is_pandas:
|
|
trend_data = pd.DataFrame(trend_data, index=exog.index,
|
|
columns=self.construct_trend_names())
|
|
exog = pd.concat([trend_data, exog], axis=1)
|
|
else:
|
|
exog = np.c_[trend_data, exog]
|
|
|
|
# Create an underlying time series model, to handle endog / exog,
|
|
# especially validating shapes, retrieving names, and potentially
|
|
# providing us with a time series index
|
|
self._model = TimeSeriesModel(endog, exog=exog, dates=dates, freq=freq,
|
|
missing=missing)
|
|
self.endog = None if faux_endog else self._model.endog
|
|
self.exog = self._model.exog
|
|
|
|
# Validate endog shape
|
|
if (validate_specification and not faux_endog and
|
|
self.endog.ndim > 1 and self.endog.shape[1] > 1):
|
|
raise ValueError('SARIMAX models require univariate `endog`. Got'
|
|
' shape %s.' % str(self.endog.shape))
|
|
|
|
self._has_missing = (
|
|
None if faux_endog else np.any(np.isnan(self.endog)))
|
|
|
|
@property
|
|
def is_ar_consecutive(self):
|
|
"""
|
|
(bool) Is autoregressive lag polynomial consecutive.
|
|
|
|
I.e. does it include all lags up to and including the maximum lag.
|
|
"""
|
|
return (self.max_seasonal_ar_order == 0 and
|
|
not isinstance(self.ar_order, list))
|
|
|
|
@property
|
|
def is_ma_consecutive(self):
|
|
"""
|
|
(bool) Is moving average lag polynomial consecutive.
|
|
|
|
I.e. does it include all lags up to and including the maximum lag.
|
|
"""
|
|
return (self.max_seasonal_ma_order == 0 and
|
|
not isinstance(self.ma_order, list))
|
|
|
|
@property
|
|
def is_integrated(self):
|
|
"""
|
|
(bool) Is the model integrated.
|
|
|
|
I.e. does it have a nonzero `diff` or `seasonal_diff`.
|
|
"""
|
|
return self.diff > 0 or self.seasonal_diff > 0
|
|
|
|
@property
|
|
def is_seasonal(self):
|
|
"""(bool) Does the model include a seasonal component."""
|
|
return self.seasonal_periods != 0
|
|
|
|
@property
|
|
def k_exog_params(self):
|
|
"""(int) Number of parameters associated with exogenous variables."""
|
|
return len(self.exog_names)
|
|
|
|
@property
|
|
def k_ar_params(self):
|
|
"""(int) Number of autoregressive (non-seasonal) parameters."""
|
|
return len(self.ar_lags)
|
|
|
|
@property
|
|
def k_ma_params(self):
|
|
"""(int) Number of moving average (non-seasonal) parameters."""
|
|
return len(self.ma_lags)
|
|
|
|
@property
|
|
def k_seasonal_ar_params(self):
|
|
"""(int) Number of seasonal autoregressive parameters."""
|
|
return len(self.seasonal_ar_lags)
|
|
|
|
@property
|
|
def k_seasonal_ma_params(self):
|
|
"""(int) Number of seasonal moving average parameters."""
|
|
return len(self.seasonal_ma_lags)
|
|
|
|
@property
|
|
def k_params(self):
|
|
"""(int) Total number of model parameters."""
|
|
k_params = (self.k_exog_params + self.k_ar_params + self.k_ma_params +
|
|
self.k_seasonal_ar_params + self.k_seasonal_ma_params)
|
|
if not self.concentrate_scale:
|
|
k_params += 1
|
|
return k_params
|
|
|
|
@property
|
|
def exog_names(self):
|
|
"""(list of str) Names associated with exogenous parameters."""
|
|
exog_names = self._model.exog_names
|
|
return [] if exog_names is None else exog_names
|
|
|
|
@property
|
|
def ar_names(self):
|
|
"""(list of str) Names of (non-seasonal) autoregressive parameters."""
|
|
return ['ar.L%d' % i for i in self.ar_lags]
|
|
|
|
@property
|
|
def ma_names(self):
|
|
"""(list of str) Names of (non-seasonal) moving average parameters."""
|
|
return ['ma.L%d' % i for i in self.ma_lags]
|
|
|
|
@property
|
|
def seasonal_ar_names(self):
|
|
"""(list of str) Names of seasonal autoregressive parameters."""
|
|
s = self.seasonal_periods
|
|
return ['ar.S.L%d' % (i * s) for i in self.seasonal_ar_lags]
|
|
|
|
@property
|
|
def seasonal_ma_names(self):
|
|
"""(list of str) Names of seasonal moving average parameters."""
|
|
s = self.seasonal_periods
|
|
return ['ma.S.L%d' % (i * s) for i in self.seasonal_ma_lags]
|
|
|
|
@property
|
|
def param_names(self):
|
|
"""(list of str) Names of all model parameters."""
|
|
names = (self.exog_names + self.ar_names + self.ma_names +
|
|
self.seasonal_ar_names + self.seasonal_ma_names)
|
|
if not self.concentrate_scale:
|
|
names.append('sigma2')
|
|
return names
|
|
|
|
@property
|
|
def valid_estimators(self):
|
|
"""
|
|
(list of str) Estimators that could be used with specification.
|
|
|
|
Note: does not consider the presense of `exog` in determining valid
|
|
estimators. If there are exogenous variables, then feasible Generalized
|
|
Least Squares should be used through the `gls` estimator, and the
|
|
`valid_estimators` are the estimators that could be passed as the
|
|
`arma_estimator` argument to `gls`.
|
|
"""
|
|
estimators = {'yule_walker', 'burg', 'innovations',
|
|
'hannan_rissanen', 'innovations_mle', 'statespace'}
|
|
|
|
# Properties
|
|
has_ar = self.max_ar_order != 0
|
|
has_ma = self.max_ma_order != 0
|
|
has_seasonal = self.seasonal_periods != 0
|
|
|
|
# Only state space can handle missing data or concentrated scale
|
|
if self._has_missing:
|
|
estimators.intersection_update(['statespace'])
|
|
|
|
# Only numerical MLE estimators can enforce restrictions
|
|
if ((self.enforce_stationarity and self.max_ar_order > 0) or
|
|
(self.enforce_invertibility and self.max_ma_order > 0)):
|
|
estimators.intersection_update(['innovations_mle', 'statespace'])
|
|
|
|
# Innovations: no AR, non-consecutive MA, seasonal
|
|
if has_ar or not self.is_ma_consecutive or has_seasonal:
|
|
estimators.discard('innovations')
|
|
# Yule-Walker/Burg: no MA, non-consecutive AR, seasonal
|
|
if has_ma or not self.is_ar_consecutive or has_seasonal:
|
|
estimators.discard('yule_walker')
|
|
estimators.discard('burg')
|
|
# Hannan-Rissanen: no seasonal
|
|
if has_seasonal:
|
|
estimators.discard('hannan_rissanen')
|
|
# Innovations MLE: cannot have enforce_stationary=False or
|
|
# concentratre_scale=True
|
|
if self.enforce_stationarity is False or self.concentrate_scale:
|
|
estimators.discard('innovations_mle')
|
|
|
|
return estimators
|
|
|
|
def validate_estimator(self, estimator):
|
|
"""
|
|
Validate an SARIMA estimator.
|
|
|
|
Parameters
|
|
----------
|
|
estimator : str
|
|
Name of the estimator to validate against the current state of
|
|
the specification. Possible values are: 'yule_walker', 'burg',
|
|
'innovations', 'hannan_rissanen', 'innovoations_mle', 'statespace'.
|
|
|
|
Notes
|
|
-----
|
|
This method will raise a `ValueError` if an invalid method is passed,
|
|
and otherwise will return None.
|
|
|
|
This method does not consider the presense of `exog` in determining
|
|
valid estimators. If there are exogenous variables, then feasible
|
|
Generalized Least Squares should be used through the `gls` estimator,
|
|
and a "valid" estimator is one that could be passed as the
|
|
`arma_estimator` argument to `gls`.
|
|
|
|
This method only uses the attributes `enforce_stationarity` and
|
|
`concentrate_scale` to determine the validity of numerical maximum
|
|
likelihood estimators. These only include 'innovations_mle' (which
|
|
does not support `enforce_stationarity=False` or
|
|
`concentrate_scale=True`) and 'statespace' (which supports all
|
|
combinations of each).
|
|
|
|
Examples
|
|
--------
|
|
>>> spec = SARIMAXSpecification(order=(1, 0, 2))
|
|
|
|
>>> spec.validate_estimator('yule_walker')
|
|
ValueError: Yule-Walker estimator does not support moving average
|
|
components.
|
|
|
|
>>> spec.validate_estimator('burg')
|
|
ValueError: Burg estimator does not support moving average components.
|
|
|
|
>>> spec.validate_estimator('innovations')
|
|
ValueError: Burg estimator does not support autoregressive components.
|
|
|
|
>>> spec.validate_estimator('hannan_rissanen') # returns None
|
|
>>> spec.validate_estimator('innovations_mle') # returns None
|
|
>>> spec.validate_estimator('statespace') # returns None
|
|
|
|
>>> spec.validate_estimator('not_an_estimator')
|
|
ValueError: "not_an_estimator" is not a valid estimator.
|
|
"""
|
|
has_ar = self.max_ar_order != 0
|
|
has_ma = self.max_ma_order != 0
|
|
has_seasonal = self.seasonal_periods != 0
|
|
has_missing = self._has_missing
|
|
|
|
titles = {
|
|
'yule_walker': 'Yule-Walker',
|
|
'burg': 'Burg',
|
|
'innovations': 'Innovations',
|
|
'hannan_rissanen': 'Hannan-Rissanen',
|
|
'innovations_mle': 'Innovations MLE',
|
|
'statespace': 'State space'
|
|
}
|
|
|
|
# Only state space form can support missing data
|
|
if estimator != 'statespace':
|
|
if has_missing:
|
|
raise ValueError('%s estimator does not support missing'
|
|
' values in `endog`.' % titles[estimator])
|
|
|
|
# Only state space and innovations MLE can enforce parameter
|
|
# restrictions
|
|
if estimator not in ['innovations_mle', 'statespace']:
|
|
if self.max_ar_order > 0 and self.enforce_stationarity:
|
|
raise ValueError('%s estimator cannot enforce a stationary'
|
|
' autoregressive lag polynomial.'
|
|
% titles[estimator])
|
|
if self.max_ma_order > 0 and self.enforce_invertibility:
|
|
raise ValueError('%s estimator cannot enforce an invertible'
|
|
' moving average lag polynomial.'
|
|
% titles[estimator])
|
|
|
|
# Now go through specific disqualifications for each estimator
|
|
if estimator in ['yule_walker', 'burg']:
|
|
if has_seasonal:
|
|
raise ValueError('%s estimator does not support seasonal'
|
|
' components.' % titles[estimator])
|
|
if not self.is_ar_consecutive:
|
|
raise ValueError('%s estimator does not support'
|
|
' non-consecutive autoregressive lags.'
|
|
% titles[estimator])
|
|
if has_ma:
|
|
raise ValueError('%s estimator does not support moving average'
|
|
' components.' % titles[estimator])
|
|
elif estimator == 'innovations':
|
|
if has_seasonal:
|
|
raise ValueError('Innovations estimator does not support'
|
|
' seasonal components.')
|
|
if not self.is_ma_consecutive:
|
|
raise ValueError('Innovations estimator does not support'
|
|
' non-consecutive moving average lags.')
|
|
if has_ar:
|
|
raise ValueError('Innovations estimator does not support'
|
|
' autoregressive components.')
|
|
elif estimator == 'hannan_rissanen':
|
|
if has_seasonal:
|
|
raise ValueError('Hannan-Rissanen estimator does not support'
|
|
' seasonal components.')
|
|
elif estimator == 'innovations_mle':
|
|
if self.enforce_stationarity is False:
|
|
raise ValueError('Innovations MLE estimator does not support'
|
|
' non-stationary autoregressive components,'
|
|
' but `enforce_stationarity` is set to False')
|
|
if self.concentrate_scale:
|
|
raise ValueError('Innovations MLE estimator does not support'
|
|
' concentrating the scale out of the'
|
|
' log-likelihood function')
|
|
elif estimator == 'statespace':
|
|
# State space form supports all variations of SARIMAX.
|
|
pass
|
|
else:
|
|
raise ValueError('"%s" is not a valid estimator.' % estimator)
|
|
|
|
def split_params(self, params, allow_infnan=False):
|
|
"""
|
|
Split parameter array by type into dictionary.
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
Array of model parameters.
|
|
allow_infnan : bool, optional
|
|
Whether or not to allow `params` to contain -np.inf, np.inf, and
|
|
np.nan. Default is False.
|
|
|
|
Returns
|
|
-------
|
|
split_params : dict
|
|
Dictionary with keys 'exog_params', 'ar_params', 'ma_params',
|
|
'seasonal_ar_params', 'seasonal_ma_params', and (unless
|
|
`concentrate_scale=True`) 'sigma2'. Values are the parameters
|
|
associated with the key, based on the `params` argument.
|
|
|
|
Examples
|
|
--------
|
|
>>> spec = SARIMAXSpecification(ar_order=1)
|
|
>>> spec.split_params([0.5, 4])
|
|
{'exog_params': array([], dtype=float64),
|
|
'ar_params': array([0.5]),
|
|
'ma_params': array([], dtype=float64),
|
|
'seasonal_ar_params': array([], dtype=float64),
|
|
'seasonal_ma_params': array([], dtype=float64),
|
|
'sigma2': 4.0}
|
|
"""
|
|
params = validate_basic(params, self.k_params,
|
|
allow_infnan=allow_infnan,
|
|
title='joint parameters')
|
|
|
|
ix = [self.k_exog_params, self.k_ar_params, self.k_ma_params,
|
|
self.k_seasonal_ar_params, self.k_seasonal_ma_params]
|
|
names = ['exog_params', 'ar_params', 'ma_params',
|
|
'seasonal_ar_params', 'seasonal_ma_params']
|
|
if not self.concentrate_scale:
|
|
ix.append(1)
|
|
names.append('sigma2')
|
|
ix = np.cumsum(ix)
|
|
|
|
out = dict(zip(names, np.split(params, ix)))
|
|
if 'sigma2' in out:
|
|
out['sigma2'] = out['sigma2'].item()
|
|
|
|
return out
|
|
|
|
def join_params(self, exog_params=None, ar_params=None, ma_params=None,
|
|
seasonal_ar_params=None, seasonal_ma_params=None,
|
|
sigma2=None):
|
|
"""
|
|
Join parameters into a single vector.
|
|
|
|
Parameters
|
|
----------
|
|
exog_params : array_like, optional
|
|
Parameters associated with exogenous regressors. Required if
|
|
`exog` is part of specification.
|
|
ar_params : array_like, optional
|
|
Parameters associated with (non-seasonal) autoregressive component.
|
|
Required if this component is part of the specification.
|
|
ma_params : array_like, optional
|
|
Parameters associated with (non-seasonal) moving average component.
|
|
Required if this component is part of the specification.
|
|
seasonal_ar_params : array_like, optional
|
|
Parameters associated with seasonal autoregressive component.
|
|
Required if this component is part of the specification.
|
|
seasonal_ma_params : array_like, optional
|
|
Parameters associated with seasonal moving average component.
|
|
Required if this component is part of the specification.
|
|
sigma2 : array_like, optional
|
|
Innovation variance parameter. Required unless
|
|
`concentrated_scale=True`.
|
|
|
|
Returns
|
|
-------
|
|
params : ndarray
|
|
Array of parameters.
|
|
|
|
Examples
|
|
--------
|
|
>>> spec = SARIMAXSpecification(ar_order=1)
|
|
>>> spec.join_params(ar_params=0.5, sigma2=4)
|
|
array([0.5, 4. ])
|
|
"""
|
|
definitions = [
|
|
('exogenous variables', self.k_exog_params, exog_params),
|
|
('AR terms', self.k_ar_params, ar_params),
|
|
('MA terms', self.k_ma_params, ma_params),
|
|
('seasonal AR terms', self.k_seasonal_ar_params,
|
|
seasonal_ar_params),
|
|
('seasonal MA terms', self.k_seasonal_ma_params,
|
|
seasonal_ma_params),
|
|
('variance', int(not self.concentrate_scale), sigma2)]
|
|
|
|
params_list = []
|
|
for title, k, params in definitions:
|
|
if k > 0:
|
|
# Validate
|
|
if params is None:
|
|
raise ValueError('Specification includes %s, but no'
|
|
' parameters were provided.' % title)
|
|
params = np.atleast_1d(np.squeeze(params))
|
|
if not params.shape == (k,):
|
|
raise ValueError('Specification included %d %s, but'
|
|
' parameters with shape %s were provided.'
|
|
% (k, title, params.shape))
|
|
|
|
# Otherwise add to the list
|
|
params_list.append(params)
|
|
|
|
return np.concatenate(params_list)
|
|
|
|
def validate_params(self, params):
|
|
"""
|
|
Validate parameter vector by raising ValueError on invalid values.
|
|
|
|
Parameters
|
|
----------
|
|
params : array_like
|
|
Array of model parameters.
|
|
|
|
Notes
|
|
-----
|
|
Primarily checks that the parameters have the right shape and are not
|
|
NaN or infinite. Also checks if parameters are consistent with a
|
|
stationary process if `enforce_stationarity=True` and that they are
|
|
consistent with an invertible process if `enforce_invertibility=True`.
|
|
Finally, checks that the variance term is positive, unless
|
|
`concentrate_scale=True`.
|
|
|
|
Examples
|
|
--------
|
|
>>> spec = SARIMAXSpecification(ar_order=1)
|
|
>>> spec.validate_params([-0.5, 4.]) # returns None
|
|
>>> spec.validate_params([-0.5, -2])
|
|
ValueError: Non-positive variance term.
|
|
>>> spec.validate_params([-1.5, 4.])
|
|
ValueError: Non-stationary autoregressive polynomial.
|
|
"""
|
|
# Note: split_params includes basic validation
|
|
params = self.split_params(params)
|
|
|
|
# Specific checks
|
|
if self.enforce_stationarity:
|
|
if self.k_ar_params:
|
|
ar_poly = np.r_[1, -params['ar_params']]
|
|
if not is_invertible(ar_poly):
|
|
raise ValueError('Non-stationary autoregressive'
|
|
' polynomial.')
|
|
if self.k_seasonal_ar_params:
|
|
seasonal_ar_poly = np.r_[1, -params['seasonal_ar_params']]
|
|
if not is_invertible(seasonal_ar_poly):
|
|
raise ValueError('Non-stationary seasonal autoregressive'
|
|
' polynomial.')
|
|
|
|
if self.enforce_invertibility:
|
|
if self.k_ma_params:
|
|
ma_poly = np.r_[1, params['ma_params']]
|
|
if not is_invertible(ma_poly):
|
|
raise ValueError('Non-invertible moving average'
|
|
' polynomial.')
|
|
if self.k_seasonal_ma_params:
|
|
seasonal_ma_poly = np.r_[1, params['seasonal_ma_params']]
|
|
if not is_invertible(seasonal_ma_poly):
|
|
raise ValueError('Non-invertible seasonal moving average'
|
|
' polynomial.')
|
|
|
|
if not self.concentrate_scale:
|
|
if params['sigma2'] <= 0:
|
|
raise ValueError('Non-positive variance term.')
|
|
|
|
def constrain_params(self, unconstrained):
|
|
"""
|
|
Constrain parameter values to be valid through transformations.
|
|
|
|
Parameters
|
|
----------
|
|
unconstrained : array_like
|
|
Array of model unconstrained parameters.
|
|
|
|
Returns
|
|
-------
|
|
constrained : ndarray
|
|
Array of model parameters transformed to produce a valid model.
|
|
|
|
Notes
|
|
-----
|
|
This is usually only used when performing numerical minimization
|
|
of the log-likelihood function. This function is necessary because
|
|
the minimizers consider values over the entire real space, while
|
|
SARIMAX models require parameters in subspaces (for example positive
|
|
variances).
|
|
|
|
Examples
|
|
--------
|
|
>>> spec = SARIMAXSpecification(ar_order=1)
|
|
>>> spec.constrain_params([10, -2])
|
|
array([-0.99504, 4. ])
|
|
"""
|
|
unconstrained = self.split_params(unconstrained)
|
|
params = {}
|
|
|
|
if self.k_exog_params:
|
|
params['exog_params'] = unconstrained['exog_params']
|
|
if self.k_ar_params:
|
|
if self.enforce_stationarity:
|
|
params['ar_params'] = constrain(unconstrained['ar_params'])
|
|
else:
|
|
params['ar_params'] = unconstrained['ar_params']
|
|
if self.k_ma_params:
|
|
if self.enforce_invertibility:
|
|
params['ma_params'] = -constrain(unconstrained['ma_params'])
|
|
else:
|
|
params['ma_params'] = unconstrained['ma_params']
|
|
if self.k_seasonal_ar_params:
|
|
if self.enforce_stationarity:
|
|
params['seasonal_ar_params'] = (
|
|
constrain(unconstrained['seasonal_ar_params']))
|
|
else:
|
|
params['seasonal_ar_params'] = (
|
|
unconstrained['seasonal_ar_params'])
|
|
if self.k_seasonal_ma_params:
|
|
if self.enforce_invertibility:
|
|
params['seasonal_ma_params'] = (
|
|
-constrain(unconstrained['seasonal_ma_params']))
|
|
else:
|
|
params['seasonal_ma_params'] = (
|
|
unconstrained['seasonal_ma_params'])
|
|
if not self.concentrate_scale:
|
|
params['sigma2'] = unconstrained['sigma2']**2
|
|
|
|
return self.join_params(**params)
|
|
|
|
def unconstrain_params(self, constrained):
|
|
"""
|
|
Reverse transformations used to constrain parameter values to be valid.
|
|
|
|
Parameters
|
|
----------
|
|
constrained : array_like
|
|
Array of model parameters.
|
|
|
|
Returns
|
|
-------
|
|
unconstrained : ndarray
|
|
Array of parameters with constraining transformions reversed.
|
|
|
|
Notes
|
|
-----
|
|
This is usually only used when performing numerical minimization
|
|
of the log-likelihood function. This function is the (approximate)
|
|
inverse of `constrain_params`.
|
|
|
|
Examples
|
|
--------
|
|
>>> spec = SARIMAXSpecification(ar_order=1)
|
|
>>> spec.unconstrain_params([-0.5, 4.])
|
|
array([0.57735, 2. ])
|
|
"""
|
|
constrained = self.split_params(constrained)
|
|
params = {}
|
|
|
|
if self.k_exog_params:
|
|
params['exog_params'] = constrained['exog_params']
|
|
if self.k_ar_params:
|
|
if self.enforce_stationarity:
|
|
params['ar_params'] = unconstrain(constrained['ar_params'])
|
|
else:
|
|
params['ar_params'] = constrained['ar_params']
|
|
if self.k_ma_params:
|
|
if self.enforce_invertibility:
|
|
params['ma_params'] = unconstrain(-constrained['ma_params'])
|
|
else:
|
|
params['ma_params'] = constrained['ma_params']
|
|
if self.k_seasonal_ar_params:
|
|
if self.enforce_stationarity:
|
|
params['seasonal_ar_params'] = (
|
|
unconstrain(constrained['seasonal_ar_params']))
|
|
else:
|
|
params['seasonal_ar_params'] = (
|
|
constrained['seasonal_ar_params'])
|
|
if self.k_seasonal_ma_params:
|
|
if self.enforce_invertibility:
|
|
params['seasonal_ma_params'] = (
|
|
unconstrain(-constrained['seasonal_ma_params']))
|
|
else:
|
|
params['seasonal_ma_params'] = (
|
|
constrained['seasonal_ma_params'])
|
|
if not self.concentrate_scale:
|
|
params['sigma2'] = constrained['sigma2']**0.5
|
|
|
|
return self.join_params(**params)
|
|
|
|
def construct_trend_data(self, nobs, offset=1):
|
|
if self.trend_order is None:
|
|
trend_data = None
|
|
else:
|
|
trend_data = prepare_trend_data(
|
|
self.trend_poly, int(np.sum(self.trend_poly)), nobs, offset)
|
|
|
|
return trend_data
|
|
|
|
def construct_trend_names(self):
|
|
names = []
|
|
for i in self.trend_terms:
|
|
if i == 0:
|
|
names.append('const')
|
|
elif i == 1:
|
|
names.append('drift')
|
|
else:
|
|
names.append('trend.%d' % i)
|
|
return names
|
|
|
|
def __repr__(self):
|
|
"""Represent SARIMAXSpecification object as a string."""
|
|
components = []
|
|
if self.endog is not None:
|
|
components.append('endog=%s' % self._model.endog_names)
|
|
if self.k_exog_params:
|
|
components.append('exog=%s' % self.exog_names)
|
|
components.append('order=%s' % str(self.order))
|
|
if self.seasonal_periods > 0:
|
|
components.append('seasonal_order=%s' % str(self.seasonal_order))
|
|
if self.enforce_stationarity is not None:
|
|
components.append('enforce_stationarity=%s'
|
|
% self.enforce_stationarity)
|
|
if self.enforce_invertibility is not None:
|
|
components.append('enforce_invertibility=%s'
|
|
% self.enforce_invertibility)
|
|
if self.concentrate_scale is not None:
|
|
components.append('concentrate_scale=%s' % self.concentrate_scale)
|
|
return 'SARIMAXSpecification(%s)' % ', '.join(components)
|