from __future__ import annotations

from statsmodels.compat.python import lrange

import warnings

import numpy as np
import pandas as pd
from pandas import DataFrame
from pandas.tseries import offsets
from pandas.tseries.frequencies import to_offset
from typing import Literal

from statsmodels.tools.data import _is_recarray, _is_using_pandas
from statsmodels.tools.sm_exceptions import ValueWarning
from statsmodels.tools.typing import NDArray
from statsmodels.tools.validation import (
    array_like,
    bool_like,
    int_like,
    string_like,
)

__all__ = [
    "lagmat",
    "lagmat2ds",
    "add_trend",
    "duplication_matrix",
    "elimination_matrix",
    "commutation_matrix",
    "vec",
    "vech",
    "unvec",
    "unvech",
    "freq_to_period",
]


def add_trend(x, trend="c", prepend=False, has_constant="skip"):
    """
    Add a trend and/or constant to an array.

    Parameters
    ----------
    x : array_like
        Original array of data.
    trend : str {'n', 'c', 't', 'ct', 'ctt'}
        The trend to add.

        * 'n' add no trend.
        * 'c' add constant only.
        * 't' add trend only.
        * 'ct' add constant and linear trend.
        * 'ctt' add constant and linear and quadratic trend.
    prepend : bool
        If True, prepends the new data to the columns of x.
    has_constant : str {'raise', 'add', 'skip'}
        Controls what happens when trend is 'c' and a constant column already
        exists in x. 'raise' will raise an error. 'add' will add a column of
        1s. 'skip' will return the data without change. 'skip' is the default.

    Returns
    -------
    array_like
        The original data with the additional trend columns. If x is a
        pandas Series or DataFrame, then the trend column names are 'const',
        'trend' and 'trend_squared'.

    See Also
    --------
    statsmodels.tools.tools.add_constant
        Add a constant column to an array.

    Notes
    -----
    Returns columns as ['ctt','ct','c'] whenever applicable. There is currently
    no checking for an existing trend.
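
    Examples
    --------
    A small illustrative example; the trend columns are appended on the
    right because ``prepend`` defaults to False:

    >>> import numpy as np
    >>> import pandas as pd
    >>> from statsmodels.tsa.tsatools import add_trend
    >>> y = pd.Series(np.zeros(3), name="y")
    >>> add_trend(y, trend="ct").columns.tolist()
    ['y', 'const', 'trend']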
    """
    prepend = bool_like(prepend, "prepend")
    trend = string_like(trend, "trend", options=("n", "c", "t", "ct", "ctt"))
    has_constant = string_like(
        has_constant, "has_constant", options=("raise", "add", "skip")
    )

    # TODO: could be generalized for trend of arbitrary order
    columns = ["const", "trend", "trend_squared"]
    if trend == "n":
        return x.copy()
    elif trend == "c":  # handles structured arrays
        columns = columns[:1]
        trendorder = 0
    elif trend == "ct" or trend == "t":
        columns = columns[:2]
        if trend == "t":
            columns = columns[1:2]
        trendorder = 1
    elif trend == "ctt":
        trendorder = 2

    if _is_recarray(x):
        from statsmodels.tools.sm_exceptions import recarray_exception

        raise NotImplementedError(recarray_exception)

    is_pandas = _is_using_pandas(x, None)
    if is_pandas:
        if isinstance(x, pd.Series):
            x = pd.DataFrame(x)
        else:
            x = x.copy()
    else:
        x = np.asanyarray(x)

    nobs = len(x)
    trendarr = np.vander(
        np.arange(1, nobs + 1, dtype=np.float64), trendorder + 1
    )
    # put in order ctt
    trendarr = np.fliplr(trendarr)
    if trend == "t":
        trendarr = trendarr[:, 1]

    if "c" in trend:
        if is_pandas:
            # Mixed type protection
            def safe_is_const(s):
                try:
                    return np.ptp(s) == 0.0 and np.any(s != 0.0)
                except Exception:
                    return False

            col_const = x.apply(safe_is_const, 0)
        else:
            ptp0 = np.ptp(np.asanyarray(x), axis=0)
            col_is_const = ptp0 == 0
            nz_const = col_is_const & (x[0] != 0)
            col_const = nz_const

        if np.any(col_const):
            if has_constant == "raise":
                if x.ndim == 1:
                    base_err = "x is constant."
                else:
                    columns = np.arange(x.shape[1])[col_const]
                    if isinstance(x, pd.DataFrame):
                        columns = x.columns
                    const_cols = ", ".join([str(c) for c in columns])
                    base_err = (
                        "x contains one or more constant columns. Column(s) "
                        f"{const_cols} are constant."
                    )
                msg = (
                    f"{base_err} Adding a constant with trend='{trend}' is "
                    "not allowed."
                )
                raise ValueError(msg)
            elif has_constant == "skip":
                columns = columns[1:]
                trendarr = trendarr[:, 1:]

    order = 1 if prepend else -1
    if is_pandas:
        trendarr = pd.DataFrame(trendarr, index=x.index, columns=columns)
        x = [trendarr, x]
        x = pd.concat(x[::order], axis=1)
    else:
        x = [trendarr, x]
        x = np.column_stack(x[::order])

    return x


def add_lag(x, col=None, lags=1, drop=False, insert=True):
    """
    Returns an array with lags included given an array.

    Parameters
    ----------
    x : array_like
        An array or NumPy ndarray subclass. Can be either a 1d or 2d array,
        with observations in rows and variables in columns.
    col : int or None
        The zero-based index of the column to lag. If x is 1d, `col` can be
        None.
    lags : int
        The number of lags desired.
    drop : bool
        If True, drops the contemporaneous (unlagged) variable from the
        returned array, keeping only its lags.
    insert : bool or int
        If True, inserts the lagged values after `col`. If False, appends
        the data. If an int, inserts the lags at the given column position.

    Returns
    -------
    array : ndarray
        Array with lags

    Examples
    --------

    >>> import statsmodels.api as sm
    >>> data = sm.datasets.macrodata.load()
    >>> data = data.data[['year', 'quarter', 'realgdp', 'cpi']]
    >>> data = sm.tsa.add_lag(data, col=2, lags=2)  # realgdp is column 2

    Notes
    -----
    Trims the array both forward and backward, so that the length of the
    returned array is len(x) - lags. The lags are returned in increasing
    order, i.e., t-1, t-2, ..., t-lags.
    """
    lags = int_like(lags, "lags")
    drop = bool_like(drop, "drop")
    x = array_like(x, "x", ndim=2)
    if col is None:
        col = 0

    # handle negative index
    if col < 0:
        col = x.shape[1] + col
    if x.ndim == 1:
        x = x[:, None]
    contemp = x[:, col]

    if insert is True:
        ins_idx = col + 1
    elif insert is False:
        ins_idx = x.shape[1]
    else:
        if insert < 0:  # handle negative index
            insert = x.shape[1] + insert + 1
        if insert > x.shape[1]:
            insert = x.shape[1]

            warnings.warn(
                "insert > number of variables, inserting at the"
                " last position",
                ValueWarning,
            )
        ins_idx = insert

    ndlags = lagmat(contemp, lags, trim="both")
    first_cols = lrange(ins_idx)
    last_cols = lrange(ins_idx, x.shape[1])
    if drop:
        if col in first_cols:
            first_cols.pop(first_cols.index(col))
        else:
            last_cols.pop(last_cols.index(col))
    return np.column_stack((x[lags:, first_cols], ndlags, x[lags:, last_cols]))


def detrend(x, order=1, axis=0):
    """
    Detrend an array with a trend of given order along axis 0 or 1.

    Parameters
    ----------
    x : array_like, 1d or 2d
        Data, if 2d, then each row or column is independently detrended with
        the same trendorder, but independent trend estimates.
    order : int
        The polynomial order of the trend, zero is constant, one is
        linear trend, two is quadratic trend.
    axis : int
        Axis can be either 0, observations by rows, or 1, observations by
        columns.

    Returns
    -------
    ndarray
        The detrended series is the residual of the linear regression of the
        data on the trend of given order.
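
    Examples
    --------
    A small illustrative check: a purely linear series detrends to
    (numerically) zero residuals.

    >>> import numpy as np
    >>> from statsmodels.tsa.tsatools import detrend
    >>> np.allclose(detrend(np.arange(10.0), order=1), 0.0)
    True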
    """
    order = int_like(order, "order")
    axis = int_like(axis, "axis")

    if x.ndim == 2 and int(axis) == 1:
        x = x.T
    elif x.ndim > 2:
        raise NotImplementedError(
            "x.ndim > 2 is not implemented until it is needed"
        )

    nobs = x.shape[0]
    if order == 0:
        # Special case demean
        resid = x - x.mean(axis=0)
    else:
        trends = np.vander(np.arange(float(nobs)), N=order + 1)
        beta = np.linalg.pinv(trends).dot(x)
        resid = x - np.dot(trends, beta)

    if x.ndim == 2 and int(axis) == 1:
        resid = resid.T

    return resid


def lagmat(
    x,
    maxlag: int,
    trim: Literal["forward", "backward", "both", "none"] = "forward",
    original: Literal["ex", "sep", "in"] = "ex",
    use_pandas: bool = False,
) -> NDArray | DataFrame | tuple[NDArray, NDArray] | tuple[DataFrame, DataFrame]:
    """
    Create 2d array of lags.

    Parameters
    ----------
    x : array_like
        Data; if 2d, observations in rows and variables in columns.
    maxlag : int
        All lags from zero to maxlag are included.
    trim : {'forward', 'backward', 'both', 'none', None}
        The trimming method to use.

        * 'forward' : trim invalid observations in front.
        * 'backward' : trim invalid initial observations.
        * 'both' : trim invalid observations on both sides.
        * 'none', None : no trimming of observations.
    original : {'ex','sep','in'}
        How the original is treated.

        * 'ex' : drops the original array returning only the lagged values.
        * 'in' : returns the original array and the lagged values as a single
          array.
        * 'sep' : returns a tuple (original array, lagged values). The original
          array is truncated to have the same number of rows as
          the returned lagmat.
    use_pandas : bool
        If true, returns a DataFrame when the input is a pandas
        Series or DataFrame. If false, returns numpy ndarrays.

    Returns
    -------
    lagmat : ndarray
        The array with lagged observations.
    y : ndarray, optional
        Only returned if original == 'sep'.

    Notes
    -----
    When using a pandas DataFrame or Series with use_pandas=True, trim can only
    be 'forward' or 'both' since it is not possible to consistently extend
    index values.

    Examples
    --------
    >>> from statsmodels.tsa.tsatools import lagmat
    >>> import numpy as np
    >>> X = np.arange(1,7).reshape(-1,2)
    >>> lagmat(X, maxlag=2, trim="forward", original='in')
    array([[ 1.,  2.,  0.,  0.,  0.,  0.],
           [ 3.,  4.,  1.,  2.,  0.,  0.],
           [ 5.,  6.,  3.,  4.,  1.,  2.]])

    >>> lagmat(X, maxlag=2, trim="backward", original='in')
    array([[ 5.,  6.,  3.,  4.,  1.,  2.],
           [ 0.,  0.,  5.,  6.,  3.,  4.],
           [ 0.,  0.,  0.,  0.,  5.,  6.]])

    >>> lagmat(X, maxlag=2, trim="both", original='in')
    array([[ 5.,  6.,  3.,  4.,  1.,  2.]])

    >>> lagmat(X, maxlag=2, trim="none", original='in')
    array([[ 1.,  2.,  0.,  0.,  0.,  0.],
           [ 3.,  4.,  1.,  2.,  0.,  0.],
           [ 5.,  6.,  3.,  4.,  1.,  2.],
           [ 0.,  0.,  5.,  6.,  3.,  4.],
           [ 0.,  0.,  0.,  0.,  5.,  6.]])
    """
    maxlag = int_like(maxlag, "maxlag")
    use_pandas = bool_like(use_pandas, "use_pandas")
    trim = string_like(
        trim,
        "trim",
        optional=True,
        options=("forward", "backward", "both", "none"),
    )
    original = string_like(original, "original", options=("ex", "sep", "in"))

    # TODO: allow list of lags additional to maxlag
    orig = x
    x = array_like(x, "x", ndim=2, dtype=None)
    is_pandas = _is_using_pandas(orig, None) and use_pandas
    trim = "none" if trim is None else trim
    trim = trim.lower()
    if is_pandas and trim in ("none", "backward"):
        raise ValueError(
            "trim cannot be 'none' or 'backward' when used on "
            "Series or DataFrames"
        )

    dropidx = 0
    nobs, nvar = x.shape
    if original in ["ex", "sep"]:
        dropidx = nvar
    if maxlag >= nobs:
        raise ValueError("maxlag should be < nobs")
    lm = np.zeros((nobs + maxlag, nvar * (maxlag + 1)))
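    # Fill lm block by block: column block j (width nvar) contains the data
    # shifted down by j rows, i.e. lag j, so block 0 holds the contemporaneous
    # values and block maxlag holds the deepest lag.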
    for k in range(0, int(maxlag + 1)):
        lm[
            maxlag - k: nobs + maxlag - k,
            nvar * (maxlag - k): nvar * (maxlag - k + 1),
        ] = x

    if trim in ("none", "forward"):
        startobs = 0
    elif trim in ("backward", "both"):
        startobs = maxlag
    else:
        raise ValueError("trim option not valid")

    if trim in ("none", "backward"):
        stopobs = len(lm)
    else:
        stopobs = nobs

    if is_pandas:
        x = orig
        if isinstance(x, DataFrame):
            x_columns = [str(c) for c in x.columns]
            if len(set(x_columns)) != x.shape[1]:
                raise ValueError(
                    "Column names must be distinct after conversion to string "
                    "(if not already strings)."
                )
        else:
            x_columns = [str(x.name)]
        columns = [str(col) for col in x_columns]
        for lag in range(maxlag):
            lag_str = str(lag + 1)
            columns.extend([str(col) + ".L." + lag_str for col in x_columns])
        lm = DataFrame(lm[:stopobs], index=x.index, columns=columns)
        lags = lm.iloc[startobs:]
        if original in ("sep", "ex"):
            leads = lags[x_columns]
            lags = lags.drop(x_columns, axis=1)
    else:
        lags = lm[startobs:stopobs, dropidx:]
        if original == "sep":
            leads = lm[startobs:stopobs, :dropidx]

    if original == "sep":
        return lags, leads
    else:
        return lags


def lagmat2ds(
    x, maxlag0, maxlagex=None, dropex=0, trim="forward", use_pandas=False
):
    """
    Generate lagmatrix for 2d array, columns arranged by variables.

    Parameters
    ----------
    x : array_like
        Data, 2d. Observations in rows and variables in columns.
    maxlag0 : int
        The maximum lag for the first variable; all lags from zero to maxlag0
        are included.
    maxlagex : {None, int}
        The maximum lag for all other variables; all lags from zero to
        maxlagex are included. If None, defaults to maxlag0.
    dropex : int
        Exclude first dropex lags from other variables. For all variables,
        except the first, lags from dropex to maxlagex are included.
    trim : str
        The trimming method to use.

        * 'forward' : trim invalid observations in front.
        * 'backward' : trim invalid initial observations.
        * 'both' : trim invalid observations on both sides.
        * 'none' : no trimming of observations.
    use_pandas : bool
        If true, returns a DataFrame when the input is a pandas
        Series or DataFrame. If false, returns numpy ndarrays.

    Returns
    -------
    ndarray
        The array with lagged observations, columns ordered by variable.

    Notes
    -----
    Inefficient implementation for unequal lags, implemented for convenience.
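
    Examples
    --------
    An illustrative call on a small two-column array; the first column keeps
    lags 0 through 2 and the second keeps lags 0 and 1, giving five columns:

    >>> import numpy as np
    >>> from statsmodels.tsa.tsatools import lagmat2ds
    >>> x = np.arange(10.0).reshape(5, 2)
    >>> lagmat2ds(x, maxlag0=2, maxlagex=1).shape
    (5, 5)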
    """
    maxlag0 = int_like(maxlag0, "maxlag0")
    maxlagex = int_like(maxlagex, "maxlagex", optional=True)
    trim = string_like(
        trim,
        "trim",
        optional=True,
        options=("forward", "backward", "both", "none"),
    )
    if maxlagex is None:
        maxlagex = maxlag0
    maxlag = max(maxlag0, maxlagex)
    is_pandas = _is_using_pandas(x, None)

    if x.ndim == 1:
        if is_pandas:
            x = pd.DataFrame(x)
        else:
            x = x[:, None]
    elif x.ndim == 0 or x.ndim > 2:
        raise ValueError("Only supports 1 and 2-dimensional data.")

    nobs, nvar = x.shape

    if is_pandas and use_pandas:
        lags = lagmat(
            x.iloc[:, 0], maxlag, trim=trim, original="in", use_pandas=True
        )
        lagsli = [lags.iloc[:, : maxlag0 + 1]]
        for k in range(1, nvar):
            lags = lagmat(
                x.iloc[:, k], maxlag, trim=trim, original="in", use_pandas=True
            )
            lagsli.append(lags.iloc[:, dropex : maxlagex + 1])
        return pd.concat(lagsli, axis=1)
    elif is_pandas:
        x = np.asanyarray(x)

    lagsli = [
        lagmat(x[:, 0], maxlag, trim=trim, original="in")[:, : maxlag0 + 1]
    ]
    for k in range(1, nvar):
        lagsli.append(
            lagmat(x[:, k], maxlag, trim=trim, original="in")[
                :, dropex : maxlagex + 1
            ]
        )
    return np.column_stack(lagsli)


def vec(mat):
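    """Stack the columns of ``mat`` into a 1d array (column-major order)."""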
    return mat.ravel("F")


def vech(mat):
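    """
    Stack the lower triangle of ``mat`` (including the diagonal) column by
    column into a 1d array.
    """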
    # Gets Fortran-order
    return mat.T.take(_triu_indices(len(mat)))


# tril/triu/diag, suitable for ndarray.take


def _tril_indices(n):
    rows, cols = np.tril_indices(n)
    return rows * n + cols


def _triu_indices(n):
    rows, cols = np.triu_indices(n)
    return rows * n + cols


def _diag_indices(n):
    rows, cols = np.diag_indices(n)
    return rows * n + cols


def unvec(v):
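    """
    Inverse of vec: reshape a length k**2 vector into a (k, k) matrix in
    column-major order.
    """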
    k = int(np.sqrt(len(v)))
    assert k * k == len(v)
    return v.reshape((k, k), order="F")


def unvech(v):
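    """
    Inverse of vech for a symmetric matrix: rebuild the full symmetric matrix
    from its half-vectorization.
    """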
    # quadratic formula, correct fp error
    rows = 0.5 * (-1 + np.sqrt(1 + 8 * len(v)))
    rows = int(np.round(rows))

    result = np.zeros((rows, rows))
    result[np.triu_indices(rows)] = v
    result = result + result.T

    # divide diagonal elements by 2
    result[np.diag_indices(rows)] /= 2

    return result


def duplication_matrix(n):
    """
    Create duplication matrix D_n which satisfies vec(S) = D_n vech(S) for
    symmetric matrix S

    Parameters
    ----------
    n : int
        The size of the symmetric matrix S.

    Returns
    -------
    D_n : ndarray
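
    Examples
    --------
    A small check of the defining identity vec(S) = D_n vech(S):

    >>> import numpy as np
    >>> from statsmodels.tsa.tsatools import duplication_matrix, vec, vech
    >>> S = np.array([[1.0, 2.0], [2.0, 5.0]])
    >>> np.allclose(duplication_matrix(2) @ vech(S), vec(S))
    True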
    """
    n = int_like(n, "n")
    tmp = np.eye(n * (n + 1) // 2)
    return np.array([unvech(x).ravel() for x in tmp]).T


def elimination_matrix(n):
    """
    Create the elimination matrix L_n which satisfies vech(M) = L_n vec(M) for
    any matrix M

    Parameters
    ----------
    n : int
        The size of the square matrix M.

    Returns
    -------
    ndarray
        The elimination matrix, with shape (n * (n + 1) / 2, n * n).
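
    Examples
    --------
    Illustration of the identity vech(M) = L_n vec(M):

    >>> import numpy as np
    >>> from statsmodels.tsa.tsatools import elimination_matrix, vec, vech
    >>> M = np.array([[1.0, 2.0], [3.0, 5.0]])
    >>> np.allclose(elimination_matrix(2) @ vec(M), vech(M))
    True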
    """
    n = int_like(n, "n")
    vech_indices = vec(np.tril(np.ones((n, n))))
    return np.eye(n * n)[vech_indices != 0]


def commutation_matrix(p, q):
    """
    Create the commutation matrix K_{p,q} satisfying vec(A') = K_{p,q} vec(A)

    Parameters
    ----------
    p : int
        The number of rows of A.
    q : int
        The number of columns of A.

    Returns
    -------
    K : ndarray (pq x pq)
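
    Examples
    --------
    Illustration of the identity vec(A.T) = K_{p,q} vec(A):

    >>> import numpy as np
    >>> from statsmodels.tsa.tsatools import commutation_matrix, vec
    >>> A = np.arange(6.0).reshape(2, 3)
    >>> np.allclose(commutation_matrix(2, 3) @ vec(A), vec(A.T))
    True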
    """
    p = int_like(p, "p")
    q = int_like(q, "q")

    K = np.eye(p * q)
    indices = np.arange(p * q).reshape((p, q), order="F")
    return K.take(indices.ravel(), axis=0)


def _ar_transparams(params):
    """
    Transforms params to induce stationarity/invertibility.

    Parameters
    ----------
    params : array_like
        The AR coefficients

    References
    ----------
    Jones (1980)
    """
    newparams = np.tanh(params / 2)
    tmp = newparams.copy()
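    # Interpret the tanh-transformed values as partial autocorrelations and
    # apply a Durbin-Levinson-type recursion to obtain stationary AR
    # coefficients (mirrors the levinson-durbin step in _ma_transparams).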
    for j in range(1, len(params)):
        a = newparams[j]
        for kiter in range(j):
            tmp[kiter] -= a * newparams[j - kiter - 1]
        newparams[:j] = tmp[:j]
    return newparams


def _ar_invtransparams(params):
    """
    Inverse of the Jones reparameterization

    Parameters
    ----------
    params : array_like
        The transformed AR coefficients
    """
    params = params.copy()
    tmp = params.copy()
    for j in range(len(params) - 1, 0, -1):
        a = params[j]
        for kiter in range(j):
            tmp[kiter] = (params[kiter] + a * params[j - kiter - 1]) / (
                1 - a ** 2
            )
        params[:j] = tmp[:j]
    invarcoefs = 2 * np.arctanh(params)
    return invarcoefs


def _ma_transparams(params):
    """
    Transforms params to induce stationarity/invertibility.

    Parameters
    ----------
    params : ndarray
        The MA coefficients of an (AR)MA model.

    References
    ----------
    Jones (1980)
    """
    newparams = (1 - np.exp(-params)) / (1 + np.exp(-params))
    tmp = newparams.copy()

    # levinson-durbin to get macf
    for j in range(1, len(params)):
        b = newparams[j]
        for kiter in range(j):
            tmp[kiter] += b * newparams[j - kiter - 1]
        newparams[:j] = tmp[:j]
    return newparams


def _ma_invtransparams(macoefs):
    """
    Inverse of the Jones reparameterization

    Parameters
    ----------
    macoefs : ndarray
        The transformed MA coefficients
    """
    tmp = macoefs.copy()
    for j in range(len(macoefs) - 1, 0, -1):
        b = macoefs[j]
        for kiter in range(j):
            tmp[kiter] = (macoefs[kiter] - b * macoefs[j - kiter - 1]) / (
                1 - b ** 2
            )
        macoefs[:j] = tmp[:j]
    invmacoefs = -np.log((1 - macoefs) / (1 + macoefs))
    return invmacoefs


def unintegrate_levels(x, d):
    """
    Returns the successive differences needed to unintegrate the series.

    Parameters
    ----------
    x : array_like
        The original series
    d : int
        The number of differences of the differenced series.

    Returns
    -------
    y : array_like
        The increasing differences from 0 to d-1 of the first d elements
        of x.

    See Also
    --------
    unintegrate
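
    Examples
    --------
    A small illustration mirroring the example in ``unintegrate``:

    >>> import numpy as np
    >>> from statsmodels.tsa.tsatools import unintegrate_levels
    >>> x = np.array([1, 3, 9., 19, 8.])
    >>> unintegrate_levels(x, 2).tolist()
    [1.0, 2.0]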
    """
    d = int_like(d, "d")
    x = x[:d]
    return np.asarray([np.diff(x, d - i)[0] for i in range(d, 0, -1)])


def unintegrate(x, levels):
    """
    After taking n-differences of a series, return the original series

    Parameters
    ----------
    x : array_like
        The n-th differenced series
    levels : list
        A list of the first value in each differenced series, for
        [first-difference, second-difference, ..., n-th difference]

    Returns
    -------
    y : array_like
        The original series de-differenced

    Examples
    --------
    >>> x = np.array([1, 3, 9., 19, 8.])
    >>> levels = unintegrate_levels(x, 2)
    >>> levels
    array([ 1.,  2.])
    >>> unintegrate(np.diff(x, 2), levels)
    array([  1.,   3.,   9.,  19.,   8.])
    """
    levels = list(levels)[:]  # copy
    if len(levels) > 1:
        x0 = levels.pop(-1)
        return unintegrate(np.cumsum(np.r_[x0, x]), levels)
    x0 = levels[0]
    return np.cumsum(np.r_[x0, x])


def freq_to_period(freq: str | offsets.DateOffset) -> int:
    """
    Convert a pandas frequency to a periodicity

    Parameters
    ----------
    freq : str or offset
        Frequency to convert

    Returns
    -------
    int
        Periodicity of freq

    Notes
    -----
    Annual maps to 1, quarterly maps to 4, monthly to 12, weekly to 52,
    daily to 7, business-daily to 5, and hourly to 24.
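
    Examples
    --------
    Illustrative calls using offset objects (string aliases are also
    accepted and normalized via ``to_offset``):

    >>> from pandas.tseries import offsets
    >>> from statsmodels.tsa.tsatools import freq_to_period
    >>> freq_to_period(offsets.MonthEnd())
    12
    >>> freq_to_period(offsets.Week(weekday=6))
    52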
    """
    if not isinstance(freq, offsets.DateOffset):
        freq = to_offset(freq)  # go ahead and standardize
    assert isinstance(freq, offsets.DateOffset)
    freq = freq.rule_code.upper()

    yearly_freqs = ("A-", "AS-", "Y-", "YS-", "YE-")
    if freq in ("A", "Y") or freq.startswith(yearly_freqs):
        return 1
    elif freq == "Q" or freq.startswith(("Q-", "QS", "QE")):
        return 4
    elif freq == "M" or freq.startswith(("M-", "MS", "ME")):
        return 12
    elif freq == "W" or freq.startswith("W-"):
        return 52
    elif freq == "D":
        return 7
    elif freq == "B":
        return 5
    elif freq == "H":
        return 24
    else:  # pragma : no cover
        raise ValueError(
            "freq {} not understood. Please report if you "
            "think this is in error.".format(freq)
        )