# File listing metadata: 408 lines, 11 KiB, Python
'''Trying to verify the theoretical acf of ARMA processes.

Explicit functions for the autocovariance functions of ARMA(1,1), MA(1), MA(2),
plus 3 functions from nitime.utils.

'''
|
|
import numpy as np
|
|
from numpy.testing import assert_array_almost_equal
|
|
import matplotlib.pyplot as plt
|
|
|
|
from statsmodels import regression
|
|
from statsmodels.tsa.arima_process import arma_generate_sample, arma_impulse_response
|
|
from statsmodels.tsa.arima_process import arma_acovf, arma_acf
|
|
from statsmodels.tsa.arima.model import ARIMA
|
|
from statsmodels.tsa.stattools import acf, acovf
|
|
from statsmodels.graphics.tsaplots import plot_acf
|
|
|
|
# Lag polynomials (leading 1 included) of the process simulated below;
# the commented-out assignments are alternative specifications.
ar = [1.0, -0.6]
#ar = [1., 0.]
ma = [1.0, 0.4]
#ma = [1., 0.4, 0.6]
#ma = [1., 0.]
mod = ''  #'ma2'

# Simulate 5000 observations, keep the first 10 sample autocorrelations,
# and compute the impulse response of the same ARMA specification.
x = arma_generate_sample(ar, ma, 5000)
x_acf = acf(x)[:10]
x_ir = arma_impulse_response(ar, ma)
|
|
|
|
#print x_acf[:10]
|
|
#print x_ir[:10]
|
|
#irc2 = np.correlate(x_ir,x_ir,'full')[len(x_ir)-1:]
|
|
#print irc2[:10]
|
|
#print irc2[:10]/irc2[0]
|
|
#print irc2[:10-1] / irc2[1:10]
|
|
#print x_acf[:10-1] / x_acf[1:10]
|
|
|
|
# detrend helper from matplotlib.mlab
|
|
def detrend(x, key=None):
    """Detrend *x* with the method selected by *key*.

    Parameters
    ----------
    x : array_like
        Data handed to the selected detrending helper.
    key : {None, 'constant', 'linear'}, optional
        ``None`` or ``'constant'`` dispatches to ``detrend_mean``;
        ``'linear'`` dispatches to ``detrend_linear``.

    Returns
    -------
    The value returned by the selected helper.

    Raises
    ------
    ValueError
        If *key* is not one of the recognized values.  An unknown key used
        to fall through and silently return ``None``.
    """
    if key is None or key == 'constant':
        return detrend_mean(x)
    if key == 'linear':
        return detrend_linear(x)
    raise ValueError(
        "Unknown value for key %r, must be None, 'constant' or 'linear'"
        % (key,))
|
|
|
|
def demean(x, axis=0):
    """Return *x* minus its mean along the specified axis.

    Parameters
    ----------
    x : array_like
        Input data; converted with ``np.asarray``.
    axis : int or None, optional
        Axis along which the mean is taken (default 0).  Negative values
        count from the last axis.

    Returns
    -------
    ndarray
        Array of the same shape as *x* with the mean along *axis* removed.
    """
    x = np.asarray(x)
    # keepdims leaves the reduced axis in place with length 1, so the mean
    # broadcasts against x for every axis.  The previous hand-built index
    # was a list (rejected by current NumPy) and misplaced the new axis for
    # negative *axis*.
    return x - x.mean(axis, keepdims=True)
|
|
|
|
def detrend_mean(x):
    """Subtract the overall mean of *x* (as given by ``x.mean()``) from *x*."""
    centre = x.mean()
    return x - centre
|
|
|
|
def detrend_none(x):
    """Identity detrending: hand *x* back untouched."""
    return x
|
|
|
|
def detrend_linear(y):
    """Return *y* minus its best-fit line ('linear' detrending).

    Parameters
    ----------
    y : ndarray
        One-dimensional data; the regressor is ``0, 1, ..., len(y) - 1``.

    Returns
    -------
    ndarray
        Residuals of the least-squares line fitted to *y*.
    """
    # This is faster than an algorithm based on linalg.lstsq.
    # ``float`` replaces the ``np.float_`` alias, which NumPy 2.0 removed.
    x = np.arange(len(y), dtype=float)
    C = np.cov(x, y, bias=1)
    # slope = cov(x, y) / var(x); the intercept follows from the means
    b = C[0, 1] / C[0, 0]
    a = y.mean() - b * x.mean()
    return y - (b * x + a)
|
|
|
|
def acovf_explicit(ar, ma, nobs):
    '''Autocovariances at lags 0..9 built explicitly from the MA representation.

    The impulse response of the process is taken from
    ``arma_impulse_response(ar, ma)``; the value for lag ``t`` is the dot
    product of ``ir[:nobs-t]`` with ``ir[t:nobs]``.

    Returns a list with 10 entries.
    '''
    impulse = arma_impulse_response(ar, ma)
    autocov = []
    for lag in range(10):
        head = impulse[:nobs - lag]
        tail = impulse[lag:nobs]
        autocov.append(np.dot(head, tail))
    return autocov
|
|
|
|
def acovf_arma11(ar, ma):
    '''Closed-form autocovariances (lags 0 to 9) of an ARMA(1,1) process.

    Only ``ar[1]`` and ``ma[1]`` are read; the AR coefficient used in the
    formula is ``-ar[1]`` and the MA coefficient is ``ma[1]``.

    Returns an ndarray of length 10.

    Source notes: a first version following Florens et al page 278 looked
    wrong; the current formula follows bigJudge p 311 and now agrees.
    '''
    phi = -ar[1]
    theta = ma[1]
    denom = 1. - phi**2

    # lags 0 and 1 have their own expressions ...
    acov = [(1. + theta**2 + 2*phi*theta) / denom,
            (1 + phi*theta) * (phi + theta) / denom]
    # ... every later lag is phi times the previous one
    while len(acov) < 10:
        acov.append(phi * acov[-1])
    return np.array(acov)
|
|
|
|
# print acf11[:10]
|
|
# print acf11[:10] /acf11[0]
|
|
|
|
def acovf_ma2(ma):
    '''Autocovariances (lags 0 to 9) of an MA(2) process.

    ``ma`` is the lag polynomial ``[1, theta1, theta2]``; lags above 2 are
    zero.  Formulas from Greene p616 (with typo), Florens p280.

    Returns an ndarray of length 10.
    '''
    theta1, theta2 = ma[1], ma[2]
    acov = np.zeros(10)
    acov[0] = 1 + theta1**2 + theta2**2
    acov[1] = theta1 + theta1*theta2
    acov[2] = theta2
    return acov
|
|
|
|
# rho2 = rho/rho[0]
|
|
# print rho2
|
|
# print irc2[:10]/irc2[0]
|
|
|
|
def acovf_ma1(ma):
    '''Autocovariances (lags 0 to 9) of an MA(1) process.

    ``ma`` is the lag polynomial ``[1, theta]``; lags above 1 are zero.
    Formulas from Greene p616 (with typo), Florens p280.

    Returns an ndarray of length 10.
    '''
    theta = ma[1]
    acov = np.zeros(10)
    acov[0] = 1 + theta**2
    acov[1] = theta
    return acov
|
|
|
|
# rho2 = rho/rho[0]
|
|
# print rho2
|
|
# print irc2[:10]/irc2[0]
|
|
|
|
|
|
# Lag polynomials (leading 1 included) used to assemble the comparison cases.
ar1 = [1., -0.8]
ar0 = [1., 0.]
ma1 = [1., 0.4]
ma2 = [1., 0.4, 0.6]
ma0 = [1., 0.]

# Explicit autocovariance function to compare against, keyed by case name.
comparefn = {
    'ma1': acovf_ma1,
    'ma2': acovf_ma2,
    'arma11': acovf_arma11,
    'ar1': acovf_arma11,
}

# (case name, (ar polynomial, ma polynomial))
cases = [
    ('ma1', (ar0, ma1)),
    ('ma2', (ar0, ma2)),
    ('arma11', (ar1, ma1)),
    ('ar1', (ar1, ma0)),
]

for c, args in cases:

    ar, ma = args
    print('')
    print(c, ar, ma)
    myacovf = arma_acovf(ar, ma, nobs=10)
    myacf = arma_acf(ar, ma, lags=10)
    # the pure-MA helpers take only the MA polynomial
    othacovf = comparefn[c](ma) if c[:2] == 'ma' else comparefn[c](ar, ma)
    print(myacovf[:5])
    print(othacovf[:5])
    #something broke again,
    #for high persistence case eg ar=0.99, nobs of IR has to be large
    #made changes to arma_acovf
    assert_array_almost_equal(myacovf, othacovf, 10)
    assert_array_almost_equal(myacf, othacovf/othacovf[0], 10)
|
|
|
|
|
|
#from nitime.utils
|
|
def ar_generator(N=512, sigma=1.):
    """Simulate *N* samples of a fixed AR(4) process driven by Gaussian noise.

    Generates ``u(n) = a1*u(n-1) + a2*u(n-2) + ... + v(n)``, where ``v(n)``
    is drawn from a zero-mean normal distribution with variance *sigma*
    and the ``a`` coefficients are the hard-coded ``taps``.  The first
    samples only use the lags that already exist.

    (Source note: this sequence is shown to be estimated well by an order 8
    AR system.)

    Parameters
    ----------
    N : int, optional
        Number of samples to generate.  Values smaller than the number of
        taps are handled; they used to raise ``IndexError``.
    sigma : float, optional
        Variance of the driving noise.

    Returns
    -------
    u : ndarray
        The simulated signal, length *N*.
    v : ndarray
        The driving noise, length *N*.
    taps : ndarray
        The AR coefficients used.
    """
    taps = np.array([2.7607, -3.8106, 2.6535, -0.9238])
    v = np.random.normal(size=N, scale=sigma**0.5)
    u = np.zeros(N)
    P = len(taps)
    for n in range(N):
        # number of past samples available, capped at the AR order
        k = min(n, P)
        # most recent sample first, so it lines up with taps[0]
        u[n] = v[n] + np.dot(u[n - k:n][::-1], taps[:k])
    return u, v, taps
|
|
|
|
#JP: small differences to using np.correlate, because assumes mean(s)=0
|
|
# denominator is N, not N-k, biased estimator
|
|
# misnomer: (biased) autocovariance not autocorrelation
|
|
#from nitime.utils
|
|
def autocorr(s, axis=-1):
    """Returns the autocorrelation of signal s at all lags. Adheres to the
    definition r(k) = E{s(n)s*(n-k)} where E{} is the expectation operator.

    The result is computed with a zero-padded FFT and divided by the number
    of samples N along *axis*, so it is the biased estimate and the mean of
    *s* is not removed.

    Parameters
    ----------
    s : array_like
        Input signal.
    axis : int, optional
        Axis along which the lags are computed (default: last axis).

    Returns
    -------
    ndarray
        Same shape as *s*; index ``k`` along *axis* holds lag ``k``.
    """
    s = np.asarray(s)
    N = s.shape[axis]
    # padding to 2N-1 makes the circular correlation equal the linear one
    S = np.fft.fft(s, n=2*N-1, axis=axis)
    sxx = np.fft.ifft(S*S.conjugate(), axis=axis).real
    # Keep lags 0..N-1 along *axis*; a plain ``[:N]`` would slice the first
    # axis instead and return the wrong shape for multi-dimensional input.
    keep = [slice(None)] * sxx.ndim
    keep[axis] = slice(N)
    return sxx[tuple(keep)] / N
|
|
|
|
#JP: with valid this returns a single value, if x and y have same length
|
|
# e.g. norm_corr(x, x)
|
|
# using std subtracts mean, but correlate does not, requires means are exactly 0
|
|
# biased, no n-k correction for laglength
|
|
#from nitime.utils
|
|
def norm_corr(x, y, mode='valid'):
    """Correlate two ndarrays and normalize the result.

    ``np.correlate(x, y, mode)`` is divided by the product of the standard
    deviations of *x* and *y* and by the length of the last axis of *x*.
    With the default ``mode='valid'`` and equal-length inputs the result has
    a single element.
    """
    raw = np.correlate(x, y, mode)
    scale = np.std(x) * np.std(y) * (x.shape[-1])
    return raw / scale
|
|
|
|
|
|
|
|
# from matplotlib axes.py
|
|
# note: self is axis
|
|
def pltacorr(self, x, **kwargs):
    r"""
    Plot the autocorrelation of *x* by delegating to ``self.xcorr``.

    Adapted from matplotlib's ``Axes.acorr``; *self* plays the role of the
    axes.  The call made is ``self.xcorr(x, x, **kwargs)``, so every
    keyword argument (for example *normed*, *detrend*, *usevlines* or
    *maxlags*) is interpreted by that method, and its return value is
    passed back unchanged.

    For matplotlib's ``xcorr`` the documented behaviour is:

    - with *normed* = *True* the data are normalized by the correlation at
      the 0-th lag;
    - *maxlags* is a positive integer giving the number of lags to show;
      *None* returns all :math:`2 \mathrm{len}(x) - 1` lags;
    - with *usevlines* = *True* vertical lines are drawn from the origin to
      the correlation values, otherwise the style is set by the kwargs.

    .. seealso::

        :meth:`~matplotlib.axes.Axes.plot` or
        :meth:`~matplotlib.axes.Axes.vlines`
           For documentation on valid kwargs.
    """
    xcorr_method = self.xcorr
    return xcorr_method(x, x, **kwargs)
|
|
|
|
def pltxcorr(self, x, y, normed=True, detrend=detrend_none,
             usevlines=True, maxlags=10, **kwargs):
    """
    Plot the cross correlation between *x* and *y* on the axes-like *self*.

    Adapted from matplotlib's ``Axes.xcorr``; *self* is the axes, i.e. any
    object that provides ``vlines``, ``axhline`` and ``plot``.

    Parameters
    ----------
    x, y : array_like
        Sequences of equal length.
    normed : bool, optional
        If true, divide the correlation by ``sqrt(dot(x, x) * dot(y, y))``
        so that an autocorrelation equals 1 at lag 0.
    detrend : callable, optional
        Applied to ``np.asarray(x)`` and ``np.asarray(y)`` before the
        correlation is computed (default: no detrending).
    usevlines : bool, optional
        If true, draw vertical lines from 0 to the correlation values with
        ``self.vlines``, add ``self.axhline`` and overlay markers with
        ``self.plot``.  Otherwise only ``self.plot`` is called.
    maxlags : int or None, optional
        Number of lags shown on each side of lag 0.  *None* shows all
        ``2*len(x)-1`` lags.
    **kwargs
        Forwarded to the plotting calls.  For the ``plot`` call *marker*
        defaults to ``'o'`` and *linestyle* to ``'None'``.

    Returns
    -------
    lags : ndarray
        Lag vector of length ``2*maxlags+1``.
    c : ndarray
        Correlation values at those lags.
    a
        The ``vlines`` result if *usevlines* is true, otherwise the first
        line returned by ``plot``.
    b
        The ``axhline`` result if *usevlines* is true, otherwise ``None``.

    Raises
    ------
    ValueError
        If *x* and *y* differ in length, or *maxlags* is not ``None`` and
        not in ``1 .. len(x)-1``.
    """

    Nx = len(x)
    if Nx != len(y):
        raise ValueError('x and y must be equal length')

    x = detrend(np.asarray(x))
    y = detrend(np.asarray(y))

    # 'full' is the documented spelling of the legacy integer mode 2
    c = np.correlate(x, y, mode='full')

    if normed:
        # not in-place: integer input yields an integer ``c`` which cannot
        # hold the result of a true division
        c = c / np.sqrt(np.dot(x, x) * np.dot(y, y))

    if maxlags is None:
        maxlags = Nx - 1

    if maxlags >= Nx or maxlags < 1:
        raise ValueError('maxlags must be None or strictly '
                         'positive < %d' % Nx)

    lags = np.arange(-maxlags, maxlags+1)
    # the full correlation has lag 0 at index Nx-1
    c = c[Nx-1-maxlags:Nx+maxlags]

    if usevlines:
        a = self.vlines(lags, [0], c, **kwargs)
        b = self.axhline(**kwargs)
        kwargs.setdefault('marker', 'o')
        kwargs.setdefault('linestyle', 'None')
        # markers on top of the vertical lines; the handle is not returned
        self.plot(lags, c, **kwargs)
    else:
        kwargs.setdefault('marker', 'o')
        kwargs.setdefault('linestyle', 'None')
        a, = self.plot(lags, c, **kwargs)
        b = None
    return lags, c, a, b
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Simulate the AR(4) example series; arrvs is the tuple (u, v, taps).
arrvs = ar_generator()

# statsmodels.tsa.arima.model.ARIMA takes the (p, d, q) order in the
# constructor; fit() returns a results object whose estimates are exposed
# as ``params`` (the results object cannot be indexed).
arma = ARIMA(arrvs[0], order=(4, 0, 0))
res = arma.fit()

print(res.params)

# Compare statsmodels' acf/acovf with the FFT-based autocorr defined above,
# both on the raw and on the demeaned series.
acf1 = acf(arrvs[0])
# ``adjusted`` is the current name of the former ``unbiased`` keyword
acovf1b = acovf(arrvs[0], adjusted=False)
acf2 = autocorr(arrvs[0])
acf2m = autocorr(arrvs[0]-arrvs[0].mean())
print(acf1[:10])
print(acovf1b[:10])
print(acf2[:10])
print(acf2m[:10])


# AR(1) sample used for the Yule-Walker estimate and for the plots below.
x = arma_generate_sample([1.0, -0.8], [1.0], 500)
print(acf(x)[:20])
print(regression.yule_walker(x, 10))
|
|
|
|
#ax = plt.axes()
|
|
plt.plot(x)
#plt.show()

plt.figure()
pltxcorr(plt, x, x)
plt.figure()
pltxcorr(plt, x, x, usevlines=False)
plt.figure()
# plot_acf in statsmodels.graphics.tsaplots has the signature
# plot_acf(x, ax=None, lags=None, ...) and computes the sample ACF from the
# data itself, so the series is passed instead of the precomputed acf1.
# lags=19 shows lags 0..19 (the 20 values of acf1[:20]); alpha=None turns
# off the confidence band so only the correlations are drawn.
plot_acf(arrvs[0], ax=plt.gca(), lags=19, alpha=None, use_vlines=True)
plt.figure()
ax = plt.subplot(211)
plot_acf(arrvs[0], ax=ax, lags=19, alpha=None, use_vlines=True)
ax = plt.subplot(212)
plot_acf(arrvs[0], ax=ax, lags=19, alpha=None, use_vlines=False)
|
|
|
|
#plt.show()
|