312 lines
11 KiB
Python
312 lines
11 KiB
Python
import numpy as np
|
|
from numpy import (dot, eye, diag_indices, zeros, ones, diag,
|
|
asarray, r_)
|
|
from numpy.linalg import solve
|
|
|
|
|
|
# def denton(indicator, benchmark, freq="aq", **kwarg):
|
|
# """
|
|
# Denton's method to convert low-frequency to high frequency data.
|
|
#
|
|
# Parameters
|
|
# ----------
|
|
# benchmark : array_like
|
|
# The higher frequency benchmark. A 1d or 2d data series in columns.
|
|
# If 2d, then M series are assumed.
|
|
# indicator
|
|
# A low-frequency indicator series. It is assumed that there are no
|
|
# pre-sample indicators. Ie., the first indicators line up with
|
|
# the first benchmark.
|
|
# freq : str {"aq","qm", "other"}
|
|
# "aq" - Benchmarking an annual series to quarterly.
|
|
# "mq" - Benchmarking a quarterly series to monthly.
|
|
# "other" - Custom stride. A kwarg, k, must be supplied.
|
|
# kwargs :
|
|
# k : int
|
|
# The number of high-frequency observations that sum to make an
|
|
# aggregate low-frequency observation. `k` is used with
|
|
# `freq` == "other".
|
|
# Returns
|
|
# -------
|
|
# benchmarked series : ndarray
|
|
#
|
|
# Notes
|
|
# -----
|
|
# Denton's method minimizes the distance given by the penalty function, in
|
|
# a least squares sense, between the unknown benchmarked series and the
|
|
# indicator series subject to the condition that the sum of the benchmarked
|
|
# series is equal to the benchmark.
|
|
#
|
|
#
|
|
# References
|
|
# ----------
|
|
# Bloem, A.M, Dippelsman, R.J. and Maehle, N.O. 2001 Quarterly National
|
|
# Accounts Manual--Concepts, Data Sources, and Compilation. IMF.
|
|
# http://www.imf.org/external/pubs/ft/qna/2000/Textbook/index.htm
|
|
# Denton, F.T. 1971. "Adjustment of monthly or quarterly series to annual
|
|
# totals: an approach based on quadratic minimization." Journal of the
|
|
# American Statistical Association. 99-102.
|
|
#
|
|
# """
|
|
# # check arrays and make 2d
|
|
# indicator = np.asarray(indicator)
|
|
# if indicator.ndim == 1:
|
|
# indicator = indicator[:,None]
|
|
# benchmark = np.asarray(benchmark)
|
|
# if benchmark.ndim == 1:
|
|
# benchmark = benchmark[:,None]
|
|
#
|
|
# # get dimensions
|
|
# N = len(indicator) # total number of high-freq
|
|
# m = len(benchmark) # total number of low-freq
|
|
#
|
|
# # number of low-freq observations for aggregate measure
|
|
# # 4 for annual to quarter and 3 for quarter to monthly
|
|
# if freq == "aq":
|
|
# k = 4
|
|
# elif freq == "qm":
|
|
# k = 3
|
|
# elif freq == "other":
|
|
# k = kwargs.get("k")
|
|
# if not k:
|
|
# raise ValueError("k must be supplied with freq=\"other\"")
|
|
# else:
|
|
# raise ValueError("freq %s not understood" % freq)
|
|
#
|
|
# n = k*m # number of indicator series with a benchmark for back-series
|
|
# # if k*m != n, then we are going to extrapolate q observations
|
|
#
|
|
# B = block_diag(*(np.ones((k,1)),)*m)
|
|
#
|
|
# r = benchmark - B.T.dot(indicator)
|
|
#TODO: take code in the string at the end and implement Denton's original
|
|
# method with a few of the penalty functions.
|
|
|
|
|
|
def dentonm(indicator, benchmark, freq="aq", **kwargs):
|
|
"""
|
|
Modified Denton's method to convert low-frequency to high-frequency data.
|
|
|
|
Uses proportionate first-differences as the penalty function. See notes.
|
|
|
|
Parameters
|
|
----------
|
|
indicator : array_like
|
|
A low-frequency indicator series. It is assumed that there are no
|
|
pre-sample indicators. Ie., the first indicators line up with
|
|
the first benchmark.
|
|
benchmark : array_like
|
|
The higher frequency benchmark. A 1d or 2d data series in columns.
|
|
If 2d, then M series are assumed.
|
|
freq : str {"aq","qm", "other"}
|
|
The frequency to use in the conversion.
|
|
|
|
* "aq" - Benchmarking an annual series to quarterly.
|
|
* "mq" - Benchmarking a quarterly series to monthly.
|
|
* "other" - Custom stride. A kwarg, k, must be supplied.
|
|
**kwargs
|
|
Additional keyword argument. For example:
|
|
|
|
* k, an int, the number of high-frequency observations that sum to make
|
|
an aggregate low-frequency observation. `k` is used with
|
|
`freq` == "other".
|
|
|
|
Returns
|
|
-------
|
|
transformed : ndarray
|
|
The transformed series.
|
|
|
|
Examples
|
|
--------
|
|
>>> indicator = [50,100,150,100] * 5
|
|
>>> benchmark = [500,400,300,400,500]
|
|
>>> benchmarked = dentonm(indicator, benchmark, freq="aq")
|
|
|
|
Notes
|
|
-----
|
|
Denton's method minimizes the distance given by the penalty function, in
|
|
a least squares sense, between the unknown benchmarked series and the
|
|
indicator series subject to the condition that the sum of the benchmarked
|
|
series is equal to the benchmark. The modification allows that the first
|
|
value not be pre-determined as is the case with Denton's original method.
|
|
If the there is no benchmark provided for the last few indicator
|
|
observations, then extrapolation is performed using the last
|
|
benchmark-indicator ratio of the previous period.
|
|
|
|
Minimizes sum((X[t]/I[t] - X[t-1]/I[t-1])**2)
|
|
|
|
s.t.
|
|
|
|
sum(X) = A, for each period. Where X is the benchmarked series, I is
|
|
the indicator, and A is the benchmark.
|
|
|
|
References
|
|
----------
|
|
Bloem, A.M, Dippelsman, R.J. and Maehle, N.O. 2001 Quarterly National
|
|
Accounts Manual--Concepts, Data Sources, and Compilation. IMF.
|
|
http://www.imf.org/external/pubs/ft/qna/2000/Textbook/index.htm
|
|
Cholette, P. 1988. "Benchmarking systems of socio-economic time series."
|
|
Statistics Canada, Time Series Research and Analysis Division,
|
|
Working Paper No TSRA-88-017E.
|
|
Denton, F.T. 1971. "Adjustment of monthly or quarterly series to annual
|
|
totals: an approach based on quadratic minimization." Journal of the
|
|
American Statistical Association. 99-102.
|
|
"""
|
|
# penalty : str
|
|
# Penalty function. Can be "D1", "D2", "D3", "D4", "D5".
|
|
# X is the benchmarked series and I is the indicator.
|
|
# D1 - sum((X[t] - X[t-1]) - (I[t] - I[ti-1])**2)
|
|
# D2 - sum((ln(X[t]/X[t-1]) - ln(I[t]/I[t-1]))**2)
|
|
# D3 - sum((X[t]/X[t-1] / I[t]/I[t-1])**2)
|
|
# D4 - sum((X[t]/I[t] - X[t-1]/I[t-1])**2)
|
|
# D5 - sum((X[t]/I[t] / X[t-1]/I[t-1] - 1)**2)
|
|
#NOTE: only D4 is the only one implemented, see IMF chapter 6.
|
|
|
|
# check arrays and make 2d
|
|
indicator = asarray(indicator)
|
|
if indicator.ndim == 1:
|
|
indicator = indicator[:,None]
|
|
benchmark = asarray(benchmark)
|
|
if benchmark.ndim == 1:
|
|
benchmark = benchmark[:,None]
|
|
|
|
# get dimensions
|
|
N = len(indicator) # total number of high-freq
|
|
m = len(benchmark) # total number of low-freq
|
|
|
|
# number of low-freq observations for aggregate measure
|
|
# 4 for annual to quarter and 3 for quarter to monthly
|
|
if freq == "aq":
|
|
k = 4
|
|
elif freq == "qm":
|
|
k = 3
|
|
elif freq == "other":
|
|
k = kwargs.get("k")
|
|
if not k:
|
|
raise ValueError("k must be supplied with freq=\"other\"")
|
|
else:
|
|
raise ValueError("freq %s not understood" % freq)
|
|
|
|
n = k*m # number of indicator series with a benchmark for back-series
|
|
# if k*m != n, then we are going to extrapolate q observations
|
|
if N > n:
|
|
q = N - n
|
|
else:
|
|
q = 0
|
|
|
|
# make the aggregator matrix
|
|
#B = block_diag(*(ones((k,1)),)*m)
|
|
B = np.kron(np.eye(m), ones((k,1)))
|
|
|
|
# following the IMF paper, we can do
|
|
Zinv = diag(1./indicator.squeeze()[:n])
|
|
# this is D in Denton's notation (not using initial value correction)
|
|
# D = eye(n)
|
|
# make off-diagonal = -1
|
|
# D[((np.diag_indices(n)[0])[:-1]+1,(np.diag_indices(n)[1])[:-1])] = -1
|
|
# account for starting conditions
|
|
# H = D[1:,:]
|
|
# HTH = dot(H.T,H)
|
|
# just make HTH
|
|
HTH = eye(n)
|
|
diag_idx0, diag_idx1 = diag_indices(n)
|
|
HTH[diag_idx0[1:-1], diag_idx1[1:-1]] += 1
|
|
HTH[diag_idx0[:-1]+1, diag_idx1[:-1]] = -1
|
|
HTH[diag_idx0[:-1], diag_idx1[:-1]+1] = -1
|
|
|
|
W = dot(dot(Zinv,HTH),Zinv)
|
|
|
|
# make partitioned matrices
|
|
# TODO: break this out so that we can simplify the linalg?
|
|
I = zeros((n+m, n+m)) # noqa:E741
|
|
I[:n,:n] = W
|
|
I[:n,n:] = B
|
|
I[n:,:n] = B.T
|
|
|
|
A = zeros((m+n,1)) # zero first-order constraints
|
|
A[-m:] = benchmark # adding up constraints
|
|
X = solve(I,A)
|
|
X = X[:-m] # drop the lagrange multipliers
|
|
|
|
# handle extrapolation
|
|
if q > 0:
|
|
# get last Benchmark-Indicator ratio
|
|
bi = X[n-1]/indicator[n-1]
|
|
extrapolated = bi * indicator[n:]
|
|
X = r_[X,extrapolated]
|
|
|
|
return X.squeeze()
|
|
|
|
|
|
if __name__ == "__main__":
|
|
#these will be the tests
|
|
# from IMF paper
|
|
|
|
# quarterly data
|
|
indicator = np.array([98.2, 100.8, 102.2, 100.8, 99.0, 101.6,
|
|
102.7, 101.5, 100.5, 103.0, 103.5, 101.5])
|
|
# two annual observations
|
|
benchmark = np.array([4000.,4161.4])
|
|
x_imf = dentonm(indicator, benchmark, freq="aq")
|
|
|
|
imf_stata = np.array([969.8, 998.4, 1018.3, 1013.4, 1007.2, 1042.9,
|
|
1060.3, 1051.0, 1040.6, 1066.5, 1071.7, 1051.0])
|
|
np.testing.assert_almost_equal(imf_stata, x_imf, 1)
|
|
|
|
# Denton example
|
|
zQ = np.array([50,100,150,100] * 5)
|
|
Y = np.array([500,400,300,400,500])
|
|
x_denton = dentonm(zQ, Y, freq="aq")
|
|
x_stata = np.array([64.334796,127.80616,187.82379,120.03526,56.563894,
|
|
105.97568,147.50144,89.958987,40.547201,74.445963,
|
|
108.34473,76.66211,42.763347,94.14664,153.41596,
|
|
109.67405,58.290761,122.62556,190.41409,128.66959])
|
|
|
|
|
|
"""
|
|
# Examples from the Denton 1971 paper
|
|
k = 4
|
|
m = 5
|
|
n = m*k
|
|
|
|
zQ = [50,100,150,100] * m
|
|
Y = [500,400,300,400,500]
|
|
|
|
A = np.eye(n)
|
|
B = block_diag(*(np.ones((k,1)),)*m)
|
|
|
|
r = Y - B.T.dot(zQ)
|
|
#Ainv = inv(A)
|
|
Ainv = A # shortcut for identity
|
|
C = Ainv.dot(B).dot(inv(B.T.dot(Ainv).dot(B)))
|
|
x = zQ + C.dot(r)
|
|
|
|
# minimize first difference d(x-z)
|
|
R = linalg.tri(n, dtype=float) # R is tril so actually R.T in paper
|
|
Ainv = R.dot(R.T)
|
|
C = Ainv.dot(B).dot(inv(B.T.dot(Ainv).dot(B)))
|
|
x1 = zQ + C.dot(r)
|
|
|
|
# minimize the second difference d**2(x-z)
|
|
Ainv = R.dot(Ainv).dot(R.T)
|
|
C = Ainv.dot(B).dot(inv(B.T.dot(Ainv).dot(B)))
|
|
x12 = zQ + C.dot(r)
|
|
|
|
|
|
# # do it proportionately (x-z)/z
|
|
Z = np.diag(zQ)
|
|
Ainv = np.eye(n)
|
|
C = Z.dot(Ainv).dot(Z).dot(B).dot(inv(B.T.dot(Z).dot(Ainv).dot(Z).dot(B)))
|
|
x11 = zQ + C.dot(r)
|
|
|
|
# do it proportionately with differencing d((x-z)/z)
|
|
Ainv = R.dot(R.T)
|
|
C = Z.dot(Ainv).dot(Z).dot(B).dot(inv(B.T.dot(Z).dot(Ainv).dot(Z).dot(B)))
|
|
x111 = zQ + C.dot(r)
|
|
|
|
x_stata = np.array([64.334796,127.80616,187.82379,120.03526,56.563894,
|
|
105.97568,147.50144,89.958987,40.547201,74.445963,
|
|
108.34473,76.66211,42.763347,94.14664,153.41596,
|
|
109.67405,58.290761,122.62556,190.41409,128.66959])
|
|
"""
|