825 lines
26 KiB
Python
825 lines
26 KiB
Python
|
"""Asymmetric kernels for R+ and unit interval
|
|||
|
|
|||
|
References
|
|||
|
----------
|
|||
|
|
|||
|
.. [1] Bouezmarni, Taoufik, and Olivier Scaillet. 2005. “Consistency of
|
|||
|
Asymmetric Kernel Density Estimators and Smoothed Histograms with
|
|||
|
Application to Income Data.” Econometric Theory 21 (2): 390–412.
|
|||
|
|
|||
|
.. [2] Chen, Song Xi. 1999. “Beta Kernel Estimators for Density Functions.”
|
|||
|
Computational Statistics & Data Analysis 31 (2): 131–45.
|
|||
|
https://doi.org/10.1016/S0167-9473(99)00010-9.
|
|||
|
|
|||
|
.. [3] Chen, Song Xi. 2000. “Probability Density Function Estimation Using
|
|||
|
Gamma Kernels.”
|
|||
|
Annals of the Institute of Statistical Mathematics 52 (3): 471–80.
|
|||
|
https://doi.org/10.1023/A:1004165218295.
|
|||
|
|
|||
|
.. [4] Jin, Xiaodong, and Janusz Kawczak. 2003. “Birnbaum-Saunders and
|
|||
|
Lognormal Kernel Estimators for Modelling Durations in High Frequency
|
|||
|
Financial Data.” Annals of Economics and Finance 4: 103–24.
|
|||
|
|
|||
|
.. [5] Micheaux, Pierre Lafaye de, and Frédéric Ouimet. 2020. “A Study of Seven
|
|||
|
Asymmetric Kernels for the Estimation of Cumulative Distribution Functions,”
|
|||
|
November. https://arxiv.org/abs/2011.14893v1.
|
|||
|
|
|||
|
.. [6] Mombeni, Habib Allah, B Masouri, and Mohammad Reza Akhoond. 2019.
|
|||
|
“Asymmetric Kernels for Boundary Modification in Distribution Function
|
|||
|
Estimation.” REVSTAT, 1–27.
|
|||
|
|
|||
|
.. [7] Scaillet, O. 2004. “Density Estimation Using Inverse and Reciprocal
|
|||
|
Inverse Gaussian Kernels.”
|
|||
|
Journal of Nonparametric Statistics 16 (1–2): 217–26.
|
|||
|
https://doi.org/10.1080/10485250310001624819.
|
|||
|
|
|||
|
|
|||
|
Created on Mon Mar 8 11:12:24 2021
|
|||
|
|
|||
|
Author: Josef Perktold
|
|||
|
License: BSD-3
|
|||
|
|
|||
|
"""
|
|||
|
|
|||
|
import numpy as np
|
|||
|
from scipy import special, stats
|
|||
|
|
|||
|
doc_params = """\
|
|||
|
Parameters
|
|||
|
----------
|
|||
|
x : array_like, float
|
|||
|
Points for which density is evaluated. ``x`` can be scalar or 1-dim.
|
|||
|
sample : ndarray, 1-d
|
|||
|
Sample from which kde is computed.
|
|||
|
bw : float
|
|||
|
Bandwidth parameter, there is currently no default value for it.
|
|||
|
|
|||
|
Returns
|
|||
|
-------
|
|||
|
Components for kernel estimation"""
|
|||
|
|
|||
|
|
|||
|
def pdf_kernel_asym(x, sample, bw, kernel_type, weights=None, batch_size=10):
    """Density estimate based on asymmetric kernel.

    Parameters
    ----------
    x : array_like, float
        Points for which density is evaluated. ``x`` can be scalar or 1-dim.
    sample : ndarray, 1-d
        Sample from which kernel estimate is computed.
    bw : float
        Bandwidth parameter, there is currently no default value for it.
    kernel_type : str or callable
        Kernel name or kernel function.
        Currently supported kernel names are "beta", "beta2", "gamma",
        "gamma2", "bs", "invgamma", "invgauss", "lognorm", "recipinvgauss" and
        "weibull".
        A callable is called as ``kernel_type(x, sample, bw)``.
    weights : None or ndarray
        If weights is not None, then kernel for sample points are weighted
        by it. No weights corresponds to uniform weighting of each component
        with 1 / nobs, where nobs is the size of `sample`.
    batch_size : float
        If x is an 1-dim array, then points can be evaluated in vectorized
        form. To limit the amount of memory, a loop can work in batches.
        The number of batches is determined so that the intermediate array
        sizes are limited by

        ``np.size(batch) * len(sample) < batch_size * 1000``.

        Default is to have at most 10000 elements in intermediate arrays.
        A batch always contains at least one point of ``x``, so the limit
        is exceeded if ``len(sample)`` alone is larger than it. A scalar
        ``x`` is always evaluated in a single step.

    Returns
    -------
    pdf : float or ndarray
        Estimate of pdf at points x. ``pdf`` has the same size or shape as x.
    """

    if callable(kernel_type):
        kfunc = kernel_type
    else:
        kfunc = kernel_dict_pdf[kernel_type]

    nobs = len(sample)
    max_elements = batch_size * 1000

    if np.ndim(x) == 0 or np.size(x) * nobs < max_elements:
        # no batch-loop; a scalar x cannot be split, so it always ends here
        if np.size(x) > 1:
            x = np.asarray(x)[:, None]

        pdfi = kfunc(x, sample, bw)
        if weights is None:
            pdf = pdfi.mean(-1)
        else:
            pdf = pdfi @ weights
    else:
        # batch, designed for 1-d x
        x = np.asarray(x)
        if weights is None:
            weights = np.ones(nobs) / nobs

        # Number of x points per batch. Clip at one so that a sample that is
        # larger than the element limit does not result in a zero batch size
        # and a division by zero below.
        k = max(int(max_elements // nobs), 1)
        # Round up, so that no batch holds more than k points.
        n = max(-(-len(x) // k), 1)
        x_split = np.array_split(x, n)
        pdf = np.concatenate([(kfunc(xi[:, None], sample, bw) @ weights)
                              for xi in x_split])

    return pdf
|
|||
|
|
|||
|
|
|||
|
def cdf_kernel_asym(x, sample, bw, kernel_type, weights=None, batch_size=10):
    """Estimate of cumulative distribution based on asymmetric kernel.

    Parameters
    ----------
    x : array_like, float
        Points for which cdf is evaluated. ``x`` can be scalar or 1-dim.
    sample : ndarray, 1-d
        Sample from which kernel estimate is computed.
    bw : float
        Bandwidth parameter, there is currently no default value for it.
    kernel_type : str or callable
        Kernel name or kernel function.
        Currently supported kernel names are "beta", "beta2", "gamma",
        "gamma2", "bs", "invgamma", "invgauss", "lognorm", "recipinvgauss" and
        "weibull".
        A callable is called as ``kernel_type(x, sample, bw)``.
    weights : None or ndarray
        If weights is not None, then kernel for sample points are weighted
        by it. No weights corresponds to uniform weighting of each component
        with 1 / nobs, where nobs is the size of `sample`.
    batch_size : float
        If x is an 1-dim array, then points can be evaluated in vectorized
        form. To limit the amount of memory, a loop can work in batches.
        The number of batches is determined so that the intermediate array
        sizes are limited by

        ``np.size(batch) * len(sample) < batch_size * 1000``.

        Default is to have at most 10000 elements in intermediate arrays.
        A batch always contains at least one point of ``x``, so the limit
        is exceeded if ``len(sample)`` alone is larger than it. A scalar
        ``x`` is always evaluated in a single step.

    Returns
    -------
    cdf : float or ndarray
        Estimate of cdf at points x. ``cdf`` has the same size or shape as x.
    """

    if callable(kernel_type):
        kfunc = kernel_type
    else:
        kfunc = kernel_dict_cdf[kernel_type]

    nobs = len(sample)
    max_elements = batch_size * 1000

    if np.ndim(x) == 0 or np.size(x) * nobs < max_elements:
        # no batch-loop; a scalar x cannot be split, so it always ends here
        if np.size(x) > 1:
            x = np.asarray(x)[:, None]

        cdfi = kfunc(x, sample, bw)
        if weights is None:
            cdf = cdfi.mean(-1)
        else:
            cdf = cdfi @ weights
    else:
        # batch, designed for 1-d x
        x = np.asarray(x)
        if weights is None:
            weights = np.ones(nobs) / nobs

        # Number of x points per batch. Clip at one so that a sample that is
        # larger than the element limit does not result in a zero batch size
        # and a division by zero below.
        k = max(int(max_elements // nobs), 1)
        # Round up, so that no batch holds more than k points.
        n = max(-(-len(x) // k), 1)
        x_split = np.array_split(x, n)
        cdf = np.concatenate([(kfunc(xi[:, None], sample, bw) @ weights)
                              for xi in x_split])

    return cdf
|
|||
|
|
|||
|
|
|||
|
def kernel_pdf_beta(x, sample, bw):
    # Beta density evaluated at the sample points. Both shape parameters
    # are functions of the evaluation point ``x`` and the bandwidth ``bw``.
    shape_a = x / bw + 1
    shape_b = (1 - x) / bw + 1
    return stats.beta.pdf(sample, shape_a, shape_b)
|
|||
|
|
|||
|
|
|||
|
kernel_pdf_beta.__doc__ = """\
|
|||
|
Beta kernel for density, pdf, estimation.
|
|||
|
|
|||
|
{doc_params}
|
|||
|
|
|||
|
References
|
|||
|
----------
|
|||
|
.. [1] Bouezmarni, Taoufik, and Olivier Scaillet. 2005. “Consistency of
|
|||
|
Asymmetric Kernel Density Estimators and Smoothed Histograms with
|
|||
|
Application to Income Data.” Econometric Theory 21 (2): 390–412.
|
|||
|
|
|||
|
.. [2] Chen, Song Xi. 1999. “Beta Kernel Estimators for Density Functions.”
|
|||
|
Computational Statistics & Data Analysis 31 (2): 131–45.
|
|||
|
https://doi.org/10.1016/S0167-9473(99)00010-9.
|
|||
|
""".format(doc_params=doc_params)
|
|||
|
|
|||
|
|
|||
|
def kernel_cdf_beta(x, sample, bw):
    # Survival function of the beta distribution evaluated at the sample
    # points. The shape parameters are the same as in the beta pdf kernel.
    shape_a = x / bw + 1
    shape_b = (1 - x) / bw + 1
    return stats.beta.sf(sample, shape_a, shape_b)
|
|||
|
|
|||
|
|
|||
|
kernel_cdf_beta.__doc__ = """\
|
|||
|
Beta kernel for cumulative distribution, cdf, estimation.
|
|||
|
|
|||
|
{doc_params}
|
|||
|
|
|||
|
References
|
|||
|
----------
|
|||
|
.. [1] Bouezmarni, Taoufik, and Olivier Scaillet. 2005. “Consistency of
|
|||
|
Asymmetric Kernel Density Estimators and Smoothed Histograms with
|
|||
|
Application to Income Data.” Econometric Theory 21 (2): 390–412.
|
|||
|
|
|||
|
.. [2] Chen, Song Xi. 1999. “Beta Kernel Estimators for Density Functions.”
|
|||
|
Computational Statistics & Data Analysis 31 (2): 131–45.
|
|||
|
https://doi.org/10.1016/S0167-9473(99)00010-9.
|
|||
|
""".format(doc_params=doc_params)
|
|||
|
|
|||
|
|
|||
|
def kernel_pdf_beta2(x, sample, bw):
    # Beta kernel for density estimation, variant that replaces one shape
    # parameter for points within ``2 * bw`` of the boundaries 0 and 1.

    # The replacement shape parameter is
    #     c0 - sqrt(c1 - u**2 - u / bw)
    # where c0 and c1 depend only on the bandwidth.
    c0 = 2 * bw**2 + 2.5
    c1 = 4 * bw**4 + 6 * bw**2 + 2.25

    def boundary_shape(u):
        return c0 - np.sqrt(c1 - u**2 - u / bw)

    # shape parameters used away from the boundaries
    shape_a = x / bw
    shape_b = (1 - x) / bw

    if np.size(x) == 1:
        # single point: at most one replacement is made and the check of
        # the lower boundary comes first
        if x < 2 * bw:
            shape_a = boundary_shape(x)
        elif x > (1 - 2 * bw):
            shape_b = boundary_shape(1 - x)
    else:
        # several points: replacements are made elementwise through masks
        near_zero = x < 2 * bw
        shape_a[near_zero] = boundary_shape(x[near_zero])

        near_one = x > (1 - 2 * bw)
        shape_b[near_one] = boundary_shape(1 - x[near_one])

    return stats.beta.pdf(sample, shape_a, shape_b)
|
|||
|
|
|||
|
|
|||
|
kernel_pdf_beta2.__doc__ = """\
|
|||
|
Beta kernel for density, pdf, estimation with boundary corrections.
|
|||
|
|
|||
|
{doc_params}
|
|||
|
|
|||
|
References
|
|||
|
----------
|
|||
|
.. [1] Bouezmarni, Taoufik, and Olivier Scaillet. 2005. “Consistency of
|
|||
|
Asymmetric Kernel Density Estimators and Smoothed Histograms with
|
|||
|
Application to Income Data.” Econometric Theory 21 (2): 390–412.
|
|||
|
|
|||
|
.. [2] Chen, Song Xi. 1999. “Beta Kernel Estimators for Density Functions.”
|
|||
|
Computational Statistics & Data Analysis 31 (2): 131–45.
|
|||
|
https://doi.org/10.1016/S0167-9473(99)00010-9.
|
|||
|
""".format(doc_params=doc_params)
|
|||
|
|
|||
|
|
|||
|
def kernel_cdf_beta2(x, sample, bw):
    # Beta kernel for cdf estimation, variant that replaces one shape
    # parameter for points within ``2 * bw`` of the boundaries 0 and 1.
    # Returns the beta survival function evaluated at the sample points.

    # The replacement shape parameter is
    #     c0 - sqrt(c1 - u**2 - u / bw)
    # where c0 and c1 depend only on the bandwidth.
    c0 = 2 * bw**2 + 2.5
    c1 = 4 * bw**4 + 6 * bw**2 + 2.25

    def boundary_shape(u):
        return c0 - np.sqrt(c1 - u**2 - u / bw)

    # shape parameters used away from the boundaries
    shape_a = x / bw
    shape_b = (1 - x) / bw

    if np.size(x) == 1:
        # single point: at most one replacement is made and the check of
        # the lower boundary comes first
        if x < 2 * bw:
            shape_a = boundary_shape(x)
        elif x > (1 - 2 * bw):
            shape_b = boundary_shape(1 - x)
    else:
        # several points: replacements are made elementwise through masks
        near_zero = x < 2 * bw
        shape_a[near_zero] = boundary_shape(x[near_zero])

        near_one = x > (1 - 2 * bw)
        shape_b[near_one] = boundary_shape(1 - x[near_one])

    cdf = stats.beta.sf(sample, shape_a, shape_b)
    return cdf
|
|||
|
|
|||
|
|
|||
|
kernel_cdf_beta2.__doc__ = """\
|
|||
|
Beta kernel for cdf estimation with boundary correction.
|
|||
|
|
|||
|
{doc_params}
|
|||
|
|
|||
|
References
|
|||
|
----------
|
|||
|
.. [1] Bouezmarni, Taoufik, and Olivier Scaillet. 2005. “Consistency of
|
|||
|
Asymmetric Kernel Density Estimators and Smoothed Histograms with
|
|||
|
Application to Income Data.” Econometric Theory 21 (2): 390–412.
|
|||
|
|
|||
|
.. [2] Chen, Song Xi. 1999. “Beta Kernel Estimators for Density Functions.”
|
|||
|
Computational Statistics & Data Analysis 31 (2): 131–45.
|
|||
|
https://doi.org/10.1016/S0167-9473(99)00010-9.
|
|||
|
""".format(doc_params=doc_params)
|
|||
|
|
|||
|
|
|||
|
def kernel_pdf_gamma(x, sample, bw):
    # Gamma density evaluated at the sample points, with shape parameter
    # ``x / bw + 1`` and scale ``bw``.
    shape = x / bw + 1
    return stats.gamma.pdf(sample, shape, scale=bw)
|
|||
|
|
|||
|
|
|||
|
kernel_pdf_gamma.__doc__ = """\
|
|||
|
Gamma kernel for density, pdf, estimation.
|
|||
|
|
|||
|
{doc_params}
|
|||
|
|
|||
|
References
|
|||
|
----------
|
|||
|
.. [1] Bouezmarni, Taoufik, and Olivier Scaillet. 2005. “Consistency of
|
|||
|
Asymmetric Kernel Density Estimators and Smoothed Histograms with
|
|||
|
Application to Income Data.” Econometric Theory 21 (2): 390–412.
|
|||
|
|
|||
|
.. [2] Chen, Song Xi. 2000. “Probability Density Function Estimation Using
|
|||
|
Gamma Kernels.”
|
|||
|
Annals of the Institute of Statistical Mathematics 52 (3): 471–80.
|
|||
|
https://doi.org/10.1023/A:1004165218295.
|
|||
|
""".format(doc_params=doc_params)
|
|||
|
|
|||
|
|
|||
|
def kernel_cdf_gamma(x, sample, bw):
    # Gamma kernel for cumulative distribution, cdf, estimation.
    # The component for each sample point is the survival function of the
    # gamma distribution with shape ``x / bw + 1`` and scale ``bw``.
    # NOTE(review): presumably the survival function is used because the
    # kernel distribution is indexed by x, so that P(K_x > sample_i) is a
    # smoothed version of the indicator sample_i <= x -- confirm against
    # the cdf kernel references.
    shape = x / bw + 1
    return stats.gamma.sf(sample, shape, scale=bw)
|
|||
|
|
|||
|
|
|||
|
kernel_cdf_gamma.__doc__ = """\
|
|||
|
Gamma kernel for cumulative distribution, cdf, estimation.
|
|||
|
|
|||
|
{doc_params}
|
|||
|
|
|||
|
References
|
|||
|
----------
|
|||
|
.. [1] Bouezmarni, Taoufik, and Olivier Scaillet. 2005. “Consistency of
|
|||
|
Asymmetric Kernel Density Estimators and Smoothed Histograms with
|
|||
|
Application to Income Data.” Econometric Theory 21 (2): 390–412.
|
|||
|
|
|||
|
.. [2] Chen, Song Xi. 2000. “Probability Density Function Estimation Using
|
|||
|
Gamma Kernels.”
|
|||
|
Annals of the Institute of Statistical Mathematics 52 (3): 471–80.
|
|||
|
https://doi.org/10.1023/A:1004165218295.
|
|||
|
""".format(doc_params=doc_params)
|
|||
|
|
|||
|
|
|||
|
def _kernel_pdf_gamma(x, sample, bw):
|
|||
|
"""Gamma kernel for pdf, without boundary corrected part.
|
|||
|
|
|||
|
drops `+ 1` in shape parameter
|
|||
|
|
|||
|
It should be possible to use this if probability in
|
|||
|
neighborhood of zero boundary is small.
|
|||
|
|
|||
|
"""
|
|||
|
return stats.gamma.pdf(sample, x / bw, scale=bw)
|
|||
|
|
|||
|
|
|||
|
def _kernel_cdf_gamma(x, sample, bw):
|
|||
|
"""Gamma kernel for cdf, without boundary corrected part.
|
|||
|
|
|||
|
drops `+ 1` in shape parameter
|
|||
|
|
|||
|
It should be possible to use this if probability in
|
|||
|
neighborhood of zero boundary is small.
|
|||
|
|
|||
|
"""
|
|||
|
return stats.gamma.sf(sample, x / bw, scale=bw)
|
|||
|
|
|||
|
|
|||
|
def kernel_pdf_gamma2(x, sample, bw):
    # Gamma kernel for density, pdf, estimation with boundary correction
    #
    # The shape parameter of the gamma distribution is
    #     x / bw                   if x >= 2 * bw
    #     (x / (2 * bw))**2 + 1    if x < 2 * bw
    # following Chen (2000). Both pieces are equal to 2 at x = 2 * bw, so
    # the shape parameter is continuous in x. The factor 1 / 4 in the
    # boundary piece is required for this, without it the shape parameter
    # jumps from 5 to 2 at x = 2 * bw.
    if np.size(x) == 1:
        # without vectorizing, easier to read
        if x < 2 * bw:
            a = 0.25 * (x / bw)**2 + 1
        else:
            a = x / bw
    else:
        x = np.asarray(x)
        a = x / bw
        mask = x < 2 * bw
        a[mask] = 0.25 * a[mask]**2 + 1
    pdf = stats.gamma.pdf(sample, a, scale=bw)

    return pdf
|
|||
|
|
|||
|
|
|||
|
kernel_pdf_gamma2.__doc__ = """\
|
|||
|
Gamma kernel for density, pdf, estimation with boundary correction.
|
|||
|
|
|||
|
{doc_params}
|
|||
|
|
|||
|
References
|
|||
|
----------
|
|||
|
.. [1] Bouezmarni, Taoufik, and Olivier Scaillet. 2005. “Consistency of
|
|||
|
Asymmetric Kernel Density Estimators and Smoothed Histograms with
|
|||
|
Application to Income Data.” Econometric Theory 21 (2): 390–412.
|
|||
|
|
|||
|
.. [2] Chen, Song Xi. 2000. “Probability Density Function Estimation Using
|
|||
|
Gamma Kernels.”
|
|||
|
Annals of the Institute of Statistical Mathematics 52 (3): 471–80.
|
|||
|
https://doi.org/10.1023/A:1004165218295.
|
|||
|
""".format(doc_params=doc_params)
|
|||
|
|
|||
|
|
|||
|
def kernel_cdf_gamma2(x, sample, bw):
    # Gamma kernel for cdf estimation with boundary correction
    #
    # The shape parameter of the gamma distribution is
    #     x / bw                   if x >= 2 * bw
    #     (x / (2 * bw))**2 + 1    if x < 2 * bw
    # following Chen (2000). Both pieces are equal to 2 at x = 2 * bw, so
    # the shape parameter is continuous in x. The factor 1 / 4 in the
    # boundary piece is required for this, without it the shape parameter
    # jumps from 5 to 2 at x = 2 * bw.
    # The components are values of the gamma survival function at the
    # sample points.
    if np.size(x) == 1:
        # without vectorizing
        if x < 2 * bw:
            a = 0.25 * (x / bw)**2 + 1
        else:
            a = x / bw
    else:
        x = np.asarray(x)
        a = x / bw
        mask = x < 2 * bw
        a[mask] = 0.25 * a[mask]**2 + 1
    cdf = stats.gamma.sf(sample, a, scale=bw)

    return cdf
|
|||
|
|
|||
|
|
|||
|
kernel_cdf_gamma2.__doc__ = """\
|
|||
|
Gamma kernel for cdf estimation with boundary correction.
|
|||
|
|
|||
|
{doc_params}
|
|||
|
|
|||
|
References
|
|||
|
----------
|
|||
|
.. [1] Bouezmarni, Taoufik, and Olivier Scaillet. 2005. “Consistency of
|
|||
|
Asymmetric Kernel Density Estimators and Smoothed Histograms with
|
|||
|
Application to Income Data.” Econometric Theory 21 (2): 390–412.
|
|||
|
|
|||
|
.. [2] Chen, Song Xi. 2000. “Probability Density Function Estimation Using
|
|||
|
Gamma Kernels.”
|
|||
|
Annals of the Institute of Statistical Mathematics 52 (3): 471–80.
|
|||
|
https://doi.org/10.1023/A:1004165218295.
|
|||
|
""".format(doc_params=doc_params)
|
|||
|
|
|||
|
|
|||
|
def kernel_pdf_invgamma(x, sample, bw):
    # Inverse gamma density evaluated at the sample points, with shape
    # parameter ``1 / bw + 1`` and scale ``x / bw``.
    shape = 1 / bw + 1
    scale = x / bw
    return stats.invgamma.pdf(sample, shape, scale=scale)
|
|||
|
|
|||
|
|
|||
|
kernel_pdf_invgamma.__doc__ = """\
|
|||
|
Inverse gamma kernel for density, pdf, estimation.
|
|||
|
|
|||
|
Based on cdf kernel by Micheaux and Ouimet (2020)
|
|||
|
|
|||
|
{doc_params}
|
|||
|
|
|||
|
References
|
|||
|
----------
|
|||
|
.. [1] Micheaux, Pierre Lafaye de, and Frédéric Ouimet. 2020. “A Study of
|
|||
|
Seven Asymmetric Kernels for the Estimation of Cumulative Distribution
|
|||
|
Functions,” November. https://arxiv.org/abs/2011.14893v1.
|
|||
|
""".format(doc_params=doc_params)
|
|||
|
|
|||
|
|
|||
|
def kernel_cdf_invgamma(x, sample, bw):
    # Survival function of the inverse gamma distribution evaluated at the
    # sample points, with shape ``1 / bw + 1`` and scale ``x / bw``.
    shape = 1 / bw + 1
    scale = x / bw
    return stats.invgamma.sf(sample, shape, scale=scale)
|
|||
|
|
|||
|
|
|||
|
kernel_cdf_invgamma.__doc__ = """\
|
|||
|
Inverse gamma kernel for cumulative distribution, cdf, estimation.
|
|||
|
|
|||
|
{doc_params}
|
|||
|
|
|||
|
References
|
|||
|
----------
|
|||
|
.. [1] Micheaux, Pierre Lafaye de, and Frédéric Ouimet. 2020. “A Study of
|
|||
|
Seven Asymmetric Kernels for the Estimation of Cumulative Distribution
|
|||
|
Functions,” November. https://arxiv.org/abs/2011.14893v1.
|
|||
|
""".format(doc_params=doc_params)
|
|||
|
|
|||
|
|
|||
|
def kernel_pdf_invgauss(x, sample, bw):
    # Inverse gaussian density evaluated at the sample points.
    # scipy takes a shape parameter ``mu`` and a scale; these are set to
    # ``x / lam`` and ``lam`` with ``lam = 1 / bw``.
    lam = 1 / bw
    mu = x / lam
    return stats.invgauss.pdf(sample, mu, scale=lam)
|
|||
|
|
|||
|
|
|||
|
kernel_pdf_invgauss.__doc__ = """\
|
|||
|
Inverse gaussian kernel for density, pdf, estimation.
|
|||
|
|
|||
|
{doc_params}
|
|||
|
|
|||
|
References
|
|||
|
----------
|
|||
|
.. [1] Scaillet, O. 2004. “Density Estimation Using Inverse and Reciprocal
|
|||
|
Inverse Gaussian Kernels.”
|
|||
|
Journal of Nonparametric Statistics 16 (1–2): 217–26.
|
|||
|
https://doi.org/10.1080/10485250310001624819.
|
|||
|
""".format(doc_params=doc_params)
|
|||
|
|
|||
|
|
|||
|
def kernel_pdf_invgauss_(x, sample, bw):
    """Inverse gaussian kernel density computed from the explicit formula.

    The formula is taken from Scaillet 2004. In contrast to the kernel
    functions without trailing underscore, the kernel values are averaged
    over the last axis before they are returned.
    """
    norm_const = 1 / np.sqrt(2 * np.pi * bw * sample**3)
    log_kernel = - 1 / (2 * bw * x) * (sample / x - 2 + x / sample)
    pdf = norm_const * np.exp(log_kernel)
    return pdf.mean(-1)
|
|||
|
|
|||
|
|
|||
|
def kernel_cdf_invgauss(x, sample, bw):
    # Survival function of the inverse gaussian distribution evaluated at
    # the sample points.
    # scipy takes a shape parameter ``mu`` and a scale; these are set to
    # ``x / lam`` and ``lam`` with ``lam = 1 / bw``.
    lam = 1 / bw
    mu = x / lam
    return stats.invgauss.sf(sample, mu, scale=lam)
|
|||
|
|
|||
|
|
|||
|
kernel_cdf_invgauss.__doc__ = """\
|
|||
|
Inverse gaussian kernel for cumulative distribution, cdf, estimation.
|
|||
|
|
|||
|
{doc_params}
|
|||
|
|
|||
|
References
|
|||
|
----------
|
|||
|
.. [1] Scaillet, O. 2004. “Density Estimation Using Inverse and Reciprocal
|
|||
|
Inverse Gaussian Kernels.”
|
|||
|
Journal of Nonparametric Statistics 16 (1–2): 217–26.
|
|||
|
https://doi.org/10.1080/10485250310001624819.
|
|||
|
""".format(doc_params=doc_params)
|
|||
|
|
|||
|
|
|||
|
def kernel_pdf_recipinvgauss(x, sample, bw):
    # Reciprocal inverse gaussian density evaluated at the sample points.

    # The references use a (m, lambda) parameterization, scipy needs shape
    # and scale. Here m = 1 / (x - bw) and lambda = 1 / bw, which are
    # converted to shape m / lambda and scale 1 / lambda.
    lam = 1 / bw
    m = 1 / (x - bw)
    shape = m / lam
    scale = 1 / lam
    return stats.recipinvgauss.pdf(sample, shape, scale=scale)
|
|||
|
|
|||
|
|
|||
|
kernel_pdf_recipinvgauss.__doc__ = """\
|
|||
|
Reciprocal inverse gaussian kernel for density, pdf, estimation.
|
|||
|
|
|||
|
{doc_params}
|
|||
|
|
|||
|
References
|
|||
|
----------
|
|||
|
.. [1] Scaillet, O. 2004. “Density Estimation Using Inverse and Reciprocal
|
|||
|
Inverse Gaussian Kernels.”
|
|||
|
Journal of Nonparametric Statistics 16 (1–2): 217–26.
|
|||
|
https://doi.org/10.1080/10485250310001624819.
|
|||
|
""".format(doc_params=doc_params)
|
|||
|
|
|||
|
|
|||
|
def kernel_pdf_recipinvgauss_(x, sample, bw):
    """Reciprocal inverse gaussian kernel density, explicit formula.

    Scaillet 2004

    Parameters
    ----------
    x : array_like, float
        Points for which density is evaluated.
    sample : ndarray, 1-d
        Sample from which kde is computed.
    bw : float
        Bandwidth parameter.

    Returns
    -------
    pdf : ndarray
        Kernel value for each sample point. The values are not averaged
        over the sample. They agree with the values that are computed by
        ``scipy.stats.recipinvgauss.pdf`` with shape ``bw / (x - bw)`` and
        scale ``bw``.
    """
    # shifted evaluation point
    xs = x - bw
    # The factor -xs / (2 * bw) multiplies the complete sum in parentheses.
    # It has to be applied to all three terms and not only to the first.
    pdf = (1 / np.sqrt(2 * np.pi * bw * sample) *
           np.exp(- xs / (2 * bw) * (sample / xs - 2 + xs / sample)))
    return pdf
|
|||
|
|
|||
|
|
|||
|
def kernel_cdf_recipinvgauss(x, sample, bw):
    # Survival function of the reciprocal inverse gaussian distribution
    # evaluated at the sample points.

    # The references use a (m, lambda) parameterization, scipy needs shape
    # and scale. Here m = 1 / (x - bw) and lambda = 1 / bw, which are
    # converted to shape m / lambda and scale 1 / lambda.
    lam = 1 / bw
    m = 1 / (x - bw)
    shape = m / lam
    scale = 1 / lam
    return stats.recipinvgauss.sf(sample, shape, scale=scale)
|
|||
|
|
|||
|
|
|||
|
kernel_cdf_recipinvgauss.__doc__ = """\
|
|||
|
Reciprocal inverse gaussian kernel for cdf estimation.
|
|||
|
|
|||
|
{doc_params}
|
|||
|
|
|||
|
References
|
|||
|
----------
|
|||
|
.. [1] Scaillet, O. 2004. “Density Estimation Using Inverse and Reciprocal
|
|||
|
Inverse Gaussian Kernels.”
|
|||
|
Journal of Nonparametric Statistics 16 (1–2): 217–26.
|
|||
|
https://doi.org/10.1080/10485250310001624819.
|
|||
|
""".format(doc_params=doc_params)
|
|||
|
|
|||
|
|
|||
|
def kernel_pdf_bs(x, sample, bw):
    # Birnbaum Saunders kernel for density estimation. This is the density
    # of scipy's ``fatiguelife`` distribution evaluated at the sample
    # points, with shape parameter ``bw`` and scale ``x``.
    pdfi = stats.fatiguelife.pdf(sample, bw, scale=x)
    return pdfi
|
|||
|
|
|||
|
|
|||
|
kernel_pdf_bs.__doc__ = """\
|
|||
|
Birnbaum Saunders (normal) kernel for density, pdf, estimation.
|
|||
|
|
|||
|
{doc_params}
|
|||
|
|
|||
|
References
|
|||
|
----------
|
|||
|
.. [1] Jin, Xiaodong, and Janusz Kawczak. 2003. “Birnbaum-Saunders and
|
|||
|
Lognormal Kernel Estimators for Modelling Durations in High Frequency
|
|||
|
Financial Data.” Annals of Economics and Finance 4: 103–24.
|
|||
|
""".format(doc_params=doc_params)
|
|||
|
|
|||
|
|
|||
|
def kernel_cdf_bs(x, sample, bw):
    # Birnbaum Saunders kernel for cdf estimation. This is the survival
    # function of scipy's ``fatiguelife`` distribution evaluated at the
    # sample points, with shape parameter ``bw`` and scale ``x``.
    cdfi = stats.fatiguelife.sf(sample, bw, scale=x)
    return cdfi
|
|||
|
|
|||
|
|
|||
|
kernel_cdf_bs.__doc__ = """\
|
|||
|
Birnbaum Saunders (normal) kernel for cdf estimation.
|
|||
|
|
|||
|
{doc_params}
|
|||
|
|
|||
|
References
|
|||
|
----------
|
|||
|
.. [1] Jin, Xiaodong, and Janusz Kawczak. 2003. “Birnbaum-Saunders and
|
|||
|
Lognormal Kernel Estimators for Modelling Durations in High Frequency
|
|||
|
Financial Data.” Annals of Economics and Finance 4: 103–24.
|
|||
|
.. [2] Mombeni, Habib Allah, B Masouri, and Mohammad Reza Akhoond. 2019.
|
|||
|
“Asymmetric Kernels for Boundary Modification in Distribution Function
|
|||
|
Estimation.” REVSTAT, 1–27.
|
|||
|
""".format(doc_params=doc_params)
|
|||
|
|
|||
|
|
|||
|
def kernel_pdf_lognorm(x, sample, bw):
    # Log-normal density evaluated at the sample points, with scale ``x``.

    # scipy needs the shape-scale parameterization. The shape parameter is
    # a transformation of the bandwidth,
    #     shape = sqrt(4 * log(1 + bw))
    # with inverse transformation
    #     bw = exp(shape**2 / 4) - 1
    # Open questions kept from the original implementation:
    # - not sure why JK picked this normalization, it makes the required
    #   bw small
    # - maybe we should skip this transformation and just use bw
    # - Funke and Kawka 2015 (table 1) use bw (or bw**2) corresponding to
    #   the variance of the normal pdf
    shape = np.sqrt(4*np.log(1+bw))
    return stats.lognorm.pdf(sample, shape, scale=x)
|
|||
|
|
|||
|
|
|||
|
kernel_pdf_lognorm.__doc__ = """\
|
|||
|
Log-normal kernel for density, pdf, estimation.
|
|||
|
|
|||
|
{doc_params}
|
|||
|
|
|||
|
Notes
|
|||
|
-----
|
|||
|
Warning: parameterization of bandwidth will likely be changed
|
|||
|
|
|||
|
References
|
|||
|
----------
|
|||
|
.. [1] Jin, Xiaodong, and Janusz Kawczak. 2003. “Birnbaum-Saunders and
|
|||
|
Lognormal Kernel Estimators for Modelling Durations in High Frequency
|
|||
|
Financial Data.” Annals of Economics and Finance 4: 103–24.
|
|||
|
""".format(doc_params=doc_params)
|
|||
|
|
|||
|
|
|||
|
def kernel_cdf_lognorm(x, sample, bw):
    # Survival function of the log-normal distribution evaluated at the
    # sample points, with scale ``x``.

    # scipy needs the shape-scale parameterization. The shape parameter is
    # a transformation of the bandwidth,
    #     shape = sqrt(4 * log(1 + bw))
    # with inverse transformation
    #     bw = exp(shape**2 / 4) - 1
    # Open questions kept from the original implementation:
    # - not sure why JK picked this normalization, it makes the required
    #   bw small
    # - maybe we should skip this transformation and just use bw
    # - Funke and Kawka 2015 (table 1) use bw (or bw**2) corresponding to
    #   the variance of the normal pdf
    shape = np.sqrt(4*np.log(1+bw))
    return stats.lognorm.sf(sample, shape, scale=x)
|
|||
|
|
|||
|
|
|||
|
kernel_cdf_lognorm.__doc__ = """\
|
|||
|
Log-normal kernel for cumulative distribution, cdf, estimation.
|
|||
|
|
|||
|
{doc_params}
|
|||
|
|
|||
|
Notes
|
|||
|
-----
|
|||
|
Warning: parameterization of bandwidth will likely be changed
|
|||
|
|
|||
|
References
|
|||
|
----------
|
|||
|
.. [1] Jin, Xiaodong, and Janusz Kawczak. 2003. “Birnbaum-Saunders and
|
|||
|
Lognormal Kernel Estimators for Modelling Durations in High Frequency
|
|||
|
Financial Data.” Annals of Economics and Finance 4: 103–24.
|
|||
|
""".format(doc_params=doc_params)
|
|||
|
|
|||
|
|
|||
|
def kernel_pdf_lognorm_(x, sample, bw):
    """Log-normal kernel density computed from the explicit formula.

    The formula is taken from Jin, Kawczak 2003. In contrast to the kernel
    functions without trailing underscore, the kernel values are averaged
    over the last axis before they are returned.
    """
    # two times the variance of the normal pdf
    two_var = 8 * np.log(1 + bw)
    log_ratio = np.log(x) - np.log(sample)
    norm_const = 1 / np.sqrt(two_var * np.pi) / sample
    pdf = norm_const * np.exp(- log_ratio**2 / two_var)
    return pdf.mean(-1)
|
|||
|
|
|||
|
|
|||
|
def kernel_pdf_weibull(x, sample, bw):
    # Weibull density evaluated at the sample points.

    # The references use a (m, lambda) parameterization, scipy needs shape
    # and scale. The shape is ``1 / bw`` and the scale is ``x`` divided by
    # the gamma function at ``1 + bw``.
    shape = 1 / bw
    scale = x / special.gamma(1 + bw)
    return stats.weibull_min.pdf(sample, shape, scale=scale)
|
|||
|
|
|||
|
|
|||
|
kernel_pdf_weibull.__doc__ = """\
|
|||
|
Weibull kernel for density, pdf, estimation.
|
|||
|
|
|||
|
Based on cdf kernel by Mombeni et al. (2019)
|
|||
|
|
|||
|
{doc_params}
|
|||
|
|
|||
|
References
|
|||
|
----------
|
|||
|
.. [1] Mombeni, Habib Allah, B Masouri, and Mohammad Reza Akhoond. 2019.
|
|||
|
“Asymmetric Kernels for Boundary Modification in Distribution Function
|
|||
|
Estimation.” REVSTAT, 1–27.
|
|||
|
""".format(doc_params=doc_params)
|
|||
|
|
|||
|
|
|||
|
def kernel_cdf_weibull(x, sample, bw):
    # Survival function of the Weibull distribution evaluated at the
    # sample points.

    # The references use a (m, lambda) parameterization, scipy needs shape
    # and scale. The shape is ``1 / bw`` and the scale is ``x`` divided by
    # the gamma function at ``1 + bw``.
    shape = 1 / bw
    scale = x / special.gamma(1 + bw)
    return stats.weibull_min.sf(sample, shape, scale=scale)
|
|||
|
|
|||
|
|
|||
|
kernel_cdf_weibull.__doc__ = """\
|
|||
|
Weibull kernel for cumulative distribution, cdf, estimation.
|
|||
|
|
|||
|
{doc_params}
|
|||
|
|
|||
|
References
|
|||
|
----------
|
|||
|
.. [1] Mombeni, Habib Allah, B Masouri, and Mohammad Reza Akhoond. 2019.
|
|||
|
“Asymmetric Kernels for Boundary Modification in Distribution Function
|
|||
|
Estimation.” REVSTAT, 1–27.
|
|||
|
""".format(doc_params=doc_params)
|
|||
|
|
|||
|
|
|||
|
# Lookup table from kernel name to the kernel function for cdf estimation.
# ``cdf_kernel_asym`` uses it if ``kernel_type`` is not a callable.
# The entries were produced with
# print("\n".join(['"%s": %s,' % (i.split("_")[-1], i) for i in dir(kern)
#                  if "kernel" in i and not i.endswith("_")]))
kernel_dict_cdf = {
    "beta": kernel_cdf_beta,
    "beta2": kernel_cdf_beta2,
    "bs": kernel_cdf_bs,
    "gamma": kernel_cdf_gamma,
    "gamma2": kernel_cdf_gamma2,
    "invgamma": kernel_cdf_invgamma,
    "invgauss": kernel_cdf_invgauss,
    "lognorm": kernel_cdf_lognorm,
    "recipinvgauss": kernel_cdf_recipinvgauss,
    "weibull": kernel_cdf_weibull,
    }
|
|||
|
|
|||
|
# Lookup table from kernel name to the kernel function for density
# estimation. ``pdf_kernel_asym`` uses it if ``kernel_type`` is not a
# callable.
kernel_dict_pdf = {
    "beta": kernel_pdf_beta,
    "beta2": kernel_pdf_beta2,
    "bs": kernel_pdf_bs,
    "gamma": kernel_pdf_gamma,
    "gamma2": kernel_pdf_gamma2,
    "invgamma": kernel_pdf_invgamma,
    "invgauss": kernel_pdf_invgauss,
    "lognorm": kernel_pdf_lognorm,
    "recipinvgauss": kernel_pdf_recipinvgauss,
    "weibull": kernel_pdf_weibull,
    }
|