12174 lines
387 KiB
Python
12174 lines
387 KiB
Python
#
|
|
# Author: Travis Oliphant 2002-2011 with contributions from
|
|
# SciPy Developers 2004-2011
|
|
#
|
|
import warnings
|
|
from collections.abc import Iterable
|
|
from functools import wraps, cached_property
|
|
import ctypes
|
|
|
|
import numpy as np
|
|
from numpy.polynomial import Polynomial
|
|
from scipy.interpolate import BSpline
|
|
from scipy._lib.doccer import (extend_notes_in_docstring,
|
|
replace_notes_in_docstring,
|
|
inherit_docstring_from)
|
|
from scipy._lib._ccallback import LowLevelCallable
|
|
from scipy import optimize
|
|
from scipy import integrate
|
|
import scipy.special as sc
|
|
|
|
import scipy.special._ufuncs as scu
|
|
from scipy._lib._util import _lazyselect, _lazywhere
|
|
|
|
from . import _stats
|
|
from ._tukeylambda_stats import (tukeylambda_variance as _tlvar,
|
|
tukeylambda_kurtosis as _tlkurt)
|
|
from ._distn_infrastructure import (_vectorize_rvs_over_shapes,
|
|
get_distribution_names, _kurtosis, _isintegral,
|
|
rv_continuous, _skew, _get_fixed_fit_value, _check_shape, _ShapeInfo)
|
|
from ._ksstats import kolmogn, kolmognp, kolmogni
|
|
from ._constants import (_XMIN, _LOGXMIN, _EULER, _ZETA3, _SQRT_PI,
|
|
_SQRT_2_OVER_PI, _LOG_SQRT_2_OVER_PI)
|
|
from ._censored_data import CensoredData
|
|
from scipy.optimize import root_scalar
|
|
from scipy.stats._warnings_errors import FitError
|
|
import scipy.stats as stats
|
|
|
|
|
|
def _remove_optimizer_parameters(kwds):
|
|
"""
|
|
Remove the optimizer-related keyword arguments 'loc', 'scale' and
|
|
'optimizer' from `kwds`. Then check that `kwds` is empty, and
|
|
raise `TypeError("Unknown arguments: %s." % kwds)` if it is not.
|
|
|
|
This function is used in the fit method of distributions that override
|
|
the default method and do not use the default optimization code.
|
|
|
|
`kwds` is modified in-place.
|
|
"""
|
|
kwds.pop('loc', None)
|
|
kwds.pop('scale', None)
|
|
kwds.pop('optimizer', None)
|
|
kwds.pop('method', None)
|
|
if kwds:
|
|
raise TypeError("Unknown arguments: %s." % kwds)
|
|
|
|
|
|
def _call_super_mom(fun):
|
|
# If fit method is overridden only for MLE and doesn't specify what to do
|
|
# if method == 'mm' or with censored data, this decorator calls the generic
|
|
# implementation.
|
|
@wraps(fun)
|
|
def wrapper(self, data, *args, **kwds):
|
|
method = kwds.get('method', 'mle').lower()
|
|
censored = isinstance(data, CensoredData)
|
|
if method == 'mm' or (censored and data.num_censored() > 0):
|
|
return super(type(self), self).fit(data, *args, **kwds)
|
|
else:
|
|
if censored:
|
|
# data is an instance of CensoredData, but actually holds
|
|
# no censored values, so replace it with the array of
|
|
# uncensored values.
|
|
data = data._uncensored
|
|
return fun(self, data, *args, **kwds)
|
|
|
|
return wrapper
|
|
|
|
|
|
def _get_left_bracket(fun, rbrack, lbrack=None):
|
|
# find left bracket for `root_scalar`. A guess for lbrack may be provided.
|
|
lbrack = lbrack or rbrack - 1
|
|
diff = rbrack - lbrack
|
|
|
|
# if there is no sign change in `fun` between the brackets, expand
|
|
# rbrack - lbrack until a sign change occurs
|
|
def interval_contains_root(lbrack, rbrack):
|
|
# return true if the signs disagree.
|
|
return np.sign(fun(lbrack)) != np.sign(fun(rbrack))
|
|
|
|
while not interval_contains_root(lbrack, rbrack):
|
|
diff *= 2
|
|
lbrack = rbrack - diff
|
|
|
|
msg = ("The solver could not find a bracket containing a "
|
|
"root to an MLE first order condition.")
|
|
if np.isinf(lbrack):
|
|
raise FitSolverError(msg)
|
|
|
|
return lbrack
|
|
|
|
|
|
class ksone_gen(rv_continuous):
|
|
r"""Kolmogorov-Smirnov one-sided test statistic distribution.
|
|
|
|
This is the distribution of the one-sided Kolmogorov-Smirnov (KS)
|
|
statistics :math:`D_n^+` and :math:`D_n^-`
|
|
for a finite sample size ``n >= 1`` (the shape parameter).
|
|
|
|
%(before_notes)s
|
|
|
|
See Also
|
|
--------
|
|
kstwobign, kstwo, kstest
|
|
|
|
Notes
|
|
-----
|
|
:math:`D_n^+` and :math:`D_n^-` are given by
|
|
|
|
.. math::
|
|
|
|
D_n^+ &= \text{sup}_x (F_n(x) - F(x)),\\
|
|
D_n^- &= \text{sup}_x (F(x) - F_n(x)),\\
|
|
|
|
where :math:`F` is a continuous CDF and :math:`F_n` is an empirical CDF.
|
|
`ksone` describes the distribution under the null hypothesis of the KS test
|
|
that the empirical CDF corresponds to :math:`n` i.i.d. random variates
|
|
with CDF :math:`F`.
|
|
|
|
%(after_notes)s
|
|
|
|
References
|
|
----------
|
|
.. [1] Birnbaum, Z. W. and Tingey, F.H. "One-sided confidence contours
|
|
for probability distribution functions", The Annals of Mathematical
|
|
Statistics, 22(4), pp 592-596 (1951).
|
|
|
|
Examples
|
|
--------
|
|
>>> import numpy as np
|
|
>>> from scipy.stats import ksone
|
|
>>> import matplotlib.pyplot as plt
|
|
>>> fig, ax = plt.subplots(1, 1)
|
|
|
|
Display the probability density function (``pdf``):
|
|
|
|
>>> n = 1e+03
|
|
>>> x = np.linspace(ksone.ppf(0.01, n),
|
|
... ksone.ppf(0.99, n), 100)
|
|
>>> ax.plot(x, ksone.pdf(x, n),
|
|
... 'r-', lw=5, alpha=0.6, label='ksone pdf')
|
|
|
|
Alternatively, the distribution object can be called (as a function)
|
|
to fix the shape, location and scale parameters. This returns a "frozen"
|
|
RV object holding the given parameters fixed.
|
|
|
|
Freeze the distribution and display the frozen ``pdf``:
|
|
|
|
>>> rv = ksone(n)
|
|
>>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
|
|
>>> ax.legend(loc='best', frameon=False)
|
|
>>> plt.show()
|
|
|
|
Check accuracy of ``cdf`` and ``ppf``:
|
|
|
|
>>> vals = ksone.ppf([0.001, 0.5, 0.999], n)
|
|
>>> np.allclose([0.001, 0.5, 0.999], ksone.cdf(vals, n))
|
|
True
|
|
|
|
"""
|
|
def _argcheck(self, n):
|
|
return (n >= 1) & (n == np.round(n))
|
|
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("n", True, (1, np.inf), (True, False))]
|
|
|
|
def _pdf(self, x, n):
|
|
return -scu._smirnovp(n, x)
|
|
|
|
def _cdf(self, x, n):
|
|
return scu._smirnovc(n, x)
|
|
|
|
def _sf(self, x, n):
|
|
return sc.smirnov(n, x)
|
|
|
|
def _ppf(self, q, n):
|
|
return scu._smirnovci(n, q)
|
|
|
|
def _isf(self, q, n):
|
|
return sc.smirnovi(n, q)
|
|
|
|
|
|
ksone = ksone_gen(a=0.0, b=1.0, name='ksone')
|
|
|
|
|
|
class kstwo_gen(rv_continuous):
|
|
r"""Kolmogorov-Smirnov two-sided test statistic distribution.
|
|
|
|
This is the distribution of the two-sided Kolmogorov-Smirnov (KS)
|
|
statistic :math:`D_n` for a finite sample size ``n >= 1``
|
|
(the shape parameter).
|
|
|
|
%(before_notes)s
|
|
|
|
See Also
|
|
--------
|
|
kstwobign, ksone, kstest
|
|
|
|
Notes
|
|
-----
|
|
:math:`D_n` is given by
|
|
|
|
.. math::
|
|
|
|
D_n = \text{sup}_x |F_n(x) - F(x)|
|
|
|
|
where :math:`F` is a (continuous) CDF and :math:`F_n` is an empirical CDF.
|
|
`kstwo` describes the distribution under the null hypothesis of the KS test
|
|
that the empirical CDF corresponds to :math:`n` i.i.d. random variates
|
|
with CDF :math:`F`.
|
|
|
|
%(after_notes)s
|
|
|
|
References
|
|
----------
|
|
.. [1] Simard, R., L'Ecuyer, P. "Computing the Two-Sided
|
|
Kolmogorov-Smirnov Distribution", Journal of Statistical Software,
|
|
Vol 39, 11, 1-18 (2011).
|
|
|
|
Examples
|
|
--------
|
|
>>> import numpy as np
|
|
>>> from scipy.stats import kstwo
|
|
>>> import matplotlib.pyplot as plt
|
|
>>> fig, ax = plt.subplots(1, 1)
|
|
|
|
Display the probability density function (``pdf``):
|
|
|
|
>>> n = 10
|
|
>>> x = np.linspace(kstwo.ppf(0.01, n),
|
|
... kstwo.ppf(0.99, n), 100)
|
|
>>> ax.plot(x, kstwo.pdf(x, n),
|
|
... 'r-', lw=5, alpha=0.6, label='kstwo pdf')
|
|
|
|
Alternatively, the distribution object can be called (as a function)
|
|
to fix the shape, location and scale parameters. This returns a "frozen"
|
|
RV object holding the given parameters fixed.
|
|
|
|
Freeze the distribution and display the frozen ``pdf``:
|
|
|
|
>>> rv = kstwo(n)
|
|
>>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
|
|
>>> ax.legend(loc='best', frameon=False)
|
|
>>> plt.show()
|
|
|
|
Check accuracy of ``cdf`` and ``ppf``:
|
|
|
|
>>> vals = kstwo.ppf([0.001, 0.5, 0.999], n)
|
|
>>> np.allclose([0.001, 0.5, 0.999], kstwo.cdf(vals, n))
|
|
True
|
|
|
|
"""
|
|
def _argcheck(self, n):
|
|
return (n >= 1) & (n == np.round(n))
|
|
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("n", True, (1, np.inf), (True, False))]
|
|
|
|
def _get_support(self, n):
|
|
return (0.5/(n if not isinstance(n, Iterable) else np.asanyarray(n)),
|
|
1.0)
|
|
|
|
def _pdf(self, x, n):
|
|
return kolmognp(n, x)
|
|
|
|
def _cdf(self, x, n):
|
|
return kolmogn(n, x)
|
|
|
|
def _sf(self, x, n):
|
|
return kolmogn(n, x, cdf=False)
|
|
|
|
def _ppf(self, q, n):
|
|
return kolmogni(n, q, cdf=True)
|
|
|
|
def _isf(self, q, n):
|
|
return kolmogni(n, q, cdf=False)
|
|
|
|
|
|
# Use the pdf, (not the ppf) to compute moments
|
|
kstwo = kstwo_gen(momtype=0, a=0.0, b=1.0, name='kstwo')
|
|
|
|
|
|
class kstwobign_gen(rv_continuous):
|
|
r"""Limiting distribution of scaled Kolmogorov-Smirnov two-sided test statistic.
|
|
|
|
This is the asymptotic distribution of the two-sided Kolmogorov-Smirnov
|
|
statistic :math:`\sqrt{n} D_n` that measures the maximum absolute
|
|
distance of the theoretical (continuous) CDF from the empirical CDF.
|
|
(see `kstest`).
|
|
|
|
%(before_notes)s
|
|
|
|
See Also
|
|
--------
|
|
ksone, kstwo, kstest
|
|
|
|
Notes
|
|
-----
|
|
:math:`\sqrt{n} D_n` is given by
|
|
|
|
.. math::
|
|
|
|
D_n = \text{sup}_x |F_n(x) - F(x)|
|
|
|
|
where :math:`F` is a continuous CDF and :math:`F_n` is an empirical CDF.
|
|
`kstwobign` describes the asymptotic distribution (i.e. the limit of
|
|
:math:`\sqrt{n} D_n`) under the null hypothesis of the KS test that the
|
|
empirical CDF corresponds to i.i.d. random variates with CDF :math:`F`.
|
|
|
|
%(after_notes)s
|
|
|
|
References
|
|
----------
|
|
.. [1] Feller, W. "On the Kolmogorov-Smirnov Limit Theorems for Empirical
|
|
Distributions", Ann. Math. Statist. Vol 19, 177-189 (1948).
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return []
|
|
|
|
def _pdf(self, x):
|
|
return -scu._kolmogp(x)
|
|
|
|
def _cdf(self, x):
|
|
return scu._kolmogc(x)
|
|
|
|
def _sf(self, x):
|
|
return sc.kolmogorov(x)
|
|
|
|
def _ppf(self, q):
|
|
return scu._kolmogci(q)
|
|
|
|
def _isf(self, q):
|
|
return sc.kolmogi(q)
|
|
|
|
|
|
kstwobign = kstwobign_gen(a=0.0, name='kstwobign')
|
|
|
|
|
|
## Normal distribution
|
|
|
|
# loc = mu, scale = std
|
|
# Keep these implementations out of the class definition so they can be reused
|
|
# by other distributions.
|
|
_norm_pdf_C = np.sqrt(2*np.pi)
|
|
_norm_pdf_logC = np.log(_norm_pdf_C)
|
|
|
|
|
|
def _norm_pdf(x):
|
|
return np.exp(-x**2/2.0) / _norm_pdf_C
|
|
|
|
|
|
def _norm_logpdf(x):
|
|
return -x**2 / 2.0 - _norm_pdf_logC
|
|
|
|
|
|
def _norm_cdf(x):
|
|
return sc.ndtr(x)
|
|
|
|
|
|
def _norm_logcdf(x):
|
|
return sc.log_ndtr(x)
|
|
|
|
|
|
def _norm_ppf(q):
|
|
return sc.ndtri(q)
|
|
|
|
|
|
def _norm_sf(x):
|
|
return _norm_cdf(-x)
|
|
|
|
|
|
def _norm_logsf(x):
|
|
return _norm_logcdf(-x)
|
|
|
|
|
|
def _norm_isf(q):
|
|
return -_norm_ppf(q)
|
|
|
|
|
|
class norm_gen(rv_continuous):
|
|
r"""A normal continuous random variable.
|
|
|
|
The location (``loc``) keyword specifies the mean.
|
|
The scale (``scale``) keyword specifies the standard deviation.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `norm` is:
|
|
|
|
.. math::
|
|
|
|
f(x) = \frac{\exp(-x^2/2)}{\sqrt{2\pi}}
|
|
|
|
for a real number :math:`x`.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return []
|
|
|
|
def _rvs(self, size=None, random_state=None):
|
|
return random_state.standard_normal(size)
|
|
|
|
def _pdf(self, x):
|
|
# norm.pdf(x) = exp(-x**2/2)/sqrt(2*pi)
|
|
return _norm_pdf(x)
|
|
|
|
def _logpdf(self, x):
|
|
return _norm_logpdf(x)
|
|
|
|
def _cdf(self, x):
|
|
return _norm_cdf(x)
|
|
|
|
def _logcdf(self, x):
|
|
return _norm_logcdf(x)
|
|
|
|
def _sf(self, x):
|
|
return _norm_sf(x)
|
|
|
|
def _logsf(self, x):
|
|
return _norm_logsf(x)
|
|
|
|
def _ppf(self, q):
|
|
return _norm_ppf(q)
|
|
|
|
def _isf(self, q):
|
|
return _norm_isf(q)
|
|
|
|
def _stats(self):
|
|
return 0.0, 1.0, 0.0, 0.0
|
|
|
|
def _entropy(self):
|
|
return 0.5*(np.log(2*np.pi)+1)
|
|
|
|
@_call_super_mom
|
|
@replace_notes_in_docstring(rv_continuous, notes="""\
|
|
For the normal distribution, method of moments and maximum likelihood
|
|
estimation give identical fits, and explicit formulas for the estimates
|
|
are available.
|
|
This function uses these explicit formulas for the maximum likelihood
|
|
estimation of the normal distribution parameters, so the
|
|
`optimizer` and `method` arguments are ignored.\n\n""")
|
|
def fit(self, data, **kwds):
|
|
floc = kwds.pop('floc', None)
|
|
fscale = kwds.pop('fscale', None)
|
|
|
|
_remove_optimizer_parameters(kwds)
|
|
|
|
if floc is not None and fscale is not None:
|
|
# This check is for consistency with `rv_continuous.fit`.
|
|
# Without this check, this function would just return the
|
|
# parameters that were given.
|
|
raise ValueError("All parameters fixed. There is nothing to "
|
|
"optimize.")
|
|
|
|
data = np.asarray(data)
|
|
|
|
if not np.isfinite(data).all():
|
|
raise ValueError("The data contains non-finite values.")
|
|
|
|
if floc is None:
|
|
loc = data.mean()
|
|
else:
|
|
loc = floc
|
|
|
|
if fscale is None:
|
|
scale = np.sqrt(((data - loc)**2).mean())
|
|
else:
|
|
scale = fscale
|
|
|
|
return loc, scale
|
|
|
|
def _munp(self, n):
|
|
"""
|
|
@returns Moments of standard normal distribution for integer n >= 0
|
|
|
|
See eq. 16 of https://arxiv.org/abs/1209.4340v2
|
|
"""
|
|
if n % 2 == 0:
|
|
return sc.factorial2(n - 1)
|
|
else:
|
|
return 0.
|
|
|
|
|
|
norm = norm_gen(name='norm')
|
|
|
|
|
|
class alpha_gen(rv_continuous):
|
|
r"""An alpha continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `alpha` ([1]_, [2]_) is:
|
|
|
|
.. math::
|
|
|
|
f(x, a) = \frac{1}{x^2 \Phi(a) \sqrt{2\pi}} *
|
|
\exp(-\frac{1}{2} (a-1/x)^2)
|
|
|
|
where :math:`\Phi` is the normal CDF, :math:`x > 0`, and :math:`a > 0`.
|
|
|
|
`alpha` takes ``a`` as a shape parameter.
|
|
|
|
%(after_notes)s
|
|
|
|
References
|
|
----------
|
|
.. [1] Johnson, Kotz, and Balakrishnan, "Continuous Univariate
|
|
Distributions, Volume 1", Second Edition, John Wiley and Sons,
|
|
p. 173 (1994).
|
|
.. [2] Anthony A. Salvia, "Reliability applications of the Alpha
|
|
Distribution", IEEE Transactions on Reliability, Vol. R-34,
|
|
No. 3, pp. 251-252 (1985).
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
_support_mask = rv_continuous._open_support_mask
|
|
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("a", False, (0, np.inf), (False, False))]
|
|
|
|
def _pdf(self, x, a):
|
|
# alpha.pdf(x, a) = 1/(x**2*Phi(a)*sqrt(2*pi)) * exp(-1/2 * (a-1/x)**2)
|
|
return 1.0/(x**2)/_norm_cdf(a)*_norm_pdf(a-1.0/x)
|
|
|
|
def _logpdf(self, x, a):
|
|
return -2*np.log(x) + _norm_logpdf(a-1.0/x) - np.log(_norm_cdf(a))
|
|
|
|
def _cdf(self, x, a):
|
|
return _norm_cdf(a-1.0/x) / _norm_cdf(a)
|
|
|
|
def _ppf(self, q, a):
|
|
return 1.0/np.asarray(a - _norm_ppf(q*_norm_cdf(a)))
|
|
|
|
def _stats(self, a):
|
|
return [np.inf]*2 + [np.nan]*2
|
|
|
|
|
|
alpha = alpha_gen(a=0.0, name='alpha')
|
|
|
|
|
|
class anglit_gen(rv_continuous):
|
|
r"""An anglit continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `anglit` is:
|
|
|
|
.. math::
|
|
|
|
f(x) = \sin(2x + \pi/2) = \cos(2x)
|
|
|
|
for :math:`-\pi/4 \le x \le \pi/4`.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return []
|
|
|
|
def _pdf(self, x):
|
|
# anglit.pdf(x) = sin(2*x + \pi/2) = cos(2*x)
|
|
return np.cos(2*x)
|
|
|
|
def _cdf(self, x):
|
|
return np.sin(x+np.pi/4)**2.0
|
|
|
|
def _sf(self, x):
|
|
return np.cos(x + np.pi / 4) ** 2.0
|
|
|
|
def _ppf(self, q):
|
|
return np.arcsin(np.sqrt(q))-np.pi/4
|
|
|
|
def _stats(self):
|
|
return 0.0, np.pi*np.pi/16-0.5, 0.0, -2*(np.pi**4 - 96)/(np.pi*np.pi-8)**2
|
|
|
|
def _entropy(self):
|
|
return 1-np.log(2)
|
|
|
|
|
|
anglit = anglit_gen(a=-np.pi/4, b=np.pi/4, name='anglit')
|
|
|
|
|
|
class arcsine_gen(rv_continuous):
|
|
r"""An arcsine continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `arcsine` is:
|
|
|
|
.. math::
|
|
|
|
f(x) = \frac{1}{\pi \sqrt{x (1-x)}}
|
|
|
|
for :math:`0 < x < 1`.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return []
|
|
|
|
def _pdf(self, x):
|
|
# arcsine.pdf(x) = 1/(pi*sqrt(x*(1-x)))
|
|
with np.errstate(divide='ignore'):
|
|
return 1.0/np.pi/np.sqrt(x*(1-x))
|
|
|
|
def _cdf(self, x):
|
|
return 2.0/np.pi*np.arcsin(np.sqrt(x))
|
|
|
|
def _ppf(self, q):
|
|
return np.sin(np.pi/2.0*q)**2.0
|
|
|
|
def _stats(self):
|
|
mu = 0.5
|
|
mu2 = 1.0/8
|
|
g1 = 0
|
|
g2 = -3.0/2.0
|
|
return mu, mu2, g1, g2
|
|
|
|
def _entropy(self):
|
|
return -0.24156447527049044468
|
|
|
|
|
|
arcsine = arcsine_gen(a=0.0, b=1.0, name='arcsine')
|
|
|
|
|
|
class FitDataError(ValueError):
|
|
"""Raised when input data is inconsistent with fixed parameters."""
|
|
# This exception is raised by, for example, beta_gen.fit when both floc
|
|
# and fscale are fixed and there are values in the data not in the open
|
|
# interval (floc, floc+fscale).
|
|
def __init__(self, distr, lower, upper):
|
|
self.args = (
|
|
"Invalid values in `data`. Maximum likelihood "
|
|
f"estimation with {distr!r} requires that {lower!r} < "
|
|
f"(x - loc)/scale < {upper!r} for each x in `data`.",
|
|
)
|
|
|
|
|
|
class FitSolverError(FitError):
|
|
"""
|
|
Raised when a solver fails to converge while fitting a distribution.
|
|
"""
|
|
# This exception is raised by, for example, beta_gen.fit when
|
|
# optimize.fsolve returns with ier != 1.
|
|
def __init__(self, mesg):
|
|
emsg = "Solver for the MLE equations failed to converge: "
|
|
emsg += mesg.replace('\n', '')
|
|
self.args = (emsg,)
|
|
|
|
|
|
def _beta_mle_a(a, b, n, s1):
|
|
# The zeros of this function give the MLE for `a`, with
|
|
# `b`, `n` and `s1` given. `s1` is the sum of the logs of
|
|
# the data. `n` is the number of data points.
|
|
psiab = sc.psi(a + b)
|
|
func = s1 - n * (-psiab + sc.psi(a))
|
|
return func
|
|
|
|
|
|
def _beta_mle_ab(theta, n, s1, s2):
|
|
# Zeros of this function are critical points of
|
|
# the maximum likelihood function. Solving this system
|
|
# for theta (which contains a and b) gives the MLE for a and b
|
|
# given `n`, `s1` and `s2`. `s1` is the sum of the logs of the data,
|
|
# and `s2` is the sum of the logs of 1 - data. `n` is the number
|
|
# of data points.
|
|
a, b = theta
|
|
psiab = sc.psi(a + b)
|
|
func = [s1 - n * (-psiab + sc.psi(a)),
|
|
s2 - n * (-psiab + sc.psi(b))]
|
|
return func
|
|
|
|
|
|
class beta_gen(rv_continuous):
|
|
r"""A beta continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `beta` is:
|
|
|
|
.. math::
|
|
|
|
f(x, a, b) = \frac{\Gamma(a+b) x^{a-1} (1-x)^{b-1}}
|
|
{\Gamma(a) \Gamma(b)}
|
|
|
|
for :math:`0 <= x <= 1`, :math:`a > 0`, :math:`b > 0`, where
|
|
:math:`\Gamma` is the gamma function (`scipy.special.gamma`).
|
|
|
|
`beta` takes :math:`a` and :math:`b` as shape parameters.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
|
|
ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
|
|
return [ia, ib]
|
|
|
|
def _rvs(self, a, b, size=None, random_state=None):
|
|
return random_state.beta(a, b, size)
|
|
|
|
def _pdf(self, x, a, b):
|
|
# gamma(a+b) * x**(a-1) * (1-x)**(b-1)
|
|
# beta.pdf(x, a, b) = ------------------------------------
|
|
# gamma(a)*gamma(b)
|
|
with np.errstate(over='ignore'):
|
|
return scu._beta_pdf(x, a, b)
|
|
|
|
def _logpdf(self, x, a, b):
|
|
lPx = sc.xlog1py(b - 1.0, -x) + sc.xlogy(a - 1.0, x)
|
|
lPx -= sc.betaln(a, b)
|
|
return lPx
|
|
|
|
def _cdf(self, x, a, b):
|
|
return sc.betainc(a, b, x)
|
|
|
|
def _sf(self, x, a, b):
|
|
return sc.betaincc(a, b, x)
|
|
|
|
def _isf(self, x, a, b):
|
|
return sc.betainccinv(a, b, x)
|
|
|
|
def _ppf(self, q, a, b):
|
|
return scu._beta_ppf(q, a, b)
|
|
|
|
def _stats(self, a, b):
|
|
a_plus_b = a + b
|
|
_beta_mean = a/a_plus_b
|
|
_beta_variance = a*b / (a_plus_b**2 * (a_plus_b + 1))
|
|
_beta_skewness = ((2 * (b - a) * np.sqrt(a_plus_b + 1)) /
|
|
((a_plus_b + 2) * np.sqrt(a * b)))
|
|
_beta_kurtosis_excess_n = 6 * ((a - b)**2 * (a_plus_b + 1) -
|
|
a * b * (a_plus_b + 2))
|
|
_beta_kurtosis_excess_d = a * b * (a_plus_b + 2) * (a_plus_b + 3)
|
|
_beta_kurtosis_excess = _beta_kurtosis_excess_n / _beta_kurtosis_excess_d
|
|
return (
|
|
_beta_mean,
|
|
_beta_variance,
|
|
_beta_skewness,
|
|
_beta_kurtosis_excess)
|
|
|
|
def _fitstart(self, data):
|
|
if isinstance(data, CensoredData):
|
|
data = data._uncensor()
|
|
|
|
g1 = _skew(data)
|
|
g2 = _kurtosis(data)
|
|
|
|
def func(x):
|
|
a, b = x
|
|
sk = 2*(b-a)*np.sqrt(a + b + 1) / (a + b + 2) / np.sqrt(a*b)
|
|
ku = a**3 - a**2*(2*b-1) + b**2*(b+1) - 2*a*b*(b+2)
|
|
ku /= a*b*(a+b+2)*(a+b+3)
|
|
ku *= 6
|
|
return [sk-g1, ku-g2]
|
|
a, b = optimize.fsolve(func, (1.0, 1.0))
|
|
return super()._fitstart(data, args=(a, b))
|
|
|
|
@_call_super_mom
|
|
@extend_notes_in_docstring(rv_continuous, notes="""\
|
|
In the special case where `method="MLE"` and
|
|
both `floc` and `fscale` are given, a
|
|
`ValueError` is raised if any value `x` in `data` does not satisfy
|
|
`floc < x < floc + fscale`.\n\n""")
|
|
def fit(self, data, *args, **kwds):
|
|
# Override rv_continuous.fit, so we can more efficiently handle the
|
|
# case where floc and fscale are given.
|
|
|
|
floc = kwds.get('floc', None)
|
|
fscale = kwds.get('fscale', None)
|
|
|
|
if floc is None or fscale is None:
|
|
# do general fit
|
|
return super().fit(data, *args, **kwds)
|
|
|
|
# We already got these from kwds, so just pop them.
|
|
kwds.pop('floc', None)
|
|
kwds.pop('fscale', None)
|
|
|
|
f0 = _get_fixed_fit_value(kwds, ['f0', 'fa', 'fix_a'])
|
|
f1 = _get_fixed_fit_value(kwds, ['f1', 'fb', 'fix_b'])
|
|
|
|
_remove_optimizer_parameters(kwds)
|
|
|
|
if f0 is not None and f1 is not None:
|
|
# This check is for consistency with `rv_continuous.fit`.
|
|
raise ValueError("All parameters fixed. There is nothing to "
|
|
"optimize.")
|
|
|
|
# Special case: loc and scale are constrained, so we are fitting
|
|
# just the shape parameters. This can be done much more efficiently
|
|
# than the method used in `rv_continuous.fit`. (See the subsection
|
|
# "Two unknown parameters" in the section "Maximum likelihood" of
|
|
# the Wikipedia article on the Beta distribution for the formulas.)
|
|
|
|
if not np.isfinite(data).all():
|
|
raise ValueError("The data contains non-finite values.")
|
|
|
|
# Normalize the data to the interval [0, 1].
|
|
data = (np.ravel(data) - floc) / fscale
|
|
if np.any(data <= 0) or np.any(data >= 1):
|
|
raise FitDataError("beta", lower=floc, upper=floc + fscale)
|
|
|
|
xbar = data.mean()
|
|
|
|
if f0 is not None or f1 is not None:
|
|
# One of the shape parameters is fixed.
|
|
|
|
if f0 is not None:
|
|
# The shape parameter a is fixed, so swap the parameters
|
|
# and flip the data. We always solve for `a`. The result
|
|
# will be swapped back before returning.
|
|
b = f0
|
|
data = 1 - data
|
|
xbar = 1 - xbar
|
|
else:
|
|
b = f1
|
|
|
|
# Initial guess for a. Use the formula for the mean of the beta
|
|
# distribution, E[x] = a / (a + b), to generate a reasonable
|
|
# starting point based on the mean of the data and the given
|
|
# value of b.
|
|
a = b * xbar / (1 - xbar)
|
|
|
|
# Compute the MLE for `a` by solving _beta_mle_a.
|
|
theta, info, ier, mesg = optimize.fsolve(
|
|
_beta_mle_a, a,
|
|
args=(b, len(data), np.log(data).sum()),
|
|
full_output=True
|
|
)
|
|
if ier != 1:
|
|
raise FitSolverError(mesg=mesg)
|
|
a = theta[0]
|
|
|
|
if f0 is not None:
|
|
# The shape parameter a was fixed, so swap back the
|
|
# parameters.
|
|
a, b = b, a
|
|
|
|
else:
|
|
# Neither of the shape parameters is fixed.
|
|
|
|
# s1 and s2 are used in the extra arguments passed to _beta_mle_ab
|
|
# by optimize.fsolve.
|
|
s1 = np.log(data).sum()
|
|
s2 = sc.log1p(-data).sum()
|
|
|
|
# Use the "method of moments" to estimate the initial
|
|
# guess for a and b.
|
|
fac = xbar * (1 - xbar) / data.var(ddof=0) - 1
|
|
a = xbar * fac
|
|
b = (1 - xbar) * fac
|
|
|
|
# Compute the MLE for a and b by solving _beta_mle_ab.
|
|
theta, info, ier, mesg = optimize.fsolve(
|
|
_beta_mle_ab, [a, b],
|
|
args=(len(data), s1, s2),
|
|
full_output=True
|
|
)
|
|
if ier != 1:
|
|
raise FitSolverError(mesg=mesg)
|
|
a, b = theta
|
|
|
|
return a, b, floc, fscale
|
|
|
|
def _entropy(self, a, b):
|
|
def regular(a, b):
|
|
return (sc.betaln(a, b) - (a - 1) * sc.psi(a) -
|
|
(b - 1) * sc.psi(b) + (a + b - 2) * sc.psi(a + b))
|
|
|
|
def asymptotic_ab_large(a, b):
|
|
sum_ab = a + b
|
|
log_term = 0.5 * (
|
|
np.log(2*np.pi) + np.log(a) + np.log(b) - 3*np.log(sum_ab) + 1
|
|
)
|
|
t1 = 110/sum_ab + 20*sum_ab**-2.0 + sum_ab**-3.0 - 2*sum_ab**-4.0
|
|
t2 = -50/a - 10*a**-2.0 - a**-3.0 + a**-4.0
|
|
t3 = -50/b - 10*b**-2.0 - b**-3.0 + b**-4.0
|
|
return log_term + (t1 + t2 + t3) / 120
|
|
|
|
def asymptotic_b_large(a, b):
|
|
sum_ab = a + b
|
|
t1 = sc.gammaln(a) - (a - 1) * sc.psi(a)
|
|
t2 = (
|
|
- 1/(2*b) + 1/(12*b) - b**-2.0/12 - b**-3.0/120 + b**-4.0/120
|
|
+ b**-5.0/252 - b**-6.0/252 + 1/sum_ab - 1/(12*sum_ab)
|
|
+ sum_ab**-2.0/6 + sum_ab**-3.0/120 - sum_ab**-4.0/60
|
|
- sum_ab**-5.0/252 + sum_ab**-6.0/126
|
|
)
|
|
log_term = sum_ab*np.log1p(a/b) + np.log(b) - 2*np.log(sum_ab)
|
|
return t1 + t2 + log_term
|
|
|
|
def threshold_large(v):
|
|
if v == 1.0:
|
|
return 1000
|
|
|
|
j = np.log10(v)
|
|
digits = int(j)
|
|
d = int(v / 10 ** digits) + 2
|
|
return d*10**(7 + j)
|
|
|
|
if a >= 4.96e6 and b >= 4.96e6:
|
|
return asymptotic_ab_large(a, b)
|
|
elif a <= 4.9e6 and b - a >= 1e6 and b >= threshold_large(a):
|
|
return asymptotic_b_large(a, b)
|
|
elif b <= 4.9e6 and a - b >= 1e6 and a >= threshold_large(b):
|
|
return asymptotic_b_large(b, a)
|
|
else:
|
|
return regular(a, b)
|
|
|
|
|
|
beta = beta_gen(a=0.0, b=1.0, name='beta')
|
|
|
|
|
|
class betaprime_gen(rv_continuous):
|
|
r"""A beta prime continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `betaprime` is:
|
|
|
|
.. math::
|
|
|
|
f(x, a, b) = \frac{x^{a-1} (1+x)^{-a-b}}{\beta(a, b)}
|
|
|
|
for :math:`x >= 0`, :math:`a > 0`, :math:`b > 0`, where
|
|
:math:`\beta(a, b)` is the beta function (see `scipy.special.beta`).
|
|
|
|
`betaprime` takes ``a`` and ``b`` as shape parameters.
|
|
|
|
The distribution is related to the `beta` distribution as follows:
|
|
If :math:`X` follows a beta distribution with parameters :math:`a, b`,
|
|
then :math:`Y = X/(1-X)` has a beta prime distribution with
|
|
parameters :math:`a, b` ([1]_).
|
|
|
|
The beta prime distribution is a reparametrized version of the
|
|
F distribution. The beta prime distribution with shape parameters
|
|
``a`` and ``b`` and ``scale = s`` is equivalent to the F distribution
|
|
with parameters ``d1 = 2*a``, ``d2 = 2*b`` and ``scale = (a/b)*s``.
|
|
For example,
|
|
|
|
>>> from scipy.stats import betaprime, f
|
|
>>> x = [1, 2, 5, 10]
|
|
>>> a = 12
|
|
>>> b = 5
|
|
>>> betaprime.pdf(x, a, b, scale=2)
|
|
array([0.00541179, 0.08331299, 0.14669185, 0.03150079])
|
|
>>> f.pdf(x, 2*a, 2*b, scale=(a/b)*2)
|
|
array([0.00541179, 0.08331299, 0.14669185, 0.03150079])
|
|
|
|
%(after_notes)s
|
|
|
|
References
|
|
----------
|
|
.. [1] Beta prime distribution, Wikipedia,
|
|
https://en.wikipedia.org/wiki/Beta_prime_distribution
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
_support_mask = rv_continuous._open_support_mask
|
|
|
|
def _shape_info(self):
|
|
ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
|
|
ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
|
|
return [ia, ib]
|
|
|
|
def _rvs(self, a, b, size=None, random_state=None):
|
|
u1 = gamma.rvs(a, size=size, random_state=random_state)
|
|
u2 = gamma.rvs(b, size=size, random_state=random_state)
|
|
return u1 / u2
|
|
|
|
def _pdf(self, x, a, b):
|
|
# betaprime.pdf(x, a, b) = x**(a-1) * (1+x)**(-a-b) / beta(a, b)
|
|
return np.exp(self._logpdf(x, a, b))
|
|
|
|
def _logpdf(self, x, a, b):
|
|
return sc.xlogy(a - 1.0, x) - sc.xlog1py(a + b, x) - sc.betaln(a, b)
|
|
|
|
def _cdf(self, x, a, b):
|
|
# note: f2 is the direct way to compute the cdf if the relationship
|
|
# to the beta distribution is used.
|
|
# however, for very large x, x/(1+x) == 1. since the distribution
|
|
# has very fat tails if b is small, this can cause inaccurate results
|
|
# use the following relationship of the incomplete beta function:
|
|
# betainc(x, a, b) = 1 - betainc(1-x, b, a)
|
|
# see gh-17631
|
|
return _lazywhere(
|
|
x > 1, [x, a, b],
|
|
lambda x_, a_, b_: beta._sf(1/(1+x_), b_, a_),
|
|
f2=lambda x_, a_, b_: beta._cdf(x_/(1+x_), a_, b_))
|
|
|
|
def _sf(self, x, a, b):
|
|
return _lazywhere(
|
|
x > 1, [x, a, b],
|
|
lambda x_, a_, b_: beta._cdf(1/(1+x_), b_, a_),
|
|
f2=lambda x_, a_, b_: beta._sf(x_/(1+x_), a_, b_)
|
|
)
|
|
|
|
def _ppf(self, p, a, b):
|
|
p, a, b = np.broadcast_arrays(p, a, b)
|
|
# by default, compute compute the ppf by solving the following:
|
|
# p = beta._cdf(x/(1+x), a, b). This implies x = r/(1-r) with
|
|
# r = beta._ppf(p, a, b). This can cause numerical issues if r is
|
|
# very close to 1. in that case, invert the alternative expression of
|
|
# the cdf: p = beta._sf(1/(1+x), b, a).
|
|
r = stats.beta._ppf(p, a, b)
|
|
with np.errstate(divide='ignore'):
|
|
out = r / (1 - r)
|
|
i = (r > 0.9999)
|
|
out[i] = 1/stats.beta._isf(p[i], b[i], a[i]) - 1
|
|
return out
|
|
|
|
def _munp(self, n, a, b):
|
|
return _lazywhere(
|
|
b > n, (a, b),
|
|
lambda a, b: np.prod([(a+i-1)/(b-i) for i in range(1, n+1)], axis=0),
|
|
fillvalue=np.inf)
|
|
|
|
|
|
betaprime = betaprime_gen(a=0.0, name='betaprime')
|
|
|
|
|
|
class bradford_gen(rv_continuous):
|
|
r"""A Bradford continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `bradford` is:
|
|
|
|
.. math::
|
|
|
|
f(x, c) = \frac{c}{\log(1+c) (1+cx)}
|
|
|
|
for :math:`0 <= x <= 1` and :math:`c > 0`.
|
|
|
|
`bradford` takes ``c`` as a shape parameter for :math:`c`.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
|
|
|
|
def _pdf(self, x, c):
|
|
# bradford.pdf(x, c) = c / (k * (1+c*x))
|
|
return c / (c*x + 1.0) / sc.log1p(c)
|
|
|
|
def _cdf(self, x, c):
|
|
return sc.log1p(c*x) / sc.log1p(c)
|
|
|
|
def _ppf(self, q, c):
|
|
return sc.expm1(q * sc.log1p(c)) / c
|
|
|
|
def _stats(self, c, moments='mv'):
|
|
k = np.log(1.0+c)
|
|
mu = (c-k)/(c*k)
|
|
mu2 = ((c+2.0)*k-2.0*c)/(2*c*k*k)
|
|
g1 = None
|
|
g2 = None
|
|
if 's' in moments:
|
|
g1 = np.sqrt(2)*(12*c*c-9*c*k*(c+2)+2*k*k*(c*(c+3)+3))
|
|
g1 /= np.sqrt(c*(c*(k-2)+2*k))*(3*c*(k-2)+6*k)
|
|
if 'k' in moments:
|
|
g2 = (c**3*(k-3)*(k*(3*k-16)+24)+12*k*c*c*(k-4)*(k-3) +
|
|
6*c*k*k*(3*k-14) + 12*k**3)
|
|
g2 /= 3*c*(c*(k-2)+2*k)**2
|
|
return mu, mu2, g1, g2
|
|
|
|
def _entropy(self, c):
|
|
k = np.log(1+c)
|
|
return k/2.0 - np.log(c/k)
|
|
|
|
|
|
bradford = bradford_gen(a=0.0, b=1.0, name='bradford')
|
|
|
|
|
|
class burr_gen(rv_continuous):
|
|
r"""A Burr (Type III) continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
See Also
|
|
--------
|
|
fisk : a special case of either `burr` or `burr12` with ``d=1``
|
|
burr12 : Burr Type XII distribution
|
|
mielke : Mielke Beta-Kappa / Dagum distribution
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `burr` is:
|
|
|
|
.. math::
|
|
|
|
f(x; c, d) = c d \frac{x^{-c - 1}}
|
|
{{(1 + x^{-c})}^{d + 1}}
|
|
|
|
for :math:`x >= 0` and :math:`c, d > 0`.
|
|
|
|
`burr` takes ``c`` and ``d`` as shape parameters for :math:`c` and
|
|
:math:`d`.
|
|
|
|
This is the PDF corresponding to the third CDF given in Burr's list;
|
|
specifically, it is equation (11) in Burr's paper [1]_. The distribution
|
|
is also commonly referred to as the Dagum distribution [2]_. If the
|
|
parameter :math:`c < 1` then the mean of the distribution does not
|
|
exist and if :math:`c < 2` the variance does not exist [2]_.
|
|
The PDF is finite at the left endpoint :math:`x = 0` if :math:`c * d >= 1`.
|
|
|
|
%(after_notes)s
|
|
|
|
References
|
|
----------
|
|
.. [1] Burr, I. W. "Cumulative frequency functions", Annals of
|
|
Mathematical Statistics, 13(2), pp 215-232 (1942).
|
|
.. [2] https://en.wikipedia.org/wiki/Dagum_distribution
|
|
.. [3] Kleiber, Christian. "A guide to the Dagum distributions."
|
|
Modeling Income Distributions and Lorenz Curves pp 97-117 (2008).
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
# Do not set _support_mask to rv_continuous._open_support_mask
|
|
# Whether the left-hand endpoint is suitable for pdf evaluation is dependent
|
|
# on the values of c and d: if c*d >= 1, the pdf is finite, otherwise infinite.
|
|
|
|
def _shape_info(self):
|
|
ic = _ShapeInfo("c", False, (0, np.inf), (False, False))
|
|
id = _ShapeInfo("d", False, (0, np.inf), (False, False))
|
|
return [ic, id]
|
|
|
|
def _pdf(self, x, c, d):
|
|
# burr.pdf(x, c, d) = c * d * x**(-c-1) * (1+x**(-c))**(-d-1)
|
|
output = _lazywhere(
|
|
x == 0, [x, c, d],
|
|
lambda x_, c_, d_: c_ * d_ * (x_**(c_*d_-1)) / (1 + x_**c_),
|
|
f2=lambda x_, c_, d_: (c_ * d_ * (x_ ** (-c_ - 1.0)) /
|
|
((1 + x_ ** (-c_)) ** (d_ + 1.0))))
|
|
if output.ndim == 0:
|
|
return output[()]
|
|
return output
|
|
|
|
def _logpdf(self, x, c, d):
|
|
output = _lazywhere(
|
|
x == 0, [x, c, d],
|
|
lambda x_, c_, d_: (np.log(c_) + np.log(d_) + sc.xlogy(c_*d_ - 1, x_)
|
|
- (d_+1) * sc.log1p(x_**(c_))),
|
|
f2=lambda x_, c_, d_: (np.log(c_) + np.log(d_)
|
|
+ sc.xlogy(-c_ - 1, x_)
|
|
- sc.xlog1py(d_+1, x_**(-c_))))
|
|
if output.ndim == 0:
|
|
return output[()]
|
|
return output
|
|
|
|
def _cdf(self, x, c, d):
|
|
return (1 + x**(-c))**(-d)
|
|
|
|
def _logcdf(self, x, c, d):
|
|
return sc.log1p(x**(-c)) * (-d)
|
|
|
|
def _sf(self, x, c, d):
|
|
return np.exp(self._logsf(x, c, d))
|
|
|
|
def _logsf(self, x, c, d):
|
|
return np.log1p(- (1 + x**(-c))**(-d))
|
|
|
|
def _ppf(self, q, c, d):
|
|
return (q**(-1.0/d) - 1)**(-1.0/c)
|
|
|
|
def _isf(self, q, c, d):
|
|
_q = sc.xlog1py(-1.0 / d, -q)
|
|
return sc.expm1(_q) ** (-1.0 / c)
|
|
|
|
def _stats(self, c, d):
|
|
nc = np.arange(1, 5).reshape(4,1) / c
|
|
# ek is the kth raw moment, e1 is the mean e2-e1**2 variance etc.
|
|
e1, e2, e3, e4 = sc.beta(d + nc, 1. - nc) * d
|
|
mu = np.where(c > 1.0, e1, np.nan)
|
|
mu2_if_c = e2 - mu**2
|
|
mu2 = np.where(c > 2.0, mu2_if_c, np.nan)
|
|
g1 = _lazywhere(
|
|
c > 3.0,
|
|
(c, e1, e2, e3, mu2_if_c),
|
|
lambda c, e1, e2, e3, mu2_if_c: ((e3 - 3*e2*e1 + 2*e1**3)
|
|
/ np.sqrt((mu2_if_c)**3)),
|
|
fillvalue=np.nan)
|
|
g2 = _lazywhere(
|
|
c > 4.0,
|
|
(c, e1, e2, e3, e4, mu2_if_c),
|
|
lambda c, e1, e2, e3, e4, mu2_if_c: (
|
|
((e4 - 4*e3*e1 + 6*e2*e1**2 - 3*e1**4) / mu2_if_c**2) - 3),
|
|
fillvalue=np.nan)
|
|
if np.ndim(c) == 0:
|
|
return mu.item(), mu2.item(), g1.item(), g2.item()
|
|
return mu, mu2, g1, g2
|
|
|
|
def _munp(self, n, c, d):
|
|
def __munp(n, c, d):
|
|
nc = 1. * n / c
|
|
return d * sc.beta(1.0 - nc, d + nc)
|
|
n, c, d = np.asarray(n), np.asarray(c), np.asarray(d)
|
|
return _lazywhere((c > n) & (n == n) & (d == d), (c, d, n),
|
|
lambda c, d, n: __munp(n, c, d),
|
|
np.nan)
|
|
|
|
|
|
burr = burr_gen(a=0.0, name='burr')
|
|
|
|
|
|
class burr12_gen(rv_continuous):
|
|
r"""A Burr (Type XII) continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
See Also
|
|
--------
|
|
fisk : a special case of either `burr` or `burr12` with ``d=1``
|
|
burr : Burr Type III distribution
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `burr12` is:
|
|
|
|
.. math::
|
|
|
|
f(x; c, d) = c d \frac{x^{c-1}}
|
|
{(1 + x^c)^{d + 1}}
|
|
|
|
for :math:`x >= 0` and :math:`c, d > 0`.
|
|
|
|
`burr12` takes ``c`` and ``d`` as shape parameters for :math:`c`
|
|
and :math:`d`.
|
|
|
|
This is the PDF corresponding to the twelfth CDF given in Burr's list;
|
|
specifically, it is equation (20) in Burr's paper [1]_.
|
|
|
|
%(after_notes)s
|
|
|
|
The Burr type 12 distribution is also sometimes referred to as
|
|
the Singh-Maddala distribution from NIST [2]_.
|
|
|
|
References
|
|
----------
|
|
.. [1] Burr, I. W. "Cumulative frequency functions", Annals of
|
|
Mathematical Statistics, 13(2), pp 215-232 (1942).
|
|
|
|
.. [2] https://www.itl.nist.gov/div898/software/dataplot/refman2/auxillar/b12pdf.htm
|
|
|
|
.. [3] "Burr distribution",
|
|
https://en.wikipedia.org/wiki/Burr_distribution
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
ic = _ShapeInfo("c", False, (0, np.inf), (False, False))
|
|
id = _ShapeInfo("d", False, (0, np.inf), (False, False))
|
|
return [ic, id]
|
|
|
|
def _pdf(self, x, c, d):
|
|
# burr12.pdf(x, c, d) = c * d * x**(c-1) * (1+x**(c))**(-d-1)
|
|
return np.exp(self._logpdf(x, c, d))
|
|
|
|
def _logpdf(self, x, c, d):
|
|
return np.log(c) + np.log(d) + sc.xlogy(c - 1, x) + sc.xlog1py(-d-1, x**c)
|
|
|
|
def _cdf(self, x, c, d):
|
|
return -sc.expm1(self._logsf(x, c, d))
|
|
|
|
def _logcdf(self, x, c, d):
|
|
return sc.log1p(-(1 + x**c)**(-d))
|
|
|
|
def _sf(self, x, c, d):
|
|
return np.exp(self._logsf(x, c, d))
|
|
|
|
def _logsf(self, x, c, d):
|
|
return sc.xlog1py(-d, x**c)
|
|
|
|
def _ppf(self, q, c, d):
|
|
# The following is an implementation of
|
|
# ((1 - q)**(-1.0/d) - 1)**(1.0/c)
|
|
# that does a better job handling small values of q.
|
|
return sc.expm1(-1/d * sc.log1p(-q))**(1/c)
|
|
|
|
def _isf(self, p, c, d):
|
|
return sc.expm1(-1/d * np.log(p))**(1/c)
|
|
|
|
def _munp(self, n, c, d):
|
|
def moment_if_exists(n, c, d):
|
|
nc = 1. * n / c
|
|
return d * sc.beta(1.0 + nc, d - nc)
|
|
|
|
return _lazywhere(c * d > n, (n, c, d), moment_if_exists,
|
|
fillvalue=np.nan)
|
|
|
|
|
|
burr12 = burr12_gen(a=0.0, name='burr12')
|
|
|
|
|
|
class fisk_gen(burr_gen):
|
|
r"""A Fisk continuous random variable.
|
|
|
|
The Fisk distribution is also known as the log-logistic distribution.
|
|
|
|
%(before_notes)s
|
|
|
|
See Also
|
|
--------
|
|
burr
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `fisk` is:
|
|
|
|
.. math::
|
|
|
|
f(x, c) = \frac{c x^{c-1}}
|
|
{(1 + x^c)^2}
|
|
|
|
for :math:`x >= 0` and :math:`c > 0`.
|
|
|
|
Please note that the above expression can be transformed into the following
|
|
one, which is also commonly used:
|
|
|
|
.. math::
|
|
|
|
f(x, c) = \frac{c x^{-c-1}}
|
|
{(1 + x^{-c})^2}
|
|
|
|
`fisk` takes ``c`` as a shape parameter for :math:`c`.
|
|
|
|
`fisk` is a special case of `burr` or `burr12` with ``d=1``.
|
|
|
|
Suppose ``X`` is a logistic random variable with location ``l``
|
|
and scale ``s``. Then ``Y = exp(X)`` is a Fisk (log-logistic)
|
|
random variable with ``scale = exp(l)`` and shape ``c = 1/s``.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
|
|
|
|
def _pdf(self, x, c):
|
|
# fisk.pdf(x, c) = c * x**(-c-1) * (1 + x**(-c))**(-2)
|
|
return burr._pdf(x, c, 1.0)
|
|
|
|
def _cdf(self, x, c):
|
|
return burr._cdf(x, c, 1.0)
|
|
|
|
def _sf(self, x, c):
|
|
return burr._sf(x, c, 1.0)
|
|
|
|
def _logpdf(self, x, c):
|
|
# fisk.pdf(x, c) = c * x**(-c-1) * (1 + x**(-c))**(-2)
|
|
return burr._logpdf(x, c, 1.0)
|
|
|
|
def _logcdf(self, x, c):
|
|
return burr._logcdf(x, c, 1.0)
|
|
|
|
def _logsf(self, x, c):
|
|
return burr._logsf(x, c, 1.0)
|
|
|
|
def _ppf(self, x, c):
|
|
return burr._ppf(x, c, 1.0)
|
|
|
|
def _isf(self, q, c):
|
|
return burr._isf(q, c, 1.0)
|
|
|
|
def _munp(self, n, c):
|
|
return burr._munp(n, c, 1.0)
|
|
|
|
def _stats(self, c):
|
|
return burr._stats(c, 1.0)
|
|
|
|
def _entropy(self, c):
|
|
return 2 - np.log(c)
|
|
|
|
|
|
fisk = fisk_gen(a=0.0, name='fisk')
|
|
|
|
|
|
class cauchy_gen(rv_continuous):
|
|
r"""A Cauchy continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `cauchy` is
|
|
|
|
.. math::
|
|
|
|
f(x) = \frac{1}{\pi (1 + x^2)}
|
|
|
|
for a real number :math:`x`.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return []
|
|
|
|
def _pdf(self, x):
|
|
# cauchy.pdf(x) = 1 / (pi * (1 + x**2))
|
|
return 1.0/np.pi/(1.0+x*x)
|
|
|
|
def _cdf(self, x):
|
|
return 0.5 + 1.0/np.pi*np.arctan(x)
|
|
|
|
def _ppf(self, q):
|
|
return np.tan(np.pi*q-np.pi/2.0)
|
|
|
|
def _sf(self, x):
|
|
return 0.5 - 1.0/np.pi*np.arctan(x)
|
|
|
|
def _isf(self, q):
|
|
return np.tan(np.pi/2.0-np.pi*q)
|
|
|
|
def _stats(self):
|
|
return np.nan, np.nan, np.nan, np.nan
|
|
|
|
def _entropy(self):
|
|
return np.log(4*np.pi)
|
|
|
|
def _fitstart(self, data, args=None):
|
|
# Initialize ML guesses using quartiles instead of moments.
|
|
if isinstance(data, CensoredData):
|
|
data = data._uncensor()
|
|
p25, p50, p75 = np.percentile(data, [25, 50, 75])
|
|
return p50, (p75 - p25)/2
|
|
|
|
|
|
cauchy = cauchy_gen(name='cauchy')
|
|
|
|
|
|
class chi_gen(rv_continuous):
|
|
r"""A chi continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `chi` is:
|
|
|
|
.. math::
|
|
|
|
f(x, k) = \frac{1}{2^{k/2-1} \Gamma \left( k/2 \right)}
|
|
x^{k-1} \exp \left( -x^2/2 \right)
|
|
|
|
for :math:`x >= 0` and :math:`k > 0` (degrees of freedom, denoted ``df``
|
|
in the implementation). :math:`\Gamma` is the gamma function
|
|
(`scipy.special.gamma`).
|
|
|
|
Special cases of `chi` are:
|
|
|
|
- ``chi(1, loc, scale)`` is equivalent to `halfnorm`
|
|
- ``chi(2, 0, scale)`` is equivalent to `rayleigh`
|
|
- ``chi(3, 0, scale)`` is equivalent to `maxwell`
|
|
|
|
`chi` takes ``df`` as a shape parameter.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("df", False, (0, np.inf), (False, False))]
|
|
|
|
def _rvs(self, df, size=None, random_state=None):
|
|
return np.sqrt(chi2.rvs(df, size=size, random_state=random_state))
|
|
|
|
def _pdf(self, x, df):
|
|
# x**(df-1) * exp(-x**2/2)
|
|
# chi.pdf(x, df) = -------------------------
|
|
# 2**(df/2-1) * gamma(df/2)
|
|
return np.exp(self._logpdf(x, df))
|
|
|
|
def _logpdf(self, x, df):
|
|
l = np.log(2) - .5*np.log(2)*df - sc.gammaln(.5*df)
|
|
return l + sc.xlogy(df - 1., x) - .5*x**2
|
|
|
|
def _cdf(self, x, df):
|
|
return sc.gammainc(.5*df, .5*x**2)
|
|
|
|
def _sf(self, x, df):
|
|
return sc.gammaincc(.5*df, .5*x**2)
|
|
|
|
def _ppf(self, q, df):
|
|
return np.sqrt(2*sc.gammaincinv(.5*df, q))
|
|
|
|
def _isf(self, q, df):
|
|
return np.sqrt(2*sc.gammainccinv(.5*df, q))
|
|
|
|
def _stats(self, df):
|
|
# poch(df/2, 1/2) = gamma(df/2 + 1/2) / gamma(df/2)
|
|
mu = np.sqrt(2) * sc.poch(0.5 * df, 0.5)
|
|
mu2 = df - mu*mu
|
|
g1 = (2*mu**3.0 + mu*(1-2*df))/np.asarray(np.power(mu2, 1.5))
|
|
g2 = 2*df*(1.0-df)-6*mu**4 + 4*mu**2 * (2*df-1)
|
|
g2 /= np.asarray(mu2**2.0)
|
|
return mu, mu2, g1, g2
|
|
|
|
def _entropy(self, df):
|
|
|
|
def regular_formula(df):
|
|
return (sc.gammaln(.5 * df)
|
|
+ 0.5 * (df - np.log(2) - (df - 1) * sc.digamma(0.5 * df)))
|
|
|
|
def asymptotic_formula(df):
|
|
return (0.5 + np.log(np.pi)/2 - (df**-1)/6 - (df**-2)/6
|
|
- 4/45*(df**-3) + (df**-4)/15)
|
|
|
|
return _lazywhere(df < 3e2, (df, ), regular_formula,
|
|
f2=asymptotic_formula)
|
|
|
|
|
|
chi = chi_gen(a=0.0, name='chi')
|
|
|
|
|
|
class chi2_gen(rv_continuous):
|
|
r"""A chi-squared continuous random variable.
|
|
|
|
For the noncentral chi-square distribution, see `ncx2`.
|
|
|
|
%(before_notes)s
|
|
|
|
See Also
|
|
--------
|
|
ncx2
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `chi2` is:
|
|
|
|
.. math::
|
|
|
|
f(x, k) = \frac{1}{2^{k/2} \Gamma \left( k/2 \right)}
|
|
x^{k/2-1} \exp \left( -x/2 \right)
|
|
|
|
for :math:`x > 0` and :math:`k > 0` (degrees of freedom, denoted ``df``
|
|
in the implementation).
|
|
|
|
`chi2` takes ``df`` as a shape parameter.
|
|
|
|
The chi-squared distribution is a special case of the gamma
|
|
distribution, with gamma parameters ``a = df/2``, ``loc = 0`` and
|
|
``scale = 2``.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("df", False, (0, np.inf), (False, False))]
|
|
|
|
def _rvs(self, df, size=None, random_state=None):
|
|
return random_state.chisquare(df, size)
|
|
|
|
def _pdf(self, x, df):
|
|
# chi2.pdf(x, df) = 1 / (2*gamma(df/2)) * (x/2)**(df/2-1) * exp(-x/2)
|
|
return np.exp(self._logpdf(x, df))
|
|
|
|
def _logpdf(self, x, df):
|
|
return sc.xlogy(df/2.-1, x) - x/2. - sc.gammaln(df/2.) - (np.log(2)*df)/2.
|
|
|
|
def _cdf(self, x, df):
|
|
return sc.chdtr(df, x)
|
|
|
|
def _sf(self, x, df):
|
|
return sc.chdtrc(df, x)
|
|
|
|
def _isf(self, p, df):
|
|
return sc.chdtri(df, p)
|
|
|
|
def _ppf(self, p, df):
|
|
return 2*sc.gammaincinv(df/2, p)
|
|
|
|
def _stats(self, df):
|
|
mu = df
|
|
mu2 = 2*df
|
|
g1 = 2*np.sqrt(2.0/df)
|
|
g2 = 12.0/df
|
|
return mu, mu2, g1, g2
|
|
|
|
def _entropy(self, df):
|
|
half_df = 0.5 * df
|
|
|
|
def regular_formula(half_df):
|
|
return (half_df + np.log(2) + sc.gammaln(half_df) +
|
|
(1 - half_df) * sc.psi(half_df))
|
|
|
|
def asymptotic_formula(half_df):
|
|
# plug in the above formula the following asymptotic
|
|
# expansions:
|
|
# ln(gamma(a)) ~ (a - 0.5) * ln(a) - a + 0.5 * ln(2 * pi) +
|
|
# 1/(12 * a) - 1/(360 * a**3)
|
|
# psi(a) ~ ln(a) - 1/(2 * a) - 1/(3 * a**2) + 1/120 * a**4)
|
|
c = np.log(2) + 0.5*(1 + np.log(2*np.pi))
|
|
h = 0.5/half_df
|
|
return (h*(-2/3 + h*(-1/3 + h*(-4/45 + h/7.5))) +
|
|
0.5*np.log(half_df) + c)
|
|
|
|
return _lazywhere(half_df < 125, (half_df, ),
|
|
regular_formula,
|
|
f2=asymptotic_formula)
|
|
|
|
|
|
chi2 = chi2_gen(a=0.0, name='chi2')
|
|
|
|
|
|
class cosine_gen(rv_continuous):
|
|
r"""A cosine continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The cosine distribution is an approximation to the normal distribution.
|
|
The probability density function for `cosine` is:
|
|
|
|
.. math::
|
|
|
|
f(x) = \frac{1}{2\pi} (1+\cos(x))
|
|
|
|
for :math:`-\pi \le x \le \pi`.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return []
|
|
|
|
def _pdf(self, x):
|
|
# cosine.pdf(x) = 1/(2*pi) * (1+cos(x))
|
|
return 1.0/2/np.pi*(1+np.cos(x))
|
|
|
|
def _logpdf(self, x):
|
|
c = np.cos(x)
|
|
return _lazywhere(c != -1, (c,),
|
|
lambda c: np.log1p(c) - np.log(2*np.pi),
|
|
fillvalue=-np.inf)
|
|
|
|
def _cdf(self, x):
|
|
return scu._cosine_cdf(x)
|
|
|
|
def _sf(self, x):
|
|
return scu._cosine_cdf(-x)
|
|
|
|
def _ppf(self, p):
|
|
return scu._cosine_invcdf(p)
|
|
|
|
def _isf(self, p):
|
|
return -scu._cosine_invcdf(p)
|
|
|
|
def _stats(self):
|
|
v = (np.pi * np.pi / 3.0) - 2.0
|
|
k = -6.0 * (np.pi**4 - 90) / (5.0 * (np.pi * np.pi - 6)**2)
|
|
return 0.0, v, 0.0, k
|
|
|
|
def _entropy(self):
|
|
return np.log(4*np.pi)-1.0
|
|
|
|
|
|
cosine = cosine_gen(a=-np.pi, b=np.pi, name='cosine')
|
|
|
|
|
|
class dgamma_gen(rv_continuous):
|
|
r"""A double gamma continuous random variable.
|
|
|
|
The double gamma distribution is also known as the reflected gamma
|
|
distribution [1]_.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `dgamma` is:
|
|
|
|
.. math::
|
|
|
|
f(x, a) = \frac{1}{2\Gamma(a)} |x|^{a-1} \exp(-|x|)
|
|
|
|
for a real number :math:`x` and :math:`a > 0`. :math:`\Gamma` is the
|
|
gamma function (`scipy.special.gamma`).
|
|
|
|
`dgamma` takes ``a`` as a shape parameter for :math:`a`.
|
|
|
|
%(after_notes)s
|
|
|
|
References
|
|
----------
|
|
.. [1] Johnson, Kotz, and Balakrishnan, "Continuous Univariate
|
|
Distributions, Volume 1", Second Edition, John Wiley and Sons
|
|
(1994).
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("a", False, (0, np.inf), (False, False))]
|
|
|
|
def _rvs(self, a, size=None, random_state=None):
|
|
u = random_state.uniform(size=size)
|
|
gm = gamma.rvs(a, size=size, random_state=random_state)
|
|
return gm * np.where(u >= 0.5, 1, -1)
|
|
|
|
def _pdf(self, x, a):
|
|
# dgamma.pdf(x, a) = 1 / (2*gamma(a)) * abs(x)**(a-1) * exp(-abs(x))
|
|
ax = abs(x)
|
|
return 1.0/(2*sc.gamma(a))*ax**(a-1.0) * np.exp(-ax)
|
|
|
|
def _logpdf(self, x, a):
|
|
ax = abs(x)
|
|
return sc.xlogy(a - 1.0, ax) - ax - np.log(2) - sc.gammaln(a)
|
|
|
|
def _cdf(self, x, a):
|
|
return np.where(x > 0,
|
|
0.5 + 0.5*sc.gammainc(a, x),
|
|
0.5*sc.gammaincc(a, -x))
|
|
|
|
def _sf(self, x, a):
|
|
return np.where(x > 0,
|
|
0.5*sc.gammaincc(a, x),
|
|
0.5 + 0.5*sc.gammainc(a, -x))
|
|
|
|
def _entropy(self, a):
|
|
return stats.gamma._entropy(a) - np.log(0.5)
|
|
|
|
def _ppf(self, q, a):
|
|
return np.where(q > 0.5,
|
|
sc.gammaincinv(a, 2*q - 1),
|
|
-sc.gammainccinv(a, 2*q))
|
|
|
|
def _isf(self, q, a):
|
|
return np.where(q > 0.5,
|
|
-sc.gammaincinv(a, 2*q - 1),
|
|
sc.gammainccinv(a, 2*q))
|
|
|
|
def _stats(self, a):
|
|
mu2 = a*(a+1.0)
|
|
return 0.0, mu2, 0.0, (a+2.0)*(a+3.0)/mu2-3.0
|
|
|
|
|
|
dgamma = dgamma_gen(name='dgamma')
|
|
|
|
|
|
class dweibull_gen(rv_continuous):
|
|
r"""A double Weibull continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `dweibull` is given by
|
|
|
|
.. math::
|
|
|
|
f(x, c) = c / 2 |x|^{c-1} \exp(-|x|^c)
|
|
|
|
for a real number :math:`x` and :math:`c > 0`.
|
|
|
|
`dweibull` takes ``c`` as a shape parameter for :math:`c`.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
|
|
|
|
def _rvs(self, c, size=None, random_state=None):
|
|
u = random_state.uniform(size=size)
|
|
w = weibull_min.rvs(c, size=size, random_state=random_state)
|
|
return w * (np.where(u >= 0.5, 1, -1))
|
|
|
|
def _pdf(self, x, c):
|
|
# dweibull.pdf(x, c) = c / 2 * abs(x)**(c-1) * exp(-abs(x)**c)
|
|
ax = abs(x)
|
|
Px = c / 2.0 * ax**(c-1.0) * np.exp(-ax**c)
|
|
return Px
|
|
|
|
def _logpdf(self, x, c):
|
|
ax = abs(x)
|
|
return np.log(c) - np.log(2.0) + sc.xlogy(c - 1.0, ax) - ax**c
|
|
|
|
def _cdf(self, x, c):
|
|
Cx1 = 0.5 * np.exp(-abs(x)**c)
|
|
return np.where(x > 0, 1 - Cx1, Cx1)
|
|
|
|
def _ppf(self, q, c):
|
|
fac = 2. * np.where(q <= 0.5, q, 1. - q)
|
|
fac = np.power(-np.log(fac), 1.0 / c)
|
|
return np.where(q > 0.5, fac, -fac)
|
|
|
|
def _sf(self, x, c):
|
|
half_weibull_min_sf = 0.5 * stats.weibull_min._sf(np.abs(x), c)
|
|
return np.where(x > 0, half_weibull_min_sf, 1 - half_weibull_min_sf)
|
|
|
|
def _isf(self, q, c):
|
|
double_q = 2. * np.where(q <= 0.5, q, 1. - q)
|
|
weibull_min_isf = stats.weibull_min._isf(double_q, c)
|
|
return np.where(q > 0.5, -weibull_min_isf, weibull_min_isf)
|
|
|
|
def _munp(self, n, c):
|
|
return (1 - (n % 2)) * sc.gamma(1.0 + 1.0 * n / c)
|
|
|
|
# since we know that all odd moments are zeros, return them at once.
|
|
# returning Nones from _stats makes the public stats call _munp
|
|
# so overall we're saving one or two gamma function evaluations here.
|
|
def _stats(self, c):
|
|
return 0, None, 0, None
|
|
|
|
def _entropy(self, c):
|
|
h = stats.weibull_min._entropy(c) - np.log(0.5)
|
|
return h
|
|
|
|
|
|
dweibull = dweibull_gen(name='dweibull')
|
|
|
|
|
|
class expon_gen(rv_continuous):
|
|
r"""An exponential continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `expon` is:
|
|
|
|
.. math::
|
|
|
|
f(x) = \exp(-x)
|
|
|
|
for :math:`x \ge 0`.
|
|
|
|
%(after_notes)s
|
|
|
|
A common parameterization for `expon` is in terms of the rate parameter
|
|
``lambda``, such that ``pdf = lambda * exp(-lambda * x)``. This
|
|
parameterization corresponds to using ``scale = 1 / lambda``.
|
|
|
|
The exponential distribution is a special case of the gamma
|
|
distributions, with gamma shape parameter ``a = 1``.
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return []
|
|
|
|
def _rvs(self, size=None, random_state=None):
|
|
return random_state.standard_exponential(size)
|
|
|
|
def _pdf(self, x):
|
|
# expon.pdf(x) = exp(-x)
|
|
return np.exp(-x)
|
|
|
|
def _logpdf(self, x):
|
|
return -x
|
|
|
|
def _cdf(self, x):
|
|
return -sc.expm1(-x)
|
|
|
|
def _ppf(self, q):
|
|
return -sc.log1p(-q)
|
|
|
|
def _sf(self, x):
|
|
return np.exp(-x)
|
|
|
|
def _logsf(self, x):
|
|
return -x
|
|
|
|
def _isf(self, q):
|
|
return -np.log(q)
|
|
|
|
def _stats(self):
|
|
return 1.0, 1.0, 2.0, 6.0
|
|
|
|
def _entropy(self):
|
|
return 1.0
|
|
|
|
@_call_super_mom
|
|
@replace_notes_in_docstring(rv_continuous, notes="""\
|
|
When `method='MLE'`,
|
|
this function uses explicit formulas for the maximum likelihood
|
|
estimation of the exponential distribution parameters, so the
|
|
`optimizer`, `loc` and `scale` keyword arguments are
|
|
ignored.\n\n""")
|
|
def fit(self, data, *args, **kwds):
|
|
if len(args) > 0:
|
|
raise TypeError("Too many arguments.")
|
|
|
|
floc = kwds.pop('floc', None)
|
|
fscale = kwds.pop('fscale', None)
|
|
|
|
_remove_optimizer_parameters(kwds)
|
|
|
|
if floc is not None and fscale is not None:
|
|
# This check is for consistency with `rv_continuous.fit`.
|
|
raise ValueError("All parameters fixed. There is nothing to "
|
|
"optimize.")
|
|
|
|
data = np.asarray(data)
|
|
|
|
if not np.isfinite(data).all():
|
|
raise ValueError("The data contains non-finite values.")
|
|
|
|
data_min = data.min()
|
|
|
|
if floc is None:
|
|
# ML estimate of the location is the minimum of the data.
|
|
loc = data_min
|
|
else:
|
|
loc = floc
|
|
if data_min < loc:
|
|
# There are values that are less than the specified loc.
|
|
raise FitDataError("expon", lower=floc, upper=np.inf)
|
|
|
|
if fscale is None:
|
|
# ML estimate of the scale is the shifted mean.
|
|
scale = data.mean() - loc
|
|
else:
|
|
scale = fscale
|
|
|
|
# We expect the return values to be floating point, so ensure it
|
|
# by explicitly converting to float.
|
|
return float(loc), float(scale)
|
|
|
|
|
|
expon = expon_gen(a=0.0, name='expon')
|
|
|
|
|
|
class exponnorm_gen(rv_continuous):
|
|
r"""An exponentially modified Normal continuous random variable.
|
|
|
|
Also known as the exponentially modified Gaussian distribution [1]_.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `exponnorm` is:
|
|
|
|
.. math::
|
|
|
|
f(x, K) = \frac{1}{2K} \exp\left(\frac{1}{2 K^2} - x / K \right)
|
|
\text{erfc}\left(-\frac{x - 1/K}{\sqrt{2}}\right)
|
|
|
|
where :math:`x` is a real number and :math:`K > 0`.
|
|
|
|
It can be thought of as the sum of a standard normal random variable
|
|
and an independent exponentially distributed random variable with rate
|
|
``1/K``.
|
|
|
|
%(after_notes)s
|
|
|
|
An alternative parameterization of this distribution (for example, in
|
|
the Wikipedia article [1]_) involves three parameters, :math:`\mu`,
|
|
:math:`\lambda` and :math:`\sigma`.
|
|
|
|
In the present parameterization this corresponds to having ``loc`` and
|
|
``scale`` equal to :math:`\mu` and :math:`\sigma`, respectively, and
|
|
shape parameter :math:`K = 1/(\sigma\lambda)`.
|
|
|
|
.. versionadded:: 0.16.0
|
|
|
|
References
|
|
----------
|
|
.. [1] Exponentially modified Gaussian distribution, Wikipedia,
|
|
https://en.wikipedia.org/wiki/Exponentially_modified_Gaussian_distribution
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("K", False, (0, np.inf), (False, False))]
|
|
|
|
def _rvs(self, K, size=None, random_state=None):
|
|
expval = random_state.standard_exponential(size) * K
|
|
gval = random_state.standard_normal(size)
|
|
return expval + gval
|
|
|
|
def _pdf(self, x, K):
|
|
return np.exp(self._logpdf(x, K))
|
|
|
|
def _logpdf(self, x, K):
|
|
invK = 1.0 / K
|
|
exparg = invK * (0.5 * invK - x)
|
|
return exparg + _norm_logcdf(x - invK) - np.log(K)
|
|
|
|
def _cdf(self, x, K):
|
|
invK = 1.0 / K
|
|
expval = invK * (0.5 * invK - x)
|
|
logprod = expval + _norm_logcdf(x - invK)
|
|
return _norm_cdf(x) - np.exp(logprod)
|
|
|
|
def _sf(self, x, K):
|
|
invK = 1.0 / K
|
|
expval = invK * (0.5 * invK - x)
|
|
logprod = expval + _norm_logcdf(x - invK)
|
|
return _norm_cdf(-x) + np.exp(logprod)
|
|
|
|
def _stats(self, K):
|
|
K2 = K * K
|
|
opK2 = 1.0 + K2
|
|
skw = 2 * K**3 * opK2**(-1.5)
|
|
krt = 6.0 * K2 * K2 * opK2**(-2)
|
|
return K, opK2, skw, krt
|
|
|
|
|
|
exponnorm = exponnorm_gen(name='exponnorm')
|
|
|
|
|
|
def _pow1pm1(x, y):
|
|
"""
|
|
Compute (1 + x)**y - 1.
|
|
|
|
Uses expm1 and xlog1py to avoid loss of precision when
|
|
(1 + x)**y is close to 1.
|
|
|
|
Note that the inverse of this function with respect to x is
|
|
``_pow1pm1(x, 1/y)``. That is, if
|
|
|
|
t = _pow1pm1(x, y)
|
|
|
|
then
|
|
|
|
x = _pow1pm1(t, 1/y)
|
|
"""
|
|
return np.expm1(sc.xlog1py(y, x))
|
|
|
|
|
|
class exponweib_gen(rv_continuous):
|
|
r"""An exponentiated Weibull continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
See Also
|
|
--------
|
|
weibull_min, numpy.random.Generator.weibull
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `exponweib` is:
|
|
|
|
.. math::
|
|
|
|
f(x, a, c) = a c [1-\exp(-x^c)]^{a-1} \exp(-x^c) x^{c-1}
|
|
|
|
and its cumulative distribution function is:
|
|
|
|
.. math::
|
|
|
|
F(x, a, c) = [1-\exp(-x^c)]^a
|
|
|
|
for :math:`x > 0`, :math:`a > 0`, :math:`c > 0`.
|
|
|
|
`exponweib` takes :math:`a` and :math:`c` as shape parameters:
|
|
|
|
* :math:`a` is the exponentiation parameter,
|
|
with the special case :math:`a=1` corresponding to the
|
|
(non-exponentiated) Weibull distribution `weibull_min`.
|
|
* :math:`c` is the shape parameter of the non-exponentiated Weibull law.
|
|
|
|
%(after_notes)s
|
|
|
|
References
|
|
----------
|
|
https://en.wikipedia.org/wiki/Exponentiated_Weibull_distribution
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
|
|
ic = _ShapeInfo("c", False, (0, np.inf), (False, False))
|
|
return [ia, ic]
|
|
|
|
def _pdf(self, x, a, c):
|
|
# exponweib.pdf(x, a, c) =
|
|
# a * c * (1-exp(-x**c))**(a-1) * exp(-x**c)*x**(c-1)
|
|
return np.exp(self._logpdf(x, a, c))
|
|
|
|
def _logpdf(self, x, a, c):
|
|
negxc = -x**c
|
|
exm1c = -sc.expm1(negxc)
|
|
logp = (np.log(a) + np.log(c) + sc.xlogy(a - 1.0, exm1c) +
|
|
negxc + sc.xlogy(c - 1.0, x))
|
|
return logp
|
|
|
|
def _cdf(self, x, a, c):
|
|
exm1c = -sc.expm1(-x**c)
|
|
return exm1c**a
|
|
|
|
def _ppf(self, q, a, c):
|
|
return (-sc.log1p(-q**(1.0/a)))**np.asarray(1.0/c)
|
|
|
|
def _sf(self, x, a, c):
|
|
return -_pow1pm1(-np.exp(-x**c), a)
|
|
|
|
def _isf(self, p, a, c):
|
|
return (-np.log(-_pow1pm1(-p, 1/a)))**(1/c)
|
|
|
|
|
|
exponweib = exponweib_gen(a=0.0, name='exponweib')
|
|
|
|
|
|
class exponpow_gen(rv_continuous):
|
|
r"""An exponential power continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `exponpow` is:
|
|
|
|
.. math::
|
|
|
|
f(x, b) = b x^{b-1} \exp(1 + x^b - \exp(x^b))
|
|
|
|
for :math:`x \ge 0`, :math:`b > 0`. Note that this is a different
|
|
distribution from the exponential power distribution that is also known
|
|
under the names "generalized normal" or "generalized Gaussian".
|
|
|
|
`exponpow` takes ``b`` as a shape parameter for :math:`b`.
|
|
|
|
%(after_notes)s
|
|
|
|
References
|
|
----------
|
|
http://www.math.wm.edu/~leemis/chart/UDR/PDFs/Exponentialpower.pdf
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("b", False, (0, np.inf), (False, False))]
|
|
|
|
def _pdf(self, x, b):
|
|
# exponpow.pdf(x, b) = b * x**(b-1) * exp(1 + x**b - exp(x**b))
|
|
return np.exp(self._logpdf(x, b))
|
|
|
|
def _logpdf(self, x, b):
|
|
xb = x**b
|
|
f = 1 + np.log(b) + sc.xlogy(b - 1.0, x) + xb - np.exp(xb)
|
|
return f
|
|
|
|
def _cdf(self, x, b):
|
|
return -sc.expm1(-sc.expm1(x**b))
|
|
|
|
def _sf(self, x, b):
|
|
return np.exp(-sc.expm1(x**b))
|
|
|
|
def _isf(self, x, b):
|
|
return (sc.log1p(-np.log(x)))**(1./b)
|
|
|
|
def _ppf(self, q, b):
|
|
return pow(sc.log1p(-sc.log1p(-q)), 1.0/b)
|
|
|
|
|
|
exponpow = exponpow_gen(a=0.0, name='exponpow')
|
|
|
|
|
|
class fatiguelife_gen(rv_continuous):
|
|
r"""A fatigue-life (Birnbaum-Saunders) continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `fatiguelife` is:
|
|
|
|
.. math::
|
|
|
|
f(x, c) = \frac{x+1}{2c\sqrt{2\pi x^3}} \exp(-\frac{(x-1)^2}{2x c^2})
|
|
|
|
for :math:`x >= 0` and :math:`c > 0`.
|
|
|
|
`fatiguelife` takes ``c`` as a shape parameter for :math:`c`.
|
|
|
|
%(after_notes)s
|
|
|
|
References
|
|
----------
|
|
.. [1] "Birnbaum-Saunders distribution",
|
|
https://en.wikipedia.org/wiki/Birnbaum-Saunders_distribution
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
_support_mask = rv_continuous._open_support_mask
|
|
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
|
|
|
|
def _rvs(self, c, size=None, random_state=None):
|
|
z = random_state.standard_normal(size)
|
|
x = 0.5*c*z
|
|
x2 = x*x
|
|
t = 1.0 + 2*x2 + 2*x*np.sqrt(1 + x2)
|
|
return t
|
|
|
|
def _pdf(self, x, c):
|
|
# fatiguelife.pdf(x, c) =
|
|
# (x+1) / (2*c*sqrt(2*pi*x**3)) * exp(-(x-1)**2/(2*x*c**2))
|
|
return np.exp(self._logpdf(x, c))
|
|
|
|
def _logpdf(self, x, c):
|
|
return (np.log(x+1) - (x-1)**2 / (2.0*x*c**2) - np.log(2*c) -
|
|
0.5*(np.log(2*np.pi) + 3*np.log(x)))
|
|
|
|
def _cdf(self, x, c):
|
|
return _norm_cdf(1.0 / c * (np.sqrt(x) - 1.0/np.sqrt(x)))
|
|
|
|
def _ppf(self, q, c):
|
|
tmp = c * _norm_ppf(q)
|
|
return 0.25 * (tmp + np.sqrt(tmp**2 + 4))**2
|
|
|
|
def _sf(self, x, c):
|
|
return _norm_sf(1.0 / c * (np.sqrt(x) - 1.0/np.sqrt(x)))
|
|
|
|
def _isf(self, q, c):
|
|
tmp = -c * _norm_ppf(q)
|
|
return 0.25 * (tmp + np.sqrt(tmp**2 + 4))**2
|
|
|
|
def _stats(self, c):
|
|
# NB: the formula for kurtosis in wikipedia seems to have an error:
|
|
# it's 40, not 41. At least it disagrees with the one from Wolfram
|
|
# Alpha. And the latter one, below, passes the tests, while the wiki
|
|
# one doesn't So far I didn't have the guts to actually check the
|
|
# coefficients from the expressions for the raw moments.
|
|
c2 = c*c
|
|
mu = c2 / 2.0 + 1.0
|
|
den = 5.0 * c2 + 4.0
|
|
mu2 = c2*den / 4.0
|
|
g1 = 4 * c * (11*c2 + 6.0) / np.power(den, 1.5)
|
|
g2 = 6 * c2 * (93*c2 + 40.0) / den**2.0
|
|
return mu, mu2, g1, g2
|
|
|
|
|
|
fatiguelife = fatiguelife_gen(a=0.0, name='fatiguelife')
|
|
|
|
|
|
class foldcauchy_gen(rv_continuous):
|
|
r"""A folded Cauchy continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `foldcauchy` is:
|
|
|
|
.. math::
|
|
|
|
f(x, c) = \frac{1}{\pi (1+(x-c)^2)} + \frac{1}{\pi (1+(x+c)^2)}
|
|
|
|
for :math:`x \ge 0` and :math:`c \ge 0`.
|
|
|
|
`foldcauchy` takes ``c`` as a shape parameter for :math:`c`.
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _argcheck(self, c):
|
|
return c >= 0
|
|
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("c", False, (0, np.inf), (True, False))]
|
|
|
|
def _rvs(self, c, size=None, random_state=None):
|
|
return abs(cauchy.rvs(loc=c, size=size,
|
|
random_state=random_state))
|
|
|
|
def _pdf(self, x, c):
|
|
# foldcauchy.pdf(x, c) = 1/(pi*(1+(x-c)**2)) + 1/(pi*(1+(x+c)**2))
|
|
return 1.0/np.pi*(1.0/(1+(x-c)**2) + 1.0/(1+(x+c)**2))
|
|
|
|
def _cdf(self, x, c):
|
|
return 1.0/np.pi*(np.arctan(x-c) + np.arctan(x+c))
|
|
|
|
def _sf(self, x, c):
|
|
# 1 - CDF(x, c) = 1 - (atan(x - c) + atan(x + c))/pi
|
|
# = ((pi/2 - atan(x - c)) + (pi/2 - atan(x + c)))/pi
|
|
# = (acot(x - c) + acot(x + c))/pi
|
|
# = (atan2(1, x - c) + atan2(1, x + c))/pi
|
|
return (np.arctan2(1, x - c) + np.arctan2(1, x + c))/np.pi
|
|
|
|
def _stats(self, c):
|
|
return np.inf, np.inf, np.nan, np.nan
|
|
|
|
|
|
foldcauchy = foldcauchy_gen(a=0.0, name='foldcauchy')
|
|
|
|
|
|
class f_gen(rv_continuous):
|
|
r"""An F continuous random variable.
|
|
|
|
For the noncentral F distribution, see `ncf`.
|
|
|
|
%(before_notes)s
|
|
|
|
See Also
|
|
--------
|
|
ncf
|
|
|
|
Notes
|
|
-----
|
|
The F distribution with :math:`df_1 > 0` and :math:`df_2 > 0` degrees of freedom is
|
|
the distribution of the ratio of two independent chi-squared distributions with
|
|
:math:`df_1` and :math:`df_2` degrees of freedom, after rescaling by
|
|
:math:`df_2 / df_1`.
|
|
|
|
The probability density function for `f` is:
|
|
|
|
.. math::
|
|
|
|
f(x, df_1, df_2) = \frac{df_2^{df_2/2} df_1^{df_1/2} x^{df_1 / 2-1}}
|
|
{(df_2+df_1 x)^{(df_1+df_2)/2}
|
|
B(df_1/2, df_2/2)}
|
|
|
|
for :math:`x > 0`.
|
|
|
|
`f` accepts shape parameters ``dfn`` and ``dfd`` for :math:`df_1`, the degrees of
|
|
freedom of the chi-squared distribution in the numerator, and :math:`df_2`, the
|
|
degrees of freedom of the chi-squared distribution in the denominator, respectively.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
idfn = _ShapeInfo("dfn", False, (0, np.inf), (False, False))
|
|
idfd = _ShapeInfo("dfd", False, (0, np.inf), (False, False))
|
|
return [idfn, idfd]
|
|
|
|
def _rvs(self, dfn, dfd, size=None, random_state=None):
|
|
return random_state.f(dfn, dfd, size)
|
|
|
|
def _pdf(self, x, dfn, dfd):
|
|
# df2**(df2/2) * df1**(df1/2) * x**(df1/2-1)
|
|
# F.pdf(x, df1, df2) = --------------------------------------------
|
|
# (df2+df1*x)**((df1+df2)/2) * B(df1/2, df2/2)
|
|
return np.exp(self._logpdf(x, dfn, dfd))
|
|
|
|
def _logpdf(self, x, dfn, dfd):
|
|
n = 1.0 * dfn
|
|
m = 1.0 * dfd
|
|
lPx = (m/2 * np.log(m) + n/2 * np.log(n) + sc.xlogy(n/2 - 1, x)
|
|
- (((n+m)/2) * np.log(m + n*x) + sc.betaln(n/2, m/2)))
|
|
return lPx
|
|
|
|
def _cdf(self, x, dfn, dfd):
|
|
return sc.fdtr(dfn, dfd, x)
|
|
|
|
def _sf(self, x, dfn, dfd):
|
|
return sc.fdtrc(dfn, dfd, x)
|
|
|
|
def _ppf(self, q, dfn, dfd):
|
|
return sc.fdtri(dfn, dfd, q)
|
|
|
|
def _stats(self, dfn, dfd):
|
|
v1, v2 = 1. * dfn, 1. * dfd
|
|
v2_2, v2_4, v2_6, v2_8 = v2 - 2., v2 - 4., v2 - 6., v2 - 8.
|
|
|
|
mu = _lazywhere(
|
|
v2 > 2, (v2, v2_2),
|
|
lambda v2, v2_2: v2 / v2_2,
|
|
np.inf)
|
|
|
|
mu2 = _lazywhere(
|
|
v2 > 4, (v1, v2, v2_2, v2_4),
|
|
lambda v1, v2, v2_2, v2_4:
|
|
2 * v2 * v2 * (v1 + v2_2) / (v1 * v2_2**2 * v2_4),
|
|
np.inf)
|
|
|
|
g1 = _lazywhere(
|
|
v2 > 6, (v1, v2_2, v2_4, v2_6),
|
|
lambda v1, v2_2, v2_4, v2_6:
|
|
(2 * v1 + v2_2) / v2_6 * np.sqrt(v2_4 / (v1 * (v1 + v2_2))),
|
|
np.nan)
|
|
g1 *= np.sqrt(8.)
|
|
|
|
g2 = _lazywhere(
|
|
v2 > 8, (g1, v2_6, v2_8),
|
|
lambda g1, v2_6, v2_8: (8 + g1 * g1 * v2_6) / v2_8,
|
|
np.nan)
|
|
g2 *= 3. / 2.
|
|
|
|
return mu, mu2, g1, g2
|
|
|
|
def _entropy(self, dfn, dfd):
|
|
# the formula found in literature is incorrect. This one yields the
|
|
# same result as numerical integration using the generic entropy
|
|
# definition. This is also tested in tests/test_conntinous_basic
|
|
half_dfn = 0.5 * dfn
|
|
half_dfd = 0.5 * dfd
|
|
half_sum = 0.5 * (dfn + dfd)
|
|
|
|
return (np.log(dfd) - np.log(dfn) + sc.betaln(half_dfn, half_dfd) +
|
|
(1 - half_dfn) * sc.psi(half_dfn) - (1 + half_dfd) *
|
|
sc.psi(half_dfd) + half_sum * sc.psi(half_sum))
|
|
|
|
|
|
f = f_gen(a=0.0, name='f')
|
|
|
|
|
|
## Folded Normal
|
|
## abs(Z) where (Z is normal with mu=L and std=S so that c=abs(L)/S)
|
|
##
|
|
## note: regress docs have scale parameter correct, but first parameter
|
|
## he gives is a shape parameter A = c * scale
|
|
|
|
## Half-normal is folded normal with shape-parameter c=0.
|
|
|
|
class foldnorm_gen(rv_continuous):
|
|
r"""A folded normal continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `foldnorm` is:
|
|
|
|
.. math::
|
|
|
|
f(x, c) = \sqrt{2/\pi} cosh(c x) \exp(-\frac{x^2+c^2}{2})
|
|
|
|
for :math:`x \ge 0` and :math:`c \ge 0`.
|
|
|
|
`foldnorm` takes ``c`` as a shape parameter for :math:`c`.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _argcheck(self, c):
|
|
return c >= 0
|
|
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("c", False, (0, np.inf), (True, False))]
|
|
|
|
def _rvs(self, c, size=None, random_state=None):
|
|
return abs(random_state.standard_normal(size) + c)
|
|
|
|
def _pdf(self, x, c):
|
|
# foldnormal.pdf(x, c) = sqrt(2/pi) * cosh(c*x) * exp(-(x**2+c**2)/2)
|
|
return _norm_pdf(x + c) + _norm_pdf(x-c)
|
|
|
|
def _cdf(self, x, c):
|
|
sqrt_two = np.sqrt(2)
|
|
return 0.5 * (sc.erf((x - c)/sqrt_two) + sc.erf((x + c)/sqrt_two))
|
|
|
|
def _sf(self, x, c):
|
|
return _norm_sf(x - c) + _norm_sf(x + c)
|
|
|
|
def _stats(self, c):
|
|
# Regina C. Elandt, Technometrics 3, 551 (1961)
|
|
# https://www.jstor.org/stable/1266561
|
|
#
|
|
c2 = c*c
|
|
expfac = np.exp(-0.5*c2) / np.sqrt(2.*np.pi)
|
|
|
|
mu = 2.*expfac + c * sc.erf(c/np.sqrt(2))
|
|
mu2 = c2 + 1 - mu*mu
|
|
|
|
g1 = 2. * (mu*mu*mu - c2*mu - expfac)
|
|
g1 /= np.power(mu2, 1.5)
|
|
|
|
g2 = c2 * (c2 + 6.) + 3 + 8.*expfac*mu
|
|
g2 += (2. * (c2 - 3.) - 3. * mu**2) * mu**2
|
|
g2 = g2 / mu2**2.0 - 3.
|
|
|
|
return mu, mu2, g1, g2
|
|
|
|
|
|
foldnorm = foldnorm_gen(a=0.0, name='foldnorm')
|
|
|
|
|
|
class weibull_min_gen(rv_continuous):
|
|
r"""Weibull minimum continuous random variable.
|
|
|
|
The Weibull Minimum Extreme Value distribution, from extreme value theory
|
|
(Fisher-Gnedenko theorem), is also often simply called the Weibull
|
|
distribution. It arises as the limiting distribution of the rescaled
|
|
minimum of iid random variables.
|
|
|
|
%(before_notes)s
|
|
|
|
See Also
|
|
--------
|
|
weibull_max, numpy.random.Generator.weibull, exponweib
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `weibull_min` is:
|
|
|
|
.. math::
|
|
|
|
f(x, c) = c x^{c-1} \exp(-x^c)
|
|
|
|
for :math:`x > 0`, :math:`c > 0`.
|
|
|
|
`weibull_min` takes ``c`` as a shape parameter for :math:`c`.
|
|
(named :math:`k` in Wikipedia article and :math:`a` in
|
|
``numpy.random.weibull``). Special shape values are :math:`c=1` and
|
|
:math:`c=2` where Weibull distribution reduces to the `expon` and
|
|
`rayleigh` distributions respectively.
|
|
|
|
Suppose ``X`` is an exponentially distributed random variable with
|
|
scale ``s``. Then ``Y = X**k`` is `weibull_min` distributed with shape
|
|
``c = 1/k`` and scale ``s**k``.
|
|
|
|
%(after_notes)s
|
|
|
|
References
|
|
----------
|
|
https://en.wikipedia.org/wiki/Weibull_distribution
|
|
|
|
https://en.wikipedia.org/wiki/Fisher-Tippett-Gnedenko_theorem
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
|
|
|
|
def _pdf(self, x, c):
|
|
# weibull_min.pdf(x, c) = c * x**(c-1) * exp(-x**c)
|
|
return c*pow(x, c-1)*np.exp(-pow(x, c))
|
|
|
|
def _logpdf(self, x, c):
|
|
return np.log(c) + sc.xlogy(c - 1, x) - pow(x, c)
|
|
|
|
def _cdf(self, x, c):
|
|
return -sc.expm1(-pow(x, c))
|
|
|
|
def _ppf(self, q, c):
|
|
return pow(-sc.log1p(-q), 1.0/c)
|
|
|
|
def _sf(self, x, c):
|
|
return np.exp(self._logsf(x, c))
|
|
|
|
def _logsf(self, x, c):
|
|
return -pow(x, c)
|
|
|
|
def _isf(self, q, c):
|
|
return (-np.log(q))**(1/c)
|
|
|
|
def _munp(self, n, c):
|
|
return sc.gamma(1.0+n*1.0/c)
|
|
|
|
def _entropy(self, c):
|
|
return -_EULER / c - np.log(c) + _EULER + 1
|
|
|
|
@extend_notes_in_docstring(rv_continuous, notes="""\
|
|
If ``method='mm'``, parameters fixed by the user are respected, and the
|
|
remaining parameters are used to match distribution and sample moments
|
|
where possible. For example, if the user fixes the location with
|
|
``floc``, the parameters will only match the distribution skewness and
|
|
variance to the sample skewness and variance; no attempt will be made
|
|
to match the means or minimize a norm of the errors.
|
|
\n\n""")
|
|
def fit(self, data, *args, **kwds):
|
|
|
|
if isinstance(data, CensoredData):
|
|
if data.num_censored() == 0:
|
|
data = data._uncensor()
|
|
else:
|
|
return super().fit(data, *args, **kwds)
|
|
|
|
if kwds.pop('superfit', False):
|
|
return super().fit(data, *args, **kwds)
|
|
|
|
# this extracts fixed shape, location, and scale however they
|
|
# are specified, and also leaves them in `kwds`
|
|
data, fc, floc, fscale = _check_fit_input_parameters(self, data,
|
|
args, kwds)
|
|
method = kwds.get("method", "mle").lower()
|
|
|
|
# See https://en.wikipedia.org/wiki/Weibull_distribution#Moments for
|
|
# moment formulas.
|
|
def skew(c):
|
|
gamma1 = sc.gamma(1+1/c)
|
|
gamma2 = sc.gamma(1+2/c)
|
|
gamma3 = sc.gamma(1+3/c)
|
|
num = 2 * gamma1**3 - 3*gamma1*gamma2 + gamma3
|
|
den = (gamma2 - gamma1**2)**(3/2)
|
|
return num/den
|
|
|
|
# For c in [1e2, 3e4], population skewness appears to approach
|
|
# asymptote near -1.139, but past c > 3e4, skewness begins to vary
|
|
# wildly, and MoM won't provide a good guess. Get out early.
|
|
s = stats.skew(data)
|
|
max_c = 1e4
|
|
s_min = skew(max_c)
|
|
if s < s_min and method != "mm" and fc is None and not args:
|
|
return super().fit(data, *args, **kwds)
|
|
|
|
# If method is method of moments, we don't need the user's guesses.
|
|
# Otherwise, extract the guesses from args and kwds.
|
|
if method == "mm":
|
|
c, loc, scale = None, None, None
|
|
else:
|
|
c = args[0] if len(args) else None
|
|
loc = kwds.pop('loc', None)
|
|
scale = kwds.pop('scale', None)
|
|
|
|
if fc is None and c is None: # not fixed and no guess: use MoM
|
|
# Solve for c that matches sample distribution skewness to sample
|
|
# skewness.
|
|
# we start having numerical issues with `weibull_min` with
|
|
# parameters outside this range - and not just in this method.
|
|
# We could probably improve the situation by doing everything
|
|
# in the log space, but that is for another time.
|
|
c = root_scalar(lambda c: skew(c) - s, bracket=[0.02, max_c],
|
|
method='bisect').root
|
|
elif fc is not None: # fixed: use it
|
|
c = fc
|
|
|
|
if fscale is None and scale is None:
|
|
v = np.var(data)
|
|
scale = np.sqrt(v / (sc.gamma(1+2/c) - sc.gamma(1+1/c)**2))
|
|
elif fscale is not None:
|
|
scale = fscale
|
|
|
|
if floc is None and loc is None:
|
|
m = np.mean(data)
|
|
loc = m - scale*sc.gamma(1 + 1/c)
|
|
elif floc is not None:
|
|
loc = floc
|
|
|
|
if method == 'mm':
|
|
return c, loc, scale
|
|
else:
|
|
# At this point, parameter "guesses" may equal the fixed parameters
|
|
# in kwds. No harm in passing them as guesses, too.
|
|
return super().fit(data, c, loc=loc, scale=scale, **kwds)
|
|
|
|
|
|
weibull_min = weibull_min_gen(a=0.0, name='weibull_min')
|
|
|
|
|
|
class truncweibull_min_gen(rv_continuous):
|
|
r"""A doubly truncated Weibull minimum continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
See Also
|
|
--------
|
|
weibull_min, truncexpon
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `truncweibull_min` is:
|
|
|
|
.. math::
|
|
|
|
f(x, a, b, c) = \frac{c x^{c-1} \exp(-x^c)}{\exp(-a^c) - \exp(-b^c)}
|
|
|
|
for :math:`a < x <= b`, :math:`0 \le a < b` and :math:`c > 0`.
|
|
|
|
`truncweibull_min` takes :math:`a`, :math:`b`, and :math:`c` as shape
|
|
parameters.
|
|
|
|
Notice that the truncation values, :math:`a` and :math:`b`, are defined in
|
|
standardized form:
|
|
|
|
.. math::
|
|
|
|
a = (u_l - loc)/scale
|
|
b = (u_r - loc)/scale
|
|
|
|
where :math:`u_l` and :math:`u_r` are the specific left and right
|
|
truncation values, respectively. In other words, the support of the
|
|
distribution becomes :math:`(a*scale + loc) < x <= (b*scale + loc)` when
|
|
:math:`loc` and/or :math:`scale` are provided.
|
|
|
|
%(after_notes)s
|
|
|
|
References
|
|
----------
|
|
|
|
.. [1] Rinne, H. "The Weibull Distribution: A Handbook". CRC Press (2009).
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _argcheck(self, c, a, b):
|
|
return (a >= 0.) & (b > a) & (c > 0.)
|
|
|
|
def _shape_info(self):
|
|
ic = _ShapeInfo("c", False, (0, np.inf), (False, False))
|
|
ia = _ShapeInfo("a", False, (0, np.inf), (True, False))
|
|
ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
|
|
return [ic, ia, ib]
|
|
|
|
def _fitstart(self, data):
|
|
# Arbitrary, but default a=b=c=1 is not valid
|
|
return super()._fitstart(data, args=(1, 0, 1))
|
|
|
|
def _get_support(self, c, a, b):
|
|
return a, b
|
|
|
|
def _pdf(self, x, c, a, b):
|
|
denum = (np.exp(-pow(a, c)) - np.exp(-pow(b, c)))
|
|
return (c * pow(x, c-1) * np.exp(-pow(x, c))) / denum
|
|
|
|
def _logpdf(self, x, c, a, b):
|
|
logdenum = np.log(np.exp(-pow(a, c)) - np.exp(-pow(b, c)))
|
|
return np.log(c) + sc.xlogy(c - 1, x) - pow(x, c) - logdenum
|
|
|
|
def _cdf(self, x, c, a, b):
|
|
num = (np.exp(-pow(a, c)) - np.exp(-pow(x, c)))
|
|
denum = (np.exp(-pow(a, c)) - np.exp(-pow(b, c)))
|
|
return num / denum
|
|
|
|
def _logcdf(self, x, c, a, b):
|
|
lognum = np.log(np.exp(-pow(a, c)) - np.exp(-pow(x, c)))
|
|
logdenum = np.log(np.exp(-pow(a, c)) - np.exp(-pow(b, c)))
|
|
return lognum - logdenum
|
|
|
|
def _sf(self, x, c, a, b):
|
|
num = (np.exp(-pow(x, c)) - np.exp(-pow(b, c)))
|
|
denum = (np.exp(-pow(a, c)) - np.exp(-pow(b, c)))
|
|
return num / denum
|
|
|
|
def _logsf(self, x, c, a, b):
|
|
lognum = np.log(np.exp(-pow(x, c)) - np.exp(-pow(b, c)))
|
|
logdenum = np.log(np.exp(-pow(a, c)) - np.exp(-pow(b, c)))
|
|
return lognum - logdenum
|
|
|
|
def _isf(self, q, c, a, b):
|
|
return pow(
|
|
-np.log((1 - q) * np.exp(-pow(b, c)) + q * np.exp(-pow(a, c))), 1/c
|
|
)
|
|
|
|
def _ppf(self, q, c, a, b):
|
|
return pow(
|
|
-np.log((1 - q) * np.exp(-pow(a, c)) + q * np.exp(-pow(b, c))), 1/c
|
|
)
|
|
|
|
def _munp(self, n, c, a, b):
|
|
gamma_fun = sc.gamma(n/c + 1.) * (
|
|
sc.gammainc(n/c + 1., pow(b, c)) - sc.gammainc(n/c + 1., pow(a, c))
|
|
)
|
|
denum = (np.exp(-pow(a, c)) - np.exp(-pow(b, c)))
|
|
return gamma_fun / denum
|
|
|
|
|
|
truncweibull_min = truncweibull_min_gen(name='truncweibull_min')
|
|
|
|
|
|
class weibull_max_gen(rv_continuous):
|
|
r"""Weibull maximum continuous random variable.
|
|
|
|
The Weibull Maximum Extreme Value distribution, from extreme value theory
|
|
(Fisher-Gnedenko theorem), is the limiting distribution of rescaled
|
|
maximum of iid random variables. This is the distribution of -X
|
|
if X is from the `weibull_min` function.
|
|
|
|
%(before_notes)s
|
|
|
|
See Also
|
|
--------
|
|
weibull_min
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `weibull_max` is:
|
|
|
|
.. math::
|
|
|
|
f(x, c) = c (-x)^{c-1} \exp(-(-x)^c)
|
|
|
|
for :math:`x < 0`, :math:`c > 0`.
|
|
|
|
`weibull_max` takes ``c`` as a shape parameter for :math:`c`.
|
|
|
|
%(after_notes)s
|
|
|
|
References
|
|
----------
|
|
https://en.wikipedia.org/wiki/Weibull_distribution
|
|
|
|
https://en.wikipedia.org/wiki/Fisher-Tippett-Gnedenko_theorem
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
|
|
|
|
def _pdf(self, x, c):
|
|
# weibull_max.pdf(x, c) = c * (-x)**(c-1) * exp(-(-x)**c)
|
|
return c*pow(-x, c-1)*np.exp(-pow(-x, c))
|
|
|
|
def _logpdf(self, x, c):
|
|
return np.log(c) + sc.xlogy(c-1, -x) - pow(-x, c)
|
|
|
|
def _cdf(self, x, c):
|
|
return np.exp(-pow(-x, c))
|
|
|
|
def _logcdf(self, x, c):
|
|
return -pow(-x, c)
|
|
|
|
def _sf(self, x, c):
|
|
return -sc.expm1(-pow(-x, c))
|
|
|
|
def _ppf(self, q, c):
|
|
return -pow(-np.log(q), 1.0/c)
|
|
|
|
def _munp(self, n, c):
|
|
val = sc.gamma(1.0+n*1.0/c)
|
|
if int(n) % 2:
|
|
sgn = -1
|
|
else:
|
|
sgn = 1
|
|
return sgn * val
|
|
|
|
def _entropy(self, c):
|
|
return -_EULER / c - np.log(c) + _EULER + 1
|
|
|
|
|
|
weibull_max = weibull_max_gen(b=0.0, name='weibull_max')
|
|
|
|
|
|
class genlogistic_gen(rv_continuous):
|
|
r"""A generalized logistic continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `genlogistic` is:
|
|
|
|
.. math::
|
|
|
|
f(x, c) = c \frac{\exp(-x)}
|
|
{(1 + \exp(-x))^{c+1}}
|
|
|
|
for real :math:`x` and :math:`c > 0`. In literature, different
|
|
generalizations of the logistic distribution can be found. This is the type 1
|
|
generalized logistic distribution according to [1]_. It is also referred to
|
|
as the skew-logistic distribution [2]_.
|
|
|
|
`genlogistic` takes ``c`` as a shape parameter for :math:`c`.
|
|
|
|
%(after_notes)s
|
|
|
|
References
|
|
----------
|
|
.. [1] Johnson et al. "Continuous Univariate Distributions", Volume 2,
|
|
Wiley. 1995.
|
|
.. [2] "Generalized Logistic Distribution", Wikipedia,
|
|
https://en.wikipedia.org/wiki/Generalized_logistic_distribution
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
|
|
|
|
def _pdf(self, x, c):
|
|
# genlogistic.pdf(x, c) = c * exp(-x) / (1 + exp(-x))**(c+1)
|
|
return np.exp(self._logpdf(x, c))
|
|
|
|
def _logpdf(self, x, c):
|
|
# Two mathematically equivalent expressions for log(pdf(x, c)):
|
|
# log(pdf(x, c)) = log(c) - x - (c + 1)*log(1 + exp(-x))
|
|
# = log(c) + c*x - (c + 1)*log(1 + exp(x))
|
|
mult = -(c - 1) * (x < 0) - 1
|
|
absx = np.abs(x)
|
|
return np.log(c) + mult*absx - (c+1) * sc.log1p(np.exp(-absx))
|
|
|
|
def _cdf(self, x, c):
|
|
Cx = (1+np.exp(-x))**(-c)
|
|
return Cx
|
|
|
|
def _logcdf(self, x, c):
|
|
return -c * np.log1p(np.exp(-x))
|
|
|
|
def _ppf(self, q, c):
|
|
return -np.log(sc.powm1(q, -1.0/c))
|
|
|
|
def _sf(self, x, c):
|
|
return -sc.expm1(self._logcdf(x, c))
|
|
|
|
def _isf(self, q, c):
|
|
return self._ppf(1 - q, c)
|
|
|
|
def _stats(self, c):
|
|
mu = _EULER + sc.psi(c)
|
|
mu2 = np.pi*np.pi/6.0 + sc.zeta(2, c)
|
|
g1 = -2*sc.zeta(3, c) + 2*_ZETA3
|
|
g1 /= np.power(mu2, 1.5)
|
|
g2 = np.pi**4/15.0 + 6*sc.zeta(4, c)
|
|
g2 /= mu2**2.0
|
|
return mu, mu2, g1, g2
|
|
|
|
def _entropy(self, c):
|
|
return _lazywhere(c < 8e6, (c, ),
|
|
lambda c: -np.log(c) + sc.psi(c + 1) + _EULER + 1,
|
|
# asymptotic expansion: psi(c) ~ log(c) - 1/(2 * c)
|
|
# a = -log(c) + psi(c + 1)
|
|
# = -log(c) + psi(c) + 1/c
|
|
# ~ -log(c) + log(c) - 1/(2 * c) + 1/c
|
|
# = 1/(2 * c)
|
|
f2=lambda c: 1/(2 * c) + _EULER + 1)
|
|
|
|
|
|
genlogistic = genlogistic_gen(name='genlogistic')
|
|
|
|
|
|
class genpareto_gen(rv_continuous):
|
|
r"""A generalized Pareto continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `genpareto` is:
|
|
|
|
.. math::
|
|
|
|
f(x, c) = (1 + c x)^{-1 - 1/c}
|
|
|
|
defined for :math:`x \ge 0` if :math:`c \ge 0`, and for
|
|
:math:`0 \le x \le -1/c` if :math:`c < 0`.
|
|
|
|
`genpareto` takes ``c`` as a shape parameter for :math:`c`.
|
|
|
|
For :math:`c=0`, `genpareto` reduces to the exponential
|
|
distribution, `expon`:
|
|
|
|
.. math::
|
|
|
|
f(x, 0) = \exp(-x)
|
|
|
|
For :math:`c=-1`, `genpareto` is uniform on ``[0, 1]``:
|
|
|
|
.. math::
|
|
|
|
f(x, -1) = 1
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _argcheck(self, c):
|
|
return np.isfinite(c)
|
|
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("c", False, (-np.inf, np.inf), (False, False))]
|
|
|
|
def _get_support(self, c):
|
|
c = np.asarray(c)
|
|
b = _lazywhere(c < 0, (c,),
|
|
lambda c: -1. / c,
|
|
np.inf)
|
|
a = np.where(c >= 0, self.a, self.a)
|
|
return a, b
|
|
|
|
def _pdf(self, x, c):
|
|
# genpareto.pdf(x, c) = (1 + c * x)**(-1 - 1/c)
|
|
return np.exp(self._logpdf(x, c))
|
|
|
|
def _logpdf(self, x, c):
|
|
return _lazywhere((x == x) & (c != 0), (x, c),
|
|
lambda x, c: -sc.xlog1py(c + 1., c*x) / c,
|
|
-x)
|
|
|
|
def _cdf(self, x, c):
|
|
return -sc.inv_boxcox1p(-x, -c)
|
|
|
|
def _sf(self, x, c):
|
|
return sc.inv_boxcox(-x, -c)
|
|
|
|
def _logsf(self, x, c):
|
|
return _lazywhere((x == x) & (c != 0), (x, c),
|
|
lambda x, c: -sc.log1p(c*x) / c,
|
|
-x)
|
|
|
|
def _ppf(self, q, c):
|
|
return -sc.boxcox1p(-q, -c)
|
|
|
|
def _isf(self, q, c):
|
|
return -sc.boxcox(q, -c)
|
|
|
|
def _stats(self, c, moments='mv'):
|
|
if 'm' not in moments:
|
|
m = None
|
|
else:
|
|
m = _lazywhere(c < 1, (c,),
|
|
lambda xi: 1/(1 - xi),
|
|
np.inf)
|
|
if 'v' not in moments:
|
|
v = None
|
|
else:
|
|
v = _lazywhere(c < 1/2, (c,),
|
|
lambda xi: 1 / (1 - xi)**2 / (1 - 2*xi),
|
|
np.nan)
|
|
if 's' not in moments:
|
|
s = None
|
|
else:
|
|
s = _lazywhere(c < 1/3, (c,),
|
|
lambda xi: (2 * (1 + xi) * np.sqrt(1 - 2*xi) /
|
|
(1 - 3*xi)),
|
|
np.nan)
|
|
if 'k' not in moments:
|
|
k = None
|
|
else:
|
|
k = _lazywhere(c < 1/4, (c,),
|
|
lambda xi: (3 * (1 - 2*xi) * (2*xi**2 + xi + 3) /
|
|
(1 - 3*xi) / (1 - 4*xi) - 3),
|
|
np.nan)
|
|
return m, v, s, k
|
|
|
|
def _munp(self, n, c):
|
|
def __munp(n, c):
|
|
val = 0.0
|
|
k = np.arange(0, n + 1)
|
|
for ki, cnk in zip(k, sc.comb(n, k)):
|
|
val = val + cnk * (-1) ** ki / (1.0 - c * ki)
|
|
return np.where(c * n < 1, val * (-1.0 / c) ** n, np.inf)
|
|
return _lazywhere(c != 0, (c,),
|
|
lambda c: __munp(n, c),
|
|
sc.gamma(n + 1))
|
|
|
|
def _entropy(self, c):
|
|
return 1. + c
|
|
|
|
|
|
genpareto = genpareto_gen(a=0.0, name='genpareto')
|
|
|
|
|
|
class genexpon_gen(rv_continuous):
|
|
r"""A generalized exponential continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `genexpon` is:
|
|
|
|
.. math::
|
|
|
|
f(x, a, b, c) = (a + b (1 - \exp(-c x)))
|
|
\exp(-a x - b x + \frac{b}{c} (1-\exp(-c x)))
|
|
|
|
for :math:`x \ge 0`, :math:`a, b, c > 0`.
|
|
|
|
`genexpon` takes :math:`a`, :math:`b` and :math:`c` as shape parameters.
|
|
|
|
%(after_notes)s
|
|
|
|
References
|
|
----------
|
|
H.K. Ryu, "An Extension of Marshall and Olkin's Bivariate Exponential
|
|
Distribution", Journal of the American Statistical Association, 1993.
|
|
|
|
N. Balakrishnan, Asit P. Basu (editors), *The Exponential Distribution:
|
|
Theory, Methods and Applications*, Gordon and Breach, 1995.
|
|
ISBN 10: 2884491929
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
|
|
ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
|
|
ic = _ShapeInfo("c", False, (0, np.inf), (False, False))
|
|
return [ia, ib, ic]
|
|
|
|
def _pdf(self, x, a, b, c):
|
|
# genexpon.pdf(x, a, b, c) = (a + b * (1 - exp(-c*x))) * \
|
|
# exp(-a*x - b*x + b/c * (1-exp(-c*x)))
|
|
return (a + b*(-sc.expm1(-c*x)))*np.exp((-a-b)*x +
|
|
b*(-sc.expm1(-c*x))/c)
|
|
|
|
def _logpdf(self, x, a, b, c):
|
|
return np.log(a+b*(-sc.expm1(-c*x))) + (-a-b)*x+b*(-sc.expm1(-c*x))/c
|
|
|
|
def _cdf(self, x, a, b, c):
|
|
return -sc.expm1((-a-b)*x + b*(-sc.expm1(-c*x))/c)
|
|
|
|
def _ppf(self, p, a, b, c):
|
|
s = a + b
|
|
t = (b - c*np.log1p(-p))/s
|
|
return (t + sc.lambertw(-b/s * np.exp(-t)).real)/c
|
|
|
|
def _sf(self, x, a, b, c):
|
|
return np.exp((-a-b)*x + b*(-sc.expm1(-c*x))/c)
|
|
|
|
def _isf(self, p, a, b, c):
|
|
s = a + b
|
|
t = (b - c*np.log(p))/s
|
|
return (t + sc.lambertw(-b/s * np.exp(-t)).real)/c
|
|
|
|
|
|
genexpon = genexpon_gen(a=0.0, name='genexpon')
|
|
|
|
|
|
class genextreme_gen(rv_continuous):
|
|
r"""A generalized extreme value continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
See Also
|
|
--------
|
|
gumbel_r
|
|
|
|
Notes
|
|
-----
|
|
For :math:`c=0`, `genextreme` is equal to `gumbel_r` with
|
|
probability density function
|
|
|
|
.. math::
|
|
|
|
f(x) = \exp(-\exp(-x)) \exp(-x),
|
|
|
|
where :math:`-\infty < x < \infty`.
|
|
|
|
For :math:`c \ne 0`, the probability density function for `genextreme` is:
|
|
|
|
.. math::
|
|
|
|
f(x, c) = \exp(-(1-c x)^{1/c}) (1-c x)^{1/c-1},
|
|
|
|
where :math:`-\infty < x \le 1/c` if :math:`c > 0` and
|
|
:math:`1/c \le x < \infty` if :math:`c < 0`.
|
|
|
|
Note that several sources and software packages use the opposite
|
|
convention for the sign of the shape parameter :math:`c`.
|
|
|
|
`genextreme` takes ``c`` as a shape parameter for :math:`c`.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _argcheck(self, c):
|
|
return np.isfinite(c)
|
|
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("c", False, (-np.inf, np.inf), (False, False))]
|
|
|
|
def _get_support(self, c):
|
|
_b = np.where(c > 0, 1.0 / np.maximum(c, _XMIN), np.inf)
|
|
_a = np.where(c < 0, 1.0 / np.minimum(c, -_XMIN), -np.inf)
|
|
return _a, _b
|
|
|
|
def _loglogcdf(self, x, c):
|
|
# Returns log(-log(cdf(x, c)))
|
|
return _lazywhere((x == x) & (c != 0), (x, c),
|
|
lambda x, c: sc.log1p(-c*x)/c, -x)
|
|
|
|
def _pdf(self, x, c):
|
|
# genextreme.pdf(x, c) =
|
|
# exp(-exp(-x))*exp(-x), for c==0
|
|
# exp(-(1-c*x)**(1/c))*(1-c*x)**(1/c-1), for x \le 1/c, c > 0
|
|
return np.exp(self._logpdf(x, c))
|
|
|
|
def _logpdf(self, x, c):
|
|
cx = _lazywhere((x == x) & (c != 0), (x, c), lambda x, c: c*x, 0.0)
|
|
logex2 = sc.log1p(-cx)
|
|
logpex2 = self._loglogcdf(x, c)
|
|
pex2 = np.exp(logpex2)
|
|
# Handle special cases
|
|
np.putmask(logpex2, (c == 0) & (x == -np.inf), 0.0)
|
|
logpdf = _lazywhere(~((cx == 1) | (cx == -np.inf)),
|
|
(pex2, logpex2, logex2),
|
|
lambda pex2, lpex2, lex2: -pex2 + lpex2 - lex2,
|
|
fillvalue=-np.inf)
|
|
np.putmask(logpdf, (c == 1) & (x == 1), 0.0)
|
|
return logpdf
|
|
|
|
def _logcdf(self, x, c):
|
|
return -np.exp(self._loglogcdf(x, c))
|
|
|
|
def _cdf(self, x, c):
|
|
return np.exp(self._logcdf(x, c))
|
|
|
|
def _sf(self, x, c):
|
|
return -sc.expm1(self._logcdf(x, c))
|
|
|
|
def _ppf(self, q, c):
|
|
x = -np.log(-np.log(q))
|
|
return _lazywhere((x == x) & (c != 0), (x, c),
|
|
lambda x, c: -sc.expm1(-c * x) / c, x)
|
|
|
|
def _isf(self, q, c):
|
|
x = -np.log(-sc.log1p(-q))
|
|
return _lazywhere((x == x) & (c != 0), (x, c),
|
|
lambda x, c: -sc.expm1(-c * x) / c, x)
|
|
|
|
def _stats(self, c):
|
|
def g(n):
|
|
return sc.gamma(n * c + 1)
|
|
g1 = g(1)
|
|
g2 = g(2)
|
|
g3 = g(3)
|
|
g4 = g(4)
|
|
g2mg12 = np.where(abs(c) < 1e-7, (c*np.pi)**2.0/6.0, g2-g1**2.0)
|
|
gam2k = np.where(abs(c) < 1e-7, np.pi**2.0/6.0,
|
|
sc.expm1(sc.gammaln(2.0*c+1.0)-2*sc.gammaln(c + 1.0))/c**2.0)
|
|
eps = 1e-14
|
|
gamk = np.where(abs(c) < eps, -_EULER, sc.expm1(sc.gammaln(c + 1))/c)
|
|
|
|
m = np.where(c < -1.0, np.nan, -gamk)
|
|
v = np.where(c < -0.5, np.nan, g1**2.0*gam2k)
|
|
|
|
# skewness
|
|
sk1 = _lazywhere(c >= -1./3,
|
|
(c, g1, g2, g3, g2mg12),
|
|
lambda c, g1, g2, g3, g2mg12:
|
|
np.sign(c)*(-g3 + (g2 + 2*g2mg12)*g1)/g2mg12**1.5,
|
|
fillvalue=np.nan)
|
|
sk = np.where(abs(c) <= eps**0.29, 12*np.sqrt(6)*_ZETA3/np.pi**3, sk1)
|
|
|
|
# kurtosis
|
|
ku1 = _lazywhere(c >= -1./4,
|
|
(g1, g2, g3, g4, g2mg12),
|
|
lambda g1, g2, g3, g4, g2mg12:
|
|
(g4 + (-4*g3 + 3*(g2 + g2mg12)*g1)*g1)/g2mg12**2,
|
|
fillvalue=np.nan)
|
|
ku = np.where(abs(c) <= (eps)**0.23, 12.0/5.0, ku1-3.0)
|
|
return m, v, sk, ku
|
|
|
|
def _fitstart(self, data):
|
|
if isinstance(data, CensoredData):
|
|
data = data._uncensor()
|
|
# This is better than the default shape of (1,).
|
|
g = _skew(data)
|
|
if g < 0:
|
|
a = 0.5
|
|
else:
|
|
a = -0.5
|
|
return super()._fitstart(data, args=(a,))
|
|
|
|
def _munp(self, n, c):
|
|
k = np.arange(0, n+1)
|
|
vals = 1.0/c**n * np.sum(
|
|
sc.comb(n, k) * (-1)**k * sc.gamma(c*k + 1),
|
|
axis=0)
|
|
return np.where(c*n > -1, vals, np.inf)
|
|
|
|
def _entropy(self, c):
|
|
return _EULER*(1 - c) + 1
|
|
|
|
|
|
genextreme = genextreme_gen(name='genextreme')
|
|
|
|
|
|
def _digammainv(y):
|
|
"""Inverse of the digamma function (real positive arguments only).
|
|
|
|
This function is used in the `fit` method of `gamma_gen`.
|
|
The function uses either optimize.fsolve or optimize.newton
|
|
to solve `sc.digamma(x) - y = 0`. There is probably room for
|
|
improvement, but currently it works over a wide range of y:
|
|
|
|
>>> import numpy as np
|
|
>>> rng = np.random.default_rng()
|
|
>>> y = 64*rng.standard_normal(1000000)
|
|
>>> y.min(), y.max()
|
|
(-311.43592651416662, 351.77388222276869)
|
|
>>> x = [_digammainv(t) for t in y]
|
|
>>> np.abs(sc.digamma(x) - y).max()
|
|
1.1368683772161603e-13
|
|
|
|
"""
|
|
_em = 0.5772156649015328606065120
|
|
|
|
def func(x):
|
|
return sc.digamma(x) - y
|
|
|
|
if y > -0.125:
|
|
x0 = np.exp(y) + 0.5
|
|
if y < 10:
|
|
# Some experimentation shows that newton reliably converges
|
|
# must faster than fsolve in this y range. For larger y,
|
|
# newton sometimes fails to converge.
|
|
value = optimize.newton(func, x0, tol=1e-10)
|
|
return value
|
|
elif y > -3:
|
|
x0 = np.exp(y/2.332) + 0.08661
|
|
else:
|
|
x0 = 1.0 / (-y - _em)
|
|
|
|
value, info, ier, mesg = optimize.fsolve(func, x0, xtol=1e-11,
|
|
full_output=True)
|
|
if ier != 1:
|
|
raise RuntimeError("_digammainv: fsolve failed, y = %r" % y)
|
|
|
|
return value[0]
|
|
|
|
|
|
## Gamma (Use MATLAB and MATHEMATICA (b=theta=scale, a=alpha=shape) definition)
|
|
|
|
## gamma(a, loc, scale) with a an integer is the Erlang distribution
|
|
## gamma(1, loc, scale) is the Exponential distribution
|
|
## gamma(df/2, 0, 2) is the chi2 distribution with df degrees of freedom.
|
|
|
|
class gamma_gen(rv_continuous):
|
|
r"""A gamma continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
See Also
|
|
--------
|
|
erlang, expon
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `gamma` is:
|
|
|
|
.. math::
|
|
|
|
f(x, a) = \frac{x^{a-1} e^{-x}}{\Gamma(a)}
|
|
|
|
for :math:`x \ge 0`, :math:`a > 0`. Here :math:`\Gamma(a)` refers to the
|
|
gamma function.
|
|
|
|
`gamma` takes ``a`` as a shape parameter for :math:`a`.
|
|
|
|
When :math:`a` is an integer, `gamma` reduces to the Erlang
|
|
distribution, and when :math:`a=1` to the exponential distribution.
|
|
|
|
Gamma distributions are sometimes parameterized with two variables,
|
|
with a probability density function of:
|
|
|
|
.. math::
|
|
|
|
f(x, \alpha, \beta) =
|
|
\frac{\beta^\alpha x^{\alpha - 1} e^{-\beta x }}{\Gamma(\alpha)}
|
|
|
|
Note that this parameterization is equivalent to the above, with
|
|
``scale = 1 / beta``.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("a", False, (0, np.inf), (False, False))]
|
|
|
|
def _rvs(self, a, size=None, random_state=None):
|
|
return random_state.standard_gamma(a, size)
|
|
|
|
def _pdf(self, x, a):
|
|
# gamma.pdf(x, a) = x**(a-1) * exp(-x) / gamma(a)
|
|
return np.exp(self._logpdf(x, a))
|
|
|
|
def _logpdf(self, x, a):
|
|
return sc.xlogy(a-1.0, x) - x - sc.gammaln(a)
|
|
|
|
def _cdf(self, x, a):
|
|
return sc.gammainc(a, x)
|
|
|
|
def _sf(self, x, a):
|
|
return sc.gammaincc(a, x)
|
|
|
|
def _ppf(self, q, a):
|
|
return sc.gammaincinv(a, q)
|
|
|
|
def _isf(self, q, a):
|
|
return sc.gammainccinv(a, q)
|
|
|
|
def _stats(self, a):
|
|
return a, a, 2.0/np.sqrt(a), 6.0/a
|
|
|
|
def _entropy(self, a):
|
|
|
|
def regular_formula(a):
|
|
return sc.psi(a) * (1-a) + a + sc.gammaln(a)
|
|
|
|
def asymptotic_formula(a):
|
|
# plug in above formula the expansions:
|
|
# psi(a) ~ ln(a) - 1/2a - 1/12a^2 + 1/120a^4
|
|
# gammaln(a) ~ a * ln(a) - a - 1/2 * ln(a) + 1/2 ln(2 * pi) +
|
|
# 1/12a - 1/360a^3
|
|
return (0.5 * (1. + np.log(2*np.pi) + np.log(a)) - 1/(3 * a)
|
|
- (a**-2.)/12 - (a**-3.)/90 + (a**-4.)/120)
|
|
|
|
return _lazywhere(a < 250, (a, ), regular_formula,
|
|
f2=asymptotic_formula)
|
|
|
|
def _fitstart(self, data):
|
|
# The skewness of the gamma distribution is `2 / np.sqrt(a)`.
|
|
# We invert that to estimate the shape `a` using the skewness
|
|
# of the data. The formula is regularized with 1e-8 in the
|
|
# denominator to allow for degenerate data where the skewness
|
|
# is close to 0.
|
|
if isinstance(data, CensoredData):
|
|
data = data._uncensor()
|
|
sk = _skew(data)
|
|
a = 4 / (1e-8 + sk**2)
|
|
return super()._fitstart(data, args=(a,))
|
|
|
|
@extend_notes_in_docstring(rv_continuous, notes="""\
|
|
When the location is fixed by using the argument `floc`
|
|
and `method='MLE'`, this
|
|
function uses explicit formulas or solves a simpler numerical
|
|
problem than the full ML optimization problem. So in that case,
|
|
the `optimizer`, `loc` and `scale` arguments are ignored.
|
|
\n\n""")
|
|
def fit(self, data, *args, **kwds):
|
|
floc = kwds.get('floc', None)
|
|
method = kwds.get('method', 'mle')
|
|
|
|
if (isinstance(data, CensoredData) or
|
|
floc is None and method.lower() != 'mm'):
|
|
# loc is not fixed or we're not doing standard MLE.
|
|
# Use the default fit method.
|
|
return super().fit(data, *args, **kwds)
|
|
|
|
# We already have this value, so just pop it from kwds.
|
|
kwds.pop('floc', None)
|
|
|
|
f0 = _get_fixed_fit_value(kwds, ['f0', 'fa', 'fix_a'])
|
|
fscale = kwds.pop('fscale', None)
|
|
|
|
_remove_optimizer_parameters(kwds)
|
|
|
|
if f0 is not None and floc is not None and fscale is not None:
|
|
# This check is for consistency with `rv_continuous.fit`.
|
|
# Without this check, this function would just return the
|
|
# parameters that were given.
|
|
raise ValueError("All parameters fixed. There is nothing to "
|
|
"optimize.")
|
|
|
|
# Fixed location is handled by shifting the data.
|
|
data = np.asarray(data)
|
|
|
|
if not np.isfinite(data).all():
|
|
raise ValueError("The data contains non-finite values.")
|
|
|
|
# Use explicit formulas for mm (gh-19884)
|
|
if method.lower() == 'mm':
|
|
m1 = np.mean(data)
|
|
m2 = np.var(data)
|
|
m3 = np.mean((data - m1) ** 3)
|
|
a, loc, scale = f0, floc, fscale
|
|
# Three unknowns
|
|
if a is None and loc is None and scale is None:
|
|
scale = m3 / (2 * m2)
|
|
# Two unknowns
|
|
if loc is None and scale is None:
|
|
scale = np.sqrt(m2 / a)
|
|
if a is None and scale is None:
|
|
scale = m2 / (m1 - loc)
|
|
if a is None and loc is None:
|
|
a = m2 / (scale ** 2)
|
|
# One unknown
|
|
if a is None:
|
|
a = (m1 - loc) / scale
|
|
if loc is None:
|
|
loc = m1 - a * scale
|
|
if scale is None:
|
|
scale = (m1 - loc) / a
|
|
return a, loc, scale
|
|
|
|
# Special case: loc is fixed.
|
|
|
|
# NB: data == loc is ok if a >= 1; the below check is more strict.
|
|
if np.any(data <= floc):
|
|
raise FitDataError("gamma", lower=floc, upper=np.inf)
|
|
|
|
if floc != 0:
|
|
# Don't do the subtraction in-place, because `data` might be a
|
|
# view of the input array.
|
|
data = data - floc
|
|
xbar = data.mean()
|
|
|
|
# Three cases to handle:
|
|
# * shape and scale both free
|
|
# * shape fixed, scale free
|
|
# * shape free, scale fixed
|
|
|
|
if fscale is None:
|
|
# scale is free
|
|
if f0 is not None:
|
|
# shape is fixed
|
|
a = f0
|
|
else:
|
|
# shape and scale are both free.
|
|
# The MLE for the shape parameter `a` is the solution to:
|
|
# np.log(a) - sc.digamma(a) - np.log(xbar) +
|
|
# np.log(data).mean() = 0
|
|
s = np.log(xbar) - np.log(data).mean()
|
|
aest = (3-s + np.sqrt((s-3)**2 + 24*s)) / (12*s)
|
|
xa = aest*(1-0.4)
|
|
xb = aest*(1+0.4)
|
|
a = optimize.brentq(lambda a: np.log(a) - sc.digamma(a) - s,
|
|
xa, xb, disp=0)
|
|
|
|
# The MLE for the scale parameter is just the data mean
|
|
# divided by the shape parameter.
|
|
scale = xbar / a
|
|
else:
|
|
# scale is fixed, shape is free
|
|
# The MLE for the shape parameter `a` is the solution to:
|
|
# sc.digamma(a) - np.log(data).mean() + np.log(fscale) = 0
|
|
c = np.log(data).mean() - np.log(fscale)
|
|
a = _digammainv(c)
|
|
scale = fscale
|
|
|
|
return a, floc, scale
|
|
|
|
|
|
gamma = gamma_gen(a=0.0, name='gamma')
|
|
|
|
|
|
class erlang_gen(gamma_gen):
|
|
"""An Erlang continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
See Also
|
|
--------
|
|
gamma
|
|
|
|
Notes
|
|
-----
|
|
The Erlang distribution is a special case of the Gamma distribution, with
|
|
the shape parameter `a` an integer. Note that this restriction is not
|
|
enforced by `erlang`. It will, however, generate a warning the first time
|
|
a non-integer value is used for the shape parameter.
|
|
|
|
Refer to `gamma` for examples.
|
|
|
|
"""
|
|
|
|
def _argcheck(self, a):
|
|
allint = np.all(np.floor(a) == a)
|
|
if not allint:
|
|
# An Erlang distribution shouldn't really have a non-integer
|
|
# shape parameter, so warn the user.
|
|
message = ('The shape parameter of the erlang distribution '
|
|
f'has been given a non-integer value {a!r}.')
|
|
warnings.warn(message, RuntimeWarning, stacklevel=3)
|
|
return a > 0
|
|
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("a", True, (1, np.inf), (True, False))]
|
|
|
|
def _fitstart(self, data):
|
|
# Override gamma_gen_fitstart so that an integer initial value is
|
|
# used. (Also regularize the division, to avoid issues when
|
|
# _skew(data) is 0 or close to 0.)
|
|
if isinstance(data, CensoredData):
|
|
data = data._uncensor()
|
|
a = int(4.0 / (1e-8 + _skew(data)**2))
|
|
return super(gamma_gen, self)._fitstart(data, args=(a,))
|
|
|
|
# Trivial override of the fit method, so we can monkey-patch its
|
|
# docstring.
|
|
@extend_notes_in_docstring(rv_continuous, notes="""\
|
|
The Erlang distribution is generally defined to have integer values
|
|
for the shape parameter. This is not enforced by the `erlang` class.
|
|
When fitting the distribution, it will generally return a non-integer
|
|
value for the shape parameter. By using the keyword argument
|
|
`f0=<integer>`, the fit method can be constrained to fit the data to
|
|
a specific integer shape parameter.""")
|
|
def fit(self, data, *args, **kwds):
|
|
return super().fit(data, *args, **kwds)
|
|
|
|
|
|
erlang = erlang_gen(a=0.0, name='erlang')
|
|
|
|
|
|
class gengamma_gen(rv_continuous):
|
|
r"""A generalized gamma continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
See Also
|
|
--------
|
|
gamma, invgamma, weibull_min
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `gengamma` is ([1]_):
|
|
|
|
.. math::
|
|
|
|
f(x, a, c) = \frac{|c| x^{c a-1} \exp(-x^c)}{\Gamma(a)}
|
|
|
|
for :math:`x \ge 0`, :math:`a > 0`, and :math:`c \ne 0`.
|
|
:math:`\Gamma` is the gamma function (`scipy.special.gamma`).
|
|
|
|
`gengamma` takes :math:`a` and :math:`c` as shape parameters.
|
|
|
|
%(after_notes)s
|
|
|
|
References
|
|
----------
|
|
.. [1] E.W. Stacy, "A Generalization of the Gamma Distribution",
|
|
Annals of Mathematical Statistics, Vol 33(3), pp. 1187--1192.
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _argcheck(self, a, c):
|
|
return (a > 0) & (c != 0)
|
|
|
|
def _shape_info(self):
|
|
ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
|
|
ic = _ShapeInfo("c", False, (-np.inf, np.inf), (False, False))
|
|
return [ia, ic]
|
|
|
|
def _pdf(self, x, a, c):
|
|
return np.exp(self._logpdf(x, a, c))
|
|
|
|
def _logpdf(self, x, a, c):
|
|
return _lazywhere((x != 0) | (c > 0), (x, c),
|
|
lambda x, c: (np.log(abs(c)) + sc.xlogy(c*a - 1, x)
|
|
- x**c - sc.gammaln(a)),
|
|
fillvalue=-np.inf)
|
|
|
|
def _cdf(self, x, a, c):
|
|
xc = x**c
|
|
val1 = sc.gammainc(a, xc)
|
|
val2 = sc.gammaincc(a, xc)
|
|
return np.where(c > 0, val1, val2)
|
|
|
|
def _rvs(self, a, c, size=None, random_state=None):
|
|
r = random_state.standard_gamma(a, size=size)
|
|
return r**(1./c)
|
|
|
|
def _sf(self, x, a, c):
|
|
xc = x**c
|
|
val1 = sc.gammainc(a, xc)
|
|
val2 = sc.gammaincc(a, xc)
|
|
return np.where(c > 0, val2, val1)
|
|
|
|
def _ppf(self, q, a, c):
|
|
val1 = sc.gammaincinv(a, q)
|
|
val2 = sc.gammainccinv(a, q)
|
|
return np.where(c > 0, val1, val2)**(1.0/c)
|
|
|
|
def _isf(self, q, a, c):
|
|
val1 = sc.gammaincinv(a, q)
|
|
val2 = sc.gammainccinv(a, q)
|
|
return np.where(c > 0, val2, val1)**(1.0/c)
|
|
|
|
def _munp(self, n, a, c):
|
|
# Pochhammer symbol: sc.pocha,n) = gamma(a+n)/gamma(a)
|
|
return sc.poch(a, n*1.0/c)
|
|
|
|
def _entropy(self, a, c):
|
|
def regular(a, c):
|
|
val = sc.psi(a)
|
|
A = a * (1 - val) + val / c
|
|
B = sc.gammaln(a) - np.log(abs(c))
|
|
h = A + B
|
|
return h
|
|
|
|
def asymptotic(a, c):
|
|
# using asymptotic expansions for gammaln and psi (see gh-18093)
|
|
return (norm._entropy() - np.log(a)/2
|
|
- np.log(np.abs(c)) + (a**-1.)/6 - (a**-3.)/90
|
|
+ (np.log(a) - (a**-1.)/2 - (a**-2.)/12 + (a**-4.)/120)/c)
|
|
|
|
h = _lazywhere(a >= 2e2, (a, c), f=asymptotic, f2=regular)
|
|
return h
|
|
|
|
|
|
gengamma = gengamma_gen(a=0.0, name='gengamma')
|
|
|
|
|
|
class genhalflogistic_gen(rv_continuous):
|
|
r"""A generalized half-logistic continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `genhalflogistic` is:
|
|
|
|
.. math::
|
|
|
|
f(x, c) = \frac{2 (1 - c x)^{1/(c-1)}}{[1 + (1 - c x)^{1/c}]^2}
|
|
|
|
for :math:`0 \le x \le 1/c`, and :math:`c > 0`.
|
|
|
|
`genhalflogistic` takes ``c`` as a shape parameter for :math:`c`.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
|
|
|
|
def _get_support(self, c):
|
|
return self.a, 1.0/c
|
|
|
|
def _pdf(self, x, c):
|
|
# genhalflogistic.pdf(x, c) =
|
|
# 2 * (1-c*x)**(1/c-1) / (1+(1-c*x)**(1/c))**2
|
|
limit = 1.0/c
|
|
tmp = np.asarray(1-c*x)
|
|
tmp0 = tmp**(limit-1)
|
|
tmp2 = tmp0*tmp
|
|
return 2*tmp0 / (1+tmp2)**2
|
|
|
|
def _cdf(self, x, c):
|
|
limit = 1.0/c
|
|
tmp = np.asarray(1-c*x)
|
|
tmp2 = tmp**(limit)
|
|
return (1.0-tmp2) / (1+tmp2)
|
|
|
|
def _ppf(self, q, c):
|
|
return 1.0/c*(1-((1.0-q)/(1.0+q))**c)
|
|
|
|
def _entropy(self, c):
|
|
return 2 - (2*c+1)*np.log(2)
|
|
|
|
|
|
genhalflogistic = genhalflogistic_gen(a=0.0, name='genhalflogistic')
|
|
|
|
|
|
class genhyperbolic_gen(rv_continuous):
|
|
r"""A generalized hyperbolic continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
See Also
|
|
--------
|
|
t, norminvgauss, geninvgauss, laplace, cauchy
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `genhyperbolic` is:
|
|
|
|
.. math::
|
|
|
|
f(x, p, a, b) =
|
|
\frac{(a^2 - b^2)^{p/2}}
|
|
{\sqrt{2\pi}a^{p-1/2}
|
|
K_p\Big(\sqrt{a^2 - b^2}\Big)}
|
|
e^{bx} \times \frac{K_{p - 1/2}
|
|
(a \sqrt{1 + x^2})}
|
|
{(\sqrt{1 + x^2})^{1/2 - p}}
|
|
|
|
for :math:`x, p \in ( - \infty; \infty)`,
|
|
:math:`|b| < a` if :math:`p \ge 0`,
|
|
:math:`|b| \le a` if :math:`p < 0`.
|
|
:math:`K_{p}(.)` denotes the modified Bessel function of the second
|
|
kind and order :math:`p` (`scipy.special.kv`)
|
|
|
|
`genhyperbolic` takes ``p`` as a tail parameter,
|
|
``a`` as a shape parameter,
|
|
``b`` as a skewness parameter.
|
|
|
|
%(after_notes)s
|
|
|
|
The original parameterization of the Generalized Hyperbolic Distribution
|
|
is found in [1]_ as follows
|
|
|
|
.. math::
|
|
|
|
f(x, \lambda, \alpha, \beta, \delta, \mu) =
|
|
\frac{(\gamma/\delta)^\lambda}{\sqrt{2\pi}K_\lambda(\delta \gamma)}
|
|
e^{\beta (x - \mu)} \times \frac{K_{\lambda - 1/2}
|
|
(\alpha \sqrt{\delta^2 + (x - \mu)^2})}
|
|
{(\sqrt{\delta^2 + (x - \mu)^2} / \alpha)^{1/2 - \lambda}}
|
|
|
|
for :math:`x \in ( - \infty; \infty)`,
|
|
:math:`\gamma := \sqrt{\alpha^2 - \beta^2}`,
|
|
:math:`\lambda, \mu \in ( - \infty; \infty)`,
|
|
:math:`\delta \ge 0, |\beta| < \alpha` if :math:`\lambda \ge 0`,
|
|
:math:`\delta > 0, |\beta| \le \alpha` if :math:`\lambda < 0`.
|
|
|
|
The location-scale-based parameterization implemented in
|
|
SciPy is based on [2]_, where :math:`a = \alpha\delta`,
|
|
:math:`b = \beta\delta`, :math:`p = \lambda`,
|
|
:math:`scale=\delta` and :math:`loc=\mu`
|
|
|
|
Moments are implemented based on [3]_ and [4]_.
|
|
|
|
For the distributions that are a special case such as Student's t,
|
|
it is not recommended to rely on the implementation of genhyperbolic.
|
|
To avoid potential numerical problems and for performance reasons,
|
|
the methods of the specific distributions should be used.
|
|
|
|
References
|
|
----------
|
|
.. [1] O. Barndorff-Nielsen, "Hyperbolic Distributions and Distributions
|
|
on Hyperbolae", Scandinavian Journal of Statistics, Vol. 5(3),
|
|
pp. 151-157, 1978. https://www.jstor.org/stable/4615705
|
|
|
|
.. [2] Eberlein E., Prause K. (2002) The Generalized Hyperbolic Model:
|
|
Financial Derivatives and Risk Measures. In: Geman H., Madan D.,
|
|
Pliska S.R., Vorst T. (eds) Mathematical Finance - Bachelier
|
|
Congress 2000. Springer Finance. Springer, Berlin, Heidelberg.
|
|
:doi:`10.1007/978-3-662-12429-1_12`
|
|
|
|
.. [3] Scott, David J, Würtz, Diethelm, Dong, Christine and Tran,
|
|
Thanh Tam, (2009), Moments of the generalized hyperbolic
|
|
distribution, MPRA Paper, University Library of Munich, Germany,
|
|
https://EconPapers.repec.org/RePEc:pra:mprapa:19081.
|
|
|
|
.. [4] E. Eberlein and E. A. von Hammerstein. Generalized hyperbolic
|
|
and inverse Gaussian distributions: Limiting cases and approximation
|
|
of processes. FDM Preprint 80, April 2003. University of Freiburg.
|
|
https://freidok.uni-freiburg.de/fedora/objects/freidok:7974/datastreams/FILE1/content
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
|
|
def _argcheck(self, p, a, b):
|
|
return (np.logical_and(np.abs(b) < a, p >= 0)
|
|
| np.logical_and(np.abs(b) <= a, p < 0))
|
|
|
|
def _shape_info(self):
|
|
ip = _ShapeInfo("p", False, (-np.inf, np.inf), (False, False))
|
|
ia = _ShapeInfo("a", False, (0, np.inf), (True, False))
|
|
ib = _ShapeInfo("b", False, (-np.inf, np.inf), (False, False))
|
|
return [ip, ia, ib]
|
|
|
|
def _fitstart(self, data):
|
|
# Arbitrary, but the default p = a = b = 1 is not valid; the
|
|
# distribution requires |b| < a if p >= 0.
|
|
return super()._fitstart(data, args=(1, 1, 0.5))
|
|
|
|
def _logpdf(self, x, p, a, b):
|
|
# kve instead of kv works better for large values of p
|
|
# and smaller values of sqrt(a^2 - b^2)
|
|
@np.vectorize
|
|
def _logpdf_single(x, p, a, b):
|
|
return _stats.genhyperbolic_logpdf(x, p, a, b)
|
|
|
|
return _logpdf_single(x, p, a, b)
|
|
|
|
def _pdf(self, x, p, a, b):
|
|
# kve instead of kv works better for large values of p
|
|
# and smaller values of sqrt(a^2 - b^2)
|
|
@np.vectorize
|
|
def _pdf_single(x, p, a, b):
|
|
return _stats.genhyperbolic_pdf(x, p, a, b)
|
|
|
|
return _pdf_single(x, p, a, b)
|
|
|
|
# np.vectorize isn't currently designed to be used as a decorator,
|
|
# so use a lambda instead. This allows us to decorate the function
|
|
# with `np.vectorize` and still provide the `otypes` parameter.
|
|
# The first argument to `vectorize` is `func.__get__(object)` for
|
|
# compatibility with Python 3.9. In Python 3.10, this can be
|
|
# simplified to just `func`.
|
|
@lambda func: np.vectorize(func.__get__(object), otypes=[np.float64])
|
|
@staticmethod
|
|
def _integrate_pdf(x0, x1, p, a, b):
|
|
"""
|
|
Integrate the pdf of the genhyberbolic distribution from x0 to x1.
|
|
This is a private function used by _cdf() and _sf() only; either x0
|
|
will be -inf or x1 will be inf.
|
|
"""
|
|
user_data = np.array([p, a, b], float).ctypes.data_as(ctypes.c_void_p)
|
|
llc = LowLevelCallable.from_cython(_stats, '_genhyperbolic_pdf',
|
|
user_data)
|
|
d = np.sqrt((a + b)*(a - b))
|
|
mean = b/d * sc.kv(p + 1, d) / sc.kv(p, d)
|
|
epsrel = 1e-10
|
|
epsabs = 0
|
|
if x0 < mean < x1:
|
|
# If the interval includes the mean, integrate over the two
|
|
# intervals [x0, mean] and [mean, x1] and add. If we try to do
|
|
# the integral in one call of quad and the non-infinite endpoint
|
|
# is far in the tail, quad might return an incorrect result
|
|
# because it does not "see" the peak of the PDF.
|
|
intgrl = (integrate.quad(llc, x0, mean,
|
|
epsrel=epsrel, epsabs=epsabs)[0]
|
|
+ integrate.quad(llc, mean, x1,
|
|
epsrel=epsrel, epsabs=epsabs)[0])
|
|
else:
|
|
intgrl = integrate.quad(llc, x0, x1,
|
|
epsrel=epsrel, epsabs=epsabs)[0]
|
|
if np.isnan(intgrl):
|
|
msg = ("Infinite values encountered in scipy.special.kve. "
|
|
"Values replaced by NaN to avoid incorrect results.")
|
|
warnings.warn(msg, RuntimeWarning, stacklevel=3)
|
|
return max(0.0, min(1.0, intgrl))
|
|
|
|
def _cdf(self, x, p, a, b):
|
|
return self._integrate_pdf(-np.inf, x, p, a, b)
|
|
|
|
def _sf(self, x, p, a, b):
|
|
return self._integrate_pdf(x, np.inf, p, a, b)
|
|
|
|
def _rvs(self, p, a, b, size=None, random_state=None):
|
|
# note: X = b * V + sqrt(V) * X has a
|
|
# generalized hyperbolic distribution
|
|
# if X is standard normal and V is
|
|
# geninvgauss(p = p, b = t2, loc = loc, scale = t3)
|
|
t1 = np.float_power(a, 2) - np.float_power(b, 2)
|
|
# b in the GIG
|
|
t2 = np.float_power(t1, 0.5)
|
|
# scale in the GIG
|
|
t3 = np.float_power(t1, - 0.5)
|
|
gig = geninvgauss.rvs(
|
|
p=p,
|
|
b=t2,
|
|
scale=t3,
|
|
size=size,
|
|
random_state=random_state
|
|
)
|
|
normst = norm.rvs(size=size, random_state=random_state)
|
|
|
|
return b * gig + np.sqrt(gig) * normst
|
|
|
|
def _stats(self, p, a, b):
|
|
# https://mpra.ub.uni-muenchen.de/19081/1/MPRA_paper_19081.pdf
|
|
# https://freidok.uni-freiburg.de/fedora/objects/freidok:7974/datastreams/FILE1/content
|
|
# standardized moments
|
|
p, a, b = np.broadcast_arrays(p, a, b)
|
|
t1 = np.float_power(a, 2) - np.float_power(b, 2)
|
|
t1 = np.float_power(t1, 0.5)
|
|
t2 = np.float_power(1, 2) * np.float_power(t1, - 1)
|
|
integers = np.linspace(0, 4, 5)
|
|
# make integers perpendicular to existing dimensions
|
|
integers = integers.reshape(integers.shape + (1,) * p.ndim)
|
|
b0, b1, b2, b3, b4 = sc.kv(p + integers, t1)
|
|
r1, r2, r3, r4 = (b / b0 for b in (b1, b2, b3, b4))
|
|
|
|
m = b * t2 * r1
|
|
v = (
|
|
t2 * r1 + np.float_power(b, 2) * np.float_power(t2, 2) *
|
|
(r2 - np.float_power(r1, 2))
|
|
)
|
|
m3e = (
|
|
np.float_power(b, 3) * np.float_power(t2, 3) *
|
|
(r3 - 3 * b2 * b1 * np.float_power(b0, -2) +
|
|
2 * np.float_power(r1, 3)) +
|
|
3 * b * np.float_power(t2, 2) *
|
|
(r2 - np.float_power(r1, 2))
|
|
)
|
|
s = m3e * np.float_power(v, - 3 / 2)
|
|
m4e = (
|
|
np.float_power(b, 4) * np.float_power(t2, 4) *
|
|
(r4 - 4 * b3 * b1 * np.float_power(b0, - 2) +
|
|
6 * b2 * np.float_power(b1, 2) * np.float_power(b0, - 3) -
|
|
3 * np.float_power(r1, 4)) +
|
|
np.float_power(b, 2) * np.float_power(t2, 3) *
|
|
(6 * r3 - 12 * b2 * b1 * np.float_power(b0, - 2) +
|
|
6 * np.float_power(r1, 3)) +
|
|
3 * np.float_power(t2, 2) * r2
|
|
)
|
|
k = m4e * np.float_power(v, -2) - 3
|
|
|
|
return m, v, s, k
|
|
|
|
|
|
genhyperbolic = genhyperbolic_gen(name='genhyperbolic')
|
|
|
|
|
|
class gompertz_gen(rv_continuous):
|
|
r"""A Gompertz (or truncated Gumbel) continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `gompertz` is:
|
|
|
|
.. math::
|
|
|
|
f(x, c) = c \exp(x) \exp(-c (e^x-1))
|
|
|
|
for :math:`x \ge 0`, :math:`c > 0`.
|
|
|
|
`gompertz` takes ``c`` as a shape parameter for :math:`c`.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
|
|
|
|
def _pdf(self, x, c):
|
|
# gompertz.pdf(x, c) = c * exp(x) * exp(-c*(exp(x)-1))
|
|
return np.exp(self._logpdf(x, c))
|
|
|
|
def _logpdf(self, x, c):
|
|
return np.log(c) + x - c * sc.expm1(x)
|
|
|
|
def _cdf(self, x, c):
|
|
return -sc.expm1(-c * sc.expm1(x))
|
|
|
|
def _ppf(self, q, c):
|
|
return sc.log1p(-1.0 / c * sc.log1p(-q))
|
|
|
|
def _sf(self, x, c):
|
|
return np.exp(-c * sc.expm1(x))
|
|
|
|
def _isf(self, p, c):
|
|
return sc.log1p(-np.log(p)/c)
|
|
|
|
def _entropy(self, c):
|
|
return 1.0 - np.log(c) - sc._ufuncs._scaled_exp1(c)/c
|
|
|
|
|
|
gompertz = gompertz_gen(a=0.0, name='gompertz')
|
|
|
|
|
|
def _average_with_log_weights(x, logweights):
|
|
x = np.asarray(x)
|
|
logweights = np.asarray(logweights)
|
|
maxlogw = logweights.max()
|
|
weights = np.exp(logweights - maxlogw)
|
|
return np.average(x, weights=weights)
|
|
|
|
|
|
class gumbel_r_gen(rv_continuous):
|
|
r"""A right-skewed Gumbel continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
See Also
|
|
--------
|
|
gumbel_l, gompertz, genextreme
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `gumbel_r` is:
|
|
|
|
.. math::
|
|
|
|
f(x) = \exp(-(x + e^{-x}))
|
|
|
|
The Gumbel distribution is sometimes referred to as a type I Fisher-Tippett
|
|
distribution. It is also related to the extreme value distribution,
|
|
log-Weibull and Gompertz distributions.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return []
|
|
|
|
def _pdf(self, x):
|
|
# gumbel_r.pdf(x) = exp(-(x + exp(-x)))
|
|
return np.exp(self._logpdf(x))
|
|
|
|
def _logpdf(self, x):
|
|
return -x - np.exp(-x)
|
|
|
|
def _cdf(self, x):
|
|
return np.exp(-np.exp(-x))
|
|
|
|
def _logcdf(self, x):
|
|
return -np.exp(-x)
|
|
|
|
def _ppf(self, q):
|
|
return -np.log(-np.log(q))
|
|
|
|
def _sf(self, x):
|
|
return -sc.expm1(-np.exp(-x))
|
|
|
|
def _isf(self, p):
|
|
return -np.log(-np.log1p(-p))
|
|
|
|
def _stats(self):
|
|
return _EULER, np.pi*np.pi/6.0, 12*np.sqrt(6)/np.pi**3 * _ZETA3, 12.0/5
|
|
|
|
def _entropy(self):
|
|
# https://en.wikipedia.org/wiki/Gumbel_distribution
|
|
return _EULER + 1.
|
|
|
|
@_call_super_mom
|
|
@inherit_docstring_from(rv_continuous)
|
|
def fit(self, data, *args, **kwds):
|
|
data, floc, fscale = _check_fit_input_parameters(self, data,
|
|
args, kwds)
|
|
|
|
# By the method of maximum likelihood, the estimators of the
|
|
# location and scale are the roots of the equations defined in
|
|
# `func` and the value of the expression for `loc` that follows.
|
|
# The first `func` is a first order derivative of the log-likelihood
|
|
# equation and the second is from Source: Statistical Distributions,
|
|
# 3rd Edition. Evans, Hastings, and Peacock (2000), Page 101.
|
|
|
|
def get_loc_from_scale(scale):
|
|
return -scale * (sc.logsumexp(-data / scale) - np.log(len(data)))
|
|
|
|
if fscale is not None:
|
|
# if the scale is fixed, the location can be analytically
|
|
# determined.
|
|
scale = fscale
|
|
loc = get_loc_from_scale(scale)
|
|
else:
|
|
# A different function is solved depending on whether the location
|
|
# is fixed.
|
|
if floc is not None:
|
|
loc = floc
|
|
|
|
# equation to use if the location is fixed.
|
|
# note that one cannot use the equation in Evans, Hastings,
|
|
# and Peacock (2000) (since it assumes that the derivative
|
|
# w.r.t. the log-likelihood is zero). however, it is easy to
|
|
# derive the MLE condition directly if loc is fixed
|
|
def func(scale):
|
|
term1 = (loc - data) * np.exp((loc - data) / scale) + data
|
|
term2 = len(data) * (loc + scale)
|
|
return term1.sum() - term2
|
|
else:
|
|
|
|
# equation to use if both location and scale are free
|
|
def func(scale):
|
|
sdata = -data / scale
|
|
wavg = _average_with_log_weights(data, logweights=sdata)
|
|
return data.mean() - wavg - scale
|
|
|
|
# set brackets for `root_scalar` to use when optimizing over the
|
|
# scale such that a root is likely between them. Use user supplied
|
|
# guess or default 1.
|
|
brack_start = kwds.get('scale', 1)
|
|
lbrack, rbrack = brack_start / 2, brack_start * 2
|
|
|
|
# if a root is not between the brackets, iteratively expand them
|
|
# until they include a sign change, checking after each bracket is
|
|
# modified.
|
|
def interval_contains_root(lbrack, rbrack):
|
|
# return true if the signs disagree.
|
|
return (np.sign(func(lbrack)) !=
|
|
np.sign(func(rbrack)))
|
|
while (not interval_contains_root(lbrack, rbrack)
|
|
and (lbrack > 0 or rbrack < np.inf)):
|
|
lbrack /= 2
|
|
rbrack *= 2
|
|
|
|
res = optimize.root_scalar(func, bracket=(lbrack, rbrack),
|
|
rtol=1e-14, xtol=1e-14)
|
|
scale = res.root
|
|
loc = floc if floc is not None else get_loc_from_scale(scale)
|
|
return loc, scale
|
|
|
|
|
|
gumbel_r = gumbel_r_gen(name='gumbel_r')
|
|
|
|
|
|
class gumbel_l_gen(rv_continuous):
|
|
r"""A left-skewed Gumbel continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
See Also
|
|
--------
|
|
gumbel_r, gompertz, genextreme
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `gumbel_l` is:
|
|
|
|
.. math::
|
|
|
|
f(x) = \exp(x - e^x)
|
|
|
|
The Gumbel distribution is sometimes referred to as a type I Fisher-Tippett
|
|
distribution. It is also related to the extreme value distribution,
|
|
log-Weibull and Gompertz distributions.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
|
|
def _shape_info(self):
|
|
return []
|
|
|
|
def _pdf(self, x):
|
|
# gumbel_l.pdf(x) = exp(x - exp(x))
|
|
return np.exp(self._logpdf(x))
|
|
|
|
def _logpdf(self, x):
|
|
return x - np.exp(x)
|
|
|
|
def _cdf(self, x):
|
|
return -sc.expm1(-np.exp(x))
|
|
|
|
def _ppf(self, q):
|
|
return np.log(-sc.log1p(-q))
|
|
|
|
def _logsf(self, x):
|
|
return -np.exp(x)
|
|
|
|
def _sf(self, x):
|
|
return np.exp(-np.exp(x))
|
|
|
|
def _isf(self, x):
|
|
return np.log(-np.log(x))
|
|
|
|
def _stats(self):
|
|
return -_EULER, np.pi*np.pi/6.0, \
|
|
-12*np.sqrt(6)/np.pi**3 * _ZETA3, 12.0/5
|
|
|
|
def _entropy(self):
|
|
return _EULER + 1.
|
|
|
|
@_call_super_mom
|
|
@inherit_docstring_from(rv_continuous)
|
|
def fit(self, data, *args, **kwds):
|
|
# The fit method of `gumbel_r` can be used for this distribution with
|
|
# small modifications. The process to do this is
|
|
# 1. pass the sign negated data into `gumbel_r.fit`
|
|
# - if the location is fixed, it should also be negated.
|
|
# 2. negate the sign of the resulting location, leaving the scale
|
|
# unmodified.
|
|
# `gumbel_r.fit` holds necessary input checks.
|
|
|
|
if kwds.get('floc') is not None:
|
|
kwds['floc'] = -kwds['floc']
|
|
loc_r, scale_r, = gumbel_r.fit(-np.asarray(data), *args, **kwds)
|
|
return -loc_r, scale_r
|
|
|
|
|
|
gumbel_l = gumbel_l_gen(name='gumbel_l')
|
|
|
|
|
|
class halfcauchy_gen(rv_continuous):
|
|
r"""A Half-Cauchy continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `halfcauchy` is:
|
|
|
|
.. math::
|
|
|
|
f(x) = \frac{2}{\pi (1 + x^2)}
|
|
|
|
for :math:`x \ge 0`.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return []
|
|
|
|
def _pdf(self, x):
|
|
# halfcauchy.pdf(x) = 2 / (pi * (1 + x**2))
|
|
return 2.0/np.pi/(1.0+x*x)
|
|
|
|
def _logpdf(self, x):
|
|
return np.log(2.0/np.pi) - sc.log1p(x*x)
|
|
|
|
def _cdf(self, x):
|
|
return 2.0/np.pi*np.arctan(x)
|
|
|
|
def _ppf(self, q):
|
|
return np.tan(np.pi/2*q)
|
|
|
|
def _sf(self, x):
|
|
return 2.0/np.pi * np.arctan2(1, x)
|
|
|
|
def _isf(self, p):
|
|
return 1.0/np.tan(np.pi*p/2)
|
|
|
|
def _stats(self):
|
|
return np.inf, np.inf, np.nan, np.nan
|
|
|
|
def _entropy(self):
|
|
return np.log(2*np.pi)
|
|
|
|
@_call_super_mom
|
|
@inherit_docstring_from(rv_continuous)
|
|
def fit(self, data, *args, **kwds):
|
|
if kwds.pop('superfit', False):
|
|
return super().fit(data, *args, **kwds)
|
|
|
|
data, floc, fscale = _check_fit_input_parameters(self, data,
|
|
args, kwds)
|
|
|
|
# location is independent from the scale
|
|
data_min = np.min(data)
|
|
if floc is not None:
|
|
if data_min < floc:
|
|
# There are values that are less than the specified loc.
|
|
raise FitDataError("halfcauchy", lower=floc, upper=np.inf)
|
|
loc = floc
|
|
else:
|
|
# if not provided, location MLE is the minimal data point
|
|
loc = data_min
|
|
|
|
# find scale
|
|
def find_scale(loc, data):
|
|
shifted_data = data - loc
|
|
n = data.size
|
|
shifted_data_squared = np.square(shifted_data)
|
|
|
|
def fun_to_solve(scale):
|
|
denominator = scale**2 + shifted_data_squared
|
|
return 2 * np.sum(shifted_data_squared/denominator) - n
|
|
|
|
small = np.finfo(1.0).tiny**0.5 # avoid underflow
|
|
res = root_scalar(fun_to_solve, bracket=(small, np.max(shifted_data)))
|
|
return res.root
|
|
|
|
if fscale is not None:
|
|
scale = fscale
|
|
else:
|
|
scale = find_scale(loc, data)
|
|
|
|
return loc, scale
|
|
|
|
|
|
halfcauchy = halfcauchy_gen(a=0.0, name='halfcauchy')
|
|
|
|
|
|
class halflogistic_gen(rv_continuous):
|
|
r"""A half-logistic continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `halflogistic` is:
|
|
|
|
.. math::
|
|
|
|
f(x) = \frac{ 2 e^{-x} }{ (1+e^{-x})^2 }
|
|
= \frac{1}{2} \text{sech}(x/2)^2
|
|
|
|
for :math:`x \ge 0`.
|
|
|
|
%(after_notes)s
|
|
|
|
References
|
|
----------
|
|
.. [1] Asgharzadeh et al (2011). "Comparisons of Methods of Estimation for the
|
|
Half-Logistic Distribution". Selcuk J. Appl. Math. 93-108.
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return []
|
|
|
|
def _pdf(self, x):
|
|
# halflogistic.pdf(x) = 2 * exp(-x) / (1+exp(-x))**2
|
|
# = 1/2 * sech(x/2)**2
|
|
return np.exp(self._logpdf(x))
|
|
|
|
def _logpdf(self, x):
|
|
return np.log(2) - x - 2. * sc.log1p(np.exp(-x))
|
|
|
|
def _cdf(self, x):
|
|
return np.tanh(x/2.0)
|
|
|
|
def _ppf(self, q):
|
|
return 2*np.arctanh(q)
|
|
|
|
def _sf(self, x):
|
|
return 2 * sc.expit(-x)
|
|
|
|
def _isf(self, q):
|
|
return _lazywhere(q < 0.5, (q, ),
|
|
lambda q: -sc.logit(0.5 * q),
|
|
f2=lambda q: 2*np.arctanh(1 - q))
|
|
|
|
def _munp(self, n):
|
|
if n == 1:
|
|
return 2*np.log(2)
|
|
if n == 2:
|
|
return np.pi*np.pi/3.0
|
|
if n == 3:
|
|
return 9*_ZETA3
|
|
if n == 4:
|
|
return 7*np.pi**4 / 15.0
|
|
return 2*(1-pow(2.0, 1-n))*sc.gamma(n+1)*sc.zeta(n, 1)
|
|
|
|
def _entropy(self):
|
|
return 2-np.log(2)
|
|
|
|
@_call_super_mom
|
|
@inherit_docstring_from(rv_continuous)
|
|
def fit(self, data, *args, **kwds):
|
|
if kwds.pop('superfit', False):
|
|
return super().fit(data, *args, **kwds)
|
|
|
|
data, floc, fscale = _check_fit_input_parameters(self, data,
|
|
args, kwds)
|
|
|
|
def find_scale(data, loc):
|
|
# scale is solution to a fix point problem ([1] 2.6)
|
|
# use approximate MLE as starting point ([1] 3.1)
|
|
n_observations = data.shape[0]
|
|
sorted_data = np.sort(data, axis=0)
|
|
p = np.arange(1, n_observations + 1)/(n_observations + 1)
|
|
q = 1 - p
|
|
pp1 = 1 + p
|
|
alpha = p - 0.5 * q * pp1 * np.log(pp1 / q)
|
|
beta = 0.5 * q * pp1
|
|
sorted_data = sorted_data - loc
|
|
B = 2 * np.sum(alpha[1:] * sorted_data[1:])
|
|
C = 2 * np.sum(beta[1:] * sorted_data[1:]**2)
|
|
# starting guess
|
|
scale = ((B + np.sqrt(B**2 + 8 * n_observations * C))
|
|
/(4 * n_observations))
|
|
|
|
# relative tolerance of fix point iterator
|
|
rtol = 1e-8
|
|
relative_residual = 1
|
|
shifted_mean = sorted_data.mean() # y_mean - y_min
|
|
|
|
# find fix point by repeated application of eq. (2.6)
|
|
# simplify as
|
|
# exp(-x) / (1 + exp(-x)) = 1 / (1 + exp(x))
|
|
# = expit(-x))
|
|
while relative_residual > rtol:
|
|
sum_term = sorted_data * sc.expit(-sorted_data/scale)
|
|
scale_new = shifted_mean - 2/n_observations * sum_term.sum()
|
|
relative_residual = abs((scale - scale_new)/scale)
|
|
scale = scale_new
|
|
return scale
|
|
|
|
# location is independent from the scale
|
|
data_min = np.min(data)
|
|
if floc is not None:
|
|
if data_min < floc:
|
|
# There are values that are less than the specified loc.
|
|
raise FitDataError("halflogistic", lower=floc, upper=np.inf)
|
|
loc = floc
|
|
else:
|
|
# if not provided, location MLE is the minimal data point
|
|
loc = data_min
|
|
|
|
# scale depends on location
|
|
scale = fscale if fscale is not None else find_scale(data, loc)
|
|
|
|
return loc, scale
|
|
|
|
|
|
halflogistic = halflogistic_gen(a=0.0, name='halflogistic')
|
|
|
|
|
|
class halfnorm_gen(rv_continuous):
|
|
r"""A half-normal continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `halfnorm` is:
|
|
|
|
.. math::
|
|
|
|
f(x) = \sqrt{2/\pi} \exp(-x^2 / 2)
|
|
|
|
for :math:`x >= 0`.
|
|
|
|
`halfnorm` is a special case of `chi` with ``df=1``.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return []
|
|
|
|
def _rvs(self, size=None, random_state=None):
|
|
return abs(random_state.standard_normal(size=size))
|
|
|
|
def _pdf(self, x):
|
|
# halfnorm.pdf(x) = sqrt(2/pi) * exp(-x**2/2)
|
|
return np.sqrt(2.0/np.pi)*np.exp(-x*x/2.0)
|
|
|
|
def _logpdf(self, x):
|
|
return 0.5 * np.log(2.0/np.pi) - x*x/2.0
|
|
|
|
def _cdf(self, x):
|
|
return sc.erf(x / np.sqrt(2))
|
|
|
|
def _ppf(self, q):
|
|
return _norm_ppf((1+q)/2.0)
|
|
|
|
def _sf(self, x):
|
|
return 2 * _norm_sf(x)
|
|
|
|
def _isf(self, p):
|
|
return _norm_isf(p/2)
|
|
|
|
def _stats(self):
|
|
return (np.sqrt(2.0/np.pi),
|
|
1-2.0/np.pi,
|
|
np.sqrt(2)*(4-np.pi)/(np.pi-2)**1.5,
|
|
8*(np.pi-3)/(np.pi-2)**2)
|
|
|
|
def _entropy(self):
|
|
return 0.5*np.log(np.pi/2.0)+0.5
|
|
|
|
@_call_super_mom
|
|
@inherit_docstring_from(rv_continuous)
|
|
def fit(self, data, *args, **kwds):
|
|
if kwds.pop('superfit', False):
|
|
return super().fit(data, *args, **kwds)
|
|
|
|
data, floc, fscale = _check_fit_input_parameters(self, data,
|
|
args, kwds)
|
|
|
|
data_min = np.min(data)
|
|
|
|
if floc is not None:
|
|
if data_min < floc:
|
|
# There are values that are less than the specified loc.
|
|
raise FitDataError("halfnorm", lower=floc, upper=np.inf)
|
|
loc = floc
|
|
else:
|
|
loc = data_min
|
|
|
|
if fscale is not None:
|
|
scale = fscale
|
|
else:
|
|
scale = stats.moment(data, order=2, center=loc)**0.5
|
|
|
|
return loc, scale
|
|
|
|
|
|
halfnorm = halfnorm_gen(a=0.0, name='halfnorm')
|
|
|
|
|
|
class hypsecant_gen(rv_continuous):
|
|
r"""A hyperbolic secant continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `hypsecant` is:
|
|
|
|
.. math::
|
|
|
|
f(x) = \frac{1}{\pi} \text{sech}(x)
|
|
|
|
for a real number :math:`x`.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return []
|
|
|
|
def _pdf(self, x):
|
|
# hypsecant.pdf(x) = 1/pi * sech(x)
|
|
return 1.0/(np.pi*np.cosh(x))
|
|
|
|
def _cdf(self, x):
|
|
return 2.0/np.pi*np.arctan(np.exp(x))
|
|
|
|
def _ppf(self, q):
|
|
return np.log(np.tan(np.pi*q/2.0))
|
|
|
|
def _sf(self, x):
|
|
return 2.0/np.pi*np.arctan(np.exp(-x))
|
|
|
|
def _isf(self, q):
|
|
return -np.log(np.tan(np.pi*q/2.0))
|
|
|
|
def _stats(self):
|
|
return 0, np.pi*np.pi/4, 0, 2
|
|
|
|
def _entropy(self):
|
|
return np.log(2*np.pi)
|
|
|
|
|
|
hypsecant = hypsecant_gen(name='hypsecant')
|
|
|
|
|
|
class gausshyper_gen(rv_continuous):
|
|
r"""A Gauss hypergeometric continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `gausshyper` is:
|
|
|
|
.. math::
|
|
|
|
f(x, a, b, c, z) = C x^{a-1} (1-x)^{b-1} (1+zx)^{-c}
|
|
|
|
for :math:`0 \le x \le 1`, :math:`a,b > 0`, :math:`c` a real number,
|
|
:math:`z > -1`, and :math:`C = \frac{1}{B(a, b) F[2, 1](c, a; a+b; -z)}`.
|
|
:math:`F[2, 1]` is the Gauss hypergeometric function
|
|
`scipy.special.hyp2f1`.
|
|
|
|
`gausshyper` takes :math:`a`, :math:`b`, :math:`c` and :math:`z` as shape
|
|
parameters.
|
|
|
|
%(after_notes)s
|
|
|
|
References
|
|
----------
|
|
.. [1] Armero, C., and M. J. Bayarri. "Prior Assessments for Prediction in
|
|
Queues." *Journal of the Royal Statistical Society*. Series D (The
|
|
Statistician) 43, no. 1 (1994): 139-53. doi:10.2307/2348939
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
|
|
def _argcheck(self, a, b, c, z):
|
|
# z > -1 per gh-10134
|
|
return (a > 0) & (b > 0) & (c == c) & (z > -1)
|
|
|
|
def _shape_info(self):
|
|
ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
|
|
ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
|
|
ic = _ShapeInfo("c", False, (-np.inf, np.inf), (False, False))
|
|
iz = _ShapeInfo("z", False, (-1, np.inf), (False, False))
|
|
return [ia, ib, ic, iz]
|
|
|
|
def _pdf(self, x, a, b, c, z):
|
|
normalization_constant = sc.beta(a, b) * sc.hyp2f1(c, a, a + b, -z)
|
|
return (1./normalization_constant * x**(a - 1.) * (1. - x)**(b - 1.0)
|
|
/ (1.0 + z*x)**c)
|
|
|
|
def _munp(self, n, a, b, c, z):
|
|
fac = sc.beta(n+a, b) / sc.beta(a, b)
|
|
num = sc.hyp2f1(c, a+n, a+b+n, -z)
|
|
den = sc.hyp2f1(c, a, a+b, -z)
|
|
return fac*num / den
|
|
|
|
|
|
gausshyper = gausshyper_gen(a=0.0, b=1.0, name='gausshyper')
|
|
|
|
|
|
class invgamma_gen(rv_continuous):
|
|
r"""An inverted gamma continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `invgamma` is:
|
|
|
|
.. math::
|
|
|
|
f(x, a) = \frac{x^{-a-1}}{\Gamma(a)} \exp(-\frac{1}{x})
|
|
|
|
for :math:`x >= 0`, :math:`a > 0`. :math:`\Gamma` is the gamma function
|
|
(`scipy.special.gamma`).
|
|
|
|
`invgamma` takes ``a`` as a shape parameter for :math:`a`.
|
|
|
|
`invgamma` is a special case of `gengamma` with ``c=-1``, and it is a
|
|
different parameterization of the scaled inverse chi-squared distribution.
|
|
Specifically, if the scaled inverse chi-squared distribution is
|
|
parameterized with degrees of freedom :math:`\nu` and scaling parameter
|
|
:math:`\tau^2`, then it can be modeled using `invgamma` with
|
|
``a=`` :math:`\nu/2` and ``scale=`` :math:`\nu \tau^2/2`.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
_support_mask = rv_continuous._open_support_mask
|
|
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
|
|
|
|
def _pdf(self, x, a):
|
|
# invgamma.pdf(x, a) = x**(-a-1) / gamma(a) * exp(-1/x)
|
|
return np.exp(self._logpdf(x, a))
|
|
|
|
def _logpdf(self, x, a):
|
|
return -(a+1) * np.log(x) - sc.gammaln(a) - 1.0/x
|
|
|
|
def _cdf(self, x, a):
|
|
return sc.gammaincc(a, 1.0 / x)
|
|
|
|
def _ppf(self, q, a):
|
|
return 1.0 / sc.gammainccinv(a, q)
|
|
|
|
def _sf(self, x, a):
|
|
return sc.gammainc(a, 1.0 / x)
|
|
|
|
def _isf(self, q, a):
|
|
return 1.0 / sc.gammaincinv(a, q)
|
|
|
|
def _stats(self, a, moments='mvsk'):
|
|
m1 = _lazywhere(a > 1, (a,), lambda x: 1. / (x - 1.), np.inf)
|
|
m2 = _lazywhere(a > 2, (a,), lambda x: 1. / (x - 1.)**2 / (x - 2.),
|
|
np.inf)
|
|
|
|
g1, g2 = None, None
|
|
if 's' in moments:
|
|
g1 = _lazywhere(
|
|
a > 3, (a,),
|
|
lambda x: 4. * np.sqrt(x - 2.) / (x - 3.), np.nan)
|
|
if 'k' in moments:
|
|
g2 = _lazywhere(
|
|
a > 4, (a,),
|
|
lambda x: 6. * (5. * x - 11.) / (x - 3.) / (x - 4.), np.nan)
|
|
return m1, m2, g1, g2
|
|
|
|
def _entropy(self, a):
|
|
def regular(a):
|
|
h = a - (a + 1.0) * sc.psi(a) + sc.gammaln(a)
|
|
return h
|
|
|
|
def asymptotic(a):
|
|
# gammaln(a) ~ a * ln(a) - a - 0.5 * ln(a) + 0.5 * ln(2 * pi)
|
|
# psi(a) ~ ln(a) - 1 / (2 * a)
|
|
h = ((1 - 3*np.log(a) + np.log(2) + np.log(np.pi))/2
|
|
+ 2/3*a**-1. + a**-2./12 - a**-3./90 - a**-4./120)
|
|
return h
|
|
|
|
h = _lazywhere(a >= 2e2, (a,), f=asymptotic, f2=regular)
|
|
return h
|
|
|
|
|
|
invgamma = invgamma_gen(a=0.0, name='invgamma')
|
|
|
|
|
|
class invgauss_gen(rv_continuous):
|
|
r"""An inverse Gaussian continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `invgauss` is:
|
|
|
|
.. math::
|
|
|
|
f(x; \mu) = \frac{1}{\sqrt{2 \pi x^3}}
|
|
\exp\left(-\frac{(x-\mu)^2}{2 \mu^2 x}\right)
|
|
|
|
for :math:`x \ge 0` and :math:`\mu > 0`.
|
|
|
|
`invgauss` takes ``mu`` as a shape parameter for :math:`\mu`.
|
|
|
|
%(after_notes)s
|
|
|
|
A common shape-scale parameterization of the inverse Gaussian distribution
|
|
has density
|
|
|
|
.. math::
|
|
|
|
f(x; \nu, \lambda) = \sqrt{\frac{\lambda}{2 \pi x^3}}
|
|
\exp\left( -\frac{\lambda(x-\nu)^2}{2 \nu^2 x}\right)
|
|
|
|
Using ``nu`` for :math:`\nu` and ``lam`` for :math:`\lambda`, this
|
|
parameterization is equivalent to the one above with ``mu = nu/lam``,
|
|
``loc = 0``, and ``scale = lam``.
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
_support_mask = rv_continuous._open_support_mask
|
|
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("mu", False, (0, np.inf), (False, False))]
|
|
|
|
def _rvs(self, mu, size=None, random_state=None):
|
|
return random_state.wald(mu, 1.0, size=size)
|
|
|
|
def _pdf(self, x, mu):
|
|
# invgauss.pdf(x, mu) =
|
|
# 1 / sqrt(2*pi*x**3) * exp(-(x-mu)**2/(2*x*mu**2))
|
|
return 1.0/np.sqrt(2*np.pi*x**3.0)*np.exp(-1.0/(2*x)*((x-mu)/mu)**2)
|
|
|
|
def _logpdf(self, x, mu):
|
|
return -0.5*np.log(2*np.pi) - 1.5*np.log(x) - ((x-mu)/mu)**2/(2*x)
|
|
|
|
# approach adapted from equations in
|
|
# https://journal.r-project.org/archive/2016-1/giner-smyth.pdf,
|
|
# not R code. see gh-13616
|
|
|
|
def _logcdf(self, x, mu):
|
|
fac = 1 / np.sqrt(x)
|
|
a = _norm_logcdf(fac * ((x / mu) - 1))
|
|
b = 2 / mu + _norm_logcdf(-fac * ((x / mu) + 1))
|
|
return a + np.log1p(np.exp(b - a))
|
|
|
|
def _logsf(self, x, mu):
|
|
fac = 1 / np.sqrt(x)
|
|
a = _norm_logsf(fac * ((x / mu) - 1))
|
|
b = 2 / mu + _norm_logcdf(-fac * (x + mu) / mu)
|
|
return a + np.log1p(-np.exp(b - a))
|
|
|
|
def _sf(self, x, mu):
|
|
return np.exp(self._logsf(x, mu))
|
|
|
|
def _cdf(self, x, mu):
|
|
return np.exp(self._logcdf(x, mu))
|
|
|
|
def _ppf(self, x, mu):
|
|
with np.errstate(divide='ignore', over='ignore', invalid='ignore'):
|
|
x, mu = np.broadcast_arrays(x, mu)
|
|
ppf = scu._invgauss_ppf(x, mu, 1)
|
|
i_wt = x > 0.5 # "wrong tail" - sometimes too inaccurate
|
|
ppf[i_wt] = scu._invgauss_isf(1-x[i_wt], mu[i_wt], 1)
|
|
i_nan = np.isnan(ppf)
|
|
ppf[i_nan] = super()._ppf(x[i_nan], mu[i_nan])
|
|
return ppf
|
|
|
|
def _isf(self, x, mu):
|
|
with np.errstate(divide='ignore', over='ignore', invalid='ignore'):
|
|
x, mu = np.broadcast_arrays(x, mu)
|
|
isf = scu._invgauss_isf(x, mu, 1)
|
|
i_wt = x > 0.5 # "wrong tail" - sometimes too inaccurate
|
|
isf[i_wt] = scu._invgauss_ppf(1-x[i_wt], mu[i_wt], 1)
|
|
i_nan = np.isnan(isf)
|
|
isf[i_nan] = super()._isf(x[i_nan], mu[i_nan])
|
|
return isf
|
|
|
|
def _stats(self, mu):
|
|
return mu, mu**3.0, 3*np.sqrt(mu), 15*mu
|
|
|
|
@inherit_docstring_from(rv_continuous)
|
|
def fit(self, data, *args, **kwds):
|
|
method = kwds.get('method', 'mle')
|
|
|
|
if (isinstance(data, CensoredData) or type(self) == wald_gen
|
|
or method.lower() == 'mm'):
|
|
return super().fit(data, *args, **kwds)
|
|
|
|
data, fshape_s, floc, fscale = _check_fit_input_parameters(self, data,
|
|
args, kwds)
|
|
'''
|
|
Source: Statistical Distributions, 3rd Edition. Evans, Hastings,
|
|
and Peacock (2000), Page 121. Their shape parameter is equivalent to
|
|
SciPy's with the conversion `fshape_s = fshape / scale`.
|
|
|
|
MLE formulas are not used in 3 conditions:
|
|
- `loc` is not fixed
|
|
- `mu` is fixed
|
|
These cases fall back on the superclass fit method.
|
|
- `loc` is fixed but translation results in negative data raises
|
|
a `FitDataError`.
|
|
'''
|
|
if floc is None or fshape_s is not None:
|
|
return super().fit(data, *args, **kwds)
|
|
elif np.any(data - floc < 0):
|
|
raise FitDataError("invgauss", lower=0, upper=np.inf)
|
|
else:
|
|
data = data - floc
|
|
fshape_n = np.mean(data)
|
|
if fscale is None:
|
|
fscale = len(data) / (np.sum(data ** -1 - fshape_n ** -1))
|
|
fshape_s = fshape_n / fscale
|
|
return fshape_s, floc, fscale
|
|
|
|
def _entropy(self, mu):
|
|
"""
|
|
Ref.: https://moser-isi.ethz.ch/docs/papers/smos-2012-10.pdf (eq. 9)
|
|
"""
|
|
# a = log(2*pi*e*mu**3)
|
|
# = 1 + log(2*pi) + 3 * log(mu)
|
|
a = 1. + np.log(2 * np.pi) + 3 * np.log(mu)
|
|
# b = exp(2/mu) * exp1(2/mu)
|
|
# = _scaled_exp1(2/mu) / (2/mu)
|
|
r = 2/mu
|
|
b = sc._ufuncs._scaled_exp1(r)/r
|
|
return 0.5 * a - 1.5 * b
|
|
|
|
|
|
invgauss = invgauss_gen(a=0.0, name='invgauss')
|
|
|
|
|
|
class geninvgauss_gen(rv_continuous):
|
|
r"""A Generalized Inverse Gaussian continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `geninvgauss` is:
|
|
|
|
.. math::
|
|
|
|
f(x, p, b) = x^{p-1} \exp(-b (x + 1/x) / 2) / (2 K_p(b))
|
|
|
|
where `x > 0`, `p` is a real number and `b > 0`\([1]_).
|
|
:math:`K_p` is the modified Bessel function of second kind of order `p`
|
|
(`scipy.special.kv`).
|
|
|
|
%(after_notes)s
|
|
|
|
The inverse Gaussian distribution `stats.invgauss(mu)` is a special case of
|
|
`geninvgauss` with `p = -1/2`, `b = 1 / mu` and `scale = mu`.
|
|
|
|
Generating random variates is challenging for this distribution. The
|
|
implementation is based on [2]_.
|
|
|
|
References
|
|
----------
|
|
.. [1] O. Barndorff-Nielsen, P. Blaesild, C. Halgreen, "First hitting time
|
|
models for the generalized inverse gaussian distribution",
|
|
Stochastic Processes and their Applications 7, pp. 49--54, 1978.
|
|
|
|
.. [2] W. Hoermann and J. Leydold, "Generating generalized inverse Gaussian
|
|
random variates", Statistics and Computing, 24(4), p. 547--557, 2014.
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _argcheck(self, p, b):
|
|
return (p == p) & (b > 0)
|
|
|
|
def _shape_info(self):
|
|
ip = _ShapeInfo("p", False, (-np.inf, np.inf), (False, False))
|
|
ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
|
|
return [ip, ib]
|
|
|
|
def _logpdf(self, x, p, b):
|
|
# kve instead of kv works better for large values of b
|
|
# warn if kve produces infinite values and replace by nan
|
|
# otherwise c = -inf and the results are often incorrect
|
|
def logpdf_single(x, p, b):
|
|
return _stats.geninvgauss_logpdf(x, p, b)
|
|
|
|
logpdf_single = np.vectorize(logpdf_single, otypes=[np.float64])
|
|
|
|
z = logpdf_single(x, p, b)
|
|
if np.isnan(z).any():
|
|
msg = ("Infinite values encountered in scipy.special.kve(p, b). "
|
|
"Values replaced by NaN to avoid incorrect results.")
|
|
warnings.warn(msg, RuntimeWarning, stacklevel=3)
|
|
return z
|
|
|
|
def _pdf(self, x, p, b):
|
|
# relying on logpdf avoids overflow of x**(p-1) for large x and p
|
|
return np.exp(self._logpdf(x, p, b))
|
|
|
|
def _cdf(self, x, *args):
|
|
_a, _b = self._get_support(*args)
|
|
|
|
def _cdf_single(x, *args):
|
|
p, b = args
|
|
user_data = np.array([p, b], float).ctypes.data_as(ctypes.c_void_p)
|
|
llc = LowLevelCallable.from_cython(_stats, '_geninvgauss_pdf',
|
|
user_data)
|
|
|
|
return integrate.quad(llc, _a, x)[0]
|
|
|
|
_cdf_single = np.vectorize(_cdf_single, otypes=[np.float64])
|
|
|
|
return _cdf_single(x, *args)
|
|
|
|
def _logquasipdf(self, x, p, b):
|
|
# log of the quasi-density (w/o normalizing constant) used in _rvs
|
|
return _lazywhere(x > 0, (x, p, b),
|
|
lambda x, p, b: (p - 1)*np.log(x) - b*(x + 1/x)/2,
|
|
-np.inf)
|
|
|
|
def _rvs(self, p, b, size=None, random_state=None):
|
|
# if p and b are scalar, use _rvs_scalar, otherwise need to create
|
|
# output by iterating over parameters
|
|
if np.isscalar(p) and np.isscalar(b):
|
|
out = self._rvs_scalar(p, b, size, random_state)
|
|
elif p.size == 1 and b.size == 1:
|
|
out = self._rvs_scalar(p.item(), b.item(), size, random_state)
|
|
else:
|
|
# When this method is called, size will be a (possibly empty)
|
|
# tuple of integers. It will not be None; if `size=None` is passed
|
|
# to `rvs()`, size will be the empty tuple ().
|
|
|
|
p, b = np.broadcast_arrays(p, b)
|
|
# p and b now have the same shape.
|
|
|
|
# `shp` is the shape of the blocks of random variates that are
|
|
# generated for each combination of parameters associated with
|
|
# broadcasting p and b.
|
|
# bc is a tuple the same length as size. The values
|
|
# in bc are bools. If bc[j] is True, it means that
|
|
# entire axis is filled in for a given combination of the
|
|
# broadcast arguments.
|
|
shp, bc = _check_shape(p.shape, size)
|
|
|
|
# `numsamples` is the total number of variates to be generated
|
|
# for each combination of the input arguments.
|
|
numsamples = int(np.prod(shp))
|
|
|
|
# `out` is the array to be returned. It is filled in the
|
|
# loop below.
|
|
out = np.empty(size)
|
|
|
|
it = np.nditer([p, b],
|
|
flags=['multi_index'],
|
|
op_flags=[['readonly'], ['readonly']])
|
|
while not it.finished:
|
|
# Convert the iterator's multi_index into an index into the
|
|
# `out` array where the call to _rvs_scalar() will be stored.
|
|
# Where bc is True, we use a full slice; otherwise we use the
|
|
# index value from it.multi_index. len(it.multi_index) might
|
|
# be less than len(bc), and in that case we want to align these
|
|
# two sequences to the right, so the loop variable j runs from
|
|
# -len(size) to 0. This doesn't cause an IndexError, as
|
|
# bc[j] will be True in those cases where it.multi_index[j]
|
|
# would cause an IndexError.
|
|
idx = tuple((it.multi_index[j] if not bc[j] else slice(None))
|
|
for j in range(-len(size), 0))
|
|
out[idx] = self._rvs_scalar(it[0], it[1], numsamples,
|
|
random_state).reshape(shp)
|
|
it.iternext()
|
|
|
|
if size == ():
|
|
out = out.item()
|
|
return out
|
|
|
|
def _rvs_scalar(self, p, b, numsamples, random_state):
|
|
# following [2], the quasi-pdf is used instead of the pdf for the
|
|
# generation of rvs
|
|
invert_res = False
|
|
if not numsamples:
|
|
numsamples = 1
|
|
if p < 0:
|
|
# note: if X is geninvgauss(p, b), then 1/X is geninvgauss(-p, b)
|
|
p = -p
|
|
invert_res = True
|
|
m = self._mode(p, b)
|
|
|
|
# determine method to be used following [2]
|
|
ratio_unif = True
|
|
if p >= 1 or b > 1:
|
|
# ratio of uniforms with mode shift below
|
|
mode_shift = True
|
|
elif b >= min(0.5, 2 * np.sqrt(1 - p) / 3):
|
|
# ratio of uniforms without mode shift below
|
|
mode_shift = False
|
|
else:
|
|
# new algorithm in [2]
|
|
ratio_unif = False
|
|
|
|
# prepare sampling of rvs
|
|
size1d = tuple(np.atleast_1d(numsamples))
|
|
N = np.prod(size1d) # number of rvs needed, reshape upon return
|
|
x = np.zeros(N)
|
|
simulated = 0
|
|
|
|
if ratio_unif:
|
|
# use ratio of uniforms method
|
|
if mode_shift:
|
|
a2 = -2 * (p + 1) / b - m
|
|
a1 = 2 * m * (p - 1) / b - 1
|
|
# find roots of x**3 + a2*x**2 + a1*x + m (Cardano's formula)
|
|
p1 = a1 - a2**2 / 3
|
|
q1 = 2 * a2**3 / 27 - a2 * a1 / 3 + m
|
|
phi = np.arccos(-q1 * np.sqrt(-27 / p1**3) / 2)
|
|
s1 = -np.sqrt(-4 * p1 / 3)
|
|
root1 = s1 * np.cos(phi / 3 + np.pi / 3) - a2 / 3
|
|
root2 = -s1 * np.cos(phi / 3) - a2 / 3
|
|
# root3 = s1 * np.cos(phi / 3 - np.pi / 3) - a2 / 3
|
|
|
|
# if g is the quasipdf, rescale: g(x) / g(m) which we can write
|
|
# as exp(log(g(x)) - log(g(m))). This is important
|
|
# since for large values of p and b, g cannot be evaluated.
|
|
# denote the rescaled quasipdf by h
|
|
lm = self._logquasipdf(m, p, b)
|
|
d1 = self._logquasipdf(root1, p, b) - lm
|
|
d2 = self._logquasipdf(root2, p, b) - lm
|
|
# compute the bounding rectangle w.r.t. h. Note that
|
|
# np.exp(0.5*d1) = np.sqrt(g(root1)/g(m)) = np.sqrt(h(root1))
|
|
vmin = (root1 - m) * np.exp(0.5 * d1)
|
|
vmax = (root2 - m) * np.exp(0.5 * d2)
|
|
umax = 1 # umax = sqrt(h(m)) = 1
|
|
|
|
def logqpdf(x):
|
|
return self._logquasipdf(x, p, b) - lm
|
|
|
|
c = m
|
|
else:
|
|
# ratio of uniforms without mode shift
|
|
# compute np.sqrt(quasipdf(m))
|
|
umax = np.exp(0.5*self._logquasipdf(m, p, b))
|
|
xplus = ((1 + p) + np.sqrt((1 + p)**2 + b**2))/b
|
|
vmin = 0
|
|
# compute xplus * np.sqrt(quasipdf(xplus))
|
|
vmax = xplus * np.exp(0.5 * self._logquasipdf(xplus, p, b))
|
|
c = 0
|
|
|
|
def logqpdf(x):
|
|
return self._logquasipdf(x, p, b)
|
|
|
|
if vmin >= vmax:
|
|
raise ValueError("vmin must be smaller than vmax.")
|
|
if umax <= 0:
|
|
raise ValueError("umax must be positive.")
|
|
|
|
i = 1
|
|
while simulated < N:
|
|
k = N - simulated
|
|
# simulate uniform rvs on [0, umax] and [vmin, vmax]
|
|
u = umax * random_state.uniform(size=k)
|
|
v = random_state.uniform(size=k)
|
|
v = vmin + (vmax - vmin) * v
|
|
rvs = v / u + c
|
|
# rewrite acceptance condition u**2 <= pdf(rvs) by taking logs
|
|
accept = (2*np.log(u) <= logqpdf(rvs))
|
|
num_accept = np.sum(accept)
|
|
if num_accept > 0:
|
|
x[simulated:(simulated + num_accept)] = rvs[accept]
|
|
simulated += num_accept
|
|
|
|
if (simulated == 0) and (i*N >= 50000):
|
|
msg = ("Not a single random variate could be generated "
|
|
f"in {i*N} attempts. Sampling does not appear to "
|
|
"work for the provided parameters.")
|
|
raise RuntimeError(msg)
|
|
i += 1
|
|
else:
|
|
# use new algorithm in [2]
|
|
x0 = b / (1 - p)
|
|
xs = np.max((x0, 2 / b))
|
|
k1 = np.exp(self._logquasipdf(m, p, b))
|
|
A1 = k1 * x0
|
|
if x0 < 2 / b:
|
|
k2 = np.exp(-b)
|
|
if p > 0:
|
|
A2 = k2 * ((2 / b)**p - x0**p) / p
|
|
else:
|
|
A2 = k2 * np.log(2 / b**2)
|
|
else:
|
|
k2, A2 = 0, 0
|
|
k3 = xs**(p - 1)
|
|
A3 = 2 * k3 * np.exp(-xs * b / 2) / b
|
|
A = A1 + A2 + A3
|
|
|
|
# [2]: rejection constant is < 2.73; so expected runtime is finite
|
|
while simulated < N:
|
|
k = N - simulated
|
|
h, rvs = np.zeros(k), np.zeros(k)
|
|
# simulate uniform rvs on [x1, x2] and [0, y2]
|
|
u = random_state.uniform(size=k)
|
|
v = A * random_state.uniform(size=k)
|
|
cond1 = v <= A1
|
|
cond2 = np.logical_not(cond1) & (v <= A1 + A2)
|
|
cond3 = np.logical_not(cond1 | cond2)
|
|
# subdomain (0, x0)
|
|
rvs[cond1] = x0 * v[cond1] / A1
|
|
h[cond1] = k1
|
|
# subdomain (x0, 2 / b)
|
|
if p > 0:
|
|
rvs[cond2] = (x0**p + (v[cond2] - A1) * p / k2)**(1 / p)
|
|
else:
|
|
rvs[cond2] = b * np.exp((v[cond2] - A1) * np.exp(b))
|
|
h[cond2] = k2 * rvs[cond2]**(p - 1)
|
|
# subdomain (xs, infinity)
|
|
z = np.exp(-xs * b / 2) - b * (v[cond3] - A1 - A2) / (2 * k3)
|
|
rvs[cond3] = -2 / b * np.log(z)
|
|
h[cond3] = k3 * np.exp(-rvs[cond3] * b / 2)
|
|
# apply rejection method
|
|
accept = (np.log(u * h) <= self._logquasipdf(rvs, p, b))
|
|
num_accept = sum(accept)
|
|
if num_accept > 0:
|
|
x[simulated:(simulated + num_accept)] = rvs[accept]
|
|
simulated += num_accept
|
|
|
|
rvs = np.reshape(x, size1d)
|
|
if invert_res:
|
|
rvs = 1 / rvs
|
|
return rvs
|
|
|
|
def _mode(self, p, b):
|
|
# distinguish cases to avoid catastrophic cancellation (see [2])
|
|
if p < 1:
|
|
return b / (np.sqrt((p - 1)**2 + b**2) + 1 - p)
|
|
else:
|
|
return (np.sqrt((1 - p)**2 + b**2) - (1 - p)) / b
|
|
|
|
def _munp(self, n, p, b):
|
|
num = sc.kve(p + n, b)
|
|
denom = sc.kve(p, b)
|
|
inf_vals = np.isinf(num) | np.isinf(denom)
|
|
if inf_vals.any():
|
|
msg = ("Infinite values encountered in the moment calculation "
|
|
"involving scipy.special.kve. Values replaced by NaN to "
|
|
"avoid incorrect results.")
|
|
warnings.warn(msg, RuntimeWarning, stacklevel=3)
|
|
m = np.full_like(num, np.nan, dtype=np.float64)
|
|
m[~inf_vals] = num[~inf_vals] / denom[~inf_vals]
|
|
else:
|
|
m = num / denom
|
|
return m
|
|
|
|
|
|
geninvgauss = geninvgauss_gen(a=0.0, name="geninvgauss")
|
|
|
|
|
|
class norminvgauss_gen(rv_continuous):
|
|
r"""A Normal Inverse Gaussian continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `norminvgauss` is:
|
|
|
|
.. math::
|
|
|
|
f(x, a, b) = \frac{a \, K_1(a \sqrt{1 + x^2})}{\pi \sqrt{1 + x^2}} \,
|
|
\exp(\sqrt{a^2 - b^2} + b x)
|
|
|
|
where :math:`x` is a real number, the parameter :math:`a` is the tail
|
|
heaviness and :math:`b` is the asymmetry parameter satisfying
|
|
:math:`a > 0` and :math:`|b| <= a`.
|
|
:math:`K_1` is the modified Bessel function of second kind
|
|
(`scipy.special.k1`).
|
|
|
|
%(after_notes)s
|
|
|
|
A normal inverse Gaussian random variable `Y` with parameters `a` and `b`
|
|
can be expressed as a normal mean-variance mixture:
|
|
`Y = b * V + sqrt(V) * X` where `X` is `norm(0,1)` and `V` is
|
|
`invgauss(mu=1/sqrt(a**2 - b**2))`. This representation is used
|
|
to generate random variates.
|
|
|
|
Another common parametrization of the distribution (see Equation 2.1 in
|
|
[2]_) is given by the following expression of the pdf:
|
|
|
|
.. math::
|
|
|
|
g(x, \alpha, \beta, \delta, \mu) =
|
|
\frac{\alpha\delta K_1\left(\alpha\sqrt{\delta^2 + (x - \mu)^2}\right)}
|
|
{\pi \sqrt{\delta^2 + (x - \mu)^2}} \,
|
|
e^{\delta \sqrt{\alpha^2 - \beta^2} + \beta (x - \mu)}
|
|
|
|
In SciPy, this corresponds to
|
|
`a = alpha * delta, b = beta * delta, loc = mu, scale=delta`.
|
|
|
|
References
|
|
----------
|
|
.. [1] O. Barndorff-Nielsen, "Hyperbolic Distributions and Distributions on
|
|
Hyperbolae", Scandinavian Journal of Statistics, Vol. 5(3),
|
|
pp. 151-157, 1978.
|
|
|
|
.. [2] O. Barndorff-Nielsen, "Normal Inverse Gaussian Distributions and
|
|
Stochastic Volatility Modelling", Scandinavian Journal of
|
|
Statistics, Vol. 24, pp. 1-13, 1997.
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
_support_mask = rv_continuous._open_support_mask
|
|
|
|
def _argcheck(self, a, b):
|
|
return (a > 0) & (np.absolute(b) < a)
|
|
|
|
def _shape_info(self):
|
|
ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
|
|
ib = _ShapeInfo("b", False, (-np.inf, np.inf), (False, False))
|
|
return [ia, ib]
|
|
|
|
def _fitstart(self, data):
|
|
# Arbitrary, but the default a = b = 1 is not valid; the distribution
|
|
# requires |b| < a.
|
|
return super()._fitstart(data, args=(1, 0.5))
|
|
|
|
def _pdf(self, x, a, b):
|
|
gamma = np.sqrt(a**2 - b**2)
|
|
fac1 = a / np.pi
|
|
sq = np.hypot(1, x) # reduce overflows
|
|
return fac1 * sc.k1e(a * sq) * np.exp(b*x - a*sq + gamma) / sq
|
|
|
|
def _sf(self, x, a, b):
|
|
if np.isscalar(x):
|
|
# If x is a scalar, then so are a and b.
|
|
return integrate.quad(self._pdf, x, np.inf, args=(a, b))[0]
|
|
else:
|
|
a = np.atleast_1d(a)
|
|
b = np.atleast_1d(b)
|
|
result = []
|
|
for (x0, a0, b0) in zip(x, a, b):
|
|
result.append(integrate.quad(self._pdf, x0, np.inf,
|
|
args=(a0, b0))[0])
|
|
return np.array(result)
|
|
|
|
def _isf(self, q, a, b):
|
|
def _isf_scalar(q, a, b):
|
|
|
|
def eq(x, a, b, q):
|
|
# Solve eq(x, a, b, q) = 0 to obtain isf(x, a, b) = q.
|
|
return self._sf(x, a, b) - q
|
|
|
|
# Find a bracketing interval for the root.
|
|
# Start at the mean, and grow the length of the interval
|
|
# by 2 each iteration until there is a sign change in eq.
|
|
xm = self.mean(a, b)
|
|
em = eq(xm, a, b, q)
|
|
if em == 0:
|
|
# Unlikely, but might as well check.
|
|
return xm
|
|
if em > 0:
|
|
delta = 1
|
|
left = xm
|
|
right = xm + delta
|
|
while eq(right, a, b, q) > 0:
|
|
delta = 2*delta
|
|
right = xm + delta
|
|
else:
|
|
# em < 0
|
|
delta = 1
|
|
right = xm
|
|
left = xm - delta
|
|
while eq(left, a, b, q) < 0:
|
|
delta = 2*delta
|
|
left = xm - delta
|
|
result = optimize.brentq(eq, left, right, args=(a, b, q),
|
|
xtol=self.xtol)
|
|
return result
|
|
|
|
if np.isscalar(q):
|
|
return _isf_scalar(q, a, b)
|
|
else:
|
|
result = []
|
|
for (q0, a0, b0) in zip(q, a, b):
|
|
result.append(_isf_scalar(q0, a0, b0))
|
|
return np.array(result)
|
|
|
|
def _rvs(self, a, b, size=None, random_state=None):
|
|
# note: X = b * V + sqrt(V) * X is norminvgaus(a,b) if X is standard
|
|
# normal and V is invgauss(mu=1/sqrt(a**2 - b**2))
|
|
gamma = np.sqrt(a**2 - b**2)
|
|
ig = invgauss.rvs(mu=1/gamma, size=size, random_state=random_state)
|
|
return b * ig + np.sqrt(ig) * norm.rvs(size=size,
|
|
random_state=random_state)
|
|
|
|
def _stats(self, a, b):
|
|
gamma = np.sqrt(a**2 - b**2)
|
|
mean = b / gamma
|
|
variance = a**2 / gamma**3
|
|
skewness = 3.0 * b / (a * np.sqrt(gamma))
|
|
kurtosis = 3.0 * (1 + 4 * b**2 / a**2) / gamma
|
|
return mean, variance, skewness, kurtosis
|
|
|
|
|
|
norminvgauss = norminvgauss_gen(name="norminvgauss")
|
|
|
|
|
|
class invweibull_gen(rv_continuous):
|
|
"""An inverted Weibull continuous random variable.
|
|
|
|
This distribution is also known as the Fréchet distribution or the
|
|
type II extreme value distribution.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `invweibull` is:
|
|
|
|
.. math::
|
|
|
|
f(x, c) = c x^{-c-1} \\exp(-x^{-c})
|
|
|
|
for :math:`x > 0`, :math:`c > 0`.
|
|
|
|
`invweibull` takes ``c`` as a shape parameter for :math:`c`.
|
|
|
|
%(after_notes)s
|
|
|
|
References
|
|
----------
|
|
F.R.S. de Gusmao, E.M.M Ortega and G.M. Cordeiro, "The generalized inverse
|
|
Weibull distribution", Stat. Papers, vol. 52, pp. 591-619, 2011.
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
_support_mask = rv_continuous._open_support_mask
|
|
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
|
|
|
|
def _pdf(self, x, c):
|
|
# invweibull.pdf(x, c) = c * x**(-c-1) * exp(-x**(-c))
|
|
xc1 = np.power(x, -c - 1.0)
|
|
xc2 = np.power(x, -c)
|
|
xc2 = np.exp(-xc2)
|
|
return c * xc1 * xc2
|
|
|
|
def _cdf(self, x, c):
|
|
xc1 = np.power(x, -c)
|
|
return np.exp(-xc1)
|
|
|
|
def _sf(self, x, c):
|
|
return -np.expm1(-x**-c)
|
|
|
|
def _ppf(self, q, c):
|
|
return np.power(-np.log(q), -1.0/c)
|
|
|
|
def _isf(self, p, c):
|
|
return (-np.log1p(-p))**(-1/c)
|
|
|
|
def _munp(self, n, c):
|
|
return sc.gamma(1 - n / c)
|
|
|
|
def _entropy(self, c):
|
|
return 1+_EULER + _EULER / c - np.log(c)
|
|
|
|
def _fitstart(self, data, args=None):
|
|
# invweibull requires c > 1 for the first moment to exist, so use 2.0
|
|
args = (2.0,) if args is None else args
|
|
return super()._fitstart(data, args=args)
|
|
|
|
|
|
invweibull = invweibull_gen(a=0, name='invweibull')
|
|
|
|
|
|
class jf_skew_t_gen(rv_continuous):
|
|
r"""Jones and Faddy skew-t distribution.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `jf_skew_t` is:
|
|
|
|
.. math::
|
|
|
|
f(x; a, b) = C_{a,b}^{-1}
|
|
\left(1+\frac{x}{\left(a+b+x^2\right)^{1/2}}\right)^{a+1/2}
|
|
\left(1-\frac{x}{\left(a+b+x^2\right)^{1/2}}\right)^{b+1/2}
|
|
|
|
for real numbers :math:`a>0` and :math:`b>0`, where
|
|
:math:`C_{a,b} = 2^{a+b-1}B(a,b)(a+b)^{1/2}`, and :math:`B` denotes the
|
|
beta function (`scipy.special.beta`).
|
|
|
|
When :math:`a<b`, the distribution is negatively skewed, and when
|
|
:math:`a>b`, the distribution is positively skewed. If :math:`a=b`, then
|
|
we recover the `t` distribution with :math:`2a` degrees of freedom.
|
|
|
|
`jf_skew_t` takes :math:`a` and :math:`b` as shape parameters.
|
|
|
|
%(after_notes)s
|
|
|
|
References
|
|
----------
|
|
.. [1] M.C. Jones and M.J. Faddy. "A skew extension of the t distribution,
|
|
with applications" *Journal of the Royal Statistical Society*.
|
|
Series B (Statistical Methodology) 65, no. 1 (2003): 159-174.
|
|
:doi:`10.1111/1467-9868.00378`
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
|
|
ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
|
|
return [ia, ib]
|
|
|
|
def _pdf(self, x, a, b):
|
|
c = 2 ** (a + b - 1) * sc.beta(a, b) * np.sqrt(a + b)
|
|
d1 = (1 + x / np.sqrt(a + b + x ** 2)) ** (a + 0.5)
|
|
d2 = (1 - x / np.sqrt(a + b + x ** 2)) ** (b + 0.5)
|
|
return d1 * d2 / c
|
|
|
|
def _rvs(self, a, b, size=None, random_state=None):
|
|
d1 = random_state.beta(a, b, size)
|
|
d2 = (2 * d1 - 1) * np.sqrt(a + b)
|
|
d3 = 2 * np.sqrt(d1 * (1 - d1))
|
|
return d2 / d3
|
|
|
|
def _cdf(self, x, a, b):
|
|
y = (1 + x / np.sqrt(a + b + x ** 2)) * 0.5
|
|
return sc.betainc(a, b, y)
|
|
|
|
def _ppf(self, q, a, b):
|
|
d1 = beta.ppf(q, a, b)
|
|
d2 = (2 * d1 - 1) * np.sqrt(a + b)
|
|
d3 = 2 * np.sqrt(d1 * (1 - d1))
|
|
return d2 / d3
|
|
|
|
def _munp(self, n, a, b):
|
|
"""Returns the n-th moment(s) where all the following hold:
|
|
|
|
- n >= 0
|
|
- a > n / 2
|
|
- b > n / 2
|
|
|
|
The result is np.nan in all other cases.
|
|
"""
|
|
def nth_moment(n_k, a_k, b_k):
|
|
"""Computes E[T^(n_k)] where T is skew-t distributed with
|
|
parameters a_k and b_k.
|
|
"""
|
|
num = (a_k + b_k) ** (0.5 * n_k)
|
|
denom = 2 ** n_k * sc.beta(a_k, b_k)
|
|
|
|
indices = np.arange(n_k + 1)
|
|
sgn = np.where(indices % 2 > 0, -1, 1)
|
|
d = sc.beta(a_k + 0.5 * n_k - indices, b_k - 0.5 * n_k + indices)
|
|
sum_terms = sc.comb(n_k, indices) * sgn * d
|
|
|
|
return num / denom * sum_terms.sum()
|
|
|
|
nth_moment_valid = (a > 0.5 * n) & (b > 0.5 * n) & (n >= 0)
|
|
return _lazywhere(
|
|
nth_moment_valid,
|
|
(n, a, b),
|
|
np.vectorize(nth_moment, otypes=[np.float64]),
|
|
np.nan,
|
|
)
|
|
|
|
|
|
jf_skew_t = jf_skew_t_gen(name='jf_skew_t')
|
|
|
|
|
|
class johnsonsb_gen(rv_continuous):
|
|
r"""A Johnson SB continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
See Also
|
|
--------
|
|
johnsonsu
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `johnsonsb` is:
|
|
|
|
.. math::
|
|
|
|
f(x, a, b) = \frac{b}{x(1-x)} \phi(a + b \log \frac{x}{1-x} )
|
|
|
|
where :math:`x`, :math:`a`, and :math:`b` are real scalars; :math:`b > 0`
|
|
and :math:`x \in [0,1]`. :math:`\phi` is the pdf of the normal
|
|
distribution.
|
|
|
|
`johnsonsb` takes :math:`a` and :math:`b` as shape parameters.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
_support_mask = rv_continuous._open_support_mask
|
|
|
|
def _argcheck(self, a, b):
|
|
return (b > 0) & (a == a)
|
|
|
|
def _shape_info(self):
|
|
ia = _ShapeInfo("a", False, (-np.inf, np.inf), (False, False))
|
|
ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
|
|
return [ia, ib]
|
|
|
|
def _pdf(self, x, a, b):
|
|
# johnsonsb.pdf(x, a, b) = b / (x*(1-x)) * phi(a + b * log(x/(1-x)))
|
|
trm = _norm_pdf(a + b*sc.logit(x))
|
|
return b*1.0/(x*(1-x))*trm
|
|
|
|
def _cdf(self, x, a, b):
|
|
return _norm_cdf(a + b*sc.logit(x))
|
|
|
|
def _ppf(self, q, a, b):
|
|
return sc.expit(1.0 / b * (_norm_ppf(q) - a))
|
|
|
|
def _sf(self, x, a, b):
|
|
return _norm_sf(a + b*sc.logit(x))
|
|
|
|
def _isf(self, q, a, b):
|
|
return sc.expit(1.0 / b * (_norm_isf(q) - a))
|
|
|
|
|
|
johnsonsb = johnsonsb_gen(a=0.0, b=1.0, name='johnsonsb')
|
|
|
|
|
|
class johnsonsu_gen(rv_continuous):
|
|
r"""A Johnson SU continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
See Also
|
|
--------
|
|
johnsonsb
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `johnsonsu` is:
|
|
|
|
.. math::
|
|
|
|
f(x, a, b) = \frac{b}{\sqrt{x^2 + 1}}
|
|
\phi(a + b \log(x + \sqrt{x^2 + 1}))
|
|
|
|
where :math:`x`, :math:`a`, and :math:`b` are real scalars; :math:`b > 0`.
|
|
:math:`\phi` is the pdf of the normal distribution.
|
|
|
|
`johnsonsu` takes :math:`a` and :math:`b` as shape parameters.
|
|
|
|
The first four central moments are calculated according to the formulas
|
|
in [1]_.
|
|
|
|
%(after_notes)s
|
|
|
|
References
|
|
----------
|
|
.. [1] Taylor Enterprises. "Johnson Family of Distributions".
|
|
https://variation.com/wp-content/distribution_analyzer_help/hs126.htm
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _argcheck(self, a, b):
|
|
return (b > 0) & (a == a)
|
|
|
|
def _shape_info(self):
|
|
ia = _ShapeInfo("a", False, (-np.inf, np.inf), (False, False))
|
|
ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
|
|
return [ia, ib]
|
|
|
|
def _pdf(self, x, a, b):
|
|
# johnsonsu.pdf(x, a, b) = b / sqrt(x**2 + 1) *
|
|
# phi(a + b * log(x + sqrt(x**2 + 1)))
|
|
x2 = x*x
|
|
trm = _norm_pdf(a + b * np.arcsinh(x))
|
|
return b*1.0/np.sqrt(x2+1.0)*trm
|
|
|
|
def _cdf(self, x, a, b):
|
|
return _norm_cdf(a + b * np.arcsinh(x))
|
|
|
|
def _ppf(self, q, a, b):
|
|
return np.sinh((_norm_ppf(q) - a) / b)
|
|
|
|
def _sf(self, x, a, b):
|
|
return _norm_sf(a + b * np.arcsinh(x))
|
|
|
|
def _isf(self, x, a, b):
|
|
return np.sinh((_norm_isf(x) - a) / b)
|
|
|
|
def _stats(self, a, b, moments='mv'):
|
|
# Naive implementation of first and second moment to address gh-18071.
|
|
# https://variation.com/wp-content/distribution_analyzer_help/hs126.htm
|
|
# Numerical improvements left to future enhancements.
|
|
mu, mu2, g1, g2 = None, None, None, None
|
|
|
|
bn2 = b**-2.
|
|
expbn2 = np.exp(bn2)
|
|
a_b = a / b
|
|
|
|
if 'm' in moments:
|
|
mu = -expbn2**0.5 * np.sinh(a_b)
|
|
if 'v' in moments:
|
|
mu2 = 0.5*sc.expm1(bn2)*(expbn2*np.cosh(2*a_b) + 1)
|
|
if 's' in moments:
|
|
t1 = expbn2**.5 * sc.expm1(bn2)**0.5
|
|
t2 = 3*np.sinh(a_b)
|
|
t3 = expbn2 * (expbn2 + 2) * np.sinh(3*a_b)
|
|
denom = np.sqrt(2) * (1 + expbn2 * np.cosh(2*a_b))**(3/2)
|
|
g1 = -t1 * (t2 + t3) / denom
|
|
if 'k' in moments:
|
|
t1 = 3 + 6*expbn2
|
|
t2 = 4*expbn2**2 * (expbn2 + 2) * np.cosh(2*a_b)
|
|
t3 = expbn2**2 * np.cosh(4*a_b)
|
|
t4 = -3 + 3*expbn2**2 + 2*expbn2**3 + expbn2**4
|
|
denom = 2*(1 + expbn2*np.cosh(2*a_b))**2
|
|
g2 = (t1 + t2 + t3*t4) / denom - 3
|
|
return mu, mu2, g1, g2
|
|
|
|
|
|
johnsonsu = johnsonsu_gen(name='johnsonsu')
|
|
|
|
|
|
class laplace_gen(rv_continuous):
|
|
r"""A Laplace continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `laplace` is
|
|
|
|
.. math::
|
|
|
|
f(x) = \frac{1}{2} \exp(-|x|)
|
|
|
|
for a real number :math:`x`.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return []
|
|
|
|
def _rvs(self, size=None, random_state=None):
|
|
return random_state.laplace(0, 1, size=size)
|
|
|
|
def _pdf(self, x):
|
|
# laplace.pdf(x) = 1/2 * exp(-abs(x))
|
|
return 0.5*np.exp(-abs(x))
|
|
|
|
def _cdf(self, x):
|
|
with np.errstate(over='ignore'):
|
|
return np.where(x > 0, 1.0 - 0.5*np.exp(-x), 0.5*np.exp(x))
|
|
|
|
def _sf(self, x):
|
|
# By symmetry...
|
|
return self._cdf(-x)
|
|
|
|
def _ppf(self, q):
|
|
return np.where(q > 0.5, -np.log(2*(1-q)), np.log(2*q))
|
|
|
|
def _isf(self, q):
|
|
# By symmetry...
|
|
return -self._ppf(q)
|
|
|
|
def _stats(self):
|
|
return 0, 2, 0, 3
|
|
|
|
def _entropy(self):
|
|
return np.log(2)+1
|
|
|
|
@_call_super_mom
|
|
@replace_notes_in_docstring(rv_continuous, notes="""\
|
|
This function uses explicit formulas for the maximum likelihood
|
|
estimation of the Laplace distribution parameters, so the keyword
|
|
arguments `loc`, `scale`, and `optimizer` are ignored.\n\n""")
|
|
def fit(self, data, *args, **kwds):
|
|
data, floc, fscale = _check_fit_input_parameters(self, data,
|
|
args, kwds)
|
|
|
|
# Source: Statistical Distributions, 3rd Edition. Evans, Hastings,
|
|
# and Peacock (2000), Page 124
|
|
|
|
if floc is None:
|
|
floc = np.median(data)
|
|
|
|
if fscale is None:
|
|
fscale = (np.sum(np.abs(data - floc))) / len(data)
|
|
|
|
return floc, fscale
|
|
|
|
|
|
laplace = laplace_gen(name='laplace')
|
|
|
|
|
|
class laplace_asymmetric_gen(rv_continuous):
|
|
r"""An asymmetric Laplace continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
See Also
|
|
--------
|
|
laplace : Laplace distribution
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `laplace_asymmetric` is
|
|
|
|
.. math::
|
|
|
|
f(x, \kappa) &= \frac{1}{\kappa+\kappa^{-1}}\exp(-x\kappa),\quad x\ge0\\
|
|
&= \frac{1}{\kappa+\kappa^{-1}}\exp(x/\kappa),\quad x<0\\
|
|
|
|
for :math:`-\infty < x < \infty`, :math:`\kappa > 0`.
|
|
|
|
`laplace_asymmetric` takes ``kappa`` as a shape parameter for
|
|
:math:`\kappa`. For :math:`\kappa = 1`, it is identical to a
|
|
Laplace distribution.
|
|
|
|
%(after_notes)s
|
|
|
|
Note that the scale parameter of some references is the reciprocal of
|
|
SciPy's ``scale``. For example, :math:`\lambda = 1/2` in the
|
|
parameterization of [1]_ is equivalent to ``scale = 2`` with
|
|
`laplace_asymmetric`.
|
|
|
|
References
|
|
----------
|
|
.. [1] "Asymmetric Laplace distribution", Wikipedia
|
|
https://en.wikipedia.org/wiki/Asymmetric_Laplace_distribution
|
|
|
|
.. [2] Kozubowski TJ and Podgórski K. A Multivariate and
|
|
Asymmetric Generalization of Laplace Distribution,
|
|
Computational Statistics 15, 531--540 (2000).
|
|
:doi:`10.1007/PL00022717`
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("kappa", False, (0, np.inf), (False, False))]
|
|
|
|
def _pdf(self, x, kappa):
|
|
return np.exp(self._logpdf(x, kappa))
|
|
|
|
def _logpdf(self, x, kappa):
|
|
kapinv = 1/kappa
|
|
lPx = x * np.where(x >= 0, -kappa, kapinv)
|
|
lPx -= np.log(kappa+kapinv)
|
|
return lPx
|
|
|
|
def _cdf(self, x, kappa):
|
|
kapinv = 1/kappa
|
|
kappkapinv = kappa+kapinv
|
|
return np.where(x >= 0,
|
|
1 - np.exp(-x*kappa)*(kapinv/kappkapinv),
|
|
np.exp(x*kapinv)*(kappa/kappkapinv))
|
|
|
|
def _sf(self, x, kappa):
|
|
kapinv = 1/kappa
|
|
kappkapinv = kappa+kapinv
|
|
return np.where(x >= 0,
|
|
np.exp(-x*kappa)*(kapinv/kappkapinv),
|
|
1 - np.exp(x*kapinv)*(kappa/kappkapinv))
|
|
|
|
def _ppf(self, q, kappa):
|
|
kapinv = 1/kappa
|
|
kappkapinv = kappa+kapinv
|
|
return np.where(q >= kappa/kappkapinv,
|
|
-np.log((1 - q)*kappkapinv*kappa)*kapinv,
|
|
np.log(q*kappkapinv/kappa)*kappa)
|
|
|
|
def _isf(self, q, kappa):
|
|
kapinv = 1/kappa
|
|
kappkapinv = kappa+kapinv
|
|
return np.where(q <= kapinv/kappkapinv,
|
|
-np.log(q*kappkapinv*kappa)*kapinv,
|
|
np.log((1 - q)*kappkapinv/kappa)*kappa)
|
|
|
|
def _stats(self, kappa):
|
|
kapinv = 1/kappa
|
|
mn = kapinv - kappa
|
|
var = kapinv*kapinv + kappa*kappa
|
|
g1 = 2.0*(1-np.power(kappa, 6))/np.power(1+np.power(kappa, 4), 1.5)
|
|
g2 = 6.0*(1+np.power(kappa, 8))/np.power(1+np.power(kappa, 4), 2)
|
|
return mn, var, g1, g2
|
|
|
|
def _entropy(self, kappa):
|
|
return 1 + np.log(kappa+1/kappa)
|
|
|
|
|
|
laplace_asymmetric = laplace_asymmetric_gen(name='laplace_asymmetric')
|
|
|
|
|
|
def _check_fit_input_parameters(dist, data, args, kwds):
|
|
if not isinstance(data, CensoredData):
|
|
data = np.asarray(data)
|
|
|
|
floc = kwds.get('floc', None)
|
|
fscale = kwds.get('fscale', None)
|
|
|
|
num_shapes = len(dist.shapes.split(",")) if dist.shapes else 0
|
|
fshape_keys = []
|
|
fshapes = []
|
|
|
|
# user has many options for fixing the shape, so here we standardize it
|
|
# into 'f' + the number of the shape.
|
|
# Adapted from `_reduce_func` in `_distn_infrastructure.py`:
|
|
if dist.shapes:
|
|
shapes = dist.shapes.replace(',', ' ').split()
|
|
for j, s in enumerate(shapes):
|
|
key = 'f' + str(j)
|
|
names = [key, 'f' + s, 'fix_' + s]
|
|
val = _get_fixed_fit_value(kwds, names)
|
|
fshape_keys.append(key)
|
|
fshapes.append(val)
|
|
if val is not None:
|
|
kwds[key] = val
|
|
|
|
# determine if there are any unknown arguments in kwds
|
|
known_keys = {'loc', 'scale', 'optimizer', 'method',
|
|
'floc', 'fscale', *fshape_keys}
|
|
unknown_keys = set(kwds).difference(known_keys)
|
|
if unknown_keys:
|
|
raise TypeError(f"Unknown keyword arguments: {unknown_keys}.")
|
|
|
|
if len(args) > num_shapes:
|
|
raise TypeError("Too many positional arguments.")
|
|
|
|
if None not in {floc, fscale, *fshapes}:
|
|
# This check is for consistency with `rv_continuous.fit`.
|
|
# Without this check, this function would just return the
|
|
# parameters that were given.
|
|
raise RuntimeError("All parameters fixed. There is nothing to "
|
|
"optimize.")
|
|
|
|
uncensored = data._uncensor() if isinstance(data, CensoredData) else data
|
|
if not np.isfinite(uncensored).all():
|
|
raise ValueError("The data contains non-finite values.")
|
|
|
|
return (data, *fshapes, floc, fscale)
|
|
|
|
|
|
class levy_gen(rv_continuous):
|
|
r"""A Levy continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
See Also
|
|
--------
|
|
levy_stable, levy_l
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `levy` is:
|
|
|
|
.. math::
|
|
|
|
f(x) = \frac{1}{\sqrt{2\pi x^3}} \exp\left(-\frac{1}{2x}\right)
|
|
|
|
for :math:`x > 0`.
|
|
|
|
This is the same as the Levy-stable distribution with :math:`a=1/2` and
|
|
:math:`b=1`.
|
|
|
|
%(after_notes)s
|
|
|
|
Examples
|
|
--------
|
|
>>> import numpy as np
|
|
>>> from scipy.stats import levy
|
|
>>> import matplotlib.pyplot as plt
|
|
>>> fig, ax = plt.subplots(1, 1)
|
|
|
|
Calculate the first four moments:
|
|
|
|
>>> mean, var, skew, kurt = levy.stats(moments='mvsk')
|
|
|
|
Display the probability density function (``pdf``):
|
|
|
|
>>> # `levy` is very heavy-tailed.
|
|
>>> # To show a nice plot, let's cut off the upper 40 percent.
|
|
>>> a, b = levy.ppf(0), levy.ppf(0.6)
|
|
>>> x = np.linspace(a, b, 100)
|
|
>>> ax.plot(x, levy.pdf(x),
|
|
... 'r-', lw=5, alpha=0.6, label='levy pdf')
|
|
|
|
Alternatively, the distribution object can be called (as a function)
|
|
to fix the shape, location and scale parameters. This returns a "frozen"
|
|
RV object holding the given parameters fixed.
|
|
|
|
Freeze the distribution and display the frozen ``pdf``:
|
|
|
|
>>> rv = levy()
|
|
>>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
|
|
|
|
Check accuracy of ``cdf`` and ``ppf``:
|
|
|
|
>>> vals = levy.ppf([0.001, 0.5, 0.999])
|
|
>>> np.allclose([0.001, 0.5, 0.999], levy.cdf(vals))
|
|
True
|
|
|
|
Generate random numbers:
|
|
|
|
>>> r = levy.rvs(size=1000)
|
|
|
|
And compare the histogram:
|
|
|
|
>>> # manual binning to ignore the tail
|
|
>>> bins = np.concatenate((np.linspace(a, b, 20), [np.max(r)]))
|
|
>>> ax.hist(r, bins=bins, density=True, histtype='stepfilled', alpha=0.2)
|
|
>>> ax.set_xlim([x[0], x[-1]])
|
|
>>> ax.legend(loc='best', frameon=False)
|
|
>>> plt.show()
|
|
|
|
"""
|
|
_support_mask = rv_continuous._open_support_mask
|
|
|
|
def _shape_info(self):
|
|
return []
|
|
|
|
def _pdf(self, x):
|
|
# levy.pdf(x) = 1 / (x * sqrt(2*pi*x)) * exp(-1/(2*x))
|
|
return 1 / np.sqrt(2*np.pi*x) / x * np.exp(-1/(2*x))
|
|
|
|
def _cdf(self, x):
|
|
# Equivalent to 2*norm.sf(np.sqrt(1/x))
|
|
return sc.erfc(np.sqrt(0.5 / x))
|
|
|
|
def _sf(self, x):
|
|
return sc.erf(np.sqrt(0.5 / x))
|
|
|
|
def _ppf(self, q):
|
|
# Equivalent to 1.0/(norm.isf(q/2)**2) or 0.5/(erfcinv(q)**2)
|
|
val = _norm_isf(q/2)
|
|
return 1.0 / (val * val)
|
|
|
|
def _isf(self, p):
|
|
return 1/(2*sc.erfinv(p)**2)
|
|
|
|
def _stats(self):
|
|
return np.inf, np.inf, np.nan, np.nan
|
|
|
|
|
|
levy = levy_gen(a=0.0, name="levy")
|
|
|
|
|
|
class levy_l_gen(rv_continuous):
|
|
r"""A left-skewed Levy continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
See Also
|
|
--------
|
|
levy, levy_stable
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `levy_l` is:
|
|
|
|
.. math::
|
|
f(x) = \frac{1}{|x| \sqrt{2\pi |x|}} \exp{ \left(-\frac{1}{2|x|} \right)}
|
|
|
|
for :math:`x < 0`.
|
|
|
|
This is the same as the Levy-stable distribution with :math:`a=1/2` and
|
|
:math:`b=-1`.
|
|
|
|
%(after_notes)s
|
|
|
|
Examples
|
|
--------
|
|
>>> import numpy as np
|
|
>>> from scipy.stats import levy_l
|
|
>>> import matplotlib.pyplot as plt
|
|
>>> fig, ax = plt.subplots(1, 1)
|
|
|
|
Calculate the first four moments:
|
|
|
|
>>> mean, var, skew, kurt = levy_l.stats(moments='mvsk')
|
|
|
|
Display the probability density function (``pdf``):
|
|
|
|
>>> # `levy_l` is very heavy-tailed.
|
|
>>> # To show a nice plot, let's cut off the lower 40 percent.
|
|
>>> a, b = levy_l.ppf(0.4), levy_l.ppf(1)
|
|
>>> x = np.linspace(a, b, 100)
|
|
>>> ax.plot(x, levy_l.pdf(x),
|
|
... 'r-', lw=5, alpha=0.6, label='levy_l pdf')
|
|
|
|
Alternatively, the distribution object can be called (as a function)
|
|
to fix the shape, location and scale parameters. This returns a "frozen"
|
|
RV object holding the given parameters fixed.
|
|
|
|
Freeze the distribution and display the frozen ``pdf``:
|
|
|
|
>>> rv = levy_l()
|
|
>>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
|
|
|
|
Check accuracy of ``cdf`` and ``ppf``:
|
|
|
|
>>> vals = levy_l.ppf([0.001, 0.5, 0.999])
|
|
>>> np.allclose([0.001, 0.5, 0.999], levy_l.cdf(vals))
|
|
True
|
|
|
|
Generate random numbers:
|
|
|
|
>>> r = levy_l.rvs(size=1000)
|
|
|
|
And compare the histogram:
|
|
|
|
>>> # manual binning to ignore the tail
|
|
>>> bins = np.concatenate(([np.min(r)], np.linspace(a, b, 20)))
|
|
>>> ax.hist(r, bins=bins, density=True, histtype='stepfilled', alpha=0.2)
|
|
>>> ax.set_xlim([x[0], x[-1]])
|
|
>>> ax.legend(loc='best', frameon=False)
|
|
>>> plt.show()
|
|
|
|
"""
|
|
_support_mask = rv_continuous._open_support_mask
|
|
|
|
def _shape_info(self):
|
|
return []
|
|
|
|
def _pdf(self, x):
|
|
# levy_l.pdf(x) = 1 / (abs(x) * sqrt(2*pi*abs(x))) * exp(-1/(2*abs(x)))
|
|
ax = abs(x)
|
|
return 1/np.sqrt(2*np.pi*ax)/ax*np.exp(-1/(2*ax))
|
|
|
|
def _cdf(self, x):
|
|
ax = abs(x)
|
|
return 2 * _norm_cdf(1 / np.sqrt(ax)) - 1
|
|
|
|
def _sf(self, x):
|
|
ax = abs(x)
|
|
return 2 * _norm_sf(1 / np.sqrt(ax))
|
|
|
|
def _ppf(self, q):
|
|
val = _norm_ppf((q + 1.0) / 2)
|
|
return -1.0 / (val * val)
|
|
|
|
def _isf(self, p):
|
|
return -1/_norm_isf(p/2)**2
|
|
|
|
def _stats(self):
|
|
return np.inf, np.inf, np.nan, np.nan
|
|
|
|
|
|
levy_l = levy_l_gen(b=0.0, name="levy_l")
|
|
|
|
|
|
class logistic_gen(rv_continuous):
|
|
r"""A logistic (or Sech-squared) continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `logistic` is:
|
|
|
|
.. math::
|
|
|
|
f(x) = \frac{\exp(-x)}
|
|
{(1+\exp(-x))^2}
|
|
|
|
`logistic` is a special case of `genlogistic` with ``c=1``.
|
|
|
|
Remark that the survival function (``logistic.sf``) is equal to the
|
|
Fermi-Dirac distribution describing fermionic statistics.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return []
|
|
|
|
def _rvs(self, size=None, random_state=None):
|
|
return random_state.logistic(size=size)
|
|
|
|
def _pdf(self, x):
|
|
# logistic.pdf(x) = exp(-x) / (1+exp(-x))**2
|
|
return np.exp(self._logpdf(x))
|
|
|
|
def _logpdf(self, x):
|
|
y = -np.abs(x)
|
|
return y - 2. * sc.log1p(np.exp(y))
|
|
|
|
def _cdf(self, x):
|
|
return sc.expit(x)
|
|
|
|
def _logcdf(self, x):
|
|
return sc.log_expit(x)
|
|
|
|
def _ppf(self, q):
|
|
return sc.logit(q)
|
|
|
|
def _sf(self, x):
|
|
return sc.expit(-x)
|
|
|
|
def _logsf(self, x):
|
|
return sc.log_expit(-x)
|
|
|
|
def _isf(self, q):
|
|
return -sc.logit(q)
|
|
|
|
def _stats(self):
|
|
return 0, np.pi*np.pi/3.0, 0, 6.0/5.0
|
|
|
|
def _entropy(self):
|
|
# https://en.wikipedia.org/wiki/Logistic_distribution
|
|
return 2.0
|
|
|
|
@_call_super_mom
|
|
@inherit_docstring_from(rv_continuous)
|
|
def fit(self, data, *args, **kwds):
|
|
if kwds.pop('superfit', False):
|
|
return super().fit(data, *args, **kwds)
|
|
|
|
data, floc, fscale = _check_fit_input_parameters(self, data,
|
|
args, kwds)
|
|
n = len(data)
|
|
|
|
# rv_continuous provided guesses
|
|
loc, scale = self._fitstart(data)
|
|
# these are trumped by user-provided guesses
|
|
loc, scale = kwds.get('loc', loc), kwds.get('scale', scale)
|
|
|
|
# the maximum likelihood estimators `a` and `b` of the location and
|
|
# scale parameters are roots of the two equations described in `func`.
|
|
# Source: Statistical Distributions, 3rd Edition. Evans, Hastings, and
|
|
# Peacock (2000), Page 130
|
|
|
|
def dl_dloc(loc, scale=fscale):
|
|
c = (data - loc) / scale
|
|
return np.sum(sc.expit(c)) - n/2
|
|
|
|
def dl_dscale(scale, loc=floc):
|
|
c = (data - loc) / scale
|
|
return np.sum(c*np.tanh(c/2)) - n
|
|
|
|
def func(params):
|
|
loc, scale = params
|
|
return dl_dloc(loc, scale), dl_dscale(scale, loc)
|
|
|
|
if fscale is not None and floc is None:
|
|
res = optimize.root(dl_dloc, (loc,))
|
|
loc = res.x[0]
|
|
scale = fscale
|
|
elif floc is not None and fscale is None:
|
|
res = optimize.root(dl_dscale, (scale,))
|
|
scale = res.x[0]
|
|
loc = floc
|
|
else:
|
|
res = optimize.root(func, (loc, scale))
|
|
loc, scale = res.x
|
|
|
|
# Note: gh-18176 reported data for which the reported MLE had
|
|
# `scale < 0`. To fix the bug, we return abs(scale). This is OK because
|
|
# `dl_dscale` and `dl_dloc` are even and odd functions of `scale`,
|
|
# respectively, so if `-scale` is a solution, so is `scale`.
|
|
scale = abs(scale)
|
|
return ((loc, scale) if res.success
|
|
else super().fit(data, *args, **kwds))
|
|
|
|
|
|
logistic = logistic_gen(name='logistic')
|
|
|
|
|
|
class loggamma_gen(rv_continuous):
|
|
r"""A log gamma continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `loggamma` is:
|
|
|
|
.. math::
|
|
|
|
f(x, c) = \frac{\exp(c x - \exp(x))}
|
|
{\Gamma(c)}
|
|
|
|
for all :math:`x, c > 0`. Here, :math:`\Gamma` is the
|
|
gamma function (`scipy.special.gamma`).
|
|
|
|
`loggamma` takes ``c`` as a shape parameter for :math:`c`.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
|
|
|
|
def _rvs(self, c, size=None, random_state=None):
|
|
# Use the property of the gamma distribution Gamma(c)
|
|
# Gamma(c) ~ Gamma(c + 1)*U**(1/c),
|
|
# where U is uniform on [0, 1]. (See, e.g.,
|
|
# G. Marsaglia and W.W. Tsang, "A simple method for generating gamma
|
|
# variables", https://doi.org/10.1145/358407.358414)
|
|
# So
|
|
# log(Gamma(c)) ~ log(Gamma(c + 1)) + log(U)/c
|
|
# Generating a sample with this formulation is a bit slower
|
|
# than the more obvious log(Gamma(c)), but it avoids loss
|
|
# of precision when c << 1.
|
|
return (np.log(random_state.gamma(c + 1, size=size))
|
|
+ np.log(random_state.uniform(size=size))/c)
|
|
|
|
def _pdf(self, x, c):
|
|
# loggamma.pdf(x, c) = exp(c*x-exp(x)) / gamma(c)
|
|
return np.exp(c*x-np.exp(x)-sc.gammaln(c))
|
|
|
|
def _logpdf(self, x, c):
|
|
return c*x - np.exp(x) - sc.gammaln(c)
|
|
|
|
def _cdf(self, x, c):
|
|
# This function is gammainc(c, exp(x)), where gammainc(c, z) is
|
|
# the regularized incomplete gamma function.
|
|
# The first term in a series expansion of gamminc(c, z) is
|
|
# z**c/Gamma(c+1); see 6.5.29 of Abramowitz & Stegun (and refer
|
|
# back to 6.5.1, 6.5.2 and 6.5.4 for the relevant notation).
|
|
# This can also be found in the wikipedia article
|
|
# https://en.wikipedia.org/wiki/Incomplete_gamma_function.
|
|
# Here we use that formula when x is sufficiently negative that
|
|
# exp(x) will result in subnormal numbers and lose precision.
|
|
# We evaluate the log of the expression first to allow the possible
|
|
# cancellation of the terms in the division, and then exponentiate.
|
|
# That is,
|
|
# exp(x)**c/Gamma(c+1) = exp(log(exp(x)**c/Gamma(c+1)))
|
|
# = exp(c*x - gammaln(c+1))
|
|
return _lazywhere(x < _LOGXMIN, (x, c),
|
|
lambda x, c: np.exp(c*x - sc.gammaln(c+1)),
|
|
f2=lambda x, c: sc.gammainc(c, np.exp(x)))
|
|
|
|
def _ppf(self, q, c):
|
|
# The expression used when g < _XMIN inverts the one term expansion
|
|
# given in the comments of _cdf().
|
|
g = sc.gammaincinv(c, q)
|
|
return _lazywhere(g < _XMIN, (g, q, c),
|
|
lambda g, q, c: (np.log(q) + sc.gammaln(c+1))/c,
|
|
f2=lambda g, q, c: np.log(g))
|
|
|
|
def _sf(self, x, c):
|
|
# See the comments for _cdf() for how x < _LOGXMIN is handled.
|
|
return _lazywhere(x < _LOGXMIN, (x, c),
|
|
lambda x, c: -np.expm1(c*x - sc.gammaln(c+1)),
|
|
f2=lambda x, c: sc.gammaincc(c, np.exp(x)))
|
|
|
|
def _isf(self, q, c):
|
|
# The expression used when g < _XMIN inverts the complement of
|
|
# the one term expansion given in the comments of _cdf().
|
|
g = sc.gammainccinv(c, q)
|
|
return _lazywhere(g < _XMIN, (g, q, c),
|
|
lambda g, q, c: (np.log1p(-q) + sc.gammaln(c+1))/c,
|
|
f2=lambda g, q, c: np.log(g))
|
|
|
|
def _stats(self, c):
|
|
# See, for example, "A Statistical Study of Log-Gamma Distribution", by
|
|
# Ping Shing Chan (thesis, McMaster University, 1993).
|
|
mean = sc.digamma(c)
|
|
var = sc.polygamma(1, c)
|
|
skewness = sc.polygamma(2, c) / np.power(var, 1.5)
|
|
excess_kurtosis = sc.polygamma(3, c) / (var*var)
|
|
return mean, var, skewness, excess_kurtosis
|
|
|
|
def _entropy(self, c):
|
|
def regular(c):
|
|
h = sc.gammaln(c) - c * sc.digamma(c) + c
|
|
return h
|
|
|
|
def asymptotic(c):
|
|
# using asymptotic expansions for gammaln and psi (see gh-18093)
|
|
term = -0.5*np.log(c) + c**-1./6 - c**-3./90 + c**-5./210
|
|
h = norm._entropy() + term
|
|
return h
|
|
|
|
h = _lazywhere(c >= 45, (c, ), f=asymptotic, f2=regular)
|
|
return h
|
|
|
|
|
|
loggamma = loggamma_gen(name='loggamma')
|
|
|
|
|
|
class loglaplace_gen(rv_continuous):
|
|
r"""A log-Laplace continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `loglaplace` is:
|
|
|
|
.. math::
|
|
|
|
f(x, c) = \begin{cases}\frac{c}{2} x^{ c-1} &\text{for } 0 < x < 1\\
|
|
\frac{c}{2} x^{-c-1} &\text{for } x \ge 1
|
|
\end{cases}
|
|
|
|
for :math:`c > 0`.
|
|
|
|
`loglaplace` takes ``c`` as a shape parameter for :math:`c`.
|
|
|
|
%(after_notes)s
|
|
|
|
Suppose a random variable ``X`` follows the Laplace distribution with
|
|
location ``a`` and scale ``b``. Then ``Y = exp(X)`` follows the
|
|
log-Laplace distribution with ``c = 1 / b`` and ``scale = exp(a)``.
|
|
|
|
References
|
|
----------
|
|
T.J. Kozubowski and K. Podgorski, "A log-Laplace growth rate model",
|
|
The Mathematical Scientist, vol. 28, pp. 49-60, 2003.
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
|
|
|
|
def _pdf(self, x, c):
|
|
# loglaplace.pdf(x, c) = c / 2 * x**(c-1), for 0 < x < 1
|
|
# = c / 2 * x**(-c-1), for x >= 1
|
|
cd2 = c/2.0
|
|
c = np.where(x < 1, c, -c)
|
|
return cd2*x**(c-1)
|
|
|
|
def _cdf(self, x, c):
|
|
return np.where(x < 1, 0.5*x**c, 1-0.5*x**(-c))
|
|
|
|
def _sf(self, x, c):
|
|
return np.where(x < 1, 1 - 0.5*x**c, 0.5*x**(-c))
|
|
|
|
def _ppf(self, q, c):
|
|
return np.where(q < 0.5, (2.0*q)**(1.0/c), (2*(1.0-q))**(-1.0/c))
|
|
|
|
def _isf(self, q, c):
|
|
return np.where(q > 0.5, (2.0*(1.0 - q))**(1.0/c), (2*q)**(-1.0/c))
|
|
|
|
def _munp(self, n, c):
|
|
with np.errstate(divide='ignore'):
|
|
c2, n2 = c**2, n**2
|
|
return np.where(n2 < c2, c2 / (c2 - n2), np.inf)
|
|
|
|
def _entropy(self, c):
|
|
return np.log(2.0/c) + 1.0
|
|
|
|
@_call_super_mom
|
|
@inherit_docstring_from(rv_continuous)
|
|
def fit(self, data, *args, **kwds):
|
|
data, fc, floc, fscale = _check_fit_input_parameters(self, data,
|
|
args, kwds)
|
|
|
|
# Specialize MLE only when location is known.
|
|
if floc is None:
|
|
return super(type(self), self).fit(data, *args, **kwds)
|
|
|
|
# Raise an error if any observation has zero likelihood.
|
|
if np.any(data <= floc):
|
|
raise FitDataError("loglaplace", lower=floc, upper=np.inf)
|
|
|
|
# Remove location from data.
|
|
if floc != 0:
|
|
data = data - floc
|
|
|
|
# When location is zero, the log-Laplace distribution is related to
|
|
# the Laplace distribution in that if X ~ Laplace(loc=a, scale=b),
|
|
# then Y = exp(X) ~ LogLaplace(c=1/b, loc=0, scale=exp(a)). It can
|
|
# be shown that the MLE for Y is the same as the MLE for X = ln(Y).
|
|
# Therefore, we reuse the formulas from laplace.fit() and transform
|
|
# the result back into log-laplace's parameter space.
|
|
a, b = laplace.fit(np.log(data),
|
|
floc=np.log(fscale) if fscale is not None else None,
|
|
fscale=1/fc if fc is not None else None,
|
|
method='mle')
|
|
loc = floc
|
|
scale = np.exp(a) if fscale is None else fscale
|
|
c = 1 / b if fc is None else fc
|
|
return c, loc, scale
|
|
|
|
loglaplace = loglaplace_gen(a=0.0, name='loglaplace')
|
|
|
|
|
|
def _lognorm_logpdf(x, s):
|
|
return _lazywhere(x != 0, (x, s),
|
|
lambda x, s: (-np.log(x)**2 / (2 * s**2)
|
|
- np.log(s * x * np.sqrt(2 * np.pi))),
|
|
-np.inf)
|
|
|
|
|
|
class lognorm_gen(rv_continuous):
|
|
r"""A lognormal continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `lognorm` is:
|
|
|
|
.. math::
|
|
|
|
f(x, s) = \frac{1}{s x \sqrt{2\pi}}
|
|
\exp\left(-\frac{\log^2(x)}{2s^2}\right)
|
|
|
|
for :math:`x > 0`, :math:`s > 0`.
|
|
|
|
`lognorm` takes ``s`` as a shape parameter for :math:`s`.
|
|
|
|
%(after_notes)s
|
|
|
|
Suppose a normally distributed random variable ``X`` has mean ``mu`` and
|
|
standard deviation ``sigma``. Then ``Y = exp(X)`` is lognormally
|
|
distributed with ``s = sigma`` and ``scale = exp(mu)``.
|
|
|
|
%(example)s
|
|
|
|
The logarithm of a log-normally distributed random variable is
|
|
normally distributed:
|
|
|
|
>>> import numpy as np
|
|
>>> import matplotlib.pyplot as plt
|
|
>>> from scipy import stats
|
|
>>> fig, ax = plt.subplots(1, 1)
|
|
>>> mu, sigma = 2, 0.5
|
|
>>> X = stats.norm(loc=mu, scale=sigma)
|
|
>>> Y = stats.lognorm(s=sigma, scale=np.exp(mu))
|
|
>>> x = np.linspace(*X.interval(0.999))
|
|
>>> y = Y.rvs(size=10000)
|
|
>>> ax.plot(x, X.pdf(x), label='X (pdf)')
|
|
>>> ax.hist(np.log(y), density=True, bins=x, label='log(Y) (histogram)')
|
|
>>> ax.legend()
|
|
>>> plt.show()
|
|
|
|
"""
|
|
_support_mask = rv_continuous._open_support_mask
|
|
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("s", False, (0, np.inf), (False, False))]
|
|
|
|
def _rvs(self, s, size=None, random_state=None):
|
|
return np.exp(s * random_state.standard_normal(size))
|
|
|
|
def _pdf(self, x, s):
|
|
# lognorm.pdf(x, s) = 1 / (s*x*sqrt(2*pi)) * exp(-1/2*(log(x)/s)**2)
|
|
return np.exp(self._logpdf(x, s))
|
|
|
|
def _logpdf(self, x, s):
|
|
return _lognorm_logpdf(x, s)
|
|
|
|
def _cdf(self, x, s):
|
|
return _norm_cdf(np.log(x) / s)
|
|
|
|
def _logcdf(self, x, s):
|
|
return _norm_logcdf(np.log(x) / s)
|
|
|
|
def _ppf(self, q, s):
|
|
return np.exp(s * _norm_ppf(q))
|
|
|
|
def _sf(self, x, s):
|
|
return _norm_sf(np.log(x) / s)
|
|
|
|
def _logsf(self, x, s):
|
|
return _norm_logsf(np.log(x) / s)
|
|
|
|
def _isf(self, q, s):
|
|
return np.exp(s * _norm_isf(q))
|
|
|
|
def _stats(self, s):
|
|
p = np.exp(s*s)
|
|
mu = np.sqrt(p)
|
|
mu2 = p*(p-1)
|
|
g1 = np.sqrt(p-1)*(2+p)
|
|
g2 = np.polyval([1, 2, 3, 0, -6.0], p)
|
|
return mu, mu2, g1, g2
|
|
|
|
def _entropy(self, s):
|
|
return 0.5 * (1 + np.log(2*np.pi) + 2 * np.log(s))
|
|
|
|
@_call_super_mom
|
|
@extend_notes_in_docstring(rv_continuous, notes="""\
|
|
When `method='MLE'` and
|
|
the location parameter is fixed by using the `floc` argument,
|
|
this function uses explicit formulas for the maximum likelihood
|
|
estimation of the log-normal shape and scale parameters, so the
|
|
`optimizer`, `loc` and `scale` keyword arguments are ignored.
|
|
If the location is free, a likelihood maximum is found by
|
|
setting its partial derivative wrt to location to 0, and
|
|
solving by substituting the analytical expressions of shape
|
|
and scale (or provided parameters).
|
|
See, e.g., equation 3.1 in
|
|
A. Clifford Cohen & Betty Jones Whitten (1980)
|
|
Estimation in the Three-Parameter Lognormal Distribution,
|
|
Journal of the American Statistical Association, 75:370, 399-404
|
|
https://doi.org/10.2307/2287466
|
|
\n\n""")
|
|
def fit(self, data, *args, **kwds):
|
|
if kwds.pop('superfit', False):
|
|
return super().fit(data, *args, **kwds)
|
|
|
|
parameters = _check_fit_input_parameters(self, data, args, kwds)
|
|
data, fshape, floc, fscale = parameters
|
|
data_min = np.min(data)
|
|
|
|
def get_shape_scale(loc):
|
|
# Calculate maximum likelihood scale and shape with analytical
|
|
# formulas unless provided by the user
|
|
if fshape is None or fscale is None:
|
|
lndata = np.log(data - loc)
|
|
scale = fscale or np.exp(lndata.mean())
|
|
shape = fshape or np.sqrt(np.mean((lndata - np.log(scale))**2))
|
|
return shape, scale
|
|
|
|
def dL_dLoc(loc):
|
|
# Derivative of (positive) LL w.r.t. loc
|
|
shape, scale = get_shape_scale(loc)
|
|
shifted = data - loc
|
|
return np.sum((1 + np.log(shifted/scale)/shape**2)/shifted)
|
|
|
|
def ll(loc):
|
|
# (Positive) log-likelihood
|
|
shape, scale = get_shape_scale(loc)
|
|
return -self.nnlf((shape, loc, scale), data)
|
|
|
|
if floc is None:
|
|
# The location must be less than the minimum of the data.
|
|
# Back off a bit to avoid numerical issues.
|
|
spacing = np.spacing(data_min)
|
|
rbrack = data_min - spacing
|
|
|
|
# Find the right end of the bracket by successive doubling of the
|
|
# distance to data_min. We're interested in a maximum LL, so the
|
|
# slope dL_dLoc_rbrack should be negative at the right end.
|
|
# optimization for later: share shape, scale
|
|
dL_dLoc_rbrack = dL_dLoc(rbrack)
|
|
ll_rbrack = ll(rbrack)
|
|
delta = 2 * spacing # 2 * (data_min - rbrack)
|
|
while dL_dLoc_rbrack >= -1e-6:
|
|
rbrack = data_min - delta
|
|
dL_dLoc_rbrack = dL_dLoc(rbrack)
|
|
delta *= 2
|
|
|
|
if not np.isfinite(rbrack) or not np.isfinite(dL_dLoc_rbrack):
|
|
# If we never find a negative slope, either we missed it or the
|
|
# slope is always positive. It's usually the latter,
|
|
# which means
|
|
# loc = data_min - spacing
|
|
# But sometimes when shape and/or scale are fixed there are
|
|
# other issues, so be cautious.
|
|
return super().fit(data, *args, **kwds)
|
|
|
|
# Now find the left end of the bracket. Guess is `rbrack-1`
|
|
# unless that is too small of a difference to resolve. Double
|
|
# the size of the interval until the left end is found.
|
|
lbrack = np.minimum(np.nextafter(rbrack, -np.inf), rbrack-1)
|
|
dL_dLoc_lbrack = dL_dLoc(lbrack)
|
|
delta = 2 * (rbrack - lbrack)
|
|
while (np.isfinite(lbrack) and np.isfinite(dL_dLoc_lbrack)
|
|
and np.sign(dL_dLoc_lbrack) == np.sign(dL_dLoc_rbrack)):
|
|
lbrack = rbrack - delta
|
|
dL_dLoc_lbrack = dL_dLoc(lbrack)
|
|
delta *= 2
|
|
|
|
# I don't recall observing this, but just in case...
|
|
if not np.isfinite(lbrack) or not np.isfinite(dL_dLoc_lbrack):
|
|
return super().fit(data, *args, **kwds)
|
|
|
|
# If we have a valid bracket, find the root
|
|
res = root_scalar(dL_dLoc, bracket=(lbrack, rbrack))
|
|
if not res.converged:
|
|
return super().fit(data, *args, **kwds)
|
|
|
|
# If the slope was positive near the minimum of the data,
|
|
# the maximum LL could be there instead of at the root. Compare
|
|
# the LL of the two points to decide.
|
|
ll_root = ll(res.root)
|
|
loc = res.root if ll_root > ll_rbrack else data_min-spacing
|
|
|
|
else:
|
|
if floc >= data_min:
|
|
raise FitDataError("lognorm", lower=0., upper=np.inf)
|
|
loc = floc
|
|
|
|
shape, scale = get_shape_scale(loc)
|
|
if not (self._argcheck(shape) and scale > 0):
|
|
return super().fit(data, *args, **kwds)
|
|
return shape, loc, scale
|
|
|
|
|
|
lognorm = lognorm_gen(a=0.0, name='lognorm')
|
|
|
|
|
|
class gibrat_gen(rv_continuous):
|
|
r"""A Gibrat continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `gibrat` is:
|
|
|
|
.. math::
|
|
|
|
f(x) = \frac{1}{x \sqrt{2\pi}} \exp(-\frac{1}{2} (\log(x))^2)
|
|
|
|
`gibrat` is a special case of `lognorm` with ``s=1``.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
_support_mask = rv_continuous._open_support_mask
|
|
|
|
def _shape_info(self):
|
|
return []
|
|
|
|
def _rvs(self, size=None, random_state=None):
|
|
return np.exp(random_state.standard_normal(size))
|
|
|
|
def _pdf(self, x):
|
|
# gibrat.pdf(x) = 1/(x*sqrt(2*pi)) * exp(-1/2*(log(x))**2)
|
|
return np.exp(self._logpdf(x))
|
|
|
|
def _logpdf(self, x):
|
|
return _lognorm_logpdf(x, 1.0)
|
|
|
|
def _cdf(self, x):
|
|
return _norm_cdf(np.log(x))
|
|
|
|
def _ppf(self, q):
|
|
return np.exp(_norm_ppf(q))
|
|
|
|
def _sf(self, x):
|
|
return _norm_sf(np.log(x))
|
|
|
|
def _isf(self, p):
|
|
return np.exp(_norm_isf(p))
|
|
|
|
def _stats(self):
|
|
p = np.e
|
|
mu = np.sqrt(p)
|
|
mu2 = p * (p - 1)
|
|
g1 = np.sqrt(p - 1) * (2 + p)
|
|
g2 = np.polyval([1, 2, 3, 0, -6.0], p)
|
|
return mu, mu2, g1, g2
|
|
|
|
def _entropy(self):
|
|
return 0.5 * np.log(2 * np.pi) + 0.5
|
|
|
|
|
|
gibrat = gibrat_gen(a=0.0, name='gibrat')
|
|
|
|
|
|
class maxwell_gen(rv_continuous):
|
|
r"""A Maxwell continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
A special case of a `chi` distribution, with ``df=3``, ``loc=0.0``,
|
|
and given ``scale = a``, where ``a`` is the parameter used in the
|
|
Mathworld description [1]_.
|
|
|
|
The probability density function for `maxwell` is:
|
|
|
|
.. math::
|
|
|
|
f(x) = \sqrt{2/\pi}x^2 \exp(-x^2/2)
|
|
|
|
for :math:`x >= 0`.
|
|
|
|
%(after_notes)s
|
|
|
|
References
|
|
----------
|
|
.. [1] http://mathworld.wolfram.com/MaxwellDistribution.html
|
|
|
|
%(example)s
|
|
"""
|
|
def _shape_info(self):
|
|
return []
|
|
|
|
def _rvs(self, size=None, random_state=None):
|
|
return chi.rvs(3.0, size=size, random_state=random_state)
|
|
|
|
def _pdf(self, x):
|
|
# maxwell.pdf(x) = sqrt(2/pi)x**2 * exp(-x**2/2)
|
|
return _SQRT_2_OVER_PI*x*x*np.exp(-x*x/2.0)
|
|
|
|
def _logpdf(self, x):
|
|
# Allow x=0 without 'divide by zero' warnings
|
|
with np.errstate(divide='ignore'):
|
|
return _LOG_SQRT_2_OVER_PI + 2*np.log(x) - 0.5*x*x
|
|
|
|
def _cdf(self, x):
|
|
return sc.gammainc(1.5, x*x/2.0)
|
|
|
|
def _ppf(self, q):
|
|
return np.sqrt(2*sc.gammaincinv(1.5, q))
|
|
|
|
def _sf(self, x):
|
|
return sc.gammaincc(1.5, x*x/2.0)
|
|
|
|
def _isf(self, q):
|
|
return np.sqrt(2*sc.gammainccinv(1.5, q))
|
|
|
|
def _stats(self):
|
|
val = 3*np.pi-8
|
|
return (2*np.sqrt(2.0/np.pi),
|
|
3-8/np.pi,
|
|
np.sqrt(2)*(32-10*np.pi)/val**1.5,
|
|
(-12*np.pi*np.pi + 160*np.pi - 384) / val**2.0)
|
|
|
|
def _entropy(self):
|
|
return _EULER + 0.5*np.log(2*np.pi)-0.5
|
|
|
|
|
|
maxwell = maxwell_gen(a=0.0, name='maxwell')
|
|
|
|
|
|
class mielke_gen(rv_continuous):
|
|
r"""A Mielke Beta-Kappa / Dagum continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `mielke` is:
|
|
|
|
.. math::
|
|
|
|
f(x, k, s) = \frac{k x^{k-1}}{(1+x^s)^{1+k/s}}
|
|
|
|
for :math:`x > 0` and :math:`k, s > 0`. The distribution is sometimes
|
|
called Dagum distribution ([2]_). It was already defined in [3]_, called
|
|
a Burr Type III distribution (`burr` with parameters ``c=s`` and
|
|
``d=k/s``).
|
|
|
|
`mielke` takes ``k`` and ``s`` as shape parameters.
|
|
|
|
%(after_notes)s
|
|
|
|
References
|
|
----------
|
|
.. [1] Mielke, P.W., 1973 "Another Family of Distributions for Describing
|
|
and Analyzing Precipitation Data." J. Appl. Meteor., 12, 275-280
|
|
.. [2] Dagum, C., 1977 "A new model for personal income distribution."
|
|
Economie Appliquee, 33, 327-367.
|
|
.. [3] Burr, I. W. "Cumulative frequency functions", Annals of
|
|
Mathematical Statistics, 13(2), pp 215-232 (1942).
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
ik = _ShapeInfo("k", False, (0, np.inf), (False, False))
|
|
i_s = _ShapeInfo("s", False, (0, np.inf), (False, False))
|
|
return [ik, i_s]
|
|
|
|
def _pdf(self, x, k, s):
|
|
return k*x**(k-1.0) / (1.0+x**s)**(1.0+k*1.0/s)
|
|
|
|
def _logpdf(self, x, k, s):
|
|
# Allow x=0 without 'divide by zero' warnings.
|
|
with np.errstate(divide='ignore'):
|
|
return np.log(k) + np.log(x)*(k - 1) - np.log1p(x**s)*(1 + k/s)
|
|
|
|
def _cdf(self, x, k, s):
|
|
return x**k / (1.0+x**s)**(k*1.0/s)
|
|
|
|
def _ppf(self, q, k, s):
|
|
qsk = pow(q, s*1.0/k)
|
|
return pow(qsk/(1.0-qsk), 1.0/s)
|
|
|
|
def _munp(self, n, k, s):
|
|
def nth_moment(n, k, s):
|
|
# n-th moment is defined for -k < n < s
|
|
return sc.gamma((k+n)/s)*sc.gamma(1-n/s)/sc.gamma(k/s)
|
|
|
|
return _lazywhere(n < s, (n, k, s), nth_moment, np.inf)
|
|
|
|
|
|
mielke = mielke_gen(a=0.0, name='mielke')
|
|
|
|
|
|
class kappa4_gen(rv_continuous):
|
|
r"""Kappa 4 parameter distribution.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for kappa4 is:
|
|
|
|
.. math::
|
|
|
|
f(x, h, k) = (1 - k x)^{1/k - 1} (1 - h (1 - k x)^{1/k})^{1/h-1}
|
|
|
|
if :math:`h` and :math:`k` are not equal to 0.
|
|
|
|
If :math:`h` or :math:`k` are zero then the pdf can be simplified:
|
|
|
|
h = 0 and k != 0::
|
|
|
|
kappa4.pdf(x, h, k) = (1.0 - k*x)**(1.0/k - 1.0)*
|
|
exp(-(1.0 - k*x)**(1.0/k))
|
|
|
|
h != 0 and k = 0::
|
|
|
|
kappa4.pdf(x, h, k) = exp(-x)*(1.0 - h*exp(-x))**(1.0/h - 1.0)
|
|
|
|
h = 0 and k = 0::
|
|
|
|
kappa4.pdf(x, h, k) = exp(-x)*exp(-exp(-x))
|
|
|
|
kappa4 takes :math:`h` and :math:`k` as shape parameters.
|
|
|
|
The kappa4 distribution returns other distributions when certain
|
|
:math:`h` and :math:`k` values are used.
|
|
|
|
+------+-------------+----------------+------------------+
|
|
| h | k=0.0 | k=1.0 | -inf<=k<=inf |
|
|
+======+=============+================+==================+
|
|
| -1.0 | Logistic | | Generalized |
|
|
| | | | Logistic(1) |
|
|
| | | | |
|
|
| | logistic(x) | | |
|
|
+------+-------------+----------------+------------------+
|
|
| 0.0 | Gumbel | Reverse | Generalized |
|
|
| | | Exponential(2) | Extreme Value |
|
|
| | | | |
|
|
| | gumbel_r(x) | | genextreme(x, k) |
|
|
+------+-------------+----------------+------------------+
|
|
| 1.0 | Exponential | Uniform | Generalized |
|
|
| | | | Pareto |
|
|
| | | | |
|
|
| | expon(x) | uniform(x) | genpareto(x, -k) |
|
|
+------+-------------+----------------+------------------+
|
|
|
|
(1) There are at least five generalized logistic distributions.
|
|
Four are described here:
|
|
https://en.wikipedia.org/wiki/Generalized_logistic_distribution
|
|
The "fifth" one is the one kappa4 should match which currently
|
|
isn't implemented in scipy:
|
|
https://en.wikipedia.org/wiki/Talk:Generalized_logistic_distribution
|
|
https://www.mathwave.com/help/easyfit/html/analyses/distributions/gen_logistic.html
|
|
(2) This distribution is currently not in scipy.
|
|
|
|
References
|
|
----------
|
|
J.C. Finney, "Optimization of a Skewed Logistic Distribution With Respect
|
|
to the Kolmogorov-Smirnov Test", A Dissertation Submitted to the Graduate
|
|
Faculty of the Louisiana State University and Agricultural and Mechanical
|
|
College, (August, 2004),
|
|
https://digitalcommons.lsu.edu/gradschool_dissertations/3672
|
|
|
|
J.R.M. Hosking, "The four-parameter kappa distribution". IBM J. Res.
|
|
Develop. 38 (3), 25 1-258 (1994).
|
|
|
|
B. Kumphon, A. Kaew-Man, P. Seenoi, "A Rainfall Distribution for the Lampao
|
|
Site in the Chi River Basin, Thailand", Journal of Water Resource and
|
|
Protection, vol. 4, 866-869, (2012).
|
|
:doi:`10.4236/jwarp.2012.410101`
|
|
|
|
C. Winchester, "On Estimation of the Four-Parameter Kappa Distribution", A
|
|
Thesis Submitted to Dalhousie University, Halifax, Nova Scotia, (March
|
|
2000).
|
|
http://www.nlc-bnc.ca/obj/s4/f2/dsk2/ftp01/MQ57336.pdf
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _argcheck(self, h, k):
|
|
shape = np.broadcast_arrays(h, k)[0].shape
|
|
return np.full(shape, fill_value=True)
|
|
|
|
def _shape_info(self):
|
|
ih = _ShapeInfo("h", False, (-np.inf, np.inf), (False, False))
|
|
ik = _ShapeInfo("k", False, (-np.inf, np.inf), (False, False))
|
|
return [ih, ik]
|
|
|
|
def _get_support(self, h, k):
|
|
condlist = [np.logical_and(h > 0, k > 0),
|
|
np.logical_and(h > 0, k == 0),
|
|
np.logical_and(h > 0, k < 0),
|
|
np.logical_and(h <= 0, k > 0),
|
|
np.logical_and(h <= 0, k == 0),
|
|
np.logical_and(h <= 0, k < 0)]
|
|
|
|
def f0(h, k):
|
|
return (1.0 - np.float_power(h, -k))/k
|
|
|
|
def f1(h, k):
|
|
return np.log(h)
|
|
|
|
def f3(h, k):
|
|
a = np.empty(np.shape(h))
|
|
a[:] = -np.inf
|
|
return a
|
|
|
|
def f5(h, k):
|
|
return 1.0/k
|
|
|
|
_a = _lazyselect(condlist,
|
|
[f0, f1, f0, f3, f3, f5],
|
|
[h, k],
|
|
default=np.nan)
|
|
|
|
def f0(h, k):
|
|
return 1.0/k
|
|
|
|
def f1(h, k):
|
|
a = np.empty(np.shape(h))
|
|
a[:] = np.inf
|
|
return a
|
|
|
|
_b = _lazyselect(condlist,
|
|
[f0, f1, f1, f0, f1, f1],
|
|
[h, k],
|
|
default=np.nan)
|
|
return _a, _b
|
|
|
|
def _pdf(self, x, h, k):
|
|
# kappa4.pdf(x, h, k) = (1.0 - k*x)**(1.0/k - 1.0)*
|
|
# (1.0 - h*(1.0 - k*x)**(1.0/k))**(1.0/h-1)
|
|
return np.exp(self._logpdf(x, h, k))
|
|
|
|
def _logpdf(self, x, h, k):
|
|
condlist = [np.logical_and(h != 0, k != 0),
|
|
np.logical_and(h == 0, k != 0),
|
|
np.logical_and(h != 0, k == 0),
|
|
np.logical_and(h == 0, k == 0)]
|
|
|
|
def f0(x, h, k):
|
|
'''pdf = (1.0 - k*x)**(1.0/k - 1.0)*(
|
|
1.0 - h*(1.0 - k*x)**(1.0/k))**(1.0/h-1.0)
|
|
logpdf = ...
|
|
'''
|
|
return (sc.xlog1py(1.0/k - 1.0, -k*x) +
|
|
sc.xlog1py(1.0/h - 1.0, -h*(1.0 - k*x)**(1.0/k)))
|
|
|
|
def f1(x, h, k):
|
|
'''pdf = (1.0 - k*x)**(1.0/k - 1.0)*np.exp(-(
|
|
1.0 - k*x)**(1.0/k))
|
|
logpdf = ...
|
|
'''
|
|
return sc.xlog1py(1.0/k - 1.0, -k*x) - (1.0 - k*x)**(1.0/k)
|
|
|
|
def f2(x, h, k):
|
|
'''pdf = np.exp(-x)*(1.0 - h*np.exp(-x))**(1.0/h - 1.0)
|
|
logpdf = ...
|
|
'''
|
|
return -x + sc.xlog1py(1.0/h - 1.0, -h*np.exp(-x))
|
|
|
|
def f3(x, h, k):
|
|
'''pdf = np.exp(-x-np.exp(-x))
|
|
logpdf = ...
|
|
'''
|
|
return -x - np.exp(-x)
|
|
|
|
return _lazyselect(condlist,
|
|
[f0, f1, f2, f3],
|
|
[x, h, k],
|
|
default=np.nan)
|
|
|
|
def _cdf(self, x, h, k):
|
|
return np.exp(self._logcdf(x, h, k))
|
|
|
|
def _logcdf(self, x, h, k):
|
|
condlist = [np.logical_and(h != 0, k != 0),
|
|
np.logical_and(h == 0, k != 0),
|
|
np.logical_and(h != 0, k == 0),
|
|
np.logical_and(h == 0, k == 0)]
|
|
|
|
def f0(x, h, k):
|
|
'''cdf = (1.0 - h*(1.0 - k*x)**(1.0/k))**(1.0/h)
|
|
logcdf = ...
|
|
'''
|
|
return (1.0/h)*sc.log1p(-h*(1.0 - k*x)**(1.0/k))
|
|
|
|
def f1(x, h, k):
|
|
'''cdf = np.exp(-(1.0 - k*x)**(1.0/k))
|
|
logcdf = ...
|
|
'''
|
|
return -(1.0 - k*x)**(1.0/k)
|
|
|
|
def f2(x, h, k):
|
|
'''cdf = (1.0 - h*np.exp(-x))**(1.0/h)
|
|
logcdf = ...
|
|
'''
|
|
return (1.0/h)*sc.log1p(-h*np.exp(-x))
|
|
|
|
def f3(x, h, k):
|
|
'''cdf = np.exp(-np.exp(-x))
|
|
logcdf = ...
|
|
'''
|
|
return -np.exp(-x)
|
|
|
|
return _lazyselect(condlist,
|
|
[f0, f1, f2, f3],
|
|
[x, h, k],
|
|
default=np.nan)
|
|
|
|
def _ppf(self, q, h, k):
|
|
condlist = [np.logical_and(h != 0, k != 0),
|
|
np.logical_and(h == 0, k != 0),
|
|
np.logical_and(h != 0, k == 0),
|
|
np.logical_and(h == 0, k == 0)]
|
|
|
|
def f0(q, h, k):
|
|
return 1.0/k*(1.0 - ((1.0 - (q**h))/h)**k)
|
|
|
|
def f1(q, h, k):
|
|
return 1.0/k*(1.0 - (-np.log(q))**k)
|
|
|
|
def f2(q, h, k):
|
|
'''ppf = -np.log((1.0 - (q**h))/h)
|
|
'''
|
|
return -sc.log1p(-(q**h)) + np.log(h)
|
|
|
|
def f3(q, h, k):
|
|
return -np.log(-np.log(q))
|
|
|
|
return _lazyselect(condlist,
|
|
[f0, f1, f2, f3],
|
|
[q, h, k],
|
|
default=np.nan)
|
|
|
|
def _get_stats_info(self, h, k):
|
|
condlist = [
|
|
np.logical_and(h < 0, k >= 0),
|
|
k < 0,
|
|
]
|
|
|
|
def f0(h, k):
|
|
return (-1.0/h*k).astype(int)
|
|
|
|
def f1(h, k):
|
|
return (-1.0/k).astype(int)
|
|
|
|
return _lazyselect(condlist, [f0, f1], [h, k], default=5)
|
|
|
|
def _stats(self, h, k):
|
|
maxr = self._get_stats_info(h, k)
|
|
outputs = [None if np.any(r < maxr) else np.nan for r in range(1, 5)]
|
|
return outputs[:]
|
|
|
|
def _mom1_sc(self, m, *args):
|
|
maxr = self._get_stats_info(args[0], args[1])
|
|
if m >= maxr:
|
|
return np.nan
|
|
return integrate.quad(self._mom_integ1, 0, 1, args=(m,)+args)[0]
|
|
|
|
|
|
kappa4 = kappa4_gen(name='kappa4')
|
|
|
|
|
|
class kappa3_gen(rv_continuous):
|
|
r"""Kappa 3 parameter distribution.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `kappa3` is:
|
|
|
|
.. math::
|
|
|
|
f(x, a) = a (a + x^a)^{-(a + 1)/a}
|
|
|
|
for :math:`x > 0` and :math:`a > 0`.
|
|
|
|
`kappa3` takes ``a`` as a shape parameter for :math:`a`.
|
|
|
|
References
|
|
----------
|
|
P.W. Mielke and E.S. Johnson, "Three-Parameter Kappa Distribution Maximum
|
|
Likelihood and Likelihood Ratio Tests", Methods in Weather Research,
|
|
701-707, (September, 1973),
|
|
:doi:`10.1175/1520-0493(1973)101<0701:TKDMLE>2.3.CO;2`
|
|
|
|
B. Kumphon, "Maximum Entropy and Maximum Likelihood Estimation for the
|
|
Three-Parameter Kappa Distribution", Open Journal of Statistics, vol 2,
|
|
415-419 (2012), :doi:`10.4236/ojs.2012.24050`
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("a", False, (0, np.inf), (False, False))]
|
|
|
|
def _pdf(self, x, a):
|
|
# kappa3.pdf(x, a) = a*(a + x**a)**(-(a + 1)/a), for x > 0
|
|
return a*(a + x**a)**(-1.0/a-1)
|
|
|
|
def _cdf(self, x, a):
|
|
return x*(a + x**a)**(-1.0/a)
|
|
|
|
def _sf(self, x, a):
|
|
x, a = np.broadcast_arrays(x, a) # some code paths pass scalars
|
|
sf = super()._sf(x, a)
|
|
|
|
# When the SF is small, another formulation is typically more accurate.
|
|
# However, it blows up for large `a`, so use it only if it also returns
|
|
# a small value of the SF.
|
|
cutoff = 0.01
|
|
i = sf < cutoff
|
|
sf2 = -sc.expm1(sc.xlog1py(-1.0 / a[i], a[i] * x[i]**-a[i]))
|
|
i2 = sf2 > cutoff
|
|
sf2[i2] = sf[i][i2] # replace bad values with original values
|
|
|
|
sf[i] = sf2
|
|
return sf
|
|
|
|
def _ppf(self, q, a):
|
|
return (a/(q**-a - 1.0))**(1.0/a)
|
|
|
|
def _isf(self, q, a):
|
|
lg = sc.xlog1py(-a, -q)
|
|
denom = sc.expm1(lg)
|
|
return (a / denom)**(1.0 / a)
|
|
|
|
def _stats(self, a):
|
|
outputs = [None if np.any(i < a) else np.nan for i in range(1, 5)]
|
|
return outputs[:]
|
|
|
|
def _mom1_sc(self, m, *args):
|
|
if np.any(m >= args[0]):
|
|
return np.nan
|
|
return integrate.quad(self._mom_integ1, 0, 1, args=(m,)+args)[0]
|
|
|
|
|
|
kappa3 = kappa3_gen(a=0.0, name='kappa3')
|
|
|
|
|
|
class moyal_gen(rv_continuous):
|
|
r"""A Moyal continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `moyal` is:
|
|
|
|
.. math::
|
|
|
|
f(x) = \exp(-(x + \exp(-x))/2) / \sqrt{2\pi}
|
|
|
|
for a real number :math:`x`.
|
|
|
|
%(after_notes)s
|
|
|
|
This distribution has utility in high-energy physics and radiation
|
|
detection. It describes the energy loss of a charged relativistic
|
|
particle due to ionization of the medium [1]_. It also provides an
|
|
approximation for the Landau distribution. For an in depth description
|
|
see [2]_. For additional description, see [3]_.
|
|
|
|
References
|
|
----------
|
|
.. [1] J.E. Moyal, "XXX. Theory of ionization fluctuations",
|
|
The London, Edinburgh, and Dublin Philosophical Magazine
|
|
and Journal of Science, vol 46, 263-280, (1955).
|
|
:doi:`10.1080/14786440308521076` (gated)
|
|
.. [2] G. Cordeiro et al., "The beta Moyal: a useful skew distribution",
|
|
International Journal of Research and Reviews in Applied Sciences,
|
|
vol 10, 171-192, (2012).
|
|
http://www.arpapress.com/Volumes/Vol10Issue2/IJRRAS_10_2_02.pdf
|
|
.. [3] C. Walck, "Handbook on Statistical Distributions for
|
|
Experimentalists; International Report SUF-PFY/96-01", Chapter 26,
|
|
University of Stockholm: Stockholm, Sweden, (2007).
|
|
http://www.stat.rice.edu/~dobelman/textfiles/DistributionsHandbook.pdf
|
|
|
|
.. versionadded:: 1.1.0
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return []
|
|
|
|
def _rvs(self, size=None, random_state=None):
|
|
u1 = gamma.rvs(a=0.5, scale=2, size=size,
|
|
random_state=random_state)
|
|
return -np.log(u1)
|
|
|
|
def _pdf(self, x):
|
|
return np.exp(-0.5 * (x + np.exp(-x))) / np.sqrt(2*np.pi)
|
|
|
|
def _cdf(self, x):
|
|
return sc.erfc(np.exp(-0.5 * x) / np.sqrt(2))
|
|
|
|
def _sf(self, x):
|
|
return sc.erf(np.exp(-0.5 * x) / np.sqrt(2))
|
|
|
|
def _ppf(self, x):
|
|
return -np.log(2 * sc.erfcinv(x)**2)
|
|
|
|
def _stats(self):
|
|
mu = np.log(2) + np.euler_gamma
|
|
mu2 = np.pi**2 / 2
|
|
g1 = 28 * np.sqrt(2) * sc.zeta(3) / np.pi**3
|
|
g2 = 4.
|
|
return mu, mu2, g1, g2
|
|
|
|
def _munp(self, n):
|
|
if n == 1.0:
|
|
return np.log(2) + np.euler_gamma
|
|
elif n == 2.0:
|
|
return np.pi**2 / 2 + (np.log(2) + np.euler_gamma)**2
|
|
elif n == 3.0:
|
|
tmp1 = 1.5 * np.pi**2 * (np.log(2)+np.euler_gamma)
|
|
tmp2 = (np.log(2)+np.euler_gamma)**3
|
|
tmp3 = 14 * sc.zeta(3)
|
|
return tmp1 + tmp2 + tmp3
|
|
elif n == 4.0:
|
|
tmp1 = 4 * 14 * sc.zeta(3) * (np.log(2) + np.euler_gamma)
|
|
tmp2 = 3 * np.pi**2 * (np.log(2) + np.euler_gamma)**2
|
|
tmp3 = (np.log(2) + np.euler_gamma)**4
|
|
tmp4 = 7 * np.pi**4 / 4
|
|
return tmp1 + tmp2 + tmp3 + tmp4
|
|
else:
|
|
# return generic for higher moments
|
|
# return rv_continuous._mom1_sc(self, n, b)
|
|
return self._mom1_sc(n)
|
|
|
|
|
|
moyal = moyal_gen(name="moyal")
|
|
|
|
|
|
class nakagami_gen(rv_continuous):
|
|
r"""A Nakagami continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `nakagami` is:
|
|
|
|
.. math::
|
|
|
|
f(x, \nu) = \frac{2 \nu^\nu}{\Gamma(\nu)} x^{2\nu-1} \exp(-\nu x^2)
|
|
|
|
for :math:`x >= 0`, :math:`\nu > 0`. The distribution was introduced in
|
|
[2]_, see also [1]_ for further information.
|
|
|
|
`nakagami` takes ``nu`` as a shape parameter for :math:`\nu`.
|
|
|
|
%(after_notes)s
|
|
|
|
References
|
|
----------
|
|
.. [1] "Nakagami distribution", Wikipedia
|
|
https://en.wikipedia.org/wiki/Nakagami_distribution
|
|
.. [2] M. Nakagami, "The m-distribution - A general formula of intensity
|
|
distribution of rapid fading", Statistical methods in radio wave
|
|
propagation, Pergamon Press, 1960, 3-36.
|
|
:doi:`10.1016/B978-0-08-009306-2.50005-4`
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _argcheck(self, nu):
|
|
return nu > 0
|
|
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("nu", False, (0, np.inf), (False, False))]
|
|
|
|
def _pdf(self, x, nu):
|
|
return np.exp(self._logpdf(x, nu))
|
|
|
|
def _logpdf(self, x, nu):
|
|
# nakagami.pdf(x, nu) = 2 * nu**nu / gamma(nu) *
|
|
# x**(2*nu-1) * exp(-nu*x**2)
|
|
return (np.log(2) + sc.xlogy(nu, nu) - sc.gammaln(nu) +
|
|
sc.xlogy(2*nu - 1, x) - nu*x**2)
|
|
|
|
def _cdf(self, x, nu):
|
|
return sc.gammainc(nu, nu*x*x)
|
|
|
|
def _ppf(self, q, nu):
|
|
return np.sqrt(1.0/nu*sc.gammaincinv(nu, q))
|
|
|
|
def _sf(self, x, nu):
|
|
return sc.gammaincc(nu, nu*x*x)
|
|
|
|
def _isf(self, p, nu):
|
|
return np.sqrt(1/nu * sc.gammainccinv(nu, p))
|
|
|
|
def _stats(self, nu):
|
|
mu = sc.poch(nu, 0.5)/np.sqrt(nu)
|
|
mu2 = 1.0-mu*mu
|
|
g1 = mu * (1 - 4*nu*mu2) / 2.0 / nu / np.power(mu2, 1.5)
|
|
g2 = -6*mu**4*nu + (8*nu-2)*mu**2-2*nu + 1
|
|
g2 /= nu*mu2**2.0
|
|
return mu, mu2, g1, g2
|
|
|
|
def _entropy(self, nu):
|
|
shape = np.shape(nu)
|
|
# because somehow this isn't taken care of by the infrastructure...
|
|
nu = np.atleast_1d(nu)
|
|
A = sc.gammaln(nu)
|
|
B = nu - (nu - 0.5) * sc.digamma(nu)
|
|
C = -0.5 * np.log(nu) - np.log(2)
|
|
h = A + B + C
|
|
# This is the asymptotic sum of A and B (see gh-17868)
|
|
norm_entropy = stats.norm._entropy()
|
|
# Above, this is lost to rounding error for large nu, so use the
|
|
# asymptotic sum when the approximation becomes accurate
|
|
i = nu > 5e4 # roundoff error ~ approximation error
|
|
# -1 / (12 * nu) is the O(1/nu) term; see gh-17929
|
|
h[i] = C[i] + norm_entropy - 1/(12*nu[i])
|
|
return h.reshape(shape)[()]
|
|
|
|
def _rvs(self, nu, size=None, random_state=None):
|
|
# this relationship can be found in [1] or by a direct calculation
|
|
return np.sqrt(random_state.standard_gamma(nu, size=size) / nu)
|
|
|
|
def _fitstart(self, data, args=None):
|
|
if isinstance(data, CensoredData):
|
|
data = data._uncensor()
|
|
if args is None:
|
|
args = (1.0,) * self.numargs
|
|
# Analytical justified estimates
|
|
# see: https://docs.scipy.org/doc/scipy/reference/tutorial/stats/continuous_nakagami.html
|
|
loc = np.min(data)
|
|
scale = np.sqrt(np.sum((data - loc)**2) / len(data))
|
|
return args + (loc, scale)
|
|
|
|
|
|
nakagami = nakagami_gen(a=0.0, name="nakagami")
|
|
|
|
|
|
# The function name ncx2 is an abbreviation for noncentral chi squared.
|
|
def _ncx2_log_pdf(x, df, nc):
|
|
# We use (xs**2 + ns**2)/2 = (xs - ns)**2/2 + xs*ns, and include the
|
|
# factor of exp(-xs*ns) into the ive function to improve numerical
|
|
# stability at large values of xs. See also `rice.pdf`.
|
|
df2 = df/2.0 - 1.0
|
|
xs, ns = np.sqrt(x), np.sqrt(nc)
|
|
res = sc.xlogy(df2/2.0, x/nc) - 0.5*(xs - ns)**2
|
|
corr = sc.ive(df2, xs*ns) / 2.0
|
|
# Return res + np.log(corr) avoiding np.log(0)
|
|
return _lazywhere(
|
|
corr > 0,
|
|
(res, corr),
|
|
f=lambda r, c: r + np.log(c),
|
|
fillvalue=-np.inf)
|
|
|
|
|
|
class ncx2_gen(rv_continuous):
|
|
r"""A non-central chi-squared continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `ncx2` is:
|
|
|
|
.. math::
|
|
|
|
f(x, k, \lambda) = \frac{1}{2} \exp(-(\lambda+x)/2)
|
|
(x/\lambda)^{(k-2)/4} I_{(k-2)/2}(\sqrt{\lambda x})
|
|
|
|
for :math:`x >= 0`, :math:`k > 0` and :math:`\lambda \ge 0`.
|
|
:math:`k` specifies the degrees of freedom (denoted ``df`` in the
|
|
implementation) and :math:`\lambda` is the non-centrality parameter
|
|
(denoted ``nc`` in the implementation). :math:`I_\nu` denotes the
|
|
modified Bessel function of first order of degree :math:`\nu`
|
|
(`scipy.special.iv`).
|
|
|
|
`ncx2` takes ``df`` and ``nc`` as shape parameters.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _argcheck(self, df, nc):
|
|
return (df > 0) & np.isfinite(df) & (nc >= 0)
|
|
|
|
def _shape_info(self):
|
|
idf = _ShapeInfo("df", False, (0, np.inf), (False, False))
|
|
inc = _ShapeInfo("nc", False, (0, np.inf), (True, False))
|
|
return [idf, inc]
|
|
|
|
def _rvs(self, df, nc, size=None, random_state=None):
|
|
return random_state.noncentral_chisquare(df, nc, size)
|
|
|
|
def _logpdf(self, x, df, nc):
|
|
cond = np.ones_like(x, dtype=bool) & (nc != 0)
|
|
return _lazywhere(cond, (x, df, nc), f=_ncx2_log_pdf,
|
|
f2=lambda x, df, _: chi2._logpdf(x, df))
|
|
|
|
def _pdf(self, x, df, nc):
|
|
cond = np.ones_like(x, dtype=bool) & (nc != 0)
|
|
with np.errstate(over='ignore'): # see gh-17432
|
|
return _lazywhere(cond, (x, df, nc), f=scu._ncx2_pdf,
|
|
f2=lambda x, df, _: chi2._pdf(x, df))
|
|
|
|
def _cdf(self, x, df, nc):
|
|
cond = np.ones_like(x, dtype=bool) & (nc != 0)
|
|
with np.errstate(over='ignore'): # see gh-17432
|
|
return _lazywhere(cond, (x, df, nc), f=scu._ncx2_cdf,
|
|
f2=lambda x, df, _: chi2._cdf(x, df))
|
|
|
|
def _ppf(self, q, df, nc):
|
|
cond = np.ones_like(q, dtype=bool) & (nc != 0)
|
|
with np.errstate(over='ignore'): # see gh-17432
|
|
return _lazywhere(cond, (q, df, nc), f=scu._ncx2_ppf,
|
|
f2=lambda x, df, _: chi2._ppf(x, df))
|
|
|
|
def _sf(self, x, df, nc):
|
|
cond = np.ones_like(x, dtype=bool) & (nc != 0)
|
|
with np.errstate(over='ignore'): # see gh-17432
|
|
return _lazywhere(cond, (x, df, nc), f=scu._ncx2_sf,
|
|
f2=lambda x, df, _: chi2._sf(x, df))
|
|
|
|
def _isf(self, x, df, nc):
|
|
cond = np.ones_like(x, dtype=bool) & (nc != 0)
|
|
with np.errstate(over='ignore'): # see gh-17432
|
|
return _lazywhere(cond, (x, df, nc), f=scu._ncx2_isf,
|
|
f2=lambda x, df, _: chi2._isf(x, df))
|
|
|
|
def _stats(self, df, nc):
|
|
_ncx2_mean = df + nc
|
|
def k_plus_cl(k, l, c):
|
|
return k + c*l
|
|
_ncx2_variance = 2.0 * k_plus_cl(df, nc, 2.0)
|
|
_ncx2_skewness = (np.sqrt(8.0) * k_plus_cl(df, nc, 3) /
|
|
np.sqrt(k_plus_cl(df, nc, 2.0)**3))
|
|
_ncx2_kurtosis_excess = (12.0 * k_plus_cl(df, nc, 4.0) /
|
|
k_plus_cl(df, nc, 2.0)**2)
|
|
return (
|
|
_ncx2_mean,
|
|
_ncx2_variance,
|
|
_ncx2_skewness,
|
|
_ncx2_kurtosis_excess,
|
|
)
|
|
|
|
|
|
ncx2 = ncx2_gen(a=0.0, name='ncx2')
|
|
|
|
|
|
class ncf_gen(rv_continuous):
|
|
r"""A non-central F distribution continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
See Also
|
|
--------
|
|
scipy.stats.f : Fisher distribution
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `ncf` is:
|
|
|
|
.. math::
|
|
|
|
f(x, n_1, n_2, \lambda) =
|
|
\exp\left(\frac{\lambda}{2} +
|
|
\lambda n_1 \frac{x}{2(n_1 x + n_2)}
|
|
\right)
|
|
n_1^{n_1/2} n_2^{n_2/2} x^{n_1/2 - 1} \\
|
|
(n_2 + n_1 x)^{-(n_1 + n_2)/2}
|
|
\gamma(n_1/2) \gamma(1 + n_2/2) \\
|
|
\frac{L^{\frac{n_1}{2}-1}_{n_2/2}
|
|
\left(-\lambda n_1 \frac{x}{2(n_1 x + n_2)}\right)}
|
|
{B(n_1/2, n_2/2)
|
|
\gamma\left(\frac{n_1 + n_2}{2}\right)}
|
|
|
|
for :math:`n_1, n_2 > 0`, :math:`\lambda \ge 0`. Here :math:`n_1` is the
|
|
degrees of freedom in the numerator, :math:`n_2` the degrees of freedom in
|
|
the denominator, :math:`\lambda` the non-centrality parameter,
|
|
:math:`\gamma` is the logarithm of the Gamma function, :math:`L_n^k` is a
|
|
generalized Laguerre polynomial and :math:`B` is the beta function.
|
|
|
|
`ncf` takes ``df1``, ``df2`` and ``nc`` as shape parameters. If ``nc=0``,
|
|
the distribution becomes equivalent to the Fisher distribution.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _argcheck(self, df1, df2, nc):
|
|
return (df1 > 0) & (df2 > 0) & (nc >= 0)
|
|
|
|
def _shape_info(self):
|
|
idf1 = _ShapeInfo("df1", False, (0, np.inf), (False, False))
|
|
idf2 = _ShapeInfo("df2", False, (0, np.inf), (False, False))
|
|
inc = _ShapeInfo("nc", False, (0, np.inf), (True, False))
|
|
return [idf1, idf2, inc]
|
|
|
|
def _rvs(self, dfn, dfd, nc, size=None, random_state=None):
|
|
return random_state.noncentral_f(dfn, dfd, nc, size)
|
|
|
|
def _pdf(self, x, dfn, dfd, nc):
|
|
# ncf.pdf(x, df1, df2, nc) = exp(nc/2 + nc*df1*x/(2*(df1*x+df2))) *
|
|
# df1**(df1/2) * df2**(df2/2) * x**(df1/2-1) *
|
|
# (df2+df1*x)**(-(df1+df2)/2) *
|
|
# gamma(df1/2)*gamma(1+df2/2) *
|
|
# L^{v1/2-1}^{v2/2}(-nc*v1*x/(2*(v1*x+v2))) /
|
|
# (B(v1/2, v2/2) * gamma((v1+v2)/2))
|
|
return scu._ncf_pdf(x, dfn, dfd, nc)
|
|
|
|
def _cdf(self, x, dfn, dfd, nc):
|
|
return scu._ncf_cdf(x, dfn, dfd, nc)
|
|
|
|
def _ppf(self, q, dfn, dfd, nc):
|
|
with np.errstate(over='ignore'): # see gh-17432
|
|
return scu._ncf_ppf(q, dfn, dfd, nc)
|
|
|
|
def _sf(self, x, dfn, dfd, nc):
|
|
return scu._ncf_sf(x, dfn, dfd, nc)
|
|
|
|
def _isf(self, x, dfn, dfd, nc):
|
|
with np.errstate(over='ignore'): # see gh-17432
|
|
return scu._ncf_isf(x, dfn, dfd, nc)
|
|
|
|
def _munp(self, n, dfn, dfd, nc):
|
|
val = (dfn * 1.0/dfd)**n
|
|
term = sc.gammaln(n+0.5*dfn) + sc.gammaln(0.5*dfd-n) - sc.gammaln(dfd*0.5)
|
|
val *= np.exp(-nc / 2.0+term)
|
|
val *= sc.hyp1f1(n+0.5*dfn, 0.5*dfn, 0.5*nc)
|
|
return val
|
|
|
|
def _stats(self, dfn, dfd, nc, moments='mv'):
|
|
mu = scu._ncf_mean(dfn, dfd, nc)
|
|
mu2 = scu._ncf_variance(dfn, dfd, nc)
|
|
g1 = scu._ncf_skewness(dfn, dfd, nc) if 's' in moments else None
|
|
g2 = scu._ncf_kurtosis_excess(
|
|
dfn, dfd, nc) if 'k' in moments else None
|
|
return mu, mu2, g1, g2
|
|
|
|
|
|
ncf = ncf_gen(a=0.0, name='ncf')
|
|
|
|
|
|
class t_gen(rv_continuous):
|
|
r"""A Student's t continuous random variable.
|
|
|
|
For the noncentral t distribution, see `nct`.
|
|
|
|
%(before_notes)s
|
|
|
|
See Also
|
|
--------
|
|
nct
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `t` is:
|
|
|
|
.. math::
|
|
|
|
f(x, \nu) = \frac{\Gamma((\nu+1)/2)}
|
|
{\sqrt{\pi \nu} \Gamma(\nu/2)}
|
|
(1+x^2/\nu)^{-(\nu+1)/2}
|
|
|
|
where :math:`x` is a real number and the degrees of freedom parameter
|
|
:math:`\nu` (denoted ``df`` in the implementation) satisfies
|
|
:math:`\nu > 0`. :math:`\Gamma` is the gamma function
|
|
(`scipy.special.gamma`).
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("df", False, (0, np.inf), (False, False))]
|
|
|
|
def _rvs(self, df, size=None, random_state=None):
|
|
return random_state.standard_t(df, size=size)
|
|
|
|
def _pdf(self, x, df):
|
|
return _lazywhere(
|
|
df == np.inf, (x, df),
|
|
f=lambda x, df: norm._pdf(x),
|
|
f2=lambda x, df: (
|
|
np.exp(self._logpdf(x, df))
|
|
)
|
|
)
|
|
|
|
def _logpdf(self, x, df):
|
|
|
|
def t_logpdf(x, df):
|
|
return (np.log(sc.poch(0.5 * df, 0.5))
|
|
- 0.5 * (np.log(df) + np.log(np.pi))
|
|
- (df + 1)/2*np.log1p(x * x/df))
|
|
|
|
def norm_logpdf(x, df):
|
|
return norm._logpdf(x)
|
|
|
|
return _lazywhere(df == np.inf, (x, df, ), f=norm_logpdf, f2=t_logpdf)
|
|
|
|
def _cdf(self, x, df):
|
|
return sc.stdtr(df, x)
|
|
|
|
def _sf(self, x, df):
|
|
return sc.stdtr(df, -x)
|
|
|
|
def _ppf(self, q, df):
|
|
return sc.stdtrit(df, q)
|
|
|
|
def _isf(self, q, df):
|
|
return -sc.stdtrit(df, q)
|
|
|
|
def _stats(self, df):
|
|
# infinite df -> normal distribution (0.0, 1.0, 0.0, 0.0)
|
|
infinite_df = np.isposinf(df)
|
|
|
|
mu = np.where(df > 1, 0.0, np.inf)
|
|
|
|
condlist = ((df > 1) & (df <= 2),
|
|
(df > 2) & np.isfinite(df),
|
|
infinite_df)
|
|
choicelist = (lambda df: np.broadcast_to(np.inf, df.shape),
|
|
lambda df: df / (df-2.0),
|
|
lambda df: np.broadcast_to(1, df.shape))
|
|
mu2 = _lazyselect(condlist, choicelist, (df,), np.nan)
|
|
|
|
g1 = np.where(df > 3, 0.0, np.nan)
|
|
|
|
condlist = ((df > 2) & (df <= 4),
|
|
(df > 4) & np.isfinite(df),
|
|
infinite_df)
|
|
choicelist = (lambda df: np.broadcast_to(np.inf, df.shape),
|
|
lambda df: 6.0 / (df-4.0),
|
|
lambda df: np.broadcast_to(0, df.shape))
|
|
g2 = _lazyselect(condlist, choicelist, (df,), np.nan)
|
|
|
|
return mu, mu2, g1, g2
|
|
|
|
def _entropy(self, df):
|
|
if df == np.inf:
|
|
return norm._entropy()
|
|
|
|
def regular(df):
|
|
half = df/2
|
|
half1 = (df + 1)/2
|
|
return (half1*(sc.digamma(half1) - sc.digamma(half))
|
|
+ np.log(np.sqrt(df)*sc.beta(half, 0.5)))
|
|
|
|
def asymptotic(df):
|
|
# Formula from Wolfram Alpha:
|
|
# "asymptotic expansion (d+1)/2 * (digamma((d+1)/2) - digamma(d/2))
|
|
# + log(sqrt(d) * beta(d/2, 1/2))"
|
|
h = (norm._entropy() + 1/df + (df**-2.)/4 - (df**-3.)/6
|
|
- (df**-4.)/8 + 3/10*(df**-5.) + (df**-6.)/4)
|
|
return h
|
|
|
|
h = _lazywhere(df >= 100, (df, ), f=asymptotic, f2=regular)
|
|
return h
|
|
|
|
|
|
t = t_gen(name='t')
|
|
|
|
|
|
class nct_gen(rv_continuous):
|
|
r"""A non-central Student's t continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
If :math:`Y` is a standard normal random variable and :math:`V` is
|
|
an independent chi-square random variable (`chi2`) with :math:`k` degrees
|
|
of freedom, then
|
|
|
|
.. math::
|
|
|
|
X = \frac{Y + c}{\sqrt{V/k}}
|
|
|
|
has a non-central Student's t distribution on the real line.
|
|
The degrees of freedom parameter :math:`k` (denoted ``df`` in the
|
|
implementation) satisfies :math:`k > 0` and the noncentrality parameter
|
|
:math:`c` (denoted ``nc`` in the implementation) is a real number.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _argcheck(self, df, nc):
|
|
return (df > 0) & (nc == nc)
|
|
|
|
def _shape_info(self):
|
|
idf = _ShapeInfo("df", False, (0, np.inf), (False, False))
|
|
inc = _ShapeInfo("nc", False, (-np.inf, np.inf), (False, False))
|
|
return [idf, inc]
|
|
|
|
def _rvs(self, df, nc, size=None, random_state=None):
|
|
n = norm.rvs(loc=nc, size=size, random_state=random_state)
|
|
c2 = chi2.rvs(df, size=size, random_state=random_state)
|
|
return n * np.sqrt(df) / np.sqrt(c2)
|
|
|
|
def _pdf(self, x, df, nc):
|
|
# Boost version has accuracy issues in left tail; see gh-16591
|
|
n = df*1.0
|
|
nc = nc*1.0
|
|
x2 = x*x
|
|
ncx2 = nc*nc*x2
|
|
fac1 = n + x2
|
|
trm1 = (n/2.*np.log(n) + sc.gammaln(n+1)
|
|
- (n*np.log(2) + nc*nc/2 + (n/2)*np.log(fac1)
|
|
+ sc.gammaln(n/2)))
|
|
Px = np.exp(trm1)
|
|
valF = ncx2 / (2*fac1)
|
|
trm1 = (np.sqrt(2)*nc*x*sc.hyp1f1(n/2+1, 1.5, valF)
|
|
/ np.asarray(fac1*sc.gamma((n+1)/2)))
|
|
trm2 = (sc.hyp1f1((n+1)/2, 0.5, valF)
|
|
/ np.asarray(np.sqrt(fac1)*sc.gamma(n/2+1)))
|
|
Px *= trm1+trm2
|
|
return np.clip(Px, 0, None)
|
|
|
|
def _cdf(self, x, df, nc):
|
|
with np.errstate(over='ignore'): # see gh-17432
|
|
return np.clip(scu._nct_cdf(x, df, nc), 0, 1)
|
|
|
|
def _ppf(self, q, df, nc):
|
|
with np.errstate(over='ignore'): # see gh-17432
|
|
return scu._nct_ppf(q, df, nc)
|
|
|
|
def _sf(self, x, df, nc):
|
|
with np.errstate(over='ignore'): # see gh-17432
|
|
return np.clip(scu._nct_sf(x, df, nc), 0, 1)
|
|
|
|
def _isf(self, x, df, nc):
|
|
with np.errstate(over='ignore'): # see gh-17432
|
|
return scu._nct_isf(x, df, nc)
|
|
|
|
def _stats(self, df, nc, moments='mv'):
|
|
mu = scu._nct_mean(df, nc)
|
|
mu2 = scu._nct_variance(df, nc)
|
|
g1 = scu._nct_skewness(df, nc) if 's' in moments else None
|
|
g2 = scu._nct_kurtosis_excess(df, nc) if 'k' in moments else None
|
|
return mu, mu2, g1, g2
|
|
|
|
|
|
nct = nct_gen(name="nct")
|
|
|
|
|
|
class pareto_gen(rv_continuous):
|
|
r"""A Pareto continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `pareto` is:
|
|
|
|
.. math::
|
|
|
|
f(x, b) = \frac{b}{x^{b+1}}
|
|
|
|
for :math:`x \ge 1`, :math:`b > 0`.
|
|
|
|
`pareto` takes ``b`` as a shape parameter for :math:`b`.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("b", False, (0, np.inf), (False, False))]
|
|
|
|
def _pdf(self, x, b):
|
|
# pareto.pdf(x, b) = b / x**(b+1)
|
|
return b * x**(-b-1)
|
|
|
|
def _cdf(self, x, b):
|
|
return 1 - x**(-b)
|
|
|
|
def _ppf(self, q, b):
|
|
return pow(1-q, -1.0/b)
|
|
|
|
def _sf(self, x, b):
|
|
return x**(-b)
|
|
|
|
def _isf(self, q, b):
|
|
return np.power(q, -1.0 / b)
|
|
|
|
def _stats(self, b, moments='mv'):
|
|
mu, mu2, g1, g2 = None, None, None, None
|
|
if 'm' in moments:
|
|
mask = b > 1
|
|
bt = np.extract(mask, b)
|
|
mu = np.full(np.shape(b), fill_value=np.inf)
|
|
np.place(mu, mask, bt / (bt-1.0))
|
|
if 'v' in moments:
|
|
mask = b > 2
|
|
bt = np.extract(mask, b)
|
|
mu2 = np.full(np.shape(b), fill_value=np.inf)
|
|
np.place(mu2, mask, bt / (bt-2.0) / (bt-1.0)**2)
|
|
if 's' in moments:
|
|
mask = b > 3
|
|
bt = np.extract(mask, b)
|
|
g1 = np.full(np.shape(b), fill_value=np.nan)
|
|
vals = 2 * (bt + 1.0) * np.sqrt(bt - 2.0) / ((bt - 3.0) * np.sqrt(bt))
|
|
np.place(g1, mask, vals)
|
|
if 'k' in moments:
|
|
mask = b > 4
|
|
bt = np.extract(mask, b)
|
|
g2 = np.full(np.shape(b), fill_value=np.nan)
|
|
vals = (6.0*np.polyval([1.0, 1.0, -6, -2], bt) /
|
|
np.polyval([1.0, -7.0, 12.0, 0.0], bt))
|
|
np.place(g2, mask, vals)
|
|
return mu, mu2, g1, g2
|
|
|
|
def _entropy(self, c):
|
|
return 1 + 1.0/c - np.log(c)
|
|
|
|
@_call_super_mom
|
|
@inherit_docstring_from(rv_continuous)
|
|
def fit(self, data, *args, **kwds):
|
|
parameters = _check_fit_input_parameters(self, data, args, kwds)
|
|
data, fshape, floc, fscale = parameters
|
|
|
|
# ensure that any fixed parameters don't violate constraints of the
|
|
# distribution before continuing.
|
|
if floc is not None and np.min(data) - floc < (fscale or 0):
|
|
raise FitDataError("pareto", lower=1, upper=np.inf)
|
|
|
|
ndata = data.shape[0]
|
|
|
|
def get_shape(scale, location):
|
|
# The first-order necessary condition on `shape` can be solved in
|
|
# closed form
|
|
return ndata / np.sum(np.log((data - location) / scale))
|
|
|
|
if floc is fscale is None:
|
|
# The support of the distribution is `(x - loc)/scale > 0`.
|
|
# The method of Lagrange multipliers turns this constraint
|
|
# into an equation that can be solved numerically.
|
|
# See gh-12545 for details.
|
|
|
|
def dL_dScale(shape, scale):
|
|
# The partial derivative of the log-likelihood function w.r.t.
|
|
# the scale.
|
|
return ndata * shape / scale
|
|
|
|
def dL_dLocation(shape, location):
|
|
# The partial derivative of the log-likelihood function w.r.t.
|
|
# the location.
|
|
return (shape + 1) * np.sum(1 / (data - location))
|
|
|
|
def fun_to_solve(scale):
|
|
# optimize the scale by setting the partial derivatives
|
|
# w.r.t. to location and scale equal and solving.
|
|
location = np.min(data) - scale
|
|
shape = fshape or get_shape(scale, location)
|
|
return dL_dLocation(shape, location) - dL_dScale(shape, scale)
|
|
|
|
def interval_contains_root(lbrack, rbrack):
|
|
# return true if the signs disagree.
|
|
return (np.sign(fun_to_solve(lbrack)) !=
|
|
np.sign(fun_to_solve(rbrack)))
|
|
|
|
# set brackets for `root_scalar` to use when optimizing over the
|
|
# scale such that a root is likely between them. Use user supplied
|
|
# guess or default 1.
|
|
brack_start = float(kwds.get('scale', 1))
|
|
lbrack, rbrack = brack_start / 2, brack_start * 2
|
|
# if a root is not between the brackets, iteratively expand them
|
|
# until they include a sign change, checking after each bracket is
|
|
# modified.
|
|
while (not interval_contains_root(lbrack, rbrack)
|
|
and (lbrack > 0 or rbrack < np.inf)):
|
|
lbrack /= 2
|
|
rbrack *= 2
|
|
res = root_scalar(fun_to_solve, bracket=[lbrack, rbrack])
|
|
if res.converged:
|
|
scale = res.root
|
|
loc = np.min(data) - scale
|
|
shape = fshape or get_shape(scale, loc)
|
|
|
|
# The Pareto distribution requires that its parameters satisfy
|
|
# the condition `fscale + floc <= min(data)`. However, to
|
|
# avoid numerical issues, we require that `fscale + floc`
|
|
# is strictly less than `min(data)`. If this condition
|
|
# is not satisfied, reduce the scale with `np.nextafter` to
|
|
# ensure that data does not fall outside of the support.
|
|
if not (scale + loc) < np.min(data):
|
|
scale = np.min(data) - loc
|
|
scale = np.nextafter(scale, 0)
|
|
return shape, loc, scale
|
|
else:
|
|
return super().fit(data, **kwds)
|
|
elif floc is None:
|
|
loc = np.min(data) - fscale
|
|
else:
|
|
loc = floc
|
|
# Source: Evans, Hastings, and Peacock (2000), Statistical
|
|
# Distributions, 3rd. Ed., John Wiley and Sons. Page 149.
|
|
scale = fscale or np.min(data) - loc
|
|
shape = fshape or get_shape(scale, loc)
|
|
return shape, loc, scale
|
|
|
|
|
|
pareto = pareto_gen(a=1.0, name="pareto")
|
|
|
|
|
|
class lomax_gen(rv_continuous):
|
|
r"""A Lomax (Pareto of the second kind) continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `lomax` is:
|
|
|
|
.. math::
|
|
|
|
f(x, c) = \frac{c}{(1+x)^{c+1}}
|
|
|
|
for :math:`x \ge 0`, :math:`c > 0`.
|
|
|
|
`lomax` takes ``c`` as a shape parameter for :math:`c`.
|
|
|
|
`lomax` is a special case of `pareto` with ``loc=-1.0``.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
|
|
|
|
def _pdf(self, x, c):
|
|
# lomax.pdf(x, c) = c / (1+x)**(c+1)
|
|
return c*1.0/(1.0+x)**(c+1.0)
|
|
|
|
def _logpdf(self, x, c):
|
|
return np.log(c) - (c+1)*sc.log1p(x)
|
|
|
|
def _cdf(self, x, c):
|
|
return -sc.expm1(-c*sc.log1p(x))
|
|
|
|
def _sf(self, x, c):
|
|
return np.exp(-c*sc.log1p(x))
|
|
|
|
def _logsf(self, x, c):
|
|
return -c*sc.log1p(x)
|
|
|
|
def _ppf(self, q, c):
|
|
return sc.expm1(-sc.log1p(-q)/c)
|
|
|
|
def _isf(self, q, c):
|
|
return q**(-1.0 / c) - 1
|
|
|
|
def _stats(self, c):
|
|
mu, mu2, g1, g2 = pareto.stats(c, loc=-1.0, moments='mvsk')
|
|
return mu, mu2, g1, g2
|
|
|
|
def _entropy(self, c):
|
|
return 1+1.0/c-np.log(c)
|
|
|
|
|
|
lomax = lomax_gen(a=0.0, name="lomax")
|
|
|
|
|
|
class pearson3_gen(rv_continuous):
|
|
r"""A pearson type III continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `pearson3` is:
|
|
|
|
.. math::
|
|
|
|
f(x, \kappa) = \frac{|\beta|}{\Gamma(\alpha)}
|
|
(\beta (x - \zeta))^{\alpha - 1}
|
|
\exp(-\beta (x - \zeta))
|
|
|
|
where:
|
|
|
|
.. math::
|
|
|
|
\beta = \frac{2}{\kappa}
|
|
|
|
\alpha = \beta^2 = \frac{4}{\kappa^2}
|
|
|
|
\zeta = -\frac{\alpha}{\beta} = -\beta
|
|
|
|
:math:`\Gamma` is the gamma function (`scipy.special.gamma`).
|
|
Pass the skew :math:`\kappa` into `pearson3` as the shape parameter
|
|
``skew``.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
References
|
|
----------
|
|
R.W. Vogel and D.E. McMartin, "Probability Plot Goodness-of-Fit and
|
|
Skewness Estimation Procedures for the Pearson Type 3 Distribution", Water
|
|
Resources Research, Vol.27, 3149-3158 (1991).
|
|
|
|
L.R. Salvosa, "Tables of Pearson's Type III Function", Ann. Math. Statist.,
|
|
Vol.1, 191-198 (1930).
|
|
|
|
"Using Modern Computing Tools to Fit the Pearson Type III Distribution to
|
|
Aviation Loads Data", Office of Aviation Research (2003).
|
|
|
|
"""
|
|
def _preprocess(self, x, skew):
|
|
# The real 'loc' and 'scale' are handled in the calling pdf(...). The
|
|
# local variables 'loc' and 'scale' within pearson3._pdf are set to
|
|
# the defaults just to keep them as part of the equations for
|
|
# documentation.
|
|
loc = 0.0
|
|
scale = 1.0
|
|
|
|
# If skew is small, return _norm_pdf. The divide between pearson3
|
|
# and norm was found by brute force and is approximately a skew of
|
|
# 0.000016. No one, I hope, would actually use a skew value even
|
|
# close to this small.
|
|
norm2pearson_transition = 0.000016
|
|
|
|
ans, x, skew = np.broadcast_arrays(1.0, x, skew)
|
|
ans = ans.copy()
|
|
|
|
# mask is True where skew is small enough to use the normal approx.
|
|
mask = np.absolute(skew) < norm2pearson_transition
|
|
invmask = ~mask
|
|
|
|
beta = 2.0 / (skew[invmask] * scale)
|
|
alpha = (scale * beta)**2
|
|
zeta = loc - alpha / beta
|
|
|
|
transx = beta * (x[invmask] - zeta)
|
|
return ans, x, transx, mask, invmask, beta, alpha, zeta
|
|
|
|
def _argcheck(self, skew):
|
|
# The _argcheck function in rv_continuous only allows positive
|
|
# arguments. The skew argument for pearson3 can be zero (which I want
|
|
# to handle inside pearson3._pdf) or negative. So just return True
|
|
# for all skew args.
|
|
return np.isfinite(skew)
|
|
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("skew", False, (-np.inf, np.inf), (False, False))]
|
|
|
|
def _stats(self, skew):
|
|
m = 0.0
|
|
v = 1.0
|
|
s = skew
|
|
k = 1.5*skew**2
|
|
return m, v, s, k
|
|
|
|
def _pdf(self, x, skew):
|
|
# pearson3.pdf(x, skew) = abs(beta) / gamma(alpha) *
|
|
# (beta * (x - zeta))**(alpha - 1) * exp(-beta*(x - zeta))
|
|
# Do the calculation in _logpdf since helps to limit
|
|
# overflow/underflow problems
|
|
ans = np.exp(self._logpdf(x, skew))
|
|
if ans.ndim == 0:
|
|
if np.isnan(ans):
|
|
return 0.0
|
|
return ans
|
|
ans[np.isnan(ans)] = 0.0
|
|
return ans
|
|
|
|
def _logpdf(self, x, skew):
|
|
# PEARSON3 logpdf GAMMA logpdf
|
|
# np.log(abs(beta))
|
|
# + (alpha - 1)*np.log(beta*(x - zeta)) + (a - 1)*np.log(x)
|
|
# - beta*(x - zeta) - x
|
|
# - sc.gammalnalpha) - sc.gammalna)
|
|
ans, x, transx, mask, invmask, beta, alpha, _ = (
|
|
self._preprocess(x, skew))
|
|
|
|
ans[mask] = np.log(_norm_pdf(x[mask]))
|
|
# use logpdf instead of _logpdf to fix issue mentioned in gh-12640
|
|
# (_logpdf does not return correct result for alpha = 1)
|
|
ans[invmask] = np.log(abs(beta)) + gamma.logpdf(transx, alpha)
|
|
return ans
|
|
|
|
def _cdf(self, x, skew):
|
|
ans, x, transx, mask, invmask, _, alpha, _ = (
|
|
self._preprocess(x, skew))
|
|
|
|
ans[mask] = _norm_cdf(x[mask])
|
|
|
|
skew = np.broadcast_to(skew, invmask.shape)
|
|
invmask1a = np.logical_and(invmask, skew > 0)
|
|
invmask1b = skew[invmask] > 0
|
|
# use cdf instead of _cdf to fix issue mentioned in gh-12640
|
|
# (_cdf produces NaNs for inputs outside support)
|
|
ans[invmask1a] = gamma.cdf(transx[invmask1b], alpha[invmask1b])
|
|
|
|
# The gamma._cdf approach wasn't working with negative skew.
|
|
# Note that multiplying the skew by -1 reflects about x=0.
|
|
# So instead of evaluating the CDF with negative skew at x,
|
|
# evaluate the SF with positive skew at -x.
|
|
invmask2a = np.logical_and(invmask, skew < 0)
|
|
invmask2b = skew[invmask] < 0
|
|
# gamma._sf produces NaNs when transx < 0, so use gamma.sf
|
|
ans[invmask2a] = gamma.sf(transx[invmask2b], alpha[invmask2b])
|
|
|
|
return ans
|
|
|
|
def _sf(self, x, skew):
|
|
ans, x, transx, mask, invmask, _, alpha, _ = (
|
|
self._preprocess(x, skew))
|
|
|
|
ans[mask] = _norm_sf(x[mask])
|
|
|
|
skew = np.broadcast_to(skew, invmask.shape)
|
|
invmask1a = np.logical_and(invmask, skew > 0)
|
|
invmask1b = skew[invmask] > 0
|
|
ans[invmask1a] = gamma.sf(transx[invmask1b], alpha[invmask1b])
|
|
|
|
invmask2a = np.logical_and(invmask, skew < 0)
|
|
invmask2b = skew[invmask] < 0
|
|
ans[invmask2a] = gamma.cdf(transx[invmask2b], alpha[invmask2b])
|
|
|
|
return ans
|
|
|
|
def _rvs(self, skew, size=None, random_state=None):
|
|
skew = np.broadcast_to(skew, size)
|
|
ans, _, _, mask, invmask, beta, alpha, zeta = (
|
|
self._preprocess([0], skew))
|
|
|
|
nsmall = mask.sum()
|
|
nbig = mask.size - nsmall
|
|
ans[mask] = random_state.standard_normal(nsmall)
|
|
ans[invmask] = random_state.standard_gamma(alpha, nbig)/beta + zeta
|
|
|
|
if size == ():
|
|
ans = ans[0]
|
|
return ans
|
|
|
|
def _ppf(self, q, skew):
|
|
ans, q, _, mask, invmask, beta, alpha, zeta = (
|
|
self._preprocess(q, skew))
|
|
ans[mask] = _norm_ppf(q[mask])
|
|
q = q[invmask]
|
|
q[beta < 0] = 1 - q[beta < 0] # for negative skew; see gh-17050
|
|
ans[invmask] = sc.gammaincinv(alpha, q)/beta + zeta
|
|
return ans
|
|
|
|
@_call_super_mom
|
|
@extend_notes_in_docstring(rv_continuous, notes="""\
|
|
Note that method of moments (`method='MM'`) is not
|
|
available for this distribution.\n\n""")
|
|
def fit(self, data, *args, **kwds):
|
|
if kwds.get("method", None) == 'MM':
|
|
raise NotImplementedError("Fit `method='MM'` is not available for "
|
|
"the Pearson3 distribution. Please try "
|
|
"the default `method='MLE'`.")
|
|
else:
|
|
return super(type(self), self).fit(data, *args, **kwds)
|
|
|
|
|
|
pearson3 = pearson3_gen(name="pearson3")
|
|
|
|
|
|
class powerlaw_gen(rv_continuous):
|
|
r"""A power-function continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
See Also
|
|
--------
|
|
pareto
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `powerlaw` is:
|
|
|
|
.. math::
|
|
|
|
f(x, a) = a x^{a-1}
|
|
|
|
for :math:`0 \le x \le 1`, :math:`a > 0`.
|
|
|
|
`powerlaw` takes ``a`` as a shape parameter for :math:`a`.
|
|
|
|
%(after_notes)s
|
|
|
|
For example, the support of `powerlaw` can be adjusted from the default
|
|
interval ``[0, 1]`` to the interval ``[c, c+d]`` by setting ``loc=c`` and
|
|
``scale=d``. For a power-law distribution with infinite support, see
|
|
`pareto`.
|
|
|
|
`powerlaw` is a special case of `beta` with ``b=1``.
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("a", False, (0, np.inf), (False, False))]
|
|
|
|
def _pdf(self, x, a):
|
|
# powerlaw.pdf(x, a) = a * x**(a-1)
|
|
return a*x**(a-1.0)
|
|
|
|
def _logpdf(self, x, a):
|
|
return np.log(a) + sc.xlogy(a - 1, x)
|
|
|
|
def _cdf(self, x, a):
|
|
return x**(a*1.0)
|
|
|
|
def _logcdf(self, x, a):
|
|
return a*np.log(x)
|
|
|
|
def _ppf(self, q, a):
|
|
return pow(q, 1.0/a)
|
|
|
|
def _sf(self, p, a):
|
|
return -sc.powm1(p, a)
|
|
|
|
def _munp(self, n, a):
|
|
# The following expression is correct for all real n (provided a > 0).
|
|
return a / (a + n)
|
|
|
|
def _stats(self, a):
|
|
return (a / (a + 1.0),
|
|
a / (a + 2.0) / (a + 1.0) ** 2,
|
|
-2.0 * ((a - 1.0) / (a + 3.0)) * np.sqrt((a + 2.0) / a),
|
|
6 * np.polyval([1, -1, -6, 2], a) / (a * (a + 3.0) * (a + 4)))
|
|
|
|
def _entropy(self, a):
|
|
return 1 - 1.0/a - np.log(a)
|
|
|
|
def _support_mask(self, x, a):
|
|
return (super()._support_mask(x, a)
|
|
& ((x != 0) | (a >= 1)))
|
|
|
|
@_call_super_mom
|
|
@extend_notes_in_docstring(rv_continuous, notes="""\
|
|
Notes specifically for ``powerlaw.fit``: If the location is a free
|
|
parameter and the value returned for the shape parameter is less than
|
|
one, the true maximum likelihood approaches infinity. This causes
|
|
numerical difficulties, and the resulting estimates are approximate.
|
|
\n\n""")
|
|
def fit(self, data, *args, **kwds):
|
|
# Summary of the strategy:
|
|
#
|
|
# 1) If the scale and location are fixed, return the shape according
|
|
# to a formula.
|
|
#
|
|
# 2) If the scale is fixed, there are two possibilities for the other
|
|
# parameters - one corresponding with shape less than one, and
|
|
# another with shape greater than one. Calculate both, and return
|
|
# whichever has the better log-likelihood.
|
|
#
|
|
# At this point, the scale is known to be free.
|
|
#
|
|
# 3) If the location is fixed, return the scale and shape according to
|
|
# formulas (or, if the shape is fixed, the fixed shape).
|
|
#
|
|
# At this point, the location and scale are both free. There are
|
|
# separate equations depending on whether the shape is less than one or
|
|
# greater than one.
|
|
#
|
|
# 4a) If the shape is less than one, there are formulas for shape,
|
|
# location, and scale.
|
|
# 4b) If the shape is greater than one, there are formulas for shape
|
|
# and scale, but there is a condition for location to be solved
|
|
# numerically.
|
|
#
|
|
# If the shape is fixed and less than one, we use 4a.
|
|
# If the shape is fixed and greater than one, we use 4b.
|
|
# If the shape is also free, we calculate fits using both 4a and 4b
|
|
# and choose the one that results a better log-likelihood.
|
|
#
|
|
# In many cases, the use of `np.nextafter` is used to avoid numerical
|
|
# issues.
|
|
if kwds.pop('superfit', False):
|
|
return super().fit(data, *args, **kwds)
|
|
|
|
if len(np.unique(data)) == 1:
|
|
return super().fit(data, *args, **kwds)
|
|
|
|
data, fshape, floc, fscale = _check_fit_input_parameters(self, data,
|
|
args, kwds)
|
|
penalized_nllf_args = [data, (self._fitstart(data),)]
|
|
penalized_nllf = self._reduce_func(penalized_nllf_args, {})[1]
|
|
|
|
# ensure that any fixed parameters don't violate constraints of the
|
|
# distribution before continuing. The support of the distribution
|
|
# is `0 < (x - loc)/scale < 1`.
|
|
if floc is not None:
|
|
if not data.min() > floc:
|
|
raise FitDataError('powerlaw', 0, 1)
|
|
if fscale is not None and not data.max() <= floc + fscale:
|
|
raise FitDataError('powerlaw', 0, 1)
|
|
|
|
if fscale is not None:
|
|
if fscale <= 0:
|
|
raise ValueError("Negative or zero `fscale` is outside the "
|
|
"range allowed by the distribution.")
|
|
if fscale <= np.ptp(data):
|
|
msg = "`fscale` must be greater than the range of data."
|
|
raise ValueError(msg)
|
|
|
|
def get_shape(data, loc, scale):
|
|
# The first-order necessary condition on `shape` can be solved in
|
|
# closed form. It can be used no matter the assumption of the
|
|
# value of the shape.
|
|
N = len(data)
|
|
return - N / (np.sum(np.log(data - loc)) - N*np.log(scale))
|
|
|
|
def get_scale(data, loc):
|
|
# analytical solution for `scale` based on the location.
|
|
# It can be used no matter the assumption of the value of the
|
|
# shape.
|
|
return data.max() - loc
|
|
|
|
# 1) The location and scale are both fixed. Analytically determine the
|
|
# shape.
|
|
if fscale is not None and floc is not None:
|
|
return get_shape(data, floc, fscale), floc, fscale
|
|
|
|
# 2) The scale is fixed. There are two possibilities for the other
|
|
# parameters. Choose the option with better log-likelihood.
|
|
if fscale is not None:
|
|
# using `data.min()` as the optimal location
|
|
loc_lt1 = np.nextafter(data.min(), -np.inf)
|
|
shape_lt1 = fshape or get_shape(data, loc_lt1, fscale)
|
|
ll_lt1 = penalized_nllf((shape_lt1, loc_lt1, fscale), data)
|
|
|
|
# using `data.max() - scale` as the optimal location
|
|
loc_gt1 = np.nextafter(data.max() - fscale, np.inf)
|
|
shape_gt1 = fshape or get_shape(data, loc_gt1, fscale)
|
|
ll_gt1 = penalized_nllf((shape_gt1, loc_gt1, fscale), data)
|
|
|
|
if ll_lt1 < ll_gt1:
|
|
return shape_lt1, loc_lt1, fscale
|
|
else:
|
|
return shape_gt1, loc_gt1, fscale
|
|
|
|
# 3) The location is fixed. Return the analytical scale and the
|
|
# analytical (or fixed) shape.
|
|
if floc is not None:
|
|
scale = get_scale(data, floc)
|
|
shape = fshape or get_shape(data, floc, scale)
|
|
return shape, floc, scale
|
|
|
|
# 4) Location and scale are both free
|
|
# 4a) Use formulas that assume `shape <= 1`.
|
|
|
|
def fit_loc_scale_w_shape_lt_1():
|
|
loc = np.nextafter(data.min(), -np.inf)
|
|
if np.abs(loc) < np.finfo(loc.dtype).tiny:
|
|
loc = np.sign(loc) * np.finfo(loc.dtype).tiny
|
|
scale = np.nextafter(get_scale(data, loc), np.inf)
|
|
shape = fshape or get_shape(data, loc, scale)
|
|
return shape, loc, scale
|
|
|
|
# 4b) Fit under the assumption that `shape > 1`. The support
|
|
# of the distribution is `(x - loc)/scale <= 1`. The method of Lagrange
|
|
# multipliers turns this constraint into the condition that
|
|
# dL_dScale - dL_dLocation must be zero, which is solved numerically.
|
|
# (Alternatively, substitute the constraint into the objective
|
|
# function before deriving the likelihood equation for location.)
|
|
|
|
def dL_dScale(data, shape, scale):
|
|
# The partial derivative of the log-likelihood function w.r.t.
|
|
# the scale.
|
|
return -data.shape[0] * shape / scale
|
|
|
|
def dL_dLocation(data, shape, loc):
|
|
# The partial derivative of the log-likelihood function w.r.t.
|
|
# the location.
|
|
return (shape - 1) * np.sum(1 / (loc - data)) # -1/(data-loc)
|
|
|
|
def dL_dLocation_star(loc):
|
|
# The derivative of the log-likelihood function w.r.t.
|
|
# the location, given optimal shape and scale
|
|
scale = np.nextafter(get_scale(data, loc), -np.inf)
|
|
shape = fshape or get_shape(data, loc, scale)
|
|
return dL_dLocation(data, shape, loc)
|
|
|
|
def fun_to_solve(loc):
|
|
# optimize the location by setting the partial derivatives
|
|
# w.r.t. to location and scale equal and solving.
|
|
scale = np.nextafter(get_scale(data, loc), -np.inf)
|
|
shape = fshape or get_shape(data, loc, scale)
|
|
return (dL_dScale(data, shape, scale)
|
|
- dL_dLocation(data, shape, loc))
|
|
|
|
def fit_loc_scale_w_shape_gt_1():
|
|
# set brackets for `root_scalar` to use when optimizing over the
|
|
# location such that a root is likely between them.
|
|
rbrack = np.nextafter(data.min(), -np.inf)
|
|
|
|
# if the sign of `dL_dLocation_star` is positive at rbrack,
|
|
# we're not going to find the root we're looking for
|
|
delta = (data.min() - rbrack)
|
|
while dL_dLocation_star(rbrack) > 0:
|
|
rbrack = data.min() - delta
|
|
delta *= 2
|
|
|
|
def interval_contains_root(lbrack, rbrack):
|
|
# Check if the interval (lbrack, rbrack) contains the root.
|
|
return (np.sign(fun_to_solve(lbrack))
|
|
!= np.sign(fun_to_solve(rbrack)))
|
|
|
|
lbrack = rbrack - 1
|
|
|
|
# if the sign doesn't change between the brackets, move the left
|
|
# bracket until it does. (The right bracket remains fixed at the
|
|
# maximum permissible value.)
|
|
i = 1.0
|
|
while (not interval_contains_root(lbrack, rbrack)
|
|
and lbrack != -np.inf):
|
|
lbrack = (data.min() - i)
|
|
i *= 2
|
|
|
|
root = optimize.root_scalar(fun_to_solve, bracket=(lbrack, rbrack))
|
|
|
|
loc = np.nextafter(root.root, -np.inf)
|
|
scale = np.nextafter(get_scale(data, loc), np.inf)
|
|
shape = fshape or get_shape(data, loc, scale)
|
|
return shape, loc, scale
|
|
|
|
# Shape is fixed - choose 4a or 4b accordingly.
|
|
if fshape is not None and fshape <= 1:
|
|
return fit_loc_scale_w_shape_lt_1()
|
|
elif fshape is not None and fshape > 1:
|
|
return fit_loc_scale_w_shape_gt_1()
|
|
|
|
# Shape is free
|
|
fit_shape_lt1 = fit_loc_scale_w_shape_lt_1()
|
|
ll_lt1 = self.nnlf(fit_shape_lt1, data)
|
|
|
|
fit_shape_gt1 = fit_loc_scale_w_shape_gt_1()
|
|
ll_gt1 = self.nnlf(fit_shape_gt1, data)
|
|
|
|
if ll_lt1 <= ll_gt1 and fit_shape_lt1[0] <= 1:
|
|
return fit_shape_lt1
|
|
elif ll_lt1 > ll_gt1 and fit_shape_gt1[0] > 1:
|
|
return fit_shape_gt1
|
|
else:
|
|
return super().fit(data, *args, **kwds)
|
|
|
|
|
|
powerlaw = powerlaw_gen(a=0.0, b=1.0, name="powerlaw")
|
|
|
|
|
|
class powerlognorm_gen(rv_continuous):
|
|
r"""A power log-normal continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `powerlognorm` is:
|
|
|
|
.. math::
|
|
|
|
f(x, c, s) = \frac{c}{x s} \phi(\log(x)/s)
|
|
(\Phi(-\log(x)/s))^{c-1}
|
|
|
|
where :math:`\phi` is the normal pdf, and :math:`\Phi` is the normal cdf,
|
|
and :math:`x > 0`, :math:`s, c > 0`.
|
|
|
|
`powerlognorm` takes :math:`c` and :math:`s` as shape parameters.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
_support_mask = rv_continuous._open_support_mask
|
|
|
|
def _shape_info(self):
|
|
ic = _ShapeInfo("c", False, (0, np.inf), (False, False))
|
|
i_s = _ShapeInfo("s", False, (0, np.inf), (False, False))
|
|
return [ic, i_s]
|
|
|
|
def _pdf(self, x, c, s):
|
|
return np.exp(self._logpdf(x, c, s))
|
|
|
|
def _logpdf(self, x, c, s):
|
|
return (np.log(c) - np.log(x) - np.log(s) +
|
|
_norm_logpdf(np.log(x) / s) +
|
|
_norm_logcdf(-np.log(x) / s) * (c - 1.))
|
|
|
|
def _cdf(self, x, c, s):
|
|
return -sc.expm1(self._logsf(x, c, s))
|
|
|
|
def _ppf(self, q, c, s):
|
|
return self._isf(1 - q, c, s)
|
|
|
|
def _sf(self, x, c, s):
|
|
return np.exp(self._logsf(x, c, s))
|
|
|
|
def _logsf(self, x, c, s):
|
|
return _norm_logcdf(-np.log(x) / s) * c
|
|
|
|
def _isf(self, q, c, s):
|
|
return np.exp(-_norm_ppf(q**(1/c)) * s)
|
|
|
|
|
|
powerlognorm = powerlognorm_gen(a=0.0, name="powerlognorm")
|
|
|
|
|
|
class powernorm_gen(rv_continuous):
|
|
r"""A power normal continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `powernorm` is:
|
|
|
|
.. math::
|
|
|
|
f(x, c) = c \phi(x) (\Phi(-x))^{c-1}
|
|
|
|
where :math:`\phi` is the normal pdf, :math:`\Phi` is the normal cdf,
|
|
:math:`x` is any real, and :math:`c > 0` [1]_.
|
|
|
|
`powernorm` takes ``c`` as a shape parameter for :math:`c`.
|
|
|
|
%(after_notes)s
|
|
|
|
References
|
|
----------
|
|
.. [1] NIST Engineering Statistics Handbook, Section 1.3.6.6.13,
|
|
https://www.itl.nist.gov/div898/handbook//eda/section3/eda366d.htm
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
|
|
|
|
def _pdf(self, x, c):
|
|
# powernorm.pdf(x, c) = c * phi(x) * (Phi(-x))**(c-1)
|
|
return c*_norm_pdf(x) * (_norm_cdf(-x)**(c-1.0))
|
|
|
|
def _logpdf(self, x, c):
|
|
return np.log(c) + _norm_logpdf(x) + (c-1)*_norm_logcdf(-x)
|
|
|
|
def _cdf(self, x, c):
|
|
return -sc.expm1(self._logsf(x, c))
|
|
|
|
def _ppf(self, q, c):
|
|
return -_norm_ppf(pow(1.0 - q, 1.0 / c))
|
|
|
|
def _sf(self, x, c):
|
|
return np.exp(self._logsf(x, c))
|
|
|
|
def _logsf(self, x, c):
|
|
return c * _norm_logcdf(-x)
|
|
|
|
def _isf(self, q, c):
|
|
return -_norm_ppf(np.exp(np.log(q) / c))
|
|
|
|
|
|
powernorm = powernorm_gen(name='powernorm')
|
|
|
|
|
|
class rdist_gen(rv_continuous):
|
|
r"""An R-distributed (symmetric beta) continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `rdist` is:
|
|
|
|
.. math::
|
|
|
|
f(x, c) = \frac{(1-x^2)^{c/2-1}}{B(1/2, c/2)}
|
|
|
|
for :math:`-1 \le x \le 1`, :math:`c > 0`. `rdist` is also called the
|
|
symmetric beta distribution: if B has a `beta` distribution with
|
|
parameters (c/2, c/2), then X = 2*B - 1 follows a R-distribution with
|
|
parameter c.
|
|
|
|
`rdist` takes ``c`` as a shape parameter for :math:`c`.
|
|
|
|
This distribution includes the following distribution kernels as
|
|
special cases::
|
|
|
|
c = 2: uniform
|
|
c = 3: `semicircular`
|
|
c = 4: Epanechnikov (parabolic)
|
|
c = 6: quartic (biweight)
|
|
c = 8: triweight
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
|
|
|
|
# use relation to the beta distribution for pdf, cdf, etc
|
|
def _pdf(self, x, c):
|
|
return np.exp(self._logpdf(x, c))
|
|
|
|
def _logpdf(self, x, c):
|
|
return -np.log(2) + beta._logpdf((x + 1)/2, c/2, c/2)
|
|
|
|
def _cdf(self, x, c):
|
|
return beta._cdf((x + 1)/2, c/2, c/2)
|
|
|
|
def _sf(self, x, c):
|
|
return beta._sf((x + 1)/2, c/2, c/2)
|
|
|
|
def _ppf(self, q, c):
|
|
return 2*beta._ppf(q, c/2, c/2) - 1
|
|
|
|
def _rvs(self, c, size=None, random_state=None):
|
|
return 2 * random_state.beta(c/2, c/2, size) - 1
|
|
|
|
def _munp(self, n, c):
|
|
numerator = (1 - (n % 2)) * sc.beta((n + 1.0) / 2, c / 2.0)
|
|
return numerator / sc.beta(1. / 2, c / 2.)
|
|
|
|
|
|
rdist = rdist_gen(a=-1.0, b=1.0, name="rdist")
|
|
|
|
|
|
class rayleigh_gen(rv_continuous):
|
|
r"""A Rayleigh continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `rayleigh` is:
|
|
|
|
.. math::
|
|
|
|
f(x) = x \exp(-x^2/2)
|
|
|
|
for :math:`x \ge 0`.
|
|
|
|
`rayleigh` is a special case of `chi` with ``df=2``.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
_support_mask = rv_continuous._open_support_mask
|
|
|
|
def _shape_info(self):
|
|
return []
|
|
|
|
def _rvs(self, size=None, random_state=None):
|
|
return chi.rvs(2, size=size, random_state=random_state)
|
|
|
|
def _pdf(self, r):
|
|
# rayleigh.pdf(r) = r * exp(-r**2/2)
|
|
return np.exp(self._logpdf(r))
|
|
|
|
def _logpdf(self, r):
|
|
return np.log(r) - 0.5 * r * r
|
|
|
|
def _cdf(self, r):
|
|
return -sc.expm1(-0.5 * r**2)
|
|
|
|
def _ppf(self, q):
|
|
return np.sqrt(-2 * sc.log1p(-q))
|
|
|
|
def _sf(self, r):
|
|
return np.exp(self._logsf(r))
|
|
|
|
def _logsf(self, r):
|
|
return -0.5 * r * r
|
|
|
|
def _isf(self, q):
|
|
return np.sqrt(-2 * np.log(q))
|
|
|
|
def _stats(self):
|
|
val = 4 - np.pi
|
|
return (np.sqrt(np.pi/2),
|
|
val/2,
|
|
2*(np.pi-3)*np.sqrt(np.pi)/val**1.5,
|
|
6*np.pi/val-16/val**2)
|
|
|
|
def _entropy(self):
|
|
return _EULER/2.0 + 1 - 0.5*np.log(2)
|
|
|
|
@_call_super_mom
|
|
@extend_notes_in_docstring(rv_continuous, notes="""\
|
|
Notes specifically for ``rayleigh.fit``: If the location is fixed with
|
|
the `floc` parameter, this method uses an analytical formula to find
|
|
the scale. Otherwise, this function uses a numerical root finder on
|
|
the first order conditions of the log-likelihood function to find the
|
|
MLE. Only the (optional) `loc` parameter is used as the initial guess
|
|
for the root finder; the `scale` parameter and any other parameters
|
|
for the optimizer are ignored.\n\n""")
|
|
def fit(self, data, *args, **kwds):
|
|
if kwds.pop('superfit', False):
|
|
return super().fit(data, *args, **kwds)
|
|
data, floc, fscale = _check_fit_input_parameters(self, data,
|
|
args, kwds)
|
|
|
|
def scale_mle(loc):
|
|
# Source: Statistical Distributions, 3rd Edition. Evans, Hastings,
|
|
# and Peacock (2000), Page 175
|
|
return (np.sum((data - loc) ** 2) / (2 * len(data))) ** .5
|
|
|
|
def loc_mle(loc):
|
|
# This implicit equation for `loc` is used when
|
|
# both `loc` and `scale` are free.
|
|
xm = data - loc
|
|
s1 = xm.sum()
|
|
s2 = (xm**2).sum()
|
|
s3 = (1/xm).sum()
|
|
return s1 - s2/(2*len(data))*s3
|
|
|
|
def loc_mle_scale_fixed(loc, scale=fscale):
|
|
# This implicit equation for `loc` is used when
|
|
# `scale` is fixed but `loc` is not.
|
|
xm = data - loc
|
|
return xm.sum() - scale**2 * (1/xm).sum()
|
|
|
|
if floc is not None:
|
|
# `loc` is fixed, analytically determine `scale`.
|
|
if np.any(data - floc <= 0):
|
|
raise FitDataError("rayleigh", lower=1, upper=np.inf)
|
|
else:
|
|
return floc, scale_mle(floc)
|
|
|
|
# Account for user provided guess of `loc`.
|
|
loc0 = kwds.get('loc')
|
|
if loc0 is None:
|
|
# Use _fitstart to estimate loc; ignore the returned scale.
|
|
loc0 = self._fitstart(data)[0]
|
|
|
|
fun = loc_mle if fscale is None else loc_mle_scale_fixed
|
|
rbrack = np.nextafter(np.min(data), -np.inf)
|
|
lbrack = _get_left_bracket(fun, rbrack)
|
|
res = optimize.root_scalar(fun, bracket=(lbrack, rbrack))
|
|
if not res.converged:
|
|
raise FitSolverError(res.flag)
|
|
loc = res.root
|
|
scale = fscale or scale_mle(loc)
|
|
return loc, scale
|
|
|
|
|
|
rayleigh = rayleigh_gen(a=0.0, name="rayleigh")
|
|
|
|
|
|
class reciprocal_gen(rv_continuous):
|
|
r"""A loguniform or reciprocal continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for this class is:
|
|
|
|
.. math::
|
|
|
|
f(x, a, b) = \frac{1}{x \log(b/a)}
|
|
|
|
for :math:`a \le x \le b`, :math:`b > a > 0`. This class takes
|
|
:math:`a` and :math:`b` as shape parameters.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
This doesn't show the equal probability of ``0.01``, ``0.1`` and
|
|
``1``. This is best when the x-axis is log-scaled:
|
|
|
|
>>> import numpy as np
|
|
>>> import matplotlib.pyplot as plt
|
|
>>> fig, ax = plt.subplots(1, 1)
|
|
>>> ax.hist(np.log10(r))
|
|
>>> ax.set_ylabel("Frequency")
|
|
>>> ax.set_xlabel("Value of random variable")
|
|
>>> ax.xaxis.set_major_locator(plt.FixedLocator([-2, -1, 0]))
|
|
>>> ticks = ["$10^{{ {} }}$".format(i) for i in [-2, -1, 0]]
|
|
>>> ax.set_xticklabels(ticks) # doctest: +SKIP
|
|
>>> plt.show()
|
|
|
|
This random variable will be log-uniform regardless of the base chosen for
|
|
``a`` and ``b``. Let's specify with base ``2`` instead:
|
|
|
|
>>> rvs = %(name)s(2**-2, 2**0).rvs(size=1000)
|
|
|
|
Values of ``1/4``, ``1/2`` and ``1`` are equally likely with this random
|
|
variable. Here's the histogram:
|
|
|
|
>>> fig, ax = plt.subplots(1, 1)
|
|
>>> ax.hist(np.log2(rvs))
|
|
>>> ax.set_ylabel("Frequency")
|
|
>>> ax.set_xlabel("Value of random variable")
|
|
>>> ax.xaxis.set_major_locator(plt.FixedLocator([-2, -1, 0]))
|
|
>>> ticks = ["$2^{{ {} }}$".format(i) for i in [-2, -1, 0]]
|
|
>>> ax.set_xticklabels(ticks) # doctest: +SKIP
|
|
>>> plt.show()
|
|
|
|
"""
|
|
def _argcheck(self, a, b):
|
|
return (a > 0) & (b > a)
|
|
|
|
def _shape_info(self):
|
|
ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
|
|
ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
|
|
return [ia, ib]
|
|
|
|
def _fitstart(self, data):
|
|
if isinstance(data, CensoredData):
|
|
data = data._uncensor()
|
|
# Reasonable, since support is [a, b]
|
|
return super()._fitstart(data, args=(np.min(data), np.max(data)))
|
|
|
|
def _get_support(self, a, b):
|
|
return a, b
|
|
|
|
def _pdf(self, x, a, b):
|
|
# reciprocal.pdf(x, a, b) = 1 / (x*(log(b) - log(a)))
|
|
return np.exp(self._logpdf(x, a, b))
|
|
|
|
def _logpdf(self, x, a, b):
|
|
return -np.log(x) - np.log(np.log(b) - np.log(a))
|
|
|
|
def _cdf(self, x, a, b):
|
|
return (np.log(x)-np.log(a)) / (np.log(b) - np.log(a))
|
|
|
|
def _ppf(self, q, a, b):
|
|
return np.exp(np.log(a) + q*(np.log(b) - np.log(a)))
|
|
|
|
def _munp(self, n, a, b):
|
|
t1 = 1 / (np.log(b) - np.log(a)) / n
|
|
t2 = np.real(np.exp(_log_diff(n * np.log(b), n*np.log(a))))
|
|
return t1 * t2
|
|
|
|
def _entropy(self, a, b):
|
|
return 0.5*(np.log(a) + np.log(b)) + np.log(np.log(b) - np.log(a))
|
|
|
|
fit_note = """\
|
|
`loguniform`/`reciprocal` is over-parameterized. `fit` automatically
|
|
fixes `scale` to 1 unless `fscale` is provided by the user.\n\n"""
|
|
|
|
@extend_notes_in_docstring(rv_continuous, notes=fit_note)
|
|
def fit(self, data, *args, **kwds):
|
|
fscale = kwds.pop('fscale', 1)
|
|
return super().fit(data, *args, fscale=fscale, **kwds)
|
|
|
|
# Details related to the decision of not defining
|
|
# the survival function for this distribution can be
|
|
# found in the PR: https://github.com/scipy/scipy/pull/18614
|
|
|
|
|
|
loguniform = reciprocal_gen(name="loguniform")
|
|
reciprocal = reciprocal_gen(name="reciprocal")
|
|
|
|
|
|
class rice_gen(rv_continuous):
|
|
r"""A Rice continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `rice` is:
|
|
|
|
.. math::
|
|
|
|
f(x, b) = x \exp(- \frac{x^2 + b^2}{2}) I_0(x b)
|
|
|
|
for :math:`x >= 0`, :math:`b > 0`. :math:`I_0` is the modified Bessel
|
|
function of order zero (`scipy.special.i0`).
|
|
|
|
`rice` takes ``b`` as a shape parameter for :math:`b`.
|
|
|
|
%(after_notes)s
|
|
|
|
The Rice distribution describes the length, :math:`r`, of a 2-D vector with
|
|
components :math:`(U+u, V+v)`, where :math:`U, V` are constant, :math:`u,
|
|
v` are independent Gaussian random variables with standard deviation
|
|
:math:`s`. Let :math:`R = \sqrt{U^2 + V^2}`. Then the pdf of :math:`r` is
|
|
``rice.pdf(x, R/s, scale=s)``.
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _argcheck(self, b):
|
|
return b >= 0
|
|
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("b", False, (0, np.inf), (True, False))]
|
|
|
|
def _rvs(self, b, size=None, random_state=None):
|
|
# https://en.wikipedia.org/wiki/Rice_distribution
|
|
t = b/np.sqrt(2) + random_state.standard_normal(size=(2,) + size)
|
|
return np.sqrt((t*t).sum(axis=0))
|
|
|
|
def _cdf(self, x, b):
|
|
return sc.chndtr(np.square(x), 2, np.square(b))
|
|
|
|
def _ppf(self, q, b):
|
|
return np.sqrt(sc.chndtrix(q, 2, np.square(b)))
|
|
|
|
def _pdf(self, x, b):
|
|
# rice.pdf(x, b) = x * exp(-(x**2+b**2)/2) * I[0](x*b)
|
|
#
|
|
# We use (x**2 + b**2)/2 = ((x-b)**2)/2 + xb.
|
|
# The factor of np.exp(-xb) is then included in the i0e function
|
|
# in place of the modified Bessel function, i0, improving
|
|
# numerical stability for large values of xb.
|
|
return x * np.exp(-(x-b)*(x-b)/2.0) * sc.i0e(x*b)
|
|
|
|
def _munp(self, n, b):
|
|
nd2 = n/2.0
|
|
n1 = 1 + nd2
|
|
b2 = b*b/2.0
|
|
return (2.0**(nd2) * np.exp(-b2) * sc.gamma(n1) *
|
|
sc.hyp1f1(n1, 1, b2))
|
|
|
|
|
|
rice = rice_gen(a=0.0, name="rice")
|
|
|
|
class irwinhall_gen(rv_continuous):
|
|
r"""An Irwin-Hall (Uniform Sum) continuous random variable.
|
|
|
|
An `Irwin-Hall <https://en.wikipedia.org/wiki/Irwin-Hall_distribution/>`_
|
|
continuous random variable is the sum of :math:`n` independent
|
|
standard uniform random variables [1]_ [2]_.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
Applications include `Rao's Spacing Test
|
|
<https://jammalam.faculty.pstat.ucsb.edu/html/favorite/test.htm>`_,
|
|
a more powerful alternative to the Rayleigh test
|
|
when the data are not unimodal, and radar [3]_.
|
|
|
|
Conveniently, the pdf and cdf are the :math:`n`-fold convolution of
|
|
the ones for the standard uniform distribution, which is also the
|
|
definition of the cardinal B-splines of degree :math:`n-1`
|
|
having knots evenly spaced from :math:`1` to :math:`n` [4]_ [5]_.
|
|
|
|
The Bates distribution, which represents the *mean* of statistically
|
|
independent, uniformly distributed random variables, is simply the
|
|
Irwin-Hall distribution scaled by :math:`1/n`. For example, the frozen
|
|
distribution ``bates = irwinhall(10, scale=1/10)`` represents the
|
|
distribution of the mean of 10 uniformly distributed random variables.
|
|
|
|
%(after_notes)s
|
|
|
|
References
|
|
----------
|
|
.. [1] P. Hall, "The distribution of means for samples of size N drawn
|
|
from a population in which the variate takes values between 0 and 1,
|
|
all such values being equally probable",
|
|
Biometrika, Volume 19, Issue 3-4, December 1927, Pages 240-244,
|
|
:doi:`10.1093/biomet/19.3-4.240`.
|
|
.. [2] J. O. Irwin, "On the frequency distribution of the means of samples
|
|
from a population having any law of frequency with finite moments,
|
|
with special reference to Pearson's Type II,
|
|
Biometrika, Volume 19, Issue 3-4, December 1927, Pages 225-239,
|
|
:doi:`0.1093/biomet/19.3-4.225`.
|
|
.. [3] K. Buchanan, T. Adeyemi, C. Flores-Molina, S. Wheeland and D. Overturf,
|
|
"Sidelobe behavior and bandwidth characteristics
|
|
of distributed antenna arrays,"
|
|
2018 United States National Committee of
|
|
URSI National Radio Science Meeting (USNC-URSI NRSM),
|
|
Boulder, CO, USA, 2018, pp. 1-2.
|
|
https://www.usnc-ursi-archive.org/nrsm/2018/papers/B15-9.pdf.
|
|
.. [4] Amos Ron, "Lecture 1: Cardinal B-splines and convolution operators", p. 1
|
|
https://pages.cs.wisc.edu/~deboor/887/lec1new.pdf.
|
|
.. [5] Trefethen, N. (2012, July). B-splines and convolution. Chebfun.
|
|
Retrieved April 30, 2024, from http://www.chebfun.org/examples/approx/BSplineConv.html.
|
|
|
|
%(example)s
|
|
""" # noqa: E501
|
|
|
|
@replace_notes_in_docstring(rv_continuous, notes="""\
|
|
Raises a ``NotImplementedError`` for the Irwin-Hall distribution because
|
|
the generic `fit` implementation is unreliable and no custom implementation
|
|
is available. Consider using `scipy.stats.fit`.\n\n""")
|
|
def fit(self, data, *args, **kwds):
|
|
fit_notes = ("The generic `fit` implementation is unreliable for this "
|
|
"distribution, and no custom implementation is available. "
|
|
"Consider using `scipy.stats.fit`.")
|
|
raise NotImplementedError(fit_notes)
|
|
|
|
def _argcheck(self, n):
|
|
return (n > 0) & _isintegral(n) & np.isrealobj(n)
|
|
|
|
def _get_support(self, n):
|
|
return 0, n
|
|
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("n", True, (1, np.inf), (True, False))]
|
|
|
|
def _munp(self, order, n):
|
|
# see https://link.springer.com/content/pdf/10.1007/s10959-020-01050-9.pdf
|
|
# page 640, with m=n, j=n+order
|
|
def vmunp(order, n):
|
|
return (sc.stirling2(n+order, n, exact=True)
|
|
/ sc.comb(n+order, n, exact=True))
|
|
|
|
# exact rationals, but we convert to float anyway
|
|
return np.vectorize(vmunp, otypes=[np.float64])(order, n)
|
|
|
|
@staticmethod
|
|
def _cardbspl(n):
|
|
t = np.arange(n+1)
|
|
return BSpline.basis_element(t)
|
|
|
|
def _pdf(self, x, n):
|
|
def vpdf(x, n):
|
|
return self._cardbspl(n)(x)
|
|
return np.vectorize(vpdf, otypes=[np.float64])(x, n)
|
|
|
|
def _cdf(self, x, n):
|
|
def vcdf(x, n):
|
|
return self._cardbspl(n).antiderivative()(x)
|
|
return np.vectorize(vcdf, otypes=[np.float64])(x, n)
|
|
|
|
def _sf(self, x, n):
|
|
def vsf(x, n):
|
|
return self._cardbspl(n).antiderivative()(n-x)
|
|
return np.vectorize(vsf, otypes=[np.float64])(x, n)
|
|
|
|
def _rvs(self, n, size=None, random_state=None, *args):
|
|
@_vectorize_rvs_over_shapes
|
|
def _rvs1(n, size=None, random_state=None):
|
|
n = np.floor(n).astype(int)
|
|
usize = (n,) if size is None else (n, *size)
|
|
return random_state.uniform(size=usize).sum(axis=0)
|
|
return _rvs1(n, size=size, random_state=random_state)
|
|
|
|
def _stats(self, n):
|
|
# mgf = ((exp(t) - 1)/t)**n
|
|
# m'th derivative follows from the generalized Leibniz rule
|
|
# Moments follow directly from the definition as the sum of n iid unif(0,1)
|
|
# and the summation rules for moments of a sum of iid random variables
|
|
# E(IH((n))) = n*E(U(0,1)) = n/2
|
|
# Var(IH((n))) = n*Var(U(0,1)) = n/12
|
|
# Skew(IH((n))) = Skew(U(0,1))/sqrt(n) = 0
|
|
# Kurt(IH((n))) = Kurt(U(0,1))/n = -6/(5*n) -- Fisher's excess kurtosis
|
|
# See e.g. https://en.wikipedia.org/wiki/Irwin%E2%80%93Hall_distribution
|
|
|
|
return n/2, n/12, 0, -6/(5*n)
|
|
|
|
irwinhall = irwinhall_gen(name="irwinhall")
|
|
|
|
class recipinvgauss_gen(rv_continuous):
|
|
r"""A reciprocal inverse Gaussian continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `recipinvgauss` is:
|
|
|
|
.. math::
|
|
|
|
f(x, \mu) = \frac{1}{\sqrt{2\pi x}}
|
|
\exp\left(\frac{-(1-\mu x)^2}{2\mu^2x}\right)
|
|
|
|
for :math:`x \ge 0`.
|
|
|
|
`recipinvgauss` takes ``mu`` as a shape parameter for :math:`\mu`.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("mu", False, (0, np.inf), (False, False))]
|
|
|
|
def _pdf(self, x, mu):
|
|
# recipinvgauss.pdf(x, mu) =
|
|
# 1/sqrt(2*pi*x) * exp(-(1-mu*x)**2/(2*x*mu**2))
|
|
return np.exp(self._logpdf(x, mu))
|
|
|
|
def _logpdf(self, x, mu):
|
|
return _lazywhere(x > 0, (x, mu),
|
|
lambda x, mu: (-(1 - mu*x)**2.0 / (2*x*mu**2.0)
|
|
- 0.5*np.log(2*np.pi*x)),
|
|
fillvalue=-np.inf)
|
|
|
|
def _cdf(self, x, mu):
|
|
trm1 = 1.0/mu - x
|
|
trm2 = 1.0/mu + x
|
|
isqx = 1.0/np.sqrt(x)
|
|
return _norm_cdf(-isqx*trm1) - np.exp(2.0/mu)*_norm_cdf(-isqx*trm2)
|
|
|
|
def _sf(self, x, mu):
|
|
trm1 = 1.0/mu - x
|
|
trm2 = 1.0/mu + x
|
|
isqx = 1.0/np.sqrt(x)
|
|
return _norm_cdf(isqx*trm1) + np.exp(2.0/mu)*_norm_cdf(-isqx*trm2)
|
|
|
|
def _rvs(self, mu, size=None, random_state=None):
|
|
return 1.0/random_state.wald(mu, 1.0, size=size)
|
|
|
|
|
|
recipinvgauss = recipinvgauss_gen(a=0.0, name='recipinvgauss')
|
|
|
|
|
|
class semicircular_gen(rv_continuous):
|
|
r"""A semicircular continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
See Also
|
|
--------
|
|
rdist
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `semicircular` is:
|
|
|
|
.. math::
|
|
|
|
f(x) = \frac{2}{\pi} \sqrt{1-x^2}
|
|
|
|
for :math:`-1 \le x \le 1`.
|
|
|
|
The distribution is a special case of `rdist` with `c = 3`.
|
|
|
|
%(after_notes)s
|
|
|
|
References
|
|
----------
|
|
.. [1] "Wigner semicircle distribution",
|
|
https://en.wikipedia.org/wiki/Wigner_semicircle_distribution
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return []
|
|
|
|
def _pdf(self, x):
|
|
return 2.0/np.pi*np.sqrt(1-x*x)
|
|
|
|
def _logpdf(self, x):
|
|
return np.log(2/np.pi) + 0.5*sc.log1p(-x*x)
|
|
|
|
def _cdf(self, x):
|
|
return 0.5+1.0/np.pi*(x*np.sqrt(1-x*x) + np.arcsin(x))
|
|
|
|
def _ppf(self, q):
|
|
return rdist._ppf(q, 3)
|
|
|
|
def _rvs(self, size=None, random_state=None):
|
|
# generate values uniformly distributed on the area under the pdf
|
|
# (semi-circle) by randomly generating the radius and angle
|
|
r = np.sqrt(random_state.uniform(size=size))
|
|
a = np.cos(np.pi * random_state.uniform(size=size))
|
|
return r * a
|
|
|
|
def _stats(self):
|
|
return 0, 0.25, 0, -1.0
|
|
|
|
def _entropy(self):
|
|
return 0.64472988584940017414
|
|
|
|
|
|
semicircular = semicircular_gen(a=-1.0, b=1.0, name="semicircular")
|
|
|
|
|
|
class skewcauchy_gen(rv_continuous):
|
|
r"""A skewed Cauchy random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
See Also
|
|
--------
|
|
cauchy : Cauchy distribution
|
|
|
|
Notes
|
|
-----
|
|
|
|
The probability density function for `skewcauchy` is:
|
|
|
|
.. math::
|
|
|
|
f(x) = \frac{1}{\pi \left(\frac{x^2}{\left(a\, \text{sign}(x) + 1
|
|
\right)^2} + 1 \right)}
|
|
|
|
for a real number :math:`x` and skewness parameter :math:`-1 < a < 1`.
|
|
|
|
When :math:`a=0`, the distribution reduces to the usual Cauchy
|
|
distribution.
|
|
|
|
%(after_notes)s
|
|
|
|
References
|
|
----------
|
|
.. [1] "Skewed generalized *t* distribution", Wikipedia
|
|
https://en.wikipedia.org/wiki/Skewed_generalized_t_distribution#Skewed_Cauchy_distribution
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _argcheck(self, a):
|
|
return np.abs(a) < 1
|
|
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("a", False, (-1.0, 1.0), (False, False))]
|
|
|
|
def _pdf(self, x, a):
|
|
return 1 / (np.pi * (x**2 / (a * np.sign(x) + 1)**2 + 1))
|
|
|
|
def _cdf(self, x, a):
|
|
return np.where(x <= 0,
|
|
(1 - a) / 2 + (1 - a) / np.pi * np.arctan(x / (1 - a)),
|
|
(1 - a) / 2 + (1 + a) / np.pi * np.arctan(x / (1 + a)))
|
|
|
|
def _ppf(self, x, a):
|
|
i = x < self._cdf(0, a)
|
|
return np.where(i,
|
|
np.tan(np.pi / (1 - a) * (x - (1 - a) / 2)) * (1 - a),
|
|
np.tan(np.pi / (1 + a) * (x - (1 - a) / 2)) * (1 + a))
|
|
|
|
def _stats(self, a, moments='mvsk'):
|
|
return np.nan, np.nan, np.nan, np.nan
|
|
|
|
def _fitstart(self, data):
|
|
# Use 0 as the initial guess of the skewness shape parameter.
|
|
# For the location and scale, estimate using the median and
|
|
# quartiles.
|
|
if isinstance(data, CensoredData):
|
|
data = data._uncensor()
|
|
p25, p50, p75 = np.percentile(data, [25, 50, 75])
|
|
return 0.0, p50, (p75 - p25)/2
|
|
|
|
|
|
skewcauchy = skewcauchy_gen(name='skewcauchy')
|
|
|
|
|
|
class skewnorm_gen(rv_continuous):
|
|
r"""A skew-normal random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The pdf is::
|
|
|
|
skewnorm.pdf(x, a) = 2 * norm.pdf(x) * norm.cdf(a*x)
|
|
|
|
`skewnorm` takes a real number :math:`a` as a skewness parameter
|
|
When ``a = 0`` the distribution is identical to a normal distribution
|
|
(`norm`). `rvs` implements the method of [1]_.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
References
|
|
----------
|
|
.. [1] A. Azzalini and A. Capitanio (1999). Statistical applications of
|
|
the multivariate skew-normal distribution. J. Roy. Statist. Soc.,
|
|
B 61, 579-602. :arxiv:`0911.2093`
|
|
|
|
"""
|
|
def _argcheck(self, a):
|
|
return np.isfinite(a)
|
|
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("a", False, (-np.inf, np.inf), (False, False))]
|
|
|
|
def _pdf(self, x, a):
|
|
return _lazywhere(
|
|
a == 0, (x, a), lambda x, a: _norm_pdf(x),
|
|
f2=lambda x, a: 2.*_norm_pdf(x)*_norm_cdf(a*x)
|
|
)
|
|
|
|
def _logpdf(self, x, a):
|
|
return _lazywhere(
|
|
a == 0, (x, a), lambda x, a: _norm_logpdf(x),
|
|
f2=lambda x, a: np.log(2)+_norm_logpdf(x)+_norm_logcdf(a*x),
|
|
)
|
|
|
|
def _cdf(self, x, a):
|
|
a = np.atleast_1d(a)
|
|
cdf = scu._skewnorm_cdf(x, 0.0, 1.0, a)
|
|
# for some reason, a isn't broadcasted if some of x are invalid
|
|
a = np.broadcast_to(a, cdf.shape)
|
|
# Boost is not accurate in left tail when a > 0
|
|
i_small_cdf = (cdf < 1e-6) & (a > 0)
|
|
cdf[i_small_cdf] = super()._cdf(x[i_small_cdf], a[i_small_cdf])
|
|
return np.clip(cdf, 0, 1)
|
|
|
|
def _ppf(self, x, a):
|
|
return scu._skewnorm_ppf(x, 0.0, 1.0, a)
|
|
|
|
def _sf(self, x, a):
|
|
# Boost's SF is implemented this way. Use whatever customizations
|
|
# we made in the _cdf.
|
|
return self._cdf(-x, -a)
|
|
|
|
def _isf(self, x, a):
|
|
return scu._skewnorm_isf(x, 0.0, 1.0, a)
|
|
|
|
def _rvs(self, a, size=None, random_state=None):
|
|
u0 = random_state.normal(size=size)
|
|
v = random_state.normal(size=size)
|
|
d = a/np.sqrt(1 + a**2)
|
|
u1 = d*u0 + v*np.sqrt(1 - d**2)
|
|
return np.where(u0 >= 0, u1, -u1)
|
|
|
|
def _stats(self, a, moments='mvsk'):
|
|
output = [None, None, None, None]
|
|
const = np.sqrt(2/np.pi) * a/np.sqrt(1 + a**2)
|
|
|
|
if 'm' in moments:
|
|
output[0] = const
|
|
if 'v' in moments:
|
|
output[1] = 1 - const**2
|
|
if 's' in moments:
|
|
output[2] = ((4 - np.pi)/2) * (const/np.sqrt(1 - const**2))**3
|
|
if 'k' in moments:
|
|
output[3] = (2*(np.pi - 3)) * (const**4/(1 - const**2)**2)
|
|
|
|
return output
|
|
|
|
# For odd order, the each noncentral moment of the skew-normal distribution
|
|
# with location 0 and scale 1 can be expressed as a polynomial in delta,
|
|
# where delta = a/sqrt(1 + a**2) and `a` is the skew-normal shape
|
|
# parameter. The dictionary _skewnorm_odd_moments defines those
|
|
# polynomials for orders up to 19. The dict is implemented as a cached
|
|
# property to reduce the impact of the creation of the dict on import time.
|
|
@cached_property
|
|
def _skewnorm_odd_moments(self):
|
|
skewnorm_odd_moments = {
|
|
1: Polynomial([1]),
|
|
3: Polynomial([3, -1]),
|
|
5: Polynomial([15, -10, 3]),
|
|
7: Polynomial([105, -105, 63, -15]),
|
|
9: Polynomial([945, -1260, 1134, -540, 105]),
|
|
11: Polynomial([10395, -17325, 20790, -14850, 5775, -945]),
|
|
13: Polynomial([135135, -270270, 405405, -386100, 225225, -73710,
|
|
10395]),
|
|
15: Polynomial([2027025, -4729725, 8513505, -10135125, 7882875,
|
|
-3869775, 1091475, -135135]),
|
|
17: Polynomial([34459425, -91891800, 192972780, -275675400,
|
|
268017750, -175429800, 74220300, -18378360,
|
|
2027025]),
|
|
19: Polynomial([654729075, -1964187225, 4714049340, -7856748900,
|
|
9166207050, -7499623950, 4230557100, -1571349780,
|
|
346621275, -34459425]),
|
|
}
|
|
return skewnorm_odd_moments
|
|
|
|
def _munp(self, order, a):
|
|
if order & 1:
|
|
if order > 19:
|
|
raise NotImplementedError("skewnorm noncentral moments not "
|
|
"implemented for odd orders greater "
|
|
"than 19.")
|
|
# Use the precomputed polynomials that were derived from the
|
|
# moment generating function.
|
|
delta = a/np.sqrt(1 + a**2)
|
|
return (delta * self._skewnorm_odd_moments[order](delta**2)
|
|
* _SQRT_2_OVER_PI)
|
|
else:
|
|
# For even order, the moment is just (order-1)!!, where !! is the
|
|
# notation for the double factorial; for an odd integer m, m!! is
|
|
# m*(m-2)*...*3*1.
|
|
# We could use special.factorial2, but we know the argument is odd,
|
|
# so avoid the overhead of that function and compute the result
|
|
# directly here.
|
|
return sc.gamma((order + 1)/2) * 2**(order/2) / _SQRT_PI
|
|
|
|
@extend_notes_in_docstring(rv_continuous, notes="""\
|
|
If ``method='mm'``, parameters fixed by the user are respected, and the
|
|
remaining parameters are used to match distribution and sample moments
|
|
where possible. For example, if the user fixes the location with
|
|
``floc``, the parameters will only match the distribution skewness and
|
|
variance to the sample skewness and variance; no attempt will be made
|
|
to match the means or minimize a norm of the errors.
|
|
Note that the maximum possible skewness magnitude of a
|
|
`scipy.stats.skewnorm` distribution is approximately 0.9952717; if the
|
|
magnitude of the data's sample skewness exceeds this, the returned
|
|
shape parameter ``a`` will be infinite.
|
|
\n\n""")
|
|
def fit(self, data, *args, **kwds):
|
|
if kwds.pop("superfit", False):
|
|
return super().fit(data, *args, **kwds)
|
|
if isinstance(data, CensoredData):
|
|
if data.num_censored() == 0:
|
|
data = data._uncensor()
|
|
else:
|
|
return super().fit(data, *args, **kwds)
|
|
|
|
# this extracts fixed shape, location, and scale however they
|
|
# are specified, and also leaves them in `kwds`
|
|
data, fa, floc, fscale = _check_fit_input_parameters(self, data,
|
|
args, kwds)
|
|
method = kwds.get("method", "mle").lower()
|
|
|
|
# See https://en.wikipedia.org/wiki/Skew_normal_distribution for
|
|
# moment formulas.
|
|
def skew_d(d): # skewness in terms of delta
|
|
return (4-np.pi)/2 * ((d * np.sqrt(2 / np.pi))**3
|
|
/ (1 - 2*d**2 / np.pi)**(3/2))
|
|
def d_skew(skew): # delta in terms of skewness
|
|
s_23 = np.abs(skew)**(2/3)
|
|
return np.sign(skew) * np.sqrt(
|
|
np.pi/2 * s_23 / (s_23 + ((4 - np.pi)/2)**(2/3))
|
|
)
|
|
|
|
# If method is method of moments, we don't need the user's guesses.
|
|
# Otherwise, extract the guesses from args and kwds.
|
|
if method == "mm":
|
|
a, loc, scale = None, None, None
|
|
else:
|
|
a = args[0] if len(args) else None
|
|
loc = kwds.pop('loc', None)
|
|
scale = kwds.pop('scale', None)
|
|
|
|
if fa is None and a is None: # not fixed and no guess: use MoM
|
|
# Solve for a that matches sample distribution skewness to sample
|
|
# skewness.
|
|
s = stats.skew(data)
|
|
if method == 'mle':
|
|
# For MLE initial conditions, clip skewness to a large but
|
|
# reasonable value in case the data skewness is out-of-range.
|
|
s = np.clip(s, -0.99, 0.99)
|
|
else:
|
|
s_max = skew_d(1)
|
|
s = np.clip(s, -s_max, s_max)
|
|
d = d_skew(s)
|
|
with np.errstate(divide='ignore'):
|
|
a = np.sqrt(np.divide(d**2, (1-d**2)))*np.sign(s)
|
|
else:
|
|
a = fa if fa is not None else a
|
|
d = a / np.sqrt(1 + a**2)
|
|
|
|
if fscale is None and scale is None:
|
|
v = np.var(data)
|
|
scale = np.sqrt(v / (1 - 2*d**2/np.pi))
|
|
elif fscale is not None:
|
|
scale = fscale
|
|
|
|
if floc is None and loc is None:
|
|
m = np.mean(data)
|
|
loc = m - scale*d*np.sqrt(2/np.pi)
|
|
elif floc is not None:
|
|
loc = floc
|
|
|
|
if method == 'mm':
|
|
return a, loc, scale
|
|
else:
|
|
# At this point, parameter "guesses" may equal the fixed parameters
|
|
# in kwds. No harm in passing them as guesses, too.
|
|
return super().fit(data, a, loc=loc, scale=scale, **kwds)
|
|
|
|
|
|
skewnorm = skewnorm_gen(name='skewnorm')
|
|
|
|
|
|
class trapezoid_gen(rv_continuous):
|
|
r"""A trapezoidal continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The trapezoidal distribution can be represented with an up-sloping line
|
|
from ``loc`` to ``(loc + c*scale)``, then constant to ``(loc + d*scale)``
|
|
and then downsloping from ``(loc + d*scale)`` to ``(loc+scale)``. This
|
|
defines the trapezoid base from ``loc`` to ``(loc+scale)`` and the flat
|
|
top from ``c`` to ``d`` proportional to the position along the base
|
|
with ``0 <= c <= d <= 1``. When ``c=d``, this is equivalent to `triang`
|
|
with the same values for `loc`, `scale` and `c`.
|
|
The method of [1]_ is used for computing moments.
|
|
|
|
`trapezoid` takes :math:`c` and :math:`d` as shape parameters.
|
|
|
|
%(after_notes)s
|
|
|
|
The standard form is in the range [0, 1] with c the mode.
|
|
The location parameter shifts the start to `loc`.
|
|
The scale parameter changes the width from 1 to `scale`.
|
|
|
|
%(example)s
|
|
|
|
References
|
|
----------
|
|
.. [1] Kacker, R.N. and Lawrence, J.F. (2007). Trapezoidal and triangular
|
|
distributions for Type B evaluation of standard uncertainty.
|
|
Metrologia 44, 117-127. :doi:`10.1088/0026-1394/44/2/003`
|
|
|
|
|
|
"""
|
|
def _argcheck(self, c, d):
|
|
return (c >= 0) & (c <= 1) & (d >= 0) & (d <= 1) & (d >= c)
|
|
|
|
def _shape_info(self):
|
|
ic = _ShapeInfo("c", False, (0, 1.0), (True, True))
|
|
id = _ShapeInfo("d", False, (0, 1.0), (True, True))
|
|
return [ic, id]
|
|
|
|
def _pdf(self, x, c, d):
|
|
u = 2 / (d-c+1)
|
|
|
|
return _lazyselect([x < c,
|
|
(c <= x) & (x <= d),
|
|
x > d],
|
|
[lambda x, c, d, u: u * x / c,
|
|
lambda x, c, d, u: u,
|
|
lambda x, c, d, u: u * (1-x) / (1-d)],
|
|
(x, c, d, u))
|
|
|
|
def _cdf(self, x, c, d):
|
|
return _lazyselect([x < c,
|
|
(c <= x) & (x <= d),
|
|
x > d],
|
|
[lambda x, c, d: x**2 / c / (d-c+1),
|
|
lambda x, c, d: (c + 2 * (x-c)) / (d-c+1),
|
|
lambda x, c, d: 1-((1-x) ** 2
|
|
/ (d-c+1) / (1-d))],
|
|
(x, c, d))
|
|
|
|
def _ppf(self, q, c, d):
|
|
qc, qd = self._cdf(c, c, d), self._cdf(d, c, d)
|
|
condlist = [q < qc, q <= qd, q > qd]
|
|
choicelist = [np.sqrt(q * c * (1 + d - c)),
|
|
0.5 * q * (1 + d - c) + 0.5 * c,
|
|
1 - np.sqrt((1 - q) * (d - c + 1) * (1 - d))]
|
|
return np.select(condlist, choicelist)
|
|
|
|
def _munp(self, n, c, d):
|
|
# Using the parameterization from Kacker, 2007, with
|
|
# a=bottom left, c=top left, d=top right, b=bottom right, then
|
|
# E[X^n] = h/(n+1)/(n+2) [(b^{n+2}-d^{n+2})/(b-d)
|
|
# - ((c^{n+2} - a^{n+2})/(c-a)]
|
|
# with h = 2/((b-a) - (d-c)). The corresponding parameterization
|
|
# in scipy, has a'=loc, c'=loc+c*scale, d'=loc+d*scale, b'=loc+scale,
|
|
# which for standard form reduces to a'=0, b'=1, c'=c, d'=d.
|
|
# Substituting into E[X^n] gives the bd' term as (1 - d^{n+2})/(1 - d)
|
|
# and the ac' term as c^{n-1} for the standard form. The bd' term has
|
|
# numerical difficulties near d=1, so replace (1 - d^{n+2})/(1-d)
|
|
# with expm1((n+2)*log(d))/(d-1).
|
|
# Testing with n=18 for c=(1e-30,1-eps) shows that this is stable.
|
|
# We still require an explicit test for d=1 to prevent divide by zero,
|
|
# and now a test for d=0 to prevent log(0).
|
|
ab_term = c**(n+1)
|
|
dc_term = _lazyselect(
|
|
[d == 0.0, (0.0 < d) & (d < 1.0), d == 1.0],
|
|
[lambda d: 1.0,
|
|
lambda d: np.expm1((n+2) * np.log(d)) / (d-1.0),
|
|
lambda d: n+2],
|
|
[d])
|
|
val = 2.0 / (1.0+d-c) * (dc_term - ab_term) / ((n+1) * (n+2))
|
|
return val
|
|
|
|
def _entropy(self, c, d):
|
|
# Using the parameterization from Wikipedia (van Dorp, 2003)
|
|
# with a=bottom left, c=top left, d=top right, b=bottom right
|
|
# gives a'=loc, b'=loc+c*scale, c'=loc+d*scale, d'=loc+scale,
|
|
# which for loc=0, scale=1 is a'=0, b'=c, c'=d, d'=1.
|
|
# Substituting into the entropy formula from Wikipedia gives
|
|
# the following result.
|
|
return 0.5 * (1.0-d+c) / (1.0+d-c) + np.log(0.5 * (1.0+d-c))
|
|
|
|
|
|
# deprecation of trapz, see #20486
|
|
deprmsg = ("`trapz` is deprecated in favour of `trapezoid` "
|
|
"and will be removed in SciPy 1.16.0.")
|
|
|
|
|
|
class trapz_gen(trapezoid_gen):
|
|
# override __call__ protocol from rv_generic to also
|
|
# deprecate instantiation of frozen distributions
|
|
"""
|
|
|
|
.. deprecated:: 1.14.0
|
|
`trapz` is deprecated and will be removed in SciPy 1.16.
|
|
Plese use `trapezoid` instead!
|
|
"""
|
|
def __call__(self, *args, **kwds):
|
|
warnings.warn(deprmsg, DeprecationWarning, stacklevel=2)
|
|
return self.freeze(*args, **kwds)
|
|
|
|
|
|
trapezoid = trapezoid_gen(a=0.0, b=1.0, name="trapezoid")
|
|
trapz = trapz_gen(a=0.0, b=1.0, name="trapz")
|
|
|
|
# since the deprecated class gets intantiated upon import (and we only want to
|
|
# warn upon use), add the deprecation to each class method
|
|
_method_names = [
|
|
"cdf", "entropy", "expect", "fit", "interval", "isf", "logcdf", "logpdf",
|
|
"logsf", "mean", "median", "moment", "pdf", "ppf", "rvs", "sf", "stats",
|
|
"std", "var"
|
|
]
|
|
|
|
|
|
class _DeprecationWrapper:
|
|
def __init__(self, method):
|
|
self.msg = (f"`trapz.{method}` is deprecated in favour of trapezoid.{method}. "
|
|
"Please replace all uses of the distribution class "
|
|
"`trapz` with `trapezoid`. `trapz` will be removed in SciPy 1.16.")
|
|
self.method = getattr(trapezoid, method)
|
|
|
|
def __call__(self, *args, **kwargs):
|
|
warnings.warn(self.msg, DeprecationWarning, stacklevel=2)
|
|
return self.method(*args, **kwargs)
|
|
|
|
|
|
for m in _method_names:
|
|
setattr(trapz, m, _DeprecationWrapper(m))
|
|
|
|
|
|
class triang_gen(rv_continuous):
|
|
r"""A triangular continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The triangular distribution can be represented with an up-sloping line from
|
|
``loc`` to ``(loc + c*scale)`` and then downsloping for ``(loc + c*scale)``
|
|
to ``(loc + scale)``.
|
|
|
|
`triang` takes ``c`` as a shape parameter for :math:`0 \le c \le 1`.
|
|
|
|
%(after_notes)s
|
|
|
|
The standard form is in the range [0, 1] with c the mode.
|
|
The location parameter shifts the start to `loc`.
|
|
The scale parameter changes the width from 1 to `scale`.
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _rvs(self, c, size=None, random_state=None):
|
|
return random_state.triangular(0, c, 1, size)
|
|
|
|
def _argcheck(self, c):
|
|
return (c >= 0) & (c <= 1)
|
|
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("c", False, (0, 1.0), (True, True))]
|
|
|
|
def _pdf(self, x, c):
|
|
# 0: edge case where c=0
|
|
# 1: generalised case for x < c, don't use x <= c, as it doesn't cope
|
|
# with c = 0.
|
|
# 2: generalised case for x >= c, but doesn't cope with c = 1
|
|
# 3: edge case where c=1
|
|
r = _lazyselect([c == 0,
|
|
x < c,
|
|
(x >= c) & (c != 1),
|
|
c == 1],
|
|
[lambda x, c: 2 - 2 * x,
|
|
lambda x, c: 2 * x / c,
|
|
lambda x, c: 2 * (1 - x) / (1 - c),
|
|
lambda x, c: 2 * x],
|
|
(x, c))
|
|
return r
|
|
|
|
def _cdf(self, x, c):
|
|
r = _lazyselect([c == 0,
|
|
x < c,
|
|
(x >= c) & (c != 1),
|
|
c == 1],
|
|
[lambda x, c: 2*x - x*x,
|
|
lambda x, c: x * x / c,
|
|
lambda x, c: (x*x - 2*x + c) / (c-1),
|
|
lambda x, c: x * x],
|
|
(x, c))
|
|
return r
|
|
|
|
def _ppf(self, q, c):
|
|
return np.where(q < c, np.sqrt(c * q), 1-np.sqrt((1-c) * (1-q)))
|
|
|
|
def _stats(self, c):
|
|
return ((c+1.0)/3.0,
|
|
(1.0-c+c*c)/18,
|
|
np.sqrt(2)*(2*c-1)*(c+1)*(c-2) / (5*np.power((1.0-c+c*c), 1.5)),
|
|
-3.0/5.0)
|
|
|
|
def _entropy(self, c):
|
|
return 0.5-np.log(2)
|
|
|
|
|
|
triang = triang_gen(a=0.0, b=1.0, name="triang")
|
|
|
|
|
|
class truncexpon_gen(rv_continuous):
|
|
r"""A truncated exponential continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `truncexpon` is:
|
|
|
|
.. math::
|
|
|
|
f(x, b) = \frac{\exp(-x)}{1 - \exp(-b)}
|
|
|
|
for :math:`0 <= x <= b`.
|
|
|
|
`truncexpon` takes ``b`` as a shape parameter for :math:`b`.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("b", False, (0, np.inf), (False, False))]
|
|
|
|
def _get_support(self, b):
|
|
return self.a, b
|
|
|
|
def _pdf(self, x, b):
|
|
# truncexpon.pdf(x, b) = exp(-x) / (1-exp(-b))
|
|
return np.exp(-x)/(-sc.expm1(-b))
|
|
|
|
def _logpdf(self, x, b):
|
|
return -x - np.log(-sc.expm1(-b))
|
|
|
|
def _cdf(self, x, b):
|
|
return sc.expm1(-x)/sc.expm1(-b)
|
|
|
|
def _ppf(self, q, b):
|
|
return -sc.log1p(q*sc.expm1(-b))
|
|
|
|
def _sf(self, x, b):
|
|
return (np.exp(-b) - np.exp(-x))/sc.expm1(-b)
|
|
|
|
def _isf(self, q, b):
|
|
return -np.log(np.exp(-b) - q * sc.expm1(-b))
|
|
|
|
def _munp(self, n, b):
|
|
# wrong answer with formula, same as in continuous.pdf
|
|
# return sc.gamman+1)-sc.gammainc1+n, b)
|
|
if n == 1:
|
|
return (1-(b+1)*np.exp(-b))/(-sc.expm1(-b))
|
|
elif n == 2:
|
|
return 2*(1-0.5*(b*b+2*b+2)*np.exp(-b))/(-sc.expm1(-b))
|
|
else:
|
|
# return generic for higher moments
|
|
return super()._munp(n, b)
|
|
|
|
def _entropy(self, b):
|
|
eB = np.exp(b)
|
|
return np.log(eB-1)+(1+eB*(b-1.0))/(1.0-eB)
|
|
|
|
|
|
truncexpon = truncexpon_gen(a=0.0, name='truncexpon')
|
|
|
|
|
|
# logsumexp trick for log(p + q) with only log(p) and log(q)
|
|
def _log_sum(log_p, log_q):
|
|
return sc.logsumexp([log_p, log_q], axis=0)
|
|
|
|
|
|
# same as above, but using -exp(x) = exp(x + πi)
|
|
def _log_diff(log_p, log_q):
|
|
return sc.logsumexp([log_p, log_q+np.pi*1j], axis=0)
|
|
|
|
|
|
def _log_gauss_mass(a, b):
|
|
"""Log of Gaussian probability mass within an interval"""
|
|
a, b = np.broadcast_arrays(a, b)
|
|
|
|
# Calculations in right tail are inaccurate, so we'll exploit the
|
|
# symmetry and work only in the left tail
|
|
case_left = b <= 0
|
|
case_right = a > 0
|
|
case_central = ~(case_left | case_right)
|
|
|
|
def mass_case_left(a, b):
|
|
return _log_diff(_norm_logcdf(b), _norm_logcdf(a))
|
|
|
|
def mass_case_right(a, b):
|
|
return mass_case_left(-b, -a)
|
|
|
|
def mass_case_central(a, b):
|
|
# Previously, this was implemented as:
|
|
# left_mass = mass_case_left(a, 0)
|
|
# right_mass = mass_case_right(0, b)
|
|
# return _log_sum(left_mass, right_mass)
|
|
# Catastrophic cancellation occurs as np.exp(log_mass) approaches 1.
|
|
# Correct for this with an alternative formulation.
|
|
# We're not concerned with underflow here: if only one term
|
|
# underflows, it was insignificant; if both terms underflow,
|
|
# the result can't accurately be represented in logspace anyway
|
|
# because sc.log1p(x) ~ x for small x.
|
|
return sc.log1p(-_norm_cdf(a) - _norm_cdf(-b))
|
|
|
|
# _lazyselect not working; don't care to debug it
|
|
out = np.full_like(a, fill_value=np.nan, dtype=np.complex128)
|
|
if a[case_left].size:
|
|
out[case_left] = mass_case_left(a[case_left], b[case_left])
|
|
if a[case_right].size:
|
|
out[case_right] = mass_case_right(a[case_right], b[case_right])
|
|
if a[case_central].size:
|
|
out[case_central] = mass_case_central(a[case_central], b[case_central])
|
|
return np.real(out) # discard ~0j
|
|
|
|
|
|
class truncnorm_gen(rv_continuous):
|
|
r"""A truncated normal continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
This distribution is the normal distribution centered on ``loc`` (default
|
|
0), with standard deviation ``scale`` (default 1), and truncated at ``a``
|
|
and ``b`` *standard deviations* from ``loc``. For arbitrary ``loc`` and
|
|
``scale``, ``a`` and ``b`` are *not* the abscissae at which the shifted
|
|
and scaled distribution is truncated.
|
|
|
|
.. note::
|
|
If ``a_trunc`` and ``b_trunc`` are the abscissae at which we wish
|
|
to truncate the distribution (as opposed to the number of standard
|
|
deviations from ``loc``), then we can calculate the distribution
|
|
parameters ``a`` and ``b`` as follows::
|
|
|
|
a, b = (a_trunc - loc) / scale, (b_trunc - loc) / scale
|
|
|
|
This is a common point of confusion. For additional clarification,
|
|
please see the example below.
|
|
|
|
%(example)s
|
|
|
|
In the examples above, ``loc=0`` and ``scale=1``, so the plot is truncated
|
|
at ``a`` on the left and ``b`` on the right. However, suppose we were to
|
|
produce the same histogram with ``loc = 1`` and ``scale=0.5``.
|
|
|
|
>>> loc, scale = 1, 0.5
|
|
>>> rv = truncnorm(a, b, loc=loc, scale=scale)
|
|
>>> x = np.linspace(truncnorm.ppf(0.01, a, b),
|
|
... truncnorm.ppf(0.99, a, b), 100)
|
|
>>> r = rv.rvs(size=1000)
|
|
|
|
>>> fig, ax = plt.subplots(1, 1)
|
|
>>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
|
|
>>> ax.hist(r, density=True, bins='auto', histtype='stepfilled', alpha=0.2)
|
|
>>> ax.set_xlim(a, b)
|
|
>>> ax.legend(loc='best', frameon=False)
|
|
>>> plt.show()
|
|
|
|
Note that the distribution is no longer appears to be truncated at
|
|
abscissae ``a`` and ``b``. That is because the *standard* normal
|
|
distribution is first truncated at ``a`` and ``b``, *then* the resulting
|
|
distribution is scaled by ``scale`` and shifted by ``loc``. If we instead
|
|
want the shifted and scaled distribution to be truncated at ``a`` and
|
|
``b``, we need to transform these values before passing them as the
|
|
distribution parameters.
|
|
|
|
>>> a_transformed, b_transformed = (a - loc) / scale, (b - loc) / scale
|
|
>>> rv = truncnorm(a_transformed, b_transformed, loc=loc, scale=scale)
|
|
>>> x = np.linspace(truncnorm.ppf(0.01, a, b),
|
|
... truncnorm.ppf(0.99, a, b), 100)
|
|
>>> r = rv.rvs(size=10000)
|
|
|
|
>>> fig, ax = plt.subplots(1, 1)
|
|
>>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
|
|
>>> ax.hist(r, density=True, bins='auto', histtype='stepfilled', alpha=0.2)
|
|
>>> ax.set_xlim(a-0.1, b+0.1)
|
|
>>> ax.legend(loc='best', frameon=False)
|
|
>>> plt.show()
|
|
"""
|
|
|
|
def _argcheck(self, a, b):
|
|
return a < b
|
|
|
|
def _shape_info(self):
|
|
ia = _ShapeInfo("a", False, (-np.inf, np.inf), (True, False))
|
|
ib = _ShapeInfo("b", False, (-np.inf, np.inf), (False, True))
|
|
return [ia, ib]
|
|
|
|
def _fitstart(self, data):
|
|
# Reasonable, since support is [a, b]
|
|
if isinstance(data, CensoredData):
|
|
data = data._uncensor()
|
|
return super()._fitstart(data, args=(np.min(data), np.max(data)))
|
|
|
|
def _get_support(self, a, b):
|
|
return a, b
|
|
|
|
def _pdf(self, x, a, b):
|
|
return np.exp(self._logpdf(x, a, b))
|
|
|
|
def _logpdf(self, x, a, b):
|
|
return _norm_logpdf(x) - _log_gauss_mass(a, b)
|
|
|
|
def _cdf(self, x, a, b):
|
|
return np.exp(self._logcdf(x, a, b))
|
|
|
|
def _logcdf(self, x, a, b):
|
|
x, a, b = np.broadcast_arrays(x, a, b)
|
|
logcdf = np.asarray(_log_gauss_mass(a, x) - _log_gauss_mass(a, b))
|
|
i = logcdf > -0.1 # avoid catastrophic cancellation
|
|
if np.any(i):
|
|
logcdf[i] = np.log1p(-np.exp(self._logsf(x[i], a[i], b[i])))
|
|
return logcdf
|
|
|
|
def _sf(self, x, a, b):
|
|
return np.exp(self._logsf(x, a, b))
|
|
|
|
def _logsf(self, x, a, b):
|
|
x, a, b = np.broadcast_arrays(x, a, b)
|
|
logsf = np.asarray(_log_gauss_mass(x, b) - _log_gauss_mass(a, b))
|
|
i = logsf > -0.1 # avoid catastrophic cancellation
|
|
if np.any(i):
|
|
logsf[i] = np.log1p(-np.exp(self._logcdf(x[i], a[i], b[i])))
|
|
return logsf
|
|
|
|
def _entropy(self, a, b):
|
|
A = _norm_cdf(a)
|
|
B = _norm_cdf(b)
|
|
Z = B - A
|
|
C = np.log(np.sqrt(2 * np.pi * np.e) * Z)
|
|
D = (a * _norm_pdf(a) - b * _norm_pdf(b)) / (2 * Z)
|
|
h = C + D
|
|
return h
|
|
|
|
def _ppf(self, q, a, b):
|
|
q, a, b = np.broadcast_arrays(q, a, b)
|
|
|
|
case_left = a < 0
|
|
case_right = ~case_left
|
|
|
|
def ppf_left(q, a, b):
|
|
log_Phi_x = _log_sum(_norm_logcdf(a),
|
|
np.log(q) + _log_gauss_mass(a, b))
|
|
return sc.ndtri_exp(log_Phi_x)
|
|
|
|
def ppf_right(q, a, b):
|
|
log_Phi_x = _log_sum(_norm_logcdf(-b),
|
|
np.log1p(-q) + _log_gauss_mass(a, b))
|
|
return -sc.ndtri_exp(log_Phi_x)
|
|
|
|
out = np.empty_like(q)
|
|
|
|
q_left = q[case_left]
|
|
q_right = q[case_right]
|
|
|
|
if q_left.size:
|
|
out[case_left] = ppf_left(q_left, a[case_left], b[case_left])
|
|
if q_right.size:
|
|
out[case_right] = ppf_right(q_right, a[case_right], b[case_right])
|
|
|
|
return out
|
|
|
|
def _isf(self, q, a, b):
|
|
# Mostly copy-paste of _ppf, but I think this is simpler than combining
|
|
q, a, b = np.broadcast_arrays(q, a, b)
|
|
|
|
case_left = b < 0
|
|
case_right = ~case_left
|
|
|
|
def isf_left(q, a, b):
|
|
log_Phi_x = _log_diff(_norm_logcdf(b),
|
|
np.log(q) + _log_gauss_mass(a, b))
|
|
return sc.ndtri_exp(np.real(log_Phi_x))
|
|
|
|
def isf_right(q, a, b):
|
|
log_Phi_x = _log_diff(_norm_logcdf(-a),
|
|
np.log1p(-q) + _log_gauss_mass(a, b))
|
|
return -sc.ndtri_exp(np.real(log_Phi_x))
|
|
|
|
out = np.empty_like(q)
|
|
|
|
q_left = q[case_left]
|
|
q_right = q[case_right]
|
|
|
|
if q_left.size:
|
|
out[case_left] = isf_left(q_left, a[case_left], b[case_left])
|
|
if q_right.size:
|
|
out[case_right] = isf_right(q_right, a[case_right], b[case_right])
|
|
|
|
return out
|
|
|
|
def _munp(self, n, a, b):
|
|
def n_th_moment(n, a, b):
|
|
"""
|
|
Returns n-th moment. Defined only if n >= 0.
|
|
Function cannot broadcast due to the loop over n
|
|
"""
|
|
pA, pB = self._pdf(np.asarray([a, b]), a, b)
|
|
probs = [pA, -pB]
|
|
moments = [0, 1]
|
|
for k in range(1, n+1):
|
|
# a or b might be infinite, and the corresponding pdf value
|
|
# is 0 in that case, but nan is returned for the
|
|
# multiplication. However, as b->infinity, pdf(b)*b**k -> 0.
|
|
# So it is safe to use _lazywhere to avoid the nan.
|
|
vals = _lazywhere(probs, [probs, [a, b]],
|
|
lambda x, y: x * y**(k-1), fillvalue=0)
|
|
mk = np.sum(vals) + (k-1) * moments[-2]
|
|
moments.append(mk)
|
|
return moments[-1]
|
|
|
|
return _lazywhere((n >= 0) & (a == a) & (b == b), (n, a, b),
|
|
np.vectorize(n_th_moment, otypes=[np.float64]),
|
|
np.nan)
|
|
|
|
def _stats(self, a, b, moments='mv'):
|
|
pA, pB = self.pdf(np.array([a, b]), a, b)
|
|
|
|
def _truncnorm_stats_scalar(a, b, pA, pB, moments):
|
|
m1 = pA - pB
|
|
mu = m1
|
|
# use _lazywhere to avoid nan (See detailed comment in _munp)
|
|
probs = [pA, -pB]
|
|
vals = _lazywhere(probs, [probs, [a, b]], lambda x, y: x*y,
|
|
fillvalue=0)
|
|
m2 = 1 + np.sum(vals)
|
|
vals = _lazywhere(probs, [probs, [a-mu, b-mu]], lambda x, y: x*y,
|
|
fillvalue=0)
|
|
# mu2 = m2 - mu**2, but not as numerically stable as:
|
|
# mu2 = (a-mu)*pA - (b-mu)*pB + 1
|
|
mu2 = 1 + np.sum(vals)
|
|
vals = _lazywhere(probs, [probs, [a, b]], lambda x, y: x*y**2,
|
|
fillvalue=0)
|
|
m3 = 2*m1 + np.sum(vals)
|
|
vals = _lazywhere(probs, [probs, [a, b]], lambda x, y: x*y**3,
|
|
fillvalue=0)
|
|
m4 = 3*m2 + np.sum(vals)
|
|
|
|
mu3 = m3 + m1 * (-3*m2 + 2*m1**2)
|
|
g1 = mu3 / np.power(mu2, 1.5)
|
|
mu4 = m4 + m1*(-4*m3 + 3*m1*(2*m2 - m1**2))
|
|
g2 = mu4 / mu2**2 - 3
|
|
return mu, mu2, g1, g2
|
|
|
|
_truncnorm_stats = np.vectorize(_truncnorm_stats_scalar,
|
|
excluded=('moments',))
|
|
return _truncnorm_stats(a, b, pA, pB, moments)
|
|
|
|
|
|
truncnorm = truncnorm_gen(name='truncnorm', momtype=1)
|
|
|
|
|
|
class truncpareto_gen(rv_continuous):
|
|
r"""An upper truncated Pareto continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
See Also
|
|
--------
|
|
pareto : Pareto distribution
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `truncpareto` is:
|
|
|
|
.. math::
|
|
|
|
f(x, b, c) = \frac{b}{1 - c^{-b}} \frac{1}{x^{b+1}}
|
|
|
|
for :math:`b > 0`, :math:`c > 1` and :math:`1 \le x \le c`.
|
|
|
|
`truncpareto` takes `b` and `c` as shape parameters for :math:`b` and
|
|
:math:`c`.
|
|
|
|
Notice that the upper truncation value :math:`c` is defined in
|
|
standardized form so that random values of an unscaled, unshifted variable
|
|
are within the range ``[1, c]``.
|
|
If ``u_r`` is the upper bound to a scaled and/or shifted variable,
|
|
then ``c = (u_r - loc) / scale``. In other words, the support of the
|
|
distribution becomes ``(scale + loc) <= x <= (c*scale + loc)`` when
|
|
`scale` and/or `loc` are provided.
|
|
|
|
%(after_notes)s
|
|
|
|
References
|
|
----------
|
|
.. [1] Burroughs, S. M., and Tebbens S. F.
|
|
"Upper-truncated power laws in natural systems."
|
|
Pure and Applied Geophysics 158.4 (2001): 741-757.
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
|
|
def _shape_info(self):
|
|
ib = _ShapeInfo("b", False, (0.0, np.inf), (False, False))
|
|
ic = _ShapeInfo("c", False, (1.0, np.inf), (False, False))
|
|
return [ib, ic]
|
|
|
|
def _argcheck(self, b, c):
|
|
return (b > 0.) & (c > 1.)
|
|
|
|
def _get_support(self, b, c):
|
|
return self.a, c
|
|
|
|
def _pdf(self, x, b, c):
|
|
return b * x**-(b+1) / (1 - 1/c**b)
|
|
|
|
def _logpdf(self, x, b, c):
|
|
return np.log(b) - np.log(-np.expm1(-b*np.log(c))) - (b+1)*np.log(x)
|
|
|
|
def _cdf(self, x, b, c):
|
|
return (1 - x**-b) / (1 - 1/c**b)
|
|
|
|
def _logcdf(self, x, b, c):
|
|
return np.log1p(-x**-b) - np.log1p(-1/c**b)
|
|
|
|
def _ppf(self, q, b, c):
|
|
return pow(1 - (1 - 1/c**b)*q, -1/b)
|
|
|
|
def _sf(self, x, b, c):
|
|
return (x**-b - 1/c**b) / (1 - 1/c**b)
|
|
|
|
def _logsf(self, x, b, c):
|
|
return np.log(x**-b - 1/c**b) - np.log1p(-1/c**b)
|
|
|
|
def _isf(self, q, b, c):
|
|
return pow(1/c**b + (1 - 1/c**b)*q, -1/b)
|
|
|
|
def _entropy(self, b, c):
|
|
return -(np.log(b/(1 - 1/c**b))
|
|
+ (b+1)*(np.log(c)/(c**b - 1) - 1/b))
|
|
|
|
def _munp(self, n, b, c):
|
|
if (n == b).all():
|
|
return b*np.log(c) / (1 - 1/c**b)
|
|
else:
|
|
return b / (b-n) * (c**b - c**n) / (c**b - 1)
|
|
|
|
def _fitstart(self, data):
|
|
if isinstance(data, CensoredData):
|
|
data = data._uncensor()
|
|
b, loc, scale = pareto.fit(data)
|
|
c = (max(data) - loc)/scale
|
|
return b, c, loc, scale
|
|
|
|
@_call_super_mom
|
|
@inherit_docstring_from(rv_continuous)
|
|
def fit(self, data, *args, **kwds):
|
|
if kwds.pop("superfit", False):
|
|
return super().fit(data, *args, **kwds)
|
|
|
|
def log_mean(x):
|
|
return np.mean(np.log(x))
|
|
|
|
def harm_mean(x):
|
|
return 1/np.mean(1/x)
|
|
|
|
def get_b(c, loc, scale):
|
|
u = (data-loc)/scale
|
|
harm_m = harm_mean(u)
|
|
log_m = log_mean(u)
|
|
quot = (harm_m-1)/log_m
|
|
return (1 - (quot-1) / (quot - (1 - 1/c)*harm_m/np.log(c)))/log_m
|
|
|
|
def get_c(loc, scale):
|
|
return (mx - loc)/scale
|
|
|
|
def get_loc(fc, fscale):
|
|
if fscale: # (fscale and fc) or (fscale and not fc)
|
|
loc = mn - fscale
|
|
return loc
|
|
if fc:
|
|
loc = (fc*mn - mx)/(fc - 1)
|
|
return loc
|
|
|
|
def get_scale(loc):
|
|
return mn - loc
|
|
|
|
# Functions used for optimisation; partial derivatives of
|
|
# the Lagrangian, set to equal 0.
|
|
|
|
def dL_dLoc(loc, b_=None):
|
|
# Partial derivative wrt location.
|
|
# Optimised upon when no parameters, or only b, are fixed.
|
|
scale = get_scale(loc)
|
|
c = get_c(loc, scale)
|
|
b = get_b(c, loc, scale) if b_ is None else b_
|
|
harm_m = harm_mean((data - loc)/scale)
|
|
return 1 - (1 + (c - 1)/(c**(b+1) - c)) * (1 - 1/(b+1)) * harm_m
|
|
|
|
def dL_dB(b, logc, logm):
|
|
# Partial derivative wrt b.
|
|
# Optimised upon whenever at least one parameter but b is fixed,
|
|
# and b is free.
|
|
return b - np.log1p(b*logc / (1 - b*logm)) / logc
|
|
|
|
def fallback(data, *args, **kwargs):
|
|
# Should any issue arise, default to the general fit method.
|
|
return super(truncpareto_gen, self).fit(data, *args, **kwargs)
|
|
|
|
parameters = _check_fit_input_parameters(self, data, args, kwds)
|
|
data, fb, fc, floc, fscale = parameters
|
|
mn, mx = data.min(), data.max()
|
|
mn_inf = np.nextafter(mn, -np.inf)
|
|
|
|
if (fb is not None
|
|
and fc is not None
|
|
and floc is not None
|
|
and fscale is not None):
|
|
raise ValueError("All parameters fixed."
|
|
"There is nothing to optimize.")
|
|
elif fc is None and floc is None and fscale is None:
|
|
if fb is None:
|
|
def cond_b(loc):
|
|
# b is positive only if this function is positive
|
|
scale = get_scale(loc)
|
|
c = get_c(loc, scale)
|
|
harm_m = harm_mean((data - loc)/scale)
|
|
return (1 + 1/(c-1)) * np.log(c) / harm_m - 1
|
|
|
|
# This gives an upper bound on loc allowing for a positive b.
|
|
# Iteratively look for a bracket for root_scalar.
|
|
mn_inf = np.nextafter(mn, -np.inf)
|
|
rbrack = mn_inf
|
|
i = 0
|
|
lbrack = rbrack - 1
|
|
while ((lbrack > -np.inf)
|
|
and (cond_b(lbrack)*cond_b(rbrack) >= 0)):
|
|
i += 1
|
|
lbrack = rbrack - np.power(2., i)
|
|
if not lbrack > -np.inf:
|
|
return fallback(data, *args, **kwds)
|
|
res = root_scalar(cond_b, bracket=(lbrack, rbrack))
|
|
if not res.converged:
|
|
return fallback(data, *args, **kwds)
|
|
|
|
# Determine the MLE for loc.
|
|
# Iteratively look for a bracket for root_scalar.
|
|
rbrack = res.root - 1e-3 # grad_loc is numerically ill-behaved
|
|
lbrack = rbrack - 1
|
|
i = 0
|
|
while ((lbrack > -np.inf)
|
|
and (dL_dLoc(lbrack)*dL_dLoc(rbrack) >= 0)):
|
|
i += 1
|
|
lbrack = rbrack - np.power(2., i)
|
|
if not lbrack > -np.inf:
|
|
return fallback(data, *args, **kwds)
|
|
res = root_scalar(dL_dLoc, bracket=(lbrack, rbrack))
|
|
if not res.converged:
|
|
return fallback(data, *args, **kwds)
|
|
loc = res.root
|
|
scale = get_scale(loc)
|
|
c = get_c(loc, scale)
|
|
b = get_b(c, loc, scale)
|
|
|
|
std_data = (data - loc)/scale
|
|
# The expression of b relies on b being bounded above.
|
|
up_bound_b = min(1/log_mean(std_data),
|
|
1/(harm_mean(std_data)-1))
|
|
if not (b < up_bound_b):
|
|
return fallback(data, *args, **kwds)
|
|
else:
|
|
# We know b is positive (or a FitError will be triggered)
|
|
# so we let loc get close to min(data).
|
|
rbrack = mn_inf
|
|
lbrack = mn_inf - 1
|
|
i = 0
|
|
# Iteratively look for a bracket for root_scalar.
|
|
while (lbrack > -np.inf
|
|
and (dL_dLoc(lbrack, fb)
|
|
* dL_dLoc(rbrack, fb) >= 0)):
|
|
i += 1
|
|
lbrack = rbrack - 2**i
|
|
if not lbrack > -np.inf:
|
|
return fallback(data, *args, **kwds)
|
|
res = root_scalar(dL_dLoc, (fb,),
|
|
bracket=(lbrack, rbrack))
|
|
if not res.converged:
|
|
return fallback(data, *args, **kwds)
|
|
loc = res.root
|
|
scale = get_scale(loc)
|
|
c = get_c(loc, scale)
|
|
b = fb
|
|
else:
|
|
# At least one of the parameters determining the support is fixed;
|
|
# the others then have analytical expressions from the constraints.
|
|
# The completely determined case (fixed c, loc and scale)
|
|
# has to be checked for not overflowing the support.
|
|
# If not fixed, b has to be determined numerically.
|
|
loc = floc if floc is not None else get_loc(fc, fscale)
|
|
scale = fscale or get_scale(loc)
|
|
c = fc or get_c(loc, scale)
|
|
|
|
# Unscaled, translated values should be positive when the location
|
|
# is fixed. If it is not the case, we end up with negative `scale`
|
|
# and `c`, which would trigger a FitError before exiting the
|
|
# method.
|
|
if floc is not None and data.min() - floc < 0:
|
|
raise FitDataError("truncpareto", lower=1, upper=c)
|
|
|
|
# Standardised values should be within the distribution support
|
|
# when all parameters controlling it are fixed. If it not the case,
|
|
# `fc` is overridden by `c` determined from `floc` and `fscale` when
|
|
# raising the exception.
|
|
if fc and (floc is not None) and fscale:
|
|
if data.max() > fc*fscale + floc:
|
|
raise FitDataError("truncpareto", lower=1,
|
|
upper=get_c(loc, scale))
|
|
|
|
# The other constraints should be automatically satisfied
|
|
# from the analytical expressions of the parameters.
|
|
# If fc or fscale are respectively less than one or less than 0,
|
|
# a FitError is triggered before exiting the method.
|
|
|
|
if fb is None:
|
|
std_data = (data - loc)/scale
|
|
logm = log_mean(std_data)
|
|
logc = np.log(c)
|
|
# Condition for a positive root to exist.
|
|
if not (2*logm < logc):
|
|
return fallback(data, *args, **kwds)
|
|
|
|
lbrack = 1/logm + 1/(logm - logc)
|
|
rbrack = np.nextafter(1/logm, 0)
|
|
try:
|
|
res = root_scalar(dL_dB, (logc, logm),
|
|
bracket=(lbrack, rbrack))
|
|
# we should then never get there
|
|
if not res.converged:
|
|
return fallback(data, *args, **kwds)
|
|
b = res.root
|
|
except ValueError:
|
|
b = rbrack
|
|
else:
|
|
b = fb
|
|
|
|
# The distribution requires that `scale+loc <= data <= c*scale+loc`.
|
|
# To avoid numerical issues, some tuning may be necessary.
|
|
# We adjust `scale` to satisfy the lower bound, and we adjust
|
|
# `c` to satisfy the upper bound.
|
|
if not (scale+loc) < mn:
|
|
if fscale:
|
|
loc = np.nextafter(loc, -np.inf)
|
|
else:
|
|
scale = get_scale(loc)
|
|
scale = np.nextafter(scale, 0)
|
|
if not (c*scale+loc) > mx:
|
|
c = get_c(loc, scale)
|
|
c = np.nextafter(c, np.inf)
|
|
|
|
if not (np.all(self._argcheck(b, c)) and (scale > 0)):
|
|
return fallback(data, *args, **kwds)
|
|
|
|
params_override = b, c, loc, scale
|
|
if floc is None and fscale is None:
|
|
# Based on testing in gh-16782, the following methods are only
|
|
# reliable if either `floc` or `fscale` are provided. They are
|
|
# fast, though, so might as well see if they are better than the
|
|
# generic method.
|
|
params_super = fallback(data, *args, **kwds)
|
|
nllf_override = self.nnlf(params_override, data)
|
|
nllf_super = self.nnlf(params_super, data)
|
|
if nllf_super < nllf_override:
|
|
return params_super
|
|
|
|
return params_override
|
|
|
|
|
|
truncpareto = truncpareto_gen(a=1.0, name='truncpareto')
|
|
|
|
|
|
class tukeylambda_gen(rv_continuous):
|
|
r"""A Tukey-Lamdba continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
A flexible distribution, able to represent and interpolate between the
|
|
following distributions:
|
|
|
|
- Cauchy (:math:`lambda = -1`)
|
|
- logistic (:math:`lambda = 0`)
|
|
- approx Normal (:math:`lambda = 0.14`)
|
|
- uniform from -1 to 1 (:math:`lambda = 1`)
|
|
|
|
`tukeylambda` takes a real number :math:`lambda` (denoted ``lam``
|
|
in the implementation) as a shape parameter.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _argcheck(self, lam):
|
|
return np.isfinite(lam)
|
|
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("lam", False, (-np.inf, np.inf), (False, False))]
|
|
|
|
def _pdf(self, x, lam):
|
|
Fx = np.asarray(sc.tklmbda(x, lam))
|
|
Px = Fx**(lam-1.0) + (np.asarray(1-Fx))**(lam-1.0)
|
|
Px = 1.0/np.asarray(Px)
|
|
return np.where((lam <= 0) | (abs(x) < 1.0/np.asarray(lam)), Px, 0.0)
|
|
|
|
def _cdf(self, x, lam):
|
|
return sc.tklmbda(x, lam)
|
|
|
|
def _ppf(self, q, lam):
|
|
return sc.boxcox(q, lam) - sc.boxcox1p(-q, lam)
|
|
|
|
def _stats(self, lam):
|
|
return 0, _tlvar(lam), 0, _tlkurt(lam)
|
|
|
|
def _entropy(self, lam):
|
|
def integ(p):
|
|
return np.log(pow(p, lam-1)+pow(1-p, lam-1))
|
|
return integrate.quad(integ, 0, 1)[0]
|
|
|
|
|
|
tukeylambda = tukeylambda_gen(name='tukeylambda')
|
|
|
|
|
|
class FitUniformFixedScaleDataError(FitDataError):
|
|
def __init__(self, ptp, fscale):
|
|
self.args = (
|
|
"Invalid values in `data`. Maximum likelihood estimation with "
|
|
"the uniform distribution and fixed scale requires that "
|
|
f"np.ptp(data) <= fscale, but np.ptp(data) = {ptp} and "
|
|
f"fscale = {fscale}."
|
|
)
|
|
|
|
|
|
class uniform_gen(rv_continuous):
|
|
r"""A uniform continuous random variable.
|
|
|
|
In the standard form, the distribution is uniform on ``[0, 1]``. Using
|
|
the parameters ``loc`` and ``scale``, one obtains the uniform distribution
|
|
on ``[loc, loc + scale]``.
|
|
|
|
%(before_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return []
|
|
|
|
def _rvs(self, size=None, random_state=None):
|
|
return random_state.uniform(0.0, 1.0, size)
|
|
|
|
def _pdf(self, x):
|
|
return 1.0*(x == x)
|
|
|
|
def _cdf(self, x):
|
|
return x
|
|
|
|
def _ppf(self, q):
|
|
return q
|
|
|
|
def _stats(self):
|
|
return 0.5, 1.0/12, 0, -1.2
|
|
|
|
def _entropy(self):
|
|
return 0.0
|
|
|
|
@_call_super_mom
|
|
def fit(self, data, *args, **kwds):
|
|
"""
|
|
Maximum likelihood estimate for the location and scale parameters.
|
|
|
|
`uniform.fit` uses only the following parameters. Because exact
|
|
formulas are used, the parameters related to optimization that are
|
|
available in the `fit` method of other distributions are ignored
|
|
here. The only positional argument accepted is `data`.
|
|
|
|
Parameters
|
|
----------
|
|
data : array_like
|
|
Data to use in calculating the maximum likelihood estimate.
|
|
floc : float, optional
|
|
Hold the location parameter fixed to the specified value.
|
|
fscale : float, optional
|
|
Hold the scale parameter fixed to the specified value.
|
|
|
|
Returns
|
|
-------
|
|
loc, scale : float
|
|
Maximum likelihood estimates for the location and scale.
|
|
|
|
Notes
|
|
-----
|
|
An error is raised if `floc` is given and any values in `data` are
|
|
less than `floc`, or if `fscale` is given and `fscale` is less
|
|
than ``data.max() - data.min()``. An error is also raised if both
|
|
`floc` and `fscale` are given.
|
|
|
|
Examples
|
|
--------
|
|
>>> import numpy as np
|
|
>>> from scipy.stats import uniform
|
|
|
|
We'll fit the uniform distribution to `x`:
|
|
|
|
>>> x = np.array([2, 2.5, 3.1, 9.5, 13.0])
|
|
|
|
For a uniform distribution MLE, the location is the minimum of the
|
|
data, and the scale is the maximum minus the minimum.
|
|
|
|
>>> loc, scale = uniform.fit(x)
|
|
>>> loc
|
|
2.0
|
|
>>> scale
|
|
11.0
|
|
|
|
If we know the data comes from a uniform distribution where the support
|
|
starts at 0, we can use `floc=0`:
|
|
|
|
>>> loc, scale = uniform.fit(x, floc=0)
|
|
>>> loc
|
|
0.0
|
|
>>> scale
|
|
13.0
|
|
|
|
Alternatively, if we know the length of the support is 12, we can use
|
|
`fscale=12`:
|
|
|
|
>>> loc, scale = uniform.fit(x, fscale=12)
|
|
>>> loc
|
|
1.5
|
|
>>> scale
|
|
12.0
|
|
|
|
In that last example, the support interval is [1.5, 13.5]. This
|
|
solution is not unique. For example, the distribution with ``loc=2``
|
|
and ``scale=12`` has the same likelihood as the one above. When
|
|
`fscale` is given and it is larger than ``data.max() - data.min()``,
|
|
the parameters returned by the `fit` method center the support over
|
|
the interval ``[data.min(), data.max()]``.
|
|
|
|
"""
|
|
if len(args) > 0:
|
|
raise TypeError("Too many arguments.")
|
|
|
|
floc = kwds.pop('floc', None)
|
|
fscale = kwds.pop('fscale', None)
|
|
|
|
_remove_optimizer_parameters(kwds)
|
|
|
|
if floc is not None and fscale is not None:
|
|
# This check is for consistency with `rv_continuous.fit`.
|
|
raise ValueError("All parameters fixed. There is nothing to "
|
|
"optimize.")
|
|
|
|
data = np.asarray(data)
|
|
|
|
if not np.isfinite(data).all():
|
|
raise ValueError("The data contains non-finite values.")
|
|
|
|
# MLE for the uniform distribution
|
|
# --------------------------------
|
|
# The PDF is
|
|
#
|
|
# f(x, loc, scale) = {1/scale for loc <= x <= loc + scale
|
|
# {0 otherwise}
|
|
#
|
|
# The likelihood function is
|
|
# L(x, loc, scale) = (1/scale)**n
|
|
# where n is len(x), assuming loc <= x <= loc + scale for all x.
|
|
# The log-likelihood is
|
|
# l(x, loc, scale) = -n*log(scale)
|
|
# The log-likelihood is maximized by making scale as small as possible,
|
|
# while keeping loc <= x <= loc + scale. So if neither loc nor scale
|
|
# are fixed, the log-likelihood is maximized by choosing
|
|
# loc = x.min()
|
|
# scale = np.ptp(x)
|
|
# If loc is fixed, it must be less than or equal to x.min(), and then
|
|
# the scale is
|
|
# scale = x.max() - loc
|
|
# If scale is fixed, it must not be less than np.ptp(x). If scale is
|
|
# greater than np.ptp(x), the solution is not unique. Note that the
|
|
# likelihood does not depend on loc, except for the requirement that
|
|
# loc <= x <= loc + scale. All choices of loc for which
|
|
# x.max() - scale <= loc <= x.min()
|
|
# have the same log-likelihood. In this case, we choose loc such that
|
|
# the support is centered over the interval [data.min(), data.max()]:
|
|
# loc = x.min() = 0.5*(scale - np.ptp(x))
|
|
|
|
if fscale is None:
|
|
# scale is not fixed.
|
|
if floc is None:
|
|
# loc is not fixed, scale is not fixed.
|
|
loc = data.min()
|
|
scale = np.ptp(data)
|
|
else:
|
|
# loc is fixed, scale is not fixed.
|
|
loc = floc
|
|
scale = data.max() - loc
|
|
if data.min() < loc:
|
|
raise FitDataError("uniform", lower=loc, upper=loc + scale)
|
|
else:
|
|
# loc is not fixed, scale is fixed.
|
|
ptp = np.ptp(data)
|
|
if ptp > fscale:
|
|
raise FitUniformFixedScaleDataError(ptp=ptp, fscale=fscale)
|
|
# If ptp < fscale, the ML estimate is not unique; see the comments
|
|
# above. We choose the distribution for which the support is
|
|
# centered over the interval [data.min(), data.max()].
|
|
loc = data.min() - 0.5*(fscale - ptp)
|
|
scale = fscale
|
|
|
|
# We expect the return values to be floating point, so ensure it
|
|
# by explicitly converting to float.
|
|
return float(loc), float(scale)
|
|
|
|
|
|
uniform = uniform_gen(a=0.0, b=1.0, name='uniform')
|
|
|
|
|
|
class vonmises_gen(rv_continuous):
|
|
r"""A Von Mises continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
See Also
|
|
--------
|
|
scipy.stats.vonmises_fisher : Von-Mises Fisher distribution on a
|
|
hypersphere
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `vonmises` and `vonmises_line` is:
|
|
|
|
.. math::
|
|
|
|
f(x, \kappa) = \frac{ \exp(\kappa \cos(x)) }{ 2 \pi I_0(\kappa) }
|
|
|
|
for :math:`-\pi \le x \le \pi`, :math:`\kappa \ge 0`. :math:`I_0` is the
|
|
modified Bessel function of order zero (`scipy.special.i0`).
|
|
|
|
`vonmises` is a circular distribution which does not restrict the
|
|
distribution to a fixed interval. Currently, there is no circular
|
|
distribution framework in SciPy. The ``cdf`` is implemented such that
|
|
``cdf(x + 2*np.pi) == cdf(x) + 1``.
|
|
|
|
`vonmises_line` is the same distribution, defined on :math:`[-\pi, \pi]`
|
|
on the real line. This is a regular (i.e. non-circular) distribution.
|
|
|
|
Note about distribution parameters: `vonmises` and `vonmises_line` take
|
|
``kappa`` as a shape parameter (concentration) and ``loc`` as the location
|
|
(circular mean). A ``scale`` parameter is accepted but does not have any
|
|
effect.
|
|
|
|
Examples
|
|
--------
|
|
Import the necessary modules.
|
|
|
|
>>> import numpy as np
|
|
>>> import matplotlib.pyplot as plt
|
|
>>> from scipy.stats import vonmises
|
|
|
|
Define distribution parameters.
|
|
|
|
>>> loc = 0.5 * np.pi # circular mean
|
|
>>> kappa = 1 # concentration
|
|
|
|
Compute the probability density at ``x=0`` via the ``pdf`` method.
|
|
|
|
>>> vonmises.pdf(0, loc=loc, kappa=kappa)
|
|
0.12570826359722018
|
|
|
|
Verify that the percentile function ``ppf`` inverts the cumulative
|
|
distribution function ``cdf`` up to floating point accuracy.
|
|
|
|
>>> x = 1
|
|
>>> cdf_value = vonmises.cdf(x, loc=loc, kappa=kappa)
|
|
>>> ppf_value = vonmises.ppf(cdf_value, loc=loc, kappa=kappa)
|
|
>>> x, cdf_value, ppf_value
|
|
(1, 0.31489339900904967, 1.0000000000000004)
|
|
|
|
Draw 1000 random variates by calling the ``rvs`` method.
|
|
|
|
>>> sample_size = 1000
|
|
>>> sample = vonmises(loc=loc, kappa=kappa).rvs(sample_size)
|
|
|
|
Plot the von Mises density on a Cartesian and polar grid to emphasize
|
|
that it is a circular distribution.
|
|
|
|
>>> fig = plt.figure(figsize=(12, 6))
|
|
>>> left = plt.subplot(121)
|
|
>>> right = plt.subplot(122, projection='polar')
|
|
>>> x = np.linspace(-np.pi, np.pi, 500)
|
|
>>> vonmises_pdf = vonmises.pdf(x, loc=loc, kappa=kappa)
|
|
>>> ticks = [0, 0.15, 0.3]
|
|
|
|
The left image contains the Cartesian plot.
|
|
|
|
>>> left.plot(x, vonmises_pdf)
|
|
>>> left.set_yticks(ticks)
|
|
>>> number_of_bins = int(np.sqrt(sample_size))
|
|
>>> left.hist(sample, density=True, bins=number_of_bins)
|
|
>>> left.set_title("Cartesian plot")
|
|
>>> left.set_xlim(-np.pi, np.pi)
|
|
>>> left.grid(True)
|
|
|
|
The right image contains the polar plot.
|
|
|
|
>>> right.plot(x, vonmises_pdf, label="PDF")
|
|
>>> right.set_yticks(ticks)
|
|
>>> right.hist(sample, density=True, bins=number_of_bins,
|
|
... label="Histogram")
|
|
>>> right.set_title("Polar plot")
|
|
>>> right.legend(bbox_to_anchor=(0.15, 1.06))
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("kappa", False, (0, np.inf), (True, False))]
|
|
|
|
def _argcheck(self, kappa):
|
|
return kappa >= 0
|
|
|
|
def _rvs(self, kappa, size=None, random_state=None):
|
|
return random_state.vonmises(0.0, kappa, size=size)
|
|
|
|
@inherit_docstring_from(rv_continuous)
|
|
def rvs(self, *args, **kwds):
|
|
rvs = super().rvs(*args, **kwds)
|
|
return np.mod(rvs + np.pi, 2*np.pi) - np.pi
|
|
|
|
def _pdf(self, x, kappa):
|
|
# vonmises.pdf(x, kappa) = exp(kappa * cos(x)) / (2*pi*I[0](kappa))
|
|
# = exp(kappa * (cos(x) - 1)) /
|
|
# (2*pi*exp(-kappa)*I[0](kappa))
|
|
# = exp(kappa * cosm1(x)) / (2*pi*i0e(kappa))
|
|
return np.exp(kappa*sc.cosm1(x)) / (2*np.pi*sc.i0e(kappa))
|
|
|
|
def _logpdf(self, x, kappa):
|
|
# vonmises.pdf(x, kappa) = exp(kappa * cosm1(x)) / (2*pi*i0e(kappa))
|
|
return kappa * sc.cosm1(x) - np.log(2*np.pi) - np.log(sc.i0e(kappa))
|
|
|
|
def _cdf(self, x, kappa):
|
|
return _stats.von_mises_cdf(kappa, x)
|
|
|
|
def _stats_skip(self, kappa):
|
|
return 0, None, 0, None
|
|
|
|
def _entropy(self, kappa):
|
|
# vonmises.entropy(kappa) = -kappa * I[1](kappa) / I[0](kappa) +
|
|
# log(2 * np.pi * I[0](kappa))
|
|
# = -kappa * I[1](kappa) * exp(-kappa) /
|
|
# (I[0](kappa) * exp(-kappa)) +
|
|
# log(2 * np.pi *
|
|
# I[0](kappa) * exp(-kappa) / exp(-kappa))
|
|
# = -kappa * sc.i1e(kappa) / sc.i0e(kappa) +
|
|
# log(2 * np.pi * i0e(kappa)) + kappa
|
|
return (-kappa * sc.i1e(kappa) / sc.i0e(kappa) +
|
|
np.log(2 * np.pi * sc.i0e(kappa)) + kappa)
|
|
|
|
@extend_notes_in_docstring(rv_continuous, notes="""\
|
|
The default limits of integration are endpoints of the interval
|
|
of width ``2*pi`` centered at `loc` (e.g. ``[-pi, pi]`` when
|
|
``loc=0``).\n\n""")
|
|
def expect(self, func=None, args=(), loc=0, scale=1, lb=None, ub=None,
|
|
conditional=False, **kwds):
|
|
_a, _b = -np.pi, np.pi
|
|
|
|
if lb is None:
|
|
lb = loc + _a
|
|
if ub is None:
|
|
ub = loc + _b
|
|
|
|
return super().expect(func, args, loc,
|
|
scale, lb, ub, conditional, **kwds)
|
|
|
|
@_call_super_mom
|
|
@extend_notes_in_docstring(rv_continuous, notes="""\
|
|
Fit data is assumed to represent angles and will be wrapped onto the
|
|
unit circle. `f0` and `fscale` are ignored; the returned shape is
|
|
always the maximum likelihood estimate and the scale is always
|
|
1. Initial guesses are ignored.\n\n""")
|
|
def fit(self, data, *args, **kwds):
|
|
if kwds.pop('superfit', False):
|
|
return super().fit(data, *args, **kwds)
|
|
|
|
data, fshape, floc, fscale = _check_fit_input_parameters(self, data,
|
|
args, kwds)
|
|
if self.a == -np.pi:
|
|
# vonmises line case, here the default fit method will be used
|
|
return super().fit(data, *args, **kwds)
|
|
|
|
# wrap data to interval [0, 2*pi]
|
|
data = np.mod(data, 2 * np.pi)
|
|
|
|
def find_mu(data):
|
|
return stats.circmean(data)
|
|
|
|
def find_kappa(data, loc):
|
|
# Usually, sources list the following as the equation to solve for
|
|
# the MLE of the shape parameter:
|
|
# r = I[1](kappa)/I[0](kappa), where r = mean resultant length
|
|
# This is valid when the location is the MLE of location.
|
|
# More generally, when the location may be fixed at an arbitrary
|
|
# value, r should be defined as follows:
|
|
r = np.sum(np.cos(loc - data))/len(data)
|
|
# See gh-18128 for more information.
|
|
|
|
# The function r[0](kappa) := I[1](kappa)/I[0](kappa) is monotonic
|
|
# increasing from r[0](0) = 0 to r[0](+inf) = 1. The partial
|
|
# derivative of the log likelihood function with respect to kappa
|
|
# is monotonic decreasing in kappa.
|
|
if r == 1:
|
|
# All observations are (almost) equal to the mean. Return
|
|
# some large kappa such that r[0](kappa) = 1.0 numerically.
|
|
return 1e16
|
|
elif r > 0:
|
|
def solve_for_kappa(kappa):
|
|
return sc.i1e(kappa)/sc.i0e(kappa) - r
|
|
|
|
# The bounds of the root of r[0](kappa) = r are derived from
|
|
# selected bounds of r[0](x) given in [1, Eq. 11 & 16]. See
|
|
# gh-20102 for details.
|
|
#
|
|
# [1] Amos, D. E. (1973). Computation of Modified Bessel
|
|
# Functions and Their Ratios. Mathematics of Computation,
|
|
# 28(125): 239-251.
|
|
lower_bound = r/(1-r)/(1+r)
|
|
upper_bound = 2*lower_bound
|
|
|
|
# The bounds are violated numerically for certain values of r,
|
|
# where solve_for_kappa evaluated at the bounds have the same
|
|
# sign. This indicates numerical imprecision of i1e()/i0e().
|
|
# Return the violated bound in this case as it's more accurate.
|
|
if solve_for_kappa(lower_bound) >= 0:
|
|
return lower_bound
|
|
elif solve_for_kappa(upper_bound) <= 0:
|
|
return upper_bound
|
|
else:
|
|
root_res = root_scalar(solve_for_kappa, method="brentq",
|
|
bracket=(lower_bound, upper_bound))
|
|
return root_res.root
|
|
else:
|
|
# if the provided floc is very far from the circular mean,
|
|
# the mean resultant length r can become negative.
|
|
# In that case, the equation
|
|
# I[1](kappa)/I[0](kappa) = r does not have a solution.
|
|
# The maximum likelihood kappa is then 0 which practically
|
|
# results in the uniform distribution on the circle. As
|
|
# vonmises is defined for kappa > 0, return instead the
|
|
# smallest floating point value.
|
|
# See gh-18190 for more information
|
|
return np.finfo(float).tiny
|
|
|
|
# location likelihood equation has a solution independent of kappa
|
|
loc = floc if floc is not None else find_mu(data)
|
|
# shape likelihood equation depends on location
|
|
shape = fshape if fshape is not None else find_kappa(data, loc)
|
|
|
|
loc = np.mod(loc + np.pi, 2 * np.pi) - np.pi # ensure in [-pi, pi]
|
|
return shape, loc, 1 # scale is not handled
|
|
|
|
|
|
vonmises = vonmises_gen(name='vonmises')
|
|
vonmises_line = vonmises_gen(a=-np.pi, b=np.pi, name='vonmises_line')
|
|
|
|
|
|
class wald_gen(invgauss_gen):
|
|
r"""A Wald continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `wald` is:
|
|
|
|
.. math::
|
|
|
|
f(x) = \frac{1}{\sqrt{2\pi x^3}} \exp(- \frac{ (x-1)^2 }{ 2x })
|
|
|
|
for :math:`x >= 0`.
|
|
|
|
`wald` is a special case of `invgauss` with ``mu=1``.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
"""
|
|
_support_mask = rv_continuous._open_support_mask
|
|
|
|
def _shape_info(self):
|
|
return []
|
|
|
|
def _rvs(self, size=None, random_state=None):
|
|
return random_state.wald(1.0, 1.0, size=size)
|
|
|
|
def _pdf(self, x):
|
|
# wald.pdf(x) = 1/sqrt(2*pi*x**3) * exp(-(x-1)**2/(2*x))
|
|
return invgauss._pdf(x, 1.0)
|
|
|
|
def _cdf(self, x):
|
|
return invgauss._cdf(x, 1.0)
|
|
|
|
def _sf(self, x):
|
|
return invgauss._sf(x, 1.0)
|
|
|
|
def _ppf(self, x):
|
|
return invgauss._ppf(x, 1.0)
|
|
|
|
def _isf(self, x):
|
|
return invgauss._isf(x, 1.0)
|
|
|
|
def _logpdf(self, x):
|
|
return invgauss._logpdf(x, 1.0)
|
|
|
|
def _logcdf(self, x):
|
|
return invgauss._logcdf(x, 1.0)
|
|
|
|
def _logsf(self, x):
|
|
return invgauss._logsf(x, 1.0)
|
|
|
|
def _stats(self):
|
|
return 1.0, 1.0, 3.0, 15.0
|
|
|
|
def _entropy(self):
|
|
return invgauss._entropy(1.0)
|
|
|
|
|
|
wald = wald_gen(a=0.0, name="wald")
|
|
|
|
|
|
class wrapcauchy_gen(rv_continuous):
|
|
r"""A wrapped Cauchy continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `wrapcauchy` is:
|
|
|
|
.. math::
|
|
|
|
f(x, c) = \frac{1-c^2}{2\pi (1+c^2 - 2c \cos(x))}
|
|
|
|
for :math:`0 \le x \le 2\pi`, :math:`0 < c < 1`.
|
|
|
|
`wrapcauchy` takes ``c`` as a shape parameter for :math:`c`.
|
|
|
|
%(after_notes)s
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _argcheck(self, c):
|
|
return (c > 0) & (c < 1)
|
|
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("c", False, (0, 1), (False, False))]
|
|
|
|
def _pdf(self, x, c):
|
|
# wrapcauchy.pdf(x, c) = (1-c**2) / (2*pi*(1+c**2-2*c*cos(x)))
|
|
return (1.0-c*c)/(2*np.pi*(1+c*c-2*c*np.cos(x)))
|
|
|
|
def _cdf(self, x, c):
|
|
|
|
def f1(x, cr):
|
|
# CDF for 0 <= x < pi
|
|
return 1/np.pi * np.arctan(cr*np.tan(x/2))
|
|
|
|
def f2(x, cr):
|
|
# CDF for pi <= x <= 2*pi
|
|
return 1 - 1/np.pi * np.arctan(cr*np.tan((2*np.pi - x)/2))
|
|
|
|
cr = (1 + c)/(1 - c)
|
|
return _lazywhere(x < np.pi, (x, cr), f=f1, f2=f2)
|
|
|
|
def _ppf(self, q, c):
|
|
val = (1.0-c)/(1.0+c)
|
|
rcq = 2*np.arctan(val*np.tan(np.pi*q))
|
|
rcmq = 2*np.pi-2*np.arctan(val*np.tan(np.pi*(1-q)))
|
|
return np.where(q < 1.0/2, rcq, rcmq)
|
|
|
|
def _entropy(self, c):
|
|
return np.log(2*np.pi*(1-c*c))
|
|
|
|
def _fitstart(self, data):
|
|
# Use 0.5 as the initial guess of the shape parameter.
|
|
# For the location and scale, use the minimum and
|
|
# peak-to-peak/(2*pi), respectively.
|
|
if isinstance(data, CensoredData):
|
|
data = data._uncensor()
|
|
return 0.5, np.min(data), np.ptp(data)/(2*np.pi)
|
|
|
|
|
|
wrapcauchy = wrapcauchy_gen(a=0.0, b=2*np.pi, name='wrapcauchy')
|
|
|
|
|
|
class gennorm_gen(rv_continuous):
|
|
r"""A generalized normal continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
See Also
|
|
--------
|
|
laplace : Laplace distribution
|
|
norm : normal distribution
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `gennorm` is [1]_:
|
|
|
|
.. math::
|
|
|
|
f(x, \beta) = \frac{\beta}{2 \Gamma(1/\beta)} \exp(-|x|^\beta),
|
|
|
|
where :math:`x` is a real number, :math:`\beta > 0` and
|
|
:math:`\Gamma` is the gamma function (`scipy.special.gamma`).
|
|
|
|
`gennorm` takes ``beta`` as a shape parameter for :math:`\beta`.
|
|
For :math:`\beta = 1`, it is identical to a Laplace distribution.
|
|
For :math:`\beta = 2`, it is identical to a normal distribution
|
|
(with ``scale=1/sqrt(2)``).
|
|
|
|
References
|
|
----------
|
|
|
|
.. [1] "Generalized normal distribution, Version 1",
|
|
https://en.wikipedia.org/wiki/Generalized_normal_distribution#Version_1
|
|
|
|
.. [2] Nardon, Martina, and Paolo Pianca. "Simulation techniques for
|
|
generalized Gaussian densities." Journal of Statistical
|
|
Computation and Simulation 79.11 (2009): 1317-1329
|
|
|
|
.. [3] Wicklin, Rick. "Simulate data from a generalized Gaussian
|
|
distribution" in The DO Loop blog, September 21, 2016,
|
|
https://blogs.sas.com/content/iml/2016/09/21/simulate-generalized-gaussian-sas.html
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("beta", False, (0, np.inf), (False, False))]
|
|
|
|
def _pdf(self, x, beta):
|
|
return np.exp(self._logpdf(x, beta))
|
|
|
|
def _logpdf(self, x, beta):
|
|
return np.log(0.5*beta) - sc.gammaln(1.0/beta) - abs(x)**beta
|
|
|
|
def _cdf(self, x, beta):
|
|
c = 0.5 * np.sign(x)
|
|
# evaluating (.5 + c) first prevents numerical cancellation
|
|
return (0.5 + c) - c * sc.gammaincc(1.0/beta, abs(x)**beta)
|
|
|
|
def _ppf(self, x, beta):
|
|
c = np.sign(x - 0.5)
|
|
# evaluating (1. + c) first prevents numerical cancellation
|
|
return c * sc.gammainccinv(1.0/beta, (1.0 + c) - 2.0*c*x)**(1.0/beta)
|
|
|
|
def _sf(self, x, beta):
|
|
return self._cdf(-x, beta)
|
|
|
|
def _isf(self, x, beta):
|
|
return -self._ppf(x, beta)
|
|
|
|
def _stats(self, beta):
|
|
c1, c3, c5 = sc.gammaln([1.0/beta, 3.0/beta, 5.0/beta])
|
|
return 0., np.exp(c3 - c1), 0., np.exp(c5 + c1 - 2.0*c3) - 3.
|
|
|
|
def _entropy(self, beta):
|
|
return 1. / beta - np.log(.5 * beta) + sc.gammaln(1. / beta)
|
|
|
|
def _rvs(self, beta, size=None, random_state=None):
|
|
# see [2]_ for the algorithm
|
|
# see [3]_ for reference implementation in SAS
|
|
z = random_state.gamma(1/beta, size=size)
|
|
y = z ** (1/beta)
|
|
# convert y to array to ensure masking support
|
|
y = np.asarray(y)
|
|
mask = random_state.random(size=y.shape) < 0.5
|
|
y[mask] = -y[mask]
|
|
return y
|
|
|
|
|
|
gennorm = gennorm_gen(name='gennorm')
|
|
|
|
|
|
class halfgennorm_gen(rv_continuous):
|
|
r"""The upper half of a generalized normal continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
See Also
|
|
--------
|
|
gennorm : generalized normal distribution
|
|
expon : exponential distribution
|
|
halfnorm : half normal distribution
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `halfgennorm` is:
|
|
|
|
.. math::
|
|
|
|
f(x, \beta) = \frac{\beta}{\Gamma(1/\beta)} \exp(-|x|^\beta)
|
|
|
|
for :math:`x, \beta > 0`. :math:`\Gamma` is the gamma function
|
|
(`scipy.special.gamma`).
|
|
|
|
`halfgennorm` takes ``beta`` as a shape parameter for :math:`\beta`.
|
|
For :math:`\beta = 1`, it is identical to an exponential distribution.
|
|
For :math:`\beta = 2`, it is identical to a half normal distribution
|
|
(with ``scale=1/sqrt(2)``).
|
|
|
|
References
|
|
----------
|
|
|
|
.. [1] "Generalized normal distribution, Version 1",
|
|
https://en.wikipedia.org/wiki/Generalized_normal_distribution#Version_1
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("beta", False, (0, np.inf), (False, False))]
|
|
|
|
def _pdf(self, x, beta):
|
|
# beta
|
|
# halfgennorm.pdf(x, beta) = ------------- exp(-|x|**beta)
|
|
# gamma(1/beta)
|
|
return np.exp(self._logpdf(x, beta))
|
|
|
|
def _logpdf(self, x, beta):
|
|
return np.log(beta) - sc.gammaln(1.0/beta) - x**beta
|
|
|
|
def _cdf(self, x, beta):
|
|
return sc.gammainc(1.0/beta, x**beta)
|
|
|
|
def _ppf(self, x, beta):
|
|
return sc.gammaincinv(1.0/beta, x)**(1.0/beta)
|
|
|
|
def _sf(self, x, beta):
|
|
return sc.gammaincc(1.0/beta, x**beta)
|
|
|
|
def _isf(self, x, beta):
|
|
return sc.gammainccinv(1.0/beta, x)**(1.0/beta)
|
|
|
|
def _entropy(self, beta):
|
|
return 1.0/beta - np.log(beta) + sc.gammaln(1.0/beta)
|
|
|
|
|
|
halfgennorm = halfgennorm_gen(a=0, name='halfgennorm')
|
|
|
|
|
|
class crystalball_gen(rv_continuous):
|
|
r"""
|
|
Crystalball distribution
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `crystalball` is:
|
|
|
|
.. math::
|
|
|
|
f(x, \beta, m) = \begin{cases}
|
|
N \exp(-x^2 / 2), &\text{for } x > -\beta\\
|
|
N A (B - x)^{-m} &\text{for } x \le -\beta
|
|
\end{cases}
|
|
|
|
where :math:`A = (m / |\beta|)^m \exp(-\beta^2 / 2)`,
|
|
:math:`B = m/|\beta| - |\beta|` and :math:`N` is a normalisation constant.
|
|
|
|
`crystalball` takes :math:`\beta > 0` and :math:`m > 1` as shape
|
|
parameters. :math:`\beta` defines the point where the pdf changes
|
|
from a power-law to a Gaussian distribution. :math:`m` is the power
|
|
of the power-law tail.
|
|
|
|
%(after_notes)s
|
|
|
|
.. versionadded:: 0.19.0
|
|
|
|
References
|
|
----------
|
|
.. [1] "Crystal Ball Function",
|
|
https://en.wikipedia.org/wiki/Crystal_Ball_function
|
|
|
|
%(example)s
|
|
"""
|
|
def _argcheck(self, beta, m):
|
|
"""
|
|
Shape parameter bounds are m > 1 and beta > 0.
|
|
"""
|
|
return (m > 1) & (beta > 0)
|
|
|
|
def _shape_info(self):
|
|
ibeta = _ShapeInfo("beta", False, (0, np.inf), (False, False))
|
|
im = _ShapeInfo("m", False, (1, np.inf), (False, False))
|
|
return [ibeta, im]
|
|
|
|
def _fitstart(self, data):
|
|
# Arbitrary, but the default m=1 is not valid
|
|
return super()._fitstart(data, args=(1, 1.5))
|
|
|
|
def _pdf(self, x, beta, m):
|
|
"""
|
|
Return PDF of the crystalball function.
|
|
|
|
--
|
|
| exp(-x**2 / 2), for x > -beta
|
|
crystalball.pdf(x, beta, m) = N * |
|
|
| A * (B - x)**(-m), for x <= -beta
|
|
--
|
|
"""
|
|
N = 1.0 / (m/beta / (m-1) * np.exp(-beta**2 / 2.0) +
|
|
_norm_pdf_C * _norm_cdf(beta))
|
|
|
|
def rhs(x, beta, m):
|
|
return np.exp(-x**2 / 2)
|
|
|
|
def lhs(x, beta, m):
|
|
return ((m/beta)**m * np.exp(-beta**2 / 2.0) *
|
|
(m/beta - beta - x)**(-m))
|
|
|
|
return N * _lazywhere(x > -beta, (x, beta, m), f=rhs, f2=lhs)
|
|
|
|
def _logpdf(self, x, beta, m):
|
|
"""
|
|
Return the log of the PDF of the crystalball function.
|
|
"""
|
|
N = 1.0 / (m/beta / (m-1) * np.exp(-beta**2 / 2.0) +
|
|
_norm_pdf_C * _norm_cdf(beta))
|
|
|
|
def rhs(x, beta, m):
|
|
return -x**2/2
|
|
|
|
def lhs(x, beta, m):
|
|
return m*np.log(m/beta) - beta**2/2 - m*np.log(m/beta - beta - x)
|
|
|
|
return np.log(N) + _lazywhere(x > -beta, (x, beta, m), f=rhs, f2=lhs)
|
|
|
|
def _cdf(self, x, beta, m):
|
|
"""
|
|
Return CDF of the crystalball function
|
|
"""
|
|
N = 1.0 / (m/beta / (m-1) * np.exp(-beta**2 / 2.0) +
|
|
_norm_pdf_C * _norm_cdf(beta))
|
|
|
|
def rhs(x, beta, m):
|
|
return ((m/beta) * np.exp(-beta**2 / 2.0) / (m-1) +
|
|
_norm_pdf_C * (_norm_cdf(x) - _norm_cdf(-beta)))
|
|
|
|
def lhs(x, beta, m):
|
|
return ((m/beta)**m * np.exp(-beta**2 / 2.0) *
|
|
(m/beta - beta - x)**(-m+1) / (m-1))
|
|
|
|
return N * _lazywhere(x > -beta, (x, beta, m), f=rhs, f2=lhs)
|
|
|
|
def _ppf(self, p, beta, m):
|
|
N = 1.0 / (m/beta / (m-1) * np.exp(-beta**2 / 2.0) +
|
|
_norm_pdf_C * _norm_cdf(beta))
|
|
pbeta = N * (m/beta) * np.exp(-beta**2/2) / (m - 1)
|
|
|
|
def ppf_less(p, beta, m):
|
|
eb2 = np.exp(-beta**2/2)
|
|
C = (m/beta) * eb2 / (m-1)
|
|
N = 1/(C + _norm_pdf_C * _norm_cdf(beta))
|
|
return (m/beta - beta -
|
|
((m - 1)*(m/beta)**(-m)/eb2*p/N)**(1/(1-m)))
|
|
|
|
def ppf_greater(p, beta, m):
|
|
eb2 = np.exp(-beta**2/2)
|
|
C = (m/beta) * eb2 / (m-1)
|
|
N = 1/(C + _norm_pdf_C * _norm_cdf(beta))
|
|
return _norm_ppf(_norm_cdf(-beta) + (1/_norm_pdf_C)*(p/N - C))
|
|
|
|
return _lazywhere(p < pbeta, (p, beta, m), f=ppf_less, f2=ppf_greater)
|
|
|
|
def _munp(self, n, beta, m):
|
|
"""
|
|
Returns the n-th non-central moment of the crystalball function.
|
|
"""
|
|
N = 1.0 / (m/beta / (m-1) * np.exp(-beta**2 / 2.0) +
|
|
_norm_pdf_C * _norm_cdf(beta))
|
|
|
|
def n_th_moment(n, beta, m):
|
|
"""
|
|
Returns n-th moment. Defined only if n+1 < m
|
|
Function cannot broadcast due to the loop over n
|
|
"""
|
|
A = (m/beta)**m * np.exp(-beta**2 / 2.0)
|
|
B = m/beta - beta
|
|
rhs = (2**((n-1)/2.0) * sc.gamma((n+1)/2) *
|
|
(1.0 + (-1)**n * sc.gammainc((n+1)/2, beta**2 / 2)))
|
|
lhs = np.zeros(rhs.shape)
|
|
for k in range(n + 1):
|
|
lhs += (sc.binom(n, k) * B**(n-k) * (-1)**k / (m - k - 1) *
|
|
(m/beta)**(-m + k + 1))
|
|
return A * lhs + rhs
|
|
|
|
return N * _lazywhere(n + 1 < m, (n, beta, m),
|
|
np.vectorize(n_th_moment, otypes=[np.float64]),
|
|
np.inf)
|
|
|
|
|
|
crystalball = crystalball_gen(name='crystalball', longname="A Crystalball Function")
|
|
|
|
|
|
def _argus_phi(chi):
|
|
"""
|
|
Utility function for the argus distribution used in the pdf, sf and
|
|
moment calculation.
|
|
Note that for all x > 0:
|
|
gammainc(1.5, x**2/2) = 2 * (_norm_cdf(x) - x * _norm_pdf(x) - 0.5).
|
|
This can be verified directly by noting that the cdf of Gamma(1.5) can
|
|
be written as erf(sqrt(x)) - 2*sqrt(x)*exp(-x)/sqrt(Pi).
|
|
We use gammainc instead of the usual definition because it is more precise
|
|
for small chi.
|
|
"""
|
|
return sc.gammainc(1.5, chi**2/2) / 2
|
|
|
|
|
|
class argus_gen(rv_continuous):
|
|
r"""
|
|
Argus distribution
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `argus` is:
|
|
|
|
.. math::
|
|
|
|
f(x, \chi) = \frac{\chi^3}{\sqrt{2\pi} \Psi(\chi)} x \sqrt{1-x^2}
|
|
\exp(-\chi^2 (1 - x^2)/2)
|
|
|
|
for :math:`0 < x < 1` and :math:`\chi > 0`, where
|
|
|
|
.. math::
|
|
|
|
\Psi(\chi) = \Phi(\chi) - \chi \phi(\chi) - 1/2
|
|
|
|
with :math:`\Phi` and :math:`\phi` being the CDF and PDF of a standard
|
|
normal distribution, respectively.
|
|
|
|
`argus` takes :math:`\chi` as shape a parameter. Details about sampling
|
|
from the ARGUS distribution can be found in [2]_.
|
|
|
|
%(after_notes)s
|
|
|
|
References
|
|
----------
|
|
.. [1] "ARGUS distribution",
|
|
https://en.wikipedia.org/wiki/ARGUS_distribution
|
|
.. [2] Christoph Baumgarten "Random variate generation by fast numerical
|
|
inversion in the varying parameter case." Research in Statistics,
|
|
vol. 1, 2023, doi:10.1080/27684520.2023.2279060.
|
|
|
|
.. versionadded:: 0.19.0
|
|
|
|
%(example)s
|
|
"""
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("chi", False, (0, np.inf), (False, False))]
|
|
|
|
def _logpdf(self, x, chi):
|
|
# for x = 0 or 1, logpdf returns -np.inf
|
|
with np.errstate(divide='ignore'):
|
|
y = 1.0 - x*x
|
|
A = 3*np.log(chi) - _norm_pdf_logC - np.log(_argus_phi(chi))
|
|
return A + np.log(x) + 0.5*np.log1p(-x*x) - chi**2 * y / 2
|
|
|
|
def _pdf(self, x, chi):
|
|
return np.exp(self._logpdf(x, chi))
|
|
|
|
def _cdf(self, x, chi):
|
|
return 1.0 - self._sf(x, chi)
|
|
|
|
def _sf(self, x, chi):
|
|
return _argus_phi(chi * np.sqrt(1 - x**2)) / _argus_phi(chi)
|
|
|
|
def _rvs(self, chi, size=None, random_state=None):
|
|
chi = np.asarray(chi)
|
|
if chi.size == 1:
|
|
out = self._rvs_scalar(chi, numsamples=size,
|
|
random_state=random_state)
|
|
else:
|
|
shp, bc = _check_shape(chi.shape, size)
|
|
numsamples = int(np.prod(shp))
|
|
out = np.empty(size)
|
|
it = np.nditer([chi],
|
|
flags=['multi_index'],
|
|
op_flags=[['readonly']])
|
|
while not it.finished:
|
|
idx = tuple((it.multi_index[j] if not bc[j] else slice(None))
|
|
for j in range(-len(size), 0))
|
|
r = self._rvs_scalar(it[0], numsamples=numsamples,
|
|
random_state=random_state)
|
|
out[idx] = r.reshape(shp)
|
|
it.iternext()
|
|
|
|
if size == ():
|
|
out = out[()]
|
|
return out
|
|
|
|
def _rvs_scalar(self, chi, numsamples=None, random_state=None):
|
|
# if chi <= 1.8:
|
|
# use rejection method, see Devroye:
|
|
# Non-Uniform Random Variate Generation, 1986, section II.3.2.
|
|
# write: PDF f(x) = c * g(x) * h(x), where
|
|
# h is [0,1]-valued and g is a density
|
|
# we use two ways to write f
|
|
#
|
|
# Case 1:
|
|
# write g(x) = 3*x*sqrt(1-x**2), h(x) = exp(-chi**2 (1-x**2) / 2)
|
|
# If X has a distribution with density g its ppf G_inv is given by:
|
|
# G_inv(u) = np.sqrt(1 - u**(2/3))
|
|
#
|
|
# Case 2:
|
|
# g(x) = chi**2 * x * exp(-chi**2 * (1-x**2)/2) / (1 - exp(-chi**2 /2))
|
|
# h(x) = sqrt(1 - x**2), 0 <= x <= 1
|
|
# one can show that
|
|
# G_inv(u) = np.sqrt(2*np.log(u*(np.exp(chi**2/2)-1)+1))/chi
|
|
# = np.sqrt(1 + 2*np.log(np.exp(-chi**2/2)*(1-u)+u)/chi**2)
|
|
# the latter expression is used for precision with small chi
|
|
#
|
|
# In both cases, the inverse cdf of g can be written analytically, and
|
|
# we can apply the rejection method:
|
|
#
|
|
# REPEAT
|
|
# Generate U uniformly distributed on [0, 1]
|
|
# Generate X with density g (e.g. via inverse transform sampling:
|
|
# X = G_inv(V) with V uniformly distributed on [0, 1])
|
|
# UNTIL X <= h(X)
|
|
# RETURN X
|
|
#
|
|
# We use case 1 for chi <= 0.5 as it maintains precision for small chi
|
|
# and case 2 for 0.5 < chi <= 1.8 due to its speed for moderate chi.
|
|
#
|
|
# if chi > 1.8:
|
|
# use relation to the Gamma distribution: if X is ARGUS with parameter
|
|
# chi), then Y = chi**2 * (1 - X**2) / 2 has density proportional to
|
|
# sqrt(u) * exp(-u) on [0, chi**2 / 2], i.e. a Gamma(3/2) distribution
|
|
# conditioned on [0, chi**2 / 2]). Therefore, to sample X from the
|
|
# ARGUS distribution, we sample Y from the gamma distribution, keeping
|
|
# only samples on [0, chi**2 / 2], and apply the inverse
|
|
# transformation X = (1 - 2*Y/chi**2)**(1/2). Since we only
|
|
# look at chi > 1.8, gamma(1.5).cdf(chi**2/2) is large enough such
|
|
# Y falls in the interval [0, chi**2 / 2] with a high probability:
|
|
# stats.gamma(1.5).cdf(1.8**2/2) = 0.644...
|
|
#
|
|
# The points to switch between the different methods are determined
|
|
# by a comparison of the runtime of the different methods. However,
|
|
# the runtime is platform-dependent. The implemented values should
|
|
# ensure a good overall performance and are supported by an analysis
|
|
# of the rejection constants of different methods.
|
|
|
|
size1d = tuple(np.atleast_1d(numsamples))
|
|
N = int(np.prod(size1d))
|
|
x = np.zeros(N)
|
|
simulated = 0
|
|
chi2 = chi * chi
|
|
if chi <= 0.5:
|
|
d = -chi2 / 2
|
|
while simulated < N:
|
|
k = N - simulated
|
|
u = random_state.uniform(size=k)
|
|
v = random_state.uniform(size=k)
|
|
z = v**(2/3)
|
|
# acceptance condition: u <= h(G_inv(v)). This simplifies to
|
|
accept = (np.log(u) <= d * z)
|
|
num_accept = np.sum(accept)
|
|
if num_accept > 0:
|
|
# we still need to transform z=v**(2/3) to X = G_inv(v)
|
|
rvs = np.sqrt(1 - z[accept])
|
|
x[simulated:(simulated + num_accept)] = rvs
|
|
simulated += num_accept
|
|
elif chi <= 1.8:
|
|
echi = np.exp(-chi2 / 2)
|
|
while simulated < N:
|
|
k = N - simulated
|
|
u = random_state.uniform(size=k)
|
|
v = random_state.uniform(size=k)
|
|
z = 2 * np.log(echi * (1 - v) + v) / chi2
|
|
# as in case one, simplify u <= h(G_inv(v)) and then transform
|
|
# z to the target distribution X = G_inv(v)
|
|
accept = (u**2 + z <= 0)
|
|
num_accept = np.sum(accept)
|
|
if num_accept > 0:
|
|
rvs = np.sqrt(1 + z[accept])
|
|
x[simulated:(simulated + num_accept)] = rvs
|
|
simulated += num_accept
|
|
else:
|
|
# conditional Gamma for chi > 1.8
|
|
while simulated < N:
|
|
k = N - simulated
|
|
g = random_state.standard_gamma(1.5, size=k)
|
|
accept = (g <= chi2 / 2)
|
|
num_accept = np.sum(accept)
|
|
if num_accept > 0:
|
|
x[simulated:(simulated + num_accept)] = g[accept]
|
|
simulated += num_accept
|
|
x = np.sqrt(1 - 2 * x / chi2)
|
|
|
|
return np.reshape(x, size1d)
|
|
|
|
def _stats(self, chi):
|
|
# need to ensure that dtype is float
|
|
# otherwise the mask below does not work for integers
|
|
chi = np.asarray(chi, dtype=float)
|
|
phi = _argus_phi(chi)
|
|
m = np.sqrt(np.pi/8) * chi * sc.ive(1, chi**2/4) / phi
|
|
# compute second moment, use Taylor expansion for small chi (<= 0.1)
|
|
mu2 = np.empty_like(chi)
|
|
mask = chi > 0.1
|
|
c = chi[mask]
|
|
mu2[mask] = 1 - 3 / c**2 + c * _norm_pdf(c) / phi[mask]
|
|
c = chi[~mask]
|
|
coef = [-358/65690625, 0, -94/1010625, 0, 2/2625, 0, 6/175, 0, 0.4]
|
|
mu2[~mask] = np.polyval(coef, c)
|
|
return m, mu2 - m**2, None, None
|
|
|
|
|
|
argus = argus_gen(name='argus', longname="An Argus Function", a=0.0, b=1.0)
|
|
|
|
|
|
class rv_histogram(rv_continuous):
|
|
"""
|
|
Generates a distribution given by a histogram.
|
|
This is useful to generate a template distribution from a binned
|
|
datasample.
|
|
|
|
As a subclass of the `rv_continuous` class, `rv_histogram` inherits from it
|
|
a collection of generic methods (see `rv_continuous` for the full list),
|
|
and implements them based on the properties of the provided binned
|
|
datasample.
|
|
|
|
Parameters
|
|
----------
|
|
histogram : tuple of array_like
|
|
Tuple containing two array_like objects.
|
|
The first containing the content of n bins,
|
|
the second containing the (n+1) bin boundaries.
|
|
In particular, the return value of `numpy.histogram` is accepted.
|
|
|
|
density : bool, optional
|
|
If False, assumes the histogram is proportional to counts per bin;
|
|
otherwise, assumes it is proportional to a density.
|
|
For constant bin widths, these are equivalent, but the distinction
|
|
is important when bin widths vary (see Notes).
|
|
If None (default), sets ``density=True`` for backwards compatibility,
|
|
but warns if the bin widths are variable. Set `density` explicitly
|
|
to silence the warning.
|
|
|
|
.. versionadded:: 1.10.0
|
|
|
|
Notes
|
|
-----
|
|
When a histogram has unequal bin widths, there is a distinction between
|
|
histograms that are proportional to counts per bin and histograms that are
|
|
proportional to probability density over a bin. If `numpy.histogram` is
|
|
called with its default ``density=False``, the resulting histogram is the
|
|
number of counts per bin, so ``density=False`` should be passed to
|
|
`rv_histogram`. If `numpy.histogram` is called with ``density=True``, the
|
|
resulting histogram is in terms of probability density, so ``density=True``
|
|
should be passed to `rv_histogram`. To avoid warnings, always pass
|
|
``density`` explicitly when the input histogram has unequal bin widths.
|
|
|
|
There are no additional shape parameters except for the loc and scale.
|
|
The pdf is defined as a stepwise function from the provided histogram.
|
|
The cdf is a linear interpolation of the pdf.
|
|
|
|
.. versionadded:: 0.19.0
|
|
|
|
Examples
|
|
--------
|
|
|
|
Create a scipy.stats distribution from a numpy histogram
|
|
|
|
>>> import scipy.stats
|
|
>>> import numpy as np
|
|
>>> data = scipy.stats.norm.rvs(size=100000, loc=0, scale=1.5,
|
|
... random_state=123)
|
|
>>> hist = np.histogram(data, bins=100)
|
|
>>> hist_dist = scipy.stats.rv_histogram(hist, density=False)
|
|
|
|
Behaves like an ordinary scipy rv_continuous distribution
|
|
|
|
>>> hist_dist.pdf(1.0)
|
|
0.20538577847618705
|
|
>>> hist_dist.cdf(2.0)
|
|
0.90818568543056499
|
|
|
|
PDF is zero above (below) the highest (lowest) bin of the histogram,
|
|
defined by the max (min) of the original dataset
|
|
|
|
>>> hist_dist.pdf(np.max(data))
|
|
0.0
|
|
>>> hist_dist.cdf(np.max(data))
|
|
1.0
|
|
>>> hist_dist.pdf(np.min(data))
|
|
7.7591907244498314e-05
|
|
>>> hist_dist.cdf(np.min(data))
|
|
0.0
|
|
|
|
PDF and CDF follow the histogram
|
|
|
|
>>> import matplotlib.pyplot as plt
|
|
>>> X = np.linspace(-5.0, 5.0, 100)
|
|
>>> fig, ax = plt.subplots()
|
|
>>> ax.set_title("PDF from Template")
|
|
>>> ax.hist(data, density=True, bins=100)
|
|
>>> ax.plot(X, hist_dist.pdf(X), label='PDF')
|
|
>>> ax.plot(X, hist_dist.cdf(X), label='CDF')
|
|
>>> ax.legend()
|
|
>>> fig.show()
|
|
|
|
"""
|
|
_support_mask = rv_continuous._support_mask
|
|
|
|
def __init__(self, histogram, *args, density=None, **kwargs):
|
|
"""
|
|
Create a new distribution using the given histogram
|
|
|
|
Parameters
|
|
----------
|
|
histogram : tuple of array_like
|
|
Tuple containing two array_like objects.
|
|
The first containing the content of n bins,
|
|
the second containing the (n+1) bin boundaries.
|
|
In particular, the return value of np.histogram is accepted.
|
|
density : bool, optional
|
|
If False, assumes the histogram is proportional to counts per bin;
|
|
otherwise, assumes it is proportional to a density.
|
|
For constant bin widths, these are equivalent.
|
|
If None (default), sets ``density=True`` for backward
|
|
compatibility, but warns if the bin widths are variable. Set
|
|
`density` explicitly to silence the warning.
|
|
"""
|
|
self._histogram = histogram
|
|
self._density = density
|
|
if len(histogram) != 2:
|
|
raise ValueError("Expected length 2 for parameter histogram")
|
|
self._hpdf = np.asarray(histogram[0])
|
|
self._hbins = np.asarray(histogram[1])
|
|
if len(self._hpdf) + 1 != len(self._hbins):
|
|
raise ValueError("Number of elements in histogram content "
|
|
"and histogram boundaries do not match, "
|
|
"expected n and n+1.")
|
|
self._hbin_widths = self._hbins[1:] - self._hbins[:-1]
|
|
bins_vary = not np.allclose(self._hbin_widths, self._hbin_widths[0])
|
|
if density is None and bins_vary:
|
|
message = ("Bin widths are not constant. Assuming `density=True`."
|
|
"Specify `density` explicitly to silence this warning.")
|
|
warnings.warn(message, RuntimeWarning, stacklevel=2)
|
|
density = True
|
|
elif not density:
|
|
self._hpdf = self._hpdf / self._hbin_widths
|
|
|
|
self._hpdf = self._hpdf / float(np.sum(self._hpdf * self._hbin_widths))
|
|
self._hcdf = np.cumsum(self._hpdf * self._hbin_widths)
|
|
self._hpdf = np.hstack([0.0, self._hpdf, 0.0])
|
|
self._hcdf = np.hstack([0.0, self._hcdf])
|
|
# Set support
|
|
kwargs['a'] = self.a = self._hbins[0]
|
|
kwargs['b'] = self.b = self._hbins[-1]
|
|
super().__init__(*args, **kwargs)
|
|
|
|
def _pdf(self, x):
|
|
"""
|
|
PDF of the histogram
|
|
"""
|
|
return self._hpdf[np.searchsorted(self._hbins, x, side='right')]
|
|
|
|
def _cdf(self, x):
|
|
"""
|
|
CDF calculated from the histogram
|
|
"""
|
|
return np.interp(x, self._hbins, self._hcdf)
|
|
|
|
def _ppf(self, x):
|
|
"""
|
|
Percentile function calculated from the histogram
|
|
"""
|
|
return np.interp(x, self._hcdf, self._hbins)
|
|
|
|
def _munp(self, n):
|
|
"""Compute the n-th non-central moment."""
|
|
integrals = (self._hbins[1:]**(n+1) - self._hbins[:-1]**(n+1)) / (n+1)
|
|
return np.sum(self._hpdf[1:-1] * integrals)
|
|
|
|
def _entropy(self):
|
|
"""Compute entropy of distribution"""
|
|
res = _lazywhere(self._hpdf[1:-1] > 0.0,
|
|
(self._hpdf[1:-1],),
|
|
np.log,
|
|
0.0)
|
|
return -np.sum(self._hpdf[1:-1] * res * self._hbin_widths)
|
|
|
|
def _updated_ctor_param(self):
|
|
"""
|
|
Set the histogram as additional constructor argument
|
|
"""
|
|
dct = super()._updated_ctor_param()
|
|
dct['histogram'] = self._histogram
|
|
dct['density'] = self._density
|
|
return dct
|
|
|
|
|
|
class studentized_range_gen(rv_continuous):
|
|
r"""A studentized range continuous random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
See Also
|
|
--------
|
|
t: Student's t distribution
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `studentized_range` is:
|
|
|
|
.. math::
|
|
|
|
f(x; k, \nu) = \frac{k(k-1)\nu^{\nu/2}}{\Gamma(\nu/2)
|
|
2^{\nu/2-1}} \int_{0}^{\infty} \int_{-\infty}^{\infty}
|
|
s^{\nu} e^{-\nu s^2/2} \phi(z) \phi(sx + z)
|
|
[\Phi(sx + z) - \Phi(z)]^{k-2} \,dz \,ds
|
|
|
|
for :math:`x ≥ 0`, :math:`k > 1`, and :math:`\nu > 0`.
|
|
|
|
`studentized_range` takes ``k`` for :math:`k` and ``df`` for :math:`\nu`
|
|
as shape parameters.
|
|
|
|
When :math:`\nu` exceeds 100,000, an asymptotic approximation (infinite
|
|
degrees of freedom) is used to compute the cumulative distribution
|
|
function [4]_ and probability distribution function.
|
|
|
|
%(after_notes)s
|
|
|
|
References
|
|
----------
|
|
|
|
.. [1] "Studentized range distribution",
|
|
https://en.wikipedia.org/wiki/Studentized_range_distribution
|
|
.. [2] Batista, Ben Dêivide, et al. "Externally Studentized Normal Midrange
|
|
Distribution." Ciência e Agrotecnologia, vol. 41, no. 4, 2017, pp.
|
|
378-389., doi:10.1590/1413-70542017414047716.
|
|
.. [3] Harter, H. Leon. "Tables of Range and Studentized Range." The Annals
|
|
of Mathematical Statistics, vol. 31, no. 4, 1960, pp. 1122-1147.
|
|
JSTOR, www.jstor.org/stable/2237810. Accessed 18 Feb. 2021.
|
|
.. [4] Lund, R. E., and J. R. Lund. "Algorithm AS 190: Probabilities and
|
|
Upper Quantiles for the Studentized Range." Journal of the Royal
|
|
Statistical Society. Series C (Applied Statistics), vol. 32, no. 2,
|
|
1983, pp. 204-210. JSTOR, www.jstor.org/stable/2347300. Accessed 18
|
|
Feb. 2021.
|
|
|
|
Examples
|
|
--------
|
|
>>> import numpy as np
|
|
>>> from scipy.stats import studentized_range
|
|
>>> import matplotlib.pyplot as plt
|
|
>>> fig, ax = plt.subplots(1, 1)
|
|
|
|
Display the probability density function (``pdf``):
|
|
|
|
>>> k, df = 3, 10
|
|
>>> x = np.linspace(studentized_range.ppf(0.01, k, df),
|
|
... studentized_range.ppf(0.99, k, df), 100)
|
|
>>> ax.plot(x, studentized_range.pdf(x, k, df),
|
|
... 'r-', lw=5, alpha=0.6, label='studentized_range pdf')
|
|
|
|
Alternatively, the distribution object can be called (as a function)
|
|
to fix the shape, location and scale parameters. This returns a "frozen"
|
|
RV object holding the given parameters fixed.
|
|
|
|
Freeze the distribution and display the frozen ``pdf``:
|
|
|
|
>>> rv = studentized_range(k, df)
|
|
>>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
|
|
|
|
Check accuracy of ``cdf`` and ``ppf``:
|
|
|
|
>>> vals = studentized_range.ppf([0.001, 0.5, 0.999], k, df)
|
|
>>> np.allclose([0.001, 0.5, 0.999], studentized_range.cdf(vals, k, df))
|
|
True
|
|
|
|
Rather than using (``studentized_range.rvs``) to generate random variates,
|
|
which is very slow for this distribution, we can approximate the inverse
|
|
CDF using an interpolator, and then perform inverse transform sampling
|
|
with this approximate inverse CDF.
|
|
|
|
This distribution has an infinite but thin right tail, so we focus our
|
|
attention on the leftmost 99.9 percent.
|
|
|
|
>>> a, b = studentized_range.ppf([0, .999], k, df)
|
|
>>> a, b
|
|
0, 7.41058083802274
|
|
|
|
>>> from scipy.interpolate import interp1d
|
|
>>> rng = np.random.default_rng()
|
|
>>> xs = np.linspace(a, b, 50)
|
|
>>> cdf = studentized_range.cdf(xs, k, df)
|
|
# Create an interpolant of the inverse CDF
|
|
>>> ppf = interp1d(cdf, xs, fill_value='extrapolate')
|
|
# Perform inverse transform sampling using the interpolant
|
|
>>> r = ppf(rng.uniform(size=1000))
|
|
|
|
And compare the histogram:
|
|
|
|
>>> ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
|
|
>>> ax.legend(loc='best', frameon=False)
|
|
>>> plt.show()
|
|
|
|
"""
|
|
|
|
def _argcheck(self, k, df):
|
|
return (k > 1) & (df > 0)
|
|
|
|
def _shape_info(self):
|
|
ik = _ShapeInfo("k", False, (1, np.inf), (False, False))
|
|
idf = _ShapeInfo("df", False, (0, np.inf), (False, False))
|
|
return [ik, idf]
|
|
|
|
def _fitstart(self, data):
|
|
# Default is k=1, but that is not a valid value of the parameter.
|
|
return super()._fitstart(data, args=(2, 1))
|
|
|
|
def _munp(self, K, k, df):
|
|
cython_symbol = '_studentized_range_moment'
|
|
_a, _b = self._get_support()
|
|
# all three of these are used to create a numpy array so they must
|
|
# be the same shape.
|
|
|
|
def _single_moment(K, k, df):
|
|
log_const = _stats._studentized_range_pdf_logconst(k, df)
|
|
arg = [K, k, df, log_const]
|
|
usr_data = np.array(arg, float).ctypes.data_as(ctypes.c_void_p)
|
|
|
|
llc = LowLevelCallable.from_cython(_stats, cython_symbol, usr_data)
|
|
|
|
ranges = [(-np.inf, np.inf), (0, np.inf), (_a, _b)]
|
|
opts = dict(epsabs=1e-11, epsrel=1e-12)
|
|
|
|
return integrate.nquad(llc, ranges=ranges, opts=opts)[0]
|
|
|
|
ufunc = np.frompyfunc(_single_moment, 3, 1)
|
|
return np.asarray(ufunc(K, k, df), dtype=np.float64)[()]
|
|
|
|
def _pdf(self, x, k, df):
|
|
|
|
def _single_pdf(q, k, df):
|
|
# The infinite form of the PDF is derived from the infinite
|
|
# CDF.
|
|
if df < 100000:
|
|
cython_symbol = '_studentized_range_pdf'
|
|
log_const = _stats._studentized_range_pdf_logconst(k, df)
|
|
arg = [q, k, df, log_const]
|
|
usr_data = np.array(arg, float).ctypes.data_as(ctypes.c_void_p)
|
|
ranges = [(-np.inf, np.inf), (0, np.inf)]
|
|
|
|
else:
|
|
cython_symbol = '_studentized_range_pdf_asymptotic'
|
|
arg = [q, k]
|
|
usr_data = np.array(arg, float).ctypes.data_as(ctypes.c_void_p)
|
|
ranges = [(-np.inf, np.inf)]
|
|
|
|
llc = LowLevelCallable.from_cython(_stats, cython_symbol, usr_data)
|
|
opts = dict(epsabs=1e-11, epsrel=1e-12)
|
|
return integrate.nquad(llc, ranges=ranges, opts=opts)[0]
|
|
|
|
ufunc = np.frompyfunc(_single_pdf, 3, 1)
|
|
return np.asarray(ufunc(x, k, df), dtype=np.float64)[()]
|
|
|
|
def _cdf(self, x, k, df):
|
|
|
|
def _single_cdf(q, k, df):
|
|
# "When the degrees of freedom V are infinite the probability
|
|
# integral takes [on a] simpler form," and a single asymptotic
|
|
# integral is evaluated rather than the standard double integral.
|
|
# (Lund, Lund, page 205)
|
|
if df < 100000:
|
|
cython_symbol = '_studentized_range_cdf'
|
|
log_const = _stats._studentized_range_cdf_logconst(k, df)
|
|
arg = [q, k, df, log_const]
|
|
usr_data = np.array(arg, float).ctypes.data_as(ctypes.c_void_p)
|
|
ranges = [(-np.inf, np.inf), (0, np.inf)]
|
|
|
|
else:
|
|
cython_symbol = '_studentized_range_cdf_asymptotic'
|
|
arg = [q, k]
|
|
usr_data = np.array(arg, float).ctypes.data_as(ctypes.c_void_p)
|
|
ranges = [(-np.inf, np.inf)]
|
|
|
|
llc = LowLevelCallable.from_cython(_stats, cython_symbol, usr_data)
|
|
opts = dict(epsabs=1e-11, epsrel=1e-12)
|
|
return integrate.nquad(llc, ranges=ranges, opts=opts)[0]
|
|
|
|
ufunc = np.frompyfunc(_single_cdf, 3, 1)
|
|
|
|
# clip p-values to ensure they are in [0, 1].
|
|
return np.clip(np.asarray(ufunc(x, k, df), dtype=np.float64)[()], 0, 1)
|
|
|
|
|
|
studentized_range = studentized_range_gen(name='studentized_range', a=0,
|
|
b=np.inf)
|
|
|
|
|
|
class rel_breitwigner_gen(rv_continuous):
|
|
r"""A relativistic Breit-Wigner random variable.
|
|
|
|
%(before_notes)s
|
|
|
|
See Also
|
|
--------
|
|
cauchy: Cauchy distribution, also known as the Breit-Wigner distribution.
|
|
|
|
Notes
|
|
-----
|
|
|
|
The probability density function for `rel_breitwigner` is
|
|
|
|
.. math::
|
|
|
|
f(x, \rho) = \frac{k}{(x^2 - \rho^2)^2 + \rho^2}
|
|
|
|
where
|
|
|
|
.. math::
|
|
k = \frac{2\sqrt{2}\rho^2\sqrt{\rho^2 + 1}}
|
|
{\pi\sqrt{\rho^2 + \rho\sqrt{\rho^2 + 1}}}
|
|
|
|
The relativistic Breit-Wigner distribution is used in high energy physics
|
|
to model resonances [1]_. It gives the uncertainty in the invariant mass,
|
|
:math:`M` [2]_, of a resonance with characteristic mass :math:`M_0` and
|
|
decay-width :math:`\Gamma`, where :math:`M`, :math:`M_0` and :math:`\Gamma`
|
|
are expressed in natural units. In SciPy's parametrization, the shape
|
|
parameter :math:`\rho` is equal to :math:`M_0/\Gamma` and takes values in
|
|
:math:`(0, \infty)`.
|
|
|
|
Equivalently, the relativistic Breit-Wigner distribution is said to give
|
|
the uncertainty in the center-of-mass energy :math:`E_{\text{cm}}`. In
|
|
natural units, the speed of light :math:`c` is equal to 1 and the invariant
|
|
mass :math:`M` is equal to the rest energy :math:`Mc^2`. In the
|
|
center-of-mass frame, the rest energy is equal to the total energy [3]_.
|
|
|
|
%(after_notes)s
|
|
|
|
:math:`\rho = M/\Gamma` and :math:`\Gamma` is the scale parameter. For
|
|
example, if one seeks to model the :math:`Z^0` boson with :math:`M_0
|
|
\approx 91.1876 \text{ GeV}` and :math:`\Gamma \approx 2.4952\text{ GeV}`
|
|
[4]_ one can set ``rho=91.1876/2.4952`` and ``scale=2.4952``.
|
|
|
|
To ensure a physically meaningful result when using the `fit` method, one
|
|
should set ``floc=0`` to fix the location parameter to 0.
|
|
|
|
References
|
|
----------
|
|
.. [1] Relativistic Breit-Wigner distribution, Wikipedia,
|
|
https://en.wikipedia.org/wiki/Relativistic_Breit-Wigner_distribution
|
|
.. [2] Invariant mass, Wikipedia,
|
|
https://en.wikipedia.org/wiki/Invariant_mass
|
|
.. [3] Center-of-momentum frame, Wikipedia,
|
|
https://en.wikipedia.org/wiki/Center-of-momentum_frame
|
|
.. [4] M. Tanabashi et al. (Particle Data Group) Phys. Rev. D 98, 030001 -
|
|
Published 17 August 2018
|
|
|
|
%(example)s
|
|
|
|
"""
|
|
def _argcheck(self, rho):
|
|
return rho > 0
|
|
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("rho", False, (0, np.inf), (False, False))]
|
|
|
|
def _pdf(self, x, rho):
|
|
# C = k / rho**2
|
|
C = np.sqrt(
|
|
2 * (1 + 1/rho**2) / (1 + np.sqrt(1 + 1/rho**2))
|
|
) * 2 / np.pi
|
|
with np.errstate(over='ignore'):
|
|
return C / (((x - rho)*(x + rho)/rho)**2 + 1)
|
|
|
|
def _cdf(self, x, rho):
|
|
# C = k / (2 * rho**2) / np.sqrt(1 + 1/rho**2)
|
|
C = np.sqrt(2/(1 + np.sqrt(1 + 1/rho**2)))/np.pi
|
|
result = (
|
|
np.sqrt(-1 + 1j/rho)
|
|
* np.arctan(x/np.sqrt(-rho*(rho + 1j)))
|
|
)
|
|
result = C * 2 * np.imag(result)
|
|
# Sometimes above formula produces values greater than 1.
|
|
return np.clip(result, None, 1)
|
|
|
|
def _munp(self, n, rho):
|
|
if n == 1:
|
|
# C = k / (2 * rho)
|
|
C = np.sqrt(
|
|
2 * (1 + 1/rho**2) / (1 + np.sqrt(1 + 1/rho**2))
|
|
) / np.pi * rho
|
|
return C * (np.pi/2 + np.arctan(rho))
|
|
if n == 2:
|
|
# C = pi * k / (4 * rho)
|
|
C = np.sqrt(
|
|
(1 + 1/rho**2) / (2 * (1 + np.sqrt(1 + 1/rho**2)))
|
|
) * rho
|
|
result = (1 - rho * 1j) / np.sqrt(-1 - 1j/rho)
|
|
return 2 * C * np.real(result)
|
|
else:
|
|
return np.inf
|
|
|
|
def _stats(self, rho):
|
|
# Returning None from stats makes public stats use _munp.
|
|
# nan values will be omitted from public stats. Skew and
|
|
# kurtosis are actually infinite.
|
|
return None, None, np.nan, np.nan
|
|
|
|
@inherit_docstring_from(rv_continuous)
|
|
def fit(self, data, *args, **kwds):
|
|
# Override rv_continuous.fit to better handle case where floc is set.
|
|
data, _, floc, fscale = _check_fit_input_parameters(
|
|
self, data, args, kwds
|
|
)
|
|
|
|
censored = isinstance(data, CensoredData)
|
|
if censored:
|
|
if data.num_censored() == 0:
|
|
# There are no censored values in data, so replace the
|
|
# CensoredData instance with a regular array.
|
|
data = data._uncensored
|
|
censored = False
|
|
|
|
if floc is None or censored:
|
|
return super().fit(data, *args, **kwds)
|
|
|
|
if fscale is None:
|
|
# The interquartile range approximates the scale parameter gamma.
|
|
# The median approximates rho * gamma.
|
|
p25, p50, p75 = np.quantile(data - floc, [0.25, 0.5, 0.75])
|
|
scale_0 = p75 - p25
|
|
rho_0 = p50 / scale_0
|
|
if not args:
|
|
args = [rho_0]
|
|
if "scale" not in kwds:
|
|
kwds["scale"] = scale_0
|
|
else:
|
|
M_0 = np.median(data - floc)
|
|
rho_0 = M_0 / fscale
|
|
if not args:
|
|
args = [rho_0]
|
|
return super().fit(data, *args, **kwds)
|
|
|
|
|
|
rel_breitwigner = rel_breitwigner_gen(a=0.0, name="rel_breitwigner")
|
|
|
|
|
|
# Collect names of classes and objects in this module.
|
|
pairs = list(globals().copy().items())
|
|
_distn_names, _distn_gen_names = get_distribution_names(pairs, rv_continuous)
|
|
|
|
__all__ = _distn_names + _distn_gen_names + ['rv_histogram']
|