import math
import numbers
import numpy as np
from scipy import stats
from scipy import special as sc
from ._qmc import (check_random_state as check_random_state_qmc,
                   Halton, QMCEngine)
from ._unuran.unuran_wrapper import NumericalInversePolynomial
from scipy._lib._util import check_random_state


__all__ = ['FastGeneratorInversion', 'RatioUniforms']


# define pdfs and other helper functions to create the generators

def argus_pdf(x, chi):
    # approach follows Baumgarten/Hoermann: Generating ARGUS random variates
    # for chi > 5, use relationship of the ARGUS distribution to Gamma(1.5)
    if chi <= 5:
        y = 1 - x * x
        return x * math.sqrt(y) * math.exp(-0.5 * chi**2 * y)
    return math.sqrt(x) * math.exp(-x)


def argus_gamma_trf(x, chi):
    if chi <= 5:
        return x
    return np.sqrt(1.0 - 2 * x / chi**2)


def argus_gamma_inv_trf(x, chi):
    if chi <= 5:
        return x
    return 0.5 * chi**2 * (1 - x**2)


def betaprime_pdf(x, a, b):
    if x > 0:
        logf = (a - 1) * math.log(x) - (a + b) * math.log1p(x) - sc.betaln(a, b)
        return math.exp(logf)
    else:
        # return pdf at x == 0 separately to avoid runtime warnings
        if a > 1:
            return 0
        elif a < 1:
            return np.inf
        else:
            return 1 / sc.beta(a, b)


def beta_valid_params(a, b):
    return (min(a, b) >= 0.1) and (max(a, b) <= 700)


def gamma_pdf(x, a):
    if x > 0:
        return math.exp(-math.lgamma(a) + (a - 1.0) * math.log(x) - x)
    else:
        return 0 if a >= 1 else np.inf


def invgamma_pdf(x, a):
    if x > 0:
        return math.exp(-(a + 1.0) * math.log(x) - math.lgamma(a) - 1 / x)
    else:
        return 0 if a >= 1 else np.inf


def burr_pdf(x, cc, dd):
    # note: we use np.exp instead of math.exp, otherwise an overflow
    # error can occur in the setup, e.g., for parameters
    # 1.89128135, 0.30195177, see test test_burr_overflow
    if x > 0:
        lx = math.log(x)
        return np.exp(-(cc + 1) * lx - (dd + 1) * math.log1p(np.exp(-cc * lx)))
    else:
        return 0


def burr12_pdf(x, cc, dd):
    if x > 0:
        lx = math.log(x)
        logterm = math.log1p(math.exp(cc * lx))
        return math.exp((cc - 1) * lx - (dd + 1) * logterm + math.log(cc * dd))
    else:
        return 0


def chi_pdf(x, a):
    if x > 0:
        return math.exp(
            (a - 1) * math.log(x)
            - 0.5 * (x * x)
            - (a / 2 - 1) * math.log(2)
            - math.lgamma(0.5 * a)
        )
    else:
        return 0 if a >= 1 else np.inf


def chi2_pdf(x, df):
    if x > 0:
        return math.exp(
            (df / 2 - 1) * math.log(x)
            - 0.5 * x
            - (df / 2) * math.log(2)
            - math.lgamma(0.5 * df)
        )
    else:
        return 0 if df >= 1 else np.inf


def alpha_pdf(x, a):
    if x > 0:
        return math.exp(-2.0 * math.log(x) - 0.5 * (a - 1.0 / x) ** 2)
    return 0.0


def bradford_pdf(x, c):
    if 0 <= x <= 1:
        return 1.0 / (1.0 + c * x)
    return 0.0


def crystalball_pdf(x, b, m):
    if x > -b:
        return math.exp(-0.5 * x * x)
    return math.exp(m * math.log(m / b) - 0.5 * b * b - m * math.log(m / b - b - x))


def weibull_min_pdf(x, c):
    if x > 0:
        return c * math.exp((c - 1) * math.log(x) - x**c)
    return 0.0


def weibull_max_pdf(x, c):
    if x < 0:
        return c * math.exp((c - 1) * math.log(-x) - ((-x) ** c))
    return 0.0


def invweibull_pdf(x, c):
    if x > 0:
        return c * math.exp(-(c + 1) * math.log(x) - x ** (-c))
    return 0.0


def wald_pdf(x):
    if x > 0:
        return math.exp(-((x - 1) ** 2) / (2 * x)) / math.sqrt(x**3)
    return 0.0

def geninvgauss_mode(p, b):
    # the two branches are equivalent formulas for the mode; use the
    # numerically more stable one depending on the sign of p - 1
    if p > 1:
        return (math.sqrt((1 - p) ** 2 + b**2) - (1 - p)) / b
    return b / (math.sqrt((1 - p) ** 2 + b**2) + (1 - p))

def geninvgauss_pdf(x, p, b):
    m = geninvgauss_mode(p, b)
    lfm = (p - 1) * math.log(m) - 0.5 * b * (m + 1 / m)
    if x > 0:
        return math.exp((p - 1) * math.log(x) - 0.5 * b * (x + 1 / x) - lfm)
    return 0.0


def invgauss_mode(mu):
    return 1.0 / (math.sqrt(1.5 * 1.5 + 1 / (mu * mu)) + 1.5)


def invgauss_pdf(x, mu):
    m = invgauss_mode(mu)
    lfm = -1.5 * math.log(m) - (m - mu) ** 2 / (2 * m * mu**2)
    if x > 0:
        return math.exp(-1.5 * math.log(x) - (x - mu) ** 2 / (2 * x * mu**2) - lfm)
    return 0.0


def powerlaw_pdf(x, a):
    if x > 0:
        return x ** (a - 1)
    return 0.0

# Define a dictionary: for a given distribution (keys), another dictionary
# (values) specifies the parameters for NumericalInversePolynomial (PINV).
# The keys of the latter dictionary are:
# - pdf: the pdf of the distribution (callable). The signature of the pdf
#   is float -> float (i.e., the function does not have to be vectorized).
#   If possible, functions like log or exp from the module math should be
#   preferred over functions from numpy since the PINV setup will be faster
#   in that case.
# - check_pinv_params: callable f that returns true if the shape parameters
#   (args) are recommended parameters for PINV (i.e., the u-error does
#   not exceed the default tolerance)
# - center: scalar if the center does not depend on args, otherwise
#   callable that returns the center as a function of the shape parameters
# - rvs_transform: a callable that can be used to transform the rvs that
#   are distributed according to the pdf to the target distribution
#   (as an example, see the entry for the beta distribution)
# - rvs_transform_inv: the inverse of rvs_transform (it is required
#   for the transformed ppf)
# - mirror_uniform: boolean or a callable that returns true or false
#   depending on the shape parameters. If True, the ppf is applied
#   to 1-u instead of u to generate rvs, where u is a uniform rv.
#   While both u and 1-u are uniform, it can be required to use 1-u
#   to compute the u-error correctly. This is only relevant for the argus
#   distribution.
# The only required keys are "pdf" and "check_pinv_params".
# All other keys are optional.
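# As an illustrative sketch (not an actual entry of PINV_CONFIG), a
# hypothetical distribution "mydist" with a single shape parameter a would
# only need the two required keys, e.g.:
#
#     "mydist": {
#         "pdf": lambda x, a: math.exp(-a * x) if x > 0 else 0.0,
#         "check_pinv_params": lambda a: 0.1 <= a <= 100.0,
#     },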

PINV_CONFIG = {
    "alpha": {
        "pdf": alpha_pdf,
        "check_pinv_params": lambda a: 1.0e-11 <= a < 2.1e5,
        "center": lambda a: 0.25 * (math.sqrt(a * a + 8.0) - a),
    },
    "anglit": {
        "pdf": lambda x: math.cos(2 * x) + 1.0e-13,
        # +1.e-13 is necessary, otherwise PINV has strange problems as
        # f(upper border) is very close to 0
        "center": 0,
    },
    "argus": {
        "pdf": argus_pdf,
        "center": lambda chi: 0.7 if chi <= 5 else 0.5,
        "check_pinv_params": lambda chi: 1e-20 < chi < 901,
        "rvs_transform": argus_gamma_trf,
        "rvs_transform_inv": argus_gamma_inv_trf,
        "mirror_uniform": lambda chi: chi > 5,
    },
    "beta": {
        "pdf": betaprime_pdf,
        "center": lambda a, b: max(0.1, (a - 1) / (b + 1)),
        "check_pinv_params": beta_valid_params,
        "rvs_transform": lambda x, *args: x / (1 + x),
        "rvs_transform_inv": lambda x, *args: x / (1 - x) if x < 1 else np.inf,
    },
    "betaprime": {
        "pdf": betaprime_pdf,
        "center": lambda a, b: max(0.1, (a - 1) / (b + 1)),
        "check_pinv_params": beta_valid_params,
    },
    "bradford": {
        "pdf": bradford_pdf,
        "check_pinv_params": lambda a: 1.0e-6 <= a <= 1e9,
        "center": 0.5,
    },
    "burr": {
        "pdf": burr_pdf,
        "center": lambda a, b: (2 ** (1 / b) - 1) ** (-1 / a),
        "check_pinv_params": lambda a, b: (min(a, b) >= 0.3) and (max(a, b) <= 50),
    },
    "burr12": {
        "pdf": burr12_pdf,
        "center": lambda a, b: (2 ** (1 / b) - 1) ** (1 / a),
        "check_pinv_params": lambda a, b: (min(a, b) >= 0.2) and (max(a, b) <= 50),
    },
    "cauchy": {
        "pdf": lambda x: 1 / (1 + (x * x)),
        "center": 0,
    },
    "chi": {
        "pdf": chi_pdf,
        "check_pinv_params": lambda df: 0.05 <= df <= 1.0e6,
        "center": lambda a: math.sqrt(a),
    },
    "chi2": {
        "pdf": chi2_pdf,
        "check_pinv_params": lambda df: 0.07 <= df <= 1e6,
        "center": lambda a: a,
    },
    "cosine": {
        "pdf": lambda x: 1 + math.cos(x),
        "center": 0,
    },
    "crystalball": {
        "pdf": crystalball_pdf,
        "check_pinv_params": lambda b, m: (0.01 <= b <= 5.5)
                                          and (1.1 <= m <= 75.1),
        "center": 0.0,
    },
    "expon": {
        "pdf": lambda x: math.exp(-x),
        "center": 1.0,
    },
    "gamma": {
        "pdf": gamma_pdf,
        "check_pinv_params": lambda a: 0.04 <= a <= 1e6,
        "center": lambda a: a,
    },
    "gennorm": {
        "pdf": lambda x, b: math.exp(-abs(x) ** b),
        "check_pinv_params": lambda b: 0.081 <= b <= 45.0,
        "center": 0.0,
    },
    "geninvgauss": {
        "pdf": geninvgauss_pdf,
        "check_pinv_params": lambda p, b: (abs(p) <= 1200.0)
                                          and (1.0e-10 <= b <= 1200.0),
        "center": geninvgauss_mode,
    },
    "gumbel_l": {
        "pdf": lambda x: math.exp(x - math.exp(x)),
        "center": -0.6,
    },
    "gumbel_r": {
        "pdf": lambda x: math.exp(-x - math.exp(-x)),
        "center": 0.6,
    },
    "hypsecant": {
        "pdf": lambda x: 1.0 / (math.exp(x) + math.exp(-x)),
        "center": 0.0,
    },
    "invgamma": {
        "pdf": invgamma_pdf,
        "check_pinv_params": lambda a: 0.04 <= a <= 1e6,
        "center": lambda a: 1 / a,
    },
    "invgauss": {
        "pdf": invgauss_pdf,
        "check_pinv_params": lambda mu: 1.0e-10 <= mu <= 1.0e9,
        "center": invgauss_mode,
    },
    "invweibull": {
        "pdf": invweibull_pdf,
        "check_pinv_params": lambda a: 0.12 <= a <= 512,
        "center": 1.0,
    },
    "laplace": {
        "pdf": lambda x: math.exp(-abs(x)),
        "center": 0.0,
    },
    "logistic": {
        "pdf": lambda x: math.exp(-x) / (1 + math.exp(-x)) ** 2,
        "center": 0.0,
    },
    "maxwell": {
        "pdf": lambda x: x * x * math.exp(-0.5 * x * x),
        "center": 1.41421,
    },
    "moyal": {
        "pdf": lambda x: math.exp(-(x + math.exp(-x)) / 2),
        "center": 1.2,
    },
    "norm": {
        "pdf": lambda x: math.exp(-x * x / 2),
        "center": 0.0,
    },
    "pareto": {
        "pdf": lambda x, b: x ** -(b + 1),
        "center": lambda b: b / (b - 1) if b > 2 else 1.5,
        "check_pinv_params": lambda b: 0.08 <= b <= 400000,
    },
    "powerlaw": {
        "pdf": powerlaw_pdf,
        "center": 1.0,
        "check_pinv_params": lambda a: 0.06 <= a <= 1.0e5,
    },
    "t": {
        "pdf": lambda x, df: (1 + x * x / df) ** (-0.5 * (df + 1)),
        "check_pinv_params": lambda a: 0.07 <= a <= 1e6,
        "center": 0.0,
    },
    "rayleigh": {
        "pdf": lambda x: x * math.exp(-0.5 * (x * x)),
        "center": 1.0,
    },
    "semicircular": {
        "pdf": lambda x: math.sqrt(1.0 - (x * x)),
        "center": 0,
    },
    "wald": {
        "pdf": wald_pdf,
        "center": 1.0,
    },
    "weibull_max": {
        "pdf": weibull_max_pdf,
        "check_pinv_params": lambda a: 0.25 <= a <= 512,
        "center": -1.0,
    },
    "weibull_min": {
        "pdf": weibull_min_pdf,
        "check_pinv_params": lambda a: 0.25 <= a <= 512,
        "center": 1.0,
    },
}


def _validate_qmc_input(qmc_engine, d, seed):
    # Input validation for `qmc_engine` and `d`
    # Error messages for invalid `d` are raised by QMCEngine
    # we could probably use a stats.qmc.check_qrandom_state
    if isinstance(qmc_engine, QMCEngine):
        if d is not None and qmc_engine.d != d:
            message = "`d` must be consistent with dimension of `qmc_engine`."
            raise ValueError(message)
        d = qmc_engine.d if d is None else d
    elif qmc_engine is None:
        d = 1 if d is None else d
        qmc_engine = Halton(d, seed=seed)
    else:
        message = (
            "`qmc_engine` must be an instance of "
            "`scipy.stats.qmc.QMCEngine` or `None`."
        )
        raise ValueError(message)

    return qmc_engine, d


class CustomDistPINV:
    def __init__(self, pdf, args):
        self._pdf = lambda x: pdf(x, *args)

    def pdf(self, x):
        return self._pdf(x)


class FastGeneratorInversion:
    """
    Fast sampling by numerical inversion of the CDF for a large class of
    continuous distributions in `scipy.stats`.

    Parameters
    ----------
    dist : rv_frozen object
        Frozen distribution object from `scipy.stats`. The list of supported
        distributions can be found in the Notes section. The shape parameters,
        `loc` and `scale` used to create the distributions must be scalars.
        For example, for the Gamma distribution with shape parameter `p`,
        `p` has to be a float, and for the beta distribution with shape
        parameters (a, b), both a and b have to be floats.
    domain : tuple of floats, optional
        If one wishes to sample from a truncated/conditional distribution,
        the domain has to be specified.
        The default is None. In that case, the random variates are not
        truncated, and the domain is inferred from the support of the
        distribution.
    ignore_shape_range : boolean, optional
        If False, a ValueError is raised if the shape parameters fall outside
        of the range of values for which high numerical accuracy can be
        ensured (see Notes). If True, any shape parameters that are valid
        for the distribution are accepted. This can be useful for testing.
        The default is False.
    random_state : {None, int, `numpy.random.Generator`,
                    `numpy.random.RandomState`}, optional

        A NumPy random number generator or seed for the underlying NumPy
        random number generator used to generate the stream of uniform
        random numbers.
        If `random_state` is None, ``np.random.default_rng()`` is used.
        If `random_state` is an int,
        ``np.random.default_rng(random_state)`` is used.
        If `random_state` is already a ``Generator`` or ``RandomState``
        instance then that instance is used.

    Attributes
    ----------
    loc : float
        The location parameter.
    random_state : {`numpy.random.Generator`, `numpy.random.RandomState`}
        The random state used in relevant methods like `rvs` (unless
        another `random_state` is passed as an argument to these methods).
    scale : float
        The scale parameter.

    Methods
    -------
    cdf
    evaluate_error
    ppf
    qrvs
    rvs
    support

    Notes
    -----
    The class creates an object for continuous distributions specified
    by `dist`. The method `rvs` uses a generator from
    `scipy.stats.sampling` that is created when the object is instantiated.
    In addition, the methods `qrvs` and `ppf` are added.
    `qrvs` generates samples based on quasi-random numbers from
    `scipy.stats.qmc`. `ppf` is the PPF based on the
    numerical inversion method in [1]_ (`NumericalInversePolynomial`) that is
    used to generate random variates.

    Supported distributions (`distname`) are:
    ``alpha``, ``anglit``, ``argus``, ``beta``, ``betaprime``, ``bradford``,
    ``burr``, ``burr12``, ``cauchy``, ``chi``, ``chi2``, ``cosine``,
    ``crystalball``, ``expon``, ``gamma``, ``gennorm``, ``geninvgauss``,
    ``gumbel_l``, ``gumbel_r``, ``hypsecant``, ``invgamma``, ``invgauss``,
    ``invweibull``, ``laplace``, ``logistic``, ``maxwell``, ``moyal``,
    ``norm``, ``pareto``, ``powerlaw``, ``t``, ``rayleigh``, ``semicircular``,
    ``wald``, ``weibull_max``, ``weibull_min``.

    `rvs` relies on the accuracy of the numerical inversion. If very extreme
    shape parameters are used, the numerical inversion might not work.
    However, for all implemented distributions, the admissible shape
    parameters have been tested, and an error will be raised if the user
    supplies values outside of the allowed range. The u-error should not
    exceed 1e-10 for all valid parameters. Note that warnings might be raised
    even if parameters are within the valid range when the object is
    instantiated.
    To check numerical accuracy, the method `evaluate_error` can be used.

    Note that all implemented distributions are also part of `scipy.stats`,
    and the object created by `FastGeneratorInversion` relies on methods like
    `ppf`, `cdf` and `pdf` from `rv_frozen`. The main benefit of using this
    class can be summarized as follows: Once the generator to sample random
    variates is created in the setup step, sampling and evaluation of
    the PPF using `ppf` are very fast,
    and performance is essentially independent of the distribution. Therefore,
    a substantial speed-up can be achieved for many distributions if large
    numbers of random variates are required. It is important to know that this
    fast sampling is achieved by inversion of the CDF. Thus, one uniform
    random variate is transformed into a non-uniform variate, which is an
    advantage for several simulation methods, e.g., when
    the variance reduction methods of common random variates or
    antithetic variates are used ([2]_).

    In addition, inversion makes it possible

    - to use a QMC generator from `scipy.stats.qmc` (method `qrvs`),
    - to generate random variates truncated to an interval. For example, if
      one aims to sample standard normal random variates from
      the interval (2, 4), this can be easily achieved by using the parameter
      `domain`.

    The location and scale that are initially defined by `dist`
    can be reset without having to rerun the setup
    step to create the generator that is used for sampling. The relation
    of the distribution `Y` with `loc` and `scale` to the standard
    distribution `X` (i.e., ``loc=0`` and ``scale=1``) is given by
    ``Y = loc + scale * X``.

    References
    ----------
    .. [1] Derflinger, Gerhard, Wolfgang Hörmann, and Josef Leydold.
           "Random variate generation by numerical inversion when only the
           density is known." ACM Transactions on Modeling and Computer
           Simulation (TOMACS) 20.4 (2010): 1-25.
    .. [2] Hörmann, Wolfgang, Josef Leydold and Gerhard Derflinger.
           "Automatic nonuniform random number generation."
           Springer, 2004.

    Examples
    --------
    >>> import numpy as np
    >>> from scipy import stats
    >>> from scipy.stats.sampling import FastGeneratorInversion

    Let's start with a simple example to illustrate the main features:

    >>> gamma_frozen = stats.gamma(1.5)
    >>> gamma_dist = FastGeneratorInversion(gamma_frozen)
    >>> r = gamma_dist.rvs(size=1000)

    The mean should be approximately equal to the shape parameter 1.5:

    >>> r.mean()
    1.52423591130436  # may vary

    Similarly, we can draw a sample based on quasi-random numbers:

    >>> r = gamma_dist.qrvs(size=1000)
    >>> r.mean()
    1.4996639255942914  # may vary

    Compare the exact PPF of the frozen distribution against the fast
    approximation `ppf`:

    >>> q = [0.001, 0.2, 0.5, 0.8, 0.999]
    >>> np.max(np.abs(gamma_frozen.ppf(q) - gamma_dist.ppf(q)))
    4.313394796895409e-08

    To confirm that the numerical inversion is accurate, we evaluate the
    approximation error (u-error), which should be below 1e-10 (for more
    details, refer to the documentation of `evaluate_error`):

    >>> gamma_dist.evaluate_error()
    (7.446320551265581e-11, nan)  # may vary

    Note that the location and scale can be changed without instantiating a
    new generator:

    >>> gamma_dist.loc = 2
    >>> gamma_dist.scale = 3
    >>> r = gamma_dist.rvs(size=1000)

    The mean should be approximately 2 + 3*1.5 = 6.5.

    >>> r.mean()
    6.399549295242894  # may vary

    Let us also illustrate how truncation can be applied:

    >>> trunc_norm = FastGeneratorInversion(stats.norm(), domain=(3, 4))
    >>> r = trunc_norm.rvs(size=1000)
    >>> 3 < r.min() < r.max() < 4
    True

    Check the mean:

    >>> r.mean()
    3.250433367078603  # may vary

    >>> stats.norm.expect(lb=3, ub=4, conditional=True)
    3.260454285589997

    In this particular case, `scipy.stats.truncnorm` could also be used to
    generate truncated normal random variates.
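
    Since each call to `ppf` maps one uniform variate to one sample,
    antithetic variates are straightforward to construct. A minimal sketch
    reusing the truncated normal object from above: the two negatively
    correlated samples ``r1`` and ``r2`` can be averaged to reduce the
    variance of Monte Carlo estimates.

    >>> u = np.random.default_rng().uniform(size=1000)
    >>> r1, r2 = trunc_norm.ppf(u), trunc_norm.ppf(1 - u)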

    """

    def __init__(
        self,
        dist,
        *,
        domain=None,
        ignore_shape_range=False,
        random_state=None,
    ):

        if isinstance(dist, stats.distributions.rv_frozen):
            distname = dist.dist.name
            if distname not in PINV_CONFIG.keys():
                raise ValueError(
                    f"Distribution '{distname}' is not supported. "
                    f"It must be one of {list(PINV_CONFIG.keys())}"
                )
        else:
            raise ValueError("`dist` must be a frozen distribution object")

        loc = dist.kwds.get("loc", 0)
        scale = dist.kwds.get("scale", 1)
        args = dist.args
        if not np.isscalar(loc):
            raise ValueError("loc must be scalar.")
        if not np.isscalar(scale):
            raise ValueError("scale must be scalar.")

        self._frozendist = getattr(stats, distname)(
            *args,
            loc=loc,
            scale=scale,
        )
        self._distname = distname

        nargs = np.broadcast_arrays(args)[0].size
        nargs_expected = self._frozendist.dist.numargs
        if nargs != nargs_expected:
            raise ValueError(
                f"Each of the {nargs_expected} shape parameters must be a "
                f"scalar, but {nargs} values are provided."
            )

        self.random_state = random_state

        if domain is None:
            self._domain = self._frozendist.support()
            self._p_lower = 0.0
            self._p_domain = 1.0
        else:
            self._domain = domain
            self._p_lower = self._frozendist.cdf(self._domain[0])
            _p_domain = self._frozendist.cdf(self._domain[1]) - self._p_lower
            self._p_domain = _p_domain
        self._set_domain_adj()
        self._ignore_shape_range = ignore_shape_range

        # the domain to be passed to NumericalInversePolynomial
        # define a separate variable since in case of a transformation,
        # domain_pinv will not be the same as self._domain
        self._domain_pinv = self._domain

        # get information about the distribution from the config to set up
        # the generator
        dist = self._process_config(distname, args)

        if self._rvs_transform_inv is not None:
            d0 = self._rvs_transform_inv(self._domain[0], *args)
            d1 = self._rvs_transform_inv(self._domain[1], *args)
            if d0 > d1:
                # swap the values if the transformation is decreasing
                d0, d1 = d1, d0
            # only update _domain_pinv and not _domain
            # _domain refers to the original distribution, _domain_pinv
            # to the transformed distribution
            self._domain_pinv = d0, d1

        # self._center has been set by the call self._process_config
        # check if self._center is inside the transformed domain
        # _domain_pinv, otherwise move it to the endpoint that is closer
        if self._center is not None:
            if self._center < self._domain_pinv[0]:
                self._center = self._domain_pinv[0]
            elif self._center > self._domain_pinv[1]:
                self._center = self._domain_pinv[1]

        self._rng = NumericalInversePolynomial(
            dist,
            random_state=self.random_state,
            domain=self._domain_pinv,
            center=self._center,
        )

    @property
    def random_state(self):
        return self._random_state

    @random_state.setter
    def random_state(self, random_state):
        self._random_state = check_random_state_qmc(random_state)

    @property
    def loc(self):
        return self._frozendist.kwds.get("loc", 0)

    @loc.setter
    def loc(self, loc):
        if not np.isscalar(loc):
            raise ValueError("loc must be scalar.")
        self._frozendist.kwds["loc"] = loc
        # update the adjusted domain that depends on loc and scale
        self._set_domain_adj()

    @property
    def scale(self):
        return self._frozendist.kwds.get("scale", 1)

    @scale.setter
    def scale(self, scale):
        if not np.isscalar(scale):
            raise ValueError("scale must be scalar.")
        self._frozendist.kwds["scale"] = scale
        # update the adjusted domain that depends on loc and scale
        self._set_domain_adj()

    def _set_domain_adj(self):
        """ Adjust the domain based on loc and scale. """
        loc = self.loc
        scale = self.scale
        lb = self._domain[0] * scale + loc
        ub = self._domain[1] * scale + loc
        self._domain_adj = (lb, ub)

    def _process_config(self, distname, args):
        cfg = PINV_CONFIG[distname]
        if "check_pinv_params" in cfg:
            if not self._ignore_shape_range:
                if not cfg["check_pinv_params"](*args):
                    msg = ("No generator is defined for the shape parameters "
                           f"{args}. Use ignore_shape_range to proceed "
                           "with the selected values.")
                    raise ValueError(msg)

        if "center" in cfg.keys():
            if not np.isscalar(cfg["center"]):
                self._center = cfg["center"](*args)
            else:
                self._center = cfg["center"]
        else:
            self._center = None
        self._rvs_transform = cfg.get("rvs_transform", None)
        self._rvs_transform_inv = cfg.get("rvs_transform_inv", None)
        _mirror_uniform = cfg.get("mirror_uniform", None)
        if _mirror_uniform is None:
            self._mirror_uniform = False
        else:
            self._mirror_uniform = _mirror_uniform(*args)

        return CustomDistPINV(cfg["pdf"], args)

    def rvs(self, size=None):
        """
        Sample from the distribution by inversion.

        Parameters
        ----------
        size : int or tuple, optional
            The shape of samples. Default is ``None`` in which case a scalar
            sample is returned.

        Returns
        -------
        rvs : array_like
            A NumPy array of random variates.

        Notes
        -----
        Random variates are generated by numerical inversion of the CDF, i.e.,
        `ppf` computed by `NumericalInversePolynomial` when the class
        is instantiated. Note that the
        default ``rvs`` method of the rv_continuous class is
        overwritten. Hence, a different stream of random numbers is generated
        even if the same seed is used.
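
        Examples
        --------
        A minimal sketch: two generators constructed with the same
        (arbitrarily chosen) seed draw their uniform numbers from identical
        streams and therefore return the same variates.

        >>> import numpy as np
        >>> from scipy import stats
        >>> from scipy.stats.sampling import FastGeneratorInversion
        >>> g1 = FastGeneratorInversion(stats.norm(), random_state=123)
        >>> g2 = FastGeneratorInversion(stats.norm(), random_state=123)
        >>> bool(np.allclose(g1.rvs(size=5), g2.rvs(size=5)))
        True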

        """
        # note: we cannot use self._rng.rvs directly in case
        # self._mirror_uniform is true
        u = self.random_state.uniform(size=size)
        if self._mirror_uniform:
            u = 1 - u
        r = self._rng.ppf(u)
        if self._rvs_transform is not None:
            r = self._rvs_transform(r, *self._frozendist.args)
        return self.loc + self.scale * r

    def ppf(self, q):
        """
        Very fast PPF (inverse CDF) of the distribution, which
        is a very close approximation of the exact PPF values.

        Parameters
        ----------
        q : array_like
            Array with lower tail probabilities.

        Returns
        -------
        ppf : array_like
            Quantiles corresponding to the values in `q`.

        Notes
        -----
        The evaluation of the PPF is very fast but it may have a large
        relative error in the far tails. The numerical precision of the PPF
        is controlled by the u-error, that is,
        ``max |u - CDF(PPF(u))|`` where the max is taken over points in
        the interval [0,1], see `evaluate_error`.

        Note that this PPF is designed to generate random samples.
        """
        q = np.asarray(q)
        if self._mirror_uniform:
            x = self._rng.ppf(1 - q)
        else:
            x = self._rng.ppf(q)
        if self._rvs_transform is not None:
            x = self._rvs_transform(x, *self._frozendist.args)
        return self.scale * x + self.loc

    def qrvs(self, size=None, d=None, qmc_engine=None):
        """
        Quasi-random variates of the given distribution.

        The `qmc_engine` is used to draw uniform quasi-random variates, and
        these are converted to quasi-random variates of the given distribution
        using inverse transform sampling.

        Parameters
        ----------
        size : int, tuple of ints, or None; optional
            Defines shape of random variates array. Default is ``None``.
        d : int or None, optional
            Defines dimension of uniform quasi-random variates to be
            transformed. Default is ``None``.
        qmc_engine : scipy.stats.qmc.QMCEngine(d=1), optional
            Defines the object to use for drawing
            quasi-random variates. Default is ``None``, which uses
            `scipy.stats.qmc.Halton(1)`.

        Returns
        -------
        rvs : ndarray or scalar
            Quasi-random variates. See Notes for shape information.

        Notes
        -----
        The shape of the output array depends on `size`, `d`, and `qmc_engine`.
        The intent is for the interface to be natural, but the detailed rules
        to achieve this are complicated.

        - If `qmc_engine` is ``None``, a `scipy.stats.qmc.Halton` instance is
          created with dimension `d`. If `d` is not provided, ``d=1``.
        - If `qmc_engine` is not ``None`` and `d` is ``None``, `d` is
          determined from the dimension of the `qmc_engine`.
        - If `qmc_engine` is not ``None`` and `d` is not ``None`` but the
          dimensions are inconsistent, a ``ValueError`` is raised.
        - After `d` is determined according to the rules above, the output
          shape is ``tuple_shape + d_shape``, where:

          - ``tuple_shape = tuple()`` if `size` is ``None``,
          - ``tuple_shape = (size,)`` if `size` is an ``int``,
          - ``tuple_shape = size`` if `size` is a sequence,
          - ``d_shape = tuple()`` if `d` is ``None`` or `d` is 1, and
          - ``d_shape = (d,)`` if `d` is greater than 1.

        The elements of the returned array are part of a low-discrepancy
        sequence. If `d` is 1, this means that none of the samples are truly
        independent. If `d` > 1, each slice ``rvs[..., i]`` will be of a
        quasi-independent sequence; see `scipy.stats.qmc.QMCEngine` for
        details. Note that when `d` > 1, the samples returned are still those
        of the provided univariate distribution, not a multivariate
        generalization of that distribution.
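
        Examples
        --------
        A minimal sketch (engine and seed chosen arbitrarily): draw
        quasi-random gamma variates, first with the default Halton engine
        and then with a scrambled Sobol engine of matching dimension.

        >>> from scipy import stats
        >>> from scipy.stats import qmc
        >>> from scipy.stats.sampling import FastGeneratorInversion
        >>> gamma_dist = FastGeneratorInversion(stats.gamma(1.5))
        >>> r = gamma_dist.qrvs(size=1000)
        >>> r.shape
        (1000,)
        >>> r = gamma_dist.qrvs(size=64, qmc_engine=qmc.Sobol(d=1, seed=5))
        >>> r.shape
        (64,)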

        """
        qmc_engine, d = _validate_qmc_input(qmc_engine, d, self.random_state)
        # mainly copied from unuran_wrapper.pyx.templ
        # `rvs` is flexible about whether `size` is an int or tuple, so this
        # should be, too.
        try:
            if size is None:
                tuple_size = (1,)
            else:
                tuple_size = tuple(size)
        except TypeError:
            tuple_size = (size,)
        # we do not use rng.qrvs directly since we need to be
        # able to apply the ppf to 1 - u
        N = 1 if size is None else np.prod(size)
        u = qmc_engine.random(N)
        if self._mirror_uniform:
            u = 1 - u
        qrvs = self._ppf(u)
        if self._rvs_transform is not None:
            qrvs = self._rvs_transform(qrvs, *self._frozendist.args)
        if size is None:
            qrvs = qrvs.squeeze()[()]
        else:
            if d == 1:
                qrvs = qrvs.reshape(tuple_size)
            else:
                qrvs = qrvs.reshape(tuple_size + (d,))
        return self.loc + self.scale * qrvs

    def evaluate_error(self, size=100000, random_state=None, x_error=False):
        """
        Evaluate the numerical accuracy of the inversion (u- and x-error).

        Parameters
        ----------
        size : int, optional
            The number of random points over which the error is estimated.
            Default is ``100000``.
        random_state : {None, int, `numpy.random.Generator`,
                        `numpy.random.RandomState`}, optional

            A NumPy random number generator or seed for the underlying NumPy
            random number generator used to generate the stream of uniform
            random numbers.
            If `random_state` is None, ``np.random.default_rng()`` is used.
            If `random_state` is an int,
            ``np.random.default_rng(random_state)`` is used.
            If `random_state` is already a ``Generator`` or ``RandomState``
            instance then that instance is used.

        Returns
        -------
        u_error, x_error : tuple of floats
            The maximal u-error and the maximal x-error. The x-error is
            ``np.nan`` unless `x_error` is set to ``True``.

        Notes
        -----
        The numerical precision of the inverse CDF `ppf` is controlled by
        the u-error. It is computed as follows:
        ``max |u - CDF(PPF(u))|`` where the max is taken over `size` random
        points in the interval [0,1]. `random_state` determines the random
        sample. Note that if `ppf` was exact, the u-error would be zero.

        The x-error measures the direct distance between the exact PPF
        and `ppf`. If ``x_error`` is set to ``True``, it is
        computed as the maximum of the minimum of the relative and absolute
        x-error:
        ``max(min(x_error_abs[i], x_error_rel[i]))`` where
        ``x_error_abs[i] = |PPF(u[i]) - PPF_fast(u[i])|``,
        ``x_error_rel[i] = |(PPF(u[i]) - PPF_fast(u[i])) / PPF(u[i])|``.
        Note that it is important to consider the relative x-error in the case
        that ``PPF(u)`` is close to zero or very large.

        By default, only the u-error is evaluated and the x-error is set to
        ``np.nan``. Note that the evaluation of the x-error will be very slow
        if the implementation of the PPF is slow.

        Further information about these error measures can be found in [1]_.

        References
        ----------
        .. [1] Derflinger, Gerhard, Wolfgang Hörmann, and Josef Leydold.
               "Random variate generation by numerical inversion when only the
               density is known." ACM Transactions on Modeling and Computer
               Simulation (TOMACS) 20.4 (2010): 1-25.

        Examples
        --------

        >>> import numpy as np
        >>> from scipy import stats
        >>> from scipy.stats.sampling import FastGeneratorInversion

        Create an object for the normal distribution:

        >>> d_norm_frozen = stats.norm()
        >>> d_norm = FastGeneratorInversion(d_norm_frozen)

        To confirm that the numerical inversion is accurate, we evaluate the
        approximation error (u-error and x-error).

        >>> u_error, x_error = d_norm.evaluate_error(x_error=True)

        The u-error should be below 1e-10:

        >>> u_error
        8.785783212061915e-11  # may vary

        Compare the exact PPF of the frozen distribution against the fast
        approximation `ppf`:

        >>> q = [0.001, 0.2, 0.4, 0.6, 0.8, 0.999]
        >>> diff = np.abs(d_norm_frozen.ppf(q) - d_norm.ppf(q))
        >>> x_error_abs = np.max(diff)
        >>> x_error_abs
        1.2937954707581412e-08

        This is the absolute x-error evaluated at the points q. The relative
        error is given by

        >>> x_error_rel = np.max(diff / np.abs(d_norm_frozen.ppf(q)))
        >>> x_error_rel
        4.186725600453555e-09

        The x_error computed above is derived in a very similar way over a
        much larger set of random values q. At each value q[i], the minimum
        of the relative and absolute error is taken. The final value is then
        derived as the maximum of these values. In our example, we get the
        following value:

        >>> x_error
        4.507068014335139e-07  # may vary

        """
        if not isinstance(size, (numbers.Integral, np.integer)):
            raise ValueError("size must be an integer.")
        # urng will be used to draw the samples for testing the error
        # it must not interfere with self.random_state. therefore, do not
        # call self.rvs, but draw uniform random numbers and apply
        # self.ppf (note: like in rvs, consider self._mirror_uniform)
        urng = check_random_state_qmc(random_state)
        u = urng.uniform(size=size)
        if self._mirror_uniform:
            u = 1 - u
        x = self.ppf(u)
        uerr = np.max(np.abs(self._cdf(x) - u))
        if not x_error:
            return uerr, np.nan
        ppf_u = self._ppf(u)
        x_error_abs = np.abs(self.ppf(u) - ppf_u)
        x_error_rel = x_error_abs / np.abs(ppf_u)
        x_error_combined = np.array([x_error_abs, x_error_rel]).min(axis=0)
        return uerr, np.max(x_error_combined)

    def support(self):
        """Support of the distribution.

        Returns
        -------
        a, b : float
            end-points of the distribution's support.

        Notes
        -----

        Note that the support of the distribution depends on `loc`,
        `scale` and `domain`.

        Examples
        --------

        >>> from scipy import stats
        >>> from scipy.stats.sampling import FastGeneratorInversion

        Define a truncated normal distribution:

        >>> d_norm = FastGeneratorInversion(stats.norm(), domain=(0, 1))
        >>> d_norm.support()
        (0, 1)

        Shift the distribution:

        >>> d_norm.loc = 2.5
        >>> d_norm.support()
        (2.5, 3.5)

        """
        return self._domain_adj

    def _cdf(self, x):
        """Cumulative distribution function (CDF)

        Parameters
        ----------
        x : array_like
            The values where the CDF is evaluated

        Returns
        -------
        y : ndarray
            CDF evaluated at x

        """
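        # CDF of the truncated distribution: rescale the CDF of the full
        # distribution to the truncation domain,
        # (F(x) - F(lb)) / (F(ub) - F(lb)), and clip the result to [0, 1]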
        y = self._frozendist.cdf(x)
        if self._p_domain == 1.0:
            return y
        return np.clip((y - self._p_lower) / self._p_domain, 0, 1)

    def _ppf(self, q):
        """Percent point function (inverse of `cdf`)

        Parameters
        ----------
        q : array_like
            lower tail probability

        Returns
        -------
        x : array_like
            quantile corresponding to the lower tail probability q.

        """
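        # PPF of the truncated distribution: map q to the corresponding
        # probability of the full distribution, F(lb) + q * (F(ub) - F(lb)),
        # evaluate the full PPF there and clip the result to the domain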
        if self._p_domain == 1.0:
            return self._frozendist.ppf(q)
        x = self._frozendist.ppf(self._p_domain * np.array(q) + self._p_lower)
        return np.clip(x, self._domain_adj[0], self._domain_adj[1])


class RatioUniforms:
    """
    Generate random samples from a probability density function using the
    ratio-of-uniforms method.

    Parameters
    ----------
    pdf : callable
        A function with signature `pdf(x)` that is proportional to the
        probability density function of the distribution.
    umax : float
        The upper bound of the bounding rectangle in the u-direction.
    vmin : float
        The lower bound of the bounding rectangle in the v-direction.
    vmax : float
        The upper bound of the bounding rectangle in the v-direction.
    c : float, optional
        Shift parameter of ratio-of-uniforms method, see Notes. Default is 0.
    random_state : {None, int, `numpy.random.Generator`,
                    `numpy.random.RandomState`}, optional

        If `random_state` is None (or `np.random`), the
        `numpy.random.RandomState` singleton is used.
        If `random_state` is an int, a new ``RandomState`` instance is used,
        seeded with `random_state`.
        If `random_state` is already a ``Generator`` or ``RandomState``
        instance then that instance is used.

    Methods
    -------
    rvs

    Notes
    -----
    Given a univariate probability density function `pdf` and a constant `c`,
    define the set ``A = {(u, v) : 0 < u <= sqrt(pdf(v/u + c))}``.
    If ``(U, V)`` is a random vector uniformly distributed over ``A``,
    then ``V/U + c`` follows a distribution according to `pdf`.

    The above result (see [1]_, [2]_) can be used to sample random variables
    using only the PDF, i.e. no inversion of the CDF is required. Typical
    choices of `c` are zero or the mode of `pdf`. The set ``A`` is a subset of
    the rectangle ``R = [0, umax] x [vmin, vmax]`` where

    - ``umax = sup sqrt(pdf(x))``
    - ``vmin = inf (x - c) sqrt(pdf(x))``
    - ``vmax = sup (x - c) sqrt(pdf(x))``

    In particular, these values are finite if `pdf` is bounded and
    ``x**2 * pdf(x)`` is bounded (i.e. subquadratic tails).
    One can generate ``(U, V)`` uniformly on ``R`` and return
    ``V/U + c`` if ``(U, V)`` are also in ``A`` which can be directly
    verified.

    The algorithm is not changed if one replaces `pdf` by k * `pdf` for any
    constant k > 0. Thus, it is often convenient to work with a function
    that is proportional to the probability density function by dropping
    unnecessary normalization factors.

    Intuitively, the method works well if ``A`` fills up most of the
    enclosing rectangle such that the probability is high that ``(U, V)``
    lies in ``A`` whenever it lies in ``R`` as the number of required
    iterations becomes too large otherwise. To be more precise, note that
    the expected number of iterations to draw ``(U, V)`` uniformly
    distributed on ``R`` such that ``(U, V)`` is also in ``A`` is given by
    the ratio ``area(R) / area(A) = 2 * umax * (vmax - vmin) / area(pdf)``,
    where `area(pdf)` is the integral of `pdf` (which is equal to one if the
    probability density function is used but can take on other values if a
    function proportional to the density is used). The equality holds since
    the area of ``A`` is equal to ``0.5 * area(pdf)`` (Theorem 7.1 in [1]_).
    If the sampling fails to generate a single random variate after 50000
    iterations (i.e. not a single draw is in ``A``), an exception is raised.

    If the bounding rectangle is not correctly specified (i.e. if it does not
    contain ``A``), the algorithm samples from a distribution different from
    the one given by `pdf`. It is therefore recommended to perform a
    test such as `~scipy.stats.kstest` as a check.

    References
    ----------
    .. [1] L. Devroye, "Non-Uniform Random Variate Generation",
       Springer-Verlag, 1986.

    .. [2] W. Hoermann and J. Leydold, "Generating generalized inverse Gaussian
       random variates", Statistics and Computing, 24(4), p. 547--557, 2014.

    .. [3] A.J. Kinderman and J.F. Monahan, "Computer Generation of Random
       Variables Using the Ratio of Uniform Deviates",
       ACM Transactions on Mathematical Software, 3(3), p. 257--260, 1977.

    Examples
    --------
    >>> import numpy as np
    >>> from scipy import stats

    >>> from scipy.stats.sampling import RatioUniforms
    >>> rng = np.random.default_rng()

    Simulate normally distributed random variables. It is easy to compute the
    bounding rectangle explicitly in that case. For simplicity, we drop the
    normalization factor of the density.

    >>> f = lambda x: np.exp(-x**2 / 2)
    >>> v = np.sqrt(f(np.sqrt(2))) * np.sqrt(2)
    >>> umax = np.sqrt(f(0))
    >>> gen = RatioUniforms(f, umax=umax, vmin=-v, vmax=v, random_state=rng)
    >>> r = gen.rvs(size=2500)

    The K-S test confirms that the random variates are indeed normally
    distributed (normality is not rejected at 5% significance level):

    >>> stats.kstest(r, 'norm')[1]
    0.250634764150542
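
    As a quick sketch (not needed for the sampling itself), the expected
    number of iterations per variate for this setup follows from the
    efficiency formula in the Notes,
    ``2 * umax * (vmax - vmin) / area(pdf)``, where the area under the
    unnormalized density ``f`` is ``sqrt(2*pi)``:

    >>> float(round(2 * umax * 2 * v / np.sqrt(2 * np.pi), 2))
    1.37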

    The exponential distribution provides another example where the bounding
    rectangle can be determined explicitly.

    >>> gen = RatioUniforms(lambda x: np.exp(-x), umax=1, vmin=0,
    ...                     vmax=2*np.exp(-1), random_state=rng)
    >>> r = gen.rvs(1000)
    >>> stats.kstest(r, 'expon')[1]
    0.21121052054580314

    """

    def __init__(self, pdf, *, umax, vmin, vmax, c=0, random_state=None):
        if vmin >= vmax:
            raise ValueError("vmin must be smaller than vmax.")

        if umax <= 0:
            raise ValueError("umax must be positive.")

        self._pdf = pdf
        self._umax = umax
        self._vmin = vmin
        self._vmax = vmax
        self._c = c
        self._rng = check_random_state(random_state)

    def rvs(self, size=1):
        """Sampling of random variates

        Parameters
        ----------
        size : int or tuple of ints, optional
            Number of random variates to be generated (default is 1).

        Returns
        -------
        rvs : ndarray
            The random variates distributed according to the probability
            distribution defined by the pdf.

        """
        size1d = tuple(np.atleast_1d(size))
        N = np.prod(size1d)  # number of rvs needed, reshape upon return

        # start sampling using ratio of uniforms method
        x = np.zeros(N)
        simulated, i = 0, 1

        # loop until N rvs have been generated: expected runtime is finite.
        # to avoid an infinite loop, raise an exception if not a single rv
        # has been generated after 50000 tries. Even if the expected number
        # of iterations is 1000, the probability of this event is
        # (1 - 1/1000)**50000, which is of order 1e-22.
        while simulated < N:
            k = N - simulated
            # simulate uniform rvs on [0, umax] and [vmin, vmax]
            u1 = self._umax * self._rng.uniform(size=k)
            v1 = self._rng.uniform(self._vmin, self._vmax, size=k)
            # apply rejection method
            rvs = v1 / u1 + self._c
            accept = (u1**2 <= self._pdf(rvs))
            num_accept = np.sum(accept)
            if num_accept > 0:
                x[simulated:(simulated + num_accept)] = rvs[accept]
                simulated += num_accept

            if (simulated == 0) and (i*N >= 50000):
                msg = (
                    f"Not a single random variate could be generated in {i*N} "
                    "attempts. The ratio of uniforms method does not appear "
                    "to work for the provided parameters. Please check the "
                    "pdf and the bounds."
                )
                raise RuntimeError(msg)
            i += 1

        return np.reshape(x, size1d)