AIM-PIbd-32-Kurbanova-A-A/aimenv/Lib/site-packages/seaborn/algorithms.py

121 lines
4.1 KiB
Python
Raw Normal View History

2024-10-02 22:15:59 +04:00
"""Algorithms to support fitting routines in seaborn plotting functions."""
import numpy as np
import warnings
def bootstrap(*args, **kwargs):
"""Resample one or more arrays with replacement and store aggregate values.
Positional arguments are a sequence of arrays to bootstrap along the first
axis and pass to a summary function.
Keyword arguments:
n_boot : int, default=10000
Number of iterations
axis : int, default=None
Will pass axis to ``func`` as a keyword argument.
units : array, default=None
Array of sampling unit IDs. When used the bootstrap resamples units
and then observations within units instead of individual
datapoints.
func : string or callable, default="mean"
Function to call on the args that are passed in. If string, uses as
name of function in the numpy namespace. If nans are present in the
data, will try to use nan-aware version of named function.
seed : Generator | SeedSequence | RandomState | int | None
Seed for the random number generator; useful if you want
reproducible resamples.
Returns
-------
boot_dist: array
array of bootstrapped statistic values
"""
# Ensure list of arrays are same length
if len(np.unique(list(map(len, args)))) > 1:
raise ValueError("All input arrays must have the same length")
n = len(args[0])
# Default keyword arguments
n_boot = kwargs.get("n_boot", 10000)
func = kwargs.get("func", "mean")
axis = kwargs.get("axis", None)
units = kwargs.get("units", None)
random_seed = kwargs.get("random_seed", None)
if random_seed is not None:
msg = "`random_seed` has been renamed to `seed` and will be removed"
warnings.warn(msg)
seed = kwargs.get("seed", random_seed)
if axis is None:
func_kwargs = dict()
else:
func_kwargs = dict(axis=axis)
# Initialize the resampler
if isinstance(seed, np.random.RandomState):
rng = seed
else:
rng = np.random.default_rng(seed)
# Coerce to arrays
args = list(map(np.asarray, args))
if units is not None:
units = np.asarray(units)
if isinstance(func, str):
# Allow named numpy functions
f = getattr(np, func)
# Try to use nan-aware version of function if necessary
missing_data = np.isnan(np.sum(np.column_stack(args)))
if missing_data and not func.startswith("nan"):
nanf = getattr(np, f"nan{func}", None)
if nanf is None:
msg = f"Data contain nans but no nan-aware version of `{func}` found"
warnings.warn(msg, UserWarning)
else:
f = nanf
else:
f = func
# Handle numpy changes
try:
integers = rng.integers
except AttributeError:
integers = rng.randint
# Do the bootstrap
if units is not None:
return _structured_bootstrap(args, n_boot, units, f,
func_kwargs, integers)
boot_dist = []
for i in range(int(n_boot)):
resampler = integers(0, n, n, dtype=np.intp) # intp is indexing dtype
sample = [a.take(resampler, axis=0) for a in args]
boot_dist.append(f(*sample, **func_kwargs))
return np.array(boot_dist)
def _structured_bootstrap(args, n_boot, units, func, func_kwargs, integers):
"""Resample units instead of datapoints."""
unique_units = np.unique(units)
n_units = len(unique_units)
args = [[a[units == unit] for unit in unique_units] for a in args]
boot_dist = []
for i in range(int(n_boot)):
resampler = integers(0, n_units, n_units, dtype=np.intp)
sample = [[a[i] for i in resampler] for a in args]
lengths = map(len, sample[0])
resampler = [integers(0, n, n, dtype=np.intp) for n in lengths]
sample = [[c.take(r, axis=0) for c, r in zip(a, resampler)] for a in sample]
sample = list(map(np.concatenate, sample))
boot_dist.append(func(*sample, **func_kwargs))
return np.array(boot_dist)