213 lines
6.6 KiB
Python
213 lines
6.6 KiB
Python
"""Low discrepancy sequence tools."""
|
|
import numpy as np
|
|
|
|
|
|
def discrepancy(sample, bounds=None):
    """Compute the centered discrepancy of a sample.

    The discrepancy measures how uniformly the points cover the parameter
    space. The lower the value, the better the coverage.

    Parameters
    ----------
    sample : array_like (n_samples, k_vars)
        The sample to compute the discrepancy from. It must be 2-dimensional.
    bounds : tuple or array_like ([min, k_vars], [max, k_vars]), optional
        Lower and upper bounds of the space the sample lives in. When given,
        the sample is rescaled from these bounds to the unit hypercube
        before the discrepancy is computed. When omitted, the sample is
        used as is.

    Returns
    -------
    discrepancy : float
        Centered discrepancy. The value is returned as computed by the
        formula below, without taking a square root.

    References
    ----------
    [1] Fang et al. "Design and modeling for computer experiments",
      Computer Science and Data Analysis Series, 2006.
    """
    sample = np.asarray(sample)
    n_sample, dim = sample.shape

    # Sample scaling from bounds to unit hypercube
    if bounds is not None:
        # Accept plain tuples/lists as documented, not only ndarrays.
        bounds = np.asarray(bounds)
        min_ = bounds.min(axis=0)
        max_ = bounds.max(axis=0)
        sample = (sample - min_) / (max_ - min_)

    # Single-sum term: one product over the dimensions per point.
    abs_ = np.abs(sample - 0.5)
    disc1 = np.sum(np.prod(1 + 0.5 * abs_ - 0.5 * abs_ ** 2, axis=1))

    # Double-sum term: accumulate the (n_sample, n_sample) pairwise factors
    # one dimension at a time so only a single 2-D array is kept in memory.
    prod_arr = 1
    for i in range(dim):
        s0 = sample[:, i]
        prod_arr *= (1 +
                     0.5 * np.abs(s0[:, None] - 0.5) +
                     0.5 * np.abs(s0 - 0.5) -
                     0.5 * np.abs(s0[:, None] - s0))
    disc2 = prod_arr.sum()

    c2 = ((13.0 / 12.0) ** dim - 2.0 / n_sample * disc1 +
          1.0 / (n_sample ** 2) * disc2)

    return c2
|
|
|
|
|
|
def primes_from_2_to(n):
    """Prime numbers from 2 to *n* (exclusive).

    Parameters
    ----------
    n : int
        Exclusive upper bound. Values below 6 are handled separately
        because the sieve itself is only valid for ``n >= 6``.

    Returns
    -------
    primes : ndarray of int
        Primes in ``2 <= p < n``, in increasing order. Empty when ``n <= 2``.

    References
    ----------
    [1] `StackOverflow <https://stackoverflow.com/questions/2068372>`_.
    """
    if n < 6:
        # The sieve below always emits 2 and 3, which is wrong for n <= 3.
        # Only 2 and 3 can be primes strictly below 6 and below n here.
        return np.array([p for p in (2, 3) if p < n], dtype=np.intp)

    # Index j of the sieve stands for the number (3 * j + 1) | 1, i.e. the
    # sequence 1, 5, 7, 11, 13, ...; multiples of 2 and 3 are never stored.
    sieve = np.ones(n // 3 + (n % 6 == 2), dtype=bool)
    for i in range(1, int(n ** 0.5) // 3 + 1):
        if sieve[i]:
            k = (3 * i + 1) | 1
            # Cross out the multiples of k that are present in the sieve;
            # they fall into two strided index families.
            sieve[k * k // 3::2 * k] = False
            sieve[k * (k - 2 * (i & 1) + 4) // 3::2 * k] = False
    # Skip index 0 (the number 1) and prepend the primes 2 and 3.
    return np.r_[2, 3, ((3 * np.nonzero(sieve)[0][1:] + 1) | 1)]
|
|
|
|
|
|
def n_primes(n):
    """Return the first *n* prime numbers.

    Parameters
    ----------
    n : int
        Number of prime numbers wanted. Must be non-negative.

    Returns
    -------
    primes : list(int) or ndarray of int
        The first ``n`` primes in increasing order. A plain list is returned
        when the built-in table is large enough; otherwise the result is
        whatever ``primes_from_2_to`` returns, truncated to ``n`` entries.

    Raises
    ------
    ValueError
        If ``n`` is negative.
    """
    if n < 0:
        # A negative n would otherwise slice the table from the end and
        # silently return the wrong number of primes.
        raise ValueError("'n' must be a non-negative integer.")

    # All primes below 1000.
    known = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59,
             61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127,
             131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193,
             197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269,
             271, 277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349,
             353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431,
             433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503,
             509, 521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599,
             601, 607, 613, 617, 619, 631, 641, 643, 647, 653, 659, 661, 673,
             677, 683, 691, 701, 709, 719, 727, 733, 739, 743, 751, 757, 761,
             769, 773, 787, 797, 809, 811, 821, 823, 827, 829, 839, 853, 857,
             859, 863, 877, 881, 883, 887, 907, 911, 919, 929, 937, 941, 947,
             953, 967, 971, 977, 983, 991, 997]

    if n <= len(known):
        return known[:n]

    # The table is too short: sieve up to a growing bound until enough
    # primes are found. The bound starts above the table and doubles so
    # the total sieving work stays proportional to the final bound.
    upper = 2 * (known[-1] + 1)
    while True:
        primes = primes_from_2_to(upper)[:n]
        if len(primes) == n:
            return primes
        upper *= 2
|
|
|
|
|
|
def van_der_corput(n_sample, base=2, start_index=0):
    """Van der Corput sequence.

    Quasi-random (deterministic) sequence based on a b-adic expansion: the
    digits of each index written in ``base`` are mirrored around the radix
    point.

    Parameters
    ----------
    n_sample : int
        Number of elements of the sequence.
    base : int
        Base of the sequence. Must be at least 2.
    start_index : int
        Index to start the sequence from.

    Returns
    -------
    sequence : list (n_samples,)
        Sequence of Van der Corput. The element for index 0 is ``0.0``.

    Raises
    ------
    ValueError
        If ``base`` is smaller than 2.
    """
    if base < 2:
        # base == 1 never reduces the quotient (infinite loop) and
        # base == 0 divides by zero; reject both up front.
        raise ValueError("'base' must be an integer greater than or equal "
                         "to 2.")

    sequence = []
    for i in range(start_index, start_index + n_sample):
        n_th_number, denom = 0., 1.
        quotient = i
        # Peel off the digits of i in the given base, least significant
        # first, and place each one a further position after the radix point.
        while quotient > 0:
            quotient, remainder = divmod(quotient, base)
            denom *= base
            n_th_number += remainder / denom
        sequence.append(n_th_number)

    return sequence
|
|
|
|
|
|
def halton(dim, n_sample, bounds=None, start_index=0):
    """Halton sequence.

    Quasi-random sequence that generalizes the Van der Corput sequence to
    multiple dimensions. The Halton sequence uses the base-two Van der
    Corput sequence for the first dimension, base-three for the second and,
    more generally, the n-th prime number as the base of the n-th dimension.

    Parameters
    ----------
    dim : int
        Dimension of the parameter space.
    n_sample : int
        Number of samples to generate in the parameter space.
    bounds : tuple or array_like ([min, k_vars], [max, k_vars]), optional
        Lower and upper bounds of the target space. When given, the sample
        is rescaled from the unit hypercube to these bounds. When omitted,
        the sample is left in the unit hypercube.
    start_index : int
        Index to start the sequence from.

    Returns
    -------
    sequence : array_like (n_samples, k_vars)
        Sequence of Halton.

    References
    ----------
    [1] Halton, "On the efficiency of certain quasi-random sequences of points
      in evaluating multi-dimensional integrals", Numerische Mathematik, 1960.

    Examples
    --------
    Generate samples from a low discrepancy sequence of Halton.

    >>> from statsmodels.tools import sequences
    >>> sample = sequences.halton(dim=2, n_sample=5)

    Compute the quality of the sample using the discrepancy criterion.

    >>> uniformity = sequences.discrepancy(sample)

    If someone wants to continue an existing design, extra points can be
    obtained.

    >>> sample_continued = sequences.halton(dim=2, n_sample=5, start_index=5)
    """
    base = n_primes(dim)

    # Generate a sample using a Van der Corput sequence per dimension.
    # One extra element is generated and the first one is dropped, so the
    # points returned correspond to indices start_index + 1 onwards.
    sample = [van_der_corput(n_sample + 1, bdim, start_index) for bdim in base]
    sample = np.array(sample).T[1:]

    # Sample scaling from unit hypercube to feature range
    if bounds is not None:
        # Accept plain tuples/lists as documented, not only ndarrays.
        bounds = np.asarray(bounds)
        min_ = bounds.min(axis=0)
        max_ = bounds.max(axis=0)
        sample = sample * (max_ - min_) + min_

    return sample
|