4094 lines
143 KiB
Python
4094 lines
143 KiB
Python
#__docformat__ = "restructuredtext en"
|
|
# ******NOTICE***************
|
|
# optimize.py module by Travis E. Oliphant
|
|
#
|
|
# You may copy and use this module as you see fit with no
|
|
# guarantee implied provided you keep this notice in all copies.
|
|
# *****END NOTICE************
|
|
|
|
# A collection of optimization algorithms. Version 0.5
|
|
# CHANGES
|
|
# Added fminbound (July 2001)
|
|
# Added brute (Aug. 2002)
|
|
# Finished line search satisfying strong Wolfe conditions (Mar. 2004)
|
|
# Updated strong Wolfe conditions line search to use
|
|
# cubic-interpolation (Mar. 2004)
|
|
|
|
|
|
# Minimization routines
|
|
|
|
__all__ = ['fmin', 'fmin_powell', 'fmin_bfgs', 'fmin_ncg', 'fmin_cg',
|
|
'fminbound', 'brent', 'golden', 'bracket', 'rosen', 'rosen_der',
|
|
'rosen_hess', 'rosen_hess_prod', 'brute', 'approx_fprime',
|
|
'line_search', 'check_grad', 'OptimizeResult', 'show_options',
|
|
'OptimizeWarning']
|
|
|
|
__docformat__ = "restructuredtext en"
|
|
|
|
import math
|
|
import warnings
|
|
import sys
|
|
import inspect
|
|
from numpy import (atleast_1d, eye, argmin, zeros, shape, squeeze,
|
|
asarray, sqrt)
|
|
import numpy as np
|
|
from scipy.linalg import cholesky, issymmetric, LinAlgError
|
|
from scipy.sparse.linalg import LinearOperator
|
|
from ._linesearch import (line_search_wolfe1, line_search_wolfe2,
|
|
line_search_wolfe2 as line_search,
|
|
LineSearchWarning)
|
|
from ._numdiff import approx_derivative
|
|
from scipy._lib._util import getfullargspec_no_self as _getfullargspec
|
|
from scipy._lib._util import (MapWrapper, check_random_state, _RichResult,
|
|
_call_callback_maybe_halt)
|
|
from scipy.optimize._differentiable_functions import ScalarFunction, FD_METHODS
|
|
|
|
|
|
# standard status messages of optimizers
|
|
_status_message = {'success': 'Optimization terminated successfully.',
|
|
'maxfev': 'Maximum number of function evaluations has '
|
|
'been exceeded.',
|
|
'maxiter': 'Maximum number of iterations has been '
|
|
'exceeded.',
|
|
'pr_loss': 'Desired error not necessarily achieved due '
|
|
'to precision loss.',
|
|
'nan': 'NaN result encountered.',
|
|
'out_of_bounds': 'The result is outside of the provided '
|
|
'bounds.'}
|
|
|
|
|
|
class MemoizeJac:
|
|
""" Decorator that caches the return values of a function returning `(fun, grad)`
|
|
each time it is called. """
|
|
|
|
def __init__(self, fun):
|
|
self.fun = fun
|
|
self.jac = None
|
|
self._value = None
|
|
self.x = None
|
|
|
|
def _compute_if_needed(self, x, *args):
|
|
if not np.all(x == self.x) or self._value is None or self.jac is None:
|
|
self.x = np.asarray(x).copy()
|
|
fg = self.fun(x, *args)
|
|
self.jac = fg[1]
|
|
self._value = fg[0]
|
|
|
|
def __call__(self, x, *args):
|
|
""" returns the function value """
|
|
self._compute_if_needed(x, *args)
|
|
return self._value
|
|
|
|
def derivative(self, x, *args):
|
|
self._compute_if_needed(x, *args)
|
|
return self.jac
|
|
|
|
|
|
def _wrap_callback(callback, method=None):
|
|
"""Wrap a user-provided callback so that attributes can be attached."""
|
|
if callback is None or method in {'tnc', 'slsqp', 'cobyla', 'cobyqa'}:
|
|
return callback # don't wrap
|
|
|
|
sig = inspect.signature(callback)
|
|
|
|
if set(sig.parameters) == {'intermediate_result'}:
|
|
def wrapped_callback(res):
|
|
return callback(intermediate_result=res)
|
|
elif method == 'trust-constr':
|
|
def wrapped_callback(res):
|
|
return callback(np.copy(res.x), res)
|
|
elif method == 'differential_evolution':
|
|
def wrapped_callback(res):
|
|
return callback(np.copy(res.x), res.convergence)
|
|
else:
|
|
def wrapped_callback(res):
|
|
return callback(np.copy(res.x))
|
|
|
|
wrapped_callback.stop_iteration = False
|
|
return wrapped_callback
|
|
|
|
|
|
class OptimizeResult(_RichResult):
|
|
"""
|
|
Represents the optimization result.
|
|
|
|
Attributes
|
|
----------
|
|
x : ndarray
|
|
The solution of the optimization.
|
|
success : bool
|
|
Whether or not the optimizer exited successfully.
|
|
status : int
|
|
Termination status of the optimizer. Its value depends on the
|
|
underlying solver. Refer to `message` for details.
|
|
message : str
|
|
Description of the cause of the termination.
|
|
fun, jac, hess: ndarray
|
|
Values of objective function, its Jacobian and its Hessian (if
|
|
available). The Hessians may be approximations, see the documentation
|
|
of the function in question.
|
|
hess_inv : object
|
|
Inverse of the objective function's Hessian; may be an approximation.
|
|
Not available for all solvers. The type of this attribute may be
|
|
either np.ndarray or scipy.sparse.linalg.LinearOperator.
|
|
nfev, njev, nhev : int
|
|
Number of evaluations of the objective functions and of its
|
|
Jacobian and Hessian.
|
|
nit : int
|
|
Number of iterations performed by the optimizer.
|
|
maxcv : float
|
|
The maximum constraint violation.
|
|
|
|
Notes
|
|
-----
|
|
Depending on the specific solver being used, `OptimizeResult` may
|
|
not have all attributes listed here, and they may have additional
|
|
attributes not listed here. Since this class is essentially a
|
|
subclass of dict with attribute accessors, one can see which
|
|
attributes are available using the `OptimizeResult.keys` method.
|
|
|
|
"""
|
|
pass
|
|
|
|
|
|
class OptimizeWarning(UserWarning):
|
|
pass
|
|
|
|
def _check_positive_definite(Hk):
|
|
def is_pos_def(A):
|
|
if issymmetric(A):
|
|
try:
|
|
cholesky(A)
|
|
return True
|
|
except LinAlgError:
|
|
return False
|
|
else:
|
|
return False
|
|
if Hk is not None:
|
|
if not is_pos_def(Hk):
|
|
raise ValueError("'hess_inv0' matrix isn't positive definite.")
|
|
|
|
|
|
def _check_unknown_options(unknown_options):
|
|
if unknown_options:
|
|
msg = ", ".join(map(str, unknown_options.keys()))
|
|
# Stack level 4: this is called from _minimize_*, which is
|
|
# called from another function in SciPy. Level 4 is the first
|
|
# level in user code.
|
|
warnings.warn("Unknown solver options: %s" % msg, OptimizeWarning, stacklevel=4)
|
|
|
|
|
|
def is_finite_scalar(x):
|
|
"""Test whether `x` is either a finite scalar or a finite array scalar.
|
|
|
|
"""
|
|
return np.size(x) == 1 and np.isfinite(x)
|
|
|
|
|
|
_epsilon = sqrt(np.finfo(float).eps)
|
|
|
|
|
|
def vecnorm(x, ord=2):
|
|
if ord == np.inf:
|
|
return np.amax(np.abs(x))
|
|
elif ord == -np.inf:
|
|
return np.amin(np.abs(x))
|
|
else:
|
|
return np.sum(np.abs(x)**ord, axis=0)**(1.0 / ord)
|
|
|
|
|
|
def _prepare_scalar_function(fun, x0, jac=None, args=(), bounds=None,
|
|
epsilon=None, finite_diff_rel_step=None,
|
|
hess=None):
|
|
"""
|
|
Creates a ScalarFunction object for use with scalar minimizers
|
|
(BFGS/LBFGSB/SLSQP/TNC/CG/etc).
|
|
|
|
Parameters
|
|
----------
|
|
fun : callable
|
|
The objective function to be minimized.
|
|
|
|
``fun(x, *args) -> float``
|
|
|
|
where ``x`` is an 1-D array with shape (n,) and ``args``
|
|
is a tuple of the fixed parameters needed to completely
|
|
specify the function.
|
|
x0 : ndarray, shape (n,)
|
|
Initial guess. Array of real elements of size (n,),
|
|
where 'n' is the number of independent variables.
|
|
jac : {callable, '2-point', '3-point', 'cs', None}, optional
|
|
Method for computing the gradient vector. If it is a callable, it
|
|
should be a function that returns the gradient vector:
|
|
|
|
``jac(x, *args) -> array_like, shape (n,)``
|
|
|
|
If one of `{'2-point', '3-point', 'cs'}` is selected then the gradient
|
|
is calculated with a relative step for finite differences. If `None`,
|
|
then two-point finite differences with an absolute step is used.
|
|
args : tuple, optional
|
|
Extra arguments passed to the objective function and its
|
|
derivatives (`fun`, `jac` functions).
|
|
bounds : sequence, optional
|
|
Bounds on variables. 'new-style' bounds are required.
|
|
eps : float or ndarray
|
|
If `jac is None` the absolute step size used for numerical
|
|
approximation of the jacobian via forward differences.
|
|
finite_diff_rel_step : None or array_like, optional
|
|
If `jac in ['2-point', '3-point', 'cs']` the relative step size to
|
|
use for numerical approximation of the jacobian. The absolute step
|
|
size is computed as ``h = rel_step * sign(x0) * max(1, abs(x0))``,
|
|
possibly adjusted to fit into the bounds. For ``jac='3-point'``
|
|
the sign of `h` is ignored. If None (default) then step is selected
|
|
automatically.
|
|
hess : {callable, '2-point', '3-point', 'cs', None}
|
|
Computes the Hessian matrix. If it is callable, it should return the
|
|
Hessian matrix:
|
|
|
|
``hess(x, *args) -> {LinearOperator, spmatrix, array}, (n, n)``
|
|
|
|
Alternatively, the keywords {'2-point', '3-point', 'cs'} select a
|
|
finite difference scheme for numerical estimation.
|
|
Whenever the gradient is estimated via finite-differences, the Hessian
|
|
cannot be estimated with options {'2-point', '3-point', 'cs'} and needs
|
|
to be estimated using one of the quasi-Newton strategies.
|
|
|
|
Returns
|
|
-------
|
|
sf : ScalarFunction
|
|
"""
|
|
if callable(jac):
|
|
grad = jac
|
|
elif jac in FD_METHODS:
|
|
# epsilon is set to None so that ScalarFunction is made to use
|
|
# rel_step
|
|
epsilon = None
|
|
grad = jac
|
|
else:
|
|
# default (jac is None) is to do 2-point finite differences with
|
|
# absolute step size. ScalarFunction has to be provided an
|
|
# epsilon value that is not None to use absolute steps. This is
|
|
# normally the case from most _minimize* methods.
|
|
grad = '2-point'
|
|
epsilon = epsilon
|
|
|
|
if hess is None:
|
|
# ScalarFunction requires something for hess, so we give a dummy
|
|
# implementation here if nothing is provided, return a value of None
|
|
# so that downstream minimisers halt. The results of `fun.hess`
|
|
# should not be used.
|
|
def hess(x, *args):
|
|
return None
|
|
|
|
if bounds is None:
|
|
bounds = (-np.inf, np.inf)
|
|
|
|
# ScalarFunction caches. Reuse of fun(x) during grad
|
|
# calculation reduces overall function evaluations.
|
|
sf = ScalarFunction(fun, x0, args, grad, hess,
|
|
finite_diff_rel_step, bounds, epsilon=epsilon)
|
|
|
|
return sf
|
|
|
|
|
|
def _clip_x_for_func(func, bounds):
|
|
# ensures that x values sent to func are clipped to bounds
|
|
|
|
# this is used as a mitigation for gh11403, slsqp/tnc sometimes
|
|
# suggest a move that is outside the limits by 1 or 2 ULP. This
|
|
# unclean fix makes sure x is strictly within bounds.
|
|
def eval(x):
|
|
x = _check_clip_x(x, bounds)
|
|
return func(x)
|
|
|
|
return eval
|
|
|
|
|
|
def _check_clip_x(x, bounds):
|
|
if (x < bounds[0]).any() or (x > bounds[1]).any():
|
|
warnings.warn("Values in x were outside bounds during a "
|
|
"minimize step, clipping to bounds",
|
|
RuntimeWarning, stacklevel=3)
|
|
x = np.clip(x, bounds[0], bounds[1])
|
|
return x
|
|
|
|
return x
|
|
|
|
|
|
def rosen(x):
|
|
"""
|
|
The Rosenbrock function.
|
|
|
|
The function computed is::
|
|
|
|
sum(100.0*(x[1:] - x[:-1]**2.0)**2.0 + (1 - x[:-1])**2.0)
|
|
|
|
Parameters
|
|
----------
|
|
x : array_like
|
|
1-D array of points at which the Rosenbrock function is to be computed.
|
|
|
|
Returns
|
|
-------
|
|
f : float
|
|
The value of the Rosenbrock function.
|
|
|
|
See Also
|
|
--------
|
|
rosen_der, rosen_hess, rosen_hess_prod
|
|
|
|
Examples
|
|
--------
|
|
>>> import numpy as np
|
|
>>> from scipy.optimize import rosen
|
|
>>> X = 0.1 * np.arange(10)
|
|
>>> rosen(X)
|
|
76.56
|
|
|
|
For higher-dimensional input ``rosen`` broadcasts.
|
|
In the following example, we use this to plot a 2D landscape.
|
|
Note that ``rosen_hess`` does not broadcast in this manner.
|
|
|
|
>>> import matplotlib.pyplot as plt
|
|
>>> from mpl_toolkits.mplot3d import Axes3D
|
|
>>> x = np.linspace(-1, 1, 50)
|
|
>>> X, Y = np.meshgrid(x, x)
|
|
>>> ax = plt.subplot(111, projection='3d')
|
|
>>> ax.plot_surface(X, Y, rosen([X, Y]))
|
|
>>> plt.show()
|
|
"""
|
|
x = asarray(x)
|
|
r = np.sum(100.0 * (x[1:] - x[:-1]**2.0)**2.0 + (1 - x[:-1])**2.0,
|
|
axis=0)
|
|
return r
|
|
|
|
|
|
def rosen_der(x):
|
|
"""
|
|
The derivative (i.e. gradient) of the Rosenbrock function.
|
|
|
|
Parameters
|
|
----------
|
|
x : array_like
|
|
1-D array of points at which the derivative is to be computed.
|
|
|
|
Returns
|
|
-------
|
|
rosen_der : (N,) ndarray
|
|
The gradient of the Rosenbrock function at `x`.
|
|
|
|
See Also
|
|
--------
|
|
rosen, rosen_hess, rosen_hess_prod
|
|
|
|
Examples
|
|
--------
|
|
>>> import numpy as np
|
|
>>> from scipy.optimize import rosen_der
|
|
>>> X = 0.1 * np.arange(9)
|
|
>>> rosen_der(X)
|
|
array([ -2. , 10.6, 15.6, 13.4, 6.4, -3. , -12.4, -19.4, 62. ])
|
|
|
|
"""
|
|
x = asarray(x)
|
|
xm = x[1:-1]
|
|
xm_m1 = x[:-2]
|
|
xm_p1 = x[2:]
|
|
der = np.zeros_like(x)
|
|
der[1:-1] = (200 * (xm - xm_m1**2) -
|
|
400 * (xm_p1 - xm**2) * xm - 2 * (1 - xm))
|
|
der[0] = -400 * x[0] * (x[1] - x[0]**2) - 2 * (1 - x[0])
|
|
der[-1] = 200 * (x[-1] - x[-2]**2)
|
|
return der
|
|
|
|
|
|
def rosen_hess(x):
|
|
"""
|
|
The Hessian matrix of the Rosenbrock function.
|
|
|
|
Parameters
|
|
----------
|
|
x : array_like
|
|
1-D array of points at which the Hessian matrix is to be computed.
|
|
|
|
Returns
|
|
-------
|
|
rosen_hess : ndarray
|
|
The Hessian matrix of the Rosenbrock function at `x`.
|
|
|
|
See Also
|
|
--------
|
|
rosen, rosen_der, rosen_hess_prod
|
|
|
|
Examples
|
|
--------
|
|
>>> import numpy as np
|
|
>>> from scipy.optimize import rosen_hess
|
|
>>> X = 0.1 * np.arange(4)
|
|
>>> rosen_hess(X)
|
|
array([[-38., 0., 0., 0.],
|
|
[ 0., 134., -40., 0.],
|
|
[ 0., -40., 130., -80.],
|
|
[ 0., 0., -80., 200.]])
|
|
|
|
"""
|
|
x = atleast_1d(x)
|
|
H = np.diag(-400 * x[:-1], 1) - np.diag(400 * x[:-1], -1)
|
|
diagonal = np.zeros(len(x), dtype=x.dtype)
|
|
diagonal[0] = 1200 * x[0]**2 - 400 * x[1] + 2
|
|
diagonal[-1] = 200
|
|
diagonal[1:-1] = 202 + 1200 * x[1:-1]**2 - 400 * x[2:]
|
|
H = H + np.diag(diagonal)
|
|
return H
|
|
|
|
|
|
def rosen_hess_prod(x, p):
|
|
"""
|
|
Product of the Hessian matrix of the Rosenbrock function with a vector.
|
|
|
|
Parameters
|
|
----------
|
|
x : array_like
|
|
1-D array of points at which the Hessian matrix is to be computed.
|
|
p : array_like
|
|
1-D array, the vector to be multiplied by the Hessian matrix.
|
|
|
|
Returns
|
|
-------
|
|
rosen_hess_prod : ndarray
|
|
The Hessian matrix of the Rosenbrock function at `x` multiplied
|
|
by the vector `p`.
|
|
|
|
See Also
|
|
--------
|
|
rosen, rosen_der, rosen_hess
|
|
|
|
Examples
|
|
--------
|
|
>>> import numpy as np
|
|
>>> from scipy.optimize import rosen_hess_prod
|
|
>>> X = 0.1 * np.arange(9)
|
|
>>> p = 0.5 * np.arange(9)
|
|
>>> rosen_hess_prod(X, p)
|
|
array([ -0., 27., -10., -95., -192., -265., -278., -195., -180.])
|
|
|
|
"""
|
|
x = atleast_1d(x)
|
|
Hp = np.zeros(len(x), dtype=x.dtype)
|
|
Hp[0] = (1200 * x[0]**2 - 400 * x[1] + 2) * p[0] - 400 * x[0] * p[1]
|
|
Hp[1:-1] = (-400 * x[:-2] * p[:-2] +
|
|
(202 + 1200 * x[1:-1]**2 - 400 * x[2:]) * p[1:-1] -
|
|
400 * x[1:-1] * p[2:])
|
|
Hp[-1] = -400 * x[-2] * p[-2] + 200*p[-1]
|
|
return Hp
|
|
|
|
|
|
def _wrap_scalar_function(function, args):
|
|
# wraps a minimizer function to count number of evaluations
|
|
# and to easily provide an args kwd.
|
|
ncalls = [0]
|
|
if function is None:
|
|
return ncalls, None
|
|
|
|
def function_wrapper(x, *wrapper_args):
|
|
ncalls[0] += 1
|
|
# A copy of x is sent to the user function (gh13740)
|
|
fx = function(np.copy(x), *(wrapper_args + args))
|
|
# Ideally, we'd like to a have a true scalar returned from f(x). For
|
|
# backwards-compatibility, also allow np.array([1.3]), np.array([[1.3]]) etc.
|
|
if not np.isscalar(fx):
|
|
try:
|
|
fx = np.asarray(fx).item()
|
|
except (TypeError, ValueError) as e:
|
|
raise ValueError("The user-provided objective function "
|
|
"must return a scalar value.") from e
|
|
return fx
|
|
|
|
return ncalls, function_wrapper
|
|
|
|
|
|
class _MaxFuncCallError(RuntimeError):
|
|
pass
|
|
|
|
|
|
def _wrap_scalar_function_maxfun_validation(function, args, maxfun):
|
|
# wraps a minimizer function to count number of evaluations
|
|
# and to easily provide an args kwd.
|
|
ncalls = [0]
|
|
if function is None:
|
|
return ncalls, None
|
|
|
|
def function_wrapper(x, *wrapper_args):
|
|
if ncalls[0] >= maxfun:
|
|
raise _MaxFuncCallError("Too many function calls")
|
|
ncalls[0] += 1
|
|
# A copy of x is sent to the user function (gh13740)
|
|
fx = function(np.copy(x), *(wrapper_args + args))
|
|
# Ideally, we'd like to a have a true scalar returned from f(x). For
|
|
# backwards-compatibility, also allow np.array([1.3]),
|
|
# np.array([[1.3]]) etc.
|
|
if not np.isscalar(fx):
|
|
try:
|
|
fx = np.asarray(fx).item()
|
|
except (TypeError, ValueError) as e:
|
|
raise ValueError("The user-provided objective function "
|
|
"must return a scalar value.") from e
|
|
return fx
|
|
|
|
return ncalls, function_wrapper
|
|
|
|
|
|
def fmin(func, x0, args=(), xtol=1e-4, ftol=1e-4, maxiter=None, maxfun=None,
|
|
full_output=0, disp=1, retall=0, callback=None, initial_simplex=None):
|
|
"""
|
|
Minimize a function using the downhill simplex algorithm.
|
|
|
|
This algorithm only uses function values, not derivatives or second
|
|
derivatives.
|
|
|
|
Parameters
|
|
----------
|
|
func : callable func(x,*args)
|
|
The objective function to be minimized.
|
|
x0 : ndarray
|
|
Initial guess.
|
|
args : tuple, optional
|
|
Extra arguments passed to func, i.e., ``f(x,*args)``.
|
|
xtol : float, optional
|
|
Absolute error in xopt between iterations that is acceptable for
|
|
convergence.
|
|
ftol : number, optional
|
|
Absolute error in func(xopt) between iterations that is acceptable for
|
|
convergence.
|
|
maxiter : int, optional
|
|
Maximum number of iterations to perform.
|
|
maxfun : number, optional
|
|
Maximum number of function evaluations to make.
|
|
full_output : bool, optional
|
|
Set to True if fopt and warnflag outputs are desired.
|
|
disp : bool, optional
|
|
Set to True to print convergence messages.
|
|
retall : bool, optional
|
|
Set to True to return list of solutions at each iteration.
|
|
callback : callable, optional
|
|
Called after each iteration, as callback(xk), where xk is the
|
|
current parameter vector.
|
|
initial_simplex : array_like of shape (N + 1, N), optional
|
|
Initial simplex. If given, overrides `x0`.
|
|
``initial_simplex[j,:]`` should contain the coordinates of
|
|
the jth vertex of the ``N+1`` vertices in the simplex, where
|
|
``N`` is the dimension.
|
|
|
|
Returns
|
|
-------
|
|
xopt : ndarray
|
|
Parameter that minimizes function.
|
|
fopt : float
|
|
Value of function at minimum: ``fopt = func(xopt)``.
|
|
iter : int
|
|
Number of iterations performed.
|
|
funcalls : int
|
|
Number of function calls made.
|
|
warnflag : int
|
|
1 : Maximum number of function evaluations made.
|
|
2 : Maximum number of iterations reached.
|
|
allvecs : list
|
|
Solution at each iteration.
|
|
|
|
See also
|
|
--------
|
|
minimize: Interface to minimization algorithms for multivariate
|
|
functions. See the 'Nelder-Mead' `method` in particular.
|
|
|
|
Notes
|
|
-----
|
|
Uses a Nelder-Mead simplex algorithm to find the minimum of function of
|
|
one or more variables.
|
|
|
|
This algorithm has a long history of successful use in applications.
|
|
But it will usually be slower than an algorithm that uses first or
|
|
second derivative information. In practice, it can have poor
|
|
performance in high-dimensional problems and is not robust to
|
|
minimizing complicated functions. Additionally, there currently is no
|
|
complete theory describing when the algorithm will successfully
|
|
converge to the minimum, or how fast it will if it does. Both the ftol and
|
|
xtol criteria must be met for convergence.
|
|
|
|
Examples
|
|
--------
|
|
>>> def f(x):
|
|
... return x**2
|
|
|
|
>>> from scipy import optimize
|
|
|
|
>>> minimum = optimize.fmin(f, 1)
|
|
Optimization terminated successfully.
|
|
Current function value: 0.000000
|
|
Iterations: 17
|
|
Function evaluations: 34
|
|
>>> minimum[0]
|
|
-8.8817841970012523e-16
|
|
|
|
References
|
|
----------
|
|
.. [1] Nelder, J.A. and Mead, R. (1965), "A simplex method for function
|
|
minimization", The Computer Journal, 7, pp. 308-313
|
|
|
|
.. [2] Wright, M.H. (1996), "Direct Search Methods: Once Scorned, Now
|
|
Respectable", in Numerical Analysis 1995, Proceedings of the
|
|
1995 Dundee Biennial Conference in Numerical Analysis, D.F.
|
|
Griffiths and G.A. Watson (Eds.), Addison Wesley Longman,
|
|
Harlow, UK, pp. 191-208.
|
|
|
|
"""
|
|
opts = {'xatol': xtol,
|
|
'fatol': ftol,
|
|
'maxiter': maxiter,
|
|
'maxfev': maxfun,
|
|
'disp': disp,
|
|
'return_all': retall,
|
|
'initial_simplex': initial_simplex}
|
|
|
|
callback = _wrap_callback(callback)
|
|
res = _minimize_neldermead(func, x0, args, callback=callback, **opts)
|
|
if full_output:
|
|
retlist = res['x'], res['fun'], res['nit'], res['nfev'], res['status']
|
|
if retall:
|
|
retlist += (res['allvecs'], )
|
|
return retlist
|
|
else:
|
|
if retall:
|
|
return res['x'], res['allvecs']
|
|
else:
|
|
return res['x']
|
|
|
|
|
|
def _minimize_neldermead(func, x0, args=(), callback=None,
|
|
maxiter=None, maxfev=None, disp=False,
|
|
return_all=False, initial_simplex=None,
|
|
xatol=1e-4, fatol=1e-4, adaptive=False, bounds=None,
|
|
**unknown_options):
|
|
"""
|
|
Minimization of scalar function of one or more variables using the
|
|
Nelder-Mead algorithm.
|
|
|
|
Options
|
|
-------
|
|
disp : bool
|
|
Set to True to print convergence messages.
|
|
maxiter, maxfev : int
|
|
Maximum allowed number of iterations and function evaluations.
|
|
Will default to ``N*200``, where ``N`` is the number of
|
|
variables, if neither `maxiter` or `maxfev` is set. If both
|
|
`maxiter` and `maxfev` are set, minimization will stop at the
|
|
first reached.
|
|
return_all : bool, optional
|
|
Set to True to return a list of the best solution at each of the
|
|
iterations.
|
|
initial_simplex : array_like of shape (N + 1, N)
|
|
Initial simplex. If given, overrides `x0`.
|
|
``initial_simplex[j,:]`` should contain the coordinates of
|
|
the jth vertex of the ``N+1`` vertices in the simplex, where
|
|
``N`` is the dimension.
|
|
xatol : float, optional
|
|
Absolute error in xopt between iterations that is acceptable for
|
|
convergence.
|
|
fatol : number, optional
|
|
Absolute error in func(xopt) between iterations that is acceptable for
|
|
convergence.
|
|
adaptive : bool, optional
|
|
Adapt algorithm parameters to dimensionality of problem. Useful for
|
|
high-dimensional minimization [1]_.
|
|
bounds : sequence or `Bounds`, optional
|
|
Bounds on variables. There are two ways to specify the bounds:
|
|
|
|
1. Instance of `Bounds` class.
|
|
2. Sequence of ``(min, max)`` pairs for each element in `x`. None
|
|
is used to specify no bound.
|
|
|
|
Note that this just clips all vertices in simplex based on
|
|
the bounds.
|
|
|
|
References
|
|
----------
|
|
.. [1] Gao, F. and Han, L.
|
|
Implementing the Nelder-Mead simplex algorithm with adaptive
|
|
parameters. 2012. Computational Optimization and Applications.
|
|
51:1, pp. 259-277
|
|
|
|
"""
|
|
_check_unknown_options(unknown_options)
|
|
maxfun = maxfev
|
|
retall = return_all
|
|
|
|
x0 = np.atleast_1d(x0).flatten()
|
|
dtype = x0.dtype if np.issubdtype(x0.dtype, np.inexact) else np.float64
|
|
x0 = np.asarray(x0, dtype=dtype)
|
|
|
|
if adaptive:
|
|
dim = float(len(x0))
|
|
rho = 1
|
|
chi = 1 + 2/dim
|
|
psi = 0.75 - 1/(2*dim)
|
|
sigma = 1 - 1/dim
|
|
else:
|
|
rho = 1
|
|
chi = 2
|
|
psi = 0.5
|
|
sigma = 0.5
|
|
|
|
nonzdelt = 0.05
|
|
zdelt = 0.00025
|
|
|
|
if bounds is not None:
|
|
lower_bound, upper_bound = bounds.lb, bounds.ub
|
|
# check bounds
|
|
if (lower_bound > upper_bound).any():
|
|
raise ValueError("Nelder Mead - one of the lower bounds "
|
|
"is greater than an upper bound.")
|
|
if np.any(lower_bound > x0) or np.any(x0 > upper_bound):
|
|
warnings.warn("Initial guess is not within the specified bounds",
|
|
OptimizeWarning, stacklevel=3)
|
|
|
|
if bounds is not None:
|
|
x0 = np.clip(x0, lower_bound, upper_bound)
|
|
|
|
if initial_simplex is None:
|
|
N = len(x0)
|
|
|
|
sim = np.empty((N + 1, N), dtype=x0.dtype)
|
|
sim[0] = x0
|
|
for k in range(N):
|
|
y = np.array(x0, copy=True)
|
|
if y[k] != 0:
|
|
y[k] = (1 + nonzdelt)*y[k]
|
|
else:
|
|
y[k] = zdelt
|
|
sim[k + 1] = y
|
|
else:
|
|
sim = np.atleast_2d(initial_simplex).copy()
|
|
dtype = sim.dtype if np.issubdtype(sim.dtype, np.inexact) else np.float64
|
|
sim = np.asarray(sim, dtype=dtype)
|
|
if sim.ndim != 2 or sim.shape[0] != sim.shape[1] + 1:
|
|
raise ValueError("`initial_simplex` should be an array of shape (N+1,N)")
|
|
if len(x0) != sim.shape[1]:
|
|
raise ValueError("Size of `initial_simplex` is not consistent with `x0`")
|
|
N = sim.shape[1]
|
|
|
|
if retall:
|
|
allvecs = [sim[0]]
|
|
|
|
# If neither are set, then set both to default
|
|
if maxiter is None and maxfun is None:
|
|
maxiter = N * 200
|
|
maxfun = N * 200
|
|
elif maxiter is None:
|
|
# Convert remaining Nones, to np.inf, unless the other is np.inf, in
|
|
# which case use the default to avoid unbounded iteration
|
|
if maxfun == np.inf:
|
|
maxiter = N * 200
|
|
else:
|
|
maxiter = np.inf
|
|
elif maxfun is None:
|
|
if maxiter == np.inf:
|
|
maxfun = N * 200
|
|
else:
|
|
maxfun = np.inf
|
|
|
|
if bounds is not None:
|
|
# The default simplex construction may make all entries (for a given
|
|
# parameter) greater than an upper bound if x0 is very close to the
|
|
# upper bound. If one simply clips the simplex to the bounds this could
|
|
# make the simplex entries degenerate. If that occurs reflect into the
|
|
# interior.
|
|
msk = sim > upper_bound
|
|
# reflect into the interior
|
|
sim = np.where(msk, 2*upper_bound - sim, sim)
|
|
# but make sure the reflection is no less than the lower_bound
|
|
sim = np.clip(sim, lower_bound, upper_bound)
|
|
|
|
one2np1 = list(range(1, N + 1))
|
|
fsim = np.full((N + 1,), np.inf, dtype=float)
|
|
|
|
fcalls, func = _wrap_scalar_function_maxfun_validation(func, args, maxfun)
|
|
|
|
try:
|
|
for k in range(N + 1):
|
|
fsim[k] = func(sim[k])
|
|
except _MaxFuncCallError:
|
|
pass
|
|
finally:
|
|
ind = np.argsort(fsim)
|
|
sim = np.take(sim, ind, 0)
|
|
fsim = np.take(fsim, ind, 0)
|
|
|
|
ind = np.argsort(fsim)
|
|
fsim = np.take(fsim, ind, 0)
|
|
# sort so sim[0,:] has the lowest function value
|
|
sim = np.take(sim, ind, 0)
|
|
|
|
iterations = 1
|
|
|
|
while (fcalls[0] < maxfun and iterations < maxiter):
|
|
try:
|
|
if (np.max(np.ravel(np.abs(sim[1:] - sim[0]))) <= xatol and
|
|
np.max(np.abs(fsim[0] - fsim[1:])) <= fatol):
|
|
break
|
|
|
|
xbar = np.add.reduce(sim[:-1], 0) / N
|
|
xr = (1 + rho) * xbar - rho * sim[-1]
|
|
if bounds is not None:
|
|
xr = np.clip(xr, lower_bound, upper_bound)
|
|
fxr = func(xr)
|
|
doshrink = 0
|
|
|
|
if fxr < fsim[0]:
|
|
xe = (1 + rho * chi) * xbar - rho * chi * sim[-1]
|
|
if bounds is not None:
|
|
xe = np.clip(xe, lower_bound, upper_bound)
|
|
fxe = func(xe)
|
|
|
|
if fxe < fxr:
|
|
sim[-1] = xe
|
|
fsim[-1] = fxe
|
|
else:
|
|
sim[-1] = xr
|
|
fsim[-1] = fxr
|
|
else: # fsim[0] <= fxr
|
|
if fxr < fsim[-2]:
|
|
sim[-1] = xr
|
|
fsim[-1] = fxr
|
|
else: # fxr >= fsim[-2]
|
|
# Perform contraction
|
|
if fxr < fsim[-1]:
|
|
xc = (1 + psi * rho) * xbar - psi * rho * sim[-1]
|
|
if bounds is not None:
|
|
xc = np.clip(xc, lower_bound, upper_bound)
|
|
fxc = func(xc)
|
|
|
|
if fxc <= fxr:
|
|
sim[-1] = xc
|
|
fsim[-1] = fxc
|
|
else:
|
|
doshrink = 1
|
|
else:
|
|
# Perform an inside contraction
|
|
xcc = (1 - psi) * xbar + psi * sim[-1]
|
|
if bounds is not None:
|
|
xcc = np.clip(xcc, lower_bound, upper_bound)
|
|
fxcc = func(xcc)
|
|
|
|
if fxcc < fsim[-1]:
|
|
sim[-1] = xcc
|
|
fsim[-1] = fxcc
|
|
else:
|
|
doshrink = 1
|
|
|
|
if doshrink:
|
|
for j in one2np1:
|
|
sim[j] = sim[0] + sigma * (sim[j] - sim[0])
|
|
if bounds is not None:
|
|
sim[j] = np.clip(
|
|
sim[j], lower_bound, upper_bound)
|
|
fsim[j] = func(sim[j])
|
|
iterations += 1
|
|
except _MaxFuncCallError:
|
|
pass
|
|
finally:
|
|
ind = np.argsort(fsim)
|
|
sim = np.take(sim, ind, 0)
|
|
fsim = np.take(fsim, ind, 0)
|
|
if retall:
|
|
allvecs.append(sim[0])
|
|
intermediate_result = OptimizeResult(x=sim[0], fun=fsim[0])
|
|
if _call_callback_maybe_halt(callback, intermediate_result):
|
|
break
|
|
|
|
x = sim[0]
|
|
fval = np.min(fsim)
|
|
warnflag = 0
|
|
|
|
if fcalls[0] >= maxfun:
|
|
warnflag = 1
|
|
msg = _status_message['maxfev']
|
|
if disp:
|
|
warnings.warn(msg, RuntimeWarning, stacklevel=3)
|
|
elif iterations >= maxiter:
|
|
warnflag = 2
|
|
msg = _status_message['maxiter']
|
|
if disp:
|
|
warnings.warn(msg, RuntimeWarning, stacklevel=3)
|
|
else:
|
|
msg = _status_message['success']
|
|
if disp:
|
|
print(msg)
|
|
print(" Current function value: %f" % fval)
|
|
print(" Iterations: %d" % iterations)
|
|
print(" Function evaluations: %d" % fcalls[0])
|
|
|
|
result = OptimizeResult(fun=fval, nit=iterations, nfev=fcalls[0],
|
|
status=warnflag, success=(warnflag == 0),
|
|
message=msg, x=x, final_simplex=(sim, fsim))
|
|
if retall:
|
|
result['allvecs'] = allvecs
|
|
return result
|
|
|
|
|
|
def approx_fprime(xk, f, epsilon=_epsilon, *args):
|
|
"""Finite difference approximation of the derivatives of a
|
|
scalar or vector-valued function.
|
|
|
|
If a function maps from :math:`R^n` to :math:`R^m`, its derivatives form
|
|
an m-by-n matrix
|
|
called the Jacobian, where an element :math:`(i, j)` is a partial
|
|
derivative of f[i] with respect to ``xk[j]``.
|
|
|
|
Parameters
|
|
----------
|
|
xk : array_like
|
|
The coordinate vector at which to determine the gradient of `f`.
|
|
f : callable
|
|
Function of which to estimate the derivatives of. Has the signature
|
|
``f(xk, *args)`` where `xk` is the argument in the form of a 1-D array
|
|
and `args` is a tuple of any additional fixed parameters needed to
|
|
completely specify the function. The argument `xk` passed to this
|
|
function is an ndarray of shape (n,) (never a scalar even if n=1).
|
|
It must return a 1-D array_like of shape (m,) or a scalar.
|
|
|
|
.. versionchanged:: 1.9.0
|
|
`f` is now able to return a 1-D array-like, with the :math:`(m, n)`
|
|
Jacobian being estimated.
|
|
|
|
epsilon : {float, array_like}, optional
|
|
Increment to `xk` to use for determining the function gradient.
|
|
If a scalar, uses the same finite difference delta for all partial
|
|
derivatives. If an array, should contain one value per element of
|
|
`xk`. Defaults to ``sqrt(np.finfo(float).eps)``, which is approximately
|
|
1.49e-08.
|
|
\\*args : args, optional
|
|
Any other arguments that are to be passed to `f`.
|
|
|
|
Returns
|
|
-------
|
|
jac : ndarray
|
|
The partial derivatives of `f` to `xk`.
|
|
|
|
See Also
|
|
--------
|
|
check_grad : Check correctness of gradient function against approx_fprime.
|
|
|
|
Notes
|
|
-----
|
|
The function gradient is determined by the forward finite difference
|
|
formula::
|
|
|
|
f(xk[i] + epsilon[i]) - f(xk[i])
|
|
f'[i] = ---------------------------------
|
|
epsilon[i]
|
|
|
|
Examples
|
|
--------
|
|
>>> import numpy as np
|
|
>>> from scipy import optimize
|
|
>>> def func(x, c0, c1):
|
|
... "Coordinate vector `x` should be an array of size two."
|
|
... return c0 * x[0]**2 + c1*x[1]**2
|
|
|
|
>>> x = np.ones(2)
|
|
>>> c0, c1 = (1, 200)
|
|
>>> eps = np.sqrt(np.finfo(float).eps)
|
|
>>> optimize.approx_fprime(x, func, [eps, np.sqrt(200) * eps], c0, c1)
|
|
array([ 2. , 400.00004208])
|
|
|
|
"""
|
|
xk = np.asarray(xk, float)
|
|
f0 = f(xk, *args)
|
|
|
|
return approx_derivative(f, xk, method='2-point', abs_step=epsilon,
|
|
args=args, f0=f0)
|
|
|
|
|
|
def check_grad(func, grad, x0, *args, epsilon=_epsilon,
|
|
direction='all', seed=None):
|
|
"""Check the correctness of a gradient function by comparing it against a
|
|
(forward) finite-difference approximation of the gradient.
|
|
|
|
Parameters
|
|
----------
|
|
func : callable ``func(x0, *args)``
|
|
Function whose derivative is to be checked.
|
|
grad : callable ``grad(x0, *args)``
|
|
Jacobian of `func`.
|
|
x0 : ndarray
|
|
Points to check `grad` against forward difference approximation of grad
|
|
using `func`.
|
|
args : \\*args, optional
|
|
Extra arguments passed to `func` and `grad`.
|
|
epsilon : float, optional
|
|
Step size used for the finite difference approximation. It defaults to
|
|
``sqrt(np.finfo(float).eps)``, which is approximately 1.49e-08.
|
|
direction : str, optional
|
|
If set to ``'random'``, then gradients along a random vector
|
|
are used to check `grad` against forward difference approximation
|
|
using `func`. By default it is ``'all'``, in which case, all
|
|
the one hot direction vectors are considered to check `grad`.
|
|
If `func` is a vector valued function then only ``'all'`` can be used.
|
|
seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
|
|
If `seed` is None (or `np.random`), the `numpy.random.RandomState`
|
|
singleton is used.
|
|
If `seed` is an int, a new ``RandomState`` instance is used,
|
|
seeded with `seed`.
|
|
If `seed` is already a ``Generator`` or ``RandomState`` instance then
|
|
that instance is used.
|
|
Specify `seed` for reproducing the return value from this function.
|
|
The random numbers generated with this seed affect the random vector
|
|
along which gradients are computed to check ``grad``. Note that `seed`
|
|
is only used when `direction` argument is set to `'random'`.
|
|
|
|
Returns
|
|
-------
|
|
err : float
|
|
The square root of the sum of squares (i.e., the 2-norm) of the
|
|
difference between ``grad(x0, *args)`` and the finite difference
|
|
approximation of `grad` using func at the points `x0`.
|
|
|
|
See Also
|
|
--------
|
|
approx_fprime
|
|
|
|
Examples
|
|
--------
|
|
>>> import numpy as np
|
|
>>> def func(x):
|
|
... return x[0]**2 - 0.5 * x[1]**3
|
|
>>> def grad(x):
|
|
... return [2 * x[0], -1.5 * x[1]**2]
|
|
>>> from scipy.optimize import check_grad
|
|
>>> check_grad(func, grad, [1.5, -1.5])
|
|
2.9802322387695312e-08 # may vary
|
|
>>> rng = np.random.default_rng()
|
|
>>> check_grad(func, grad, [1.5, -1.5],
|
|
... direction='random', seed=rng)
|
|
2.9802322387695312e-08
|
|
|
|
"""
|
|
step = epsilon
|
|
x0 = np.asarray(x0)
|
|
|
|
def g(w, func, x0, v, *args):
|
|
return func(x0 + w*v, *args)
|
|
|
|
if direction == 'random':
|
|
_grad = np.asanyarray(grad(x0, *args))
|
|
if _grad.ndim > 1:
|
|
raise ValueError("'random' can only be used with scalar valued"
|
|
" func")
|
|
random_state = check_random_state(seed)
|
|
v = random_state.normal(0, 1, size=(x0.shape))
|
|
_args = (func, x0, v) + args
|
|
_func = g
|
|
vars = np.zeros((1,))
|
|
analytical_grad = np.dot(_grad, v)
|
|
elif direction == 'all':
|
|
_args = args
|
|
_func = func
|
|
vars = x0
|
|
analytical_grad = grad(x0, *args)
|
|
else:
|
|
raise ValueError(f"{direction} is not a valid string for "
|
|
"``direction`` argument")
|
|
|
|
return np.sqrt(np.sum(np.abs(
|
|
(analytical_grad - approx_fprime(vars, _func, step, *_args))**2
|
|
)))
|
|
|
|
|
|
def approx_fhess_p(x0, p, fprime, epsilon, *args):
|
|
# calculate fprime(x0) first, as this may be cached by ScalarFunction
|
|
f1 = fprime(*((x0,) + args))
|
|
f2 = fprime(*((x0 + epsilon*p,) + args))
|
|
return (f2 - f1) / epsilon
|
|
|
|
|
|
class _LineSearchError(RuntimeError):
|
|
pass
|
|
|
|
|
|
def _line_search_wolfe12(f, fprime, xk, pk, gfk, old_fval, old_old_fval,
|
|
**kwargs):
|
|
"""
|
|
Same as line_search_wolfe1, but fall back to line_search_wolfe2 if
|
|
suitable step length is not found, and raise an exception if a
|
|
suitable step length is not found.
|
|
|
|
Raises
|
|
------
|
|
_LineSearchError
|
|
If no suitable step size is found
|
|
|
|
"""
|
|
|
|
extra_condition = kwargs.pop('extra_condition', None)
|
|
|
|
ret = line_search_wolfe1(f, fprime, xk, pk, gfk,
|
|
old_fval, old_old_fval,
|
|
**kwargs)
|
|
|
|
if ret[0] is not None and extra_condition is not None:
|
|
xp1 = xk + ret[0] * pk
|
|
if not extra_condition(ret[0], xp1, ret[3], ret[5]):
|
|
# Reject step if extra_condition fails
|
|
ret = (None,)
|
|
|
|
if ret[0] is None:
|
|
# line search failed: try different one.
|
|
with warnings.catch_warnings():
|
|
warnings.simplefilter('ignore', LineSearchWarning)
|
|
kwargs2 = {}
|
|
for key in ('c1', 'c2', 'amax'):
|
|
if key in kwargs:
|
|
kwargs2[key] = kwargs[key]
|
|
ret = line_search_wolfe2(f, fprime, xk, pk, gfk,
|
|
old_fval, old_old_fval,
|
|
extra_condition=extra_condition,
|
|
**kwargs2)
|
|
|
|
if ret[0] is None:
|
|
raise _LineSearchError()
|
|
|
|
return ret
|
|
|
|
|
|
def fmin_bfgs(f, x0, fprime=None, args=(), gtol=1e-5, norm=np.inf,
|
|
epsilon=_epsilon, maxiter=None, full_output=0, disp=1,
|
|
retall=0, callback=None, xrtol=0, c1=1e-4, c2=0.9,
|
|
hess_inv0=None):
|
|
"""
|
|
Minimize a function using the BFGS algorithm.
|
|
|
|
Parameters
|
|
----------
|
|
f : callable ``f(x,*args)``
|
|
Objective function to be minimized.
|
|
x0 : ndarray
|
|
Initial guess, shape (n,)
|
|
fprime : callable ``f'(x,*args)``, optional
|
|
Gradient of f.
|
|
args : tuple, optional
|
|
Extra arguments passed to f and fprime.
|
|
gtol : float, optional
|
|
Terminate successfully if gradient norm is less than `gtol`
|
|
norm : float, optional
|
|
Order of norm (Inf is max, -Inf is min)
|
|
epsilon : int or ndarray, optional
|
|
If `fprime` is approximated, use this value for the step size.
|
|
callback : callable, optional
|
|
An optional user-supplied function to call after each
|
|
iteration. Called as ``callback(xk)``, where ``xk`` is the
|
|
current parameter vector.
|
|
maxiter : int, optional
|
|
Maximum number of iterations to perform.
|
|
full_output : bool, optional
|
|
If True, return ``fopt``, ``func_calls``, ``grad_calls``, and
|
|
``warnflag`` in addition to ``xopt``.
|
|
disp : bool, optional
|
|
Print convergence message if True.
|
|
retall : bool, optional
|
|
Return a list of results at each iteration if True.
|
|
xrtol : float, default: 0
|
|
Relative tolerance for `x`. Terminate successfully if step
|
|
size is less than ``xk * xrtol`` where ``xk`` is the current
|
|
parameter vector.
|
|
c1 : float, default: 1e-4
|
|
Parameter for Armijo condition rule.
|
|
c2 : float, default: 0.9
|
|
Parameter for curvature condition rule.
|
|
hess_inv0 : None or ndarray, optional``
|
|
Initial inverse hessian estimate, shape (n, n). If None (default) then
|
|
the identity matrix is used.
|
|
|
|
Returns
|
|
-------
|
|
xopt : ndarray
|
|
Parameters which minimize f, i.e., ``f(xopt) == fopt``.
|
|
fopt : float
|
|
Minimum value.
|
|
gopt : ndarray
|
|
Value of gradient at minimum, f'(xopt), which should be near 0.
|
|
Bopt : ndarray
|
|
Value of 1/f''(xopt), i.e., the inverse Hessian matrix.
|
|
func_calls : int
|
|
Number of function_calls made.
|
|
grad_calls : int
|
|
Number of gradient calls made.
|
|
warnflag : integer
|
|
1 : Maximum number of iterations exceeded.
|
|
2 : Gradient and/or function calls not changing.
|
|
3 : NaN result encountered.
|
|
allvecs : list
|
|
The value of `xopt` at each iteration. Only returned if `retall` is
|
|
True.
|
|
|
|
Notes
|
|
-----
|
|
Optimize the function, `f`, whose gradient is given by `fprime`
|
|
using the quasi-Newton method of Broyden, Fletcher, Goldfarb,
|
|
and Shanno (BFGS).
|
|
|
|
Parameters `c1` and `c2` must satisfy ``0 < c1 < c2 < 1``.
|
|
|
|
See Also
|
|
--------
|
|
minimize: Interface to minimization algorithms for multivariate
|
|
functions. See ``method='BFGS'`` in particular.
|
|
|
|
References
|
|
----------
|
|
Wright, and Nocedal 'Numerical Optimization', 1999, p. 198.
|
|
|
|
Examples
|
|
--------
|
|
>>> import numpy as np
|
|
>>> from scipy.optimize import fmin_bfgs
|
|
>>> def quadratic_cost(x, Q):
|
|
... return x @ Q @ x
|
|
...
|
|
>>> x0 = np.array([-3, -4])
|
|
>>> cost_weight = np.diag([1., 10.])
|
|
>>> # Note that a trailing comma is necessary for a tuple with single element
|
|
>>> fmin_bfgs(quadratic_cost, x0, args=(cost_weight,))
|
|
Optimization terminated successfully.
|
|
Current function value: 0.000000
|
|
Iterations: 7 # may vary
|
|
Function evaluations: 24 # may vary
|
|
Gradient evaluations: 8 # may vary
|
|
array([ 2.85169950e-06, -4.61820139e-07])
|
|
|
|
>>> def quadratic_cost_grad(x, Q):
|
|
... return 2 * Q @ x
|
|
...
|
|
>>> fmin_bfgs(quadratic_cost, x0, quadratic_cost_grad, args=(cost_weight,))
|
|
Optimization terminated successfully.
|
|
Current function value: 0.000000
|
|
Iterations: 7
|
|
Function evaluations: 8
|
|
Gradient evaluations: 8
|
|
array([ 2.85916637e-06, -4.54371951e-07])
|
|
|
|
"""
|
|
opts = {'gtol': gtol,
|
|
'norm': norm,
|
|
'eps': epsilon,
|
|
'disp': disp,
|
|
'maxiter': maxiter,
|
|
'return_all': retall,
|
|
'xrtol': xrtol,
|
|
'c1': c1,
|
|
'c2': c2,
|
|
'hess_inv0': hess_inv0}
|
|
|
|
callback = _wrap_callback(callback)
|
|
res = _minimize_bfgs(f, x0, args, fprime, callback=callback, **opts)
|
|
|
|
if full_output:
|
|
retlist = (res['x'], res['fun'], res['jac'], res['hess_inv'],
|
|
res['nfev'], res['njev'], res['status'])
|
|
if retall:
|
|
retlist += (res['allvecs'], )
|
|
return retlist
|
|
else:
|
|
if retall:
|
|
return res['x'], res['allvecs']
|
|
else:
|
|
return res['x']
|
|
|
|
|
|
def _minimize_bfgs(fun, x0, args=(), jac=None, callback=None,
|
|
gtol=1e-5, norm=np.inf, eps=_epsilon, maxiter=None,
|
|
disp=False, return_all=False, finite_diff_rel_step=None,
|
|
xrtol=0, c1=1e-4, c2=0.9,
|
|
hess_inv0=None, **unknown_options):
|
|
"""
|
|
Minimization of scalar function of one or more variables using the
|
|
BFGS algorithm.
|
|
|
|
Options
|
|
-------
|
|
disp : bool
|
|
Set to True to print convergence messages.
|
|
maxiter : int
|
|
Maximum number of iterations to perform.
|
|
gtol : float
|
|
Terminate successfully if gradient norm is less than `gtol`.
|
|
norm : float
|
|
Order of norm (Inf is max, -Inf is min).
|
|
eps : float or ndarray
|
|
If `jac is None` the absolute step size used for numerical
|
|
approximation of the jacobian via forward differences.
|
|
return_all : bool, optional
|
|
Set to True to return a list of the best solution at each of the
|
|
iterations.
|
|
finite_diff_rel_step : None or array_like, optional
|
|
If `jac in ['2-point', '3-point', 'cs']` the relative step size to
|
|
use for numerical approximation of the jacobian. The absolute step
|
|
size is computed as ``h = rel_step * sign(x) * max(1, abs(x))``,
|
|
possibly adjusted to fit into the bounds. For ``jac='3-point'``
|
|
the sign of `h` is ignored. If None (default) then step is selected
|
|
automatically.
|
|
xrtol : float, default: 0
|
|
Relative tolerance for `x`. Terminate successfully if step size is
|
|
less than ``xk * xrtol`` where ``xk`` is the current parameter vector.
|
|
c1 : float, default: 1e-4
|
|
Parameter for Armijo condition rule.
|
|
c2 : float, default: 0.9
|
|
Parameter for curvature condition rule.
|
|
hess_inv0 : None or ndarray, optional
|
|
Initial inverse hessian estimate, shape (n, n). If None (default) then
|
|
the identity matrix is used.
|
|
|
|
Notes
|
|
-----
|
|
Parameters `c1` and `c2` must satisfy ``0 < c1 < c2 < 1``.
|
|
|
|
If minimization doesn't complete successfully, with an error message of
|
|
``Desired error not necessarily achieved due to precision loss``, then
|
|
consider setting `gtol` to a higher value. This precision loss typically
|
|
occurs when the (finite difference) numerical differentiation cannot provide
|
|
sufficient precision to satisfy the `gtol` termination criterion.
|
|
This can happen when working in single precision and a callable jac is not
|
|
provided. For single precision problems a `gtol` of 1e-3 seems to work.
|
|
"""
|
|
_check_unknown_options(unknown_options)
|
|
_check_positive_definite(hess_inv0)
|
|
retall = return_all
|
|
|
|
x0 = asarray(x0).flatten()
|
|
if x0.ndim == 0:
|
|
x0.shape = (1,)
|
|
if maxiter is None:
|
|
maxiter = len(x0) * 200
|
|
|
|
sf = _prepare_scalar_function(fun, x0, jac, args=args, epsilon=eps,
|
|
finite_diff_rel_step=finite_diff_rel_step)
|
|
|
|
f = sf.fun
|
|
myfprime = sf.grad
|
|
|
|
old_fval = f(x0)
|
|
gfk = myfprime(x0)
|
|
|
|
k = 0
|
|
N = len(x0)
|
|
I = np.eye(N, dtype=int)
|
|
Hk = I if hess_inv0 is None else hess_inv0
|
|
|
|
# Sets the initial step guess to dx ~ 1
|
|
old_old_fval = old_fval + np.linalg.norm(gfk) / 2
|
|
|
|
xk = x0
|
|
if retall:
|
|
allvecs = [x0]
|
|
warnflag = 0
|
|
gnorm = vecnorm(gfk, ord=norm)
|
|
while (gnorm > gtol) and (k < maxiter):
|
|
pk = -np.dot(Hk, gfk)
|
|
try:
|
|
alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
|
|
_line_search_wolfe12(f, myfprime, xk, pk, gfk,
|
|
old_fval, old_old_fval, amin=1e-100,
|
|
amax=1e100, c1=c1, c2=c2)
|
|
except _LineSearchError:
|
|
# Line search failed to find a better solution.
|
|
warnflag = 2
|
|
break
|
|
|
|
sk = alpha_k * pk
|
|
xkp1 = xk + sk
|
|
|
|
if retall:
|
|
allvecs.append(xkp1)
|
|
xk = xkp1
|
|
if gfkp1 is None:
|
|
gfkp1 = myfprime(xkp1)
|
|
|
|
yk = gfkp1 - gfk
|
|
gfk = gfkp1
|
|
k += 1
|
|
intermediate_result = OptimizeResult(x=xk, fun=old_fval)
|
|
if _call_callback_maybe_halt(callback, intermediate_result):
|
|
break
|
|
gnorm = vecnorm(gfk, ord=norm)
|
|
if (gnorm <= gtol):
|
|
break
|
|
|
|
# See Chapter 5 in P.E. Frandsen, K. Jonasson, H.B. Nielsen,
|
|
# O. Tingleff: "Unconstrained Optimization", IMM, DTU. 1999.
|
|
# These notes are available here:
|
|
# http://www2.imm.dtu.dk/documents/ftp/publlec.html
|
|
if (alpha_k*vecnorm(pk) <= xrtol*(xrtol + vecnorm(xk))):
|
|
break
|
|
|
|
if not np.isfinite(old_fval):
|
|
# We correctly found +-Inf as optimal value, or something went
|
|
# wrong.
|
|
warnflag = 2
|
|
break
|
|
|
|
rhok_inv = np.dot(yk, sk)
|
|
# this was handled in numeric, let it remains for more safety
|
|
# Cryptic comment above is preserved for posterity. Future reader:
|
|
# consider change to condition below proposed in gh-1261/gh-17345.
|
|
if rhok_inv == 0.:
|
|
rhok = 1000.0
|
|
if disp:
|
|
msg = "Divide-by-zero encountered: rhok assumed large"
|
|
_print_success_message_or_warn(True, msg)
|
|
else:
|
|
rhok = 1. / rhok_inv
|
|
|
|
A1 = I - sk[:, np.newaxis] * yk[np.newaxis, :] * rhok
|
|
A2 = I - yk[:, np.newaxis] * sk[np.newaxis, :] * rhok
|
|
Hk = np.dot(A1, np.dot(Hk, A2)) + (rhok * sk[:, np.newaxis] *
|
|
sk[np.newaxis, :])
|
|
|
|
fval = old_fval
|
|
|
|
if warnflag == 2:
|
|
msg = _status_message['pr_loss']
|
|
elif k >= maxiter:
|
|
warnflag = 1
|
|
msg = _status_message['maxiter']
|
|
elif np.isnan(gnorm) or np.isnan(fval) or np.isnan(xk).any():
|
|
warnflag = 3
|
|
msg = _status_message['nan']
|
|
else:
|
|
msg = _status_message['success']
|
|
|
|
if disp:
|
|
_print_success_message_or_warn(warnflag, msg)
|
|
print(" Current function value: %f" % fval)
|
|
print(" Iterations: %d" % k)
|
|
print(" Function evaluations: %d" % sf.nfev)
|
|
print(" Gradient evaluations: %d" % sf.ngev)
|
|
|
|
result = OptimizeResult(fun=fval, jac=gfk, hess_inv=Hk, nfev=sf.nfev,
|
|
njev=sf.ngev, status=warnflag,
|
|
success=(warnflag == 0), message=msg, x=xk,
|
|
nit=k)
|
|
if retall:
|
|
result['allvecs'] = allvecs
|
|
return result
|
|
|
|
|
|
def _print_success_message_or_warn(warnflag, message, warntype=None):
|
|
if not warnflag:
|
|
print(message)
|
|
else:
|
|
warnings.warn(message, warntype or OptimizeWarning, stacklevel=3)
|
|
|
|
|
|
def fmin_cg(f, x0, fprime=None, args=(), gtol=1e-5, norm=np.inf,
|
|
epsilon=_epsilon, maxiter=None, full_output=0, disp=1, retall=0,
|
|
callback=None, c1=1e-4, c2=0.4):
|
|
"""
|
|
Minimize a function using a nonlinear conjugate gradient algorithm.
|
|
|
|
Parameters
|
|
----------
|
|
f : callable, ``f(x, *args)``
|
|
Objective function to be minimized. Here `x` must be a 1-D array of
|
|
the variables that are to be changed in the search for a minimum, and
|
|
`args` are the other (fixed) parameters of `f`.
|
|
x0 : ndarray
|
|
A user-supplied initial estimate of `xopt`, the optimal value of `x`.
|
|
It must be a 1-D array of values.
|
|
fprime : callable, ``fprime(x, *args)``, optional
|
|
A function that returns the gradient of `f` at `x`. Here `x` and `args`
|
|
are as described above for `f`. The returned value must be a 1-D array.
|
|
Defaults to None, in which case the gradient is approximated
|
|
numerically (see `epsilon`, below).
|
|
args : tuple, optional
|
|
Parameter values passed to `f` and `fprime`. Must be supplied whenever
|
|
additional fixed parameters are needed to completely specify the
|
|
functions `f` and `fprime`.
|
|
gtol : float, optional
|
|
Stop when the norm of the gradient is less than `gtol`.
|
|
norm : float, optional
|
|
Order to use for the norm of the gradient
|
|
(``-np.inf`` is min, ``np.inf`` is max).
|
|
epsilon : float or ndarray, optional
|
|
Step size(s) to use when `fprime` is approximated numerically. Can be a
|
|
scalar or a 1-D array. Defaults to ``sqrt(eps)``, with eps the
|
|
floating point machine precision. Usually ``sqrt(eps)`` is about
|
|
1.5e-8.
|
|
maxiter : int, optional
|
|
Maximum number of iterations to perform. Default is ``200 * len(x0)``.
|
|
full_output : bool, optional
|
|
If True, return `fopt`, `func_calls`, `grad_calls`, and `warnflag` in
|
|
addition to `xopt`. See the Returns section below for additional
|
|
information on optional return values.
|
|
disp : bool, optional
|
|
If True, return a convergence message, followed by `xopt`.
|
|
retall : bool, optional
|
|
If True, add to the returned values the results of each iteration.
|
|
callback : callable, optional
|
|
An optional user-supplied function, called after each iteration.
|
|
Called as ``callback(xk)``, where ``xk`` is the current value of `x0`.
|
|
c1 : float, default: 1e-4
|
|
Parameter for Armijo condition rule.
|
|
c2 : float, default: 0.4
|
|
Parameter for curvature condition rule.
|
|
|
|
Returns
|
|
-------
|
|
xopt : ndarray
|
|
Parameters which minimize f, i.e., ``f(xopt) == fopt``.
|
|
fopt : float, optional
|
|
Minimum value found, f(xopt). Only returned if `full_output` is True.
|
|
func_calls : int, optional
|
|
The number of function_calls made. Only returned if `full_output`
|
|
is True.
|
|
grad_calls : int, optional
|
|
The number of gradient calls made. Only returned if `full_output` is
|
|
True.
|
|
warnflag : int, optional
|
|
Integer value with warning status, only returned if `full_output` is
|
|
True.
|
|
|
|
0 : Success.
|
|
|
|
1 : The maximum number of iterations was exceeded.
|
|
|
|
2 : Gradient and/or function calls were not changing. May indicate
|
|
that precision was lost, i.e., the routine did not converge.
|
|
|
|
3 : NaN result encountered.
|
|
|
|
allvecs : list of ndarray, optional
|
|
List of arrays, containing the results at each iteration.
|
|
Only returned if `retall` is True.
|
|
|
|
See Also
|
|
--------
|
|
minimize : common interface to all `scipy.optimize` algorithms for
|
|
unconstrained and constrained minimization of multivariate
|
|
functions. It provides an alternative way to call
|
|
``fmin_cg``, by specifying ``method='CG'``.
|
|
|
|
Notes
|
|
-----
|
|
This conjugate gradient algorithm is based on that of Polak and Ribiere
|
|
[1]_.
|
|
|
|
Conjugate gradient methods tend to work better when:
|
|
|
|
1. `f` has a unique global minimizing point, and no local minima or
|
|
other stationary points,
|
|
2. `f` is, at least locally, reasonably well approximated by a
|
|
quadratic function of the variables,
|
|
3. `f` is continuous and has a continuous gradient,
|
|
4. `fprime` is not too large, e.g., has a norm less than 1000,
|
|
5. The initial guess, `x0`, is reasonably close to `f` 's global
|
|
minimizing point, `xopt`.
|
|
|
|
Parameters `c1` and `c2` must satisfy ``0 < c1 < c2 < 1``.
|
|
|
|
References
|
|
----------
|
|
.. [1] Wright & Nocedal, "Numerical Optimization", 1999, pp. 120-122.
|
|
|
|
Examples
|
|
--------
|
|
Example 1: seek the minimum value of the expression
|
|
``a*u**2 + b*u*v + c*v**2 + d*u + e*v + f`` for given values
|
|
of the parameters and an initial guess ``(u, v) = (0, 0)``.
|
|
|
|
>>> import numpy as np
|
|
>>> args = (2, 3, 7, 8, 9, 10) # parameter values
|
|
>>> def f(x, *args):
|
|
... u, v = x
|
|
... a, b, c, d, e, f = args
|
|
... return a*u**2 + b*u*v + c*v**2 + d*u + e*v + f
|
|
>>> def gradf(x, *args):
|
|
... u, v = x
|
|
... a, b, c, d, e, f = args
|
|
... gu = 2*a*u + b*v + d # u-component of the gradient
|
|
... gv = b*u + 2*c*v + e # v-component of the gradient
|
|
... return np.asarray((gu, gv))
|
|
>>> x0 = np.asarray((0, 0)) # Initial guess.
|
|
>>> from scipy import optimize
|
|
>>> res1 = optimize.fmin_cg(f, x0, fprime=gradf, args=args)
|
|
Optimization terminated successfully.
|
|
Current function value: 1.617021
|
|
Iterations: 4
|
|
Function evaluations: 8
|
|
Gradient evaluations: 8
|
|
>>> res1
|
|
array([-1.80851064, -0.25531915])
|
|
|
|
Example 2: solve the same problem using the `minimize` function.
|
|
(This `myopts` dictionary shows all of the available options,
|
|
although in practice only non-default values would be needed.
|
|
The returned value will be a dictionary.)
|
|
|
|
>>> opts = {'maxiter' : None, # default value.
|
|
... 'disp' : True, # non-default value.
|
|
... 'gtol' : 1e-5, # default value.
|
|
... 'norm' : np.inf, # default value.
|
|
... 'eps' : 1.4901161193847656e-08} # default value.
|
|
>>> res2 = optimize.minimize(f, x0, jac=gradf, args=args,
|
|
... method='CG', options=opts)
|
|
Optimization terminated successfully.
|
|
Current function value: 1.617021
|
|
Iterations: 4
|
|
Function evaluations: 8
|
|
Gradient evaluations: 8
|
|
>>> res2.x # minimum found
|
|
array([-1.80851064, -0.25531915])
|
|
|
|
"""
|
|
opts = {'gtol': gtol,
|
|
'norm': norm,
|
|
'eps': epsilon,
|
|
'disp': disp,
|
|
'maxiter': maxiter,
|
|
'return_all': retall}
|
|
|
|
callback = _wrap_callback(callback)
|
|
res = _minimize_cg(f, x0, args, fprime, callback=callback, c1=c1, c2=c2,
|
|
**opts)
|
|
|
|
if full_output:
|
|
retlist = res['x'], res['fun'], res['nfev'], res['njev'], res['status']
|
|
if retall:
|
|
retlist += (res['allvecs'], )
|
|
return retlist
|
|
else:
|
|
if retall:
|
|
return res['x'], res['allvecs']
|
|
else:
|
|
return res['x']
|
|
|
|
|
|
def _minimize_cg(fun, x0, args=(), jac=None, callback=None,
|
|
gtol=1e-5, norm=np.inf, eps=_epsilon, maxiter=None,
|
|
disp=False, return_all=False, finite_diff_rel_step=None,
|
|
c1=1e-4, c2=0.4, **unknown_options):
|
|
"""
|
|
Minimization of scalar function of one or more variables using the
|
|
conjugate gradient algorithm.
|
|
|
|
Options
|
|
-------
|
|
disp : bool
|
|
Set to True to print convergence messages.
|
|
maxiter : int
|
|
Maximum number of iterations to perform.
|
|
gtol : float
|
|
Gradient norm must be less than `gtol` before successful
|
|
termination.
|
|
norm : float
|
|
Order of norm (Inf is max, -Inf is min).
|
|
eps : float or ndarray
|
|
If `jac is None` the absolute step size used for numerical
|
|
approximation of the jacobian via forward differences.
|
|
return_all : bool, optional
|
|
Set to True to return a list of the best solution at each of the
|
|
iterations.
|
|
finite_diff_rel_step : None or array_like, optional
|
|
If `jac in ['2-point', '3-point', 'cs']` the relative step size to
|
|
use for numerical approximation of the jacobian. The absolute step
|
|
size is computed as ``h = rel_step * sign(x) * max(1, abs(x))``,
|
|
possibly adjusted to fit into the bounds. For ``jac='3-point'``
|
|
the sign of `h` is ignored. If None (default) then step is selected
|
|
automatically.
|
|
c1 : float, default: 1e-4
|
|
Parameter for Armijo condition rule.
|
|
c2 : float, default: 0.4
|
|
Parameter for curvature condition rule.
|
|
|
|
Notes
|
|
-----
|
|
Parameters `c1` and `c2` must satisfy ``0 < c1 < c2 < 1``.
|
|
"""
|
|
_check_unknown_options(unknown_options)
|
|
|
|
retall = return_all
|
|
|
|
x0 = asarray(x0).flatten()
|
|
if maxiter is None:
|
|
maxiter = len(x0) * 200
|
|
|
|
sf = _prepare_scalar_function(fun, x0, jac=jac, args=args, epsilon=eps,
|
|
finite_diff_rel_step=finite_diff_rel_step)
|
|
|
|
f = sf.fun
|
|
myfprime = sf.grad
|
|
|
|
old_fval = f(x0)
|
|
gfk = myfprime(x0)
|
|
|
|
k = 0
|
|
xk = x0
|
|
# Sets the initial step guess to dx ~ 1
|
|
old_old_fval = old_fval + np.linalg.norm(gfk) / 2
|
|
|
|
if retall:
|
|
allvecs = [xk]
|
|
warnflag = 0
|
|
pk = -gfk
|
|
gnorm = vecnorm(gfk, ord=norm)
|
|
|
|
sigma_3 = 0.01
|
|
|
|
while (gnorm > gtol) and (k < maxiter):
|
|
deltak = np.dot(gfk, gfk)
|
|
|
|
cached_step = [None]
|
|
|
|
def polak_ribiere_powell_step(alpha, gfkp1=None):
|
|
xkp1 = xk + alpha * pk
|
|
if gfkp1 is None:
|
|
gfkp1 = myfprime(xkp1)
|
|
yk = gfkp1 - gfk
|
|
beta_k = max(0, np.dot(yk, gfkp1) / deltak)
|
|
pkp1 = -gfkp1 + beta_k * pk
|
|
gnorm = vecnorm(gfkp1, ord=norm)
|
|
return (alpha, xkp1, pkp1, gfkp1, gnorm)
|
|
|
|
def descent_condition(alpha, xkp1, fp1, gfkp1):
|
|
# Polak-Ribiere+ needs an explicit check of a sufficient
|
|
# descent condition, which is not guaranteed by strong Wolfe.
|
|
#
|
|
# See Gilbert & Nocedal, "Global convergence properties of
|
|
# conjugate gradient methods for optimization",
|
|
# SIAM J. Optimization 2, 21 (1992).
|
|
cached_step[:] = polak_ribiere_powell_step(alpha, gfkp1)
|
|
alpha, xk, pk, gfk, gnorm = cached_step
|
|
|
|
# Accept step if it leads to convergence.
|
|
if gnorm <= gtol:
|
|
return True
|
|
|
|
# Accept step if sufficient descent condition applies.
|
|
return np.dot(pk, gfk) <= -sigma_3 * np.dot(gfk, gfk)
|
|
|
|
try:
|
|
alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
|
|
_line_search_wolfe12(f, myfprime, xk, pk, gfk, old_fval,
|
|
old_old_fval, c1=c1, c2=c2, amin=1e-100,
|
|
amax=1e100, extra_condition=descent_condition)
|
|
except _LineSearchError:
|
|
# Line search failed to find a better solution.
|
|
warnflag = 2
|
|
break
|
|
|
|
# Reuse already computed results if possible
|
|
if alpha_k == cached_step[0]:
|
|
alpha_k, xk, pk, gfk, gnorm = cached_step
|
|
else:
|
|
alpha_k, xk, pk, gfk, gnorm = polak_ribiere_powell_step(alpha_k, gfkp1)
|
|
|
|
if retall:
|
|
allvecs.append(xk)
|
|
k += 1
|
|
intermediate_result = OptimizeResult(x=xk, fun=old_fval)
|
|
if _call_callback_maybe_halt(callback, intermediate_result):
|
|
break
|
|
|
|
fval = old_fval
|
|
if warnflag == 2:
|
|
msg = _status_message['pr_loss']
|
|
elif k >= maxiter:
|
|
warnflag = 1
|
|
msg = _status_message['maxiter']
|
|
elif np.isnan(gnorm) or np.isnan(fval) or np.isnan(xk).any():
|
|
warnflag = 3
|
|
msg = _status_message['nan']
|
|
else:
|
|
msg = _status_message['success']
|
|
|
|
if disp:
|
|
_print_success_message_or_warn(warnflag, msg)
|
|
print(" Current function value: %f" % fval)
|
|
print(" Iterations: %d" % k)
|
|
print(" Function evaluations: %d" % sf.nfev)
|
|
print(" Gradient evaluations: %d" % sf.ngev)
|
|
|
|
result = OptimizeResult(fun=fval, jac=gfk, nfev=sf.nfev,
|
|
njev=sf.ngev, status=warnflag,
|
|
success=(warnflag == 0), message=msg, x=xk,
|
|
nit=k)
|
|
if retall:
|
|
result['allvecs'] = allvecs
|
|
return result
|
|
|
|
|
|
def fmin_ncg(f, x0, fprime, fhess_p=None, fhess=None, args=(), avextol=1e-5,
|
|
epsilon=_epsilon, maxiter=None, full_output=0, disp=1, retall=0,
|
|
callback=None, c1=1e-4, c2=0.9):
|
|
"""
|
|
Unconstrained minimization of a function using the Newton-CG method.
|
|
|
|
Parameters
|
|
----------
|
|
f : callable ``f(x, *args)``
|
|
Objective function to be minimized.
|
|
x0 : ndarray
|
|
Initial guess.
|
|
fprime : callable ``f'(x, *args)``
|
|
Gradient of f.
|
|
fhess_p : callable ``fhess_p(x, p, *args)``, optional
|
|
Function which computes the Hessian of f times an
|
|
arbitrary vector, p.
|
|
fhess : callable ``fhess(x, *args)``, optional
|
|
Function to compute the Hessian matrix of f.
|
|
args : tuple, optional
|
|
Extra arguments passed to f, fprime, fhess_p, and fhess
|
|
(the same set of extra arguments is supplied to all of
|
|
these functions).
|
|
epsilon : float or ndarray, optional
|
|
If fhess is approximated, use this value for the step size.
|
|
callback : callable, optional
|
|
An optional user-supplied function which is called after
|
|
each iteration. Called as callback(xk), where xk is the
|
|
current parameter vector.
|
|
avextol : float, optional
|
|
Convergence is assumed when the average relative error in
|
|
the minimizer falls below this amount.
|
|
maxiter : int, optional
|
|
Maximum number of iterations to perform.
|
|
full_output : bool, optional
|
|
If True, return the optional outputs.
|
|
disp : bool, optional
|
|
If True, print convergence message.
|
|
retall : bool, optional
|
|
If True, return a list of results at each iteration.
|
|
c1 : float, default: 1e-4
|
|
Parameter for Armijo condition rule.
|
|
c2 : float, default: 0.9
|
|
Parameter for curvature condition rule
|
|
|
|
Returns
|
|
-------
|
|
xopt : ndarray
|
|
Parameters which minimize f, i.e., ``f(xopt) == fopt``.
|
|
fopt : float
|
|
Value of the function at xopt, i.e., ``fopt = f(xopt)``.
|
|
fcalls : int
|
|
Number of function calls made.
|
|
gcalls : int
|
|
Number of gradient calls made.
|
|
hcalls : int
|
|
Number of Hessian calls made.
|
|
warnflag : int
|
|
Warnings generated by the algorithm.
|
|
1 : Maximum number of iterations exceeded.
|
|
2 : Line search failure (precision loss).
|
|
3 : NaN result encountered.
|
|
allvecs : list
|
|
The result at each iteration, if retall is True (see below).
|
|
|
|
See also
|
|
--------
|
|
minimize: Interface to minimization algorithms for multivariate
|
|
functions. See the 'Newton-CG' `method` in particular.
|
|
|
|
Notes
|
|
-----
|
|
Only one of `fhess_p` or `fhess` need to be given. If `fhess`
|
|
is provided, then `fhess_p` will be ignored. If neither `fhess`
|
|
nor `fhess_p` is provided, then the hessian product will be
|
|
approximated using finite differences on `fprime`. `fhess_p`
|
|
must compute the hessian times an arbitrary vector. If it is not
|
|
given, finite-differences on `fprime` are used to compute
|
|
it.
|
|
|
|
Newton-CG methods are also called truncated Newton methods. This
|
|
function differs from scipy.optimize.fmin_tnc because
|
|
|
|
1. scipy.optimize.fmin_ncg is written purely in Python using NumPy
|
|
and scipy while scipy.optimize.fmin_tnc calls a C function.
|
|
2. scipy.optimize.fmin_ncg is only for unconstrained minimization
|
|
while scipy.optimize.fmin_tnc is for unconstrained minimization
|
|
or box constrained minimization. (Box constraints give
|
|
lower and upper bounds for each variable separately.)
|
|
|
|
Parameters `c1` and `c2` must satisfy ``0 < c1 < c2 < 1``.
|
|
|
|
References
|
|
----------
|
|
Wright & Nocedal, 'Numerical Optimization', 1999, p. 140.
|
|
|
|
"""
|
|
opts = {'xtol': avextol,
|
|
'eps': epsilon,
|
|
'maxiter': maxiter,
|
|
'disp': disp,
|
|
'return_all': retall}
|
|
|
|
callback = _wrap_callback(callback)
|
|
res = _minimize_newtoncg(f, x0, args, fprime, fhess, fhess_p,
|
|
callback=callback, c1=c1, c2=c2, **opts)
|
|
|
|
if full_output:
|
|
retlist = (res['x'], res['fun'], res['nfev'], res['njev'],
|
|
res['nhev'], res['status'])
|
|
if retall:
|
|
retlist += (res['allvecs'], )
|
|
return retlist
|
|
else:
|
|
if retall:
|
|
return res['x'], res['allvecs']
|
|
else:
|
|
return res['x']
|
|
|
|
|
|
def _minimize_newtoncg(fun, x0, args=(), jac=None, hess=None, hessp=None,
|
|
callback=None, xtol=1e-5, eps=_epsilon, maxiter=None,
|
|
disp=False, return_all=False, c1=1e-4, c2=0.9,
|
|
**unknown_options):
|
|
"""
|
|
Minimization of scalar function of one or more variables using the
|
|
Newton-CG algorithm.
|
|
|
|
Note that the `jac` parameter (Jacobian) is required.
|
|
|
|
Options
|
|
-------
|
|
disp : bool
|
|
Set to True to print convergence messages.
|
|
xtol : float
|
|
Average relative error in solution `xopt` acceptable for
|
|
convergence.
|
|
maxiter : int
|
|
Maximum number of iterations to perform.
|
|
eps : float or ndarray
|
|
If `hessp` is approximated, use this value for the step size.
|
|
return_all : bool, optional
|
|
Set to True to return a list of the best solution at each of the
|
|
iterations.
|
|
c1 : float, default: 1e-4
|
|
Parameter for Armijo condition rule.
|
|
c2 : float, default: 0.9
|
|
Parameter for curvature condition rule.
|
|
|
|
Notes
|
|
-----
|
|
Parameters `c1` and `c2` must satisfy ``0 < c1 < c2 < 1``.
|
|
"""
|
|
_check_unknown_options(unknown_options)
|
|
if jac is None:
|
|
raise ValueError('Jacobian is required for Newton-CG method')
|
|
fhess_p = hessp
|
|
fhess = hess
|
|
avextol = xtol
|
|
epsilon = eps
|
|
retall = return_all
|
|
|
|
x0 = asarray(x0).flatten()
|
|
# TODO: add hessp (callable or FD) to ScalarFunction?
|
|
sf = _prepare_scalar_function(
|
|
fun, x0, jac, args=args, epsilon=eps, hess=hess
|
|
)
|
|
f = sf.fun
|
|
fprime = sf.grad
|
|
_h = sf.hess(x0)
|
|
|
|
# Logic for hess/hessp
|
|
# - If a callable(hess) is provided, then use that
|
|
# - If hess is a FD_METHOD, or the output from hess(x) is a LinearOperator
|
|
# then create a hessp function using those.
|
|
# - If hess is None but you have callable(hessp) then use the hessp.
|
|
# - If hess and hessp are None then approximate hessp using the grad/jac.
|
|
|
|
if (hess in FD_METHODS or isinstance(_h, LinearOperator)):
|
|
fhess = None
|
|
|
|
def _hessp(x, p, *args):
|
|
return sf.hess(x).dot(p)
|
|
|
|
fhess_p = _hessp
|
|
|
|
def terminate(warnflag, msg):
|
|
if disp:
|
|
_print_success_message_or_warn(warnflag, msg)
|
|
print(" Current function value: %f" % old_fval)
|
|
print(" Iterations: %d" % k)
|
|
print(" Function evaluations: %d" % sf.nfev)
|
|
print(" Gradient evaluations: %d" % sf.ngev)
|
|
print(" Hessian evaluations: %d" % hcalls)
|
|
fval = old_fval
|
|
result = OptimizeResult(fun=fval, jac=gfk, nfev=sf.nfev,
|
|
njev=sf.ngev, nhev=hcalls, status=warnflag,
|
|
success=(warnflag == 0), message=msg, x=xk,
|
|
nit=k)
|
|
if retall:
|
|
result['allvecs'] = allvecs
|
|
return result
|
|
|
|
hcalls = 0
|
|
if maxiter is None:
|
|
maxiter = len(x0)*200
|
|
cg_maxiter = 20*len(x0)
|
|
|
|
xtol = len(x0) * avextol
|
|
# Make sure we enter the while loop.
|
|
update_l1norm = np.finfo(float).max
|
|
xk = np.copy(x0)
|
|
if retall:
|
|
allvecs = [xk]
|
|
k = 0
|
|
gfk = None
|
|
old_fval = f(x0)
|
|
old_old_fval = None
|
|
float64eps = np.finfo(np.float64).eps
|
|
while update_l1norm > xtol:
|
|
if k >= maxiter:
|
|
msg = "Warning: " + _status_message['maxiter']
|
|
return terminate(1, msg)
|
|
# Compute a search direction pk by applying the CG method to
|
|
# del2 f(xk) p = - grad f(xk) starting from 0.
|
|
b = -fprime(xk)
|
|
maggrad = np.linalg.norm(b, ord=1)
|
|
eta = min(0.5, math.sqrt(maggrad))
|
|
termcond = eta * maggrad
|
|
xsupi = zeros(len(x0), dtype=x0.dtype)
|
|
ri = -b
|
|
psupi = -ri
|
|
i = 0
|
|
dri0 = np.dot(ri, ri)
|
|
|
|
if fhess is not None: # you want to compute hessian once.
|
|
A = sf.hess(xk)
|
|
hcalls += 1
|
|
|
|
for k2 in range(cg_maxiter):
|
|
if np.add.reduce(np.abs(ri)) <= termcond:
|
|
break
|
|
if fhess is None:
|
|
if fhess_p is None:
|
|
Ap = approx_fhess_p(xk, psupi, fprime, epsilon)
|
|
else:
|
|
Ap = fhess_p(xk, psupi, *args)
|
|
hcalls += 1
|
|
else:
|
|
# hess was supplied as a callable or hessian update strategy, so
|
|
# A is a dense numpy array or sparse matrix
|
|
Ap = A.dot(psupi)
|
|
# check curvature
|
|
Ap = asarray(Ap).squeeze() # get rid of matrices...
|
|
curv = np.dot(psupi, Ap)
|
|
if 0 <= curv <= 3 * float64eps:
|
|
break
|
|
elif curv < 0:
|
|
if (i > 0):
|
|
break
|
|
else:
|
|
# fall back to steepest descent direction
|
|
xsupi = dri0 / (-curv) * b
|
|
break
|
|
alphai = dri0 / curv
|
|
xsupi += alphai * psupi
|
|
ri += alphai * Ap
|
|
dri1 = np.dot(ri, ri)
|
|
betai = dri1 / dri0
|
|
psupi = -ri + betai * psupi
|
|
i += 1
|
|
dri0 = dri1 # update np.dot(ri,ri) for next time.
|
|
else:
|
|
# curvature keeps increasing, bail out
|
|
msg = ("Warning: CG iterations didn't converge. The Hessian is not "
|
|
"positive definite.")
|
|
return terminate(3, msg)
|
|
|
|
pk = xsupi # search direction is solution to system.
|
|
gfk = -b # gradient at xk
|
|
|
|
try:
|
|
alphak, fc, gc, old_fval, old_old_fval, gfkp1 = \
|
|
_line_search_wolfe12(f, fprime, xk, pk, gfk,
|
|
old_fval, old_old_fval, c1=c1, c2=c2)
|
|
except _LineSearchError:
|
|
# Line search failed to find a better solution.
|
|
msg = "Warning: " + _status_message['pr_loss']
|
|
return terminate(2, msg)
|
|
|
|
update = alphak * pk
|
|
xk += update # upcast if necessary
|
|
if retall:
|
|
allvecs.append(xk)
|
|
k += 1
|
|
intermediate_result = OptimizeResult(x=xk, fun=old_fval)
|
|
if _call_callback_maybe_halt(callback, intermediate_result):
|
|
return terminate(5, "")
|
|
update_l1norm = np.linalg.norm(update, ord=1)
|
|
|
|
else:
|
|
if np.isnan(old_fval) or np.isnan(update_l1norm):
|
|
return terminate(3, _status_message['nan'])
|
|
|
|
msg = _status_message['success']
|
|
return terminate(0, msg)
|
|
|
|
|
|
def fminbound(func, x1, x2, args=(), xtol=1e-5, maxfun=500,
|
|
full_output=0, disp=1):
|
|
"""Bounded minimization for scalar functions.
|
|
|
|
Parameters
|
|
----------
|
|
func : callable f(x,*args)
|
|
Objective function to be minimized (must accept and return scalars).
|
|
x1, x2 : float or array scalar
|
|
Finite optimization bounds.
|
|
args : tuple, optional
|
|
Extra arguments passed to function.
|
|
xtol : float, optional
|
|
The convergence tolerance.
|
|
maxfun : int, optional
|
|
Maximum number of function evaluations allowed.
|
|
full_output : bool, optional
|
|
If True, return optional outputs.
|
|
disp : int, optional
|
|
If non-zero, print messages.
|
|
0 : no message printing.
|
|
1 : non-convergence notification messages only.
|
|
2 : print a message on convergence too.
|
|
3 : print iteration results.
|
|
|
|
|
|
Returns
|
|
-------
|
|
xopt : ndarray
|
|
Parameters (over given interval) which minimize the
|
|
objective function.
|
|
fval : number
|
|
(Optional output) The function value evaluated at the minimizer.
|
|
ierr : int
|
|
(Optional output) An error flag (0 if converged, 1 if maximum number of
|
|
function calls reached).
|
|
numfunc : int
|
|
(Optional output) The number of function calls made.
|
|
|
|
See also
|
|
--------
|
|
minimize_scalar: Interface to minimization algorithms for scalar
|
|
univariate functions. See the 'Bounded' `method` in particular.
|
|
|
|
Notes
|
|
-----
|
|
Finds a local minimizer of the scalar function `func` in the
|
|
interval x1 < xopt < x2 using Brent's method. (See `brent`
|
|
for auto-bracketing.)
|
|
|
|
References
|
|
----------
|
|
.. [1] Forsythe, G.E., M. A. Malcolm, and C. B. Moler. "Computer Methods
|
|
for Mathematical Computations." Prentice-Hall Series in Automatic
|
|
Computation 259 (1977).
|
|
.. [2] Brent, Richard P. Algorithms for Minimization Without Derivatives.
|
|
Courier Corporation, 2013.
|
|
|
|
Examples
|
|
--------
|
|
`fminbound` finds the minimizer of the function in the given range.
|
|
The following examples illustrate this.
|
|
|
|
>>> from scipy import optimize
|
|
>>> def f(x):
|
|
... return (x-1)**2
|
|
>>> minimizer = optimize.fminbound(f, -4, 4)
|
|
>>> minimizer
|
|
1.0
|
|
>>> minimum = f(minimizer)
|
|
>>> minimum
|
|
0.0
|
|
>>> res = optimize.fminbound(f, 3, 4, full_output=True)
|
|
>>> minimizer, fval, ierr, numfunc = res
|
|
>>> minimizer
|
|
3.000005960860986
|
|
>>> minimum = f(minimizer)
|
|
>>> minimum, fval
|
|
(4.000023843479476, 4.000023843479476)
|
|
"""
|
|
options = {'xatol': xtol,
|
|
'maxiter': maxfun,
|
|
'disp': disp}
|
|
|
|
res = _minimize_scalar_bounded(func, (x1, x2), args, **options)
|
|
if full_output:
|
|
return res['x'], res['fun'], res['status'], res['nfev']
|
|
else:
|
|
return res['x']
|
|
|
|
|
|
def _minimize_scalar_bounded(func, bounds, args=(),
|
|
xatol=1e-5, maxiter=500, disp=0,
|
|
**unknown_options):
|
|
"""
|
|
Options
|
|
-------
|
|
maxiter : int
|
|
Maximum number of iterations to perform.
|
|
disp: int, optional
|
|
If non-zero, print messages.
|
|
0 : no message printing.
|
|
1 : non-convergence notification messages only.
|
|
2 : print a message on convergence too.
|
|
3 : print iteration results.
|
|
xatol : float
|
|
Absolute error in solution `xopt` acceptable for convergence.
|
|
|
|
"""
|
|
_check_unknown_options(unknown_options)
|
|
maxfun = maxiter
|
|
# Test bounds are of correct form
|
|
if len(bounds) != 2:
|
|
raise ValueError('bounds must have two elements.')
|
|
x1, x2 = bounds
|
|
|
|
if not (is_finite_scalar(x1) and is_finite_scalar(x2)):
|
|
raise ValueError("Optimization bounds must be finite scalars.")
|
|
|
|
if x1 > x2:
|
|
raise ValueError("The lower bound exceeds the upper bound.")
|
|
|
|
flag = 0
|
|
header = ' Func-count x f(x) Procedure'
|
|
step = ' initial'
|
|
|
|
sqrt_eps = sqrt(2.2e-16)
|
|
golden_mean = 0.5 * (3.0 - sqrt(5.0))
|
|
a, b = x1, x2
|
|
fulc = a + golden_mean * (b - a)
|
|
nfc, xf = fulc, fulc
|
|
rat = e = 0.0
|
|
x = xf
|
|
fx = func(x, *args)
|
|
num = 1
|
|
fmin_data = (1, xf, fx)
|
|
fu = np.inf
|
|
|
|
ffulc = fnfc = fx
|
|
xm = 0.5 * (a + b)
|
|
tol1 = sqrt_eps * np.abs(xf) + xatol / 3.0
|
|
tol2 = 2.0 * tol1
|
|
|
|
if disp > 2:
|
|
print(" ")
|
|
print(header)
|
|
print("%5.0f %12.6g %12.6g %s" % (fmin_data + (step,)))
|
|
|
|
while (np.abs(xf - xm) > (tol2 - 0.5 * (b - a))):
|
|
golden = 1
|
|
# Check for parabolic fit
|
|
if np.abs(e) > tol1:
|
|
golden = 0
|
|
r = (xf - nfc) * (fx - ffulc)
|
|
q = (xf - fulc) * (fx - fnfc)
|
|
p = (xf - fulc) * q - (xf - nfc) * r
|
|
q = 2.0 * (q - r)
|
|
if q > 0.0:
|
|
p = -p
|
|
q = np.abs(q)
|
|
r = e
|
|
e = rat
|
|
|
|
# Check for acceptability of parabola
|
|
if ((np.abs(p) < np.abs(0.5*q*r)) and (p > q*(a - xf)) and
|
|
(p < q * (b - xf))):
|
|
rat = (p + 0.0) / q
|
|
x = xf + rat
|
|
step = ' parabolic'
|
|
|
|
if ((x - a) < tol2) or ((b - x) < tol2):
|
|
si = np.sign(xm - xf) + ((xm - xf) == 0)
|
|
rat = tol1 * si
|
|
else: # do a golden-section step
|
|
golden = 1
|
|
|
|
if golden: # do a golden-section step
|
|
if xf >= xm:
|
|
e = a - xf
|
|
else:
|
|
e = b - xf
|
|
rat = golden_mean*e
|
|
step = ' golden'
|
|
|
|
si = np.sign(rat) + (rat == 0)
|
|
x = xf + si * np.maximum(np.abs(rat), tol1)
|
|
fu = func(x, *args)
|
|
num += 1
|
|
fmin_data = (num, x, fu)
|
|
if disp > 2:
|
|
print("%5.0f %12.6g %12.6g %s" % (fmin_data + (step,)))
|
|
|
|
if fu <= fx:
|
|
if x >= xf:
|
|
a = xf
|
|
else:
|
|
b = xf
|
|
fulc, ffulc = nfc, fnfc
|
|
nfc, fnfc = xf, fx
|
|
xf, fx = x, fu
|
|
else:
|
|
if x < xf:
|
|
a = x
|
|
else:
|
|
b = x
|
|
if (fu <= fnfc) or (nfc == xf):
|
|
fulc, ffulc = nfc, fnfc
|
|
nfc, fnfc = x, fu
|
|
elif (fu <= ffulc) or (fulc == xf) or (fulc == nfc):
|
|
fulc, ffulc = x, fu
|
|
|
|
xm = 0.5 * (a + b)
|
|
tol1 = sqrt_eps * np.abs(xf) + xatol / 3.0
|
|
tol2 = 2.0 * tol1
|
|
|
|
if num >= maxfun:
|
|
flag = 1
|
|
break
|
|
|
|
if np.isnan(xf) or np.isnan(fx) or np.isnan(fu):
|
|
flag = 2
|
|
|
|
fval = fx
|
|
if disp > 0:
|
|
_endprint(x, flag, fval, maxfun, xatol, disp)
|
|
|
|
result = OptimizeResult(fun=fval, status=flag, success=(flag == 0),
|
|
message={0: 'Solution found.',
|
|
1: 'Maximum number of function calls '
|
|
'reached.',
|
|
2: _status_message['nan']}.get(flag, ''),
|
|
x=xf, nfev=num, nit=num)
|
|
|
|
return result
|
|
|
|
|
|
class Brent:
|
|
#need to rethink design of __init__
|
|
def __init__(self, func, args=(), tol=1.48e-8, maxiter=500,
|
|
full_output=0, disp=0):
|
|
self.func = func
|
|
self.args = args
|
|
self.tol = tol
|
|
self.maxiter = maxiter
|
|
self._mintol = 1.0e-11
|
|
self._cg = 0.3819660
|
|
self.xmin = None
|
|
self.fval = None
|
|
self.iter = 0
|
|
self.funcalls = 0
|
|
self.disp = disp
|
|
|
|
# need to rethink design of set_bracket (new options, etc.)
|
|
def set_bracket(self, brack=None):
|
|
self.brack = brack
|
|
|
|
def get_bracket_info(self):
|
|
#set up
|
|
func = self.func
|
|
args = self.args
|
|
brack = self.brack
|
|
### BEGIN core bracket_info code ###
|
|
### carefully DOCUMENT any CHANGES in core ##
|
|
if brack is None:
|
|
xa, xb, xc, fa, fb, fc, funcalls = bracket(func, args=args)
|
|
elif len(brack) == 2:
|
|
xa, xb, xc, fa, fb, fc, funcalls = bracket(func, xa=brack[0],
|
|
xb=brack[1], args=args)
|
|
elif len(brack) == 3:
|
|
xa, xb, xc = brack
|
|
if (xa > xc): # swap so xa < xc can be assumed
|
|
xc, xa = xa, xc
|
|
if not ((xa < xb) and (xb < xc)):
|
|
raise ValueError(
|
|
"Bracketing values (xa, xb, xc) do not"
|
|
" fulfill this requirement: (xa < xb) and (xb < xc)"
|
|
)
|
|
fa = func(*((xa,) + args))
|
|
fb = func(*((xb,) + args))
|
|
fc = func(*((xc,) + args))
|
|
if not ((fb < fa) and (fb < fc)):
|
|
raise ValueError(
|
|
"Bracketing values (xa, xb, xc) do not fulfill"
|
|
" this requirement: (f(xb) < f(xa)) and (f(xb) < f(xc))"
|
|
)
|
|
|
|
funcalls = 3
|
|
else:
|
|
raise ValueError("Bracketing interval must be "
|
|
"length 2 or 3 sequence.")
|
|
### END core bracket_info code ###
|
|
|
|
return xa, xb, xc, fa, fb, fc, funcalls
|
|
|
|
def optimize(self):
|
|
# set up for optimization
|
|
func = self.func
|
|
xa, xb, xc, fa, fb, fc, funcalls = self.get_bracket_info()
|
|
_mintol = self._mintol
|
|
_cg = self._cg
|
|
#################################
|
|
#BEGIN CORE ALGORITHM
|
|
#################################
|
|
x = w = v = xb
|
|
fw = fv = fx = fb
|
|
if (xa < xc):
|
|
a = xa
|
|
b = xc
|
|
else:
|
|
a = xc
|
|
b = xa
|
|
deltax = 0.0
|
|
iter = 0
|
|
|
|
if self.disp > 2:
|
|
print(" ")
|
|
print(f"{'Func-count':^12} {'x':^12} {'f(x)': ^12}")
|
|
print(f"{funcalls:^12g} {x:^12.6g} {fx:^12.6g}")
|
|
|
|
while (iter < self.maxiter):
|
|
tol1 = self.tol * np.abs(x) + _mintol
|
|
tol2 = 2.0 * tol1
|
|
xmid = 0.5 * (a + b)
|
|
# check for convergence
|
|
if np.abs(x - xmid) < (tol2 - 0.5 * (b - a)):
|
|
break
|
|
# XXX In the first iteration, rat is only bound in the true case
|
|
# of this conditional. This used to cause an UnboundLocalError
|
|
# (gh-4140). It should be set before the if (but to what?).
|
|
if (np.abs(deltax) <= tol1):
|
|
if (x >= xmid):
|
|
deltax = a - x # do a golden section step
|
|
else:
|
|
deltax = b - x
|
|
rat = _cg * deltax
|
|
else: # do a parabolic step
|
|
tmp1 = (x - w) * (fx - fv)
|
|
tmp2 = (x - v) * (fx - fw)
|
|
p = (x - v) * tmp2 - (x - w) * tmp1
|
|
tmp2 = 2.0 * (tmp2 - tmp1)
|
|
if (tmp2 > 0.0):
|
|
p = -p
|
|
tmp2 = np.abs(tmp2)
|
|
dx_temp = deltax
|
|
deltax = rat
|
|
# check parabolic fit
|
|
if ((p > tmp2 * (a - x)) and (p < tmp2 * (b - x)) and
|
|
(np.abs(p) < np.abs(0.5 * tmp2 * dx_temp))):
|
|
rat = p * 1.0 / tmp2 # if parabolic step is useful.
|
|
u = x + rat
|
|
if ((u - a) < tol2 or (b - u) < tol2):
|
|
if xmid - x >= 0:
|
|
rat = tol1
|
|
else:
|
|
rat = -tol1
|
|
else:
|
|
if (x >= xmid):
|
|
deltax = a - x # if it's not do a golden section step
|
|
else:
|
|
deltax = b - x
|
|
rat = _cg * deltax
|
|
|
|
if (np.abs(rat) < tol1): # update by at least tol1
|
|
if rat >= 0:
|
|
u = x + tol1
|
|
else:
|
|
u = x - tol1
|
|
else:
|
|
u = x + rat
|
|
fu = func(*((u,) + self.args)) # calculate new output value
|
|
funcalls += 1
|
|
|
|
if (fu > fx): # if it's bigger than current
|
|
if (u < x):
|
|
a = u
|
|
else:
|
|
b = u
|
|
if (fu <= fw) or (w == x):
|
|
v = w
|
|
w = u
|
|
fv = fw
|
|
fw = fu
|
|
elif (fu <= fv) or (v == x) or (v == w):
|
|
v = u
|
|
fv = fu
|
|
else:
|
|
if (u >= x):
|
|
a = x
|
|
else:
|
|
b = x
|
|
v = w
|
|
w = x
|
|
x = u
|
|
fv = fw
|
|
fw = fx
|
|
fx = fu
|
|
|
|
if self.disp > 2:
|
|
print(f"{funcalls:^12g} {x:^12.6g} {fx:^12.6g}")
|
|
|
|
iter += 1
|
|
#################################
|
|
#END CORE ALGORITHM
|
|
#################################
|
|
|
|
self.xmin = x
|
|
self.fval = fx
|
|
self.iter = iter
|
|
self.funcalls = funcalls
|
|
|
|
def get_result(self, full_output=False):
|
|
if full_output:
|
|
return self.xmin, self.fval, self.iter, self.funcalls
|
|
else:
|
|
return self.xmin
|
|
|
|
|
|
def brent(func, args=(), brack=None, tol=1.48e-8, full_output=0, maxiter=500):
|
|
"""
|
|
Given a function of one variable and a possible bracket, return
|
|
a local minimizer of the function isolated to a fractional precision
|
|
of tol.
|
|
|
|
Parameters
|
|
----------
|
|
func : callable f(x,*args)
|
|
Objective function.
|
|
args : tuple, optional
|
|
Additional arguments (if present).
|
|
brack : tuple, optional
|
|
Either a triple ``(xa, xb, xc)`` satisfying ``xa < xb < xc`` and
|
|
``func(xb) < func(xa) and func(xb) < func(xc)``, or a pair
|
|
``(xa, xb)`` to be used as initial points for a downhill bracket search
|
|
(see `scipy.optimize.bracket`).
|
|
The minimizer ``x`` will not necessarily satisfy ``xa <= x <= xb``.
|
|
tol : float, optional
|
|
Relative error in solution `xopt` acceptable for convergence.
|
|
full_output : bool, optional
|
|
If True, return all output args (xmin, fval, iter,
|
|
funcalls).
|
|
maxiter : int, optional
|
|
Maximum number of iterations in solution.
|
|
|
|
Returns
|
|
-------
|
|
xmin : ndarray
|
|
Optimum point.
|
|
fval : float
|
|
(Optional output) Optimum function value.
|
|
iter : int
|
|
(Optional output) Number of iterations.
|
|
funcalls : int
|
|
(Optional output) Number of objective function evaluations made.
|
|
|
|
See also
|
|
--------
|
|
minimize_scalar: Interface to minimization algorithms for scalar
|
|
univariate functions. See the 'Brent' `method` in particular.
|
|
|
|
Notes
|
|
-----
|
|
Uses inverse parabolic interpolation when possible to speed up
|
|
convergence of golden section method.
|
|
|
|
Does not ensure that the minimum lies in the range specified by
|
|
`brack`. See `scipy.optimize.fminbound`.
|
|
|
|
Examples
|
|
--------
|
|
We illustrate the behaviour of the function when `brack` is of
|
|
size 2 and 3 respectively. In the case where `brack` is of the
|
|
form ``(xa, xb)``, we can see for the given values, the output does
|
|
not necessarily lie in the range ``(xa, xb)``.
|
|
|
|
>>> def f(x):
|
|
... return (x-1)**2
|
|
|
|
>>> from scipy import optimize
|
|
|
|
>>> minimizer = optimize.brent(f, brack=(1, 2))
|
|
>>> minimizer
|
|
1
|
|
>>> res = optimize.brent(f, brack=(-1, 0.5, 2), full_output=True)
|
|
>>> xmin, fval, iter, funcalls = res
|
|
>>> f(xmin), fval
|
|
(0.0, 0.0)
|
|
|
|
"""
|
|
options = {'xtol': tol,
|
|
'maxiter': maxiter}
|
|
res = _minimize_scalar_brent(func, brack, args, **options)
|
|
if full_output:
|
|
return res['x'], res['fun'], res['nit'], res['nfev']
|
|
else:
|
|
return res['x']
|
|
|
|
|
|
def _minimize_scalar_brent(func, brack=None, args=(), xtol=1.48e-8,
|
|
maxiter=500, disp=0,
|
|
**unknown_options):
|
|
"""
|
|
Options
|
|
-------
|
|
maxiter : int
|
|
Maximum number of iterations to perform.
|
|
xtol : float
|
|
Relative error in solution `xopt` acceptable for convergence.
|
|
disp: int, optional
|
|
If non-zero, print messages.
|
|
0 : no message printing.
|
|
1 : non-convergence notification messages only.
|
|
2 : print a message on convergence too.
|
|
3 : print iteration results.
|
|
Notes
|
|
-----
|
|
Uses inverse parabolic interpolation when possible to speed up
|
|
convergence of golden section method.
|
|
|
|
"""
|
|
_check_unknown_options(unknown_options)
|
|
tol = xtol
|
|
if tol < 0:
|
|
raise ValueError('tolerance should be >= 0, got %r' % tol)
|
|
|
|
brent = Brent(func=func, args=args, tol=tol,
|
|
full_output=True, maxiter=maxiter, disp=disp)
|
|
brent.set_bracket(brack)
|
|
brent.optimize()
|
|
x, fval, nit, nfev = brent.get_result(full_output=True)
|
|
|
|
success = nit < maxiter and not (np.isnan(x) or np.isnan(fval))
|
|
|
|
if success:
|
|
message = ("\nOptimization terminated successfully;\n"
|
|
"The returned value satisfies the termination criteria\n"
|
|
f"(using xtol = {xtol} )")
|
|
else:
|
|
if nit >= maxiter:
|
|
message = "\nMaximum number of iterations exceeded"
|
|
if np.isnan(x) or np.isnan(fval):
|
|
message = f"{_status_message['nan']}"
|
|
|
|
if disp:
|
|
_print_success_message_or_warn(not success, message)
|
|
|
|
return OptimizeResult(fun=fval, x=x, nit=nit, nfev=nfev,
|
|
success=success, message=message)
|
|
|
|
|
|
def golden(func, args=(), brack=None, tol=_epsilon,
|
|
full_output=0, maxiter=5000):
|
|
"""
|
|
Return the minimizer of a function of one variable using the golden section
|
|
method.
|
|
|
|
Given a function of one variable and a possible bracketing interval,
|
|
return a minimizer of the function isolated to a fractional precision of
|
|
tol.
|
|
|
|
Parameters
|
|
----------
|
|
func : callable func(x,*args)
|
|
Objective function to minimize.
|
|
args : tuple, optional
|
|
Additional arguments (if present), passed to func.
|
|
brack : tuple, optional
|
|
Either a triple ``(xa, xb, xc)`` where ``xa < xb < xc`` and
|
|
``func(xb) < func(xa) and func(xb) < func(xc)``, or a pair (xa, xb)
|
|
to be used as initial points for a downhill bracket search (see
|
|
`scipy.optimize.bracket`).
|
|
The minimizer ``x`` will not necessarily satisfy ``xa <= x <= xb``.
|
|
tol : float, optional
|
|
x tolerance stop criterion
|
|
full_output : bool, optional
|
|
If True, return optional outputs.
|
|
maxiter : int
|
|
Maximum number of iterations to perform.
|
|
|
|
Returns
|
|
-------
|
|
xmin : ndarray
|
|
Optimum point.
|
|
fval : float
|
|
(Optional output) Optimum function value.
|
|
funcalls : int
|
|
(Optional output) Number of objective function evaluations made.
|
|
|
|
See also
|
|
--------
|
|
minimize_scalar: Interface to minimization algorithms for scalar
|
|
univariate functions. See the 'Golden' `method` in particular.
|
|
|
|
Notes
|
|
-----
|
|
Uses analog of bisection method to decrease the bracketed
|
|
interval.
|
|
|
|
Examples
|
|
--------
|
|
We illustrate the behaviour of the function when `brack` is of
|
|
size 2 and 3, respectively. In the case where `brack` is of the
|
|
form (xa,xb), we can see for the given values, the output need
|
|
not necessarily lie in the range ``(xa, xb)``.
|
|
|
|
>>> def f(x):
|
|
... return (x-1)**2
|
|
|
|
>>> from scipy import optimize
|
|
|
|
>>> minimizer = optimize.golden(f, brack=(1, 2))
|
|
>>> minimizer
|
|
1
|
|
>>> res = optimize.golden(f, brack=(-1, 0.5, 2), full_output=True)
|
|
>>> xmin, fval, funcalls = res
|
|
>>> f(xmin), fval
|
|
(9.925165290385052e-18, 9.925165290385052e-18)
|
|
|
|
"""
|
|
options = {'xtol': tol, 'maxiter': maxiter}
|
|
res = _minimize_scalar_golden(func, brack, args, **options)
|
|
if full_output:
|
|
return res['x'], res['fun'], res['nfev']
|
|
else:
|
|
return res['x']
|
|
|
|
|
|
def _minimize_scalar_golden(func, brack=None, args=(),
|
|
xtol=_epsilon, maxiter=5000, disp=0,
|
|
**unknown_options):
|
|
"""
|
|
Options
|
|
-------
|
|
xtol : float
|
|
Relative error in solution `xopt` acceptable for convergence.
|
|
maxiter : int
|
|
Maximum number of iterations to perform.
|
|
disp: int, optional
|
|
If non-zero, print messages.
|
|
0 : no message printing.
|
|
1 : non-convergence notification messages only.
|
|
2 : print a message on convergence too.
|
|
3 : print iteration results.
|
|
"""
|
|
_check_unknown_options(unknown_options)
|
|
tol = xtol
|
|
if brack is None:
|
|
xa, xb, xc, fa, fb, fc, funcalls = bracket(func, args=args)
|
|
elif len(brack) == 2:
|
|
xa, xb, xc, fa, fb, fc, funcalls = bracket(func, xa=brack[0],
|
|
xb=brack[1], args=args)
|
|
elif len(brack) == 3:
|
|
xa, xb, xc = brack
|
|
if (xa > xc): # swap so xa < xc can be assumed
|
|
xc, xa = xa, xc
|
|
if not ((xa < xb) and (xb < xc)):
|
|
raise ValueError(
|
|
"Bracketing values (xa, xb, xc) do not"
|
|
" fulfill this requirement: (xa < xb) and (xb < xc)"
|
|
)
|
|
fa = func(*((xa,) + args))
|
|
fb = func(*((xb,) + args))
|
|
fc = func(*((xc,) + args))
|
|
if not ((fb < fa) and (fb < fc)):
|
|
raise ValueError(
|
|
"Bracketing values (xa, xb, xc) do not fulfill"
|
|
" this requirement: (f(xb) < f(xa)) and (f(xb) < f(xc))"
|
|
)
|
|
funcalls = 3
|
|
else:
|
|
raise ValueError("Bracketing interval must be length 2 or 3 sequence.")
|
|
|
|
_gR = 0.61803399 # golden ratio conjugate: 2.0/(1.0+sqrt(5.0))
|
|
_gC = 1.0 - _gR
|
|
x3 = xc
|
|
x0 = xa
|
|
if (np.abs(xc - xb) > np.abs(xb - xa)):
|
|
x1 = xb
|
|
x2 = xb + _gC * (xc - xb)
|
|
else:
|
|
x2 = xb
|
|
x1 = xb - _gC * (xb - xa)
|
|
f1 = func(*((x1,) + args))
|
|
f2 = func(*((x2,) + args))
|
|
funcalls += 2
|
|
nit = 0
|
|
|
|
if disp > 2:
|
|
print(" ")
|
|
print(f"{'Func-count':^12} {'x':^12} {'f(x)': ^12}")
|
|
|
|
for i in range(maxiter):
|
|
if np.abs(x3 - x0) <= tol * (np.abs(x1) + np.abs(x2)):
|
|
break
|
|
if (f2 < f1):
|
|
x0 = x1
|
|
x1 = x2
|
|
x2 = _gR * x1 + _gC * x3
|
|
f1 = f2
|
|
f2 = func(*((x2,) + args))
|
|
else:
|
|
x3 = x2
|
|
x2 = x1
|
|
x1 = _gR * x2 + _gC * x0
|
|
f2 = f1
|
|
f1 = func(*((x1,) + args))
|
|
funcalls += 1
|
|
if disp > 2:
|
|
if (f1 < f2):
|
|
xmin, fval = x1, f1
|
|
else:
|
|
xmin, fval = x2, f2
|
|
print(f"{funcalls:^12g} {xmin:^12.6g} {fval:^12.6g}")
|
|
|
|
nit += 1
|
|
# end of iteration loop
|
|
|
|
if (f1 < f2):
|
|
xmin = x1
|
|
fval = f1
|
|
else:
|
|
xmin = x2
|
|
fval = f2
|
|
|
|
success = nit < maxiter and not (np.isnan(fval) or np.isnan(xmin))
|
|
|
|
if success:
|
|
message = ("\nOptimization terminated successfully;\n"
|
|
"The returned value satisfies the termination criteria\n"
|
|
f"(using xtol = {xtol} )")
|
|
else:
|
|
if nit >= maxiter:
|
|
message = "\nMaximum number of iterations exceeded"
|
|
if np.isnan(xmin) or np.isnan(fval):
|
|
message = f"{_status_message['nan']}"
|
|
|
|
if disp:
|
|
_print_success_message_or_warn(not success, message)
|
|
|
|
return OptimizeResult(fun=fval, nfev=funcalls, x=xmin, nit=nit,
|
|
success=success, message=message)
|
|
|
|
|
|
def bracket(func, xa=0.0, xb=1.0, args=(), grow_limit=110.0, maxiter=1000):
|
|
"""
|
|
Bracket the minimum of a function.
|
|
|
|
Given a function and distinct initial points, search in the
|
|
downhill direction (as defined by the initial points) and return
|
|
three points that bracket the minimum of the function.
|
|
|
|
Parameters
|
|
----------
|
|
func : callable f(x,*args)
|
|
Objective function to minimize.
|
|
xa, xb : float, optional
|
|
Initial points. Defaults `xa` to 0.0, and `xb` to 1.0.
|
|
A local minimum need not be contained within this interval.
|
|
args : tuple, optional
|
|
Additional arguments (if present), passed to `func`.
|
|
grow_limit : float, optional
|
|
Maximum grow limit. Defaults to 110.0
|
|
maxiter : int, optional
|
|
Maximum number of iterations to perform. Defaults to 1000.
|
|
|
|
Returns
|
|
-------
|
|
xa, xb, xc : float
|
|
Final points of the bracket.
|
|
fa, fb, fc : float
|
|
Objective function values at the bracket points.
|
|
funcalls : int
|
|
Number of function evaluations made.
|
|
|
|
Raises
|
|
------
|
|
BracketError
|
|
If no valid bracket is found before the algorithm terminates.
|
|
See notes for conditions of a valid bracket.
|
|
|
|
Notes
|
|
-----
|
|
The algorithm attempts to find three strictly ordered points (i.e.
|
|
:math:`x_a < x_b < x_c` or :math:`x_c < x_b < x_a`) satisfying
|
|
:math:`f(x_b) ≤ f(x_a)` and :math:`f(x_b) ≤ f(x_c)`, where one of the
|
|
inequalities must be satistfied strictly and all :math:`x_i` must be
|
|
finite.
|
|
|
|
Examples
|
|
--------
|
|
This function can find a downward convex region of a function:
|
|
|
|
>>> import numpy as np
|
|
>>> import matplotlib.pyplot as plt
|
|
>>> from scipy.optimize import bracket
|
|
>>> def f(x):
|
|
... return 10*x**2 + 3*x + 5
|
|
>>> x = np.linspace(-2, 2)
|
|
>>> y = f(x)
|
|
>>> init_xa, init_xb = 0.1, 1
|
|
>>> xa, xb, xc, fa, fb, fc, funcalls = bracket(f, xa=init_xa, xb=init_xb)
|
|
>>> plt.axvline(x=init_xa, color="k", linestyle="--")
|
|
>>> plt.axvline(x=init_xb, color="k", linestyle="--")
|
|
>>> plt.plot(x, y, "-k")
|
|
>>> plt.plot(xa, fa, "bx")
|
|
>>> plt.plot(xb, fb, "rx")
|
|
>>> plt.plot(xc, fc, "bx")
|
|
>>> plt.show()
|
|
|
|
Note that both initial points were to the right of the minimum, and the
|
|
third point was found in the "downhill" direction: the direction
|
|
in which the function appeared to be decreasing (to the left).
|
|
The final points are strictly ordered, and the function value
|
|
at the middle point is less than the function values at the endpoints;
|
|
it follows that a minimum must lie within the bracket.
|
|
|
|
"""
|
|
_gold = 1.618034 # golden ratio: (1.0+sqrt(5.0))/2.0
|
|
_verysmall_num = 1e-21
|
|
# convert to numpy floats if not already
|
|
xa, xb = np.asarray([xa, xb])
|
|
fa = func(*(xa,) + args)
|
|
fb = func(*(xb,) + args)
|
|
if (fa < fb): # Switch so fa > fb
|
|
xa, xb = xb, xa
|
|
fa, fb = fb, fa
|
|
xc = xb + _gold * (xb - xa)
|
|
fc = func(*((xc,) + args))
|
|
funcalls = 3
|
|
iter = 0
|
|
while (fc < fb):
|
|
tmp1 = (xb - xa) * (fb - fc)
|
|
tmp2 = (xb - xc) * (fb - fa)
|
|
val = tmp2 - tmp1
|
|
if np.abs(val) < _verysmall_num:
|
|
denom = 2.0 * _verysmall_num
|
|
else:
|
|
denom = 2.0 * val
|
|
w = xb - ((xb - xc) * tmp2 - (xb - xa) * tmp1) / denom
|
|
wlim = xb + grow_limit * (xc - xb)
|
|
msg = ("No valid bracket was found before the iteration limit was "
|
|
"reached. Consider trying different initial points or "
|
|
"increasing `maxiter`.")
|
|
if iter > maxiter:
|
|
raise RuntimeError(msg)
|
|
iter += 1
|
|
if (w - xc) * (xb - w) > 0.0:
|
|
fw = func(*((w,) + args))
|
|
funcalls += 1
|
|
if (fw < fc):
|
|
xa = xb
|
|
xb = w
|
|
fa = fb
|
|
fb = fw
|
|
break
|
|
elif (fw > fb):
|
|
xc = w
|
|
fc = fw
|
|
break
|
|
w = xc + _gold * (xc - xb)
|
|
fw = func(*((w,) + args))
|
|
funcalls += 1
|
|
elif (w - wlim)*(wlim - xc) >= 0.0:
|
|
w = wlim
|
|
fw = func(*((w,) + args))
|
|
funcalls += 1
|
|
elif (w - wlim)*(xc - w) > 0.0:
|
|
fw = func(*((w,) + args))
|
|
funcalls += 1
|
|
if (fw < fc):
|
|
xb = xc
|
|
xc = w
|
|
w = xc + _gold * (xc - xb)
|
|
fb = fc
|
|
fc = fw
|
|
fw = func(*((w,) + args))
|
|
funcalls += 1
|
|
else:
|
|
w = xc + _gold * (xc - xb)
|
|
fw = func(*((w,) + args))
|
|
funcalls += 1
|
|
xa = xb
|
|
xb = xc
|
|
xc = w
|
|
fa = fb
|
|
fb = fc
|
|
fc = fw
|
|
|
|
# three conditions for a valid bracket
|
|
cond1 = (fb < fc and fb <= fa) or (fb < fa and fb <= fc)
|
|
cond2 = (xa < xb < xc or xc < xb < xa)
|
|
cond3 = np.isfinite(xa) and np.isfinite(xb) and np.isfinite(xc)
|
|
msg = ("The algorithm terminated without finding a valid bracket. "
|
|
"Consider trying different initial points.")
|
|
if not (cond1 and cond2 and cond3):
|
|
e = BracketError(msg)
|
|
e.data = (xa, xb, xc, fa, fb, fc, funcalls)
|
|
raise e
|
|
|
|
return xa, xb, xc, fa, fb, fc, funcalls
|
|
|
|
|
|
class BracketError(RuntimeError):
|
|
pass
|
|
|
|
|
|
def _recover_from_bracket_error(solver, fun, bracket, args, **options):
|
|
# `bracket` was originally written without checking whether the resulting
|
|
# bracket is valid. `brent` and `golden` built on top of it without
|
|
# checking the returned bracket for validity, and their output can be
|
|
# incorrect without warning/error if the original bracket is invalid.
|
|
# gh-14858 noticed the problem, and the following is the desired
|
|
# behavior:
|
|
# - `scipy.optimize.bracket`, `scipy.optimize.brent`, and
|
|
# `scipy.optimize.golden` should raise an error if the bracket is
|
|
# invalid, as opposed to silently returning garbage
|
|
# - `scipy.optimize.minimize_scalar` should return with `success=False`
|
|
# and other information
|
|
# The changes that would be required to achieve this the traditional
|
|
# way (`return`ing all the required information from bracket all the way
|
|
# up to `minimizer_scalar`) are extensive and invasive. (See a6aa40d.)
|
|
# We can achieve the same thing by raising the error in `bracket`, but
|
|
# storing the information needed by `minimize_scalar` in the error object,
|
|
# and intercepting it here.
|
|
try:
|
|
res = solver(fun, bracket, args, **options)
|
|
except BracketError as e:
|
|
msg = str(e)
|
|
xa, xb, xc, fa, fb, fc, funcalls = e.data
|
|
xs, fs = [xa, xb, xc], [fa, fb, fc]
|
|
if np.any(np.isnan([xs, fs])):
|
|
x, fun = np.nan, np.nan
|
|
else:
|
|
imin = np.argmin(fs)
|
|
x, fun = xs[imin], fs[imin]
|
|
return OptimizeResult(fun=fun, nfev=funcalls, x=x,
|
|
nit=0, success=False, message=msg)
|
|
return res
|
|
|
|
|
|
def _line_for_search(x0, alpha, lower_bound, upper_bound):
|
|
"""
|
|
Given a parameter vector ``x0`` with length ``n`` and a direction
|
|
vector ``alpha`` with length ``n``, and lower and upper bounds on
|
|
each of the ``n`` parameters, what are the bounds on a scalar
|
|
``l`` such that ``lower_bound <= x0 + alpha * l <= upper_bound``.
|
|
|
|
|
|
Parameters
|
|
----------
|
|
x0 : np.array.
|
|
The vector representing the current location.
|
|
Note ``np.shape(x0) == (n,)``.
|
|
alpha : np.array.
|
|
The vector representing the direction.
|
|
Note ``np.shape(alpha) == (n,)``.
|
|
lower_bound : np.array.
|
|
The lower bounds for each parameter in ``x0``. If the ``i``th
|
|
parameter in ``x0`` is unbounded below, then ``lower_bound[i]``
|
|
should be ``-np.inf``.
|
|
Note ``np.shape(lower_bound) == (n,)``.
|
|
upper_bound : np.array.
|
|
The upper bounds for each parameter in ``x0``. If the ``i``th
|
|
parameter in ``x0`` is unbounded above, then ``upper_bound[i]``
|
|
should be ``np.inf``.
|
|
Note ``np.shape(upper_bound) == (n,)``.
|
|
|
|
Returns
|
|
-------
|
|
res : tuple ``(lmin, lmax)``
|
|
The bounds for ``l`` such that
|
|
``lower_bound[i] <= x0[i] + alpha[i] * l <= upper_bound[i]``
|
|
for all ``i``.
|
|
|
|
"""
|
|
# get nonzero indices of alpha so we don't get any zero division errors.
|
|
# alpha will not be all zero, since it is called from _linesearch_powell
|
|
# where we have a check for this.
|
|
nonzero, = alpha.nonzero()
|
|
lower_bound, upper_bound = lower_bound[nonzero], upper_bound[nonzero]
|
|
x0, alpha = x0[nonzero], alpha[nonzero]
|
|
low = (lower_bound - x0) / alpha
|
|
high = (upper_bound - x0) / alpha
|
|
|
|
# positive and negative indices
|
|
pos = alpha > 0
|
|
|
|
lmin_pos = np.where(pos, low, 0)
|
|
lmin_neg = np.where(pos, 0, high)
|
|
lmax_pos = np.where(pos, high, 0)
|
|
lmax_neg = np.where(pos, 0, low)
|
|
|
|
lmin = np.max(lmin_pos + lmin_neg)
|
|
lmax = np.min(lmax_pos + lmax_neg)
|
|
|
|
# if x0 is outside the bounds, then it is possible that there is
|
|
# no way to get back in the bounds for the parameters being updated
|
|
# with the current direction alpha.
|
|
# when this happens, lmax < lmin.
|
|
# If this is the case, then we can just return (0, 0)
|
|
return (lmin, lmax) if lmax >= lmin else (0, 0)
|
|
|
|
|
|
def _linesearch_powell(func, p, xi, tol=1e-3,
|
|
lower_bound=None, upper_bound=None, fval=None):
|
|
"""Line-search algorithm using fminbound.
|
|
|
|
Find the minimum of the function ``func(x0 + alpha*direc)``.
|
|
|
|
lower_bound : np.array.
|
|
The lower bounds for each parameter in ``x0``. If the ``i``th
|
|
parameter in ``x0`` is unbounded below, then ``lower_bound[i]``
|
|
should be ``-np.inf``.
|
|
Note ``np.shape(lower_bound) == (n,)``.
|
|
upper_bound : np.array.
|
|
The upper bounds for each parameter in ``x0``. If the ``i``th
|
|
parameter in ``x0`` is unbounded above, then ``upper_bound[i]``
|
|
should be ``np.inf``.
|
|
Note ``np.shape(upper_bound) == (n,)``.
|
|
fval : number.
|
|
``fval`` is equal to ``func(p)``, the idea is just to avoid
|
|
recomputing it so we can limit the ``fevals``.
|
|
|
|
"""
|
|
def myfunc(alpha):
|
|
return func(p + alpha*xi)
|
|
|
|
# if xi is zero, then don't optimize
|
|
if not np.any(xi):
|
|
return ((fval, p, xi) if fval is not None else (func(p), p, xi))
|
|
elif lower_bound is None and upper_bound is None:
|
|
# non-bounded minimization
|
|
res = _recover_from_bracket_error(_minimize_scalar_brent,
|
|
myfunc, None, tuple(), xtol=tol)
|
|
alpha_min, fret = res.x, res.fun
|
|
xi = alpha_min * xi
|
|
return squeeze(fret), p + xi, xi
|
|
else:
|
|
bound = _line_for_search(p, xi, lower_bound, upper_bound)
|
|
if np.isneginf(bound[0]) and np.isposinf(bound[1]):
|
|
# equivalent to unbounded
|
|
return _linesearch_powell(func, p, xi, fval=fval, tol=tol)
|
|
elif not np.isneginf(bound[0]) and not np.isposinf(bound[1]):
|
|
# we can use a bounded scalar minimization
|
|
res = _minimize_scalar_bounded(myfunc, bound, xatol=tol / 100)
|
|
xi = res.x * xi
|
|
return squeeze(res.fun), p + xi, xi
|
|
else:
|
|
# only bounded on one side. use the tangent function to convert
|
|
# the infinity bound to a finite bound. The new bounded region
|
|
# is a subregion of the region bounded by -np.pi/2 and np.pi/2.
|
|
bound = np.arctan(bound[0]), np.arctan(bound[1])
|
|
res = _minimize_scalar_bounded(
|
|
lambda x: myfunc(np.tan(x)),
|
|
bound,
|
|
xatol=tol / 100)
|
|
xi = np.tan(res.x) * xi
|
|
return squeeze(res.fun), p + xi, xi
|
|
|
|
|
|
def fmin_powell(func, x0, args=(), xtol=1e-4, ftol=1e-4, maxiter=None,
|
|
maxfun=None, full_output=0, disp=1, retall=0, callback=None,
|
|
direc=None):
|
|
"""
|
|
Minimize a function using modified Powell's method.
|
|
|
|
This method only uses function values, not derivatives.
|
|
|
|
Parameters
|
|
----------
|
|
func : callable f(x,*args)
|
|
Objective function to be minimized.
|
|
x0 : ndarray
|
|
Initial guess.
|
|
args : tuple, optional
|
|
Extra arguments passed to func.
|
|
xtol : float, optional
|
|
Line-search error tolerance.
|
|
ftol : float, optional
|
|
Relative error in ``func(xopt)`` acceptable for convergence.
|
|
maxiter : int, optional
|
|
Maximum number of iterations to perform.
|
|
maxfun : int, optional
|
|
Maximum number of function evaluations to make.
|
|
full_output : bool, optional
|
|
If True, ``fopt``, ``xi``, ``direc``, ``iter``, ``funcalls``, and
|
|
``warnflag`` are returned.
|
|
disp : bool, optional
|
|
If True, print convergence messages.
|
|
retall : bool, optional
|
|
If True, return a list of the solution at each iteration.
|
|
callback : callable, optional
|
|
An optional user-supplied function, called after each
|
|
iteration. Called as ``callback(xk)``, where ``xk`` is the
|
|
current parameter vector.
|
|
direc : ndarray, optional
|
|
Initial fitting step and parameter order set as an (N, N) array, where N
|
|
is the number of fitting parameters in `x0`. Defaults to step size 1.0
|
|
fitting all parameters simultaneously (``np.eye((N, N))``). To
|
|
prevent initial consideration of values in a step or to change initial
|
|
step size, set to 0 or desired step size in the Jth position in the Mth
|
|
block, where J is the position in `x0` and M is the desired evaluation
|
|
step, with steps being evaluated in index order. Step size and ordering
|
|
will change freely as minimization proceeds.
|
|
|
|
Returns
|
|
-------
|
|
xopt : ndarray
|
|
Parameter which minimizes `func`.
|
|
fopt : number
|
|
Value of function at minimum: ``fopt = func(xopt)``.
|
|
direc : ndarray
|
|
Current direction set.
|
|
iter : int
|
|
Number of iterations.
|
|
funcalls : int
|
|
Number of function calls made.
|
|
warnflag : int
|
|
Integer warning flag:
|
|
1 : Maximum number of function evaluations.
|
|
2 : Maximum number of iterations.
|
|
3 : NaN result encountered.
|
|
4 : The result is out of the provided bounds.
|
|
allvecs : list
|
|
List of solutions at each iteration.
|
|
|
|
See also
|
|
--------
|
|
minimize: Interface to unconstrained minimization algorithms for
|
|
multivariate functions. See the 'Powell' method in particular.
|
|
|
|
Notes
|
|
-----
|
|
Uses a modification of Powell's method to find the minimum of
|
|
a function of N variables. Powell's method is a conjugate
|
|
direction method.
|
|
|
|
The algorithm has two loops. The outer loop merely iterates over the inner
|
|
loop. The inner loop minimizes over each current direction in the direction
|
|
set. At the end of the inner loop, if certain conditions are met, the
|
|
direction that gave the largest decrease is dropped and replaced with the
|
|
difference between the current estimated x and the estimated x from the
|
|
beginning of the inner-loop.
|
|
|
|
The technical conditions for replacing the direction of greatest
|
|
increase amount to checking that
|
|
|
|
1. No further gain can be made along the direction of greatest increase
|
|
from that iteration.
|
|
2. The direction of greatest increase accounted for a large sufficient
|
|
fraction of the decrease in the function value from that iteration of
|
|
the inner loop.
|
|
|
|
References
|
|
----------
|
|
Powell M.J.D. (1964) An efficient method for finding the minimum of a
|
|
function of several variables without calculating derivatives,
|
|
Computer Journal, 7 (2):155-162.
|
|
|
|
Press W., Teukolsky S.A., Vetterling W.T., and Flannery B.P.:
|
|
Numerical Recipes (any edition), Cambridge University Press
|
|
|
|
Examples
|
|
--------
|
|
>>> def f(x):
|
|
... return x**2
|
|
|
|
>>> from scipy import optimize
|
|
|
|
>>> minimum = optimize.fmin_powell(f, -1)
|
|
Optimization terminated successfully.
|
|
Current function value: 0.000000
|
|
Iterations: 2
|
|
Function evaluations: 16
|
|
>>> minimum
|
|
array(0.0)
|
|
|
|
"""
|
|
opts = {'xtol': xtol,
|
|
'ftol': ftol,
|
|
'maxiter': maxiter,
|
|
'maxfev': maxfun,
|
|
'disp': disp,
|
|
'direc': direc,
|
|
'return_all': retall}
|
|
|
|
callback = _wrap_callback(callback)
|
|
res = _minimize_powell(func, x0, args, callback=callback, **opts)
|
|
|
|
if full_output:
|
|
retlist = (res['x'], res['fun'], res['direc'], res['nit'],
|
|
res['nfev'], res['status'])
|
|
if retall:
|
|
retlist += (res['allvecs'], )
|
|
return retlist
|
|
else:
|
|
if retall:
|
|
return res['x'], res['allvecs']
|
|
else:
|
|
return res['x']
|
|
|
|
|
|
def _minimize_powell(func, x0, args=(), callback=None, bounds=None,
|
|
xtol=1e-4, ftol=1e-4, maxiter=None, maxfev=None,
|
|
disp=False, direc=None, return_all=False,
|
|
**unknown_options):
|
|
"""
|
|
Minimization of scalar function of one or more variables using the
|
|
modified Powell algorithm.
|
|
|
|
Parameters
|
|
----------
|
|
fun : callable
|
|
The objective function to be minimized.
|
|
|
|
``fun(x, *args) -> float``
|
|
|
|
where ``x`` is a 1-D array with shape (n,) and ``args``
|
|
is a tuple of the fixed parameters needed to completely
|
|
specify the function.
|
|
x0 : ndarray, shape (n,)
|
|
Initial guess. Array of real elements of size (n,),
|
|
where ``n`` is the number of independent variables.
|
|
args : tuple, optional
|
|
Extra arguments passed to the objective function and its
|
|
derivatives (`fun`, `jac` and `hess` functions).
|
|
method : str or callable, optional
|
|
The present documentation is specific to ``method='powell'``, but other
|
|
options are available. See documentation for `scipy.optimize.minimize`.
|
|
bounds : sequence or `Bounds`, optional
|
|
Bounds on decision variables. There are two ways to specify the bounds:
|
|
|
|
1. Instance of `Bounds` class.
|
|
2. Sequence of ``(min, max)`` pairs for each element in `x`. None
|
|
is used to specify no bound.
|
|
|
|
If bounds are not provided, then an unbounded line search will be used.
|
|
If bounds are provided and the initial guess is within the bounds, then
|
|
every function evaluation throughout the minimization procedure will be
|
|
within the bounds. If bounds are provided, the initial guess is outside
|
|
the bounds, and `direc` is full rank (or left to default), then some
|
|
function evaluations during the first iteration may be outside the
|
|
bounds, but every function evaluation after the first iteration will be
|
|
within the bounds. If `direc` is not full rank, then some parameters
|
|
may not be optimized and the solution is not guaranteed to be within
|
|
the bounds.
|
|
|
|
options : dict, optional
|
|
A dictionary of solver options. All methods accept the following
|
|
generic options:
|
|
|
|
maxiter : int
|
|
Maximum number of iterations to perform. Depending on the
|
|
method each iteration may use several function evaluations.
|
|
disp : bool
|
|
Set to True to print convergence messages.
|
|
|
|
See method-specific options for ``method='powell'`` below.
|
|
callback : callable, optional
|
|
Called after each iteration. The signature is:
|
|
|
|
``callback(xk)``
|
|
|
|
where ``xk`` is the current parameter vector.
|
|
|
|
Returns
|
|
-------
|
|
res : OptimizeResult
|
|
The optimization result represented as a ``OptimizeResult`` object.
|
|
Important attributes are: ``x`` the solution array, ``success`` a
|
|
Boolean flag indicating if the optimizer exited successfully and
|
|
``message`` which describes the cause of the termination. See
|
|
`OptimizeResult` for a description of other attributes.
|
|
|
|
Options
|
|
-------
|
|
disp : bool
|
|
Set to True to print convergence messages.
|
|
xtol : float
|
|
Relative error in solution `xopt` acceptable for convergence.
|
|
ftol : float
|
|
Relative error in ``fun(xopt)`` acceptable for convergence.
|
|
maxiter, maxfev : int
|
|
Maximum allowed number of iterations and function evaluations.
|
|
Will default to ``N*1000``, where ``N`` is the number of
|
|
variables, if neither `maxiter` or `maxfev` is set. If both
|
|
`maxiter` and `maxfev` are set, minimization will stop at the
|
|
first reached.
|
|
direc : ndarray
|
|
Initial set of direction vectors for the Powell method.
|
|
return_all : bool, optional
|
|
Set to True to return a list of the best solution at each of the
|
|
iterations.
|
|
"""
|
|
_check_unknown_options(unknown_options)
|
|
maxfun = maxfev
|
|
retall = return_all
|
|
|
|
x = asarray(x0).flatten()
|
|
if retall:
|
|
allvecs = [x]
|
|
N = len(x)
|
|
# If neither are set, then set both to default
|
|
if maxiter is None and maxfun is None:
|
|
maxiter = N * 1000
|
|
maxfun = N * 1000
|
|
elif maxiter is None:
|
|
# Convert remaining Nones, to np.inf, unless the other is np.inf, in
|
|
# which case use the default to avoid unbounded iteration
|
|
if maxfun == np.inf:
|
|
maxiter = N * 1000
|
|
else:
|
|
maxiter = np.inf
|
|
elif maxfun is None:
|
|
if maxiter == np.inf:
|
|
maxfun = N * 1000
|
|
else:
|
|
maxfun = np.inf
|
|
|
|
# we need to use a mutable object here that we can update in the
|
|
# wrapper function
|
|
fcalls, func = _wrap_scalar_function_maxfun_validation(func, args, maxfun)
|
|
|
|
if direc is None:
|
|
direc = eye(N, dtype=float)
|
|
else:
|
|
direc = asarray(direc, dtype=float)
|
|
if np.linalg.matrix_rank(direc) != direc.shape[0]:
|
|
warnings.warn("direc input is not full rank, some parameters may "
|
|
"not be optimized",
|
|
OptimizeWarning, stacklevel=3)
|
|
|
|
if bounds is None:
|
|
# don't make these arrays of all +/- inf. because
|
|
# _linesearch_powell will do an unnecessary check of all the elements.
|
|
# just keep them None, _linesearch_powell will not have to check
|
|
# all the elements.
|
|
lower_bound, upper_bound = None, None
|
|
else:
|
|
# bounds is standardized in _minimize.py.
|
|
lower_bound, upper_bound = bounds.lb, bounds.ub
|
|
if np.any(lower_bound > x0) or np.any(x0 > upper_bound):
|
|
warnings.warn("Initial guess is not within the specified bounds",
|
|
OptimizeWarning, stacklevel=3)
|
|
|
|
fval = squeeze(func(x))
|
|
x1 = x.copy()
|
|
iter = 0
|
|
while True:
|
|
try:
|
|
fx = fval
|
|
bigind = 0
|
|
delta = 0.0
|
|
for i in range(N):
|
|
direc1 = direc[i]
|
|
fx2 = fval
|
|
fval, x, direc1 = _linesearch_powell(func, x, direc1,
|
|
tol=xtol * 100,
|
|
lower_bound=lower_bound,
|
|
upper_bound=upper_bound,
|
|
fval=fval)
|
|
if (fx2 - fval) > delta:
|
|
delta = fx2 - fval
|
|
bigind = i
|
|
iter += 1
|
|
if retall:
|
|
allvecs.append(x)
|
|
intermediate_result = OptimizeResult(x=x, fun=fval)
|
|
if _call_callback_maybe_halt(callback, intermediate_result):
|
|
break
|
|
bnd = ftol * (np.abs(fx) + np.abs(fval)) + 1e-20
|
|
if 2.0 * (fx - fval) <= bnd:
|
|
break
|
|
if fcalls[0] >= maxfun:
|
|
break
|
|
if iter >= maxiter:
|
|
break
|
|
if np.isnan(fx) and np.isnan(fval):
|
|
# Ended up in a nan-region: bail out
|
|
break
|
|
|
|
# Construct the extrapolated point
|
|
direc1 = x - x1
|
|
x1 = x.copy()
|
|
# make sure that we don't go outside the bounds when extrapolating
|
|
if lower_bound is None and upper_bound is None:
|
|
lmax = 1
|
|
else:
|
|
_, lmax = _line_for_search(x, direc1, lower_bound, upper_bound)
|
|
x2 = x + min(lmax, 1) * direc1
|
|
fx2 = squeeze(func(x2))
|
|
|
|
if (fx > fx2):
|
|
t = 2.0*(fx + fx2 - 2.0*fval)
|
|
temp = (fx - fval - delta)
|
|
t *= temp*temp
|
|
temp = fx - fx2
|
|
t -= delta*temp*temp
|
|
if t < 0.0:
|
|
fval, x, direc1 = _linesearch_powell(
|
|
func, x, direc1,
|
|
tol=xtol * 100,
|
|
lower_bound=lower_bound,
|
|
upper_bound=upper_bound,
|
|
fval=fval
|
|
)
|
|
if np.any(direc1):
|
|
direc[bigind] = direc[-1]
|
|
direc[-1] = direc1
|
|
except _MaxFuncCallError:
|
|
break
|
|
|
|
warnflag = 0
|
|
msg = _status_message['success']
|
|
# out of bounds is more urgent than exceeding function evals or iters,
|
|
# but I don't want to cause inconsistencies by changing the
|
|
# established warning flags for maxfev and maxiter, so the out of bounds
|
|
# warning flag becomes 3, but is checked for first.
|
|
if bounds and (np.any(lower_bound > x) or np.any(x > upper_bound)):
|
|
warnflag = 4
|
|
msg = _status_message['out_of_bounds']
|
|
elif fcalls[0] >= maxfun:
|
|
warnflag = 1
|
|
msg = _status_message['maxfev']
|
|
elif iter >= maxiter:
|
|
warnflag = 2
|
|
msg = _status_message['maxiter']
|
|
elif np.isnan(fval) or np.isnan(x).any():
|
|
warnflag = 3
|
|
msg = _status_message['nan']
|
|
|
|
if disp:
|
|
_print_success_message_or_warn(warnflag, msg, RuntimeWarning)
|
|
print(" Current function value: %f" % fval)
|
|
print(" Iterations: %d" % iter)
|
|
print(" Function evaluations: %d" % fcalls[0])
|
|
|
|
result = OptimizeResult(fun=fval, direc=direc, nit=iter, nfev=fcalls[0],
|
|
status=warnflag, success=(warnflag == 0),
|
|
message=msg, x=x)
|
|
if retall:
|
|
result['allvecs'] = allvecs
|
|
return result
|
|
|
|
|
|
def _endprint(x, flag, fval, maxfun, xtol, disp):
|
|
if flag == 0:
|
|
if disp > 1:
|
|
print("\nOptimization terminated successfully;\n"
|
|
"The returned value satisfies the termination criteria\n"
|
|
"(using xtol = ", xtol, ")")
|
|
return
|
|
|
|
if flag == 1:
|
|
msg = ("\nMaximum number of function evaluations exceeded --- "
|
|
"increase maxfun argument.\n")
|
|
elif flag == 2:
|
|
msg = "\n{}".format(_status_message['nan'])
|
|
|
|
_print_success_message_or_warn(flag, msg)
|
|
return
|
|
|
|
|
|
def brute(func, ranges, args=(), Ns=20, full_output=0, finish=fmin,
|
|
disp=False, workers=1):
|
|
"""Minimize a function over a given range by brute force.
|
|
|
|
Uses the "brute force" method, i.e., computes the function's value
|
|
at each point of a multidimensional grid of points, to find the global
|
|
minimum of the function.
|
|
|
|
The function is evaluated everywhere in the range with the datatype of the
|
|
first call to the function, as enforced by the ``vectorize`` NumPy
|
|
function. The value and type of the function evaluation returned when
|
|
``full_output=True`` are affected in addition by the ``finish`` argument
|
|
(see Notes).
|
|
|
|
The brute force approach is inefficient because the number of grid points
|
|
increases exponentially - the number of grid points to evaluate is
|
|
``Ns ** len(x)``. Consequently, even with coarse grid spacing, even
|
|
moderately sized problems can take a long time to run, and/or run into
|
|
memory limitations.
|
|
|
|
Parameters
|
|
----------
|
|
func : callable
|
|
The objective function to be minimized. Must be in the
|
|
form ``f(x, *args)``, where ``x`` is the argument in
|
|
the form of a 1-D array and ``args`` is a tuple of any
|
|
additional fixed parameters needed to completely specify
|
|
the function.
|
|
ranges : tuple
|
|
Each component of the `ranges` tuple must be either a
|
|
"slice object" or a range tuple of the form ``(low, high)``.
|
|
The program uses these to create the grid of points on which
|
|
the objective function will be computed. See `Note 2` for
|
|
more detail.
|
|
args : tuple, optional
|
|
Any additional fixed parameters needed to completely specify
|
|
the function.
|
|
Ns : int, optional
|
|
Number of grid points along the axes, if not otherwise
|
|
specified. See `Note2`.
|
|
full_output : bool, optional
|
|
If True, return the evaluation grid and the objective function's
|
|
values on it.
|
|
finish : callable, optional
|
|
An optimization function that is called with the result of brute force
|
|
minimization as initial guess. `finish` should take `func` and
|
|
the initial guess as positional arguments, and take `args` as
|
|
keyword arguments. It may additionally take `full_output`
|
|
and/or `disp` as keyword arguments. Use None if no "polishing"
|
|
function is to be used. See Notes for more details.
|
|
disp : bool, optional
|
|
Set to True to print convergence messages from the `finish` callable.
|
|
workers : int or map-like callable, optional
|
|
If `workers` is an int the grid is subdivided into `workers`
|
|
sections and evaluated in parallel (uses
|
|
`multiprocessing.Pool <multiprocessing>`).
|
|
Supply `-1` to use all cores available to the Process.
|
|
Alternatively supply a map-like callable, such as
|
|
`multiprocessing.Pool.map` for evaluating the grid in parallel.
|
|
This evaluation is carried out as ``workers(func, iterable)``.
|
|
Requires that `func` be pickleable.
|
|
|
|
.. versionadded:: 1.3.0
|
|
|
|
Returns
|
|
-------
|
|
x0 : ndarray
|
|
A 1-D array containing the coordinates of a point at which the
|
|
objective function had its minimum value. (See `Note 1` for
|
|
which point is returned.)
|
|
fval : float
|
|
Function value at the point `x0`. (Returned when `full_output` is
|
|
True.)
|
|
grid : tuple
|
|
Representation of the evaluation grid. It has the same
|
|
length as `x0`. (Returned when `full_output` is True.)
|
|
Jout : ndarray
|
|
Function values at each point of the evaluation
|
|
grid, i.e., ``Jout = func(*grid)``. (Returned
|
|
when `full_output` is True.)
|
|
|
|
See Also
|
|
--------
|
|
basinhopping, differential_evolution
|
|
|
|
Notes
|
|
-----
|
|
*Note 1*: The program finds the gridpoint at which the lowest value
|
|
of the objective function occurs. If `finish` is None, that is the
|
|
point returned. When the global minimum occurs within (or not very far
|
|
outside) the grid's boundaries, and the grid is fine enough, that
|
|
point will be in the neighborhood of the global minimum.
|
|
|
|
However, users often employ some other optimization program to
|
|
"polish" the gridpoint values, i.e., to seek a more precise
|
|
(local) minimum near `brute's` best gridpoint.
|
|
The `brute` function's `finish` option provides a convenient way to do
|
|
that. Any polishing program used must take `brute's` output as its
|
|
initial guess as a positional argument, and take `brute's` input values
|
|
for `args` as keyword arguments, otherwise an error will be raised.
|
|
It may additionally take `full_output` and/or `disp` as keyword arguments.
|
|
|
|
`brute` assumes that the `finish` function returns either an
|
|
`OptimizeResult` object or a tuple in the form:
|
|
``(xmin, Jmin, ... , statuscode)``, where ``xmin`` is the minimizing
|
|
value of the argument, ``Jmin`` is the minimum value of the objective
|
|
function, "..." may be some other returned values (which are not used
|
|
by `brute`), and ``statuscode`` is the status code of the `finish` program.
|
|
|
|
Note that when `finish` is not None, the values returned are those
|
|
of the `finish` program, *not* the gridpoint ones. Consequently,
|
|
while `brute` confines its search to the input grid points,
|
|
the `finish` program's results usually will not coincide with any
|
|
gridpoint, and may fall outside the grid's boundary. Thus, if a
|
|
minimum only needs to be found over the provided grid points, make
|
|
sure to pass in `finish=None`.
|
|
|
|
*Note 2*: The grid of points is a `numpy.mgrid` object.
|
|
For `brute` the `ranges` and `Ns` inputs have the following effect.
|
|
Each component of the `ranges` tuple can be either a slice object or a
|
|
two-tuple giving a range of values, such as (0, 5). If the component is a
|
|
slice object, `brute` uses it directly. If the component is a two-tuple
|
|
range, `brute` internally converts it to a slice object that interpolates
|
|
`Ns` points from its low-value to its high-value, inclusive.
|
|
|
|
Examples
|
|
--------
|
|
We illustrate the use of `brute` to seek the global minimum of a function
|
|
of two variables that is given as the sum of a positive-definite
|
|
quadratic and two deep "Gaussian-shaped" craters. Specifically, define
|
|
the objective function `f` as the sum of three other functions,
|
|
``f = f1 + f2 + f3``. We suppose each of these has a signature
|
|
``(z, *params)``, where ``z = (x, y)``, and ``params`` and the functions
|
|
are as defined below.
|
|
|
|
>>> import numpy as np
|
|
>>> params = (2, 3, 7, 8, 9, 10, 44, -1, 2, 26, 1, -2, 0.5)
|
|
>>> def f1(z, *params):
|
|
... x, y = z
|
|
... a, b, c, d, e, f, g, h, i, j, k, l, scale = params
|
|
... return (a * x**2 + b * x * y + c * y**2 + d*x + e*y + f)
|
|
|
|
>>> def f2(z, *params):
|
|
... x, y = z
|
|
... a, b, c, d, e, f, g, h, i, j, k, l, scale = params
|
|
... return (-g*np.exp(-((x-h)**2 + (y-i)**2) / scale))
|
|
|
|
>>> def f3(z, *params):
|
|
... x, y = z
|
|
... a, b, c, d, e, f, g, h, i, j, k, l, scale = params
|
|
... return (-j*np.exp(-((x-k)**2 + (y-l)**2) / scale))
|
|
|
|
>>> def f(z, *params):
|
|
... return f1(z, *params) + f2(z, *params) + f3(z, *params)
|
|
|
|
Thus, the objective function may have local minima near the minimum
|
|
of each of the three functions of which it is composed. To
|
|
use `fmin` to polish its gridpoint result, we may then continue as
|
|
follows:
|
|
|
|
>>> rranges = (slice(-4, 4, 0.25), slice(-4, 4, 0.25))
|
|
>>> from scipy import optimize
|
|
>>> resbrute = optimize.brute(f, rranges, args=params, full_output=True,
|
|
... finish=optimize.fmin)
|
|
>>> resbrute[0] # global minimum
|
|
array([-1.05665192, 1.80834843])
|
|
>>> resbrute[1] # function value at global minimum
|
|
-3.4085818767
|
|
|
|
Note that if `finish` had been set to None, we would have gotten the
|
|
gridpoint [-1.0 1.75] where the rounded function value is -2.892.
|
|
|
|
"""
|
|
N = len(ranges)
|
|
if N > 40:
|
|
raise ValueError("Brute Force not possible with more "
|
|
"than 40 variables.")
|
|
lrange = list(ranges)
|
|
for k in range(N):
|
|
if not isinstance(lrange[k], slice):
|
|
if len(lrange[k]) < 3:
|
|
lrange[k] = tuple(lrange[k]) + (complex(Ns),)
|
|
lrange[k] = slice(*lrange[k])
|
|
if (N == 1):
|
|
lrange = lrange[0]
|
|
|
|
grid = np.mgrid[lrange]
|
|
|
|
# obtain an array of parameters that is iterable by a map-like callable
|
|
inpt_shape = grid.shape
|
|
if (N > 1):
|
|
grid = np.reshape(grid, (inpt_shape[0], np.prod(inpt_shape[1:]))).T
|
|
|
|
if not np.iterable(args):
|
|
args = (args,)
|
|
|
|
wrapped_func = _Brute_Wrapper(func, args)
|
|
|
|
# iterate over input arrays, possibly in parallel
|
|
with MapWrapper(pool=workers) as mapper:
|
|
Jout = np.array(list(mapper(wrapped_func, grid)))
|
|
if (N == 1):
|
|
grid = (grid,)
|
|
Jout = np.squeeze(Jout)
|
|
elif (N > 1):
|
|
Jout = np.reshape(Jout, inpt_shape[1:])
|
|
grid = np.reshape(grid.T, inpt_shape)
|
|
|
|
Nshape = shape(Jout)
|
|
|
|
indx = argmin(Jout.ravel(), axis=-1)
|
|
Nindx = np.empty(N, int)
|
|
xmin = np.empty(N, float)
|
|
for k in range(N - 1, -1, -1):
|
|
thisN = Nshape[k]
|
|
Nindx[k] = indx % Nshape[k]
|
|
indx = indx // thisN
|
|
for k in range(N):
|
|
xmin[k] = grid[k][tuple(Nindx)]
|
|
|
|
Jmin = Jout[tuple(Nindx)]
|
|
if (N == 1):
|
|
grid = grid[0]
|
|
xmin = xmin[0]
|
|
|
|
if callable(finish):
|
|
# set up kwargs for `finish` function
|
|
finish_args = _getfullargspec(finish).args
|
|
finish_kwargs = dict()
|
|
if 'full_output' in finish_args:
|
|
finish_kwargs['full_output'] = 1
|
|
if 'disp' in finish_args:
|
|
finish_kwargs['disp'] = disp
|
|
elif 'options' in finish_args:
|
|
# pass 'disp' as `options`
|
|
# (e.g., if `finish` is `minimize`)
|
|
finish_kwargs['options'] = {'disp': disp}
|
|
|
|
# run minimizer
|
|
res = finish(func, xmin, args=args, **finish_kwargs)
|
|
|
|
if isinstance(res, OptimizeResult):
|
|
xmin = res.x
|
|
Jmin = res.fun
|
|
success = res.success
|
|
else:
|
|
xmin = res[0]
|
|
Jmin = res[1]
|
|
success = res[-1] == 0
|
|
if not success:
|
|
if disp:
|
|
warnings.warn("Either final optimization did not succeed or `finish` "
|
|
"does not return `statuscode` as its last argument.",
|
|
RuntimeWarning, stacklevel=2)
|
|
|
|
if full_output:
|
|
return xmin, Jmin, grid, Jout
|
|
else:
|
|
return xmin
|
|
|
|
|
|
class _Brute_Wrapper:
|
|
"""
|
|
Object to wrap user cost function for optimize.brute, allowing picklability
|
|
"""
|
|
|
|
def __init__(self, f, args):
|
|
self.f = f
|
|
self.args = [] if args is None else args
|
|
|
|
def __call__(self, x):
|
|
# flatten needed for one dimensional case.
|
|
return self.f(np.asarray(x).flatten(), *self.args)
|
|
|
|
|
|
def show_options(solver=None, method=None, disp=True):
|
|
"""
|
|
Show documentation for additional options of optimization solvers.
|
|
|
|
These are method-specific options that can be supplied through the
|
|
``options`` dict.
|
|
|
|
Parameters
|
|
----------
|
|
solver : str
|
|
Type of optimization solver. One of 'minimize', 'minimize_scalar',
|
|
'root', 'root_scalar', 'linprog', or 'quadratic_assignment'.
|
|
method : str, optional
|
|
If not given, shows all methods of the specified solver. Otherwise,
|
|
show only the options for the specified method. Valid values
|
|
corresponds to methods' names of respective solver (e.g., 'BFGS' for
|
|
'minimize').
|
|
disp : bool, optional
|
|
Whether to print the result rather than returning it.
|
|
|
|
Returns
|
|
-------
|
|
text
|
|
Either None (for disp=True) or the text string (disp=False)
|
|
|
|
Notes
|
|
-----
|
|
The solver-specific methods are:
|
|
|
|
`scipy.optimize.minimize`
|
|
|
|
- :ref:`Nelder-Mead <optimize.minimize-neldermead>`
|
|
- :ref:`Powell <optimize.minimize-powell>`
|
|
- :ref:`CG <optimize.minimize-cg>`
|
|
- :ref:`BFGS <optimize.minimize-bfgs>`
|
|
- :ref:`Newton-CG <optimize.minimize-newtoncg>`
|
|
- :ref:`L-BFGS-B <optimize.minimize-lbfgsb>`
|
|
- :ref:`TNC <optimize.minimize-tnc>`
|
|
- :ref:`COBYLA <optimize.minimize-cobyla>`
|
|
- :ref:`COBYQA <optimize.minimize-cobyqa>`
|
|
- :ref:`SLSQP <optimize.minimize-slsqp>`
|
|
- :ref:`dogleg <optimize.minimize-dogleg>`
|
|
- :ref:`trust-ncg <optimize.minimize-trustncg>`
|
|
|
|
`scipy.optimize.root`
|
|
|
|
- :ref:`hybr <optimize.root-hybr>`
|
|
- :ref:`lm <optimize.root-lm>`
|
|
- :ref:`broyden1 <optimize.root-broyden1>`
|
|
- :ref:`broyden2 <optimize.root-broyden2>`
|
|
- :ref:`anderson <optimize.root-anderson>`
|
|
- :ref:`linearmixing <optimize.root-linearmixing>`
|
|
- :ref:`diagbroyden <optimize.root-diagbroyden>`
|
|
- :ref:`excitingmixing <optimize.root-excitingmixing>`
|
|
- :ref:`krylov <optimize.root-krylov>`
|
|
- :ref:`df-sane <optimize.root-dfsane>`
|
|
|
|
`scipy.optimize.minimize_scalar`
|
|
|
|
- :ref:`brent <optimize.minimize_scalar-brent>`
|
|
- :ref:`golden <optimize.minimize_scalar-golden>`
|
|
- :ref:`bounded <optimize.minimize_scalar-bounded>`
|
|
|
|
`scipy.optimize.root_scalar`
|
|
|
|
- :ref:`bisect <optimize.root_scalar-bisect>`
|
|
- :ref:`brentq <optimize.root_scalar-brentq>`
|
|
- :ref:`brenth <optimize.root_scalar-brenth>`
|
|
- :ref:`ridder <optimize.root_scalar-ridder>`
|
|
- :ref:`toms748 <optimize.root_scalar-toms748>`
|
|
- :ref:`newton <optimize.root_scalar-newton>`
|
|
- :ref:`secant <optimize.root_scalar-secant>`
|
|
- :ref:`halley <optimize.root_scalar-halley>`
|
|
|
|
`scipy.optimize.linprog`
|
|
|
|
- :ref:`simplex <optimize.linprog-simplex>`
|
|
- :ref:`interior-point <optimize.linprog-interior-point>`
|
|
- :ref:`revised simplex <optimize.linprog-revised_simplex>`
|
|
- :ref:`highs <optimize.linprog-highs>`
|
|
- :ref:`highs-ds <optimize.linprog-highs-ds>`
|
|
- :ref:`highs-ipm <optimize.linprog-highs-ipm>`
|
|
|
|
`scipy.optimize.quadratic_assignment`
|
|
|
|
- :ref:`faq <optimize.qap-faq>`
|
|
- :ref:`2opt <optimize.qap-2opt>`
|
|
|
|
Examples
|
|
--------
|
|
We can print documentations of a solver in stdout:
|
|
|
|
>>> from scipy.optimize import show_options
|
|
>>> show_options(solver="minimize")
|
|
...
|
|
|
|
Specifying a method is possible:
|
|
|
|
>>> show_options(solver="minimize", method="Nelder-Mead")
|
|
...
|
|
|
|
We can also get the documentations as a string:
|
|
|
|
>>> show_options(solver="minimize", method="Nelder-Mead", disp=False)
|
|
Minimization of scalar function of one or more variables using the ...
|
|
|
|
"""
|
|
import textwrap
|
|
|
|
doc_routines = {
|
|
'minimize': (
|
|
('bfgs', 'scipy.optimize._optimize._minimize_bfgs'),
|
|
('cg', 'scipy.optimize._optimize._minimize_cg'),
|
|
('cobyla', 'scipy.optimize._cobyla_py._minimize_cobyla'),
|
|
('cobyqa', 'scipy.optimize._cobyqa_py._minimize_cobyqa'),
|
|
('dogleg', 'scipy.optimize._trustregion_dogleg._minimize_dogleg'),
|
|
('l-bfgs-b', 'scipy.optimize._lbfgsb_py._minimize_lbfgsb'),
|
|
('nelder-mead', 'scipy.optimize._optimize._minimize_neldermead'),
|
|
('newton-cg', 'scipy.optimize._optimize._minimize_newtoncg'),
|
|
('powell', 'scipy.optimize._optimize._minimize_powell'),
|
|
('slsqp', 'scipy.optimize._slsqp_py._minimize_slsqp'),
|
|
('tnc', 'scipy.optimize._tnc._minimize_tnc'),
|
|
('trust-ncg',
|
|
'scipy.optimize._trustregion_ncg._minimize_trust_ncg'),
|
|
('trust-constr',
|
|
'scipy.optimize._trustregion_constr.'
|
|
'_minimize_trustregion_constr'),
|
|
('trust-exact',
|
|
'scipy.optimize._trustregion_exact._minimize_trustregion_exact'),
|
|
('trust-krylov',
|
|
'scipy.optimize._trustregion_krylov._minimize_trust_krylov'),
|
|
),
|
|
'root': (
|
|
('hybr', 'scipy.optimize._minpack_py._root_hybr'),
|
|
('lm', 'scipy.optimize._root._root_leastsq'),
|
|
('broyden1', 'scipy.optimize._root._root_broyden1_doc'),
|
|
('broyden2', 'scipy.optimize._root._root_broyden2_doc'),
|
|
('anderson', 'scipy.optimize._root._root_anderson_doc'),
|
|
('diagbroyden', 'scipy.optimize._root._root_diagbroyden_doc'),
|
|
('excitingmixing', 'scipy.optimize._root._root_excitingmixing_doc'),
|
|
('linearmixing', 'scipy.optimize._root._root_linearmixing_doc'),
|
|
('krylov', 'scipy.optimize._root._root_krylov_doc'),
|
|
('df-sane', 'scipy.optimize._spectral._root_df_sane'),
|
|
),
|
|
'root_scalar': (
|
|
('bisect', 'scipy.optimize._root_scalar._root_scalar_bisect_doc'),
|
|
('brentq', 'scipy.optimize._root_scalar._root_scalar_brentq_doc'),
|
|
('brenth', 'scipy.optimize._root_scalar._root_scalar_brenth_doc'),
|
|
('ridder', 'scipy.optimize._root_scalar._root_scalar_ridder_doc'),
|
|
('toms748', 'scipy.optimize._root_scalar._root_scalar_toms748_doc'),
|
|
('secant', 'scipy.optimize._root_scalar._root_scalar_secant_doc'),
|
|
('newton', 'scipy.optimize._root_scalar._root_scalar_newton_doc'),
|
|
('halley', 'scipy.optimize._root_scalar._root_scalar_halley_doc'),
|
|
),
|
|
'linprog': (
|
|
('simplex', 'scipy.optimize._linprog._linprog_simplex_doc'),
|
|
('interior-point', 'scipy.optimize._linprog._linprog_ip_doc'),
|
|
('revised simplex', 'scipy.optimize._linprog._linprog_rs_doc'),
|
|
('highs-ipm', 'scipy.optimize._linprog._linprog_highs_ipm_doc'),
|
|
('highs-ds', 'scipy.optimize._linprog._linprog_highs_ds_doc'),
|
|
('highs', 'scipy.optimize._linprog._linprog_highs_doc'),
|
|
),
|
|
'quadratic_assignment': (
|
|
('faq', 'scipy.optimize._qap._quadratic_assignment_faq'),
|
|
('2opt', 'scipy.optimize._qap._quadratic_assignment_2opt'),
|
|
),
|
|
'minimize_scalar': (
|
|
('brent', 'scipy.optimize._optimize._minimize_scalar_brent'),
|
|
('bounded', 'scipy.optimize._optimize._minimize_scalar_bounded'),
|
|
('golden', 'scipy.optimize._optimize._minimize_scalar_golden'),
|
|
),
|
|
}
|
|
|
|
if solver is None:
|
|
text = ["\n\n\n========\n", "minimize\n", "========\n"]
|
|
text.append(show_options('minimize', disp=False))
|
|
text.extend(["\n\n===============\n", "minimize_scalar\n",
|
|
"===============\n"])
|
|
text.append(show_options('minimize_scalar', disp=False))
|
|
text.extend(["\n\n\n====\n", "root\n",
|
|
"====\n"])
|
|
text.append(show_options('root', disp=False))
|
|
text.extend(['\n\n\n=======\n', 'linprog\n',
|
|
'=======\n'])
|
|
text.append(show_options('linprog', disp=False))
|
|
text = "".join(text)
|
|
else:
|
|
solver = solver.lower()
|
|
if solver not in doc_routines:
|
|
raise ValueError(f'Unknown solver {solver!r}')
|
|
|
|
if method is None:
|
|
text = []
|
|
for name, _ in doc_routines[solver]:
|
|
text.extend(["\n\n" + name, "\n" + "="*len(name) + "\n\n"])
|
|
text.append(show_options(solver, name, disp=False))
|
|
text = "".join(text)
|
|
else:
|
|
method = method.lower()
|
|
methods = dict(doc_routines[solver])
|
|
if method not in methods:
|
|
raise ValueError(f"Unknown method {method!r}")
|
|
name = methods[method]
|
|
|
|
# Import function object
|
|
parts = name.split('.')
|
|
mod_name = ".".join(parts[:-1])
|
|
__import__(mod_name)
|
|
obj = getattr(sys.modules[mod_name], parts[-1])
|
|
|
|
# Get doc
|
|
doc = obj.__doc__
|
|
if doc is not None:
|
|
text = textwrap.dedent(doc).strip()
|
|
else:
|
|
text = ""
|
|
|
|
if disp:
|
|
print(text)
|
|
return
|
|
else:
|
|
return text
|