import numpy as np import scipy.sparse as sps from ._numdiff import approx_derivative, group_columns from ._hessian_update_strategy import HessianUpdateStrategy from scipy.sparse.linalg import LinearOperator from scipy._lib._array_api import atleast_nd, array_namespace FD_METHODS = ('2-point', '3-point', 'cs') def _wrapper_fun(fun, args=()): ncalls = [0] def wrapped(x): ncalls[0] += 1 # Send a copy because the user may overwrite it. # Overwriting results in undefined behaviour because # fun(self.x) will change self.x, with the two no longer linked. fx = fun(np.copy(x), *args) # Make sure the function returns a true scalar if not np.isscalar(fx): try: fx = np.asarray(fx).item() except (TypeError, ValueError) as e: raise ValueError( "The user-provided objective function " "must return a scalar value." ) from e return fx return wrapped, ncalls def _wrapper_grad(grad, fun=None, args=(), finite_diff_options=None): ncalls = [0] if callable(grad): def wrapped(x, **kwds): # kwds present to give function same signature as numdiff variant ncalls[0] += 1 return np.atleast_1d(grad(np.copy(x), *args)) return wrapped, ncalls elif grad in FD_METHODS: def wrapped1(x, f0=None): ncalls[0] += 1 return approx_derivative( fun, x, f0=f0, **finite_diff_options ) return wrapped1, ncalls def _wrapper_hess(hess, grad=None, x0=None, args=(), finite_diff_options=None): if callable(hess): H = hess(np.copy(x0), *args) ncalls = [1] if sps.issparse(H): def wrapped(x, **kwds): ncalls[0] += 1 return sps.csr_matrix(hess(np.copy(x), *args)) H = sps.csr_matrix(H) elif isinstance(H, LinearOperator): def wrapped(x, **kwds): ncalls[0] += 1 return hess(np.copy(x), *args) else: # dense def wrapped(x, **kwds): ncalls[0] += 1 return np.atleast_2d(np.asarray(hess(np.copy(x), *args))) H = np.atleast_2d(np.asarray(H)) return wrapped, ncalls, H elif hess in FD_METHODS: ncalls = [0] def wrapped1(x, f0=None): return approx_derivative( grad, x, f0=f0, **finite_diff_options ) return wrapped1, ncalls, None class ScalarFunction: """Scalar function and its derivatives. This class defines a scalar function F: R^n->R and methods for computing or approximating its first and second derivatives. Parameters ---------- fun : callable evaluates the scalar function. Must be of the form ``fun(x, *args)``, where ``x`` is the argument in the form of a 1-D array and ``args`` is a tuple of any additional fixed parameters needed to completely specify the function. Should return a scalar. x0 : array-like Provides an initial set of variables for evaluating fun. Array of real elements of size (n,), where 'n' is the number of independent variables. args : tuple, optional Any additional fixed parameters needed to completely specify the scalar function. grad : {callable, '2-point', '3-point', 'cs'} Method for computing the gradient vector. If it is a callable, it should be a function that returns the gradient vector: ``grad(x, *args) -> array_like, shape (n,)`` where ``x`` is an array with shape (n,) and ``args`` is a tuple with the fixed parameters. Alternatively, the keywords {'2-point', '3-point', 'cs'} can be used to select a finite difference scheme for numerical estimation of the gradient with a relative step size. These finite difference schemes obey any specified `bounds`. hess : {callable, '2-point', '3-point', 'cs', HessianUpdateStrategy} Method for computing the Hessian matrix. If it is callable, it should return the Hessian matrix: ``hess(x, *args) -> {LinearOperator, spmatrix, array}, (n, n)`` where x is a (n,) ndarray and `args` is a tuple with the fixed parameters. Alternatively, the keywords {'2-point', '3-point', 'cs'} select a finite difference scheme for numerical estimation. Or, objects implementing `HessianUpdateStrategy` interface can be used to approximate the Hessian. Whenever the gradient is estimated via finite-differences, the Hessian cannot be estimated with options {'2-point', '3-point', 'cs'} and needs to be estimated using one of the quasi-Newton strategies. finite_diff_rel_step : None or array_like Relative step size to use. The absolute step size is computed as ``h = finite_diff_rel_step * sign(x0) * max(1, abs(x0))``, possibly adjusted to fit into the bounds. For ``method='3-point'`` the sign of `h` is ignored. If None then finite_diff_rel_step is selected automatically, finite_diff_bounds : tuple of array_like Lower and upper bounds on independent variables. Defaults to no bounds, (-np.inf, np.inf). Each bound must match the size of `x0` or be a scalar, in the latter case the bound will be the same for all variables. Use it to limit the range of function evaluation. epsilon : None or array_like, optional Absolute step size to use, possibly adjusted to fit into the bounds. For ``method='3-point'`` the sign of `epsilon` is ignored. By default relative steps are used, only if ``epsilon is not None`` are absolute steps used. Notes ----- This class implements a memoization logic. There are methods `fun`, `grad`, hess` and corresponding attributes `f`, `g` and `H`. The following things should be considered: 1. Use only public methods `fun`, `grad` and `hess`. 2. After one of the methods is called, the corresponding attribute will be set. However, a subsequent call with a different argument of *any* of the methods may overwrite the attribute. """ def __init__(self, fun, x0, args, grad, hess, finite_diff_rel_step, finite_diff_bounds, epsilon=None): if not callable(grad) and grad not in FD_METHODS: raise ValueError( f"`grad` must be either callable or one of {FD_METHODS}." ) if not (callable(hess) or hess in FD_METHODS or isinstance(hess, HessianUpdateStrategy)): raise ValueError( f"`hess` must be either callable, HessianUpdateStrategy" f" or one of {FD_METHODS}." ) if grad in FD_METHODS and hess in FD_METHODS: raise ValueError("Whenever the gradient is estimated via " "finite-differences, we require the Hessian " "to be estimated using one of the " "quasi-Newton strategies.") self.xp = xp = array_namespace(x0) _x = atleast_nd(x0, ndim=1, xp=xp) _dtype = xp.float64 if xp.isdtype(_x.dtype, "real floating"): _dtype = _x.dtype # original arguments self._wrapped_fun, self._nfev = _wrapper_fun(fun, args=args) self._orig_fun = fun self._orig_grad = grad self._orig_hess = hess self._args = args # promotes to floating self.x = xp.astype(_x, _dtype) self.x_dtype = _dtype self.n = self.x.size self.f_updated = False self.g_updated = False self.H_updated = False self._lowest_x = None self._lowest_f = np.inf finite_diff_options = {} if grad in FD_METHODS: finite_diff_options["method"] = grad finite_diff_options["rel_step"] = finite_diff_rel_step finite_diff_options["abs_step"] = epsilon finite_diff_options["bounds"] = finite_diff_bounds if hess in FD_METHODS: finite_diff_options["method"] = hess finite_diff_options["rel_step"] = finite_diff_rel_step finite_diff_options["abs_step"] = epsilon finite_diff_options["as_linear_operator"] = True # Initial function evaluation self._update_fun() # Initial gradient evaluation self._wrapped_grad, self._ngev = _wrapper_grad( grad, fun=self._wrapped_fun, args=args, finite_diff_options=finite_diff_options ) self._update_grad() # Hessian evaluation if callable(hess): self._wrapped_hess, self._nhev, self.H = _wrapper_hess( hess, x0=x0, args=args ) self.H_updated = True elif hess in FD_METHODS: self._wrapped_hess, self._nhev, self.H = _wrapper_hess( hess, grad=self._wrapped_grad, x0=x0, finite_diff_options=finite_diff_options ) self._update_grad() self.H = self._wrapped_hess(self.x, f0=self.g) self.H_updated = True elif isinstance(hess, HessianUpdateStrategy): self.H = hess self.H.initialize(self.n, 'hess') self.H_updated = True self.x_prev = None self.g_prev = None self._nhev = [0] @property def nfev(self): return self._nfev[0] @property def ngev(self): return self._ngev[0] @property def nhev(self): return self._nhev[0] def _update_x(self, x): if isinstance(self._orig_hess, HessianUpdateStrategy): self._update_grad() self.x_prev = self.x self.g_prev = self.g # ensure that self.x is a copy of x. Don't store a reference # otherwise the memoization doesn't work properly. _x = atleast_nd(x, ndim=1, xp=self.xp) self.x = self.xp.astype(_x, self.x_dtype) self.f_updated = False self.g_updated = False self.H_updated = False self._update_hess() else: # ensure that self.x is a copy of x. Don't store a reference # otherwise the memoization doesn't work properly. _x = atleast_nd(x, ndim=1, xp=self.xp) self.x = self.xp.astype(_x, self.x_dtype) self.f_updated = False self.g_updated = False self.H_updated = False def _update_fun(self): if not self.f_updated: fx = self._wrapped_fun(self.x) if fx < self._lowest_f: self._lowest_x = self.x self._lowest_f = fx self.f = fx self.f_updated = True def _update_grad(self): if not self.g_updated: if self._orig_grad in FD_METHODS: self._update_fun() self.g = self._wrapped_grad(self.x, f0=self.f) self.g_updated = True def _update_hess(self): if not self.H_updated: if self._orig_hess in FD_METHODS: self._update_grad() self.H = self._wrapped_hess(self.x, f0=self.g) elif isinstance(self._orig_hess, HessianUpdateStrategy): self._update_grad() self.H.update(self.x - self.x_prev, self.g - self.g_prev) else: # should be callable(hess) self.H = self._wrapped_hess(self.x) self.H_updated = True def fun(self, x): if not np.array_equal(x, self.x): self._update_x(x) self._update_fun() return self.f def grad(self, x): if not np.array_equal(x, self.x): self._update_x(x) self._update_grad() return self.g def hess(self, x): if not np.array_equal(x, self.x): self._update_x(x) self._update_hess() return self.H def fun_and_grad(self, x): if not np.array_equal(x, self.x): self._update_x(x) self._update_fun() self._update_grad() return self.f, self.g class VectorFunction: """Vector function and its derivatives. This class defines a vector function F: R^n->R^m and methods for computing or approximating its first and second derivatives. Notes ----- This class implements a memoization logic. There are methods `fun`, `jac`, hess` and corresponding attributes `f`, `J` and `H`. The following things should be considered: 1. Use only public methods `fun`, `jac` and `hess`. 2. After one of the methods is called, the corresponding attribute will be set. However, a subsequent call with a different argument of *any* of the methods may overwrite the attribute. """ def __init__(self, fun, x0, jac, hess, finite_diff_rel_step, finite_diff_jac_sparsity, finite_diff_bounds, sparse_jacobian): if not callable(jac) and jac not in FD_METHODS: raise ValueError(f"`jac` must be either callable or one of {FD_METHODS}.") if not (callable(hess) or hess in FD_METHODS or isinstance(hess, HessianUpdateStrategy)): raise ValueError("`hess` must be either callable," f"HessianUpdateStrategy or one of {FD_METHODS}.") if jac in FD_METHODS and hess in FD_METHODS: raise ValueError("Whenever the Jacobian is estimated via " "finite-differences, we require the Hessian to " "be estimated using one of the quasi-Newton " "strategies.") self.xp = xp = array_namespace(x0) _x = atleast_nd(x0, ndim=1, xp=xp) _dtype = xp.float64 if xp.isdtype(_x.dtype, "real floating"): _dtype = _x.dtype # promotes to floating self.x = xp.astype(_x, _dtype) self.x_dtype = _dtype self.n = self.x.size self.nfev = 0 self.njev = 0 self.nhev = 0 self.f_updated = False self.J_updated = False self.H_updated = False finite_diff_options = {} if jac in FD_METHODS: finite_diff_options["method"] = jac finite_diff_options["rel_step"] = finite_diff_rel_step if finite_diff_jac_sparsity is not None: sparsity_groups = group_columns(finite_diff_jac_sparsity) finite_diff_options["sparsity"] = (finite_diff_jac_sparsity, sparsity_groups) finite_diff_options["bounds"] = finite_diff_bounds self.x_diff = np.copy(self.x) if hess in FD_METHODS: finite_diff_options["method"] = hess finite_diff_options["rel_step"] = finite_diff_rel_step finite_diff_options["as_linear_operator"] = True self.x_diff = np.copy(self.x) if jac in FD_METHODS and hess in FD_METHODS: raise ValueError("Whenever the Jacobian is estimated via " "finite-differences, we require the Hessian to " "be estimated using one of the quasi-Newton " "strategies.") # Function evaluation def fun_wrapped(x): self.nfev += 1 return np.atleast_1d(fun(x)) def update_fun(): self.f = fun_wrapped(self.x) self._update_fun_impl = update_fun update_fun() self.v = np.zeros_like(self.f) self.m = self.v.size # Jacobian Evaluation if callable(jac): self.J = jac(self.x) self.J_updated = True self.njev += 1 if (sparse_jacobian or sparse_jacobian is None and sps.issparse(self.J)): def jac_wrapped(x): self.njev += 1 return sps.csr_matrix(jac(x)) self.J = sps.csr_matrix(self.J) self.sparse_jacobian = True elif sps.issparse(self.J): def jac_wrapped(x): self.njev += 1 return jac(x).toarray() self.J = self.J.toarray() self.sparse_jacobian = False else: def jac_wrapped(x): self.njev += 1 return np.atleast_2d(jac(x)) self.J = np.atleast_2d(self.J) self.sparse_jacobian = False def update_jac(): self.J = jac_wrapped(self.x) elif jac in FD_METHODS: self.J = approx_derivative(fun_wrapped, self.x, f0=self.f, **finite_diff_options) self.J_updated = True if (sparse_jacobian or sparse_jacobian is None and sps.issparse(self.J)): def update_jac(): self._update_fun() self.J = sps.csr_matrix( approx_derivative(fun_wrapped, self.x, f0=self.f, **finite_diff_options)) self.J = sps.csr_matrix(self.J) self.sparse_jacobian = True elif sps.issparse(self.J): def update_jac(): self._update_fun() self.J = approx_derivative(fun_wrapped, self.x, f0=self.f, **finite_diff_options).toarray() self.J = self.J.toarray() self.sparse_jacobian = False else: def update_jac(): self._update_fun() self.J = np.atleast_2d( approx_derivative(fun_wrapped, self.x, f0=self.f, **finite_diff_options)) self.J = np.atleast_2d(self.J) self.sparse_jacobian = False self._update_jac_impl = update_jac # Define Hessian if callable(hess): self.H = hess(self.x, self.v) self.H_updated = True self.nhev += 1 if sps.issparse(self.H): def hess_wrapped(x, v): self.nhev += 1 return sps.csr_matrix(hess(x, v)) self.H = sps.csr_matrix(self.H) elif isinstance(self.H, LinearOperator): def hess_wrapped(x, v): self.nhev += 1 return hess(x, v) else: def hess_wrapped(x, v): self.nhev += 1 return np.atleast_2d(np.asarray(hess(x, v))) self.H = np.atleast_2d(np.asarray(self.H)) def update_hess(): self.H = hess_wrapped(self.x, self.v) elif hess in FD_METHODS: def jac_dot_v(x, v): return jac_wrapped(x).T.dot(v) def update_hess(): self._update_jac() self.H = approx_derivative(jac_dot_v, self.x, f0=self.J.T.dot(self.v), args=(self.v,), **finite_diff_options) update_hess() self.H_updated = True elif isinstance(hess, HessianUpdateStrategy): self.H = hess self.H.initialize(self.n, 'hess') self.H_updated = True self.x_prev = None self.J_prev = None def update_hess(): self._update_jac() # When v is updated before x was updated, then x_prev and # J_prev are None and we need this check. if self.x_prev is not None and self.J_prev is not None: delta_x = self.x - self.x_prev delta_g = self.J.T.dot(self.v) - self.J_prev.T.dot(self.v) self.H.update(delta_x, delta_g) self._update_hess_impl = update_hess if isinstance(hess, HessianUpdateStrategy): def update_x(x): self._update_jac() self.x_prev = self.x self.J_prev = self.J _x = atleast_nd(x, ndim=1, xp=self.xp) self.x = self.xp.astype(_x, self.x_dtype) self.f_updated = False self.J_updated = False self.H_updated = False self._update_hess() else: def update_x(x): _x = atleast_nd(x, ndim=1, xp=self.xp) self.x = self.xp.astype(_x, self.x_dtype) self.f_updated = False self.J_updated = False self.H_updated = False self._update_x_impl = update_x def _update_v(self, v): if not np.array_equal(v, self.v): self.v = v self.H_updated = False def _update_x(self, x): if not np.array_equal(x, self.x): self._update_x_impl(x) def _update_fun(self): if not self.f_updated: self._update_fun_impl() self.f_updated = True def _update_jac(self): if not self.J_updated: self._update_jac_impl() self.J_updated = True def _update_hess(self): if not self.H_updated: self._update_hess_impl() self.H_updated = True def fun(self, x): self._update_x(x) self._update_fun() return self.f def jac(self, x): self._update_x(x) self._update_jac() return self.J def hess(self, x, v): # v should be updated before x. self._update_v(v) self._update_x(x) self._update_hess() return self.H class LinearVectorFunction: """Linear vector function and its derivatives. Defines a linear function F = A x, where x is N-D vector and A is m-by-n matrix. The Jacobian is constant and equals to A. The Hessian is identically zero and it is returned as a csr matrix. """ def __init__(self, A, x0, sparse_jacobian): if sparse_jacobian or sparse_jacobian is None and sps.issparse(A): self.J = sps.csr_matrix(A) self.sparse_jacobian = True elif sps.issparse(A): self.J = A.toarray() self.sparse_jacobian = False else: # np.asarray makes sure A is ndarray and not matrix self.J = np.atleast_2d(np.asarray(A)) self.sparse_jacobian = False self.m, self.n = self.J.shape self.xp = xp = array_namespace(x0) _x = atleast_nd(x0, ndim=1, xp=xp) _dtype = xp.float64 if xp.isdtype(_x.dtype, "real floating"): _dtype = _x.dtype # promotes to floating self.x = xp.astype(_x, _dtype) self.x_dtype = _dtype self.f = self.J.dot(self.x) self.f_updated = True self.v = np.zeros(self.m, dtype=float) self.H = sps.csr_matrix((self.n, self.n)) def _update_x(self, x): if not np.array_equal(x, self.x): _x = atleast_nd(x, ndim=1, xp=self.xp) self.x = self.xp.astype(_x, self.x_dtype) self.f_updated = False def fun(self, x): self._update_x(x) if not self.f_updated: self.f = self.J.dot(x) self.f_updated = True return self.f def jac(self, x): self._update_x(x) return self.J def hess(self, x, v): self._update_x(x) self.v = v return self.H class IdentityVectorFunction(LinearVectorFunction): """Identity vector function and its derivatives. The Jacobian is the identity matrix, returned as a dense array when `sparse_jacobian=False` and as a csr matrix otherwise. The Hessian is identically zero and it is returned as a csr matrix. """ def __init__(self, x0, sparse_jacobian): n = len(x0) if sparse_jacobian or sparse_jacobian is None: A = sps.eye(n, format='csr') sparse_jacobian = True else: A = np.eye(n) sparse_jacobian = False super().__init__(A, x0, sparse_jacobian)