Source code for pyproximal.optimization.primaldual

import time
import numpy as np

from pylops.utils.backend import get_array_module, to_numpy


def PrimalDual(proxf, proxg, A, x0, tau, mu, y0=None, z=None, theta=1.,
               niter=10, gfirst=True, callback=None, callbacky=False,
               returny=False, show=False):
    r"""Primal-dual algorithm

    Solves the following (possibly) nonlinear minimization problem using
    the general version of the first-order primal-dual algorithm of [1]_:

    .. math::

        \min_{\mathbf{x} \in X} g(\mathbf{Ax}) + f(\mathbf{x}) +
        \mathbf{z}^T \mathbf{x}

    where :math:`\mathbf{A}` is a linear operator, and :math:`f` and
    :math:`g` can be any convex functions that have a known proximal
    operator. This functional is effectively minimized by solving its
    equivalent primal-dual problem (primal in :math:`f`, dual in
    :math:`g`):

    .. math::

        \min_{\mathbf{x} \in X} \max_{\mathbf{y} \in Y}
        \mathbf{y}^T(\mathbf{Ax}) + \mathbf{z}^T \mathbf{x} +
        f(\mathbf{x}) - g^*(\mathbf{y})

    where :math:`\mathbf{y}` is the so-called dual variable.

    Parameters
    ----------
    proxf : :obj:`pyproximal.ProxOperator`
        Proximal operator of f function
    proxg : :obj:`pyproximal.ProxOperator`
        Proximal operator of g function
    A : :obj:`pylops.LinearOperator`
        Linear operator of g
    x0 : :obj:`numpy.ndarray`
        Initial vector
    tau : :obj:`float` or :obj:`np.ndarray`
        Stepsize of subgradient of :math:`f`. This can be constant or a
        function of the iterations (in the latter case provided as
        np.ndarray)
    mu : :obj:`float` or :obj:`np.ndarray`
        Stepsize of subgradient of :math:`g^*`. This can be constant or a
        function of the iterations (in the latter case provided as
        np.ndarray)
    y0 : :obj:`numpy.ndarray`, optional
        Initial dual vector (defaults to a zero vector)
    z : :obj:`numpy.ndarray`, optional
        Additional vector
    theta : :obj:`float`
        Scalar between 0 and 1 that defines the update of the
        :math:`\bar{\mathbf{x}}` variable - note that ``theta=0`` is a
        special case that represents the semi-implicit classical
        Arrow-Hurwicz algorithm
    niter : :obj:`int`, optional
        Number of iterations of iterative scheme
    gfirst : :obj:`bool`, optional
        Apply proximal of operator ``g`` first (``True``) or proximal of
        operator ``f`` first (``False``)
    callback : :obj:`callable`, optional
        Function with signature (``callback(x)``) to call after each
        iteration where ``x`` is the current model vector
    callbacky : :obj:`bool`, optional
        Modify callback signature to (``callback(x, y)``) when
        ``callbacky=True``
    returny : :obj:`bool`, optional
        Return also ``y``
    show : :obj:`bool`, optional
        Display iterations log

    Returns
    -------
    x : :obj:`numpy.ndarray`
        Inverted model
    y : :obj:`numpy.ndarray`
        Dual variable (only returned if ``returny=True``)

    Notes
    -----
    The Primal-dual algorithm can be expressed by the following recursion
    (``gfirst=True``):

    .. math::

        \mathbf{y}^{k+1} = \prox_{\mu g^*}(\mathbf{y}^{k} +
        \mu \mathbf{A}\bar{\mathbf{x}}^{k})\\
        \mathbf{x}^{k+1} = \prox_{\tau f}(\mathbf{x}^{k} -
        \tau (\mathbf{A}^H \mathbf{y}^{k+1} + \mathbf{z})) \\
        \bar{\mathbf{x}}^{k+1} = \mathbf{x}^{k+1} + \theta
        (\mathbf{x}^{k+1} - \mathbf{x}^k)

    where convergence requires
    :math:`\tau \mu \lambda_{max}(\mathbf{A}^H\mathbf{A}) < 1`.
    Alternatively for ``gfirst=False`` the scheme becomes:

    .. math::

        \mathbf{x}^{k+1} = \prox_{\tau f}(\mathbf{x}^{k} -
        \tau (\mathbf{A}^H \mathbf{y}^{k} + \mathbf{z})) \\
        \bar{\mathbf{x}}^{k+1} = \mathbf{x}^{k+1} + \theta
        (\mathbf{x}^{k+1} - \mathbf{x}^k) \\
        \mathbf{y}^{k+1} = \prox_{\mu g^*}(\mathbf{y}^{k} +
        \mu \mathbf{A}\bar{\mathbf{x}}^{k+1})

    .. [1] A. Chambolle and T. Pock, "A first-order primal-dual algorithm
       for convex problems with applications to imaging", Journal of
       Mathematical Imaging and Vision, 40, pp. 120-145, 2011.
""" ncp = get_array_module(x0) # check if tau and mu are scalars or arrays fixedtau = fixedmu = False if isinstance(tau, (int, float)): tau = tau * ncp.ones(niter, dtype=x0.dtype) fixedtau = True if isinstance(mu, (int, float)): mu = mu * ncp.ones(niter, dtype=x0.dtype) fixedmu = True if show: tstart = time.time() print('Primal-dual: min_x f(Ax) + x^T z + g(x)\n' '---------------------------------------------------------\n' 'Proximal operator (f): %s\n' 'Proximal operator (g): %s\n' 'Linear operator (A): %s\n' 'Additional vector (z): %s\n' 'tau = %s\t\tmu = %s\ntheta = %.2f\t\tniter = %d\n' % (type(proxf), type(proxg), type(A), None if z is None else 'vector', str(tau[0]) if fixedtau else 'Variable', str(mu[0]) if fixedmu else 'Variable', theta, niter)) head = ' Itn x[0] f g z^x J = f + g + z^x' print(head) x = x0.copy() xhat = x.copy() y = y0.copy() if y0 is not None else ncp.zeros(A.shape[0], dtype=x.dtype) for iiter in range(niter): xold = x.copy() if gfirst: y = proxg.proxdual(y + mu[iiter] * A.matvec(xhat), mu[iiter]) ATy = A.rmatvec(y) if z is not None: ATy += z x = proxf.prox(x - tau[iiter] * ATy, tau[iiter]) xhat = x + theta * (x - xold) else: ATy = A.rmatvec(y) if z is not None: ATy += z x = proxf.prox(x - tau[iiter] * ATy, tau[iiter]) xhat = x + theta * (x - xold) y = proxg.proxdual(y + mu[iiter] * A.matvec(xhat), mu[iiter]) # run callback if callback is not None: if callbacky: callback(x, y) else: callback(x) if show: if iiter < 10 or niter - iiter < 10 or iiter % (niter // 10) == 0: pf, pg = proxf(x), proxg(A.matvec(x)) pf = 0. if type(pf) == bool else pf pg = 0. if type(pg) == bool else pg zx = 0. if z is None else np.dot(z, x) msg = '%6g %12.5e %10.3e %10.3e %10.3e %10.3e' % \ (iiter + 1, np.real(to_numpy(x[0])), pf, pg, zx, pf + pg + zx) print(msg) if show: print('\nTotal time (s) = %.2f' % (time.time() - tstart)) print('---------------------------------------------------------\n') if not returny: return x else: return x, y
def AdaptivePrimalDual(proxf, proxg, A, x0, tau, mu, alpha=0.5, eta=0.95,
                       s=1., delta=1.5, z=None, niter=10, tol=1e-10,
                       callback=None, show=False):
    r"""Adaptive Primal-dual algorithm

    Solves the minimization problem in
    :func:`pyproximal.optimization.primaldual.PrimalDual` using an
    adaptive version of the first-order primal-dual algorithm of [1]_.
    The main advantage of this method is that the step sizes :math:`\tau`
    and :math:`\mu` change through iterations, improving the overall speed
    of convergence of the algorithm.

    Parameters
    ----------
    proxf : :obj:`pyproximal.ProxOperator`
        Proximal operator of f function
    proxg : :obj:`pyproximal.ProxOperator`
        Proximal operator of g function
    A : :obj:`pylops.LinearOperator`
        Linear operator of g
    x0 : :obj:`numpy.ndarray`
        Initial vector
    tau : :obj:`float`
        Stepsize of subgradient of :math:`f`
    mu : :obj:`float`
        Stepsize of subgradient of :math:`g^*`
    alpha : :obj:`float`, optional
        Initial adaptivity level (must be between 0 and 1)
    eta : :obj:`float`, optional
        Scaling of the adaptivity level, multiplied onto the current
        ``alpha`` every time the norms of the two residuals start to
        diverge (must be between 0 and 1)
    s : :obj:`float`, optional
        Scaling of the residual balancing principle
    delta : :obj:`float`, optional
        Balancing factor. Step sizes are updated only when the ratio of
        the residual norms exceeds this value.
    z : :obj:`numpy.ndarray`, optional
        Additional vector
    niter : :obj:`int`, optional
        Number of iterations of iterative scheme
    tol : :obj:`float`, optional
        Tolerance on residual norms
    callback : :obj:`callable`, optional
        Function with signature (``callback(x)``) to call after each
        iteration where ``x`` is the current model vector
    show : :obj:`bool`, optional
        Display iterations log

    Returns
    -------
    x : :obj:`numpy.ndarray`
        Inverted model
    steps : :obj:`tuple`
        Evolution of tau, mu and alpha through iterations

    Notes
    -----
    The Adaptive Primal-dual algorithm shares the same iterations of the
    original :func:`pyproximal.optimization.primaldual.PrimalDual` solver.
    The main difference lies in the fact that the step sizes ``tau`` and
    ``mu`` are adaptively changed at each iteration, leading to faster
    convergence. Changes are applied by tracking the norms of the primal
    and dual residuals: when their mutual ratio increases beyond a certain
    threshold ``delta``, the step lengths are updated to balance the
    minimization and maximization parts of the overall optimization
    process.

    .. [1] T. Goldstein, M. Li, X. Yuan, E. Esser, and R. Baraniuk,
       "Adaptive Primal-Dual Hybrid Gradient Methods for Saddle-Point
       Problems", arXiv, 2013.

    """
    if show:
        tstart = time.time()
        print('Adaptive Primal-dual: min_x g(Ax) + f(x) + x^T z\n'
              '---------------------------------------------------------\n'
              'Proximal operator (f): %s\n'
              'Proximal operator (g): %s\n'
              'Linear operator (A): %s\n'
              'Additional vector (z): %s\n'
              'tau0 = %10e\tmu0 = %10e\n'
              'alpha0 = %10e\teta = %10e\n'
              's = %10e\tdelta = %10e\n'
              'niter = %d\t\ttol = %10e\n' %
              (type(proxf), type(proxg), type(A),
               None if z is None else 'vector',
               tau, mu, alpha, eta, s, delta, niter, tol))
        head = '   Itn      x[0]         f          g         z^x     J = f + g + z^x'
        print(head)

    # initialization
    x = x0.copy()
    y = np.zeros(A.shape[0], dtype=x.dtype)
    Ax = np.zeros(A.shape[0], dtype=x.dtype)
    ATy = np.zeros(A.shape[1], dtype=x.dtype)
    taus = np.zeros(niter + 1)
    mus = np.zeros(niter + 1)
    alphas = np.zeros(niter + 1)
    taus[0], mus[0], alphas[0] = tau, mu, alpha
    p = d = tol + 1.
    iiter = 0
    while iiter < niter and p > tol and d > tol:
        # store old values
        xold = x.copy()
        yold = y.copy()
        Axold = Ax.copy()
        ATyold = ATy.copy()

        # proxf
        if z is not None:
            ATy += z
        x = proxf.prox(x - tau * ATy, tau)
        Ax = A.matvec(x)
        Axhat = 2 * Ax - Axold

        # proxg
        y = proxg.proxdual(y + mu * Axhat, mu)
        ATy = A.rmatvec(y)

        # update steps based on the primal (p) and dual (d) residual norms
        if z is not None:
            p = np.linalg.norm((xold - x) / tau - (ATyold - ATy) -
                               A.rmatvec(z) + z)
        else:
            p = np.linalg.norm((xold - x) / tau - (ATyold - ATy))
        d = np.linalg.norm((yold - y) / mu - (Axold - Ax))

        if p > s * d * delta:
            # primal residual dominates: increase tau, decrease mu
            tau /= 1 - alpha
            mu *= 1 - alpha
            alpha *= eta
        elif p < s * d / delta:
            # dual residual dominates: decrease tau, increase mu
            tau *= 1 - alpha
            mu /= 1 - alpha
            alpha *= eta

        # save history of steps
        taus[iiter + 1] = tau
        mus[iiter + 1] = mu
        alphas[iiter + 1] = alpha
        iiter += 1

        # run callback
        if callback is not None:
            callback(x)

        if show:
            if iiter < 10 or niter - iiter < 10 or iiter % (niter // 10) == 0:
                pf, pg = proxf(x), proxg(A.matvec(x))
                pf = 0. if type(pf) == bool else pf
                pg = 0. if type(pg) == bool else pg
                zx = 0. if z is None else np.dot(z, x)
                msg = '%6g %12.5e %10.3e %10.3e %10.3e %10.3e' % \
                      (iiter + 1, np.real(to_numpy(x[0])),
                       pf, pg, zx, pf + pg + zx)
                print(msg)
    steps = (taus[:iiter + 1], mus[:iiter + 1], alphas[:iiter + 1])

    if show:
        print('\nTotal time (s) = %.2f' % (time.time() - tstart))
    return x, steps
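
# Illustrative usage sketch (an editor's addition, not part of the library
# source): the same denoising problem as in ``_example_primaldual`` above,
# solved with ``AdaptivePrimalDual``, which additionally returns how tau,
# mu and alpha evolved through iterations. All problem and parameter
# choices below are assumptions made for demonstration.
def _example_adaptiveprimaldual():
    import pylops
    import pyproximal

    np.random.seed(0)
    n = 101
    xtrue = np.zeros(n)
    xtrue[n // 4:n // 2] = 1.                  # piecewise-constant signal
    b = xtrue + 0.1 * np.random.randn(n)       # noisy observations

    D = pylops.FirstDerivative(n, edge=True)
    proxf = pyproximal.L2(b=b)
    proxg = pyproximal.L1(sigma=0.3)
    # start from the same safe step sizes as the fixed-step solver; the
    # residual-balancing rule then adapts tau and mu on the fly
    L = np.abs((D.H * D).eigs(neigs=1)[0])
    tau = mu = float(0.99 / np.sqrt(L))
    x, (taus, mus, alphas) = AdaptivePrimalDual(proxf, proxg, D,
                                                x0=np.zeros(n),
                                                tau=tau, mu=mu, niter=100)
    return x, taus, mus, alphas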