# Source code for pyproximal.optimization.primaldual

import time
import numpy as np

from pylops.utils.backend import get_array_module, to_numpy

def PrimalDual(proxf, proxg, A, x0, tau, mu, y0=None, z=None, theta=1.,
               niter=10, gfirst=True, callback=None, callbacky=False,
               returny=False, show=False):
r"""Primal-dual algorithm

Solves the following (possibly) nonlinear minimization problem using
the general version of the first-order primal-dual algorithm of [1]_:

.. math::

\min_{\mathbf{x} \in X} g(\mathbf{Ax}) + f(\mathbf{x}) +
\mathbf{z}^T \mathbf{x}

where :math:\mathbf{A} is a linear operator, :math:f
and :math:g can be any convex functions that have a known proximal
operator.

This functional is effectively minimized by solving its equivalent
primal-dual problem (primal in :math:f, dual in :math:g):

.. math::

\min_{\mathbf{x} \in X} \max_{\mathbf{y} \in Y}
\mathbf{y}^T(\mathbf{Ax}) + \mathbf{z}^T \mathbf{x} +
f(\mathbf{x}) - g^*(\mathbf{y})

where :math:\mathbf{y} is the so-called dual variable.

    Parameters
    ----------
    proxf : :obj:`pyproximal.ProxOperator`
        Proximal operator of f function
    proxg : :obj:`pyproximal.ProxOperator`
        Proximal operator of g function
    A : :obj:`pylops.LinearOperator`
        Linear operator of g
    x0 : :obj:`numpy.ndarray`
        Initial vector
    tau : :obj:`float` or :obj:`np.ndarray`
        Stepsize of subgradient of :math:`f`. This can be constant
        or a function of iterations (in the latter case provided as
        np.ndarray)
    mu : :obj:`float` or :obj:`np.ndarray`
        Stepsize of subgradient of :math:`g^*`. This can be constant
        or a function of iterations (in the latter case provided as
        np.ndarray)
    y0 : :obj:`numpy.ndarray`, optional
        Initial dual vector (if ``None``, initialized to a zero vector)
    z : :obj:`numpy.ndarray`, optional
        Additional vector
    theta : :obj:`float`
        Scalar between 0 and 1 that defines the update of the
        :math:`\bar{\mathbf{x}}` variable - note that ``theta=0`` is a
        special case that represents the semi-implicit classical
        Arrow-Hurwicz algorithm
    niter : :obj:`int`, optional
        Number of iterations of iterative scheme
    gfirst : :obj:`bool`, optional
        Apply proximal of operator g first (``True``) or proximal of
        operator f first (``False``)
    callback : :obj:`callable`, optional
        Function with signature (``callback(x)``) to call after each
        iteration where ``x`` is the current model vector
    callbacky : :obj:`bool`, optional
        Modify callback signature to (``callback(x, y)``) when
        ``callbacky=True``
    returny : :obj:`bool`, optional
        Return also the dual variable ``y``
    show : :obj:`bool`, optional
        Display iterations log

    Returns
    -------
    x : :obj:`numpy.ndarray`
        Inverted model
    y : :obj:`numpy.ndarray`
        Dual variable (returned only when ``returny=True``)

    Notes
    -----
    The Primal-dual algorithm can be expressed by the following recursion
    (``gfirst=True``):

    .. math::

        \mathbf{y}^{k+1} = \prox_{\mu g^*}(\mathbf{y}^{k} +
        \mu \mathbf{A}\bar{\mathbf{x}}^{k})\\
        \mathbf{x}^{k+1} = \prox_{\tau f}(\mathbf{x}^{k} -
        \tau (\mathbf{A}^H \mathbf{y}^{k+1} + \mathbf{z})) \\
        \bar{\mathbf{x}}^{k+1} = \mathbf{x}^{k+1} +
        \theta (\mathbf{x}^{k+1} - \mathbf{x}^k)

    where :math:`\tau \mu \lambda_{max}(\mathbf{A}^H\mathbf{A}) < 1` is
    required to guarantee convergence.

    Alternatively, for ``gfirst=False``, the scheme becomes:

    .. math::

        \mathbf{x}^{k+1} = \prox_{\tau f}(\mathbf{x}^{k} -
        \tau (\mathbf{A}^H \mathbf{y}^{k} + \mathbf{z})) \\
        \bar{\mathbf{x}}^{k+1} = \mathbf{x}^{k+1} +
        \theta (\mathbf{x}^{k+1} - \mathbf{x}^k) \\
        \mathbf{y}^{k+1} = \prox_{\mu g^*}(\mathbf{y}^{k} +
        \mu \mathbf{A}\bar{\mathbf{x}}^{k+1})
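
    The proximal operator of the convex conjugate :math:`g^*`, needed in
    both schemes, can always be evaluated from that of :math:`g` via the
    Moreau identity:

    .. math::

        \prox_{\mu g^*}(\mathbf{x}) = \mathbf{x} -
        \mu \prox_{g/\mu}(\mathbf{x}/\mu)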

    .. [1] A., Chambolle, and T., Pock, "A first-order primal-dual algorithm
       for convex problems with applications to imaging", Journal of
       Mathematical Imaging and Vision, 40, pp. 120-145, 2011.

    """
    ncp = get_array_module(x0)

    # check if tau and mu are scalars or arrays
    fixedtau = fixedmu = False
    if isinstance(tau, (int, float)):
        tau = tau * ncp.ones(niter, dtype=x0.dtype)
        fixedtau = True
    if isinstance(mu, (int, float)):
        mu = mu * ncp.ones(niter, dtype=x0.dtype)
        fixedmu = True

    if show:
        tstart = time.time()
        print('Primal-dual: min_x g(Ax) + f(x) + x^T z\n'
              '---------------------------------------------------------\n'
              'Proximal operator (f): %s\n'
              'Proximal operator (g): %s\n'
              'Linear operator (A): %s\n'
              'Additional vector (z): %s\n'
              'tau = %s\t\tmu = %s\ntheta = %.2f\t\tniter = %d\n' %
              (type(proxf), type(proxg), type(A),
               None if z is None else 'vector',
               str(tau[0]) if fixedtau else 'Variable',
               str(mu[0]) if fixedmu else 'Variable', theta, niter))
        head = '   Itn       x[0]          f           g          z^x       J = f + g + z^x'
        print(head)

    x = x0.copy()
    xhat = x.copy()
    y = y0.copy() if y0 is not None else ncp.zeros(A.shape[0], dtype=x.dtype)
    for iiter in range(niter):
        xold = x.copy()
        if gfirst:
            # dual update first, followed by primal update and extrapolation
            y = proxg.proxdual(y + mu[iiter] * A.matvec(xhat), mu[iiter])
            ATy = A.rmatvec(y)
            if z is not None:
                ATy += z
            x = proxf.prox(x - tau[iiter] * ATy, tau[iiter])
            xhat = x + theta * (x - xold)
        else:
            # primal update and extrapolation first, followed by dual update
            ATy = A.rmatvec(y)
            if z is not None:
                ATy += z
            x = proxf.prox(x - tau[iiter] * ATy, tau[iiter])
            xhat = x + theta * (x - xold)
            y = proxg.proxdual(y + mu[iiter] * A.matvec(xhat), mu[iiter])

        # run callback
        if callback is not None:
            if callbacky:
                callback(x, y)
            else:
                callback(x)
        if show:
            if iiter < 10 or niter - iiter < 10 or iiter % (niter // 10) == 0:
                pf, pg = proxf(x), proxg(A.matvec(x))
                pf = 0. if isinstance(pf, bool) else pf
                pg = 0. if isinstance(pg, bool) else pg
                zx = 0. if z is None else np.dot(z, x)
                msg = '%6g  %12.5e  %10.3e  %10.3e  %10.3e      %10.3e' % \
                      (iiter + 1, np.real(to_numpy(x[0])),
                       pf, pg, zx, pf + pg + zx)
                print(msg)
    if show:
        print('\nTotal time (s) = %.2f' % (time.time() - tstart))
        print('---------------------------------------------------------\n')
    if not returny:
        return x
    else:
        return x, y
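
# A minimal usage sketch (an illustration added alongside the solver, not part
# of its API): denoise a piecewise-constant signal with TV regularization,
#
#     min_x 0.5 ||x - d||_2^2 + 0.5 ||Dx||_1
#
# mapped onto the solver as f(x) = 0.5 ||x - d||_2^2, g(Ax) = 0.5 ||Dx||_1,
# A = D. It assumes pyproximal's L2/L1 proximal operators and pylops'
# FirstDerivative; the signal size, noise level, and 0.5 weight are arbitrary
# choices. Step sizes honor tau * mu * lambda_max(D^H D) < 1, using the
# conservative bound lambda_max <= 4 for a first-difference stencil.
def _example_primaldual_tv():
    import pylops
    import pyproximal

    np.random.seed(0)
    n = 101
    xtrue = np.zeros(n)
    xtrue[n // 4:n // 2] = 1.                  # piecewise-constant model
    d = xtrue + 0.1 * np.random.randn(n)       # noisy data

    D = pylops.FirstDerivative(n, edge=True)   # A in the notation above
    proxf = pyproximal.L2(b=d)                 # f(x) = 0.5 ||x - d||_2^2
    proxg = pyproximal.L1(sigma=0.5)           # g(.) = 0.5 ||.||_1

    tau = mu = 0.99 / 2.                       # tau * mu * 4 < 1
    return PrimalDual(proxf, proxg, D, x0=np.zeros(n),
                      tau=tau, mu=mu, niter=100)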

def AdaptivePrimalDual(proxf, proxg, A, x0, tau, mu,
                       alpha=0.5, eta=0.95, s=1., delta=1.5,
                       z=None, niter=10, tol=1e-10, callback=None,
                       show=False):
    r"""Adaptive Primal-dual algorithm

    Solves the minimization problem in
    :func:`pyproximal.optimization.primaldual.PrimalDual`
    using an adaptive version of the first-order primal-dual algorithm of
    [1]_. The main advantage of this method is that the step sizes
    :math:`\tau` and :math:`\mu` change through iterations, improving the
    overall speed of convergence of the algorithm.
    Parameters
    ----------
    proxf : :obj:`pyproximal.ProxOperator`
        Proximal operator of f function
    proxg : :obj:`pyproximal.ProxOperator`
        Proximal operator of g function
    A : :obj:`pylops.LinearOperator`
        Linear operator of g
    x0 : :obj:`numpy.ndarray`
        Initial vector
    tau : :obj:`float`
        Initial stepsize of subgradient of :math:`f`
    mu : :obj:`float`
        Initial stepsize of subgradient of :math:`g^*`
    alpha : :obj:`float`, optional
        Initial adaptivity level (must be between 0 and 1)
    eta : :obj:`float`, optional
        Scaling of adaptivity level, multiplied to the current alpha every
        time the norms of the two residuals start to diverge (must be
        between 0 and 1)
    s : :obj:`float`, optional
        Scaling of residual balancing principle
    delta : :obj:`float`, optional
        Balancing factor. Step sizes are updated only when their ratio
        exceeds this value.
    z : :obj:`numpy.ndarray`, optional
        Additional vector
    niter : :obj:`int`, optional
        Number of iterations of iterative scheme
    tol : :obj:`float`, optional
        Tolerance on residual norms
    callback : :obj:`callable`, optional
        Function with signature (``callback(x)``) to call after each
        iteration where ``x`` is the current model vector
    show : :obj:`bool`, optional
        Display iterations log

    Returns
    -------
    x : :obj:`numpy.ndarray`
        Inverted model
    steps : :obj:`tuple`
        Evolution of tau, mu, and alpha through iterations
    Notes
    -----
    The Adaptive Primal-dual algorithm shares the same iterations as the
    original :func:`pyproximal.optimization.primaldual.PrimalDual` solver.
    The main difference lies in the fact that the step sizes ``tau`` and
    ``mu`` are adjusted at every iteration.

    Changes are applied by tracking the norms of the primal and dual
    residuals. When their ratio grows beyond a certain threshold ``delta``,
    the step lengths are updated to rebalance the minimization and
    maximization parts of the overall optimization process.
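
    Specifically, denoting by :math:`p^k` and :math:`d^k` the norms of the
    primal and dual residuals at iteration :math:`k`, the rebalancing
    applied in the code below reads:

    .. math::

        p^k > s \delta d^k: \quad
        \tau^{k+1} = \frac{\tau^k}{1-\alpha^k}, \;
        \mu^{k+1} = \mu^k (1-\alpha^k), \;
        \alpha^{k+1} = \eta \alpha^k \\
        p^k < s d^k / \delta: \quad
        \tau^{k+1} = \tau^k (1-\alpha^k), \;
        \mu^{k+1} = \frac{\mu^k}{1-\alpha^k}, \;
        \alpha^{k+1} = \eta \alpha^k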

    .. [1] T., Goldstein, M., Li, X., Yuan, E., Esser, and R., Baraniuk,
       "Adaptive Primal-Dual Hybrid Gradient Methods for Saddle-Point
       Problems", ArXiv, 2013.

    """
    if show:
        tstart = time.time()
        print('Adaptive Primal-dual: min_x g(Ax) + f(x) + x^T z\n'
              '---------------------------------------------------------\n'
              'Proximal operator (f): %s\n'
              'Proximal operator (g): %s\n'
              'Linear operator (A): %s\n'
              'Additional vector (z): %s\n'
              'tau0 = %10e\tmu0 = %10e\n'
              'alpha0 = %10e\teta = %10e\n'
              's = %10e\tdelta = %10e\n'
              'niter = %d\t\ttol = %10e\n' %
              (type(proxf), type(proxg), type(A),
               None if z is None else 'vector', tau, mu,
               alpha, eta, s, delta, niter, tol))
        head = '   Itn       x[0]          f           g          z^x       J = f + g + z^x'
        print(head)

    # initialization
    x = x0.copy()
    y = np.zeros(A.shape[0], dtype=x.dtype)
    Ax = np.zeros(A.shape[0], dtype=x.dtype)
    ATy = np.zeros(A.shape[1], dtype=x.dtype)
    taus = np.zeros(niter + 1)
    mus = np.zeros(niter + 1)
    alphas = np.zeros(niter + 1)
    taus[0], mus[0], alphas[0] = tau, mu, alpha
    p = d = tol + 1.

    iiter = 0
    while iiter < niter and p > tol and d > tol:

        # store old values
        xold = x.copy()
        yold = y.copy()
        Axold = Ax.copy()
        ATyold = ATy.copy()

        # proxf
        if z is not None:
            ATy += z
        x = proxf.prox(x - tau * ATy, tau)
        Ax = A.matvec(x)
        Axhat = 2 * Ax - Axold

        # proxg
        y = proxg.proxdual(y + mu * Axhat, mu)
        ATy = A.rmatvec(y)

        # update steps based on the norms of the primal and dual residuals
        if z is not None:
            p = np.linalg.norm((xold - x) / tau - (ATyold - ATy) -
                               A.rmatvec(z) + z)
        else:
            p = np.linalg.norm((xold - x) / tau - (ATyold - ATy))
        d = np.linalg.norm((yold - y) / mu - (Axold - Ax))

        if p > s * d * delta:
            tau /= 1 - alpha
            mu *= 1 - alpha
            alpha *= eta
        elif p < s * d / delta:
            tau *= 1 - alpha
            mu /= 1 - alpha
            alpha *= eta

        # save history of steps
        taus[iiter + 1] = tau
        mus[iiter + 1] = mu
        alphas[iiter + 1] = alpha
        iiter += 1

        # run callback
        if callback is not None:
            callback(x)

        if show:
            if iiter < 10 or niter - iiter < 10 or iiter % (niter // 10) == 0:
                pf, pg = proxf(x), proxg(A.matvec(x))
                pf = 0. if isinstance(pf, bool) else pf
                pg = 0. if isinstance(pg, bool) else pg
                zx = 0. if z is None else np.dot(z, x)
                msg = '%6g  %12.5e  %10.3e  %10.3e  %10.3e      %10.3e' % \
                      (iiter + 1, np.real(to_numpy(x[0])),
                       pf, pg, zx, pf + pg + zx)
                print(msg)

    steps = (taus[:iiter + 1], mus[:iiter + 1], alphas[:iiter + 1])
    if show:
        print('\nTotal time (s) = %.2f' % (time.time() - tstart))

    return x, steps
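
# A matching sketch (illustrative, same assumptions as _example_primaldual_tv
# above) for the adaptive solver: tau and mu start from the same safe values
# and are rebalanced internally; the returned ``steps`` tuple exposes the
# evolution of tau, mu, and alpha for inspection.
def _example_adaptiveprimaldual_tv():
    import pylops
    import pyproximal

    np.random.seed(0)
    n = 101
    xtrue = np.zeros(n)
    xtrue[n // 4:n // 2] = 1.
    d = xtrue + 0.1 * np.random.randn(n)

    D = pylops.FirstDerivative(n, edge=True)
    proxf = pyproximal.L2(b=d)
    proxg = pyproximal.L1(sigma=0.5)

    x, (taus, mus, alphas) = AdaptivePrimalDual(proxf, proxg, D,
                                                x0=np.zeros(n),
                                                tau=0.99 / 2., mu=0.99 / 2.,
                                                niter=100)
    return x, taus, mus, alphas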