
# #############################################################################
# proxalgs.py
# ===========
# Author : Matthieu Simeoni [matthieu.simeoni@gmail.com]
# #############################################################################

r"""
Proximal algorithms.

This module provides various proximal algorithms for convex optimisation.
"""

from numbers import Number
from typing import Optional, Tuple, Any

import numpy as np
from pandas import DataFrame

from pycsou.core.functional import ProximableFunctional, DifferentiableFunctional
from pycsou.core.linop import LinearOperator
from pycsou.core.map import DifferentiableMap
from pycsou.core.solver import GenericIterativeAlgorithm
from pycsou.func.base import NullDifferentiableFunctional, NullProximableFunctional
from pycsou.linop.base import IdentityOperator, NullOperator


class PrimalDualSplitting(GenericIterativeAlgorithm): r""" Primal-dual splitting algorithm. This class is also accessible via the alias ``PDS()``. Notes ----- The *Primal Dual Splitting (PDS)* method is described in [PDS]_ (this particular implementation is based on the pseudo-code Algorithm 7.1 provided in [FuncSphere]_ Chapter 7, Section 1). It can be used to solve problems of the form: .. math:: {\min_{\mathbf{x}\in\mathbb{R}^N} \;\mathcal{F}(\mathbf{x})\;\;+\;\;\mathcal{G}(\mathbf{x})\;\;+\;\;\mathcal{H}(\mathbf{K} \mathbf{x}).} where: * :math:`\mathcal{F}:\mathbb{R}^N\rightarrow \mathbb{R}` is *convex* and *differentiable*, with :math:`\beta`-*Lipschitz continuous* gradient, for some :math:`\beta\in[0,+\infty[`. * :math:`\mathcal{G}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` and :math:`\mathcal{H}:\mathbb{R}^M\rightarrow \mathbb{R}\cup\{+\infty\}` are two *proper*, *lower semicontinuous* and *convex functions* with *simple proximal operators*. * :math:`\mathbf{K}:\mathbb{R}^N\rightarrow \mathbb{R}^M` is a *linear operator*, with **operator norm**: .. math:: \Vert{\mathbf{K}}\Vert_2=\sup_{\mathbf{x}\in\mathbb{R}^N,\Vert\mathbf{x}\Vert_2=1} \Vert\mathbf{K}\mathbf{x}\Vert_2. * The problem is *feasible* --i.e. there exists at least one solution. **Remark 1:** The algorithm is still valid if one or more of the terms :math:`\mathcal{F}`, :math:`\mathcal{G}` or :math:`\mathcal{H}` is zero. **Remark 2:** Assume that the following holds: * :math:`\beta>0` and: - :math:`\frac{1}{\tau}-\sigma\Vert\mathbf{K}\Vert_{2}^2\geq \frac{\beta}{2}`, - :math:`\rho \in ]0,\delta[`, where :math:`\delta:=2-\frac{\beta}{2}\left(\frac{1}{\tau}-\sigma\Vert\mathbf{K}\Vert_{2}^2\right)^{-1}\in[1,2[.` * or :math:`\beta=0` and: - :math:`\tau\sigma\Vert\mathbf{K}\Vert_{2}^2\leq 1` - :math:`\rho \in [\epsilon,2-\epsilon]`, for some :math:`\epsilon>0.` Then, there exists a solution pair :math:`(\mathbf{x}^\star,\mathbf{z}^\star)\in\mathbb{R}^N\times \mathbb{R}^M` such that the primal and dual sequences of estimates :math:`(\mathbf{x}_n)_{n\in\mathbb{N}}` and :math:`(\mathbf{z}_n)_{n\in\mathbb{N}}` *converge* towards :math:`\mathbf{x}^\star` and :math:`\mathbf{z}^\star` respectively, i.e. .. math:: \lim_{n\rightarrow +\infty}\Vert\mathbf{x}^\star-\mathbf{x}_n\Vert_2=0, \quad \text{and} \quad \lim_{n\rightarrow +\infty}\Vert\mathbf{z}^\star-\mathbf{z}_n\Vert_2=0. **Default values of the hyperparameters provided here always ensure convergence of the algorithm.** Examples -------- Consider the following optimisation problem: .. math:: \min_{\mathbf{x}\in\mathbb{R}_+^N}\frac{1}{2}\left\|\mathbf{y}-\mathbf{G}\mathbf{x}\right\|_2^2\quad+\quad\lambda_1 \|\mathbf{D}\mathbf{x}\|_1\quad+\quad\lambda_2 \|\mathbf{x}\|_1, with :math:`\mathbf{D}\in\mathbb{R}^{N\times N}` the discrete derivative operator and :math:`\mathbf{G}\in\mathbb{R}^{L\times N}, \, \mathbf{y}\in\mathbb{R}^L, \lambda_1,\lambda_2>0.` This problem can be solved via PDS with :math:`\mathcal{F}(\mathbf{x})= \frac{1}{2}\left\|\mathbf{y}-\mathbf{G}\mathbf{x}\right\|_2^2`, :math:`\mathcal{G}(\mathbf{x})=\lambda_2\|\mathbf{x}\|_1,` :math:`\mathcal{H}(\mathbf{x})=\lambda_1 \|\mathbf{x}\|_1` and :math:`\mathbf{K}=\mathbf{D}`. .. 
plot:: import numpy as np import matplotlib.pyplot as plt from pycsou.linop.diff import FirstDerivative from pycsou.func.loss import SquaredL2Loss from pycsou.func.penalty import L1Norm, NonNegativeOrthant from pycsou.linop.sampling import DownSampling from pycsou.opt.proxalgs import PrimalDualSplitting x = np.repeat([0, 2, 1, 3, 0, 2, 0], 10) D = FirstDerivative(size=x.size, kind='forward') D.compute_lipschitz_cst(tol=1e-3) rng = np.random.default_rng(0) G = DownSampling(size=x.size, downsampling_factor=3) G.compute_lipschitz_cst() y = G(x) l22_loss = (1 / 2) * SquaredL2Loss(dim=G.shape[0], data=y) F = l22_loss * G lambda_ = 0.1 H = lambda_ * L1Norm(dim=D.shape[0]) G = 0.01 * L1Norm(dim=G.shape[1]) pds = PrimalDualSplitting(dim=G.shape[1], F=F, G=G, H=H, K=D, verbose=None) estimate, converged, diagnostics = pds.iterate() plt.figure() plt.stem(x, linefmt='C0-', markerfmt='C0o') plt.stem(estimate['primal_variable'], linefmt='C1--', markerfmt='C1s') plt.legend(['Ground truth', 'PDS Estimate']) plt.show() See Also -------- :py:class:`~pycsou.opt.proxalgs.PDS`, :py:class:`~pycsou.opt.proxalgs.ChambollePockSplitting`, :py:class:`~pycsou.opt.proxalgs.DouglasRachford` """
[docs] def __init__(self, dim: int, F: Optional[DifferentiableMap] = None, G: Optional[ProximableFunctional] = None, H: Optional[ProximableFunctional] = None, K: Optional[LinearOperator] = None, tau: Optional[float] = None, sigma: Optional[float] = None, rho: Optional[float] = None, beta: Optional[float] = None, x0: Optional[np.ndarray] = None, z0: Optional[np.ndarray] = None, max_iter: int = 500, min_iter: int = 10, accuracy_threshold: float = 1e-3, verbose: Optional[int] = 1): r""" Parameters ---------- dim: int Dimension of the objective functional's domain. F: Optional[DifferentiableMap] Differentiable map :math:`\mathcal{F}`. G: Optional[ProximableFunctional] Proximable functional :math:`\mathcal{G}`. H: Optional[ProximableFunctional] Proximable functional :math:`\mathcal{H}`. K: Optional[LinearOperator] Linear operator :math:`\mathbf{K}`. tau: Optional[float] Primal step size. sigma: Optional[float] Dual step size. rho: Optional[float] Momentum parameter. beta: Optional[float] Lipschitz constant :math:`\beta` of the derivative of :math:`\mathcal{F}`. x0: Optional[np.ndarray] Initial guess for the primal variable. z0: Optional[np.ndarray] Initial guess for the dual variable. max_iter: int Maximal number of iterations. min_iter: int Minimal number of iterations. accuracy_threshold: float Accuracy threshold for stopping criterion. verbose: int Print diagnostics every ``verbose`` iterations. If ``None`` does not print anything. """ self.dim = dim self._H = True if isinstance(F, DifferentiableMap): if F.shape[1] != dim: raise ValueError(f'F does not have the proper dimension: {F.shape[1]}!={dim}.') else: self.F = F if F.diff_lipschitz_cst < np.infty: self.beta = self.F.diff_lipschitz_cst if beta is None else beta elif (beta is not None) and isinstance(beta, Number): self.beta = beta else: raise ValueError('F must be a differentiable functional with Lipschitz-continuous gradient.') elif F is None: self.F = NullDifferentiableFunctional(dim=dim) self.beta = 0 else: raise TypeError(f'F must be of type {DifferentiableMap}.') if isinstance(G, ProximableFunctional): if G.dim != dim: raise ValueError(f'G does not have the proper dimension: {G.dim}!={dim}.') else: self.G = G elif G is None: self.G = NullProximableFunctional(dim=dim) else: raise TypeError(f'G must be of type {ProximableFunctional}.') if isinstance(K, LinearOperator) and isinstance(H, ProximableFunctional): if (K.shape[1] != dim) or (K.shape[0] != H.dim): raise ValueError( f'Operator K with shape {K.shape} is inconsistent with functional H with dimension {H.dim}.') if isinstance(H, ProximableFunctional): self.H = H if isinstance(K, LinearOperator): self.K = K elif K is None: self.K = IdentityOperator(size=H.dim) self.K.lipschitz_cst = self.K.diff_lipschitz_cst = 1 else: raise TypeError(f'K must be of type {LinearOperator}.') elif H is None: self.H = NullProximableFunctional(dim=dim) self._H = False self.K = NullOperator(shape=(dim, dim)) self.K.lipschitz_cst = self.K.diff_lipschitz_cst = 0 else: raise TypeError(f'H must be of type {ProximableFunctional}.') if (tau is not None) and (sigma is not None): self.tau, self.sigma = tau, sigma elif (tau is not None) and (sigma is None): self.tau = self.sigma = tau elif (tau is None) and (sigma is not None): self.tau = self.sigma = sigma else: self.tau, self.sigma = self.set_step_sizes() if rho is not None: self.rho = rho else: self.rho = self.set_momentum_term() if x0 is not None: self.x0 = np.asarray(x0) else: self.x0 = self.initialize_primal_variable() if z0 is not None: self.z0 = 
np.asarray(z0) else: self.z0 = self.initialize_dual_variable() objective_functional = (self.F + self.G) + (self.H * self.K) init_iterand = {'primal_variable': self.x0, 'dual_variable': self.z0} super(PrimalDualSplitting, self).__init__(objective_functional=objective_functional, init_iterand=init_iterand, max_iter=max_iter, min_iter=min_iter, accuracy_threshold=accuracy_threshold, verbose=verbose)
def set_step_sizes(self) -> Tuple[float, float]: r""" Set the primal/dual step sizes. Returns ------- Tuple[float, float] Sensible primal/dual step sizes. Notes ----- In practice, the convergence speed of the algorithm is improved by choosing :math:`\sigma` and :math:`\tau` as large as possible and relatively well-balanced --so that both the primal and dual variables converge at the same pace. It is hence recommended to choose perfectly balanced parameters :math:`\sigma=\tau` saturating the convergence inequalities. For :math:`\beta>0` this yields: .. math:: \frac{1}{\tau}-\tau\Vert\mathbf{K}\Vert_{2}^2= \frac{\beta}{2} \quad\Longleftrightarrow\quad -2\tau^2\Vert\mathbf{K}\Vert_{2}^2-\beta\tau+2=0, which admits one positive root .. math:: \tau=\sigma=\frac{1}{\Vert\mathbf{K}\Vert_{2}^2}\left(-\frac{\beta}{4}+\sqrt{\frac{\beta^2}{16}+\Vert\mathbf{K}\Vert_{2}^2}\right). For :math:`\beta=0`, this yields .. math:: \tau=\sigma=\Vert\mathbf{K}\Vert_{2}^{-1}. """ if self.beta > 0: if self._H is False: tau = 2 / self.beta sigma = 0 else: if self.K.lipschitz_cst < np.infty: tau = sigma = (1 / (self.K.lipschitz_cst) ** 2) * ( (-self.beta / 4) + np.sqrt((self.beta ** 2 / 16) + self.K.lipschitz_cst ** 2)) else: raise ValueError( 'Please compute the Lipschitz constant of the linear operator K by calling its method "compute_lipschitz_cst()".') else: if self._H is False: tau = 1 sigma = 0 else: if self.K.lipschitz_cst < np.infty: tau = sigma = 1 / self.K.lipschitz_cst else: raise ValueError( 'Please compute the Lipschitz constant of the linear operator K by calling its method "compute_lipschitz_cst()".') return tau, sigma
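# Illustrative check of the balanced step size above (kept as a comment so it does not
# execute): for any hypothetical beta > 0 and operator norm kappa = ||K||_2, the positive
# root saturates the convergence inequality 1/tau - tau*kappa^2 = beta/2.
#
#     import numpy as np
#     beta, kappa = 2.0, 1.5
#     tau = (1 / kappa ** 2) * (-beta / 4 + np.sqrt(beta ** 2 / 16 + kappa ** 2))
#     assert np.isclose(1 / tau - tau * kappa ** 2, beta / 2)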
def set_momentum_term(self) -> float: r""" Sets the momentum term. Returns ------- float Momentum term. """ if self.beta > 0: rho = 0.9 else: rho = 1 return rho
def initialize_primal_variable(self) -> np.ndarray: """ Initialize the primal variable to zero. Returns ------- np.ndarray Zero-initialized primal variable. """ return np.zeros(shape=(self.dim,), dtype=float)
def initialize_dual_variable(self) -> Optional[np.ndarray]: """ Initialize the dual variable to zero. Returns ------- np.ndarray Zero-initialized dual variable. """ if self._H is False: return None else: return np.zeros(shape=(self.H.dim,), dtype=float)
def update_iterand(self) -> dict: if self.iter == 0: x, z = self.init_iterand.values() else: x, z = self.iterand.values() x_temp = self.G.prox(x - self.tau * self.F.gradient(x) - self.tau * self.K.adjoint(z), tau=self.tau) if self._H is True: u = 2 * x_temp - x z_temp = self.H.fenchel_prox(z + self.sigma * self.K(u), sigma=self.sigma) z = self.rho * z_temp + (1 - self.rho) * z x = self.rho * x_temp + (1 - self.rho) * x iterand = {'primal_variable': x, 'dual_variable': z} return iterand
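# The update above, spelled out as a standalone NumPy sketch (kept as a comment so it does
# not execute on import) for the toy problem min_x 0.5*||x - y||_2^2 + lam*||x||_1, i.e.
# F = 0.5*||. - y||_2^2, G = 0, H = lam*||.||_1 and K = Id. The dual prox of H is a clip to
# [-lam, lam] (Moreau decomposition); all names and constants are illustrative.
#
#     import numpy as np
#     rng = np.random.default_rng(0)
#     y = rng.standard_normal(8)
#     lam, tau, sigma, rho = 0.1, 0.5, 0.5, 0.9
#     x, z = np.zeros(8), np.zeros(8)
#     for _ in range(100):
#         x_temp = x - tau * (x - y) - tau * z              # prox of G = 0 is the identity
#         z_temp = np.clip(z + sigma * (2 * x_temp - x), -lam, lam)
#         x, z = rho * x_temp + (1 - rho) * x, rho * z_temp + (1 - rho) * z
#     # x now approximates the soft-thresholding of y at level lam.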
def print_diagnostics(self): print(dict(self.diagnostics.loc[self.iter]))
def stopping_metric(self): if self.iter == 0: return np.infty else: return self.diagnostics.loc[self.iter - 1, 'Relative Improvement (primal variable)']
[docs] def update_diagnostics(self): if self._H is True: if self.iter == 0: self.diagnostics = DataFrame( columns=['Iter', 'Relative Improvement (primal variable)', 'Relative Improvement (dual variable)']) self.diagnostics.loc[self.iter, 'Iter'] = self.iter if np.linalg.norm(self.old_iterand['primal_variable']) == 0: self.diagnostics.loc[self.iter, 'Relative Improvement (primal variable)'] = np.infty else: self.diagnostics.loc[self.iter, 'Relative Improvement (primal variable)'] = np.linalg.norm( self.old_iterand['primal_variable'] - self.iterand['primal_variable']) / np.linalg.norm( self.old_iterand['primal_variable']) if np.linalg.norm(self.old_iterand['dual_variable']) == 0: self.diagnostics.loc[self.iter, 'Relative Improvement (dual variable)'] = np.infty else: self.diagnostics.loc[self.iter, 'Relative Improvement (dual variable)'] = np.linalg.norm( self.old_iterand['dual_variable'] - self.iterand['dual_variable']) / np.linalg.norm( self.old_iterand['dual_variable']) else: if self.iter == 0: self.diagnostics = DataFrame( columns=['Iter', 'Relative Improvement (primal variable)']) self.diagnostics.loc[self.iter, 'Iter'] = self.iter if np.linalg.norm(self.old_iterand['primal_variable']) == 0: self.diagnostics.loc[self.iter, 'Relative Improvement (primal variable)'] = np.infty else: self.diagnostics.loc[self.iter, 'Relative Improvement (primal variable)'] = np.linalg.norm( self.old_iterand['primal_variable'] - self.iterand['primal_variable']) / np.linalg.norm( self.old_iterand['primal_variable'])
PDS = PrimalDualSplitting
class AcceleratedProximalGradientDescent(GenericIterativeAlgorithm): r""" Accelerated proximal gradient descent. This class is also accessible via the alias ``APGD()``. Notes ----- The *Accelerated Proximal Gradient Descent (APGD)* method can be used to solve problems of the form: .. math:: {\min_{\mathbf{x}\in\mathbb{R}^N} \;\mathcal{F}(\mathbf{x})\;\;+\;\;\mathcal{G}(\mathbf{x}).} where: * :math:`\mathcal{F}:\mathbb{R}^N\rightarrow \mathbb{R}` is *convex* and *differentiable*, with :math:`\beta`-*Lipschitz continuous* gradient, for some :math:`\beta\in[0,+\infty[`. * :math:`\mathcal{G}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` is a *proper*, *lower semicontinuous* and *convex function* with a *simple proximal operator*. * The problem is *feasible* --i.e. there exists at least one solution. **Remark 1:** the algorithm is still valid if one or more of the terms :math:`\mathcal{F}` or :math:`\mathcal{G}` is zero. **Remark 2:** The convergence is guaranteed for step sizes :math:`\tau\leq 1/\beta`. Without acceleration, APGD can be seen as a PDS method with :math:`\rho=1`. The various acceleration schemes are described in [APGD]_. For :math:`0<\tau\leq 1/\beta` and Chambolle and Dossal's acceleration scheme (``acceleration='CD'``), APGD achieves the following (optimal) *convergence rates*: .. math:: \lim\limits_{n\rightarrow \infty} n^2\left\vert \mathcal{J}(\mathbf{x}^\star)- \mathcal{J}(\mathbf{x}_n)\right\vert=0\qquad \&\qquad \lim\limits_{n\rightarrow \infty} n^2\Vert \mathbf{x}_n-\mathbf{x}_{n-1}\Vert^2_\mathcal{X}=0, for *some minimiser* :math:`{\mathbf{x}^\star}\in\arg\min_{\mathbf{x}\in\mathbb{R}^N} \;\left\{\mathcal{J}(\mathbf{x}):=\mathcal{F}(\mathbf{x})+\mathcal{G}(\mathbf{x})\right\}`. In other words, both the objective functional and the APGD iterates :math:`\{\mathbf{x}_n\}_{n\in\mathbb{N}}` converge at a rate :math:`o(1/n^2)`. In comparison, Beck and Teboulle's acceleration scheme (``acceleration='BT'``) only achieves a convergence rate of :math:`O(1/n^2)`. Significant practical *speedup* can moreover be achieved for values of :math:`d` in the range :math:`[50,100]` [APGD]_. Examples -------- Consider the *LASSO problem*: .. math:: \min_{\mathbf{x}\in\mathbb{R}^N}\frac{1}{2}\left\|\mathbf{y}-\mathbf{G}\mathbf{x}\right\|_2^2\quad+\quad\lambda \|\mathbf{x}\|_1, with :math:`\mathbf{G}\in\mathbb{R}^{L\times N}, \, \mathbf{y}\in\mathbb{R}^L, \lambda>0.` This problem can be solved via APGD with :math:`\mathcal{F}(\mathbf{x})= \frac{1}{2}\left\|\mathbf{y}-\mathbf{G}\mathbf{x}\right\|_2^2` and :math:`\mathcal{G}(\mathbf{x})=\lambda \|\mathbf{x}\|_1`. We have: .. math:: \mathbf{\nabla}\mathcal{F}(\mathbf{x})=\mathbf{G}^T(\mathbf{G}\mathbf{x}-\mathbf{y}), \qquad \text{prox}_{\lambda\|\cdot\|_1}(\mathbf{x})=\text{soft}_\lambda(\mathbf{x}). This yields the so-called *Fast Iterative Soft Thresholding Algorithm (FISTA)*, whose convergence is guaranteed for :math:`d>2` and :math:`0<\tau\leq \beta^{-1}=\|\mathbf{G}\|_2^{-2}`. .. 
plot:: import numpy as np import matplotlib.pyplot as plt from pycsou.func.loss import SquaredL2Loss from pycsou.func.penalty import L1Norm from pycsou.linop.base import DenseLinearOperator from pycsou.opt.proxalgs import APGD rng = np.random.default_rng(0) G = DenseLinearOperator(rng.standard_normal(15).reshape(3,5)) G.compute_lipschitz_cst() x = np.zeros(G.shape[1]) x[1] = 1 x[-2] = -1 y = G(x) l22_loss = (1/2) * SquaredL2Loss(dim=G.shape[0], data=y) F = l22_loss * G lambda_ = 0.9 * np.max(np.abs(F.gradient(0 * x))) G = lambda_ * L1Norm(dim=G.shape[1]) apgd = APGD(dim=G.shape[1], F=F, G=G, acceleration='CD', verbose=None) estimate, converged, diagnostics = apgd.iterate() plt.figure() plt.stem(x, linefmt='C0-', markerfmt='C0o') plt.stem(estimate['iterand'], linefmt='C1--', markerfmt='C1s') plt.legend(['Ground truth', 'LASSO Estimate']) plt.show() See Also -------- :py:class:`~pycsou.opt.proxalgs.APGD` """
[docs] def __init__(self, dim: int, F: Optional[DifferentiableMap] = None, G: Optional[ProximableFunctional] = None, tau: Optional[float] = None, acceleration: Optional[str] = 'CD', beta: Optional[float] = None, x0: Optional[np.ndarray] = None, max_iter: int = 500, min_iter: int = 10, accuracy_threshold: float = 1e-3, verbose: Optional[int] = 1, d: float = 75.): r""" Parameters ---------- dim: int Dimension of the objective functional's domain. F: Optional[DifferentiableMap] Differentiable map :math:`\mathcal{F}`. G: Optional[ProximableFunctional] Proximable functional :math:`\mathcal{G}`. tau: Optional[float] Primal step size. acceleration: Optional[str] [None, 'BT', 'CD'] Which acceleration scheme should be used (`None` for no acceleration). beta: Optional[float] Lipschitz constant :math:`\beta` of the derivative of :math:`\mathcal{F}`. x0: Optional[np.ndarray] Initial guess for the primal variable. max_iter: int Maximal number of iterations. min_iter: int Minimal number of iterations. accuracy_threshold: float Accuracy threshold for stopping criterion. verbose: int Print diagnostics every ``verbose`` iterations. If ``None`` does not print anything. d: float Parameter :math:`d` for Chambolle and Dossal's acceleration scheme (``acceleration='CD'``). """ self.dim = dim self.acceleration = acceleration self.d = d if isinstance(F, DifferentiableMap): if F.shape[1] != dim: raise ValueError(f'F does not have the proper dimension: {F.shape[1]}!={dim}.') else: self.F = F if F.diff_lipschitz_cst < np.infty: self.beta = self.F.diff_lipschitz_cst if beta is None else beta elif (beta is not None) and isinstance(beta, Number): self.beta = beta else: raise ValueError('F must be a differentiable functional with Lipschitz-continuous gradient.') elif F is None: self.F = NullDifferentiableFunctional(dim=dim) self.beta = 0 else: raise TypeError(f'F must be of type {DifferentiableMap}.') if isinstance(G, ProximableFunctional): if G.dim != dim: raise ValueError(f'G does not have the proper dimension: {G.dim}!={dim}.') else: self.G = G elif G is None: self.G = NullProximableFunctional(dim=dim) else: raise TypeError(f'G must be of type {ProximableFunctional}.') if tau is not None: self.tau = tau else: self.tau = self.set_step_size() if x0 is not None: self.x0 = np.asarray(x0) else: self.x0 = self.initialize_iterate() objective_functional = self.F + self.G init_iterand = {'iterand': self.x0, 'past_aux': 0 * self.x0, 'past_t': 1} super(AcceleratedProximalGradientDescent, self).__init__(objective_functional=objective_functional, init_iterand=init_iterand, max_iter=max_iter, min_iter=min_iter, accuracy_threshold=accuracy_threshold, verbose=verbose)
def set_step_size(self) -> float: r""" Set the step size to its largest admissible value :math:`1/\beta`. Returns ------- float Largest admissible step size. """ return 1 / self.beta
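# For F(x) = 0.5*||y - G x||_2^2 one has beta = ||G||_2^2, which pycsou estimates with
# compute_lipschitz_cst(). A minimal power-iteration sketch on a plain NumPy matrix
# (illustrative only, kept as a comment; matrix and iteration count are arbitrary):
#
#     import numpy as np
#     rng = np.random.default_rng(0)
#     A = rng.standard_normal((20, 50))
#     v = rng.standard_normal(50)
#     for _ in range(200):                  # power iteration on A^T A
#         v = A.T @ (A @ v)
#         v /= np.linalg.norm(v)
#     beta = np.linalg.norm(A @ v) ** 2     # approximately ||A||_2^2, hence tau = 1/beta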
def initialize_iterate(self) -> np.ndarray: """ Initialize the iterand to zero. Returns ------- np.ndarray Zero-initialized iterand. """ return np.zeros(shape=(self.dim,), dtype=float)
def update_iterand(self) -> Any: if self.iter == 0: x, x_old, t_old = self.init_iterand.values() else: x, x_old, t_old = self.iterand.values() x_temp = self.G.prox(x - self.tau * self.F.gradient(x), tau=self.tau) if self.acceleration == 'BT': t = (1 + np.sqrt(1 + 4 * t_old ** 2)) / 2 elif self.acceleration == 'CD': t = (self.iter + self.d) / self.d else: t = t_old = 1 a = (t_old - 1) / t x = x_temp + a * (x_temp - x_old) iterand = {'iterand': x, 'past_aux': x_temp, 'past_t': t} return iterand
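# Momentum weights a_n = (t_{n-1} - 1)/t_n generated by the two schemes above, for the
# first few iterations with the default d = 75 (illustrative, kept as a comment):
#
#     import numpy as np
#     d, t_bt, t_cd = 75.0, 1.0, 1.0
#     for n in range(1, 6):
#         t_bt_new = (1 + np.sqrt(1 + 4 * t_bt ** 2)) / 2   # 'BT' (Beck-Teboulle) rule
#         t_cd_new = (n + d) / d                            # 'CD' (Chambolle-Dossal) rule
#         print(n, (t_bt - 1) / t_bt_new, (t_cd - 1) / t_cd_new)
#         t_bt, t_cd = t_bt_new, t_cd_new
#     # 'BT' weights approach 1 within a few iterations; 'CD' weights grow like (n-1)/(n+d).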
def print_diagnostics(self): print(dict(self.diagnostics.loc[self.iter]))
def stopping_metric(self): if self.iter == 0: return np.infty else: return self.diagnostics.loc[self.iter - 1, 'Relative Improvement']
def update_diagnostics(self): if self.iter == 0: self.diagnostics = DataFrame( columns=['Iter', 'Relative Improvement']) self.diagnostics.loc[self.iter, 'Iter'] = self.iter if np.linalg.norm(self.old_iterand['iterand']) == 0: self.diagnostics.loc[self.iter, 'Relative Improvement'] = np.infty else: self.diagnostics.loc[self.iter, 'Relative Improvement'] = np.linalg.norm( self.old_iterand['iterand'] - self.iterand['iterand']) / np.linalg.norm( self.old_iterand['iterand'])
APGD = AcceleratedProximalGradientDescent
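# Plain-NumPy FISTA sketch matching the LASSO example in the APGD docstring: gradient
# G^T(Gx - y), soft-thresholding prox, and the 'CD' rule t_n = (n + d)/d. Matrix, lambda
# and iteration count are illustrative; kept as a comment so it does not run on import.
#
#     import numpy as np
#     rng = np.random.default_rng(0)
#     G_mat = rng.standard_normal((30, 100))
#     x_true = np.zeros(100); x_true[[5, 40, 80]] = [1.0, -2.0, 1.5]
#     y = G_mat @ x_true
#     lam = 0.1 * np.max(np.abs(G_mat.T @ y))
#     tau, d = 1 / np.linalg.norm(G_mat, 2) ** 2, 75.0       # tau = 1/beta
#     x = x_old = np.zeros(100); t_old = 1.0
#     for n in range(500):
#         v = x - tau * G_mat.T @ (G_mat @ x - y)
#         x_temp = np.sign(v) * np.maximum(np.abs(v) - tau * lam, 0)
#         t = (n + d) / d
#         x, x_old, t_old = x_temp + ((t_old - 1) / t) * (x_temp - x_old), x_temp, t
#     # x_old holds the FISTA estimate; its support should match x_true's.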
class ChambollePockSplitting(PrimalDualSplitting): r""" Chambolle and Pock primal-dual splitting method. This class is also accessible via the alias ``CPS()``. Notes ----- The *Chambolle and Pock primal-dual splitting (CPS)* method can be used to solve problems of the form: .. math:: {\min_{\mathbf{x}\in\mathbb{R}^N} \mathcal{G}(\mathbf{x})\;\;+\;\;\mathcal{H}(\mathbf{K} \mathbf{x}).} where: * :math:`\mathcal{G}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` and :math:`\mathcal{H}:\mathbb{R}^M\rightarrow \mathbb{R}\cup\{+\infty\}` are two *proper*, *lower semicontinuous* and *convex functions* with *simple proximal operators*. * :math:`\mathbf{K}:\mathbb{R}^N\rightarrow \mathbb{R}^M` is a *linear operator*, with **operator norm**: .. math:: \Vert{\mathbf{K}}\Vert_2=\sup_{\mathbf{x}\in\mathbb{R}^N,\Vert\mathbf{x}\Vert_2=1} \Vert\mathbf{K}\mathbf{x}\Vert_2. * The problem is *feasible* --i.e. there exists at least one solution. **Remark 1:** The algorithm is still valid if one of the terms :math:`\mathcal{G}` or :math:`\mathcal{H}` is zero. **Remark 2:** Assume that the following holds: - :math:`\tau\sigma\Vert\mathbf{K}\Vert_{2}^2\leq 1` - :math:`\rho \in [\epsilon,2-\epsilon]`, for some :math:`\epsilon>0.` Then, there exists a solution pair :math:`(\mathbf{x}^\star,\mathbf{z}^\star)\in\mathbb{R}^N\times \mathbb{R}^M` such that the primal and dual sequences of estimates :math:`(\mathbf{x}_n)_{n\in\mathbb{N}}` and :math:`(\mathbf{z}_n)_{n\in\mathbb{N}}` *converge* towards :math:`\mathbf{x}^\star` and :math:`\mathbf{z}^\star` respectively, i.e. .. math:: \lim_{n\rightarrow +\infty}\Vert\mathbf{x}^\star-\mathbf{x}_n\Vert_2=0, \quad \text{and} \quad \lim_{n\rightarrow +\infty}\Vert\mathbf{z}^\star-\mathbf{z}_n\Vert_2=0. **Default values of the hyperparameters provided here always ensure convergence of the algorithm.** See Also -------- :py:class:`~pycsou.opt.proxalgs.CPS`, :py:class:`~pycsou.opt.proxalgs.PrimalDualSplitting`, :py:class:`~pycsou.opt.proxalgs.DouglasRachfordSplitting` """
[docs] def __init__(self, dim: int, G: Optional[ProximableFunctional] = None, H: Optional[ProximableFunctional] = None, K: Optional[LinearOperator] = None, tau: Optional[float] = None, sigma: Optional[float] = None, rho: Optional[float] = 1, x0: Optional[np.ndarray] = None, z0: Optional[np.ndarray] = None, max_iter: int = 500, min_iter: int = 10, accuracy_threshold: float = 1e-3, verbose: Optional[int] = 1): r""" Parameters ---------- dim: int Dimension of the objective functional's domain. G: Optional[ProximableFunctional] Proximable functional :math:`\mathcal{G}`. H: Optional[ProximableFunctional] Proximable functional :math:`\mathcal{H}`. K: Optional[LinearOperator] Linear operator :math:`\mathbf{K}`. tau: Optional[float] Primal step size. sigma: Optional[float] Dual step size. rho: Optional[float] Momentum parameter. x0: Optional[np.ndarray] Initial guess for the primal variable. z0: Optional[np.ndarray] Initial guess for the dual variable. max_iter: int Maximal number of iterations. min_iter: int Minimal number of iterations. accuracy_threshold: float Accuracy threshold for stopping criterion. verbose: int Print diagnostics every ``verbose`` iterations. If ``None`` does not print anything. """ super(ChambollePockSplitting, self).__init__(dim=dim, F=None, G=G, H=H, K=K, tau=tau, sigma=sigma, rho=rho, x0=x0, z0=z0, max_iter=max_iter, min_iter=min_iter, accuracy_threshold=accuracy_threshold, verbose=verbose)
CPS = ChambollePockSplitting
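# Minimal NumPy sketch of the CPS iterations for 1-D total-variation denoising,
# min_x 0.5*||x - y||_2^2 + lam*||D x||_1 with D the forward-difference operator: the
# prox of G = 0.5*||. - y||^2 is (v + tau*y)/(1 + tau), the dual prox of H = lam*||.||_1
# is a clip to [-lam, lam], and rho = 1 as in this class's default. All constants are
# illustrative; kept as a comment so it does not run on import.
#
#     import numpy as np
#     rng = np.random.default_rng(0)
#     y = np.repeat([0.0, 2.0, 1.0], 20) + 0.1 * rng.standard_normal(60)
#     lam = 0.5
#     tau = sigma = 0.5                           # tau*sigma*||D||_2^2 <= 1 since ||D||_2 <= 2
#     D = lambda v: np.diff(v, append=v[-1])      # forward differences (last entry 0)
#     Dt = lambda w: -np.diff(w, prepend=0.0)     # adjoint (exact here since w[-1] stays 0)
#     x, z = y.copy(), np.zeros_like(y)
#     for _ in range(300):
#         x_temp = (x - tau * Dt(z) + tau * y) / (1 + tau)
#         z = np.clip(z + sigma * D(2 * x_temp - x), -lam, lam)
#         x = x_temp
#     # x is a piecewise-constant (TV-regularised) denoised version of y.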
class DouglasRachfordSplitting(PrimalDualSplitting): r""" Douglas-Rachford splitting algorithm. This class is also accessible via the alias ``DRS()``. Notes ----- The *Douglas-Rachford Splitting (DRS)* method can be used to solve problems of the form: .. math:: {\min_{\mathbf{x}\in\mathbb{R}^N} \mathcal{G}(\mathbf{x})\;\;+\;\;\mathcal{H}(\mathbf{x}).} where: * :math:`\mathcal{G}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` and :math:`\mathcal{H}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` are two *proper*, *lower semicontinuous* and *convex functions* with *simple proximal operators*. * The problem is *feasible* --i.e. there exists at least one solution. **Remark 1:** The algorithm is still valid if one of the terms :math:`\mathcal{G}` or :math:`\mathcal{H}` is zero. **Default values of the hyperparameters provided here always ensure convergence of the algorithm.** See Also -------- :py:class:`~pycsou.opt.proxalgs.DRS`, :py:class:`~pycsou.opt.proxalgs.PrimalDualSplitting`, :py:class:`~pycsou.opt.proxalgs.ChambollePockSplitting` """
[docs] def __init__(self, dim: int, G: Optional[ProximableFunctional] = None, H: Optional[ProximableFunctional] = None, tau: float = 1., x0: Optional[np.ndarray] = None, z0: Optional[np.ndarray] = None, max_iter: int = 500, min_iter: int = 10, accuracy_threshold: float = 1e-3, verbose: Optional[int] = 1): r""" Parameters ---------- dim: int Dimension of the objective functional's domain. G: Optional[ProximableFunctional] Proximable functional :math:`\mathcal{G}`. H: Optional[ProximableFunctional] Proximable functional :math:`\mathcal{H}`. tau: Optional[float] Primal step size. x0: Optional[np.ndarray] Initial guess for the primal variable. z0: Optional[np.ndarray] Initial guess for the dual variable. max_iter: int Maximal number of iterations. min_iter: int Minimal number of iterations. accuracy_threshold: float Accuracy threshold for stopping criterion. verbose: int Print diagnostics every ``verbose`` iterations. If ``None`` does not print anything. """ super(DouglasRachfordSplitting, self).__init__(dim=dim, F=None, G=G, H=H, K=None, tau=tau, sigma=1 / tau, rho=1, x0=x0, z0=z0, max_iter=max_iter, min_iter=min_iter, accuracy_threshold=accuracy_threshold, verbose=verbose)
DRS = DouglasRachfordSplitting
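# Standalone sketch of the recursion this class inherits (PDS with K = Id, sigma = 1/tau,
# rho = 1), applied to min_x 0.5*||x - y||_2^2 + iota_{x >= 0}(x), whose solution is the
# clipping of y at zero. For H = iota_{>= 0}, prox_{sigma*H*}(v) = min(v, 0). Illustrative
# only, kept as a comment so it does not run on import.
#
#     import numpy as np
#     rng = np.random.default_rng(0)
#     y = rng.standard_normal(10)
#     tau = 1.0; sigma = 1.0 / tau
#     x, z = np.zeros_like(y), np.zeros_like(y)
#     for _ in range(200):
#         x_temp = (x - tau * z + tau * y) / (1 + tau)      # prox of G = 0.5*||. - y||^2
#         z = np.minimum(z + sigma * (2 * x_temp - x), 0)   # dual prox of H = iota_{>= 0}
#         x = x_temp
#     assert np.allclose(x, np.maximum(y, 0), atol=1e-4)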
class ForwardBackwardSplitting(PrimalDualSplitting): r""" Forward-backward splitting algorithm. This class is also accessible via the alias ``FBS()``. Notes ----- The *Forward-backward splitting (FBS)* method can be used to solve problems of the form: .. math:: {\min_{\mathbf{x}\in\mathbb{R}^N} \;\mathcal{F}(\mathbf{x})\;\;+\;\;\mathcal{G}(\mathbf{x}).} where: * :math:`\mathcal{F}:\mathbb{R}^N\rightarrow \mathbb{R}` is *convex* and *differentiable*, with :math:`\beta`-*Lipschitz continuous* gradient, for some :math:`\beta\in[0,+\infty[`. * :math:`\mathcal{G}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` is a *proper*, *lower semicontinuous* and *convex function* with a *simple proximal operator*. * The problem is *feasible* --i.e. there exists at least one solution. **Remark 1:** The algorithm is still valid if one of the terms :math:`\mathcal{F}` or :math:`\mathcal{G}` is zero. **Remark 2:** Assume that the following holds: - :math:`\frac{1}{\tau}\geq \frac{\beta}{2}`, - :math:`\rho \in ]0,\delta[`, where :math:`\delta:=2-\frac{\beta}{2}\tau\in[1,2[.` Then, the sequence of estimates :math:`(\mathbf{x}_n)_{n\in\mathbb{N}}` *converges* towards a solution :math:`\mathbf{x}^\star\in\mathbb{R}^N` of the problem, i.e. .. math:: \lim_{n\rightarrow +\infty}\Vert\mathbf{x}^\star-\mathbf{x}_n\Vert_2=0. **Default values of the hyperparameters provided here always ensure convergence of the algorithm.** See Also -------- :py:class:`~pycsou.opt.proxalgs.FBS`, :py:class:`~pycsou.opt.proxalgs.AcceleratedProximalGradientDescent` """
def __init__(self, dim: int, F: Optional[DifferentiableFunctional] = None, G: Optional[ProximableFunctional] = None, tau: Optional[float] = None, rho: Optional[float] = 1, x0: Optional[np.ndarray] = None, max_iter: int = 500, min_iter: int = 10, accuracy_threshold: float = 1e-3, verbose: Optional[int] = 1): r""" Parameters ---------- dim: int Dimension of the objective functional's domain. F: Optional[DifferentiableFunctional] Differentiable functional :math:`\mathcal{F}`. G: Optional[ProximableFunctional] Proximable functional :math:`\mathcal{G}`. tau: Optional[float] Primal step size. rho: Optional[float] Momentum parameter. x0: Optional[np.ndarray] Initial guess for the primal variable. max_iter: int Maximal number of iterations. min_iter: int Minimal number of iterations. accuracy_threshold: float Accuracy threshold for stopping criterion. verbose: int Print diagnostics every ``verbose`` iterations. If ``None`` does not print anything. """ super(ForwardBackwardSplitting, self).__init__(dim=dim, F=F, G=G, H=None, K=None, tau=tau, rho=rho, x0=x0, max_iter=max_iter, min_iter=min_iter, accuracy_threshold=accuracy_threshold, verbose=verbose)
FBS = ForwardBackwardSplitting

if __name__ == '__main__': import numpy as np import matplotlib.pyplot as plt from pycsou.linop.diff import FirstDerivative from pycsou.func.loss import SquaredL2Loss from pycsou.func.penalty import L1Norm from pycsou.linop.sampling import DownSampling from pycsou.opt.proxalgs import PrimalDualSplitting x = np.repeat([0, 2, 1, 3, 0, 2, 0], 10) D = FirstDerivative(size=x.size, kind='forward') D.compute_lipschitz_cst(tol=1e-3) rng = np.random.default_rng(0) G = DownSampling(size=x.size, downsampling_factor=3) G.compute_lipschitz_cst() y = G(x) l22_loss = (1 / 2) * SquaredL2Loss(dim=G.shape[0], data=y) F = l22_loss * G lambda_ = 0.1 H = lambda_ * L1Norm(dim=D.shape[0]) G = 0.01 * L1Norm(dim=G.shape[1]) pds = PrimalDualSplitting(dim=G.shape[1], F=F, G=G, H=H, K=D, verbose=None) estimate, converged, diagnostics = pds.iterate() plt.figure() plt.stem(x, linefmt='C0-', markerfmt='C0o') plt.stem(estimate['primal_variable'], linefmt='C1--', markerfmt='C1s') plt.legend(['Ground truth', 'PDS Estimate']) plt.show()
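# Hypothetical usage sketch for ForwardBackwardSplitting: non-negative least squares,
# min_x 0.5*||y - G x||_2^2 + iota_{x >= 0}(x), built from operators already used in the
# examples above. The NonNegativeOrthant constructor is assumed to take ``dim`` like the
# other penalties (check pycsou.func.penalty); kept as a comment so it does not run on import.
#
#     import numpy as np
#     from pycsou.func.loss import SquaredL2Loss
#     from pycsou.func.penalty import NonNegativeOrthant
#     from pycsou.linop.sampling import DownSampling
#
#     x = np.repeat([0, 2, 1, 3, 0, 2, 0], 10)
#     Gop = DownSampling(size=x.size, downsampling_factor=3)
#     Gop.compute_lipschitz_cst()
#     y = Gop(x)
#     F = (1 / 2) * SquaredL2Loss(dim=Gop.shape[0], data=y) * Gop
#     fbs = FBS(dim=Gop.shape[1], F=F, G=NonNegativeOrthant(dim=Gop.shape[1]), verbose=None)
#     estimate, converged, diagnostics = fbs.iterate()
#     x_hat = estimate['primal_variable']   # non-negative reconstruction of x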