# #############################################################################
# solver.py
# =========
# Author : Matthieu Simeoni [matthieu.simeoni@gmail.com]
# #############################################################################
r"""
Proximal algorithms.
This module provides various proximal algorithms for convex optimisation.
"""
from numbers import Number
from typing import Optional, Tuple, Any
import numpy as np
from pandas import DataFrame
from pycsou.core.functional import ProximableFunctional, DifferentiableFunctional
from pycsou.core.linop import LinearOperator
from pycsou.core.map import DifferentiableMap
from pycsou.core.solver import GenericIterativeAlgorithm
from pycsou.func.base import NullDifferentiableFunctional, NullProximableFunctional
from pycsou.linop.base import IdentityOperator, NullOperator
class PrimalDualSplitting(GenericIterativeAlgorithm):
    r"""
    Primal dual splitting algorithm.

    This class is also accessible via the alias ``PDS()``.

    Notes
    -----
    The *Primal Dual Splitting (PDS)* method is described in [PDS]_ (this particular implementation is based on the
    pseudo-code Algorithm 7.1 provided in [FuncSphere]_ Chapter 7, Section 1).
    It can be used to solve problems of the form:

    .. math::

       {\min_{\mathbf{x}\in\mathbb{R}^N} \;\mathcal{F}(\mathbf{x})\;\;+\;\;\mathcal{G}(\mathbf{x})\;\;+\;\;\mathcal{H}(\mathbf{K} \mathbf{x}).}

    where:

    * :math:`\mathcal{F}:\mathbb{R}^N\rightarrow \mathbb{R}` is *convex* and *differentiable*, with :math:`\beta`-*Lipschitz continuous* gradient,
      for some :math:`\beta\in[0,+\infty[`.
    * :math:`\mathcal{G}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` and :math:`\mathcal{H}:\mathbb{R}^M\rightarrow \mathbb{R}\cup\{+\infty\}` are two *proper*, *lower semicontinuous* and *convex functions* with *simple proximal operators*.
    * :math:`\mathbf{K}:\mathbb{R}^N\rightarrow \mathbb{R}^M` is a *linear operator*, with **operator norm**:

      .. math::

         \Vert{\mathbf{K}}\Vert_2=\sup_{\mathbf{x}\in\mathbb{R}^N,\Vert\mathbf{x}\Vert_2=1} \Vert\mathbf{K}\mathbf{x}\Vert_2.

    * The problem is *feasible* --i.e. there exists at least one solution.

    **Remark 1:**
    The algorithm is still valid if one or more of the terms :math:`\mathcal{F}`, :math:`\mathcal{G}` or :math:`\mathcal{H}` is zero.

    **Remark 2:**
    Assume that the following holds:

    * :math:`\beta>0` and:

      - :math:`\frac{1}{\tau}-\sigma\Vert\mathbf{K}\Vert_{2}^2\geq \frac{\beta}{2}`,
      - :math:`\rho \in ]0,\delta[`, where :math:`\delta:=2-\frac{\beta}{2}\left(\frac{1}{\tau}-\sigma\Vert\mathbf{K}\Vert_{2}^2\right)^{-1}\in[1,2[.`

    * or :math:`\beta=0` and:

      - :math:`\tau\sigma\Vert\mathbf{K}\Vert_{2}^2\leq 1`
      - :math:`\rho \in [\epsilon,2-\epsilon]`, for some :math:`\epsilon>0.`

    Then there exists a solution pair :math:`(\mathbf{x}^\star,\mathbf{z}^\star)\in\mathbb{R}^N\times \mathbb{R}^M` such
    that the primal and dual sequences of estimates :math:`(\mathbf{x}_n)_{n\in\mathbb{N}}` and
    :math:`(\mathbf{z}_n)_{n\in\mathbb{N}}` *converge* towards :math:`\mathbf{x}^\star` and :math:`\mathbf{z}^\star`
    respectively, i.e.

    .. math::

       \lim_{n\rightarrow +\infty}\Vert\mathbf{x}^\star-\mathbf{x}_n\Vert_2=0, \quad \text{and} \quad \lim_{n\rightarrow +\infty}\Vert\mathbf{z}^\star-\mathbf{z}_n\Vert_2=0.

    **Default values of the hyperparameters provided here always ensure convergence of the algorithm.**

    Examples
    --------
    Consider the following optimisation problem:

    .. math::

       \min_{\mathbf{x}\in\mathbb{R}_+^N}\frac{1}{2}\left\|\mathbf{y}-\mathbf{G}\mathbf{x}\right\|_2^2\quad+\quad\lambda_1 \|\mathbf{D}\mathbf{x}\|_1\quad+\quad\lambda_2 \|\mathbf{x}\|_1,

    with :math:`\mathbf{D}\in\mathbb{R}^{N\times N}` the discrete derivative operator and
    :math:`\mathbf{G}\in\mathbb{R}^{L\times N}, \, \mathbf{y}\in\mathbb{R}^L, \lambda_1,\lambda_2>0.`
    This problem can be solved via PDS with :math:`\mathcal{F}(\mathbf{x})= \frac{1}{2}\left\|\mathbf{y}-\mathbf{G}\mathbf{x}\right\|_2^2`,
    :math:`\mathcal{G}(\mathbf{x})=\lambda_2\|\mathbf{x}\|_1,` :math:`\mathcal{H}(\mathbf{x})=\lambda \|\mathbf{x}\|_1`
    and :math:`\mathbf{K}=\mathbf{D}`.

    .. plot::

       import numpy as np
       import matplotlib.pyplot as plt
       from pycsou.linop.diff import FirstDerivative
       from pycsou.func.loss import SquaredL2Loss
       from pycsou.func.penalty import L1Norm, NonNegativeOrthant
       from pycsou.linop.sampling import DownSampling
       from pycsou.opt.proxalgs import PrimalDualSplitting

       x = np.repeat([0, 2, 1, 3, 0, 2, 0], 10)
       D = FirstDerivative(size=x.size, kind='forward')
       D.compute_lipschitz_cst(tol=1e-3)
       rng = np.random.default_rng(0)
       G = DownSampling(size=x.size, downsampling_factor=3)
       G.compute_lipschitz_cst()
       y = G(x)
       l22_loss = (1 / 2) * SquaredL2Loss(dim=G.shape[0], data=y)
       F = l22_loss * G
       lambda_ = 0.1
       H = lambda_ * L1Norm(dim=D.shape[0])
       G = 0.01 * L1Norm(dim=G.shape[1])
       pds = PrimalDualSplitting(dim=G.shape[1], F=F, G=G, H=H, K=D, verbose=None)
       estimate, converged, diagnostics = pds.iterate()
       plt.figure()
       plt.stem(x, linefmt='C0-', markerfmt='C0o')
       plt.stem(estimate['primal_variable'], linefmt='C1--', markerfmt='C1s')
       plt.legend(['Ground truth', 'PDS Estimate'])
       plt.show()

    See Also
    --------
    :py:class:`~pycsou.opt.proxalgs.PDS`, :py:class:`~pycsou.opt.proxalgs.ChambollePockSplitting`, :py:class:`~pycsou.opt.proxalgs.DouglasRachford`
    """

    def __init__(self, dim: int, F: Optional[DifferentiableMap] = None, G: Optional[ProximableFunctional] = None,
                 H: Optional[ProximableFunctional] = None, K: Optional[LinearOperator] = None,
                 tau: Optional[float] = None, sigma: Optional[float] = None, rho: Optional[float] = None,
                 beta: Optional[float] = None, x0: Optional[np.ndarray] = None, z0: Optional[np.ndarray] = None,
                 max_iter: int = 500, min_iter: int = 10, accuracy_threshold: float = 1e-3, verbose: Optional[int] = 1):
        r"""
        Parameters
        ----------
        dim: int
            Dimension of the objective functional's domain.
        F: Optional[DifferentiableMap]
            Differentiable map :math:`\mathcal{F}`.
        G: Optional[ProximableFunctional]
            Proximable functional :math:`\mathcal{G}`.
        H: Optional[ProximableFunctional]
            Proximable functional :math:`\mathcal{H}`.
        K: Optional[LinearOperator]
            Linear operator :math:`\mathbf{K}`.
        tau: Optional[float]
            Primal step size.
        sigma: Optional[float]
            Dual step size.
        rho: Optional[float]
            Momentum parameter.
        beta: Optional[float]
            Lipschitz constant :math:`\beta` of the derivative of :math:`\mathcal{F}`.
        x0: Optional[np.ndarray]
            Initial guess for the primal variable.
        z0: Optional[np.ndarray]
            Initial guess for the dual variable.
        max_iter: int
            Maximal number of iterations.
        min_iter: int
            Minimal number of iterations.
        accuracy_threshold: float
            Accuracy threshold for stopping criterion.
        verbose: int
            Print diagnostics every ``verbose`` iterations. If ``None`` does not print anything.

        Raises
        ------
        ValueError
            If the dimensions of ``F``, ``G``, ``H`` or ``K`` are inconsistent with ``dim``, or if ``F`` has no
            finite Lipschitz constant and ``beta`` was not provided.
        TypeError
            If ``F``, ``G``, ``H`` or ``K`` is not of the expected type.
        """
        self.dim = dim
        # _H tracks whether a non-trivial H was supplied: it gates the dual update and dual diagnostics.
        self._H = True
        if isinstance(F, DifferentiableMap):
            if F.shape[1] != dim:
                raise ValueError(f'F does not have the proper dimension: {F.shape[1]}!={dim}.')
            else:
                self.F = F
            # An explicit beta overrides the operator's own diff-Lipschitz constant.
            if F.diff_lipschitz_cst < np.inf:
                self.beta = self.F.diff_lipschitz_cst if beta is None else beta
            elif (beta is not None) and isinstance(beta, Number):
                self.beta = beta
            else:
                raise ValueError('F must be a differentiable functional with Lipschitz-continuous gradient.')
        elif F is None:
            self.F = NullDifferentiableFunctional(dim=dim)
            self.beta = 0
        else:
            raise TypeError(f'F must be of type {DifferentiableMap}.')
        if isinstance(G, ProximableFunctional):
            if G.dim != dim:
                raise ValueError(f'G does not have the proper dimension: {G.dim}!={dim}.')
            else:
                self.G = G
        elif G is None:
            self.G = NullProximableFunctional(dim=dim)
        else:
            raise TypeError(f'G must be of type {ProximableFunctional}.')
        # Consistency check only applies when both K and H are supplied.
        if isinstance(K, LinearOperator) and isinstance(H, ProximableFunctional):
            if (K.shape[1] != dim) or (K.shape[0] != H.dim):
                raise ValueError(
                    f'Operator K with shape {K.shape} is inconsistent with functional H with dimension {H.dim}.')
        if isinstance(H, ProximableFunctional):
            self.H = H
            if isinstance(K, LinearOperator):
                self.K = K
            elif K is None:
                # No K given: use the identity, whose operator norm is exactly 1.
                self.K = IdentityOperator(size=H.dim)
                self.K.lipschitz_cst = self.K.diff_lipschitz_cst = 1
            else:
                raise TypeError(f'K must be of type {LinearOperator}.')
        elif H is None:
            # No H: the dual half of the algorithm is disabled (K is the null operator).
            self.H = NullProximableFunctional(dim=dim)
            self._H = False
            self.K = NullOperator(shape=(dim, dim))
            self.K.lipschitz_cst = self.K.diff_lipschitz_cst = 0
        else:
            raise TypeError(f'H must be of type {ProximableFunctional}.')
        # If only one of tau/sigma is given, use it for both (balanced step sizes).
        if (tau is not None) and (sigma is not None):
            self.tau, self.sigma = tau, sigma
        elif (tau is not None) and (sigma is None):
            self.tau = self.sigma = tau
        elif (tau is None) and (sigma is not None):
            self.tau = self.sigma = sigma
        else:
            self.tau, self.sigma = self.set_step_sizes()
        if rho is not None:
            self.rho = rho
        else:
            self.rho = self.set_momentum_term()
        if x0 is not None:
            self.x0 = np.asarray(x0)
        else:
            self.x0 = self.initialize_primal_variable()
        if z0 is not None:
            self.z0 = np.asarray(z0)
        else:
            self.z0 = self.initialize_dual_variable()
        objective_functional = (self.F + self.G) + (self.H * self.K)
        init_iterand = {'primal_variable': self.x0, 'dual_variable': self.z0}
        super().__init__(objective_functional=objective_functional, init_iterand=init_iterand,
                         max_iter=max_iter, min_iter=min_iter,
                         accuracy_threshold=accuracy_threshold, verbose=verbose)

    def set_step_sizes(self) -> Tuple[float, float]:
        r"""
        Set the primal/dual step sizes.

        Returns
        -------
        Tuple[float, float]
            Sensible primal/dual step sizes.

        Raises
        ------
        ValueError
            If the Lipschitz constant of ``K`` is infinite (i.e. has not been computed).

        Notes
        -----
        In practice, the convergence speed of the algorithm is improved by choosing :math:`\sigma` and :math:`\tau` as
        large as possible and relatively well-balanced --so that both the primal and dual variables converge at the
        same pace. It is hence recommended to choose perfectly balanced parameters :math:`\sigma=\tau` saturating the
        convergence inequalities.

        For :math:`\beta>0` this yields:

        .. math::

           \frac{1}{\tau}-\tau\Vert\mathbf{K}\Vert_{2}^2= \frac{\beta}{2} \quad\Longleftrightarrow\quad -2\tau^2\Vert\mathbf{K}\Vert_{2}^2-\beta\tau+2=0,

        which admits one positive root

        .. math::

           \tau=\sigma=\frac{1}{\Vert\mathbf{K}\Vert_{2}^2}\left(-\frac{\beta}{4}+\sqrt{\frac{\beta^2}{16}+\Vert\mathbf{K}\Vert_{2}^2}\right).

        For :math:`\beta=0`, this yields

        .. math::

           \tau=\sigma=\Vert\mathbf{K}\Vert_{2}^{-1}.
        """
        if self.beta > 0:
            if self._H is False:
                # No dual term: plain gradient-descent-like bound tau <= 2 / beta.
                tau = 2 / self.beta
                sigma = 0
            else:
                if self.K.lipschitz_cst < np.inf:
                    # Positive root of -2 tau^2 ||K||^2 - beta tau + 2 = 0 (balanced tau = sigma).
                    tau = sigma = (1 / (self.K.lipschitz_cst) ** 2) * (
                            (-self.beta / 4) + np.sqrt((self.beta ** 2 / 16) + self.K.lipschitz_cst ** 2))
                else:
                    raise ValueError(
                        'Please compute the Lipschitz constant of the linear operator K by calling its method "compute_lipschitz_cst()".')
        else:
            if self._H is False:
                tau = 1
                sigma = 0
            else:
                if self.K.lipschitz_cst < np.inf:
                    # Saturates tau * sigma * ||K||^2 <= 1 with tau = sigma.
                    tau = sigma = 1 / self.K.lipschitz_cst
                else:
                    raise ValueError(
                        'Please compute the Lipschitz constant of the linear operator K by calling its method "compute_lipschitz_cst()".')
        return tau, sigma

    def set_momentum_term(self) -> float:
        r"""
        Set the momentum term.

        Returns
        -------
        float
            Momentum term: conservative 0.9 when F contributes curvature (beta > 0), 1 otherwise.
        """
        if self.beta > 0:
            rho = 0.9
        else:
            rho = 1
        return rho

    def initialize_primal_variable(self) -> np.ndarray:
        """
        Initialize the primal variable to zero.

        Returns
        -------
        np.ndarray
            Zero-initialized primal variable of size ``self.dim``.
        """
        return np.zeros(shape=(self.dim,), dtype=float)

    def initialize_dual_variable(self) -> Optional[np.ndarray]:
        """
        Initialize the dual variable to zero.

        Returns
        -------
        Optional[np.ndarray]
            Zero-initialized dual variable of size ``self.H.dim``, or ``None`` when no H was provided.
        """
        if self._H is False:
            return None
        else:
            return np.zeros(shape=(self.H.dim,), dtype=float)

    def update_iterand(self) -> dict:
        """
        Perform one PDS iteration (primal prox step, dual Fenchel-prox step, momentum averaging).

        Returns
        -------
        dict
            Updated ``{'primal_variable', 'dual_variable'}`` pair.
        """
        if self.iter == 0:
            current = self.init_iterand
        else:
            current = self.iterand
        # Explicit key access (rather than dict-order unpacking) for robustness.
        x, z = current['primal_variable'], current['dual_variable']
        x_temp = self.G.prox(x - self.tau * self.F.gradient(x) - self.tau * self.K.adjoint(z), tau=self.tau)
        if self._H is True:
            # Dual update uses the extrapolated point 2*x_temp - x.
            u = 2 * x_temp - x
            z_temp = self.H.fenchel_prox(z + self.sigma * self.K(u), sigma=self.sigma)
            z = self.rho * z_temp + (1 - self.rho) * z
        x = self.rho * x_temp + (1 - self.rho) * x
        iterand = {'primal_variable': x, 'dual_variable': z}
        return iterand

    def print_diagnostics(self):
        """Print the diagnostics row of the current iteration."""
        print(dict(self.diagnostics.loc[self.iter]))

    def stopping_metric(self):
        """
        Stopping metric: relative improvement of the primal variable at the previous iteration
        (infinite at iteration 0 so the algorithm never stops immediately).
        """
        if self.iter == 0:
            return np.inf
        else:
            return self.diagnostics.loc[self.iter - 1, 'Relative Improvement (primal variable)']

    @staticmethod
    def _relative_improvement(old: np.ndarray, new: np.ndarray) -> float:
        """Relative change ``||old - new|| / ||old||``, or ``inf`` when ``old`` is the zero vector."""
        denom = np.linalg.norm(old)
        if denom == 0:
            return np.inf
        return np.linalg.norm(old - new) / denom

    def update_diagnostics(self):
        """Record the relative improvement of the primal (and, if present, dual) variable for this iteration."""
        if self._H is True:
            if self.iter == 0:
                self.diagnostics = DataFrame(
                    columns=['Iter', 'Relative Improvement (primal variable)', 'Relative Improvement (dual variable)'])
            self.diagnostics.loc[self.iter, 'Iter'] = self.iter
            self.diagnostics.loc[self.iter, 'Relative Improvement (primal variable)'] = self._relative_improvement(
                self.old_iterand['primal_variable'], self.iterand['primal_variable'])
            self.diagnostics.loc[self.iter, 'Relative Improvement (dual variable)'] = self._relative_improvement(
                self.old_iterand['dual_variable'], self.iterand['dual_variable'])
        else:
            if self.iter == 0:
                self.diagnostics = DataFrame(
                    columns=['Iter', 'Relative Improvement (primal variable)'])
            self.diagnostics.loc[self.iter, 'Iter'] = self.iter
            self.diagnostics.loc[self.iter, 'Relative Improvement (primal variable)'] = self._relative_improvement(
                self.old_iterand['primal_variable'], self.iterand['primal_variable'])


PDS = PrimalDualSplitting
class AcceleratedProximalGradientDescent(GenericIterativeAlgorithm):
    r"""
    Accelerated proximal gradient descent.

    This class is also accessible via the alias ``APGD()``.

    Notes
    -----
    The *Accelerated Proximal Gradient Descent (APGD)* method can be used to solve problems of the form:

    .. math::

       {\min_{\mathbf{x}\in\mathbb{R}^N} \;\mathcal{F}(\mathbf{x})\;\;+\;\;\mathcal{G}(\mathbf{x}).}

    where:

    * :math:`\mathcal{F}:\mathbb{R}^N\rightarrow \mathbb{R}` is *convex* and *differentiable*, with :math:`\beta`-*Lipschitz continuous* gradient,
      for some :math:`\beta\in[0,+\infty[`.
    * :math:`\mathcal{G}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` is a *proper*, *lower semicontinuous* and *convex function* with a *simple proximal operator*.
    * The problem is *feasible* --i.e. there exists at least one solution.

    **Remark 1:** the algorithm is still valid if one or more of the terms :math:`\mathcal{F}` or :math:`\mathcal{G}` is zero.

    **Remark 2:** The convergence is guaranteed for step sizes :math:`\tau\leq 1/\beta`. Without acceleration, APGD can
    be seen as a PDS method with :math:`\rho=1`. The various acceleration schemes are described in [APGD]_.
    For :math:`0<\tau\leq 1/\beta` and Chambolle and Dossal's acceleration scheme (``acceleration='CD'``), APGD
    achieves the following (optimal) *convergence rates*:

    .. math::

       \lim\limits_{n\rightarrow \infty} n^2\left\vert \mathcal{J}(\mathbf{x}^\star)- \mathcal{J}(\mathbf{x}_n)\right\vert=0\qquad \&\qquad \lim\limits_{n\rightarrow \infty} n^2\Vert \mathbf{x}_n-\mathbf{x}_{n-1}\Vert^2_\mathcal{X}=0,

    for *some minimiser* :math:`{\mathbf{x}^\star}\in\arg\min_{\mathbf{x}\in\mathbb{R}^N} \;\left\{\mathcal{J}(\mathbf{x}):=\mathcal{F}(\mathbf{x})+\mathcal{G}(\mathbf{x})\right\}`.
    In other words, both the objective functional and the APGD iterates :math:`\{\mathbf{x}_n\}_{n\in\mathbb{N}}`
    converge at a rate :math:`o(1/n^2)`. In comparison Beck and Teboule's acceleration scheme
    (``acceleration='BT'``) only achieves a convergence rate of :math:`O(1/n^2)`.
    Significant practical *speedup* can moreover be achieved for values of :math:`d` in the range :math:`[50,100]` [APGD]_.

    Examples
    --------
    Consider the *LASSO problem*:

    .. math::

       \min_{\mathbf{x}\in\mathbb{R}^N}\frac{1}{2}\left\|\mathbf{y}-\mathbf{G}\mathbf{x}\right\|_2^2\quad+\quad\lambda \|\mathbf{x}\|_1,

    with :math:`\mathbf{G}\in\mathbb{R}^{L\times N}, \, \mathbf{y}\in\mathbb{R}^L, \lambda>0.` This problem can be
    solved via APGD with :math:`\mathcal{F}(\mathbf{x})= \frac{1}{2}\left\|\mathbf{y}-\mathbf{G}\mathbf{x}\right\|_2^2`
    and :math:`\mathcal{G}(\mathbf{x})=\lambda \|\mathbf{x}\|_1`. We have:

    .. math::

       \mathbf{\nabla}\mathcal{F}(\mathbf{x})=\mathbf{G}^T(\mathbf{G}\mathbf{x}-\mathbf{y}), \qquad \text{prox}_{\lambda\|\cdot\|_1}(\mathbf{x})=\text{soft}_\lambda(\mathbf{x}).

    This yields the so-called *Fast Iterative Soft Thresholding Algorithm (FISTA)*, whose convergence is guaranteed
    for :math:`d>2` and :math:`0<\tau\leq \beta^{-1}=\|\mathbf{G}\|_2^{-2}`.

    .. plot::

       import numpy as np
       import matplotlib.pyplot as plt
       from pycsou.func.loss import SquaredL2Loss
       from pycsou.func.penalty import L1Norm
       from pycsou.linop.base import DenseLinearOperator
       from pycsou.opt.proxalgs import APGD

       rng = np.random.default_rng(0)
       G = DenseLinearOperator(rng.standard_normal(15).reshape(3,5))
       G.compute_lipschitz_cst()
       x = np.zeros(G.shape[1])
       x[1] = 1
       x[-2] = -1
       y = G(x)
       l22_loss = (1/2) * SquaredL2Loss(dim=G.shape[0], data=y)
       F = l22_loss * G
       lambda_ = 0.9 * np.max(np.abs(F.gradient(0 * x)))
       G = lambda_ * L1Norm(dim=G.shape[1])
       apgd = APGD(dim=G.shape[1], F=F, G=G, acceleration='CD', verbose=None)
       estimate, converged, diagnostics = apgd.iterate()
       plt.figure()
       plt.stem(x, linefmt='C0-', markerfmt='C0o')
       plt.stem(estimate['iterand'], linefmt='C1--', markerfmt='C1s')
       plt.legend(['Ground truth', 'LASSO Estimate'])
       plt.show()

    See Also
    --------
    :py:class:`~pycsou.opt.proxalgs.APGD`
    """

    def __init__(self, dim: int, F: Optional[DifferentiableMap] = None, G: Optional[ProximableFunctional] = None,
                 tau: Optional[float] = None, acceleration: Optional[str] = 'CD', beta: Optional[float] = None,
                 x0: Optional[np.ndarray] = None, max_iter: int = 500, min_iter: int = 10,
                 accuracy_threshold: float = 1e-3, verbose: Optional[int] = 1, d: float = 75.):
        r"""
        Parameters
        ----------
        dim: int
            Dimension of the objective functional's domain.
        F: Optional[DifferentiableMap]
            Differentiable map :math:`\mathcal{F}`.
        G: Optional[ProximableFunctional]
            Proximable functional :math:`\mathcal{G}`.
        tau: Optional[float]
            Primal step size.
        acceleration: Optional[str] [None, 'BT', 'CD']
            Which acceleration scheme should be used (`None` for no acceleration).
        beta: Optional[float]
            Lipschitz constant :math:`\beta` of the derivative of :math:`\mathcal{F}`.
        x0: Optional[np.ndarray]
            Initial guess for the primal variable.
        max_iter: int
            Maximal number of iterations.
        min_iter: int
            Minimal number of iterations.
        accuracy_threshold: float
            Accuracy threshold for stopping criterion.
        verbose: int
            Print diagnostics every ``verbose`` iterations. If ``None`` does not print anything.
        d: float
            Parameter :math:`d` for Chambolle and Dossal's acceleration scheme (``acceleration='CD'``).

        Raises
        ------
        ValueError
            If the dimensions of ``F`` or ``G`` are inconsistent with ``dim``, or if ``F`` has no finite Lipschitz
            constant and ``beta`` was not provided.
        TypeError
            If ``F`` or ``G`` is not of the expected type.
        """
        self.dim = dim
        self.acceleration = acceleration
        self.d = d
        if isinstance(F, DifferentiableMap):
            if F.shape[1] != dim:
                raise ValueError(f'F does not have the proper dimension: {F.shape[1]}!={dim}.')
            else:
                self.F = F
            # An explicit beta overrides the operator's own diff-Lipschitz constant.
            if F.diff_lipschitz_cst < np.inf:
                self.beta = self.F.diff_lipschitz_cst if beta is None else beta
            elif (beta is not None) and isinstance(beta, Number):
                self.beta = beta
            else:
                raise ValueError('F must be a differentiable functional with Lipschitz-continuous gradient.')
        elif F is None:
            self.F = NullDifferentiableFunctional(dim=dim)
            self.beta = 0
        else:
            raise TypeError(f'F must be of type {DifferentiableMap}.')
        if isinstance(G, ProximableFunctional):
            if G.dim != dim:
                raise ValueError(f'G does not have the proper dimension: {G.dim}!={dim}.')
            else:
                self.G = G
        elif G is None:
            self.G = NullProximableFunctional(dim=dim)
        else:
            raise TypeError(f'G must be of type {ProximableFunctional}.')
        if tau is not None:
            self.tau = tau
        else:
            self.tau = self.set_step_size()
        if x0 is not None:
            self.x0 = np.asarray(x0)
        else:
            self.x0 = self.initialize_iterate()
        objective_functional = self.F + self.G
        init_iterand = {'iterand': self.x0, 'past_aux': 0 * self.x0, 'past_t': 1}
        super().__init__(objective_functional=objective_functional,
                         init_iterand=init_iterand,
                         max_iter=max_iter, min_iter=min_iter,
                         accuracy_threshold=accuracy_threshold,
                         verbose=verbose)

    def set_step_size(self) -> float:
        r"""
        Set the step size to its largest admissible value :math:`1/\beta`.

        Returns
        -------
        float
            Largest admissible step size.

        Notes
        -----
        NOTE(review): raises ``ZeroDivisionError`` when ``beta == 0`` (e.g. ``F`` is ``None``); in that case an
        explicit ``tau`` must be supplied by the caller.
        """
        return 1 / self.beta

    def initialize_iterate(self) -> np.ndarray:
        """
        Initialize the iterand to zero.

        Returns
        -------
        np.ndarray
            Zero-initialized iterand of size ``self.dim``.
        """
        return np.zeros(shape=(self.dim,), dtype=float)

    def update_iterand(self) -> Any:
        """
        Perform one APGD iteration (prox-gradient step followed by Nesterov-type extrapolation).

        Returns
        -------
        dict
            Updated ``{'iterand', 'past_aux', 'past_t'}`` triple.
        """
        if self.iter == 0:
            current = self.init_iterand
        else:
            current = self.iterand
        # Explicit key access (rather than dict-order unpacking) for robustness.
        x, x_old, t_old = current['iterand'], current['past_aux'], current['past_t']
        x_temp = self.G.prox(x - self.tau * self.F.gradient(x), tau=self.tau)
        if self.acceleration == 'BT':
            # Beck & Teboule momentum sequence.
            t = (1 + np.sqrt(1 + 4 * t_old ** 2)) / 2
        elif self.acceleration == 'CD':
            # Chambolle & Dossal momentum sequence, controlled by d.
            t = (self.iter + self.d) / self.d
        else:
            # No acceleration: extrapolation coefficient a is zero.
            t = t_old = 1
        a = (t_old - 1) / t
        x = x_temp + a * (x_temp - x_old)
        iterand = {'iterand': x, 'past_aux': x_temp, 'past_t': t}
        return iterand

    def print_diagnostics(self):
        """Print the diagnostics row of the current iteration."""
        print(dict(self.diagnostics.loc[self.iter]))

    def stopping_metric(self):
        """
        Stopping metric: relative improvement of the iterand at the previous iteration
        (infinite at iteration 0 so the algorithm never stops immediately).
        """
        if self.iter == 0:
            return np.inf
        else:
            return self.diagnostics.loc[self.iter - 1, 'Relative Improvement']

    def update_diagnostics(self):
        """Record the relative improvement of the iterand for this iteration."""
        if self.iter == 0:
            self.diagnostics = DataFrame(
                columns=['Iter', 'Relative Improvement'])
        self.diagnostics.loc[self.iter, 'Iter'] = self.iter
        if np.linalg.norm(self.old_iterand['iterand']) == 0:
            # Relative change is undefined from the zero vector; report infinity.
            self.diagnostics.loc[self.iter, 'Relative Improvement'] = np.inf
        else:
            self.diagnostics.loc[self.iter, 'Relative Improvement'] = np.linalg.norm(
                self.old_iterand['iterand'] - self.iterand['iterand']) / np.linalg.norm(
                self.old_iterand['iterand'])


APGD = AcceleratedProximalGradientDescent
class ChambollePockSplitting(PrimalDualSplitting):
    r"""
    Chambolle and Pock primal-dual splitting method.

    This class is also accessible via the alias ``CPS()``.

    Notes
    -----
    The *Chambolle and Pock primal-dual splitting (CPS)* method can be used to solve problems of the form:

    .. math::

       {\min_{\mathbf{x}\in\mathbb{R}^N} \mathcal{G}(\mathbf{x})\;\;+\;\;\mathcal{H}(\mathbf{K} \mathbf{x}).}

    where:

    * :math:`\mathcal{G}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` and :math:`\mathcal{H}:\mathbb{R}^M\rightarrow \mathbb{R}\cup\{+\infty\}` are two *proper*, *lower semicontinuous* and *convex functions* with *simple proximal operators*.
    * :math:`\mathbf{K}:\mathbb{R}^N\rightarrow \mathbb{R}^M` is a *linear operator*, with **operator norm**:

      .. math::

         \Vert{\mathbf{K}}\Vert_2=\sup_{\mathbf{x}\in\mathbb{R}^N,\Vert\mathbf{x}\Vert_2=1} \Vert\mathbf{K}\mathbf{x}\Vert_2.

    * The problem is *feasible* --i.e. there exists at least one solution.

    **Remark 1:**
    The algorithm is still valid if one of the terms :math:`\mathcal{G}` or :math:`\mathcal{H}` is zero.

    **Remark 2:**
    Assume that the following holds:

    - :math:`\tau\sigma\Vert\mathbf{K}\Vert_{2}^2\leq 1`
    - :math:`\rho \in [\epsilon,2-\epsilon]`, for some :math:`\epsilon>0.`

    Then there exists a solution pair :math:`(\mathbf{x}^\star,\mathbf{z}^\star)\in\mathbb{R}^N\times \mathbb{R}^M`
    such that the primal and dual sequences of estimates :math:`(\mathbf{x}_n)_{n\in\mathbb{N}}` and
    :math:`(\mathbf{z}_n)_{n\in\mathbb{N}}` *converge* towards :math:`\mathbf{x}^\star` and :math:`\mathbf{z}^\star`
    respectively, i.e.

    .. math::

       \lim_{n\rightarrow +\infty}\Vert\mathbf{x}^\star-\mathbf{x}_n\Vert_2=0, \quad \text{and} \quad \lim_{n\rightarrow +\infty}\Vert\mathbf{z}^\star-\mathbf{z}_n\Vert_2=0.

    **Default values of the hyperparameters provided here always ensure convergence of the algorithm.**

    See Also
    --------
    :py:class:`~pycsou.opt.proxalgs.CPS`, :py:class:`~pycsou.opt.proxalgs.PrimalDualSplitting`, :py:class:`~pycsou.opt.proxalgs.DouglasRachfordSplitting`
    """

    def __init__(self, dim: int, G: Optional[ProximableFunctional] = None,
                 H: Optional[ProximableFunctional] = None, K: Optional[LinearOperator] = None,
                 tau: Optional[float] = None, sigma: Optional[float] = None, rho: Optional[float] = 1,
                 x0: Optional[np.ndarray] = None, z0: Optional[np.ndarray] = None,
                 max_iter: int = 500, min_iter: int = 10, accuracy_threshold: float = 1e-3, verbose: Optional[int] = 1):
        r"""
        Parameters
        ----------
        dim: int
            Dimension of the objective functional's domain.
        G: Optional[ProximableFunctional]
            Proximable functional :math:`\mathcal{G}`.
        H: Optional[ProximableFunctional]
            Proximable functional :math:`\mathcal{H}`.
        K: Optional[LinearOperator]
            Linear operator :math:`\mathbf{K}`.
        tau: Optional[float]
            Primal step size.
        sigma: Optional[float]
            Dual step size.
        rho: Optional[float]
            Momentum parameter.
        x0: Optional[np.ndarray]
            Initial guess for the primal variable.
        z0: Optional[np.ndarray]
            Initial guess for the dual variable.
        max_iter: int
            Maximal number of iterations.
        min_iter: int
            Minimal number of iterations.
        accuracy_threshold: float
            Accuracy threshold for stopping criterion.
        verbose: int
            Print diagnostics every ``verbose`` iterations. If ``None`` does not print anything.
        """
        # CPS is PDS without the differentiable term F (beta = 0) and default momentum rho = 1.
        super().__init__(dim=dim, F=None, G=G, H=H, K=K, tau=tau, sigma=sigma, rho=rho,
                         x0=x0,
                         z0=z0, max_iter=max_iter, min_iter=min_iter,
                         accuracy_threshold=accuracy_threshold, verbose=verbose)


CPS = ChambollePockSplitting
class DouglasRachfordSplitting(PrimalDualSplitting):
    r"""
    Douglas Rachford splitting algorithm.

    This class is also accessible via the alias ``DRS()``.

    Notes
    -----
    The *Douglas Rachford Splitting (DRS)* can be used to solve problems of the form:

    .. math::

       {\min_{\mathbf{x}\in\mathbb{R}^N} \mathcal{G}(\mathbf{x})\;\;+\;\;\mathcal{H}(\mathbf{x}).}

    where:

    * :math:`\mathcal{G}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` and :math:`\mathcal{H}:\mathbb{R}^M\rightarrow \mathbb{R}\cup\{+\infty\}` are two *proper*, *lower semicontinuous* and *convex functions* with *simple proximal operators*.
    * The problem is *feasible* --i.e. there exists at least one solution.

    **Remark 1:**
    The algorithm is still valid if one of the terms :math:`\mathcal{G}` or :math:`\mathcal{H}` is zero.

    **Default values of the hyperparameters provided here always ensure convergence of the algorithm.**

    See Also
    --------
    :py:class:`~pycsou.opt.proxalgs.DRS`, :py:class:`~pycsou.opt.proxalgs.PrimalDualSplitting`, :py:class:`~pycsou.opt.proxalgs.ChambollePockSplitting`
    """

    def __init__(self, dim: int, G: Optional[ProximableFunctional] = None,
                 H: Optional[ProximableFunctional] = None,
                 tau: float = 1., x0: Optional[np.ndarray] = None, z0: Optional[np.ndarray] = None,
                 max_iter: int = 500, min_iter: int = 10, accuracy_threshold: float = 1e-3, verbose: Optional[int] = 1):
        r"""
        Parameters
        ----------
        dim: int
            Dimension of the objective functional's domain.
        G: Optional[ProximableFunctional]
            Proximable functional :math:`\mathcal{G}`.
        H: Optional[ProximableFunctional]
            Proximable functional :math:`\mathcal{H}`.
        tau: Optional[float]
            Primal step size. Must be nonzero (the dual step size is set to ``1 / tau``).
        x0: Optional[np.ndarray]
            Initial guess for the primal variable.
        z0: Optional[np.ndarray]
            Initial guess for the dual variable.
        max_iter: int
            Maximal number of iterations.
        min_iter: int
            Minimal number of iterations.
        accuracy_threshold: float
            Accuracy threshold for stopping criterion.
        verbose: int
            Print diagnostics every ``verbose`` iterations. If ``None`` does not print anything.
        """
        # DRS is PDS with F = None, K = identity, sigma = 1/tau and momentum rho = 1.
        super().__init__(dim=dim, F=None, G=G, H=H, K=None, tau=tau, sigma=1 / tau, rho=1,
                         x0=x0, z0=z0, max_iter=max_iter, min_iter=min_iter,
                         accuracy_threshold=accuracy_threshold, verbose=verbose)


DRS = DouglasRachfordSplitting
class ForwardBackwardSplitting(PrimalDualSplitting):
    r"""
    Forward-backward splitting algorithm.

    This class is also accessible via the alias ``FBS()``.

    Notes
    -----
    The *Forward-backward splitting (FBS)* method can be used to solve problems of the form:

    .. math::

       {\min_{\mathbf{x}\in\mathbb{R}^N} \;\mathcal{F}(\mathbf{x})\;\;+\;\;\mathcal{G}(\mathbf{x}).}

    where:

    * :math:`\mathcal{F}:\mathbb{R}^N\rightarrow \mathbb{R}` is *convex* and *differentiable*, with :math:`\beta`-*Lipschitz continuous* gradient,
      for some :math:`\beta\in[0,+\infty[`.
    * :math:`\mathcal{G}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` is *proper*, *lower semicontinuous* and *convex function* with *simple proximal operator*.
    * The problem is *feasible* --i.e. there exists at least one solution.

    **Remark 1:**
    The algorithm is still valid if one of the terms :math:`\mathcal{F}` or :math:`\mathcal{G}` is zero.

    **Remark 2:**
    Assume that the following holds:

    - :math:`\frac{1}{\tau}\geq \frac{\beta}{2}`,
    - :math:`\rho \in ]0,\delta[`, where :math:`\delta:=2-\frac{\beta}{2}\tau\in[1,2[.`

    Then there exists a solution :math:`\mathbf{x}^\star\in\mathbb{R}^N` such that the sequence of estimates
    :math:`(\mathbf{x}_n)_{n\in\mathbb{N}}` *converges* towards :math:`\mathbf{x}^\star`, i.e.

    .. math::

       \lim_{n\rightarrow +\infty}\Vert\mathbf{x}^\star-\mathbf{x}_n\Vert_2=0.

    **Default values of the hyperparameters provided here always ensure convergence of the algorithm.**

    See Also
    --------
    :py:class:`~pycsou.opt.proxalgs.FBS`, :py:class:`~pycsou.opt.proxalgs.AcceleratedProximalGradientDescent`
    """

    def __init__(self, dim: int, F: Optional[DifferentiableFunctional] = None, G: Optional[ProximableFunctional] = None,
                 tau: Optional[float] = None, rho: Optional[float] = 1, x0: Optional[np.ndarray] = None,
                 max_iter: int = 500, min_iter: int = 10, accuracy_threshold: float = 1e-3, verbose: Optional[int] = 1):
        r"""
        Parameters
        ----------
        dim: int
            Dimension of the objective functional's domain.
        F: Optional[DifferentiableMap]
            Differentiable map :math:`\mathcal{F}`.
        G: Optional[ProximableFunctional]
            Proximable functional :math:`\mathcal{G}`.
        tau: Optional[float]
            Primal step size.
        rho: Optional[float]
            Momentum parameter.
        x0: Optional[np.ndarray]
            Initial guess for the primal variable.
        max_iter: int
            Maximal number of iterations.
        min_iter: int
            Minimal number of iterations.
        accuracy_threshold: float
            Accuracy threshold for stopping criterion.
        verbose: int
            Print diagnostics every ``verbose`` iterations. If ``None`` does not print anything.
        """
        # FBS is PDS without the dual half (H = K = None): only the proximal gradient step remains.
        super().__init__(dim=dim, F=F, G=G, H=None, K=None, tau=tau, rho=rho,
                         x0=x0, max_iter=max_iter, min_iter=min_iter,
                         accuracy_threshold=accuracy_threshold, verbose=verbose)


FBS = ForwardBackwardSplitting
if __name__ == '__main__':
    import numpy as np
    import matplotlib.pyplot as plt
    from pycsou.linop.diff import FirstDerivative
    from pycsou.func.loss import SquaredL2Loss
    from pycsou.func.penalty import L1Norm
    from pycsou.linop.sampling import DownSampling
    from pycsou.opt.proxalgs import PrimalDualSplitting

    # Piecewise-constant ground-truth signal.
    signal = np.repeat([0, 2, 1, 3, 0, 2, 0], 10)
    deriv_op = FirstDerivative(size=signal.size, kind='forward')
    deriv_op.compute_lipschitz_cst(tol=1e-3)
    rng = np.random.default_rng(0)
    sampling_op = DownSampling(size=signal.size, downsampling_factor=3)
    sampling_op.compute_lipschitz_cst()
    data = sampling_op(signal)
    # Data-fidelity term composed with the sampling operator.
    data_fidelity = (1 / 2) * SquaredL2Loss(dim=sampling_op.shape[0], data=data)
    F = data_fidelity * sampling_op
    lambda_ = 0.1
    # TV-like penalty on the derivative, plus a small l1 penalty on the signal itself.
    H = lambda_ * L1Norm(dim=deriv_op.shape[0])
    G = 0.01 * L1Norm(dim=sampling_op.shape[1])
    pds = PrimalDualSplitting(dim=G.shape[1], F=F, G=G, H=H, K=deriv_op, verbose=None)
    estimate, converged, diagnostics = pds.iterate()
    # Compare ground truth against the PDS reconstruction.
    plt.figure()
    plt.stem(signal, linefmt='C0-', markerfmt='C0o')
    plt.stem(estimate['primal_variable'], linefmt='C1--', markerfmt='C1s')
    plt.legend(['Ground truth', 'PDS Estimate'])
    plt.show()