# #############################################################################
# loss.py
# ==========
# Authors : Matthieu Simeoni [matthieu.simeoni@gmail.com]
# #############################################################################
r"""
Repository of common loss functionals.
"""
from pycsou.core.functional import DifferentiableFunctional, ProximableFunctional, ProxFuncPreComp
from pycsou.func.base import IndicatorFunctional
from pycsou.linop.base import DenseLinearOperator
from pycsou.func.penalty import L2Norm, L1Norm, LInftyNorm, L2Ball, L1Ball, LInftyBall, SquaredL1Norm, SquaredL2Norm
from typing import Union
from numbers import Number
import numpy as np
def ProximableLoss(func: ProximableFunctional, data: Union[Number, np.ndarray]) -> ProximableFunctional:
    r"""
    Constructor of proximable loss functions.

    Given a proximable functional :math:`\varphi:\mathbb{R}^N\rightarrow \mathbb{R}` and a data vector
    :math:`\mathbf{y}\in\mathbb{R}^N`, this routine builds the loss functional
    :math:`F(\mathbf{x}; \mathbf{y}):= \varphi(\mathbf{x}-\mathbf{y}), \,\forall \mathbf{x}\in\mathbb{R}^N.`

    Parameters
    ----------
    func: ProximableFunctional
        Some proximable functional :math:`\varphi:\mathbb{R}^N\rightarrow \mathbb{R}`.
    data: Union[Number, np.ndarray]
        Data vector :math:`\mathbf{y}\in\mathbb{R}^N`.

    Returns
    -------
    :py:class:`~pycsou.core.functional.ProximableFunctional`
        Proximable loss functional constructed as :math:`F(\mathbf{x}; \mathbf{y}):= \varphi(\mathbf{x}-\mathbf{y}), \,\forall \mathbf{x}\in\mathbb{R}^N.`

    Examples
    --------
    .. testsetup::

       import numpy as np
       from pycsou.func.loss import ProximableLoss
       from pycsou.func.penalty import L1Norm

    .. doctest::

       >>> y = np.arange(10)
       >>> func = L1Norm(dim=y.size)
       >>> loss = ProximableLoss(func=func, data=y)
       >>> x = 2 * np.arange(10)
       >>> np.allclose(loss(x), func(x-y))
       True
       >>> np.allclose(loss.prox(x, tau=1), func.prox(x-y, tau=1) + y)
       True

    Notes
    -----
    The proximity operator of the loss functional is derived automatically from the one of the input
    functional :math:`\varphi` using the pre-composition properties described in [ProxAlg]_ Section 2.1.

    See Also
    --------
    :py:func:`~pycsou.func.loss.DifferentiableLoss`.
    """
    # Pre-compose func with the shift x -> x - data; ProxFuncPreComp adapts the prox accordingly.
    shifted_func = ProxFuncPreComp(func, scale=1, shift=-data)
    return shifted_func
def DifferentiableLoss(func: DifferentiableFunctional, data: Union[Number, np.ndarray]) -> DifferentiableFunctional:
    r"""
    Constructor of differentiable loss functions.

    Constructs a differentiable loss from a differentiable functional and a data vector.
    Let :math:`\varphi:\mathbb{R}^N\rightarrow \mathbb{R}` be some differentiable functional and :math:`\mathbf{y}\in\mathbb{R}^N`.
    This routine defines the loss functional :math:`F(\mathbf{x}; \mathbf{y}):= \varphi(\mathbf{x}-\mathbf{y}), \,\forall \mathbf{x}\in\mathbb{R}^N.`

    Parameters
    ----------
    func: DifferentiableFunctional
        Some differentiable functional :math:`\varphi:\mathbb{R}^N\rightarrow \mathbb{R}`.
    data: Union[Number, np.ndarray]
        Data vector :math:`\mathbf{y}\in\mathbb{R}^N`.

    Returns
    -------
    :py:class:`~pycsou.core.functional.DifferentiableFunctional`
        Differentiable loss functional constructed as :math:`F(\mathbf{x}; \mathbf{y}):= \varphi(\mathbf{x}-\mathbf{y}), \,\forall \mathbf{x}\in\mathbb{R}^N.`

    Examples
    --------
    .. testsetup::

       import numpy as np
       from pycsou.func.loss import DifferentiableLoss
       from pycsou.func.penalty import SquaredL2Norm

    .. doctest::

       >>> y = np.arange(10)
       >>> func = SquaredL2Norm(dim=y.size)
       >>> loss = DifferentiableLoss(func=func, data=y)
       >>> x = 2 * np.arange(10)
       >>> np.allclose(loss(x), func(x-y))
       True
       >>> np.allclose(loss.gradient(x), 2*(x-y))
       True

    Notes
    -----
    The derivative and Lipschitz constant of the loss functional are automatically computed from those of the input functional :math:`\varphi`.

    See Also
    --------
    :py:func:`~pycsou.func.loss.ProximableLoss`.
    """
    # shifter pre-composes func with x -> x - data and propagates gradient/Lipschitz information.
    return func.shifter(shift=-data)
def L2Loss(dim: int, data: Union[Number, np.ndarray]) -> ProximableFunctional:
    r"""
    :math:`\ell_2` loss functional, :math:`F(\mathbf{y},\mathbf{x}):=\|\mathbf{y}-\mathbf{x}\|_2`.

    Parameters
    ----------
    dim: int
        Dimension of the domain.
    data: Union[Number, np.ndarray]
        Data vector :math:`\mathbf{y}`.

    Returns
    -------
    :py:class:`~pycsou.core.functional.ProximableFunctional`
        The :math:`\ell_2` loss functional.

    Examples
    --------
    .. testsetup::

       import numpy as np
       from pycsou.func.loss import L2Loss
       from pycsou.func.penalty import L2Norm

    .. doctest::

       >>> y = np.arange(10)
       >>> loss = L2Loss(dim=y.size, data=y)
       >>> func = L2Norm(dim=y.size)
       >>> x = 2 * np.arange(10)
       >>> np.allclose(loss.prox(x, tau=1), func.prox(x-y, tau=1) + y)
       True

    See Also
    --------
    :py:func:`~pycsou.func.penalty.L2Norm`, :py:func:`~pycsou.func.loss.L1Loss`, :py:func:`~pycsou.func.loss.LInftyLoss`
    """
    # Shift the L2 norm penalty by the data vector to obtain the loss.
    return ProximableLoss(func=L2Norm(dim=dim), data=data)
def SquaredL2Loss(dim: int, data: Union[Number, np.ndarray]) -> DifferentiableFunctional:
    r"""
    :math:`\ell^2_2` loss functional, :math:`F(\mathbf{y},\mathbf{x}):=\|\mathbf{y}-\mathbf{x}\|^2_2`.

    Parameters
    ----------
    dim: int
        Dimension of the domain.
    data: Union[Number, np.ndarray]
        Data vector :math:`\mathbf{y}`.

    Returns
    -------
    :py:class:`~pycsou.core.functional.DifferentiableFunctional`
        The :math:`\ell^2_2` loss functional.

    Examples
    --------
    .. testsetup::

       import numpy as np
       from pycsou.func.loss import SquaredL2Loss
       from pycsou.func.penalty import SquaredL2Norm
       from pycsou.linop.base import DenseLinearOperator

    .. doctest::

       >>> y = np.arange(10)
       >>> loss = SquaredL2Loss(dim=y.size, data=y)
       >>> Gmat = np.arange(100).reshape(10, 10).astype(float)
       >>> G = DenseLinearOperator(Gmat, is_symmetric=False)
       >>> G.compute_lipschitz_cst()
       >>> fwd_loss = loss * G
       >>> x = 2 * np.arange(10)
       >>> np.allclose(loss(x), np.linalg.norm(y - x) ** 2)
       True
       >>> np.allclose(fwd_loss(x), loss(G(x)))
       True
       >>> np.allclose(fwd_loss.diff_lipschitz_cst, 2 * (G.lipschitz_cst ** 2))
       True
       >>> np.allclose(fwd_loss.gradient(x), 2 * G.adjoint(G(x) - y))
       True

    Notes
    -----
    The :math:`\ell_2^2` functional is the likelihood of the data :math:`\mathbf{y}` under the assumption of
    Gaussian white noise.

    See Also
    --------
    :py:func:`~pycsou.func.penalty.SquaredL2Norm`, :py:func:`~pycsou.func.loss.L2Loss`.
    """
    # Shift the squared-L2 penalty by the data vector; gradient/Lipschitz carry over.
    return DifferentiableLoss(func=SquaredL2Norm(dim=dim), data=data)
def L2BallLoss(dim: int, data: Union[Number, np.ndarray], radius: Number = 1) -> ProximableFunctional:
    r"""
    :math:`\ell_2`-ball loss functional, :math:`\{\mathbf{x}\in\mathbb{R}^N: \|\mathbf{y}-\mathbf{x}\|_2\leq \text{radius}\}`.

    The :math:`\ell_2`-ball loss functional is defined as:

    .. math::

       \iota(\mathbf{x}):=\begin{cases}
       0 \,\text{if} \,\|\mathbf{x}-\mathbf{y}\|_2\leq \text{radius},\\
       +\infty\,\text{otherwise}.
       \end{cases}

    Parameters
    ----------
    dim: int
        Dimension of the domain.
    data: Union[Number, np.ndarray]
        Data vector :math:`\mathbf{y}`.
    radius: Number
        Radius of the ball.

    Returns
    -------
    :py:class:`~pycsou.core.functional.ProximableFunctional`
        The :math:`\ell_2`-ball loss functional.

    Examples
    --------
    .. testsetup::

       import numpy as np
       from pycsou.func.loss import L2BallLoss
       from pycsou.func.penalty import L2Ball

    .. doctest::

       >>> y = np.arange(10)
       >>> loss = L2BallLoss(dim=y.size, data=y, radius=2)
       >>> func = L2Ball(dim=y.size, radius=2)
       >>> x = 2 * np.arange(10)
       >>> np.allclose(loss.prox(x, tau=1), func.prox(x-y, tau=1) + y)
       True

    Notes
    -----
    The :math:`\ell_2`-ball loss functional is particularly useful in the context of Gaussian white noise with
    known standard deviation. In which case, the :math:`\ell_2`-ball defines a confidence region for the data :math:`\mathbf{y}` ([FuncSphere]_ Section 5 of Chapter 7).

    See Also
    --------
    :py:func:`~pycsou.func.penalty.L2Ball`, :py:func:`~pycsou.func.loss.L2Loss`.
    """
    # Shift the L2-ball indicator by the data vector to centre it on y.
    L2_ball = L2Ball(dim=dim, radius=radius)
    return ProximableLoss(L2_ball, data=data)
def L1Loss(dim: int, data: Union[Number, np.ndarray]) -> ProximableFunctional:
    r"""
    :math:`\ell_1` loss functional, :math:`F(\mathbf{y},\mathbf{x}):=\|\mathbf{y}-\mathbf{x}\|_1`.

    Parameters
    ----------
    dim: int
        Dimension of the domain.
    data: Union[Number, np.ndarray]
        Data vector :math:`\mathbf{y}`.

    Returns
    -------
    :py:class:`~pycsou.core.functional.ProximableFunctional`
        The :math:`\ell_1` loss functional.

    Examples
    --------
    .. testsetup::

       import numpy as np
       from pycsou.func.loss import L1Loss
       from pycsou.func.penalty import L1Norm

    .. doctest::

       >>> y = np.arange(10)
       >>> loss = L1Loss(dim=y.size, data=y)
       >>> func = L1Norm(dim=y.size)
       >>> x = 2 * np.arange(10)
       >>> np.allclose(loss.prox(x, tau=1), func.prox(x-y, tau=1) + y)
       True

    Notes
    -----
    The :math:`\ell_1` loss functional leads to sparse residuals, with most of the predicted samples matching exactly the
    observed samples, and a few –potentially large– misfits ([FuncSphere]_ Section 5 of Chapter 7).
    Such a functional is particularly useful in the context of salt-and-pepper noise with strong outliers, or more generally
    for noise distributions with heavy tails, templated by the Laplace distribution.

    See Also
    --------
    :py:func:`~pycsou.func.penalty.L1Norm`, :py:func:`~pycsou.func.loss.SquaredL1Loss`.
    """
    # Shift the L1 norm penalty by the data vector to obtain the loss.
    return ProximableLoss(func=L1Norm(dim=dim), data=data)
def SquaredL1Loss(dim: int, data: Union[Number, np.ndarray], prox_computation: str = 'sort') -> ProximableFunctional:
    r"""
    :math:`\ell^2_1` loss functional, :math:`F(\mathbf{y},\mathbf{x}):=\|\mathbf{y}-\mathbf{x}\|^2_1`.

    Parameters
    ----------
    dim: int
        Dimension of the domain.
    data: Union[Number, np.ndarray]
        Data vector :math:`\mathbf{y}`.
    prox_computation: str
        Method used to evaluate the proximal operator, forwarded to
        :py:func:`~pycsou.func.penalty.SquaredL1Norm`. Defaults to ``'sort'``.

    Returns
    -------
    :py:class:`~pycsou.core.functional.ProximableFunctional`
        The :math:`\ell^2_1` loss functional.

    Examples
    --------
    .. testsetup::

       import numpy as np
       from pycsou.func.loss import SquaredL1Loss
       from pycsou.func.penalty import SquaredL1Norm

    .. doctest::

       >>> y = np.arange(10)
       >>> loss = SquaredL1Loss(dim=y.size, data=y)
       >>> func = SquaredL1Norm(dim=y.size)
       >>> x = 2 * np.arange(10)
       >>> np.allclose(loss.prox(x, tau=1), func.prox(x-y, tau=1) + y)
       True

    See Also
    --------
    :py:func:`~pycsou.func.penalty.SquaredL1Norm`, :py:func:`~pycsou.func.loss.L1Loss`.
    """
    # Shift the squared-L1 penalty by the data vector to obtain the loss.
    squared_L1_norm = SquaredL1Norm(dim=dim, prox_computation=prox_computation)
    return ProximableLoss(squared_L1_norm, data=data)
def L1BallLoss(dim: int, data: Union[Number, np.ndarray], radius: Number = 1) -> ProximableFunctional:
    r"""
    :math:`\ell_1`-ball loss functional, :math:`\{\mathbf{x}\in\mathbb{R}^N: \|\mathbf{y}-\mathbf{x}\|_1\leq \text{radius}\}`.

    The :math:`\ell_1`-ball loss functional is defined as:

    .. math::

       \iota(\mathbf{x}):=\begin{cases}
       0 \,\text{if} \,\|\mathbf{x}-\mathbf{y}\|_1\leq \text{radius},\\
       +\infty\,\text{otherwise}.
       \end{cases}

    Parameters
    ----------
    dim: int
        Dimension of the domain.
    data: Union[Number, np.ndarray]
        Data vector :math:`\mathbf{y}`.
    radius: Number
        Radius of the ball.

    Returns
    -------
    :py:class:`~pycsou.core.functional.ProximableFunctional`
        The :math:`\ell_1`-ball loss functional.

    Examples
    --------
    .. testsetup::

       import numpy as np
       from pycsou.func.loss import L1BallLoss
       from pycsou.func.penalty import L1Ball

    .. doctest::

       >>> y = np.arange(10)
       >>> loss = L1BallLoss(dim=y.size, data=y, radius=2)
       >>> func = L1Ball(dim=y.size, radius=2)
       >>> x = 2 * np.arange(10)
       >>> np.allclose(loss.prox(x, tau=1), func.prox(x-y, tau=1) + y)
       True

    Notes
    -----
    The :math:`\ell_1`-ball loss functional is particularly useful in the context of salt-and-pepper noise with
    known standard deviation. In which case, the :math:`\ell_1`-ball defines a confidence region for the data :math:`\mathbf{y}`.

    See Also
    --------
    :py:func:`~pycsou.func.penalty.L1Ball`, :py:func:`~pycsou.func.loss.L1Loss`, :py:func:`~pycsou.func.loss.SquaredL1Loss`.
    """
    # Shift the L1-ball indicator by the data vector to centre it on y.
    L1_ball = L1Ball(dim=dim, radius=radius)
    return ProximableLoss(L1_ball, data=data)
def LInftyLoss(dim: int, data: Union[Number, np.ndarray]) -> ProximableFunctional:
    r"""
    :math:`\ell_\infty` loss functional, :math:`F(\mathbf{y},\mathbf{x}):=\|\mathbf{y}-\mathbf{x}\|_\infty`.

    Parameters
    ----------
    dim: int
        Dimension of the domain.
    data: Union[Number, np.ndarray]
        Data vector :math:`\mathbf{y}`.

    Returns
    -------
    :py:class:`~pycsou.core.functional.ProximableFunctional`
        The :math:`\ell_\infty` loss functional.

    Examples
    --------
    .. testsetup::

       import numpy as np
       from pycsou.func.loss import LInftyLoss
       from pycsou.func.penalty import LInftyNorm

    .. doctest::

       >>> y = np.arange(10)
       >>> loss = LInftyLoss(dim=y.size, data=y)
       >>> func = LInftyNorm(dim=y.size)
       >>> x = 2 * np.arange(10)
       >>> loss(x)
       9
       >>> np.allclose(loss.prox(x, tau=1), func.prox(x-y, tau=1) + y)
       True

    Notes
    -----
    The :math:`\ell_\infty` loss functional is particularly useful in the context of quantisation noise, or more generally
    for noise distributions with compact support.

    See Also
    --------
    :py:func:`~pycsou.func.penalty.LInftyNorm`, :py:func:`~pycsou.func.loss.LInftyBallLoss`.
    """
    # Shift the L-infinity norm penalty by the data vector to obtain the loss.
    return ProximableLoss(func=LInftyNorm(dim=dim), data=data)
def LInftyBallLoss(dim: int, data: Union[Number, np.ndarray], radius: Number = 1) -> ProximableFunctional:
    r"""
    :math:`\ell_\infty`-ball loss functional, :math:`\{\mathbf{x}\in\mathbb{R}^N: \|\mathbf{y}-\mathbf{x}\|_\infty\leq \text{radius}\}`.

    The :math:`\ell_\infty`-ball loss functional is defined as:

    .. math::

       \iota(\mathbf{x}):=\begin{cases}
       0 \,\text{if} \,\|\mathbf{x}-\mathbf{y}\|_\infty\leq \text{radius},\\
       +\infty\,\text{otherwise}.
       \end{cases}

    Parameters
    ----------
    dim: int
        Dimension of the domain.
    data: Union[Number, np.ndarray]
        Data vector :math:`\mathbf{y}`.
    radius: Number
        Radius of the ball.

    Returns
    -------
    :py:class:`~pycsou.core.functional.ProximableFunctional`
        The :math:`\ell_\infty`-ball loss functional.

    Examples
    --------
    .. testsetup::

       import numpy as np
       from pycsou.func.loss import LInftyBallLoss
       from pycsou.func.penalty import LInftyBall

    .. doctest::

       >>> y = np.arange(10)
       >>> loss = LInftyBallLoss(dim=y.size, data=y, radius=2)
       >>> func = LInftyBall(dim=y.size, radius=2)
       >>> x = 2 * np.arange(10)
       >>> np.allclose(loss.prox(x, tau=1), func.prox(x-y, tau=1) + y)
       True

    Notes
    -----
    The :math:`\ell_\infty`-ball loss functional is particularly useful in the context of quantisation noise with
    compact support. In which case, the :math:`\ell_\infty`-ball defines a confidence region for the data :math:`\mathbf{y}`.

    See Also
    --------
    :py:func:`~pycsou.func.penalty.LInftyBall`, :py:func:`~pycsou.func.loss.LInftyLoss`, :py:func:`~pycsou.func.penalty.LInftyNorm`.
    """
    # Shift the L-infinity-ball indicator by the data vector to centre it on y.
    LInfty_ball = LInftyBall(dim=dim, radius=radius)
    return ProximableLoss(LInfty_ball, data=data)
def ConsistencyLoss(dim: int, data: Union[Number, np.ndarray]) -> ProximableFunctional:
    r"""
    Consistency loss functional :math:`\mathbf{y}=\mathbf{x}`.

    The consistency loss functional is defined as:

    .. math::

       \iota(\mathbf{x}):=\begin{cases}
       0 \,\text{if} \,\mathbf{x}=\mathbf{y},\\
       +\infty\,\text{otherwise}.
       \end{cases}

    Parameters
    ----------
    dim: int
        Dimension of the domain.
    data: Union[Number, np.ndarray]
        Data vector :math:`\mathbf{y}` to match.

    Returns
    -------
    :py:class:`~pycsou.core.functional.ProximableFunctional`
        The consistency loss functional.

    Examples
    --------
    .. testsetup::

       import numpy as np
       from pycsou.func.loss import ConsistencyLoss

    .. doctest::

       >>> y = np.arange(10)
       >>> loss = ConsistencyLoss(dim=y.size, data=y)
       >>> x = 2 * np.arange(10)
       >>> loss(x), loss(y)
       (inf, 0)
       >>> np.allclose(loss.prox(x, tau=1), y)
       True

    Notes
    -----
    This functional enforces an exact match between the predicted and observed samples, as required in interpolation problems.
    Such a functional is mainly useful in the context of noiseless data as it can lead to serious overfitting issues in the presence of noise.
    """
    # Membership test is exact match (up to np.allclose tolerance) with the data vector.
    condition_func = lambda x: np.allclose(x, data)
    # The projection onto the singleton {data} is data itself, regardless of x.
    projection_func = lambda x: data
    return IndicatorFunctional(dim=dim, condition_func=condition_func, projection_func=projection_func)
class KLDivergence(ProximableFunctional):
    r"""
    Generalised Kullback-Leibler divergence :math:`D_{KL}(\mathbf{y}||\mathbf{x}):=\sum_{i=1}^N y_i\log(y_i/x_i) -y_i +x_i`.

    The generalised Kullback-Leibler divergence is defined as:

    .. math::

       D_{KL}(\mathbf{y}||\mathbf{x}):=\sum_{i=1}^N H(y_i,x_i) -y_i +x_i, \quad \forall \mathbf{y}, \mathbf{x} \in \mathbb{R}^N,

    where

    .. math::

       H(y,x):=\begin{cases}
       y\log(y/x) &\, \text{if} \,x>0, y>0,\\
       0&\, \text{if} \,x=0, y\geq 0,\\
       +\infty &\,\text{otherwise.}
       \end{cases}

    Parameters
    ----------
    dim: int
        Dimension of the domain.
    data: Union[Number, np.ndarray]
        Data vector :math:`\mathbf{y}` to match.

    Returns
    -------
    :py:class:`~pycsou.core.functional.ProximableFunctional`
        The KL-divergence.

    Examples
    --------
    .. testsetup::

       import numpy as np
       from pycsou.func.loss import KLDivergence

    .. doctest::

       >>> y = np.arange(10)
       >>> loss = KLDivergence(dim=y.size, data=y)
       >>> x = 2 * np.arange(10)
       >>> loss(x)
       13.80837687480246
       >>> np.round(loss.prox(x, tau=1))
       array([ 0.,  2.,  4.,  6.,  8., 10., 12., 14., 16., 18.])

    Notes
    -----
    In information theory, and in the case where :math:`\mathbf{y}` and :math:`\mathbf{x}` sum to one --and hence can be interpreted as discrete probability distributions,
    the KL-divergence can be interpreted as the relative entropy of :math:`\mathbf{y}` w.r.t. :math:`\mathbf{x}`,
    i.e. the amount of information lost when using :math:`\mathbf{x}` to approximate :math:`\mathbf{y}`.
    It is particularly useful in the context of count data with Poisson distribution. Indeed, the KL-divergence corresponds
    –up to an additive constant– to the likelihood of the data :math:`\mathbf{y}` where each component is independent
    with Poisson distribution and respective intensities given by the entries of :math:`\mathbf{x}`.
    See [FuncSphere]_ Section 5 of Chapter 7 for the computation of its proximal operator.

    See Also
    --------
    :py:class:`~pycsou.func.penalty.ShannonEntropy`, :py:class:`~pycsou.func.penalty.LogBarrier`
    """

    def __init__(self, dim: int, data: Union[Number, np.ndarray]):
        # data is stored on the instance rather than passed to the parent, matching
        # the base-class signature while keeping the vector accessible to __call__/prox.
        super(KLDivergence, self).__init__(dim=dim, data=None, is_differentiable=False, is_linear=False)
        self.data = data

    def __call__(self, x: Union[Number, np.ndarray]) -> Number:
        r"""Evaluate the KL-divergence :math:`D_{KL}(\mathbf{y}||\mathbf{x})` at ``x``."""
        # Start from +inf everywhere (the "otherwise" branch of H), then fill in
        # the two finite branches. np.inf replaces the np.infty alias removed in NumPy 2.0.
        z = 0 * x + np.inf
        # Hoist the boolean masks: they were previously recomputed for every indexing operation.
        both_positive = (x > 0) * (self.data > 0)
        zero_pred = (x == 0) * (self.data >= 0)
        z[both_positive] = self.data[both_positive] * np.log(self.data[both_positive] / x[both_positive])
        z[zero_pred] = 0
        return np.sum(z - self.data + x)

    def prox(self, x: Union[Number, np.ndarray], tau: Number) -> Union[Number, np.ndarray]:
        r"""
        Proximal operator of the KL-divergence functional (see [FuncSphere]_ Section 5 of Chapter 7).

        Parameters
        ----------
        x: Union[Number, np.ndarray]
            Input.
        tau: Number
            Scaling constant.

        Returns
        -------
        Union[Number, np.ndarray]
            Proximal point of x.
        """
        # Closed-form root of the prox optimality condition (positive branch of the quadratic).
        return (x - tau + np.sqrt((x - tau) ** 2 + 4 * tau * self.data)) / 2
if __name__ == "__main__":
    # Run the doctest examples embedded in this module's docstrings.
    import doctest

    doctest.testmod()