# Source code for mednet.models.losses

# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later
"""Custom losses for different tasks."""

import logging
import typing

import fairret.loss.violation
import numpy.typing
import torch
import torch.utils.data
from fairret.statistic import Statistic

from ..data.datamodule import ConcatDataModule
from ..data.typing import Dataset
from . import typing as model_typing

logger = logging.getLogger(__name__)


def _task_type(
    targets: torch.Tensor
    | numpy.typing.NDArray
    | typing.Iterable[typing.Iterable[int]]
    | typing.Iterable[typing.Iterable[typing.Iterable[typing.Iterable[int]]]],
) -> model_typing.TaskType:
    """Determine the type of task from combined targets available.

    This function will look into the provided targets of a dataset and will
    determine the type of task.

    Parameters
    ----------
    targets
        The complete target set, for the whole dataset being analyzed. This
        matrix should be ``[n, C]`` where ``n`` is the number of samples, and
        ``C`` the number of classes.  All values should be either 0 or 1.

    Returns
    -------
        The type of tas
    """

    int_targets = torch.Tensor(targets).int()

    task_type: model_typing.TaskType = "classification"
    if len(int_targets.shape) > 2:
        task_type = "segmentation"

    return task_type


def _get_positive_weights_from_targets(targets: torch.Tensor) -> torch.Tensor:
    """Compute the negative-to-positive ratio of each class in ``targets``.

    For every class (column), the sum of the target values is taken as the
    number of positives, the remaining samples as negatives, and the ratio
    ``negatives / positives`` is returned.  Such a ratio can be used to
    re-balance a minimisation criterion when the data is heavily imbalanced.

    Parameters
    ----------
    targets
        A :py:class:`torch.Tensor` containing the targets, in the format
        ``[n, C]`` where ``n`` is the number of samples and ``C`` the
        number of classes.  Targets detected as "segmentation" (more than two
        dimensions) are first flattened so that every pixel counts as a
        sample (assumes a ``[n, C, H, W]`` layout -- TODO confirm).

    Returns
    -------
        One ratio per class (second dimension of ``targets``).
    """

    if _task_type(targets) == "segmentation":
        # move the class axis last, then collapse all other axes so each
        # pixel becomes one row of a plain ``[samples, C]`` matrix
        n_classes = targets.shape[1]
        classes_last = targets.transpose(0, 2).transpose(1, 3)
        targets = classes_last.reshape(-1, n_classes)

    n_samples = targets.shape[0]
    n_positives = targets.sum(dim=0)
    return (n_samples - n_positives) / n_positives


def _get_positive_weights_from_dataloader(
    dataloader: torch.utils.data.DataLoader,
) -> torch.Tensor:
    """Compute the negative-to-positive ratio of each class of a DataLoader.

    All targets reachable through ``dataloader`` are gathered, vertically
    stacked, and handed to :py:func:`_get_positive_weights_from_targets`.

    Parameters
    ----------
    dataloader
        A DataLoader from which to compute the positive weights.  When its
        dataset is not a :py:class:`Dataset`, the loader is iterated and
        every batch must be a dictionary containing a ``target`` key.

    Returns
    -------
        The positive weight of each class in the dataset given as input.
    """

    dataset = dataloader.dataset

    # our own datasets expose all targets directly, which is faster than
    # iterating over every batch of the dataloader
    all_targets = (
        dataset.targets()
        if isinstance(dataset, Dataset)
        else [batch["target"] for batch in dataloader]
    )

    return _get_positive_weights_from_targets(torch.vstack(all_targets))



[docs]
def pos_weight_for_bcewithlogitsloss(
    datamodule: ConcatDataModule,
) -> tuple[dict[str, torch.Tensor], dict[str, torch.Tensor]]:
    """Generate the ``pos_weight`` argument for losses of type :py:class:`torch.nn.BCEWithLogitsLoss`.

    This function can generate the ``pos_weight`` parameters for both train and
    validation losses given a datamodule.  If the datamodule has no dataloader
    named ``validation``, the training weights are reused for validation (a
    warning is logged).

    Parameters
    ----------
    datamodule
        The datamodule to probe for training and validation datasets.

    Returns
    -------
        A tuple containing the training and validation ``pos_weight``
        arguments, each wrapped in a dictionary with the single key
        ``pos_weight``.
    """

    train_weights = _get_positive_weights_from_dataloader(
        datamodule.unshuffled_train_dataloader()
    )
    logger.info(f"train: BCEWithLogitsLoss(pos_weight={train_weights})")

    # query the validation dataloaders only once and reuse the result
    val_dataloaders = datamodule.val_dataloader()
    if "validation" in val_dataloaders:
        validation_weights = _get_positive_weights_from_dataloader(
            val_dataloaders["validation"]
        )
    else:
        logger.warning(
            "Datamodule does not contain a validation dataloader. "
            "The training dataloader will be used instead."
        )
        validation_weights = train_weights
    logger.info(f"validation: BCEWithLogitsLoss(pos_weight={validation_weights})")

    return ({"pos_weight": train_weights}, {"pos_weight": validation_weights})




[docs]
class BCEWithLogitsLossWeightedPerBatch(torch.nn.Module):
    """Calculates the binary cross entropy loss for every batch.

    This loss is similar to :py:class:`torch.nn.BCEWithLogitsLoss`, except it
    updates the ``pos_weight`` (ratio between negative and positive target
    pixels) parameter for the loss term for every batch, based on the
    accumulated target pixels for all samples in the batch.

    Implements Equation 1 in :cite:p:`maninis_deep_2016`.  The weight depends on the
    current proportion between negatives and positives in the ground-
    truth sample being analyzed.
    """

    def __init__(self):
        super().__init__()

    def forward(self, input_: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        """Forward pass.

        Parameters
        ----------
        input_
            Logits produced by the model to be evaluated, with the shape ``[n,
            c]`` (classification), or ``[n, c, h, w]`` (segmentation).
        target
            Ground-truth information with the shape  ``[n, c]``
            (classification), or ``[n, c, h, w]`` (segmentation), containing
            zeroes and ones.

        Returns
        -------
            The average loss for all input data.
        """

        # ratio between the number of negatives and positives in this batch
        num_pos = target.sum()
        ratio = (input_.numel() - num_pos) / num_pos

        # A batch without any positive yields an infinite ratio, which turns
        # into NaN inside the loss (inf * 0).  In that case no term is scaled
        # by ``pos_weight`` anyway, so a neutral weight of 1 is used instead.
        pos_weight = torch.where(num_pos > 0, ratio, torch.ones_like(ratio))

        return torch.nn.functional.binary_cross_entropy_with_logits(
            input_,
            target,
            reduction="mean",
            pos_weight=pos_weight,
        )





[docs]
class SoftJaccardAndBCEWithLogitsLoss(torch.nn.Module):
    r"""Implement the generalized loss function of Equation (3) at :cite:p:`iglovikov_ternausnetv2_2018`.

    At the paper, authors suggest a value of :math:`\alpha = 0.7`, which we set
    as default for instances of this type.

    .. math::

       L = \alpha H + (1-\alpha)(1-J)

    J is the (soft) Jaccard index computed from the sigmoid probabilities, so
    that :math:`1-J` acts as a distance, and H is the Binary Cross-Entropy
    Loss.  Our implementation is based on
    :py:class:`torch.nn.BCEWithLogitsLoss`.

    Parameters
    ----------
    alpha
        Determines the weighting of J and H. Default: ``0.7``.
    """

    def __init__(self, alpha: float = 0.7):
        super().__init__()
        self.alpha = alpha

    def forward(self, input_: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        """Forward pass.

        Parameters
        ----------
        input_
            Logits produced by the model to be evaluated, with the shape ``[n,
            c]`` (classification), or ``[n, c, h, w]`` (segmentation).
        target
            Ground-truth information with the shape  ``[n, c]``
            (classification), or ``[n, c, h, w]`` (segmentation), containing
            zeroes and ones.

        Returns
        -------
            Loss, in a single entry.
        """

        # small constant keeping the Jaccard denominator away from zero
        eps = 1e-8

        # soft Jaccard index, accumulated over all entries of the batch
        probabilities = torch.sigmoid(input_)
        intersection = (probabilities * target).sum()
        union = probabilities.sum() + target.sum() - intersection
        j = intersection / (union + eps)

        # plain (unweighted) binary cross-entropy over all entries
        h = torch.nn.functional.binary_cross_entropy_with_logits(
            input_, target, reduction="mean"
        )

        return (self.alpha * h) + ((1 - self.alpha) * (1 - j))





[docs]
class MultiLayerBCELogitsLossWeightedPerBatch(BCEWithLogitsLossWeightedPerBatch):
    """Weighted Binary Cross-Entropy Loss for multi-layered inputs.

    This loss can be used in networks that produce more than one output that
    has to match output targets.  For example, architectures such as
    as :py:class:`.hed.HED` or :py:class:`.lwnet.LittleWNet` require this
    feature.

    It follows the inherited super class applying on-the-fly `pos_weight`
    updates per batch.
    """

    def __init__(self):
        super().__init__()

    def forward(self, input_: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        """Forward pass.

        Parameters
        ----------
        input_
            Collection of logits produced by the model, one entry per output
            layer.  It is iterated over, and each entry is compared against
            the same ``target``; every entry is therefore expected to have the
            shape ``[n, c]`` (classification), or ``[n, c, h, w]``
            (segmentation).
        target
            Ground-truth information with the shape  ``[n, c]``
            (classification), or ``[n, c, h, w]`` (segmentation), containing
            zeroes and ones.

        Returns
        -------
            The average, over all output layers, of the per-layer loss.
        """

        # bind the parent implementation outside of the comprehension, where
        # zero-argument ``super()`` is not reliably available
        fwd = super().forward
        return torch.stack([fwd(layer, target) for layer in input_]).mean()





[docs]
class MultiLayerSoftJaccardAndBCELogitsLoss(SoftJaccardAndBCEWithLogitsLoss):
    """Implement Equation 3 in :cite:p:`iglovikov_ternausnetv2_2018` for the multi-output networks.

    This loss can be used in networks that produce more than one output that
    has to match output targets.  For example, architectures such as
    as :py:class:`.hed.HED` or :py:class:`.lwnet.LittleWNet` require this
    feature.

    Parameters
    ----------
    alpha : float
        Determines the weighting of SoftJaccard and BCE. Default: ``0.7``.
    """

    def __init__(self, alpha: float = 0.7):
        super().__init__(alpha=alpha)

    def forward(self, input_: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        """Forward pass.

        Parameters
        ----------
        input_
            Collection of logits produced by the model, one entry per output
            layer.  It is iterated over, and each entry is compared against
            the same ``target``; every entry is therefore expected to have the
            shape ``[n, c]`` (classification), or ``[n, c, h, w]``
            (segmentation).
        target
            Ground-truth information with the shape  ``[n, c]``
            (classification), or ``[n, c, h, w]`` (segmentation), containing
            zeroes and ones.

        Returns
        -------
            The average, over all output layers, of the per-layer loss.
        """

        # bind the parent implementation outside of the comprehension, where
        # zero-argument ``super()`` is not reliably available
        fwd = super().forward
        return torch.stack([fwd(layer, target) for layer in input_]).mean()





[docs]
class MOONBCEWithLogitsLoss(torch.nn.Module):
    """Calculates the weighted binary cross entropy loss based on :cite:p:`guler_refining_2024`.

    This loss implements the domain-adapted multitask loss function in Equation
    (2) on :cite:p:`guler_refining_2024`.  The vector of input weights must be calculated from
    the input dataset in advance, and set during initialization, or later,
    before the loss can be fully used.

    Parameters
    ----------
    weights
        The positive weight of each class in the dataset given as input as a
        ``[2, C]`` tensor, with :math:`w_i^-` at position 0, and :math:`w_i^+`
        at position 1, as defined in Equation (1) of :cite:p:`guler_refining_2024`.
    """

    def __init__(self, weights: torch.Tensor | None = None):
        super().__init__()
        # kept as a plain attribute (not a registered buffer); see ``to()``
        self.weights = weights

    @classmethod
    def _get_weight_from_targets(cls, targets: torch.Tensor) -> torch.Tensor:
        r"""Compute the MOON weights from a set of targets as per Equation
        (1) in :cite:p:`guler_refining_2024`.

        Parameters
        ----------
        targets
            A :py:class:`torch.Tensor` containing the targets, in the format
            ``[n, C]`` where ``n`` is the number of samples and ``C`` the
            number of classes.

        Returns
        -------
            The weight of each class in the dataset given as input as a ``[2,
            C]`` tensor, with :math:`w_i^-` at position 0, and :math:`w_i^+` at
            position 1, as defined in Equation (1) of :cite:p:`guler_refining_2024`.
        """

        if _task_type(targets) == "segmentation":
            # rearranges ``targets`` vector so the problem looks like a simpler
            # classification problem where each pixel is a "separate sample"
            targets = (
                targets.transpose(0, 2).transpose(1, 3).reshape(-1, targets.shape[1])
            )

        # at this point targets should be [n x C]
        s_plus = targets.sum(dim=0)
        s_minus = targets.shape[0] - s_plus

        # clamping to [0, 1] implements the "1 if majority, ratio otherwise"
        # cases of Equation (1)
        w_minus = (s_plus / s_minus).clamp(min=0, max=1)
        w_plus = (s_minus / s_plus).clamp(min=0, max=1)

        return torch.vstack((w_minus, w_plus))

    @classmethod
    def _get_weight_from_dataloader(
        cls, dataloader: torch.utils.data.DataLoader
    ) -> torch.Tensor:
        r"""Compute the MOON weights of each class of a DataLoader as per Equation
        (1) in :cite:p:`guler_refining_2024`.

        Parameters
        ----------
        dataloader
            A DataLoader from which to compute the positive weights.  Entries must
            be a dictionary which must contain a ``target`` key.

        Returns
        -------
            The weight of each class in the dataset given as input as a ``[2,
            C]`` tensor, with :math:`w_i^-` at position 0, and :math:`w_i^+` at
            position 1, as defined in Equation (1) of :cite:p:`guler_refining_2024`.
        """

        if isinstance(dataloader.dataset, Dataset):
            # there is a faster way to access the targets!
            targets = dataloader.dataset.targets()
        else:
            targets = [batch["target"] for batch in dataloader]

        return cls._get_weight_from_targets(torch.vstack(targets))

    @classmethod
    def get_arguments_from_datamodule(
        cls, datamodule: ConcatDataModule
    ) -> tuple[dict[str, torch.Tensor], dict[str, torch.Tensor]]:
        r"""Compute the MOON weights for train and validation sets of a datamodule.

        This function inputs a :py:class:`.data.datamodule.ConcatDataModule`,
        and for both the training and validation sets, and for each class on
        the respective dataloader targets, computes negative and positive
        weights as such:

        .. math::

           \begin{align}
               w_i^+ &=
                   \begin{cases}
                       1 & \text{if } S^{-}_{i} > S^{+}_{i} \\
                       \frac{S^{-}_{i}}{S^{+}_{i}} & \text{otherwise}
                   \end{cases} &
               w_i^- &=
                   \begin{cases}
                       1 & \text{if } S^{+}_{i} > S^{-}_{i} \\
                       \frac{S^{+}_{i}}{S^{-}_{i}} & \text{otherwise}
                   \end{cases}
           \end{align}

        This weight vector is used during runtime to balance individual batch
        losses respecting individual class distributions.  If the datamodule
        has no dataloader named ``validation``, the training weights are
        reused for validation (a warning is logged).

        Parameters
        ----------
        datamodule
            The datamodule to probe for training and validation datasets.

        Returns
        -------
            A tuple containing the training and validation ``weights``
            arguments, wrapped in a dictionary. Each ``weights`` variable
            contains the weights of each class in the target dataset as a ``[2,
            C]`` tensor, with :math:`w_i^-` at position 0, and :math:`w_i^+` at
            position 1, as defined in Equation (1) of :cite:p:`guler_refining_2024`.
        """

        train_weights = cls._get_weight_from_dataloader(
            datamodule.unshuffled_train_dataloader()
        )
        logger.info(f"train: MOONBCEWithLogitsLoss(weight={train_weights})")

        # query the validation dataloaders only once and reuse the result
        val_dataloaders = datamodule.val_dataloader()
        if "validation" in val_dataloaders:
            validation_weights = cls._get_weight_from_dataloader(
                val_dataloaders["validation"]
            )
        else:
            logger.warning(
                "Datamodule does not contain a validation dataloader. "
                "The training dataloader will be used instead."
            )
            validation_weights = train_weights
        logger.info(f"validation: MOONBCEWithLogitsLoss(weight={validation_weights})")

        return (dict(weights=train_weights), dict(weights=validation_weights))

    def forward(self, input_: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        r"""Forward pass.

        This function inputs the output of the model and a set of binary
        targets (as a float tensor containing zeroes and ones), and implements
        Equation (2) from :cite:p:`guler_refining_2024`:

        .. math::

           \mathcal J = -\sum_{i=1}^M w_i^{t_i} \bigl[t_i\log f_i(x) +
           (1-t_i)\log (1-f_i(x)) \bigr]

        Note the weighted individual losses are averaged (not summed) over
        all entries.

        Parameters
        ----------
        input_
            Logits produced by the model to be evaluated, with the shape ``[n,
            c]`` (classification), or ``[n, c, h, w]`` (segmentation).
        target
            Ground-truth information with the shape  ``[n, c]``
            (classification), or ``[n, c, h, w]`` (segmentation), containing
            zeroes and ones.

        Returns
        -------
            The result of Equation (2) from :cite:p:`guler_refining_2024`.

        Raises
        ------
        AssertionError
            In case the weights have not be initialized by calling
            :py:meth:`get_arguments_from_datamodule`.
        """

        if self.weights is None:
            # raised explicitly (instead of using ``assert``) so the check is
            # not stripped when Python runs with optimizations enabled
            raise AssertionError(
                f"Weights are not initialized. Call "
                f"{self.__class__.__name__}.get_arguments_from_datamodule() to sort this."
            )

        if len(input_.shape) > 2:  # segmentation
            # each pixel becomes a separate sample: [n, c, h, w] -> [h*w*n, c]
            input_ = input_.transpose(0, 2).transpose(1, 3).reshape(-1, input_.shape[1])
            target = target.transpose(0, 2).transpose(1, 3).reshape(-1, target.shape[1])

        # selects w^- where the target is 0, and w^+ where the target is 1
        weights = ((1.0 - target) * self.weights[0]) + (target * self.weights[1])

        individual_losses = torch.nn.functional.binary_cross_entropy_with_logits(
            input_, target, reduction="none"
        )

        return (individual_losses * weights).mean()

    def to(self, *args: typing.Any, **kwargs: typing.Any) -> typing.Self:
        """Move loss parameters to specified device.

        Only the ``weights`` tensor is moved; if it has not been set yet, a
        warning is logged and nothing else happens.  Refer to the method
        :py:meth:`torch.nn.Module.to` for details on accepted arguments.

        Parameters
        ----------
        *args
            Parameter forwarded to the underlying implementations.
        **kwargs
            Parameter forwarded to the underlying implementations.

        Returns
        -------
            Self.
        """

        if self.weights is None:
            logger.warning(
                f"Weights are not initialized. Call {self.__class__.__name__}."
                f"get_arguments_from_datamodule() to sort this."
            )

            return self

        self.weights = self.weights.to(*args, **kwargs)

        return self





[docs]
class FairretNormLoss(fairret.loss.violation.NormLoss):
    """Calculates the fairness regularization term through ``fairret.loss.violation.NormLoss``.

    This class uses a fairness loss function introduced and implemented in the FAIRRET framework
    (Buyl et al., ICLR 2024). It quantifies unfairness by computing a violation vector
    measuring how much each sensitive group's statistic deviates from a target statistic,
    and penalizes this vector using a p-norm.

    Parameters
    ----------
    stat
        The statistic that should be used to calculate the violation vector. Preferably, a
        LinearFractionalStatistic is provided, as this allows for a straightforward calculation of
        the target statistic as the overall statistic.
    sensitive_key
        The key in the batch dictionary that contains the sensitive attribute information.
    num_groups
        The number of sensitive groups. Default is 2.
    p
        The order of the norm. Default is 1.
    """

    def __init__(
        self, stat: Statistic, sensitive_key: str, num_groups: int = 2, p: int = 1
    ):
        super().__init__(stat, p)
        self.num_groups = num_groups
        self.sensitive_key = sensitive_key

    def forward(
        self,
        input_: torch.Tensor,
        target: torch.Tensor,
        sensitive_attr: dict[str, torch.Tensor],
    ) -> torch.Tensor:
        """Forward pass.

        Parameters
        ----------
        input_
            Logits produced by the model to be evaluated, with the shape ``[n,
            c]`` (classification), or ``[n, c, h, w]`` (segmentation).
        target
            Ground-truth information with the shape  ``[n, c]``
            (classification), or ``[n, c, h, w]`` (segmentation), containing
            zeroes and ones.
        sensitive_attr
            Sensitive attribute information dictionary ``sensitive_key: [n]``, containing
            integer values indicating the sensitive group of each sample.

        Returns
        -------
        torch.Tensor
            The fairness term for all input data.
        """
        attribute = sensitive_attr[self.sensitive_key]
        if attribute.ndim > 1:
            # mednet datamodule returns a metadata tensor via torch.LongTensor([sensitive_attr]).
            # When stacked on the batch dimension the final shape is [B, 1], so we
            # remove the singleton dimension -> [B]. This will ensure that the one-hot encoding
            # vector will have the expected shape [B, num_groups] for fairret loss
            attribute = attribute.squeeze(-1)

        # One-hot encoding of sensitive attribute.  ``one_hot`` only accepts
        # int64 index tensors, so other dtypes are converted first (this is a
        # no-op for tensors that are already int64).
        s = torch.nn.functional.one_hot(
            attribute.long(), num_classes=self.num_groups
        ).float()
        return super().forward(input_, s, target, pred_as_logit=True)





[docs]
class MOLoss(torch.nn.Module):
    """Multi-objective loss combining a utility loss and a fairness regularization term.

    This module implements a standard multi-objective optimization formulation for
    fair machine learning, where a task-specific loss (e.g., binary cross-entropy)
    is jointly optimized with a fairness loss. The two losses are combined through
    a convex combination, with a weighting parameter **λ** that controls the trade-off
    between utility and fairness.

    Parameters
    ----------
    utility_loss_type
        Loss class used to compute the task-specific utility loss.
    fairness_loss_type
        Loss class used to compute the fairness regularization term.
    utility_args
        Keyword arguments passed to ``utility_loss_type`` during initialization.
        If ``None``, an empty dictionary is used.
    fairness_args
        Keyword arguments passed to ``fairness_loss_type`` during initialization.
        If ``None``, an empty dictionary is used.
    lambda_
        Weighting coefficient controlling the trade-off between utility and
        fairness. The combined loss is computed as
        ``(1 - lambda_) * utility_loss + lambda_ * fairness_loss``.
        Default is 0.2.
    """

    def __init__(
        self,
        utility_loss_type: type[torch.nn.Module],
        fairness_loss_type: type[torch.nn.Module],
        utility_args: dict[str, typing.Any] | None = None,
        fairness_args: dict[str, typing.Any] | None = None,
        lambda_: float = 0.2,
    ):
        super().__init__()
        utility_args = utility_args or {}
        fairness_args = fairness_args or {}

        # both losses are instantiated here from their types and arguments
        self.utility_loss = utility_loss_type(**utility_args)
        self.fairness_loss = fairness_loss_type(**fairness_args)
        self.lambda_ = lambda_

    @classmethod
    def get_arguments_from_datamodule(
        cls, datamodule: ConcatDataModule
    ) -> tuple[dict[str, dict[str, torch.Tensor]], dict[str, dict[str, torch.Tensor]]]:
        """Compute class-balancing arguments for the utility loss.

        The balance is done uniquely on the utility loss.  Since this is a
        class method, it cannot know which utility loss type will be used, so
        it assumes a loss of type :py:class:`torch.nn.BCEWithLogitsLoss` and
        delegates to :py:func:`pos_weight_for_bcewithlogitsloss`.

        Parameters
        ----------
        datamodule
            The datamodule to probe for training and validation datasets.

        Returns
        -------
            A tuple containing the training and validation arguments, each a
            dictionary with the single key ``utility_args`` holding the
            arguments computed for the utility loss.
        """
        logger.info("MOLoss balance is done uniquely on the utility loss...")
        train_loss_args, valid_loss_args = pos_weight_for_bcewithlogitsloss(datamodule)
        return (dict(utility_args=train_loss_args), dict(utility_args=valid_loss_args))

    def _reweight(
        self, utility_loss: torch.Tensor, fairness_loss: torch.Tensor
    ) -> torch.Tensor:
        """
        Combine utility and fairness losses.

        By default, this is implemented as a simple convex combination of the two losses:

            L = (1 - λ) * L_utility + λ * L_fairness

        Subclasses can override this method to implement alternative
        combination strategies.

        Parameters
        ----------
        utility_loss : torch.Tensor
            The task-specific loss (e.g., BCE, cross-entropy).
        fairness_loss : torch.Tensor
            The fairness regularization term.

        Returns
        -------
        torch.Tensor
            Scalar combined loss.
        """
        return (1 - self.lambda_) * utility_loss + self.lambda_ * fairness_loss

    def forward(
        self,
        input_: torch.Tensor,
        target: torch.Tensor,
        sensitive_attr: dict[str, torch.Tensor],
    ) -> torch.Tensor:
        """Forward pass.

        Parameters
        ----------
        input_
            Logits produced by the model to be evaluated, with the shape ``[n,
            c]`` (classification), or ``[n, c, h, w]`` (segmentation).
        target
            Ground-truth information with the shape  ``[n, c]``
            (classification), or ``[n, c, h, w]`` (segmentation), containing
            zeroes and ones.
        sensitive_attr
            Sensitive attribute information dictionary ``sensitive_key: [n]``, containing
            integer values indicating the sensitive group of each sample.

        Returns
        -------
        torch.Tensor
            The combination of the utility and fairness losses, as computed
            by :py:meth:`_reweight`.
        """
        # Compute utility loss (does not see the sensitive attributes)
        utility_loss_value = self.utility_loss(input_, target)

        # Compute fairness loss
        fairness_loss_value = self.fairness_loss(input_, target, sensitive_attr)

        # Combine utility and fairness losses
        return self._reweight(utility_loss_value, fairness_loss_value)