Source code for mednet.data.segment.refuge

# SPDX-FileCopyrightText: Copyright © 2024 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later
"""REFUGE for optic disc and cup segmentation.

The dataset consists of 1200 color fundus photographs, created for a MICCAI
challenge. The goal of the challenge is to evaluate and compare automated
algorithms for glaucoma detection and optic disc/cup segmentation on a common
dataset of retinal fundus images.

* Database reference (including train/dev/test split): [REFUGE-2018]_

.. warning::

   The original directory ``Training400/AMD`` in REFUGE is considered to be
   replaced by an updated version provided by the `AMD Grand-Challenge`_ (with
   matching names).

   The changes concerns images ``A0012.jpg``, which was corrupted in REFUGE,
   and ``A0013.jpg``, which only exists in the AMD Grand-Challenge version.

Data specifications:

* Raw data input (on disk):

  * RGB images encoded in JPG format with varying resolution.  Training images
    are (HxW) 2056 x 2124 pixels; Validation (and test) images are 1634 x 1634
    pixels.
  * Vessel annotations are encoded as BMP images with the same resolution as
    input samples.
  * Masks for the eye fundus are provided by this package.
  * Total samples: 1200 distributed as 400 (training), 400 (validation) and 400
    (test).

* Output sample:

    * Image: Load raw TIFF images with :py:mod:`PIL`, with auto-conversion to RGB.
    * Vessel annotations: Load annotations with :py:mod:`PIL`, with
      auto-conversion to mode ``1`` with no dithering.
    * Eye fundus mask: Load mask with :py:mod:`PIL`, with
      auto-conversion to mode ``1`` with no dithering.

Splits ``optic-disc`` and ``cup`` contain annotations for optic-disc or cup
segmentation.

This module contains the base declaration of common data modules and raw-data
loaders for this database. All configured splits inherit from this definition.
"""

import importlib.resources
import importlib.resources.abc
import os
import pathlib
import typing

import PIL.Image
from torchvision import tv_tensors
from torchvision.transforms.functional import to_tensor

from ...models.transforms import crop_image_to_mask
from ...utils.rc import load_rc
from ..datamodule import CachingDataModule
from ..split import JSONDatabaseSplit
from ..typing import RawDataLoader as BaseDataLoader
from ..typing import Sample

DATABASE_SLUG = __name__.rsplit(".", 1)[-1]
"""Pythonic name to refer to this database."""

CONFIGURATION_KEY_DATADIR = "datadir." + DATABASE_SLUG
"""Key to search for in the configuration file for the root directory of this
database."""



[docs]
class RawDataLoader(BaseDataLoader):
    """A specialized raw-data-loader for the drishtigs1 dataset.

    Parameters
    ----------
    target_type
        Indicate whether to use the "cup" or "disc" target.
    """

    datadir: pathlib.Path
    """This variable contains the base directory where the database raw data is
    stored."""

    def __init__(self, target_type: str):
        self.datadir = pathlib.Path(
            load_rc().get(CONFIGURATION_KEY_DATADIR, os.path.realpath(os.curdir))
        )
        self.target_type = target_type


[docs]
    def sample(self, sample: typing.Any) -> Sample:
        """Load a single image sample from the disk.

        Parameters
        ----------
        sample
            A tuple containing path suffixes to the sample image, target, and mask
            to be loaded, within the dataset root folder.

        Returns
        -------
            The sample representation.
        """

        image = to_tensor(PIL.Image.open(self.datadir / sample[0]).convert(mode="RGB"))

        if self.target_type == "disc":
            target = to_tensor(
                PIL.Image.open(self.datadir / sample[1])
                .convert(mode="RGB", dither=None)
                .convert("L")
                .point(lambda p: p <= 150, mode="1")
            )
        elif self.target_type == "cup":
            target = to_tensor(
                PIL.Image.open(self.datadir / sample[1])
                .convert(mode="RGB", dither=None)
                .convert("L")
                .point(lambda p: p <= 100, mode="1")
            )
        else:
            raise ValueError(
                f"Target type {self.target_type} is not an option. "
                f"Available options are 'cup' and 'disc'."
            )

        assert sample[2] is not None
        mask_path = (
            importlib.resources.files(__package__) / "masks" / DATABASE_SLUG / sample[2]
        )
        with importlib.resources.as_file(mask_path) as path:
            mask = to_tensor(PIL.Image.open(path).convert(mode="1", dither=None))

        image = tv_tensors.Image(crop_image_to_mask(image, mask))
        target = tv_tensors.Mask(crop_image_to_mask(target, mask))
        mask = tv_tensors.Mask(crop_image_to_mask(mask, mask))

        return dict(image=image, target=target, mask=mask, name=sample[0])





[docs]
class DataModule(CachingDataModule):
    """REFUGE for optic disc and cup segmentation.

    Parameters
    ----------
    split_path
        Path or traversable (resource) with the JSON split description to load.
    target_type
        Indicate whether to use the "cup" or "disc" target.
    """

    def __init__(
        self,
        split_path: pathlib.Path | importlib.resources.abc.Traversable,
        target_type: str,
    ):
        super().__init__(
            database_split=JSONDatabaseSplit(split_path),
            raw_data_loader=RawDataLoader(target_type),
            database_name=DATABASE_SLUG,
            split_name=split_path.name.rsplit(".", 2)[0],
            task="segmentation",
        )