Source code for mednet.data.classify.indian
# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later
"""Indian database for TB detection (a.k.a. Dataset A/Dataset B).
The Indian collection database has been established to foster research in
computer-aided diagnosis of pulmonary diseases with a special focus on
pulmonary tuberculosis (TB). This database is also known as the "Dataset
A/Dataset B" database.
* Database reference: :cite:p:`noauthor_tbxpredict_2014`
* Split references: :cite:p:`noauthor_tbxpredict_2014` with 20% of train set for the validation
set
.. important:: **Raw data organization**
The Indian_ base datadir, which you should configure following the
:ref:`mednet.setup` instructions, must contain at least these two
subdirectories:
- ``DatasetA/`` (directory containing the dataset A images in JPG format)
- ``DatasetB/`` (directory containing the dataset B images in DICOM format)
Data specifications:
* Raw data input (on disk):
* JPG RGB 8-bit depth images with "inverted" grayscale scale, with varying
resolution of at least 1024 x 1024 pixels per sample
* Samples: 156 images and associated labels
* Output image: Use the same transforms and specifications as for
:py:mod:`.classify.shenzhen`
This module contains the base declaration of common data modules and raw-data
loaders for this database. All configured splits inherit from this definition.
"""
import importlib.resources.abc
import pathlib
from ..datamodule import CachingDataModule
from ..split import JSONDatabaseSplit
from .shenzhen import RawDataLoader
DATABASE_SLUG = __name__.rpartition(".")[2]
"""Pythonic name of this database (last component of the module's dotted name)."""

CONFIGURATION_KEY_DATADIR = f"datadir.{DATABASE_SLUG}"
"""Configuration-file key under which the root directory of this database is
looked up."""
[docs]
class DataModule(CachingDataModule):
    """Indian database for TB detection (a.k.a. Dataset A/Dataset B).

    Binds a JSON split description of this database to the raw-data loader
    imported from :py:mod:`.shenzhen`, configured to read the database root
    directory from the ``CONFIGURATION_KEY_DATADIR`` configuration key.  The
    data module is declared as a ``"classification"`` task with a single
    output class.

    Parameters
    ----------
    split_path
        Path or traversable (resource) with the JSON split description to load.
    """

    def __init__(self, split_path: pathlib.Path | importlib.resources.abc.Traversable):
        # Arguments are built in the same order the parent call receives them.
        split = JSONDatabaseSplit(split_path)
        loader = RawDataLoader(config_variable=CONFIGURATION_KEY_DATADIR)

        # The split name is the file name with up to two trailing
        # dot-separated suffixes removed (e.g. ``default.json`` -> ``default``).
        name_of_split = split_path.name.rsplit(".", 2)[0]

        super().__init__(
            database_split=split,
            raw_data_loader=loader,
            database_name=DATABASE_SLUG,
            split_name=name_of_split,
            task="classification",
            num_classes=1,
        )