Toggle Light / Dark / Auto color theme
Toggle table of contents sidebar
Source code for mednet.data.classify.indian
# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later
"""Indian database for TB detection (a.k.a. Dataset A/Dataset B).
The Indian collection database has been established to foster research in
computer-aided diagnosis of pulmonary diseases with a special focus on
pulmonary tuberculosis (TB). This database is also known as the "Dataset
A/Dataset B" database.
* Database reference: [INDIAN-2013]_
* Split references: [INDIAN-2013]_ with 20% of train set for the validation
set
.. important:: **Raw data organization**
The Indian_ base datadir, which you should configure following the
:ref:`mednet.setup` instructions, must contain at least these two
subdirectories:
- ``DatasetA/`` (directory containing the dataset A images in JPG format)
- ``DatasetB/`` (directory containing the dataset B images in DICOM format)
Data specifications:
* Raw data input (on disk):
* JPG RGB 8-bit depth images with "inverted" grayscale scale, with varying
resolution of at least 1024 x 1024 pixels per sample
* Samples: 156 images and associated labels
* Output image: Use the same transforms and specifications as for
:py:mod:`.classify.shenzhen`
This module contains the base declaration of common data modules and raw-data
loaders for this database. All configured splits inherit from this definition.
"""
import importlib.resources.abc
import pathlib
from ..datamodule import CachingDataModule
from ..split import JSONDatabaseSplit
from .shenzhen import RawDataLoader
DATABASE_SLUG = __name__.rpartition(".")[-1]
"""Pythonic name of this database (last dotted component of the module name)."""

CONFIGURATION_KEY_DATADIR = f"datadir.{DATABASE_SLUG}"
"""Key to search for in the configuration file for the root directory of this
database."""
[docs]
class DataModule(CachingDataModule):
    """Indian database for TB detection (a.k.a. Dataset A/Dataset B).

    The samples to use are described by a JSON split file.  Raw data is read
    with the loader imported from the Shenzhen module, which is handed
    ``CONFIGURATION_KEY_DATADIR`` as its ``config_variable``.

    Parameters
    ----------
    split_path
        Path or traversable (resource) with the JSON split description to load.
    """

    def __init__(
        self, split_path: pathlib.Path | importlib.resources.abc.Traversable
    ):
        # Collaborators are built in the same order the base-class call
        # originally evaluated them.
        split = JSONDatabaseSplit(split_path)
        loader = RawDataLoader(config_variable=CONFIGURATION_KEY_DATADIR)

        # Keep only the text that precedes the (at most) two last
        # dot-separated parts of the file name, e.g. both "default.json" and
        # "default.json.bz2" yield "default".
        split_name, *_ = split_path.name.rsplit(".", 2)

        super().__init__(
            database_split=split,
            raw_data_loader=loader,
            database_name=DATABASE_SLUG,
            split_name=split_name,
            task="classification",
        )