|
| 1 | +import os |
| 2 | +from glob import glob |
| 3 | +from typing import Tuple, Optional, Union |
| 4 | + |
| 5 | +import torch |
| 6 | + |
| 7 | +import torch_em |
| 8 | + |
| 9 | +from .. import util |
| 10 | + |
| 11 | + |
# Download URL and SHA256 checksum of the AutoPET-II NIfTI data archive.
AUTOPET_DATA = "http://193.196.20.155/data/autoPET/data/nifti.zip"
CHECKSUM = "0ac2186ea6d936ff41ce605c6a9588aeb20f031085589897dbab22fc82a12972"
| 14 | + |
| 15 | + |
def _assort_autopet_dataset(path, download):
    """Download and extract the AutoPET-II data into ``<path>/AutoPET-II``.

    No-op if the target directory already exists.

    Arguments:
        path: Root folder where the zip file and the extracted data are stored.
        download: Whether downloading the data is allowed (forwarded to `util.download_source`).
    """
    target_dir = os.path.join(path, "AutoPET-II")
    if os.path.exists(target_dir):
        return

    # Make sure the root folder exists so the zip file can be written there.
    os.makedirs(path, exist_ok=True)
    zip_path = os.path.join(path, "autopet.zip")
    print("The AutoPET data is not available yet and will be downloaded.")
    print("Note that this dataset is large, so this step can take several hours (depending on your internet).")
    util.download_source(path=zip_path, url=AUTOPET_DATA, download=download, checksum=CHECKSUM)
    # Create the target directory only after the download succeeded: creating it
    # up-front (as before) meant a failed download left an empty directory that
    # made every subsequent call return early as if the data were present.
    os.makedirs(target_dir, exist_ok=True)
    util.unzip(zip_path, target_dir, remove=False)
| 27 | + |
| 28 | + |
| 29 | +def _get_paths(path, modality): |
| 30 | + root_dir = os.path.join(path, "AutoPET-II", "FDG-PET-CT-Lesions", "*", "*") |
| 31 | + ct_paths = sorted(glob(os.path.join(root_dir, "CTres.nii.gz"))) |
| 32 | + pet_paths = sorted(glob(os.path.join(root_dir, "SUV.nii.gz"))) |
| 33 | + label_paths = sorted(glob(os.path.join(root_dir, "SEG.nii.gz"))) |
| 34 | + if modality is None: |
| 35 | + raw_paths = [(ct_path, pet_path) for ct_path, pet_path in zip(ct_paths, pet_paths)] |
| 36 | + elif modality == "CT": |
| 37 | + raw_paths = ct_paths |
| 38 | + elif modality == "PET": |
| 39 | + raw_paths = pet_paths |
| 40 | + else: |
| 41 | + raise ValueError("Choose from the available modalities: `CT` / `PET`") |
| 42 | + |
| 43 | + return raw_paths, label_paths |
| 44 | + |
| 45 | + |
def get_autopet_dataset(
    path: str,
    patch_shape: Tuple[int, ...],
    ndim: int,
    modality: Optional[str] = None,
    download: bool = False,
    **kwargs
) -> torch.utils.data.Dataset:
    """Dataset for lesion segmentation in whole-body FDG-PET/CT scans.

    This dataset is from the `AutoPET II - Automated Lesion Segmentation in PET/CT - Domain Generalization` challenge.
    Link: https://autopet-ii.grand-challenge.org/
    Please cite it if you use this dataset for publication.

    Arguments:
        path: The path where the zip files / the prepared dataset exists.
            - Expected initial structure: `path` should have ...
        patch_shape: The patch shape (for 2d or 3d patches)
        ndim: The dimensions of the inputs (use `2` for getting 2d patches, and `3` for getting 3d patches)
        modality: The modality for using the AutoPET dataset. One of "CT", "PET" or None.
            - (default: None) If passed `None`, it takes both the modalities as inputs
        download: Downloads the dataset

    Returns:
        dataset: The segmentation dataset for the respective modalities.

    Raises:
        TypeError: If `modality` is neither a string nor None.
    """
    # NOTE: the previous `assert isinstance(modality, Union[str, None])` fails with
    # a TypeError inside isinstance on Python < 3.10 (isinstance does not accept
    # typing.Union there) and is silently stripped under `python -O`;
    # validate explicitly instead.
    if modality is not None and not isinstance(modality, str):
        raise TypeError(f"Expected `modality` to be a string or None, got {type(modality)}.")

    _assort_autopet_dataset(path, download)
    raw_paths, label_paths = _get_paths(path, modality)
    dataset = torch_em.default_segmentation_dataset(
        raw_paths, "data", label_paths, "data",
        patch_shape, ndim=ndim, with_channels=modality is None,
        **kwargs
    )
    if "sampler" in kwargs:
        # Lesion foreground is sparse in whole-body scans; allow the sampler
        # many more attempts before rejecting a patch location.
        for ds in dataset.datasets:
            ds.max_sampling_attempts = 5000
    return dataset
| 84 | + |
| 85 | + |
def get_autopet_loader(
    path, patch_shape, batch_size, ndim, modality=None, download=False, **kwargs
):
    """Dataloader for lesion segmentation in whole-body FDG-PET/CT scans. See `get_autopet_dataset` for details."""
    # Route the keyword arguments to either the dataset or the dataloader.
    dataset_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_autopet_dataset(path, patch_shape, ndim, modality, download, **dataset_kwargs)
    return torch_em.get_data_loader(dataset, batch_size=batch_size, **loader_kwargs)
0 commit comments