|
| 1 | +import os |
| 2 | +from tqdm import tqdm |
| 3 | +from glob import glob |
| 4 | + |
| 5 | +import z5py |
| 6 | +import numpy as np |
| 7 | +import pandas as pd |
| 8 | + |
| 9 | +import torch_em |
| 10 | + |
| 11 | +from . import util |
| 12 | + |
| 13 | + |
# Automatic download is currently not possible, because the data source requires authentication.
# The archive has to be obtained manually, hence no download URL is set here.
URL = None  # TODO: here - https://datasets.deepcell.org/data
| 16 | + |
| 17 | + |
def _create_split(path, split):
    """Convert the npz archive of one split into one zarr file per image.

    Reads `<path>/DynamicNuclearNet-segmentation-v1_0/<split>.npz`, writes the
    first channel of every image / label pair to
    `<path>/<split>/image_XXXX.zarr` (datasets "raw" and "labels") and deletes
    the npz archive afterwards.

    Args:
        path: Root folder of the dataset.
        split: Name of the split; determines the npz file name and the output folder.
    """
    split_file = os.path.join(path, "DynamicNuclearNet-segmentation-v1_0", f"{split}.npz")
    split_folder = os.path.join(path, split)
    os.makedirs(split_folder, exist_ok=True)

    # Load inside a context manager so that the archive's file handle is closed
    # before the file is removed below (removing an open file fails on Windows).
    # NOTE(review): allow_pickle=True unpickles object arrays from a downloaded
    # archive, which can execute arbitrary code -- confirm whether "X" and "y"
    # really need it, otherwise drop the flag.
    with np.load(split_file, allow_pickle=True) as data:
        x, y = data["X"], data["y"]

    for i, (im, label) in tqdm(enumerate(zip(x, y)), total=len(x), desc=f"Creating files for {split}-split"):
        out_path = os.path.join(split_folder, f"image_{i:04}.zarr")
        # Only the first entry of the last axis is stored for image and labels.
        image_channel = im[..., 0]
        label_channel = label[..., 0]
        # A single chunk spanning the whole image.
        chunks = image_channel.shape
        with z5py.File(out_path, "a") as f:
            f.create_dataset("raw", data=image_channel, compression="gzip", chunks=chunks)
            f.create_dataset("labels", data=label_channel, compression="gzip", chunks=chunks)

    os.remove(split_file)
| 38 | + |
| 39 | + |
def _create_dataset(path, zip_path):
    """Unpack the dataset archive and convert each split into zarr files.

    Calls `util.unzip` with `remove=False`, checks that the npz file of every
    split is present in the extracted `DynamicNuclearNet-segmentation-v1_0`
    folder and then runs `_create_split` for "train", "val" and "test".

    Args:
        path: Root folder of the dataset; the archive is extracted here.
        zip_path: Location of the zip archive.
    """
    util.unzip(zip_path, path, remove=False)

    split_names = ["train", "val", "test"]
    data_root = os.path.join(path, "DynamicNuclearNet-segmentation-v1_0")
    npz_files = [os.path.join(data_root, f"{name}.npz") for name in split_names]
    # Every split archive must exist before any conversion starts.
    assert all([os.path.exists(npz_file) for npz_file in npz_files])

    for name in split_names:
        _create_split(path, name)
| 48 | + |
| 49 | + |
def get_dynamicnuclearnet_dataset(
    path, split, patch_shape, download=False, **kwargs
):
    """Dataset for the segmentation of cell nuclei imaged with fluorescence microscopy.

    This dataset is from the publication https://doi.org/10.1101/803205.
    Please cite it if you use this dataset for a publication.

    Automatic download is not supported. The file
    `DynamicNuclearNet-segmentation-v1_0.zip` has to be placed in `path`
    manually; it is unpacked and converted to zarr files on first use.

    Args:
        path: Root folder of the dataset.
        split: The split to use, one of "train", "val" or "test".
        patch_shape: Patch shape, forwarded to `torch_em.default_segmentation_dataset`.
        download: Currently unused, because automatic download is not supported.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The return value of `torch_em.default_segmentation_dataset`.

    Raises:
        RuntimeError: If neither the converted split folders nor the zip file exist in `path`.
    """
    splits = ["train", "val", "test"]
    assert split in splits, f"Invalid split {split!r}, expected one of {splits}"

    # check if the dataset exists already
    zip_path = os.path.join(path, "DynamicNuclearNet-segmentation-v1_0.zip")
    if all(os.path.exists(os.path.join(path, name)) for name in splits):  # yes it does
        pass
    elif os.path.exists(zip_path):  # no it does not, but we have the zip there and can unpack it
        _create_dataset(path, zip_path)
    else:
        raise RuntimeError(
            "We do not support automatic download for the dynamic nuclear net dataset yet. "
            f"Please download the dataset from https://datasets.deepcell.org/data and put it here: {zip_path}"
        )

    split_folder = os.path.join(path, split)
    assert os.path.exists(split_folder), f"Could not find the folder {split_folder}"
    # glob returns files in arbitrary order; sort for a reproducible file order.
    data_path = sorted(glob(os.path.join(split_folder, "*.zarr")))
    assert len(data_path) > 0, f"Could not find any zarr files in {split_folder}"

    raw_key, label_key = "raw", "labels"

    # Raw data and labels are stored in the same zarr files, so the same paths are passed twice.
    return torch_em.default_segmentation_dataset(
        data_path, raw_key, data_path, label_key, patch_shape, is_seg_dataset=True, ndim=2, **kwargs
    )
| 82 | + |
| 83 | + |
def get_dynamicnuclearnet_loader(
    path, split, patch_shape, batch_size, download=False, **kwargs
):
    """Dataloader for the segmentation of cell nuclei for 5 different cell lines in fluorescence microscopes.

    See `get_dynamicnuclearnet_dataset` for details.

    Args:
        path: Root folder of the dataset.
        split: The split to use, forwarded to `get_dynamicnuclearnet_dataset`.
        patch_shape: Patch shape, forwarded to `get_dynamicnuclearnet_dataset`.
        batch_size: Batch size, forwarded to `torch_em.get_data_loader`.
        download: Forwarded to `get_dynamicnuclearnet_dataset`; defaults to False
            to match that function's signature.
        kwargs: Keyword arguments that `util.split_kwargs` divides between
            the dataset and the data loader.

    Returns:
        The return value of `torch_em.get_data_loader`.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_dynamicnuclearnet_dataset(path, split, patch_shape, download, **ds_kwargs)
    return torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
0 commit comments