Skip to content

Commit daedad0

Browse files
authored
Add DynamicNuclearNet data - from DeepCell (#228)
Add DynamicNuclearNet data - from DeepCell
1 parent b90721b commit daedad0

File tree

3 files changed

+114
-0
lines changed

3 files changed

+114
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
from torch_em.util.debug import check_loader
2+
from torch_em.data.datasets import get_dynamicnuclearnet_loader
3+
4+
5+
DYNAMICNUCLEARNET_ROOT = "/home/anwai/data/deepcell/"
6+
7+
8+
# NOTE: the DynamicNuclearNet data cannot be downloaded automatically.
9+
# you need to download it yourself from https://datasets.deepcell.org/data
10+
def check_dynamicnuclearnet():
    """Create the loader for the DynamicNuclearNet train split and inspect it with `check_loader`.

    The data is read from `DYNAMICNUCLEARNET_ROOT`; set this constant to the folder
    where you have put the manually downloaded DynamicNuclearNet data.
    """
    # Automatic download is not supported for this dataset, so it is not requested here.
    loader = get_dynamicnuclearnet_loader(
        DYNAMICNUCLEARNET_ROOT, "train",
        patch_shape=(512, 512), batch_size=2, download=False
    )
    check_loader(loader, 10, instance_labels=True, rgb=False)
17+
18+
19+
if __name__ == "__main__":
20+
check_dynamicnuclearnet()

torch_em/data/datasets/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from .ctc import get_ctc_segmentation_loader, get_ctc_segmentation_dataset
77
from .deepbacs import get_deepbacs_loader, get_deepbacs_dataset
88
from .dsb import get_dsb_loader, get_dsb_dataset
9+
from .dynamicnuclearnet import get_dynamicnuclearnet_loader, get_dynamicnuclearnet_dataset
910
from .hpa import get_hpa_segmentation_loader, get_hpa_segmentation_dataset
1011
from .isbi2012 import get_isbi_loader, get_isbi_dataset
1112
from .kasthuri import get_kasthuri_loader, get_kasthuri_dataset
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
import os
2+
from tqdm import tqdm
3+
from glob import glob
4+
5+
import z5py
6+
import numpy as np
7+
import pandas as pd
8+
9+
import torch_em
10+
11+
from . import util
12+
13+
14+
# Automatic download is currently not possible, because of authentication
15+
URL = None # TODO: here - https://datasets.deepcell.org/data
16+
17+
18+
def _create_split(path, split):
    """Convert the npz archive of one split into one zarr container per image.

    Reads `<path>/DynamicNuclearNet-segmentation-v1_0/<split>.npz`, writes channel 0
    of every image / label pair to `<path>/<split>/image_XXXX.zarr` (datasets "raw"
    and "labels") and removes the npz file once all images have been written.

    Args:
        path: Root folder of the dataset.
        split: Name of the split; determines the npz file name and the output folder.
    """
    split_file = os.path.join(path, "DynamicNuclearNet-segmentation-v1_0", f"{split}.npz")
    split_folder = os.path.join(path, split)
    os.makedirs(split_folder, exist_ok=True)

    # Open the archive as a context manager, so that its file handle is closed
    # before the npz file is removed at the end of this function.
    # NOTE(review): allow_pickle=True permits unpickling object arrays from the downloaded
    # file; it is presumably only needed for the "meta" entry, which is not read here - confirm.
    with np.load(split_file, allow_pickle=True) as data:
        x, y = data["X"], data["y"]

    for i, (im, label) in tqdm(enumerate(zip(x, y)), total=len(x), desc=f"Creating files for {split}-split"):
        out_path = os.path.join(split_folder, f"image_{i:04}.zarr")
        # only the first channel of the image and of the label array is stored
        image_channel = im[..., 0]
        label_channel = label[..., 0]
        # a single chunk that covers the full image
        chunks = image_channel.shape
        with z5py.File(out_path, "a") as f:
            f.create_dataset("raw", data=image_channel, compression="gzip", chunks=chunks)
            f.create_dataset("labels", data=label_channel, compression="gzip", chunks=chunks)

    # the data now lives in the zarr files, so the npz file is no longer needed
    os.remove(split_file)
38+
39+
40+
def _create_dataset(path, zip_path):
    """Extract the zip archive into `path` and convert each split into zarr files.

    Args:
        path: Root folder of the dataset; the archive is extracted into it.
        zip_path: Location of the zip archive (`util.unzip` is called with `remove=False`).
    """
    util.unzip(zip_path, path, remove=False)

    split_names = ["train", "val", "test"]
    npz_folder = os.path.join(path, "DynamicNuclearNet-segmentation-v1_0")
    # the extracted archive has to provide one npz file per split
    assert all(
        os.path.exists(os.path.join(npz_folder, f"{name}.npz")) for name in split_names
    )

    for name in split_names:
        _create_split(path, name)
48+
49+
50+
def get_dynamicnuclearnet_dataset(
    path, split, patch_shape, download=False, **kwargs
):
    """Dataset for the segmentation of cell nuclei imaged with fluorescence microscopy.

    This dataset is from the publication https://doi.org/10.1101/803205.
    Please cite it if you use this dataset for a publication.

    Automatic download is not supported. The data has to be downloaded manually from
    https://datasets.deepcell.org/data and the zip archive has to be placed in `path`;
    it is unpacked and converted on the first call.

    Args:
        path: Folder that contains the converted split folders or the downloaded
            archive "DynamicNuclearNet-segmentation-v1_0.zip".
        split: The split to use, one of "train", "val" or "test".
        patch_shape: Patch shape passed on to `torch_em.default_segmentation_dataset`.
        download: Currently unused, because automatic download is not supported.
        kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.

    Returns:
        The dataset created by `torch_em.default_segmentation_dataset`.

    Raises:
        RuntimeError: If neither the converted data nor the zip archive is found in `path`.
    """
    splits = ["train", "val", "test"]
    assert split in splits, f"Invalid split '{split}', choose one of {splits}."

    # check if the dataset exists already
    zip_path = os.path.join(path, "DynamicNuclearNet-segmentation-v1_0.zip")
    is_converted = all(os.path.exists(os.path.join(path, name)) for name in splits)
    if not is_converted:
        if os.path.exists(zip_path):  # we have the zip there and can unpack it
            _create_dataset(path, zip_path)
        else:
            raise RuntimeError(
                "We do not support automatic download for the dynamic nuclear net dataset yet. "
                f"Please download the dataset from https://datasets.deepcell.org/data and put it here: {zip_path}"
            )

    split_folder = os.path.join(path, split)
    assert os.path.exists(split_folder), f"Could not find the split folder {split_folder}."
    # glob returns the files in arbitrary order; sort them so that the sample order is reproducible
    data_path = sorted(glob(os.path.join(split_folder, "*.zarr")))
    assert len(data_path) > 0, f"Could not find any zarr files in {split_folder}."

    raw_key, label_key = "raw", "labels"

    # raw data and labels are stored in the same zarr files, hence data_path is passed twice
    return torch_em.default_segmentation_dataset(
        data_path, raw_key, data_path, label_key, patch_shape, is_seg_dataset=True, ndim=2, **kwargs
    )
82+
83+
84+
def get_dynamicnuclearnet_loader(
    path, split, patch_shape, batch_size, download=False, **kwargs
):
    """Dataloader for the segmentation of cell nuclei for 5 different cell lines imaged with fluorescence microscopy.

    See `get_dynamicnuclearnet_dataset` for details.

    Args:
        path: Folder with the dataset, passed on to `get_dynamicnuclearnet_dataset`.
        split: The split to use, passed on to `get_dynamicnuclearnet_dataset`.
        patch_shape: Patch shape, passed on to `get_dynamicnuclearnet_dataset`.
        batch_size: Batch size, passed on to `torch_em.get_data_loader`.
        download: Passed on to `get_dynamicnuclearnet_dataset`; defaults to False
            to match the default of that function.
        kwargs: Keyword arguments that are split by `util.split_kwargs` into arguments
            for the dataset and arguments for `torch_em.get_data_loader`.

    Returns:
        The loader created by `torch_em.get_data_loader`.
    """
    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
    dataset = get_dynamicnuclearnet_dataset(path, split, patch_shape, download, **ds_kwargs)
    loader = torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
    return loader

0 commit comments

Comments
 (0)