Skip to content

Commit 5af5259

Browse files
Add CTC dataset (#214)
1 parent 279e1fb commit 5af5259

File tree

3 files changed

+158
-0
lines changed

3 files changed

+158
-0
lines changed

scripts/datasets/check_ctc.py

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
from torch_em.data.datasets.ctc import get_ctc_segmentation_loader, CTC_URLS
2+
from torch_em.util.debug import check_loader
3+
from torch_em.data.sampler import MinInstanceSampler
4+
5+
ROOT = "/home/pape/Work/data/ctc/ctc-training-data"
6+
7+
8+
# Some of the datasets have partial sparse labels:
9+
# - Fluo-N2DH-GOWT1
10+
# - Fluo-N2DL-HeLa
11+
# Maybe depends on the split?!
12+
def check_ctc_segmentation():
    """Visually check the segmentation loader for the DIC datasets of the CTC.

    Every dataset listed in `CTC_URLS` whose name starts with "DIC" is loaded
    from `ROOT` (downloading it if necessary) with a patch shape of
    (1, 512, 512) and batch size 1, using a `MinInstanceSampler`. The loader
    is then handed to `check_loader` for inspection.
    """
    dic_datasets = [name for name in CTC_URLS if name.startswith("DIC")]
    for dataset_name in dic_datasets:
        print("Checking dataset", dataset_name)
        patch_shape, batch_size = (1, 512, 512), 1
        loader = get_ctc_segmentation_loader(
            ROOT, dataset_name, patch_shape, batch_size,
            download=True, sampler=MinInstanceSampler(),
        )
        # NOTE(review): 8 is presumably the number of samples to show - confirm
        # against torch_em.util.debug.check_loader.
        check_loader(loader, 8, instance_labels=True)
22+
23+
24+
if __name__ == "__main__":
25+
check_ctc_segmentation()

torch_em/data/datasets/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from .cem import get_mitolab_loader
44
from .covid_if import get_covid_if_loader, get_covid_if_dataset
55
from .cremi import get_cremi_loader, get_cremi_dataset
6+
from .ctc import get_ctc_segmentation_loader, get_ctc_segmentation_dataset
67
from .deepbacs import get_deepbacs_loader, get_deepbacs_dataset
78
from .dsb import get_dsb_loader, get_dsb_dataset
89
from .hpa import get_hpa_segmentation_loader, get_hpa_segmentation_dataset

torch_em/data/datasets/ctc.py

+132
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
import os
2+
from glob import glob
3+
from shutil import copyfile
4+
5+
import torch_em
6+
from . import util
7+
8+
9+
# Download location of the Cell Tracking Challenge training data. Every
# dataset archive is published there as "<dataset name>.zip".
_CTC_URL_ROOT = "http://data.celltrackingchallenge.net/training-datasets"

# Expected checksum of each dataset archive, keyed by dataset name.
# The value is passed to `util.download_source` when the archive is fetched.
CTC_CHECKSUMS = {
    "BF-C2DL-HSC": "0aa68ec37a9b06e72a5dfa07d809f56e1775157fb674bb75ff904936149657b1",
    "BF-C2DL-MuSC": "ca72b59042809120578a198ba236e5ed3504dd6a122ef969428b7c64f0a5e67d",
    "DIC-C2DH-HeLa": "832fed2d05bb7488cf9c51a2994b75f8f3f53b3c3098856211f2d39023c34e1a",
    "Fluo-C2DL-Huh7": "1912658c1b3d8b38b314eb658b559e7b39c256917150e9b3dd8bfdc77347617d",
    "Fluo-C2DL-MSC": "a083521f0cb673ae02d4957c5e6580c2e021943ef88101f6a2f61b944d671af2",
    "Fluo-N2DH-GOWT1": "1a7bd9a7d1d10c4122c7782427b437246fb69cc3322a975485c04e206f64fc2c",
    "Fluo-N2DH-SIM+": "3e809148c87ace80c72f563b56c35e0d9448dcdeb461a09c83f61e93f5e40ec8",
    "Fluo-N2DL-HeLa": "35dd99d58e071aba0b03880128d920bd1c063783cc280f9531fbdc5be614c82e",
    "PhC-C2DH-U373": "b18185c18fce54e8eeb93e4bbb9b201d757add9409bbf2283b8114185a11bc9e",
    "PhC-C2DL-PSC": "9d54bb8febc8798934a21bf92e05d92f5e8557c87e28834b2832591cdda78422",
}

# Download URL of each dataset archive, keyed by dataset name. Derived from
# the checksum table so both mappings always share the same keys and order.
CTC_URLS = {name: f"{_CTC_URL_ROOT}/{name}.zip" for name in CTC_CHECKSUMS}
34+
35+
36+
def _require_ctc_dataset(path, dataset_name, download):
    """Make sure the requested CTC dataset is available below `path`.

    If the folder `<path>/<dataset_name>` does not exist yet, the dataset
    archive is fetched with `util.download_source` (using the checksum from
    `CTC_CHECKSUMS`) and extracted into `path` with `util.unzip`, which is
    asked to remove the archive afterwards.

    Args:
        path: Root folder that holds (or will hold) the CTC datasets.
        dataset_name: Name of the dataset; must be a key of `CTC_URLS`.
        download: Forwarded to `util.download_source`; presumably controls
            whether the download is permitted - confirm against `util`.

    Returns:
        The path of the dataset folder, `<path>/<dataset_name>`.

    Raises:
        ValueError: If `dataset_name` is not one of the known CTC datasets.
    """
    if dataset_name not in CTC_URLS:
        raise ValueError(f"Invalid dataset: {dataset_name}, choose one of {list(CTC_URLS)}.")

    data_path = os.path.join(path, dataset_name)
    if os.path.exists(data_path):
        # Already downloaded and extracted; nothing to do.
        return data_path

    url, checksum = CTC_URLS[dataset_name], CTC_CHECKSUMS[dataset_name]
    zip_path = os.path.join(path, f"{dataset_name}.zip")
    util.download_source(zip_path, url, download, checksum=checksum)
    util.unzip(zip_path, path, remove=True)

    return data_path
50+
51+
52+
def _require_gt_images(data_path, splits):
53+
image_paths, label_paths = [], []
54+
55+
if isinstance(splits, str):
56+
splits = [splits]
57+
58+
for split in splits:
59+
image_folder = os.path.join(data_path, split)
60+
assert os.path.join(image_folder), f"Cannot find split, {split} in {data_path}."
61+
62+
label_folder = os.path.join(data_path, f"{split}_GT", "SEG")
63+
64+
# copy over the images corresponding to the labeled frames
65+
label_image_folder = os.path.join(data_path, f"{split}_GT", "IM")
66+
os.makedirs(label_image_folder, exist_ok=True)
67+
68+
this_label_paths = glob(os.path.join(label_folder, "*.tif"))
69+
for label_path in this_label_paths:
70+
fname = os.path.basename(label_path)
71+
image_label_path = os.path.join(label_image_folder, fname)
72+
if not os.path.exists(image_label_path):
73+
im_name = "t" + fname.lstrip("main_seg")
74+
image_path = os.path.join(image_folder, im_name)
75+
assert os.path.join(image_path), image_path
76+
copyfile(image_path, image_label_path)
77+
78+
image_paths.append(label_image_folder)
79+
label_paths.append(label_folder)
80+
81+
return image_paths, label_paths
82+
83+
84+
def get_ctc_segmentation_dataset(
    path,
    dataset_name,
    patch_shape,
    split=None,
    download=False,
    **kwargs,
):
    """Dataset for the cell tracking challenge segmentation data.

    This dataset provides access to the 2d segmentation datasets of the
    cell tracking challenge. If you use this data in your research please cite
    https://doi.org/10.1038/nmeth.4473

    Args:
        path: Root folder that holds (or will hold) the CTC datasets.
        dataset_name: Name of the CTC dataset, must be a key of `CTC_URLS`.
        patch_shape: Patch shape passed on to
            `torch_em.default_segmentation_dataset`.
        split: Name of a split (e.g. "01") or a list of split names. By
            default all splits that have a `<split>_GT` folder are used.
        download: Forwarded to the download helper; presumably controls
            whether the data may be downloaded if it is not present.
        kwargs: Additional keyword arguments for
            `torch_em.default_segmentation_dataset`.

    Returns:
        The dataset created by `torch_em.default_segmentation_dataset`.
    """
    data_path = _require_ctc_dataset(path, dataset_name, download)

    if split is None:
        gt_suffix = "_GT"
        # Sorted so that the order of the splits is reproducible across runs.
        gt_folders = sorted(glob(os.path.join(data_path, "*_GT")))
        # Cut off the exact suffix; `str.rstrip` would strip a set of
        # characters and could eat into the split name.
        splits = [os.path.basename(folder)[:-len(gt_suffix)] for folder in gt_folders]
    else:
        # A single split name or a list of names, both are handled downstream.
        splits = split

    image_path, label_path = _require_gt_images(data_path, splits)

    kwargs = util.update_kwargs(kwargs, "ndim", 2)
    return torch_em.default_segmentation_dataset(
        image_path, "*.tif", label_path, "*.tif", patch_shape, is_seg_dataset=True, **kwargs
    )
111+
112+
113+
def get_ctc_segmentation_loader(
    path,
    dataset_name,
    patch_shape,
    batch_size,
    split=None,
    download=False,
    **kwargs,
):
    """Dataloader for cell tracking challenge segmentation data.

    The keyword arguments are divided with `util.split_kwargs` into those for
    `torch_em.default_segmentation_dataset` and the remaining ones, which are
    passed to `torch_em.get_data_loader`.
    See 'get_ctc_segmentation_dataset' for details.
    """
    dataset_kwargs, dataloader_kwargs = util.split_kwargs(
        torch_em.default_segmentation_dataset, **kwargs
    )
    ctc_dataset = get_ctc_segmentation_dataset(
        path,
        dataset_name,
        patch_shape,
        split=split,
        download=download,
        **dataset_kwargs,
    )
    return torch_em.get_data_loader(ctc_dataset, batch_size, **dataloader_kwargs)

0 commit comments

Comments
 (0)