Skip to content

Commit a254a6f

Browse files
authored
Add test set for CTC dataset (#216)
* Add test set for ctc dataset
1 parent 6673a3f commit a254a6f

File tree

2 files changed

+76
-53
lines changed

2 files changed

+76
-53
lines changed

scripts/datasets/check_ctc.py

+13-7
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,29 @@
1-
from torch_em.data.datasets.ctc import get_ctc_segmentation_loader, CTC_URLS
1+
from torch_em.data.datasets.ctc import get_ctc_segmentation_loader, CTC_CHECKSUMS
22
from torch_em.util.debug import check_loader
33
from torch_em.data.sampler import MinInstanceSampler
44

5-
ROOT = "/scratch/projects/nim00007/sam/data/ctc/"
5+
ROOT = "/home/anwai/data/ctc/"
66

77

88
# Some of the datasets have partial sparse labels:
99
# - Fluo-N2DH-GOWT1
1010
# - Fluo-N2DL-HeLa
1111
# Maybe depends on the split?!
12-
def check_ctc_segmentation():
13-
for name in CTC_URLS.keys():
12+
def check_ctc_segmentation(split):
13+
ctc_dataset_names = list(CTC_CHECKSUMS["train"].keys())
14+
for name in ctc_dataset_names:
1415
print("Checking dataset", name)
1516
loader = get_ctc_segmentation_loader(
16-
ROOT, name, (1, 512, 512), 1, download=True,
17+
path=ROOT,
18+
dataset_name=name,
19+
patch_shape=(1, 512, 512),
20+
batch_size=1,
21+
download=True,
22+
split=split,
1723
sampler=MinInstanceSampler()
1824
)
19-
check_loader(loader, 8, plt=True, save_path="ctc.png")
25+
check_loader(loader, 8, plt=True)
2026

2127

2228
if __name__ == "__main__":
23-
check_ctc_segmentation()
29+
check_ctc_segmentation("train")

torch_em/data/datasets/ctc.py

+63-46
Original file line numberDiff line numberDiff line change
@@ -6,66 +6,78 @@
66
from . import util
77

88

9-
CTC_URLS = {
10-
"BF-C2DL-HSC": "http://data.celltrackingchallenge.net/training-datasets/BF-C2DL-HSC.zip",
11-
"BF-C2DL-MuSC": "http://data.celltrackingchallenge.net/training-datasets/BF-C2DL-MuSC.zip",
12-
"DIC-C2DH-HeLa": "http://data.celltrackingchallenge.net/training-datasets/DIC-C2DH-HeLa.zip",
13-
"Fluo-C2DL-Huh7": "http://data.celltrackingchallenge.net/training-datasets/Fluo-C2DL-Huh7.zip",
14-
"Fluo-C2DL-MSC": "http://data.celltrackingchallenge.net/training-datasets/Fluo-C2DL-MSC.zip",
15-
"Fluo-N2DH-GOWT1": "http://data.celltrackingchallenge.net/training-datasets/Fluo-N2DH-GOWT1.zip",
16-
"Fluo-N2DH-SIM+": "http://data.celltrackingchallenge.net/training-datasets/Fluo-N2DH-SIM+.zip",
17-
"Fluo-N2DL-HeLa": "http://data.celltrackingchallenge.net/training-datasets/Fluo-N2DL-HeLa.zip",
18-
"PhC-C2DH-U373": "http://data.celltrackingchallenge.net/training-datasets/PhC-C2DH-U373.zip",
19-
"PhC-C2DL-PSC": "http://data.celltrackingchallenge.net/training-datasets/PhC-C2DL-PSC.zip",
20-
}
219
CTC_CHECKSUMS = {
22-
"BF-C2DL-HSC": "0aa68ec37a9b06e72a5dfa07d809f56e1775157fb674bb75ff904936149657b1",
23-
"BF-C2DL-MuSC": "ca72b59042809120578a198ba236e5ed3504dd6a122ef969428b7c64f0a5e67d",
24-
"DIC-C2DH-HeLa": "832fed2d05bb7488cf9c51a2994b75f8f3f53b3c3098856211f2d39023c34e1a",
25-
"Fluo-C2DL-Huh7": "1912658c1b3d8b38b314eb658b559e7b39c256917150e9b3dd8bfdc77347617d",
26-
"Fluo-C2DL-MSC": "a083521f0cb673ae02d4957c5e6580c2e021943ef88101f6a2f61b944d671af2",
27-
"Fluo-N2DH-GOWT1": "1a7bd9a7d1d10c4122c7782427b437246fb69cc3322a975485c04e206f64fc2c",
28-
"Fluo-N2DH-SIM+": "3e809148c87ace80c72f563b56c35e0d9448dcdeb461a09c83f61e93f5e40ec8",
29-
"Fluo-N2DL-HeLa": "35dd99d58e071aba0b03880128d920bd1c063783cc280f9531fbdc5be614c82e",
30-
"PhC-C2DH-U373": "b18185c18fce54e8eeb93e4bbb9b201d757add9409bbf2283b8114185a11bc9e",
31-
"PhC-C2DL-PSC": "9d54bb8febc8798934a21bf92e05d92f5e8557c87e28834b2832591cdda78422",
32-
10+
"train": {
11+
"BF-C2DL-HSC": "0aa68ec37a9b06e72a5dfa07d809f56e1775157fb674bb75ff904936149657b1",
12+
"BF-C2DL-MuSC": "ca72b59042809120578a198ba236e5ed3504dd6a122ef969428b7c64f0a5e67d",
13+
"DIC-C2DH-HeLa": "832fed2d05bb7488cf9c51a2994b75f8f3f53b3c3098856211f2d39023c34e1a",
14+
"Fluo-C2DL-Huh7": "1912658c1b3d8b38b314eb658b559e7b39c256917150e9b3dd8bfdc77347617d",
15+
"Fluo-C2DL-MSC": "a083521f0cb673ae02d4957c5e6580c2e021943ef88101f6a2f61b944d671af2",
16+
"Fluo-N2DH-GOWT1": "1a7bd9a7d1d10c4122c7782427b437246fb69cc3322a975485c04e206f64fc2c",
17+
"Fluo-N2DH-SIM+": "3e809148c87ace80c72f563b56c35e0d9448dcdeb461a09c83f61e93f5e40ec8",
18+
"Fluo-N2DL-HeLa": "35dd99d58e071aba0b03880128d920bd1c063783cc280f9531fbdc5be614c82e",
19+
"PhC-C2DH-U373": "b18185c18fce54e8eeb93e4bbb9b201d757add9409bbf2283b8114185a11bc9e",
20+
"PhC-C2DL-PSC": "9d54bb8febc8798934a21bf92e05d92f5e8557c87e28834b2832591cdda78422",
21+
},
22+
"test": {
23+
"BF-C2DL-HSC": "fd1c05ec625fd0526c8369d1139babe137e885457eee98c10d957da578d0d5bc",
24+
"BF-C2DL-MuSC": "c5cae259e6090e82a2596967fb54c8a768717c1772398f8546ad1c8df0820450",
25+
"DIC-C2DH-HeLa": "5e5d5f2aa90aef99d750cf03f5c12d799d50b892f98c86950e07a2c5955ac01f",
26+
"Fluo-C2DL-Huh7": "cc7359f8fb6b0c43995365e83ce0116d32f477ac644b2ca02b98bc253e2bcbbe",
27+
"Fluo-C2DL-MSC": "c90b13e603dde52f17801d4f0cadde04ed7f21cc05296b1f0957d92dbfc8ffa6",
28+
"Fluo-N2DH-GOWT1": "c6893ec2d63459de49d4dc21009b04275573403c62cc02e6ee8d0cb1a5068add",
29+
"Fluo-N2DH-SIM+": "c4f257add739b284d02176057814de345dee2ac1a7438e360ccd2df73618db68",
30+
"Fluo-N2DL-HeLa": "45cf3daf05e8495aa2ce0febacca4cf0928fab808c0b14ed2eb7289a819e6bb8",
31+
"PhC-C2DH-U373": "7aa3162e4363a416b259149adc13c9b09cb8aecfe8165eb1428dd534b66bec8a",
32+
"PhC-C2DL-PSC": "8c98ac6203e7490157ceb6aa1131d60a3863001b61fb75e784bc49d47ee264d5",
33+
}
3334
}
3435

3536

36-
def _require_ctc_dataset(path, dataset_name, download):
37-
dataset_names = list(CTC_URLS.keys())
37+
def get_ctc_url_and_checksum(dataset_name, split):
38+
if split == "train":
39+
_link_to_split = "training-datasets"
40+
else:
41+
_link_to_split = "test-datasets"
42+
43+
url = f"http://data.celltrackingchallenge.net/{_link_to_split}/{dataset_name}.zip"
44+
checksum = CTC_CHECKSUMS[split][dataset_name]
45+
return url, checksum
46+
47+
48+
def _require_ctc_dataset(path, dataset_name, download, split):
49+
dataset_names = list(CTC_CHECKSUMS["train"].keys())
3850
if dataset_name not in dataset_names:
3951
raise ValueError(f"Invalid dataset: {dataset_name}, choose one of {dataset_names}.")
4052

41-
data_path = os.path.join(path, dataset_name)
53+
data_path = os.path.join(path, split, dataset_name)
4254

4355
if os.path.exists(data_path):
4456
return data_path
4557

4658
os.makedirs(data_path)
47-
url, checksum = CTC_URLS[dataset_name], CTC_CHECKSUMS[dataset_name]
59+
url, checksum = get_ctc_url_and_checksum(dataset_name, split)
4860
zip_path = os.path.join(path, f"{dataset_name}.zip")
4961
util.download_source(zip_path, url, download, checksum=checksum)
50-
util.unzip(zip_path, path, remove=True)
62+
util.unzip(zip_path, os.path.join(path, split), remove=True)
5163

5264
return data_path
5365

5466

55-
def _require_gt_images(data_path, splits):
67+
def _require_gt_images(data_path, vol_ids):
5668
image_paths, label_paths = [], []
5769

58-
if isinstance(splits, str):
59-
splits = [splits]
70+
if isinstance(vol_ids, str):
71+
vol_ids = [vol_ids]
6072

61-
for split in splits:
62-
image_folder = os.path.join(data_path, split)
63-
assert os.path.join(image_folder), f"Cannot find split, {split} in {data_path}."
73+
for vol_id in vol_ids:
74+
image_folder = os.path.join(data_path, vol_id)
75+
assert os.path.exists(image_folder), f"Cannot find volume id, {vol_id} in {data_path}."
6476

65-
label_folder = os.path.join(data_path, f"{split}_GT", "SEG")
77+
label_folder = os.path.join(data_path, f"{vol_id}_GT", "SEG")
6678

6779
# copy over the images corresponding to the labeled frames
68-
label_image_folder = os.path.join(data_path, f"{split}_GT", "IM")
80+
label_image_folder = os.path.join(data_path, f"{vol_id}_GT", "IM")
6981
os.makedirs(label_image_folder, exist_ok=True)
7082

7183
this_label_paths = glob(os.path.join(label_folder, "*.tif"))
@@ -88,7 +100,8 @@ def get_ctc_segmentation_dataset(
88100
path,
89101
dataset_name,
90102
patch_shape,
91-
split=None,
103+
split="train",
104+
vol_id=None,
92105
download=False,
93106
**kwargs,
94107
):
@@ -98,16 +111,18 @@ def get_ctc_segmentation_dataset(
98111
cell tracking challenge. If you use this data in your research please cite
99112
https://doi.org/10.1038/nmeth.4473
100113
"""
101-
data_path = _require_ctc_dataset(path, dataset_name, download)
114+
assert split in ["train"]
102115

103-
if split is None:
104-
splits = glob(os.path.join(data_path, "*_GT"))
105-
splits = [os.path.basename(split) for split in splits]
106-
splits = [split.rstrip("_GT") for split in splits]
116+
data_path = _require_ctc_dataset(path, dataset_name, download, split)
117+
118+
if vol_id is None:
119+
vol_ids = glob(os.path.join(data_path, "*_GT"))
120+
vol_ids = [os.path.basename(vol_id) for vol_id in vol_ids]
121+
vol_ids = [vol_id[:-len("_GT")] for vol_id in vol_ids]
107122
else:
108-
splits = split
123+
vol_ids = vol_id
109124

110-
image_path, label_path = _require_gt_images(data_path, splits)
125+
image_path, label_path = _require_gt_images(data_path, vol_ids)
111126

112127
kwargs = util.update_kwargs(kwargs, "ndim", 2)
113128
return torch_em.default_segmentation_dataset(
@@ -120,7 +135,8 @@ def get_ctc_segmentation_loader(
120135
dataset_name,
121136
patch_shape,
122137
batch_size,
123-
split=None,
138+
split="train",
139+
vol_id=None,
124140
download=False,
125141
**kwargs,
126142
):
@@ -131,7 +147,8 @@ def get_ctc_segmentation_loader(
131147
torch_em.default_segmentation_dataset, **kwargs
132148
)
133149
dataset = get_ctc_segmentation_dataset(
134-
path, dataset_name, patch_shape, split=split, download=download, **ds_kwargs,
150+
path, dataset_name, patch_shape, split=split, vol_id=vol_id, download=download, **ds_kwargs,
135151
)
152+
136153
loader = torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
137154
return loader

0 commit comments

Comments
 (0)