from . import util


- CTC_URLS = {
-     "BF-C2DL-HSC": "http://data.celltrackingchallenge.net/training-datasets/BF-C2DL-HSC.zip",
-     "BF-C2DL-MuSC": "http://data.celltrackingchallenge.net/training-datasets/BF-C2DL-MuSC.zip",
-     "DIC-C2DH-HeLa": "http://data.celltrackingchallenge.net/training-datasets/DIC-C2DH-HeLa.zip",
-     "Fluo-C2DL-Huh7": "http://data.celltrackingchallenge.net/training-datasets/Fluo-C2DL-Huh7.zip",
-     "Fluo-C2DL-MSC": "http://data.celltrackingchallenge.net/training-datasets/Fluo-C2DL-MSC.zip",
-     "Fluo-N2DH-GOWT1": "http://data.celltrackingchallenge.net/training-datasets/Fluo-N2DH-GOWT1.zip",
-     "Fluo-N2DH-SIM+": "http://data.celltrackingchallenge.net/training-datasets/Fluo-N2DH-SIM+.zip",
-     "Fluo-N2DL-HeLa": "http://data.celltrackingchallenge.net/training-datasets/Fluo-N2DL-HeLa.zip",
-     "PhC-C2DH-U373": "http://data.celltrackingchallenge.net/training-datasets/PhC-C2DH-U373.zip",
-     "PhC-C2DL-PSC": "http://data.celltrackingchallenge.net/training-datasets/PhC-C2DL-PSC.zip",
- }
CTC_CHECKSUMS = {
-     "BF-C2DL-HSC": "0aa68ec37a9b06e72a5dfa07d809f56e1775157fb674bb75ff904936149657b1",
-     "BF-C2DL-MuSC": "ca72b59042809120578a198ba236e5ed3504dd6a122ef969428b7c64f0a5e67d",
-     "DIC-C2DH-HeLa": "832fed2d05bb7488cf9c51a2994b75f8f3f53b3c3098856211f2d39023c34e1a",
-     "Fluo-C2DL-Huh7": "1912658c1b3d8b38b314eb658b559e7b39c256917150e9b3dd8bfdc77347617d",
-     "Fluo-C2DL-MSC": "a083521f0cb673ae02d4957c5e6580c2e021943ef88101f6a2f61b944d671af2",
-     "Fluo-N2DH-GOWT1": "1a7bd9a7d1d10c4122c7782427b437246fb69cc3322a975485c04e206f64fc2c",
-     "Fluo-N2DH-SIM+": "3e809148c87ace80c72f563b56c35e0d9448dcdeb461a09c83f61e93f5e40ec8",
-     "Fluo-N2DL-HeLa": "35dd99d58e071aba0b03880128d920bd1c063783cc280f9531fbdc5be614c82e",
-     "PhC-C2DH-U373": "b18185c18fce54e8eeb93e4bbb9b201d757add9409bbf2283b8114185a11bc9e",
-     "PhC-C2DL-PSC": "9d54bb8febc8798934a21bf92e05d92f5e8557c87e28834b2832591cdda78422",
-
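+     # sha256 checksums for the dataset zips, keyed first by split and then by dataset name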
+     "train": {
+         "BF-C2DL-HSC": "0aa68ec37a9b06e72a5dfa07d809f56e1775157fb674bb75ff904936149657b1",
+         "BF-C2DL-MuSC": "ca72b59042809120578a198ba236e5ed3504dd6a122ef969428b7c64f0a5e67d",
+         "DIC-C2DH-HeLa": "832fed2d05bb7488cf9c51a2994b75f8f3f53b3c3098856211f2d39023c34e1a",
+         "Fluo-C2DL-Huh7": "1912658c1b3d8b38b314eb658b559e7b39c256917150e9b3dd8bfdc77347617d",
+         "Fluo-C2DL-MSC": "a083521f0cb673ae02d4957c5e6580c2e021943ef88101f6a2f61b944d671af2",
+         "Fluo-N2DH-GOWT1": "1a7bd9a7d1d10c4122c7782427b437246fb69cc3322a975485c04e206f64fc2c",
+         "Fluo-N2DH-SIM+": "3e809148c87ace80c72f563b56c35e0d9448dcdeb461a09c83f61e93f5e40ec8",
+         "Fluo-N2DL-HeLa": "35dd99d58e071aba0b03880128d920bd1c063783cc280f9531fbdc5be614c82e",
+         "PhC-C2DH-U373": "b18185c18fce54e8eeb93e4bbb9b201d757add9409bbf2283b8114185a11bc9e",
+         "PhC-C2DL-PSC": "9d54bb8febc8798934a21bf92e05d92f5e8557c87e28834b2832591cdda78422",
+     },
+     "test": {
+         "BF-C2DL-HSC": "fd1c05ec625fd0526c8369d1139babe137e885457eee98c10d957da578d0d5bc",
+         "BF-C2DL-MuSC": "c5cae259e6090e82a2596967fb54c8a768717c1772398f8546ad1c8df0820450",
+         "DIC-C2DH-HeLa": "5e5d5f2aa90aef99d750cf03f5c12d799d50b892f98c86950e07a2c5955ac01f",
+         "Fluo-C2DL-Huh7": "cc7359f8fb6b0c43995365e83ce0116d32f477ac644b2ca02b98bc253e2bcbbe",
+         "Fluo-C2DL-MSC": "c90b13e603dde52f17801d4f0cadde04ed7f21cc05296b1f0957d92dbfc8ffa6",
+         "Fluo-N2DH-GOWT1": "c6893ec2d63459de49d4dc21009b04275573403c62cc02e6ee8d0cb1a5068add",
+         "Fluo-N2DH-SIM+": "c4f257add739b284d02176057814de345dee2ac1a7438e360ccd2df73618db68",
+         "Fluo-N2DL-HeLa": "45cf3daf05e8495aa2ce0febacca4cf0928fab808c0b14ed2eb7289a819e6bb8",
+         "PhC-C2DH-U373": "7aa3162e4363a416b259149adc13c9b09cb8aecfe8165eb1428dd534b66bec8a",
+         "PhC-C2DL-PSC": "8c98ac6203e7490157ceb6aa1131d60a3863001b61fb75e784bc49d47ee264d5",
+     }
}


- def _require_ctc_dataset(path, dataset_name, download):
-     dataset_names = list(CTC_URLS.keys())
+ def get_ctc_url_and_checksum(dataset_name, split):
+     if split == "train":
+         _link_to_split = "training-datasets"
+     else:
+         _link_to_split = "test-datasets"
+
+     url = f"http://data.celltrackingchallenge.net/{_link_to_split}/{dataset_name}.zip"
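+     # e.g. "http://data.celltrackingchallenge.net/training-datasets/DIC-C2DH-HeLa.zip" for the "train" split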
+     checksum = CTC_CHECKSUMS[split][dataset_name]
+     return url, checksum
+
+
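+ # Download and unzip the dataset into `path/split/dataset_name` if it is not present yet,
+ # verifying the downloaded zip against the checksum table above.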
+ def _require_ctc_dataset(path, dataset_name, download, split):
+     dataset_names = list(CTC_CHECKSUMS["train"].keys())

    if dataset_name not in dataset_names:
        raise ValueError(f"Invalid dataset: {dataset_name}, choose one of {dataset_names}.")

-     data_path = os.path.join(path, dataset_name)
+     data_path = os.path.join(path, split, dataset_name)

    if os.path.exists(data_path):
        return data_path

    os.makedirs(data_path)
-     url, checksum = CTC_URLS[dataset_name], CTC_CHECKSUMS[dataset_name]
+     url, checksum = get_ctc_url_and_checksum(dataset_name, split)
    zip_path = os.path.join(path, f"{dataset_name}.zip")
    util.download_source(zip_path, url, download, checksum=checksum)
-     util.unzip(zip_path, path, remove=True)
+     util.unzip(zip_path, os.path.join(path, split), remove=True)

    return data_path


- def _require_gt_images(data_path, splits):
+ def _require_gt_images(data_path, vol_ids):
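+     # `vol_ids` are the video/volume folder names of a CTC dataset, e.g. "01" and "02"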
    image_paths, label_paths = [], []

-     if isinstance(splits, str):
-         splits = [splits]
+     if isinstance(vol_ids, str):
+         vol_ids = [vol_ids]

-     for split in splits:
-         image_folder = os.path.join(data_path, split)
-         assert os.path.join(image_folder), f"Cannot find split {split} in {data_path}."
+     for vol_id in vol_ids:
+         image_folder = os.path.join(data_path, vol_id)
+         assert os.path.exists(image_folder), f"Cannot find volume id {vol_id} in {data_path}."

-         label_folder = os.path.join(data_path, f"{split}_GT", "SEG")
+         label_folder = os.path.join(data_path, f"{vol_id}_GT", "SEG")

        # copy over the images corresponding to the labeled frames
-         label_image_folder = os.path.join(data_path, f"{split}_GT", "IM")
+         label_image_folder = os.path.join(data_path, f"{vol_id}_GT", "IM")
        os.makedirs(label_image_folder, exist_ok=True)

        this_label_paths = glob(os.path.join(label_folder, "*.tif"))
@@ -88,7 +100,8 @@ def get_ctc_segmentation_dataset(
    path,
    dataset_name,
    patch_shape,
-     split=None,
+     split="train",
+     vol_id=None,
    download=False,
    **kwargs,
):
@@ -98,16 +111,18 @@ def get_ctc_segmentation_dataset(
    cell tracking challenge. If you use this data in your research please cite
    https://doi.org/10.1038/nmeth.4473
    """
-     data_path = _require_ctc_dataset(path, dataset_name, download)
+     assert split in ["train"]
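+     # only the training split ships with segmentation ground-truth, so it is the only one supported here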

-     if split is None:
-         splits = glob(os.path.join(data_path, "*_GT"))
-         splits = [os.path.basename(split) for split in splits]
-         splits = [split.rstrip("_GT") for split in splits]
+     data_path = _require_ctc_dataset(path, dataset_name, download, split)
+
+     if vol_id is None:
+         vol_ids = glob(os.path.join(data_path, "*_GT"))
+         vol_ids = [os.path.basename(vol_id) for vol_id in vol_ids]
+         vol_ids = [vol_id.rstrip("_GT") for vol_id in vol_ids]
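+         # note: str.rstrip strips a character set rather than a suffix; "01_GT" -> "01"
+         # works here because the volume ids are purely numeric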
    else:
-         splits = split
+         vol_ids = vol_id

-     image_path, label_path = _require_gt_images(data_path, splits)
+     image_path, label_path = _require_gt_images(data_path, vol_ids)

    kwargs = util.update_kwargs(kwargs, "ndim", 2)
    return torch_em.default_segmentation_dataset(
@@ -120,7 +135,8 @@ def get_ctc_segmentation_loader(
    dataset_name,
    patch_shape,
    batch_size,
-     split=None,
+     split="train",
+     vol_id=None,
    download=False,
    **kwargs,
):
@@ -131,7 +147,8 @@ def get_ctc_segmentation_loader(
        torch_em.default_segmentation_dataset, **kwargs
    )
    dataset = get_ctc_segmentation_dataset(
-         path, dataset_name, patch_shape, split=split, download=download, **ds_kwargs,
+         path, dataset_name, patch_shape, split=split, vol_id=vol_id, download=download, **ds_kwargs,
    )
+
    loader = torch_em.get_data_loader(dataset, batch_size, **loader_kwargs)
    return loader
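
A minimal usage sketch with these changes applied; the import path and the patch shape are assumptions based on how torch_em datasets are typically used, and the path is a placeholder:

    from torch_em.data.datasets import get_ctc_segmentation_loader

    # downloads and checksum-verifies the zip on first use, then builds a 2d loader
    loader = get_ctc_segmentation_loader(
        path="./data/ctc",             # root folder; data ends up in ./data/ctc/train/<dataset_name>
        dataset_name="DIC-C2DH-HeLa",  # any key of CTC_CHECKSUMS["train"]
        patch_shape=(1, 512, 512),     # singleton leading axis, since ndim is set to 2
        batch_size=1,
        split="train",                 # the only split accepted by the dataset
        vol_id=None,                   # None loads all volumes with ground-truth ("01", "02", ...)
        download=True,
    )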