Skip to content

Commit da86e37

Browse files
committed
Add reader datatree
1 parent f430fa2 commit da86e37

File tree

1 file changed

+217
-0
lines changed

1 file changed

+217
-0
lines changed

wsidata/reader/_reader_datatree.py

+217
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
# Public API of this module: only ``to_datatree`` is exported by a star-import.
__all__ = ["to_datatree"]
2+
3+
4+
# Adapted from https://github.com/gustaveroussy/sopa/blob/master/sopa/io/reader/wsi.py
5+
from ctypes import ArgumentError
6+
from dataclasses import asdict
7+
from functools import singledispatch
8+
from typing import Dict
9+
10+
import numpy as np
11+
from zarr.storage import (
12+
_path_to_prefix,
13+
attrs_key,
14+
init_array,
15+
init_group,
16+
Store,
17+
KVStore,
18+
)
19+
from zarr.util import json_dumps, normalize_storage_path, normalize_shape
20+
21+
from ..reader import TiffSlideReader
22+
from ..reader.base import ReaderBase
23+
24+
from xarray import open_zarr, DataArray
25+
from datatree import DataTree
26+
from spatialdata.models import Image2DModel
27+
from spatialdata.transformations import Identity, Scale
28+
29+
30+
def init_attrs(store, attrs, path: str = None):
    """Write ``attrs`` as JSON into ``store`` under the attributes key of ``path``.

    Parameters
    ----------
    store
        Mutable mapping that receives the serialized attributes.
    attrs
        Object serialized with zarr's ``json_dumps``.
    path: str
        Storage path of the group/array the attributes belong to. It is
        passed through ``normalize_storage_path`` and ``_path_to_prefix``
        before ``attrs_key`` is appended.
    """
    key_prefix = _path_to_prefix(normalize_storage_path(path))
    attrs_location = key_prefix + attrs_key
    store[attrs_location] = json_dumps(attrs)
34+
35+
36+
def create_meta_store(reader: ReaderBase, tilesize: int) -> Dict[str, bytes]:
    """Build a dict holding the zarr metadata of the reader's multiscale image.

    Parameters
    ----------
    reader: ReaderBase
        Reader whose ``file`` and ``properties`` (``n_level``,
        ``level_shape``) describe the image pyramid.
    tilesize: int
        Edge length used for the first two chunk dimensions of every level.

    Returns
    -------
    dict
        Metadata-only store: a root group with ``multiscales``/``metadata``
        attributes plus one uncompressed ``|u1`` array declaration per level.
    """
    props = reader.properties
    level_entries = [{"path": str(lv)} for lv in range(props.n_level)]
    multiscale_entry = {
        "name": reader.file,
        "datasets": level_entries,
        "version": "0.1",
    }
    group_attrs = {
        "multiscales": [multiscale_entry],
        "metadata": asdict(props),
    }

    meta = {}
    init_group(meta)
    init_attrs(meta, group_attrs)

    # Each level_shape entry is unpacked as (x, y); the array itself is
    # declared as (y, x, 4) with four samples per pixel.
    for lv, (x_size, y_size) in enumerate(props.level_shape):
        array_path = str(lv)
        init_array(
            meta,
            path=array_path,
            shape=normalize_shape((y_size, x_size, 4)),
            chunks=(tilesize, tilesize, 4),
            fill_value=0,
            dtype="|u1",
            compressor=None,
        )
        # Level 0 uses the plain dimension names "Y"/"X"; every other level
        # appends its index so the dimension names differ between levels.
        dim_suffix = "" if lv == 0 else array_path
        dim_names = [f"Y{dim_suffix}", f"X{dim_suffix}", "S"]
        init_attrs(meta, {"_ARRAY_DIMENSIONS": dim_names}, path=array_path)

    return meta
68+
69+
70+
def _parse_chunk_path(path: str):
71+
"""Returns x,y chunk coords and pyramid level from string key"""
72+
level, ckey = path.split("/")
73+
y, x, _ = map(int, ckey.split("."))
74+
return x, y, int(level)
75+
76+
77+
class ReaderStore(Store):
    """Wraps a Reader object as a read-only, multiscale Zarr Store.

    Metadata keys are answered from an in-memory dict built by
    ``create_meta_store``. Any other key is treated as a chunk path
    (``'<level>/<y>.<x>.<c>'``) and is read on demand through
    ``reader.get_region``.

    Parameters
    ----------
    reader: Reader
        The reader object
    tilesize: int
        Desired "chunk" size for zarr store (default: 512).
    """

    def __init__(self, reader: ReaderBase, tilesize: int = 512):
        self._reader = reader
        self._tilesize = tilesize
        self._store = create_meta_store(reader, tilesize)
        self._writeable = False
        self._erasable = False

    def __getitem__(self, key: str):
        """Return the metadata bytes or the pixel chunk stored under ``key``.

        Raises
        ------
        KeyError
            If ``key`` is neither a metadata key nor a readable chunk path.
        ctypes.ArgumentError
            Propagated unchanged from the reader.
        """
        if key in self._store:
            # key is for metadata
            return self._store[key]

        # key should now be a path to an array chunk
        # e.g '3/4.5.0' -> '<level>/<chunk_key>'
        try:
            x, y, level = _parse_chunk_path(key)
            location = self._ref_pos(x, y, level)
            size = (self._tilesize, self._tilesize)
            tile = self._reader.get_region(*location, *size, level=level)
        except ArgumentError:
            # Can occur if trying to read a closed slide
            raise
        except Exception as err:
            # TODO: probably need better error handling.
            # If anything goes wrong, we just signal the chunk
            # is missing from the store. The original error is chained so
            # that it stays visible in the traceback.
            raise KeyError(key) from err
        return np.array(tile)

    def __eq__(self, other):
        """Two stores are equal when their readers share ``name`` and ``file``."""
        return (
            isinstance(other, ReaderStore)
            and self._reader.name == other._reader.name
            and self._reader.file == other._reader.file
        )

    def __setitem__(self, key, val):
        raise PermissionError("ZarrStore is read-only")

    def __delitem__(self, key):
        raise PermissionError("ZarrStore is read-only")

    def __iter__(self):
        return iter(self.keys())

    def __len__(self):
        # Only metadata keys are enumerable; chunk keys are virtual.
        return len(self._store)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def _ref_pos(self, x: int, y: int, level: int):
        """Convert chunk indices at ``level`` into an ``(xref, yref)`` offset.

        The offset is ``index * level_downsample[level] * tilesize``,
        truncated to ``int``.
        """
        level = self._reader.translate_level(level)
        dsample = self._reader.properties.level_downsample[level]
        xref = int(x * dsample * self._tilesize)
        yref = int(y * dsample * self._tilesize)
        return xref, yref

    def keys(self):
        """Return the metadata keys (chunk keys are not listed)."""
        return self._store.keys()

    def close(self):
        """Detach the underlying reader."""
        self._reader.detach_reader()

    # Adapted from napari-lazy-openslide PR#16
    def __getstate__(self):
        # ``__init__`` needs a reader object, so the reader itself must be
        # part of the pickled state; a bare file path cannot rebuild the
        # store. "_path" is kept for anything that inspects the state dict.
        # NOTE(review): relies on the reader object being picklable --
        # confirm for each ReaderBase subclass.
        return {
            "_reader": self._reader,
            "_path": self._reader.file,
            "_tilesize": self._tilesize,
        }

    def __setstate__(self, newstate):
        # Rebuild through __init__ so the metadata dict is regenerated
        # from the restored reader.
        reader = newstate["_reader"]
        tilesize = newstate["_tilesize"]
        self.__init__(reader, tilesize)

    def rename(self, path: str, new_path: str):
        raise PermissionError(f'{type(self)} is not erasable, cannot call "rename"')

    def rmdir(self, path: str = "") -> None:
        raise PermissionError(f'{type(self)} is not erasable, cannot call "rmdir"')
169+
170+
171+
@singledispatch
def create_reader_store(reader: ReaderBase, tilesize: int = 512) -> KVStore:
    """Wrap ``reader`` in a ``ReaderStore`` and return it inside a ``KVStore``.

    This is the generic ``singledispatch`` implementation; it is used for
    any reader type without a more specific registered overload.
    """
    reader_store = ReaderStore(reader, tilesize=tilesize)
    return KVStore(reader_store)
175+
176+
177+
@create_reader_store.register(TiffSlideReader)
def _(reader: TiffSlideReader, tilesize: int = 512) -> KVStore:
    """Return the store of the zarr group exposed by the wrapped TiffSlide object.

    ``tilesize`` is accepted to match the generic signature but is not used
    by this overload.
    """
    zarr_group = reader.reader.zarr_group
    return zarr_group.store
180+
181+
182+
def to_datatree(
    reader,
    chunks=(3, 512, 512),
) -> DataTree:
    """Expose the image pyramid of ``reader`` as a ``DataTree`` of RGB images.

    Parameters
    ----------
    reader
        Reader object; its ``properties`` supply ``level_downsample`` and
        are stored (via ``asdict``) as the attributes of the returned tree.
    chunks
        Chunk sizes applied to every level after it has been rearranged to
        ``("c", "y", "x")``.

    Returns
    -------
    DataTree
        One node per pyramid level, named ``scale<key>``, where ``<key>``
        is the name of the level's variable in the opened zarr dataset.
    """
    zarr_store = create_reader_store(reader)
    pyramid = open_zarr(zarr_store, consolidated=False, mask_and_scale=False)

    level_images = {}
    for level, key in enumerate(pyramid.keys()):
        # Level "0" uses the plain dimension names "Y"/"X"; other levels
        # carry the level key as a suffix.
        dim_suffix = "" if key == "0" else key
        samples_first = pyramid[key].transpose(
            "S", f"Y{dim_suffix}", f"X{dim_suffix}"
        )
        level_array = DataArray(samples_first, dims=("c", "y", "x")).chunk(chunks)

        factor = reader.properties.level_downsample[level]
        transform = (
            Identity()
            if factor == 1
            else Scale([factor, factor], axes=("y", "x"))
        )

        # Only the first three samples are kept and labelled r, g, b.
        parsed = Image2DModel.parse(
            level_array[:3, :, :],
            transformations={"global": transform},
            c_coords=("r", "g", "b"),
        )
        # Multiply the pixel coordinates of this level by its downsample factor.
        for axis in ("y", "x"):
            parsed.coords[axis] = factor * parsed.coords[axis]

        level_images[f"scale{key}"] = parsed

    tree = DataTree.from_dict(level_images)
    tree.attrs = asdict(reader.properties)
    return tree

0 commit comments

Comments
 (0)