Skip to content

Commit 02b1d74

Browse files
jsignellpre-commit-ci[bot]dcherian
authored
Add conversion between cf and shapely for lines (#460)
* Add conversion between cf and shapely for lines * Make polygons raise * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Switch to using `linestrings` and `multilinestrings` * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add shapely to CF for lines * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Use `from_ragged_array` * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix mypy * Full test coverage for patch * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>
1 parent f573ed7 commit 02b1d74

File tree

3 files changed

+298
-13
lines changed

3 files changed

+298
-13
lines changed

cf_xarray/geometry.py

+149-6
Original file line numberDiff line numberDiff line change
@@ -112,10 +112,10 @@ def shapely_to_cf(geometries: xr.DataArray | Sequence, grid_mapping: str | None
112112
}
113113
if types.issubset({"Point", "MultiPoint"}):
114114
ds = points_to_cf(geometries)
115-
elif types.issubset({"Polygon", "MultiPolygon"}) or types.issubset(
116-
{"LineString", "MultiLineString"}
117-
):
118-
raise NotImplementedError("Only point geometries conversion is implemented.")
115+
elif types.issubset({"LineString", "MultiLineString"}):
116+
ds = lines_to_cf(geometries)
117+
elif types.issubset({"Polygon", "MultiPolygon"}):
118+
raise NotImplementedError("Polygon geometry conversion is not implemented.")
119119
else:
120120
raise ValueError(
121121
f"Mixed geometry types are not supported in CF-compliant datasets. Got {types}"
@@ -165,8 +165,10 @@ def cf_to_shapely(ds: xr.Dataset):
165165
geom_type = ds.geometry_container.attrs["geometry_type"]
166166
if geom_type == "point":
167167
geometries = cf_to_points(ds)
168-
elif geom_type in ["line", "polygon"]:
169-
raise NotImplementedError("Only point geometries conversion is implemented.")
168+
elif geom_type == "line":
169+
geometries = cf_to_lines(ds)
170+
elif geom_type == "polygon":
171+
raise NotImplementedError("Polygon geometry conversion is not implemented.")
170172
else:
171173
raise ValueError(
172174
f"Valid CF geometry types are 'point', 'line' and 'polygon'. Got {geom_type}"
@@ -295,3 +297,144 @@ def cf_to_points(ds: xr.Dataset):
295297
j += n
296298

297299
return xr.DataArray(geoms, dims=node_count.dims, coords=node_count.coords)
300+
301+
302+
def lines_to_cf(lines: xr.DataArray | Sequence):
303+
"""Convert an iterable of lines (shapely.geometry.[Multi]Line) into a CF-compliant geometry dataset.
304+
305+
Parameters
306+
----------
307+
lines : sequence of shapely.geometry.Line or MultiLine
308+
The sequence of [multi]lines to translate to a CF dataset.
309+
310+
Returns
311+
-------
312+
xr.Dataset
313+
A Dataset with variables 'x', 'y', 'crd_x', 'crd_y', 'node_count' and 'geometry_container'
314+
and optionally 'part_node_count'.
315+
"""
316+
from shapely import to_ragged_array
317+
318+
if isinstance(lines, xr.DataArray):
319+
dim = lines.dims[0]
320+
coord = lines[dim] if dim in lines.coords else None
321+
lines_ = lines.values
322+
else:
323+
dim = "index"
324+
coord = None
325+
lines_ = np.array(lines)
326+
327+
_, arr, offsets = to_ragged_array(lines_)
328+
x = arr[:, 0]
329+
y = arr[:, 1]
330+
331+
node_count = np.diff(offsets[0])
332+
if len(offsets) == 1:
333+
indices = offsets[0]
334+
part_node_count = node_count
335+
else:
336+
indices = np.take(offsets[0], offsets[1])
337+
part_node_count = np.diff(indices)
338+
339+
geom_coords = arr.take(indices[:-1], 0)
340+
crdX = geom_coords[:, 0]
341+
crdY = geom_coords[:, 1]
342+
343+
ds = xr.Dataset(
344+
data_vars={
345+
"node_count": xr.DataArray(node_count, dims=("segment",)),
346+
"part_node_count": xr.DataArray(part_node_count, dims=(dim,)),
347+
"geometry_container": xr.DataArray(
348+
attrs={
349+
"geometry_type": "line",
350+
"node_count": "node_count",
351+
"part_node_count": "part_node_count",
352+
"node_coordinates": "x y",
353+
"coordinates": "crd_x crd_y",
354+
}
355+
),
356+
},
357+
coords={
358+
"x": xr.DataArray(x, dims=("node",), attrs={"axis": "X"}),
359+
"y": xr.DataArray(y, dims=("node",), attrs={"axis": "Y"}),
360+
"crd_x": xr.DataArray(crdX, dims=(dim,), attrs={"nodes": "x"}),
361+
"crd_y": xr.DataArray(crdY, dims=(dim,), attrs={"nodes": "y"}),
362+
},
363+
)
364+
365+
if coord is not None:
366+
ds = ds.assign_coords({dim: coord})
367+
368+
# Special case when we have no MultiLines
369+
if len(ds.part_node_count) == len(ds.node_count):
370+
ds = ds.drop_vars("part_node_count")
371+
del ds.geometry_container.attrs["part_node_count"]
372+
return ds
373+
374+
375+
def cf_to_lines(ds: xr.Dataset):
376+
"""Convert line geometries stored in a CF-compliant way to shapely lines stored in a single variable.
377+
378+
Parameters
379+
----------
380+
ds : xr.Dataset
381+
A dataset with CF-compliant line geometries.
382+
Must have a "geometry_container" variable with at least a 'node_coordinates' attribute.
383+
Must also have the two 1D variables listed by this attribute.
384+
385+
Returns
386+
-------
387+
geometry : xr.DataArray
388+
A 1D array of shapely.geometry.[Multi]Line objects.
389+
It has the same dimension as the ``part_node_count`` or the coordinates variables, or
390+
``'features'`` if those were not present in ``ds``.
391+
"""
392+
from shapely import GeometryType, from_ragged_array
393+
394+
# Shorthand for convenience
395+
geo = ds.geometry_container.attrs
396+
397+
# The features dimension name, defaults to the one of 'part_node_count'
398+
# or the dimension of the coordinates, if present.
399+
feat_dim = None
400+
if "coordinates" in geo:
401+
xcoord_name, _ = geo["coordinates"].split(" ")
402+
(feat_dim,) = ds[xcoord_name].dims
403+
404+
x_name, y_name = geo["node_coordinates"].split(" ")
405+
xy = np.stack([ds[x_name].values, ds[y_name].values], axis=-1)
406+
407+
node_count_name = geo.get("node_count")
408+
part_node_count_name = geo.get("part_node_count")
409+
if node_count_name is None:
410+
raise ValueError("'node_count' must be provided for line geometries")
411+
else:
412+
node_count = ds[node_count_name]
413+
414+
offset1 = np.insert(np.cumsum(node_count.values), 0, 0)
415+
lines = from_ragged_array(GeometryType.LINESTRING, xy, offsets=(offset1,))
416+
417+
if part_node_count_name is None:
418+
# No part_node_count means there are no multilines
419+
# And if we had no coordinates, then the dimension defaults to "index"
420+
feat_dim = feat_dim or "index"
421+
part_node_count = xr.DataArray(node_count.values, dims=(feat_dim,))
422+
if feat_dim in ds.coords:
423+
part_node_count = part_node_count.assign_coords({feat_dim: ds[feat_dim]})
424+
425+
geoms = lines
426+
else:
427+
part_node_count = ds[part_node_count_name]
428+
429+
# get index of offset1 values that are edges for part_node_count
430+
offset2 = np.nonzero(
431+
np.isin(offset1, np.insert(np.cumsum(part_node_count), 0, 0))
432+
)[0]
433+
multilines = from_ragged_array(
434+
GeometryType.MULTILINESTRING, xy, offsets=(offset1, offset2)
435+
)
436+
437+
# get items from lines or multilines depending on number of segments
438+
geoms = np.where(np.diff(offset2) == 1, lines[offset2[:-1]], multilines)
439+
440+
return xr.DataArray(geoms, dims=part_node_count.dims, coords=part_node_count.coords)

cf_xarray/tests/test_geometry.py

+148-7
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,83 @@ def geometry_ds():
4949
return cf_ds, shp_ds
5050

5151

52+
@pytest.fixture
53+
def geometry_line_ds():
54+
from shapely.geometry import LineString, MultiLineString
55+
56+
# empty/fill workaround to avoid numpy deprecation(warning) due to the array interface of shapely geometries.
57+
geoms = np.empty(3, dtype=object)
58+
geoms[:] = [
59+
MultiLineString([[[0, 0], [1, 2]], [[4, 4], [5, 6]]]),
60+
LineString([[0, 0], [1, 0], [1, 1]]),
61+
LineString([[1.0, 1.0], [2.0, 2.0], [1.7, 9.5]]),
62+
]
63+
64+
ds = xr.Dataset()
65+
shp_da = xr.DataArray(geoms, dims=("index",), name="geometry")
66+
67+
cf_ds = ds.assign(
68+
x=xr.DataArray(
69+
[0, 1, 4, 5, 0, 1, 1, 1.0, 2.0, 1.7], dims=("node",), attrs={"axis": "X"}
70+
),
71+
y=xr.DataArray(
72+
[0, 2, 4, 6, 0, 0, 1, 1.0, 2.0, 9.5], dims=("node",), attrs={"axis": "Y"}
73+
),
74+
part_node_count=xr.DataArray([4, 3, 3], dims=("index",)),
75+
node_count=xr.DataArray([2, 2, 3, 3], dims=("segment",)),
76+
crd_x=xr.DataArray([0.0, 0.0, 1.0], dims=("index",), attrs={"nodes": "x"}),
77+
crd_y=xr.DataArray([0.0, 0.0, 1.0], dims=("index",), attrs={"nodes": "y"}),
78+
geometry_container=xr.DataArray(
79+
attrs={
80+
"geometry_type": "line",
81+
"node_count": "node_count",
82+
"part_node_count": "part_node_count",
83+
"node_coordinates": "x y",
84+
"coordinates": "crd_x crd_y",
85+
}
86+
),
87+
)
88+
89+
cf_ds = cf_ds.set_coords(["x", "y", "crd_x", "crd_y"])
90+
91+
return cf_ds, shp_da
92+
93+
94+
@pytest.fixture
95+
def geometry_line_without_multilines_ds():
96+
from shapely.geometry import LineString
97+
98+
# empty/fill workaround to avoid numpy deprecation(warning) due to the array interface of shapely geometries.
99+
geoms = np.empty(2, dtype=object)
100+
geoms[:] = [
101+
LineString([[0, 0], [1, 0], [1, 1]]),
102+
LineString([[1.0, 1.0], [2.0, 2.0], [1.7, 9.5]]),
103+
]
104+
105+
ds = xr.Dataset()
106+
shp_da = xr.DataArray(geoms, dims=("index",), name="geometry")
107+
108+
cf_ds = ds.assign(
109+
x=xr.DataArray([0, 1, 1, 1.0, 2.0, 1.7], dims=("node",), attrs={"axis": "X"}),
110+
y=xr.DataArray([0, 0, 1, 1.0, 2.0, 9.5], dims=("node",), attrs={"axis": "Y"}),
111+
node_count=xr.DataArray([3, 3], dims=("segment",)),
112+
crd_x=xr.DataArray([0.0, 1.0], dims=("index",), attrs={"nodes": "x"}),
113+
crd_y=xr.DataArray([0.0, 1.0], dims=("index",), attrs={"nodes": "y"}),
114+
geometry_container=xr.DataArray(
115+
attrs={
116+
"geometry_type": "line",
117+
"node_count": "node_count",
118+
"node_coordinates": "x y",
119+
"coordinates": "crd_x crd_y",
120+
}
121+
),
122+
)
123+
124+
cf_ds = cf_ds.set_coords(["x", "y", "crd_x", "crd_y"])
125+
126+
return cf_ds, shp_da
127+
128+
52129
@requires_shapely
53130
def test_shapely_to_cf(geometry_ds):
54131
from shapely.geometry import Point
@@ -87,12 +164,45 @@ def test_shapely_to_cf(geometry_ds):
87164
assert set(out.dims) == {"features", "node"}
88165

89166

167+
@requires_shapely
168+
def test_shapely_to_cf_for_lines_as_da(geometry_line_ds):
169+
expected, in_da = geometry_line_ds
170+
171+
actual = cfxr.shapely_to_cf(in_da)
172+
xr.testing.assert_identical(actual, expected)
173+
174+
in_da = in_da.assign_coords(index=["a", "b", "c"])
175+
actual = cfxr.shapely_to_cf(in_da)
176+
xr.testing.assert_identical(actual, expected.assign_coords(index=["a", "b", "c"]))
177+
178+
179+
@requires_shapely
180+
def test_shapely_to_cf_for_lines_as_sequence(geometry_line_ds):
181+
expected, in_da = geometry_line_ds
182+
actual = cfxr.shapely_to_cf(in_da.values)
183+
xr.testing.assert_identical(actual, expected)
184+
185+
186+
@requires_shapely
187+
def test_shapely_to_cf_for_lines_without_multilines(
188+
geometry_line_without_multilines_ds,
189+
):
190+
expected, in_da = geometry_line_without_multilines_ds
191+
actual = cfxr.shapely_to_cf(in_da)
192+
xr.testing.assert_identical(actual, expected)
193+
194+
90195
@requires_shapely
91196
def test_shapely_to_cf_errors():
92-
from shapely.geometry import LineString, Point
197+
from shapely.geometry import Point, Polygon
93198

94-
geoms = [LineString([[1, 2], [2, 3]]), LineString([[2, 3, 4], [4, 3, 2]])]
95-
with pytest.raises(NotImplementedError, match="Only point geometries conversion"):
199+
geoms = [
200+
Polygon([[1, 1], [1, 3], [3, 3], [1, 1]]),
201+
Polygon([[1, 1, 4], [1, 3, 4], [3, 3, 3], [1, 1, 4]]),
202+
]
203+
with pytest.raises(
204+
NotImplementedError, match="Polygon geometry conversion is not implemented"
205+
):
96206
cfxr.shapely_to_cf(geoms)
97207

98208
geoms.append(Point(1, 2))
@@ -122,16 +232,47 @@ def test_cf_to_shapely(geometry_ds):
122232

123233

124234
@requires_shapely
125-
def test_cf_to_shapely_errors(geometry_ds):
126-
in_ds, expected = geometry_ds
127-
in_ds.geometry_container.attrs["geometry_type"] = "line"
128-
with pytest.raises(NotImplementedError, match="Only point geometries conversion"):
235+
def test_cf_to_shapely_for_lines(geometry_line_ds):
236+
in_ds, expected = geometry_line_ds
237+
238+
actual = cfxr.cf_to_shapely(in_ds)
239+
assert actual.dims == ("index",)
240+
xr.testing.assert_identical(actual.drop_vars(["crd_x", "crd_y"]), expected)
241+
242+
243+
@requires_shapely
244+
def test_cf_to_shapely_for_lines_without_multilines(
245+
geometry_line_without_multilines_ds,
246+
):
247+
in_ds, expected = geometry_line_without_multilines_ds
248+
actual = cfxr.cf_to_shapely(in_ds)
249+
assert actual.dims == ("index",)
250+
xr.testing.assert_identical(actual, expected)
251+
252+
in_ds = in_ds.assign_coords(index=["b", "c"])
253+
actual = cfxr.cf_to_shapely(in_ds)
254+
assert actual.dims == ("index",)
255+
xr.testing.assert_identical(
256+
actual.drop_vars(["crd_x", "crd_y"]), expected.assign_coords(index=["b", "c"])
257+
)
258+
259+
260+
@requires_shapely
261+
def test_cf_to_shapely_errors(geometry_ds, geometry_line_ds):
262+
in_ds, _ = geometry_ds
263+
in_ds.geometry_container.attrs["geometry_type"] = "polygon"
264+
with pytest.raises(NotImplementedError, match="Polygon geometry"):
129265
cfxr.cf_to_shapely(in_ds)
130266

131267
in_ds.geometry_container.attrs["geometry_type"] = "punkt"
132268
with pytest.raises(ValueError, match="Valid CF geometry types are "):
133269
cfxr.cf_to_shapely(in_ds)
134270

271+
in_ds, _ = geometry_line_ds
272+
del in_ds.geometry_container.attrs["node_count"]
273+
with pytest.raises(ValueError, match="'node_count' must be provided"):
274+
cfxr.cf_to_shapely(in_ds)
275+
135276

136277
@requires_shapely
137278
def test_reshape_unique_geometries(geometry_ds):

pyproject.toml

+1
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ module=[
140140
"pint",
141141
"matplotlib",
142142
"pytest",
143+
"shapely",
143144
"shapely.geometry",
144145
"xarray.core.pycompat",
145146
]

0 commit comments

Comments
 (0)