Skip to content

Commit 35a8a02

Browse files
jsignellpre-commit-ci[bot]aulemahal
authored
Add conversion between shapely and cf for polygons (#495)
* Shapely polygons to and from cf * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix mypy * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix mypy for real * Apply suggestions from code review Co-authored-by: Pascal Bourgault <bourgault.pascal@ouranos.ca> * Update the value in the dataset as well --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Pascal Bourgault <bourgault.pascal@ouranos.ca>
1 parent 5ee1bc7 commit 35a8a02

File tree

2 files changed

+397
-19
lines changed

2 files changed

+397
-19
lines changed

cf_xarray/geometry.py

+162-8
Original file line numberDiff line numberDiff line change
@@ -75,9 +75,6 @@ def shapely_to_cf(geometries: xr.DataArray | Sequence, grid_mapping: str | None
7575
"""
7676
Convert a DataArray with shapely geometry objects into a CF-compliant dataset.
7777
78-
.. warning::
79-
Only point and line geometries are currently implemented.
80-
8178
Parameters
8279
----------
8380
geometries : sequence of shapely geometries or xarray.DataArray
@@ -115,7 +112,7 @@ def shapely_to_cf(geometries: xr.DataArray | Sequence, grid_mapping: str | None
115112
elif types.issubset({"LineString", "MultiLineString"}):
116113
ds = lines_to_cf(geometries)
117114
elif types.issubset({"Polygon", "MultiPolygon"}):
118-
raise NotImplementedError("Polygon geometry conversion is not implemented.")
115+
ds = polygons_to_cf(geometries)
119116
else:
120117
raise ValueError(
121118
f"Mixed geometry types are not supported in CF-compliant datasets. Got {types}"
@@ -142,9 +139,6 @@ def cf_to_shapely(ds: xr.Dataset):
142139
"""
143140
Convert geometries stored in a CF-compliant way to shapely objects stored in a single variable.
144141
145-
.. warning::
146-
Only point and line geometries are currently implemented.
147-
148142
Parameters
149143
----------
150144
ds : xr.Dataset
@@ -168,7 +162,7 @@ def cf_to_shapely(ds: xr.Dataset):
168162
elif geom_type == "line":
169163
geometries = cf_to_lines(ds)
170164
elif geom_type == "polygon":
171-
raise NotImplementedError("Polygon geometry conversion is not implemented.")
165+
geometries = cf_to_polygons(ds)
172166
else:
173167
raise ValueError(
174168
f"Valid CF geometry types are 'point', 'line' and 'polygon'. Got {geom_type}"
@@ -430,3 +424,163 @@ def cf_to_lines(ds: xr.Dataset):
430424
geoms = np.where(np.diff(offset2) == 1, lines[offset2[:-1]], multilines)
431425

432426
return xr.DataArray(geoms, dims=node_count.dims, coords=node_count.coords)
427+
428+
429+
def polygons_to_cf(polygons: xr.DataArray | Sequence):
430+
"""Convert an iterable of polygons (shapely.geometry.[Multi]Polygon) into a CF-compliant geometry dataset.
431+
432+
Parameters
433+
----------
434+
polygons : sequence of shapely.geometry.Polygon or MultiPolygon
435+
The sequence of [multi]polygons to translate to a CF dataset.
436+
437+
Returns
438+
-------
439+
xr.Dataset
440+
A Dataset with variables 'x', 'y', 'crd_x', 'crd_y', 'node_count' and 'geometry_container'
441+
and optionally 'part_node_count'.
442+
"""
443+
from shapely import to_ragged_array
444+
445+
if isinstance(polygons, xr.DataArray):
446+
dim = polygons.dims[0]
447+
coord = polygons[dim] if dim in polygons.coords else None
448+
polygons_ = polygons.values
449+
else:
450+
dim = "index"
451+
coord = None
452+
polygons_ = np.array(polygons)
453+
454+
_, arr, offsets = to_ragged_array(polygons_)
455+
x = arr[:, 0]
456+
y = arr[:, 1]
457+
458+
part_node_count = np.diff(offsets[0])
459+
if len(offsets) == 1:
460+
indices = offsets[0]
461+
node_count = part_node_count
462+
elif len(offsets) >= 2:
463+
indices = np.take(offsets[0], offsets[1])
464+
interior_ring = np.isin(offsets[0], indices, invert=True)[:-1].astype(int)
465+
466+
if len(offsets) == 3:
467+
indices = np.take(indices, offsets[2])
468+
469+
node_count = np.diff(indices)
470+
471+
geom_coords = arr.take(indices[:-1], 0)
472+
crdX = geom_coords[:, 0]
473+
crdY = geom_coords[:, 1]
474+
475+
ds = xr.Dataset(
476+
data_vars={
477+
"node_count": xr.DataArray(node_count, dims=(dim,)),
478+
"interior_ring": xr.DataArray(interior_ring, dims=("part",)),
479+
"part_node_count": xr.DataArray(part_node_count, dims=("part",)),
480+
"geometry_container": xr.DataArray(
481+
attrs={
482+
"geometry_type": "polygon",
483+
"node_count": "node_count",
484+
"part_node_count": "part_node_count",
485+
"interior_ring": "interior_ring",
486+
"node_coordinates": "x y",
487+
"coordinates": "crd_x crd_y",
488+
}
489+
),
490+
},
491+
coords={
492+
"x": xr.DataArray(x, dims=("node",), attrs={"axis": "X"}),
493+
"y": xr.DataArray(y, dims=("node",), attrs={"axis": "Y"}),
494+
"crd_x": xr.DataArray(crdX, dims=(dim,), attrs={"nodes": "x"}),
495+
"crd_y": xr.DataArray(crdY, dims=(dim,), attrs={"nodes": "y"}),
496+
},
497+
)
498+
499+
if coord is not None:
500+
ds = ds.assign_coords({dim: coord})
501+
502+
# Special case when we have no MultiPolygons and no holes
503+
if len(ds.part_node_count) == len(ds.node_count):
504+
ds = ds.drop_vars("part_node_count")
505+
del ds.geometry_container.attrs["part_node_count"]
506+
507+
# Special case when we have no holes
508+
if (ds.interior_ring == 0).all():
509+
ds = ds.drop_vars("interior_ring")
510+
del ds.geometry_container.attrs["interior_ring"]
511+
return ds
512+
513+
514+
def cf_to_polygons(ds: xr.Dataset):
515+
"""Convert polygon geometries stored in a CF-compliant way to shapely polygons stored in a single variable.
516+
517+
Parameters
518+
----------
519+
ds : xr.Dataset
520+
A dataset with CF-compliant polygon geometries.
521+
Must have a "geometry_container" variable with at least a 'node_coordinates' attribute.
522+
Must also have the two 1D variables listed by this attribute.
523+
524+
Returns
525+
-------
526+
geometry : xr.DataArray
527+
A 1D array of shapely.geometry.[Multi]Polygon objects.
528+
It has the same dimension as the ``part_node_count`` or the coordinates variables, or
529+
``'features'`` if those were not present in ``ds``.
530+
"""
531+
from shapely import GeometryType, from_ragged_array
532+
533+
# Shorthand for convenience
534+
geo = ds.geometry_container.attrs
535+
536+
# The features dimension name, defaults to the one of 'part_node_count'
537+
# or the dimension of the coordinates, if present.
538+
feat_dim = None
539+
if "coordinates" in geo:
540+
xcoord_name, _ = geo["coordinates"].split(" ")
541+
(feat_dim,) = ds[xcoord_name].dims
542+
543+
x_name, y_name = geo["node_coordinates"].split(" ")
544+
xy = np.stack([ds[x_name].values, ds[y_name].values], axis=-1)
545+
546+
node_count_name = geo.get("node_count")
547+
part_node_count_name = geo.get("part_node_count", node_count_name)
548+
interior_ring_name = geo.get("interior_ring")
549+
550+
if node_count_name is None:
551+
raise ValueError("'node_count' must be provided for polygon geometries")
552+
else:
553+
node_count = ds[node_count_name]
554+
feat_dim = feat_dim or "index"
555+
if feat_dim in ds.coords:
556+
node_count = node_count.assign_coords({feat_dim: ds[feat_dim]})
557+
558+
# first get geometries for all the rings
559+
part_node_count = ds[part_node_count_name]
560+
offset1 = np.insert(np.cumsum(part_node_count.values), 0, 0)
561+
562+
if interior_ring_name is None:
563+
offset2 = np.array(list(range(len(offset1))))
564+
else:
565+
interior_ring = ds[interior_ring_name]
566+
if not interior_ring[0] == 0:
567+
raise ValueError("coordinate array must start with an exterior ring")
568+
offset2 = np.append(np.where(interior_ring == 0)[0], [len(part_node_count)])
569+
570+
polygons = from_ragged_array(GeometryType.POLYGON, xy, offsets=(offset1, offset2))
571+
572+
# get index of offset2 values that are edges for node_count
573+
offset3 = np.nonzero(
574+
np.isin(
575+
offset2,
576+
np.nonzero(np.isin(offset1, np.insert(np.cumsum(node_count), 0, 0)))[0],
577+
)
578+
)[0]
579+
multipolygons = from_ragged_array(
580+
GeometryType.MULTIPOLYGON, xy, offsets=(offset1, offset2, offset3)
581+
)
582+
583+
# get items from polygons or multipolygons depending on number of parts
584+
geoms = np.where(np.diff(offset3) == 1, polygons[offset3[:-1]], multipolygons)
585+
586+
return xr.DataArray(geoms, dims=node_count.dims, coords=node_count.coords)

0 commit comments

Comments
 (0)