1
1
from __future__ import annotations
2
2
3
+ import copy
3
4
from collections .abc import Sequence
4
5
5
6
import numpy as np
6
7
import pandas as pd
7
8
import xarray as xr
8
9
10
# Name of the geometry container variable that the encoding helpers in this
# module write and that the decoding helpers look up in a Dataset.
GEOMETRY_CONTAINER_NAME = "geometry_container"

# Names exported by a star-import of this module.
__all__ = [
    "decode_geometries",
    "encode_geometries",
    "cf_to_shapely",
    "shapely_to_cf",
]
18
+
19
+
20
def decode_geometries(encoded: xr.Dataset) -> xr.Dataset:
    """
    Decode CF encoded geometries to a numpy object array containing shapely geometries.

    Parameters
    ----------
    encoded : Dataset
        A Xarray Dataset containing encoded geometries.

    Returns
    -------
    Dataset
        A Xarray Dataset containing decoded geometries.

    Raises
    ------
    NotImplementedError
        If ``encoded`` has no variable named ``GEOMETRY_CONTAINER_NAME``.

    See Also
    --------
    shapely_to_cf
    cf_to_shapely
    encode_geometries
    """
    if GEOMETRY_CONTAINER_NAME not in encoded._variables:
        # The two literals are concatenated, so the first one must end with a
        # space to keep the sentences apart in the final message.
        raise NotImplementedError(
            f"Currently only a single geometry variable named {GEOMETRY_CONTAINER_NAME!r} is supported. "
            "A variable by this name is not present in the provided dataset."
        )

    enc_geom_var = encoded[GEOMETRY_CONTAINER_NAME]
    geom_attrs = enc_geom_var.attrs
    # Grab the coordinates attribute.
    # The merge is done in place, before calling ``cf_to_shapely``: the
    # ``cf_to_*`` helpers in this file look up "coordinates" in the container's
    # attrs.
    # NOTE(review): this presumably also modifies the attrs seen through
    # ``encoded`` -- confirm that mutating the input is acceptable.
    geom_attrs.update(enc_geom_var.encoding)

    geom_var = cf_to_shapely(encoded).variable

    # Every variable referenced by the container (each attribute holds a
    # blank-separated list of variable names) is dropped with the container.
    referenced = tuple(
        varname
        for attr in (
            "interior_ring",
            "node_coordinates",
            "node_count",
            "part_node_count",
            "coordinates",
        )
        for varname in geom_attrs.get(attr, "").split(" ")
        if varname
    )
    todrop = (GEOMETRY_CONTAINER_NAME,) + referenced
    decoded = encoded.drop_vars(todrop)

    # "variable_name" is written by ``encode_geometries`` to remember the name
    # of the original geometry variable.
    # NOTE(review): when the attribute is absent, ``name`` is None and the
    # decoded geometries are stored under the key None -- confirm this is wanted.
    name = geom_attrs.get("variable_name", None)
    if name in decoded.dims:
        # Attach the geometries as a coordinate along their own dimension,
        # explicitly without building an index for it.
        decoded = decoded.assign_coords(
            xr.Coordinates(coords={name: geom_var}, indexes={})
        )
    else:
        decoded[name] = geom_var

    # Is this a good idea? We are deleting information.
    for var in decoded._variables.values():
        if var.attrs.get("geometry") == GEOMETRY_CONTAINER_NAME:
            var.attrs.pop("geometry")
    return decoded
82
+
83
+
84
def encode_geometries(ds: xr.Dataset) -> xr.Dataset:
    """
    Encode any discovered geometry variables using the CF conventions.

    Practically speaking, geometry variables are numpy object arrays where the first
    element is a shapely geometry.

    .. warning::

       Only a single geometry variable is supported at present. Contributions to fix this
       are welcome.

    Parameters
    ----------
    ds : Dataset
        Dataset containing at least one geometry variable.

    Returns
    -------
    Dataset
        Where all geometry variables are encoded. If no geometry variable is
        found, ``ds`` itself is returned unchanged.

    Raises
    ------
    NotImplementedError
        If more than one geometry variable is detected.

    See Also
    --------
    shapely_to_cf
    cf_to_shapely
    """
    from shapely import (
        LineString,
        MultiLineString,
        MultiPoint,
        MultiPolygon,
        Point,
        Polygon,
    )

    SHAPELY_TYPES = (
        Point,
        LineString,
        Polygon,
        MultiPoint,
        MultiLineString,
        MultiPolygon,
    )

    # A geometry variable is an object-dtype variable whose first element is a
    # shapely geometry. Zero-size variables are skipped: they have no first
    # element to inspect and indexing ``flat[0]`` would raise IndexError.
    geom_var_names = [
        name
        for name, var in ds._variables.items()
        if var.dtype == "O"
        and var.size > 0
        and isinstance(var.data.flat[0], SHAPELY_TYPES)
    ]
    if not geom_var_names:
        return ds

    if to_drop := set(geom_var_names) & set(ds._indexes):
        # e.g. xvec GeometryIndex
        ds = ds.drop_indexes(to_drop)

    if len(geom_var_names) > 1:
        raise NotImplementedError(
            "Multiple geometry variables are not supported at this time. "
            "Contributions to fix this are welcome. "
            f"Detected geometry variables are {geom_var_names!r}"
        )

    (name,) = geom_var_names
    variables = {}
    # If `name` is a dimension name, then we need to drop it. Otherwise we don't
    # So set errors="ignore"
    variables.update(
        shapely_to_cf(ds[name]).drop_vars(name, errors="ignore")._variables
    )

    # Loop-invariant: coordinates declared on the geometry variable itself.
    extra_coords = ds[name].attrs.get("coordinates", "")

    for varname, var in ds._variables.items():
        if varname == name:
            continue
        if name in var.dims:
            # Work on a copy with its own attrs dict so the variables of the
            # input dataset are not modified.
            var = var.copy()
            var._attrs = copy.deepcopy(var._attrs)
            var.attrs["geometry"] = GEOMETRY_CONTAINER_NAME
            # The grid_mapping and coordinates attributes can be carried by the geometry container
            # variable provided they are also carried by the data variables associated with the container.
            if extra_coords:
                # "coordinates" is a blank-separated list of names, so the two
                # parts must be joined with a space, not glued together.
                existing = var.attrs.get("coordinates", "")
                var.attrs["coordinates"] = " ".join(
                    f"{existing} {extra_coords}".split()
                )
        variables[varname] = var

    # WARNING: cf-xarray specific convention.
    # For vector data cubes, `name` is a dimension name.
    # By encoding to CF, we have
    # encoded the information in that variable across many different
    # variables (e.g. node_count) with `name` as a dimension.
    # We have to record `name` somewhere so that we reconstruct
    # a geometry variable of the right name at decode-time.
    variables[GEOMETRY_CONTAINER_NAME].attrs["variable_name"] = name

    encoded = xr.Dataset(variables)

    return encoded
185
+
9
186
10
187
def reshape_unique_geometries (
11
188
ds : xr .Dataset ,
@@ -119,13 +296,15 @@ def shapely_to_cf(geometries: xr.DataArray | Sequence, grid_mapping: str | None
119
296
f"Mixed geometry types are not supported in CF-compliant datasets. Got { types } "
120
297
)
121
298
299
+ ds [GEOMETRY_CONTAINER_NAME ].attrs .update (coordinates = "crd_x crd_y" )
300
+
122
301
# Special treatment of selected grid mappings
123
302
if grid_mapping == "longitude_latitude" :
124
303
# Special case for longitude_latitude grid mapping
125
304
ds = ds .rename (crd_x = "lon" , crd_y = "lat" )
126
305
ds .lon .attrs .update (units = "degrees_east" , standard_name = "longitude" )
127
306
ds .lat .attrs .update (units = "degrees_north" , standard_name = "latitude" )
128
- ds . geometry_container .attrs .update (coordinates = "lon lat" )
307
+ ds [ GEOMETRY_CONTAINER_NAME ] .attrs .update (coordinates = "lon lat" )
129
308
ds .x .attrs .update (units = "degrees_east" , standard_name = "longitude" )
130
309
ds .y .attrs .update (units = "degrees_north" , standard_name = "latitude" )
131
310
elif grid_mapping is not None :
@@ -157,7 +336,7 @@ def cf_to_shapely(ds: xr.Dataset):
157
336
----------
158
337
Please refer to the CF conventions document: http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#geometries
159
338
"""
160
- geom_type = ds . geometry_container .attrs ["geometry_type" ]
339
+ geom_type = ds [ GEOMETRY_CONTAINER_NAME ] .attrs ["geometry_type" ]
161
340
if geom_type == "point" :
162
341
geometries = cf_to_points (ds )
163
342
elif geom_type == "line" :
@@ -235,7 +414,7 @@ def points_to_cf(pts: xr.DataArray | Sequence):
235
414
# Special case when we have no MultiPoints
236
415
if (ds .node_count == 1 ).all ():
237
416
ds = ds .drop_vars ("node_count" )
238
- del ds . geometry_container .attrs ["node_count" ]
417
+ del ds [ GEOMETRY_CONTAINER_NAME ] .attrs ["node_count" ]
239
418
return ds
240
419
241
420
@@ -259,18 +438,18 @@ def cf_to_points(ds: xr.Dataset):
259
438
from shapely .geometry import MultiPoint , Point
260
439
261
440
# Shorthand for convenience
262
- geo = ds . geometry_container .attrs
441
+ geo = ds [ GEOMETRY_CONTAINER_NAME ] .attrs
263
442
264
443
# The features dimension name, defaults to the one of 'node_count' or the dimension of the coordinates, if present.
265
444
feat_dim = None
266
445
if "coordinates" in geo and feat_dim is None :
267
446
xcoord_name , _ = geo ["coordinates" ].split (" " )
268
447
(feat_dim ,) = ds [xcoord_name ].dims
269
448
270
- x_name , y_name = ds . geometry_container .attrs ["node_coordinates" ].split (" " )
449
+ x_name , y_name = ds [ GEOMETRY_CONTAINER_NAME ] .attrs ["node_coordinates" ].split (" " )
271
450
xy = np .stack ([ds [x_name ].values , ds [y_name ].values ], axis = - 1 )
272
451
273
- node_count_name = ds . geometry_container .attrs .get ("node_count" )
452
+ node_count_name = ds [ GEOMETRY_CONTAINER_NAME ] .attrs .get ("node_count" )
274
453
if node_count_name is None :
275
454
# No node_count means all geometries are single points (node_count = 1)
276
455
# And if we had no coordinates, then the dimension defaults to "features"
@@ -363,7 +542,7 @@ def lines_to_cf(lines: xr.DataArray | Sequence):
363
542
# Special case when we have no MultiLines
364
543
if len (ds .part_node_count ) == len (ds .node_count ):
365
544
ds = ds .drop_vars ("part_node_count" )
366
- del ds . geometry_container .attrs ["part_node_count" ]
545
+ del ds [ GEOMETRY_CONTAINER_NAME ] .attrs ["part_node_count" ]
367
546
return ds
368
547
369
548
@@ -387,7 +566,7 @@ def cf_to_lines(ds: xr.Dataset):
387
566
from shapely import GeometryType , from_ragged_array
388
567
389
568
# Shorthand for convenience
390
- geo = ds . geometry_container .attrs
569
+ geo = ds [ GEOMETRY_CONTAINER_NAME ] .attrs
391
570
392
571
# The features dimension name, defaults to the one of 'node_count'
393
572
# or the dimension of the coordinates, if present.
@@ -503,12 +682,12 @@ def polygons_to_cf(polygons: xr.DataArray | Sequence):
503
682
# Special case when we have no MultiPolygons and no holes
504
683
if len (ds .part_node_count ) == len (ds .node_count ):
505
684
ds = ds .drop_vars ("part_node_count" )
506
- del ds . geometry_container .attrs ["part_node_count" ]
685
+ del ds [ GEOMETRY_CONTAINER_NAME ] .attrs ["part_node_count" ]
507
686
508
687
# Special case when we have no holes
509
688
if (ds .interior_ring == 0 ).all ():
510
689
ds = ds .drop_vars ("interior_ring" )
511
- del ds . geometry_container .attrs ["interior_ring" ]
690
+ del ds [ GEOMETRY_CONTAINER_NAME ] .attrs ["interior_ring" ]
512
691
return ds
513
692
514
693
@@ -532,7 +711,7 @@ def cf_to_polygons(ds: xr.Dataset):
532
711
from shapely import GeometryType , from_ragged_array
533
712
534
713
# Shorthand for convenience
535
- geo = ds . geometry_container .attrs
714
+ geo = ds [ GEOMETRY_CONTAINER_NAME ] .attrs
536
715
537
716
# The features dimension name, defaults to the one of 'part_node_count'
538
717
# or the dimension of the coordinates, if present.
0 commit comments