Skip to content

Commit f6c8a1f

Browse files
mps01060, pre-commit-ci[bot], and dcherian
authored
Add larger bit depth for displaying flag_masks (#492)
* Add flag_mask formatting support for bit depths greater than uint8

  The underlying cf flag_mask code (e.g. `==` and `.isin`) works with flag_masks that have more than 8 independent flags (e.g. uint32); however, the formatting code that prints messages to the screen is limited to uint8. Add/modify functions to unpack bits from larger integer dtypes.

* Edit fixed-width formatting for larger bit depth flag_masks

  The original formatting widths work well for 8-bit flag_masks, but need some modification for larger integer values.

* Fix bit_length being calculated multiple times

  bit_length is re-calculated a second time inside _unpacked_bits, which can lead to an incorrect bit_length. Pass this value as an argument instead, since it was properly calculated earlier in the process.

* Remove unsigned integer check

  Flags do not have to be unsigned. For example, the "basin" test dataset will fail the unsigned integer test even though it contains valid int64 flags.

* Add test for 16-bit version of flag_masks

* [pre-commit.ci] auto fixes from pre-commit.com hooks

  For more information, see https://pre-commit.ci

* Add flag_indep_uint16 example dataset

* One more test

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Deepak Cherian <deepak@cherian.net>
1 parent c2f2540 commit f6c8a1f

File tree

3 files changed

+92
-9
lines changed

3 files changed

+92
-9
lines changed

cf_xarray/datasets.py

+10
Original file line numberDiff line numberDiff line change
@@ -503,6 +503,16 @@ def _create_inexact_bounds():
503503
name="flag_var",
504504
)
505505

506+
# Example flag variable with sixteen independent single-bit masks stored as
# uint16. The last data value, 65535, has all sixteen bits set.
flag_indep_uint16 = xr.DataArray(
    np.array([1, 10, 100, 1000, 10000, 65535], dtype=np.uint16),
    name="flag_var",
    dims=("time",),
    attrs={
        # One mask per bit position: 1, 2, 4, ..., 32768.
        "flag_masks": [1 << bit for bit in range(16)],
        # Meanings are named after the mask value they belong to.
        "flag_meanings": " ".join(f"flag_{1 << bit}" for bit in range(16)),
        "standard_name": "flag_independent",
    },
)
506516

507517
flag_mix = xr.DataArray(
508518
np.array([4, 8, 13, 5, 10, 14, 7, 3], np.uint8),

cf_xarray/formatting.py

+55-9
Original file line numberDiff line numberDiff line change
@@ -151,8 +151,47 @@ def _maybe_panel(textgen, title: str, rich: bool):
151151
return title + ":\n" + text
152152

153153

154-
def find_set_bits(mask, value, repeated_masks):
155-
bitpos = np.arange(8)[::-1]
154+
def _get_bit_length(dtype):
155+
# Check if dtype is a numpy dtype, if not, convert it
156+
if not isinstance(dtype, np.dtype):
157+
dtype = np.dtype(dtype)
158+
159+
# Calculate the bit length
160+
bit_length = 8 * dtype.itemsize
161+
162+
return bit_length
163+
164+
165+
def _unpackbits(mask, bit_length):
166+
# Ensure the array is a numpy array
167+
arr = np.asarray(mask)
168+
169+
# Create an output array of the appropriate shape
170+
output_shape = arr.shape + (bit_length,)
171+
output = np.zeros(output_shape, dtype=np.uint8)
172+
173+
# Unpack bits
174+
for i in range(bit_length):
175+
output[..., i] = (arr >> i) & 1
176+
177+
return output[..., ::-1]
178+
179+
180+
def _max_chars_for_bit_length(bit_length):
181+
"""
182+
Find the maximum characters needed for a fixed-width display
183+
for integer values of a certain bit_length. Use calculation
184+
for signed integers, since it conservatively will always have
185+
enough characters for signed or unsigned.
186+
"""
187+
# Maximum value for signed integers of this bit length
188+
max_val = 2 ** (bit_length - 1) - 1
189+
# Add 1 for the negative sign
190+
return len(str(max_val)) + 1
191+
192+
193+
def find_set_bits(mask, value, repeated_masks, bit_length):
194+
bitpos = np.arange(bit_length)[::-1]
156195
if mask not in repeated_masks:
157196
if value == 0:
158197
return [-1]
@@ -161,8 +200,8 @@ def find_set_bits(mask, value, repeated_masks):
161200
else:
162201
return [int(np.log2(mask))]
163202
else:
164-
allset = bitpos[np.unpackbits(np.uint8(mask)) == 1]
165-
setbits = bitpos[np.unpackbits(np.uint8(mask & value)) == 1]
203+
allset = bitpos[_unpackbits(mask, bit_length) == 1]
204+
setbits = bitpos[_unpackbits(mask & value, bit_length) == 1]
166205
return [b if abs(b) in setbits else -b for b in allset]
167206

168207

@@ -184,25 +223,30 @@ def _format_flags(accessor, rich):
184223
# for f, (m, _) in flag_dict.items()
185224
# if m is not None and m not in repeated_masks
186225
# ]
226+
227+
bit_length = _get_bit_length(accessor._obj.dtype)
228+
mask_width = _max_chars_for_bit_length(bit_length)
229+
key_width = max(len(key) for key in flag_dict)
230+
187231
bit_text = []
188232
value_text = []
189233
for key, (mask, value) in flag_dict.items():
190234
if mask is None:
191235
bit_text.append("✗" if rich else "")
192236
value_text.append(str(value))
193237
continue
194-
bits = find_set_bits(mask, value, repeated_masks)
195-
bitstring = ["."] * 8
238+
bits = find_set_bits(mask, value, repeated_masks, bit_length)
239+
bitstring = ["."] * bit_length
196240
if bits == [-1]:
197241
continue
198242
else:
199243
for b in bits:
200244
bitstring[abs(b)] = _format_cf_name("1" if b >= 0 else "0", rich)
201245
text = "".join(bitstring[::-1])
202246
value_text.append(
203-
f"{mask} & {value}"
247+
f"{mask:{mask_width}} & {value}"
204248
if key in excl_flags and value is not None
205-
else str(mask)
249+
else f"{mask:{mask_width}}"
206250
)
207251
bit_text.append(text if rich else f" / Bit: {text}")
208252

@@ -230,7 +274,9 @@ def _format_flags(accessor, rich):
230274
else:
231275
rows = []
232276
for val, bit, key in zip(value_text, bit_text, flag_dict):
233-
rows.append(f"{TAB}{_format_cf_name(key, rich)}: {TAB} {val} {bit}")
277+
rows.append(
278+
f"{TAB}{_format_cf_name(key, rich):>{key_width}}: {TAB} {val} {bit}"
279+
)
234280
return _print_rows("Flag Meanings", rows, rich)
235281

236282

cf_xarray/tests/test_accessor.py

+27
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
dsg,
2727
flag_excl,
2828
flag_indep,
29+
flag_indep_uint16,
2930
flag_mix,
3031
forecast,
3132
mollwds,
@@ -164,6 +165,7 @@ def test_repr() -> None:
164165
# Flag DataArray
165166
assert "Flag Variable" in repr(flag_excl.cf)
166167
assert "Flag Variable" in repr(flag_indep.cf)
168+
assert "Flag Variable" in repr(flag_indep_uint16.cf)
167169
assert "Flag Variable" in repr(flag_mix.cf)
168170
assert "Flag Variable" in repr(basin.cf)
169171

@@ -1837,6 +1839,30 @@ def test_flag_indep(self) -> None:
18371839
res = flag_indep.cf.flags[name]
18381840
np.testing.assert_equal(res.to_numpy(), expected[i])
18391841

1842+
def test_flag_indep_uint16(self) -> None:
1843+
expected = [
1844+
[True, False, False, False, False, True], # bit 1
1845+
[False, True, False, False, False, True], # bit 2
1846+
[False, False, True, False, False, True], # bit 4
1847+
[False, True, False, True, False, True], # bit 8
1848+
[False, False, False, False, True, True], # bit 16
1849+
[False, False, True, True, False, True], # bit 32
1850+
[False, False, True, True, False, True], # bit 64
1851+
[False, False, False, True, False, True], # bit 128
1852+
[False, False, False, True, True, True], # bit 256
1853+
[False, False, False, True, True, True], # bit 512
1854+
[False, False, False, False, True, True], # bit 1024
1855+
[False, False, False, False, False, True], # bit 2048
1856+
[False, False, False, False, False, True], # bit 4096
1857+
[False, False, False, False, True, True], # bit 8192
1858+
[False, False, False, False, False, True], # bit 16384
1859+
[False, False, False, False, False, True], # bit 32768
1860+
]
1861+
for i in range(16):
1862+
name = f"flag_{2**i}"
1863+
res = flag_indep_uint16.cf.flags[name]
1864+
np.testing.assert_equal(res.to_numpy(), expected[i])
1865+
18401866
def test_flag_mix(self) -> None:
18411867
expected = [
18421868
[False, False, True, True, False, False, True, True], # flag 1
@@ -1983,6 +2009,7 @@ def plane(coords, slopex, slopey):
19832009
[basin, "Flag Variable"],
19842010
[flag_mix, "Flag Variable"],
19852011
[flag_indep, "Flag Variable"],
2012+
[flag_indep_uint16, "Flag Variable"],
19862013
[flag_excl, "Flag Variable"],
19872014
[dsg, "Discrete Sampling Geometry"],
19882015
),

0 commit comments

Comments
 (0)