Skip to content

Commit 51a4f21

Browse files
authored
Better guard the parsing logic for malformed data from the SDK (#248)
1 parent 3c1a10c commit 51a4f21

File tree

3 files changed

+129
-12
lines changed

3 files changed

+129
-12
lines changed

matter_server/common/helpers/json.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
"""Helpers to work with (de)serializing of json."""
22

3+
from base64 import b64encode
34
from dataclasses import is_dataclass
45
from typing import Any
56

@@ -30,7 +31,7 @@ def json_encoder_default(obj: Any) -> Any:
3031
if isinstance(obj, Nullable):
3132
return None
3233
if isinstance(obj, bytes):
33-
return str(obj)
34+
return b64encode(obj).decode("utf-8")
3435
if isinstance(obj, Exception):
3536
return str(obj)
3637
if type(obj) == type:

matter_server/common/helpers/util.py

+22 -11
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
from __future__ import annotations
33

44
from base64 import b64decode, b64encode
5+
import binascii
56
from dataclasses import MISSING, asdict, fields, is_dataclass
67
from datetime import datetime
78
from enum import Enum
@@ -78,7 +79,7 @@ def _convert_value(value: Any) -> Any:
7879
if isinstance(value, Enum):
7980
return value.value
8081
if isinstance(value, bytes):
81-
return b64encode(value).decode()
82+
return b64encode(value).decode("utf-8")
8283
if isinstance(value, float32):
8384
return float(value)
8485
if type(value) == type:
@@ -115,11 +116,8 @@ def parse_value(name: str, value: Any, value_type: Any, default: Any = MISSING)
115116
if hasattr(value_type, "from_dict"):
116117
return value_type.from_dict(value)
117118
# handle a parse error in the sdk which is returned as:
118-
# {'TLVValue': None, 'Reason': None}
119-
if (
120-
value.get("TLVValue", MISSING) is None
121-
and value.get("Reason", MISSING) is None
122-
):
119+
# {'TLVValue': None, 'Reason': None} or {'TLVValue': None}
120+
if value.get("TLVValue", MISSING) is None:
123121
if value_type in (None, Nullable, Any):
124122
return None
125123
value = None
@@ -132,13 +130,14 @@ def parse_value(name: str, value: Any, value_type: Any, default: Any = MISSING)
132130
return None
133131
if is_dataclass(value_type) and isinstance(value, dict):
134132
return dataclass_from_dict(value_type, value)
135-
origin = get_origin(value_type)
136-
if origin is list and isinstance(value, list):
137-
return [
133+
# get origin value type and inspect one-by-one
134+
origin: Any = get_origin(value_type)
135+
if origin in (list, tuple) and isinstance(value, list | tuple):
136+
return origin(
138137
parse_value(name, subvalue, get_args(value_type)[0])
139138
for subvalue in value
140139
if subvalue is not None
141-
]
140+
)
142141
# handle dictionary where we should inspect all values
143142
elif origin is dict:
144143
subkey_type = get_args(value_type)[0]
@@ -175,13 +174,19 @@ def parse_value(name: str, value: Any, value_type: Any, default: Any = MISSING)
175174
return None
176175
elif origin is type:
177176
return get_type_hints(value, globals(), locals())
177+
# handle Any as value type (which is basically unprocessable)
178178
if value_type is Any:
179179
return value
180+
# raise if value is None and the value is required according to annotations
180181
if value is None and value_type is not NoneType:
181182
raise KeyError(f"`{name}` of type `{value_type}` is required.")
182183

183184
try:
184185
if issubclass(value_type, Enum):
186+
# handle enums from the SDK that have a value that does not exist in the enum (sigh)
187+
if value not in value_type._value2member_map_:
188+
# we do not want to crash so we return the raw value
189+
return value
185190
return value_type(value)
186191
if issubclass(value_type, datetime):
187192
return parse_utc_timestamp(value)
@@ -194,8 +199,14 @@ def parse_value(name: str, value: Any, value_type: Any, default: Any = MISSING)
194199
return float(value)
195200
if value_type is int and isinstance(value, str) and value.isnumeric():
196201
return int(value)
202+
# handle bytes values (sent over the wire as base64 encoded strings)
197203
if value_type is bytes and isinstance(value, str):
198-
return b64decode(value.encode())
204+
try:
205+
return b64decode(value.encode("utf-8"))
206+
except binascii.Error:
207+
# unfortunately sometimes the data is malformed
208+
# as it is not super important we ignore it (for now)
209+
return b""
199210

200211
# Matter SDK specific types
201212
if value_type is uint and (

tests/common/test_parser.py

+105
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,105 @@
1+
"""Test parser functions that convert the incoming JSON from the API into dataclass models."""
2+
import datetime
3+
from dataclasses import dataclass
4+
from typing import Optional
5+
from enum import IntEnum, Enum
6+
7+
import pytest
8+
9+
from matter_server.common.helpers.util import dataclass_from_dict
10+
11+
class MatterIntEnum(IntEnum):
12+
"""Basic Matter Test IntEnum"""
13+
A = 0
14+
B = 1
15+
C = 2
16+
17+
18+
class MatterEnum(Enum):
19+
"""Basic Matter Test Enum"""
20+
A = "a"
21+
B = "b"
22+
C = "c"
23+
24+
25+
@dataclass
26+
class BasicModelChild:
27+
"""Basic test model."""
28+
29+
a: int
30+
b: str
31+
c: str
32+
d: Optional[int]
33+
34+
35+
@dataclass
36+
class BasicModel:
37+
"""Basic test model."""
38+
39+
a: int
40+
b: float
41+
c: str
42+
d: Optional[int]
43+
e: BasicModelChild
44+
f: datetime.datetime
45+
g: MatterEnum
46+
h: MatterIntEnum
47+
i: str = "default"
48+
49+
50+
def test_dataclass_from_dict():
51+
"""Test dataclass from dict parsing."""
52+
raw = {
53+
"a": 1,
54+
"b": 1.0,
55+
"c": "hello",
56+
"d": 1,
57+
"e": {"a": 2, "b": "test", "c": "test", "d": None},
58+
"f": "2022-12-09T06:58:00Z",
59+
"g": "a",
60+
"h": 2
61+
}
62+
res = dataclass_from_dict(BasicModel, raw)
63+
# test the basic values
64+
assert isinstance(res, BasicModel)
65+
assert res.a == 1
66+
assert res.b == 1.0
67+
assert res.d == 1
68+
# test recursive parsing
69+
assert isinstance(res.e, BasicModelChild)
70+
# test default value
71+
assert res.i == "default"
72+
# test int gets converted to float
73+
raw["b"] = 2
74+
res = dataclass_from_dict(BasicModel, raw)
75+
assert res.b == 2.0
76+
# test datetime string
77+
assert isinstance(res.f, datetime.datetime)
78+
assert res.f.month == 12
79+
assert res.f.day == 9
80+
# test parse (valid) MatterEnum
81+
assert res.g == MatterEnum.A
82+
# test parse (valid) MatterIntEnum
83+
assert res.h == MatterIntEnum.C
84+
# test parse invalid enum value returns raw value
85+
raw2 = {**raw}
86+
raw2["h"] = 5
87+
res2 = dataclass_from_dict(BasicModel, raw2)
88+
assert res2.h == 5
89+
# test string doesn't match int
90+
with pytest.raises(TypeError):
91+
raw2 = {**raw}
92+
raw2["a"] = "blah"
93+
dataclass_from_dict(BasicModel, raw2)
94+
# test missing key results in KeyError
95+
with pytest.raises(KeyError):
96+
raw2 = {**raw}
97+
del raw2["a"]
98+
dataclass_from_dict(BasicModel, raw2)
99+
# test extra keys silently ignored in non-strict mode
100+
raw2 = {**raw}
101+
raw2["extrakey"] = "something"
102+
dataclass_from_dict(BasicModel, raw2, strict=False)
103+
# test extra keys not silently ignored in strict mode
104+
with pytest.raises(KeyError):
105+
dataclass_from_dict(BasicModel, raw2, strict=True)

0 commit comments

Comments
 (0)