From bf505c9e88fe662e302a981ad063f515819f8d51 Mon Sep 17 00:00:00 2001 From: Marcel van der Veldt <m.vanderveldt@outlook.com> Date: Thu, 7 Mar 2024 18:01:54 +0100 Subject: [PATCH 1/2] Add big fat warning if node setup takes too long --- matter_server/server/device_controller.py | 45 ++++++++++++++++++++--- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/matter_server/server/device_controller.py b/matter_server/server/device_controller.py index e49caf48..b088fa42 100644 --- a/matter_server/server/device_controller.py +++ b/matter_server/server/device_controller.py @@ -1067,8 +1067,42 @@ async def _setup_node(self, node_id: int) -> None: return self._nodes_in_setup.add(node_id) node_logger = LOGGER.getChild(f"node_{node_id}") - try: - async with self._node_setup_throttle: + node_data = self._nodes[node_id] + log_timers: dict[int, asyncio.TimerHandle] = {} + + def log_node_long_setup(time_start: float) -> None: + """Temporary measure to track a locked-up SDK issue in some (special) circumstances.""" + time_mins = int((time.time() - time_start) / 60) + node_model = node_data.attributes.get( + "0/40/14", node_data.attributes.get("0/40/3", "") + ) + node_name = f"Node {node_id} ({node_model})" + + node_logger.error( + f"\n\nATTENTION: {node_name} did not complete setup in {time_mins} minutes.\n" # noqa: G004 + "This is an indication of a (connectivity) issue with this device. \n" + "Try powercycling this device and/or relocate it closer to a Border Router or \n" + "WiFi Access Point. If this issue persists, please create an issue report on \n" + "the Matter channel of the Home Assistant Discord server or on Github:\n" + "https://github.com/home-assistant/core/issues/new?assignees=&labels=" + "integration%3A%20matter&projects=&template=bug_report.yml\n", + ) + # reschedule itself + if TYPE_CHECKING: + assert self.server.loop + log_timers[node_id] = self.server.loop.call_later( + 15 * 60, log_node_long_setup, time_start + ) + + async with self._node_setup_throttle: + time_start = time.time() + # we want to track nodes that take too long so we log it when we detect that + if TYPE_CHECKING: + assert self.server.loop + log_timers[node_id] = self.server.loop.call_later( + 15 * 60, log_node_long_setup, time_start + ) + try: node_logger.info("Setting-up node...") # try to resolve the node using the sdk first before do anything else @@ -1086,7 +1120,7 @@ async def _setup_node(self, node_id: int) -> None: return # (re)interview node (only) if needed - node_data = self._nodes[node_id] + if ( # re-interview if we dont have any node attributes (empty node) not node_data.attributes @@ -1120,8 +1154,9 @@ async def _setup_node(self, node_id: int) -> None: ) # NOTE: the node will be picked up by mdns discovery automatically # when it becomes available again. - finally: - self._nodes_in_setup.discard(node_id) + finally: + log_timers[node_id].cancel() + self._nodes_in_setup.discard(node_id) async def _resolve_node( self, node_id: int, retries: int = 2, attempt: int = 1 From a911f59b22508d5d314dc27f08505e79e9c580de Mon Sep 17 00:00:00 2001 From: Marcel van der Veldt <m.vanderveldt@outlook.com> Date: Thu, 7 Mar 2024 21:42:57 +0100 Subject: [PATCH 2/2] add ip to log --- matter_server/server/device_controller.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/matter_server/server/device_controller.py b/matter_server/server/device_controller.py index b088fa42..2e4e9c75 100644 --- a/matter_server/server/device_controller.py +++ b/matter_server/server/device_controller.py @@ -1073,14 +1073,24 @@ async def _setup_node(self, node_id: int) -> None: def log_node_long_setup(time_start: float) -> None: """Temporary measure to track a locked-up SDK issue in some (special) circumstances.""" time_mins = int((time.time() - time_start) / 60) + if TYPE_CHECKING: + assert self.server.loop + assert self.chip_controller + # get productlabel or modelname from raw attributes node_model = node_data.attributes.get( "0/40/14", node_data.attributes.get("0/40/3", "") ) node_name = f"Node {node_id} ({node_model})" + # get current IP the sdk is using to communicate with the device + if sdk_ip_info := self.chip_controller.GetAddressAndPort(node_id): + ip_address = sdk_ip_info[0] + else: + ip_address = "unknown" node_logger.error( f"\n\nATTENTION: {node_name} did not complete setup in {time_mins} minutes.\n" # noqa: G004 "This is an indication of a (connectivity) issue with this device. \n" + f"IP-address in use for this device: {ip_address}\n" "Try powercycling this device and/or relocate it closer to a Border Router or \n" "WiFi Access Point. If this issue persists, please create an issue report on \n" "the Matter channel of the Home Assistant Discord server or on Github:\n" @@ -1088,8 +1098,6 @@ def log_node_long_setup(time_start: float) -> None: "integration%3A%20matter&projects=&template=bug_report.yml\n", ) # reschedule itself - if TYPE_CHECKING: - assert self.server.loop log_timers[node_id] = self.server.loop.call_later( 15 * 60, log_node_long_setup, time_start )