Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Delay marking a node as unavailable #568

Merged
merged 4 commits into from
Feb 21, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 22 additions & 9 deletions matter_server/server/device_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@
LOGGER = logging.getLogger(__name__)
MAX_POLL_INTERVAL = 600
MAX_COMMISSION_RETRIES = 3
NODE_RESUBSCRIBE_ATTEMPTS_UNAVAILABLE = 3
NODE_RESUBSCRIBE_TIMEOUT_OFFLINE = 30 * 60 * 1000

MDNS_TYPE_OPERATIONAL_NODE = "_matter._tcp.local."
MDNS_TYPE_COMMISSIONABLE_NODE = "_matterc._udp.local."
Expand Down Expand Up @@ -851,7 +853,7 @@ async def _subscribe_node(self, node_id: int) -> None:
node_logger.debug("Re-using existing subscription.")
return
async with node_lock:
node_logger.debug("Unsubscribing from existing subscription.")
node_logger.info("Unsubscribing from existing subscription.")
await self._call_sdk(prev_sub.Shutdown)
del self._subscriptions[node_id]

Expand Down Expand Up @@ -976,14 +978,25 @@ def resubscription_attempted(
terminationError,
nextResubscribeIntervalMsec,
)
# mark node as unavailable and signal consumers
# we debounce it a bit so we only mark the node unavailable
# at the second resubscription attempt
if node.available and self._last_subscription_attempt[node_id] >= 1:
# NOTE: if the node is (re)discovered by mdns, that callback will
# take care of resubscribing to the node
resubscription_attempt = self._last_subscription_attempt[node_id] + 1
self._last_subscription_attempt[node_id] = resubscription_attempt
# Mark node as unavailable and signal consumers.
# We debounce it a bit so we only mark the node unavailable
# after some resubscription attempts and we shutdown the subscription
# if the resubscription interval exceeds 30 minutes (TTL of mdns).
# The node will be auto picked up by mdns if it's alive again.
if (
node.available
and resubscription_attempt >= NODE_RESUBSCRIBE_ATTEMPTS_UNAVAILABLE
):
node.available = False
self.server.signal_event(EventType.NODE_UPDATED, node)
LOGGER.info("Marked node %s as unavailable", node_id)
if (
not node.available
and nextResubscribeIntervalMsec > NODE_RESUBSCRIBE_TIMEOUT_OFFLINE
):
asyncio.create_task(self._node_offline(node_id))
self._last_subscription_attempt[node_id] += 1

def resubscription_succeeded(
transaction: Attribute.SubscriptionTransaction,
Expand Down Expand Up @@ -1249,7 +1262,7 @@ async def _node_offline(self, node_id: int) -> None:
# shutdown existing subscriptions
if sub := self._subscriptions.pop(node_id, None):
await self._call_sdk(sub.Shutdown)
# mark node as unavailable
# mark node as unavailable (if it wasn't already)
node = self._nodes[node_id]
if not node.available:
return # nothing to do to
Expand Down