Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add software update capability #709

Merged
merged 39 commits into from
Jul 15, 2024
Merged
Changes from 1 commit
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
612b867
Implement Update check using DCL software information
agners May 17, 2024
17f9f54
Initial implementation of OTA provider
agners May 17, 2024
2bab7e3
Implement update using OTA Provider app
agners May 17, 2024
9d7717f
Setup OTA Provider App automatically when necessary
agners May 17, 2024
ee82e39
Deploy chip-ota-provider-app in container
agners May 23, 2024
1cf634b
Check if DCL software updates are indeed applicable
agners May 24, 2024
f698b51
Introduce hardcoded updates
agners May 24, 2024
93f3894
Split update WebSocket command into two commands
agners May 24, 2024
09a4469
Introduce Update logic specific exceptions
agners May 24, 2024
e1a5941
Implement OTA checksum verification
agners May 24, 2024
116077d
Add client commands for updates
agners May 27, 2024
5b41888
Improve DCL error message when download fails
agners May 27, 2024
4b0911c
Improve OTA Provider handling
agners May 28, 2024
70e9b60
Move almost all update logic into ExternalOtaProvider
agners May 28, 2024
c67c850
Update implementation to work with latest refactoring
agners May 29, 2024
e4bbc47
Simplify ExternalOtaProvider
agners May 30, 2024
be9ee65
Support specific version by string
agners Jun 4, 2024
3c33d5f
Use ephemeral OTA Provider instances
agners Jun 5, 2024
07b8254
Raise update error if the node moves from querying to idle
agners Jun 5, 2024
02d43d6
Improve logging and use Future to mark completion
agners Jun 5, 2024
56d5b06
Make sure that only one update is running at a time
agners Jun 5, 2024
2f535aa
Use new commissioning API
agners Jun 20, 2024
683b33f
Ignore when there is no software version info on DCL
agners Jun 24, 2024
76ed950
Add MatterSoftwareVersion model for check_node_update
agners Jun 24, 2024
475a1dc
Bump Server schema
agners Jun 24, 2024
23a6e6b
Use OTA Provider from dedicated repository
agners Jul 11, 2024
b0dca4b
Bump OTA Provider to 2024.7.1
agners Jul 11, 2024
87cd0a4
Use new node logger
agners Jul 11, 2024
7e7537b
Complete future only once on error
agners Jul 11, 2024
7a30700
Apply suggestions from code review
agners Jul 11, 2024
07e20dd
Share client session for update check
agners Jul 11, 2024
b057eae
Provide methods to convert dataclass as dict
agners Jul 11, 2024
09c92f7
Log with node logger when checking for updates
agners Jul 11, 2024
57fb7d2
Fix trailing whitespace
agners Jul 11, 2024
9ad2348
Fix tests
agners Jul 12, 2024
507a429
ruff format
agners Jul 12, 2024
39f025e
Support loading updates from local json file
agners Jul 15, 2024
51bec82
Check if update directory exists
agners Jul 15, 2024
d4162fe
Add software update source information
agners Jul 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Move almost all update logic into ExternalOtaProvider
Most update logic is related to the external OTA provider (like
commissioning and configuring it). This commit moves most of the
update logic into the ExternalOtaProvider class.
agners committed Jul 11, 2024

Verified

This commit was signed with the committer’s verified signature. The key has expired.
agners Stefan Agner
commit 70e9b60204cdcd6032311dfa06f152894e04a695
143 changes: 27 additions & 116 deletions matter_server/server/device_controller.py
Original file line number Diff line number Diff line change
@@ -18,12 +18,11 @@
from typing import TYPE_CHECKING, Any, cast

from chip.ChipDeviceCtrl import ChipDeviceController
from chip.clusters import Attribute, Objects as Clusters, Types
from chip.clusters import Attribute, Objects as Clusters
from chip.clusters.Attribute import ValueDecodeFailure
from chip.clusters.ClusterObjects import ALL_ATTRIBUTES, ALL_CLUSTERS, Cluster
from chip.discovery import DiscoveryType
from chip.exceptions import ChipStackError
from chip.interaction_model import Status
from zeroconf import BadTypeInNameException, IPVersion, ServiceStateChange, Zeroconf
from zeroconf.asyncio import AsyncServiceBrowser, AsyncServiceInfo, AsyncZeroconf

@@ -112,6 +111,12 @@
0, Clusters.BasicInformation.Attributes.SoftwareVersionString
)
)
# Attribute path of the OtaSoftwareUpdateRequestor cluster's UpdateState
# attribute. The attribute update callback compares incoming paths against
# this constant to detect update state changes of a node.
# NOTE(review): the leading 0 is presumably the endpoint id - confirm against
# create_attribute_path_from_attribute.
OTA_SOFTWARE_UPDATE_REQUESTOR_UPDATE_STATE_ATTRIBUTE_PATH = (
create_attribute_path_from_attribute(
0, Clusters.OtaSoftwareUpdateRequestor.Attributes.UpdateState
)
)


# pylint: disable=too-many-lines,too-many-instance-attributes,too-many-public-methods

@@ -910,83 +915,6 @@ async def check_node_update(self, node_id: int) -> dict | None:

return await self._check_node_update(node_id)

async def _initialize_ota_provider(self, ota_provider: ExternalOtaProvider) -> None:
    """Commission the OTA Provider App and open its ACL for other nodes.

    The provider is commissioned on the network using its passcode. After
    that an additional ACL entry is appended on the provider node which
    grants Operate privilege (CASE auth mode) on the OTA Software Update
    Provider cluster of endpoint 0, so that node peer-to-peer communication
    is allowed. On success the new node id is stored on ``ota_provider``.

    Args:
        ota_provider: The external OTA provider to commission.

    Raises:
        RuntimeError: If the CHIP controller is not initialized.
        UpdateError: If commissioning fails, or if the ACL could not be read
            or written. In the ACL failure cases the freshly commissioned
            node is removed again before raising.
    """
    if self.chip_controller is None:
        raise RuntimeError("Device Controller not initialized.")

    # The OTA Provider has not been commissioned yet, let's do it now.
    LOGGER.info("Commissioning the built-in OTA Provider App.")
    try:
        ota_provider_node = await self.commission_on_network(
            ota_provider.get_passcode(),
            # TODO: Filtering by long discriminator seems broken
            # filter_type=FilterType.LONG_DISCRIMINATOR,
            # filter=ota_provider.get_descriminator(),
        )
    except NodeCommissionFailed as ex:
        # Without a commissioned provider there is no node id to announce,
        # so fail the update instead of silently continuing.
        LOGGER.error("Failed to commission OTA Provider App!")
        raise UpdateError("Error while setting up OTA Provider.") from ex

    ota_provider_node_id = ota_provider_node.node_id
    LOGGER.info(
        "OTA Provider App commissioned with node id %d.",
        ota_provider_node_id,
    )

    # Adjust ACL of the OTA Provider such that Node peer-to-peer communication
    # is allowed.
    try:
        read_result = await self.chip_controller.ReadAttribute(
            ota_provider_node_id, [(0, Clusters.AccessControl.Attributes.Acl)]
        )
        acl_list = cast(
            list,
            read_result[0][Clusters.AccessControl][
                Clusters.AccessControl.Attributes.Acl
            ],
        )

        # Add new ACL entry...
        # NOTE(review): fabricIndex is hard-coded to 1 - confirm this always
        # matches the controller's fabric on the provider.
        acl_list.append(
            Clusters.AccessControl.Structs.AccessControlEntryStruct(
                fabricIndex=1,
                privilege=Clusters.AccessControl.Enums.AccessControlEntryPrivilegeEnum.kOperate,
                authMode=Clusters.AccessControl.Enums.AccessControlEntryAuthModeEnum.kCase,
                subjects=Types.NullValue,
                targets=[
                    Clusters.AccessControl.Structs.AccessControlTargetStruct(
                        cluster=Clusters.OtaSoftwareUpdateProvider.id,
                        endpoint=0,
                        deviceType=Types.NullValue,
                    )
                ],
            )
        )

        # And write. This is persistent, so only need to be done after we commissioned
        # the OTA Provider App.
        write_result: Attribute.AttributeWriteResult = (
            await self.chip_controller.WriteAttribute(
                ota_provider_node_id,
                [(0, Clusters.AccessControl.Attributes.Acl(acl_list))],
            )
        )
        if write_result[0].Status != Status.Success:
            LOGGER.error(
                "Failed writing adjusted OTA Provider App ACL: Status %s.",
                str(write_result[0].Status),
            )
            await self.remove_node(ota_provider_node_id)
            raise UpdateError("Error while setting up OTA Provider.")
    except ChipStackError as ex:
        # LOGGER.exception already attaches the active exception's traceback.
        LOGGER.exception("Failed adjusting OTA Provider App ACL.")
        await self.remove_node(ota_provider_node_id)
        raise UpdateError("Error while setting up OTA Provider.") from ex

    ota_provider.set_node_id(ota_provider_node_id)

@api_command(APICommand.UPDATE_NODE)
async def update_node(self, node_id: int, software_version: int) -> dict | None:
"""
@@ -1008,48 +936,21 @@ async def update_node(self, node_id: int, software_version: int) -> dict | None:
raise RuntimeError("Device Controller not initialized.")

if not self._ota_provider:
raise UpdateError("No OTA provider found, updates not possible.")
raise UpdateError("No OTA provider found, updates not possible")

if self._ota_provider.is_busy():
raise UpdateError(
"No OTA provider currently busy, updates currently not possible"
)

# Add update to the OTA provider
await self._ota_provider.download_update(update)

ota_provider_node_id = self._ota_provider.get_node_id()
if ota_provider_node_id is None:
LOGGER.info("Initializing OTA Provider")
elif ota_provider_node_id not in self._nodes:
LOGGER.warning(
"OTA Provider node id %d no longer exists! Resetting...",
ota_provider_node_id,
)
await self._ota_provider.reset()
ota_provider_node_id = None

# Make sure any previous instances get stopped
await self._ota_provider.stop()
await self._ota_provider.start()

# Wait for OTA provider to be ready
# TODO: Detect when OTA provider is ready
await asyncio.sleep(2)

if not ota_provider_node_id:
await self._initialize_ota_provider(self._ota_provider)

# Notify update node about the availability of the OTA Provider. It will query
# the OTA provider and start the update.
try:
await self.chip_controller.SendCommand(
nodeid=node_id,
endpoint=0,
payload=Clusters.OtaSoftwareUpdateRequestor.Commands.AnnounceOTAProvider(
providerNodeID=ota_provider_node_id,
vendorID=self.server.vendor_id,
announcementReason=Clusters.OtaSoftwareUpdateRequestor.Enums.AnnouncementReasonEnum.kUpdateAvailable,
endpoint=ExternalOtaProvider.ENDPOINT_ID,
),
)
except ChipStackError as ex:
raise UpdateError("Error while announcing OTA Provider to node.") from ex
await self._ota_provider.start_update(
self,
node_id,
)

return update

@@ -1142,6 +1043,16 @@ def attribute_updated_callback(
# schedule a full interview of the node if the software version changed
self._loop.create_task(self.interview_node(node_id))

# work out if update state changed
if (
str(path) == OTA_SOFTWARE_UPDATE_REQUESTOR_UPDATE_STATE_ATTRIBUTE_PATH
and new_value != old_value
):
if self._ota_provider:
loop.create_task(
self._ota_provider.check_update_state(node_id, new_value)
)

# store updated value in node attributes
node.attributes[str(path)] = new_value

13 changes: 13 additions & 0 deletions matter_server/server/ota/__init__.py
Original file line number Diff line number Diff line change
@@ -18,6 +18,19 @@
"otaUrl": "https://github.com/agners/matter-linux-example-apps/releases/download/v1.3.0.0/chip-ota-requestor-app-x86-64.ota",
"releaseNotesUrl": "https://github.com/agners/matter-linux-example-apps/releases/tag/v1.3.0.0",
},
(0x143D, 0x1001): {
"vid": 0x143D,
"pid": 0x1001,
"softwareVersion": 10010011,
"softwareVersionString": "1.1.11-c85ba1e-dirty",
"cdVersionNumber": 1,
"softwareVersionValid": True,
"otaChecksum": "x2sK9xjVuGff0eefYa4cporDO+Z+WVxxw+JP5Ol+5og=",
"otaChecksumType": 1,
"minApplicableSoftwareVersion": 10010000,
"maxApplicableSoftwareVersion": 10010011,
"otaUrl": "https://raw.githubusercontent.com/ChampOnBon/Onvis/master/S4/debug.ota",
},
}


185 changes: 177 additions & 8 deletions matter_server/server/ota/provider.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"""Handling Matter OTA provider."""

from __future__ import annotations

import asyncio
from base64 import b64encode
from dataclasses import asdict, dataclass
@@ -10,15 +12,21 @@
import logging
from pathlib import Path
import secrets
from typing import TYPE_CHECKING, Final
from typing import TYPE_CHECKING, Final, cast
from urllib.parse import unquote, urlparse

from aiohttp import ClientError, ClientSession
from aiohttp.client_exceptions import InvalidURL
from chip.clusters import Attribute, Objects as Clusters, Types
from chip.exceptions import ChipStackError
from chip.interaction_model import Status

from matter_server.common.errors import UpdateError
from matter_server.common.errors import NodeCommissionFailed, NodeNotExists, UpdateError
from matter_server.common.helpers.util import dataclass_from_dict

if TYPE_CHECKING:
from matter_server.server.device_controller import MatterDeviceController

if TYPE_CHECKING:
from asyncio.subprocess import Process

@@ -84,6 +92,7 @@ def __init__(self, ota_provider_dir: Path) -> None:
self._ota_provider_image_list: OtaProviderImageList | None = None
self._ota_provider_proc: Process | None = None
self._ota_provider_task: asyncio.Task | None = None
self._ota_target_node_id: int | None = None

async def initialize(self) -> None:
"""Initialize OTA Provider."""
@@ -125,10 +134,9 @@ def _get_ota_provider_image_list(self) -> OtaProviderImageList:
raise RuntimeError("OTA provider image list not initialized.")
return self._ota_provider_image_list

def get_node_id(self) -> int | None:
    """Return the node ID recorded for the OTA Provider App, if any."""
    image_list = self._get_ota_provider_image_list()
    return image_list.otaProviderNodeId
def is_busy(self) -> bool:
    """Return True while a target node is assigned for an update."""
    target_node_id = self._ota_target_node_id
    return target_node_id is not None

def get_descriminator(self) -> int:
"""Return OTA Provider App discriminator."""
@@ -145,9 +153,98 @@ def set_node_id(self, node_id: int) -> None:

self._get_ota_provider_image_list().otaProviderNodeId = node_id

async def start(self) -> None:
def get_node_id(self) -> int | None:
    """Return the node ID recorded for the OTA Provider App, if any."""
    image_list = self._get_ota_provider_image_list()
    return image_list.otaProviderNodeId

async def _initialize(self, device_controller: MatterDeviceController) -> None:
    """Commission the OTA Provider App and open its ACL for other nodes.

    The provider is commissioned on the network using its passcode. After
    that an additional ACL entry is appended on the provider node which
    grants Operate privilege (CASE auth mode) on the OTA Software Update
    Provider cluster of endpoint 0, so that node peer-to-peer communication
    is allowed. On success the new node id is stored via ``set_node_id``.

    Args:
        device_controller: Controller used to commission the provider and
            to read/write its attributes.

    Raises:
        RuntimeError: If the CHIP controller is not initialized.
        UpdateError: If commissioning fails, or if the ACL could not be read
            or written. In the ACL failure cases the freshly commissioned
            node is removed again before raising.
    """
    if device_controller.chip_controller is None:
        raise RuntimeError("Device Controller not initialized.")

    # The OTA Provider has not been commissioned yet, let's do it now.
    LOGGER.info("Commissioning the built-in OTA Provider App.")
    try:
        ota_provider_node = await device_controller.commission_on_network(
            self.get_passcode(),
            # TODO: Filtering by long discriminator seems broken
            # filter_type=FilterType.LONG_DISCRIMINATOR,
            # filter=self.get_descriminator(),
        )
    except NodeCommissionFailed as ex:
        # Without a commissioned provider there is no node id to announce,
        # so fail the update instead of silently continuing.
        LOGGER.error("Failed to commission OTA Provider App!")
        raise UpdateError("Error while setting up OTA Provider.") from ex

    ota_provider_node_id = ota_provider_node.node_id
    LOGGER.info(
        "OTA Provider App commissioned with node id %d.",
        ota_provider_node_id,
    )

    # Adjust ACL of the OTA Provider such that Node peer-to-peer communication
    # is allowed.
    try:
        read_result = await device_controller.chip_controller.ReadAttribute(
            ota_provider_node_id, [(0, Clusters.AccessControl.Attributes.Acl)]
        )
        acl_list = cast(
            list,
            read_result[0][Clusters.AccessControl][
                Clusters.AccessControl.Attributes.Acl
            ],
        )

        # Add new ACL entry...
        # NOTE(review): fabricIndex is hard-coded to 1 - confirm this always
        # matches the controller's fabric on the provider.
        acl_list.append(
            Clusters.AccessControl.Structs.AccessControlEntryStruct(
                fabricIndex=1,
                privilege=Clusters.AccessControl.Enums.AccessControlEntryPrivilegeEnum.kOperate,
                authMode=Clusters.AccessControl.Enums.AccessControlEntryAuthModeEnum.kCase,
                subjects=Types.NullValue,
                targets=[
                    Clusters.AccessControl.Structs.AccessControlTargetStruct(
                        cluster=Clusters.OtaSoftwareUpdateProvider.id,
                        endpoint=0,
                        deviceType=Types.NullValue,
                    )
                ],
            )
        )

        # And write. This is persistent, so only need to be done after we commissioned
        # the OTA Provider App.
        write_result: Attribute.AttributeWriteResult = (
            await device_controller.chip_controller.WriteAttribute(
                ota_provider_node_id,
                [(0, Clusters.AccessControl.Attributes.Acl(acl_list))],
            )
        )
        if write_result[0].Status != Status.Success:
            LOGGER.error(
                "Failed writing adjusted OTA Provider App ACL: Status %s.",
                str(write_result[0].Status),
            )
            await device_controller.remove_node(ota_provider_node_id)
            raise UpdateError("Error while setting up OTA Provider.")
    except ChipStackError as ex:
        # LOGGER.exception already attaches the active exception's traceback.
        LOGGER.exception("Failed adjusting OTA Provider App ACL.")
        await device_controller.remove_node(ota_provider_node_id)
        raise UpdateError("Error while setting up OTA Provider.") from ex

    self.set_node_id(ota_provider_node_id)

async def start_update(
self, device_controller: MatterDeviceController, node_id: int
) -> None:
"""Start the OTA Provider."""

if device_controller.chip_controller is None:
raise RuntimeError("Device Controller not initialized.")

self._ota_target_node_id = node_id

def _write_ota_provider_image_list_json(
ota_provider_image_list_file: Path,
ota_provider_image_list: OtaProviderImageList,
@@ -192,7 +289,51 @@ def _write_ota_provider_image_list_json(
self._ota_provider_proc.communicate()
)

async def reset(self) -> None:
# Wait for OTA provider to be ready
# TODO: Detect when OTA provider is ready
await asyncio.sleep(2)

# Handle if user deleted the OTA Provider node.
ota_provider_node_id = self.get_node_id()
if ota_provider_node_id is not None:
try:
device_controller.get_node(ota_provider_node_id)
except NodeNotExists:
LOGGER.warning(
"OTA Provider node id %d not known by device controller! Resetting...",
ota_provider_node_id,
)
await self._reset()
ota_provider_node_id = None

# Commission and prepare OTA Provider if not initialized yet.
# Use "ota_provider_node_id" to indicate if OTA Provider is setup or not.
try:
if ota_provider_node_id is None:
LOGGER.info("Initializing OTA Provider")
await self._initialize(device_controller)
finally:
self._ota_target_node_id = None

# Notify update node about the availability of the OTA Provider. It will query
# the OTA provider and start the update.
try:
await device_controller.chip_controller.SendCommand(
nodeid=node_id,
endpoint=0,
payload=Clusters.OtaSoftwareUpdateRequestor.Commands.AnnounceOTAProvider(
providerNodeID=ota_provider_node_id,
vendorID=device_controller.server.vendor_id,
announcementReason=Clusters.OtaSoftwareUpdateRequestor.Enums.AnnouncementReasonEnum.kUpdateAvailable,
endpoint=ExternalOtaProvider.ENDPOINT_ID,
),
)
except ChipStackError as ex:
raise UpdateError("Error while announcing OTA Provider to node.") from ex
finally:
self._ota_target_node_id = None

async def _reset(self) -> None:
"""Reset the OTA Provider App state."""

def _remove_update_data(ota_provider_dir: Path) -> None:
@@ -312,3 +453,31 @@ async def download_update(self, update_desc: dict) -> None:
raise UpdateError("Fetching software version failed") from err

await self.add_update(update_desc, file_path)

async def check_update_state(
    self,
    node_id: int,
    update_state: Clusters.OtaSoftwareUpdateRequestor.Enums.UpdateStateEnum,
) -> None:
    """Stop the OTA Provider once the node being updated reports idle.

    State changes are ignored when no update target is set or when they
    belong to a node other than the current update target.

    Args:
        node_id: The ID of the node whose update state changed.
        update_state: The new update state of the node.
    """
    if self._ota_target_node_id is None or self._ota_target_node_id != node_id:
        return

    # Update state of target node changed, check if update is done.
    if (
        update_state
        != Clusters.OtaSoftwareUpdateRequestor.Enums.UpdateStateEnum.kIdle
    ):
        return

    LOGGER.info("Update of node %d done.", node_id)
    try:
        await self.stop()
    finally:
        # Always release the target: if stop() raised, the provider would
        # otherwise stay marked as busy.
        self._ota_target_node_id = None