Skip to content

Commit e42b533

Browse files
Extend our fail-safe a bit before we try to enable wifi/thread networks. (project-chip#25667)
* Extend our fail-safe a bit before we try to enable wifi/thread networks. Two changes here: 1. Automatic fail-safe extension should not make the fail-safe _shorter_. This should prevent, for example, a short fail-safe on attestation failure overriding a longer initial fail-safe. 2. Before we try to enable network (which is the last time we're guaranteed to be able to talk to a device over PASE, for non-concurrent commissioning flow devices), extend our fail-safe to try to cover the time it will take us to do operational discovery and sending CommissioningComplete. * Address review comments. * Address review comments.
1 parent 1ce3a1e commit e42b533

9 files changed

+151
-44
lines changed

src/app/DeviceProxy.h

+8-2
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include <lib/core/CHIPCallback.h>
3131
#include <lib/core/CHIPCore.h>
3232
#include <lib/support/DLLUtil.h>
33+
#include <system/SystemClock.h>
3334

3435
namespace chip {
3536

@@ -54,7 +55,12 @@ class DLL_EXPORT DeviceProxy
5455

5556
virtual CHIP_ERROR SetPeerId(ByteSpan rcac, ByteSpan noc) { return CHIP_ERROR_NOT_IMPLEMENTED; }
5657

57-
const ReliableMessageProtocolConfig & GetRemoteMRPConfig() const { return mRemoteMRPConfig; }
58+
/**
59+
* Facilities for keeping track of the latest point we can expect the
60+
* fail-safe to last through. These timestamp values use the monotonic clock.
61+
*/
62+
void SetFailSafeExpirationTimestamp(System::Clock::Timestamp timestamp) { mFailSafeExpirationTimestamp = timestamp; }
63+
System::Clock::Timestamp GetFailSafeExpirationTimestamp() const { return mFailSafeExpirationTimestamp; }
5864

5965
/**
6066
* @brief
@@ -69,7 +75,7 @@ class DLL_EXPORT DeviceProxy
6975
protected:
7076
virtual bool IsSecureConnected() const = 0;
7177

72-
ReliableMessageProtocolConfig mRemoteMRPConfig = GetDefaultMRPConfig();
78+
System::Clock::Timestamp mFailSafeExpirationTimestamp = System::Clock::kZero;
7379
};
7480

7581
} // namespace chip

src/controller/AutoCommissioner.cpp

+10-4
Original file line numberDiff line numberDiff line change
@@ -301,18 +301,21 @@ CommissioningStage AutoCommissioner::GetNextCommissioningStageInternal(Commissio
301301
}
302302
else
303303
{
304-
return CommissioningStage::kWiFiNetworkEnable;
304+
return CommissioningStage::kFailsafeBeforeWiFiEnable;
305305
}
306306
case CommissioningStage::kThreadNetworkSetup:
307307
if (mParams.GetWiFiCredentials().HasValue() && mDeviceCommissioningInfo.network.wifi.endpoint != kInvalidEndpointId)
308308
{
309-
return CommissioningStage::kWiFiNetworkEnable;
309+
return CommissioningStage::kFailsafeBeforeWiFiEnable;
310310
}
311311
else
312312
{
313-
return CommissioningStage::kThreadNetworkEnable;
313+
return CommissioningStage::kFailsafeBeforeThreadEnable;
314314
}
315-
315+
case CommissioningStage::kFailsafeBeforeWiFiEnable:
316+
return CommissioningStage::kWiFiNetworkEnable;
317+
case CommissioningStage::kFailsafeBeforeThreadEnable:
318+
return CommissioningStage::kThreadNetworkEnable;
316319
case CommissioningStage::kWiFiNetworkEnable:
317320
if (mParams.GetThreadOperationalDataset().HasValue() &&
318321
mDeviceCommissioningInfo.network.thread.endpoint != kInvalidEndpointId)
@@ -374,6 +377,9 @@ void AutoCommissioner::SetCASEFailsafeTimerIfNeeded()
374377
// CASE establishment, and receipt of the commissioning complete command.
375378
// We know that the mCommissioneeDeviceProxy is still valid at this point since it gets cleared during cleanup
376379
// and SetCASEFailsafeTimerIfNeeded is always called before that stage.
380+
//
381+
// A false return from ExtendArmFailSafe is fine; we don't want to make
382+
// the fail-safe shorter here.
377383
mCommissioner->ExtendArmFailSafe(mCommissioneeDeviceProxy, CommissioningStage::kFindOperational,
378384
mParams.GetCASEFailsafeTimerSeconds().Value(),
379385
GetCommandTimeout(mCommissioneeDeviceProxy, CommissioningStage::kArmFailsafe),

src/controller/CHIPDeviceController.cpp

+88-11
Original file line numberDiff line numberDiff line change
@@ -1131,16 +1131,39 @@ void DeviceCommissioner::OnFailedToExtendedArmFailSafeDeviceAttestation(void * c
11311131
commissioner->CommissioningStageComplete(CHIP_ERROR_INTERNAL, report);
11321132
}
11331133

1134-
void DeviceCommissioner::ExtendArmFailSafe(DeviceProxy * proxy, CommissioningStage step, uint16_t armFailSafeTimeout,
1134+
bool DeviceCommissioner::ExtendArmFailSafe(DeviceProxy * proxy, CommissioningStage step, uint16_t armFailSafeTimeout,
11351135
Optional<System::Clock::Timeout> commandTimeout, OnExtendFailsafeSuccess onSuccess,
11361136
OnExtendFailsafeFailure onFailure)
11371137
{
1138+
using namespace System;
1139+
using namespace System::Clock;
1140+
auto now = SystemClock().GetMonotonicTimestamp();
1141+
auto newFailSafeTimeout = now + Seconds16(armFailSafeTimeout);
1142+
if (newFailSafeTimeout < proxy->GetFailSafeExpirationTimestamp())
1143+
{
1144+
ChipLogProgress(
1145+
Controller, "Skipping arming failsafe: new time (%u seconds from now) before old time (%u seconds from now)",
1146+
armFailSafeTimeout, std::chrono::duration_cast<Seconds16>(proxy->GetFailSafeExpirationTimestamp() - now).count());
1147+
return false;
1148+
}
1149+
11381150
uint64_t breadcrumb = static_cast<uint64_t>(step);
11391151
GeneralCommissioning::Commands::ArmFailSafe::Type request;
11401152
request.expiryLengthSeconds = armFailSafeTimeout;
11411153
request.breadcrumb = breadcrumb;
11421154
ChipLogProgress(Controller, "Arming failsafe (%u seconds)", request.expiryLengthSeconds);
1143-
SendCommand(proxy, request, onSuccess, onFailure, kRootEndpointId, commandTimeout);
1155+
CHIP_ERROR err = SendCommand(proxy, request, onSuccess, onFailure, kRootEndpointId, commandTimeout);
1156+
if (err != CHIP_NO_ERROR)
1157+
{
1158+
onFailure(this, err);
1159+
}
1160+
else
1161+
{
1162+
// TODO: Handle the situation when our command ends up erroring out
1163+
// asynchronously?
1164+
proxy->SetFailSafeExpirationTimestamp(newFailSafeTimeout);
1165+
}
1166+
return true;
11441167
}
11451168

11461169
void DeviceCommissioner::ExtendArmFailSafeForDeviceAttestation(const Credentials::DeviceAttestationVerifier::AttestationInfo & info,
@@ -1154,20 +1177,24 @@ void DeviceCommissioner::ExtendArmFailSafeForDeviceAttestation(const Credentials
11541177
mAttestationDeviceInfo = Platform::MakeUnique<Credentials::DeviceAttestationVerifier::AttestationDeviceInfo>(info);
11551178

11561179
auto expiryLengthSeconds = deviceAttestationDelegate->FailSafeExpiryTimeoutSecs();
1157-
if (expiryLengthSeconds.HasValue())
1180+
bool failSafeSkipped = expiryLengthSeconds.HasValue();
1181+
if (failSafeSkipped)
11581182
{
1159-
GeneralCommissioning::Commands::ArmFailSafe::Type request;
1160-
request.expiryLengthSeconds = expiryLengthSeconds.Value();
1161-
request.breadcrumb = mCommissioningStage;
1162-
ChipLogProgress(Controller, "Changing fail-safe timer to %u seconds to handle DA failure", request.expiryLengthSeconds);
1183+
ChipLogProgress(Controller, "Changing fail-safe timer to %u seconds to handle DA failure", expiryLengthSeconds.Value());
11631184
// Per spec, anything we do with the fail-safe armed must not time out
11641185
// in less than kMinimumCommissioningStepTimeout.
1165-
SendCommand(mDeviceBeingCommissioned, request, OnArmFailSafeExtendedForDeviceAttestation,
1166-
OnFailedToExtendedArmFailSafeDeviceAttestation, MakeOptional(kMinimumCommissioningStepTimeout));
1186+
failSafeSkipped =
1187+
ExtendArmFailSafe(mDeviceBeingCommissioned, mCommissioningStage, expiryLengthSeconds.Value(),
1188+
MakeOptional(kMinimumCommissioningStepTimeout), OnArmFailSafeExtendedForDeviceAttestation,
1189+
OnFailedToExtendedArmFailSafeDeviceAttestation);
11671190
}
11681191
else
11691192
{
11701193
ChipLogProgress(Controller, "Proceeding without changing fail-safe timer value as delegate has not set it");
1194+
}
1195+
1196+
if (failSafeSkipped)
1197+
{
11711198
// Callee does not use data argument.
11721199
const GeneralCommissioning::Commands::ArmFailSafeResponse::DecodableType data;
11731200
OnArmFailSafeExtendedForDeviceAttestation(this, data);
@@ -1782,6 +1809,8 @@ void DeviceCommissioner::OnDeviceConnectionRetryFn(void * context, const ScopedN
17821809
{
17831810
failsafeTimeout = static_cast<uint16_t>(retryTimeout.count() + kDefaultFailsafeTimeout);
17841811
}
1812+
// A false return from ExtendArmFailSafe is fine; we don't want to make the
1813+
// fail-safe shorter here.
17851814
self->ExtendArmFailSafe(commissioneeDevice, CommissioningStage::kFindOperational, failsafeTimeout,
17861815
MakeOptional(kMinimumCommissioningStepTimeout), OnExtendFailsafeForCASERetrySuccess,
17871816
OnExtendFailsafeForCASERetryFailure);
@@ -2168,8 +2197,11 @@ void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, Commissio
21682197
{
21692198
case CommissioningStage::kArmFailsafe: {
21702199
VerifyOrDie(endpoint == kRootEndpointId);
2171-
ExtendArmFailSafe(proxy, step, params.GetFailsafeTimerSeconds().ValueOr(kDefaultFailsafeTimeout), timeout, OnArmFailSafe,
2172-
OnBasicFailure);
2200+
// Make sure the fail-safe value we set here actually ends up being used
2201+
// no matter what.
2202+
proxy->SetFailSafeExpirationTimestamp(System::Clock::kZero);
2203+
VerifyOrDie(ExtendArmFailSafe(proxy, step, params.GetFailsafeTimerSeconds().ValueOr(kDefaultFailsafeTimeout), timeout,
2204+
OnArmFailSafe, OnBasicFailure));
21732205
}
21742206
break;
21752207
case CommissioningStage::kReadCommissioningInfo: {
@@ -2464,6 +2496,14 @@ void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, Commissio
24642496
SendCommand(proxy, request, OnNetworkConfigResponse, OnBasicFailure, endpoint, timeout);
24652497
}
24662498
break;
2499+
case CommissioningStage::kFailsafeBeforeWiFiEnable:
2500+
FALLTHROUGH;
2501+
case CommissioningStage::kFailsafeBeforeThreadEnable:
2502+
// Before we try to do network enablement, make sure that our fail-safe
2503+
// is set far enough out that we can later try to do operational
2504+
// discovery without it timing out.
2505+
ExtendFailsafeBeforeNetworkEnable(proxy, params, step);
2506+
break;
24672507
case CommissioningStage::kWiFiNetworkEnable: {
24682508
if (!params.GetWiFiCredentials().HasValue())
24692509
{
@@ -2522,6 +2562,43 @@ void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, Commissio
25222562
}
25232563
}
25242564

2565+
void DeviceCommissioner::ExtendFailsafeBeforeNetworkEnable(DeviceProxy * device, CommissioningParameters & params,
2566+
CommissioningStage step)
2567+
{
2568+
auto * commissioneeDevice = FindCommissioneeDevice(device->GetDeviceId());
2569+
if (device != commissioneeDevice)
2570+
{
2571+
// Not a commissionee device; just return.
2572+
ChipLogError(Controller, "Trying to extend fail-safe for an unknown commissionee with device id " ChipLogFormatX64,
2573+
ChipLogValueX64(device->GetDeviceId()));
2574+
CommissioningStageComplete(CHIP_ERROR_INCORRECT_STATE, CommissioningDelegate::CommissioningReport());
2575+
return;
2576+
}
2577+
2578+
// Try to make sure we have at least enough time for our expected
2579+
// commissioning bits plus the MRP retries for a Sigma1.
2580+
uint16_t failSafeTimeoutSecs = params.GetFailsafeTimerSeconds().ValueOr(kDefaultFailsafeTimeout);
2581+
auto sigma1Timeout = CASESession::ComputeSigma1ResponseTimeout(commissioneeDevice->GetPairing().GetRemoteMRPConfig());
2582+
uint16_t sigma1TimeoutSecs = std::chrono::duration_cast<System::Clock::Seconds16>(sigma1Timeout).count();
2583+
if (UINT16_MAX - failSafeTimeoutSecs < sigma1TimeoutSecs)
2584+
{
2585+
failSafeTimeoutSecs = UINT16_MAX;
2586+
}
2587+
else
2588+
{
2589+
failSafeTimeoutSecs = static_cast<uint16_t>(failSafeTimeoutSecs + sigma1TimeoutSecs);
2590+
}
2591+
2592+
// A false return from ExtendArmFailSafe is fine; we don't want to make the
2593+
// fail-safe shorter here.
2594+
if (!ExtendArmFailSafe(commissioneeDevice, step, failSafeTimeoutSecs, MakeOptional(kMinimumCommissioningStepTimeout),
2595+
OnArmFailSafe, OnBasicFailure))
2596+
{
2597+
// Just move on to the next step.
2598+
CommissioningStageComplete(CHIP_NO_ERROR, CommissioningDelegate::CommissioningReport());
2599+
}
2600+
}
2601+
25252602
CHIP_ERROR DeviceController::GetCompressedFabricIdBytes(MutableByteSpan & outBytes) const
25262603
{
25272604
const auto * fabricInfo = GetFabricInfo();

src/controller/CHIPDeviceController.h

+10-2
Original file line numberDiff line numberDiff line change
@@ -683,8 +683,11 @@ class DLL_EXPORT DeviceCommissioner : public DeviceController,
683683
return mDefaultCommissioner == nullptr ? NullOptional : MakeOptional(mDefaultCommissioner->GetCommissioningParameters());
684684
}
685685

686-
// Reset the arm failsafe timer during commissioning.
687-
void ExtendArmFailSafe(DeviceProxy * proxy, CommissioningStage step, uint16_t armFailSafeTimeout,
686+
// Reset the arm failsafe timer during commissioning. If this returns
687+
// false, that means that the timer was already set for a longer time period
688+
// than the new time we are trying to set. In this case, neither
689+
// onSuccess nor onFailure will be called.
690+
bool ExtendArmFailSafe(DeviceProxy * proxy, CommissioningStage step, uint16_t armFailSafeTimeout,
688691
Optional<System::Clock::Timeout> commandTimeout, OnExtendFailsafeSuccess onSuccess,
689692
OnExtendFailsafeFailure onFailure);
690693

@@ -900,6 +903,11 @@ class DLL_EXPORT DeviceCommissioner : public DeviceController,
900903
// to the PairingDelegate upon arm failsafe command completion.
901904
void CleanupCommissioning(DeviceProxy * proxy, NodeId nodeId, const CompletionStatus & completionStatus);
902905

906+
// Extend the fail-safe before trying to do network-enable (since after that
907+
// point, for non-concurrent-commissioning devices, we may not have a way to
908+
// extend it).
909+
void ExtendFailsafeBeforeNetworkEnable(DeviceProxy * device, CommissioningParameters & params, CommissioningStage step);
910+
903911
chip::Callback::Callback<OnDeviceConnected> mOnDeviceConnectedCallback;
904912
chip::Callback::Callback<OnDeviceConnectionFailure> mOnDeviceConnectionFailureCallback;
905913
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES

src/controller/CommissioneeDeviceProxy.cpp

+1-3
Original file line numberDiff line numberDiff line change
@@ -71,11 +71,9 @@ CHIP_ERROR CommissioneeDeviceProxy::UpdateDeviceData(const Transport::PeerAddres
7171
{
7272
mDeviceAddress = addr;
7373

74-
mRemoteMRPConfig = config;
75-
7674
// Initialize PASE session state with any MRP parameters that DNS-SD has provided.
7775
// It can be overridden by PASE session protocol messages that include MRP parameters.
78-
mPairing.SetRemoteMRPConfig(mRemoteMRPConfig);
76+
mPairing.SetRemoteMRPConfig(config);
7977

8078
if (!mSecureSession)
8179
{

src/controller/CommissioningDelegate.cpp

+8
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,14 @@ const char * StageToString(CommissioningStage stage)
9393
return "ThreadNetworkSetup";
9494
break;
9595

96+
case kFailsafeBeforeWiFiEnable:
97+
return "FailsafeBeforeWiFiEnable";
98+
break;
99+
100+
case kFailsafeBeforeThreadEnable:
101+
return "FailsafeBeforeThreadEnable";
102+
break;
103+
96104
case kWiFiNetworkEnable:
97105
return "WiFiNetworkEnable";
98106
break;

src/controller/CommissioningDelegate.h

+22-20
Original file line numberDiff line numberDiff line change
@@ -32,26 +32,28 @@ class DeviceCommissioner;
3232
enum CommissioningStage : uint8_t
3333
{
3434
kError,
35-
kSecurePairing, ///< Establish a PASE session with the device
36-
kReadCommissioningInfo, ///< Query General Commissioning Attributes and Network Features
37-
kArmFailsafe, ///< Send ArmFailSafe (0x30:0) command to the device
38-
kConfigRegulatory, ///< Send SetRegulatoryConfig (0x30:2) command to the device
39-
kSendPAICertificateRequest, ///< Send PAI CertificateChainRequest (0x3E:2) command to the device
40-
kSendDACCertificateRequest, ///< Send DAC CertificateChainRequest (0x3E:2) command to the device
41-
kSendAttestationRequest, ///< Send AttestationRequest (0x3E:0) command to the device
42-
kAttestationVerification, ///< Verify AttestationResponse (0x3E:1) validity
43-
kSendOpCertSigningRequest, ///< Send CSRRequest (0x3E:4) command to the device
44-
kValidateCSR, ///< Verify CSRResponse (0x3E:5) validity
45-
kGenerateNOCChain, ///< TLV encode Node Operational Credentials (NOC) chain certs
46-
kSendTrustedRootCert, ///< Send AddTrustedRootCertificate (0x3E:11) command to the device
47-
kSendNOC, ///< Send AddNOC (0x3E:6) command to the device
48-
kWiFiNetworkSetup, ///< Send AddOrUpdateWiFiNetwork (0x31:2) command to the device
49-
kThreadNetworkSetup, ///< Send AddOrUpdateThreadNetwork (0x31:3) command to the device
50-
kWiFiNetworkEnable, ///< Send ConnectNetwork (0x31:6) command to the device for the WiFi network
51-
kThreadNetworkEnable, ///< Send ConnectNetwork (0x31:6) command to the device for the Thread network
52-
kFindOperational, ///< Perform operational discovery and establish a CASE session with the device
53-
kSendComplete, ///< Send CommissioningComplete (0x30:4) command to the device
54-
kCleanup, ///< Call delegates with status, free memory, clear timers and state
35+
kSecurePairing, ///< Establish a PASE session with the device
36+
kReadCommissioningInfo, ///< Query General Commissioning Attributes and Network Features
37+
kArmFailsafe, ///< Send ArmFailSafe (0x30:0) command to the device
38+
kConfigRegulatory, ///< Send SetRegulatoryConfig (0x30:2) command to the device
39+
kSendPAICertificateRequest, ///< Send PAI CertificateChainRequest (0x3E:2) command to the device
40+
kSendDACCertificateRequest, ///< Send DAC CertificateChainRequest (0x3E:2) command to the device
41+
kSendAttestationRequest, ///< Send AttestationRequest (0x3E:0) command to the device
42+
kAttestationVerification, ///< Verify AttestationResponse (0x3E:1) validity
43+
kSendOpCertSigningRequest, ///< Send CSRRequest (0x3E:4) command to the device
44+
kValidateCSR, ///< Verify CSRResponse (0x3E:5) validity
45+
kGenerateNOCChain, ///< TLV encode Node Operational Credentials (NOC) chain certs
46+
kSendTrustedRootCert, ///< Send AddTrustedRootCertificate (0x3E:11) command to the device
47+
kSendNOC, ///< Send AddNOC (0x3E:6) command to the device
48+
kWiFiNetworkSetup, ///< Send AddOrUpdateWiFiNetwork (0x31:2) command to the device
49+
kThreadNetworkSetup, ///< Send AddOrUpdateThreadNetwork (0x31:3) command to the device
50+
kFailsafeBeforeWiFiEnable, ///< Extend the fail-safe before doing kWiFiNetworkEnable
51+
kFailsafeBeforeThreadEnable, ///< Extend the fail-safe before doing kThreadNetworkEnable
52+
kWiFiNetworkEnable, ///< Send ConnectNetwork (0x31:6) command to the device for the WiFi network
53+
kThreadNetworkEnable, ///< Send ConnectNetwork (0x31:6) command to the device for the Thread network
54+
kFindOperational, ///< Perform operational discovery and establish a CASE session with the device
55+
kSendComplete, ///< Send CommissioningComplete (0x30:4) command to the device
56+
kCleanup, ///< Call delegates with status, free memory, clear timers and state
5557
/// Send ScanNetworks (0x31:0) command to the device.
5658
/// ScanNetworks can happen anytime after kArmFailsafe.
5759
/// However, the cirque tests fail if it is earlier in the list

src/controller/python/OpCredsBinding.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -297,12 +297,14 @@ class TestCommissioner : public chip::Controller::AutoCommissioner
297297
{
298298
if (!mIsWifi &&
299299
(stage == chip::Controller::CommissioningStage::kWiFiNetworkEnable ||
300+
stage == chip::Controller::CommissioningStage::kFailsafeBeforeWiFiEnable ||
300301
stage == chip::Controller::CommissioningStage::kWiFiNetworkSetup))
301302
{
302303
return false;
303304
}
304305
if (!mIsThread &&
305306
(stage == chip::Controller::CommissioningStage::kThreadNetworkEnable ||
307+
stage == chip::Controller::CommissioningStage::kFailsafeBeforeThreadEnable ||
306308
stage == chip::Controller::CommissioningStage::kThreadNetworkSetup))
307309
{
308310
return false;

src/controller/python/test/test_scripts/commissioning_failure_test.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ def main():
9696

9797
# TODO: Start at stage 2 once handling for arming failsafe on pase is done.
9898
if options.report:
99-
for testFailureStage in range(3, 18):
99+
for testFailureStage in range(3, 20):
100100
FailIfNot(test.TestPaseOnly(ip=options.deviceAddress1,
101101
setuppin=20202021,
102102
nodeid=1),
@@ -105,7 +105,7 @@ def main():
105105
"Commissioning failure tests failed for simulated report failure on stage {}".format(testFailureStage))
106106

107107
else:
108-
for testFailureStage in range(3, 18):
108+
for testFailureStage in range(3, 20):
109109
FailIfNot(test.TestPaseOnly(ip=options.deviceAddress1,
110110
setuppin=20202021,
111111
nodeid=1),

0 commit comments

Comments
 (0)