Skip to content

Commit 443bb9c

Browse files
[Metrics] Metrics in core commissioning phase (#33247)
* [Metrics] Metrics in core commissioning phase - Added metrics tracing in core parts of the commissionning flow: - PASE - OperationalSessionSetup - Different stages of commissioning * Fixing code review comments * Fixed a typo * Fixed Code Review Comment * Update src/app/OperationalSessionSetup.cpp Co-authored-by: Boris Zbarsky <bzbarsky@apple.com> * Resytler fixes --------- Co-authored-by: Boris Zbarsky <bzbarsky@apple.com>
1 parent 320f83e commit 443bb9c

9 files changed

+268
-41
lines changed

src/app/OperationalSessionSetup.cpp

+22
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,13 @@
3838
#include <lib/support/logging/CHIPLogging.h>
3939
#include <system/SystemClock.h>
4040
#include <system/SystemLayer.h>
41+
#include <tracing/metric_event.h>
4142

4243
using namespace chip::Callback;
4344
using chip::AddressResolve::NodeLookupRequest;
4445
using chip::AddressResolve::Resolver;
4546
using chip::AddressResolve::ResolveResult;
47+
using namespace chip::Tracing;
4648

4749
namespace chip {
4850

@@ -305,9 +307,11 @@ CHIP_ERROR OperationalSessionSetup::EstablishConnection(const ReliableMessagePro
305307
mCASEClient = mClientPool->Allocate();
306308
ReturnErrorCodeIf(mCASEClient == nullptr, CHIP_ERROR_NO_MEMORY);
307309

310+
MATTER_LOG_METRIC_BEGIN(kMetricDeviceCASESession);
308311
CHIP_ERROR err = mCASEClient->EstablishSession(mInitParams, mPeerId, mDeviceAddress, config, this);
309312
if (err != CHIP_NO_ERROR)
310313
{
314+
MATTER_LOG_METRIC_END(kMetricDeviceCASESession, err);
311315
CleanupCASEClient();
312316
return err;
313317
}
@@ -503,6 +507,10 @@ void OperationalSessionSetup::OnSessionEstablishmentError(CHIP_ERROR error, Sess
503507
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
504508
}
505509

510+
// Session failed to be established. This is when discovery is also stopped
511+
MATTER_LOG_METRIC_END(kMetricDeviceOperationalDiscovery, error);
512+
MATTER_LOG_METRIC_END(kMetricDeviceCASESession, error);
513+
506514
DequeueConnectionCallbacks(error, stage);
507515
// Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
508516
}
@@ -521,6 +529,11 @@ void OperationalSessionSetup::OnSessionEstablished(const SessionHandle & session
521529
VerifyOrReturn(mState == State::Connecting,
522530
ChipLogError(Discovery, "OnSessionEstablished was called while we were not connecting"));
523531

532+
// Session has been established. This is when discovery is also stopped
533+
MATTER_LOG_METRIC_END(kMetricDeviceOperationalDiscovery, CHIP_NO_ERROR);
534+
535+
MATTER_LOG_METRIC_END(kMetricDeviceCASESession, CHIP_NO_ERROR);
536+
524537
if (!mSecureSession.Grab(session))
525538
{
526539
// Got an invalid session, just dispatch an error. We have to do this
@@ -591,6 +604,7 @@ CHIP_ERROR OperationalSessionSetup::LookupPeerAddress()
591604
{
592605
--mResolveAttemptsAllowed;
593606
}
607+
MATTER_LOG_METRIC(kMetricDeviceOperationalDiscoveryAttemptCount, mAttemptsDone);
594608
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
595609

596610
// NOTE: This is public API that can be used to update our stored peer
@@ -605,6 +619,10 @@ CHIP_ERROR OperationalSessionSetup::LookupPeerAddress()
605619
return CHIP_NO_ERROR;
606620
}
607621

622+
// This code can be reached multiple times, if we discover multiple addresses or do retries.
623+
// The metric backend can handle this and always picks the earliest occurrence as the start of the event.
624+
MATTER_LOG_METRIC_BEGIN(kMetricDeviceOperationalDiscovery);
625+
608626
auto const * fabricInfo = mInitParams.fabricTable->FindFabricWithIndex(mPeerId.GetFabricIndex());
609627
VerifyOrReturnError(fabricInfo != nullptr, CHIP_ERROR_INVALID_FABRIC_INDEX);
610628

@@ -676,6 +694,8 @@ void OperationalSessionSetup::OnNodeAddressResolutionFailed(const PeerId & peerI
676694
--mAttemptsDone;
677695
}
678696

697+
MATTER_LOG_METRIC(kMetricDeviceOperationalDiscoveryAttemptCount, mAttemptsDone);
698+
679699
CHIP_ERROR err = LookupPeerAddress();
680700
if (err == CHIP_NO_ERROR)
681701
{
@@ -690,6 +710,8 @@ void OperationalSessionSetup::OnNodeAddressResolutionFailed(const PeerId & peerI
690710
}
691711
#endif
692712

713+
MATTER_LOG_METRIC_END(kMetricDeviceOperationalDiscovery, reason);
714+
693715
// No need to modify any variables in `this` since call below releases `this`.
694716
DequeueConnectionCallbacks(reason);
695717
// Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.

src/controller/CHIPDeviceController.cpp

+34-6
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
#include <protocols/secure_channel/MessageCounterManager.h>
6060
#include <setup_payload/QRCodeSetupPayloadParser.h>
6161
#include <tracing/macros.h>
62+
#include <tracing/metric_event.h>
6263

6364
#if CONFIG_NETWORK_LAYER_BLE
6465
#include <ble/Ble.h>
@@ -79,6 +80,7 @@ using namespace chip::Transport;
7980
using namespace chip::Credentials;
8081
using namespace chip::app::Clusters;
8182
using namespace chip::Crypto;
83+
using namespace chip::Tracing;
8284

8385
namespace chip {
8486
namespace Controller {
@@ -666,16 +668,20 @@ CHIP_ERROR DeviceCommissioner::PairDevice(NodeId remoteDeviceId, const char * se
666668
CHIP_ERROR DeviceCommissioner::PairDevice(NodeId remoteDeviceId, RendezvousParameters & params)
667669
{
668670
MATTER_TRACE_SCOPE("PairDevice", "DeviceCommissioner");
669-
ReturnErrorOnFailure(EstablishPASEConnection(remoteDeviceId, params));
670-
return Commission(remoteDeviceId);
671+
ReturnErrorOnFailureWithMetric(kMetricDeviceCommissionerCommission, EstablishPASEConnection(remoteDeviceId, params));
672+
auto errorCode = Commission(remoteDeviceId);
673+
VerifyOrDoWithMetric(kMetricDeviceCommissionerCommission, CHIP_NO_ERROR == errorCode, errorCode);
674+
return errorCode;
671675
}
672676

673677
CHIP_ERROR DeviceCommissioner::PairDevice(NodeId remoteDeviceId, RendezvousParameters & rendezvousParams,
674678
CommissioningParameters & commissioningParams)
675679
{
676680
MATTER_TRACE_SCOPE("PairDevice", "DeviceCommissioner");
677-
ReturnErrorOnFailure(EstablishPASEConnection(remoteDeviceId, rendezvousParams));
678-
return Commission(remoteDeviceId, commissioningParams);
681+
ReturnErrorOnFailureWithMetric(kMetricDeviceCommissionerCommission, EstablishPASEConnection(remoteDeviceId, rendezvousParams));
682+
auto errorCode = Commission(remoteDeviceId, commissioningParams);
683+
VerifyOrDoWithMetric(kMetricDeviceCommissionerCommission, CHIP_NO_ERROR == errorCode, errorCode);
684+
return errorCode;
679685
}
680686

681687
CHIP_ERROR DeviceCommissioner::EstablishPASEConnection(NodeId remoteDeviceId, const char * setUpCode, DiscoveryType discoveryType,
@@ -689,6 +695,8 @@ CHIP_ERROR DeviceCommissioner::EstablishPASEConnection(NodeId remoteDeviceId, co
689695
CHIP_ERROR DeviceCommissioner::EstablishPASEConnection(NodeId remoteDeviceId, RendezvousParameters & params)
690696
{
691697
MATTER_TRACE_SCOPE("EstablishPASEConnection", "DeviceCommissioner");
698+
MATTER_LOG_METRIC_BEGIN(kMetricDeviceCommissionerPASESession);
699+
692700
CHIP_ERROR err = CHIP_NO_ERROR;
693701
CommissioneeDeviceProxy * device = nullptr;
694702
CommissioneeDeviceProxy * current = nullptr;
@@ -736,6 +744,7 @@ CHIP_ERROR DeviceCommissioner::EstablishPASEConnection(NodeId remoteDeviceId, Re
736744
// We already have an open secure session to this device, call the callback immediately and early return.
737745
mPairingDelegate->OnPairingComplete(CHIP_NO_ERROR);
738746
}
747+
MATTER_LOG_METRIC_END(kMetricDeviceCommissionerPASESession, CHIP_NO_ERROR);
739748
return CHIP_NO_ERROR;
740749
}
741750
if (current->IsSessionSetupInProgress())
@@ -818,6 +827,7 @@ CHIP_ERROR DeviceCommissioner::EstablishPASEConnection(NodeId remoteDeviceId, Re
818827
{
819828
ReleaseCommissioneeDevice(device);
820829
}
830+
MATTER_LOG_METRIC_END(kMetricDeviceCommissionerPASESession, err);
821831
}
822832

823833
return err;
@@ -869,8 +879,10 @@ CHIP_ERROR DeviceCommissioner::Commission(NodeId remoteDeviceId, CommissioningPa
869879
ChipLogError(Controller, "No default commissioner is specified");
870880
return CHIP_ERROR_INCORRECT_STATE;
871881
}
872-
ReturnErrorOnFailure(mDefaultCommissioner->SetCommissioningParameters(params));
873-
return Commission(remoteDeviceId);
882+
ReturnErrorOnFailureWithMetric(kMetricDeviceCommissionerCommission, mDefaultCommissioner->SetCommissioningParameters(params));
883+
auto errorCode = Commission(remoteDeviceId);
884+
VerifyOrDoWithMetric(kMetricDeviceCommissionerCommission, CHIP_NO_ERROR == errorCode, errorCode);
885+
return errorCode;
874886
}
875887

876888
CHIP_ERROR DeviceCommissioner::Commission(NodeId remoteDeviceId)
@@ -908,6 +920,7 @@ CHIP_ERROR DeviceCommissioner::Commission(NodeId remoteDeviceId)
908920
mDefaultCommissioner->SetOperationalCredentialsDelegate(mOperationalCredentialsDelegate);
909921
if (device->IsSecureConnected())
910922
{
923+
MATTER_LOG_METRIC_BEGIN(kMetricDeviceCommissionerCommission);
911924
mDefaultCommissioner->StartCommissioning(this, device);
912925
}
913926
else
@@ -1058,6 +1071,8 @@ void DeviceCommissioner::RendezvousCleanup(CHIP_ERROR status)
10581071

10591072
void DeviceCommissioner::OnSessionEstablishmentError(CHIP_ERROR err)
10601073
{
1074+
MATTER_LOG_METRIC_END(kMetricDeviceCommissionerPASESession, err);
1075+
10611076
if (mPairingDelegate != nullptr)
10621077
{
10631078
mPairingDelegate->OnStatusUpdate(DevicePairingDelegate::SecurePairingFailed);
@@ -1086,6 +1101,7 @@ void DeviceCommissioner::OnSessionEstablished(const SessionHandle & session)
10861101

10871102
ChipLogDetail(Controller, "Remote device completed SPAKE2+ handshake");
10881103

1104+
MATTER_LOG_METRIC_END(kMetricDeviceCommissionerPASESession, CHIP_NO_ERROR);
10891105
if (mPairingDelegate != nullptr)
10901106
{
10911107
mPairingDelegate->OnPairingComplete(CHIP_NO_ERROR);
@@ -1094,6 +1110,7 @@ void DeviceCommissioner::OnSessionEstablished(const SessionHandle & session)
10941110
if (mRunCommissioningAfterConnection)
10951111
{
10961112
mRunCommissioningAfterConnection = false;
1113+
MATTER_LOG_METRIC_BEGIN(kMetricDeviceCommissionerCommission);
10971114
mDefaultCommissioner->StartCommissioning(this, device);
10981115
}
10991116
}
@@ -1916,6 +1933,8 @@ void DeviceCommissioner::CleanupDoneAfterError()
19161933

19171934
void DeviceCommissioner::SendCommissioningCompleteCallbacks(NodeId nodeId, const CompletionStatus & completionStatus)
19181935
{
1936+
MATTER_LOG_METRIC_END(kMetricDeviceCommissionerCommission, completionStatus.err);
1937+
19191938
ChipLogProgress(Controller, "Commissioning complete for node ID 0x" ChipLogFormatX64 ": %s", ChipLogValueX64(nodeId),
19201939
(completionStatus.err == CHIP_NO_ERROR ? "success" : completionStatus.err.AsString()));
19211940
mCommissioningStage = CommissioningStage::kSecurePairing;
@@ -1924,6 +1943,7 @@ void DeviceCommissioner::SendCommissioningCompleteCallbacks(NodeId nodeId, const
19241943
{
19251944
return;
19261945
}
1946+
19271947
mPairingDelegate->OnCommissioningComplete(nodeId, completionStatus.err);
19281948
PeerId peerId(GetCompressedFabricId(), nodeId);
19291949
if (completionStatus.err == CHIP_NO_ERROR)
@@ -1943,6 +1963,7 @@ void DeviceCommissioner::CommissioningStageComplete(CHIP_ERROR err, Commissionin
19431963
{
19441964
// Once this stage is complete, reset mDeviceBeingCommissioned - this will be reset when the delegate calls the next step.
19451965
MATTER_TRACE_SCOPE("CommissioningStageComplete", "DeviceCommissioner");
1966+
MATTER_LOG_METRIC_END(MetricKeyForCommissioningStage(mCommissioningStage), err);
19461967
VerifyOrDie(mDeviceBeingCommissioned);
19471968

19481969
NodeId nodeId = mDeviceBeingCommissioned->GetDeviceId();
@@ -1978,6 +1999,7 @@ void DeviceCommissioner::OnDeviceConnectedFn(void * context, Messaging::Exchange
19781999
const SessionHandle & sessionHandle)
19792000
{
19802001
// CASE session established.
2002+
MATTER_LOG_METRIC_END(kMetricDeviceCommissioningOperationalSetup, CHIP_NO_ERROR);
19812003
DeviceCommissioner * commissioner = static_cast<DeviceCommissioner *>(context);
19822004
VerifyOrDie(commissioner->mCommissioningStage == CommissioningStage::kFindOperationalForStayActive ||
19832005
commissioner->mCommissioningStage == CommissioningStage::kFindOperationalForCommissioningComplete);
@@ -1992,6 +2014,7 @@ void DeviceCommissioner::OnDeviceConnectedFn(void * context, Messaging::Exchange
19922014
void DeviceCommissioner::OnDeviceConnectionFailureFn(void * context, const ScopedNodeId & peerId, CHIP_ERROR error)
19932015
{
19942016
// CASE session establishment failed.
2017+
MATTER_LOG_METRIC_END(kMetricDeviceCommissioningOperationalSetup, error);
19952018
DeviceCommissioner * commissioner = static_cast<DeviceCommissioner *>(context);
19962019
VerifyOrDie(commissioner->mCommissioningStage == CommissioningStage::kFindOperationalForStayActive ||
19972020
commissioner->mCommissioningStage == CommissioningStage::kFindOperationalForCommissioningComplete);
@@ -2748,7 +2771,11 @@ void DeviceCommissioner::SendCommissioningReadRequest(DeviceProxy * proxy, Optio
27482771
void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, CommissioningStage step, CommissioningParameters & params,
27492772
CommissioningDelegate * delegate, EndpointId endpoint,
27502773
Optional<System::Clock::Timeout> timeout)
2774+
27512775
{
2776+
MATTER_LOG_METRIC(kMetricDeviceCommissionerCommissionStage, step);
2777+
MATTER_LOG_METRIC_BEGIN(MetricKeyForCommissioningStage(step));
2778+
27522779
if (params.GetCompletionStatus().err == CHIP_NO_ERROR)
27532780
{
27542781
ChipLogProgress(Controller, "Performing next commissioning step '%s'", StageToString(step));
@@ -3385,6 +3412,7 @@ void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, Commissio
33853412
case CommissioningStage::kFindOperationalForCommissioningComplete: {
33863413
// If there is an error, CommissioningStageComplete will be called from OnDeviceConnectionFailureFn.
33873414
auto scopedPeerId = GetPeerScopedId(proxy->GetDeviceId());
3415+
MATTER_LOG_METRIC_BEGIN(kMetricDeviceCommissioningOperationalSetup);
33883416
mSystemState->CASESessionMgr()->FindOrEstablishSession(scopedPeerId, &mOnDeviceConnectedCallback,
33893417
&mOnDeviceConnectionFailureCallback
33903418
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES

src/controller/CommissioningDelegate.cpp

+119
Original file line numberDiff line numberDiff line change
@@ -141,5 +141,124 @@ const char * StageToString(CommissioningStage stage)
141141
}
142142
}
143143

144+
#if MATTER_TRACING_ENABLED
145+
const char * MetricKeyForCommissioningStage(CommissioningStage stage)
146+
{
147+
switch (stage)
148+
{
149+
case kError:
150+
return "core_commissioning_stage_error";
151+
152+
case kSecurePairing:
153+
return "core_commissioning_stage_secure_pairing";
154+
155+
case kReadCommissioningInfo:
156+
return "core_commissioning_stage_read_commissioning_info";
157+
158+
case kReadCommissioningInfo2:
159+
return "core_commissioning_stage_read_commissioning_info2";
160+
161+
case kArmFailsafe:
162+
return "core_commissioning_stage_arm_failsafe";
163+
164+
case kScanNetworks:
165+
return "core_commissioning_stage_scan_networks";
166+
167+
case kConfigRegulatory:
168+
return "core_commissioning_stage_config_regulatory";
169+
170+
case kConfigureUTCTime:
171+
return "core_commissioning_stage_configure_utc_time";
172+
173+
case kConfigureTimeZone:
174+
return "core_commissioning_stage_configure_timezone";
175+
176+
case kConfigureDSTOffset:
177+
return "core_commissioning_stage_configure_dst_offset";
178+
179+
case kConfigureDefaultNTP:
180+
return "core_commissioning_stage_configure_default_ntp";
181+
182+
case kSendPAICertificateRequest:
183+
return "core_commissioning_stage_send_pai_certificate_request";
184+
185+
case kSendDACCertificateRequest:
186+
return "core_commissioning_stage_send_dac_certificate_request";
187+
188+
case kSendAttestationRequest:
189+
return "core_commissioning_stage_send_attestation_request";
190+
191+
case kAttestationVerification:
192+
return "core_commissioning_stage_attestation_verification";
193+
194+
case kSendOpCertSigningRequest:
195+
return "core_commissioning_stage_opcert_signing_request";
196+
197+
case kValidateCSR:
198+
return "core_commissioning_stage_validate_csr";
199+
200+
case kGenerateNOCChain:
201+
return "core_commissioning_stage_generate_noc_chain";
202+
203+
case kSendTrustedRootCert:
204+
return "core_commissioning_stage_send_trusted_root_cert";
205+
206+
case kSendNOC:
207+
return "core_commissioning_stage_send_noc";
208+
209+
case kConfigureTrustedTimeSource:
210+
return "core_commissioning_stage_configure_trusted_time_source";
211+
212+
case kICDGetRegistrationInfo:
213+
return "core_commissioning_stage_icd_get_registration_info";
214+
215+
case kICDRegistration:
216+
return "core_commissioning_stage_icd_registration";
217+
218+
case kWiFiNetworkSetup:
219+
return "core_commissioning_stage_wifi_network_setup";
220+
221+
case kThreadNetworkSetup:
222+
return "core_commissioning_stage_thread_network_setup";
223+
224+
case kFailsafeBeforeWiFiEnable:
225+
return "core_commissioning_stage_failsafe_before_wifi_enable";
226+
227+
case kFailsafeBeforeThreadEnable:
228+
return "core_commissioning_stage_failsafe_before_thread_enable";
229+
230+
case kWiFiNetworkEnable:
231+
return "core_commissioning_stage_wifi_network_enable";
232+
233+
case kThreadNetworkEnable:
234+
return "core_commissioning_stage_thread_network_enable";
235+
236+
case kEvictPreviousCaseSessions:
237+
return "core_commissioning_stage_evict_previous_case_sessions";
238+
239+
case kFindOperationalForStayActive:
240+
return "core_commissioning_stage_find_operational_for_stay_active";
241+
242+
case kFindOperationalForCommissioningComplete:
243+
return "core_commissioning_stage_find_operational_for_commissioning_complete";
244+
245+
case kICDSendStayActive:
246+
return "core_commissioning_stage_icd_send_stay_active";
247+
248+
case kSendComplete:
249+
return "core_commissioning_stage_send_complete";
250+
251+
case kCleanup:
252+
return "core_commissioning_stage_cleanup";
253+
254+
case kNeedsNetworkCreds:
255+
return "core_commissioning_stage_need_network_creds";
256+
257+
default:
258+
return "core_commissioning_stage_unknown";
259+
}
260+
}
261+
#endif
262+
144263
} // namespace Controller
145264
} // namespace chip

0 commit comments

Comments
 (0)