Skip to content

Commit e897696

Browse files
anush-applebzbarsky-apple
authored andcommitted
[Metrics] Added additional metrics for CASE and Subscription (project-chip#33934)
* [Metrics] Added additional metrics for CASE and Subscription - CASE session tracking - Subscription Setup - MRP Retries * Fixed code review comment * Restyler fixes * Update src/darwin/Framework/CHIP/MTRDevice.mm Co-authored-by: Boris Zbarsky <bzbarsky@apple.com> * Addressed code review comments * Update src/darwin/Framework/CHIP/MTRMetricKeys.h Co-authored-by: Boris Zbarsky <bzbarsky@apple.com> * Fixed build failure when tracing is off * Resytle fixes --------- Co-authored-by: Boris Zbarsky <bzbarsky@apple.com>
1 parent 3e3aa71 commit e897696

File tree

6 files changed

+114
-14
lines changed

6 files changed

+114
-14
lines changed

src/app/ReadClient.cpp

+16-1
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include <messaging/ReliableMessageMgr.h>
3434
#include <messaging/ReliableMessageProtocolConfig.h>
3535
#include <platform/LockTracker.h>
36+
#include <tracing/metric_event.h>
3637

3738
#include <app-common/zap-generated/cluster-objects.h>
3839
#include <app-common/zap-generated/ids/Attributes.h>
@@ -186,6 +187,11 @@ void ReadClient::Close(CHIP_ERROR aError, bool allowResubscription)
186187
}
187188
else
188189
{
190+
if (IsAwaitingInitialReport() || IsAwaitingSubscribeResponse())
191+
{
192+
MATTER_LOG_METRIC_END(Tracing::kMetricDeviceSubscriptionSetup, aError);
193+
}
194+
189195
ClearActiveSubscriptionState();
190196
if (aError != CHIP_NO_ERROR)
191197
{
@@ -491,6 +497,7 @@ CHIP_ERROR ReadClient::OnMessageReceived(Messaging::ExchangeContext * apExchange
491497
ChipLogProgress(DataManagement, "SubscribeResponse is received");
492498
VerifyOrExit(apExchangeContext == mExchange.Get(), err = CHIP_ERROR_INCORRECT_STATE);
493499
err = ProcessSubscribeResponse(std::move(aPayload));
500+
MATTER_LOG_METRIC_END(Tracing::kMetricDeviceSubscriptionSetup, err);
494501
}
495502
else if (aPayloadHeader.HasMessageType(Protocols::InteractionModel::MsgType::StatusResponse))
496503
{
@@ -684,6 +691,7 @@ void ReadClient::OnResponseTimeout(Messaging::ExchangeContext * apExchangeContex
684691
{
685692
ChipLogError(DataManagement, "Time out! failed to receive report data from Exchange: " ChipLogFormatExchange,
686693
ChipLogValueExchange(apExchangeContext));
694+
687695
Close(CHIP_ERROR_TIMEOUT);
688696
}
689697

@@ -1096,11 +1104,18 @@ CHIP_ERROR ReadClient::SendSubscribeRequest(const ReadPrepareParams & aReadPrepa
10961104
VerifyOrReturnError(aReadPrepareParams.mMinIntervalFloorSeconds <= aReadPrepareParams.mMaxIntervalCeilingSeconds,
10971105
CHIP_ERROR_INVALID_ARGUMENT);
10981106

1099-
return SendSubscribeRequestImpl(aReadPrepareParams);
1107+
auto err = SendSubscribeRequestImpl(aReadPrepareParams);
1108+
if (CHIP_NO_ERROR != err)
1109+
{
1110+
MATTER_LOG_METRIC_END(Tracing::kMetricDeviceSubscriptionSetup, err);
1111+
}
1112+
return err;
11001113
}
11011114

11021115
CHIP_ERROR ReadClient::SendSubscribeRequestImpl(const ReadPrepareParams & aReadPrepareParams)
11031116
{
1117+
MATTER_LOG_METRIC_BEGIN(Tracing::kMetricDeviceSubscriptionSetup);
1118+
11041119
VerifyOrReturnError(ClientState::Idle == mState, CHIP_ERROR_INCORRECT_STATE);
11051120

11061121
if (&aReadPrepareParams != &mReadPrepareParams)

src/darwin/Framework/CHIP/MTRDevice.mm

+6
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ - (void)_deviceInternalStateChanged:(MTRDevice *)device;
160160
using namespace chip;
161161
using namespace chip::app;
162162
using namespace chip::Protocols::InteractionModel;
163+
using namespace chip::Tracing::DarwinFramework;
163164

164165
typedef void (^FirstReportHandler)(void);
165166

@@ -1184,6 +1185,7 @@ - (void)_handleSubscriptionEstablished
11841185
if (HadSubscriptionEstablishedOnce(_internalDeviceState)) {
11851186
[self _changeInternalState:MTRInternalDeviceStateLaterSubscriptionEstablished];
11861187
} else {
1188+
MATTER_LOG_METRIC_END(kMetricMTRDeviceInitialSubscriptionSetup, CHIP_NO_ERROR);
11871189
[self _changeInternalState:MTRInternalDeviceStateInitialSubscriptionEstablished];
11881190
}
11891191

@@ -2340,6 +2342,10 @@ - (void)_setupSubscriptionWithReason:(NSString *)reason
23402342
});
23412343
}
23422344

2345+
// This marks begin of initial subscription to the device (before CASE is established). The end is only marked after successfully setting
2346+
// up the subscription since it is always retried as long as the MTRDevice is kept running.
2347+
MATTER_LOG_METRIC_BEGIN(kMetricMTRDeviceInitialSubscriptionSetup);
2348+
23432349
// Call directlyGetSessionForNode because the subscription setup already goes through the subscription pool queue
23442350
[_deviceController
23452351
directlyGetSessionForNode:_nodeID.unsignedLongLongValue

src/darwin/Framework/CHIP/MTRMetricKeys.h

+4
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
#include <lib/core/CHIPError.h>
1818
#include <tracing/metric_event.h>
19+
#include <tracing/metric_keys.h>
1920

2021
namespace chip {
2122
namespace Tracing {
@@ -87,6 +88,9 @@ constexpr Tracing::MetricKey kMetricBLEDevicesRemoved = "dwnfw_ble_devices_remov
8788
// Unexpected C quality attribute update outside of priming
8889
constexpr Tracing::MetricKey kMetricUnexpectedCQualityUpdate = "dwnpm_bad_c_attr_update";
8990

91+
// Setup from darwin MTRDevice for initial subscription to a device
92+
constexpr Tracing::MetricKey kMetricMTRDeviceInitialSubscriptionSetup = "dwnpm_dev_initial_subscription_setup";
93+
9094
} // namespace DarwinFramework
9195
} // namespace Tracing
9296
} // namespace chip

src/messaging/ReliableMessageMgr.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include <messaging/ReliableMessageContext.h>
3737
#include <messaging/ReliableMessageMgr.h>
3838
#include <platform/ConnectivityManager.h>
39+
#include <tracing/metric_event.h>
3940

4041
#if CHIP_CONFIG_ENABLE_ICD_SERVER
4142
#include <app/icd/server/ICDConfigurationData.h> // nogncheck
@@ -170,6 +171,7 @@ void ReliableMessageMgr::ExecuteActions()
170171
"Retransmitting MessageCounter:" ChipLogFormatMessageCounter " on exchange " ChipLogFormatExchange
171172
" Send Cnt %d",
172173
messageCounter, ChipLogValueExchange(&entry->ec.Get()), entry->sendCount);
174+
MATTER_LOG_METRIC(Tracing::kMetricDeviceRMPRetryCount, entry->sendCount);
173175

174176
CalculateNextRetransTime(*entry);
175177
SendFromRetransTable(entry);

src/protocols/secure_channel/CASESession.cpp

+62-13
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
#include <system/SystemClock.h>
4848
#include <system/TLVPacketBufferBackingStore.h>
4949
#include <tracing/macros.h>
50+
#include <tracing/metric_event.h>
5051
#include <transport/SessionManager.h>
5152

5253
namespace {
@@ -101,6 +102,7 @@ using namespace Credentials;
101102
using namespace Messaging;
102103
using namespace Encoding;
103104
using namespace Protocols::SecureChannel;
105+
using namespace Tracing;
104106

105107
constexpr uint8_t kKDFSR2Info[] = { 0x53, 0x69, 0x67, 0x6d, 0x61, 0x32 };
106108
constexpr uint8_t kKDFSR3Info[] = { 0x53, 0x69, 0x67, 0x6d, 0x61, 0x33 };
@@ -521,14 +523,15 @@ CHIP_ERROR CASESession::EstablishSession(SessionManager & sessionManager, Fabric
521523
CHIP_ERROR err = CHIP_NO_ERROR;
522524

523525
// Return early on error here, as we have not initialized any state yet
524-
ReturnErrorCodeIf(exchangeCtxt == nullptr, CHIP_ERROR_INVALID_ARGUMENT);
525-
ReturnErrorCodeIf(fabricTable == nullptr, CHIP_ERROR_INVALID_ARGUMENT);
526+
ReturnErrorCodeWithMetricIf(kMetricDeviceCASESession, exchangeCtxt == nullptr, CHIP_ERROR_INVALID_ARGUMENT);
527+
ReturnErrorCodeWithMetricIf(kMetricDeviceCASESession, fabricTable == nullptr, CHIP_ERROR_INVALID_ARGUMENT);
526528

527529
// Use FabricTable directly to avoid situation of dangling index from stale FabricInfo
528530
// until we factor-out any FabricInfo direct usage.
529-
ReturnErrorCodeIf(peerScopedNodeId.GetFabricIndex() == kUndefinedFabricIndex, CHIP_ERROR_INVALID_ARGUMENT);
531+
ReturnErrorCodeWithMetricIf(kMetricDeviceCASESession, peerScopedNodeId.GetFabricIndex() == kUndefinedFabricIndex,
532+
CHIP_ERROR_INVALID_ARGUMENT);
530533
const auto * fabricInfo = fabricTable->FindFabricWithIndex(peerScopedNodeId.GetFabricIndex());
531-
ReturnErrorCodeIf(fabricInfo == nullptr, CHIP_ERROR_INVALID_ARGUMENT);
534+
ReturnErrorCodeWithMetricIf(kMetricDeviceCASESession, fabricInfo == nullptr, CHIP_ERROR_INVALID_ARGUMENT);
532535

533536
err = Init(sessionManager, policy, delegate, peerScopedNodeId);
534537

@@ -542,9 +545,11 @@ CHIP_ERROR CASESession::EstablishSession(SessionManager & sessionManager, Fabric
542545

543546
// From here onwards, let's go to exit on error, as some state might have already
544547
// been initialized
545-
SuccessOrExit(err);
548+
SuccessOrExitWithMetric(kMetricDeviceCASESession, err);
546549

547-
SuccessOrExit(err = fabricTable->AddFabricDelegate(this));
550+
SuccessOrExitWithMetric(kMetricDeviceCASESession, err = fabricTable->AddFabricDelegate(this));
551+
552+
MATTER_LOG_METRIC_BEGIN(kMetricDeviceCASESession);
548553

549554
// Set the PeerAddress in the secure session up front to indicate the
550555
// Transport Type of the session that is being set up.
@@ -571,13 +576,16 @@ CHIP_ERROR CASESession::EstablishSession(SessionManager & sessionManager, Fabric
571576
}
572577
else
573578
{
579+
MATTER_LOG_METRIC_BEGIN(kMetricDeviceCASESessionSigma1);
574580
err = SendSigma1();
575581
SuccessOrExit(err);
576582
}
577583

578584
exit:
579585
if (err != CHIP_NO_ERROR)
580586
{
587+
MATTER_LOG_METRIC_END(kMetricDeviceCASESessionSigma1, err);
588+
MATTER_LOG_METRIC_END(kMetricDeviceCASESession, err);
581589
Clear();
582590
}
583591
return err;
@@ -601,6 +609,7 @@ void CASESession::OnResponseTimeout(ExchangeContext * ec)
601609

602610
void CASESession::AbortPendingEstablish(CHIP_ERROR err)
603611
{
612+
MATTER_LOG_METRIC_END(kMetricDeviceCASESession, err);
604613
MATTER_TRACE_SCOPE("AbortPendingEstablish", "CASESession");
605614
// This needs to come before Clear() which will reset mState.
606615
SessionEstablishmentStage state = MapCASEStateToSessionEstablishmentStage(mState);
@@ -851,7 +860,17 @@ CHIP_ERROR CASESession::SendSigma1()
851860
ReturnErrorOnFailure(mExchangeCtxt.Value()->SendMessage(Protocols::SecureChannel::MsgType::CASE_Sigma1, std::move(msg_R1),
852861
SendFlags(SendMessageFlags::kExpectResponse)));
853862

854-
mState = resuming ? State::kSentSigma1Resume : State::kSentSigma1;
863+
if (resuming)
864+
{
865+
mState = State::kSentSigma1Resume;
866+
867+
// Flags that Resume is being attempted
868+
MATTER_LOG_METRIC(kMetricDeviceCASESessionSigma1Resume);
869+
}
870+
else
871+
{
872+
mState = State::kSentSigma1;
873+
}
855874

856875
ChipLogProgress(SecureChannel, "Sent Sigma1 msg");
857876

@@ -984,7 +1003,13 @@ CHIP_ERROR CASESession::HandleSigma1(System::PacketBufferHandle && msg)
9841003
std::copy(resumptionId.begin(), resumptionId.end(), mResumeResumptionId.begin());
9851004

9861005
// Send Sigma2Resume message to the initiator
987-
SuccessOrExit(err = SendSigma2Resume());
1006+
MATTER_LOG_METRIC_BEGIN(kMetricDeviceCASESessionSigma2Resume);
1007+
err = SendSigma2Resume();
1008+
if (CHIP_NO_ERROR != err)
1009+
{
1010+
MATTER_LOG_METRIC_END(kMetricDeviceCASESessionSigma2Resume, err);
1011+
}
1012+
SuccessOrExit(err);
9881013

9891014
mDelegate->OnSessionEstablishmentStarted();
9901015

@@ -1013,7 +1038,13 @@ CHIP_ERROR CASESession::HandleSigma1(System::PacketBufferHandle && msg)
10131038
// mRemotePubKey.Length() == initiatorPubKey.size() == kP256_PublicKey_Length.
10141039
memcpy(mRemotePubKey.Bytes(), initiatorPubKey.data(), mRemotePubKey.Length());
10151040

1016-
SuccessOrExit(err = SendSigma2());
1041+
MATTER_LOG_METRIC_BEGIN(kMetricDeviceCASESessionSigma2);
1042+
err = SendSigma2();
1043+
if (CHIP_NO_ERROR != err)
1044+
{
1045+
MATTER_LOG_METRIC_END(kMetricDeviceCASESessionSigma2, err);
1046+
}
1047+
SuccessOrExit(err);
10171048

10181049
mDelegate->OnSessionEstablishmentStarted();
10191050

@@ -1236,6 +1267,7 @@ CHIP_ERROR CASESession::HandleSigma2Resume(System::PacketBufferHandle && msg)
12361267

12371268
ChipLogDetail(SecureChannel, "Received Sigma2Resume msg");
12381269
MATTER_TRACE_COUNTER("Sigma2Resume");
1270+
MATTER_LOG_METRIC_END(kMetricDeviceCASESessionSigma1, err);
12391271

12401272
uint8_t sigma2ResumeMIC[CHIP_CRYPTO_AEAD_MIC_LENGTH_BYTES];
12411273

@@ -1278,6 +1310,7 @@ CHIP_ERROR CASESession::HandleSigma2Resume(System::PacketBufferHandle && msg)
12781310
ChipLogError(SecureChannel, "Unable to save session resumption state: %" CHIP_ERROR_FORMAT, err2.Format());
12791311
}
12801312

1313+
MATTER_LOG_METRIC(kMetricDeviceCASESessionSigmaFinished);
12811314
SendStatusReport(mExchangeCtxt, kProtocolCodeSuccess);
12821315

12831316
mState = State::kFinishedViaResume;
@@ -1294,10 +1327,17 @@ CHIP_ERROR CASESession::HandleSigma2Resume(System::PacketBufferHandle && msg)
12941327
CHIP_ERROR CASESession::HandleSigma2_and_SendSigma3(System::PacketBufferHandle && msg)
12951328
{
12961329
MATTER_TRACE_SCOPE("HandleSigma2_and_SendSigma3", "CASESession");
1297-
ReturnErrorOnFailure(HandleSigma2(std::move(msg)));
1298-
ReturnErrorOnFailure(SendSigma3a());
1330+
CHIP_ERROR err = HandleSigma2(std::move(msg));
1331+
MATTER_LOG_METRIC_END(kMetricDeviceCASESessionSigma1, err);
1332+
ReturnErrorOnFailure(err);
12991333

1300-
return CHIP_NO_ERROR;
1334+
MATTER_LOG_METRIC_BEGIN(kMetricDeviceCASESessionSigma3);
1335+
err = SendSigma3a();
1336+
if (CHIP_NO_ERROR != err)
1337+
{
1338+
MATTER_LOG_METRIC_END(kMetricDeviceCASESessionSigma3, err);
1339+
}
1340+
return err;
13011341
}
13021342

13031343
CHIP_ERROR CASESession::HandleSigma2(System::PacketBufferHandle && msg)
@@ -1708,6 +1748,7 @@ CHIP_ERROR CASESession::HandleSigma3a(System::PacketBufferHandle && msg)
17081748

17091749
ChipLogProgress(SecureChannel, "Received Sigma3 msg");
17101750
MATTER_TRACE_COUNTER("Sigma3");
1751+
MATTER_LOG_METRIC_END(kMetricDeviceCASESessionSigma2, err);
17111752

17121753
auto helper = WorkHelper<HandleSigma3Data>::Create(*this, &HandleSigma3b, &CASESession::HandleSigma3c);
17131754
VerifyOrExit(helper, err = CHIP_ERROR_NO_MEMORY);
@@ -1888,6 +1929,7 @@ CHIP_ERROR CASESession::HandleSigma3c(HandleSigma3Data & data, CHIP_ERROR status
18881929
}
18891930
}
18901931

1932+
MATTER_LOG_METRIC(kMetricDeviceCASESessionSigmaFinished);
18911933
SendStatusReport(mExchangeCtxt, kProtocolCodeSuccess);
18921934

18931935
mState = State::kFinished;
@@ -2288,6 +2330,7 @@ CHIP_ERROR CASESession::OnMessageReceived(ExchangeContext * ec, const PayloadHea
22882330

22892331
case MsgType::StatusReport:
22902332
err = HandleStatusReport(std::move(msg), /* successExpected*/ false);
2333+
MATTER_LOG_METRIC_END(kMetricDeviceCASESessionSigma1, err);
22912334
break;
22922335

22932336
default:
@@ -2308,6 +2351,7 @@ CHIP_ERROR CASESession::OnMessageReceived(ExchangeContext * ec, const PayloadHea
23082351

23092352
case MsgType::StatusReport:
23102353
err = HandleStatusReport(std::move(msg), /* successExpected*/ false);
2354+
MATTER_LOG_METRIC_END(kMetricDeviceCASESessionSigma1, err);
23112355
break;
23122356

23132357
default:
@@ -2324,6 +2368,7 @@ CHIP_ERROR CASESession::OnMessageReceived(ExchangeContext * ec, const PayloadHea
23242368

23252369
case MsgType::StatusReport:
23262370
err = HandleStatusReport(std::move(msg), /* successExpected*/ false);
2371+
MATTER_LOG_METRIC_END(kMetricDeviceCASESessionSigma2, err);
23272372
break;
23282373

23292374
default:
@@ -2335,7 +2380,11 @@ CHIP_ERROR CASESession::OnMessageReceived(ExchangeContext * ec, const PayloadHea
23352380
case State::kSentSigma2Resume:
23362381
if (msgType == Protocols::SecureChannel::MsgType::StatusReport)
23372382
{
2338-
err = HandleStatusReport(std::move(msg), /* successExpected*/ true);
2383+
// Need to capture before invoking status report since 'this' might be deallocated on successful completion of sigma3
2384+
MetricKey key = (mState == State::kSentSigma3) ? kMetricDeviceCASESessionSigma3 : kMetricDeviceCASESessionSigma2Resume;
2385+
err = HandleStatusReport(std::move(msg), /* successExpected*/ true);
2386+
MATTER_LOG_METRIC_END(key, err);
2387+
IgnoreUnusedVariable(key);
23392388
}
23402389
break;
23412390
default:

src/tracing/metric_keys.h

+24
Original file line numberDiff line numberDiff line change
@@ -54,5 +54,29 @@ constexpr MetricKey kMetricDeviceOperationalDiscoveryAttemptCount = "core_dev_op
5454
// CASE Session
5555
constexpr MetricKey kMetricDeviceCASESession = "core_dev_case_session";
5656

57+
// CASE Session Sigma1
58+
constexpr MetricKey kMetricDeviceCASESessionSigma1 = "core_dev_case_session_sigma1";
59+
60+
// CASE Session Sigma1Resume
61+
constexpr MetricKey kMetricDeviceCASESessionSigma1Resume = "core_dev_case_session_sigma1_resume";
62+
63+
// CASE Session Sigma2
64+
constexpr MetricKey kMetricDeviceCASESessionSigma2 = "core_dev_case_session_sigma2";
65+
66+
// CASE Session Sigma3
67+
constexpr MetricKey kMetricDeviceCASESessionSigma3 = "core_dev_case_session_sigma3";
68+
69+
// CASE Session Sigma2 Resume
70+
constexpr MetricKey kMetricDeviceCASESessionSigma2Resume = "core_dev_case_session_sigma2_resume";
71+
72+
// CASE Session SigmaFinished
73+
constexpr MetricKey kMetricDeviceCASESessionSigmaFinished = "core_dev_case_session_sigma_finished";
74+
75+
// MRP Retry Counter
76+
constexpr MetricKey kMetricDeviceRMPRetryCount = "core_dev_rmp_retry_count";
77+
78+
// Subscription setup
79+
constexpr MetricKey kMetricDeviceSubscriptionSetup = "core_dev_subscription_setup";
80+
5781
} // namespace Tracing
5882
} // namespace chip

0 commit comments

Comments
 (0)