Skip to content

Commit 2c522cb

Browse files
Add records of session establishment for subscription resumption (#31755)
* Add records of session establishment for subscription resumption * Restyled by clang-format * review changes * Schedule subscription resumption when failing to establish the session in SubscriptionResumptionSessionEstablisher * Add option to set subscription timeout resumption retry interval seconds for Linux app Add cirque test for subscription resumption timeout * Restyled by clang-format * Restyled by autopep8 * Restyled by isort * fix CI building * Add test to the test list * add subscription resumption restries number to SubscriptionInfo struct * review changes * make resumption retries persistent * Restyled by clang-format * ci build fixes * try to fix cirque test --------- Co-authored-by: Restyled.io <commits@restyled.io>
1 parent 8ad1fc3 commit 2c522cb

12 files changed

+361
-9
lines changed

examples/platform/linux/AppMain.cpp

+6-1
Original file line numberDiff line numberDiff line change
@@ -570,7 +570,12 @@ void ChipLinuxAppMainLoop(AppMainLoopImplementation * impl)
570570
chip::app::InteractionModelEngine::GetInstance()->SetHandlerCapacityForSubscriptions(
571571
LinuxDeviceOptions::GetInstance().subscriptionCapacity);
572572
chip::app::InteractionModelEngine::GetInstance()->SetForceHandlerQuota(true);
573-
#endif
573+
#if CHIP_CONFIG_PERSIST_SUBSCRIPTIONS && CHIP_CONFIG_SUBSCRIPTION_TIMEOUT_RESUMPTION
574+
// Set subscription time resumption retry interval seconds
575+
chip::app::InteractionModelEngine::GetInstance()->SetSubscriptionTimeoutResumptionRetryIntervalSeconds(
576+
LinuxDeviceOptions::GetInstance().subscriptionResumptionRetryIntervalSec);
577+
#endif // CHIP_CONFIG_PERSIST_SUBSCRIPTIONS && CHIP_CONFIG_SUBSCRIPTION_TIMEOUT_RESUMPTION
578+
#endif // CONFIG_BUILD_FOR_HOST_UNIT_TEST
574579

575580
// Now that the server has started and we are done with our startup logging,
576581
// log our discovery/onboarding information again so it's not lost in the

examples/platform/linux/Options.cpp

+10-1
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,10 @@ enum
9090
#if CONFIG_BUILD_FOR_HOST_UNIT_TEST
9191
kDeviceOption_SubscriptionCapacity = 0x1024,
9292
#endif
93-
kDeviceOption_WiFiSupports5g = 0x1025
93+
kDeviceOption_WiFiSupports5g = 0x1025,
94+
#if CONFIG_BUILD_FOR_HOST_UNIT_TEST
95+
kDeviceOption_SubscriptionResumptionRetryIntervalSec = 0x1026,
96+
#endif
9497
};
9598

9699
constexpr unsigned kAppUsageLength = 64;
@@ -151,6 +154,7 @@ OptionDef sDeviceOptionDefs[] = {
151154
#endif
152155
#if CONFIG_BUILD_FOR_HOST_UNIT_TEST
153156
{ "subscription-capacity", kArgumentRequired, kDeviceOption_SubscriptionCapacity },
157+
{ "subscription-resumption-retry-interval", kArgumentRequired, kDeviceOption_SubscriptionResumptionRetryIntervalSec },
154158
#endif
155159
{}
156160
};
@@ -280,6 +284,8 @@ const char * sDeviceOptionHelp =
280284
#if CONFIG_BUILD_FOR_HOST_UNIT_TEST
281285
" --subscription-capacity\n"
282286
" Max number of subscriptions the device will allow\n"
287+
" --subscription-resumption-retry-interval\n"
288+
" subscription timeout resumption retry interval in seconds\n"
283289
#endif
284290
"\n";
285291

@@ -547,6 +553,9 @@ bool HandleOption(const char * aProgram, OptionSet * aOptions, int aIdentifier,
547553
case kDeviceOption_SubscriptionCapacity:
548554
LinuxDeviceOptions::GetInstance().subscriptionCapacity = static_cast<int32_t>(atoi(aValue));
549555
break;
556+
case kDeviceOption_SubscriptionResumptionRetryIntervalSec:
557+
LinuxDeviceOptions::GetInstance().subscriptionResumptionRetryIntervalSec = static_cast<int32_t>(atoi(aValue));
558+
break;
550559
#endif
551560
default:
552561
PrintArgError("%s: INTERNAL ERROR: Unhandled option: %s\n", aProgram, aName);

examples/platform/linux/Options.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,8 @@ struct LinuxDeviceOptions
7373
uint16_t rpcServerPort = 33000;
7474
#endif
7575
#if CONFIG_BUILD_FOR_HOST_UNIT_TEST
76-
int32_t subscriptionCapacity = CHIP_IM_MAX_NUM_SUBSCRIPTIONS;
76+
int32_t subscriptionCapacity = CHIP_IM_MAX_NUM_SUBSCRIPTIONS;
77+
int32_t subscriptionResumptionRetryIntervalSec = -1;
7778
#endif
7879
static LinuxDeviceOptions & GetInstance();
7980
};

scripts/tests/cirque_tests.sh

+1
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ CIRQUE_TESTS=(
5151
"CommissioningWindowTest"
5252
"SubscriptionResumptionTest"
5353
"SubscriptionResumptionCapacityTest"
54+
"SubscriptionResumptionTimeoutTest"
5455
)
5556

5657
BOLD_GREEN_TEXT="\033[1;32m"

src/app/InteractionModelEngine.cpp

+13-1
Original file line numberDiff line numberDiff line change
@@ -389,7 +389,11 @@ void InteractionModelEngine::OnDone(ReadHandler & apReadObj)
389389
mReportingEngine.ResetReadHandlerTracker(&apReadObj);
390390

391391
mReadHandlers.ReleaseObject(&apReadObj);
392+
TryToResumeSubscriptions();
393+
}
392394

395+
void InteractionModelEngine::TryToResumeSubscriptions()
396+
{
393397
#if CHIP_CONFIG_PERSIST_SUBSCRIPTIONS && CHIP_CONFIG_SUBSCRIPTION_TIMEOUT_RESUMPTION
394398
if (!mSubscriptionResumptionScheduled && HasSubscriptionsToResume())
395399
{
@@ -398,8 +402,10 @@ void InteractionModelEngine::OnDone(ReadHandler & apReadObj)
398402
mpExchangeMgr->GetSessionManager()->SystemLayer()->StartTimer(
399403
System::Clock::Seconds32(timeTillNextSubscriptionResumptionSecs), ResumeSubscriptionsTimerCallback, this);
400404
mNumSubscriptionResumptionRetries++;
405+
ChipLogProgress(InteractionModel, "Schedule subscription resumption when failing to establish session, Retries: %" PRIu32,
406+
mNumSubscriptionResumptionRetries);
401407
}
402-
#endif // CHIP_CONFIG_PERSIST_SUBSCRIPTIONS
408+
#endif // CHIP_CONFIG_PERSIST_SUBSCRIPTIONS && CHIP_CONFIG_SUBSCRIPTION_TIMEOUT_RESUMPTION
403409
}
404410

405411
Status InteractionModelEngine::OnInvokeCommandRequest(Messaging::ExchangeContext * apExchangeContext,
@@ -1990,6 +1996,12 @@ void InteractionModelEngine::ResumeSubscriptionsTimerCallback(System::Layer * ap
19901996
#if CHIP_CONFIG_PERSIST_SUBSCRIPTIONS && CHIP_CONFIG_SUBSCRIPTION_TIMEOUT_RESUMPTION
19911997
uint32_t InteractionModelEngine::ComputeTimeSecondsTillNextSubscriptionResumption()
19921998
{
1999+
#if CONFIG_BUILD_FOR_HOST_UNIT_TEST
2000+
if (mSubscriptionResumptionRetrySecondsOverride > 0)
2001+
{
2002+
return static_cast<uint32_t>(mSubscriptionResumptionRetrySecondsOverride);
2003+
}
2004+
#endif
19932005
if (mNumSubscriptionResumptionRetries > CHIP_CONFIG_SUBSCRIPTION_TIMEOUT_RESUMPTION_MAX_FIBONACCI_STEP_INDEX)
19942006
{
19952007
return CHIP_CONFIG_SUBSCRIPTION_TIMEOUT_RESUMPTION_MAX_RETRY_INTERVAL_SECS;

src/app/InteractionModelEngine.h

+19-1
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,19 @@ class InteractionModelEngine : public Messaging::UnsolicitedMessageHandler,
354354
//
355355
void SetForceHandlerQuota(bool forceHandlerQuota) { mForceHandlerQuota = forceHandlerQuota; }
356356

357+
#if CHIP_CONFIG_PERSIST_SUBSCRIPTIONS && CHIP_CONFIG_SUBSCRIPTION_TIMEOUT_RESUMPTION
358+
//
359+
// Override the subscription timeout resumption retry interval seconds. The default retry interval will be
360+
// 300s + GetFibonacciForIndex(retry_times) * 300s, which is too long for unit-tests.
361+
//
362+
// If -1 is passed in, no override is instituted and default behavior resumes.
363+
//
364+
void SetSubscriptionTimeoutResumptionRetryIntervalSeconds(int32_t seconds)
365+
{
366+
mSubscriptionResumptionRetrySecondsOverride = seconds;
367+
}
368+
#endif
369+
357370
//
358371
// When testing subscriptions using the high-level APIs in src/controller/ReadInteraction.h,
359372
// they don't provide for the ability to shut down those subscriptions after they've been established.
@@ -392,6 +405,8 @@ class InteractionModelEngine : public Messaging::UnsolicitedMessageHandler,
392405
void OnDone(CommandHandler & apCommandObj) override;
393406
void OnDone(ReadHandler & apReadObj) override;
394407

408+
void TryToResumeSubscriptions();
409+
395410
ReadHandler::ApplicationCallback * GetAppCallback() override { return mpReadHandlerApplicationCallback; }
396411

397412
InteractionModelEngine * GetInteractionModelEngine() override { return this; }
@@ -637,7 +652,10 @@ class InteractionModelEngine : public Messaging::UnsolicitedMessageHandler,
637652
// enforce such check based on the configured size. This flag is used for unit tests only, there is another compare time flag
638653
// CHIP_CONFIG_IM_FORCE_FABRIC_QUOTA_CHECK for stress tests.
639654
bool mForceHandlerQuota = false;
640-
#endif
655+
#if CHIP_CONFIG_PERSIST_SUBSCRIPTIONS && CHIP_CONFIG_SUBSCRIPTION_TIMEOUT_RESUMPTION
656+
int mSubscriptionResumptionRetrySecondsOverride = -1;
657+
#endif // CHIP_CONFIG_PERSIST_SUBSCRIPTIONS && CHIP_CONFIG_SUBSCRIPTION_TIMEOUT_RESUMPTION
658+
#endif // CONFIG_BUILD_FOR_HOST_UNIT_TEST
641659

642660
#if CHIP_CONFIG_PERSIST_SUBSCRIPTIONS && CHIP_CONFIG_SUBSCRIPTION_TIMEOUT_RESUMPTION
643661
bool HasSubscriptionsToResume();

src/app/SimpleSubscriptionResumptionStorage.cpp

+16
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ constexpr TLV::Tag SimpleSubscriptionResumptionStorage::kClusterIdTag;
4646
constexpr TLV::Tag SimpleSubscriptionResumptionStorage::kAttributeIdTag;
4747
constexpr TLV::Tag SimpleSubscriptionResumptionStorage::kEventIdTag;
4848
constexpr TLV::Tag SimpleSubscriptionResumptionStorage::kEventPathTypeTag;
49+
constexpr TLV::Tag SimpleSubscriptionResumptionStorage::kResumptionRetriesTag;
4950

5051
SimpleSubscriptionResumptionStorage::SimpleSubscriptionInfoIterator::SimpleSubscriptionInfoIterator(
5152
SimpleSubscriptionResumptionStorage & storage) :
@@ -252,6 +253,18 @@ CHIP_ERROR SimpleSubscriptionResumptionStorage::Load(uint16_t subscriptionIndex,
252253
}
253254
ReturnErrorOnFailure(reader.ExitContainer(eventsListType));
254255

256+
#if CHIP_CONFIG_SUBSCRIPTION_TIMEOUT_RESUMPTION
257+
// If the reader cannot get resumption retries, set it to 0 for subscriptionInfo
258+
if (reader.Next(kResumptionRetriesTag) == CHIP_NO_ERROR)
259+
{
260+
ReturnErrorOnFailure(reader.Get(subscriptionInfo.mResumptionRetries));
261+
}
262+
else
263+
{
264+
subscriptionInfo.mResumptionRetries = 0;
265+
}
266+
#endif // CHIP_CONFIG_SUBSCRIPTION_TIMEOUT_RESUMPTION
267+
255268
ReturnErrorOnFailure(reader.ExitContainer(subscriptionContainerType));
256269

257270
return CHIP_NO_ERROR;
@@ -307,6 +320,9 @@ CHIP_ERROR SimpleSubscriptionResumptionStorage::Save(TLV::TLVWriter & writer, Su
307320
ReturnErrorOnFailure(writer.EndContainer(eventContainerType));
308321
}
309322
ReturnErrorOnFailure(writer.EndContainer(eventsListType));
323+
#if CHIP_CONFIG_SUBSCRIPTION_TIMEOUT_RESUMPTION
324+
ReturnErrorOnFailure(writer.Put(kResumptionRetriesTag, subscriptionInfo.mResumptionRetries));
325+
#endif // CHIP_CONFIG_SUBSCRIPTION_TIMEOUT_RESUMPTION
310326

311327
ReturnErrorOnFailure(writer.EndContainer(subscriptionContainerType));
312328

src/app/SimpleSubscriptionResumptionStorage.h

+1
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ class SimpleSubscriptionResumptionStorage : public SubscriptionResumptionStorage
132132
static constexpr TLV::Tag kAttributeIdTag = TLV::ContextTag(13);
133133
static constexpr TLV::Tag kEventIdTag = TLV::ContextTag(14);
134134
static constexpr TLV::Tag kEventPathTypeTag = TLV::ContextTag(16);
135+
static constexpr TLV::Tag kResumptionRetriesTag = TLV::ContextTag(17);
135136

136137
PersistentStorageDelegate * mStorage;
137138
ObjectPool<SimpleSubscriptionInfoIterator, kIteratorsMax> mSubscriptionInfoIterators;

src/app/SubscriptionResumptionSessionEstablisher.cpp

+29-4
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ SubscriptionResumptionSessionEstablisher::ResumeSubscription(
5050
mSubscriptionInfo.mMinInterval = subscriptionInfo.mMinInterval;
5151
mSubscriptionInfo.mMaxInterval = subscriptionInfo.mMaxInterval;
5252
mSubscriptionInfo.mFabricFiltered = subscriptionInfo.mFabricFiltered;
53+
#if CHIP_CONFIG_SUBSCRIPTION_TIMEOUT_RESUMPTION
54+
mSubscriptionInfo.mResumptionRetries = subscriptionInfo.mResumptionRetries;
55+
#endif
5356
// Copy the Attribute Paths and Event Paths
5457
if (subscriptionInfo.mAttributePaths.AllocatedSize() > 0)
5558
{
@@ -100,6 +103,15 @@ void SubscriptionResumptionSessionEstablisher::HandleDeviceConnected(void * cont
100103
return;
101104
}
102105
readHandler->OnSubscriptionResumed(sessionHandle, *establisher);
106+
#if CHIP_CONFIG_SUBSCRIPTION_TIMEOUT_RESUMPTION
107+
// Reset the resumption retries to 0 if subscription is resumed
108+
subscriptionInfo.mResumptionRetries = 0;
109+
auto * subscriptionResumptionStorage = InteractionModelEngine::GetInstance()->GetSubscriptionResumptionStorage();
110+
if (subscriptionResumptionStorage)
111+
{
112+
subscriptionResumptionStorage->Save(subscriptionInfo);
113+
}
114+
#endif // CHIP_CONFIG_SUBSCRIPTION_TIMEOUT_RESUMPTION
103115
}
104116

105117
void SubscriptionResumptionSessionEstablisher::HandleDeviceConnectionFailure(void * context, const ScopedNodeId & peerId,
@@ -109,12 +121,25 @@ void SubscriptionResumptionSessionEstablisher::HandleDeviceConnectionFailure(voi
109121
SubscriptionResumptionStorage::SubscriptionInfo & subscriptionInfo = establisher->mSubscriptionInfo;
110122
ChipLogError(DataManagement, "Failed to establish CASE for subscription-resumption with error '%" CHIP_ERROR_FORMAT "'",
111123
error.Format());
112-
// If the device fails to establish the session, the subscriber might be offline and its subscription read client will
113-
// be deleted when the device reconnect to the subscriber. This subscription will be never used again. So clean up
114-
// the persistent subscription information storage.
115124
auto * subscriptionResumptionStorage = InteractionModelEngine::GetInstance()->GetSubscriptionResumptionStorage();
116-
if (subscriptionResumptionStorage)
125+
if (!subscriptionResumptionStorage)
126+
{
127+
ChipLogError(DataManagement, "Failed to get subscription resumption storage");
128+
return;
129+
}
130+
#if CHIP_CONFIG_SUBSCRIPTION_TIMEOUT_RESUMPTION
131+
if (subscriptionInfo.mResumptionRetries <= CHIP_CONFIG_SUBSCRIPTION_TIMEOUT_RESUMPTION_MAX_FIBONACCI_STEP_INDEX)
132+
{
133+
InteractionModelEngine::GetInstance()->TryToResumeSubscriptions();
134+
subscriptionInfo.mResumptionRetries++;
135+
subscriptionResumptionStorage->Save(subscriptionInfo);
136+
}
137+
else
138+
#endif // CHIP_CONFIG_SUBSCRIPTION_TIMEOUT_RESUMPTION
117139
{
140+
// If the device fails to establish the session several times, the subscriber might be offline and its subscription
141+
// read client will be deleted when the device reconnects to the subscriber. This subscription will be never used again.
142+
// Clean up the persistent subscription information storage.
118143
subscriptionResumptionStorage->Delete(subscriptionInfo.mNodeId, subscriptionInfo.mFabricIndex,
119144
subscriptionInfo.mSubscriptionId);
120145
}

src/app/SubscriptionResumptionStorage.h

+3
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,9 @@ class SubscriptionResumptionStorage
7373
NodeId mNodeId;
7474
FabricIndex mFabricIndex;
7575
SubscriptionId mSubscriptionId;
76+
#if CHIP_CONFIG_SUBSCRIPTION_TIMEOUT_RESUMPTION
77+
uint32_t mResumptionRetries;
78+
#endif
7679
uint16_t mMinInterval;
7780
uint16_t mMaxInterval;
7881
bool mFabricFiltered;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
#!/usr/bin/env python3
2+
3+
#
4+
# Copyright (c) 2024 Project CHIP Authors
5+
# All rights reserved.
6+
#
7+
# Licensed under the Apache License, Version 2.0 (the "License");
8+
# you may not use this file except in compliance with the License.
9+
# You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing, software
14+
# distributed under the License is distributed on an "AS IS" BASIS,
15+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
# See the License for the specific language governing permissions and
17+
# limitations under the License.
18+
#
19+
20+
# Commissioning test.
21+
22+
import os
23+
import sys
24+
from optparse import OptionParser
25+
26+
from base import BaseTestHelper, FailIfNot, TestFail, TestTimeout, logger
27+
28+
TEST_DISCRIMINATOR = 3840
29+
TEST_SETUPPIN = 20202021
30+
31+
TEST_ENDPOINT_ID = 0
32+
33+
34+
def main():
35+
optParser = OptionParser()
36+
optParser.add_option(
37+
"-t",
38+
"--timeout",
39+
action="store",
40+
dest="testTimeout",
41+
default=90,
42+
type='int',
43+
help="The program will return with timeout after specified seconds.",
44+
metavar="<timeout-second>",
45+
)
46+
optParser.add_option(
47+
"-a",
48+
"--address",
49+
action="store",
50+
dest="deviceAddress",
51+
default='',
52+
type='str',
53+
help="Address of the device",
54+
metavar="<device-addr>",
55+
)
56+
optParser.add_option(
57+
"--nodeid",
58+
action="store",
59+
dest="nodeid",
60+
default=1,
61+
type=int,
62+
help="The Node ID issued to the device",
63+
metavar="<nodeid>"
64+
)
65+
optParser.add_option(
66+
"--discriminator",
67+
action="store",
68+
dest="discriminator",
69+
default=TEST_DISCRIMINATOR,
70+
type=int,
71+
help="Discriminator of the device",
72+
metavar="<nodeid>"
73+
)
74+
optParser.add_option(
75+
"--setuppin",
76+
action="store",
77+
dest="setuppin",
78+
default=TEST_SETUPPIN,
79+
type=int,
80+
help="Setup PIN of the device",
81+
metavar="<nodeid>"
82+
)
83+
optParser.add_option(
84+
"-p",
85+
"--paa-trust-store-path",
86+
action="store",
87+
dest="paaTrustStorePath",
88+
default='',
89+
type='str',
90+
help="Path that contains valid and trusted PAA Root Certificates.",
91+
metavar="<paa-trust-store-path>"
92+
)
93+
94+
(options, remainingArgs) = optParser.parse_args(sys.argv[1:])
95+
96+
timeoutTicker = TestTimeout(options.testTimeout)
97+
timeoutTicker.start()
98+
99+
test = BaseTestHelper(
100+
nodeid=112233, paaTrustStorePath=options.paaTrustStorePath, testCommissioner=True)
101+
102+
FailIfNot(
103+
test.TestOnNetworkCommissioning(options.discriminator, options.setuppin, options.nodeid, options.deviceAddress),
104+
"Failed on on-network commissioing")
105+
try:
106+
test.devCtrl.ZCLSubscribeAttribute("BasicInformation", "NodeLabel", options.nodeid, TEST_ENDPOINT_ID, 1, 2,
107+
keepSubscriptions=True, autoResubscribe=False)
108+
except Exception as ex:
109+
TestFail(f"Failed to subscribe attribute: {ex}")
110+
111+
timeoutTicker.stop()
112+
113+
logger.info("Test finished")
114+
115+
# TODO: Python device controller cannot be shutdown clean sometimes and will block on AsyncDNSResolverSockets shutdown.
116+
# Call os._exit(0) to force close it.
117+
os._exit(0)
118+
119+
120+
if __name__ == "__main__":
121+
try:
122+
main()
123+
except Exception as ex:
124+
logger.exception(ex)
125+
TestFail("Exception occurred when running tests.")

0 commit comments

Comments
 (0)