Skip to content

Commit 2f49b9b

Browse files
[mrp] Increase default retry interval for Thread (#33314) (#33415)
* [mrp] Increase default retry interval for Thread The current 800ms is not enough in real setups, where Thread routers must serve as intermediate hops for many parallel conversations. Bump this to 2s. Signed-off-by: Damian Krolik <damian.krolik@nordicsemi.no> * [mrp] Make additional MRP backoff time dynamic for all Make the additional MRP backoff time (aka CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST) dynamic for all build configurations to remove the need for adjusting timeouts in unit tests whenever this parameter changes. In messaging tests, by default, set this parameter to 0, except for tests that explicitly verify its meaning. By the way, fix tests increasing the MRP backoff time by the slow-polling instead of fast-polling interval. --------- Signed-off-by: Damian Krolik <damian.krolik@nordicsemi.no> (cherry picked from commit bc0f5ee)
1 parent 770aacd commit 2f49b9b

9 files changed

+72
-70
lines changed

src/darwin/Framework/CHIP/MTRDeviceControllerFactory.mm

+1-1
Original file line numberDiff line numberDiff line change
@@ -1489,7 +1489,7 @@ void MTRSetMessageReliabilityParameters(NSNumber * _Nullable idleRetransmitMs,
14891489
resetAdvertising = ReliableMessageProtocolConfig::SetLocalMRPConfig(NullOptional);
14901490
} else {
14911491
if (additionalRetransmitDelayMs != nil) {
1492-
System::Clock::Milliseconds64 additionalBackoff(additionalRetransmitDelayMs.unsignedLongLongValue);
1492+
System::Clock::Timeout additionalBackoff(additionalRetransmitDelayMs.unsignedLongValue);
14931493
Messaging::ReliableMessageMgr::SetAdditionalMRPBackoffTime(MakeOptional(additionalBackoff));
14941494
}
14951495

src/messaging/ReliableMessageMgr.cpp

+7-8
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,7 @@ using namespace chip::System::Clock::Literals;
4747
namespace chip {
4848
namespace Messaging {
4949

50-
#if CHIP_DEVICE_CONFIG_ENABLE_DYNAMIC_MRP_CONFIG
51-
Optional<System::Clock::Milliseconds64> ReliableMessageMgr::sAdditionalMRPBackoffTime;
52-
#endif // CHIP_DEVICE_CONFIG_ENABLE_DYNAMIC_MRP_CONFIG
50+
System::Clock::Timeout ReliableMessageMgr::sAdditionalMRPBackoffTime = CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST;
5351

5452
ReliableMessageMgr::RetransTableEntry::RetransTableEntry(ReliableMessageContext * rc) :
5553
ec(*rc->GetExchangeContext()), nextRetransTime(0), sendCount(0)
@@ -267,11 +265,7 @@ System::Clock::Timestamp ReliableMessageMgr::GetBackoff(System::Clock::Timestamp
267265
mrpBackoffTime += ICDConfigurationData::GetInstance().GetFastPollingInterval();
268266
#endif
269267

270-
#if CHIP_DEVICE_CONFIG_ENABLE_DYNAMIC_MRP_CONFIG
271-
mrpBackoffTime += sAdditionalMRPBackoffTime.ValueOr(CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST);
272-
#else
273-
mrpBackoffTime += CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST;
274-
#endif // CHIP_DEVICE_CONFIG_ENABLE_DYNAMIC_MRP_CONFIG
268+
mrpBackoffTime += sAdditionalMRPBackoffTime;
275269

276270
return mrpBackoffTime;
277271
}
@@ -461,6 +455,11 @@ CHIP_ERROR ReliableMessageMgr::MapSendError(CHIP_ERROR error, uint16_t exchangeI
461455
return error;
462456
}
463457

458+
void ReliableMessageMgr::SetAdditionalMRPBackoffTime(const Optional<System::Clock::Timeout> & additionalTime)
459+
{
460+
sAdditionalMRPBackoffTime = additionalTime.ValueOr(CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST);
461+
}
462+
464463
void ReliableMessageMgr::CalculateNextRetransTime(RetransTableEntry & entry)
465464
{
466465
System::Clock::Timestamp baseTimeout = System::Clock::Milliseconds64(0);

src/messaging/ReliableMessageMgr.h

+2-9
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,6 @@ class ReliableMessageMgr
206206
}
207207
#endif // CHIP_CONFIG_TEST
208208

209-
#if CHIP_DEVICE_CONFIG_ENABLE_DYNAMIC_MRP_CONFIG
210209
/**
211210
* Set the value to add to the MRP backoff time we compute. This is meant to
212211
* account for high network latency on the sending side (us) that can't be
@@ -220,11 +219,7 @@ class ReliableMessageMgr
220219
* set this before actually bringing up the stack and having access to a
221220
* ReliableMessageMgr.
222221
*/
223-
static void SetAdditionalMRPBackoffTime(const Optional<System::Clock::Milliseconds64> & additionalTime)
224-
{
225-
sAdditionalMRPBackoffTime = additionalTime;
226-
}
227-
#endif // CHIP_DEVICE_CONFIG_ENABLE_DYNAMIC_MRP_CONFIG
222+
static void SetAdditionalMRPBackoffTime(const Optional<System::Clock::Timeout> & additionalTime);
228223

229224
private:
230225
/**
@@ -255,9 +250,7 @@ class ReliableMessageMgr
255250

256251
SessionUpdateDelegate * mSessionUpdateDelegate = nullptr;
257252

258-
#if CHIP_DEVICE_CONFIG_ENABLE_DYNAMIC_MRP_CONFIG
259-
static Optional<System::Clock::Milliseconds64> sAdditionalMRPBackoffTime;
260-
#endif // CHIP_DEVICE_CONFIG_ENABLE_DYNAMIC_MRP_CONFIG
253+
static System::Clock::Timeout sAdditionalMRPBackoffTime;
261254
};
262255

263256
} // namespace Messaging

src/messaging/ReliableMessageProtocolConfig.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ namespace chip {
5656
*/
5757
#ifndef CHIP_CONFIG_MRP_LOCAL_ACTIVE_RETRY_INTERVAL
5858
#if CHIP_ENABLE_OPENTHREAD && !CHIP_DEVICE_LAYER_TARGET_LINUX
59-
#define CHIP_CONFIG_MRP_LOCAL_ACTIVE_RETRY_INTERVAL (800_ms32)
59+
#define CHIP_CONFIG_MRP_LOCAL_ACTIVE_RETRY_INTERVAL (2000_ms32)
6060
#else
6161
#define CHIP_CONFIG_MRP_LOCAL_ACTIVE_RETRY_INTERVAL (300_ms32)
6262
#endif
@@ -82,7 +82,7 @@ namespace chip {
8282
*/
8383
#ifndef CHIP_CONFIG_MRP_LOCAL_IDLE_RETRY_INTERVAL
8484
#if CHIP_ENABLE_OPENTHREAD && !CHIP_DEVICE_LAYER_TARGET_LINUX
85-
#define CHIP_CONFIG_MRP_LOCAL_IDLE_RETRY_INTERVAL (800_ms32)
85+
#define CHIP_CONFIG_MRP_LOCAL_IDLE_RETRY_INTERVAL (2000_ms32)
8686
#else
8787
#define CHIP_CONFIG_MRP_LOCAL_IDLE_RETRY_INTERVAL (500_ms32)
8888
#endif
@@ -174,7 +174,7 @@ namespace chip {
174174
*/
175175
#ifndef CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST
176176
#if CHIP_ENABLE_OPENTHREAD && !CHIP_DEVICE_LAYER_TARGET_LINUX
177-
#define CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST (500_ms)
177+
#define CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST (1500_ms)
178178
#else
179179
#define CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST (0_ms)
180180
#endif

src/messaging/tests/MessagingContext.cpp

+7
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include <credentials/tests/CHIPCert_unit_test_vectors.h>
2222
#include <lib/core/ErrorStr.h>
2323
#include <lib/support/CodeUtils.h>
24+
#include <messaging/ReliableMessageMgr.h>
2425
#include <protocols/secure_channel/Constants.h>
2526

2627
namespace chip {
@@ -70,6 +71,9 @@ CHIP_ERROR MessagingContext::Init(TransportMgrBase * transport, IOContext * ioCo
7071
ReturnErrorOnFailure(CreatePASESessionDavidToCharlie());
7172
}
7273

74+
// Set the additional MRP backoff to zero so that it does not affect the test execution time.
75+
Messaging::ReliableMessageMgr::SetAdditionalMRPBackoffTime(MakeOptional(System::Clock::kZero));
76+
7377
return CHIP_NO_ERROR;
7478
}
7579

@@ -85,6 +89,9 @@ void MessagingContext::Shutdown()
8589
mFabricTable.Shutdown();
8690
mOpCertStore.Finish();
8791
mOpKeyStore.Finish();
92+
93+
// Reset the default additional MRP backoff.
94+
Messaging::ReliableMessageMgr::SetAdditionalMRPBackoffTime(NullOptional);
8895
}
8996

9097
CHIP_ERROR MessagingContext::InitFromExisting(const MessagingContext & existing)

src/messaging/tests/TestAbortExchangesForFabric.cpp

+2-6
Original file line numberDiff line numberDiff line change
@@ -212,15 +212,11 @@ void CommonCheckAbortAllButOneExchange(nlTestSuite * inSuite, TestContext & ctx,
212212
//
213213
auto waitTimeout = System::Clock::Milliseconds32(1000);
214214

215-
#if CHIP_CONFIG_ENABLE_ICD_SERVER == 1
215+
#if CHIP_CONFIG_ENABLE_ICD_SERVER
216216
// If running as an ICD, increase waitTimeout to account for the polling interval
217-
waitTimeout += ICDConfigurationData::GetInstance().GetSlowPollingInterval();
217+
waitTimeout += ICDConfigurationData::GetInstance().GetFastPollingInterval();
218218
#endif
219219

220-
// Account for the retry delay booster, so that we do not timeout our IO processing before the
221-
// retransmission failure is triggered.
222-
waitTimeout += CHIP_CONFIG_RMP_DEFAULT_MAX_RETRANS * CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST;
223-
224220
ctx.GetIOContext().DriveIOUntil(waitTimeout, [&]() { return false; });
225221
}
226222
else

src/messaging/tests/TestReliableMessageProtocol.cpp

+39-30
Original file line numberDiff line numberDiff line change
@@ -61,14 +61,6 @@ using namespace chip::System::Clock::Literals;
6161

6262
const char PAYLOAD[] = "Hello!";
6363

64-
// The CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST can be set to non-zero value
65-
// to boost the retransmission timeout for a high latency network like Thread to
66-
// avoid spurious retransmits.
67-
//
68-
// This adds extra I/O time to account for this. See the documentation for
69-
// CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST for more details.
70-
constexpr auto retryBoosterTimeout = CHIP_CONFIG_RMP_DEFAULT_MAX_RETRANS * CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST;
71-
7264
class TestContext : public chip::Test::LoopbackMessagingContext
7365
{
7466
public:
@@ -312,6 +304,34 @@ struct BackoffComplianceTestVector theBackoffComplianceTestVector[] = {
312304
},
313305
};
314306

307+
void CheckGetBackoffImpl(nlTestSuite * inSuite, System::Clock::Timeout additionalMRPBackoffTime)
308+
{
309+
ReliableMessageMgr::SetAdditionalMRPBackoffTime(MakeOptional(additionalMRPBackoffTime));
310+
311+
// Run 3x iterations to thoroughly test random jitter always results in backoff within bounds.
312+
for (uint32_t j = 0; j < 3; j++)
313+
{
314+
for (const auto & test : theBackoffComplianceTestVector)
315+
{
316+
System::Clock::Timeout backoff = ReliableMessageMgr::GetBackoff(test.backoffBase, test.sendCount);
317+
System::Clock::Timeout extraBackoff = additionalMRPBackoffTime;
318+
319+
#if CHIP_CONFIG_ENABLE_ICD_SERVER
320+
// If running as an ICD, increase maxBackoff to account for the polling interval
321+
extraBackoff += ICDConfigurationData::GetInstance().GetFastPollingInterval();
322+
#endif
323+
324+
ChipLogProgress(Test, "Backoff base %" PRIu32 " extra %" PRIu32 " # %d: %" PRIu32, test.backoffBase.count(),
325+
extraBackoff.count(), test.sendCount, backoff.count());
326+
327+
NL_TEST_ASSERT(inSuite, backoff >= test.backoffMin + extraBackoff);
328+
NL_TEST_ASSERT(inSuite, backoff <= test.backoffMax + extraBackoff);
329+
}
330+
}
331+
332+
ReliableMessageMgr::SetAdditionalMRPBackoffTime(NullOptional);
333+
}
334+
315335
} // namespace
316336

317337
class TestReliableMessageProtocol
@@ -336,6 +356,7 @@ class TestReliableMessageProtocol
336356
static void CheckLostStandaloneAck(nlTestSuite * inSuite, void * inContext);
337357
static void CheckIsPeerActiveNotInitiator(nlTestSuite * inSuite, void * inContext);
338358
static void CheckGetBackoff(nlTestSuite * inSuite, void * inContext);
359+
static void CheckGetBackoffAdditionalTime(nlTestSuite * inSuite, void * inContext);
339360
static void CheckApplicationResponseDelayed(nlTestSuite * inSuite, void * inContext);
340361
static void CheckApplicationResponseNeverComes(nlTestSuite * inSuite, void * inContext);
341362
};
@@ -437,7 +458,7 @@ void TestReliableMessageProtocol::CheckResendApplicationMessage(nlTestSuite * in
437458
NL_TEST_ASSERT(inSuite, rm->TestGetCountRetransTable() == 1);
438459

439460
// Wait for the initial message to fail (should take 330-413ms)
440-
ctx.GetIOContext().DriveIOUntil(1000_ms32 + retryBoosterTimeout, [&] { return loopback.mSentMessageCount >= 2; });
461+
ctx.GetIOContext().DriveIOUntil(1000_ms32, [&] { return loopback.mSentMessageCount >= 2; });
441462
now = System::SystemClock().GetMonotonicTimestamp();
442463
timeoutTime = now - startTime;
443464
ChipLogProgress(Test, "Attempt #1 Timeout : %" PRIu32 "ms", timeoutTime.count());
@@ -454,7 +475,7 @@ void TestReliableMessageProtocol::CheckResendApplicationMessage(nlTestSuite * in
454475
NL_TEST_ASSERT(inSuite, rm->TestGetCountRetransTable() == 1);
455476

456477
// Wait for the 1st retry to fail (should take 330-413ms)
457-
ctx.GetIOContext().DriveIOUntil(1000_ms32 + retryBoosterTimeout, [&] { return loopback.mSentMessageCount >= 3; });
478+
ctx.GetIOContext().DriveIOUntil(1000_ms32, [&] { return loopback.mSentMessageCount >= 3; });
458479
now = System::SystemClock().GetMonotonicTimestamp();
459480
timeoutTime = now - startTime;
460481
ChipLogProgress(Test, "Attempt #2 Timeout : %" PRIu32 "ms", timeoutTime.count());
@@ -471,7 +492,7 @@ void TestReliableMessageProtocol::CheckResendApplicationMessage(nlTestSuite * in
471492
NL_TEST_ASSERT(inSuite, rm->TestGetCountRetransTable() == 1);
472493

473494
// Wait for the 2nd retry to fail (should take 528-660ms)
474-
ctx.GetIOContext().DriveIOUntil(1000_ms32 + retryBoosterTimeout, [&] { return loopback.mSentMessageCount >= 4; });
495+
ctx.GetIOContext().DriveIOUntil(1000_ms32, [&] { return loopback.mSentMessageCount >= 4; });
475496
now = System::SystemClock().GetMonotonicTimestamp();
476497
timeoutTime = now - startTime;
477498
ChipLogProgress(Test, "Attempt #3 Timeout : %" PRIu32 "ms", timeoutTime.count());
@@ -488,7 +509,7 @@ void TestReliableMessageProtocol::CheckResendApplicationMessage(nlTestSuite * in
488509
NL_TEST_ASSERT(inSuite, rm->TestGetCountRetransTable() == 1);
489510

490511
// Wait for the 3rd retry to fail (should take 845-1056ms)
491-
ctx.GetIOContext().DriveIOUntil(1500_ms32 + retryBoosterTimeout, [&] { return loopback.mSentMessageCount >= 5; });
512+
ctx.GetIOContext().DriveIOUntil(1500_ms32, [&] { return loopback.mSentMessageCount >= 5; });
492513
now = System::SystemClock().GetMonotonicTimestamp();
493514
timeoutTime = now - startTime;
494515
ChipLogProgress(Test, "Attempt #4 Timeout : %" PRIu32 "ms", timeoutTime.count());
@@ -1833,25 +1854,12 @@ void TestReliableMessageProtocol::CheckLostStandaloneAck(nlTestSuite * inSuite,
18331854

18341855
void TestReliableMessageProtocol::CheckGetBackoff(nlTestSuite * inSuite, void * inContext)
18351856
{
1836-
// Run 3x iterations to thoroughly test random jitter always results in backoff within bounds.
1837-
for (uint32_t j = 0; j < 3; j++)
1838-
{
1839-
for (const auto & test : theBackoffComplianceTestVector)
1840-
{
1841-
System::Clock::Timeout backoff = ReliableMessageMgr::GetBackoff(test.backoffBase, test.sendCount);
1842-
ChipLogProgress(Test, "Backoff base %" PRIu32 " # %d: %" PRIu32, test.backoffBase.count(), test.sendCount,
1843-
backoff.count());
1844-
1845-
NL_TEST_ASSERT(inSuite, backoff >= test.backoffMin);
1857+
CheckGetBackoffImpl(inSuite, System::Clock::kZero);
1858+
}
18461859

1847-
auto maxBackoff = test.backoffMax + retryBoosterTimeout;
1848-
#if CHIP_CONFIG_ENABLE_ICD_SERVER == 1
1849-
// If running as an ICD, increase maxBackoff to account for the polling interval
1850-
maxBackoff += ICDConfigurationData::GetInstance().GetSlowPollingInterval();
1851-
#endif
1852-
NL_TEST_ASSERT(inSuite, backoff <= maxBackoff);
1853-
}
1854-
}
1860+
void TestReliableMessageProtocol::CheckGetBackoffAdditionalTime(nlTestSuite * inSuite, void * inContext)
1861+
{
1862+
CheckGetBackoffImpl(inSuite, System::Clock::Seconds32(1));
18551863
}
18561864

18571865
void TestReliableMessageProtocol::CheckApplicationResponseDelayed(nlTestSuite * inSuite, void * inContext)
@@ -2195,6 +2203,7 @@ const nlTest sTests[] = {
21952203
TestReliableMessageProtocol::CheckLostStandaloneAck),
21962204
NL_TEST_DEF("Test Is Peer Active Retry logic", TestReliableMessageProtocol::CheckIsPeerActiveNotInitiator),
21972205
NL_TEST_DEF("Test MRP backoff algorithm", TestReliableMessageProtocol::CheckGetBackoff),
2206+
NL_TEST_DEF("Test MRP backoff algorithm with additional time", TestReliableMessageProtocol::CheckGetBackoffAdditionalTime),
21982207
// TODO: Re-enable this test, after changing test to use Mock clock / DriveIO rather than DriveIOUntil.
21992208
// Issue: https://github.com/project-chip/connectedhomeip/issues/32440
22002209
// NL_TEST_DEF("Test an application response that comes after MRP retransmits run out",

src/platform/nrfconnect/CHIPPlatformConfig.h

+4-9
Original file line numberDiff line numberDiff line change
@@ -106,21 +106,16 @@
106106
#define CHIP_CONFIG_LOG_MODULE_Support_PROGRESS 0
107107
#endif
108108

109-
// Set MRP retry intervals for Thread and Wi-Fi to test-proven values.
110109
#ifndef CHIP_CONFIG_MRP_LOCAL_ACTIVE_RETRY_INTERVAL
111-
#if CHIP_ENABLE_OPENTHREAD
112-
#define CHIP_CONFIG_MRP_LOCAL_ACTIVE_RETRY_INTERVAL (800_ms32)
113-
#else
110+
#ifndef CONFIG_NET_L2_OPENTHREAD
114111
#define CHIP_CONFIG_MRP_LOCAL_ACTIVE_RETRY_INTERVAL (1000_ms32)
115-
#endif // CHIP_ENABLE_OPENTHREAD
112+
#endif // CONFIG_NET_L2_OPENTHREAD
116113
#endif // CHIP_CONFIG_MRP_LOCAL_ACTIVE_RETRY_INTERVAL
117114

118115
#ifndef CHIP_CONFIG_MRP_LOCAL_IDLE_RETRY_INTERVAL
119-
#if CHIP_ENABLE_OPENTHREAD
120-
#define CHIP_CONFIG_MRP_LOCAL_IDLE_RETRY_INTERVAL (800_ms32)
121-
#else
116+
#ifndef CONFIG_NET_L2_OPENTHREAD
122117
#define CHIP_CONFIG_MRP_LOCAL_IDLE_RETRY_INTERVAL (1000_ms32)
123-
#endif // CHIP_ENABLE_OPENTHREAD
118+
#endif // CONFIG_NET_L2_OPENTHREAD
124119
#endif // CHIP_CONFIG_MRP_LOCAL_IDLE_RETRY_INTERVAL
125120

126121
#ifndef CHIP_CONFIG_ICD_IDLE_MODE_DURATION_SEC

src/protocols/secure_channel/tests/TestPASESession.cpp

+7-4
Original file line numberDiff line numberDiff line change
@@ -304,11 +304,14 @@ void SecurePairingHandshakeTestCommon(nlTestSuite * inSuite, void * inContext, S
304304

305305
while (delegate.mMessageDropped)
306306
{
307-
auto waitTimeout = 100_ms + CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST;
307+
auto waitTimeout = 100_ms;
308308

309-
#if CHIP_CONFIG_ENABLE_ICD_SERVER == 1
310-
// If running as an ICD, increase waitTimeout to account for the polling interval
311-
waitTimeout += ICDConfigurationData::GetInstance().GetSlowPollingInterval();
309+
#if CHIP_CONFIG_ENABLE_ICD_SERVER
310+
// If running as an ICD, increase waitTimeout to account for:
311+
// - longer MRP intervals, configured above to 1s/1s,
312+
// - the fast-polling interval that is added to the MRP backoff time.
313+
waitTimeout += 2000_ms32;
314+
waitTimeout += ICDConfigurationData::GetInstance().GetFastPollingInterval();
312315
#endif
313316

314317
// Wait some time so the dropped message will be retransmitted when we drain the IO.

0 commit comments

Comments
 (0)