Skip to content

Commit bc0f5ee

Browse files
[mrp] Increase default retry interval for Thread (#33314)
* [mrp] Increase default retry interval for Thread The current 800ms is not enough in real setups, where Thread routers must serve as intermediate hops for many parallel conversations. Bump this to 2s. Signed-off-by: Damian Krolik <damian.krolik@nordicsemi.no> * [mrp] Make additional MRP backoff time dynamic for all Make the additional MRP backoff time (aka CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST) dynamic for all build configurations to remove the need for adjusting timeouts in unit tests whenever this parameter changes. In messaging tests, by default, set this parameter to 0, except for tests that explicitly verify its meaning. By the way, fix tests increasing the MRP backoff time by the slow-polling instead of fast-polling interval. --------- Signed-off-by: Damian Krolik <damian.krolik@nordicsemi.no>
1 parent 4957dc8 commit bc0f5ee

9 files changed

+72
-70
lines changed

src/darwin/Framework/CHIP/MTRDeviceControllerFactory.mm

+1-1
Original file line numberDiff line numberDiff line change
@@ -1330,7 +1330,7 @@ void MTRSetMessageReliabilityParameters(NSNumber * _Nullable idleRetransmitMs,
13301330
resetAdvertising = ReliableMessageProtocolConfig::SetLocalMRPConfig(NullOptional);
13311331
} else {
13321332
if (additionalRetransmitDelayMs != nil) {
1333-
System::Clock::Milliseconds64 additionalBackoff(additionalRetransmitDelayMs.unsignedLongLongValue);
1333+
System::Clock::Timeout additionalBackoff(additionalRetransmitDelayMs.unsignedLongValue);
13341334
Messaging::ReliableMessageMgr::SetAdditionalMRPBackoffTime(MakeOptional(additionalBackoff));
13351335
}
13361336

src/messaging/ReliableMessageMgr.cpp

+7-8
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,7 @@ using namespace chip::System::Clock::Literals;
4747
namespace chip {
4848
namespace Messaging {
4949

50-
#if CHIP_DEVICE_CONFIG_ENABLE_DYNAMIC_MRP_CONFIG
51-
Optional<System::Clock::Milliseconds64> ReliableMessageMgr::sAdditionalMRPBackoffTime;
52-
#endif // CHIP_DEVICE_CONFIG_ENABLE_DYNAMIC_MRP_CONFIG
50+
System::Clock::Timeout ReliableMessageMgr::sAdditionalMRPBackoffTime = CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST;
5351

5452
ReliableMessageMgr::RetransTableEntry::RetransTableEntry(ReliableMessageContext * rc) :
5553
ec(*rc->GetExchangeContext()), nextRetransTime(0), sendCount(0)
@@ -269,11 +267,7 @@ System::Clock::Timeout ReliableMessageMgr::GetBackoff(System::Clock::Timeout bas
269267
mrpBackoffTime += ICDConfigurationData::GetInstance().GetFastPollingInterval();
270268
#endif
271269

272-
#if CHIP_DEVICE_CONFIG_ENABLE_DYNAMIC_MRP_CONFIG
273-
mrpBackoffTime += sAdditionalMRPBackoffTime.ValueOr(CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST);
274-
#else
275-
mrpBackoffTime += CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST;
276-
#endif // CHIP_DEVICE_CONFIG_ENABLE_DYNAMIC_MRP_CONFIG
270+
mrpBackoffTime += sAdditionalMRPBackoffTime;
277271

278272
return std::chrono::duration_cast<System::Clock::Timeout>(mrpBackoffTime);
279273
}
@@ -463,6 +457,11 @@ CHIP_ERROR ReliableMessageMgr::MapSendError(CHIP_ERROR error, uint16_t exchangeI
463457
return error;
464458
}
465459

460+
void ReliableMessageMgr::SetAdditionalMRPBackoffTime(const Optional<System::Clock::Timeout> & additionalTime)
461+
{
462+
sAdditionalMRPBackoffTime = additionalTime.ValueOr(CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST);
463+
}
464+
466465
void ReliableMessageMgr::CalculateNextRetransTime(RetransTableEntry & entry)
467466
{
468467
System::Clock::Timeout baseTimeout = System::Clock::Timeout(0);

src/messaging/ReliableMessageMgr.h

+2-9
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,6 @@ class ReliableMessageMgr
206206
}
207207
#endif // CHIP_CONFIG_TEST
208208

209-
#if CHIP_DEVICE_CONFIG_ENABLE_DYNAMIC_MRP_CONFIG
210209
/**
211210
* Set the value to add to the MRP backoff time we compute. This is meant to
212211
* account for high network latency on the sending side (us) that can't be
@@ -220,11 +219,7 @@ class ReliableMessageMgr
220219
* set this before actually bringing up the stack and having access to a
221220
* ReliableMessageMgr.
222221
*/
223-
static void SetAdditionalMRPBackoffTime(const Optional<System::Clock::Milliseconds64> & additionalTime)
224-
{
225-
sAdditionalMRPBackoffTime = additionalTime;
226-
}
227-
#endif // CHIP_DEVICE_CONFIG_ENABLE_DYNAMIC_MRP_CONFIG
222+
static void SetAdditionalMRPBackoffTime(const Optional<System::Clock::Timeout> & additionalTime);
228223

229224
private:
230225
/**
@@ -255,9 +250,7 @@ class ReliableMessageMgr
255250

256251
SessionUpdateDelegate * mSessionUpdateDelegate = nullptr;
257252

258-
#if CHIP_DEVICE_CONFIG_ENABLE_DYNAMIC_MRP_CONFIG
259-
static Optional<System::Clock::Milliseconds64> sAdditionalMRPBackoffTime;
260-
#endif // CHIP_DEVICE_CONFIG_ENABLE_DYNAMIC_MRP_CONFIG
253+
static System::Clock::Timeout sAdditionalMRPBackoffTime;
261254
};
262255

263256
} // namespace Messaging

src/messaging/ReliableMessageProtocolConfig.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ namespace chip {
5656
*/
5757
#ifndef CHIP_CONFIG_MRP_LOCAL_ACTIVE_RETRY_INTERVAL
5858
#if CHIP_ENABLE_OPENTHREAD && !CHIP_DEVICE_LAYER_TARGET_LINUX
59-
#define CHIP_CONFIG_MRP_LOCAL_ACTIVE_RETRY_INTERVAL (800_ms32)
59+
#define CHIP_CONFIG_MRP_LOCAL_ACTIVE_RETRY_INTERVAL (2000_ms32)
6060
#else
6161
#define CHIP_CONFIG_MRP_LOCAL_ACTIVE_RETRY_INTERVAL (300_ms32)
6262
#endif
@@ -82,7 +82,7 @@ namespace chip {
8282
*/
8383
#ifndef CHIP_CONFIG_MRP_LOCAL_IDLE_RETRY_INTERVAL
8484
#if CHIP_ENABLE_OPENTHREAD && !CHIP_DEVICE_LAYER_TARGET_LINUX
85-
#define CHIP_CONFIG_MRP_LOCAL_IDLE_RETRY_INTERVAL (800_ms32)
85+
#define CHIP_CONFIG_MRP_LOCAL_IDLE_RETRY_INTERVAL (2000_ms32)
8686
#else
8787
#define CHIP_CONFIG_MRP_LOCAL_IDLE_RETRY_INTERVAL (500_ms32)
8888
#endif
@@ -174,7 +174,7 @@ namespace chip {
174174
*/
175175
#ifndef CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST
176176
#if CHIP_ENABLE_OPENTHREAD && !CHIP_DEVICE_LAYER_TARGET_LINUX
177-
#define CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST (500_ms)
177+
#define CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST (1500_ms)
178178
#else
179179
#define CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST (0_ms)
180180
#endif

src/messaging/tests/MessagingContext.cpp

+7
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include <credentials/tests/CHIPCert_unit_test_vectors.h>
2222
#include <lib/core/ErrorStr.h>
2323
#include <lib/support/CodeUtils.h>
24+
#include <messaging/ReliableMessageMgr.h>
2425
#include <protocols/secure_channel/Constants.h>
2526

2627
namespace chip {
@@ -70,6 +71,9 @@ CHIP_ERROR MessagingContext::Init(TransportMgrBase * transport, IOContext * ioCo
7071
ReturnErrorOnFailure(CreatePASESessionDavidToCharlie());
7172
}
7273

74+
// Set the additional MRP backoff to zero so that it does not affect the test execution time.
75+
Messaging::ReliableMessageMgr::SetAdditionalMRPBackoffTime(MakeOptional(System::Clock::kZero));
76+
7377
return CHIP_NO_ERROR;
7478
}
7579

@@ -85,6 +89,9 @@ void MessagingContext::Shutdown()
8589
mFabricTable.Shutdown();
8690
mOpCertStore.Finish();
8791
mOpKeyStore.Finish();
92+
93+
// Reset the default additional MRP backoff.
94+
Messaging::ReliableMessageMgr::SetAdditionalMRPBackoffTime(NullOptional);
8895
}
8996

9097
CHIP_ERROR MessagingContext::InitFromExisting(const MessagingContext & existing)

src/messaging/tests/TestAbortExchangesForFabric.cpp

+2-6
Original file line numberDiff line numberDiff line change
@@ -226,15 +226,11 @@ void CommonCheckAbortAllButOneExchange(nlTestSuite * inSuite, TestContext & ctx,
226226
//
227227
auto waitTimeout = System::Clock::Milliseconds32(1000);
228228

229-
#if CHIP_CONFIG_ENABLE_ICD_SERVER == 1
229+
#if CHIP_CONFIG_ENABLE_ICD_SERVER
230230
// If running as an ICD, increase waitTimeout to account for the polling interval
231-
waitTimeout += ICDConfigurationData::GetInstance().GetSlowPollingInterval();
231+
waitTimeout += ICDConfigurationData::GetInstance().GetFastPollingInterval();
232232
#endif
233233

234-
// Account for the retry delay booster, so that we do not timeout our IO processing before the
235-
// retransmission failure is triggered.
236-
waitTimeout += CHIP_CONFIG_RMP_DEFAULT_MAX_RETRANS * CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST;
237-
238234
ctx.GetIOContext().DriveIOUntil(waitTimeout, [&]() { return false; });
239235
}
240236
else

src/messaging/tests/TestReliableMessageProtocol.cpp

+39-30
Original file line numberDiff line numberDiff line change
@@ -65,14 +65,6 @@ using namespace chip::System::Clock::Literals;
6565

6666
const char PAYLOAD[] = "Hello!";
6767

68-
// The CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST can be set to non-zero value
69-
// to boost the retransmission timeout for a high latency network like Thread to
70-
// avoid spurious retransmits.
71-
//
72-
// This adds extra I/O time to account for this. See the documentation for
73-
// CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST for more details.
74-
constexpr auto retryBoosterTimeout = CHIP_CONFIG_RMP_DEFAULT_MAX_RETRANS * CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST;
75-
7668
class TestContext : public chip::Test::LoopbackMessagingContext
7769
{
7870
public:
@@ -324,6 +316,34 @@ struct BackoffComplianceTestVector theBackoffComplianceTestVector[] = { {
324316
.backoffMax = System::Clock::Timeout(20'286'001),
325317
} };
326318

319+
void CheckGetBackoffImpl(nlTestSuite * inSuite, System::Clock::Timeout additionalMRPBackoffTime)
320+
{
321+
ReliableMessageMgr::SetAdditionalMRPBackoffTime(MakeOptional(additionalMRPBackoffTime));
322+
323+
// Run 3x iterations to thoroughly test random jitter always results in backoff within bounds.
324+
for (uint32_t j = 0; j < 3; j++)
325+
{
326+
for (const auto & test : theBackoffComplianceTestVector)
327+
{
328+
System::Clock::Timeout backoff = ReliableMessageMgr::GetBackoff(test.backoffBase, test.sendCount);
329+
System::Clock::Timeout extraBackoff = additionalMRPBackoffTime;
330+
331+
#if CHIP_CONFIG_ENABLE_ICD_SERVER
332+
// If running as an ICD, increase maxBackoff to account for the polling interval
333+
extraBackoff += ICDConfigurationData::GetInstance().GetFastPollingInterval();
334+
#endif
335+
336+
ChipLogProgress(Test, "Backoff base %" PRIu32 " extra %" PRIu32 " # %d: %" PRIu32, test.backoffBase.count(),
337+
extraBackoff.count(), test.sendCount, backoff.count());
338+
339+
NL_TEST_ASSERT(inSuite, backoff >= test.backoffMin + extraBackoff);
340+
NL_TEST_ASSERT(inSuite, backoff <= test.backoffMax + extraBackoff);
341+
}
342+
}
343+
344+
ReliableMessageMgr::SetAdditionalMRPBackoffTime(NullOptional);
345+
}
346+
327347
} // namespace
328348

329349
class TestReliableMessageProtocol
@@ -348,6 +368,7 @@ class TestReliableMessageProtocol
348368
static void CheckLostStandaloneAck(nlTestSuite * inSuite, void * inContext);
349369
static void CheckIsPeerActiveNotInitiator(nlTestSuite * inSuite, void * inContext);
350370
static void CheckGetBackoff(nlTestSuite * inSuite, void * inContext);
371+
static void CheckGetBackoffAdditionalTime(nlTestSuite * inSuite, void * inContext);
351372
static void CheckApplicationResponseDelayed(nlTestSuite * inSuite, void * inContext);
352373
static void CheckApplicationResponseNeverComes(nlTestSuite * inSuite, void * inContext);
353374
};
@@ -449,7 +470,7 @@ void TestReliableMessageProtocol::CheckResendApplicationMessage(nlTestSuite * in
449470
NL_TEST_ASSERT(inSuite, rm->TestGetCountRetransTable() == 1);
450471

451472
// Wait for the initial message to fail (should take 330-413ms)
452-
ctx.GetIOContext().DriveIOUntil(1000_ms32 + retryBoosterTimeout, [&] { return loopback.mSentMessageCount >= 2; });
473+
ctx.GetIOContext().DriveIOUntil(1000_ms32, [&] { return loopback.mSentMessageCount >= 2; });
453474
now = System::SystemClock().GetMonotonicTimestamp();
454475
timeoutTime = now - startTime;
455476
ChipLogProgress(Test, "Attempt #1 Timeout : %" PRIu32 "ms", timeoutTime.count());
@@ -466,7 +487,7 @@ void TestReliableMessageProtocol::CheckResendApplicationMessage(nlTestSuite * in
466487
NL_TEST_ASSERT(inSuite, rm->TestGetCountRetransTable() == 1);
467488

468489
// Wait for the 1st retry to fail (should take 330-413ms)
469-
ctx.GetIOContext().DriveIOUntil(1000_ms32 + retryBoosterTimeout, [&] { return loopback.mSentMessageCount >= 3; });
490+
ctx.GetIOContext().DriveIOUntil(1000_ms32, [&] { return loopback.mSentMessageCount >= 3; });
470491
now = System::SystemClock().GetMonotonicTimestamp();
471492
timeoutTime = now - startTime;
472493
ChipLogProgress(Test, "Attempt #2 Timeout : %" PRIu32 "ms", timeoutTime.count());
@@ -483,7 +504,7 @@ void TestReliableMessageProtocol::CheckResendApplicationMessage(nlTestSuite * in
483504
NL_TEST_ASSERT(inSuite, rm->TestGetCountRetransTable() == 1);
484505

485506
// Wait for the 2nd retry to fail (should take 528-660ms)
486-
ctx.GetIOContext().DriveIOUntil(1000_ms32 + retryBoosterTimeout, [&] { return loopback.mSentMessageCount >= 4; });
507+
ctx.GetIOContext().DriveIOUntil(1000_ms32, [&] { return loopback.mSentMessageCount >= 4; });
487508
now = System::SystemClock().GetMonotonicTimestamp();
488509
timeoutTime = now - startTime;
489510
ChipLogProgress(Test, "Attempt #3 Timeout : %" PRIu32 "ms", timeoutTime.count());
@@ -500,7 +521,7 @@ void TestReliableMessageProtocol::CheckResendApplicationMessage(nlTestSuite * in
500521
NL_TEST_ASSERT(inSuite, rm->TestGetCountRetransTable() == 1);
501522

502523
// Wait for the 3rd retry to fail (should take 845-1056ms)
503-
ctx.GetIOContext().DriveIOUntil(1500_ms32 + retryBoosterTimeout, [&] { return loopback.mSentMessageCount >= 5; });
524+
ctx.GetIOContext().DriveIOUntil(1500_ms32, [&] { return loopback.mSentMessageCount >= 5; });
504525
now = System::SystemClock().GetMonotonicTimestamp();
505526
timeoutTime = now - startTime;
506527
ChipLogProgress(Test, "Attempt #4 Timeout : %" PRIu32 "ms", timeoutTime.count());
@@ -1845,25 +1866,12 @@ void TestReliableMessageProtocol::CheckLostStandaloneAck(nlTestSuite * inSuite,
18451866

18461867
void TestReliableMessageProtocol::CheckGetBackoff(nlTestSuite * inSuite, void * inContext)
18471868
{
1848-
// Run 3x iterations to thoroughly test random jitter always results in backoff within bounds.
1849-
for (uint32_t j = 0; j < 3; j++)
1850-
{
1851-
for (const auto & test : theBackoffComplianceTestVector)
1852-
{
1853-
System::Clock::Timeout backoff = ReliableMessageMgr::GetBackoff(test.backoffBase, test.sendCount);
1854-
ChipLogProgress(Test, "Backoff base %" PRIu32 " # %d: %" PRIu32, test.backoffBase.count(), test.sendCount,
1855-
backoff.count());
1856-
1857-
NL_TEST_ASSERT(inSuite, backoff >= test.backoffMin);
1869+
CheckGetBackoffImpl(inSuite, System::Clock::kZero);
1870+
}
18581871

1859-
auto maxBackoff = test.backoffMax + retryBoosterTimeout;
1860-
#if CHIP_CONFIG_ENABLE_ICD_SERVER == 1
1861-
// If running as an ICD, increase maxBackoff to account for the polling interval
1862-
maxBackoff += ICDConfigurationData::GetInstance().GetSlowPollingInterval();
1863-
#endif
1864-
NL_TEST_ASSERT(inSuite, backoff <= maxBackoff);
1865-
}
1866-
}
1872+
void TestReliableMessageProtocol::CheckGetBackoffAdditionalTime(nlTestSuite * inSuite, void * inContext)
1873+
{
1874+
CheckGetBackoffImpl(inSuite, System::Clock::Seconds32(1));
18671875
}
18681876

18691877
void TestReliableMessageProtocol::CheckApplicationResponseDelayed(nlTestSuite * inSuite, void * inContext)
@@ -2207,6 +2215,7 @@ const nlTest sTests[] = {
22072215
TestReliableMessageProtocol::CheckLostStandaloneAck),
22082216
NL_TEST_DEF("Test Is Peer Active Retry logic", TestReliableMessageProtocol::CheckIsPeerActiveNotInitiator),
22092217
NL_TEST_DEF("Test MRP backoff algorithm", TestReliableMessageProtocol::CheckGetBackoff),
2218+
NL_TEST_DEF("Test MRP backoff algorithm with additional time", TestReliableMessageProtocol::CheckGetBackoffAdditionalTime),
22102219
// TODO: Re-enable this test, after changing test to use Mock clock / DriveIO rather than DriveIOUntil.
22112220
// Issue: https://github.com/project-chip/connectedhomeip/issues/32440
22122221
// NL_TEST_DEF("Test an application response that comes after MRP retransmits run out",

src/platform/nrfconnect/CHIPPlatformConfig.h

+4-9
Original file line numberDiff line numberDiff line change
@@ -106,21 +106,16 @@
106106
#define CHIP_CONFIG_LOG_MODULE_Support_PROGRESS 0
107107
#endif
108108

109-
// Set MRP retry intervals for Thread and Wi-Fi to test-proven values.
110109
#ifndef CHIP_CONFIG_MRP_LOCAL_ACTIVE_RETRY_INTERVAL
111-
#if CHIP_ENABLE_OPENTHREAD
112-
#define CHIP_CONFIG_MRP_LOCAL_ACTIVE_RETRY_INTERVAL (800_ms32)
113-
#else
110+
#ifndef CONFIG_NET_L2_OPENTHREAD
114111
#define CHIP_CONFIG_MRP_LOCAL_ACTIVE_RETRY_INTERVAL (1000_ms32)
115-
#endif // CHIP_ENABLE_OPENTHREAD
112+
#endif // CONFIG_NET_L2_OPENTHREAD
116113
#endif // CHIP_CONFIG_MRP_LOCAL_ACTIVE_RETRY_INTERVAL
117114

118115
#ifndef CHIP_CONFIG_MRP_LOCAL_IDLE_RETRY_INTERVAL
119-
#if CHIP_ENABLE_OPENTHREAD
120-
#define CHIP_CONFIG_MRP_LOCAL_IDLE_RETRY_INTERVAL (800_ms32)
121-
#else
116+
#ifndef CONFIG_NET_L2_OPENTHREAD
122117
#define CHIP_CONFIG_MRP_LOCAL_IDLE_RETRY_INTERVAL (1000_ms32)
123-
#endif // CHIP_ENABLE_OPENTHREAD
118+
#endif // CONFIG_NET_L2_OPENTHREAD
124119
#endif // CHIP_CONFIG_MRP_LOCAL_IDLE_RETRY_INTERVAL
125120

126121
#ifndef CHIP_CONFIG_ICD_IDLE_MODE_DURATION_SEC

src/protocols/secure_channel/tests/TestPASESession.cpp

+7-4
Original file line numberDiff line numberDiff line change
@@ -305,11 +305,14 @@ void SecurePairingHandshakeTestCommon(nlTestSuite * inSuite, void * inContext, S
305305

306306
while (delegate.mMessageDropped)
307307
{
308-
auto waitTimeout = 100_ms + CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST;
308+
auto waitTimeout = 100_ms;
309309

310-
#if CHIP_CONFIG_ENABLE_ICD_SERVER == 1
311-
// If running as an ICD, increase waitTimeout to account for the polling interval
312-
waitTimeout += ICDConfigurationData::GetInstance().GetSlowPollingInterval();
310+
#if CHIP_CONFIG_ENABLE_ICD_SERVER
311+
// If running as an ICD, increase waitTimeout to account for:
312+
// - longer MRP intervals, configured above to 1s/1s,
313+
// - the fast-polling interval that is added to the MRP backoff time.
314+
waitTimeout += 2000_ms32;
315+
waitTimeout += ICDConfigurationData::GetInstance().GetFastPollingInterval();
313316
#endif
314317

315318
// Wait some time so the dropped message will be retransmitted when we drain the IO.

0 commit comments

Comments
 (0)