For Thread devices, throttle the response to BlockQuery by the interval specified in kBdxThrottleIntervalInMsecs so that we don't overload the network with frequent BDX messages

nivi-apple · nivi-apple · commit 4e943d0d1cdf · 2025-02-11T20:58:37.000-08:00
diff --git a/src/darwin/Framework/CHIP/MTRDeviceController_Concrete.h b/src/darwin/Framework/CHIP/MTRDeviceController_Concrete.h
@@ -223,6 +223,11 @@ NS_ASSUME_NONNULL_BEGIN
                             queue:(dispatch_queue_t)queue
                        completion:(void (^)(NSURL * _Nullable url, NSError * _Nullable error))completion;
 
+/**
+ * Returns YES if the MTRDevice corresponding to the given node ID is a Thread device, NO otherwise.
+ */
+- (BOOL)usesThreadForDevice:(chip::NodeId)nodeID;
+
 /**
  * Will return chip::kUndefinedFabricIndex if we do not have a fabric index.
  */
diff --git a/src/darwin/Framework/CHIP/MTRDeviceController_Concrete.mm b/src/darwin/Framework/CHIP/MTRDeviceController_Concrete.mm
@@ -1429,33 +1429,43 @@ - (BOOL)checkIsRunning:(NSError * __autoreleasing *)error
     return NO;
 }
 
-- (void)getSessionForNode:(chip::NodeId)nodeID completion:(MTRInternalDeviceConnectionCallback)completion
+- (BOOL)usesThreadForDevice:(chip::NodeId)nodeID
 {
-    // TODO: Figure out whether the synchronization here makes sense.  What
-    // happens if this call happens mid-suspend or mid-resume?
-    if (self.suspended) {
-        MTR_LOG_ERROR("%@ suspended: can't get session for node %016llX-%016llx (%llu)", self, self.compressedFabricID.unsignedLongLongValue, nodeID, nodeID);
-        // TODO: Can we do a better error here?
-        completion(nullptr, chip::NullOptional, [MTRError errorForCHIPErrorCode:CHIP_ERROR_INCORRECT_STATE], nil);
-        return;
+    // No device lookup is attempted for the undefined node ID.
+    if (nodeID == chip::kUndefinedNodeId) {
+        return NO;
     }
 
-    // Get the corresponding MTRDevice object to determine if the case/subscription pool is to be used
+    // Look up the MTRDevice object for this node ID.
     MTRDevice * device = [self deviceForNodeID:@(nodeID)];
 
     // TODO: Can we not just assume this isKindOfClass test is true?  Would be
     // really nice if we had compile-time checking for this somehow...
     if (![device isKindOfClass:MTRDevice_Concrete.class]) {
         MTR_LOG_ERROR("%@ somehow has %@ instead of MTRDevice_Concrete for node ID 0x%016llX (%llu)", self, device, nodeID, nodeID);
-        completion(nullptr, chip::NullOptional, [MTRError errorForCHIPErrorCode:CHIP_ERROR_INCORRECT_STATE], nil);
-        return;
+        return NO;
     }
 
     auto * concreteDevice = static_cast<MTRDevice_Concrete *>(device);
 
+    // The answer comes straight from the concrete device.
+    return [concreteDevice deviceUsesThread];
+}
+
+- (void)getSessionForNode:(chip::NodeId)nodeID completion:(MTRInternalDeviceConnectionCallback)completion
+{
+    // TODO: Figure out whether the synchronization here makes sense.  What
+    // happens if this call happens mid-suspend or mid-resume?
+    if (self.suspended) {
+        MTR_LOG_ERROR("%@ suspended: can't get session for node %016llX-%016llx (%llu)", self, self.compressedFabricID.unsignedLongLongValue, nodeID, nodeID);
+        // TODO: Can we do a better error here?
+        completion(nullptr, chip::NullOptional, [MTRError errorForCHIPErrorCode:CHIP_ERROR_INCORRECT_STATE], nil);
+        return;
+    }
+
     // In the case that this device is known to use thread, queue this with subscription attempts as well, to
     // help with throttling Thread traffic.
-    if ([concreteDevice deviceUsesThread]) {
+    if ([self usesThreadForDevice:nodeID]) {
         MTRAsyncWorkItem * workItem = [[MTRAsyncWorkItem alloc] initWithQueue:dispatch_get_global_queue(QOS_CLASS_DEFAULT, 0)];
         [workItem setReadyHandler:^(id _Nonnull context, NSInteger retryCount, MTRAsyncWorkCompletionBlock _Nonnull workItemCompletion) {
             MTRInternalDeviceConnectionCallback completionWrapper = ^(chip::Messaging::ExchangeManager * _Nullable exchangeManager,
diff --git a/src/darwin/Framework/CHIP/MTROTAImageTransferHandler.h b/src/darwin/Framework/CHIP/MTROTAImageTransferHandler.h
@@ -74,6 +74,8 @@ class MTROTAImageTransferHandler : public chip::bdx::AsyncResponder {
     MTROTAImageTransferHandlerWrapper * mOTAImageTransferHandlerWrapper;
 
     bool mNeedToCallTransferSessionEnd = false;
+
+    bool mIsPeerNodeAThreadDevice = false; // True when the controller reports the peer as a Thread device; gates BlockQuery response throttling.
 };
 
 NS_ASSUME_NONNULL_END
diff --git a/src/darwin/Framework/CHIP/MTROTAImageTransferHandler.mm b/src/darwin/Framework/CHIP/MTROTAImageTransferHandler.mm
@@ -28,9 +28,16 @@
 
 constexpr uint32_t kMaxBdxBlockSize = 1024;
 
+constexpr double kMilliSecondsInSecond = 1000.0;
+
 // Timeout for the BDX transfer session. The OTA Spec mandates this should be >= 5 minutes.
 constexpr System::Clock::Timeout kBdxTimeout = System::Clock::Seconds16(5 * 60);
 
+// For Thread devices, we throttle sending Blocks in response to BlockQuery messages
+// to avoid flooding the network with BDX messages. The time to wait before sending a
+// Block in response to a BlockQuery is 50 ms, chosen to match the polling interval.
+constexpr System::Clock::Timeout kBdxThrottleIntervalInMsecs = System::Clock::Milliseconds32(50);
+
 constexpr bdx::TransferRole kBdxRole = bdx::TransferRole::kSender;
 
 // An ARC-managed object that lets us do weak references to a MTROTAImageTransferHandler
@@ -78,6 +85,8 @@ - (instancetype)initWithMTROTAImageTransferHandler:(MTROTAImageTransferHandler *
     VerifyOrReturnError(mDelegate != nil, CHIP_ERROR_INCORRECT_STATE);
     VerifyOrReturnError(mDelegateNotificationQueue != nil, CHIP_ERROR_INCORRECT_STATE);
 
+    mIsPeerNodeAThreadDevice = [controller usesThreadForDevice:mPeer.GetNodeId()];
+
     BitFlags<bdx::TransferControlFlags> flags(bdx::TransferControlFlags::kReceiverDrive);
 
     return AsyncResponder::Init(mSystemLayer, exchangeCtx, kBdxRole, flags, kMaxBdxBlockSize, kBdxTimeout);
@@ -233,6 +242,11 @@ - (instancetype)initWithMTROTAImageTransferHandler:(MTROTAImageTransferHandler *
 {
     assertChipStackLockedByCurrentThread();
 
+    // For Thread devices, if the BlockQuery is processed before kBdxThrottleIntervalInMsecs has elapsed, we delay the response until
+    // the interval has elapsed, so that BDX messages do not flood the network. Record the timestamp at which we start processing the BlockQuery message.
+
+    __block uint64_t startBlockQueryHandlingTimestamp = chip::System::SystemClock().GetMonotonicMilliseconds64().count();
+
     auto blockSize = @(mTransfer.GetTransferBlockSize());
     auto blockIndex = @(mTransfer.GetNextBlockNum());
 
@@ -241,7 +255,7 @@ - (instancetype)initWithMTROTAImageTransferHandler:(MTROTAImageTransferHandler *
 
     MTROTAImageTransferHandlerWrapper * __weak weakWrapper = mOTAImageTransferHandlerWrapper;
 
-    auto completionHandler = ^(NSData * _Nullable data, BOOL isEOF) {
+    auto respondWithBlock = ^(NSData * _Nullable data, BOOL isEOF) {
         [controller
             asyncDispatchToMatterQueue:^() {
                 assertChipStackLockedByCurrentThread();
@@ -272,6 +286,35 @@ - (instancetype)initWithMTROTAImageTransferHandler:(MTROTAImageTransferHandler *
                           }];
     };
 
+    __block void (^completionHandler)(NSData * _Nullable data, BOOL isEOF) = nil;
+
+    // If the peer node is a Thread device, check how much time has elapsed since we started processing the BlockQuery.
+    // If the time elapsed is greater than kBdxThrottleIntervalInMsecs, call the completion handler to respond with a Block right away.
+    // If time elapsed is less than kBdxThrottleIntervalInMsecs, dispatch the completion handler to respond with a Block after kBdxThrottleIntervalInMsecs has elapsed.
+
+    if (mIsPeerNodeAThreadDevice) {
+        // Thread peer: make sure at least kBdxThrottleIntervalInMsecs separates the start of
+        // BlockQuery handling from the Block response.
+        completionHandler = ^(NSData * _Nullable data, BOOL isEOF) {
+            uint64_t timeElapsed = chip::System::SystemClock().GetMonotonicMilliseconds64().count() - startBlockQueryHandlingTimestamp;
+            if (timeElapsed >= kBdxThrottleIntervalInMsecs.count()) {
+                // The interval has already elapsed; respond right away.
+                respondWithBlock(data, isEOF);
+            } else {
+                // Wait out the remainder of the interval. respondWithBlock hops to the Matter queue
+                // itself, so a global queue suffices and we do not depend on the main queue being serviced.
+                double timeRemainingInSecs = (kBdxThrottleIntervalInMsecs.count() - timeElapsed) / kMilliSecondsInSecond;
+                dispatch_time_t time = dispatch_time(DISPATCH_TIME_NOW, (int64_t) (timeRemainingInSecs * NSEC_PER_SEC));
+                dispatch_after(time, dispatch_get_global_queue(QOS_CLASS_DEFAULT, 0), ^{
+                    respondWithBlock(data, isEOF);
+                });
+            }
+        };
+    } else {
+        // Non-Thread peer: no throttling; use respondWithBlock directly.
+        completionHandler = respondWithBlock;
+    }
+
     // TODO Handle MaxLength
 
     auto nodeId = @(mPeer.GetNodeId());