Skip to content

Commit 012dfbc

Browse files
Make sure to cancel the timeout timer whenever we remove an MTRDownload from the downloads list. (#38123)
Fixes a few issues: 1) abortDownloadsForController used to not actually cancel the BDX transfer, as far as I can tell. Maybe that worked because controller shutdown would close the session/exchange, but better to cancel the transfer explicitly. 2) abortDownloadsForController could remove an MTRDownload from the list without canceling its timeout timer. Then when the timer fired it might use a now-deleted MTRDownload pointer. The fix is to move the timer management into MTRDownload and ensure that the timer is always canceled on remove:.
1 parent b7899bc commit 012dfbc

File tree

1 file changed

+77
-65
lines changed

1 file changed

+77
-65
lines changed

src/darwin/Framework/CHIP/MTRDiagnosticLogsDownloader.mm

+77-65
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ @interface MTRDownload : NSObject
5757
- (instancetype)initWithType:(MTRDiagnosticLogType)type
5858
fabricIndex:(NSNumber *)fabricIndex
5959
nodeID:(NSNumber *)nodeID
60+
timeout:(NSTimeInterval)timeout
6061
queue:(dispatch_queue_t)queue
6162
completion:(void (^)(NSURL * _Nullable url, NSError * _Nullable error))completion
6263
done:(void (^)(MTRDownload * finishedDownload))done;
@@ -69,8 +70,14 @@ - (BOOL)matches:(NSString *)fileDesignator
6970

7071
- (void)checkInteractionModelResponse:(MTRDiagnosticLogsClusterRetrieveLogsResponseParams * _Nullable)response error:(NSError * _Nullable)error;
7172

73+
// TODO: The lifetime management for these objects is very odd. Nothing
74+
// _really_ prevents a BDX transfer starting after a failure: call, for example.
75+
// We should move more of the state into a single place that will know the exact
76+
// state of the object.
7277
- (void)success;
73-
- (void)failure:(NSError * _Nullable)error;
78+
- (void)failure:(NSError *)error;
79+
- (void)cancelTimeoutTimer;
80+
- (void)abort:(NSError *)error;
7481
@end
7582

7683
@interface MTRDownloads : NSObject
@@ -83,6 +90,7 @@ - (MTRDownload * _Nullable)get:(NSString *)fileDesignator
8390
- (MTRDownload * _Nullable)add:(MTRDiagnosticLogType)type
8491
fabricIndex:(NSNumber *)fabricIndex
8592
nodeID:(NSNumber *)nodeID
93+
timeout:(NSTimeInterval)timeout
8694
queue:(dispatch_queue_t)queue
8795
completion:(void (^)(NSURL * _Nullable url, NSError * _Nullable error))completion
8896
done:(void (^)(MTRDownload * finishedDownload))done;
@@ -141,22 +149,38 @@ - (void)handleBDXTransferSessionEndForFileDesignator:(NSString *)fileDesignator
141149
CHIP_ERROR OnTransferEnd(chip::bdx::BDXTransferProxy * transfer, CHIP_ERROR error) override;
142150
CHIP_ERROR OnTransferData(chip::bdx::BDXTransferProxy * transfer, const chip::ByteSpan & data) override;
143151

144-
CHIP_ERROR StartBDXTransferTimeout(MTRDownload * download, uint16_t timeoutInSeconds);
145-
void CancelBDXTransferTimeout(MTRDownload * download);
146-
147152
private:
148-
static void OnTransferTimeout(chip::System::Layer * layer, void * context);
149153
MTRDiagnosticLogsDownloader * __weak mDelegate;
150154
};
151155

152156
@implementation MTRDownload
157+
158+
static void OnTransferTimeout(chip::System::Layer * layer, void * context)
159+
{
160+
assertChipStackLockedByCurrentThread();
161+
162+
auto * download = (__bridge MTRDownload *) context;
163+
VerifyOrReturn(nil != download);
164+
165+
auto * controller = [[MTRDeviceControllerFactory sharedInstance] runningControllerForFabricIndex:download.fabricIndex.unsignedCharValue];
166+
167+
MTR_LOG("%@ Diagnostic log transfer timed out for %016llX-%016llX (%llu), abortHandler: %@", download,
168+
controller.compressedFabricID.unsignedLongLongValue, download.nodeID.unsignedLongLongValue,
169+
download.nodeID.unsignedLongLongValue, download.abortHandler);
170+
171+
[download abort:[MTRError errorForCHIPErrorCode:CHIP_ERROR_TIMEOUT]];
172+
}
173+
153174
- (instancetype)initWithType:(MTRDiagnosticLogType)type
154175
fabricIndex:(NSNumber *)fabricIndex
155176
nodeID:(NSNumber *)nodeID
177+
timeout:(NSTimeInterval)timeout
156178
queue:(dispatch_queue_t)queue
157179
completion:(void (^)(NSURL * _Nullable url, NSError * _Nullable error))completion
158180
done:(void (^)(MTRDownload * finishedDownload))done;
159181
{
182+
assertChipStackLockedByCurrentThread();
183+
160184
self = [super init];
161185
if (self) {
162186
auto * fileDesignator = [self _toFileDesignatorString:type nodeID:nodeID];
@@ -184,6 +208,22 @@ - (instancetype)initWithType:(MTRDiagnosticLogType)type
184208
_fileURL = fileURL;
185209
_fileHandle = nil;
186210
_finalize = bdxTransferDone;
211+
212+
if (timeout <= 0) {
213+
timeout = 0;
214+
} else if (timeout > UINT16_MAX) {
215+
MTR_LOG("Warning: timeout is too large. It will be truncated to UINT16_MAX.");
216+
timeout = UINT16_MAX;
217+
}
218+
219+
if (timeout > 0) {
220+
CHIP_ERROR timerStartErr = chip::DeviceLayer::SystemLayer().StartTimer(chip::System::Clock::Seconds16(static_cast<uint16_t>(timeout)),
221+
OnTransferTimeout, (__bridge void *) self);
222+
if (timerStartErr != CHIP_NO_ERROR) {
223+
MTR_LOG_ERROR("Failed to start timer for diagnostic log download timeout");
224+
return nil;
225+
}
226+
}
187227
}
188228
return self;
189229
}
@@ -259,7 +299,7 @@ - (BOOL)matches:(NSString *)fileDesignator
259299
return [_fileDesignator isEqualToString:fileDesignator] && [_fabricIndex isEqualToNumber:fabricIndex] && [_nodeID isEqualToNumber:nodeID];
260300
}
261301

262-
- (void)failure:(NSError * _Nullable)error
302+
- (void)failure:(NSError *)error
263303
{
264304
MTR_LOG("%@ Diagnostic log transfer failure: %@", self, error);
265305
_finalize(error);
@@ -270,6 +310,29 @@ - (void)success
270310
_finalize(nil);
271311
}
272312

313+
- (void)cancelTimeoutTimer
314+
{
315+
assertChipStackLockedByCurrentThread();
316+
chip::DeviceLayer::SystemLayer().CancelTimer(OnTransferTimeout, (__bridge void *) self);
317+
}
318+
319+
- (void)abort:(NSError *)error
320+
{
321+
assertChipStackLockedByCurrentThread();
322+
323+
[self cancelTimeoutTimer];
324+
325+
// If there is no abortHandler, it means that the BDX transfer has not
326+
// started, so we can just call failure: directly.
327+
//
328+
// If there is an abortHandler, we need to call it to abort the transfer.
329+
if (self.abortHandler == nil) {
330+
[self failure:error];
331+
} else {
332+
self.abortHandler(error);
333+
}
334+
}
335+
273336
- (NSURL *)_toFileURL:(MTRDiagnosticLogType)type nodeID:(NSNumber *)nodeID
274337
{
275338
auto * dateFormatter = [[NSDateFormatter alloc] init];
@@ -344,13 +407,14 @@ - (MTRDownload * _Nullable)get:(NSString *)fileDesignator fabricIndex:(NSNumber
344407
- (MTRDownload * _Nullable)add:(MTRDiagnosticLogType)type
345408
fabricIndex:(NSNumber *)fabricIndex
346409
nodeID:(NSNumber *)nodeID
410+
timeout:(NSTimeInterval)timeout
347411
queue:(dispatch_queue_t)queue
348412
completion:(void (^)(NSURL * _Nullable url, NSError * _Nullable error))completion
349413
done:(void (^)(MTRDownload * finishedDownload))done
350414
{
351415
assertChipStackLockedByCurrentThread();
352416

353-
auto download = [[MTRDownload alloc] initWithType:type fabricIndex:fabricIndex nodeID:nodeID queue:queue completion:completion done:done];
417+
auto download = [[MTRDownload alloc] initWithType:type fabricIndex:fabricIndex nodeID:nodeID timeout:timeout queue:queue completion:completion done:done];
354418
VerifyOrReturnValue(nil != download, nil);
355419

356420
[_downloads addObject:download];
@@ -367,7 +431,10 @@ - (void)abortDownloadsForController:(MTRDeviceController_Concrete *)controller
367431
continue;
368432
}
369433

370-
[download failure:[MTRError errorForCHIPErrorCode:CHIP_ERROR_CANCELLED]];
434+
[download abort:[MTRError errorForCHIPErrorCode:CHIP_ERROR_CANCELLED]];
435+
// Remove directly instead of waiting for the async bits to catch up,
436+
// since those async bits might run after controller shutdown finishes
437+
// and then not be able to dispatch the async task to do the removal.
371438
[self remove:download];
372439
}
373440
}
@@ -376,6 +443,7 @@ - (void)remove:(MTRDownload *)download
376443
{
377444
assertChipStackLockedByCurrentThread();
378445

446+
[download cancelTimeoutTimer];
379447
[_downloads removeObject:download];
380448
}
381449
@end
@@ -423,29 +491,15 @@ - (void)downloadLogFromNodeWithID:(NSNumber *)nodeID
423491
// any existing ones so we can start this new one.
424492
[self abortDownloadsForController:controller];
425493

426-
uint16_t timeoutInSeconds = 0;
427-
if (timeout <= 0) {
428-
timeoutInSeconds = 0;
429-
} else if (timeout > UINT16_MAX) {
430-
MTR_LOG("Warning: timeout is too large. It will be truncated to UINT16_MAX.");
431-
timeoutInSeconds = UINT16_MAX;
432-
} else {
433-
timeoutInSeconds = static_cast<uint16_t>(timeout);
434-
}
435-
436494
// This block is always called when a download is finished.
437495
auto done = ^(MTRDownload * finishedDownload) {
438496
[controller asyncDispatchToMatterQueue:^() {
439497
[self->_downloads remove:finishedDownload];
440-
441-
if (timeoutInSeconds > 0) {
442-
self->_bridge->CancelBDXTransferTimeout(finishedDownload);
443-
}
444498
} errorHandler:nil];
445499
};
446500

447501
auto fabricIndex = @(controller.fabricIndex);
448-
auto download = [_downloads add:type fabricIndex:fabricIndex nodeID:nodeID queue:queue completion:completion done:done];
502+
auto download = [_downloads add:type fabricIndex:fabricIndex nodeID:nodeID timeout:timeout queue:queue completion:completion done:done];
449503
VerifyOrReturn(nil != download,
450504
dispatch_async(queue, ^{ completion(nil, [MTRError errorForCHIPErrorCode:CHIP_ERROR_INTERNAL]); }));
451505

@@ -463,11 +517,6 @@ - (void)downloadLogFromNodeWithID:(NSNumber *)nodeID
463517

464518
[cluster retrieveLogsRequestWithParams:params expectedValues:nil expectedValueInterval:nil completion:interactionModelDone];
465519

466-
if (timeoutInSeconds > 0) {
467-
auto err = _bridge->StartBDXTransferTimeout(download, timeoutInSeconds);
468-
VerifyOrReturn(CHIP_NO_ERROR == err, [download failure:[MTRError errorForCHIPErrorCode:err]]);
469-
}
470-
471520
MTR_LOG("%@ Started log download attempt for node %016llX-%016llX (%llu)", download,
472521
controller.compressedFabricID.unsignedLongLongValue, nodeID.unsignedLongLongValue, nodeID.unsignedLongLongValue);
473522
}
@@ -648,40 +697,3 @@ - (void)handleBDXTransferSessionEndForFileDesignator:(NSString *)fileDesignator
648697
completion:completionHandler];
649698
return CHIP_NO_ERROR;
650699
}
651-
652-
CHIP_ERROR DiagnosticLogsDownloaderBridge::StartBDXTransferTimeout(MTRDownload * download, uint16_t timeoutInSeconds)
653-
{
654-
assertChipStackLockedByCurrentThread();
655-
return chip::DeviceLayer::SystemLayer().StartTimer(chip::System::Clock::Seconds16(timeoutInSeconds), OnTransferTimeout, (__bridge void *) download);
656-
}
657-
658-
void DiagnosticLogsDownloaderBridge::CancelBDXTransferTimeout(MTRDownload * download)
659-
{
660-
assertChipStackLockedByCurrentThread();
661-
chip::DeviceLayer::SystemLayer().CancelTimer(OnTransferTimeout, (__bridge void *) download);
662-
}
663-
664-
void DiagnosticLogsDownloaderBridge::OnTransferTimeout(chip::System::Layer * layer, void * context)
665-
{
666-
assertChipStackLockedByCurrentThread();
667-
668-
auto * download = (__bridge MTRDownload *) context;
669-
VerifyOrReturn(nil != download);
670-
671-
auto * controller = [[MTRDeviceControllerFactory sharedInstance] runningControllerForFabricIndex:download.fabricIndex.unsignedCharValue];
672-
673-
MTR_LOG("%@ Diagnostic log transfer timed out for %016llX-%016llX (%llu), abortHandler: %@", download,
674-
controller.compressedFabricID.unsignedLongLongValue, download.nodeID.unsignedLongLongValue,
675-
download.nodeID.unsignedLongLongValue, download.abortHandler);
676-
677-
// If there is no abortHandler, it means that the BDX transfer has not started.
678-
// When a BDX transfer has started we need to abort the transfer and we would error out
679-
// at next poll. We would end up calling OnTransferEnd and eventually [download failure:error].
680-
// But if the transfer has not started we would stop right now.
681-
auto error = [MTRError errorForCHIPErrorCode:CHIP_ERROR_TIMEOUT];
682-
if (download.abortHandler == nil) {
683-
[download failure:error];
684-
} else {
685-
download.abortHandler(error);
686-
}
687-
}

0 commit comments

Comments
 (0)