Skip to content

Commit d666567

Browse files
authored
[Darwin] MTRDevice in-memory cache needs to be resilient to data store returning nil (project-chip#34182)
* [Darwin] MTRDevice in-memory cache needs to be resilient to data store returning nil * Added tests and address PR comment * Fixed log string * Additional logging * Fixed SubscriptionCallback object handling on invalidate and subscription reset * Fixed error string for MTRErrorTests/testErrorSourcePaths
1 parent 27c640c commit d666567

File tree

5 files changed

+255
-38
lines changed

5 files changed

+255
-38
lines changed

src/darwin/Framework/CHIP/MTRDevice.mm

+84-4
Original file line numberDiff line numberDiff line change
@@ -908,12 +908,17 @@ - (void)invalidate
908908
_reattemptingSubscription = NO;
909909

910910
[_deviceController asyncDispatchToMatterQueue:^{
911+
MTR_LOG("%@ invalidate disconnecting ReadClient and SubscriptionCallback", self);
912+
911913
// Destroy the read client and callback (has to happen on the Matter
912914
// queue, to avoid deleting objects that are being referenced), to
913915
// tear down the subscription. We will get no more callbacks from
914916
// the subscrption after this point.
915917
std::lock_guard lock(self->_lock);
916918
self->_currentReadClient = nullptr;
919+
if (self->_currentSubscriptionCallback) {
920+
delete self->_currentSubscriptionCallback;
921+
}
917922
self->_currentSubscriptionCallback = nullptr;
918923

919924
[self _changeInternalState:MTRInternalDeviceStateUnsubscribed];
@@ -940,6 +945,7 @@ - (void)nodeMayBeAdvertisingOperational
940945
// whether it might be.
941946
- (void)_triggerResubscribeWithReason:(NSString *)reason nodeLikelyReachable:(BOOL)nodeLikelyReachable
942947
{
948+
MTR_LOG("%@ _triggerResubscribeWithReason called with reason %@", self, reason);
943949
assertChipStackLockedByCurrentThread();
944950

945951
// We might want to trigger a resubscribe on our existing ReadClient. Do
@@ -1235,6 +1241,12 @@ - (void)_handleSubscriptionEstablished
12351241
- (void)_handleSubscriptionError:(NSError *)error
12361242
{
12371243
std::lock_guard lock(_lock);
1244+
[self _doHandleSubscriptionError:error];
1245+
}
1246+
1247+
- (void)_doHandleSubscriptionError:(NSError *)error
1248+
{
1249+
os_unfair_lock_assert_owner(&_lock);
12381250

12391251
[self _changeInternalState:MTRInternalDeviceStateUnsubscribed];
12401252
_unreportedEvents = nil;
@@ -1400,6 +1412,12 @@ - (void)_handleResubscriptionNeededWithDelay:(NSNumber *)resubscriptionDelayMs
14001412
- (void)_handleSubscriptionReset:(NSNumber * _Nullable)retryDelay
14011413
{
14021414
std::lock_guard lock(_lock);
1415+
[self _doHandleSubscriptionReset:retryDelay];
1416+
}
1417+
1418+
- (void)_doHandleSubscriptionReset:(NSNumber * _Nullable)retryDelay
1419+
{
1420+
os_unfair_lock_assert_owner(&_lock);
14031421

14041422
// If we are here, then either we failed to establish initial CASE, or we
14051423
// failed to send the initial SubscribeRequest message, or our ReadClient
@@ -1471,7 +1489,7 @@ - (void)_reattemptSubscriptionNowIfNeededWithReason:(NSString *)reason
14711489
return;
14721490
}
14731491

1474-
MTR_LOG("%@ reattempting subscription", self);
1492+
MTR_LOG("%@ reattempting subscription with reason %@", self, reason);
14751493
self.reattemptingSubscription = NO;
14761494
[self _setupSubscriptionWithReason:reason];
14771495
}
@@ -2100,6 +2118,22 @@ - (void)unitTestClearClusterData
21002118
}
21012119
#endif
21022120

2121+
- (void)_reconcilePersistedClustersWithStorage
2122+
{
2123+
os_unfair_lock_assert_owner(&self->_lock);
2124+
2125+
NSMutableSet * clusterPathsToRemove = [NSMutableSet set];
2126+
for (MTRClusterPath * clusterPath in _persistedClusters) {
2127+
MTRDeviceClusterData * data = [_deviceController.controllerDataStore getStoredClusterDataForNodeID:_nodeID endpointID:clusterPath.endpoint clusterID:clusterPath.cluster];
2128+
if (!data) {
2129+
[clusterPathsToRemove addObject:clusterPath];
2130+
}
2131+
}
2132+
2133+
MTR_LOG_ERROR("%@ Storage missing %lu / %lu clusters - reconciling in-memory records", self, static_cast<unsigned long>(clusterPathsToRemove.count), static_cast<unsigned long>(_persistedClusters.count));
2134+
[_persistedClusters minusSet:clusterPathsToRemove];
2135+
}
2136+
21032137
- (nullable MTRDeviceClusterData *)_clusterDataForPath:(MTRClusterPath *)clusterPath
21042138
{
21052139
os_unfair_lock_assert_owner(&self->_lock);
@@ -2132,8 +2166,16 @@ - (nullable MTRDeviceClusterData *)_clusterDataForPath:(MTRClusterPath *)cluster
21322166

21332167
// Page in the stored value for the data.
21342168
MTRDeviceClusterData * data = [_deviceController.controllerDataStore getStoredClusterDataForNodeID:_nodeID endpointID:clusterPath.endpoint clusterID:clusterPath.cluster];
2169+
MTR_LOG("%@ cluster path %@ cache miss - load from storage success %@", self, clusterPath, YES_NO(data));
21352170
if (data != nil) {
21362171
[_persistedClusterData setObject:data forKey:clusterPath];
2172+
} else {
2173+
// If clusterPath is in _persistedClusters and the data store returns nil for it, then the in-memory cache is now not dependable, and subscription should be reset and reestablished to reload cache from device
2174+
2175+
// First make sure _persistedClusters is consistent with storage, so repeated calls don't immediately re-trigger this
2176+
[self _reconcilePersistedClustersWithStorage];
2177+
2178+
[self _resetSubscriptionWithReasonString:[NSString stringWithFormat:@"Data store has no data for cluster %@", clusterPath]];
21372179
}
21382180

21392181
return data;
@@ -2303,13 +2345,43 @@ - (void)_stopConnectivityMonitoring
23032345
}
23042346
}
23052347

2348+
- (void)_resetSubscriptionWithReasonString:(NSString *)reasonString
2349+
{
2350+
os_unfair_lock_assert_owner(&self->_lock);
2351+
MTR_LOG_ERROR("%@ %@ - resetting subscription", self, reasonString);
2352+
2353+
[_deviceController asyncDispatchToMatterQueue:^{
2354+
MTR_LOG("%@ subscription reset disconnecting ReadClient and SubscriptionCallback", self);
2355+
2356+
std::lock_guard lock(self->_lock);
2357+
self->_currentReadClient = nullptr;
2358+
if (self->_currentSubscriptionCallback) {
2359+
delete self->_currentSubscriptionCallback;
2360+
}
2361+
self->_currentSubscriptionCallback = nullptr;
2362+
2363+
[self _doHandleSubscriptionError:nil];
2364+
// Use nil reset delay so that this keeps existing backoff timing
2365+
[self _doHandleSubscriptionReset:nil];
2366+
}
2367+
errorHandler:nil];
2368+
}
2369+
2370+
#ifdef DEBUG
2371+
- (void)unitTestResetSubscription
2372+
{
2373+
std::lock_guard lock(self->_lock);
2374+
[self _resetSubscriptionWithReasonString:@"Unit test reset subscription"];
2375+
}
2376+
#endif
2377+
23062378
// assume lock is held
23072379
- (void)_setupSubscriptionWithReason:(NSString *)reason
23082380
{
23092381
os_unfair_lock_assert_owner(&self->_lock);
23102382

23112383
if (![self _subscriptionsAllowed]) {
2312-
MTR_LOG("%@ _setupSubscription: Subscriptions not allowed. Do not set up subscription", self);
2384+
MTR_LOG("%@ _setupSubscription: Subscriptions not allowed. Do not set up subscription (reason: %@)", self, reason);
23132385
return;
23142386
}
23152387

@@ -2328,6 +2400,7 @@ - (void)_setupSubscriptionWithReason:(NSString *)reason
23282400

23292401
// for now just subscribe once
23302402
if (!NeedToStartSubscriptionSetup(_internalDeviceState)) {
2403+
MTR_LOG("%@ setupSubscription: no need to subscribe due to internal state %lu (reason: %@)", self, static_cast<unsigned long>(_internalDeviceState), reason);
23312404
return;
23322405
}
23332406

@@ -3758,14 +3831,21 @@ - (void)_storePersistedDeviceData
37583831
}
37593832

37603833
#ifdef DEBUG
3761-
- (MTRDeviceClusterData *)_getClusterDataForPath:(MTRClusterPath *)path
3834+
- (MTRDeviceClusterData *)unitTestGetClusterDataForPath:(MTRClusterPath *)path
37623835
{
37633836
std::lock_guard lock(_lock);
37643837

37653838
return [[self _clusterDataForPath:path] copy];
37663839
}
37673840

3768-
- (BOOL)_clusterHasBeenPersisted:(MTRClusterPath *)path
3841+
- (NSSet<MTRClusterPath *> *)unitTestGetPersistedClusters
3842+
{
3843+
std::lock_guard lock(_lock);
3844+
3845+
return [_persistedClusters copy];
3846+
}
3847+
3848+
- (BOOL)unitTestClusterHasBeenPersisted:(MTRClusterPath *)path
37693849
{
37703850
std::lock_guard lock(_lock);
37713851

0 commit comments

Comments
 (0)