Skip to content

Commit c3eedb7

Browse files
Change: Only embed specs/namespaces for types that are included in NWB file on export (#615)
* Only include namespaces for types that are included in NWB file on export (Issue #607) * Add functionality for installing extensions * Minor fixes * Update comment * Add comment + print message when extension has been installed * Update installExtension.m * Fix changed variable name * Update matnwb_createNwbInstallExtension.m Update docstring * Create listNwbTypeHierarchy.m Add utility function for listing the type hierarchy of an nwb type * Add private method for embedding specifications to file on export * Fix variable name * Add workflow for updating nwbInstallExtension * Add option to save extension in custom location * Create InstallExtensionTest.m * Update docstring * Change dispExtensionInfo to return info instead of displaying + add test * Reorganize code into separate functions and add tests * Minor changes to improve test coverage * add nwbInstallExtension to docs * Update update_extension_list.yml Add schedule event for workflow to update nwbInstallExtension * Update downloadExtensionRepository.m Remove local function * Update docstring for nwbInstallExtension * Fix docstring indentation in nwbInstallExtension * Add doc pages describing how to use (ndx) extensions * Fix typo * Update +tests/+unit/InstallExtensionTest.m Co-authored-by: Ben Dichter <ben.dichter@gmail.com> * Update docs/source/pages/getting_started/using_extensions/generating_extension_api.rst Co-authored-by: Ben Dichter <ben.dichter@gmail.com> * Add docstrings for functions to retrieve and list extension info * Fix docstring formatting/whitespace * Update listExtensions.m Add example to docstring * Move static test methods into io.internal.h5 namespace Introduce some functions that will be useful later * Update writeEmbeddedSpecifications.m Add arguments block, fix function name * Add validateEmbeddedSpecifications * Update NwbFile.m Redefine listNwbTypes method, add validation of embedded namespaces * Create listEmbeddedSpecNamespaces.m * Update nwbExportTest.m * Update test for 
spec/namespace embedding * Update read_indexed_column.m * Add disclaimer in deleteGroup function * Update read_indexed_column.m * Fix broken test * add test-requirement * Fix: Ensure object is group before deleting * Fix error id * Add unittests for functions in io.internal.h5 namespace * Update nwbExportTest.m Added comments and a better test to test for warning with ID 'NWB:validators:MissingEmbeddedNamespace' * Fix failing tests --------- Co-authored-by: Ben Dichter <ben.dichter@gmail.com>
1 parent 1a2f696 commit c3eedb7

26 files changed

+1362
-46
lines changed

+io/+internal/+h5/deleteAttribute.m

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
function deleteAttribute(fileReference, objectLocation, attributeName)
% deleteAttribute - Remove a named attribute from an object in an HDF5/NWB file
%
%   io.internal.h5.deleteAttribute(fileReference, objectLocation, attributeName)
%
% Input Arguments:
%   fileReference  - File path (char/string) or an H5ML.id of an open file.
%   objectLocation - Location of the object (dataset or group) that owns the
%                    attribute. A leading "/" is added if it is missing.
%   attributeName  - Name of the attribute to delete.

    arguments
        fileReference {io.internal.h5.mustBeH5FileReference}
        objectLocation (1,1) string
        attributeName (1,1) string
    end

    % A file path is opened with write access; an H5ML.id is used as given.
    [h5FileId, closeFile] = io.internal.h5.resolveFileReference(fileReference, "w"); %#ok<ASGLU>

    % Normalize the location and open the object holding the attribute.
    normalizedLocation = io.internal.h5.validateLocation(objectLocation);
    [targetId, closeTarget] = io.internal.h5.openObject(h5FileId, normalizedLocation); %#ok<ASGLU>

    % Remove the attribute from the opened object.
    H5A.delete(targetId, attributeName);
end

+io/+internal/+h5/deleteGroup.m

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
function deleteGroup(fileReference, groupLocation)
% deleteGroup - Unlink a group from an HDF5/NWB file
%
%   io.internal.h5.deleteGroup(fileReference, groupLocation)
%
% Input Arguments:
%   fileReference - File path (char/string) or an H5ML.id of an open file.
%   groupLocation - Location of the group to delete. A leading "/" is added
%                   if it is missing.
%
% Raises:
%   NWB:DeleteGroup:NotAGroup - if the object at groupLocation is not a group.
%
% IMPORTANT: Deleting groups or datasets from an HDF5 file does not free up
% space.
%
%   An HDF5 file stores data in hierarchical groups and datasets, with
%   metadata that points to the actual data blocks (similar to a filesystem).
%   Deleting a group or dataset removes its metadata entries, so it is no
%   longer accessible, but the space occupied by its data is not reclaimed or
%   reused by default: HDF5 does not automatically reorganize or compress the
%   file when items are deleted.

    arguments
        fileReference {io.internal.h5.mustBeH5FileReference}
        groupLocation (1,1) string
    end

    groupLocation = io.internal.h5.validateLocation(groupLocation);

    % A file path is opened in read-write mode; an H5ML.id is used as given.
    [fileId, closeFile] = io.internal.h5.resolveFileReference(fileReference, "w"); %#ok<ASGLU>

    % Inspect the object type, then release the object handle before the
    % link is removed.
    [objectId, closeObject] = io.internal.h5.openObject(fileId, groupLocation); %#ok<ASGLU>
    objInfo = H5O.get_info(objectId);
    clear closeObject

    isGroup = objInfo.type == H5ML.get_constant_value('H5O_TYPE_GROUP');
    if ~isGroup
        error('NWB:DeleteGroup:NotAGroup', ...
            'The h5 object in location "%s" is not a group', groupLocation)
    end

    % Delete the group
    H5L.delete(fileId, groupLocation, 'H5P_DEFAULT');
end

+io/+internal/+h5/listGroupNames.m

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
function groupNames = listGroupNames(fileReference, h5Location)
% listGroupNames - List names of the groups located directly under a location
%
%   groupNames = io.internal.h5.listGroupNames(fileReference, h5Location)
%
% Input Arguments:
%   fileReference - File path (char/string) or an H5ML.id of an open file.
%   h5Location    - Location of the group whose links are iterated.
%
% Output Arguments:
%   groupNames - Cell array with the names of all links under h5Location
%                that resolve to a group. Links to other object types
%                (e.g. datasets) are skipped.

    arguments
        fileReference {io.internal.h5.mustBeH5FileReference}
        h5Location (1,1) string
    end

    [fileId, fileCleanupObj] = io.internal.h5.resolveFileReference(fileReference); %#ok<ASGLU>

    % Open the specified location (group)
    [groupId, groupCleanupObj] = io.internal.h5.openGroup(fileId, h5Location); %#ok<ASGLU>

    % Iterate over the links in name order, collecting names into a cell array
    [~, ~, groupNames] = H5L.iterate(...
        groupId, "H5_INDEX_NAME", "H5_ITER_INC", 0, @collectGroupNames, {});

    % Iteration callback: append the link name if the link target is a group
    function [status, names] = collectGroupNames(parentId, linkName, names)
        objId = H5O.open(parentId, linkName, 'H5P_DEFAULT');
        % Close the object even if querying its info throws, so that no
        % identifier is left open on the error path.
        closeObj = onCleanup(@() H5O.close(objId)); %#ok<NASGU>

        objInfo = H5O.get_info(objId);
        if objInfo.type == H5ML.get_constant_value('H5O_TYPE_GROUP')
            names{end+1} = linkName;
        end
        status = 0; % Continue iteration
    end
end

+io/+internal/+h5/mustBeH5File.m

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
function mustBeH5File(value)
% mustBeH5File - Validate that value is an existing file named *.h5 or *.nwb
%
%   The extension check is case-insensitive. Throws (as the caller) an error
%   with ID 'NWB:validators:mustBeH5File' if the extension does not match.

    arguments
        value {mustBeFile}
    end

    acceptedEndings = [".h5", ".nwb"];

    if endsWith(value, acceptedEndings, 'IgnoreCase', true)
        return
    end

    throwAsCaller( MException(...
        'NWB:validators:mustBeH5File', ...
        'Expected file "%s" to have .h5 or .nwb file extension', value) )
end
+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
function mustBeH5FileReference(value)
% mustBeH5FileReference - Validate that value refers to an HDF5 file
%
%   Accepts either a file path (char or string), which must additionally
%   pass io.internal.h5.mustBeH5File, or an H5ML.id, which is accepted
%   without further checks.

    arguments
        value {mustBeA(value, ["char", "string", "H5ML.id"])}
    end

    isFilePath = ischar(value) || isstring(value);
    if ~isFilePath
        % value is a H5ML.id, nothing more to validate
        return
    end

    try
        io.internal.h5.mustBeH5File(value)
    catch validationError
        % Re-throw so the error appears to originate from the caller
        throwAsCaller(validationError)
    end
end

+io/+internal/+h5/openFile.m

+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
function [fileId, fileCleanupObj] = openFile(fileName, permission)
% openFile Opens an HDF5 file with the specified permissions and ensures cleanup.
%
%   [fileId, fileCleanupObj] = io.internal.h5.openFile(fileName) opens the HDF5
%   file specified by fileName in read-only mode ('r') by default.
%
%   [fileId, fileCleanupObj] = io.internal.h5.openFile(fileName, permission)
%   opens the HDF5 file specified by fileName with the access mode defined by
%   permission.
%
%   Input Arguments:
%       fileName   - A string or character vector specifying the path to the
%                    HDF5 file. This must be a .h5 or .nwb file.
%
%       permission - (Optional) A scalar string specifying the file access mode.
%                    Valid values are "r" for read-only (default) and "w" for
%                    read-write.
%
%   Output Arguments:
%       fileId         - The file identifier returned by H5F.open, used to
%                        reference the open file.
%
%       fileCleanupObj - A cleanup object (onCleanup) that ensures the file is
%                        closed automatically when fileCleanupObj goes out of
%                        scope.
%
%   Example:
%       [fid, cleanupObj] = io.internal.h5.openFile("data.h5", "w");
%       % Use fid for file operations.
%       % When cleanupObj is cleared or goes out of scope, the file is
%       % automatically closed.

    arguments
        fileName {io.internal.h5.mustBeH5File}
        permission (1,1) string {mustBeMember(permission, ["r", "w"])} = "r"
    end

    % Map the permission to the corresponding HDF5 access flag
    switch permission
        case "r"
            accessFlag = 'H5F_ACC_RDONLY';
        case "w"
            accessFlag = 'H5F_ACC_RDWR';
    end
    fileId = H5F.open(fileName, accessFlag, 'H5P_DEFAULT');

    % onCleanup invokes its handle without arguments, so the handle takes
    % none and closes the captured file identifier.
    fileCleanupObj = onCleanup(@() H5F.close(fileId));
end

+io/+internal/+h5/openGroup.m

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
function [groupId, groupCleanupObj] = openGroup(fileId, h5Location)
% openGroup Opens an HDF5 group at given location and ensures cleanup.
%
%   [groupId, groupCleanupObj] = io.internal.h5.openGroup(fileId, h5Location)
%
%   Input Arguments:
%       fileId     - H5ML.id of an open HDF5 file.
%       h5Location - Location of the group. A leading "/" is added if it
%                    is missing.
%
%   Output Arguments:
%       groupId         - The group identifier returned by H5G.open.
%       groupCleanupObj - A cleanup object (onCleanup) that closes the group
%                         when it is cleared or goes out of scope.

    arguments
        fileId {mustBeA(fileId, "H5ML.id")}
        h5Location (1,1) string
    end

    % Open the specified location (group)
    groupLocation = io.internal.h5.validateLocation(h5Location);
    groupId = H5G.open(fileId, groupLocation);

    % onCleanup invokes its handle without arguments, so the handle takes
    % none and closes the captured group identifier.
    groupCleanupObj = onCleanup(@() H5G.close(groupId));
end

+io/+internal/+h5/openObject.m

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
function [objectId, objectCleanupObj] = openObject(fileId, objectLocation)
% openObject Opens an HDF5 object at given location and ensures cleanup.
%
%   [objectId, objectCleanupObj] = io.internal.h5.openObject(fileId, objectLocation)
%
%   Input Arguments:
%       fileId         - H5ML.id of an open HDF5 file.
%       objectLocation - Location of the object (dataset or group). A leading
%                        "/" is added if it is missing.
%
%   Output Arguments:
%       objectId         - The object identifier returned by H5O.open.
%       objectCleanupObj - A cleanup object (onCleanup) that closes the object
%                          when it is cleared or goes out of scope.

    arguments
        fileId {mustBeA(fileId, "H5ML.id")}
        objectLocation (1,1) string
    end

    % Open the object (dataset or group)
    objectLocation = io.internal.h5.validateLocation(objectLocation);
    objectId = H5O.open(fileId, objectLocation, 'H5P_DEFAULT');

    % onCleanup invokes its handle without arguments, so the handle takes
    % none and closes the captured object identifier.
    objectCleanupObj = onCleanup(@() H5O.close(objectId));
end
+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
function [h5FileId, fileCleanupObj] = resolveFileReference(fileReference, permission)
% resolveFileReference - Resolve a file reference to a H5 File ID.
%
%   Utility function to resolve a file reference, which can be either a
%   file path or a file id for a h5 file.
%
%   The returned value will always be a file ID. This allows functions that
%   operate on h5 files to accept either a file path or a file id.
%
%   Input Arguments:
%       fileReference - File path (char/string) or an H5ML.id.
%       permission    - (Optional) "r" for read-only (default) or "w" for
%                       read-write. Only applied when a file path is given.
%
%   Output Arguments:
%       h5FileId       - H5ML.id of the file.
%       fileCleanupObj - onCleanup object that closes the file if it was
%                        opened here; empty ([]) if an H5ML.id was passed in.
%
%   Note: If the file reference is a file ID for an open file, the permission
%   might be different than the provided/requested permission.

    arguments
        fileReference {io.internal.h5.mustBeH5FileReference}
        permission (1,1) string {mustBeMember(permission, ["r", "w"])} = "r"
    end

    if isa(fileReference, "char") || isa(fileReference, "string")
        % Need to open the file
        if isfile(fileReference)
            [h5FileId, fileCleanupObj] = io.internal.h5.openFile(fileReference, permission);
        else
            % Raised with an identifier so callers can catch it selectively
            error('NWB:ResolveFileReference:FileNotFound', ...
                'File "%s" does not exist', fileReference)
        end
    else
        h5FileId = fileReference;
        % If the file is already open, we are not responsible for closing it
        fileCleanupObj = [];
    end
end

+io/+internal/+h5/validateLocation.m

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
function locationName = validateLocation(locationName)
% validateLocation - Return the location as a string with a leading "/"
%
%   A location that already starts with "/" is returned unchanged;
%   otherwise "/" is prepended.

    arguments
        locationName (1,1) string
    end

    if startsWith(locationName, "/")
        return
    end
    locationName = "/" + locationName;
end
+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
function namespaceNames = listEmbeddedSpecNamespaces(fileReference)
% listEmbeddedSpecNamespaces - List the namespaces embedded in an NWB file
%
%   namespaceNames = listEmbeddedSpecNamespaces(fileReference) returns the
%   names of the groups found under the file's embedded specification
%   location.
%
%   fileReference is a file path (char/string) or an H5ML.id of an open file.

    arguments
        fileReference {io.internal.h5.mustBeH5FileReference}
    end

    % A file opened here is closed when closeFile goes out of scope
    [h5FileId, closeFile] = io.internal.h5.resolveFileReference(fileReference); %#ok<ASGLU>

    % Each group under the specification location is listed as one namespace
    namespaceNames = io.internal.h5.listGroupNames( ...
        h5FileId, io.spec.internal.readEmbeddedSpecLocation(h5FileId));
end
+48
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
function validateEmbeddedSpecifications(h5_file_id, expectedNamespaceNames)
% validateEmbeddedSpecifications - Validate the embedded specifications
%
%   This function does two things:
%       1) Displays a warning if specifications of expected namespaces
%          are not embedded in the file.
%          E.g if cached namespaces were cleared prior to export.
%
%       2) Deletes specifications for unused namespaces that are embedded.
%          - E.g. If neurodata type from an embedded namespace was removed and the
%            file was re-exported
%
%   Input Arguments:
%       h5_file_id             - File id for the h5 file.
%       expectedNamespaceNames - Names of the namespaces expected to be
%                                embedded in the file.

%   NB: Input h5_file_id must point to a file opened with write access

    specLocation = io.spec.internal.readEmbeddedSpecLocation(h5_file_id);

    % Guard: without a specification location there is nothing embedded.
    % An empty location must not be passed on, because it would be
    % normalized to the file root ("/"), and the root groups would then be
    % treated as unused namespaces and deleted.
    if isempty(specLocation) || all(strlength(specLocation) == 0)
        checkMissingNamespaces(expectedNamespaceNames, {})
        return
    end

    embeddedNamespaceNames = io.internal.h5.listGroupNames(h5_file_id, specLocation);

    checkMissingNamespaces(expectedNamespaceNames, embeddedNamespaceNames)

    unusedNamespaces = checkUnusedNamespaces(...
        expectedNamespaceNames, embeddedNamespaceNames);

    if ~isempty(unusedNamespaces)
        deleteUnusedNamespaces(h5_file_id, unusedNamespaces, specLocation)
    end
end
27+
28+
function checkMissingNamespaces(expectedNamespaceNames, embeddedNamespaceNames)
% checkMissingNamespaces - Warn about expected namespaces that are not embedded
%
%   Issues warning 'NWB:validators:MissingEmbeddedNamespace' listing every
%   name in expectedNamespaceNames that is absent from embeddedNamespaceNames.

    notEmbedded = setdiff(expectedNamespaceNames, embeddedNamespaceNames);
    if isempty(notEmbedded)
        return
    end

    % One namespace per line
    listedNames = " " + string(notEmbedded);
    warning('NWB:validators:MissingEmbeddedNamespace', ...
        'Namespace is missing:\n%s', strjoin(listedNames, newline))
end
36+
37+
function unusedNamespaces = checkUnusedNamespaces(expectedNamespaceNames, embeddedNamespaceNames)
% checkUnusedNamespaces - Find embedded namespaces that are not expected
%
%   Returns the names present in embeddedNamespaceNames but absent from
%   expectedNamespaceNames (set difference).

    unusedNamespaces = setdiff( ...
        embeddedNamespaceNames, ...
        expectedNamespaceNames);
end
41+
42+
function deleteUnusedNamespaces(fileId, unusedNamespaces, specRootLocation)
% deleteUnusedNamespaces - Delete the specification group of each given namespace
%
%   For every name in unusedNamespaces, the group located at
%   <specRootLocation>/<name> is deleted from the file.

    namespaceCount = numel(unusedNamespaces);
    for iNamespace = 1:namespaceCount
        pathParts = {specRootLocation, unusedNamespaces{iNamespace}};
        io.internal.h5.deleteGroup(fileId, strjoin(pathParts, '/'))
    end
end

+io/+spec/writeEmbeddedSpecifications.m

+9-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,11 @@
11
function writeEmbeddedSpecifications(fid, jsonSpecs)
2+
% writeEmbeddedSpecifications - Write schema specifications to an NWB file
3+
4+
arguments
5+
fid % File id for a h5 file
6+
jsonSpecs % String representation of schema specifications in json format
7+
end
8+
29
specLocation = io.spec.internal.readEmbeddedSpecLocation(fid);
310

411
if isempty(specLocation)
@@ -37,8 +44,8 @@ function writeEmbeddedSpecifications(fid, jsonSpecs)
3744
function versionNames = getVersionNames(namespaceGroupId)
3845
[~, ~, versionNames] = H5L.iterate(namespaceGroupId,...
3946
'H5_INDEX_NAME', 'H5_ITER_NATIVE',...
40-
0, @removeGroups, {});
41-
function [status, versionNames] = removeGroups(~, name, versionNames)
47+
0, @appendName, {});
48+
function [status, versionNames] = appendName(~, name, versionNames)
4249
versionNames{end+1} = name;
4350
status = 0;
4451
end
+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
function parentTypeNames = listNwbTypeHierarchy(nwbTypeName)
% listNwbTypeHierarchy - List the NWB type hierarchy for an NWB type
%
%   parentTypeNames = listNwbTypeHierarchy(nwbTypeName) returns a string
%   array starting with nwbTypeName, followed by the first superclass of
%   each successive class. The walk stops when a class has no superclass or
%   when 'types.untyped.MetaClass' is reached; 'types.untyped.MetaClass'
%   itself is not included in the result.
%
%   Raises:
%       NWB:ListNwbTypeHierarchy:UnknownType - if a class name in the chain
%       cannot be resolved to a class.

    arguments
        nwbTypeName (1,1) string
    end

    parentTypeNames = string.empty; % Initialize an empty string array
    currentType = nwbTypeName; % Start with the specific type

    while ~strcmp(currentType, 'types.untyped.MetaClass')
        % Use MetaClass information to get the parent type
        metaClass = meta.class.fromName(currentType);

        % fromName returns an empty meta.class when the class is not found;
        % fail with a clear message instead of an obscure indexing error.
        if isempty(metaClass)
            error('NWB:ListNwbTypeHierarchy:UnknownType', ...
                'Could not resolve a class with name "%s"', currentType)
        end

        parentTypeNames(end+1) = currentType; %#ok<AGROW>

        if isempty(metaClass.SuperclassList)
            break; % Reached the base type
        end
        % NWB parent type should always be the first superclass in the list
        currentType = metaClass.SuperclassList(1).Name;
    end
end

0 commit comments

Comments
 (0)