diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 56dc9f642..abd6ad4af 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -6,6 +6,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed from dataclasses import dataclass from datetime import datetime +from functools import partial import os from pathlib import Path import re @@ -33,7 +34,21 @@ from dandi.pynwb_utils import validate as pynwb_validate from dandi.support.digests import get_dandietag, get_digest from dandi.utils import yaml_load -from dandi.validate_types import Scope, Severity, ValidationOrigin, ValidationResult +from dandi.validate_types import ( + DandiValidationResult, + Scope, + Severity, + ValidationOrigin, + ValidationResult, +) + +DandiSchemaValidationResult = partial( + ValidationResult, + origin=ValidationOrigin( + name="dandischema", + version=dandischema.__version__, + ), +) lgr = dandi.get_logger() @@ -130,7 +145,9 @@ def get_validation_errors( except Exception as e: if devel_debug: raise - return _pydantic_errors_to_validation_results(e, str(self.filepath)) + return _pydantic_errors_to_validation_results( + [e], str(self.filepath), dandiset_path=self.dandiset_path + ) return [] @@ -180,23 +197,9 @@ def get_validation_errors( except ValidationError as e: if devel_debug: raise - # TODO: how do we get **all** errors from validation - there must be a way - return [ - ValidationResult( - origin=ValidationOrigin( - name="dandischema", - version=dandischema.__version__, - ), - severity=Severity.ERROR, - id="dandischema.TODO", - scope=Scope.FILE, - # metadata=metadata, - path=self.filepath, # note that it is not relative .path - message=str(e), - # TODO? dataset_path=dataset_path, - dandiset_path=self.dandiset_path, - ) - ] + return _pydantic_errors_to_validation_results( + [e], str(self.filepath), dandiset_path=self.dandiset_path + ) except Exception as e: if devel_debug: raise @@ -207,11 +210,7 @@ def get_validation_errors( extra={"validating": True}, ) return [ - ValidationResult( - origin=ValidationOrigin( - name="dandi", - version=dandi.__version__, - ), + DandiSchemaValidationResult( severity=Severity.ERROR, id="dandi.SOFTWARE_ERROR", scope=Scope.FILE, @@ -540,18 +539,17 @@ def get_validation_errors( if devel_debug: raise # TODO: might reraise instead of making it into an error - return _pydantic_errors_to_validation_results([e], str(self.filepath)) + return _pydantic_errors_to_validation_results( + [e], str(self.filepath), dandiset_path=self.dandiset_path + ) from .bids import NWBBIDSAsset if not isinstance(self, NWBBIDSAsset): if self.dandiset_path is None: errors.append( - ValidationResult( + DandiValidationResult( id="DANDI.NO_DANDISET_FOUND", - origin=ValidationOrigin( - name="dandi", version=dandi.__version__ - ), severity=Severity.ERROR, scope=Scope.FILE, path=self.filepath, @@ -679,11 +677,7 @@ def _check_required_fields( if not v or (isinstance(v, str) and not v.strip()): message = f"Required field {f!r} has no value" errors.append( - ValidationResult( - origin=ValidationOrigin( - name="dandischema", - version=dandischema.__version__, - ), + DandiValidationResult( severity=Severity.ERROR, id="dandischema.requred_field", scope=Scope.FILE, @@ -694,11 +688,7 @@ def _check_required_fields( if v in ("REQUIRED", "PLACEHOLDER"): message = f"Required field {f!r} has value {v!r}" errors.append( - ValidationResult( - origin=ValidationOrigin( - name="dandischema", - version=dandischema.__version__, - ), + DandiValidationResult( severity=Severity.WARNING, id="dandischema.placeholder_value", scope=Scope.FILE, @@ -743,8 +733,9 @@ def _get_nwb_inspector_version(): def _pydantic_errors_to_validation_results( - errors: Any[list[dict], Exception], + errors: list[dict | Exception], file_path: str, + dandiset_path: Optional[Path] = None, ) -> list[ValidationResult]: """Convert list of dict from pydantic into our custom object.""" out = [] @@ -767,18 +758,14 @@ def _pydantic_errors_to_validation_results( message = e.get("message", None) scope = Scope.DANDISET out.append( - ValidationResult( - origin=ValidationOrigin( - name="dandischema", - version=dandischema.__version__, - ), + DandiSchemaValidationResult( severity=Severity.ERROR, id=id, scope=scope, path=Path(file_path), message=message, # TODO? dataset_path=dataset_path, - # TODO? dandiset_path=dandiset_path, + dandiset_path=dandiset_path, ) ) return out diff --git a/dandi/files/zarr.py b/dandi/files/zarr.py index 212877bdb..c71b58c4d 100644 --- a/dandi/files/zarr.py +++ b/dandi/files/zarr.py @@ -6,6 +6,7 @@ from contextlib import closing from dataclasses import dataclass, field, replace from datetime import datetime +from functools import partial import os from pathlib import Path from time import sleep @@ -40,6 +41,14 @@ lgr = get_logger() +ZarrValidationResult = partial( + ValidationResult, + origin=ValidationOrigin( + name="zarr", + version=zarr.version.version, + ), +) + @dataclass class LocalZarrEntry(BasePath): @@ -194,11 +203,7 @@ def get_validation_errors( if devel_debug: raise return [ - ValidationResult( - origin=ValidationOrigin( - name="zarr", - version=zarr.version.version, - ), + ZarrValidationResult( severity=Severity.ERROR, id="zarr.cannot_open", scope=Scope.FILE, @@ -208,11 +213,7 @@ def get_validation_errors( ] if isinstance(data, zarr.Group) and not data: return [ - ValidationResult( - origin=ValidationOrigin( - name="zarr", - version=zarr.version.version, - ), + ZarrValidationResult( severity=Severity.ERROR, id="zarr.empty_group", scope=Scope.FILE, @@ -225,11 +226,7 @@ def get_validation_errors( if devel_debug: raise ValueError(msg) return [ - ValidationResult( - origin=ValidationOrigin( - name="zarr", - version=zarr.version.version, - ), + ZarrValidationResult( severity=Severity.ERROR, id="zarr.tree_depth_exceeded", scope=Scope.FILE, diff --git a/dandi/organize.py b/dandi/organize.py index 7c3ee4fd3..cca79ecb6 100644 --- a/dandi/organize.py +++ b/dandi/organize.py @@ -16,7 +16,7 @@ import numpy as np -from . import __version__, get_logger +from . import get_logger from .dandiset import Dandiset from .exceptions import OrganizeImpossibleError from .metadata import get_metadata @@ -38,7 +38,7 @@ move_file, yaml_load, ) -from .validate_types import Scope, Severity, ValidationOrigin, ValidationResult +from .validate_types import DandiValidationResult, Scope, Severity, ValidationResult lgr = get_logger() @@ -1061,9 +1061,8 @@ def validate_organized_path( errors = [] if not re.fullmatch(ORGANIZED_FILENAME_REGEX, path.name): errors.append( - ValidationResult( + DandiValidationResult( id="DANDI.NON_DANDI_FILENAME", - origin=ValidationOrigin(name="dandi", version=__version__), severity=Severity.ERROR, scope=Scope.FILE, path=filepath, @@ -1077,9 +1076,8 @@ def validate_organized_path( and re.fullmatch(ORGANIZED_FOLDER_REGEX, str(path.parent)) ): errors.append( - ValidationResult( + DandiValidationResult( id="DANDI.NON_DANDI_FOLDERNAME", - origin=ValidationOrigin(name="dandi", version=__version__), severity=Severity.ERROR, scope=Scope.FOLDER, path=filepath, @@ -1093,9 +1091,8 @@ def validate_organized_path( assert m if str(path.parent) != m[0]: errors.append( - ValidationResult( + DandiValidationResult( id="DANDI.METADATA_MISMATCH_SUBJECT", - origin=ValidationOrigin(name="dandi", version=__version__), severity=Severity.ERROR, scope=Scope.FILE, path=filepath, diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index ca92f544e..667efc2ab 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -47,6 +47,11 @@ envvar="DANDI_CACHE", ) +PyNWBValidationOrigin = ValidationOrigin( + name="pynwb", + version=pynwb.__version__, +) + def _sanitize_nwb_version(v, filename=None, log=None): """Helper to sanitize the value of nwb_version where possible @@ -350,10 +355,7 @@ def validate( for error_output in error_outputs: errors.append( ValidationResult( - origin=ValidationOrigin( - name="pynwb", - version=pynwb.__version__, - ), + origin=PyNWBValidationOrigin, severity=Severity.WARNING, id=f"pywnb.{error_output}", scope=Scope.FILE, @@ -367,10 +369,7 @@ def validate( errors.extend( [ ValidationResult( - origin=ValidationOrigin( - name="pynwb", - version=pynwb.__version__, - ), + origin=PyNWBValidationOrigin, severity=Severity.ERROR, id="pywnb.GENERIC", scope=Scope.FILE, @@ -400,10 +399,7 @@ def validate( for e in nwb_errors: errors.append( ValidationResult( - origin=ValidationOrigin( - name="pynwb", - version=pynwb.__version__, - ), + origin=PyNWBValidationOrigin, severity=Severity.ERROR, id="pywnb.GENERIC", scope=Scope.FILE, diff --git a/dandi/validate_types.py b/dandi/validate_types.py index bd2790018..df1813da1 100644 --- a/dandi/validate_types.py +++ b/dandi/validate_types.py @@ -1,8 +1,11 @@ from dataclasses import dataclass from enum import Enum +from functools import partial from pathlib import Path from typing import Dict, List, Optional +from . import __version__ + @dataclass class ValidationOrigin: @@ -49,3 +52,12 @@ class ValidationResult: path: Optional[Path] = None path_regex: Optional[str] = None severity: Optional[Severity] = None + + +# +# Our common result shortcut to avoid respecifying origin. +# More specific library ones could be defined closer to those libraries +# imports. +DandiValidationResult = partial( + ValidationResult, origin=ValidationOrigin(name="dandi", version=__version__) +)