Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Validation results refactoring: centralize results for different reused origins, use _pydantic_errors_to_validation_results more #1176

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 33 additions & 46 deletions dandi/files/bases.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import dataclass
from datetime import datetime
from functools import partial
import os
from pathlib import Path
import re
Expand Down Expand Up @@ -33,7 +34,21 @@
from dandi.pynwb_utils import validate as pynwb_validate
from dandi.support.digests import get_dandietag, get_digest
from dandi.utils import yaml_load
from dandi.validate_types import Scope, Severity, ValidationOrigin, ValidationResult
from dandi.validate_types import (
DandiValidationResult,
Scope,
Severity,
ValidationOrigin,
ValidationResult,
)

DandiSchemaValidationResult = partial(
ValidationResult,
origin=ValidationOrigin(
name="dandischema",
version=dandischema.__version__,
),
)

lgr = dandi.get_logger()

Expand Down Expand Up @@ -130,7 +145,9 @@ def get_validation_errors(
except Exception as e:
if devel_debug:
raise
return _pydantic_errors_to_validation_results(e, str(self.filepath))
return _pydantic_errors_to_validation_results(
[e], str(self.filepath), dandiset_path=self.dandiset_path
)
return []


Expand Down Expand Up @@ -180,23 +197,9 @@ def get_validation_errors(
except ValidationError as e:
if devel_debug:
raise
# TODO: how do we get **all** errors from validation - there must be a way
return [
ValidationResult(
origin=ValidationOrigin(
name="dandischema",
version=dandischema.__version__,
),
severity=Severity.ERROR,
id="dandischema.TODO",
scope=Scope.FILE,
# metadata=metadata,
path=self.filepath, # note that it is not relative .path
message=str(e),
# TODO? dataset_path=dataset_path,
dandiset_path=self.dandiset_path,
)
]
return _pydantic_errors_to_validation_results(
[e], str(self.filepath), dandiset_path=self.dandiset_path
)
except Exception as e:
if devel_debug:
raise
Expand All @@ -207,11 +210,7 @@ def get_validation_errors(
extra={"validating": True},
)
return [
ValidationResult(
origin=ValidationOrigin(
name="dandi",
version=dandi.__version__,
),
DandiSchemaValidationResult(
severity=Severity.ERROR,
id="dandi.SOFTWARE_ERROR",
scope=Scope.FILE,
Expand Down Expand Up @@ -540,18 +539,17 @@ def get_validation_errors(
if devel_debug:
raise
# TODO: might reraise instead of making it into an error
return _pydantic_errors_to_validation_results([e], str(self.filepath))
return _pydantic_errors_to_validation_results(
[e], str(self.filepath), dandiset_path=self.dandiset_path
)

from .bids import NWBBIDSAsset

if not isinstance(self, NWBBIDSAsset):
if self.dandiset_path is None:
errors.append(
ValidationResult(
DandiValidationResult(
id="DANDI.NO_DANDISET_FOUND",
origin=ValidationOrigin(
name="dandi", version=dandi.__version__
),
severity=Severity.ERROR,
scope=Scope.FILE,
path=self.filepath,
Expand Down Expand Up @@ -679,11 +677,7 @@ def _check_required_fields(
if not v or (isinstance(v, str) and not v.strip()):
message = f"Required field {f!r} has no value"
errors.append(
ValidationResult(
origin=ValidationOrigin(
name="dandischema",
version=dandischema.__version__,
),
DandiValidationResult(
severity=Severity.ERROR,
id="dandischema.requred_field",
scope=Scope.FILE,
Expand All @@ -694,11 +688,7 @@ def _check_required_fields(
if v in ("REQUIRED", "PLACEHOLDER"):
message = f"Required field {f!r} has value {v!r}"
errors.append(
ValidationResult(
origin=ValidationOrigin(
name="dandischema",
version=dandischema.__version__,
),
DandiValidationResult(
severity=Severity.WARNING,
id="dandischema.placeholder_value",
scope=Scope.FILE,
Expand Down Expand Up @@ -743,8 +733,9 @@ def _get_nwb_inspector_version():


def _pydantic_errors_to_validation_results(
errors: Any[list[dict], Exception],
errors: list[dict | Exception],
file_path: str,
dandiset_path: Optional[Path] = None,
) -> list[ValidationResult]:
"""Convert list of dict from pydantic into our custom object."""
out = []
Expand All @@ -767,18 +758,14 @@ def _pydantic_errors_to_validation_results(
message = e.get("message", None)
scope = Scope.DANDISET
out.append(
ValidationResult(
origin=ValidationOrigin(
name="dandischema",
version=dandischema.__version__,
),
DandiSchemaValidationResult(
severity=Severity.ERROR,
id=id,
scope=scope,
path=Path(file_path),
message=message,
# TODO? dataset_path=dataset_path,
# TODO? dandiset_path=dandiset_path,
dandiset_path=dandiset_path,
)
)
return out
27 changes: 12 additions & 15 deletions dandi/files/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from contextlib import closing
from dataclasses import dataclass, field, replace
from datetime import datetime
from functools import partial
import os
from pathlib import Path
from time import sleep
Expand Down Expand Up @@ -40,6 +41,14 @@

lgr = get_logger()

ZarrValidationResult = partial(
ValidationResult,
origin=ValidationOrigin(
name="zarr",
version=zarr.version.version,
),
)


@dataclass
class LocalZarrEntry(BasePath):
Expand Down Expand Up @@ -194,11 +203,7 @@ def get_validation_errors(
if devel_debug:
raise
return [
ValidationResult(
origin=ValidationOrigin(
name="zarr",
version=zarr.version.version,
),
ZarrValidationResult(
severity=Severity.ERROR,
id="zarr.cannot_open",
scope=Scope.FILE,
Expand All @@ -208,11 +213,7 @@ def get_validation_errors(
]
if isinstance(data, zarr.Group) and not data:
return [
ValidationResult(
origin=ValidationOrigin(
name="zarr",
version=zarr.version.version,
),
ZarrValidationResult(
severity=Severity.ERROR,
id="zarr.empty_group",
scope=Scope.FILE,
Expand All @@ -225,11 +226,7 @@ def get_validation_errors(
if devel_debug:
raise ValueError(msg)
return [
ValidationResult(
origin=ValidationOrigin(
name="zarr",
version=zarr.version.version,
),
ZarrValidationResult(
severity=Severity.ERROR,
id="zarr.tree_depth_exceeded",
scope=Scope.FILE,
Expand Down
13 changes: 5 additions & 8 deletions dandi/organize.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

import numpy as np

from . import __version__, get_logger
from . import get_logger
from .dandiset import Dandiset
from .exceptions import OrganizeImpossibleError
from .metadata import get_metadata
Expand All @@ -38,7 +38,7 @@
move_file,
yaml_load,
)
from .validate_types import Scope, Severity, ValidationOrigin, ValidationResult
from .validate_types import DandiValidationResult, Scope, Severity, ValidationResult

lgr = get_logger()

Expand Down Expand Up @@ -1061,9 +1061,8 @@ def validate_organized_path(
errors = []
if not re.fullmatch(ORGANIZED_FILENAME_REGEX, path.name):
errors.append(
ValidationResult(
DandiValidationResult(
id="DANDI.NON_DANDI_FILENAME",
origin=ValidationOrigin(name="dandi", version=__version__),
severity=Severity.ERROR,
scope=Scope.FILE,
path=filepath,
Expand All @@ -1077,9 +1076,8 @@ def validate_organized_path(
and re.fullmatch(ORGANIZED_FOLDER_REGEX, str(path.parent))
):
errors.append(
ValidationResult(
DandiValidationResult(
id="DANDI.NON_DANDI_FOLDERNAME",
origin=ValidationOrigin(name="dandi", version=__version__),
severity=Severity.ERROR,
scope=Scope.FOLDER,
path=filepath,
Expand All @@ -1093,9 +1091,8 @@ def validate_organized_path(
assert m
if str(path.parent) != m[0]:
errors.append(
ValidationResult(
DandiValidationResult(
id="DANDI.METADATA_MISMATCH_SUBJECT",
origin=ValidationOrigin(name="dandi", version=__version__),
severity=Severity.ERROR,
scope=Scope.FILE,
path=filepath,
Expand Down
20 changes: 8 additions & 12 deletions dandi/pynwb_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@
envvar="DANDI_CACHE",
)

PyNWBValidationOrigin = ValidationOrigin(
name="pynwb",
version=pynwb.__version__,
)


def _sanitize_nwb_version(v, filename=None, log=None):
"""Helper to sanitize the value of nwb_version where possible
Expand Down Expand Up @@ -350,10 +355,7 @@ def validate(
for error_output in error_outputs:
errors.append(
ValidationResult(
origin=ValidationOrigin(
name="pynwb",
version=pynwb.__version__,
),
origin=PyNWBValidationOrigin,
severity=Severity.WARNING,
id=f"pywnb.{error_output}",
scope=Scope.FILE,
Expand All @@ -367,10 +369,7 @@ def validate(
errors.extend(
[
ValidationResult(
origin=ValidationOrigin(
name="pynwb",
version=pynwb.__version__,
),
origin=PyNWBValidationOrigin,
severity=Severity.ERROR,
id="pywnb.GENERIC",
scope=Scope.FILE,
Expand Down Expand Up @@ -400,10 +399,7 @@ def validate(
for e in nwb_errors:
errors.append(
ValidationResult(
origin=ValidationOrigin(
name="pynwb",
version=pynwb.__version__,
),
origin=PyNWBValidationOrigin,
severity=Severity.ERROR,
id="pywnb.GENERIC",
scope=Scope.FILE,
Expand Down
12 changes: 12 additions & 0 deletions dandi/validate_types.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
from dataclasses import dataclass
from enum import Enum
from functools import partial
from pathlib import Path
from typing import Dict, List, Optional

from . import __version__


@dataclass
class ValidationOrigin:
Expand Down Expand Up @@ -49,3 +52,12 @@ class ValidationResult:
path: Optional[Path] = None
path_regex: Optional[str] = None
severity: Optional[Severity] = None


#
# Our common result shortcut to avoid respecifying origin.
# More specific library ones could be defined closer to those libraries
# imports.
DandiValidationResult = partial(
ValidationResult, origin=ValidationOrigin(name="dandi", version=__version__)
)