Skip to content

Commit

Permalink
Avoid importing numpy
Browse files Browse the repository at this point in the history
  • Loading branch information
jwodder committed Nov 10, 2023
1 parent b314be5 commit 3d267b2
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 6 deletions.
5 changes: 4 additions & 1 deletion dandi/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
from .dandiset import Dandiset
from .exceptions import NotFoundError
from .files import LocalAsset, find_dandi_files
from .support.digests import get_digest, get_zarr_checksum
from .support.iterators import IteratorWithAggregation
from .support.pyout import naturalsize
from .utils import (
Expand Down Expand Up @@ -552,6 +551,8 @@ def _download_file(
possible checksums or other digests provided for the file. Only one
will be used to verify download
"""
from .support.digests import get_digest

if op.lexists(path):
annex_path = op.join(toplevel_path, ".git", "annex")
if existing is DownloadExisting.ERROR:
Expand Down Expand Up @@ -854,6 +855,8 @@ def _download_zarr(
lock: Lock,
jobs: int | None = None,
) -> Iterator[dict]:
from .support.digests import get_zarr_checksum

download_gens = {}
entries = list(asset.iterfiles())
digests = {}
Expand Down
5 changes: 4 additions & 1 deletion dandi/files/bases.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
from dandi.dandiapi import RemoteAsset, RemoteDandiset, RESTFullAPIClient
from dandi.metadata.core import get_default_metadata
from dandi.misctypes import DUMMY_DANDI_ETAG, Digest, LocalReadableFile, P
from dandi.support.digests import get_dandietag, get_digest
from dandi.utils import yaml_load
from dandi.validate_types import Scope, Severity, ValidationOrigin, ValidationResult

Expand Down Expand Up @@ -300,6 +299,8 @@ def get_metadata(

def get_digest(self) -> Digest:
    """
    Compute the dandi-etag digest of the file at ``self.filepath`` and
    return it wrapped in a `Digest`
    """
    # Deferred import: dandi.support.digests is only loaded when a digest is
    # actually requested, keeping its heavy dependencies out of module
    # import time.  The alias keeps the helper distinct from this method.
    from dandi.support.digests import get_digest as compute_digest

    return Digest.dandi_etag(compute_digest(self.filepath, digest="dandi-etag"))

Expand Down Expand Up @@ -330,6 +331,8 @@ def iter_upload(
``"done"`` and an ``"asset"`` key containing the resulting
`RemoteAsset`.
"""
from dandi.support.digests import get_dandietag

asset_path = metadata.setdefault("path", self.path)
client = dandiset.client
yield {"status": "calculating etag"}
Expand Down
20 changes: 17 additions & 3 deletions dandi/files/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
from dandischema.digests.zarr import get_checksum
from dandischema.models import BareAsset, DigestType
import requests
import zarr
from zarr_checksum import ZarrChecksumTree

from dandi import get_logger
from dandi.consts import (
Expand All @@ -34,7 +32,6 @@
)
from dandi.metadata.core import get_default_metadata
from dandi.misctypes import DUMMY_DANDI_ZARR_CHECKSUM, BasePath, Digest
from dandi.support.digests import get_digest, get_zarr_checksum, md5file_nocache
from dandi.utils import chunked, exclude_from_zarr, pluralize

from .bases import LocalDirectoryAsset
Expand Down Expand Up @@ -95,6 +92,9 @@ def get_digest(self) -> Digest:
directory, the algorithm will be the Dandi Zarr checksum algorithm; if
it is a file, it will be MD5.
"""

from dandi.support.digests import get_digest, get_zarr_checksum

if self.is_dir():
return Digest.dandi_zarr(get_zarr_checksum(self.filepath))
else:
Expand Down Expand Up @@ -151,6 +151,8 @@ def stat(self) -> ZarrStat:
"""Return various details about the Zarr asset"""

def dirstat(dirpath: LocalZarrEntry) -> ZarrStat:
from dandi.support.digests import md5file_nocache

size = 0
dir_md5s = {}
file_md5s = {}
Expand All @@ -175,6 +177,8 @@ def dirstat(dirpath: LocalZarrEntry) -> ZarrStat:

def get_digest(self) -> Digest:
    """
    Compute the dandi-zarr-checksum of the asset at ``self.filepath`` and
    return it wrapped in a `Digest`
    """
    # Deferred import: dandi.support.digests is only loaded when a checksum
    # is actually requested, keeping it out of module import time.
    from dandi.support.digests import get_zarr_checksum

    checksum = get_zarr_checksum(self.filepath)
    return Digest.dandi_zarr(checksum)

def get_metadata(
Expand All @@ -192,6 +196,8 @@ def get_validation_errors(
schema_version: str | None = None,
devel_debug: bool = False,
) -> list[ValidationResult]:
import zarr

errors: list[ValidationResult] = []
try:
data = zarr.open(str(self.filepath))
Expand Down Expand Up @@ -281,6 +287,10 @@ def iter_upload(
``"done"`` and an ``"asset"`` key containing the resulting
`RemoteAsset`.
"""
# Importing zarr_checksum leads to importing numpy, which we want to
# avoid unless necessary
from zarr_checksum import ZarrChecksumTree

# So that older clients don't get away with doing the wrong thing once
# Zarr upload to embargoed Dandisets is implemented in the API:
if dandiset.embargo_status is EmbargoStatus.EMBARGOED:
Expand Down Expand Up @@ -584,6 +594,8 @@ def register(self, e: LocalZarrEntry, digest: str | None = None) -> None:

@staticmethod
def _mkitem(e: LocalZarrEntry) -> UploadItem:
    """
    Build the `UploadItem` for entry ``e``, pairing it with the digest that
    ``md5file_nocache`` computes for the entry's file path
    """
    # Deferred import: dandi.support.digests is only loaded when an item is
    # actually built, keeping it out of module import time.
    from dandi.support.digests import md5file_nocache

    return UploadItem.from_entry(e, md5file_nocache(e.filepath))

Expand Down Expand Up @@ -634,6 +646,8 @@ def upload_request(self) -> dict[str, str]:
def _cmp_digests(
asset_path: str, local_entry: LocalZarrEntry, remote_digest: str
) -> tuple[LocalZarrEntry, str, bool]:
from dandi.support.digests import md5file_nocache

local_digest = md5file_nocache(local_entry.filepath)
if local_digest != remote_digest:
lgr.debug(
Expand Down
9 changes: 8 additions & 1 deletion dandi/support/digests.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
"""Provides helper to compute digests (md5 etc) on files
"""

# Importing this module imports fscacher, which imports joblib, which imports
# numpy, which is a "heavy" import, so avoid importing this module at the top
# level of a module.

from __future__ import annotations

from collections.abc import Callable
Expand All @@ -20,7 +24,6 @@

from dandischema.digests.dandietag import DandiETag
from fscacher import PersistentCache
from zarr_checksum import ZarrChecksumTree

from .threaded_walk import threaded_walk
from ..utils import Hasher, exclude_from_zarr
Expand Down Expand Up @@ -101,6 +104,10 @@ def get_zarr_checksum(path: Path, known: dict[str, str] | None = None) -> str:
passed in the ``known`` argument, which must be a `dict` mapping
slash-separated paths relative to the root of the Zarr to hex digests.
"""
# Importing zarr_checksum leads to importing numpy, which we want to avoid
# unless necessary
from zarr_checksum import ZarrChecksumTree

if path.is_file():
s = get_digest(path, "md5")
assert isinstance(s, str)
Expand Down

0 comments on commit 3d267b2

Please sign in to comment.