diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 4a8ce44d4..506d3d2ac 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -25,8 +25,10 @@ jobs:
     steps:
     - name: Set up environment
       uses: actions/checkout@v1
-      with: # no need for the history
-        fetch-depth: 1
+      with:
+        # Fetch all commits so that versioneer will return something compatible
+        # with semantic-version
+        fetch-depth: 0
     - name: Set up Python ${{ matrix.python }}
       uses: actions/setup-python@v1
       with:
diff --git a/dandi/cli/cmd_download.py b/dandi/cli/cmd_download.py
index 2e4d7ebc8..373461542 100644
--- a/dandi/cli/cmd_download.py
+++ b/dandi/cli/cmd_download.py
@@ -55,7 +55,7 @@
 )
 @click.argument("url", nargs=-1)
 @map_to_click_exceptions
-def download(url, output_dir, existing, jobs=6, format="pytout"):
+def download(url, output_dir, existing, jobs, format):
     """Download a file or entire folder from DANDI"""
     # First boring attempt at click commands being merely an interface to
     # Python function
diff --git a/dandi/cli/formatter.py b/dandi/cli/formatter.py
index 6dc28253a..dd851531b 100644
--- a/dandi/cli/formatter.py
+++ b/dandi/cli/formatter.py
@@ -45,9 +45,10 @@ def __init__(self, out=None):
         self.records = []
 
     def __exit__(self, exc_type, exc_value, traceback):
-        import yaml
+        import ruamel.yaml
 
-        self.out.write(yaml.dump(self.records))
+        yaml = ruamel.yaml.YAML()
+        yaml.dump(self.records, self.out)
 
     def __call__(self, rec):
         self.records.append(rec)
diff --git a/dandi/cli/tests/test_ls.py b/dandi/cli/tests/test_ls.py
index 19f29fd4b..d04f52f61 100644
--- a/dandi/cli/tests/test_ls.py
+++ b/dandi/cli/tests/test_ls.py
@@ -2,6 +2,7 @@
 import pytest
 
 from ..command import ls
+from ...utils import yaml_load
 
 
 @pytest.mark.parametrize("format", ("auto", "json", "json_pp", "yaml", "pyout"))
@@ -17,10 +18,9 @@ def test_smoke(simple1_nwb_metadata, simple1_nwb, format):
         load = json.loads
     elif format == "yaml":
-        import yaml
 
         def load(s):
-            obj = yaml.load(s, Loader=yaml.BaseLoader)
+            obj = yaml_load(s, typ="base")
             assert len(obj) == 1  # will be a list with a single elem
             return obj[0]
diff --git a/dandi/conftest.py b/dandi/conftest.py
index 8a791c1e7..e07338088 100644
--- a/dandi/conftest.py
+++ b/dandi/conftest.py
@@ -1 +1 @@
-from .tests.fixtures import *
+from .tests.fixtures import *  # noqa: F401, F403
diff --git a/dandi/consts.py b/dandi/consts.py
index 2e1da5f96..eab3d3fbe 100644
--- a/dandi/consts.py
+++ b/dandi/consts.py
@@ -75,9 +75,7 @@
 dandiset_metadata_file = "dandiset.yaml"
 dandiset_identifier_regex = "^[0-9]{6}$"
 
-dandi_instance = namedtuple(
-    "dandi_instance", ("girder", "gui", "redirector", "dandiapi")
-)
+dandi_instance = namedtuple("dandi_instance", ("girder", "gui", "redirector", "api"))
 
 known_instances = {
     "local-girder-only": dandi_instance(
@@ -118,7 +116,7 @@
 # Chunk size when iterating a download (and upload) body. Taken from girder-cli
 # TODO: should we make them smaller for download than for upload?
 # ATM used only in download
-MAX_CHUNK_SIZE = os.environ.get("DANDI_MAX_CHUNK_SIZE", 1024 * 1024 * 8)  # 64
+MAX_CHUNK_SIZE = int(os.environ.get("DANDI_MAX_CHUNK_SIZE", 1024 * 1024 * 8))  # 64
 
 #
 # Some routes
diff --git a/dandi/dandiapi.py b/dandi/dandiapi.py
index e123c1aad..930c56794 100644
--- a/dandi/dandiapi.py
+++ b/dandi/dandiapi.py
@@ -9,16 +9,6 @@
 lgr = get_logger()
 
 
-class HTTPError(requests.HTTPError):
-    """An HTTP error occurred.
-
-    Following Girder's recommendation of having its HttpError deprecated,
-    this is just a helper to bring that HTTPError into our space
-    """
-
-    pass
-
-
 # Following class is loosely based on GirderClient, with authentication etc
 # being stripped.
 # TODO: add copyright/license info
@@ -56,10 +46,13 @@ def session(self, session=None):
         """
         self._session = session if session else requests.Session()
-        yield self._session
-
-        self._session.close()
-        self._session = None
+        try:
+            yield self._session
+        finally:
+            # close only if we started a new one
+            if not session:
+                self._session.close()
+                self._session = None
 
     def _request_func(self, method):
         if self._session is not None:
@@ -141,8 +134,8 @@ def send_request(
         )
 
         # If success, return the json object. Otherwise throw an exception.
-        if result.status_code not in (200, 201):
-            raise HTTPError(
+        if not result.ok:
+            raise requests.HTTPError(
                 f"Error {result.status_code} while sending {method} request to {url}",
                 response=result,
             )
@@ -234,9 +227,7 @@ def get_asset(self, dandiset_id, version, uuid):
     def get_dandiset(self, dandiset_id, version):
         return self.get(f"/dandisets/{dandiset_id}/versions/{version}/")
 
-    def get_dandiset_assets(
-        self, dandiset_id, version, location=None, page_size=None, include_metadata=True
-    ):
+    def get_dandiset_assets(self, dandiset_id, version, location=None, page_size=None):
         """A generator to provide asset records
         """
         if location is not None:
@@ -297,13 +288,11 @@ def get_dandiset_assets(
             r["modified"] = ensure_datetime(uploaded_mtime)
             yield r
 
-    def get_dandiset_and_assets(
-        self, dandiset_id, version, location=None, include_metadata=True
-    ):
+    def get_dandiset_and_assets(self, dandiset_id, version, location=None):
         """This is pretty much an adapter to provide "harmonized" output in both
-        girder and dandiapi clients.
+        girder and DANDI api clients.
 
-        Harmonization should happen toward "dandiapi" BUT AFAIK it is still influx
+        Harmonization should happen toward DANDI API BUT AFAIK it is still in flux
         """
         # Fun begins!
         location_ = "/" + location if location else ""
@@ -320,9 +309,7 @@ def get_dandiset_and_assets(
         # Get dandiset information
         dandiset = self.get_dandiset(dandiset_id, version)
         # TODO: location
-        assets = self.get_dandiset_assets(
-            dandiset_id, version, location=location, include_metadata=include_metadata
-        )
+        assets = self.get_dandiset_assets(dandiset_id, version, location=location)
         return dandiset, assets
 
     def get_download_file_iter(
diff --git a/dandi/dandiset.py b/dandi/dandiset.py
index b00379722..c9b0c11cd 100644
--- a/dandi/dandiset.py
+++ b/dandi/dandiset.py
@@ -1,11 +1,10 @@
 """Classes/utilities for support of a dandiset"""
 import os
-import yaml
 
 from pathlib import Path
 
 from .consts import dandiset_metadata_file
-from .utils import find_parent_directory_containing, yaml_dump
+from .utils import find_parent_directory_containing, yaml_dump, yaml_load
 
 from . import get_logger
 
@@ -42,7 +41,7 @@ def _load_metadata(self):
         with open(self._metadata_file_obj) as f:
             # TODO it would cast 000001 if not explicitly string into
             # an int -- we should prevent it... probably with some custom loader
-            self.metadata = yaml.safe_load(f)
+            self.metadata = yaml_load(f, typ="safe")
         else:
             self.metadata = None
 
@@ -70,15 +69,9 @@ def update_metadata(self, meta):
             lgr.debug("No updates to metadata, returning")
             return
 
-        # We will use ruaml to load/save it
-        # Seems to be too tricky to add new entries, etc so we will
-        # just resort to explicitly adding the header while saving
-        # import ruamel.yaml
-        # yaml = ruamel.yaml.YAML()  # defaults to round-trip if no parameters
-        # given
         if self._metadata_file_obj.exists():
             with open(self._metadata_file_obj) as f:
-                rec = yaml.safe_load(f)
+                rec = yaml_load(f, typ="safe")
         else:
             rec = {}
 
diff --git a/dandi/download.py b/dandi/download.py
index 8dca651c5..6c39d7b2a 100644
--- a/dandi/download.py
+++ b/dandi/download.py
@@ -20,7 +20,7 @@
 )
 from .dandiset import Dandiset
 from .exceptions import FailedToConnectError, NotFoundError, UnknownURLError
-from .utils import flattened, is_same_time
+from .utils import flattened, is_same_time, get_instance
 
 import humanize
 from .support.pyout import naturalsize
@@ -44,7 +44,7 @@ class _dandi_url_parser:
     #  - 'only' - would interrupt if no redirection happens
     # server_type:
     #  - 'girder' - the default/old
-    #  - 'dandiapi' - the "new" (as of 20200715 state of various PRs)
+    #  - 'api' - the "new" (as of 20200715 state of various PRs)
     # rewrite:
     #  - callable -- which would rewrite that "URI"
     "DANDI:": {"rewrite": lambda x: "https://identifiers.org/" + x},
@@ -69,7 +69,7 @@ class _dandi_url_parser:
         "/(?P<version>([.0-9]{5,}|draft))"
         "(/files(\\?location=(?P<location>.*)?)?)?"
         f"(/files(\\?_id={id_grp}(&_modelType=folder)?)?)?"
-        "$": {"server_type": "dandiapi"},
+        "$": {"server_type": "api"},
         # https://deploy-preview-341--gui-dandiarchive-org.netlify.app/#/dandiset/000006/draft
         # (no API yet)
         "https?://.*": {"handle_redirect": "only"},
@@ -77,12 +77,12 @@ class _dandi_url_parser:
     # We might need to remap some assert_types
     map_asset_types = {"dandiset": "folder"}
     # And lets create our mapping into girder instances from known_instances:
-    map_to = {"girder": {}, "dandiapi": {}}
-    for girder, gui, redirector, dandiapi in known_instances.values():  # noqa: F402
+    map_to = {"girder": {}, "api": {}}
+    for girder, gui, redirector, api in known_instances.values():  # noqa: F402
         for h in (gui, redirector):
             if h:
                 map_to["girder"][h] = girder
-                map_to["dandiapi"][h] = dandiapi
+                map_to["api"][h] = api
 
     @classmethod
     def parse(cls, url, *, map_instance=True):
@@ -165,7 +165,7 @@ def parse(cls, url, *, map_instance=True):
                         settings["map_instance"], ", ".join(known_instances)
                     )
                 )
-            known_instance = known_instances[settings["map_instance"]]
+            known_instance = get_instance(settings["map_instance"])
             # for consistency, add
             server = getattr(known_instance, server_type)
             if not server.endswith("/"):
@@ -205,7 +205,7 @@ def parse(cls, url, *, map_instance=True):
                     i.split("+")[1] for i in groups["multiitem"].split("/") if i
                 ]
                 asset_type = "item"
-            elif server_type == "dandiapi":
+            elif server_type == "api":
                 asset_type = groups.get("asset_type")
                 dandiset_id = groups.get("dandiset_id")
                 version = groups.get("version")
@@ -270,123 +270,33 @@ def download(urls, output_dir, *, format="pyout", existing="error", jobs=1):
     import pyout
     from .support import pyout as pyouts
 
-    class ItemsSummary:
-        def __init__(self):
-            self.files = 0
-            self.t0 = None  # when first record is seen
-            self.size = 0
-            self.has_unknown_sizes = False
-
-        def __call__(self, rec, prior=None):
-            assert prior in (None, self)
-            if not self.files:
-                self.t0 = time.time()
-            self.files += 1
-            size = rec.get("size")
-            if size is not None:
-                self.size += size
-            elif rec.get("path", "") == "dandiset.yaml":
-                # again -- so special. TODO: make it a proper file
-                pass
-            else:
-                self.has_unknown_sizes = True
-            return self
-
     # dandi.cli.formatters are used in cmd_ls to provide switchable
     pyout_style = pyouts.get_style(hide_if_missing=False)
 
     rec_fields = ("path", "size", "done", "done%", "checksum", "status", "message")
     out = pyout.Tabular(style=pyout_style, columns=rec_fields, max_workers=jobs)
 
-    # Establish "fancy" download while still possibly traversing the dandiset
-    # functionality.
-    from .support.iterators import IteratorWithAggregation
-
-    items_summary = ItemsSummary()
-    it = IteratorWithAggregation(
-        # unfortunately Yarik missed the point that we need to wrap
-        # "assets" generator within downloader_generator
-        # so we do not have assets here! Ad-hoc solution for now is to
-        # pass this beast so it could get .gen set within downloader_generator
-        None,  # download_generator(urls, output_dir, existing=existing),
-        items_summary,
-    )
-
-    def agg_files(*ignored):
-        ret = str(items_summary.files)
-        if not it.finished:
-            ret += "+"
-        return ret
-
-    def agg_size(sizes):
-        """Formatter for "size" column where it would show
-
-        how much is "active" (or done)
-        +how much yet to be "shown".
-        """
-        active = sum(sizes)
-        if (active, items_summary.size) == (0, 0):
-            return ""
-        v = [naturalsize(active)]
-        if not it.finished or (
-            active != items_summary.size or items_summary.has_unknown_sizes
-        ):
-            extra = items_summary.size - active
-            if extra < 0:
-                lgr.debug("Extra size %d < 0 -- must not happen", extra)
-            else:
-                extra_str = "+%s" % naturalsize(extra)
-                if not it.finished:
-                    extra_str = ">" + extra_str
-                if items_summary.has_unknown_sizes:
-                    extra_str += "+?"
-                v.append(extra_str)
-        return v
-
-    def agg_done(done_sizes):
-        """Formatter for "DONE" column
-        """
-        done = sum(done_sizes)
-        if it.finished and done == 0 and items_summary.size == 0:
-            # even with 0s everywhere consider it 100%
-            r = 1.0
-        elif items_summary.size:
-            r = done / items_summary.size
-        else:
-            r = 0
-        pref = ""
-        if not it.finished:
-            pref += "<"
-            if items_summary.has_unknown_sizes:
-                pref += "?"
-        v = [naturalsize(done), "%s%.2f%%" % (pref, 100 * r)]
-        if done and items_summary.t0 is not None and r and items_summary.size != 0:
-            dt = time.time() - items_summary.t0
-            more_time = dt / r if r != 1 else 0
-            more_time_str = humanize.naturaldelta(more_time)
-            if not it.finished:
-                more_time_str += "<"
-            if items_summary.has_unknown_sizes:
-                more_time_str += "+?"
-            if more_time:
-                v.append("ETA: %s" % more_time_str)
-        return v
-
+    out_helper = PYOUTHelper()
     pyout_style["done"] = pyout_style["size"].copy()
-    pyout_style["size"]["aggregate"] = agg_size
-    pyout_style["done"]["aggregate"] = agg_done
+    pyout_style["size"]["aggregate"] = out_helper.agg_size
+    pyout_style["done"]["aggregate"] = out_helper.agg_done
 
     # I thought I was making a beautiful flower but ended up with cacti
     # which never blooms... All because assets are looped through inside download_generator
     # TODO: redo
-    kw = dict(assets_it=it)
+    kw = dict(assets_it=out_helper.it)
     if jobs > 1 and format == "pyout":
         # It could handle delegated to generator downloads
        kw["yield_generator_for_fields"] = rec_fields[1:]  # all but path
 
     gen_ = download_generator(urls, output_dir, existing=existing, **kw)
 
-    # TODO: redo frontends similarly to how command_ls did it
+    # TODOs:
+    #  - redo frontends similarly to how command_ls did it
+    #  - have a single loop with analysis of `rec` to tell whether any file
+    #    has failed to download.  If any has: an exception should probably
+    #    be raised.  API discussion for Python side of API:
+
     if format == "debug":
         for rec in gen_:
             print(rec)
@@ -436,7 +346,7 @@ def download_generator(
     # We could later try to "dandi_authenticate" if run into permission issues.
     # May be it could be not just boolean but the "id" to be used?
     # TODO: remove whenever API starts to support drafts in an unknown version
-    if server_type == "dandiapi" and asset_id.get("version") == "draft":
+    if server_type == "api" and asset_id.get("version") == "draft":
         asset_id, asset_type, client, server_type = _map_to_girder(url)
         args = asset_id, asset_type
     elif server_type == "girder":
@@ -444,7 +354,7 @@ def download_generator(
             server_url, authenticate=False, progressbars=True  # TODO: redo all this
         )
         args = asset_id, asset_type
-    elif server_type == "dandiapi":
+    elif server_type == "api":
         client = DandiAPIClient(server_url)
         args = (asset_id["dandiset_id"], asset_id["version"], asset_id.get("location"))
     else:
@@ -491,7 +401,7 @@ def download_generator(
         if server_type == "girder":
             down_args = (asset["id"],)
             digests = digests_from_metadata
-        elif server_type == "dandiapi":
+        elif server_type == "api":
             # Even worse to get them from the asset record which also might have its return
             # record still changed, https://github.com/dandi/dandi-publish/issues/79
             down_args = args[:2] + (asset["uuid"],)
@@ -541,9 +451,129 @@ def download_generator(
         yield dict(resp, path=path)
 
 
+class ItemsSummary:
+    """A helper "structure" to accumulate information about assets to be downloaded
+
+    To be used as a callback to IteratorWithAggregation
+    """
+
+    def __init__(self):
+        self.files = 0
+        # TODO: get rid of needing it
+        self.t0 = None  # when first record is seen
+        self.size = 0
+        self.has_unknown_sizes = False
+
+    def as_dict(self):
+        return {a: getattr(self, a) for a in ("files", "size", "has_unknown_sizes")}
+
+    def __call__(self, rec, prior=None):
+        assert prior in (None, self)
+        if not self.files:
+            self.t0 = time.time()
+        self.files += 1
+        size = rec.get("size")
+        if size is not None:
+            self.size += size
+        elif rec.get("path", "") == "dandiset.yaml":
+            # again -- so special. TODO: make it a proper file
+            pass
+        else:
+            self.has_unknown_sizes = True
+        return self
+
+
+class PYOUTHelper:
+    """Helper for PYOUT styling
+
+    Provides aggregation callbacks for PyOUT and also an iterator to be wrapped around
+    iterating over assets, so it would get "totals" as soon as they are available.
+    """
+
+    def __init__(self):
+        # Establish "fancy" download while still possibly traversing the dandiset
+        # functionality.
+        from .support.iterators import IteratorWithAggregation
+
+        self.items_summary = ItemsSummary()
+        self.it = IteratorWithAggregation(
+            # unfortunately Yarik missed the point that we need to wrap
+            # "assets" generator within downloader_generator
+            # so we do not have assets here!  Ad-hoc solution for now is to
+            # pass this beast so it could get .gen set within downloader_generator
+            None,  # download_generator(urls, output_dir, existing=existing),
+            self.items_summary,
+        )
+
+    def agg_files(self, *ignored):
+        ret = str(self.items_summary.files)
+        if not self.it.finished:
+            ret += "+"
+        return ret
+
+    def agg_size(self, sizes):
+        """Formatter for "size" column where it would show
+
+        how much is "active" (or done)
+        +how much yet to be "shown".
+        """
+        active = sum(sizes)
+        if (active, self.items_summary.size) == (0, 0):
+            return ""
+        v = [naturalsize(active)]
+        if not self.it.finished or (
+            active != self.items_summary.size or self.items_summary.has_unknown_sizes
+        ):
+            extra = self.items_summary.size - active
+            if extra < 0:
+                lgr.debug("Extra size %d < 0 -- must not happen", extra)
+            else:
+                extra_str = "+%s" % naturalsize(extra)
+                if not self.it.finished:
+                    extra_str = ">" + extra_str
+                if self.items_summary.has_unknown_sizes:
+                    extra_str += "+?"
+                v.append(extra_str)
+        return v
+
+    def agg_done(self, done_sizes):
+        """Formatter for "DONE" column
+        """
+        done = sum(done_sizes)
+        if self.it.finished and done == 0 and self.items_summary.size == 0:
+            # even with 0s everywhere consider it 100%
+            r = 1.0
+        elif self.items_summary.size:
+            r = done / self.items_summary.size
+        else:
+            r = 0
+        pref = ""
+        if not self.it.finished:
+            pref += "<"
+            if self.items_summary.has_unknown_sizes:
+                pref += "?"
+        v = [naturalsize(done), "%s%.2f%%" % (pref, 100 * r)]
+        if (
+            done
+            and self.items_summary.t0 is not None
+            and r
+            and self.items_summary.size != 0
+        ):
+            dt = time.time() - self.items_summary.t0
+            more_time = dt / r if r != 1 else 0
+            more_time_str = humanize.naturaldelta(more_time)
+            if not self.it.finished:
+                more_time_str += "<"
+            if self.items_summary.has_unknown_sizes:
+                more_time_str += "+?"
+            if more_time:
+                v.append("ETA: %s" % more_time_str)
+        return v
+
+
 def _map_to_girder(url):
     """
-    "draft" datasets are not yet supported through dandiapi. So we need to
+    "draft" datasets are not yet supported through our DANDI API.  So we need to
     perform special handling for now: discover girder_id for it and then
     proceed with girder
     """
@@ -576,7 +606,7 @@ def _map_to_girder(url):
     return asset_id, asset_type, client, server_type
 
 
-def skip_file(msg):
+def _skip_file(msg):
     return {"status": "skipped", "message": str(msg)}
 
 
@@ -590,7 +620,7 @@ def _populate_dandiset_yaml(dandiset_path, metadata, overwrite):
         yield {"message": "updating"}
     lgr.debug(f"Updating {dandiset_metadata_file} from obtained dandiset " f"metadata")
     if op.lexists(dandiset_yaml) and not overwrite:
-        yield skip_file("already exists")
+        yield _skip_file("already exists")
         return
     else:
         dandiset = Dandiset(dandiset_path, allow_empty=True)
@@ -618,7 +648,7 @@ def _download_file(
     Parameters
     ----------
     downloader: callable returning a generator
-      A backend (girder or dandiapi) specific fixture for downloading some file into
+      A backend (girder or api) specific fixture for downloading some file into
       path. It should be a generator yielding downloaded blocks.
     size: int, optional
       Target size if known
@@ -631,7 +661,7 @@ def _download_file(
         if existing == "error":
             raise FileExistsError(block)
         elif existing == "skip":
-            yield skip_file("already exists")
+            yield _skip_file("already exists")
             return
         elif existing == "overwrite":
             pass
@@ -651,7 +681,7 @@ def _download_file(
             # but mtime is different
             if same == ["mtime", "size"]:
                 # TODO: add recording and handling of .nwb object_id
-                yield skip_file("same time and size")
+                yield _skip_file("same time and size")
                 return
             lgr.debug(f"{path!r} - same attributes: {same}. Redownloading")
 
@@ -692,6 +722,7 @@ def _download_file(
                 msg = {"done": downloaded}
                 if size:
                     if downloaded > size and not warned:
+                        warned = True
                         # Yield ERROR?
                         lgr.warning(
                             "Downloaded %d bytes although size was told to be just %d",
diff --git a/dandi/exceptions.py b/dandi/exceptions.py
index ce64b587d..c004be2a0 100644
--- a/dandi/exceptions.py
+++ b/dandi/exceptions.py
@@ -23,3 +23,40 @@ class FailedToConnectError(RuntimeError):
     """Failed to connect to online resource"""
 
     pass
+
+
+class LockingError(RuntimeError):
+    """Failed to lock or unlock a resource"""
+
+    pass
+
+
+class CliVersionError(RuntimeError):
+    """ Base class for `CliVersionTooOldError` and `BadCliVersionError` """
+
+    def __init__(self, our_version, minversion, bad_versions):
+        self.our_version = our_version
+        self.minversion = minversion
+        self.bad_versions = bad_versions
+
+    def server_requirements(self):
+        s = f"Server requires at least version {self.minversion}"
+        if self.bad_versions:
+            s += f" (but not {', '.join(map(str, self.bad_versions))})"
+        return s
+
+
+class CliVersionTooOldError(CliVersionError):
+    def __str__(self):
+        return (
+            f"Client version {self.our_version} is too old! "
+            + self.server_requirements()
+        )
+
+
+class BadCliVersionError(CliVersionError):
+    def __str__(self):
+        return (
+            f"Client version {self.our_version} is rejected by server! "
+            + self.server_requirements()
+        )
diff --git a/dandi/girder.py b/dandi/girder.py
index 594210985..2aefdf66b 100644
--- a/dandi/girder.py
+++ b/dandi/girder.py
@@ -1,7 +1,8 @@
-import os
-import os.path as op
+import contextlib
 import json
 import keyring
+import os
+import os.path as op
 import random
 import sys
 import time
@@ -12,8 +13,9 @@
 import girder_client as gcl
 
 from . import get_logger
-from .consts import MAX_CHUNK_SIZE, known_instances_rev
+from .exceptions import LockingError
 from .utils import ensure_datetime, flattened, flatten, remap_dict
+from .consts import known_instances_rev, MAX_CHUNK_SIZE
 
 lgr = get_logger()
 
@@ -30,6 +32,8 @@ class GirderNotFound(Exception):
     pass
 
 
+# TODO: remove if we start to expose this as a Python library which could
+# be longer lived than just a CLI
 @lru_cache(1024)
 def lookup(client, name, asset_type="collection", path=None):
     """A helper for common logic while looking up things on girder by name"""
@@ -488,6 +492,29 @@ def get_dandiset_and_assets(
             ),
         )
 
+    @contextlib.contextmanager
+    def lock_dandiset(self, dandiset_identifier: str):
+        presumably_locked = False
+        try:
+            lgr.debug("Trying to acquire lock for %s", dandiset_identifier)
+            try:
+                self.post(f"dandi/{dandiset_identifier}/lock")
+            except gcl.HttpError:
+                raise LockingError(f"Failed to lock dandiset {dandiset_identifier}")
+            else:
+                presumably_locked = True
+
+            yield
+        finally:
+            if presumably_locked:
+                lgr.debug("Trying to release the lock for %s", dandiset_identifier)
+                try:
+                    self.post(f"dandi/{dandiset_identifier}/unlock")
+                except gcl.HttpError:
+                    raise LockingError(
+                        f"Failed to unlock dandiset {dandiset_identifier}"
+                    )
+
 
 def _harmonize_girder_dandiset_to_dandi_api(rec):
     """
diff --git a/dandi/organize.py b/dandi/organize.py
index 987ebc69e..e88896e80 100644
--- a/dandi/organize.py
+++ b/dandi/organize.py
@@ -13,7 +13,7 @@
 from .exceptions import OrganizeImpossibleError
 from . import get_logger
 from .pynwb_utils import get_neurodata_types_to_modalities_map, get_object_id
-from .utils import ensure_datetime, flattened
+from .utils import ensure_datetime, flattened, yaml_load
 
 lgr = get_logger()
 
@@ -467,7 +467,7 @@ def populate_dataset_yml(filepath, metadata):
         pass
 
     with open(filepath) as f:
-        rec = yaml.load(f)
+        rec = yaml_load(f)
 
     if not rec:
         rec = {}
diff --git a/dandi/register.py b/dandi/register.py
index 870304898..6b4c3d9b6 100644
--- a/dandi/register.py
+++ b/dandi/register.py
@@ -3,14 +3,15 @@
 
 from . import get_logger
 from . import girder
-from .consts import dandiset_metadata_file, known_instances, routes
+from .consts import dandiset_metadata_file, routes
 from .dandiset import Dandiset
+from .utils import get_instance
 
 lgr = get_logger()
 
 
 def register(name, description, dandiset_path=None, dandi_instance="dandi"):
-    dandi_instance = known_instances[dandi_instance]
+    dandi_instance = get_instance(dandi_instance)
     if not dandiset_path and op.exists(dandiset_metadata_file):
         dandiset = Dandiset.find(os.getcwd())
         if dandiset:
diff --git a/dandi/support/iterators.py b/dandi/support/iterators.py
index ed608f614..f08c756b2 100644
--- a/dandi/support/iterators.py
+++ b/dandi/support/iterators.py
@@ -100,6 +100,9 @@ def worker():
 
         # yield from the queue (.total and .finished could be accessed meanwhile)
         while True:
+            if self.reraise_immediately and self._exc is not None:
+                break
+
             # race condition HERE between checking for self.finished and
             if self.finished and queue.empty():
                 break
@@ -109,8 +112,6 @@ def worker():
                 yield queue.get(timeout=0.001)
             except Empty:
                 continue
-            if self.reraise_immediately and self._exc is not None:
-                break
         t.join()
         if self._exc is not None:
             raise self._exc
diff --git a/dandi/support/tests/test_cache.py b/dandi/support/tests/test_cache.py
index 5bb5c40d3..139a8c6cb 100644
--- a/dandi/support/tests/test_cache.py
+++ b/dandi/support/tests/test_cache.py
@@ -185,9 +185,9 @@ def func(path):
         # must be running during conda build which blows up paths with
         # _placehold_ers
         pytest.skip("seems to be running on conda and hitting the limits")
-    assert outputs[0].stdout.strip().decode() == f"Running script.py.DONE"
+    assert outputs[0].stdout.strip().decode() == "Running script.py.DONE"
     for o in outputs[1:]:
-        assert o.stdout.strip().decode() == f"DONE"
+        assert o.stdout.strip().decode() == "DONE"
 
     cache.clear()
diff --git a/dandi/support/tests/test_digests.py b/dandi/support/tests/test_digests.py
index 0792fa8f4..fa410e983 100644
--- a/dandi/support/tests/test_digests.py
+++ b/dandi/support/tests/test_digests.py
@@ -19,7 +19,9 @@ def test_digester(tmp_path):
         "md5": "202cb962ac59075b964b07152d234b70",
         "sha1": "40bd001563085fc35165329ea1ff5c5ecbdbbeef",
         "sha256": "a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3",
-        "sha512": "3c9909afec25354d551dae21590bb26e38d53f2173b8d3dc3eee4c047e7ab1c1eb8b85103e3be7ba613b31bb5c9c36214dc9f14a42fd7a2fdb84856bca5c44c2",
+        "sha512": "3c9909afec25354d551dae21590bb26e38d53f2173b8d3dc3eee4c047e7a"
+        "b1c1eb8b85103e3be7ba613b31bb5c9c36214dc9f14a42fd7a2fdb84856b"
+        "ca5c44c2",
     }
 
     f = tmp_path / "0"
@@ -28,7 +30,9 @@ def test_digester(tmp_path):
         "md5": "93b885adfe0da089cdf634904fd59f71",
         "sha1": "5ba93c9db0cff93f52b521d7420e43f6eda2784f",
         "sha256": "6e340b9cffb37a989ca544e6bb780a2c78901d3fb33738768511a30617afa01d",
-        "sha512": "b8244d028981d693af7b456af8efa4cad63d282e19ff14942c246e50d9351d22704a802a71c3580b6370de4ceb293c324a8423342557d4e5c38438f0e36910ee",
+        "sha512": "b8244d028981d693af7b456af8efa4cad63d282e19ff14942c246e50d935"
+        "1d22704a802a71c3580b6370de4ceb293c324a8423342557d4e5c38438f0"
+        "e36910ee",
     }
 
     f = tmp_path / "long.txt"
@@ -37,5 +41,7 @@ def test_digester(tmp_path):
         "md5": "81b196e3d8a1db4dd2e89faa39614396",
         "sha1": "5273ac6247322c3c7b4735a6d19fd4a5366e812f",
         "sha256": "80028815b3557e30d7cbef1d8dbc30af0ec0858eff34b960d2839fd88ad08871",
-        "sha512": "684d23393eee455f44c13ab00d062980937a5d040259d69c6b291c983bf635e1d405ff1dc2763e433d69b8f299b3f4da500663b813ce176a43e29ffcc31b0159",
+        "sha512": "684d23393eee455f44c13ab00d062980937a5d040259d69c6b291c983bf6"
+        "35e1d405ff1dc2763e433d69b8f299b3f4da500663b813ce176a43e29ffc"
+        "c31b0159",
     }
diff --git a/dandi/support/tests/test_iterators.py b/dandi/support/tests/test_iterators.py
index 4d9d37f99..15549b0e4 100644
--- a/dandi/support/tests/test_iterators.py
+++ b/dandi/support/tests/test_iterators.py
@@ -15,7 +15,8 @@ def sleeping_range(n, secs=0.01, thr=None):
 
 
 def test_IteratorWithAggregation():
-    sumup = lambda v, t=0: v + t
+    def sumup(v, t=0):
+        return v + t
 
     it = IteratorWithAggregation(sleeping_range(3, 0.0001), agg=sumup)
     # we should get our summary available after 2nd iteration and before it finishes
diff --git a/dandi/tests/data/dandiarchive-docker/docker-compose.yml b/dandi/tests/data/dandiarchive-docker/docker-compose.yml
index ef0a109c3..5b825e21f 100644
--- a/dandi/tests/data/dandiarchive-docker/docker-compose.yml
+++ b/dandi/tests/data/dandiarchive-docker/docker-compose.yml
@@ -42,7 +42,7 @@ services:
     ports:
       - "8079:8080"
     environment:
-      GIRDER_URL: http://girder:8080
+      GIRDER_URL: http://localhost:8081
       GUI_URL: http://localhost:8086
       ABOUT_URL: http://www.dandiarchive.org
diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py
index 6b9f4640d..ef46e83a9 100644
--- a/dandi/tests/fixtures.py
+++ b/dandi/tests/fixtures.py
@@ -18,6 +18,7 @@
 
 lgr = get_logger()
 
+
 # TODO: move into some common fixtures. We might produce a number of files
 # and also carry some small ones directly in git for regression testing
 @pytest.fixture(scope="session")
diff --git a/dandi/tests/skip.py b/dandi/tests/skip.py
index 9cfd959ec..93f10b8e5 100644
--- a/dandi/tests/skip.py
+++ b/dandi/tests/skip.py
@@ -96,7 +96,7 @@ def windows():
 #     return msg, missing_deps
 
 # The following code is modified from the original ReproMan source:
-### BEGIN MODIFIED CODE
+# ### BEGIN MODIFIED CODE
 
 
 def no_docker_commands():
@@ -121,7 +121,7 @@ def is_engine_running():
     return "docker engine not running", not is_engine_running()
 
 
-### END MODIFIED CODE
+# ### END MODIFIED CODE
 
 
 def no_network():
diff --git a/dandi/tests/test_download.py b/dandi/tests/test_download.py
index e31c6d95a..5ae5732d5 100644
--- a/dandi/tests/test_download.py
+++ b/dandi/tests/test_download.py
@@ -20,10 +20,10 @@ def _assert_parse_girder_url(url):
     return a, aid
 
 
-def _assert_parse_dandiapi_url(url):
+def _assert_parse_api_url(url):
     st, s, a, aid = parse_dandi_url(url)
-    assert st == "dandiapi"
-    assert s == known_instances["dandi"].dandiapi + "/"
+    assert st == "api"
+    assert s == known_instances["dandi"].api + "/"
     return a, aid
 
 
@@ -62,25 +62,29 @@ def test_parse_dandi_url():
     # new (v1? not yet tagged) web UI, and as it comes from a PR,
     # so we need to provide yet another mapping to stock girder
     assert _assert_parse_girder_url(
-        "https://refactor--gui-dandiarchive-org.netlify.app/#/file-browser/folder/5e9f9588b5c9745bad9f58fe"
+        "https://refactor--gui-dandiarchive-org.netlify.app/#/file-browser"
+        "/folder/5e9f9588b5c9745bad9f58fe"
     ) == ("folder", ["5e9f9588b5c9745bad9f58fe"])
 
     # New DANDI web UI driven by DANDI API. Again no version assigned/planned!
     # see https://github.com/dandi/dandiarchive/pull/341
-    url1 = "https://deploy-preview-341--gui-dandiarchive-org.netlify.app/#/dandiset/000006/0.200714.1807"
-    assert _assert_parse_dandiapi_url(url1) == (
+    url1 = (
+        "https://deploy-preview-341--gui-dandiarchive-org.netlify.app/"
+        "#/dandiset/000006/0.200714.1807"
+    )
+    assert _assert_parse_api_url(url1) == (
         "dandiset",
         {"dandiset_id": "000006", "version": "0.200714.1807"},
     )
-    assert _assert_parse_dandiapi_url(url1 + "/files") == (
+    assert _assert_parse_api_url(url1 + "/files") == (
         "dandiset",
         {"dandiset_id": "000006", "version": "0.200714.1807"},
     )
-    assert _assert_parse_dandiapi_url(url1 + "/files?location=%2F") == (
+    assert _assert_parse_api_url(url1 + "/files?location=%2F") == (
         "dandiset",
         {"dandiset_id": "000006", "version": "0.200714.1807"},
     )
-    assert _assert_parse_dandiapi_url(url1 + "/files?location=%2Fsub-anm369962%2F") == (
+    assert _assert_parse_api_url(url1 + "/files?location=%2Fsub-anm369962%2F") == (
         "folder",
         {
             "dandiset_id": "000006",
@@ -89,7 +93,7 @@ def test_parse_dandi_url():
         },
     )
     # no trailing / - Yarik considers it to be an item (file)
-    assert _assert_parse_dandiapi_url(url1 + "/files?location=%2Fsub-anm369962") == (
+    assert _assert_parse_api_url(url1 + "/files?location=%2Fsub-anm369962") == (
         "item",
         {
             "dandiset_id": "000006",
@@ -98,7 +102,7 @@ def test_parse_dandi_url():
         },
     )
     # And the hybrid for "drafts" where it still goes by girder ID
-    assert _assert_parse_dandiapi_url(
+    assert _assert_parse_api_url(
         "https://deploy-preview-341--gui-dandiarchive-org.netlify.app/#/dandiset/000027"
         "/draft/files?_id=5f176583f63d62e1dbd06943&_modelType=folder"
     ) == (
@@ -184,7 +188,8 @@ def sleep(duration):
 @pytest.mark.parametrize(
     "url",
     [  # Should go through API
-        "https://deploy-preview-341--gui-dandiarchive-org.netlify.app/#/dandiset/000027/0.200721.2222",
+        "https://deploy-preview-341--gui-dandiarchive-org.netlify.app/"
+        "#/dandiset/000027/0.200721.2222",
         # Good old girder (draft)
         "https://gui.dandiarchive.org/#/dandiset/5f0640a2ab90ac46c4561e4f",
     ],
diff --git a/dandi/tests/test_girder.py b/dandi/tests/test_girder.py
new file mode 100644
index 000000000..37152cdee
--- /dev/null
+++ b/dandi/tests/test_girder.py
@@ -0,0 +1,37 @@
+import pytest
+from ..exceptions import LockingError
+from .. import girder
+
+
+def test_lock_dandiset(local_docker_compose_env):
+    DANDISET_ID = "000001"
+    client = girder.get_client(local_docker_compose_env["instance"].girder)
+    resp = client.get(f"dandi/{DANDISET_ID}/lock/owner")
+    assert resp is None
+    with client.lock_dandiset(DANDISET_ID):
+        resp = client.get(f"dandi/{DANDISET_ID}/lock/owner")
+        assert resp is not None
+    resp = client.get(f"dandi/{DANDISET_ID}/lock/owner")
+    assert resp is None
+
+
+def test_lock_dandiset_error_on_nest(local_docker_compose_env):
+    DANDISET_ID = "000001"
+    client = girder.get_client(local_docker_compose_env["instance"].girder)
+    with client.lock_dandiset(DANDISET_ID):
+        with pytest.raises(LockingError) as excinfo:
+            with client.lock_dandiset(DANDISET_ID):
+                raise AssertionError("This shouldn't be reached")  # pragma: no cover
+        assert str(excinfo.value) == f"Failed to lock dandiset {DANDISET_ID}"
+
+
+def test_lock_dandiset_unlock_within(local_docker_compose_env):
+    DANDISET_ID = "000001"
+    client = girder.get_client(local_docker_compose_env["instance"].girder)
+    unlocked = False
+    with pytest.raises(LockingError) as excinfo:
+        with client.lock_dandiset(DANDISET_ID):
+            client.post(f"dandi/{DANDISET_ID}/unlock")
+            unlocked = True
+    assert unlocked
+    assert str(excinfo.value) == f"Failed to unlock dandiset {DANDISET_ID}"
diff --git a/dandi/tests/test_organize.py b/dandi/tests/test_organize.py
index e2c3b66bd..2166a8213 100644
--- a/dandi/tests/test_organize.py
+++ b/dandi/tests/test_organize.py
@@ -2,7 +2,6 @@
 from glob import glob
 import os.path as op
 import ruamel.yaml
-import yaml
 
 from ..consts import file_operation_modes
 
@@ -14,14 +13,11 @@
     get_obj_id,
     populate_dataset_yml,
 )
-from ..utils import find_files, on_windows
+from ..utils import find_files, on_windows, yaml_load
 from ..pynwb_utils import copy_nwb_file, get_object_id
 
 import pytest
 
-yaml_ruamel = ruamel.yaml.YAML()  # defaults to round-trip if no parameters given
-
-
 def test_sanitize_value():
     # . is not sanitized in extension but elsewhere
     assert _sanitize_value("_.ext", "extension") == "-.ext"
@@ -34,7 +30,7 @@ def test_populate_dataset_yml(tmpdir):
 
     def c():  # shortcut
         with open(path) as f:
-            return yaml.safe_load(f)
+            return yaml_load(f, typ="safe")
 
     path.write("")
     populate_dataset_yml(str(path), [])  # doesn't crash
@@ -51,7 +47,7 @@ def c():  # shortcut
         {"age": 2, "cell_id": "2", "tissue_sample_id": 1, "sex": "F"},
     ]
 
-    # even though we use ruyaml for manipulation, we should assure it is readable
+    # even though we use ruamel for manipulation, we should assure it is readable
     # by regular yaml
     populate_dataset_yml(str(path), metadata)
     assert c() == {
@@ -63,9 +59,9 @@ def c():  # shortcut
     }
 
     # and if we set units and redo -- years should stay unchanged, while other fields change
-    m = yaml_ruamel.load(path.read())
+    m = yaml_load(path.read())
     m["age"]["units"] = "years"
-    yaml_ruamel.dump(m, open(path, "w"))
+    ruamel.yaml.YAML().dump(m, open(path, "w"))
 
     populate_dataset_yml(str(path), metadata[:1])
     assert c() == {
@@ -126,7 +122,7 @@ def test_organize_nwb_test_data(nwb_test_data, tmpdir, clirunner, mode):
     # with @map_to_click_exceptions we loose original str of message somehow
     # although it is shown to the user - checked. TODO - figure it out
     # assert "not containing all" in str(r.exc_info[1])
-    assert r.exit_code != 0, f"Must have aborted since many files lack subject_id"
+    assert r.exit_code != 0, "Must have aborted since many files lack subject_id"
     assert not glob(op.join(outdir, "*")), "no files should have been populated"
 
     r = clirunner.invoke(organize, cmd + ["--invalid", "warn"])
diff --git a/dandi/tests/test_register.py b/dandi/tests/test_register.py
index 3cd919131..c4e948dc1 100644
--- a/dandi/tests/test_register.py
+++ b/dandi/tests/test_register.py
@@ -1,17 +1,11 @@
 import re
 
-import yaml
-
 from ..consts import dandiset_identifier_regex, dandiset_metadata_file
 from ..register import register
+from ..utils import yaml_load
 
 
-def yaml_load(s):
-    obj = yaml.load(s, Loader=yaml.BaseLoader)
-    return obj
-
-
-def test_smoke_metadata_present(local_docker_compose_env, monkeypatch, tmp_path):
+def test_smoke_metadata_present(local_docker_compose_env, tmp_path):
     (tmp_path / dandiset_metadata_file).write_text("{}\n")
     assert (
         register(
@@ -23,7 +17,7 @@ def test_smoke_metadata_present(local_docker_compose_env, monkeypatch, tmp_path
         is None
     )
     with (tmp_path / dandiset_metadata_file).open() as fp:
-        metadata = yaml_load(fp.read())
+        metadata = yaml_load(fp, typ="base")
     assert metadata
     assert metadata["name"] == "Dandiset Name"
     assert metadata["description"] == "Dandiset Description"
@@ -32,7 +26,7 @@ def test_smoke_metadata_present(local_docker_compose_env, monkeypatch, tmp_path
     # with the given identifier
 
 
-def test_smoke_metadata_not_present(local_docker_compose_env, monkeypatch, tmp_path):
+def test_smoke_metadata_not_present(local_docker_compose_env, tmp_path):
     assert (
         register(
             "Dandiset Name",
@@ -43,7 +37,7 @@ def test_smoke_metadata_not_present(local_docker_compose_env, monkeypatch, tmp_
         is None
     )
     with (tmp_path / dandiset_metadata_file).open() as fp:
-        metadata = yaml_load(fp.read())
+        metadata = yaml_load(fp, typ="base")
     assert metadata
     assert metadata["name"] == "Dandiset Name"
     assert metadata["description"] == "Dandiset Description"
diff --git a/dandi/tests/test_upload.py b/dandi/tests/test_upload.py
index 55a5d6b77..5422b7ecc 100644
--- a/dandi/tests/test_upload.py
+++ b/dandi/tests/test_upload.py
@@ -2,12 +2,12 @@
 from shutil import copyfile
 
 import pytest
-import yaml
 
 from .. import girder
 from ..consts import collection_drafts, dandiset_metadata_file
 from ..register import register
 from ..upload import upload
+from ..utils import yaml_load
 
 DANDIFILES_DIR = Path(__file__).with_name("data") / "dandifiles"
 
@@ -31,7 +31,7 @@ def test_upload(local_docker_compose_env, monkeypatch, tmp_path):
         dandi_instance=dandi_instance_id,
     )
     with (tmp_path / dandiset_metadata_file).open() as fp:
-        metadata = yaml.safe_load(fp)
+        metadata = yaml_load(fp, typ="safe")
     dandi_id = metadata["identifier"]
 
     client = girder.get_client(local_docker_compose_env["instance"].girder)
@@ -73,3 +73,43 @@ def test_upload_existing_error(local_docker_compose_env, monkeypatch, tmp_path)
             devel_debug=True,
             existing="error",
         )
+
+
+def test_upload_locks(local_docker_compose_env, mocker, monkeypatch, tmp_path):
+    DIRNAME = "sub-anm369963"
+    FILENAME = "sub-anm369963_ses-20170228.nwb"
+    dandi_instance_id = local_docker_compose_env["instance_id"]
+    (tmp_path / DIRNAME).mkdir(exist_ok=True, parents=True)
+    copyfile(DANDIFILES_DIR / DIRNAME / FILENAME, tmp_path / DIRNAME / FILENAME)
+    register(
+        "Upload Test",
+        "Upload Test Description",
+        dandiset_path=tmp_path,
+        dandi_instance=dandi_instance_id,
+    )
+    monkeypatch.chdir(tmp_path)
+    lockmock = mocker.patch.object(girder.GirderCli, "lock_dandiset")
+    upload(
+        paths=[DIRNAME],
+        dandi_instance=dandi_instance_id,
+        devel_debug=True,
+        existing="error",
+    )
+    lockmock.assert_called()
+    lockmock.return_value.__enter__.assert_called()
+    lockmock.return_value.__exit__.assert_called()
+
+
+def test_upload_unregistered(local_docker_compose_env, monkeypatch, tmp_path):
+    DIRNAME = "sub-anm369963"
+    FILENAME = "sub-anm369963_ses-20170228.nwb"
+    dandi_instance_id = local_docker_compose_env["instance_id"]
+    (tmp_path / dandiset_metadata_file).write_text("identifier: '999999'\n")
+    (tmp_path / DIRNAME).mkdir(exist_ok=True, parents=True)
+    copyfile(DANDIFILES_DIR / DIRNAME / FILENAME, tmp_path / DIRNAME / FILENAME)
+    monkeypatch.chdir(tmp_path)
+    with pytest.raises(ValueError) as excinfo:
+        upload(paths=[DIRNAME], dandi_instance=dandi_instance_id, devel_debug=True)
+    assert str(excinfo.value) == (
+        f"There is no 999999 in {collection_drafts}. Did you use 'dandi register'?"
+    )
diff --git a/dandi/tests/test_utils.py b/dandi/tests/test_utils.py
index 279ebf5bd..db4d6270d 100644
--- a/dandi/tests/test_utils.py
+++ b/dandi/tests/test_utils.py
@@ -1,14 +1,18 @@
 import inspect
 import os.path as op
 import time
-import datetime
 
 import pytest
+import responses
 
+from .. import __version__
+from ..consts import dandi_instance, known_instances
+from ..exceptions import BadCliVersionError, CliVersionTooOldError
 from ..utils import (
     ensure_datetime,
     ensure_strtime,
     find_files,
     flatten,
     flattened,
+    get_instance,
     get_utcnow_datetime,
     is_same_time,
     on_windows,
@@ -144,3 +148,171 @@ def test_flatten():
 )
 def test_remap_dict(from_, revmapping, to):
     assert remap_dict(from_, revmapping) == to
+
+
+@responses.activate
+def test_get_instance_dandi():
+    responses.add(
+        responses.GET,
+        "https://dandiarchive.org/server-info",
+        json={
+            "version": "1.0.0",
+            "cli-minimal-version": "0.5.0",
+            "cli-bad-versions": [],
+            "services": {
+                "girder": {"url": "https://girder.dandi"},
+                "webui": {"url": "https://gui.dandi"},
+                "api": {"url": "https://publish.dandi/api"},
+                "jupyterhub": {"url": "https://hub.dandi"},
+            },
+        },
+    )
+    assert get_instance("dandi") == dandi_instance(
+        girder="https://girder.dandi",
+        gui="https://gui.dandi",
+        redirector="https://dandiarchive.org",
+        api="https://publish.dandi/api",
+    )
+
+
+@responses.activate
+def test_get_instance_url():
+    responses.add(
+        responses.GET,
+        "https://example.dandi/server-info",
+        json={
+            "version": "1.0.0",
+            "cli-minimal-version": "0.5.0",
+            "cli-bad-versions": [],
+            "services": {
+                "girder": {"url": "https://girder.dandi"},
+                "webui": {"url": "https://gui.dandi"},
+                "api": {"url": "https://publish.dandi/api"},
+                "jupyterhub": {"url": "https://hub.dandi"},
+            },
+        },
+    )
+    assert get_instance("https://example.dandi/") == dandi_instance(
+        girder="https://girder.dandi",
+        gui="https://gui.dandi",
+        redirector="https://example.dandi/",
+        api="https://publish.dandi/api",
+    )
+
+
+@responses.activate
+def test_get_instance_cli_version_too_old():
+    responses.add(
+        responses.GET,
+        "https://example.dandi/server-info",
+        json={
+            "version": "1.0.0",
+            "cli-minimal-version": "99.99.99",
+            "cli-bad-versions": [],
+            "services": {
+                "girder": {"url": "https://girder.dandi"},
+                "webui": {"url": "https://gui.dandi"},
+                "api": {"url": "https://publish.dandi/api"},
+                "jupyterhub": {"url": "https://hub.dandi"},
+            },
+        },
+    )
+    with pytest.raises(CliVersionTooOldError) as excinfo:
+        get_instance("https://example.dandi/")
+    assert str(excinfo.value) == (
+        f"Client version {__version__} is too old!"
+        " Server requires at least version 99.99.99"
+    )
+
+
+@responses.activate
+def test_get_instance_bad_cli_version():
+    responses.add(
+        responses.GET,
+        "https://example.dandi/server-info",
+        json={
+            "version": "1.0.0",
+            "cli-minimal-version": "0.5.0",
+            "cli-bad-versions": [__version__],
+            "services": {
+                "girder": {"url": "https://girder.dandi"},
+                "webui": {"url": "https://gui.dandi"},
+                "api": {"url": "https://publish.dandi/api"},
+                "jupyterhub": {"url": "https://hub.dandi"},
+            },
+        },
+    )
+    with pytest.raises(BadCliVersionError) as excinfo:
+        get_instance("https://example.dandi/")
+    assert str(excinfo.value) == (
+        f"Client version {__version__} is rejected by server!"
+        f" Server requires at least version 0.5.0 (but not {__version__})"
+    )
+
+
+@responses.activate
+def test_get_instance_id_bad_response():
+    responses.add(
+        responses.GET,
+        "https://dandiarchive.org/server-info",
+        body="404 -- not found",
+        status=404,
+    )
+    assert get_instance("dandi") is known_instances["dandi"]
+
+
+@responses.activate
+def test_get_instance_known_url_bad_response():
+    responses.add(
+        responses.GET,
+        "https://dandiarchive.org/server-info",
+        body="404 -- not found",
+        status=404,
+    )
+    assert get_instance("https://dandiarchive.org") is known_instances["dandi"]
+
+
+@responses.activate
+def test_get_instance_unknown_url_bad_response():
+    responses.add(
+        responses.GET,
+        "https://dandi.nil/server-info",
+        body="404 -- not found",
+        status=404,
+    )
+    with pytest.raises(RuntimeError) as excinfo:
+        get_instance("https://dandi.nil")
+    assert str(excinfo.value) == (
+        "Could not retrieve server info from https://dandi.nil,"
+        " and client does not recognize URL"
+    )
+
+
+def test_get_instance_id_no_redirector():
+    assert get_instance("local-girder-only") is known_instances["local-girder-only"]
+
+
+@responses.activate
+def test_get_instance_bad_version_from_server():
+    responses.add(
+        responses.GET,
+        "https://example.dandi/server-info",
+        json={
+            "version": "1.0.0",
+            "cli-minimal-version": "foobar",
+            "cli-bad-versions": [],
+            "services": {
+                "girder": {"url": "https://girder.dandi"},
+                "webui": {"url": "https://gui.dandi"},
+                "api": {"url": "https://publish.dandi/api"},
+                "jupyterhub": {"url": "https://hub.dandi"},
+            },
+        },
+    )
+    with pytest.raises(ValueError) as excinfo:
+        get_instance("https://example.dandi/")
+    assert str(excinfo.value).startswith(
+        "https://example.dandi/ returned an incorrectly formatted version;"
+        " please contact that server's administrators: "
+    )
+    assert "foobar" in str(excinfo.value)
diff --git a/dandi/upload.py b/dandi/upload.py
index edac0c7c2..9050f644a 100644
--- a/dandi/upload.py
+++ b/dandi/upload.py
@@ -8,12 +8,11 @@
 from .cli.command import lgr
 from . import __version__
-from .utils import ensure_datetime, ensure_strtime
+from .utils import ensure_datetime, ensure_strtime, get_instance
 from .consts import (
     collection_drafts,
     dandiset_identifier_regex,
     dandiset_metadata_file,
-    known_instances,
     metadata_digests,
 )
 
@@ -73,13 +72,6 @@ def upload(
             f"into a collection directly."
         )
 
-    # TODO: that the folder already exists
-    if False:
-        raise ValueError(
-            f"There is no {girder_top_folder} in {girder_collection}. "
-            f"Did you use 'dandi register'?"
-        )
-
     import multiprocessing
     from . import girder
     from .pynwb_utils import ignore_benign_pynwb_warnings, get_object_id
@@ -91,7 +83,7 @@ def upload(
 
     ignore_benign_pynwb_warnings()  # so validate doesn't whine
 
-    client = girder.get_client(known_instances[dandi_instance].girder)
+    client = girder.get_client(get_instance(dandi_instance).girder)
 
     try:
         collection_rec = girder.ensure_collection(client, girder_collection)
@@ -108,6 +100,14 @@ def upload(
 
     lgr.debug("Working with collection %s", collection_rec)
 
+    try:
+        girder.lookup(client, girder_collection, path=girder_top_folder)
+    except girder.GirderNotFound:
+        raise ValueError(
+            f"There is no {girder_top_folder} in {girder_collection}. "
+            f"Did you use 'dandi register'?"
+        )
+
     #
     # Treat paths
     #
@@ -505,7 +505,7 @@ def upload_agg(*ignored):
     rec_fields = ["path", "size", "errors", "upload", "status", "message"]
     out = pyout.Tabular(style=pyout_style, columns=rec_fields)
 
-    with out:
+    with out, client.lock_dandiset(dandiset.identifier):
         for path in paths:
             while len(process_paths) >= 10:
                 lgr.log(2, "Sleep waiting for some paths to finish processing")
diff --git a/dandi/utils.py b/dandi/utils.py
index a8d46512d..d0e0e3170 100644
--- a/dandi/utils.py
+++ b/dandi/utils.py
@@ -1,5 +1,6 @@
 import datetime
 import inspect
+import io
 import itertools
 import os
 import os.path as op
@@ -10,6 +11,14 @@
 
 from pathlib import Path
 
+import requests
+import ruamel.yaml
+from semantic_version import Version
+
+from . import __version__
+from .consts import dandi_instance, known_instances, known_instances_rev
+from .exceptions import BadCliVersionError, CliVersionTooOldError
+
 if sys.version_info[:2] < (3, 7):
     import dateutil.parser
 
@@ -380,9 +389,31 @@ def yaml_dump(rec):
     to assure proper formatting on versions of pyyaml before 5.1:
       https://github.com/yaml/pyyaml/pull/256
     """
-    import yaml
+    yaml = ruamel.yaml.YAML(typ="safe")
+    yaml.default_flow_style = False
+    out = io.StringIO()
+    yaml.dump(rec, out)
+    return out.getvalue()
 
-    return yaml.safe_dump(rec, default_flow_style=False)
+
+def yaml_load(f, typ=None):
+    """
+    Load YAML source from a file or string.
+
+    Parameters
+    ----------
+    f: str or IO[str]
+      The YAML source to load
+    typ: str, optional
+      The value of `typ` to pass to `ruamel.yaml.YAML`.  May be "rt" (default),
+      "safe", "unsafe", or "base"
+
+    Returns
+    -------
+    Any
+      The parsed YAML value
+    """
+    return ruamel.yaml.YAML(typ=typ).load(f)
 
 
 #
@@ -512,3 +543,51 @@ def delayed(*args, **kwargs):
     import joblib
 
     return joblib.delayed(*args, **kwargs)
+
+
+def get_instance(dandi_instance_id):
+    if dandi_instance_id.lower().startswith(("http://", "https://")):
+        redirector_url = dandi_instance_id
+        dandi_id = known_instances_rev.get(redirector_url)
+        if dandi_id is not None:
+            instance = known_instances[dandi_id]
+        else:
+            instance = None
+    else:
+        instance = known_instances[dandi_instance_id]
+        redirector_url = instance.redirector
+        if redirector_url is None:
+            return instance
+    try:
+        r = requests.get(redirector_url.rstrip("/") + "/server-info")
+        r.raise_for_status()
+    except Exception as e:
+        lgr.warning("Request to %s failed (%s)", redirector_url, str(e))
+        if instance is not None:
+            lgr.warning("Using hard-coded URLs")
+            return instance
+        else:
+            raise RuntimeError(
+                f"Could not retrieve server info from {redirector_url},"
+                " and client does not recognize URL"
+            )
+    server_info = r.json()
+    try:
+        minversion = Version(server_info["cli-minimal-version"])
+        bad_versions = [Version(v) for v in server_info["cli-bad-versions"]]
+    except ValueError as e:
+        raise ValueError(
+            f"{redirector_url} returned an incorrectly formatted version;"
+            f" please contact that server's administrators: {e}"
+        )
+    our_version = Version(__version__)
+    if our_version < minversion:
+        raise CliVersionTooOldError(our_version, minversion, bad_versions)
+    if our_version in bad_versions:
+        raise BadCliVersionError(our_version, minversion, bad_versions)
+    return dandi_instance(
+        girder=server_info["services"]["girder"]["url"],
+        gui=server_info["services"]["webui"]["url"],
+        redirector=redirector_url,
+        api=server_info["services"]["api"]["url"],
+    )
diff --git a/dandi/validate.py b/dandi/validate.py
index 43fc28557..a752ef4f8 100644
--- a/dandi/validate.py
+++ b/dandi/validate.py
@@ -1,9 +1,8 @@
 import os.path as op
 
 from .consts import dandiset_metadata_file
-import yaml
 
 from .pynwb_utils import validate as pynwb_validate, validate_cache
-from .utils import find_dandi_files
+from .utils import find_dandi_files, yaml_load
 from .metadata import get_metadata
 
@@ -42,7 +41,7 @@ def validate_file(filepath):
 
 def validate_dandiset_yaml(filepath):
     """Validate dandiset.yaml"""
     with open(filepath) as f:
-        meta = yaml.safe_load(f)
+        meta = yaml_load(f, typ="safe")
     return _check_required_fields(meta, _required_dandiset_metadata_fields)
diff --git a/setup.cfg b/setup.cfg
index ddc2c139a..797e4e7db 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -40,10 +40,10 @@ install_requires =
     click-didyoumean
     etelemetry >= 0.2.0
     joblib
-    pyout
+    pyout != 0.6.0
    humanize
-    pyyaml
-    ruamel.yaml
+    requests ~= 2.20
+    ruamel.yaml >=0.15, <1
    keyring
    keyrings.alt
    python-dateutil; python_version < "3.7"
@@ -68,10 +68,10 @@ style =
     pre-commit
 test =
     coverage
-    mock
     pytest
     pytest-cov
-    requests ~= 2.20
+    pytest-mock
+    responses
 all =
     #%(doc)s
     %(extensions)s
@@ -87,7 +87,6 @@ max-line-length = 100
 #ignore = D100,D101,D102,D103,D104,D105,D200,D201,D202,D204,D205,D208,D209,D210,D300,D301,D400,D401,D403,E24,E121,E123,E126,E226,E266,E402,E704,E731,F821,I100,I101,I201,N802,N803,N804,N806,W503,W504,W605
 ignore = E203,W503
 exclude =
-    *test*
     *sphinx*
     dandi/externals/*
     */__init__.py
diff --git a/tox.ini b/tox.ini
index 6ea8ad288..1bf601172 100644
--- a/tox.ini
+++ b/tox.ini
@@ -4,10 +4,9 @@ envlist = lint,py3
 [testenv]
 extras = test
 commands =
-    # Using pytest-cov leaves a bunch of .coverage.$HOSTNAME.#.# files lying
-    # around for some reason
-    #python -m pytest -s -v --cov=dandi dandi
-    coverage run -m pytest -v dandi
+    # Using pytest-cov instead of using coverage directly leaves a bunch of
+    # .coverage.$HOSTNAME.#.# files lying around for some reason
+    coverage run -m pytest -v {posargs} dandi
     coverage combine
     coverage report
 
@@ -24,6 +23,7 @@ filterwarnings =
     error
     ignore:No cached namespaces found .*:UserWarning
     ignore:ignoring namespace '.*' because it already exists:UserWarning
+    ignore::DeprecationWarning:responses
 
 [coverage:run]
 parallel = True
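
# ---------------------------------------------------------------------------
# Appendix (not part of the changeset above): a minimal usage sketch of the
# helpers this diff introduces -- yaml_load/yaml_dump in dandi/utils.py,
# get_instance() with its server-side version gating, and the new
# GirderCli.lock_dandiset() context manager.  Illustrative only: it assumes
# the package layout shown above and, for the last two steps, a reachable
# server.

from dandi.utils import get_instance, yaml_dump, yaml_load

# Round-trip a record through the ruamel.yaml-backed helpers
# (safe dump, block style).
rec = {"identifier": "000001", "name": "Example Dandiset"}
assert yaml_load(yaml_dump(rec), typ="safe") == rec

# typ="base" keeps every scalar a string, so a zero-padded identifier such as
# 000001 is not cast to the int 1 -- the concern noted in
# Dandiset._load_metadata above.
assert yaml_load("identifier: 000001\n", typ="base") == {"identifier": "000001"}

# Resolve an instance record; when the redirector serves /server-info, the
# client version is validated against cli-minimal-version/cli-bad-versions,
# raising CliVersionTooOldError or BadCliVersionError as appropriate.
instance = get_instance("dandi")  # network call to the redirector

# Hold the server-side lock around an upload, as upload() now does.
from dandi import girder

client = girder.get_client(instance.girder)
with client.lock_dandiset("000001"):  # raises LockingError on failure
    ...  # perform uploads while the dandiset is locked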