Skip to content

Commit 75540c8

Browse files
authored
Merge pull request #1011 from dandi/upload_bids
Built-in BIDS support for `dandi upload`
2 parents ddf46f2 + 8139966 commit 75540c8

7 files changed

+254
-39
lines changed

dandi/bids_utils.py

+77
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
from .utils import pluralize
2+
3+
4+
def is_valid(
    validation_result: dict,
    allow_invalid_filenames: bool = False,
    allow_missing_files: bool = False,
) -> bool:
    """Determine whether a dataset validation result marks it as valid.

    Parameters
    ----------
    validation_result: dict
        Dictionary as returned by `dandi.bids_validator_xs.validate_bids()`.
    allow_invalid_filenames: bool, optional
        Whether to still consider the dataset valid if some filenames inside
        do not match any BIDS pattern.
    allow_missing_files: bool, optional
        Whether to still consider the dataset valid if some mandatory files
        are not present.

    Returns
    -------
    bool: whether the dataset validation result marks it as valid.
    """
    # Both failure modes are tolerated, so the result does not even need to
    # be inspected.
    if allow_invalid_filenames and allow_missing_files:
        return True
    # Mandatory patterns for which no matching file was found.
    missing_files = [
        pattern["regex"]
        for pattern in validation_result["schema_tracking"]
        if pattern["mandatory"]
    ]
    # Paths that did not match any pattern known to BIDS.
    invalid_filenames = validation_result["path_tracking"]

    if missing_files and not allow_missing_files:
        return False
    if invalid_filenames and not allow_invalid_filenames:
        return False
    return True
39+
40+
41+
def report_errors(
    validation_result: dict,
) -> None:
    """Print a colored one-line summary of BIDS validation errors.

    Parameters
    ----------
    validation_result: dict
        Dictionary as returned by `dandi.bids_validator_xs.validate_bids()`.
    """
    import click

    # Mandatory patterns for which no matching file was found.
    unmatched_mandatory = [
        entry["regex"]
        for entry in validation_result["schema_tracking"]
        if entry["mandatory"]
    ]
    problems = []
    if unmatched_mandatory:
        problems.append(
            f"{pluralize(len(unmatched_mandatory), 'filename pattern')} required "
            "by BIDS could not be found"
        )
    if validation_result["path_tracking"]:
        problems.append(
            f"{pluralize(len(validation_result['path_tracking']), 'filename')} "
            "did not match any pattern known to BIDS"
        )
    if problems:
        summary = " and ".join(problems)
        click.secho(
            f"Summary: {summary}.",
            bold=True,
            fg="red",
        )
    else:
        click.secho(
            "All filenames are BIDS-valid and no mandatory files are missing.",
            bold=True,
            fg="green",
        )

dandi/bids_validator_xs.py

+18-7
Original file line numberDiff line numberDiff line change
@@ -611,17 +611,28 @@ def select_schema_dir(
611611
)
612612
else:
613613
with open(dataset_description) as f:
614-
dataset_info = json.load(f)
615614
try:
616-
schema_version = dataset_info["BIDSVersion"]
617-
except KeyError:
618-
lgr.warning(
619-
"BIDSVersion is not specified in "
620-
"`dataset_description.json`. "
621-
"Falling back to %s.",
615+
dataset_info = json.load(f)
616+
except json.decoder.JSONDecodeError:
617+
lgr.error(
618+
"The `%s` file could not be loaded. "
619+
"Please check whether the file is valid JSON. "
620+
"Falling back to the %s BIDS version.",
621+
dataset_description,
622622
schema_min_version,
623623
)
624624
schema_version = schema_min_version
625+
else:
626+
try:
627+
schema_version = dataset_info["BIDSVersion"]
628+
except KeyError:
629+
lgr.warning(
630+
"BIDSVersion is not specified in "
631+
"`dataset_description.json`. "
632+
"Falling back to %s.",
633+
schema_min_version,
634+
)
635+
schema_version = schema_min_version
625636
if not schema_version:
626637
lgr.warning(
627638
"No BIDSVersion could be found for the dataset. Falling back to %s.",

dandi/cli/cmd_validate.py

+6-28
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import click
55

66
from .base import devel_debug_option, devel_option, lgr, map_to_click_exceptions
7-
from ..utils import pluralize
87

98

109
@click.command()
@@ -26,43 +25,22 @@ def validate_bids(
2625
paths, schema=None, devel_debug=False, report=False, report_flag=False
2726
):
2827
"""Validate BIDS paths."""
28+
from ..bids_utils import is_valid, report_errors
2929
from ..validate import validate_bids as validate_bids_
3030

3131
if report_flag and not report:
3232
report = report_flag
3333

34-
validation_result = validate_bids_(
34+
validator_result = validate_bids_(
3535
*paths,
3636
report=report,
3737
schema_version=schema,
3838
devel_debug=devel_debug,
3939
)
40-
missing_files = [
41-
pattern["regex"]
42-
for pattern in validation_result["schema_tracking"]
43-
if pattern["mandatory"]
44-
]
45-
error_list = []
46-
if missing_files:
47-
error_substring = (
48-
f"{pluralize(len(missing_files), 'filename pattern')} required "
49-
"by BIDS could not be found"
50-
)
51-
error_list.append(error_substring)
52-
if validation_result["path_tracking"]:
53-
error_substring = (
54-
f"{pluralize(len(validation_result['path_tracking']), 'filename')} "
55-
"did not match any pattern known to BIDS"
56-
)
57-
error_list.append(error_substring)
58-
if error_list:
59-
error_string = " and ".join(error_list)
60-
error_string = f"Summary: {error_string}."
61-
click.secho(
62-
error_string,
63-
bold=True,
64-
fg="red",
65-
)
40+
valid = is_valid(validator_result)
41+
report_errors(validator_result)
42+
43+
if not valid:
6644
raise SystemExit(1)
6745

6846

dandi/tests/fixtures.py

+36
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,32 @@
4040
lgr = get_logger()
4141

4242

43+
def copytree(src, dst, symlinks=False, ignore=None):
    """Function mimicking `shutil.copytree()` behaviour but supporting existing target
    directories.

    Parameters
    ----------
    src: str
        Source directory whose contents are copied.
    dst: str
        Target directory; created (with parents) if it does not exist.
    symlinks: bool, optional
        If true, symbolic links in `src` are recreated as links in `dst`
        instead of copying the files they point to (matching
        `shutil.copytree()` semantics).
    ignore: callable, optional
        Callable with the `shutil.copytree()` ignore signature — called as
        ``ignore(src, names)`` and returning the collection of names to skip
        (e.g. as produced by `shutil.ignore_patterns()`).

    Notes
    -----
    * This function can be removed and replaced by a call to `shutil.copytree()`
      setting the `dirs_exist_ok` keyword argument to true, whenever Python 3.7
      is no longer supported.

    References
    ----------
    https://docs.python.org/3/whatsnew/3.8.html#shutil
    """
    if not os.path.exists(dst):
        os.makedirs(dst)
    names = os.listdir(src)
    # Fix: `symlinks` and `ignore` were previously accepted (and passed on
    # recursively) but never actually applied; honor them here with
    # `shutil.copytree()` semantics.  Defaults preserve the old behaviour.
    ignored = set(ignore(src, names)) if ignore is not None else set()
    for item in names:
        if item in ignored:
            continue
        s = os.path.join(src, item)
        d = os.path.join(dst, item)
        if symlinks and os.path.islink(s):
            if os.path.lexists(d):
                os.remove(d)
            os.symlink(os.readlink(s), d)
        elif os.path.isdir(s):
            copytree(s, d, symlinks, ignore)
        else:
            # Only copy when the destination is missing or clearly stale
            # (source more than one second newer), so repeated runs are cheap.
            if not os.path.exists(d) or os.stat(s).st_mtime - os.stat(d).st_mtime > 1:
                shutil.copy2(s, d)
68+
4369
@pytest.fixture(autouse=True)
4470
def capture_all_logs(caplog: pytest.LogCaptureFixture) -> None:
4571
caplog.set_level(logging.DEBUG, logger="dandi")
@@ -411,6 +437,16 @@ def zarr_dandiset(new_dandiset: SampleDandiset) -> SampleDandiset:
411437
return new_dandiset
412438

413439

440+
@pytest.fixture()
def bids_dandiset(new_dandiset: SampleDandiset, bids_examples: str) -> SampleDandiset:
    """Sample Dandiset pre-populated with the ``asl003`` BIDS example dataset."""
    source = os.path.join(bids_examples, "asl003")
    target = str(new_dandiset.dspath) + "/"
    copytree(source, target)
    changes_path = new_dandiset.dspath / "CHANGES"
    changes_path.write_text("0.1.0 2014-11-03\n")
    return new_dandiset
448+
449+
414450
@pytest.fixture()
415451
def video_files(tmp_path):
416452
video_paths = []

dandi/tests/test_upload.py

+32
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,38 @@ def test_upload_sync_folder(
181181
text_dandiset.dandiset.get_asset_by_path("subdir2/banana.txt")
182182

183183

184+
def test_upload_bids(mocker: MockerFixture, bids_dandiset: SampleDandiset) -> None:
    """Uploading a BIDS dandiset runs the upload machinery and creates assets."""
    upload_spy = mocker.spy(LocalFileAsset, "iter_upload")
    bids_dandiset.upload(existing="forced")
    # Check whether upload was run
    upload_spy.assert_called()
    # Check existence of assets (get_asset_by_path raises if absent):
    remote = bids_dandiset.dandiset
    for asset_path in (
        "CHANGES",  # file we created
        "dataset_description.json",  # BIDS descriptor file
        "sub-Sub1/anat/sub-Sub1_T1w.nii.gz",  # actual data file
    ):
        remote.get_asset_by_path(asset_path)
197+
198+
199+
def test_upload_bids_validation_ignore(
    mocker: MockerFixture, bids_dandiset: SampleDandiset
) -> None:
    """With validation="ignore" the upload still proceeds and creates assets."""
    upload_spy = mocker.spy(LocalFileAsset, "iter_upload")
    bids_dandiset.upload(existing="forced", validation="ignore")
    # Check whether upload was run
    upload_spy.assert_called()
    # Check existence of assets (get_asset_by_path raises if absent):
    remote = bids_dandiset.dandiset
    for asset_path in (
        "CHANGES",  # file we created
        "dataset_description.json",  # BIDS descriptor file
        "sub-Sub1/anat/sub-Sub1_T1w.nii.gz",  # actual data file
    ):
        remote.get_asset_by_path(asset_path)
214+
215+
184216
def test_upload_sync_zarr(mocker, zarr_dandiset):
185217
rmtree(zarr_dandiset.dspath / "sample.zarr")
186218
zarr.save(zarr_dandiset.dspath / "identity.zarr", np.eye(5))

dandi/upload.py

+73
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import click
1111

1212
from . import lgr
13+
from .bids_utils import is_valid
1314
from .consts import DRAFT, dandiset_identifier_regex, dandiset_metadata_file
1415
from .dandiapi import RemoteAsset
1516
from .exceptions import NotFoundError
@@ -61,6 +62,20 @@ def upload(
6162
" paths. Use 'dandi download' or 'organize' first."
6263
)
6364

65+
# Pre-validate BIDS datasets before going for individual files.
66+
bids_datasets = _bids_discover_and_validate(dandiset_.path, validation)
67+
68+
if bids_datasets:
69+
_bids_datasets = [str(i) for i in bids_datasets]
70+
if not allow_any_path:
71+
lgr.info(
72+
"Enabling --allow-any-path since we detected %s under the following "
73+
"paths: %s",
74+
pluralize(len(_bids_datasets), "BIDS dataset"),
75+
", ".join(_bids_datasets),
76+
)
77+
allow_any_path = True
78+
6479
instance = get_instance(dandi_instance)
6580
assert instance.api is not None
6681
api_url = instance.api
@@ -383,3 +398,61 @@ def check_replace_asset(
383398

384399
def skip_file(msg: Any) -> Dict[str, str]:
    """Build a "skipped" status record with *msg* (stringified) as the reason."""
    return dict(status="skipped", message=str(msg))
401+
402+
403+
def _bids_discover_and_validate(
    dandiset_path: str,
    validation: Optional[str] = "require",
) -> List[Path]:
    """Temporary implementation for discovery and validation of BIDS datasets

    Datasets are discovered by locating ``dataset_description.json`` files
    under ``dandiset_path``.  Unless ``validation`` is ``"skip"``, each
    discovered dataset is validated and only the valid ones are returned;
    with ``validation == "ignore"``, missing mandatory files and invalid
    filenames are tolerated when judging validity.

    References:
    - unification of validation records: https://github.com/dandi/dandi-cli/issues/943
    - validation "design doc": https://github.com/dandi/dandi-cli/pull/663
    """
    from .utils import find_files
    from .validate import validate_bids

    # A BIDS dataset root is the directory containing a
    # `dataset_description.json` file (regex matches at a path-separator
    # boundary, `\x5C` being a backslash for Windows paths).
    bids_descriptions = map(
        Path, find_files(r"(^|[/\x5C])dataset_description\.json$", dandiset_path)
    )
    bids_datasets = [bd.parent for bd in bids_descriptions]
    if bids_datasets:
        lgr.debug(
            "Detected %s under following paths: %s",
            pluralize(len(bids_datasets), "BIDS dataset"),
            ", ".join(str(i) for i in bids_datasets),
        )

    if validation != "skip":
        if bids_datasets:
            bids_datasets_to_validate = list()
            # Select datasets that are path-related (ancestor or descendant)
            # to some other discovered dataset; `Path.relative_to` raises
            # ValueError when the paths are unrelated.
            # NOTE(review): a dataset related to several paths (including
            # itself) is appended once per relation, so this list may contain
            # duplicates — presumably harmless for validation, but verify.
            for p in bids_datasets:
                for bd in bids_datasets:
                    try:
                        p.relative_to(bd)
                    except ValueError:
                        # `p` is not inside `bd`; check the converse.
                        try:
                            bd.relative_to(p)
                        except ValueError:
                            # Unrelated paths: not selected via this pair.
                            pass
                        else:
                            # `bd` lies inside `p`.
                            bids_datasets_to_validate.append(bd)
                    else:
                        # `p` lies inside `bd` (including p == bd).
                        bids_datasets_to_validate.append(bd)
        else:
            bids_datasets_to_validate = bids_datasets
        # Sort for deterministic validation/reporting order.
        bids_datasets_to_validate.sort()
        validated_datasets = []
        for bd in bids_datasets_to_validate:
            validator_result = validate_bids(bd)
            valid = is_valid(
                validator_result,
                # "ignore" tolerates both failure modes; "require" tolerates
                # neither.
                allow_missing_files=validation == "ignore",
                allow_invalid_filenames=validation == "ignore",
            )
            if valid:
                validated_datasets.append(bd)
        return validated_datasets
    else:
        # validation == "skip": return everything discovered, unvalidated.
        return bids_datasets

dandi/validate.py

+12-4
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,17 @@
1-
from typing import Any, Iterator, List, Optional, Tuple
1+
from pathlib import Path
2+
from typing import Iterator, List, Optional, Tuple, Union
23

34
from .files import find_dandi_files
45

56
# TODO: provide our own "errors" records, which would also include warnings etc
67

78

89
def validate_bids(
9-
*paths: str,
10+
*paths: Union[str, Path],
1011
schema_version: Optional[str] = None,
1112
devel_debug: bool = False,
1213
report: Optional[str] = None,
13-
) -> Any:
14+
) -> dict:
1415
"""Validate BIDS paths.
1516
1617
Parameters
@@ -26,16 +27,23 @@ def validate_bids(
2627
If string, the string will be used as the output path.
2728
If the variable evaluates as False, no log will be written.
2829
30+
Returns
31+
-------
32+
dict
33+
Dictionary reporting required patterns not found and existing filenames not matching any
34+
patterns.
35+
2936
Notes
3037
-----
3138
Can be used from bash, as:
3239
DANDI_DEVEL=1 dandi validate-bids --schema="1.7.0+012+dandi001" --report="my.log" /my/path
3340
"""
3441
from .bids_validator_xs import validate_bids as validate_bids_
3542

36-
return validate_bids_(
43+
validation_result = validate_bids_(
3744
paths, schema_version=schema_version, debug=devel_debug, report_path=report
3845
)
46+
return dict(validation_result)
3947

4048

4149
def validate(

0 commit comments

Comments
 (0)