diff --git a/dandi/cli/cmd_organize.py b/dandi/cli/cmd_organize.py index cca3589a5..a45765ec6 100644 --- a/dandi/cli/cmd_organize.py +++ b/dandi/cli/cmd_organize.py @@ -59,6 +59,7 @@ ), ) @click.argument("paths", nargs=-1, type=click.Path(exists=True)) +@click.option("-J", "--jobs", type=int, help="Number of jobs during organization") @devel_debug_option() @map_to_click_exceptions def organize( @@ -70,6 +71,7 @@ def organize( devel_debug=False, update_external_file_paths=False, media_files_mode=None, + jobs=None, ): """(Re)organize files according to the metadata. @@ -115,4 +117,5 @@ def organize( update_external_file_paths=update_external_file_paths, media_files_mode=media_files_mode, required_fields=required_fields, + jobs=jobs, ) diff --git a/dandi/organize.py b/dandi/organize.py index 84010336d..2edab94d9 100644 --- a/dandi/organize.py +++ b/dandi/organize.py @@ -721,8 +721,10 @@ def organize( update_external_file_paths=False, media_files_mode=None, required_fields=None, + jobs=None, ): in_place = False # If we deduce that we are organizing in-place + jobs = jobs or -1 # will come handy when dry becomes proper separate option def dry_print(msg): @@ -812,12 +814,12 @@ def _get_metadata(path): meta["path"] = path return meta - if not devel_debug: + if not devel_debug and jobs != 1: # Do not use joblib at all if number_of_jobs=1 # Note: It is Python (pynwb) intensive, not IO, so ATM there is little # to no benefit from Parallel without using multiproc! But that would # complicate progress bar indication... TODO metadata = list( - Parallel(n_jobs=-1, verbose=10)( + Parallel(n_jobs=jobs, verbose=10)( delayed(_get_metadata)(path) for path in paths ) ) diff --git a/dandi/tests/test_organize.py b/dandi/tests/test_organize.py index 804b992ad..bc3d91de5 100644 --- a/dandi/tests/test_organize.py +++ b/dandi/tests/test_organize.py @@ -110,7 +110,8 @@ def c() -> Any: # shortcut @pytest.mark.integration @pytest.mark.parametrize("mode", no_move_modes) -def test_organize_nwb_test_data(nwb_test_data: Path, tmp_path: Path, mode: str) -> None: +@pytest.mark.parametrize("jobs", (1, -1)) +def test_organize_nwb_test_data(nwb_test_data: Path, tmp_path: Path, mode: str, jobs: int) -> None: outdir = tmp_path / "organized" relative = False @@ -152,7 +153,7 @@ def test_organize_nwb_test_data(nwb_test_data: Path, tmp_path: Path, mode: str) input_files = nwb_test_data / "v2.0.1" - cmd = ["-d", str(outdir), "--files-mode", mode, str(input_files)] + cmd = ["-d", str(outdir), "--files-mode", mode, str(input_files), "--jobs", str(jobs)] r = CliRunner().invoke(organize, cmd) # with @map_to_click_exceptions we loose original str of message somehow