diff --git a/docker/irods_client/tests/test_move.py b/docker/irods_client/tests/test_move.py index ee267e48..eb88d749 100644 --- a/docker/irods_client/tests/test_move.py +++ b/docker/irods_client/tests/test_move.py @@ -1,6 +1,8 @@ from pytest import mark + from ibridges.path import IrodsPath + @mark.parametrize("item_name", ["collection", "dataobject"]) def test_move(session, item_name, request): item = request.getfixturevalue(item_name) diff --git a/docs/source/conf.py b/docs/source/conf.py index f05ebd2b..ca66a713 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -8,14 +8,14 @@ import os import sys -import sphinx_rtd_theme + sys.path.insert(0, os.path.abspath('..')) project = 'iBridges' copyright = '2024, Christine Staiger' author = 'Christine Staiger' -release = '0.1' +release = '1.0.0' # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration diff --git a/docs/source/index.rst b/docs/source/index.rst index bc06404f..fafd4db6 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -10,8 +10,7 @@ Welcome to the iBridges documentation! .. admonition:: Warning - The project (including documentation) is currently in active development at a pre-alpha stage, which means that some functionality may be - broken or not implemented yet. We appreciate help, issues and bug report in our issue tracker on `github `__. + The project is in active development, current features are stable and the documentation is complete. However, we are still actively developing new features and improving the existing ones. We appreciate help, suggestions, issues and bug reports in our issue tracker on `github `__. iBridges is a wrapper around the `python-irodsclient `__. While the diff --git a/ibridges/__init__.py b/ibridges/__init__.py index b045db2c..2286a650 100644 --- a/ibridges/__init__.py +++ b/ibridges/__init__.py @@ -7,5 +7,13 @@ from ibridges.session import Session from ibridges.tickets import Tickets -__all__ = ["Session", "IrodsPath", "download", "upload", - "MetaData", "Tickets", "search_data", "sync"] +__all__ = [ + "Session", + "IrodsPath", + "download", + "upload", + "MetaData", + "Tickets", + "search_data", + "sync", +] diff --git a/ibridges/__main__.py b/ibridges/__main__.py index e1a8632a..0e40a2d7 100644 --- a/ibridges/__main__.py +++ b/ibridges/__main__.py @@ -1,4 +1,5 @@ """Command line tools for the iBridges library.""" + from __future__ import annotations import argparse @@ -102,6 +103,7 @@ def main() -> None: print(f"Invalid subcommand ({subcommand}). For help see ibridges --help") sys.exit(1) + def _set_ienv_path(ienv_path: Union[None, str, Path]): try: with open(IBRIDGES_CONFIG_FP, "r", encoding="utf-8") as handle: @@ -134,8 +136,7 @@ def _get_ienv_path() -> Union[None, str]: def ibridges_init(): """Create a cached password for future use.""" parser = argparse.ArgumentParser( - prog="ibridges init", - description="Cache your iRODS password to be used later." + prog="ibridges init", description="Cache your iRODS password to be used later." ) parser.add_argument( "irods_env_path", @@ -154,11 +155,18 @@ def ibridges_init(): def _list_coll(session: Session, remote_path: IrodsPath): if remote_path.collection_exists(): - print(str(remote_path)+':') + print(str(remote_path) + ":") coll = get_collection(session, remote_path) - print('\n'.join([' '+sub.path for sub in coll.data_objects])) - print('\n'.join([' C- '+sub.path for sub in coll.subcollections - if not str(remote_path) == sub.path])) + print("\n".join([" " + sub.path for sub in coll.data_objects])) + print( + "\n".join( + [ + " C- " + sub.path + for sub in coll.subcollections + if not str(remote_path) == sub.path + ] + ) + ) else: raise ValueError(f"Irods path '{remote_path}' is not a collection.") @@ -166,49 +174,47 @@ def _list_coll(session: Session, remote_path: IrodsPath): def ibridges_setup(): """Use templates to create an iRODS environment json.""" parser = argparse.ArgumentParser( - prog="ibridges setup", - description="Tool to create a valid irods_environment.json" + prog="ibridges setup", description="Tool to create a valid irods_environment.json" ) parser.add_argument( "server_name", help="Server name to create your irods_environment.json for.", type=str, default=None, - nargs="?" - ) - parser.add_argument( - "--list", - help="List all available server names.", - action="store_true" + nargs="?", ) + parser.add_argument("--list", help="List all available server names.", action="store_true") parser.add_argument( - "-o", "--output", + "-o", + "--output", help="Store the environment to a file.", type=Path, default=DEFAULT_IENV_PATH, required=False, ) parser.add_argument( - "--overwrite", - help="Overwrite the irods environment file.", - action="store_true" + "--overwrite", help="Overwrite the irods environment file.", action="store_true" ) args = parser.parse_args() env_providers = get_environment_providers() if args.list: if len(env_providers) == 0: - print("No server information was found. To use this function, please install a plugin" - " such as:\n\nhttps://github.com/UtrechtUniversity/ibridges-servers-uu" - "\n\nAlternatively create an irods_environment.json by yourself or with the help " - "of your iRODS administrator.") + print( + "No server information was found. To use this function, please install a plugin" + " such as:\n\nhttps://github.com/UtrechtUniversity/ibridges-servers-uu" + "\n\nAlternatively create an irods_environment.json by yourself or with the help " + "of your iRODS administrator." + ) print_environment_providers(env_providers) return try: provider = find_environment_provider(env_providers, args.server_name) except ValueError: - print(f"Error: Unknown server with name {args.server_name}.\n" - "Use `ibridges setup --list` to list all available server names.") + print( + f"Error: Unknown server with name {args.server_name}.\n" + "Use `ibridges setup --list` to list all available server names." + ) sys.exit(123) user_answers = {} @@ -221,8 +227,7 @@ def ibridges_setup(): print("\n") print(json_str) if args.output.is_dir(): - print(f"Output {args.output} is a directory, cannot export irods_environment" - " file.") + print(f"Output {args.output} is a directory, cannot export irods_environment" " file.") sys.exit(234) else: with open(args.output, "w", encoding="utf-8") as handle: @@ -232,8 +237,7 @@ def ibridges_setup(): def ibridges_list(): """List a collection on iRODS.""" parser = argparse.ArgumentParser( - prog="ibridges list", - description="List a collection on iRODS." + prog="ibridges list", description="List a collection on iRODS." ) parser.add_argument( "remote_path", @@ -247,16 +251,18 @@ def ibridges_list(): with interactive_auth(irods_env_path=_get_ienv_path()) as session: _list_coll(session, _parse_remote(args.remote_path, session)) + def _create_coll(session: Session, remote_path: IrodsPath): if remote_path.exists(): - raise ValueError(f'New collection path {remote_path} already exists.') + raise ValueError(f"New collection path {remote_path} already exists.") remote_path.create_collection(session, remote_path) + def ibridges_mkcoll(): """Create a collection with all its parents given the new path.""" parser = argparse.ArgumentParser( prog="ibridges mkcoll", - description="Create a new collecion with all its parent collections." + description="Create a new collecion with all its parent collections.", ) parser.add_argument( "remote_path", @@ -268,6 +274,7 @@ def ibridges_mkcoll(): with interactive_auth(irods_env_path=_get_ienv_path()) as session: _create_coll(session, _parse_remote(args.remote_path, session)) + def _parse_local(local_path: Union[None, str, Path]) -> Path: if local_path is None: return Path.cwd() @@ -277,6 +284,7 @@ def _parse_local(local_path: Union[None, str, Path]) -> Path: local_path = Path(local_path) return local_path + def _parse_remote(remote_path: Union[None, str], session: Session) -> IrodsPath: if remote_path is None: return IrodsPath(session, session.home) @@ -290,11 +298,12 @@ def _parse_remote(remote_path: Union[None, str], session: Session) -> IrodsPath: return IrodsPath(session, remote_path[7:]) return IrodsPath(session, remote_path[6:]) + def ibridges_download(): """Download a remote data object or collection.""" parser = argparse.ArgumentParser( prog="ibridges download", - description="Download a data object or collection from an iRODS server." + description="Download a data object or collection from an iRODS server.", ) parser.add_argument( "remote_path", @@ -317,21 +326,23 @@ def ibridges_download(): help="Name of the resource from which the data is to be downloaded.", type=str, default="", - required=False + required=False, ) parser.add_argument( "--dry-run", help="Do not perform the download, but list the files to be updated.", - action="store_true" + action="store_true", ) args = parser.parse_args() with interactive_auth(irods_env_path=_get_ienv_path()) as session: - ops = download(session, - _parse_remote(args.remote_path, session), - _parse_local(args.local_path), - overwrite=args.overwrite, - resc_name=args.resource, - dry_run=args.dry_run) + ops = download( + session, + _parse_remote(args.remote_path, session), + _parse_local(args.local_path), + overwrite=args.overwrite, + resc_name=args.resource, + dry_run=args.dry_run, + ) if args.dry_run: _summarize_ops(ops) @@ -340,7 +351,7 @@ def ibridges_upload(): """Upload a local file or directory to the irods server.""" parser = argparse.ArgumentParser( prog="ibridges upload", - description="Upload a data object or collection from an iRODS server." + description="Upload a data object or collection from an iRODS server.", ) parser.add_argument( "local_path", @@ -362,22 +373,23 @@ def ibridges_upload(): help="Name of the resource to which the data is to be uploaded.", type=str, default="", - required=False + required=False, ) parser.add_argument( "--dry-run", help="Do not perform the upload, but list the files to be updated.", - action="store_true" + action="store_true", ) args = parser.parse_args() with interactive_auth(irods_env_path=_get_ienv_path()) as session: - ops = upload(session, - _parse_local(args.local_path), - _parse_remote(args.remote_path, session), - overwrite=args.overwrite, - resc_name=args.resource, - dry_run=args.dry_run + ops = upload( + session, + _parse_local(args.local_path), + _parse_remote(args.remote_path, session), + overwrite=args.overwrite, + resc_name=args.resource, + dry_run=args.dry_run, ) if args.dry_run: _summarize_ops(ops) @@ -388,6 +400,7 @@ def _parse_str(remote_or_local: str, session) -> Union[str, IrodsPath]: return IrodsPath(session, remote_or_local[6:]) return remote_or_local + def _summarize_ops(ops): if len(ops["create_collection"]) > 0: print("Create collections:\n") @@ -413,8 +426,7 @@ def _summarize_ops(ops): def ibridges_sync(): """Synchronize files/directories between local and remote.""" parser = argparse.ArgumentParser( - prog="ibridges sync", - description="Synchronize files/directories between local and remote." + prog="ibridges sync", description="Synchronize files/directories between local and remote." ) parser.add_argument( "source", @@ -429,16 +441,16 @@ def ibridges_sync(): parser.add_argument( "--dry-run", help="Do not perform the synchronization, but list the files to be updated.", - action="store_true" + action="store_true", ) args = parser.parse_args() - with interactive_auth(irods_env_path=_get_ienv_path()) as session: - ops = sync(session, - _parse_str(args.source, session), - _parse_str(args.destination, session), - dry_run=args.dry_run, + ops = sync( + session, + _parse_str(args.source, session), + _parse_str(args.destination, session), + dry_run=args.dry_run, ) if args.dry_run: _summarize_ops(ops) @@ -446,28 +458,26 @@ def ibridges_sync(): # prefix components: _tree_elements = { - "pretty": - { - "space": ' ', - "branch": '│ ', - "skip": "...", - "tee": '├── ', - "last": '└── ', - }, - "ascii": - { - "space": ' ', - "branch": '| ', - "skip": "...", - "tee": '|-- ', - "last": '\\-- ', - } + "pretty": { + "space": " ", + "branch": "│ ", + "skip": "...", + "tee": "├── ", + "last": "└── ", + }, + "ascii": { + "space": " ", + "branch": "| ", + "skip": "...", + "tee": "|-- ", + "last": "\\-- ", + }, } def _print_build_list(build_list: list[str], prefix: str, pels: dict[str, str], show_max: int = 10): if len(build_list) > show_max: - n_half = (show_max-1)//2 + n_half = (show_max - 1) // 2 for item in build_list[:n_half]: print(prefix + pels["tee"] + item) print(prefix + pels["skip"]) @@ -479,8 +489,14 @@ def _print_build_list(build_list: list[str], prefix: str, pels: dict[str, str], if len(build_list) > 0: print(prefix + pels["last"] + build_list[-1]) -def _tree(ipath: IrodsPath, path_list: list[IrodsPath], pels: dict[str, str], prefix: str = '', - show_max: int = 10): + +def _tree( + ipath: IrodsPath, + path_list: list[IrodsPath], + pels: dict[str, str], + prefix: str = "", + show_max: int = 10, +): """Generate A recursive generator, given a directory Path object. will yield a visual tree structure line by line @@ -498,9 +514,13 @@ def _tree(ipath: IrodsPath, path_list: list[IrodsPath], pels: dict[str, str], pr if len(rel_path.parts) > 1: _print_build_list(build_list, prefix, show_max=show_max, pels=pels) build_list = [] - j_path += _tree(cur_path.parent, path_list[j_path:], show_max=show_max, - prefix=prefix + pels["branch"], - pels=pels) + j_path += _tree( + cur_path.parent, + path_list[j_path:], + show_max=show_max, + prefix=prefix + pels["branch"], + pels=pels, + ) continue build_list.append(str(rel_path)) j_path += 1 @@ -511,8 +531,7 @@ def _tree(ipath: IrodsPath, path_list: list[IrodsPath], pels: dict[str, str], pr def ibridges_tree(): """Print a tree representation of a remote directory.""" parser = argparse.ArgumentParser( - prog="ibridges tree", - description="Show collection/directory tree." + prog="ibridges tree", description="Show collection/directory tree." ) parser.add_argument( "remote_path", diff --git a/ibridges/data_operations.py b/ibridges/data_operations.py index ead7f721..6e3fc12d 100644 --- a/ibridges/data_operations.py +++ b/ibridges/data_operations.py @@ -2,6 +2,7 @@ Transfer data between local file system and iRODS, includes upload, download and sync. """ + from __future__ import annotations import base64 @@ -23,9 +24,15 @@ NUM_THREADS = 4 -def _obj_put(session: Session, local_path: Union[str, Path], irods_path: Union[str, IrodsPath], - overwrite: bool = False, resc_name: str = '', options: Optional[dict] = None, - ignore_err: bool = False): +def _obj_put( + session: Session, + local_path: Union[str, Path], + irods_path: Union[str, IrodsPath], + overwrite: bool = False, + resc_name: str = "", + options: Optional[dict] = None, + ignore_err: bool = False, +): """Upload `local_path` to `irods_path` following iRODS `options`. Parameters @@ -57,48 +64,55 @@ def _obj_put(session: Session, local_path: Union[str, Path], irods_path: Union[s return # Check if irods object already exists - obj_exists = IrodsPath(session, - irods_path / local_path.name).dataobject_exists() \ - or irods_path.dataobject_exists() + obj_exists = ( + IrodsPath(session, irods_path / local_path.name).dataobject_exists() + or irods_path.dataobject_exists() + ) if options is None: options = {} - options.update({ - kw.NUM_THREADS_KW: NUM_THREADS, - kw.REG_CHKSUM_KW: '', - kw.VERIFY_CHKSUM_KW: '' - }) + options.update({kw.NUM_THREADS_KW: NUM_THREADS, kw.REG_CHKSUM_KW: "", kw.VERIFY_CHKSUM_KW: ""}) - if resc_name not in ['', None]: + if resc_name not in ["", None]: options[kw.RESC_NAME_KW] = resc_name if overwrite or not obj_exists: try: session.irods_session.data_objects.put(local_path, str(irods_path), **options) except (PermissionError, OSError) as error: - err_msg = f'Cannot read {error.filename}.' + err_msg = f"Cannot read {error.filename}." if not ignore_err: raise PermissionError(err_msg) from error warnings.warn(err_msg) except irods.exception.CAT_NO_ACCESS_PERMISSION as error: - err_msg = f'Cannot write {str(irods_path)}.' + err_msg = f"Cannot write {str(irods_path)}." if not ignore_err: raise PermissionError(err_msg) from error warnings.warn(err_msg) except irods.exception.OVERWRITE_WITHOUT_FORCE_FLAG as error: raise FileExistsError( f"Dataset {irods_path} already exists. " - "Use overwrite=True to overwrite the existing file.") from error + "Use overwrite=True to overwrite the existing file." + ) from error else: if not ignore_err: raise FileExistsError( f"Dataset {irods_path} already exists. " - "Use overwrite=True to overwrite the existing file.") + "Use overwrite=True to overwrite the existing file." + ) warnings.warn(f"Cannot overwrite dataobject with name '{local_path.name}'.") -def upload(session: Session, local_path: Union[str, Path], irods_path: Union[str, IrodsPath], - overwrite: bool = False, ignore_err: bool = False, - resc_name: str = '', copy_empty_folders: bool = True, options: Optional[dict] = None, - dry_run: bool = False): + +def upload( + session: Session, + local_path: Union[str, Path], + irods_path: Union[str, IrodsPath], + overwrite: bool = False, + ignore_err: bool = False, + resc_name: str = "", + copy_empty_folders: bool = True, + options: Optional[dict] = None, + dry_run: bool = False, +): """Upload a local directory or file to iRODS. Parameters @@ -153,11 +167,11 @@ def upload(session: Session, local_path: Union[str, Path], irods_path: Union[str idest_path = ipath / local_path.name if not overwrite and idest_path.exists(): raise FileExistsError(f"{idest_path} already exists.") - ops = _up_sync_operations(local_path, idest_path, copy_empty_folders=copy_empty_folders, - depth=None) + ops = _up_sync_operations( + local_path, idest_path, copy_empty_folders=copy_empty_folders, depth=None + ) ops["create_collection"].add(str(idest_path)) elif local_path.is_file(): - if ipath.collection_exists(): ipath = ipath / local_path.name obj_exists = ipath.dataobject_exists() @@ -165,11 +179,15 @@ def upload(session: Session, local_path: Union[str, Path], irods_path: Union[str if obj_exists and not overwrite: raise FileExistsError( f"Dataset {irods_path} already exists. " - "Use overwrite=True to overwrite the existing file.") + "Use overwrite=True to overwrite the existing file." + ) if not (obj_exists and _calc_checksum(local_path) == _calc_checksum(ipath)): ops["upload"].append((local_path, ipath)) - + elif local_path.is_symlink(): + raise FileNotFoundError( + f"Cannot upload symbolic link {local_path}, please supply a direct " "path." + ) else: raise FileNotFoundError(f"Cannot upload {local_path}: file or directory does not exist.") ops.update({"resc_name": resc_name, "options": options}) @@ -177,10 +195,16 @@ def upload(session: Session, local_path: Union[str, Path], irods_path: Union[str perform_operations(session, ops, ignore_err=ignore_err) return ops -def _obj_get(session: Session, irods_path: IrodsPath, local_path: Path, - overwrite: bool = False, resc_name: Optional[str] = '', - options: Optional[dict] = None, - ignore_err: bool = False): + +def _obj_get( + session: Session, + irods_path: IrodsPath, + local_path: Path, + overwrite: bool = False, + resc_name: Optional[str] = "", + options: Optional[dict] = None, + ignore_err: bool = False, +): """Download `irods_path` to `local_path` following iRODS `options`. Parameters @@ -203,23 +227,25 @@ def _obj_get(session: Session, irods_path: IrodsPath, local_path: Path, """ if options is None: options = {} - options.update({ - kw.NUM_THREADS_KW: NUM_THREADS, - kw.VERIFY_CHKSUM_KW: '', - }) + options.update( + { + kw.NUM_THREADS_KW: NUM_THREADS, + kw.VERIFY_CHKSUM_KW: "", + } + ) if overwrite: - options[kw.FORCE_FLAG_KW] = '' - if resc_name not in ['', None]: + options[kw.FORCE_FLAG_KW] = "" + if resc_name not in ["", None]: options[kw.RESC_NAME_KW] = resc_name - #Quick fix for #126 + # Quick fix for #126 if Path(local_path).is_dir(): local_path = Path(local_path).joinpath(irods_path.name) try: session.irods_session.data_objects.get(str(irods_path), local_path, **options) except (OSError, irods.exception.CAT_NO_ACCESS_PERMISSION) as error: - msg = f'Cannot write to {local_path}.' + msg = f"Cannot write to {local_path}." if not ignore_err: raise PermissionError(msg) from error warnings.warn(msg) @@ -229,10 +255,18 @@ def _obj_get(session: Session, irods_path: IrodsPath, local_path: Path, raise PermissionError(msg) from exc warnings.warn(msg) -def download(session: Session, irods_path: Union[str, IrodsPath], local_path: Union[str, Path], - overwrite: bool = False, ignore_err: bool = False, resc_name: str = '', - copy_empty_folders: bool = True, options: Optional[dict] = None, - dry_run: bool = False): + +def download( + session: Session, + irods_path: Union[str, IrodsPath], + local_path: Union[str, Path], + overwrite: bool = False, + ignore_err: bool = False, + resc_name: str = "", + copy_empty_folders: bool = True, + options: Optional[dict] = None, + dry_run: bool = False, +): """Download a collection or data object to the local filesystem. Parameters @@ -292,10 +326,12 @@ def download(session: Session, irods_path: Union[str, IrodsPath], local_path: Un if local_path.is_file(): raise NotADirectoryError( f"Cannot download to directory {local_path} " - "since a file with the same name exists.") + "since a file with the same name exists." + ) - ops = _down_sync_operations(irods_path, local_path / irods_path.name, - copy_empty_folders=copy_empty_folders) + ops = _down_sync_operations( + irods_path, local_path / irods_path.name, copy_empty_folders=copy_empty_folders + ) ops["create_dir"].add(str(local_path / irods_path.name)) if not local_path.is_dir(): ops["create_dir"].add(str(local_path)) @@ -305,10 +341,13 @@ def download(session: Session, irods_path: Union[str, IrodsPath], local_path: Un if local_path.is_dir(): local_path = local_path / irods_path.name if (not overwrite) and local_path.is_file(): - raise FileExistsError(f"File or directory {local_path} already exists. " - "Use overwrite=True to overwrite the existing file(s).") - if not (local_path.is_file() and - (_calc_checksum(irods_path) == _calc_checksum(local_path))): + raise FileExistsError( + f"File or directory {local_path} already exists. " + "Use overwrite=True to overwrite the existing file(s)." + ) + if not ( + local_path.is_file() and (_calc_checksum(irods_path) == _calc_checksum(local_path)) + ): ops["download"].append((irods_path, local_path)) else: @@ -320,8 +359,9 @@ def download(session: Session, irods_path: Union[str, IrodsPath], local_path: Un return ops -def create_collection(session: Session, - coll_path: Union[IrodsPath, str]) -> irods.collection.iRODSCollection: +def create_collection( + session: Session, coll_path: Union[IrodsPath, str] +) -> irods.collection.iRODSCollection: """Create a collection and all parent collections that do not exist yet. Alias for :meth:`ibridges.path.IrodsPath.create_collection` @@ -347,7 +387,7 @@ def create_collection(session: Session, return IrodsPath.create_collection(session, coll_path) -def perform_operations(session: Session, operations: dict, ignore_err: bool=False): +def perform_operations(session: Session, operations: dict, ignore_err: bool = False): """Execute data operations. The operations can be obtained with a dry run of the upload/download/sync function. @@ -374,8 +414,13 @@ def perform_operations(session: Session, operations: dict, ignore_err: bool=Fals up_sizes = [lpath.stat().st_size for lpath, _ in operations["upload"]] down_sizes = [ipath.size for ipath, _ in operations["download"]] disable = len(up_sizes) + len(down_sizes) == 0 - pbar = tqdm(total=sum(up_sizes) + sum(down_sizes), unit="B", unit_scale=True, unit_divisor=1024, - disable=disable) + pbar = tqdm( + total=sum(up_sizes) + sum(down_sizes), + unit="B", + unit_scale=True, + unit_divisor=1024, + disable=disable, + ) # The code below does not work as expected, since connections in the # pool can be reused. Another solution for dynamic timeouts might be needed @@ -402,25 +447,41 @@ def perform_operations(session: Session, operations: dict, ignore_err: bool=Fals options = {} if options is None else options resc_name = operations.get("resc_name", "") for (lpath, ipath), size in zip(operations["upload"], up_sizes): - _obj_put(session, lpath, ipath, overwrite=True, ignore_err=ignore_err, options=options, - resc_name=resc_name) + _obj_put( + session, + lpath, + ipath, + overwrite=True, + ignore_err=ignore_err, + options=options, + resc_name=resc_name, + ) pbar.update(size) for (ipath, lpath), size in zip(operations["download"], down_sizes): - _obj_get(session, ipath, lpath, overwrite=True, ignore_err=ignore_err, options=options, - resc_name=resc_name) + _obj_get( + session, + ipath, + lpath, + overwrite=True, + ignore_err=ignore_err, + options=options, + resc_name=resc_name, + ) pbar.update(size) # session.irods_session.pool.connection_timeout = original_timeout -def sync(session: Session, - source: Union[str, Path, IrodsPath], - target: Union[str, Path, IrodsPath], - max_level: Optional[int] = None, - dry_run: bool = False, - ignore_err: bool = False, - copy_empty_folders: bool = False, - resc_name: str = "", - options: Optional[dict] = None) -> dict: +def sync( + session: Session, + source: Union[str, Path, IrodsPath], + target: Union[str, Path, IrodsPath], + max_level: Optional[int] = None, + dry_run: bool = False, + ignore_err: bool = False, + copy_empty_folders: bool = False, + resc_name: str = "", + options: Optional[dict] = None, +) -> dict: """Synchronize the data between a local copy (local file system) and the copy stored in iRODS. The command can be in one of the two modes: synchronization of data from the client's local file @@ -479,11 +540,13 @@ def sync(session: Session, raise ValueError(f"Source folder '{source}' does not exist") if isinstance(source, IrodsPath): - ops = _down_sync_operations(source, Path(target), copy_empty_folders=copy_empty_folders, - depth=max_level) + ops = _down_sync_operations( + source, Path(target), copy_empty_folders=copy_empty_folders, depth=max_level + ) else: - ops = _up_sync_operations(Path(source), target, copy_empty_folders=copy_empty_folders, - depth=max_level) + ops = _up_sync_operations( + Path(source), target, copy_empty_folders=copy_empty_folders, depth=max_level + ) ops.update({"resc_name": resc_name, "options": options}) if not dry_run: @@ -491,6 +554,7 @@ def sync(session: Session, return ops + def _param_checks(source, target): if not isinstance(source, IrodsPath) and not isinstance(target, IrodsPath): raise TypeError("Either source or target should be an iRODS path.") @@ -498,16 +562,18 @@ def _param_checks(source, target): if isinstance(source, IrodsPath) and isinstance(target, IrodsPath): raise TypeError("iRODS to iRODS copying is not supported.") + def _calc_checksum(filepath): if isinstance(filepath, IrodsPath): return filepath.checksum - f_hash=sha256() - memv=memoryview(bytearray(128*1024)) - with open(filepath, 'rb', buffering=0) as file: - for item in iter(lambda : file.readinto(memv), 0): + f_hash = sha256() + memv = memoryview(bytearray(128 * 1024)) + with open(filepath, "rb", buffering=0) as file: + for item in iter(lambda: file.readinto(memv), 0): f_hash.update(memv[:item]) return f"sha2:{str(base64.b64encode(f_hash.digest()), encoding='utf-8')}" + def _empty_ops(): return { "create_dir": set(), @@ -516,6 +582,7 @@ def _empty_ops(): "download": [], } + def _down_sync_operations(isource_path, ldest_path, copy_empty_folders=True, depth=None): operations = _empty_ops() for ipath in isource_path.walk(depth=depth): @@ -536,7 +603,7 @@ def _down_sync_operations(isource_path, ldest_path, copy_empty_folders=True, dep return operations -def _up_sync_operations(lsource_path, idest_path, copy_empty_folders=True, depth=None): +def _up_sync_operations(lsource_path, idest_path, copy_empty_folders=True, depth=None): # pylint: disable=too-many-branches operations = _empty_ops() session = idest_path.session try: @@ -551,6 +618,11 @@ def _up_sync_operations(lsource_path, idest_path, copy_empty_folders=True, depth for cur_file in files: ipath = root_ipath / cur_file lpath = lsource_path / root_part / cur_file + + # Ignore symlinks + if lpath.is_symlink(): + warnings.warn(f"Ignoring symlink {lpath}.") + continue if str(ipath) in remote_ipaths: ipath = remote_ipaths[str(ipath)] l_chksum = _calc_checksum(lpath) @@ -563,6 +635,11 @@ def _up_sync_operations(lsource_path, idest_path, copy_empty_folders=True, depth operations["upload"].append((lpath, ipath)) if copy_empty_folders: for fold in folders: + # Ignore folder symlinks + lpath = lsource_path / root_part / fold + if lpath.is_symlink(): + warnings.warn(f"Ignoring symlink {lpath}.") + continue if str(root_ipath / fold) not in remote_ipaths: operations["create_collection"].add(str(root_ipath / fold)) if str(root_ipath) not in remote_ipaths and str(root_ipath) != str(idest_path): diff --git a/ibridges/export_metadata.py b/ibridges/export_metadata.py index 9d7c804f..248e8bd3 100644 --- a/ibridges/export_metadata.py +++ b/ibridges/export_metadata.py @@ -1,5 +1,5 @@ - """Exporting metadata.""" + from __future__ import annotations from typing import Any, Optional, Union @@ -12,8 +12,9 @@ from ibridges.util import is_collection, is_dataobject -def export_metadata_to_dict(meta: MetaData, session: Session, - recursive: bool = True, keys: Optional[list] = None) -> dict: +def export_metadata_to_dict( + meta: MetaData, session: Session, recursive: bool = True, keys: Optional[list] = None +) -> dict: """Retrieve the metadata of the item and brings it into dict form. If the item is a collection all metadata from all subcollections @@ -76,39 +77,49 @@ def export_metadata_to_dict(meta: MetaData, session: Session, """ metadata_dict: dict[str, Any] = {"ibridges_metadata_version": 1.0} - metadata_dict.update(meta.to_dict(keys = keys)) + metadata_dict.update(meta.to_dict(keys=keys)) if is_dataobject(meta.item): return metadata_dict if is_collection(meta.item): if recursive is True: - objects, collections = _get_meta_from_irods_tree(session, meta.item, - root = meta.item.path) + objects, collections = _get_meta_from_irods_tree( + session, meta.item, root=meta.item.path + ) metadata_dict["subcollections"] = collections metadata_dict["dataobjects"] = objects return metadata_dict return metadata_dict raise ValueError("Not a data collection or data object: {item}") -def _get_meta_from_irods_tree(session: Session, coll: iRODSCollection, - root: Optional[Union[str, IrodsPath]] = None - ) -> tuple[list[dict], list[dict]]: + +def _get_meta_from_irods_tree( + session: Session, coll: iRODSCollection, root: Optional[Union[str, IrodsPath]] = None +) -> tuple[list[dict], list[dict]]: """Recursively gather the metadata for all subcollections and data objects.""" if root is not None: root_path = IrodsPath(session, root) else: root_path = IrodsPath(session, coll.path) - objects = [{'name': o.name, 'irods_id': o.id, 'checksum': o.checksum, - 'rel_path': '/'.join(IrodsPath(session, - o.path).parts[len(root_path.parts):]), - 'metadata': MetaData(o).to_dict()['metadata']} - for o in coll.data_objects - ] - collections = [{'name': c.name, 'irods_id': c.id, - 'rel_path': '/'.join(IrodsPath(session, - c.path).parts[len(root_path.parts):]), - 'metadata': MetaData(c).to_dict()['metadata']} - for c in coll.subcollections] + objects = [ + { + "name": o.name, + "irods_id": o.id, + "checksum": o.checksum, + "rel_path": "/".join(IrodsPath(session, o.path).parts[len(root_path.parts) :]), + "metadata": MetaData(o).to_dict()["metadata"], + } + for o in coll.data_objects + ] + collections = [ + { + "name": c.name, + "irods_id": c.id, + "rel_path": "/".join(IrodsPath(session, c.path).parts[len(root_path.parts) :]), + "metadata": MetaData(c).to_dict()["metadata"], + } + for c in coll.subcollections + ] if len(coll.subcollections) > 0: for subcoll in coll.subcollections: subobjects, subcollections = _get_meta_from_irods_tree(session, subcoll, root_path) diff --git a/ibridges/icat_columns.py b/ibridges/icat_columns.py index d4ed2741..bcaab8f2 100644 --- a/ibridges/icat_columns.py +++ b/ibridges/icat_columns.py @@ -1,4 +1,5 @@ """Definition of keywors and operators for iCAT searches.""" + import irods.column as cm import irods.models as imodels diff --git a/ibridges/interactive.py b/ibridges/interactive.py index 3a4087be..1fb54142 100644 --- a/ibridges/interactive.py +++ b/ibridges/interactive.py @@ -9,8 +9,10 @@ DEFAULT_IENV_PATH = Path(os.path.expanduser("~")).joinpath(".irods", "irods_environment.json") -def interactive_auth(password: Optional[str] = None, - irods_env_path: Union[None, str, Path] = None) -> Session: + +def interactive_auth( + password: Optional[str] = None, irods_env_path: Union[None, str, Path] = None +) -> Session: """Interactive authentication with iRODS server. Stores the password in ~/.irods/.irodsA upon success. @@ -37,12 +39,14 @@ def interactive_auth(password: Optional[str] = None, if irods_env_path is None: irods_env_path = DEFAULT_IENV_PATH if not os.path.exists(irods_env_path): - print(f'File not found: {irods_env_path}') + print(f"File not found: {irods_env_path}") raise FileNotFoundError session = None - if os.path.exists(Path(os.path.expanduser("~")).joinpath(".irods", ".irodsA")) and \ - password is None: + if ( + os.path.exists(Path(os.path.expanduser("~")).joinpath(".irods", ".irodsA")) + and password is None + ): session = _from_pw_file(irods_env_path) if password is not None: @@ -63,25 +67,27 @@ def interactive_auth(password: Optional[str] = None, return session except PasswordError as e: print(repr(e)) - print('INFO: The provided password is wrong.') - n_tries+=1 + print("INFO: The provided password is wrong.") + n_tries += 1 raise LoginError("Connection to iRODS could not be established.") + def _from_pw_file(irods_env_path): try: session = Session(irods_env_path) return session except IndexError: - print('INFO: The cached password in ~/.irods/.irodsA has been corrupted') + print("INFO: The cached password in ~/.irods/.irodsA has been corrupted") except PasswordError: - print('INFO: The cached password in ~/.irods/.irodsA is wrong.') + print("INFO: The cached password in ~/.irods/.irodsA is wrong.") return None + def _from_password(irods_env_path, password): try: session = Session(irods_env=irods_env_path, password=password) session.write_pam_password() return session except PasswordError: - print('INFO: Wrong password.') + print("INFO: Wrong password.") return None diff --git a/ibridges/meta.py b/ibridges/meta.py index 381ce386..926baa36 100644 --- a/ibridges/meta.py +++ b/ibridges/meta.py @@ -1,4 +1,5 @@ """metadata operations.""" + from __future__ import annotations from typing import Any, Iterator, Optional, Sequence, Union @@ -10,7 +11,7 @@ from ibridges.util import is_dataobject -class MetaData(): +class MetaData: """Irods metadata operations. This allows for adding and deleting of metadata entries for data objects @@ -33,8 +34,9 @@ class MetaData(): """ - def __init__(self, item: Union[irods.data_object.iRODSDataObject, - irods.collection.iRODSCollection]): + def __init__( + self, item: Union[irods.data_object.iRODSDataObject, irods.collection.iRODSCollection] + ): """Initialize the metadata object. Parameters @@ -68,7 +70,7 @@ def __contains__(self, val: Union[str, Sequence]) -> bool: """ if isinstance(val, str): val = [val] - all_attrs = ["name", "value", "units"][:len(val)] + all_attrs = ["name", "value", "units"][: len(val)] for meta in self: n_same = 0 for i_attr, attr in enumerate(all_attrs): @@ -84,7 +86,6 @@ def __repr__(self) -> str: """Create a sorted representation of the metadata.""" return f"MetaData<{self.item.path}>" - def __str__(self) -> str: """Return a string showing all metadata entries.""" # Sort the list of items name -> value -> units, where None is the lowest @@ -203,11 +204,14 @@ def delete(self, key: str, value: Optional[str], units: Optional[str] = None): else: self.item.metadata.remove(key, value, units) except irods.exception.CAT_SUCCESS_BUT_WITH_NO_INFO as error: - raise KeyError(f"Cannot delete metadata with key '{key}', value '{value}'" - f" and units '{units}' since it does not exist.") from error + raise KeyError( + f"Cannot delete metadata with key '{key}', value '{value}'" + f" and units '{units}' since it does not exist." + ) from error except irods.exception.CAT_NO_ACCESS_PERMISSION as error: - raise ValueError("Cannot delete metadata due to insufficient permission for " - "path '{item.path}'.") from error + raise ValueError( + "Cannot delete metadata due to insufficient permission for path '{item.path}'." + ) from error def clear(self): """Delete all metadata belonging to the item. diff --git a/ibridges/path.py b/ibridges/path.py index fe5b8ced..d6280ba6 100644 --- a/ibridges/path.py +++ b/ibridges/path.py @@ -1,4 +1,5 @@ """A class to handle iRODS paths.""" + from __future__ import annotations from collections import defaultdict @@ -11,7 +12,7 @@ import ibridges.icat_columns as icat -class IrodsPath(): +class IrodsPath: """A class analogous to the pathlib.Path for accessing iRods data. The IrodsPath can be used in much the same way as a Path from the pathlib library. @@ -52,11 +53,10 @@ def __init__(self, session, *args): """ self.session = session if not hasattr(session, "irods_session"): - raise ValueError(f'{str(self)} does not have a valid session.') + raise ValueError(f"{str(self)} does not have a valid session.") # We don't want recursive IrodsPaths, so we take the # path outside of the IrodsPath object. - args = [a._path if isinstance(a, IrodsPath) else a - for a in args] + args = [a._path if isinstance(a, IrodsPath) else a for a in args] self._path = PurePosixPath(*args) super().__init__() @@ -87,7 +87,6 @@ def absolute(self) -> IrodsPath: abs_str = str(PurePosixPath(begin, *end)) return IrodsPath(self.session, abs_str) - def __str__(self) -> str: """Get the absolute path if converting to string.""" return str(self.absolute()._path) @@ -177,12 +176,12 @@ def remove(self): obj = self.session.irods_session.data_objects.get(str(self)) obj.unlink() except irods.exception.CUT_ACTION_PROCESSED_ERR as exc: - raise PermissionError( - f"While removing {self}: iRODS server forbids action.") from exc + raise PermissionError(f"While removing {self}: iRODS server forbids action.") from exc @staticmethod - def create_collection(session, - coll_path: Union[IrodsPath, str]) -> irods.collection.iRODSCollection: + def create_collection( + session, coll_path: Union[IrodsPath, str] + ) -> irods.collection.iRODSCollection: """Create a collection and all parent collections that do not exist yet. Parameters @@ -214,7 +213,8 @@ def create_collection(session, raise PermissionError(f"Cannot create {str(coll_path)}, no access.") from error except irods.exception.CUT_ACTION_PROCESSED_ERR as exc: raise PermissionError( - "While creating collection '{coll_path}': iRODS server forbids action.") from exc + "While creating collection '{coll_path}': iRODS server forbids action." + ) from exc def rename(self, new_name: Union[str, IrodsPath]) -> IrodsPath: """Change the name or the path of a data object or collection. @@ -240,10 +240,10 @@ def rename(self, new_name: Union[str, IrodsPath]) -> IrodsPath: """ if not self.exists(): - raise ValueError(f'str{self} does not exist.') + raise ValueError(f"str{self} does not exist.") # Build new path - if str(new_name).startswith('/'+self.session.zone): + if str(new_name).startswith("/" + self.session.zone): new_path = IrodsPath(self.session, new_name) else: new_path = self.parent.joinpath(new_name) @@ -260,13 +260,13 @@ def rename(self, new_name: Union[str, IrodsPath]) -> IrodsPath: return new_path except irods.exception.SAME_SRC_DEST_PATHS_ERR as err: - raise ValueError(f'Path {new_path} already exists.') from err + raise ValueError(f"Path {new_path} already exists.") from err except irods.exception.SYS_CROSS_ZONE_MV_NOT_SUPPORTED as err: raise ValueError( - f'Path {new_path} needs to start with /{self.session.zone}/home') from err + f"Path {new_path} needs to start with /{self.session.zone}/home" + ) from err except irods.exception.CAT_NO_ACCESS_PERMISSION as err: - raise PermissionError(f'Not allowed to move data to {new_path}') from err - + raise PermissionError(f"Not allowed to move data to {new_path}") from err def collection_exists(self) -> bool: """Check if the path points to an iRODS collection. @@ -338,8 +338,10 @@ def collection(self) -> irods.collection.iRODSCollection: if self.collection_exists(): return self.session.irods_session.collections.get(str(self)) if self.dataobject_exists(): - raise ValueError("Error retrieving collection, path is linked to a data object." - " Use get_dataobject instead to retrieve the data object.") + raise ValueError( + "Error retrieving collection, path is linked to a data object." + " Use get_dataobject instead to retrieve the data object." + ) raise irods.exception.CollectionDoesNotExist(str(self)) @property @@ -365,12 +367,13 @@ def dataobject(self) -> irods.data_object.iRODSDataObject: if self.dataobject_exists(): return self.session.irods_session.data_objects.get(str(self)) if self.collection_exists(): - raise ValueError("Error retrieving data object, path is linked to a collection." - " Use get_collection instead to retrieve the collection.") + raise ValueError( + "Error retrieving data object, path is linked to a collection." + " Use get_collection instead to retrieve the collection." + ) raise irods.exception.DataObjectDoesNotExist(str(IrodsPath)) - def walk(self, depth: Optional[int] = None) -> Iterable[IrodsPath]: """Walk on a collection. @@ -423,8 +426,7 @@ def relative_to(self, other: IrodsPath) -> PurePosixPath: >>> IrodsPath(session, "~/col/dataobj.txt").relative_to(IrodsPath(session, "~/col")) PurePosixPath(dataobj.txt) """ - return PurePosixPath(str(self.absolute())).relative_to( - PurePosixPath(str(other.absolute()))) + return PurePosixPath(str(self.absolute())).relative_to(PurePosixPath(str(other.absolute()))) @property def size(self) -> int: @@ -449,8 +451,10 @@ def size(self) -> int: """ if not self.exists(): - raise ValueError(f"Path '{str(self)}' does not exist;" - " it is neither a collection nor a dataobject.") + raise ValueError( + f"Path '{str(self)}' does not exist;" + " it is neither a collection nor a dataobject." + ) if self.dataobject_exists(): return self.dataobject.size all_objs = _get_data_objects(self.session, self.collection) @@ -485,18 +489,25 @@ def checksum(self) -> str: raise ValueError("Cannot take checksum of irods path neither a dataobject or collection.") -def _recursive_walk(cur_col: IrodsPath, sub_collections: dict[str, list[IrodsPath]], - all_dataobjects: dict[str, list[IrodsPath]], start_col: IrodsPath, - depth: int, max_depth: Optional[int]): +def _recursive_walk( + cur_col: IrodsPath, + sub_collections: dict[str, list[IrodsPath]], + all_dataobjects: dict[str, list[IrodsPath]], + start_col: IrodsPath, + depth: int, + max_depth: Optional[int], +): if cur_col != start_col: yield cur_col if max_depth is not None and depth >= max_depth: return for sub_col in sub_collections[str(cur_col)]: - yield from _recursive_walk(sub_col, sub_collections, all_dataobjects, start_col, - depth+1, max_depth) + yield from _recursive_walk( + sub_col, sub_collections, all_dataobjects, start_col, depth + 1, max_depth + ) yield from sorted(all_dataobjects[str(cur_col)], key=str) + class CachedIrodsPath(IrodsPath): """Cached version of the IrodsPath. @@ -505,8 +516,9 @@ class CachedIrodsPath(IrodsPath): when other ibridges operations are used. """ - def __init__(self, session, size: Optional[int], is_dataobj: bool, - checksum: Optional[str], *args): + def __init__( + self, session, size: Optional[int], is_dataobj: bool, checksum: Optional[str], *args + ): """Initialize CachedIrodsPath. Parameters @@ -551,8 +563,9 @@ def collection_exists(self) -> bool: return not self._is_dataobj -def _get_data_objects(session, - coll: irods.collection.iRODSCollection) -> list[tuple[str, str, int, str]]: +def _get_data_objects( + session, coll: irods.collection.iRODSCollection +) -> list[tuple[str, str, int, str]]: """Retrieve all data objects in a collection and all its subcollections. Parameters @@ -569,13 +582,13 @@ def _get_data_objects(session, """ # all objects in the collection - objs = [(obj.collection.path, obj.name, obj.size, obj.checksum) - for obj in coll.data_objects] + objs = [(obj.collection.path, obj.name, obj.size, obj.checksum) for obj in coll.data_objects] # all objects in subcollections - data_query = session.irods_session.query(icat.COLL_NAME, icat.DATA_NAME, - DataObject.size, DataObject.checksum) - data_query = data_query.filter(icat.LIKE(icat.COLL_NAME, coll.path+"/%")) + data_query = session.irods_session.query( + icat.COLL_NAME, icat.DATA_NAME, DataObject.size, DataObject.checksum + ) + data_query = data_query.filter(icat.LIKE(icat.COLL_NAME, coll.path + "/%")) for res in data_query.get_results(): path, name, size, checksum = res.values() objs.append((path, name, size, checksum)) @@ -583,11 +596,13 @@ def _get_data_objects(session, return objs -def _get_subcoll_paths(session, - coll: irods.collection.iRODSCollection) -> list: +def _get_subcoll_paths(session, coll: irods.collection.iRODSCollection) -> list: """Retrieve all sub collections in a sub tree starting at coll and returns their IrodsPaths.""" coll_query = session.irods_session.query(icat.COLL_NAME) - coll_query = coll_query.filter(icat.LIKE(icat.COLL_NAME, coll.path+"/%")) + coll_query = coll_query.filter(icat.LIKE(icat.COLL_NAME, coll.path + "/%")) - return [CachedIrodsPath(session, None, False, None, p) for r in coll_query.get_results() - for p in r.values()] + return [ + CachedIrodsPath(session, None, False, None, p) + for r in coll_query.get_results() + for p in r.values() + ] diff --git a/ibridges/permissions.py b/ibridges/permissions.py index ceb65d8d..132a3e95 100644 --- a/ibridges/permissions.py +++ b/ibridges/permissions.py @@ -1,4 +1,5 @@ """permission operations.""" + from collections import defaultdict from typing import Iterator, Optional @@ -8,7 +9,7 @@ import irods.session -class Permissions(): +class Permissions: """Irods permissions operations.""" def __init__(self, session, item) -> None: @@ -33,12 +34,13 @@ def __str__(self) -> str: """Create a string table of all currently set permissions.""" acl_dict = defaultdict(list) for perm in self: - acl_dict[f'{perm.user_name}#{perm.user_zone}'].append( - f'{perm.access_name}\t{perm.user_type}') - acl = '' + acl_dict[f"{perm.user_name}#{perm.user_zone}"].append( + f"{perm.access_name}\t{perm.user_type}" + ) + acl = "" for key, value in sorted(acl_dict.items()): - v_str= '\n\t'.join(value) - acl += f'{key}\n\t{v_str}\n' + v_str = "\n\t".join(value) + acl += f"{key}\n\t{v_str}\n" if isinstance(self.item, irods.collection.iRODSCollection): coll = self.session.irods_session.collections.get(self.item.path) @@ -51,8 +53,14 @@ def available_permissions(self) -> dict: """Get available permissions.""" return self.session.irods_session.available_permissions.codes - def set(self, perm: str, user: Optional[str] = None, zone: Optional[str] = None, - recursive: bool = False, admin: bool = False) -> None: + def set( + self, + perm: str, + user: Optional[str] = None, + zone: Optional[str] = None, + recursive: bool = False, + admin: bool = False, + ) -> None: """Set permissions (ACL) for an iRODS collection or data object.""" if user is None: user = self.session.username @@ -60,9 +68,14 @@ def set(self, perm: str, user: Optional[str] = None, zone: Optional[str] = None, zone = self.session.zone # forbid that users can change their own ACLs, # does not apply to no/inherit, a setting on collections which is independent of the user - if perm not in ["inherit", "noinherit"] and user == self.session.username and \ - zone == self.session.zone: - raise ValueError("Cannot set your own permissions, because you would lose " - "access to the object/collection.") + if ( + perm not in ["inherit", "noinherit"] + and user == self.session.username + and zone == self.session.zone + ): + raise ValueError( + "Cannot set your own permissions, because you would lose " + "access to the object/collection." + ) acl = irods.access.iRODSAccess(perm, self.item.path, user, zone) self.session.irods_session.acls.set(acl, recursive=recursive, admin=admin) diff --git a/ibridges/resources.py b/ibridges/resources.py index 4ec28f28..ab742497 100644 --- a/ibridges/resources.py +++ b/ibridges/resources.py @@ -1,4 +1,5 @@ """resource operations.""" + from __future__ import annotations from typing import Optional @@ -10,7 +11,7 @@ from ibridges.session import Session -class Resources(): +class Resources: """Irods Resource operations.""" def __init__(self, session: Session): @@ -78,9 +79,9 @@ def get_free_space(self, resc_name: str) -> int: if resc.free_space is not None: return int(resc.free_space) children = self.get_resource_children(resc) - free_space = sum(( - int(child.free_space) for child in children - if child.free_space is not None)) + free_space = sum( + (int(child.free_space) for child in children if child.free_space is not None) + ) return free_space def get_resource_children(self, resc: irods.resource.iRODSResource) -> list: @@ -123,24 +124,24 @@ def resources(self, update: bool = False) -> dict: """ if self._resources is None or update: query = self.session.irods_session.query( - icat.RESC_NAME, icat.RESC_PARENT, icat.RESC_STATUS, icat.RESC_CONTEXT) + icat.RESC_NAME, icat.RESC_PARENT, icat.RESC_STATUS, icat.RESC_CONTEXT + ) resc_list = [] for item in query.get_results(): name, parent, status, context = item.values() - if name == 'bundleResc': + if name == "bundleResc": continue free_space = 0 if parent is None: free_space = self.get_free_space(name) metadata = { - 'parent': parent, - 'status': status, - 'context': context, - 'free_space': free_space, + "parent": parent, + "status": status, + "context": context, + "free_space": free_space, } resc_list.append((name, metadata)) - resc_dict = dict( - sorted(resc_list, key=lambda item: str.casefold(item[0]))) + resc_dict = dict(sorted(resc_list, key=lambda item: str.casefold(item[0]))) self._resources = resc_dict return self._resources @@ -156,6 +157,8 @@ def root_resources(self) -> list[tuple]: List [(resource_name, status, free_space, context)] """ - parents = [(key, val) for key, val in self.resources().items() if not val['parent']] - return [(resc[0], resc[1]["status"], resc[1]["free_space"], resc[1]["context"]) - for resc in parents] + parents = [(key, val) for key, val in self.resources().items() if not val["parent"]] + return [ + (resc[0], resc[1]["status"], resc[1]["free_space"], resc[1]["context"]) + for resc in parents + ] diff --git a/ibridges/rules.py b/ibridges/rules.py index 350518f5..9aeb1172 100644 --- a/ibridges/rules.py +++ b/ibridges/rules.py @@ -1,4 +1,5 @@ """Rule operations.""" + import logging from typing import Optional @@ -8,12 +9,14 @@ from ibridges.session import Session -def execute_rule(session: Session, - rule_file: Optional[str], - params: Optional[dict], - output: str = 'ruleExecOut', - instance_name: str = 'irods_rule_engine_plugin-irods_rule_language-instance', - **kwargs) -> tuple: +def execute_rule( + session: Session, + rule_file: Optional[str], + params: Optional[dict], + output: str = "ruleExecOut", + instance_name: str = "irods_rule_engine_plugin-irods_rule_language-instance", + **kwargs, +) -> tuple: """Execute an iRODS rule. params format example: @@ -52,23 +55,25 @@ def execute_rule(session: Session, rule_file=rule_file, params=params, instance_name=instance_name, - output=output, **kwargs) + output=output, + **kwargs, + ) out = rule.execute() except irods.exception.NetworkException as error: - logging.info('Lost connection to iRODS server.') - return '', repr(error) + logging.info("Lost connection to iRODS server.") + return "", repr(error) except irods.exception.SYS_HEADER_READ_LEN_ERR as error: - logging.info('iRODS server hiccuped. Check the results and try again.') - return '', repr(error) + logging.info("iRODS server hiccuped. Check the results and try again.") + return "", repr(error) except Exception as error: raise ValueError("Unknown rule execution error") from error # logging.info('RULE EXECUTION ERROR', exc_info=True) # return '', repr(error) - stdout, stderr = '', '' + stdout, stderr = "", "" if len(out.MsParam_PI) > 0: buffers = out.MsParam_PI[0].inOutStruct - stdout = (buffers.stdoutBuf.buf or b'').decode() + stdout = (buffers.stdoutBuf.buf or b"").decode() # Remove garbage after terminal newline. - stdout = '\n'.join(stdout.split('\n')[:-1]) - stderr = (buffers.stderrBuf.buf or b'').decode() + stdout = "\n".join(stdout.split("\n")[:-1]) + stderr = (buffers.stderrBuf.buf or b"").decode() return stdout, stderr diff --git a/ibridges/search.py b/ibridges/search.py index 60137904..7c746d56 100644 --- a/ibridges/search.py +++ b/ibridges/search.py @@ -1,4 +1,5 @@ """Data query.""" + from __future__ import annotations from typing import Optional, Union @@ -8,8 +9,12 @@ from ibridges.session import Session -def search_data(session: Session, path: Optional[Union[str, IrodsPath]] = None, - checksum: Optional[str] = None, key_vals: Optional[dict] = None) -> list[dict]: +def search_data( + session: Session, + path: Optional[Union[str, IrodsPath]] = None, + checksum: Optional[str] = None, + key_vals: Optional[dict] = None, +) -> list[dict]: """Retrieve all collections and data objects. (the absolute collection path, @@ -45,24 +50,24 @@ def search_data(session: Session, path: Optional[Union[str, IrodsPath]] = None, """ if path is None and checksum is None and key_vals is None: raise ValueError( - "QUERY: Error while searching in the metadata: No query criteria set." \ - + " Please supply either a path, checksum or key_vals.") + "QUERY: Error while searching in the metadata: No query criteria set." + + " Please supply either a path, checksum or key_vals." + ) # create the query for collections; we only want to return the collection name coll_query = session.irods_session.query(icat.COLL_NAME) # create the query for data objects; we need the collection name, the data name and its checksum - data_query = session.irods_session.query(icat.COLL_NAME, icat.DATA_NAME, - icat.DATA_CHECKSUM) - data_name_query = session.irods_session.query(icat.COLL_NAME, icat.DATA_NAME, - icat.DATA_CHECKSUM) + data_query = session.irods_session.query(icat.COLL_NAME, icat.DATA_NAME, icat.DATA_CHECKSUM) + data_name_query = session.irods_session.query( + icat.COLL_NAME, icat.DATA_NAME, icat.DATA_CHECKSUM + ) # iRODS queries do not know the 'or' operator, so we need three searches # One for the collection, and two for the data # one data search in case path is a collection path and we want to retrieve all data there # one in case the path is or ends with a file name - if path: - path = str(path) - parent = path.rsplit("/", maxsplit=1)[0] - name = path.rsplit("/", maxsplit=1)[1] + path_params = _path_params(path) + if path_params: + path, name, parent = path_params # all collections starting with path coll_query = coll_query.filter(icat.LIKE(icat.COLL_NAME, path)) @@ -70,7 +75,8 @@ def search_data(session: Session, path: Optional[Union[str, IrodsPath]] = None, data_query = data_query.filter(icat.LIKE(icat.COLL_NAME, path)) # all data objects on path.parent with name data_name_query = data_name_query.filter(icat.LIKE(icat.DATA_NAME, name)).filter( - icat.LIKE(icat.COLL_NAME, parent)) + icat.LIKE(icat.COLL_NAME, parent) + ) if key_vals: for key in key_vals: data_query.filter(icat.LIKE(icat.META_DATA_ATTR_NAME, key)) @@ -80,17 +86,25 @@ def search_data(session: Session, path: Optional[Union[str, IrodsPath]] = None, data_query.filter(icat.LIKE(icat.META_DATA_ATTR_VALUE, key_vals[key])) coll_query.filter(icat.LIKE(icat.META_COLL_ATTR_VALUE, key_vals[key])) data_name_query = data_name_query.filter( - icat.LIKE(icat.META_DATA_ATTR_VALUE, key_vals[key])) + icat.LIKE(icat.META_DATA_ATTR_VALUE, key_vals[key]) + ) + + results = [] if checksum: data_query = data_query.filter(icat.LIKE(icat.DATA_CHECKSUM, checksum)) data_name_query = data_name_query.filter(icat.LIKE(icat.DATA_CHECKSUM, checksum)) - # gather results - results = list(data_query.get_results())+list(data_name_query.get_results()) - if checksum is None: + else: + # gather collection results coll_res = list(coll_query.get_results()) if len(coll_res) > 0: results.extend(coll_res) + # gather results, data_query and data_name_query can contain the same results + results.extend([ + dict(s) for s in set(frozenset(d.items()) + for d in list(data_query) + list(data_name_query)) + ]) + for item in results: if isinstance(item, dict): new_keys = [k.icat_key for k in item.keys()] @@ -98,3 +112,16 @@ def search_data(session: Session, path: Optional[Union[str, IrodsPath]] = None, item[n_key] = item.pop(o_key) return results + +def _path_params(path_param): + """Parse the path parameter and return, path, name and parent.""" + if path_param: + path = str(path_param) + if len(path.rsplit("/", maxsplit=1)) > 1: + parent = path.rsplit("/", maxsplit=1)[0] + name = path.rsplit("/", maxsplit=1)[1] + else: + name = path + parent = "%" + return path, name, parent + return None diff --git a/ibridges/session.py b/ibridges/session.py index 40f238d9..09f2694f 100644 --- a/ibridges/session.py +++ b/ibridges/session.py @@ -1,4 +1,5 @@ """session operations.""" + from __future__ import annotations import json @@ -19,15 +20,18 @@ from ibridges import icat_columns as icat -APP_NAME="ibridges" +APP_NAME = "ibridges" + class Session: """Irods session authentication.""" - def __init__(self, - irods_env: Union[dict, str, Path], - password: Optional[str] = None, - irods_home: Optional[str] = None): + def __init__( + self, + irods_env: Union[dict, str, Path], + password: Optional[str] = None, + irods_home: Optional[str] = None, + ): """IRODS authentication with Python client. Parameters @@ -69,8 +73,10 @@ def __init__(self, with irods_env_path.open("r", encoding="utf-8") as envfd: irods_env = json.load(envfd) if not isinstance(irods_env, dict): - raise TypeError(f"Error reading environment file '{irods_env_path}': " - f"expected dictionary, got {type(irods_env)}.") + raise TypeError( + f"Error reading environment file '{irods_env_path}': " + f"expected dictionary, got {type(irods_env)}." + ) self.connection_timeout = irods_env.pop("connection_timeout", 25000) @@ -81,7 +87,7 @@ def __init__(self, if irods_home is not None: self.home = irods_home if "irods_home" not in self._irods_env: - self.home = '/'+self.zone+'/home/'+self.username + self.home = "/" + self.zone + "/home/" + self.username def __enter__(self): """Connect to the iRODS server if not already connected.""" @@ -156,16 +162,15 @@ def network_check(cls, hostname: str, port: int) -> bool: except socket.error: return False - def connect(self) -> iRODSSession: """Establish an iRODS session.""" - irods_host = self._irods_env.get('irods_host', None) - irods_port = self._irods_env.get('irods_port', None) + irods_host = self._irods_env.get("irods_host", None) + irods_port = self._irods_env.get("irods_port", None) network = self.network_check(irods_host, irods_port) if network is False: - raise ConnectionError(f'No internet connection to {irods_host} and port {irods_port}') - user = self._irods_env.get('irods_user_name', '') - if user == 'anonymous': + raise ConnectionError(f"No internet connection to {irods_host} and port {irods_port}") + user = self._irods_env.get("irods_user_name", "") + if user == "anonymous": # TODOx: implement and test for SSL enabled iRODS # self.irods_session = iRODSSession(user='anonymous', # password='', @@ -174,7 +179,7 @@ def connect(self) -> iRODSSession: # host=host) raise NotImplementedError # authentication with irods environment and password - if self._password is None or self._password == '': + if self._password is None or self._password == "": # use cached password of .irodsA built into prc # print("Auth without password") return self.authenticate_using_auth_file() @@ -201,7 +206,8 @@ def authenticate_using_password(self) -> iRODSSession: password=self._password, **self._irods_env, connection_timeout=self.connection_timeout, - application_name=APP_NAME) + application_name=APP_NAME, + ) _ = irods_session.server_version except Exception as e: raise _translate_irods_error(e) from e @@ -213,8 +219,10 @@ def authenticate_using_auth_file(self) -> iRODSSession: """Authenticate with an authentication file.""" try: irods_session = irods.session.iRODSSession( - irods_env_file=self._irods_env_path, application_name=APP_NAME, - connection_timeout=self.connection_timeout) + irods_env_file=self._irods_env_path, + application_name=APP_NAME, + connection_timeout=self.connection_timeout, + ) _ = irods_session.server_version except NonAnonymousLoginWithoutPassword as e: raise ValueError("No cached password found.") from e @@ -224,18 +232,16 @@ def authenticate_using_auth_file(self) -> iRODSSession: raise LoginError("iRODS server does not return a server version.") return irods_session - def write_pam_password(self): """Store the password in the iRODS authentication file in obfuscated form.""" connection = self.irods_session.pool.get_connection() pam_passwords = self.irods_session.pam_pw_negotiated if len(pam_passwords): irods_auth_file = self.irods_session.get_irods_password_file() - with open(irods_auth_file, 'w', encoding='utf-8') as authfd: - authfd.write( - irods.password_obfuscation.encode(pam_passwords[0])) + with open(irods_auth_file, "w", encoding="utf-8") as authfd: + authfd.write(irods.password_obfuscation.encode(pam_passwords[0])) else: - warnings.warn('WARNING -- unable to cache obfuscated password locally') + warnings.warn("WARNING -- unable to cache obfuscated password locally") connection.release() @property @@ -289,33 +295,33 @@ def get_user_info(self) -> tuple[list, list]: iRODS group names """ - query = self.irods_session.query(icat.USER_TYPE).filter(icat.LIKE( - icat.USER_NAME, self.username)) - user_type = [ - list(result.values())[0] for result in query.get_results() - ][0] - query = self.irods_session.query(icat.USER_GROUP_NAME).filter(icat.LIKE( - icat.USER_NAME, self.username)) - user_groups = [ - list(result.values())[0] for result in query.get_results() - ] + query = self.irods_session.query(icat.USER_TYPE).filter( + icat.LIKE(icat.USER_NAME, self.username) + ) + user_type = [list(result.values())[0] for result in query.get_results()][0] + query = self.irods_session.query(icat.USER_GROUP_NAME).filter( + icat.LIKE(icat.USER_NAME, self.username) + ) + user_groups = [list(result.values())[0] for result in query.get_results()] return user_type, user_groups class LoginError(AttributeError): """Error indicating a failure to log into the iRODS server due to the configuration.""" + class PasswordError(ValueError): """Error indicating failure to log into the iRODS server due to wrong or outdated password.""" - def _translate_irods_error(exc) -> Exception: # pylint: disable=too-many-return-statements if isinstance(exc, NetworkException): - if any((a.startswith('Client-Server negotiation failure') for a in exc.args)): - return LoginError("Host, port, irods_client_server_policy or " - "irods_client_server_negotiation not set correctly in " - "irods_environment.json") + if any((a.startswith("Client-Server negotiation failure") for a in exc.args)): + return LoginError( + "Host, port, irods_client_server_policy or " + "irods_client_server_negotiation not set correctly in " + "irods_environment.json" + ) if isinstance(exc, TypeError): return LoginError(f"Add info to irods_environment.json: {exc.args}") if isinstance(exc, CAT_INVALID_USER): diff --git a/ibridges/tickets.py b/ibridges/tickets.py index 8fb6bcd7..2c82d0d1 100644 --- a/ibridges/tickets.py +++ b/ibridges/tickets.py @@ -1,4 +1,5 @@ """Ticket operations.""" + from __future__ import annotations from collections import namedtuple @@ -11,9 +12,10 @@ import ibridges.icat_columns as icat from ibridges.session import Session -TicketData = namedtuple('TicketData', ["name", "type", "path", "expiration_date"]) +TicketData = namedtuple("TicketData", ["name", "type", "path", "expiration_date"]) + -class Tickets(): +class Tickets: """Irods Ticket operations.""" def __init__(self, session: Session): @@ -28,9 +30,12 @@ def __init__(self, session: Session): self.session = session self._all_tickets = self.update_tickets() - def create_ticket(self, obj_path: str, - ticket_type: str = 'read', - expiry_date: Optional[Union[str, datetime, date]] = None) -> tuple: + def create_ticket( + self, + obj_path: str, + ticket_type: str = "read", + expiry_date: Optional[Union[str, datetime, date]] = None, + ) -> tuple: """Create an iRODS ticket. This allows read access to the object referenced by `obj_path`. @@ -65,15 +70,17 @@ def create_ticket(self, obj_path: str, if isinstance(expiry_date, date): expiry_date = datetime.combine(expiry_date, datetime.min.time()) if isinstance(expiry_date, datetime): - expiry_date = expiry_date.strftime('%Y-%m-%d.%H:%M:%S') + expiry_date = expiry_date.strftime("%Y-%m-%d.%H:%M:%S") if not isinstance(expiry_date, str): - raise TypeError("Expecting datetime, date or string type for 'expiry_date' " - f"argument, got {type(expiry_date)}") + raise TypeError( + "Expecting datetime, date or string type for 'expiry_date' " + f"argument, got {type(expiry_date)}" + ) try: - expiration_set = ticket.modify('expire', expiry_date) == ticket + expiration_set = ticket.modify("expire", expiry_date) == ticket except Exception as error: self.delete_ticket(ticket) - raise ValueError('Could not set expiration date') from error + raise ValueError("Could not set expiration date") from error self.update_tickets() return ticket.ticket, expiration_set @@ -106,11 +113,11 @@ def get_ticket(self, ticket_str: str) -> irods.ticket.Ticket: """ if ticket_str in self.all_ticket_strings: return irods.ticket.Ticket(self.session.irods_session, ticket=ticket_str) - raise KeyError(f"Cannot obtain ticket: ticket with ticket_str '{ticket_str}' " - "does not exist.") + raise KeyError( + f"Cannot obtain ticket: ticket with ticket_str '{ticket_str}' " "does not exist." + ) - def delete_ticket(self, ticket: Union[str, irods.ticket.Ticket], - check: bool = False): + def delete_ticket(self, ticket: Union[str, irods.ticket.Ticket], check: bool = False): """Delete irods ticket. Parameters @@ -151,15 +158,18 @@ def update_tickets(self) -> list[TicketData]: user = self.session.username self._all_tickets = [] for row in self.session.irods_session.query(TicketQuery.Ticket).filter( - TicketQuery.Owner.name == user): + TicketQuery.Owner.name == user + ): time = row[TicketQuery.Ticket.expiry_ts] time_stamp = datetime.fromtimestamp(int(time)) if time else "" self._all_tickets.append( - TicketData(row[TicketQuery.Ticket.string], - row[TicketQuery.Ticket.type], - self._id_to_path(str(row[TicketQuery.Ticket.object_id])), - time_stamp - )) + TicketData( + row[TicketQuery.Ticket.string], + row[TicketQuery.Ticket.type], + self._id_to_path(str(row[TicketQuery.Ticket.object_id])), + time_stamp, + ) + ) return self._all_tickets def clear(self): @@ -198,4 +208,4 @@ def _id_to_path(self, itemid: str) -> str: if len(list(coll_query)) > 0: res = next(coll_query.get_results()) return list(res.values())[0] - return '' + return "" diff --git a/ibridges/util.py b/ibridges/util.py index a52519a9..3dab9b65 100644 --- a/ibridges/util.py +++ b/ibridges/util.py @@ -15,8 +15,7 @@ from importlib.metadata import entry_points # type: ignore -def get_dataobject(session, - path: Union[str, IrodsPath]) -> irods.data_object.iRODSDataObject: +def get_dataobject(session, path: Union[str, IrodsPath]) -> irods.data_object.iRODSDataObject: """Instantiate an iRODS data object. See :meth:`ibridges.path.IrodsPath.dataobject` for details. @@ -25,8 +24,8 @@ def get_dataobject(session, path = IrodsPath(session, path) return path.dataobject -def get_collection(session, - path: Union[str, IrodsPath]) -> irods.collection.iRODSCollection: + +def get_collection(session, path: Union[str, IrodsPath]) -> irods.collection.iRODSCollection: """Instantiate an iRODS collection. See :meth:`ibridges.path.IrodsPath.collection` for details. @@ -34,8 +33,9 @@ def get_collection(session, return IrodsPath(session, path).collection -def get_size(session, item: Union[irods.data_object.iRODSDataObject, - irods.collection.iRODSCollection]) -> int: +def get_size( + session, item: Union[irods.data_object.iRODSDataObject, irods.collection.iRODSCollection] +) -> int: """Collect the sizes of a data object or a collection. See :meth:`ibridges.path.IrodsPath.size` for details. @@ -47,6 +47,7 @@ def is_dataobject(item) -> bool: """Determine if item is an iRODS data object.""" return isinstance(item, irods.data_object.iRODSDataObject) + def is_collection(item) -> bool: """Determine if item is an iRODS collection.""" return isinstance(item, irods.collection.iRODSCollection) @@ -70,19 +71,17 @@ def obj_replicas(obj: irods.data_object.iRODSDataObject) -> list[tuple[int, str, replica status of the replica """ - #replicas = [] - repl_states = { - '0': 'stale', - '1': 'good', - '2': 'intermediate', - '3': 'write-locked' - } + # replicas = [] + repl_states = {"0": "stale", "1": "good", "2": "intermediate", "3": "write-locked"} - replicas = [(r.number, r.resource_name, r.checksum, - r.size, repl_states.get(r.status, r.status)) for r in obj.replicas] + replicas = [ + (r.number, r.resource_name, r.checksum, r.size, repl_states.get(r.status, r.status)) + for r in obj.replicas + ] return replicas + def get_environment_providers() -> list: """Get a list of all environment template providers. @@ -105,7 +104,7 @@ def print_environment_providers(env_providers: Sequence): """ for provider in env_providers: print(provider.name) - print("-"*len(provider.name)) + print("-" * len(provider.name)) print("\n") max_len = max(len(x) for x in provider.descriptions) for server_name, description in provider.descriptions.items(): @@ -135,5 +134,6 @@ def find_environment_provider(env_providers: list, server_name: str) -> object: for provider in env_providers: if provider.contains(server_name): return provider - raise ValueError("Cannot find provider with name {server_name} ensure that the plugin is " - "installed.") + raise ValueError( + "Cannot find provider with name {server_name} ensure that the plugin is installed." + ) diff --git a/pyproject.toml b/pyproject.toml index 09a0c461..390acdcd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,6 +6,8 @@ build-backend = "setuptools.build_meta" name = "ibridges" authors = [ { name = "Christine Staiger", email = "c.staiger@uu.nl" }, + { name = "Raoul Schram", email = "r.d.schram@uu.nl" }, + { name = "Maarten Schermer", email = "m.d.schermer@uu.nl" }, ] description = "Package for accessing data and metadata on iRods servers." readme = "README.md" @@ -24,7 +26,7 @@ classifiers = [ ] dependencies = [ - "python-irodsclient>=1.1.6", + "python-irodsclient>=2.0.0", "tqdm", "importlib-metadata;python_version<'3.10'", ] diff --git a/tests/test_irodspath.py b/tests/test_irodspath.py index 7bd8af44..799cbe0c 100644 --- a/tests/test_irodspath.py +++ b/tests/test_irodspath.py @@ -19,7 +19,7 @@ class MockIrodsSession: mock_session = MockIrodsSession() dirname = "blabla" filename = "blublub" -irods_path = IrodsPath(mock_session, dirname, filename) +irods_path = IrodsPath(mock_session, dirname, filename) windows_path = "windows\\path\\directory" linux_path = "linux/or/mac/path"