Skip to content

Commit

Permalink
replace usages of copy_arrays with memmap
Browse files Browse the repository at this point in the history
  • Loading branch information
zacharyburnett committed Jul 18, 2024
1 parent b5e2131 commit 7ba9c62
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 56 deletions.
14 changes: 7 additions & 7 deletions weldx/asdf/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ def reset_file_position(fh: SupportsFileReadWrite):
DEFAULT_ARRAY_COMPRESSION = "input"
"""All arrays will be compressed using this algorithm, if not specified by user."""

DEFAULT_ARRAY_COPYING = True
"""Stored Arrays will be copied to memory, or not. If False, use memory mapping."""
DEFAULT_MEMORY_MAPPING = False
"""If True, stored arrays are memory-mapped; if False, they are copied to memory."""

DEFAULT_ARRAY_INLINE_THRESHOLD = 10
"""Arrays with this many elements or fewer will be inlined (stored as string, not binary)."""
Expand Down Expand Up @@ -148,8 +148,8 @@ class WeldxFile(_ProtectedViewDict):
- ``lz4``: Use lz4 compression.
- ``input``: Use the same compression as in the file read.
If there is no prior file, acts as None.
copy_arrays :
When `False`, when reading files, attempt to memory map (memmap) underlying data
memmap :
If `True`, attempt to memory map (memmap) the underlying data arrays when reading
files, where possible. This avoids exhausting memory when working with very
large datasets.
array_inline_threshold :
Expand Down Expand Up @@ -219,19 +219,19 @@ def __init__(
) = None,
software_history_entry: Mapping = None,
compression: str = DEFAULT_ARRAY_COMPRESSION,
copy_arrays: bool = DEFAULT_ARRAY_COPYING,
memmap: bool = DEFAULT_MEMORY_MAPPING,
array_inline_threshold: int = DEFAULT_ARRAY_INLINE_THRESHOLD,
):
if write_kwargs is None:
write_kwargs = dict(all_array_compression=compression)

if asdffile_kwargs is None:
asdffile_kwargs = dict(copy_arrays=copy_arrays)
asdffile_kwargs = dict(memmap=memmap)

# this parameter is now (asdf-2.8) an asdf.config parameter, so we store it here.
self._array_inline_threshold = array_inline_threshold

# TODO: ensure no mismatching args for compression and copy_arrays.
# TODO: ensure no mismatching args for compression and memmap.
self._write_kwargs = write_kwargs
self._asdffile_kwargs = asdffile_kwargs

Expand Down
10 changes: 5 additions & 5 deletions weldx/asdf/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def read_buffer_context(
Buffer containing ASDF file contents
open_kwargs
Additional keywords to pass to `asdf.AsdfFile.open`
Extensions are always set, ``copy_arrays=True`` is set by default.
Extensions are always set, ``memmap=False`` is set by default.
Returns
-------
Expand All @@ -158,7 +158,7 @@ def read_buffer_context(
"""
if open_kwargs is None:
open_kwargs = {"copy_arrays": True, "lazy_load": False}
open_kwargs = {"memmap": False, "lazy_load": False}

buffer.seek(0)

Expand Down Expand Up @@ -190,7 +190,7 @@ def read_buffer(
Buffer containing ASDF file contents
open_kwargs
Additional keywords to pass to `asdf.AsdfFile.open`
Extensions are always set, ``copy_arrays=True`` is set by default.
Extensions are always set, ``memmap=False`` is set by default.
Returns
-------
Expand Down Expand Up @@ -220,7 +220,7 @@ def write_read_buffer_context(
Extensions are always set.
open_kwargs
Additional keywords to pass to `asdf.AsdfFile.open`
Extensions are always set, ``copy_arrays=True`` is set by default.
Extensions are always set, ``memmap=False`` is set by default.
Returns
-------
Expand Down Expand Up @@ -248,7 +248,7 @@ def write_read_buffer(
Extensions are always set.
open_kwargs
Additional keywords to pass to `asdf.AsdfFile.open`
Extensions are always set, ``copy_arrays=True`` is set by default.
Extensions are always set, ``memmap=False`` is set by default.
Returns
-------
Expand Down
82 changes: 41 additions & 41 deletions weldx/tests/asdf_tests/test_asdf_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,15 +121,15 @@ def get_xarray_example_data_array():
return dax


@pytest.mark.parametrize("copy_arrays", [True, False])
@pytest.mark.parametrize("memmap", [True, False])
@pytest.mark.parametrize("lazy_load", [True, False])
@pytest.mark.parametrize("select", [{}, {"d4": "z"}])
def test_xarray_data_array(copy_arrays, lazy_load, select):
def test_xarray_data_array(memmap, lazy_load, select):
"""Test ASDF read/write of xarray.DataArray."""
dax = get_xarray_example_data_array().sel(**select)
tree = {"dax": dax}
with write_read_buffer_context(
tree, open_kwargs={"copy_arrays": copy_arrays, "lazy_load": lazy_load}
tree, open_kwargs={"memmap": memmap, "lazy_load": lazy_load}
) as data:
dax_file = data["dax"]
assert dax.identical(dax_file)
Expand Down Expand Up @@ -172,13 +172,13 @@ def get_xarray_example_dataset():
return dsx


@pytest.mark.parametrize("copy_arrays", [True, False])
@pytest.mark.parametrize("memmap", [True, False])
@pytest.mark.parametrize("lazy_load", [True, False])
def test_xarray_dataset(copy_arrays, lazy_load):
def test_xarray_dataset(memmap, lazy_load):
dsx = get_xarray_example_dataset()
tree = {"dsx": dsx}
with write_read_buffer_context(
tree, open_kwargs={"copy_arrays": copy_arrays, "lazy_load": lazy_load}
tree, open_kwargs={"memmap": memmap, "lazy_load": lazy_load}
) as data:
dsx_file = data["dsx"]
assert dsx.identical(dsx_file)
Expand Down Expand Up @@ -228,25 +228,25 @@ def get_local_coordinate_system(time_dep_orientation: bool, time_dep_coordinates

@pytest.mark.parametrize("time_dep_orientation", [False, True])
@pytest.mark.parametrize("time_dep_coordinates", [False, True])
@pytest.mark.parametrize("copy_arrays", [True, False])
@pytest.mark.parametrize("memmap", [True, False])
@pytest.mark.parametrize("lazy_load", [True, False])
def test_local_coordinate_system(
time_dep_orientation, time_dep_coordinates, copy_arrays, lazy_load
time_dep_orientation, time_dep_coordinates, memmap, lazy_load
):
"""Test (de)serialization of LocalCoordinateSystem in ASDF."""
lcs = get_local_coordinate_system(time_dep_orientation, time_dep_coordinates)
with write_read_buffer_context(
{"lcs": lcs}, open_kwargs={"copy_arrays": copy_arrays, "lazy_load": lazy_load}
{"lcs": lcs}, open_kwargs={"memmap": memmap, "lazy_load": lazy_load}
) as data:
assert data["lcs"] == lcs


@pytest.mark.parametrize("copy_arrays", [True, False])
@pytest.mark.parametrize("memmap", [True, False])
@pytest.mark.parametrize("lazy_load", [True, False])
@pytest.mark.parametrize("has_ref_time", [True, False])
@pytest.mark.parametrize("has_tdp_orientation", [True, False])
def test_local_coordinate_system_coords_timeseries(
copy_arrays, lazy_load, has_ref_time, has_tdp_orientation
memmap, lazy_load, has_ref_time, has_tdp_orientation
):
"""Test reading and writing a LCS with a `TimeSeries` as coordinates to asdf."""
# create inputs to lcs __init__
Expand All @@ -270,7 +270,7 @@ def test_local_coordinate_system_coords_timeseries(

# round trip and compare
with write_read_buffer_context(
{"lcs": lcs}, open_kwargs={"copy_arrays": copy_arrays, "lazy_load": lazy_load}
{"lcs": lcs}, open_kwargs={"memmap": memmap, "lazy_load": lazy_load}
) as data:
lcs_buffer = data["lcs"]
assert lcs_buffer == lcs
Expand Down Expand Up @@ -337,13 +337,13 @@ def get_example_coordinate_system_manager():
return csm


@pytest.mark.parametrize("copy_arrays", [True, False])
@pytest.mark.parametrize("memmap", [True, False])
@pytest.mark.parametrize("lazy_load", [True, False])
def test_coordinate_system_manager(copy_arrays, lazy_load):
def test_coordinate_system_manager(memmap, lazy_load):
csm = get_example_coordinate_system_manager()
tree = {"cs_hierarchy": csm}
with write_read_buffer_context(
tree, open_kwargs={"copy_arrays": copy_arrays, "lazy_load": lazy_load}
tree, open_kwargs={"memmap": memmap, "lazy_load": lazy_load}
) as data:
csm_file = data["cs_hierarchy"]
assert csm == csm_file
Expand Down Expand Up @@ -400,24 +400,24 @@ def get_coordinate_system_manager_with_subsystems(nested: bool):
return csm_global


@pytest.mark.parametrize("copy_arrays", [True, False])
@pytest.mark.parametrize("memmap", [True, False])
@pytest.mark.parametrize("lazy_load", [True, False])
@pytest.mark.parametrize("nested", [True, False])
def test_coordinate_system_manager_with_subsystems(copy_arrays, lazy_load, nested):
def test_coordinate_system_manager_with_subsystems(memmap, lazy_load, nested):
csm = get_coordinate_system_manager_with_subsystems(nested)
tree = {"cs_hierarchy": csm}
with write_read_buffer_context(
tree, open_kwargs={"copy_arrays": copy_arrays, "lazy_load": lazy_load}
tree, open_kwargs={"memmap": memmap, "lazy_load": lazy_load}
) as data:
csm_file = data["cs_hierarchy"]
assert csm == csm_file


@pytest.mark.parametrize("copy_arrays", [True, False])
@pytest.mark.parametrize("memmap", [True, False])
@pytest.mark.parametrize("lazy_load", [True, False])
@pytest.mark.parametrize("csm_time_ref", [None, "2000-03-16"])
def test_coordinate_system_manager_time_dependencies(
copy_arrays, lazy_load, csm_time_ref
memmap, lazy_load, csm_time_ref
):
"""Test serialization of time components from CSM and its attached LCS."""
lcs_tdp_1_time_ref = None
Expand Down Expand Up @@ -449,15 +449,15 @@ def test_coordinate_system_manager_time_dependencies(

tree = {"cs_hierarchy": csm_root}
with write_read_buffer_context(
tree, open_kwargs={"copy_arrays": copy_arrays, "lazy_load": lazy_load}
tree, open_kwargs={"memmap": memmap, "lazy_load": lazy_load}
) as data:
csm_file = data["cs_hierarchy"]
assert csm_root == csm_file


@pytest.mark.parametrize("copy_arrays", [True, False])
@pytest.mark.parametrize("memmap", [True, False])
@pytest.mark.parametrize("lazy_load", [True, False])
def test_coordinate_system_manager_with_data(copy_arrays, lazy_load):
def test_coordinate_system_manager_with_data(memmap, lazy_load):
"""Test if data attached to a CSM is stored and read correctly."""
csm = tf.CoordinateSystemManager("root", "csm")
csm.create_cs("cs_1", "root", coordinates=Q_([1, 1, 1], "mm"))
Expand All @@ -483,7 +483,7 @@ def test_coordinate_system_manager_with_data(copy_arrays, lazy_load):

tree = {"csm": csm}
with write_read_buffer_context(
tree, open_kwargs={"copy_arrays": copy_arrays, "lazy_load": lazy_load}
tree, open_kwargs={"memmap": memmap, "lazy_load": lazy_load}
) as buffer:
csm_buffer = buffer["csm"]

Expand All @@ -498,7 +498,7 @@ def test_coordinate_system_manager_with_data(copy_arrays, lazy_load):
# --------------------------------------------------------------------------------------


@pytest.mark.parametrize("copy_arrays", [True, False])
@pytest.mark.parametrize("memmap", [True, False])
@pytest.mark.parametrize("lazy_load", [True, False])
@pytest.mark.parametrize(
"ts",
Expand All @@ -518,9 +518,9 @@ def test_coordinate_system_manager_with_data(copy_arrays, lazy_load):
),
],
)
def test_time_series(ts, copy_arrays, lazy_load):
def test_time_series(ts, memmap, lazy_load):
with write_read_buffer_context(
{"ts": ts}, open_kwargs={"copy_arrays": copy_arrays, "lazy_load": lazy_load}
{"ts": ts}, open_kwargs={"memmap": memmap, "lazy_load": lazy_load}
) as data:
ts_file = data["ts"]
if isinstance(ts.data, ME):
Expand All @@ -536,7 +536,7 @@ def test_time_series(ts, copy_arrays, lazy_load):
# --------------------------------------------------------------------------------------


@pytest.mark.parametrize("copy_arrays", [True, False])
@pytest.mark.parametrize("memmap", [True, False])
@pytest.mark.parametrize("lazy_load", [True, False])
@pytest.mark.parametrize(
"coords, interpolation",
Expand All @@ -546,21 +546,21 @@ def test_time_series(ts, copy_arrays, lazy_load):
(dict(time=Q_([1, 2, 3], "s"), space=Q_([4, 5, 6, 7], "m")), "step"),
],
)
def test_generic_series_discrete(coords, interpolation, copy_arrays, lazy_load):
def test_generic_series_discrete(coords, interpolation, memmap, lazy_load):
shape = tuple(len(v) for v in coords.values())
data = Q_(np.ones(shape), "m")

gs = GenericSeries(data, coords=coords, interpolation=interpolation)

with write_read_buffer_context(
{"gs": gs}, open_kwargs={"copy_arrays": copy_arrays, "lazy_load": lazy_load}
{"gs": gs}, open_kwargs={"memmap": memmap, "lazy_load": lazy_load}
) as data:
gs_file = data["gs"]

assert gs == gs_file


@pytest.mark.parametrize("copy_arrays", [True, False])
@pytest.mark.parametrize("memmap", [True, False])
@pytest.mark.parametrize("lazy_load", [True, False])
@pytest.mark.parametrize(
"expr, params, units, dims",
Expand All @@ -581,11 +581,11 @@ def test_generic_series_discrete(coords, interpolation, copy_arrays, lazy_load):
),
],
)
def test_generic_series_expression(expr, params, units, dims, copy_arrays, lazy_load):
def test_generic_series_expression(expr, params, units, dims, memmap, lazy_load):
gs = GenericSeries(expr, parameters=params, units=units, dims=dims)

with write_read_buffer_context(
{"gs": gs}, open_kwargs={"copy_arrays": copy_arrays, "lazy_load": lazy_load}
{"gs": gs}, open_kwargs={"memmap": memmap, "lazy_load": lazy_load}
) as data:
gs_file = data["gs"]

Expand Down Expand Up @@ -746,16 +746,16 @@ def test_hashing(algorithm: str, buffer_size: int):
# test_asdf_serialization ----------------------------------------------------------

@staticmethod
@pytest.mark.parametrize("copy_arrays", [True, False])
@pytest.mark.parametrize("memmap", [True, False])
@pytest.mark.parametrize("lazy_load", [True, False])
@pytest.mark.parametrize("store_content", [True, False])
def test_asdf_serialization(copy_arrays, lazy_load, store_content):
def test_asdf_serialization(memmap, lazy_load, store_content):
"""Test the asdf serialization of the `ExternalFile` class.
Parameters
----------
copy_arrays : bool
If `False`, arrays are accessed via memory mapping whenever possible while
memmap : bool
If `True`, arrays are accessed via memory mapping whenever possible while
reading them.
lazy_load : bool
If `True`, items from the asdf file are not loaded until accessed.
Expand All @@ -769,7 +769,7 @@ def test_asdf_serialization(copy_arrays, lazy_load, store_content):
)
tree = {"file": ef}
with write_read_buffer_context(
tree, open_kwargs={"copy_arrays": copy_arrays, "lazy_load": lazy_load}
tree, open_kwargs={"memmap": memmap, "lazy_load": lazy_load}
) as data:
ef_file = data["file"]

Expand Down Expand Up @@ -801,10 +801,10 @@ def test_asdf_serialization(copy_arrays, lazy_load, store_content):

class TestPointCloud:
@staticmethod
@pytest.mark.parametrize("copy_arrays", [True, False])
@pytest.mark.parametrize("memmap", [True, False])
@pytest.mark.parametrize("lazy_load", [True, False])
@pytest.mark.parametrize("reshape", [True, False])
def test_asdf_serialization(copy_arrays, lazy_load, reshape):
def test_asdf_serialization(memmap, lazy_load, reshape):
time = None
coordinates = [
[0.0, 0.0, 0.0],
Expand All @@ -822,7 +822,7 @@ def test_asdf_serialization(copy_arrays, lazy_load, reshape):
pc = SpatialData(coordinates=coordinates, triangles=triangles, time=time)
tree = {"point_cloud": pc}
with write_read_buffer_context(
tree, open_kwargs={"copy_arrays": copy_arrays, "lazy_load": lazy_load}
tree, open_kwargs={"memmap": memmap, "lazy_load": lazy_load}
) as data:
pc_file = data["point_cloud"]

Expand Down
6 changes: 3 additions & 3 deletions weldx/tests/asdf_tests/test_asdf_measurement.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,17 +80,17 @@ def measurement_chain_with_equipment() -> MeasurementChain:
return mc


@pytest.mark.parametrize("copy_arrays", [True, False])
@pytest.mark.parametrize("memmap", [True, False])
@pytest.mark.parametrize("lazy_load", [True, False])
@pytest.mark.parametrize(
"measurement_chain",
[measurement_chain_without_equipment(), measurement_chain_with_equipment()],
)
def test_measurement_chain(copy_arrays, lazy_load, measurement_chain):
def test_measurement_chain(memmap, lazy_load, measurement_chain):
"""Test the asdf serialization of the measurement chain."""
tree = {"m_chain": measurement_chain}
with write_read_buffer_context(
tree, open_kwargs={"copy_arrays": copy_arrays, "lazy_load": lazy_load}
tree, open_kwargs={"memmap": memmap, "lazy_load": lazy_load}
) as data:
mc_file = data["m_chain"]
assert measurement_chain == mc_file

0 comments on commit 7ba9c62

Please sign in to comment.