Skip to content

Commit

Permalink
Let save_results()/load_results() properly handle experiment dtypes (#280)
Browse files Browse the repository at this point in the history
* bugfix for #277: load_results now casts every column in experiments to the dtype specified in the metadata

Bugfix for load_results. When parsing experiments, the dtype metadata is now correctly applied. Also expands the save_results unit test to cover all dtypes used in storing experiments.

* small typo in test
  • Loading branch information
quaquel authored and EwoutH committed Jun 18, 2023
1 parent ae1b657 commit 01abd6c
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 9 deletions.
3 changes: 3 additions & 0 deletions ema_workbench/util/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ def load_results(file_name):
except TypeError:
dtype = pd.api.types.pandas_dtype(dtype)

if experiments[name].dtype is not dtype:
experiments[name] = experiments[name].astype(dtype)
# this check is for backward compatibility with data stored with 2.4.
if pd.api.types.is_object_dtype(dtype):
experiments[name] = experiments[name].astype("category")

Expand Down
Binary file added test/data/test.tar.gz
Binary file not shown.
26 changes: 17 additions & 9 deletions test/test_util/test_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,11 @@ def test_save_results(self):

# test for 1d
nr_experiments = 10000
experiments = pd.DataFrame(
index=np.arange(nr_experiments), columns={"x": float, "y": float}
cases = np.empty(
nr_experiments, dtype=[("x", float), ("y", int), ("z", bool), ("q", object)]
)
experiments = pd.DataFrame.from_records(cases)
experiments["q"] = experiments["q"].astype("category")
outcome_q = np.random.rand(nr_experiments, 1)

outcomes = {ScalarOutcome("q").name: outcome_q}
Expand Down Expand Up @@ -85,19 +87,19 @@ def test_save_results(self):

class LoadResultsTestCase(unittest.TestCase):
def test_load_results(self):
# test for 1d

# test for 3d

nr_experiments = 10000

# test for 2d
experiments = pd.DataFrame(
index=np.arange(nr_experiments), columns={"x": float, "y": float}
cases = np.empty(
nr_experiments, dtype=[("x", float), ("y", int), ("z", bool), ("q", object)]
)
experiments = pd.DataFrame.from_records(cases)

experiments["x"] = np.random.rand(nr_experiments)
experiments["y"] = np.random.rand(nr_experiments)
experiments["y"] = np.random.randint(0, 10, size=nr_experiments)
experiments["z"] = np.random.randint(0, 1, size=nr_experiments, dtype=bool)
experiments["q"] = np.random.randint(0, 10, size=nr_experiments).astype(object)
experiments["q"] = experiments["q"].astype("category")

outcome_a = np.zeros((nr_experiments, 1))

Expand All @@ -111,6 +113,12 @@ def test_load_results(self):
self.assertTrue(np.all(np.allclose(experiments["x"], loaded_experiments["x"])))
self.assertTrue(np.all(np.allclose(experiments["y"], loaded_experiments["y"])))

for name, dtype in experiments.dtypes.items():
self.assertTrue(
dtype == loaded_experiments[name].dtype,
msg=f"{name}, {dtype}, {loaded_experiments[name].dtype}",
)

os.remove("../data/test.tar.gz")

# test 3d
Expand Down

0 comments on commit 01abd6c

Please sign in to comment.