Skip to content

Commit

Permalink
making simulation tools save plot data to H5 files
Browse files Browse the repository at this point in the history
  • Loading branch information
jonrkarr committed Apr 14, 2021
1 parent 22b9d4c commit c3a9837
Show file tree
Hide file tree
Showing 5 changed files with 134 additions and 12 deletions.
2 changes: 1 addition & 1 deletion biosimulators_utils/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.1.51'
__version__ = '0.1.52'
6 changes: 5 additions & 1 deletion biosimulators_utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ class Config(object):
Attributes:
ALGORITHM_SUBSTITUTION_POLICY (:obj:`str`): algorithm substitution policy
SAVE_PLOT_DATA (:obj:`bool`): whether to save data for plots alongside data for reports in CSV/HDF5 files
REPORT_FORMATS (:obj:`list` of :obj:`str`): default formats to generate reports in
PLOT_FORMATS (:obj:`list` of :obj:`str`): default formats to generate plots in
H5_REPORTS_PATH (:obj:`str`): path to save reports in HDF5 format relative to base output directory
Expand All @@ -29,12 +30,13 @@ class Config(object):
VERBOSE (:obj:`bool`): whether to display the detailed output of the execution of each task
"""

def __init__(self, ALGORITHM_SUBSTITUTION_POLICY, REPORT_FORMATS, PLOT_FORMATS,
def __init__(self, ALGORITHM_SUBSTITUTION_POLICY, SAVE_PLOT_DATA, REPORT_FORMATS, PLOT_FORMATS,
H5_REPORTS_PATH, REPORTS_PATH, PLOTS_PATH, BUNDLE_OUTPUTS, KEEP_INDIVIDUAL_OUTPUTS,
LOG_PATH, BIOSIMULATORS_API_ENDPOINT, VERBOSE):
"""
Args:
ALGORITHM_SUBSTITUTION_POLICY (:obj:`str`): algorithm substitution policy
SAVE_PLOT_DATA (:obj:`bool`): whether to save data for plots alongside data for reports in CSV/HDF5 files
REPORT_FORMATS (:obj:`list` of :obj:`str`): default formats to generate reports in
PLOT_FORMATS (:obj:`list` of :obj:`str`): default formats to generate plots in
H5_REPORTS_PATH (:obj:`str`): path to save reports in HDF5 format relative to base output directory
Expand All @@ -47,6 +49,7 @@ def __init__(self, ALGORITHM_SUBSTITUTION_POLICY, REPORT_FORMATS, PLOT_FORMATS,
VERBOSE (:obj:`bool`): whether to display the detailed output of the execution of each task
"""
self.ALGORITHM_SUBSTITUTION_POLICY = ALGORITHM_SUBSTITUTION_POLICY
self.SAVE_PLOT_DATA = SAVE_PLOT_DATA
self.REPORT_FORMATS = REPORT_FORMATS
self.PLOT_FORMATS = PLOT_FORMATS
self.H5_REPORTS_PATH = H5_REPORTS_PATH
Expand Down Expand Up @@ -79,6 +82,7 @@ def get_config():

return Config(
ALGORITHM_SUBSTITUTION_POLICY=os.environ.get('ALGORITHM_SUBSTITUTION_POLICY', 'SIMILAR_VARIABLES'),
SAVE_PLOT_DATA=os.environ.get('SAVE_PLOT_DATA', '1').lower() in ['1', 'true'],
REPORT_FORMATS=report_formats,
PLOT_FORMATS=plot_formats,
H5_REPORTS_PATH=os.environ.get('H5_REPORTS_PATH', 'reports.h5'),
Expand Down
30 changes: 25 additions & 5 deletions biosimulators_utils/report/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,13 @@
"""

from ..config import get_config
from ..sedml.data_model import Report # noqa: F401
from ..sedml.data_model import Output, Report, Plot2D, Plot3D # noqa: F401
from ..utils.core import pad_arrays_to_consistent_shapes
from ..warnings import warn
from .data_model import DataSetResults, ReportFormat
from .warnings import (RepeatDataSetLabelsWarning, MissingReportMetadataWarning, MissingDataWarning,
ExtraDataWarning, CannotExportMultidimensionalTableWarning)
import enum
import glob
import h5py
import numpy
Expand All @@ -26,10 +27,17 @@
]


class Hdf5DataSetType(enum.Enum):
    """ Type of data encoded in an HDF5 data set

    Each member's value is one of the SED-ML output classes imported from
    ``..sedml.data_model``. The member's *name* is the string written to (and
    read back from) the ``_type`` attribute of an HDF5 data set, so renaming a
    member changes what is stored in the files.
    """
    SedReport = Report  # value is the :obj:`Report` class
    SedPlot2D = Plot2D  # value is the :obj:`Plot2D` class
    SedPlot3D = Plot3D  # value is the :obj:`Plot3D` class


class ReportWriter(object):
""" Class for writing reports of simulation results """

def run(self, report, results, base_path, rel_path, format=ReportFormat.h5):
def run(self, report, results, base_path, rel_path, format=ReportFormat.h5, type=Report):
""" Save a report
Args:
Expand All @@ -46,10 +54,12 @@ def run(self, report, results, base_path, rel_path, format=ReportFormat.h5):
* HDF5: key within HDF5 file
format (:obj:`ReportFormat`, optional): report format
type (:obj:`type`): type of output (e.g., subclass of :obj:`Output` such as :obj:`Report`, :obj:`Plot2D`)
"""
results_array = []
data_set_ids = []
data_set_labels = []
data_set_names = []
data_set_data_types = []
data_set_shapes = []
for data_set in report.data_sets:
Expand All @@ -58,6 +68,7 @@ def run(self, report, results, base_path, rel_path, format=ReportFormat.h5):
results_array.append(data_set_result)
data_set_ids.append(data_set.id)
data_set_labels.append(data_set.label)
data_set_names.append(data_set.name or '')
if data_set_result is None:
data_set_data_types.append('__None__')
data_set_shapes.append('')
Expand Down Expand Up @@ -114,7 +125,13 @@ def run(self, report, results, base_path, rel_path, format=ReportFormat.h5):

data_set = file.create_dataset(rel_path, data=results_array,
chunks=True, compression="gzip", compression_opts=9)
data_set.attrs['_type'] = Hdf5DataSetType(type).name
if report.id:
data_set.attrs['id'] = report.id
if report.name:
data_set.attrs['name'] = report.name
data_set.attrs['dataSetIds'] = data_set_ids
data_set.attrs['dataSetNames'] = data_set_names
data_set.attrs['dataSetLabels'] = data_set_labels
data_set.attrs['dataSetDataTypes'] = data_set_data_types
data_set.attrs['dataSetShapes'] = data_set_shapes
Expand Down Expand Up @@ -254,7 +271,7 @@ def run(self, report, base_path, rel_path, format=ReportFormat.h5):

return results

def get_ids(self, base_path, format=ReportFormat.h5):
def get_ids(self, base_path, format=ReportFormat.h5, type=Output):
""" Get the ids of the reports in a file
Args:
Expand All @@ -264,6 +281,7 @@ def get_ids(self, base_path, format=ReportFormat.h5):
* HDF5: file to save results
format (:obj:`ReportFormat`, optional): report format
type (:obj:`type`): type of report to get
Returns:
:obj:`list` of :obj:`str`: ids of reports
Expand All @@ -287,9 +305,11 @@ def get_ids(self, base_path, format=ReportFormat.h5):
with h5py.File(filename, 'r') as file:
report_ids = []

def append_report_id(name, object):
def append_report_id(name, object, type=type):
if isinstance(object, h5py.Dataset):
report_ids.append(name)
data_set_type = object.attrs.get('_type', None)
if data_set_type and (Hdf5DataSetType[data_set_type].value == type or issubclass(Hdf5DataSetType[data_set_type].value, type)):
report_ids.append(name)

file.visititems(append_report_id)

Expand Down
98 changes: 93 additions & 5 deletions biosimulators_utils/sedml/exec.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from ..report.io import ReportWriter
from ..utils.core import pad_arrays_to_consistent_shapes
from ..warnings import warn
from .data_model import SedDocument, Model, Task, RepeatedTask, Report, Plot2D, Plot3D, ModelAttributeChange
from .data_model import SedDocument, Model, Task, RepeatedTask, Output, Report, Plot2D, Plot3D, ModelAttributeChange, DataSet # noqa: F401
from .exceptions import SedmlExecutionError
from .io import SedmlSimulationReader
from .utils import (resolve_model_and_apply_xml_changes, get_variables_for_task,
Expand Down Expand Up @@ -229,7 +229,8 @@ def exec_task(task, variables, log=None):
output, variable_results,
base_out_path, rel_out_path, report_formats,
task,
log.outputs[output.id])
log=log.outputs[output.id],
type=Report)
task_contributes_to_output = task_contributes_to_output or task_contributes_to_report

elif isinstance(output, Plot2D):
Expand All @@ -240,6 +241,16 @@ def exec_task(task, variables, log=None):
log.outputs[output.id])
task_contributes_to_output = task_contributes_to_output or task_contributes_to_plot

# save data as report
if config.SAVE_PLOT_DATA:
report = get_report_for_plot2d(output)
report_results[output.id], _, _, _ = exec_report(
report, variable_results,
base_out_path, rel_out_path, report_formats,
task,
log=None,
type=output.__class__)

elif isinstance(output, Plot3D):
output_status, output_exception, task_contributes_to_plot = exec_plot_3d(
output, variable_results,
Expand All @@ -248,6 +259,16 @@ def exec_task(task, variables, log=None):
log.outputs[output.id])
task_contributes_to_output = task_contributes_to_output or task_contributes_to_plot

# save as report
if config.SAVE_PLOT_DATA:
report = get_report_for_plot3d(output)
report_results[output.id], _, _, _ = exec_report(
report, variable_results,
base_out_path, rel_out_path, report_formats,
task,
log=None,
type=output.__class__)

else:
# unreachable because the above cases cover all types of outputs
raise NotImplementedError('Outputs of type {} are not supported.'.format(output.__class__.__name__))
Expand Down Expand Up @@ -531,7 +552,7 @@ def exec_task(task, variables, log=None):
return variable_results


def exec_report(report, variable_results, base_out_path, rel_out_path, formats, task, log):
def exec_report(report, variable_results, base_out_path, rel_out_path, formats, task, log=None, type=Report):
""" Execute a report, generating the data sets which are available
Args:
Expand All @@ -548,6 +569,7 @@ def exec_report(report, variable_results, base_out_path, rel_out_path, formats,
formats (:obj:`list` of :obj:`ReportFormat`, optional): report format (e.g., csv or h5)
task (:obj:`Task`): task
log (:obj:`ReportLog`, optional): log of report
type (:obj:`type`): type of output (e.g., subclass of :obj:`Output` such as :obj:`Report`, :obj:`Plot2D`)
Returns:
:obj:`tuple`:
Expand Down Expand Up @@ -577,7 +599,8 @@ def exec_report(report, variable_results, base_out_path, rel_out_path, formats,
data_set_results[data_set.id] = data_gen_res

data_gen_status = data_gen_statuses[data_set.data_generator.id]
log.data_sets[data_set.id] = data_gen_status
if log:
log.data_sets[data_set.id] = data_gen_status
if data_gen_status == Status.FAILED:
failed = True
if data_gen_status == Status.SUCCEEDED:
Expand All @@ -590,7 +613,8 @@ def exec_report(report, variable_results, base_out_path, rel_out_path, formats,
data_set_results,
base_out_path,
os.path.join(rel_out_path, report.id) if rel_out_path else report.id,
format=format)
format=format,
type=type)

if failed:
status = Status.FAILED
Expand Down Expand Up @@ -767,3 +791,67 @@ def exec_plot_3d(plot, variable_results, base_out_path, rel_out_path, formats, t

# return
return status, data_gen_exceptions, task_contributes_to_plot


def get_report_for_plot2d(plot):
    """ Build a report that captures the data behind a 2D plot

    One data set is created per distinct data generator id used as the x or y
    axis of any curve. Each data set takes the data generator's id as both its
    id and its label, and the report takes the plot's id and name.

    Args:
        plot (:obj:`Plot2D`): plot

    Returns:
        :obj:`Report`: report with a data set for each data generator of the
        curves, ordered by data set id
    """
    data_sets_by_id = {}
    for curve in plot.curves:
        for data_generator in (curve.x_data_generator, curve.y_data_generator):
            # a data generator shared by several curves simply replaces its earlier entry
            data_sets_by_id[data_generator.id] = DataSet(
                id=data_generator.id,
                name=data_generator.name,
                label=data_generator.id,
                data_generator=data_generator,
            )

    ordered_data_sets = list(data_sets_by_id.values())
    ordered_data_sets.sort(key=lambda item: item.id)
    return Report(id=plot.id, name=plot.name, data_sets=ordered_data_sets)


def get_report_for_plot3d(plot):
    """ Build a report that captures the data behind a 3D plot

    One data set is created per distinct data generator id used as the x, y, or
    z axis of any surface. Each data set takes the data generator's id as both
    its id and its label, and the report takes the plot's id and name.

    Args:
        plot (:obj:`Plot3D`): plot

    Returns:
        :obj:`Report`: report with a data set for each data generator of the
        surfaces, ordered by data set id
    """
    data_sets_by_id = {}
    for surface in plot.surfaces:
        axes_generators = (
            surface.x_data_generator,
            surface.y_data_generator,
            surface.z_data_generator,
        )
        for data_generator in axes_generators:
            # a data generator shared by several surfaces simply replaces its earlier entry
            data_sets_by_id[data_generator.id] = DataSet(
                id=data_generator.id,
                name=data_generator.name,
                label=data_generator.id,
                data_generator=data_generator,
            )

    ordered_data_sets = list(data_sets_by_id.values())
    ordered_data_sets.sort(key=lambda item: item.id)
    return Report(id=plot.id, name=plot.name, data_sets=ordered_data_sets)
10 changes: 10 additions & 0 deletions tests/sedml/test_sedml_exec.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,12 @@ def execute_task(task, variables, log):
shutil.rmtree(out_dir)
exec.exec_sed_doc(execute_task, filename, os.path.dirname(filename), out_dir, report_formats=[ReportFormat.h5], plot_formats=[])

report_ids = ReportReader().get_ids(out_dir, format=ReportFormat.h5, type=data_model.Report)
self.assertEqual(set(report_ids), set([doc.outputs[0].id, doc.outputs[1].id, doc.outputs[2].id, doc.outputs[3].id]))

report_ids = ReportReader().get_ids(out_dir, format=ReportFormat.h5, type=data_model.Plot2D)
self.assertEqual(set(report_ids), set([]))

data_set_results = ReportReader().run(doc.outputs[0], out_dir, doc.outputs[0].id, format=ReportFormat.h5)
for data_set in doc.outputs[0].data_sets:
numpy.testing.assert_allclose(
Expand Down Expand Up @@ -1140,6 +1146,10 @@ def execute_task(task, variables, log=None):
self.assertTrue(os.path.isfile(os.path.join(out_dir, 'plot_2d_1.pdf')))
self.assertTrue(os.path.isfile(os.path.join(out_dir, 'plot_2d_2.pdf')))

self.assertTrue(os.path.isfile(os.path.join(out_dir, 'reports.h5')))
report_ids = ReportReader().get_ids(out_dir, format=ReportFormat.h5, type=data_model.Plot2D)
self.assertEqual(set(report_ids), set(['plot_2d_1', 'plot_2d_2']))

self.assertEqual(
log.to_json()['outputs'],
[
Expand Down

0 comments on commit c3a9837

Please sign in to comment.