Skip to content

Commit 8e6cd7f

Browse files
authored
Add "Load model ram used (KB)" metric for BAPP (#28931)
### Details:
- Report memory usage for the `ov::Core::compile_model` / `ov::Core::import_model` methods.

### Tickets:
- E#157655
1 parent cce35ac commit 8e6cd7f

File tree

4 files changed

+131
-1
lines changed

4 files changed

+131
-1
lines changed

docs/articles_en/get-started/learn-openvino/openvino-samples/benchmark-tool.rst

+3
Original file line numberDiff line numberDiff line change
@@ -726,6 +726,9 @@ An example of running ``benchmark_app`` on CPU in latency mode and its output ar
726726
[ INFO ] output (node: output) : f32 / [...] / {1,100}
727727
[Step 7/11] Loading the model to the device
728728
[ INFO ] Compile model took 974.64 ms
729+
[ INFO ] Start of compilation memory usage: Peak 1000 KB
730+
[ INFO ] End of compilation memory usage: Peak 10000 KB
731+
[ INFO ] Compile model ram used 9000 KB
729732
[Step 8/11] Querying optimal runtime parameters
730733
[ INFO ] Model:
731734
[ INFO ] NETWORK_NAME: torch-jit-export

samples/cpp/benchmark_app/main.cpp

+87
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,71 @@
3535
#include "remote_tensors_filling.hpp"
3636
#include "statistics_report.hpp"
3737
#include "utils.hpp"
38+
39+
#if defined _WIN32
40+
#include <windows.h>
41+
#include <psapi.h>
42+
#else
43+
#include <fstream>
44+
#include <regex>
45+
#include <sstream>
46+
#endif
47+
3848
// clang-format on
3949

4050
namespace {
51+
52+
#if defined _WIN32
53+
54+
int64_t get_peak_memory_usage() {
55+
PROCESS_MEMORY_COUNTERS mem_counters;
56+
if (!GetProcessMemoryInfo(GetCurrentProcess(), &mem_counters, sizeof(mem_counters))) {
57+
throw std::runtime_error("Can't get system memory values");
58+
}
59+
60+
// Linux tracks memory usage in pages and then converts them to kB.
61+
// Thus, there is always some room for inaccuracy as pages are not guaranteed to be fully used.
62+
// In Windows, the situation is different: the system returns the memory usage in bytes, not in pages.
63+
// To align the output between the two operating systems as closely as possible, we have two options:
64+
// 1. Use rounding to the nearest integer.
65+
// 2. Try to estimate the number of pages used in Windows. However,
66+
// this approach is likely to be inaccurate as well, so option 1 was chosen.
67+
static constexpr double bytes_in_kilobyte = 1024.0;
68+
69+
// please note then we calculate difference
70+
// to get peak memory increment value, so we return int64, not size_t
71+
return static_cast<int64_t>(std::round(mem_counters.PeakWorkingSetSize / bytes_in_kilobyte));
72+
}
73+
74+
#else
75+
76+
/// Returns the process peak virtual memory usage in kilobytes, read from the
/// VmPeak entry of /proc/self/status.
/// @throws std::runtime_error if the VmPeak entry cannot be found.
int64_t get_peak_memory_usage() {
    size_t peak_mem_usage_kB = 0;

    std::ifstream status_file("/proc/self/status");
    std::string line;
    bool mem_values_found = false;
    while (std::getline(status_file, line)) {
        // Lines look like "VmPeak:   123456 kB". A plain prefix check is
        // sufficient (the key always starts the line) and avoids constructing
        // a std::regex on every call, which is disproportionately expensive.
        if (line.rfind("VmPeak:", 0) == 0) {
            std::istringstream iss(line.substr(sizeof("VmPeak:") - 1));
            iss >> peak_mem_usage_kB;
            mem_values_found = true;
            break;  // VmPeak appears at most once; no need to scan the rest
        }
    }

    if (!mem_values_found) {
        throw std::runtime_error("Can't get system memory values");
    }

    // please note that we calculate difference
    // to get peak memory increment value, so we return int64, not size_t
    return static_cast<int64_t>(peak_mem_usage_kB);
}
100+
101+
#endif
102+
41103
bool parse_and_check_command_line(int argc, char* argv[]) {
42104
// ---------------------------Parsing and validating input
43105
// arguments--------------------------------------
@@ -566,10 +628,18 @@ int main(int argc, char* argv[]) {
566628
slog::info << "Skipping the step for loading model from file" << slog::endl;
567629
next_step();
568630
slog::info << "Skipping the step for loading model from file" << slog::endl;
631+
auto compile_model_mem_start = get_peak_memory_usage();
569632
auto startTime = Time::now();
570633
compiledModel = core.compile_model(FLAGS_m, device_name, device_config);
571634
auto duration_ms = get_duration_ms_till_now(startTime);
635+
auto compile_model_mem_end = get_peak_memory_usage();
572636
slog::info << "Compile model took " << double_to_string(duration_ms) << " ms" << slog::endl;
637+
638+
slog::info << "Start of compilation memory usage: Peak " << compile_model_mem_start << " KB" << slog::endl;
639+
slog::info << "End of compilation memory usage: Peak " << compile_model_mem_end << " KB" << slog::endl;
640+
slog::info << "Compile model ram used " << compile_model_mem_end - compile_model_mem_start << " KB"
641+
<< slog::endl;
642+
573643
slog::info << "Original model I/O parameters:" << slog::endl;
574644
printInputAndOutputsInfoShort(compiledModel);
575645

@@ -738,10 +808,18 @@ int main(int argc, char* argv[]) {
738808
// ----------------- 7. Loading the model to the device
739809
// --------------------------------------------------------
740810
next_step();
811+
auto compile_model_mem_start = get_peak_memory_usage();
741812
startTime = Time::now();
742813
compiledModel = core.compile_model(model, device_name, device_config);
743814
duration_ms = get_duration_ms_till_now(startTime);
815+
auto compile_model_mem_end = get_peak_memory_usage();
744816
slog::info << "Compile model took " << double_to_string(duration_ms) << " ms" << slog::endl;
817+
818+
slog::info << "Start of compilation memory usage: Peak " << compile_model_mem_start << " KB" << slog::endl;
819+
slog::info << "End of compilation memory usage: Peak " << compile_model_mem_end << " KB" << slog::endl;
820+
slog::info << "Compile model ram used " << compile_model_mem_end - compile_model_mem_start << " KB"
821+
<< slog::endl;
822+
745823
if (statistics)
746824
statistics->add_parameters(
747825
StatisticsReport::Category::EXECUTION_RESULTS,
@@ -760,17 +838,26 @@ int main(int argc, char* argv[]) {
760838
// ----------------- 7. Loading the model to the device
761839
// --------------------------------------------------------
762840
next_step();
841+
auto import_model_mem_start = get_peak_memory_usage();
763842
auto startTime = Time::now();
764843

765844
std::ifstream modelStream(FLAGS_m, std::ios_base::binary | std::ios_base::in);
766845
if (!modelStream.is_open()) {
767846
throw std::runtime_error("Cannot open model file " + FLAGS_m);
768847
}
848+
769849
compiledModel = core.import_model(modelStream, device_name, device_config);
770850
modelStream.close();
771851

772852
auto duration_ms = get_duration_ms_till_now(startTime);
853+
auto import_model_mem_end = get_peak_memory_usage();
773854
slog::info << "Import model took " << double_to_string(duration_ms) << " ms" << slog::endl;
855+
856+
slog::info << "Start of import memory usage: Peak " << import_model_mem_start << " KB" << slog::endl;
857+
slog::info << "End of import memory usage: Peak " << import_model_mem_end << " KB" << slog::endl;
858+
slog::info << "Import model ram used " << import_model_mem_end - import_model_mem_start << " KB"
859+
<< slog::endl;
860+
774861
slog::info << "Original model I/O paramteters:" << slog::endl;
775862
printInputAndOutputsInfoShort(compiledModel);
776863

tests/samples_tests/smoke_tests/test_benchmark_app.py

+5
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,11 @@ def verify(sample_language, device, api=None, nireq=None, shape=None, data_shape
6161
'-d', device
6262
)
6363
assert 'FPS' in output
64+
65+
# No Windows support due to the lack of the ‘psutil’ module in the CI infrastructure
66+
if os.name == "posix":
67+
assert 'Compile model ram used' in output
68+
6469
if tmp_path:
6570
assert (tmp_path / 'exec_graph.xml').exists()
6671
with (tmp_path / 'conf.json').open(encoding='utf-8') as file:

tools/benchmark_tool/openvino/tools/benchmark/main.py

+36-1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,27 @@
2222
from openvino.tools.benchmark.utils.statistics_report import StatisticsReport, JsonStatisticsReport, CsvStatisticsReport, \
2323
averageCntReport, detailedCntReport
2424

25+
def get_peak_memory_usage():
    """Return the process peak memory usage (VmPeak) in KB on POSIX systems.

    Returns None on other platforms.
    Raises RuntimeError if /proc/self/status has no VmPeak entry.
    """
    if os.name != "posix":
        # No Windows support due to the lack of the ‘psutil’ module in the CI infrastructure
        return None

    with open("/proc/self/status", "r") as status_file:
        for entry in status_file:
            if entry.startswith("VmPeak:"):
                # Line format: "VmPeak:   123456 kB" -> second field is the value in KB
                return int(entry.split()[1])
    raise RuntimeError("VmPeak attribute not found. Unable to determine peak memory usage.")
35+
36+
def log_memory_usage(logger, start_mem_usage, end_mem_usage, action_name):
    """Log peak memory usage measured around a model load step.

    No-op when either measurement is unavailable (e.g. non-POSIX platforms,
    where get_peak_memory_usage() returns None).
    """
    if start_mem_usage is None or end_mem_usage is None:
        return

    # The summary line keeps the original verb ("Compile model ram used"),
    # while the start/end lines use the noun form ("compilation").
    title = action_name.capitalize()
    phase = "compilation" if action_name == "compile" else action_name
    for message in (
        f"Start of {phase} memory usage: Peak {start_mem_usage} KB",
        f"End of {phase} memory usage: Peak {end_mem_usage} KB",
        f"{title} model ram used {end_mem_usage - start_mem_usage} KB",
    ):
        logger.info(message)
45+
2546
def parse_and_check_command_line():
2647
def arg_not_empty(arg_value,empty_value):
2748
return not arg_value is None and not arg_value == empty_value
@@ -349,10 +370,15 @@ def set_nthreads_pin(property_name, property_value):
349370
# --------------------- 7. Loading the model to the device -------------------------------------------------
350371
next_step()
351372

373+
start_mem_usage = get_peak_memory_usage()
352374
start_time = datetime.utcnow()
375+
353376
compiled_model = benchmark.core.compile_model(args.path_to_model, benchmark.device, device_config)
377+
354378
duration_ms = f"{(datetime.utcnow() - start_time).total_seconds() * 1000:.2f}"
379+
end_mem_usage = get_peak_memory_usage()
355380
logger.info(f"Compile model took {duration_ms} ms")
381+
log_memory_usage(logger, start_mem_usage, end_mem_usage, "compile")
356382
if statistics:
357383
statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
358384
[
@@ -411,11 +437,15 @@ def set_nthreads_pin(property_name, property_value):
411437

412438
# --------------------- 7. Loading the model to the device -------------------------------------------------
413439
next_step()
440+
start_mem_usage = get_peak_memory_usage()
414441
start_time = datetime.utcnow()
415-
compiled_model = benchmark.core.compile_model(model, benchmark.device, device_config)
416442

443+
compiled_model = benchmark.core.compile_model(model, benchmark.device, device_config)
444+
417445
duration_ms = f"{(datetime.utcnow() - start_time).total_seconds() * 1000:.2f}"
446+
end_mem_usage = get_peak_memory_usage()
418447
logger.info(f"Compile model took {duration_ms} ms")
448+
log_memory_usage(logger, start_mem_usage, end_mem_usage, "compile")
419449
if statistics:
420450
statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
421451
[
@@ -435,10 +465,15 @@ def set_nthreads_pin(property_name, property_value):
435465
# --------------------- 7. Loading the model to the device -------------------------------------------------
436466
next_step()
437467

468+
start_mem_usage = get_peak_memory_usage()
438469
start_time = datetime.utcnow()
470+
439471
compiled_model = benchmark.core.import_model(args.path_to_model, benchmark.device, device_config)
472+
440473
duration_ms = f"{(datetime.utcnow() - start_time).total_seconds() * 1000:.2f}"
474+
end_mem_usage = get_peak_memory_usage()
441475
logger.info(f"Import model took {duration_ms} ms")
476+
log_memory_usage(logger, start_mem_usage, end_mem_usage, "import")
442477
if statistics:
443478
statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
444479
[

0 commit comments

Comments
 (0)