Skip to content

Commit 8e6cd7f

Browse files
authored
Add "Load model ram used (KB)" metric for BAPP (#28931)
### Details:
- Report memory usage for the `ov::Core::compile_model` / `ov::Core::import_model` methods.

### Tickets:
- E#157655
1 parent cce35ac commit 8e6cd7f

File tree

4 files changed

+131
-1
lines changed

4 files changed

+131
-1
lines changed

docs/articles_en/get-started/learn-openvino/openvino-samples/benchmark-tool.rst

+3
Original file line numberDiff line numberDiff line change
@@ -726,6 +726,9 @@ An example of running ``benchmark_app`` on CPU in latency mode and its output ar
726726
[ INFO ] output (node: output) : f32 / [...] / {1,100}
727727
[Step 7/11] Loading the model to the device
728728
[ INFO ] Compile model took 974.64 ms
729+
[ INFO ] Start of compilation memory usage: Peak 1000 KB
730+
[ INFO ] End of compilation memory usage: Peak 10000 KB
731+
[ INFO ] Compile model ram used 9000 KB
729732
[Step 8/11] Querying optimal runtime parameters
730733
[ INFO ] Model:
731734
[ INFO ] NETWORK_NAME: torch-jit-export

samples/cpp/benchmark_app/main.cpp

+87
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,71 @@
3535
#include "remote_tensors_filling.hpp"
3636
#include "statistics_report.hpp"
3737
#include "utils.hpp"
38+
39+
#if defined _WIN32
40+
#include <windows.h>
41+
#include <psapi.h>
42+
#else
43+
#include <fstream>
44+
#include <regex>
45+
#include <sstream>
46+
#endif
47+
3848
// clang-format on
3949

4050
namespace {
51+
52+
#if defined _WIN32
53+
54+
int64_t get_peak_memory_usage() {
55+
PROCESS_MEMORY_COUNTERS mem_counters;
56+
if (!GetProcessMemoryInfo(GetCurrentProcess(), &mem_counters, sizeof(mem_counters))) {
57+
throw std::runtime_error("Can't get system memory values");
58+
}
59+
60+
// Linux tracks memory usage in pages and then converts them to kB.
61+
// Thus, there is always some room for inaccuracy as pages are not guaranteed to be fully used.
62+
// In Windows, the situation is different: the system returns the memory usage in bytes, not in pages.
63+
// To align the output between the two operating systems as closely as possible, we have two options:
64+
// 1. Use rounding to the nearest integer.
65+
// 2. Try to estimate the number of pages used in Windows. However,
66+
// this approach is likely to be inaccurate as well, so option 1 was chosen.
67+
static constexpr double bytes_in_kilobyte = 1024.0;
68+
69+
// please note then we calculate difference
70+
// to get peak memory increment value, so we return int64, not size_t
71+
return static_cast<int64_t>(std::round(mem_counters.PeakWorkingSetSize / bytes_in_kilobyte));
72+
}
73+
74+
#else
75+
76+
/// Returns the process peak virtual memory usage in kilobytes, read from the
/// VmPeak entry of /proc/self/status.
/// @throws std::runtime_error if the VmPeak entry cannot be found.
int64_t get_peak_memory_usage() {
    size_t peak_mem_usage_kB = 0;

    std::ifstream status_file("/proc/self/status");
    std::string line;
    bool mem_values_found = false;
    while (std::getline(status_file, line)) {
        // Lines look like "VmPeak:   123456 kB". A plain prefix check is
        // sufficient (the key always starts the line) and avoids constructing
        // a std::regex on every call, which is disproportionately expensive.
        if (line.rfind("VmPeak:", 0) == 0) {
            std::istringstream iss(line.substr(sizeof("VmPeak:") - 1));
            iss >> peak_mem_usage_kB;
            mem_values_found = true;
            break;  // VmPeak appears at most once; no need to scan the rest
        }
    }

    if (!mem_values_found) {
        throw std::runtime_error("Can't get system memory values");
    }

    // please note that we calculate difference
    // to get peak memory increment value, so we return int64, not size_t
    return static_cast<int64_t>(peak_mem_usage_kB);
}
100+
101+
#endif
102+
41103
bool parse_and_check_command_line(int argc, char* argv[]) {
42104
// ---------------------------Parsing and validating input
43105
// arguments--------------------------------------
@@ -566,10 +628,18 @@ int main(int argc, char* argv[]) {
566628
slog::info << "Skipping the step for loading model from file" << slog::endl;
567629
next_step();
568630
slog::info << "Skipping the step for loading model from file" << slog::endl;
631+
auto compile_model_mem_start = get_peak_memory_usage();
569632
auto startTime = Time::now();
570633
compiledModel = core.compile_model(FLAGS_m, device_name, device_config);
571634
auto duration_ms = get_duration_ms_till_now(startTime);
635+
auto compile_model_mem_end = get_peak_memory_usage();
572636
slog::info << "Compile model took " << double_to_string(duration_ms) << " ms" << slog::endl;
637+
638+
slog::info << "Start of compilation memory usage: Peak " << compile_model_mem_start << " KB" << slog::endl;
639+
slog::info << "End of compilation memory usage: Peak " << compile_model_mem_end << " KB" << slog::endl;
640+
slog::info << "Compile model ram used " << compile_model_mem_end - compile_model_mem_start << " KB"
641+
<< slog::endl;
642+
573643
slog::info << "Original model I/O parameters:" << slog::endl;
574644
printInputAndOutputsInfoShort(compiledModel);
575645

@@ -738,10 +808,18 @@ int main(int argc, char* argv[]) {
738808
// ----------------- 7. Loading the model to the device
739809
// --------------------------------------------------------
740810
next_step();
811+
auto compile_model_mem_start = get_peak_memory_usage();
741812
startTime = Time::now();
742813
compiledModel = core.compile_model(model, device_name, device_config);
743814
duration_ms = get_duration_ms_till_now(startTime);
815+
auto compile_model_mem_end = get_peak_memory_usage();
744816
slog::info << "Compile model took " << double_to_string(duration_ms) << " ms" << slog::endl;
817+
818+
slog::info << "Start of compilation memory usage: Peak " << compile_model_mem_start << " KB" << slog::endl;
819+
slog::info << "End of compilation memory usage: Peak " << compile_model_mem_end << " KB" << slog::endl;
820+
slog::info << "Compile model ram used " << compile_model_mem_end - compile_model_mem_start << " KB"
821+
<< slog::endl;
822+
745823
if (statistics)
746824
statistics->add_parameters(
747825
StatisticsReport::Category::EXECUTION_RESULTS,
@@ -760,17 +838,26 @@ int main(int argc, char* argv[]) {
760838
// ----------------- 7. Loading the model to the device
761839
// --------------------------------------------------------
762840
next_step();
841+
auto import_model_mem_start = get_peak_memory_usage();
763842
auto startTime = Time::now();
764843

765844
std::ifstream modelStream(FLAGS_m, std::ios_base::binary | std::ios_base::in);
766845
if (!modelStream.is_open()) {
767846
throw std::runtime_error("Cannot open model file " + FLAGS_m);
768847
}
848+
769849
compiledModel = core.import_model(modelStream, device_name, device_config);
770850
modelStream.close();
771851

772852
auto duration_ms = get_duration_ms_till_now(startTime);
853+
auto import_model_mem_end = get_peak_memory_usage();
773854
slog::info << "Import model took " << double_to_string(duration_ms) << " ms" << slog::endl;
855+
856+
slog::info << "Start of import memory usage: Peak " << import_model_mem_start << " KB" << slog::endl;
857+
slog::info << "End of import memory usage: Peak " << import_model_mem_end << " KB" << slog::endl;
858+
slog::info << "Import model ram used " << import_model_mem_end - import_model_mem_start << " KB"
859+
<< slog::endl;
860+
774861
slog::info << "Original model I/O paramteters:" << slog::endl;
775862
printInputAndOutputsInfoShort(compiledModel);
776863

tests/samples_tests/smoke_tests/test_benchmark_app.py

+5
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,11 @@ def verify(sample_language, device, api=None, nireq=None, shape=None, data_shape
6161
'-d', device
6262
)
6363
assert 'FPS' in output
64+
65+
# No Windows support due to the lack of the ‘psutil’ module in the CI infrastructure
66+
if os.name == "posix":
67+
assert 'Compile model ram used' in output
68+
6469
if tmp_path:
6570
assert (tmp_path / 'exec_graph.xml').exists()
6671
with (tmp_path / 'conf.json').open(encoding='utf-8') as file:

tools/benchmark_tool/openvino/tools/benchmark/main.py

+36-1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,27 @@
2222
from openvino.tools.benchmark.utils.statistics_report import StatisticsReport, JsonStatisticsReport, CsvStatisticsReport, \
2323
averageCntReport, detailedCntReport
2424

25+
def get_peak_memory_usage():
    """Return the process peak memory usage (VmPeak) in KB on POSIX systems.

    Returns None on other platforms.
    Raises RuntimeError if /proc/self/status has no VmPeak entry.
    """
    if os.name != "posix":
        # No Windows support due to the lack of the ‘psutil’ module in the CI infrastructure
        return None

    with open("/proc/self/status", "r") as status_file:
        for entry in status_file:
            if entry.startswith("VmPeak:"):
                # Line format: "VmPeak:   123456 kB" -> second field is the value in KB
                return int(entry.split()[1])
    raise RuntimeError("VmPeak attribute not found. Unable to determine peak memory usage.")
35+
36+
def log_memory_usage(logger, start_mem_usage, end_mem_usage, action_name):
    """Log peak memory usage measured around a model load step.

    No-op when either measurement is unavailable (e.g. non-POSIX platforms,
    where get_peak_memory_usage() returns None).
    """
    if start_mem_usage is None or end_mem_usage is None:
        return

    # The summary line keeps the original verb ("Compile model ram used"),
    # while the start/end lines use the noun form ("compilation").
    title = action_name.capitalize()
    phase = "compilation" if action_name == "compile" else action_name
    for message in (
        f"Start of {phase} memory usage: Peak {start_mem_usage} KB",
        f"End of {phase} memory usage: Peak {end_mem_usage} KB",
        f"{title} model ram used {end_mem_usage - start_mem_usage} KB",
    ):
        logger.info(message)
45+
2546
def parse_and_check_command_line():
2647
def arg_not_empty(arg_value,empty_value):
2748
return not arg_value is None and not arg_value == empty_value
@@ -349,10 +370,15 @@ def set_nthreads_pin(property_name, property_value):
349370
# --------------------- 7. Loading the model to the device -------------------------------------------------
350371
next_step()
351372

373+
start_mem_usage = get_peak_memory_usage()
352374
start_time = datetime.utcnow()
375+
353376
compiled_model = benchmark.core.compile_model(args.path_to_model, benchmark.device, device_config)
377+
354378
duration_ms = f"{(datetime.utcnow() - start_time).total_seconds() * 1000:.2f}"
379+
end_mem_usage = get_peak_memory_usage()
355380
logger.info(f"Compile model took {duration_ms} ms")
381+
log_memory_usage(logger, start_mem_usage, end_mem_usage, "compile")
356382
if statistics:
357383
statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
358384
[
@@ -411,11 +437,15 @@ def set_nthreads_pin(property_name, property_value):
411437

412438
# --------------------- 7. Loading the model to the device -------------------------------------------------
413439
next_step()
440+
start_mem_usage = get_peak_memory_usage()
414441
start_time = datetime.utcnow()
415-
compiled_model = benchmark.core.compile_model(model, benchmark.device, device_config)
416442

443+
compiled_model = benchmark.core.compile_model(model, benchmark.device, device_config)
444+
417445
duration_ms = f"{(datetime.utcnow() - start_time).total_seconds() * 1000:.2f}"
446+
end_mem_usage = get_peak_memory_usage()
418447
logger.info(f"Compile model took {duration_ms} ms")
448+
log_memory_usage(logger, start_mem_usage, end_mem_usage, "compile")
419449
if statistics:
420450
statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
421451
[
@@ -435,10 +465,15 @@ def set_nthreads_pin(property_name, property_value):
435465
# --------------------- 7. Loading the model to the device -------------------------------------------------
436466
next_step()
437467

468+
start_mem_usage = get_peak_memory_usage()
438469
start_time = datetime.utcnow()
470+
439471
compiled_model = benchmark.core.import_model(args.path_to_model, benchmark.device, device_config)
472+
440473
duration_ms = f"{(datetime.utcnow() - start_time).total_seconds() * 1000:.2f}"
474+
end_mem_usage = get_peak_memory_usage()
441475
logger.info(f"Import model took {duration_ms} ms")
476+
log_memory_usage(logger, start_mem_usage, end_mem_usage, "import")
442477
if statistics:
443478
statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
444479
[

0 commit comments

Comments
 (0)