Commit 9f6d677

[CPU] New plugin config impl
Parent: b1a07c7


55 files changed: +1029 −1344 lines

src/inference/dev_api/openvino/runtime/performance_heuristics.hpp

+1-1
@@ -32,7 +32,7 @@ struct MemBandwidthPressure {
 };
 
 OPENVINO_RUNTIME_API MemBandwidthPressure mem_bandwidth_pressure_tolerance(
-    const std::shared_ptr<ov::Model> model,
+    const std::shared_ptr<const ov::Model> model,
     const float cache_size,
     const float memThresholdAssumeLimited = MemBandwidthPressure::LIMITED);
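The change here is const-correctness only: the heuristic merely inspects the model, so callers that hold read-only access can now pass it directly. A minimal caller sketch (the surrounding objects and the cache-size value are illustrative, not from this commit):

    // 'compiled' is an ov::CompiledModel obtained elsewhere (hypothetical).
    std::shared_ptr<const ov::Model> model = compiled.get_runtime_model();
    const float cache_size = 2.0f * 1024 * 1024;  // illustrative value
    ov::MemBandwidthPressure res = ov::mem_bandwidth_pressure_tolerance(model, cache_size);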

src/inference/dev_api/openvino/runtime/plugin_config.hpp

+1-1
@@ -201,7 +201,7 @@ class OPENVINO_RUNTIME_API PluginConfig {
     virtual void apply_model_specific_options(const IRemoteContext* context, const ov::Model& model) {}
     void apply_env_options();
     void apply_config_options(std::string_view device_name, std::filesystem::path config_path = "");
-    virtual void finalize_impl(const IRemoteContext* context) {}
+    virtual void finalize_impl(const IRemoteContext* context, const ov::Model* model) {}
 
     template <typename T, PropertyMutability mutability>
     bool is_set_by_user(const ov::Property<T, mutability>& property) const {
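Passing the model into `finalize_impl` lets a derived config compute model-dependent defaults during finalization rather than in a separate hook. A hypothetical override (the property name and heuristic are invented for illustration; `model` may be null, since `finalize()` can run without one):

    void finalize_impl(const IRemoteContext* context, const ov::Model* model) override {
        if (model != nullptr && !is_set_by_user(my_tuning_property)) {
            // Hypothetical heuristic: derive a default from model size.
            m_my_tuning.value = model->get_ops().size() > 1000;
        }
    }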

src/inference/include/openvino/runtime/intel_cpu/properties.hpp

+48-2
@@ -26,14 +26,60 @@ namespace ov {
 */
 namespace intel_cpu {
 
+struct DenormalsOptimization {
+    enum class Mode {
+        DEFAULT,
+        ON,
+        OFF
+    };
+
+    DenormalsOptimization() {};
+    DenormalsOptimization(Mode mode) : m_mode(mode) {};
+    DenormalsOptimization(bool mode) { m_mode = mode ? Mode::ON : Mode::OFF; }
+    operator bool() const { return m_mode == Mode::ON; }
+
+    Mode m_mode = Mode::DEFAULT;
+};
+
+/** @cond INTERNAL */
+inline std::ostream& operator<<(std::ostream& os, const DenormalsOptimization& value) {
+    switch (value.m_mode) {
+    case DenormalsOptimization::Mode::DEFAULT:
+        return os << "DEFAULT";
+    case DenormalsOptimization::Mode::ON:
+        return os << "ON";
+    case DenormalsOptimization::Mode::OFF:
+        return os << "OFF";
+    default:
+        OPENVINO_THROW("Unsupported denormals optimization mode: ");
+    }
+}
+
+inline std::istream& operator>>(std::istream& is, DenormalsOptimization& value) {
+    std::string str;
+    is >> str;
+    if (str == "DEFAULT") {
+        value = DenormalsOptimization::Mode::DEFAULT;
+    } else if (str == "ON") {
+        value = DenormalsOptimization::Mode::ON;
+    } else if (str == "OFF") {
+        value = DenormalsOptimization::Mode::OFF;
+    } else {
+        OPENVINO_THROW("Could not read denormals optimization mode from str: ", str);
+    }
+    return is;
+}
+/** @endcond */
+
 /**
  * @brief This property define whether to perform denormals optimization.
  * @ingroup ov_runtime_cpu_prop_cpp_api
  *
  * Computation with denormals is very time consuming. FTZ(Flushing denormals to zero) and DAZ(Denormals as zero)
  * could significantly improve the performance, but it does not comply with IEEE standard. In most cases, this behavior
  * has little impact on model accuracy. Users could enable this optimization if no or acceptable accuracy drop is seen.
- * The following code enables denormals optimization
+ * By default OV runtime doesn't change master thread settings.
+ * The following code explicitly enables denormals optimization
  *
  * @code
  * ie.set_property(ov::denormals_optimization(true)); // enable denormals optimization
@@ -45,7 +91,7 @@ namespace intel_cpu {
  * ie.set_property(ov::denormals_optimization(false)); // disable denormals optimization
 * @endcode
 */
-static constexpr Property<bool> denormals_optimization{"CPU_DENORMALS_OPTIMIZATION"};
+static constexpr Property<DenormalsOptimization> denormals_optimization{"CPU_DENORMALS_OPTIMIZATION"};
 
 /**
  * @brief This property defines threshold for sparse weights decompression feature activation
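The property stays source-compatible with the old boolean form while gaining a genuine tri-state, so an explicit OFF can be told apart from "user said nothing". A short usage sketch (standard `ov::Core` API; device string and values are illustrative):

    ov::Core core;
    // Old bool call sites keep working via the implicit DenormalsOptimization(bool) constructor:
    core.set_property("CPU", ov::intel_cpu::denormals_optimization(true));
    // The enum form can request an explicit OFF, distinct from the untouched DEFAULT:
    core.set_property("CPU",
                      ov::intel_cpu::denormals_optimization(ov::intel_cpu::DenormalsOptimization::Mode::OFF));

Note that `operator bool()` is true only for Mode::ON, so both DEFAULT and OFF read as false wherever the value is used as a plain flag.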

src/inference/src/dev/performance_heuristics.cpp

+1-1
@@ -6,7 +6,7 @@
 
 namespace ov {
 
-MemBandwidthPressure mem_bandwidth_pressure_tolerance(const std::shared_ptr<ov::Model> model,
+MemBandwidthPressure mem_bandwidth_pressure_tolerance(const std::shared_ptr<const ov::Model> model,
                                                       const float cache_size,
                                                       const float memThresholdAssumeLimited) {
     int total_convs = 0, mem_limited_convs = 0, compute_convs = 0, total_gemms = 0, mem_limited_gemms = 0,

src/inference/src/dev/plugin_config.cpp

+1-1
@@ -116,7 +116,7 @@ void PluginConfig::finalize(const IRemoteContext* context, const ov::Model* mode
         option->set_any(value);
     }
 
-    finalize_impl(context);
+    finalize_impl(context, model);
 
 #ifdef ENABLE_DEBUG_CAPS
     apply_env_options();

src/inference/tests/unit/config_test.cpp

+1-1
@@ -141,7 +141,7 @@ struct NotEmptyTestConfig : public ov::PluginConfig {
         return supported_properties;
     }
 
-    void finalize_impl(const IRemoteContext* context) override {
+    void finalize_impl(const IRemoteContext* context, const ov::Model* model) override {
        if (!is_set_by_user(low_level_property)) {
            m_low_level_property.value = m_high_level_property.value;
        }

src/plugins/intel_cpu/CMakeLists.txt

+1
@@ -160,6 +160,7 @@ if(WIN32)
 endif()
 
 if(ENABLE_CPU_DEBUG_CAPS)
+    add_definitions(-DENABLE_DEBUG_CAPS)
     add_definitions(-DCPU_DEBUG_CAPS)
 endif()
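The new define lines up with the `#ifdef ENABLE_DEBUG_CAPS` guard visible in plugin_config.cpp above: presumably it is added so that the common config code's debug path (e.g. `apply_env_options()`) is compiled in for CPU debug builds as well.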

src/plugins/intel_cpu/src/compiled_model.cpp

+41-127
@@ -27,6 +27,7 @@
 #include "utils/debug_capabilities.h"
 #include "utils/memory_stats_dump.hpp"
 #include "utils/serialize.hpp"
+#include "utils/denormals.hpp"
 
 #if defined(OV_CPU_WITH_ACL)
 #    include "nodes/executors/acl/acl_ie_scheduler.hpp"
@@ -63,28 +64,30 @@ CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
       m_cfg{std::move(cfg)},
       m_name{model->get_name()},
       m_loaded_from_cache(loaded_from_cache),
-      m_sub_memory_manager(std::move(sub_memory_manager)) {
+      m_sub_memory_manager(std::move(sub_memory_manager)),
+      m_model_name(model->get_friendly_name()) {
     m_mutex = std::make_shared<std::mutex>();
     const auto& core = m_plugin->get_core();
     if (!core) {
         OPENVINO_THROW("Unable to get API version. Core is unavailable");
     }
 
+
     IStreamsExecutor::Config executor_config;
-    if (m_cfg.exclusiveAsyncRequests) {
+    if (m_cfg.get_exclusive_async_requests()) {
         // special case when all InferRequests are muxed into a single queue
         m_task_executor = m_plugin->get_executor_manager()->get_executor("CPU");
     } else {
-        executor_config = m_cfg.numSubStreams > 0 ? IStreamsExecutor::Config{"CPUMainStreamExecutor",
+        executor_config = m_cfg.get_num_sub_streams() > 0 ? IStreamsExecutor::Config{"CPUMainStreamExecutor",
                                                                              1,
                                                                              1,
                                                                              ov::hint::SchedulingCoreType::ANY_CORE,
                                                                              false,
                                                                              true}
-                                                  : m_cfg.streamExecutorConfig;
+                                                          : m_cfg.get_stream_executor_config();
         m_task_executor = m_plugin->get_executor_manager()->get_idle_cpu_streams_executor(executor_config);
     }
-    if (0 != m_cfg.streamExecutorConfig.get_streams()) {
+    if (0 != m_cfg.get_stream_executor_config().get_streams()) {
         m_callback_executor = m_plugin->get_executor_manager()->get_idle_cpu_streams_executor(
             IStreamsExecutor::Config{"CPUCallbackExecutor", 1, 0});
     } else {
@@ -126,34 +129,33 @@ CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
     } else {
         CompiledModel::get_graph();
     }
-    if (m_cfg.numSubStreams > 0) {
+
+    if (m_cfg.get_num_sub_streams() > 0) {
         m_has_sub_compiled_models = true;
-        auto sub_cfg = m_cfg;
-        sub_cfg.numSubStreams = 0;
-        sub_cfg.enableNodeSplit = true;
-        auto streams_info_table = m_cfg.streamExecutorConfig.get_streams_info_table();
         auto message = message_manager();
-        m_sub_memory_manager = std::make_shared<SubMemoryManager>(m_cfg.numSubStreams);
-        message->set_num_sub_streams(m_cfg.numSubStreams);
-        for (int i = 0; i < m_cfg.numSubStreams; i++) {
-            std::vector<std::vector<int>> sub_streams_table;
-            sub_streams_table.push_back(streams_info_table[i + 1]);
-            sub_streams_table[0][NUMBER_OF_STREAMS] = 1;
-            sub_cfg.streamExecutorConfig = IStreamsExecutor::Config{"CPUStreamsExecutor",
-                                                                    1,
-                                                                    1,
-                                                                    ov::hint::SchedulingCoreType::ANY_CORE,
-                                                                    false,
-                                                                    true,
-                                                                    true,
-                                                                    std::move(sub_streams_table),
-                                                                    sub_cfg.streamsRankTable[i]};
+        m_sub_memory_manager = std::make_shared<SubMemoryManager>(m_cfg.get_num_sub_streams());
+        message->set_num_sub_streams(m_cfg.get_num_sub_streams());
+        for (int i = 0; i < m_cfg.get_num_sub_streams(); i++) {
+            auto sub_cfg = m_cfg.clone(i, true);
             m_sub_compiled_models.push_back(
                 std::make_shared<CompiledModel>(model, plugin, sub_cfg, loaded_from_cache, m_sub_memory_manager));
         }
     }
 }
 
+static bool set_denormals_optimization(const ov::intel_cpu::DenormalsOptimization& value) {
+    if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::sse41)) {
+        if (value.m_mode == DenormalsOptimization::Mode::ON) {
+            flush_to_zero(true);
+            return denormals_as_zero(true);
+        } else if (value.m_mode == DenormalsOptimization::Mode::OFF) {
+            flush_to_zero(false);
+            denormals_as_zero(false);
+        }
+    }
+    return false;
+}
+
 CompiledModel::GraphGuard::Lock CompiledModel::get_graph() const {
     int streamId = 0;
     int socketId = 0;
@@ -170,11 +172,15 @@ CompiledModel::GraphGuard::Lock CompiledModel::get_graph() const {
     GraphContext::Ptr ctx;
     {
         std::lock_guard<std::mutex> lock{*m_mutex.get()};
-        auto isQuantizedFlag = (m_cfg.lpTransformsMode == Config::On) &&
+        auto isQuantizedFlag = (m_cfg.get_enable_lp_transformations()) &&
                                ov::pass::low_precision::LowPrecision::isFunctionQuantized(m_model);
+        // SSE runtime check is needed for some ATOM machine, which is x86-64 but w/o SSE
+
+        bool denormalsAsZero = set_denormals_optimization(m_cfg.get_denormals_optimization());
         ctx = std::make_shared<GraphContext>(m_cfg,
                                              m_socketWeights[socketId],
                                              isQuantizedFlag,
+                                             denormalsAsZero,
                                              streamsExecutor,
                                              m_sub_memory_manager);
     }
@@ -229,25 +235,6 @@ std::shared_ptr<const ov::Model> CompiledModel::get_runtime_model() const {
 }
 
 ov::Any CompiledModel::get_property(const std::string& name) const {
-    if (m_graphs.empty()) {
-        OPENVINO_THROW("No graph was found");
-    }
-
-    if (name == ov::loaded_from_cache) {
-        return m_loaded_from_cache;
-    }
-
-    Config engConfig = get_graph()._graph.getConfig();
-    auto option = engConfig._config.find(name);
-    if (option != engConfig._config.end()) {
-        return option->second;
-    }
-
-    // @todo Can't we just use local copy (_cfg) instead?
-    auto graphLock = get_graph();
-    const auto& graph = graphLock._graph;
-    const auto& config = graph.getConfig();
-
     auto RO_property = [](const std::string& propertyName) {
         return ov::PropertyName(propertyName, ov::PropertyMutability::RO);
     };
@@ -285,98 +272,25 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
     }
 
     if (name == ov::model_name) {
-        // @todo Does not seem ok to 'dump()' the whole graph everytime in order to get a name
-        const std::string modelName = graph.dump()->get_friendly_name();
-        return decltype(ov::model_name)::value_type(modelName);
+        return decltype(ov::model_name)::value_type {m_model_name};
+    }
+    if (name == ov::loaded_from_cache) {
+        return decltype(ov::loaded_from_cache)::value_type {m_loaded_from_cache};
     }
     if (name == ov::optimal_number_of_infer_requests) {
-        const auto streams = config.streamExecutorConfig.get_streams();
-        return static_cast<decltype(ov::optimal_number_of_infer_requests)::value_type>(
+        const auto streams = m_cfg.get_stream_executor_config().get_streams();
+        return decltype(ov::optimal_number_of_infer_requests)::value_type(
             streams > 0 ? streams : 1);  // ov::optimal_number_of_infer_requests has no negative values
     }
-    if (name == ov::num_streams) {
-        const auto streams = config.streamExecutorConfig.get_streams();
-        return decltype(ov::num_streams)::value_type(
-            streams);  // ov::num_streams has special negative values (AUTO = -1, NUMA = -2)
-    }
-    if (name == ov::inference_num_threads) {
-        const auto num_threads = config.streamExecutorConfig.get_threads();
-        return static_cast<decltype(ov::inference_num_threads)::value_type>(num_threads);
-    }
-    if (name == ov::enable_profiling.name()) {
-        const bool perfCount = config.collectPerfCounters;
-        return static_cast<decltype(ov::enable_profiling)::value_type>(perfCount);
-    }
-    if (name == ov::hint::inference_precision) {
-        return decltype(ov::hint::inference_precision)::value_type(config.inferencePrecision);
-    }
-    if (name == ov::hint::performance_mode) {
-        return static_cast<decltype(ov::hint::performance_mode)::value_type>(config.hintPerfMode);
-    }
-    if (name == ov::log::level) {
-        return static_cast<decltype(ov::log::level)::value_type>(config.logLevel);
-    }
-    if (name == ov::hint::enable_cpu_pinning.name()) {
-        const bool use_pin = config.enableCpuPinning;
-        return static_cast<decltype(ov::hint::enable_cpu_pinning)::value_type>(use_pin);
-    }
-    if (name == ov::hint::enable_cpu_reservation.name()) {
-        const bool use_reserve = config.enableCpuReservation;
-        return static_cast<decltype(ov::hint::enable_cpu_reservation)::value_type>(use_reserve);
-    }
-    if (name == ov::hint::scheduling_core_type) {
-        const auto stream_mode = config.schedulingCoreType;
-        return stream_mode;
-    }
-    if (name == ov::hint::model_distribution_policy) {
-        const auto& distribution_policy = config.modelDistributionPolicy;
-        return distribution_policy;
-    }
-    if (name == ov::hint::enable_hyper_threading.name()) {
-        const bool use_ht = config.enableHyperThreading;
-        return static_cast<decltype(ov::hint::enable_hyper_threading)::value_type>(use_ht);
-    }
-    if (name == ov::hint::execution_mode) {
-        return config.executionMode;
-    }
-    if (name == ov::hint::num_requests) {
-        return static_cast<decltype(ov::hint::num_requests)::value_type>(config.hintNumRequests);
-    }
     if (name == ov::execution_devices) {
         return decltype(ov::execution_devices)::value_type{m_plugin->get_device_name()};
     }
-    if (name == ov::intel_cpu::denormals_optimization) {
-        return static_cast<decltype(ov::intel_cpu::denormals_optimization)::value_type>(
-            config.denormalsOptMode == Config::DenormalsOptMode::DO_On);
-    }
-    if (name == ov::intel_cpu::sparse_weights_decompression_rate) {
-        return static_cast<decltype(ov::intel_cpu::sparse_weights_decompression_rate)::value_type>(
-            config.fcSparseWeiDecompressionRate);
-    }
-    if (name == ov::hint::dynamic_quantization_group_size) {
-        return static_cast<decltype(ov::hint::dynamic_quantization_group_size)::value_type>(
-            config.fcDynamicQuantizationGroupSize);
-    }
-    if (name == ov::hint::kv_cache_precision) {
-        return decltype(ov::hint::kv_cache_precision)::value_type(config.kvCachePrecision);
-    }
-    if (name == ov::key_cache_precision) {
-        return decltype(ov::key_cache_precision)::value_type(config.keyCachePrecision);
-    }
-    if (name == ov::value_cache_precision) {
-        return decltype(ov::value_cache_precision)::value_type(config.valueCachePrecision);
-    }
-    if (name == ov::key_cache_group_size) {
-        return static_cast<decltype(ov::key_cache_group_size)::value_type>(config.keyCacheGroupSize);
-    }
-    if (name == ov::value_cache_group_size) {
-        return static_cast<decltype(ov::value_cache_group_size)::value_type>(config.valueCacheGroupSize);
-    }
-    OPENVINO_THROW("Unsupported property: ", name);
+
+    return m_cfg.get_property(name, OptionVisibility::RELEASE);
 }
 
 void CompiledModel::export_model(std::ostream& modelStream) const {
-    ModelSerializer serializer(modelStream, m_cfg.cacheEncrypt);
+    ModelSerializer serializer(modelStream, m_cfg.get_cache_encryption_callbacks().encrypt);
     serializer << m_model;
 }
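For context on what the new `set_denormals_optimization` helper above ultimately toggles: `flush_to_zero` and `denormals_as_zero` come from the plugin's own `utils/denormals.hpp`, and on x86 such helpers typically set the FTZ and DAZ bits of the per-thread MXCSR register. A rough standalone sketch of those mechanics using standard SSE intrinsics (an illustration, not the plugin's actual implementation):

    #include <xmmintrin.h>  // _MM_SET_FLUSH_ZERO_MODE (SSE)
    #include <pmmintrin.h>  // _MM_SET_DENORMALS_ZERO_MODE (SSE3)

    // FTZ: denormal *results* are flushed to zero; DAZ: denormal *inputs* are
    // read as zero. Both are per-thread MXCSR bits, which is why they must be
    // applied on each stream thread rather than once per process.
    static void apply_ftz_daz(bool enable) {
        _MM_SET_FLUSH_ZERO_MODE(enable ? _MM_FLUSH_ZERO_ON : _MM_FLUSH_ZERO_OFF);
        _MM_SET_DENORMALS_ZERO_MODE(enable ? _MM_DENORMALS_ZERO_ON : _MM_DENORMALS_ZERO_OFF);
    }

This also explains the `mayiuse(sse41)` guard and the return value in the diff: only an explicit ON reports `denormalsAsZero = true` into the `GraphContext`, while DEFAULT leaves the thread state untouched, matching the property's documented default behavior.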

src/plugins/intel_cpu/src/compiled_model.h

+2
@@ -93,6 +93,8 @@ class CompiledModel : public ov::ICompiledModel {
     std::vector<std::shared_ptr<CompiledModel>> m_sub_compiled_models;
     std::shared_ptr<SubMemoryManager> m_sub_memory_manager = nullptr;
     bool m_has_sub_compiled_models = false;
+
+    std::string m_model_name;
 };
 
 // This class provides safe access to the internal CompiledModel structures and helps to decouple SyncInferRequest and
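Caching the friendly name in the new `m_model_name` member at construction time is what lets `get_property(ov::model_name)` in compiled_model.cpp above drop the old `graph.dump()` round-trip.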
