Commit 9f6d677

[CPU] New plugin config impl
Parent: b1a07c7


55 files changed: +1029 −1344 lines

src/inference/dev_api/openvino/runtime/performance_heuristics.hpp

+1-1
@@ -32,7 +32,7 @@ struct MemBandwidthPressure {
 };
 
 OPENVINO_RUNTIME_API MemBandwidthPressure mem_bandwidth_pressure_tolerance(
-    const std::shared_ptr<ov::Model> model,
+    const std::shared_ptr<const ov::Model> model,
     const float cache_size,
     const float memThresholdAssumeLimited = MemBandwidthPressure::LIMITED);
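The change here is const-correctness only: the heuristic merely inspects the model, so callers that hold read-only access can now pass it directly. A minimal caller sketch (the surrounding objects and the cache-size value are illustrative, not from this commit):

    // 'compiled' is an ov::CompiledModel obtained elsewhere (hypothetical).
    std::shared_ptr<const ov::Model> model = compiled.get_runtime_model();
    const float cache_size = 2.0f * 1024 * 1024;  // illustrative value
    ov::MemBandwidthPressure res = ov::mem_bandwidth_pressure_tolerance(model, cache_size);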

src/inference/dev_api/openvino/runtime/plugin_config.hpp

+1-1
@@ -201,7 +201,7 @@ class OPENVINO_RUNTIME_API PluginConfig {
     virtual void apply_model_specific_options(const IRemoteContext* context, const ov::Model& model) {}
     void apply_env_options();
     void apply_config_options(std::string_view device_name, std::filesystem::path config_path = "");
-    virtual void finalize_impl(const IRemoteContext* context) {}
+    virtual void finalize_impl(const IRemoteContext* context, const ov::Model* model) {}
 
     template <typename T, PropertyMutability mutability>
     bool is_set_by_user(const ov::Property<T, mutability>& property) const {
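Passing the model into `finalize_impl` lets a derived config compute model-dependent defaults during finalization rather than in a separate hook. A hypothetical override (the property name and heuristic are invented for illustration; `model` may be null, since `finalize()` can run without one):

    void finalize_impl(const IRemoteContext* context, const ov::Model* model) override {
        if (model != nullptr && !is_set_by_user(my_tuning_property)) {
            // Hypothetical heuristic: derive a default from model size.
            m_my_tuning.value = model->get_ops().size() > 1000;
        }
    }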

src/inference/include/openvino/runtime/intel_cpu/properties.hpp

+48-2
@@ -26,14 +26,60 @@ namespace ov {
 */
 namespace intel_cpu {
 
+struct DenormalsOptimization {
+    enum class Mode {
+        DEFAULT,
+        ON,
+        OFF
+    };
+
+    DenormalsOptimization() {};
+    DenormalsOptimization(Mode mode) : m_mode(mode) {};
+    DenormalsOptimization(bool mode) { m_mode = mode ? Mode::ON : Mode::OFF; }
+    operator bool() const { return m_mode == Mode::ON; }
+
+    Mode m_mode = Mode::DEFAULT;
+};
+
+/** @cond INTERNAL */
+inline std::ostream& operator<<(std::ostream& os, const DenormalsOptimization& value) {
+    switch (value.m_mode) {
+    case DenormalsOptimization::Mode::DEFAULT:
+        return os << "DEFAULT";
+    case DenormalsOptimization::Mode::ON:
+        return os << "ON";
+    case DenormalsOptimization::Mode::OFF:
+        return os << "OFF";
+    default:
+        OPENVINO_THROW("Unsupported denormals optimization mode: ");
+    }
+}
+
+inline std::istream& operator>>(std::istream& is, DenormalsOptimization& value) {
+    std::string str;
+    is >> str;
+    if (str == "DEFAULT") {
+        value = DenormalsOptimization::Mode::DEFAULT;
+    } else if (str == "ON") {
+        value = DenormalsOptimization::Mode::ON;
+    } else if (str == "OFF") {
+        value = DenormalsOptimization::Mode::OFF;
+    } else {
+        OPENVINO_THROW("Could not read denormals optimization mode from str: ", str);
+    }
+    return is;
+}
+/** @endcond */
+
 /**
  * @brief This property define whether to perform denormals optimization.
  * @ingroup ov_runtime_cpu_prop_cpp_api
  *
  * Computation with denormals is very time consuming. FTZ(Flushing denormals to zero) and DAZ(Denormals as zero)
  * could significantly improve the performance, but it does not comply with IEEE standard. In most cases, this behavior
  * has little impact on model accuracy. Users could enable this optimization if no or acceptable accuracy drop is seen.
- * The following code enables denormals optimization
+ * By default OV runtime doesn't change master thread settings.
+ * The following code explicitly enables denormals optimization
  *
  * @code
  * ie.set_property(ov::denormals_optimization(true)); // enable denormals optimization
@@ -45,7 +91,7 @@ namespace intel_cpu {
  * ie.set_property(ov::denormals_optimization(false)); // disable denormals optimization
 * @endcode
 */
-static constexpr Property<bool> denormals_optimization{"CPU_DENORMALS_OPTIMIZATION"};
+static constexpr Property<DenormalsOptimization> denormals_optimization{"CPU_DENORMALS_OPTIMIZATION"};
 
 /**
  * @brief This property defines threshold for sparse weights decompression feature activation
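The property stays source-compatible with the old boolean form while gaining a genuine tri-state, so an explicit OFF can be told apart from "user said nothing". A short usage sketch (standard `ov::Core` API; device string and values are illustrative):

    ov::Core core;
    // Old bool call sites keep working via the implicit DenormalsOptimization(bool) constructor:
    core.set_property("CPU", ov::intel_cpu::denormals_optimization(true));
    // The enum form can request an explicit OFF, distinct from the untouched DEFAULT:
    core.set_property("CPU",
                      ov::intel_cpu::denormals_optimization(ov::intel_cpu::DenormalsOptimization::Mode::OFF));

Note that `operator bool()` is true only for Mode::ON, so both DEFAULT and OFF read as false wherever the value is used as a plain flag.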

src/inference/src/dev/performance_heuristics.cpp

+1-1
@@ -6,7 +6,7 @@
 
 namespace ov {
 
-MemBandwidthPressure mem_bandwidth_pressure_tolerance(const std::shared_ptr<ov::Model> model,
+MemBandwidthPressure mem_bandwidth_pressure_tolerance(const std::shared_ptr<const ov::Model> model,
                                                       const float cache_size,
                                                       const float memThresholdAssumeLimited) {
     int total_convs = 0, mem_limited_convs = 0, compute_convs = 0, total_gemms = 0, mem_limited_gemms = 0,

src/inference/src/dev/plugin_config.cpp

+1-1
@@ -116,7 +116,7 @@ void PluginConfig::finalize(const IRemoteContext* context, const ov::Model* mode
         option->set_any(value);
     }
 
-    finalize_impl(context);
+    finalize_impl(context, model);
 
 #ifdef ENABLE_DEBUG_CAPS
     apply_env_options();

src/inference/tests/unit/config_test.cpp

+1-1
@@ -141,7 +141,7 @@ struct NotEmptyTestConfig : public ov::PluginConfig {
         return supported_properties;
     }
 
-    void finalize_impl(const IRemoteContext* context) override {
+    void finalize_impl(const IRemoteContext* context, const ov::Model* model) override {
        if (!is_set_by_user(low_level_property)) {
            m_low_level_property.value = m_high_level_property.value;
        }

src/plugins/intel_cpu/CMakeLists.txt

+1
@@ -160,6 +160,7 @@ if(WIN32)
 endif()
 
 if(ENABLE_CPU_DEBUG_CAPS)
+    add_definitions(-DENABLE_DEBUG_CAPS)
     add_definitions(-DCPU_DEBUG_CAPS)
 endif()
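The new define lines up with the `#ifdef ENABLE_DEBUG_CAPS` guard visible in plugin_config.cpp above: presumably it is added so that the common config code's debug path (e.g. `apply_env_options()`) is compiled in for CPU debug builds as well.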

src/plugins/intel_cpu/src/compiled_model.cpp

+41-127
@@ -27,6 +27,7 @@
 #include "utils/debug_capabilities.h"
 #include "utils/memory_stats_dump.hpp"
 #include "utils/serialize.hpp"
+#include "utils/denormals.hpp"
 
 #if defined(OV_CPU_WITH_ACL)
 #    include "nodes/executors/acl/acl_ie_scheduler.hpp"
@@ -63,28 +64,30 @@ CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
       m_cfg{std::move(cfg)},
       m_name{model->get_name()},
       m_loaded_from_cache(loaded_from_cache),
-      m_sub_memory_manager(std::move(sub_memory_manager)) {
+      m_sub_memory_manager(std::move(sub_memory_manager)),
+      m_model_name(model->get_friendly_name()) {
     m_mutex = std::make_shared<std::mutex>();
     const auto& core = m_plugin->get_core();
     if (!core) {
         OPENVINO_THROW("Unable to get API version. Core is unavailable");
     }
 
+
     IStreamsExecutor::Config executor_config;
-    if (m_cfg.exclusiveAsyncRequests) {
+    if (m_cfg.get_exclusive_async_requests()) {
         // special case when all InferRequests are muxed into a single queue
         m_task_executor = m_plugin->get_executor_manager()->get_executor("CPU");
     } else {
-        executor_config = m_cfg.numSubStreams > 0 ? IStreamsExecutor::Config{"CPUMainStreamExecutor",
+        executor_config = m_cfg.get_num_sub_streams() > 0 ? IStreamsExecutor::Config{"CPUMainStreamExecutor",
                                                                              1,
                                                                              1,
                                                                              ov::hint::SchedulingCoreType::ANY_CORE,
                                                                              false,
                                                                              true}
-                                                  : m_cfg.streamExecutorConfig;
+                                                          : m_cfg.get_stream_executor_config();
         m_task_executor = m_plugin->get_executor_manager()->get_idle_cpu_streams_executor(executor_config);
     }
-    if (0 != m_cfg.streamExecutorConfig.get_streams()) {
+    if (0 != m_cfg.get_stream_executor_config().get_streams()) {
         m_callback_executor = m_plugin->get_executor_manager()->get_idle_cpu_streams_executor(
             IStreamsExecutor::Config{"CPUCallbackExecutor", 1, 0});
     } else {
@@ -126,34 +129,33 @@ CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
     } else {
         CompiledModel::get_graph();
     }
-    if (m_cfg.numSubStreams > 0) {
+
+    if (m_cfg.get_num_sub_streams() > 0) {
         m_has_sub_compiled_models = true;
-        auto sub_cfg = m_cfg;
-        sub_cfg.numSubStreams = 0;
-        sub_cfg.enableNodeSplit = true;
-        auto streams_info_table = m_cfg.streamExecutorConfig.get_streams_info_table();
         auto message = message_manager();
-        m_sub_memory_manager = std::make_shared<SubMemoryManager>(m_cfg.numSubStreams);
-        message->set_num_sub_streams(m_cfg.numSubStreams);
-        for (int i = 0; i < m_cfg.numSubStreams; i++) {
-            std::vector<std::vector<int>> sub_streams_table;
-            sub_streams_table.push_back(streams_info_table[i + 1]);
-            sub_streams_table[0][NUMBER_OF_STREAMS] = 1;
-            sub_cfg.streamExecutorConfig = IStreamsExecutor::Config{"CPUStreamsExecutor",
-                                                                    1,
-                                                                    1,
-                                                                    ov::hint::SchedulingCoreType::ANY_CORE,
-                                                                    false,
-                                                                    true,
-                                                                    true,
-                                                                    std::move(sub_streams_table),
-                                                                    sub_cfg.streamsRankTable[i]};
+        m_sub_memory_manager = std::make_shared<SubMemoryManager>(m_cfg.get_num_sub_streams());
+        message->set_num_sub_streams(m_cfg.get_num_sub_streams());
+        for (int i = 0; i < m_cfg.get_num_sub_streams(); i++) {
+            auto sub_cfg = m_cfg.clone(i, true);
             m_sub_compiled_models.push_back(
                 std::make_shared<CompiledModel>(model, plugin, sub_cfg, loaded_from_cache, m_sub_memory_manager));
         }
     }
 }
 
+static bool set_denormals_optimization(const ov::intel_cpu::DenormalsOptimization& value) {
+    if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::sse41)) {
+        if (value.m_mode == DenormalsOptimization::Mode::ON) {
+            flush_to_zero(true);
+            return denormals_as_zero(true);
+        } else if (value.m_mode == DenormalsOptimization::Mode::OFF) {
+            flush_to_zero(false);
+            denormals_as_zero(false);
+        }
+    }
+    return false;
+}
+
 CompiledModel::GraphGuard::Lock CompiledModel::get_graph() const {
     int streamId = 0;
     int socketId = 0;
@@ -170,11 +172,15 @@ CompiledModel::GraphGuard::Lock CompiledModel::get_graph() const {
     GraphContext::Ptr ctx;
     {
         std::lock_guard<std::mutex> lock{*m_mutex.get()};
-        auto isQuantizedFlag = (m_cfg.lpTransformsMode == Config::On) &&
+        auto isQuantizedFlag = (m_cfg.get_enable_lp_transformations()) &&
                                ov::pass::low_precision::LowPrecision::isFunctionQuantized(m_model);
+        // SSE runtime check is needed for some ATOM machine, which is x86-64 but w/o SSE
+
+        bool denormalsAsZero = set_denormals_optimization(m_cfg.get_denormals_optimization());
         ctx = std::make_shared<GraphContext>(m_cfg,
                                              m_socketWeights[socketId],
                                              isQuantizedFlag,
+                                             denormalsAsZero,
                                              streamsExecutor,
                                              m_sub_memory_manager);
     }
@@ -229,25 +235,6 @@ std::shared_ptr<const ov::Model> CompiledModel::get_runtime_model() const {
 }
 
 ov::Any CompiledModel::get_property(const std::string& name) const {
-    if (m_graphs.empty()) {
-        OPENVINO_THROW("No graph was found");
-    }
-
-    if (name == ov::loaded_from_cache) {
-        return m_loaded_from_cache;
-    }
-
-    Config engConfig = get_graph()._graph.getConfig();
-    auto option = engConfig._config.find(name);
-    if (option != engConfig._config.end()) {
-        return option->second;
-    }
-
-    // @todo Can't we just use local copy (_cfg) instead?
-    auto graphLock = get_graph();
-    const auto& graph = graphLock._graph;
-    const auto& config = graph.getConfig();
-
     auto RO_property = [](const std::string& propertyName) {
         return ov::PropertyName(propertyName, ov::PropertyMutability::RO);
     };
@@ -285,98 +272,25 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
     }
 
     if (name == ov::model_name) {
-        // @todo Does not seem ok to 'dump()' the whole graph everytime in order to get a name
-        const std::string modelName = graph.dump()->get_friendly_name();
-        return decltype(ov::model_name)::value_type(modelName);
+        return decltype(ov::model_name)::value_type {m_model_name};
+    }
+    if (name == ov::loaded_from_cache) {
+        return decltype(ov::loaded_from_cache)::value_type {m_loaded_from_cache};
     }
     if (name == ov::optimal_number_of_infer_requests) {
-        const auto streams = config.streamExecutorConfig.get_streams();
-        return static_cast<decltype(ov::optimal_number_of_infer_requests)::value_type>(
+        const auto streams = m_cfg.get_stream_executor_config().get_streams();
+        return decltype(ov::optimal_number_of_infer_requests)::value_type(
             streams > 0 ? streams : 1);  // ov::optimal_number_of_infer_requests has no negative values
     }
-    if (name == ov::num_streams) {
-        const auto streams = config.streamExecutorConfig.get_streams();
-        return decltype(ov::num_streams)::value_type(
-            streams);  // ov::num_streams has special negative values (AUTO = -1, NUMA = -2)
-    }
-    if (name == ov::inference_num_threads) {
-        const auto num_threads = config.streamExecutorConfig.get_threads();
-        return static_cast<decltype(ov::inference_num_threads)::value_type>(num_threads);
-    }
-    if (name == ov::enable_profiling.name()) {
-        const bool perfCount = config.collectPerfCounters;
-        return static_cast<decltype(ov::enable_profiling)::value_type>(perfCount);
-    }
-    if (name == ov::hint::inference_precision) {
-        return decltype(ov::hint::inference_precision)::value_type(config.inferencePrecision);
-    }
-    if (name == ov::hint::performance_mode) {
-        return static_cast<decltype(ov::hint::performance_mode)::value_type>(config.hintPerfMode);
-    }
-    if (name == ov::log::level) {
-        return static_cast<decltype(ov::log::level)::value_type>(config.logLevel);
-    }
-    if (name == ov::hint::enable_cpu_pinning.name()) {
-        const bool use_pin = config.enableCpuPinning;
-        return static_cast<decltype(ov::hint::enable_cpu_pinning)::value_type>(use_pin);
-    }
-    if (name == ov::hint::enable_cpu_reservation.name()) {
-        const bool use_reserve = config.enableCpuReservation;
-        return static_cast<decltype(ov::hint::enable_cpu_reservation)::value_type>(use_reserve);
-    }
-    if (name == ov::hint::scheduling_core_type) {
-        const auto stream_mode = config.schedulingCoreType;
-        return stream_mode;
-    }
-    if (name == ov::hint::model_distribution_policy) {
-        const auto& distribution_policy = config.modelDistributionPolicy;
-        return distribution_policy;
-    }
-    if (name == ov::hint::enable_hyper_threading.name()) {
-        const bool use_ht = config.enableHyperThreading;
-        return static_cast<decltype(ov::hint::enable_hyper_threading)::value_type>(use_ht);
-    }
-    if (name == ov::hint::execution_mode) {
-        return config.executionMode;
-    }
-    if (name == ov::hint::num_requests) {
-        return static_cast<decltype(ov::hint::num_requests)::value_type>(config.hintNumRequests);
-    }
     if (name == ov::execution_devices) {
         return decltype(ov::execution_devices)::value_type{m_plugin->get_device_name()};
     }
-    if (name == ov::intel_cpu::denormals_optimization) {
-        return static_cast<decltype(ov::intel_cpu::denormals_optimization)::value_type>(
-            config.denormalsOptMode == Config::DenormalsOptMode::DO_On);
-    }
-    if (name == ov::intel_cpu::sparse_weights_decompression_rate) {
-        return static_cast<decltype(ov::intel_cpu::sparse_weights_decompression_rate)::value_type>(
-            config.fcSparseWeiDecompressionRate);
-    }
-    if (name == ov::hint::dynamic_quantization_group_size) {
-        return static_cast<decltype(ov::hint::dynamic_quantization_group_size)::value_type>(
-            config.fcDynamicQuantizationGroupSize);
-    }
-    if (name == ov::hint::kv_cache_precision) {
-        return decltype(ov::hint::kv_cache_precision)::value_type(config.kvCachePrecision);
-    }
-    if (name == ov::key_cache_precision) {
-        return decltype(ov::key_cache_precision)::value_type(config.keyCachePrecision);
-    }
-    if (name == ov::value_cache_precision) {
-        return decltype(ov::value_cache_precision)::value_type(config.valueCachePrecision);
-    }
-    if (name == ov::key_cache_group_size) {
-        return static_cast<decltype(ov::key_cache_group_size)::value_type>(config.keyCacheGroupSize);
-    }
-    if (name == ov::value_cache_group_size) {
-        return static_cast<decltype(ov::value_cache_group_size)::value_type>(config.valueCacheGroupSize);
-    }
-    OPENVINO_THROW("Unsupported property: ", name);
+
+    return m_cfg.get_property(name, OptionVisibility::RELEASE);
 }
 
 void CompiledModel::export_model(std::ostream& modelStream) const {
-    ModelSerializer serializer(modelStream, m_cfg.cacheEncrypt);
+    ModelSerializer serializer(modelStream, m_cfg.get_cache_encryption_callbacks().encrypt);
     serializer << m_model;
 }
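For context on what the new `set_denormals_optimization` helper above ultimately toggles: `flush_to_zero` and `denormals_as_zero` come from the plugin's own `utils/denormals.hpp`, and on x86 such helpers typically set the FTZ and DAZ bits of the per-thread MXCSR register. A rough standalone sketch of those mechanics using standard SSE intrinsics (an illustration, not the plugin's actual implementation):

    #include <xmmintrin.h>  // _MM_SET_FLUSH_ZERO_MODE (SSE)
    #include <pmmintrin.h>  // _MM_SET_DENORMALS_ZERO_MODE (SSE3)

    // FTZ: denormal *results* are flushed to zero; DAZ: denormal *inputs* are
    // read as zero. Both are per-thread MXCSR bits, which is why they must be
    // applied on each stream thread rather than once per process.
    static void apply_ftz_daz(bool enable) {
        _MM_SET_FLUSH_ZERO_MODE(enable ? _MM_FLUSH_ZERO_ON : _MM_FLUSH_ZERO_OFF);
        _MM_SET_DENORMALS_ZERO_MODE(enable ? _MM_DENORMALS_ZERO_ON : _MM_DENORMALS_ZERO_OFF);
    }

This also explains the `mayiuse(sse41)` guard and the return value in the diff: only an explicit ON reports `denormalsAsZero = true` into the `GraphContext`, while DEFAULT leaves the thread state untouched, matching the property's documented default behavior.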

src/plugins/intel_cpu/src/compiled_model.h

+2
@@ -93,6 +93,8 @@ class CompiledModel : public ov::ICompiledModel {
     std::vector<std::shared_ptr<CompiledModel>> m_sub_compiled_models;
     std::shared_ptr<SubMemoryManager> m_sub_memory_manager = nullptr;
     bool m_has_sub_compiled_models = false;
+
+    std::string m_model_name;
 };
 
 // This class provides safe access to the internal CompiledModel structures and helps to decouple SyncInferRequest and
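Caching the friendly name in the new `m_model_name` member at construction time is what lets `get_property(ov::model_name)` in compiled_model.cpp above drop the old `graph.dump()` round-trip.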
