#include "utils/debug_capabilities.h"
#include "utils/memory_stats_dump.hpp"
#include "utils/serialize.hpp"
+ #include "utils/denormals.hpp"

#if defined(OV_CPU_WITH_ACL)
#    include "nodes/executors/acl/acl_ie_scheduler.hpp"
@@ -63,28 +64,30 @@ CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
      m_cfg{std::move(cfg)},
      m_name{model->get_name()},
      m_loaded_from_cache(loaded_from_cache),
-       m_sub_memory_manager(std::move(sub_memory_manager)) {
+       m_sub_memory_manager(std::move(sub_memory_manager)),
+       m_model_name(model->get_friendly_name()) {
    m_mutex = std::make_shared<std::mutex>();
    const auto& core = m_plugin->get_core();
    if (!core) {
        OPENVINO_THROW("Unable to get API version. Core is unavailable");
    }

+
    IStreamsExecutor::Config executor_config;
-     if (m_cfg.exclusiveAsyncRequests) {
+     if (m_cfg.get_exclusive_async_requests()) {
        // special case when all InferRequests are muxed into a single queue
        m_task_executor = m_plugin->get_executor_manager()->get_executor("CPU");
    } else {
-         executor_config = m_cfg.numSubStreams > 0 ? IStreamsExecutor::Config{"CPUMainStreamExecutor",
+         executor_config = m_cfg.get_num_sub_streams() > 0 ? IStreamsExecutor::Config{"CPUMainStreamExecutor",
                                                                               1,
                                                                               1,
                                                                               ov::hint::SchedulingCoreType::ANY_CORE,
                                                                               false,
                                                                               true}
-                                                   : m_cfg.streamExecutorConfig;
+                                                   : m_cfg.get_stream_executor_config();
        m_task_executor = m_plugin->get_executor_manager()->get_idle_cpu_streams_executor(executor_config);
    }
-     if (0 != m_cfg.streamExecutorConfig.get_streams()) {
+     if (0 != m_cfg.get_stream_executor_config().get_streams()) {
        m_callback_executor = m_plugin->get_executor_manager()->get_idle_cpu_streams_executor(
            IStreamsExecutor::Config{"CPUCallbackExecutor", 1, 0});
    } else {
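The `+` lines above show the core of this refactor: `Config`'s public fields (`exclusiveAsyncRequests`, `numSubStreams`, `streamExecutorConfig`) are replaced with typed getters. A minimal sketch of the shape such a getter-based config might take; only the getter names come from the diff, while the member names and defaults are assumptions for illustration, not the plugin's actual class:

```cpp
// Hypothetical sketch of a getter-based Config; member names are illustrative only.
#include "openvino/runtime/threading/istreams_executor.hpp"

class Config {
public:
    bool get_exclusive_async_requests() const { return m_exclusive_async_requests; }
    int get_num_sub_streams() const { return m_num_sub_streams; }
    const ov::threading::IStreamsExecutor::Config& get_stream_executor_config() const {
        return m_stream_executor_config;
    }

private:
    bool m_exclusive_async_requests = false;  // mux all infer requests into one task queue
    int m_num_sub_streams = 0;                // >0 enables the sub-stream (model split) path
    ov::threading::IStreamsExecutor::Config m_stream_executor_config;
};
```

Keeping the fields private lets the option-parsing layer validate values once, instead of every call site reading raw fields.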
@@ -126,34 +129,33 @@ CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
    } else {
        CompiledModel::get_graph();
    }
-     if (m_cfg.numSubStreams > 0) {
+
+     if (m_cfg.get_num_sub_streams() > 0) {
        m_has_sub_compiled_models = true;
-         auto sub_cfg = m_cfg;
-         sub_cfg.numSubStreams = 0;
-         sub_cfg.enableNodeSplit = true;
-         auto streams_info_table = m_cfg.streamExecutorConfig.get_streams_info_table();
        auto message = message_manager();
-         m_sub_memory_manager = std::make_shared<SubMemoryManager>(m_cfg.numSubStreams);
-         message->set_num_sub_streams(m_cfg.numSubStreams);
-         for (int i = 0; i < m_cfg.numSubStreams; i++) {
-             std::vector<std::vector<int>> sub_streams_table;
-             sub_streams_table.push_back(streams_info_table[i + 1]);
-             sub_streams_table[0][NUMBER_OF_STREAMS] = 1;
-             sub_cfg.streamExecutorConfig = IStreamsExecutor::Config{"CPUStreamsExecutor",
-                                                                     1,
-                                                                     1,
-                                                                     ov::hint::SchedulingCoreType::ANY_CORE,
-                                                                     false,
-                                                                     true,
-                                                                     true,
-                                                                     std::move(sub_streams_table),
-                                                                     sub_cfg.streamsRankTable[i]};
+         m_sub_memory_manager = std::make_shared<SubMemoryManager>(m_cfg.get_num_sub_streams());
+         message->set_num_sub_streams(m_cfg.get_num_sub_streams());
+         for (int i = 0; i < m_cfg.get_num_sub_streams(); i++) {
+             auto sub_cfg = m_cfg.clone(i, true);
            m_sub_compiled_models.push_back(
                std::make_shared<CompiledModel>(model, plugin, sub_cfg, loaded_from_cache, m_sub_memory_manager));
        }
    }
}

+ static bool set_denormals_optimization(const ov::intel_cpu::DenormalsOptimization& value) {
+     if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::sse41)) {
+         if (value.m_mode == DenormalsOptimization::Mode::ON) {
+             flush_to_zero(true);
+             return denormals_as_zero(true);
+         } else if (value.m_mode == DenormalsOptimization::Mode::OFF) {
+             flush_to_zero(false);
+             denormals_as_zero(false);
+         }
+     }
+     return false;
+ }
+
CompiledModel::GraphGuard::Lock CompiledModel::get_graph() const {
    int streamId = 0;
    int socketId = 0;
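Both knobs toggled by the new `set_denormals_optimization()` helper are, on x86, bits in the per-thread MXCSR register: FTZ flushes denormal results to zero and DAZ treats denormal inputs as zero. Note the helper leaves the FP state untouched for `Mode::DEFAULT` and reports `true` only when denormals-as-zero was actually enabled. A self-contained sketch of what `flush_to_zero()`/`denormals_as_zero()` from `utils/denormals.hpp` plausibly wrap; the real helpers may differ, e.g. by guarding the DAZ bit on older CPUs:

```cpp
// Sketch of MXCSR-based denormal control; assumes an x86 target with SSE3 (for DAZ).
#include <pmmintrin.h>  // _MM_SET/GET_DENORMALS_ZERO_MODE
#include <xmmintrin.h>  // _MM_SET_FLUSH_ZERO_MODE

void flush_to_zero_sketch(bool on) {
    // FTZ: denormal *results* are replaced by zero. Per-thread state.
    _MM_SET_FLUSH_ZERO_MODE(on ? _MM_FLUSH_ZERO_ON : _MM_FLUSH_ZERO_OFF);
}

bool denormals_as_zero_sketch(bool on) {
    // DAZ: denormal *inputs* are read as zero. Report whether the bit actually stuck.
    _MM_SET_DENORMALS_ZERO_MODE(on ? _MM_DENORMALS_ZERO_ON : _MM_DENORMALS_ZERO_OFF);
    return _MM_GET_DENORMALS_ZERO_MODE() == _MM_DENORMALS_ZERO_ON;
}
```

The boolean result is recorded in the `GraphContext` in the next hunk, so the rest of the pipeline knows whether denormals-as-zero is in effect.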
@@ -170,11 +172,15 @@ CompiledModel::GraphGuard::Lock CompiledModel::get_graph() const {
    GraphContext::Ptr ctx;
    {
        std::lock_guard<std::mutex> lock{*m_mutex.get()};
-         auto isQuantizedFlag = (m_cfg.lpTransformsMode == Config::On) &&
+         auto isQuantizedFlag = (m_cfg.get_enable_lp_transformations()) &&
                               ov::pass::low_precision::LowPrecision::isFunctionQuantized(m_model);
+         // A runtime SSE4.1 check is needed: some ATOM machines are x86-64 but lack SSE4.1.
+
+         bool denormalsAsZero = set_denormals_optimization(m_cfg.get_denormals_optimization());
        ctx = std::make_shared<GraphContext>(m_cfg,
                                             m_socketWeights[socketId],
                                             isQuantizedFlag,
+                                              denormalsAsZero,
                                             streamsExecutor,
                                             m_sub_memory_manager);
    }
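From the application side the feature is driven by the `ov::intel_cpu::denormals_optimization` property; a minimal usage sketch (the model path is a placeholder):

```cpp
// Compile with denormals optimization explicitly requested on the CPU device.
#include <openvino/openvino.hpp>
#include <openvino/runtime/intel_cpu/properties.hpp>

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // placeholder path
    // true -> FTZ/DAZ enabled, false -> explicitly disabled; leaving the property
    // unset presumably maps to the default mode, which keeps the FP state untouched.
    auto compiled = core.compile_model(model, "CPU", ov::intel_cpu::denormals_optimization(true));
    return 0;
}
```

The mapping from the bool property onto the tri-state `DenormalsOptimization::Mode` is inferred from the helper above, not spelled out in this diff.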
@@ -229,25 +235,6 @@ std::shared_ptr<const ov::Model> CompiledModel::get_runtime_model() const {
}

ov::Any CompiledModel::get_property(const std::string& name) const {
-     if (m_graphs.empty()) {
-         OPENVINO_THROW("No graph was found");
-     }
-
-     if (name == ov::loaded_from_cache) {
-         return m_loaded_from_cache;
-     }
-
-     Config engConfig = get_graph()._graph.getConfig();
-     auto option = engConfig._config.find(name);
-     if (option != engConfig._config.end()) {
-         return option->second;
-     }
-
-     // @todo Can't we just use local copy (_cfg) instead?
-     auto graphLock = get_graph();
-     const auto& graph = graphLock._graph;
-     const auto& config = graph.getConfig();
-
    auto RO_property = [](const std::string& propertyName) {
        return ov::PropertyName(propertyName, ov::PropertyMutability::RO);
    };
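A side effect worth noting: previously `get_property` threw when no graph had been created yet and answered from a graph-held config copy; now every query is served from the compiled model's own `m_cfg`, so no graph lock is taken. Callers are unaffected; a minimal usage sketch:

```cpp
// Querying a compiled-model property; served from m_cfg after this change, no graph lookup.
#include <iostream>
#include <openvino/openvino.hpp>

void print_request_hint(const ov::CompiledModel& compiled) {
    auto n = compiled.get_property(ov::optimal_number_of_infer_requests);
    std::cout << "optimal_number_of_infer_requests: " << n << "\n";
}
```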
@@ -285,98 +272,25 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
    }

    if (name == ov::model_name) {
-         // @todo Does not seem ok to 'dump()' the whole graph everytime in order to get a name
-         const std::string modelName = graph.dump()->get_friendly_name();
-         return decltype(ov::model_name)::value_type(modelName);
+         return decltype(ov::model_name)::value_type{m_model_name};
+     }
+     if (name == ov::loaded_from_cache) {
+         return decltype(ov::loaded_from_cache)::value_type{m_loaded_from_cache};
    }
    if (name == ov::optimal_number_of_infer_requests) {
-         const auto streams = config.streamExecutorConfig.get_streams();
-         return static_cast<decltype(ov::optimal_number_of_infer_requests)::value_type>(
+         const auto streams = m_cfg.get_stream_executor_config().get_streams();
+         return decltype(ov::optimal_number_of_infer_requests)::value_type(
            streams > 0 ? streams : 1);  // ov::optimal_number_of_infer_requests has no negative values
    }
-     if (name == ov::num_streams) {
-         const auto streams = config.streamExecutorConfig.get_streams();
-         return decltype(ov::num_streams)::value_type(
-             streams);  // ov::num_streams has special negative values (AUTO = -1, NUMA = -2)
-     }
-     if (name == ov::inference_num_threads) {
-         const auto num_threads = config.streamExecutorConfig.get_threads();
-         return static_cast<decltype(ov::inference_num_threads)::value_type>(num_threads);
-     }
-     if (name == ov::enable_profiling.name()) {
-         const bool perfCount = config.collectPerfCounters;
-         return static_cast<decltype(ov::enable_profiling)::value_type>(perfCount);
-     }
-     if (name == ov::hint::inference_precision) {
-         return decltype(ov::hint::inference_precision)::value_type(config.inferencePrecision);
-     }
-     if (name == ov::hint::performance_mode) {
-         return static_cast<decltype(ov::hint::performance_mode)::value_type>(config.hintPerfMode);
-     }
-     if (name == ov::log::level) {
-         return static_cast<decltype(ov::log::level)::value_type>(config.logLevel);
-     }
-     if (name == ov::hint::enable_cpu_pinning.name()) {
-         const bool use_pin = config.enableCpuPinning;
-         return static_cast<decltype(ov::hint::enable_cpu_pinning)::value_type>(use_pin);
-     }
-     if (name == ov::hint::enable_cpu_reservation.name()) {
-         const bool use_reserve = config.enableCpuReservation;
-         return static_cast<decltype(ov::hint::enable_cpu_reservation)::value_type>(use_reserve);
-     }
-     if (name == ov::hint::scheduling_core_type) {
-         const auto stream_mode = config.schedulingCoreType;
-         return stream_mode;
-     }
-     if (name == ov::hint::model_distribution_policy) {
-         const auto& distribution_policy = config.modelDistributionPolicy;
-         return distribution_policy;
-     }
-     if (name == ov::hint::enable_hyper_threading.name()) {
-         const bool use_ht = config.enableHyperThreading;
-         return static_cast<decltype(ov::hint::enable_hyper_threading)::value_type>(use_ht);
-     }
-     if (name == ov::hint::execution_mode) {
-         return config.executionMode;
-     }
-     if (name == ov::hint::num_requests) {
-         return static_cast<decltype(ov::hint::num_requests)::value_type>(config.hintNumRequests);
-     }
    if (name == ov::execution_devices) {
        return decltype(ov::execution_devices)::value_type{m_plugin->get_device_name()};
    }
-     if (name == ov::intel_cpu::denormals_optimization) {
-         return static_cast<decltype(ov::intel_cpu::denormals_optimization)::value_type>(
-             config.denormalsOptMode == Config::DenormalsOptMode::DO_On);
-     }
-     if (name == ov::intel_cpu::sparse_weights_decompression_rate) {
-         return static_cast<decltype(ov::intel_cpu::sparse_weights_decompression_rate)::value_type>(
-             config.fcSparseWeiDecompressionRate);
-     }
-     if (name == ov::hint::dynamic_quantization_group_size) {
-         return static_cast<decltype(ov::hint::dynamic_quantization_group_size)::value_type>(
-             config.fcDynamicQuantizationGroupSize);
-     }
-     if (name == ov::hint::kv_cache_precision) {
-         return decltype(ov::hint::kv_cache_precision)::value_type(config.kvCachePrecision);
-     }
-     if (name == ov::key_cache_precision) {
-         return decltype(ov::key_cache_precision)::value_type(config.keyCachePrecision);
-     }
-     if (name == ov::value_cache_precision) {
-         return decltype(ov::value_cache_precision)::value_type(config.valueCachePrecision);
-     }
-     if (name == ov::key_cache_group_size) {
-         return static_cast<decltype(ov::key_cache_group_size)::value_type>(config.keyCacheGroupSize);
-     }
-     if (name == ov::value_cache_group_size) {
-         return static_cast<decltype(ov::value_cache_group_size)::value_type>(config.valueCacheGroupSize);
-     }
-     OPENVINO_THROW("Unsupported property: ", name);
+
+     return m_cfg.get_property(name, OptionVisibility::RELEASE);
}

void CompiledModel::export_model(std::ostream& modelStream) const {
-     ModelSerializer serializer(modelStream, m_cfg.cacheEncrypt);
+     ModelSerializer serializer(modelStream, m_cfg.get_cache_encryption_callbacks().encrypt);
    serializer << m_model;
}

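On the user side, the encrypt callback that `export_model` consumes arrives through the `ov::cache_encryption_callbacks` property. A minimal sketch; the XOR transform is a toy stand-in for a real cipher and the paths are placeholders:

```cpp
// Supplying cache encryption callbacks so exported/cached blobs are transformed.
#include <string>
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    core.set_property(ov::cache_dir("model_cache"));  // placeholder directory

    ov::EncryptionCallbacks callbacks;
    callbacks.encrypt = [](const std::string& in) {
        std::string out = in;
        for (auto& c : out)
            c ^= 0x5A;  // toy transform, not real cryptography
        return out;
    };
    callbacks.decrypt = callbacks.encrypt;  // XOR is its own inverse

    auto model = core.read_model("model.xml");  // placeholder path
    auto compiled = core.compile_model(model, "CPU", ov::cache_encryption_callbacks(callbacks));
    return 0;
}
```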