Skip to content

Commit b2a471b

Browse files
authored
[intel-npu] Adding NPU_TURBO option to plugin (#25603)
### Details: - Adding npu_turbo option for intel-npu plugin ### Tickets: - [*ticket-id*](https://jira.devtools.intel.com/browse/CVS-147038)
1 parent 8a26cf1 commit b2a471b

File tree

18 files changed

+180
-35
lines changed

18 files changed

+180
-35
lines changed

docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst

+1
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ offer a limited set of supported OpenVINO features.
132132
ov::enable_profiling
133133
ov::workload_type
134134
ov::intel_npu::compilation_mode_params
135+
ov::intel_npu::turbo
135136
136137
.. tab-item:: Read-only properties
137138

src/inference/include/openvino/runtime/intel_npu/properties.hpp

+8
Original file line numberDiff line numberDiff line change
@@ -61,5 +61,13 @@ static constexpr ov::Property<uint32_t, ov::PropertyMutability::RO> driver_versi
6161
*/
6262
static constexpr ov::Property<std::string> compilation_mode_params{"NPU_COMPILATION_MODE_PARAMS"};
6363

64+
/**
65+
* @brief [Only for NPU plugin]
66+
 * Type: bool
67+
* Set turbo on or off.
68+
* @ingroup ov_runtime_npu_prop_cpp_api
69+
*/
70+
static constexpr ov::Property<bool> turbo{"NPU_TURBO"};
71+
6472
} // namespace intel_npu
6573
} // namespace ov

src/plugins/intel_npu/README.md

+40
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,14 @@ The following properties are supported:
166166
| `ov::device::architecture`/</br>`DEVICE_ARCHITECTURE` | RO | Returns the platform information. | `N/A`| `N/A` |
167167
| `ov::device::full_name`/</br>`FULL_DEVICE_NAME` | RO | Returns the full name of the NPU device. | `N/A`| `N/A` |
168168
| `ov::internal::exclusive_async_requests`/</br>`EXCLUSIVE_ASYNC_REQUESTS` | RW | Allows to use exclusive task executor for asynchronous infer requests. | `YES`/ `NO`| `NO` |
169+
| `ov::device::type`/</br>`DEVICE_TYPE` | RO | Returns the type of device, discrete or integrated. | `DISCRETE` /</br>`INTEGRATED` | `N/A` |
170+
| `ov::device::gops`/</br>`DEVICE_GOPS` | RO | Returns the Giga OPS per second count (GFLOPS or GIOPS) for a set of precisions supported by specified device. | `N/A`| `N/A` |
171+
| `ov::device::pci_info`/</br>`DEVICE_PCI_INFO` | RO | Returns the PCI bus information of device. See PCIInfo struct definition for details | `N/A`| `N/A` |
172+
| `ov::intel_npu::device_alloc_mem_size`/</br>`NPU_DEVICE_ALLOC_MEM_SIZE` | RO | Size of already allocated NPU DDR memory (both for discrete/integrated NPU devices) | `N/A` | `N/A` |
173+
| `ov::intel_npu::device_total_mem_size`/</br>`NPU_DEVICE_TOTAL_MEM_SIZE` | RO | Size of available NPU DDR memory (both for discrete/integrated NPU devices) | `N/A` | `N/A` |
174+
| `ov::intel_npu::driver_version`/</br>`NPU_DRIVER_VERSION` | RO | NPU driver version (for both discrete/integrated NPU devices). | `N/A` | `N/A` |
175+
| `ov::intel_npu::compilation_mode_params`/</br>`NPU_COMPILATION_MODE_PARAMS` | RW | Set various parameters supported by the NPU compiler. (See below) | `<std::string>`| `N/A` |
176+
| `ov::intel_npu::turbo`/</br>`NPU_TURBO` | RW | Set Turbo mode on/off | `YES`/ `NO`| `NO` |
169177

170178
&nbsp;
171179
### Performance Hint: Default Number of DPU Groups / DMA Engines
@@ -192,6 +200,38 @@ The following table shows the optimal number of inference requests returned by t
192200
| 3720 | 4 | 1 |
193201
| 4000 | 8 | 1 |
194202

203+
&nbsp;
204+
### Compilation mode parameters
205+
``ov::intel_npu::compilation_mode_params`` is an NPU-specific property that allows controlling model compilation for NPU.
206+
Note: This functionality is currently experimental; it may be deprecated or replaced with a generic OV API in future OV releases.
207+
208+
Following configuration options are supported:
209+
210+
#### optimization-level
211+
Defines a preset of optimization passes to be applied during compilation. Supported values:
212+
213+
| Value | Description |
214+
| :--- | :--- |
215+
| 0 | Reduced subset of optimization passes. Smaller compile time. |
216+
| 1 | Default. Balanced performance/compile time. |
217+
| 2 | Prioritize performance over compile time; compilation may take noticeably longer. |
218+
219+
#### performance-hint-override
220+
An extension for the LATENCY mode specified via ``ov::hint::performance_mode``.
221+
Has no effect for other ``ov::hint::PerformanceMode`` hints.
222+
223+
Supported values:
224+
225+
| Value | Description |
226+
| :--- | :--- |
227+
| efficiency | Default. Balanced performance and power consumption. |
228+
| latency | Prioritize performance over power efficiency. |
229+
230+
#### Usage example:
231+
```
232+
map<str, str> config = {ov::intel_npu::compilation_mode_params.name(), ov::Any("optimization-level=1 performance-hint-override=latency")};
233+
compile_model(model, config);
234+
```
195235

196236
&nbsp;
197237
## Stateful models

src/plugins/intel_npu/src/al/include/intel_npu/al/config/runtime.hpp

+17
Original file line numberDiff line numberDiff line change
@@ -204,4 +204,21 @@ struct WORKLOAD_TYPE final : OptionBase<WORKLOAD_TYPE, ov::WorkloadType> {
204204

205205
static std::string toString(const ov::WorkloadType& val);
206206
};
207+
208+
//
209+
// TURBO
210+
//
211+
struct TURBO final : OptionBase<TURBO, bool> {
212+
static std::string_view key() {
213+
return ov::intel_npu::turbo.name();
214+
}
215+
216+
static bool defaultValue() {
217+
return false;
218+
}
219+
220+
static OptionMode mode() {
221+
return OptionMode::RunTime;
222+
}
223+
};
207224
} // namespace intel_npu

src/plugins/intel_npu/src/al/include/npu.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ class IEngineBackend : public std::enable_shared_from_this<IEngineBackend> {
3636
/** @brief Backend has support for concurrency batching */
3737
virtual bool isBatchingSupported() const = 0;
3838
/** @brief Backend has support for workload type */
39-
virtual bool isWorkloadTypeSupported() const = 0;
39+
virtual bool isCommandQueueExtSupported() const = 0;
4040
/** @brief Register backend-specific options */
4141
virtual void registerOptions(OptionsDesc& options) const;
4242

src/plugins/intel_npu/src/al/src/config/runtime.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ void intel_npu::registerRunTimeOptions(OptionsDesc& desc) {
2424
desc.add<NUM_STREAMS>();
2525
desc.add<ENABLE_CPU_PINNING>();
2626
desc.add<WORKLOAD_TYPE>();
27+
desc.add<TURBO>();
2728
}
2829

2930
// Heuristically obtained number. Varies depending on the values of PLATFORM and PERFORMANCE_HINT

src/plugins/intel_npu/src/backend/include/zero_backend.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ class ZeroEngineBackend final : public IEngineBackend {
2626
uint32_t getDriverExtVersion() const override;
2727

2828
bool isBatchingSupported() const override;
29-
bool isWorkloadTypeSupported() const override;
29+
bool isCommandQueueExtSupported() const override;
3030

3131
private:
3232
std::shared_ptr<ZeroInitStructsHolder> _instance;

src/plugins/intel_npu/src/backend/src/zero_backend.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ bool ZeroEngineBackend::isBatchingSupported() const {
3434
return _instance->getDriverExtVersion() >= ZE_GRAPH_EXT_VERSION_1_6;
3535
}
3636

37-
bool ZeroEngineBackend::isWorkloadTypeSupported() const {
37+
bool ZeroEngineBackend::isCommandQueueExtSupported() const {
3838
return _instance->getCommandQueueDdiTable() != nullptr;
3939
}
4040

src/plugins/intel_npu/src/backend/src/zero_wrappers.cpp

+9
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,15 @@ CommandQueue::CommandQueue(const ze_device_handle_t& device_handle,
116116
_log("CommandQueue", config.get<LOG_LEVEL>()) {
117117
ze_command_queue_desc_t queue_desc =
118118
{ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC, nullptr, group_ordinal, 0, 0, ZE_COMMAND_QUEUE_MODE_DEFAULT, priority};
119+
if (config.has<TURBO>()) {
120+
if (_command_queue_npu_dditable_ext != nullptr) {
121+
bool turbo = config.get<TURBO>();
122+
ze_command_queue_desc_npu_ext_t turbo_cfg = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC_NPU_EXT, nullptr, turbo};
123+
queue_desc.pNext = &turbo_cfg;
124+
} else {
125+
OPENVINO_THROW("Turbo is not supported by the current driver");
126+
}
127+
}
119128
zeroUtils::throwOnFail("zeCommandQueueCreate",
120129
zeCommandQueueCreate(_context, device_handle, &queue_desc, &_handle));
121130
}

src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -512,6 +512,10 @@ std::string LevelZeroCompilerInDriver<TableExtension>::serializeConfig(
512512
std::ostringstream workloadtypestr;
513513
workloadtypestr << ov::workload_type.name() << KEY_VALUE_SEPARATOR << VALUE_DELIMITER << "\\S+" << VALUE_DELIMITER;
514514
content = std::regex_replace(content, std::regex(workloadtypestr.str()), "");
515+
// Remove turbo property as it is not used by compiler
516+
std::ostringstream turbostring;
517+
turbostring << ov::intel_npu::turbo.name() << KEY_VALUE_SEPARATOR << VALUE_DELIMITER << "\\S+" << VALUE_DELIMITER;
518+
content = std::regex_replace(content, std::regex(turbostring.str()), "");
515519

516520
// FINAL step to convert prefixes of remaining params, to ensure backwards compatibility
517521
// From 5.0.0, driver compiler start to use NPU_ prefix, the old version uses VPU_ prefix

src/plugins/intel_npu/src/plugin/include/backends.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ class NPUBackends final {
3232
uint32_t getDriverVersion() const;
3333
uint32_t getDriverExtVersion() const;
3434
bool isBatchingSupported() const;
35-
bool isWorkloadTypeSupported() const;
35+
bool isCommandQueueExtSupported() const;
3636
void registerOptions(OptionsDesc& options) const;
3737
std::string getCompilationPlatform(const std::string_view platform, const std::string& deviceId) const;
3838

src/plugins/intel_npu/src/plugin/src/backends.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -163,9 +163,9 @@ bool NPUBackends::isBatchingSupported() const {
163163
return false;
164164
}
165165

166-
bool NPUBackends::isWorkloadTypeSupported() const {
166+
bool NPUBackends::isCommandQueueExtSupported() const {
167167
if (_backend != nullptr) {
168-
return _backend->isWorkloadTypeSupported();
168+
return _backend->isCommandQueueExtSupported();
169169
}
170170

171171
return false;

src/plugins/intel_npu/src/plugin/src/compiled_model.cpp

+7-1
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ void CompiledModel::configure_stream_executors() {
209209
}
210210

211211
void CompiledModel::initialize_properties() {
212-
const auto& pluginSupportedProperties =
212+
const auto pluginSupportedProperties =
213213
get_plugin()->get_property(ov::supported_properties.name(), {}).as<std::vector<ov::PropertyName>>();
214214
const auto& isPropertySupported = [&pluginSupportedProperties](const std::string& name) {
215215
return std::any_of(pluginSupportedProperties.begin(),
@@ -328,6 +328,12 @@ void CompiledModel::initialize_properties() {
328328
[](const Config& config) {
329329
return config.get<COMPILATION_MODE_PARAMS>();
330330
}}},
331+
{ov::intel_npu::turbo.name(),
332+
{isPropertySupported(ov::intel_npu::turbo.name()),
333+
ov::PropertyMutability::RO,
334+
[](const Config& config) {
335+
return config.get<TURBO>();
336+
}}},
331337
// NPU Private
332338
// =========
333339
{ov::intel_npu::tiles.name(),

src/plugins/intel_npu/src/plugin/src/plugin.cpp

+7-1
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,7 @@ Plugin::Plugin()
307307
return _metrics->GetAvailableDevicesNames();
308308
}}},
309309
{ov::workload_type.name(),
310-
{_backends->isWorkloadTypeSupported(),
310+
{_backends->isCommandQueueExtSupported(),
311311
ov::PropertyMutability::RW,
312312
[](const Config& config) {
313313
return config.get<WORKLOAD_TYPE>();
@@ -448,6 +448,12 @@ Plugin::Plugin()
448448
[](const Config& config) {
449449
return config.get<COMPILATION_MODE_PARAMS>();
450450
}}},
451+
{ov::intel_npu::turbo.name(),
452+
{_backends->isCommandQueueExtSupported(),
453+
ov::PropertyMutability::RW,
454+
[](const Config& config) {
455+
return config.get<TURBO>();
456+
}}},
451457
// NPU Private
452458
// =========
453459
{ov::intel_npu::dma_engines.name(),
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// Copyright (C) 2018-2024 Intel Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
//
4+
5+
#include "overload/compile_and_infer.hpp"
6+
7+
#include <npu_private_properties.hpp>
8+
9+
#include "common/npu_test_env_cfg.hpp"
10+
#include "common/utils.hpp"
11+
12+
namespace {
13+
14+
using namespace ov::test::behavior;
15+
16+
const std::vector<ov::AnyMap> configs = {{}};
17+
18+
INSTANTIATE_TEST_SUITE_P(compatibility_smoke_BehaviorTests,
19+
OVCompileAndInferRequest,
20+
::testing::Combine(::testing::Values(getConstantGraph(ov::element::f32)),
21+
::testing::Values(ov::test::utils::DEVICE_NPU),
22+
::testing::ValuesIn(configs)),
23+
ov::test::utils::appendPlatformTypeTestName<OVCompileAndInferRequest>);
24+
25+
INSTANTIATE_TEST_SUITE_P(compatibility_smoke_BehaviorTests,
26+
OVCompileAndInferRequestTurbo,
27+
::testing::Combine(::testing::Values(getConstantGraph(ov::element::f32)),
28+
::testing::Values(ov::test::utils::DEVICE_NPU),
29+
::testing::ValuesIn(std::vector<ov::AnyMap>{
30+
{ov::intel_npu::create_executor(0)},
31+
{ov::intel_npu::create_executor(1)}})),
32+
ov::test::utils::appendPlatformTypeTestName<OVCompileAndInferRequestTurbo>);
33+
34+
} // namespace

src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp

+45-4
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ inline std::shared_ptr<ov::Model> getConstantGraph(element::Type type) {
3333
return std::make_shared<Model>(results, params);
3434
}
3535

36-
inline bool isWorkloadTypeSupported() {
36+
inline bool isCommandQueueExtSupported() {
3737
return std::make_shared<::intel_npu::ZeroInitStructsHolder>()->getCommandQueueDdiTable() != nullptr;
3838
}
3939

@@ -100,7 +100,7 @@ TEST_P(OVCompileAndInferRequest, PluginWorkloadType) {
100100
return property == workload_type.name();
101101
});
102102

103-
if (isWorkloadTypeSupported()) {
103+
if (isCommandQueueExtSupported()) {
104104
ASSERT_TRUE(workloadTypeSupported);
105105
ov::InferRequest req;
106106
OV_ASSERT_NO_THROW(execNet = core->compile_model(function, target_device, configuration));
@@ -137,7 +137,7 @@ TEST_P(OVCompileAndInferRequest, CompiledModelWorkloadType) {
137137
return property == workload_type.name();
138138
});
139139

140-
if (isWorkloadTypeSupported()) {
140+
if (isCommandQueueExtSupported()) {
141141
ASSERT_TRUE(workloadTypeSupported);
142142
OV_ASSERT_NO_THROW(execNet.set_property(modelConfiguration));
143143
ov::InferRequest req;
@@ -165,7 +165,7 @@ TEST_P(OVCompileAndInferRequest, CompiledModelWorkloadTypeDelayedExecutor) {
165165
modelConfiguration[workload_type.name()] = WorkloadType::DEFAULT;
166166
OV_ASSERT_NO_THROW(execNet.set_property(modelConfiguration));
167167

168-
if (isWorkloadTypeSupported()) {
168+
if (isCommandQueueExtSupported()) {
169169
ov::InferRequest req;
170170
OV_ASSERT_NO_THROW(req = execNet.create_infer_request());
171171
bool is_called = false;
@@ -183,6 +183,47 @@ TEST_P(OVCompileAndInferRequest, CompiledModelWorkloadTypeDelayedExecutor) {
183183
}
184184
}
185185

186+
using OVCompileAndInferRequestTurbo = OVCompileAndInferRequest;
187+
188+
TEST_P(OVCompileAndInferRequestTurbo, CompiledModelTurbo) {
189+
configuration[intel_npu::turbo.name()] = true;
190+
191+
auto supportedProperties = core->get_property("NPU", supported_properties.name()).as<std::vector<PropertyName>>();
192+
bool isTurboSupported =
193+
std::any_of(supportedProperties.begin(), supportedProperties.end(), [](const PropertyName& property) {
194+
return property == intel_npu::turbo.name();
195+
});
196+
197+
if (isCommandQueueExtSupported()) {
198+
ASSERT_TRUE(isTurboSupported);
199+
OV_ASSERT_NO_THROW(execNet = core->compile_model(function, target_device, configuration));
200+
auto turbosetting_compiled_model = execNet.get_property(intel_npu::turbo.name());
201+
OV_ASSERT_NO_THROW(turbosetting_compiled_model = true);
202+
ov::InferRequest req;
203+
OV_ASSERT_NO_THROW(req = execNet.create_infer_request());
204+
bool is_called = false;
205+
OV_ASSERT_NO_THROW(req.set_callback([&](std::exception_ptr exception_ptr) {
206+
ASSERT_EQ(exception_ptr, nullptr);
207+
is_called = true;
208+
}));
209+
OV_ASSERT_NO_THROW(req.start_async());
210+
OV_ASSERT_NO_THROW(req.wait());
211+
ASSERT_TRUE(is_called);
212+
} else {
213+
auto cr_ex = configuration.find(intel_npu::create_executor.name());
214+
if (cr_ex->second.as<int64_t>() == 1) {
215+
OV_EXPECT_THROW_HAS_SUBSTRING(core->compile_model(function, target_device, configuration),
216+
ov::Exception,
217+
"Turbo is not supported by the current driver");
218+
} else {
219+
OV_ASSERT_NO_THROW(execNet = core->compile_model(function, target_device, configuration));
220+
OV_EXPECT_THROW_HAS_SUBSTRING(execNet.create_infer_request(),
221+
ov::Exception,
222+
"Turbo is not supported by the current driver");
223+
}
224+
}
225+
}
226+
186227
} // namespace behavior
187228
} // namespace test
188229
} // namespace ov

src/plugins/intel_npu/tests/functional/shared_tests_instances/behavior/ov_infer_request/compile_and_infer.cpp

-22
This file was deleted.

0 commit comments

Comments
 (0)