Skip to content

Commit b2a471b

Browse files
authored
[intel-npu] Adding NPU_TURBO option to plugin (#25603)
### Details: - Adding npu_turbo option for intel-npu plugin ### Tickets: - [*ticket-id*](https://jira.devtools.intel.com/browse/CVS-147038)
1 parent 8a26cf1 commit b2a471b

File tree

18 files changed

+180
-35
lines changed

18 files changed

+180
-35
lines changed

docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst

+1
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ offer a limited set of supported OpenVINO features.
132132
ov::enable_profiling
133133
ov::workload_type
134134
ov::intel_npu::compilation_mode_params
135+
ov::intel_npu::turbo
135136
136137
.. tab-item:: Read-only properties
137138

src/inference/include/openvino/runtime/intel_npu/properties.hpp

+8
Original file line numberDiff line numberDiff line change
@@ -61,5 +61,13 @@ static constexpr ov::Property<uint32_t, ov::PropertyMutability::RO> driver_versi
6161
*/
6262
static constexpr ov::Property<std::string> compilation_mode_params{"NPU_COMPILATION_MODE_PARAMS"};
6363

64+
/**
65+
* @brief [Only for NPU plugin]
66+
 * Type: bool
67+
* Set turbo on or off.
68+
* @ingroup ov_runtime_npu_prop_cpp_api
69+
*/
70+
static constexpr ov::Property<bool> turbo{"NPU_TURBO"};
71+
6472
} // namespace intel_npu
6573
} // namespace ov

src/plugins/intel_npu/README.md

+40
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,14 @@ The following properties are supported:
166166
| `ov::device::architecture`/</br>`DEVICE_ARCHITECTURE` | RO | Returns the platform information. | `N/A`| `N/A` |
167167
| `ov::device::full_name`/</br>`FULL_DEVICE_NAME` | RO | Returns the full name of the NPU device. | `N/A`| `N/A` |
168168
| `ov::internal::exclusive_async_requests`/</br>`EXCLUSIVE_ASYNC_REQUESTS` | RW | Allows to use exclusive task executor for asynchronous infer requests. | `YES`/ `NO`| `NO` |
169+
| `ov::device::type`/</br>`DEVICE_TYPE` | RO | Returns the type of device, discrete or integrated. | `DISCRETE` /</br>`INTEGRATED` | `N/A` |
170+
| `ov::device::gops`/</br>`DEVICE_GOPS` | RO | Returns the Giga OPS per second count (GFLOPS or GIOPS) for a set of precisions supported by specified device. | `N/A`| `N/A` |
171+
| `ov::device::pci_info`/</br>`DEVICE_PCI_INFO` | RO | Returns the PCI bus information of device. See PCIInfo struct definition for details | `N/A`| `N/A` |
172+
| `ov::intel_npu::device_alloc_mem_size`/</br>`NPU_DEVICE_ALLOC_MEM_SIZE` | RO | Size of already allocated NPU DDR memory (both for discrete/integrated NPU devices) | `N/A` | `N/A` |
173+
| `ov::intel_npu::device_total_mem_size`/</br>`NPU_DEVICE_TOTAL_MEM_SIZE` | RO | Size of available NPU DDR memory (both for discrete/integrated NPU devices) | `N/A` | `N/A` |
174+
| `ov::intel_npu::driver_version`/</br>`NPU_DRIVER_VERSION` | RO | NPU driver version (for both discrete/integrated NPU devices). | `N/A` | `N/A` |
175+
| `ov::intel_npu::compilation_mode_params`/</br>`NPU_COMPILATION_MODE_PARAMS` | RW | Set various parameters supported by the NPU compiler. (See below) | `<std::string>`| `N/A` |
176+
| `ov::intel_npu::turbo`/</br>`NPU_TURBO` | RW | Set Turbo mode on/off | `YES`/ `NO`| `NO` |
169177

170178
&nbsp;
171179
### Performance Hint: Default Number of DPU Groups / DMA Engines
@@ -192,6 +200,38 @@ The following table shows the optimal number of inference requests returned by t
192200
| 3720 | 4 | 1 |
193201
| 4000 | 8 | 1 |
194202

203+
&nbsp;
204+
### Compilation mode parameters
205+
``ov::intel_npu::compilation_mode_params`` is an NPU-specific property that allows controlling model compilation for NPU.
206+
Note: This functionality is currently experimental; it may be deprecated or replaced with a generic OV API in future OV releases.
207+
208+
Following configuration options are supported:
209+
210+
#### optimization-level
211+
Defines a preset of optimization passes to be applied during compilation. Supported values:
212+
213+
| Value | Description |
214+
| :--- | :--- |
215+
| 0 | Reduced subset of optimization passes. Smaller compile time. |
216+
| 1 | Default. Balanced performance/compile time. |
217+
| 2 | Prioritize performance over compile time; compilation may take noticeably longer. |
218+
219+
#### performance-hint-override
220+
An extension for the LATENCY mode specified via ``ov::hint::performance_mode``.
221+
Has no effect for other ``ov::hint::PerformanceMode`` hints.
222+
223+
Supported values:
224+
225+
| Value | Description |
226+
| :--- | :--- |
227+
| efficiency | Default. Balanced performance and power consumption. |
228+
| latency | Prioritize performance over power efficiency. |
229+
230+
#### Usage example:
231+
```
232+
map<str, str> config = {ov::intel_npu::compilation_mode_params.name(), ov::Any("optimization-level=1 performance-hint-override=latency")};
233+
compile_model(model, config);
234+
```
195235

196236
&nbsp;
197237
## Stateful models

src/plugins/intel_npu/src/al/include/intel_npu/al/config/runtime.hpp

+17
Original file line numberDiff line numberDiff line change
@@ -204,4 +204,21 @@ struct WORKLOAD_TYPE final : OptionBase<WORKLOAD_TYPE, ov::WorkloadType> {
204204

205205
static std::string toString(const ov::WorkloadType& val);
206206
};
207+
208+
//
209+
// TURBO
210+
//
211+
struct TURBO final : OptionBase<TURBO, bool> {
212+
static std::string_view key() {
213+
return ov::intel_npu::turbo.name();
214+
}
215+
216+
static bool defaultValue() {
217+
return false;
218+
}
219+
220+
static OptionMode mode() {
221+
return OptionMode::RunTime;
222+
}
223+
};
207224
} // namespace intel_npu

src/plugins/intel_npu/src/al/include/npu.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ class IEngineBackend : public std::enable_shared_from_this<IEngineBackend> {
3636
/** @brief Backend has support for concurrency batching */
3737
virtual bool isBatchingSupported() const = 0;
3838
/** @brief Backend has support for workload type */
39-
virtual bool isWorkloadTypeSupported() const = 0;
39+
virtual bool isCommandQueueExtSupported() const = 0;
4040
/** @brief Register backend-specific options */
4141
virtual void registerOptions(OptionsDesc& options) const;
4242

src/plugins/intel_npu/src/al/src/config/runtime.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ void intel_npu::registerRunTimeOptions(OptionsDesc& desc) {
2424
desc.add<NUM_STREAMS>();
2525
desc.add<ENABLE_CPU_PINNING>();
2626
desc.add<WORKLOAD_TYPE>();
27+
desc.add<TURBO>();
2728
}
2829

2930
// Heuristically obtained number. Varies depending on the values of PLATFORM and PERFORMANCE_HINT

src/plugins/intel_npu/src/backend/include/zero_backend.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ class ZeroEngineBackend final : public IEngineBackend {
2626
uint32_t getDriverExtVersion() const override;
2727

2828
bool isBatchingSupported() const override;
29-
bool isWorkloadTypeSupported() const override;
29+
bool isCommandQueueExtSupported() const override;
3030

3131
private:
3232
std::shared_ptr<ZeroInitStructsHolder> _instance;

src/plugins/intel_npu/src/backend/src/zero_backend.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ bool ZeroEngineBackend::isBatchingSupported() const {
3434
return _instance->getDriverExtVersion() >= ZE_GRAPH_EXT_VERSION_1_6;
3535
}
3636

37-
bool ZeroEngineBackend::isWorkloadTypeSupported() const {
37+
bool ZeroEngineBackend::isCommandQueueExtSupported() const {
3838
return _instance->getCommandQueueDdiTable() != nullptr;
3939
}
4040

src/plugins/intel_npu/src/backend/src/zero_wrappers.cpp

+9
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,15 @@ CommandQueue::CommandQueue(const ze_device_handle_t& device_handle,
116116
_log("CommandQueue", config.get<LOG_LEVEL>()) {
117117
ze_command_queue_desc_t queue_desc =
118118
{ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC, nullptr, group_ordinal, 0, 0, ZE_COMMAND_QUEUE_MODE_DEFAULT, priority};
119+
if (config.has<TURBO>()) {
120+
if (_command_queue_npu_dditable_ext != nullptr) {
121+
bool turbo = config.get<TURBO>();
122+
ze_command_queue_desc_npu_ext_t turbo_cfg = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC_NPU_EXT, nullptr, turbo};
123+
queue_desc.pNext = &turbo_cfg;
124+
} else {
125+
OPENVINO_THROW("Turbo is not supported by the current driver");
126+
}
127+
}
119128
zeroUtils::throwOnFail("zeCommandQueueCreate",
120129
zeCommandQueueCreate(_context, device_handle, &queue_desc, &_handle));
121130
}

src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -512,6 +512,10 @@ std::string LevelZeroCompilerInDriver<TableExtension>::serializeConfig(
512512
std::ostringstream workloadtypestr;
513513
workloadtypestr << ov::workload_type.name() << KEY_VALUE_SEPARATOR << VALUE_DELIMITER << "\\S+" << VALUE_DELIMITER;
514514
content = std::regex_replace(content, std::regex(workloadtypestr.str()), "");
515+
// Remove turbo property as it is not used by compiler
516+
std::ostringstream turbostring;
517+
turbostring << ov::intel_npu::turbo.name() << KEY_VALUE_SEPARATOR << VALUE_DELIMITER << "\\S+" << VALUE_DELIMITER;
518+
content = std::regex_replace(content, std::regex(turbostring.str()), "");
515519

516520
// FINAL step to convert prefixes of remaining params, to ensure backwards compatibility
517521
// From 5.0.0, driver compiler start to use NPU_ prefix, the old version uses VPU_ prefix

src/plugins/intel_npu/src/plugin/include/backends.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ class NPUBackends final {
3232
uint32_t getDriverVersion() const;
3333
uint32_t getDriverExtVersion() const;
3434
bool isBatchingSupported() const;
35-
bool isWorkloadTypeSupported() const;
35+
bool isCommandQueueExtSupported() const;
3636
void registerOptions(OptionsDesc& options) const;
3737
std::string getCompilationPlatform(const std::string_view platform, const std::string& deviceId) const;
3838

src/plugins/intel_npu/src/plugin/src/backends.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -163,9 +163,9 @@ bool NPUBackends::isBatchingSupported() const {
163163
return false;
164164
}
165165

166-
bool NPUBackends::isWorkloadTypeSupported() const {
166+
bool NPUBackends::isCommandQueueExtSupported() const {
167167
if (_backend != nullptr) {
168-
return _backend->isWorkloadTypeSupported();
168+
return _backend->isCommandQueueExtSupported();
169169
}
170170

171171
return false;

src/plugins/intel_npu/src/plugin/src/compiled_model.cpp

+7-1
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ void CompiledModel::configure_stream_executors() {
209209
}
210210

211211
void CompiledModel::initialize_properties() {
212-
const auto& pluginSupportedProperties =
212+
const auto pluginSupportedProperties =
213213
get_plugin()->get_property(ov::supported_properties.name(), {}).as<std::vector<ov::PropertyName>>();
214214
const auto& isPropertySupported = [&pluginSupportedProperties](const std::string& name) {
215215
return std::any_of(pluginSupportedProperties.begin(),
@@ -328,6 +328,12 @@ void CompiledModel::initialize_properties() {
328328
[](const Config& config) {
329329
return config.get<COMPILATION_MODE_PARAMS>();
330330
}}},
331+
{ov::intel_npu::turbo.name(),
332+
{isPropertySupported(ov::intel_npu::turbo.name()),
333+
ov::PropertyMutability::RO,
334+
[](const Config& config) {
335+
return config.get<TURBO>();
336+
}}},
331337
// NPU Private
332338
// =========
333339
{ov::intel_npu::tiles.name(),

src/plugins/intel_npu/src/plugin/src/plugin.cpp

+7-1
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,7 @@ Plugin::Plugin()
307307
return _metrics->GetAvailableDevicesNames();
308308
}}},
309309
{ov::workload_type.name(),
310-
{_backends->isWorkloadTypeSupported(),
310+
{_backends->isCommandQueueExtSupported(),
311311
ov::PropertyMutability::RW,
312312
[](const Config& config) {
313313
return config.get<WORKLOAD_TYPE>();
@@ -448,6 +448,12 @@ Plugin::Plugin()
448448
[](const Config& config) {
449449
return config.get<COMPILATION_MODE_PARAMS>();
450450
}}},
451+
{ov::intel_npu::turbo.name(),
452+
{_backends->isCommandQueueExtSupported(),
453+
ov::PropertyMutability::RW,
454+
[](const Config& config) {
455+
return config.get<TURBO>();
456+
}}},
451457
// NPU Private
452458
// =========
453459
{ov::intel_npu::dma_engines.name(),
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// Copyright (C) 2018-2024 Intel Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
//
4+
5+
#include "overload/compile_and_infer.hpp"
6+
7+
#include <npu_private_properties.hpp>
8+
9+
#include "common/npu_test_env_cfg.hpp"
10+
#include "common/utils.hpp"
11+
12+
namespace {
13+
14+
using namespace ov::test::behavior;
15+
16+
const std::vector<ov::AnyMap> configs = {{}};
17+
18+
INSTANTIATE_TEST_SUITE_P(compatibility_smoke_BehaviorTests,
19+
OVCompileAndInferRequest,
20+
::testing::Combine(::testing::Values(getConstantGraph(ov::element::f32)),
21+
::testing::Values(ov::test::utils::DEVICE_NPU),
22+
::testing::ValuesIn(configs)),
23+
ov::test::utils::appendPlatformTypeTestName<OVCompileAndInferRequest>);
24+
25+
INSTANTIATE_TEST_SUITE_P(compatibility_smoke_BehaviorTests,
26+
OVCompileAndInferRequestTurbo,
27+
::testing::Combine(::testing::Values(getConstantGraph(ov::element::f32)),
28+
::testing::Values(ov::test::utils::DEVICE_NPU),
29+
::testing::ValuesIn(std::vector<ov::AnyMap>{
30+
{ov::intel_npu::create_executor(0)},
31+
{ov::intel_npu::create_executor(1)}})),
32+
ov::test::utils::appendPlatformTypeTestName<OVCompileAndInferRequestTurbo>);
33+
34+
} // namespace

src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp

+45-4
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ inline std::shared_ptr<ov::Model> getConstantGraph(element::Type type) {
3333
return std::make_shared<Model>(results, params);
3434
}
3535

36-
inline bool isWorkloadTypeSupported() {
36+
inline bool isCommandQueueExtSupported() {
3737
return std::make_shared<::intel_npu::ZeroInitStructsHolder>()->getCommandQueueDdiTable() != nullptr;
3838
}
3939

@@ -100,7 +100,7 @@ TEST_P(OVCompileAndInferRequest, PluginWorkloadType) {
100100
return property == workload_type.name();
101101
});
102102

103-
if (isWorkloadTypeSupported()) {
103+
if (isCommandQueueExtSupported()) {
104104
ASSERT_TRUE(workloadTypeSupported);
105105
ov::InferRequest req;
106106
OV_ASSERT_NO_THROW(execNet = core->compile_model(function, target_device, configuration));
@@ -137,7 +137,7 @@ TEST_P(OVCompileAndInferRequest, CompiledModelWorkloadType) {
137137
return property == workload_type.name();
138138
});
139139

140-
if (isWorkloadTypeSupported()) {
140+
if (isCommandQueueExtSupported()) {
141141
ASSERT_TRUE(workloadTypeSupported);
142142
OV_ASSERT_NO_THROW(execNet.set_property(modelConfiguration));
143143
ov::InferRequest req;
@@ -165,7 +165,7 @@ TEST_P(OVCompileAndInferRequest, CompiledModelWorkloadTypeDelayedExecutor) {
165165
modelConfiguration[workload_type.name()] = WorkloadType::DEFAULT;
166166
OV_ASSERT_NO_THROW(execNet.set_property(modelConfiguration));
167167

168-
if (isWorkloadTypeSupported()) {
168+
if (isCommandQueueExtSupported()) {
169169
ov::InferRequest req;
170170
OV_ASSERT_NO_THROW(req = execNet.create_infer_request());
171171
bool is_called = false;
@@ -183,6 +183,47 @@ TEST_P(OVCompileAndInferRequest, CompiledModelWorkloadTypeDelayedExecutor) {
183183
}
184184
}
185185

186+
using OVCompileAndInferRequestTurbo = OVCompileAndInferRequest;
187+
188+
TEST_P(OVCompileAndInferRequestTurbo, CompiledModelTurbo) {
189+
configuration[intel_npu::turbo.name()] = true;
190+
191+
auto supportedProperties = core->get_property("NPU", supported_properties.name()).as<std::vector<PropertyName>>();
192+
bool isTurboSupported =
193+
std::any_of(supportedProperties.begin(), supportedProperties.end(), [](const PropertyName& property) {
194+
return property == intel_npu::turbo.name();
195+
});
196+
197+
if (isCommandQueueExtSupported()) {
198+
ASSERT_TRUE(isTurboSupported);
199+
OV_ASSERT_NO_THROW(execNet = core->compile_model(function, target_device, configuration));
200+
auto turbosetting_compiled_model = execNet.get_property(intel_npu::turbo.name());
201+
OV_ASSERT_NO_THROW(turbosetting_compiled_model = true);
202+
ov::InferRequest req;
203+
OV_ASSERT_NO_THROW(req = execNet.create_infer_request());
204+
bool is_called = false;
205+
OV_ASSERT_NO_THROW(req.set_callback([&](std::exception_ptr exception_ptr) {
206+
ASSERT_EQ(exception_ptr, nullptr);
207+
is_called = true;
208+
}));
209+
OV_ASSERT_NO_THROW(req.start_async());
210+
OV_ASSERT_NO_THROW(req.wait());
211+
ASSERT_TRUE(is_called);
212+
} else {
213+
auto cr_ex = configuration.find(intel_npu::create_executor.name());
214+
if (cr_ex->second.as<int64_t>() == 1) {
215+
OV_EXPECT_THROW_HAS_SUBSTRING(core->compile_model(function, target_device, configuration),
216+
ov::Exception,
217+
"Turbo is not supported by the current driver");
218+
} else {
219+
OV_ASSERT_NO_THROW(execNet = core->compile_model(function, target_device, configuration));
220+
OV_EXPECT_THROW_HAS_SUBSTRING(execNet.create_infer_request(),
221+
ov::Exception,
222+
"Turbo is not supported by the current driver");
223+
}
224+
}
225+
}
226+
186227
} // namespace behavior
187228
} // namespace test
188229
} // namespace ov

src/plugins/intel_npu/tests/functional/shared_tests_instances/behavior/ov_infer_request/compile_and_infer.cpp

-22
This file was deleted.

0 commit comments

Comments
 (0)