Skip to content

Commit b274da8

Browse files
authored
[intel-npu] Adding NPU_QDQ_OPTIMIZATION property (#29377)
### Details: - Adds a new property, NPU_QDQ_OPTIMIZATION, that enables or disables additional optimizations which balance performance and accuracy for QDQ-format models quantized using ONNX Runtime - Intended to map to "NPU_COMPILATION_MODE_PARAMS":"enable-adaptive-stripping=true" ### Tickets: - *EISW-159283*
1 parent f79b04d commit b274da8

File tree

9 files changed

+66
-0
lines changed

9 files changed

+66
-0
lines changed

docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst

+1
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ offer a limited set of supported OpenVINO features.
144144
ov::workload_type
145145
ov::intel_npu::compilation_mode_params
146146
ov::intel_npu::compiler_dynamic_quantization
147+
ov::intel_npu::qdq_optimization
147148
ov::intel_npu::turbo
148149
ov::intel_npu::tiles
149150
ov::intel_npu::max_tiles

src/bindings/python/src/pyopenvino/core/properties/properties.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -337,4 +337,5 @@ void regmodule_properties(py::module m) {
337337
wrap_property_RW(m_intel_npu, ov::intel_npu::bypass_umd_caching, "bypass_umd_caching");
338338
wrap_property_RW(m_intel_npu, ov::intel_npu::defer_weights_load, "defer_weights_load");
339339
wrap_property_RW(m_intel_npu, ov::intel_npu::compiler_dynamic_quantization, "compiler_dynamic_quantization");
340+
wrap_property_RW(m_intel_npu, ov::intel_npu::qdq_optimization, "qdq_optimization");
340341
}

src/inference/include/openvino/runtime/intel_npu/properties.hpp

+9
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,15 @@ static constexpr ov::Property<std::string> compilation_mode_params{"NPU_COMPILAT
7777
*/
7878
static constexpr ov::Property<bool> compiler_dynamic_quantization{"NPU_COMPILER_DYNAMIC_QUANTIZATION"};
7979

80+
/**
81+
* @brief [Only for NPU compiler]
82+
* Type: boolean
83+
* This option enables additional optimizations that balance performance and accuracy for QDQ format models quantized
84+
* using ONNX Runtime.
85+
* @ingroup ov_runtime_npu_prop_cpp_api
86+
*/
87+
static constexpr ov::Property<bool> qdq_optimization{"NPU_QDQ_OPTIMIZATION"};
88+
8089
/**
8190
* @brief [Only for NPU plugin]
8291
* Type: boolean

src/plugins/intel_npu/README.md

+1
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,7 @@ The following properties are supported:
174174
| `ov::intel_npu::compiler_version`/</br>`NPU_COMPILER_VERSION` | RO | NPU compiler version. MSB 16 bits are Major version, LSB 16 bits are Minor version | `N/A` | `N/A` |
175175
| `ov::intel_npu::compilation_mode_params`/</br>`NPU_COMPILATION_MODE_PARAMS` | RW | Set various parameters supported by the NPU compiler. (See below) | `<std::string>`| `N/A` |
176176
| `ov::intel_npu::compiler_dynamic_quantization`/</br>`NPU_COMPILER_DYNAMIC_QUANTIZATION` | RW | Enable/Disable dynamic quantization by NPU compiler | `YES` / `NO` | `N/A` |
177+
| `ov::intel_npu::qdq_optimization`/</br>`NPU_QDQ_OPTIMIZATION` | RW | Enable/Disable additional optimizations that balance performance and accuracy for QDQ-format models quantized using ONNX Runtime | `YES` / `NO` | `NO` |
177178
| `ov::intel_npu::turbo`/</br>`NPU_TURBO` | RW | Set Turbo mode on/off | `YES`/ `NO`| `NO` |
178179
| `ov::intel_npu::tiles`/</br>`NPU_TILES` | RW | Sets the number of npu tiles to compile the model for | `[0-]` | `-1` |
179180
| `ov::intel_npu::max_tiles`/</br>`NPU_MAX_TILES` | RW | Maximum number of tiles supported by the device we compile for. Can be set for offline compilation. If not set, it will be populated by driver.| `[0-]` | `[1-6] depends on npu platform` |

src/plugins/intel_npu/src/al/include/intel_npu/config/compiler.hpp

+22
Original file line numberDiff line numberDiff line change
@@ -379,4 +379,26 @@ struct COMPILER_DYNAMIC_QUANTIZATION final : OptionBase<COMPILER_DYNAMIC_QUANTIZ
379379
}
380380
};
381381

382+
//
383+
// NPU_QDQ_OPTIMIZATION
384+
//
385+
386+
// Option descriptor for the boolean NPU_QDQ_OPTIMIZATION setting.
// Derives from OptionBase<QDQ_OPTIMIZATION, bool> and supplies the static
// accessors below (key, default value, mode, visibility).
struct QDQ_OPTIMIZATION final : OptionBase<QDQ_OPTIMIZATION, bool> {
387+
// Returns the option key: the name of the ov::intel_npu::qdq_optimization property.
static std::string_view key() {
388+
return ov::intel_npu::qdq_optimization.name();
389+
}
390+
391+
// Value reported when the option is not set explicitly: false.
static bool defaultValue() {
392+
return false;
393+
}
394+
395+
// Reports the option as OptionMode::CompileTime.
static OptionMode mode() {
396+
return OptionMode::CompileTime;
397+
}
398+
399+
// Always true. NOTE(review): presumably marks the option as user-visible;
// confirm against OptionBase's contract for isPublic().
static bool isPublic() {
400+
return true;
401+
}
402+
};
403+
382404
} // namespace intel_npu

src/plugins/intel_npu/src/al/src/config/compiler.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ void intel_npu::registerCompilerOptions(OptionsDesc& desc) {
2525
desc.add<DYNAMIC_SHAPE_TO_STATIC>();
2626
desc.add<EXECUTION_MODE_HINT>();
2727
desc.add<COMPILER_DYNAMIC_QUANTIZATION>();
28+
desc.add<QDQ_OPTIMIZATION>();
2829
}
2930

3031
//

src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp

+10
Original file line numberDiff line numberDiff line change
@@ -552,6 +552,16 @@ std::string DriverCompilerAdapter::serializeConfig(const Config& config,
552552
content = std::regex_replace(content, std::regex(dqstr.str()), "");
553553
}
554554

555+
// QDQ_OPTIMIZATION is not supported in versions < 7.5 - need to remove it
556+
if ((compilerVersion.major < 7) || (compilerVersion.major == 7 && compilerVersion.minor < 5)) {
557+
std::ostringstream qdqstr;
558+
qdqstr << ov::intel_npu::qdq_optimization.name() << KEY_VALUE_SEPARATOR << VALUE_DELIMITER << "\\S+"
559+
<< VALUE_DELIMITER;
560+
logger.warning("NPU_QDQ_OPTIMIZATION property is not supported by this compiler version. Removing from "
561+
"parameters");
562+
content = std::regex_replace(content, std::regex(qdqstr.str()), "");
563+
}
564+
555565
// NPU_DEFER_WEIGHTS_LOAD is needed at runtime only
556566
{
557567
std::ostringstream batchstr;

src/plugins/intel_npu/src/plugin/src/compiled_model.cpp

+6
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,12 @@ void CompiledModel::initialize_properties() {
271271
[](const Config& config) {
272272
return config.get<COMPILER_DYNAMIC_QUANTIZATION>();
273273
}}},
274+
{ov::intel_npu::qdq_optimization.name(),
275+
{true,
276+
ov::PropertyMutability::RO,
277+
[](const Config& config) {
278+
return config.get<QDQ_OPTIMIZATION>();
279+
}}},
274280
{ov::intel_npu::turbo.name(),
275281
{isPropertySupported(ov::intel_npu::turbo.name()),
276282
ov::PropertyMutability::RO,

src/plugins/intel_npu/src/plugin/src/plugin.cpp

+15
Original file line numberDiff line numberDiff line change
@@ -462,6 +462,12 @@ Plugin::Plugin()
462462
[](const Config& config) {
463463
return config.get<COMPILER_DYNAMIC_QUANTIZATION>();
464464
}}},
465+
{ov::intel_npu::qdq_optimization.name(),
466+
{false,
467+
ov::PropertyMutability::RW,
468+
[](const Config& config) {
469+
return config.get<QDQ_OPTIMIZATION>();
470+
}}},
465471
{ov::intel_npu::turbo.name(),
466472
{_backends->isCommandQueueExtSupported(),
467473
ov::PropertyMutability::RW,
@@ -613,6 +619,15 @@ void Plugin::reset_compiler_dependent_properties() const {
613619
std::get<0>(_properties[ov::intel_npu::compiler_dynamic_quantization.name()]) = false; // mark unsupported
614620
}
615621
}
622+
// NPU_QDQ_OPTIMIZATION
623+
// unpublish if compiler version requirement is not met
624+
if (_properties.find(ov::intel_npu::qdq_optimization.name()) != _properties.end()) {
625+
if (active_compiler_version >= ICOMPILER_MAKE_VERSION(7, 5)) {
626+
std::get<0>(_properties[ov::intel_npu::qdq_optimization.name()]) = true; /// mark supported
627+
} else {
628+
std::get<0>(_properties[ov::intel_npu::qdq_optimization.name()]) = false; // mark unsupported
629+
}
630+
}
616631
}
617632

618633
void Plugin::set_property(const ov::AnyMap& properties) {

0 commit comments

Comments
 (0)