Skip to content

Commit ec9dfae

Browse files
[GPU] Dynamic element type instead of undefined. Earlier config finalize call (#29127)
### Details:
- Call `config.finalize()` earlier in the program's constructor to ensure that options are actually applied in unit tests.
- Replaced the deprecated `undefined` element type with `dynamic`.

Signed-off-by: Vladimir Paramuzov <vladimir.paramuzov@intel.com>
1 parent 810589a commit ec9dfae

File tree

4 files changed

+6
-6
lines changed

4 files changed

+6
-6
lines changed

src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp

+2 -2
Original file line number | Diff line number | Diff line change
@@ -153,8 +153,8 @@ struct weightless_cache_manager {
153153

154154
size_t bin_offset = SIZE_MAX;
155155
size_t original_size = SIZE_MAX;
156-
ov::element::Type original_dtype = ov::element::Type_t::undefined;
157-
ov::element::Type curr_dtype = ov::element::Type_t::undefined;
156+
ov::element::Type original_dtype = ov::element::Type_t::dynamic;
157+
ov::element::Type curr_dtype = ov::element::Type_t::dynamic;
158158
ov::Shape shape{};
159159

160160
bool should_run_reorder() const {

src/plugins/intel_gpu/include/intel_gpu/runtime/options.inl

+2 -2
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@ OV_CONFIG_RELEASE_OPTION(ov::device, id, "0", "ID of the current device")
88
OV_CONFIG_RELEASE_OPTION(ov, cache_dir, "", "Directory where model cache can be stored. Caching is disabled if empty")
99
OV_CONFIG_RELEASE_OPTION(ov, num_streams, 1, "Number of streams to be used for inference")
1010
OV_CONFIG_RELEASE_OPTION(ov, compilation_num_threads, std::max(1, static_cast<int>(std::thread::hardware_concurrency())), "Max number of CPU threads used for model compilation for the stages that supports parallelism")
11-
OV_CONFIG_RELEASE_OPTION(ov::hint, inference_precision, ov::element::f16, "Model floating-point inference precision. Supported values: { f16, f32, undefined }", [](ov::element::Type t) { return t == ov::element::f16 || t == ov::element::f32 || t == ov::element::undefined; })
11+
OV_CONFIG_RELEASE_OPTION(ov::hint, inference_precision, ov::element::f16, "Model floating-point inference precision. Supported values: { f16, f32, dynamic }", [](ov::element::Type t) { return t == ov::element::f16 || t == ov::element::f32 || t == ov::element::dynamic; })
1212
OV_CONFIG_RELEASE_OPTION(ov::hint, model_priority, ov::hint::Priority::MEDIUM, "High-level hint that defines the priority of the model. It may impact number of threads used for model compilton and inference as well as device queue settings")
1313
OV_CONFIG_RELEASE_OPTION(ov::hint, performance_mode, ov::hint::PerformanceMode::LATENCY, "High-level hint that defines target model inference mode. It may impact number of streams, auto batching, etc")
1414
OV_CONFIG_RELEASE_OPTION(ov::hint, execution_mode, ov::hint::ExecutionMode::PERFORMANCE, "High-level hint that defines the most important metric for the model. Performance mode allows unsafe optimizations that may reduce the model accuracy")
@@ -26,7 +26,7 @@ OV_CONFIG_RELEASE_OPTION(ov::internal, query_model_ratio, 1.0f, "")
2626
OV_CONFIG_RELEASE_OPTION(ov, cache_mode, ov::CacheMode::OPTIMIZE_SPEED, "Cache mode defines the trade-off between the model compilation time and the disk space required for the cache")
2727
OV_CONFIG_RELEASE_OPTION(ov, cache_encryption_callbacks, ov::EncryptionCallbacks{}, "Callbacks used to encrypt/decrypt the model")
2828
OV_CONFIG_RELEASE_OPTION(ov::hint, dynamic_quantization_group_size, 0, "")
29-
OV_CONFIG_RELEASE_OPTION(ov::hint, kv_cache_precision, ov::element::undefined, "")
29+
OV_CONFIG_RELEASE_OPTION(ov::hint, kv_cache_precision, ov::element::dynamic, "")
3030
OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, enable_kernels_reuse, false, "")
3131
OV_CONFIG_RELEASE_OPTION(ov, weights_path, "", "Path to the model weights file used for weightless caching")
3232
OV_CONFIG_RELEASE_OPTION(ov::hint, activations_scale_factor, -1.0f, "Scalar floating point value that is used for runtime activation tensor scaling with fp16 inference precision")

src/plugins/intel_gpu/src/graph/program.cpp

+1 -1
Original file line number | Diff line number | Diff line change
@@ -154,13 +154,13 @@ program::program(engine& engine_ref,
154154
_is_body_program(is_body_program),
155155
_compilation_context(compilation_context) {
156156
init_primitives();
157+
_config.finalize(_engine);
157158
GPU_DEBUG_INFO << "Program config\n" << _config.to_string();
158159
init_program();
159160
prepare_nodes(topology);
160161
program_node::reset_unique_id();
161162
if (no_optimizations) {
162163
init_graph();
163-
_config.finalize(_engine);
164164
} else {
165165
build_program(is_internal);
166166
if (_is_body_program) {

src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp

+1 -1
Original file line number | Diff line number | Diff line change
@@ -203,7 +203,7 @@ TEST_P(CheckWeightlessCacheAccuracyLowPrecision, MatmulWeightsDecompression) {
203203
ov::element::f32,
204204
model_dtype,
205205
ov::element::f32,
206-
ov::element::undefined,
206+
ov::element::dynamic,
207207
true,
208208
ov::test::DecompressionType::full,
209209
ov::test::DecompressionType::full,

0 commit comments

Comments
 (0)