Skip to content

Commit bd91467

Browse files
[AUTO] Refine the logic of creating HW plugins in AUTO (openvinotoolkit#27691)
### Details:
- The core needs to pass `ov::device::priorities` to the meta plugin so that no unnecessary HW plugin is created when the core checks whether the meta plugin supports the caching property.
- AUTO only creates/holds the HW plugins for the device list specified by the user.
- AUTO collects capabilities from the specified device candidate list rather than from all devices registered in the core.

### Tickets:
- CVS-157496

---------

Co-authored-by: Wanglei Shen <wanglei.shen@intel.com>
1 parent 914cd0c commit bd91467

14 files changed

+268
-134
lines changed

src/inference/src/dev/core_impl.cpp

+6-6
Original file line numberDiff line numberDiff line change
@@ -774,7 +774,7 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::compile_model(const std::shared_ptr<
774774
// will consume ov::cache_dir if plugin not support it
775775
auto cacheManager = parsed._core_config.get_cache_config_for_device(plugin, parsed._config)._cacheManager;
776776
// Skip caching for proxy plugin. HW plugin will load network from the cache
777-
if (cacheManager && device_supports_model_caching(plugin) && !is_proxy_device(plugin)) {
777+
if (cacheManager && device_supports_model_caching(plugin, parsed._config) && !is_proxy_device(plugin)) {
778778
CacheContent cacheContent{cacheManager, parsed._core_config.get_enable_mmap()};
779779
cacheContent.blobId = ov::ModelCache::compute_hash(model, create_compile_config(plugin, parsed._config));
780780
std::unique_ptr<CacheGuardEntry> lock = cacheGuard.get_hash_lock(cacheContent.blobId);
@@ -808,7 +808,7 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::compile_model(const std::shared_ptr<
808808
// will consume ov::cache_dir if plugin not support it
809809
auto cacheManager = parsed._core_config.get_cache_config_for_device(plugin, parsed._config)._cacheManager;
810810
// Skip caching for proxy plugin. HW plugin will load network from the cache
811-
if (cacheManager && device_supports_model_caching(plugin) && !is_proxy_device(plugin)) {
811+
if (cacheManager && device_supports_model_caching(plugin, parsed._config) && !is_proxy_device(plugin)) {
812812
CacheContent cacheContent{cacheManager, parsed._core_config.get_enable_mmap()};
813813
cacheContent.blobId = ov::ModelCache::compute_hash(model, create_compile_config(plugin, parsed._config));
814814
std::unique_ptr<CacheGuardEntry> lock = cacheGuard.get_hash_lock(cacheContent.blobId);
@@ -832,7 +832,7 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::compile_model(const std::string& mod
832832
// will consume ov::cache_dir if plugin not support it
833833
auto cacheManager = parsed._core_config.get_cache_config_for_device(plugin, parsed._config)._cacheManager;
834834

835-
if (cacheManager && device_supports_model_caching(plugin) && !is_proxy_device(plugin)) {
835+
if (cacheManager && device_supports_model_caching(plugin, parsed._config) && !is_proxy_device(plugin)) {
836836
// Skip caching for proxy plugin. HW plugin will load network from the cache
837837
CoreConfig::remove_core_skip_cache_dir(parsed._config);
838838
CacheContent cacheContent{cacheManager, parsed._core_config.get_enable_mmap(), model_path};
@@ -860,7 +860,7 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::compile_model(const std::string& mod
860860
// will consume ov::cache_dir if plugin not support it
861861
auto cacheManager = parsed._core_config.get_cache_config_for_device(plugin, parsed._config)._cacheManager;
862862
// Skip caching for proxy plugin. HW plugin will load network from the cache
863-
if (cacheManager && device_supports_model_caching(plugin) && !is_proxy_device(plugin)) {
863+
if (cacheManager && device_supports_model_caching(plugin, parsed._config) && !is_proxy_device(plugin)) {
864864
CacheContent cacheContent{cacheManager, parsed._core_config.get_enable_mmap()};
865865
cacheContent.blobId =
866866
ov::ModelCache::compute_hash(model_str, weights, create_compile_config(plugin, parsed._config));
@@ -1378,8 +1378,8 @@ bool ov::CoreImpl::device_supports_internal_property(const ov::Plugin& plugin, c
13781378
return util::contains(plugin.get_property(ov::internal::supported_properties), key);
13791379
}
13801380

1381-
bool ov::CoreImpl::device_supports_model_caching(const ov::Plugin& plugin) const {
1382-
return plugin.supports_model_caching();
1381+
bool ov::CoreImpl::device_supports_model_caching(const ov::Plugin& plugin, const ov::AnyMap& arguments) const {
1382+
return plugin.supports_model_caching(arguments);
13831383
}
13841384

13851385
bool ov::CoreImpl::device_supports_cache_dir(const ov::Plugin& plugin) const {

src/inference/src/dev/core_impl.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ class CoreImpl : public ov::ICore, public std::enable_shared_from_this<ov::ICore
216216
const ov::SoPtr<ov::IRemoteContext>& context,
217217
std::function<ov::SoPtr<ov::ICompiledModel>()> compile_model_lambda) const;
218218

219-
bool device_supports_model_caching(const ov::Plugin& plugin) const;
219+
bool device_supports_model_caching(const ov::Plugin& plugin, const ov::AnyMap& origConfig = {}) const;
220220

221221
bool device_supports_property(const ov::Plugin& plugin, const ov::PropertyName& key) const;
222222
bool device_supports_internal_property(const ov::Plugin& plugin, const ov::PropertyName& key) const;

src/inference/src/dev/plugin.cpp

+5-4
Original file line numberDiff line numberDiff line change
@@ -101,10 +101,11 @@ ov::Any ov::Plugin::get_property(const std::string& name, const AnyMap& argument
101101
return {m_ptr->get_property(name, arguments), {m_so}};
102102
}
103103

104-
bool ov::Plugin::supports_model_caching() const {
104+
bool ov::Plugin::supports_model_caching(const ov::AnyMap& arguments) const {
105105
bool supported(false);
106-
supported = util::contains(get_property(ov::supported_properties), ov::device::capabilities) &&
107-
util::contains(get_property(ov::device::capabilities), ov::device::capability::EXPORT_IMPORT) &&
108-
util::contains(get_property(ov::internal::supported_properties), ov::internal::caching_properties);
106+
supported =
107+
util::contains(get_property(ov::supported_properties), ov::device::capabilities) &&
108+
util::contains(get_property(ov::device::capabilities, arguments), ov::device::capability::EXPORT_IMPORT) &&
109+
util::contains(get_property(ov::internal::supported_properties), ov::internal::caching_properties);
109110
return supported;
110111
}

src/inference/src/dev/plugin.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ class Plugin {
7474
T get_property(const ov::Property<T, M>& property, const AnyMap& arguments) const {
7575
return get_property(property.name(), arguments).template as<T>();
7676
}
77-
bool supports_model_caching() const;
77+
bool supports_model_caching(const AnyMap& arguments = {}) const;
7878
};
7979

8080
} // namespace ov

src/plugins/auto/src/auto_schedule.cpp

+10-1
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,8 @@ void AutoSchedule::init() {
101101
auto load_device_task = [&](AutoCompileContext* context_ptr, const std::shared_ptr<ov::Model>& model) {
102102
try_to_compile_model(*context_ptr, model);
103103
if (context_ptr->m_is_load_success) {
104+
// release cloned model here
105+
const_cast<std::shared_ptr<ov::Model>&>(model).reset();
104106
if (context_ptr->m_worker_name.empty()) {
105107
context_ptr->m_worker_name = context_ptr->m_device_info.device_name;
106108
}
@@ -187,12 +189,14 @@ void AutoSchedule::init() {
187189
} else {
188190
customize_helper_context_from_cache_setting(is_actual_cpu, m_compile_context, m_context);
189191
}
192+
std::shared_ptr<ov::Model> model;
190193
// initialize the rest members of load context
191194
for (int i = 0; i < CONTEXTNUM; i++) {
192195
if (m_compile_context[i].m_is_enabled) {
193196
m_compile_context[i].m_future = m_compile_context[i].m_promise.get_future();
194197
auto* context_ptr = &m_compile_context[i];
195-
auto model = m_context->m_model;
198+
// clone this model if multi HW plugins need to load model in a background thread
199+
model = !model ? m_context->m_model : m_context->m_model->clone();
196200
m_compile_context[i].m_task = std::bind(load_device_task, context_ptr, model);
197201
}
198202
}
@@ -297,6 +301,11 @@ void AutoSchedule::init() {
297301
// only one device need to compile model, do not need to compile it async
298302
m_compile_context[ACTUALDEVICE].m_task();
299303
m_passthrough_compiled_model = m_compile_context[ACTUALDEVICE].m_compiled_model;
304+
if (!m_context->m_bind_buffer) {
305+
m_worker_requests.clear();
306+
m_idle_worker_requests.clear();
307+
m_infer_pipeline_tasks_device_specific.clear();
308+
}
300309
}
301310
m_context->m_hw_compiled_model = wait_first_compiled_model_ready();
302311
}

0 commit comments

Comments
 (0)