diff --git a/src/common/util/include/openvino/util/mmap_object.hpp b/src/common/util/include/openvino/util/mmap_object.hpp
index 5cfc2adac1ec0f..364e1eed4ca712 100644
--- a/src/common/util/include/openvino/util/mmap_object.hpp
+++ b/src/common/util/include/openvino/util/mmap_object.hpp
@@ -9,6 +9,7 @@
 #pragma once
+#include
 #include
 #include
@@ -50,4 +51,17 @@ std::shared_ptr load_mmap_object(const std::wstring& path);
 #endif // OPENVINO_ENABLE_UNICODE_PATH_SUPPORT
+class MmapStream final : public std::ifstream {
+public:
+    MmapStream(const std::string& path) : std::ifstream(path, std::ios_base::binary) {
+        m_memory = ov::load_mmap_object(path);
+    }
+
+#ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT
+    MmapStream(const std::wstring& path);
+#endif // OPENVINO_ENABLE_UNICODE_PATH_SUPPORT
+
+    std::shared_ptr m_memory;
+};
+
 } // namespace ov
diff --git a/src/common/util/src/os/win/win_mmap_object.cpp b/src/common/util/src/os/win/win_mmap_object.cpp
index 114ef6cd9cd6a7..0b14d7ac774700 100644
--- a/src/common/util/src/os/win/win_mmap_object.cpp
+++ b/src/common/util/src/os/win/win_mmap_object.cpp
@@ -142,6 +142,10 @@ std::shared_ptr load_mmap_object(const std::wstring& path) {
     return holder;
 }
+MmapStream::MmapStream(const std::wstring& path) : std::ifstream(path.data(), std::ios_base::binary) {
+    m_memory = ov::load_mmap_object(path);
+}
+
 #endif
 } // namespace ov
diff --git a/src/core/dev_api/openvino/runtime/shared_buffer.hpp b/src/core/dev_api/openvino/runtime/shared_buffer.hpp
index c4420b9e3fa822..7f1e2e9ba7601f 100644
--- a/src/core/dev_api/openvino/runtime/shared_buffer.hpp
+++ b/src/core/dev_api/openvino/runtime/shared_buffer.hpp
@@ -8,7 +8,7 @@ namespace ov {
-/// \brief SharedBuffer class to store pointer to pre-acclocated buffer.
+/// \brief SharedBuffer class to store pointer to pre-allocated buffer.
 template
 class SharedBuffer : public ov::AlignedBuffer {
 public:
diff --git a/src/frontends/ir/src/frontend.cpp b/src/frontends/ir/src/frontend.cpp
index c1f80f102e5a87..db979a35d932af 100644
--- a/src/frontends/ir/src/frontend.cpp
+++ b/src/frontends/ir/src/frontend.cpp
@@ -37,19 +37,17 @@ size_t get_ir_version(const pugi::xml_document& doc) {
     return 0;
 }
+constexpr size_t HEADER_SIZE_LIM = 512lu;
+
 /**
  * @brief Extracts IR version from model stream
  * @param model Model's stream
  * @return IR version, 0 if model does not represent IR
  */
-size_t get_ir_version(std::istream& model) {
+size_t get_ir_version(const char* model, size_t model_size) {
     // IR version is a value of the root tag attribute, thus there is no need to parse the whole stream.
-    std::array header{};
-    model.seekg(0, model.beg);
-    model.read(header.data(), header.size());
-    model.clear();
-    model.seekg(0, model.beg);
+    size_t header_size = model_size > HEADER_SIZE_LIM ? HEADER_SIZE_LIM : model_size;
     pugi::xml_document doc;
     // For dominant number of IRs `load_buffer' in this case returns parsing-error as 512 is not enough for the whole
    // tree is not destroyed - despite the fact that load function returns error, you can use the part of the tree that
    // was successfully parsed." root node is processed because it should be enough to read model version. However if IR
    // is small enough to fit 512 bytes ok-status is returned. Thus ignoring returned value.
- std::ignore = - doc.load_buffer(header.data(), header.size(), pugi::parse_default | pugi::parse_fragment, pugi::encoding_utf8); + std::ignore = doc.load_buffer(model, header_size, pugi::parse_default | pugi::parse_fragment, pugi::encoding_utf8); auto ir_version = get_ir_version(doc); // In case attribute name is very long and placed before version attribute of root node or there is long comment // node before root node then version attribute of root node is not accesible within first 512 bytes, so read the // whole stream and try to obtain version value. - if (ir_version == 0) { + if (ir_version == 0lu && header_size < model_size && + doc.load_buffer(model, model_size, pugi::parse_default | pugi::parse_fragment, pugi::encoding_utf8)) { + ir_version = get_ir_version(doc); + } + + return ir_version; +} + +size_t get_ir_version(std::istream& model) { + char header[HEADER_SIZE_LIM]; + + model.seekg(0, model.beg); + model.read(header, HEADER_SIZE_LIM); + model.clear(); + model.seekg(0, model.beg); + + auto ir_version = get_ir_version(header, HEADER_SIZE_LIM); + if (ir_version == 0lu) { + pugi::xml_document doc; if (doc.load(model)) ir_version = get_ir_version(doc); @@ -75,6 +90,7 @@ size_t get_ir_version(std::istream& model) { return ir_version; } + } // namespace bool FrontEnd::supported_impl(const std::vector& variants) const { @@ -82,6 +98,7 @@ bool FrontEnd::supported_impl(const std::vector& variants) const { size_t extra_variants_num = variants.size() > 0 && variants[variants.size() - 1].is() ? 1 : 0; std::ifstream local_model_stream; std::istream* provided_model_stream = nullptr; + std::shared_ptr model_buffer = nullptr; if (variants.empty() || variants.size() > 3 + extra_variants_num) { return false; @@ -102,6 +119,8 @@ bool FrontEnd::supported_impl(const std::vector& variants) const { provided_model_stream = model_variant.as(); } else if (model_variant.is()) { provided_model_stream = model_variant.as(); + } else if (model_variant.is>()) { + model_buffer = model_variant.as>(); } if (provided_model_stream && local_model_stream.is_open()) { @@ -114,6 +133,8 @@ bool FrontEnd::supported_impl(const std::vector& variants) const { } else if (local_model_stream.is_open()) { version = get_ir_version(local_model_stream); local_model_stream.close(); + } else if (model_buffer) { + version = get_ir_version(model_buffer->get_ptr(), model_buffer->size()); } else { return false; } @@ -135,6 +156,7 @@ void FrontEnd::add_extension(const ov::Extension::Ptr& ext) { InputModel::Ptr FrontEnd::load_impl(const std::vector& variants) const { std::ifstream local_model_stream; std::istream* provided_model_stream = nullptr; + std::shared_ptr model_buf; std::shared_ptr weights; auto create_extensions_map = [&]() -> std::unordered_map { @@ -153,6 +175,8 @@ InputModel::Ptr FrontEnd::load_impl(const std::vector& variants) const auto input_model = std::make_shared(local_model_stream, weights, create_extensions_map()); local_model_stream.close(); return input_model; + } else if (model_buf) { + return std::make_shared(model_buf, weights, create_extensions_map()); } return nullptr; }; @@ -184,6 +208,8 @@ InputModel::Ptr FrontEnd::load_impl(const std::vector& variants) const provided_model_stream = model_variant.as(); } else if (model_variant.is()) { provided_model_stream = model_variant.as(); + } else if (model_variant.is>()) { + model_buf = model_variant.as>(); } // Check weights and extensions diff --git a/src/frontends/ir/src/input_model.cpp b/src/frontends/ir/src/input_model.cpp index 968cbac7a51430..6c59617c69a48d 
100644 --- a/src/frontends/ir/src/input_model.cpp +++ b/src/frontends/ir/src/input_model.cpp @@ -207,28 +207,47 @@ class InputModel::InputModelIRImpl { pugi::xml_document m_xml_doc; public: - InputModelIRImpl(std::istream& stream, + InputModelIRImpl(std::istream& model, const std::shared_ptr& weights, const std::unordered_map& extensions) : m_weights(weights), m_extensions(extensions) { - pugi::xml_parse_result res = m_xml_doc.load(stream); - if (res.status != pugi::status_ok) { - OPENVINO_THROW(res.description(), " at offset ", res.offset); - } + pugi::xml_parse_result res = m_xml_doc.load(model); + OPENVINO_ASSERT(res.status == pugi::status_ok, res.description(), " at offset ", res.offset); + init_opset(); + } + + InputModelIRImpl(const std::shared_ptr& model, + const std::shared_ptr& weights, + const std::unordered_map& extensions) + : m_weights(weights), + m_extensions(extensions) { + auto res = m_xml_doc.load_buffer(model->get_ptr(), model->size(), pugi::parse_default, pugi::encoding_utf8); + OPENVINO_ASSERT(res.status == pugi::status_ok, res.description(), " at offset ", res.offset); + init_opset(); + } + + std::shared_ptr convert(); + +private: + void init_opset() { m_root = m_xml_doc.document_element(); for (const auto& it : ov::get_available_opsets()) { m_opsets[it.first] = it.second(); } } - - std::shared_ptr convert(); }; -InputModel::InputModel(std::istream& stream, +InputModel::InputModel(std::istream& model, + const std::shared_ptr& weights, + const std::unordered_map& extensions) { + _impl = std::make_shared(model, weights, extensions); +} + +InputModel::InputModel(const std::shared_ptr& model, const std::shared_ptr& weights, const std::unordered_map& extensions) { - _impl = std::make_shared(stream, weights, extensions); + _impl = std::make_shared(model, weights, extensions); } std::shared_ptr InputModel::convert() { diff --git a/src/frontends/ir/src/input_model.hpp b/src/frontends/ir/src/input_model.hpp index a1878fe88d6714..331092749bbeb9 100644 --- a/src/frontends/ir/src/input_model.hpp +++ b/src/frontends/ir/src/input_model.hpp @@ -24,6 +24,10 @@ class InputModel : public ov::frontend::InputModel { const std::shared_ptr& weights, const std::unordered_map& extensions); + InputModel(const std::shared_ptr& model_buf, + const std::shared_ptr& weights, + const std::unordered_map& extensions); + std::shared_ptr convert(); }; diff --git a/src/inference/dev_api/openvino/runtime/icore.hpp b/src/inference/dev_api/openvino/runtime/icore.hpp index 378221b0dc7dbe..659b9c5c0f5788 100644 --- a/src/inference/dev_api/openvino/runtime/icore.hpp +++ b/src/inference/dev_api/openvino/runtime/icore.hpp @@ -11,6 +11,7 @@ #include +#include "openvino/runtime/aligned_buffer.hpp" #include "openvino/runtime/icompiled_model.hpp" #include "openvino/runtime/properties.hpp" #include "openvino/runtime/so_ptr.hpp" @@ -45,6 +46,15 @@ class OPENVINO_RUNTIME_API ICore { const ov::Tensor& weights, bool frontend_mode = false) const = 0; + /** + * @brief Reads IR xml and bin from buffer + * @param model shared pointer to aligned buffer with IR + * @param weights shared pointer to aligned buffer with weights + * @return shared pointer to ov::Model + */ + virtual std::shared_ptr read_model(const std::shared_ptr& model, + const std::shared_ptr& weights) const = 0; + /** * @brief Reads IR xml and bin files * @param model_path path to IR file diff --git a/src/inference/dev_api/openvino/runtime/internal_properties.hpp b/src/inference/dev_api/openvino/runtime/internal_properties.hpp index 
eb4bc9bee916a7..60d6b66cfda897 100644
--- a/src/inference/dev_api/openvino/runtime/internal_properties.hpp
+++ b/src/inference/dev_api/openvino/runtime/internal_properties.hpp
@@ -29,6 +29,13 @@ static constexpr Property, PropertyMutability::RO> sup
 */
 static constexpr Property, PropertyMutability::RO> caching_properties{"CACHING_PROPERTIES"};
+/**
+ * @brief Read-only property indicating that a plugin can import a cached blob
+ * through a memory-mapped stream
+ * @ingroup ov_dev_api_plugin_api
+ */
+static constexpr Property caching_with_mmap{"CACHING_WITH_MMAP"};
+
 /**
  * @brief Allow to create exclusive_async_requests with one executor
  * @ingroup ov_dev_api_plugin_api
diff --git a/src/inference/src/cache_manager.hpp b/src/inference/src/cache_manager.hpp
index b14fe2abe18a7e..9e9ebd3ddcbc2b 100644
--- a/src/inference/src/cache_manager.hpp
+++ b/src/inference/src/cache_manager.hpp
@@ -15,6 +15,7 @@
 #include
 #include "openvino/util/file_util.hpp"
+#include "openvino/util/mmap_object.hpp"
 namespace ov {
@@ -78,7 +79,7 @@ class ICacheManager {
     * @param id Id of cache (hash of the model)
     * @param reader Lambda function to be called when input stream is created
+    * @param mmap Whether the cache entry should be read through a memory-mapped stream
     */
-    virtual void read_cache_entry(const std::string& id, StreamReader reader) = 0;
+    virtual void read_cache_entry(const std::string& id, StreamReader reader, bool mmap = false) = 0;
     /**
     * @brief Callback when OpenVINO intends to remove cache entry
@@ -129,13 +130,18 @@ class FileStorageCacheManager final : public ICacheManager {
         writer(stream);
     }
-    void read_cache_entry(const std::string& id, StreamReader reader) override {
+    void read_cache_entry(const std::string& id, StreamReader reader, bool mmap = false) override {
         // Fix the bug caused by pugixml, which may return unexpected results if the locale is different from "C".
ScopedLocale plocal_C(LC_ALL, "C"); - auto blobFileName = getBlobFile(id); - if (ov::util::file_exists(blobFileName)) { - std::ifstream stream(blobFileName, std::ios_base::binary); - reader(stream); + auto blob_file_name = getBlobFile(id); + if (ov::util::file_exists(blob_file_name)) { + if (mmap) { + MmapStream stream(blob_file_name); + reader(stream); + } else { + std::ifstream stream(blob_file_name, std::ios_base::binary); + reader(stream); + } } } diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp index 237c246ab38bdc..9f55dc53ccd24f 100644 --- a/src/inference/src/dev/core_impl.cpp +++ b/src/inference/src/dev/core_impl.cpp @@ -24,6 +24,7 @@ #include "openvino/runtime/itensor.hpp" #include "openvino/runtime/make_tensor.hpp" #include "openvino/runtime/remote_context.hpp" +#include "openvino/runtime/shared_buffer.hpp" #include "openvino/runtime/threading/executor_manager.hpp" #include "openvino/util/common_util.hpp" #include "openvino/util/file_util.hpp" @@ -1396,6 +1397,13 @@ ov::SoPtr ov::CoreImpl::compile_model_and_cache(ov::Plugin& return compiled_model; } +static bool does_plugin_support_model_caching_with_mmap(const ov::Plugin& plugin) { + bool supported = plugin.supports_model_caching(); + supported &= + ov::util::contains(plugin.get_property(ov::internal::supported_properties), ov::internal::caching_with_mmap); + return supported; +} + ov::SoPtr ov::CoreImpl::load_model_from_cache( const CacheContent& cacheContent, ov::Plugin& plugin, @@ -1406,43 +1414,48 @@ ov::SoPtr ov::CoreImpl::load_model_from_cache( struct HeaderException {}; OPENVINO_ASSERT(cacheContent.cacheManager != nullptr); + try { - cacheContent.cacheManager->read_cache_entry(cacheContent.blobId, [&](std::istream& networkStream) { - OV_ITT_SCOPE(FIRST_INFERENCE, - ov::itt::domains::LoadTime, - "Core::load_model_from_cache::ReadStreamAndImport"); - try { - ov::CompiledBlobHeader header; - networkStream >> header; - if (header.get_file_info() != ov::ModelCache::calculate_file_info(cacheContent.modelPath)) { - // Original file is changed, don't use cache - OPENVINO_THROW("Original model file is changed"); - } - if (util::contains(plugin.get_property(ov::internal::supported_properties), - ov::internal::compiled_model_runtime_properties_supported.name())) { - ov::AnyMap compiled_model_runtime_properties = { - {ov::internal::compiled_model_runtime_properties.name(), - std::string(header.get_runtime_info())}}; - auto res = plugin.get_property(ov::internal::compiled_model_runtime_properties_supported.name(), - compiled_model_runtime_properties); - if (!res.as()) { - OPENVINO_THROW("Original model runtime properties have been changed, not supported anymore!"); + cacheContent.cacheManager->read_cache_entry( + cacheContent.blobId, + [&](std::istream& networkStream) { + OV_ITT_SCOPE(FIRST_INFERENCE, + ov::itt::domains::LoadTime, + "Core::load_model_from_cache::ReadStreamAndImport"); + try { + ov::CompiledBlobHeader header; + networkStream >> header; + if (header.get_file_info() != ov::ModelCache::calculate_file_info(cacheContent.modelPath)) { + // Original file is changed, don't use cache + OPENVINO_THROW("Original model file is changed"); } - } else { - if (header.get_openvino_version() != ov::get_openvino_version().buildNumber) { - // Build number mismatch, don't use this cache - OPENVINO_THROW("Version does not match"); + if (util::contains(plugin.get_property(ov::internal::supported_properties), + ov::internal::compiled_model_runtime_properties_supported.name())) { + ov::AnyMap 
compiled_model_runtime_properties = { + {ov::internal::compiled_model_runtime_properties.name(), + std::string(header.get_runtime_info())}}; + auto res = plugin.get_property(ov::internal::compiled_model_runtime_properties_supported.name(), + compiled_model_runtime_properties); + if (!res.as()) { + OPENVINO_THROW( + "Original model runtime properties have been changed, not supported anymore!"); + } + } else { + if (header.get_openvino_version() != ov::get_openvino_version().buildNumber) { + // Build number mismatch, don't use this cache + OPENVINO_THROW("Version does not match"); + } } + } catch (...) { + throw HeaderException(); } - } catch (...) { - throw HeaderException(); - } - ov::AnyMap update_config = config; - update_config[ov::loaded_from_cache.name()] = true; - compiled_model = context ? plugin.import_model(networkStream, context, update_config) - : plugin.import_model(networkStream, update_config); - }); + ov::AnyMap update_config = config; + update_config[ov::loaded_from_cache.name()] = true; + compiled_model = context ? plugin.import_model(networkStream, context, update_config) + : plugin.import_model(networkStream, update_config); + }, + does_plugin_support_model_caching_with_mmap(plugin)); } catch (const HeaderException&) { // For these exceptions just remove old cache and set that import didn't work cacheContent.cacheManager->remove_cache_entry(cacheContent.blobId); @@ -1603,6 +1616,12 @@ std::shared_ptr ov::CoreImpl::read_model(const std::string& model, return ov::util::read_model(model, weights, extensions, frontendMode); } +std::shared_ptr ov::CoreImpl::read_model(const std::shared_ptr& model, + const std::shared_ptr& weights) const { + OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::ReadTime, "CoreImpl::read_model from memory"); + return ov::util::read_model(model, weights, extensions); +} + std::map ov::CoreImpl::get_versions(const std::string& deviceName) const { std::map versions; std::vector deviceNames; diff --git a/src/inference/src/dev/core_impl.hpp b/src/inference/src/dev/core_impl.hpp index 40f2a15bb725e0..79b1b96d57ac30 100644 --- a/src/inference/src/dev/core_impl.hpp +++ b/src/inference/src/dev/core_impl.hpp @@ -253,6 +253,9 @@ class CoreImpl : public ov::ICore, public std::enable_shared_from_this read_model(const std::shared_ptr& model, + const std::shared_ptr& weights) const override; + std::shared_ptr read_model(const std::string& model_path, const std::string& bin_path) const override; ov::SoPtr compile_model(const std::shared_ptr& model, diff --git a/src/inference/src/dev/plugin.cpp b/src/inference/src/dev/plugin.cpp index 288389c46db859..40207bac9087fa 100644 --- a/src/inference/src/dev/plugin.cpp +++ b/src/inference/src/dev/plugin.cpp @@ -73,10 +73,10 @@ ov::SoPtr ov::Plugin::import_model(std::istream& model, cons OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, properties), m_so}); } -ov::SoPtr ov::Plugin::import_model(std::istream& networkModel, +ov::SoPtr ov::Plugin::import_model(std::istream& model, const ov::SoPtr& context, const ov::AnyMap& config) const { - OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(networkModel, context, config), m_so}); + OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, context, config), m_so}); } ov::SoPtr ov::Plugin::create_context(const AnyMap& params) const { diff --git a/src/inference/src/dev/plugin.hpp b/src/inference/src/dev/plugin.hpp index 9eeed484840fff..14a5adebbab3a4 100644 --- a/src/inference/src/dev/plugin.hpp +++ b/src/inference/src/dev/plugin.hpp @@ -55,7 +55,7 @@ class Plugin { 
SoPtr import_model(std::istream& model, const ov::AnyMap& properties) const; - SoPtr import_model(std::istream& networkModel, + SoPtr import_model(std::istream& model, const ov::SoPtr& context, const ov::AnyMap& config) const; diff --git a/src/inference/src/model_reader.cpp b/src/inference/src/model_reader.cpp index febd9b1174dda6..aaf620ea0f803a 100644 --- a/src/inference/src/model_reader.cpp +++ b/src/inference/src/model_reader.cpp @@ -195,5 +195,33 @@ std::shared_ptr read_model(const std::string& model, OPENVINO_THROW("Unable to read the model. Please check if the model format is supported and model is correct."); } +std::shared_ptr read_model(const std::shared_ptr& model, + const std::shared_ptr& weights, + const std::vector& ov_exts) { + // Try to load with FrontEndManager + ov::frontend::FrontEndManager manager; + ov::frontend::FrontEnd::Ptr FE; + ov::frontend::InputModel::Ptr inputModel; + + ov::AnyVector params{model}; + if (weights) { + params.emplace_back(weights); + } + + FE = manager.load_by_model(params); + if (FE) { + FE->add_extension(ov_exts); + inputModel = FE->load(params); + } + if (inputModel) { + auto model = FE->convert(inputModel); + update_v10_model(model); + return model; + } + + OPENVINO_THROW( + "[ CORE ] Unable to read the model. Please check if the model format is supported and model is correct."); +} + } // namespace util } // namespace ov diff --git a/src/inference/src/model_reader.hpp b/src/inference/src/model_reader.hpp index 4617fa55b83e9b..433da2ee5d2107 100644 --- a/src/inference/src/model_reader.hpp +++ b/src/inference/src/model_reader.hpp @@ -9,6 +9,7 @@ #include "openvino/core/extension.hpp" #include "openvino/core/model.hpp" +#include "openvino/runtime/aligned_buffer.hpp" namespace ov { @@ -28,6 +29,17 @@ std::shared_ptr read_model(const std::string& modelPath, const std::vector& extensions, bool enable_mmap); +/** + * @brief Reads model + * @param model shared pointer to aligned buffer with IR. + * @param weights shared pointer to aligned buffer with weights. 
+ * @param extensions vector with OpenVINO extensions + * @return Shared pointer to ov::Model + */ +std::shared_ptr read_model(const std::shared_ptr& model, + const std::shared_ptr& weights, + const std::vector& extensions); + /** * @brief Reads model * @param model Serialized model representation diff --git a/src/plugins/intel_cpu/src/compiled_model.cpp b/src/plugins/intel_cpu/src/compiled_model.cpp index 72943b837f1f3b..bbee5d937be5d5 100644 --- a/src/plugins/intel_cpu/src/compiled_model.cpp +++ b/src/plugins/intel_cpu/src/compiled_model.cpp @@ -10,7 +10,6 @@ #include "memory_state.h" #include "openvino/core/type/element_type.hpp" #include "openvino/runtime/intel_cpu/properties.hpp" -#include "serialize.h" #include "openvino/runtime/threading/executor_manager.hpp" #include "transformations/transformation_pipeline.h" #include "openvino/runtime/properties.hpp" @@ -19,6 +18,7 @@ #include "transformations/utils/utils.hpp" #include "openvino/runtime/threading/cpu_streams_info.hpp" #include "openvino/runtime/threading/cpu_message.hpp" +#include "utils/serialize.hpp" #include "cpu/x64/cpu_isa_traits.hpp" #include diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp index 1aae0adf83bb47..2b9cdcc4ac1203 100644 --- a/src/plugins/intel_cpu/src/config.cpp +++ b/src/plugins/intel_cpu/src/config.cpp @@ -379,6 +379,7 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { } catch (ov::Exception&) { OPENVINO_THROW("Wrong value for property key ", ov::cache_encryption_callbacks.name()); } + } else if (key == ov::internal::caching_with_mmap.name()) { } else { OPENVINO_THROW("NotFound: Unsupported property ", key, " by CPU plugin."); } diff --git a/src/plugins/intel_cpu/src/nodes/input.cpp b/src/plugins/intel_cpu/src/nodes/input.cpp index c311c40714cb37..4ee5707e0a9e76 100644 --- a/src/plugins/intel_cpu/src/nodes/input.cpp +++ b/src/plugins/intel_cpu/src/nodes/input.cpp @@ -287,21 +287,20 @@ void Input::cloneBlobIfRequired() { return ptr; }; - auto isBlobAligned = [&, this] () { - const void *ptr = constOp->get_data_ptr(); + auto isBlobAligned = [&] () { bool blobAlignedOnSSE = true; #if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) // Majority of arithmetic and data processing instructions in legacy SSE isa requires // the memory address in the operands must be aligned on 16-byte boundary. To ensure // safely reusing ngraph const blob memory, need to check address alignment. + const void *ptr = constOp->get_data_ptr(); blobAlignedOnSSE = mayiuse(cpu_isa_t::avx2) || ((reinterpret_cast(ptr) & 15) == 0); #endif - const bool blobAlignedWithPrec = prec.size() > 1 ? (reinterpret_cast(ptr) % prec.size()) == 0 : true; - return blobAlignedWithPrec && blobAlignedOnSSE; + return blobAlignedOnSSE; }; // The presence of subnormals is better to determined at IR read time. - auto hasSubnormals = [&, this] () { + auto hasSubnormals = [&] () { if (prec == ov::element::f32) { uint32_t const *u32data = constOp->get_data_ptr(); @@ -344,7 +343,7 @@ void Input::cloneBlobIfRequired() { return false; }; - auto blobKey = [&, this] () { + auto blobKey = [&] () { char ptr[32]; snprintf(ptr, sizeof ptr, "%p", constOp->get_data_ptr()); return getName() @@ -362,7 +361,6 @@ void Input::cloneBlobIfRequired() { // This is possible only in multistream case on multisocket machine. // TODO: don't clone blob for multisocket + multistream case if current stream is run on the numa node where original weights are stored. 
(!weightCache || context->getNumNumaNodes() == 1 || context->getCPUStreamExecutor()->get_streams_num() == 1); - memoryPtr = clone_is_not_needed ? std::make_shared(getEngine(), memDesc, constOp->get_data_ptr()) : std::const_pointer_cast( weightCache ? *weightCache->findOrCreate(blobKey(), cloneBlob) : cloneBlob()); diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index 3bfe2ec01a360d..1082157e86d53f 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -4,6 +4,7 @@ #include "plugin.h" +#include "cpu_streams_calculation.hpp" #include "internal_properties.hpp" #include "itt.h" #include "openvino/runtime/intel_cpu/properties.hpp" @@ -11,12 +12,12 @@ #include "openvino/runtime/properties.hpp" #include "openvino/runtime/threading/cpu_streams_info.hpp" #include "openvino/runtime/threading/executor_manager.hpp" -#include "openvino/util/codec_xor.hpp" -#include "serialize.h" #include "transformations/transformation_pipeline.h" #include "transformations/utils/utils.hpp" +#include "utils/codec_xor.hpp" #include "utils/denormals.hpp" #include "utils/precision_support.h" +#include "utils/serialize.hpp" #include "weights_cache.hpp" #if defined(__linux__) @@ -253,8 +254,8 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< calculate_streams(conf, cloned_model); if (!conf.cacheEncrypt || !conf.cacheDecrypt) { - conf.cacheEncrypt = ov::util::codec_xor; - conf.cacheDecrypt = ov::util::codec_xor; + conf.cacheEncrypt = codec_xor_str; + conf.cacheDecrypt = codec_xor_str; } transformations.PostLpt(); @@ -444,6 +445,9 @@ ov::Any Plugin::get_ro_property(const std::string& name, const ov::AnyMap& optio } else if (ov::internal::supported_properties == name) { return decltype(ov::internal::supported_properties)::value_type{ ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO}, +#if !defined(OPENVINO_ARCH_ARM) + ov::PropertyName{ov::internal::caching_with_mmap.name(), ov::PropertyMutability::RO}, +#endif ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}, ov::PropertyName{ov::internal::compiled_model_runtime_properties.name(), ov::PropertyMutability::RO}, ov::PropertyName{ov::internal::compiled_model_runtime_properties_supported.name(), @@ -545,25 +549,24 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& return res; } -std::shared_ptr Plugin::import_model(std::istream& networkModel, const ov::AnyMap& config) const { +std::shared_ptr Plugin::import_model(std::istream& model_stream, + const ov::AnyMap& config) const { OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "import_model"); - std::function decrypt; + CacheDecrypt decrypt{ codec_xor }; + bool decript_from_string = false; if (config.count(ov::cache_encryption_callbacks.name())) { auto encryption_callbacks = config.at(ov::cache_encryption_callbacks.name()).as(); - decrypt = encryption_callbacks.decrypt; - } - - if (!decrypt) { - decrypt = ov::util::codec_xor; + decrypt.m_decrypt_str = encryption_callbacks.decrypt; + decript_from_string = true; } ModelDeserializer deserializer( - networkModel, - [this](const std::string& model, const ov::Tensor& weights) { - return get_core()->read_model(model, weights, true); + model_stream, + [this](const std::shared_ptr& model, const std::shared_ptr& weights) { + return get_core()->read_model(model, weights); }, - std::move(decrypt)); + decrypt, decript_from_string); std::shared_ptr model; deserializer >> model; diff --git 
a/src/plugins/intel_cpu/src/plugin.h b/src/plugins/intel_cpu/src/plugin.h index c2d24e98ee6f98..2548ba2c1cc8af 100644 --- a/src/plugins/intel_cpu/src/plugin.h +++ b/src/plugins/intel_cpu/src/plugin.h @@ -5,7 +5,6 @@ #pragma once #include "compiled_model.h" -#include "cpu_streams_calculation.hpp" #include "openvino/runtime/threading/cpu_message.hpp" namespace ov { @@ -22,7 +21,7 @@ class Plugin : public ov::IPlugin { const ov::AnyMap& properties, const ov::SoPtr& context) const override { OPENVINO_THROW_NOT_IMPLEMENTED( - "Not Implemented compile_model with RemoteContext is not supported by CPU plugin!"); + "compile_model with RemoteContext is not supported by CPU plugin!"); }; void set_property(const ov::AnyMap& properties) override; @@ -32,16 +31,16 @@ class Plugin : public ov::IPlugin { const ov::SoPtr& context, const ov::AnyMap& properties) const override { OPENVINO_THROW_NOT_IMPLEMENTED( - "Not Implemented import_model with RemoteContext is not supported by CPU plugin!"); + "import_model with RemoteContext is not supported by CPU plugin!"); }; ov::SupportedOpsMap query_model(const std::shared_ptr& model, const ov::AnyMap& properties) const override; ov::SoPtr create_context(const ov::AnyMap& remote_properties) const override { - OPENVINO_THROW_NOT_IMPLEMENTED("Not Implemented create_context is not supported by CPU plugin!"); + OPENVINO_THROW_NOT_IMPLEMENTED("create_context is not supported by CPU plugin!"); }; ov::SoPtr get_default_context(const ov::AnyMap& remote_properties) const override { - OPENVINO_THROW_NOT_IMPLEMENTED("Not Implemented get_default_context is not supported by CPU plugin!"); + OPENVINO_THROW_NOT_IMPLEMENTED("get_default_context is not supported by CPU plugin!"); }; std::shared_ptr m_msg_manager; diff --git a/src/plugins/intel_cpu/src/serialize.cpp b/src/plugins/intel_cpu/src/serialize.cpp deleted file mode 100644 index 16583cf1c73ef2..00000000000000 --- a/src/plugins/intel_cpu/src/serialize.cpp +++ /dev/null @@ -1,116 +0,0 @@ -// Copyright (C) 2018-2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// -#include "serialize.h" - -#include - -#include "openvino/pass/serialize.hpp" -#include "openvino/util/codec_xor.hpp" -#include "transformations/utils/utils.hpp" - -namespace ov { -namespace intel_cpu { - -static void setInfo(pugi::xml_node& root, std::shared_ptr& model) { - pugi::xml_node outputs = root.child("outputs"); - auto nodes_it = outputs.children("out").begin(); - size_t size = model->outputs().size(); - for (size_t i = 0; i < size; ++nodes_it, i++) { - std::string name = nodes_it->attribute("name").value(); - if (name.empty()) - continue; - auto result = model->output(i).get_node_shared_ptr(); - ov::descriptor::set_ov_tensor_legacy_name(result->input_value(0).get_tensor(), name); - } -} - -ModelSerializer::ModelSerializer(std::ostream& ostream, cache_encrypt encrypt_fn) - : _ostream(ostream), _cache_encrypt(std::move(encrypt_fn)) {} - -void ModelSerializer::operator<<(const std::shared_ptr& model) { - auto serializeInfo = [&](std::ostream& stream) { - const std::string name = "cnndata"; - pugi::xml_document xml_doc; - pugi::xml_node root = xml_doc.append_child(name.c_str()); - pugi::xml_node outputs = root.append_child("outputs"); - for (const auto& out : model->get_results()) { - auto out_node = outputs.append_child("out"); - const std::string name = ov::descriptor::get_ov_tensor_legacy_name(out->input_value(0).get_tensor()); - out_node.append_attribute("name").set_value(name.c_str()); - } - xml_doc.save(stream); - }; - - 
ov::pass::StreamSerialize serializer(_ostream, serializeInfo, _cache_encrypt); - serializer.run_on_model(std::const_pointer_cast(model->clone())); -} - -ModelDeserializer::ModelDeserializer(std::istream & istream, model_builder fn, cache_decrypt decrypt_fn) - : _istream(istream) - , _model_builder(std::move(fn)) - , _cache_decrypt(std::move(decrypt_fn)) { -} - -void ModelDeserializer::operator>>(std::shared_ptr& model) { - using namespace ov::pass; - - std::string xmlString; - ov::Tensor dataBlob; - - // get file size before seek content - // blob from cache may have other header, skip it - const size_t _pos = _istream.tellg(); - _istream.seekg(0, _istream.end); - const size_t file_size = _istream.tellg(); - _istream.seekg(_pos, _istream.beg); - - StreamSerialize::DataHeader hdr = {}; - _istream.read(reinterpret_cast(&hdr), sizeof hdr); - - // check if model header contains valid data - bool isValidModel = (hdr.custom_data_offset == sizeof(hdr) + _pos) && - (hdr.custom_data_size == hdr.consts_offset - hdr.custom_data_offset) && - (hdr.consts_size == hdr.model_offset - hdr.consts_offset) && - (hdr.model_size = file_size - hdr.model_offset); - if (!isValidModel) { - OPENVINO_THROW("Failed to read CPU device xml header"); - } - // read model input/output precisions - _istream.seekg(hdr.custom_data_offset); - - pugi::xml_document xmlInOutDoc; - if (hdr.custom_data_size > 0) { - std::string xmlInOutString; - xmlInOutString.resize(hdr.custom_data_size); - _istream.read(const_cast(xmlInOutString.c_str()), hdr.custom_data_size); - auto res = xmlInOutDoc.load_string(xmlInOutString.c_str()); - if (res.status != pugi::status_ok) { - OPENVINO_THROW("NetworkNotRead: The inputs and outputs information is invalid."); - } - } - - // read blob content - _istream.seekg(hdr.consts_offset); - if (hdr.consts_size) { - dataBlob = ov::Tensor(ov::element::u8, ov::Shape({hdr.consts_size})); - _istream.read(static_cast(dataBlob.data(ov::element::u8)), hdr.consts_size); - } - - // read XML content - _istream.seekg(hdr.model_offset); - xmlString.resize(hdr.model_size); - _istream.read(const_cast(xmlString.c_str()), hdr.model_size); - if (_cache_decrypt) { - xmlString = _cache_decrypt(xmlString); - } - - model = _model_builder(xmlString, std::move(dataBlob)); - - // Set Info - pugi::xml_node root = xmlInOutDoc.child("cnndata"); - setInfo(root, model); -} - -} // namespace intel_cpu -} // namespace ov diff --git a/src/plugins/intel_cpu/src/serialize.h b/src/plugins/intel_cpu/src/serialize.h deleted file mode 100644 index b364c428419c96..00000000000000 --- a/src/plugins/intel_cpu/src/serialize.h +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (C) 2018-2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// -#pragma once - -#include -#include -#include -#include - -#include "openvino/core/model.hpp" -#include "openvino/runtime/tensor.hpp" - -namespace ov { -namespace intel_cpu { - -class ModelSerializer { -public: - typedef std::function cache_encrypt; - ModelSerializer(std::ostream& ostream, cache_encrypt encrypt_fn = {}); - void operator<<(const std::shared_ptr& model); - -private: - std::ostream& _ostream; - cache_encrypt _cache_encrypt; -}; - -class ModelDeserializer { -public: - typedef std::function(const std::string&, const ov::Tensor&)> model_builder; - typedef std::function cache_decrypt; - ModelDeserializer(std::istream& istream, model_builder fn, cache_decrypt decrypt_fn = {}); - void operator>>(std::shared_ptr& model); - -private: - std::istream& _istream; - model_builder _model_builder; - 
cache_decrypt _cache_decrypt; -}; - -} // namespace intel_cpu -} // namespace ov diff --git a/src/plugins/intel_cpu/src/utils/codec_xor.cpp b/src/plugins/intel_cpu/src/utils/codec_xor.cpp new file mode 100644 index 00000000000000..06061fc704e228 --- /dev/null +++ b/src/plugins/intel_cpu/src/utils/codec_xor.cpp @@ -0,0 +1,33 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "utils/codec_xor.hpp" +#include "openvino/core/parallel.hpp" + +namespace ov { +namespace intel_cpu { + +void codec_xor(char* dst_str, const char* src_str, size_t len) { + static const char codec_key[] = {0x30, 0x60, 0x70, 0x02, 0x04, 0x08, 0x3F, 0x6F, 0x72, 0x74, 0x78, 0x7F}; + auto key_size = sizeof(codec_key); + + if (dst_str == src_str) { + parallel_for(len, [&](size_t key_idx) { + dst_str[key_idx] ^= codec_key[key_idx % key_size]; + }); + } else { + parallel_for(len, [&](size_t key_idx) { + dst_str[key_idx] = src_str[key_idx] ^ codec_key[key_idx % key_size]; + }); + } +} + +std::string codec_xor_str(const std::string& source_str) { + std::string new_str(source_str); + codec_xor(&new_str[0], &new_str[0], new_str.size()); + return new_str; +} + +} // namespace intel_cpu +} // namespace ov. diff --git a/src/plugins/intel_cpu/src/utils/codec_xor.hpp b/src/plugins/intel_cpu/src/utils/codec_xor.hpp new file mode 100644 index 00000000000000..d99a6d0d52bc37 --- /dev/null +++ b/src/plugins/intel_cpu/src/utils/codec_xor.hpp @@ -0,0 +1,37 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#pragma once + +#include +#include + +namespace ov { +namespace intel_cpu { + +void codec_xor(char* dst_str, const char* src_str, size_t len); + +std::string codec_xor_str(const std::string& source_str); + +typedef std::function CacheDecryptStr; +typedef std::function CacheDecryptChar; + +union CacheDecrypt { + CacheDecryptChar m_decrypt_char = nullptr; + CacheDecryptStr m_decrypt_str; + + CacheDecrypt() {} + + CacheDecrypt(CacheDecryptStr fn) : m_decrypt_str(fn) {} + + CacheDecrypt(CacheDecryptChar fn) : m_decrypt_char(fn) {} + + ~CacheDecrypt() {} + + operator bool() { + return m_decrypt_char || m_decrypt_str; + } +}; + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/utils/serialize.cpp b/src/plugins/intel_cpu/src/utils/serialize.cpp new file mode 100644 index 00000000000000..6666d42fb4f586 --- /dev/null +++ b/src/plugins/intel_cpu/src/utils/serialize.cpp @@ -0,0 +1,198 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "serialize.hpp" + +#include "openvino/core/descriptor_tensor.hpp" +#include "openvino/core/parallel.hpp" +#include "openvino/runtime/shared_buffer.hpp" + +namespace ov { +namespace intel_cpu { + +////////// ModelSerializer ////////// + +ModelSerializer::ModelSerializer(std::ostream& ostream, CacheEncrypt encrypt_fn) + : m_ostream(ostream), m_cache_encrypt(std::move(encrypt_fn)) {} + +void ModelSerializer::operator<<(const std::shared_ptr& model) { + auto serialize_info = [&](std::ostream& stream) { + pugi::xml_document xml_doc; + pugi::xml_node root = xml_doc.append_child("cnndata"); + pugi::xml_node outputs = root.append_child("outputs"); + for (const auto& out : model->get_results()) { + auto out_node = outputs.append_child("out"); + const auto name = ov::descriptor::get_ov_tensor_legacy_name(out->input_value(0).get_tensor()); + out_node.append_attribute("name").set_value(name.c_str()); + } + xml_doc.save(stream); + }; + + 
ov::pass::StreamSerialize serializer(m_ostream, serialize_info, m_cache_encrypt); + serializer.run_on_model(std::const_pointer_cast(model->clone())); +} + +////////// ModelDeserializer ////////// + +ModelDeserializer::ModelDeserializer(std::istream& model_stream, ModelBuilder fn, const CacheDecrypt& decrypt_fn, bool decript_from_string) + : m_istream(model_stream), m_model_builder(std::move(fn)), m_decript_from_string(decript_from_string) { + if (m_decript_from_string) { + m_cache_decrypt.m_decrypt_str = decrypt_fn.m_decrypt_str; + } else { + m_cache_decrypt.m_decrypt_char = decrypt_fn.m_decrypt_char; + } + } + +void ModelDeserializer::set_info(pugi::xml_node& root, std::shared_ptr& model) { + pugi::xml_node outputs = root.child("outputs"); + auto nodes_it = outputs.children("out").begin(); + size_t size = model->outputs().size(); + for (size_t i = 0lu; i < size; ++nodes_it, i++) { + std::string name = nodes_it->attribute("name").value(); + if (name.empty()) + continue; + auto result = model->output(i).get_node_shared_ptr(); + ov::descriptor::set_ov_tensor_legacy_name(result->input_value(0).get_tensor(), name); + } +} + +void ModelDeserializer::operator>>(std::shared_ptr& model) { + if (auto mmap_stream = dynamic_cast(&m_istream)) { + process_mmap(model, mmap_stream->m_memory); + } else { + process_stream(model); + } +} + +void ModelDeserializer::process_mmap(std::shared_ptr& model, + const std::shared_ptr& mmemory) { + // Note: Don't use seekg with mmaped stream. This may affect the performance of some models. + // Get file size before seek content. + // Blob from cache may have other header, so need to skip this. + auto buffer_base = mmemory->data(); + const auto file_size = mmemory->size(); + const size_t hdr_pos = m_istream.tellg(); + + pass::StreamSerialize::DataHeader hdr = {}; + std::memcpy(reinterpret_cast(&hdr), buffer_base + hdr_pos, sizeof hdr); + + // Check if model header contains valid data. + bool is_valid_model = (hdr.custom_data_offset == sizeof(hdr) + hdr_pos) && + (hdr.custom_data_size == hdr.consts_offset - hdr.custom_data_offset) && + (hdr.consts_size == hdr.model_offset - hdr.consts_offset) && + (hdr.model_size = file_size - hdr.model_offset); + if (!is_valid_model) { + OPENVINO_THROW("[CPU] Could not deserialize by device xml header."); + } + + // Read model input/output precisions. 
+ pugi::xml_document xml_in_out_doc; + if (hdr.custom_data_size > 0lu) { + auto res = xml_in_out_doc.load_buffer(buffer_base + hdr.custom_data_offset, hdr.custom_data_size, pugi::parse_default, pugi::encoding_utf8); + if (res.status != pugi::status_ok) { + OPENVINO_THROW("[CPU] Could to deserialize custom data."); + } + } + + // Map blob content + std::shared_ptr weights_buf; + if (hdr.consts_size) { + weights_buf = std::make_shared>>(buffer_base + hdr.consts_offset, + hdr.consts_size, + mmemory); + } + + // XML content + auto xml_buff = std::make_shared(); + if (m_cache_decrypt) { + if (m_decript_from_string) { + xml_buff->assign(buffer_base + hdr.model_offset, hdr.model_size); + *xml_buff = m_cache_decrypt.m_decrypt_str(*xml_buff); + } else { + xml_buff->reserve(hdr.model_size + 1); + m_cache_decrypt.m_decrypt_char(&((*xml_buff)[0]), buffer_base + hdr.model_offset, hdr.model_size); + } + } else { + xml_buff->assign(buffer_base + hdr.model_offset, hdr.model_size); + } + std::shared_ptr model_buf = + std::make_shared>>(&((*xml_buff)[0]), + hdr.model_size, + xml_buff); + + model = m_model_builder(model_buf, weights_buf); + + // Set Info + pugi::xml_node root = xml_in_out_doc.child("cnndata"); + set_info(root, model); +} + +void ModelDeserializer::process_stream(std::shared_ptr& model) { + const size_t hdr_pos = m_istream.tellg(); + m_istream.seekg(0, m_istream.end); + const size_t file_size = m_istream.tellg(); + m_istream.seekg(hdr_pos, m_istream.beg); + + pass::StreamSerialize::DataHeader hdr = {}; + m_istream.read(reinterpret_cast(&hdr), sizeof hdr); + + // Check if model header contains valid data. + bool is_valid_model = (hdr.custom_data_offset == sizeof(hdr) + hdr_pos) && + (hdr.custom_data_size == hdr.consts_offset - hdr.custom_data_offset) && + (hdr.consts_size == hdr.model_offset - hdr.consts_offset) && + (hdr.model_size = file_size - hdr.model_offset); + if (!is_valid_model) { + OPENVINO_THROW("[CPU] Could not deserialize by device xml header."); + } + + // read model input/output precisions + m_istream.seekg(hdr.custom_data_offset); + + pugi::xml_document xmlInOutDoc; + if (hdr.custom_data_size > 0) { + std::string xmlInOutString; + xmlInOutString.resize(hdr.custom_data_size); + m_istream.read(const_cast(xmlInOutString.c_str()), hdr.custom_data_size); + auto res = xmlInOutDoc.load_string(xmlInOutString.c_str()); + if (res.status != pugi::status_ok) { + OPENVINO_THROW("NetworkNotRead: The inputs and outputs information is invalid."); + } + } + + // read blob content + auto data_blob = std::make_shared(ov::element::u8, ov::Shape({hdr.consts_size})); + m_istream.seekg(hdr.consts_offset); + if (hdr.consts_size) { + m_istream.read(static_cast(data_blob->data(ov::element::u8)), hdr.consts_size); + } + + // read XML content + auto xml_string = std::make_shared(); + m_istream.seekg(hdr.model_offset); + xml_string->resize(hdr.model_size); + m_istream.read(const_cast(xml_string->data()), hdr.model_size); + if (m_cache_decrypt) { + if (m_decript_from_string) { + *xml_string = m_cache_decrypt.m_decrypt_str(*xml_string); + } else { + m_cache_decrypt.m_decrypt_char(const_cast(xml_string->data()), xml_string->data(), xml_string->size()); + } + } + + auto model_buf = std::make_shared>>(const_cast(xml_string->data()), + xml_string->size(), + xml_string); + auto weights_buf = std::make_shared>>(reinterpret_cast(data_blob->data(ov::element::u8)), + hdr.consts_size, + data_blob); + + model = m_model_builder(model_buf, weights_buf); + + // Set Info + pugi::xml_node root = 
xmlInOutDoc.child("cnndata");
+    set_info(root, model);
+}
+
+}  // namespace intel_cpu
+}  // namespace ov
diff --git a/src/plugins/intel_cpu/src/utils/serialize.hpp b/src/plugins/intel_cpu/src/utils/serialize.hpp
new file mode 100644
index 00000000000000..817041452c9597
--- /dev/null
+++ b/src/plugins/intel_cpu/src/utils/serialize.hpp
@@ -0,0 +1,54 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+#pragma once
+
+#include
+
+#include "openvino/core/model.hpp"
+#include "openvino/pass/serialize.hpp"
+#include "openvino/runtime/aligned_buffer.hpp"
+#include "openvino/util/mmap_object.hpp"
+#include "utils/codec_xor.hpp"
+
+namespace ov {
+namespace intel_cpu {
+
+class ModelSerializer {
+public:
+    typedef std::function CacheEncrypt;
+
+    ModelSerializer(std::ostream& ostream, CacheEncrypt encrypt_fn = {});
+
+    void operator<<(const std::shared_ptr& model);
+
+private:
+    std::ostream& m_ostream;
+    CacheEncrypt m_cache_encrypt;
+};
+
+class ModelDeserializer {
+public:
+    typedef std::function(const std::shared_ptr&, const std::shared_ptr&)> ModelBuilder;
+
+    ModelDeserializer(std::istream& model, ModelBuilder fn, const CacheDecrypt& decrypt_fn, bool decript_from_string);
+
+    virtual ~ModelDeserializer() = default;
+
+    void operator>>(std::shared_ptr& model);
+
+protected:
+    static void set_info(pugi::xml_node& root, std::shared_ptr& model);
+
+    void process_mmap(std::shared_ptr& model, const std::shared_ptr& memory);
+
+    void process_stream(std::shared_ptr& model);
+
+    std::istream& m_istream;
+    ModelBuilder m_model_builder;
+    CacheDecrypt m_cache_decrypt;
+    bool m_decript_from_string;
+};
+
+}  // namespace intel_cpu
+}  // namespace ov
diff --git a/src/tests/test_utils/unit_test_utils/mocks/openvino/runtime/mock_icore.hpp b/src/tests/test_utils/unit_test_utils/mocks/openvino/runtime/mock_icore.hpp
index 521a8dc60e3fe0..367818ebbf9572 100644
--- a/src/tests/test_utils/unit_test_utils/mocks/openvino/runtime/mock_icore.hpp
+++ b/src/tests/test_utils/unit_test_utils/mocks/openvino/runtime/mock_icore.hpp
@@ -48,6 +48,10 @@ class MockICore : public ov::ICore {
     (const));
     MOCK_METHOD(std::shared_ptr, read_model, (const std::string&, const ov::Tensor&, bool), (const));
     MOCK_METHOD(std::shared_ptr, read_model, (const std::string&, const std::string&), (const));
+    MOCK_METHOD(std::shared_ptr,
+                read_model,
+                (const std::shared_ptr&, const std::shared_ptr&),
+                (const));
     MOCK_METHOD(ov::SoPtr, get_default_context, (const std::string&), (const));
     MOCK_METHOD(ov::SoPtr, import_model,
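
Note: the sketch below is illustrative only and is not part of the diff above. It shows, under stated assumptions, how the mmap-backed cache-read path introduced here is meant to be consumed: FileStorageCacheManager opens an ov::MmapStream when the plugin reports ov::internal::caching_with_mmap, and the deserializer detects that stream type and wraps regions of the mapped file in ov::SharedBuffer so the mapping stays alive while the model is built. The consume_blob() helper and the "model_cache.blob" path are hypothetical; ov::MmapStream, its public m_memory handle, load_mmap_object and SharedBuffer come from the changes above, and the headers used are OpenVINO developer-API headers.

    #include <iostream>
    #include <istream>
    #include <iterator>
    #include <memory>
    #include <string>

    #include "openvino/runtime/shared_buffer.hpp"
    #include "openvino/util/mmap_object.hpp"

    // Hypothetical consumer mirroring ModelDeserializer::operator>>: if the stream is an
    // ov::MmapStream, read straight from the mapped memory instead of istream::read().
    static void consume_blob(std::istream& stream) {
        if (auto* mmap_stream = dynamic_cast<ov::MmapStream*>(&stream)) {
            auto memory = mmap_stream->m_memory;  // shared handle to the mapped file
            // Zero-copy view over the mapped bytes; SharedBuffer keeps `memory`
            // (and therefore the file mapping) alive while the buffer is referenced.
            auto blob = std::make_shared<ov::SharedBuffer<decltype(memory)>>(memory->data(),
                                                                             memory->size(),
                                                                             memory);
            std::cout << "mmap path, blob size: " << blob->size() << "\n";
        } else {
            // Fallback mirroring process_stream(): plain buffered reads from the stream.
            std::string content((std::istreambuf_iterator<char>(stream)),
                                std::istreambuf_iterator<char>());
            std::cout << "stream path, blob size: " << content.size() << "\n";
        }
    }

    int main() {
        const std::string blob_path = "model_cache.blob";  // hypothetical cache entry on disk
        ov::MmapStream stream(blob_path);  // maps the file and opens it as a binary ifstream
        consume_blob(stream);
        return 0;
    }

In the diff itself this detection happens inside ModelDeserializer::operator>>, and FileStorageCacheManager::read_cache_entry only receives mmap == true when does_plugin_support_model_caching_with_mmap() finds ov::internal::caching_with_mmap among the plugin's internal supported properties.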