Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CPU] Enable mmap for model loading from cache. #23315

Merged
merged 24 commits into from
Oct 12, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
b6c8c84
[CPU] Enable mmap for model loading from cache.
nshchego Mar 7, 2024
e392bef
Code style + comments.
nshchego Mar 8, 2024
3e84f33
Another API version.
nshchego Mar 11, 2024
190adab
Move import_model definition
nshchego Mar 26, 2024
9663493
Fixes as per comments
nshchego Apr 1, 2024
f092feb
Fixes as per comments
nshchego Apr 19, 2024
2e3861f
Serialize was divided into subclasses
nshchego Apr 25, 2024
fca7623
Add enable_mmap flag
nshchego Apr 28, 2024
0484ea1
Pass buffer without copy
nshchego Jul 4, 2024
fcc99fa
Pass MappedMemory via stream
nshchego Aug 6, 2024
9a11c96
Fixes as per comments
nshchego Aug 10, 2024
052dc3c
Merge remote-tracking branch 'origin/master' into cpu/compile_mmap
nshchego Sep 17, 2024
c172eed
Fixes as per comments
nshchego Sep 17, 2024
235330f
Build fix
nshchego Sep 18, 2024
6d46bac
Merge remote-tracking branch 'origin/master' into cpu/compile_mmap
nshchego Oct 2, 2024
d93f3ff
Merge remote-tracking branch 'origin/master' into cpu/compile_mmap
nshchego Oct 2, 2024
3302736
Merge remote-tracking branch 'origin/master' into cpu/compile_mmap
nshchego Oct 6, 2024
5d2c475
Merge remote-tracking branch 'origin/master' into cpu/compile_mmap
nshchego Oct 7, 2024
d5961c5
Fixes as per comments
nshchego Oct 9, 2024
cc9f589
Merge remote-tracking branch 'origin/master' into cpu/compile_mmap
nshchego Oct 9, 2024
9dad4e8
Merge remote-tracking branch 'origin/master' into cpu/compile_mmap
nshchego Oct 10, 2024
aaff252
Merge remote-tracking branch 'origin/master' into cpu/compile_mmap
nshchego Oct 11, 2024
87ab2de
Merge remote-tracking branch 'origin/master' into cpu/compile_mmap
nshchego Oct 11, 2024
44ce367
Revert read_model due to frontend conflicts
nshchego Oct 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions src/common/util/include/openvino/util/mmap_object.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@

#include <memory>
#include <sstream>
#include <string>
#include <utility>

namespace ov {

Expand Down Expand Up @@ -50,4 +53,14 @@ std::shared_ptr<ov::MappedMemory> load_mmap_object(const std::wstring& path);

#endif // OPENVINO_ENABLE_UNICODE_PATH_SUPPORT

/// @brief Stream buffer backed by a memory-mapped file.
///
/// Exposes the mapped bytes as the get area of a std::stringbuf so that
/// std::istream-based consumers can read them without copying, and keeps the
/// mapping alive for as long as the buffer object exists.
class MmapStreamBuffer final : public std::stringbuf {
public:
    /// @param mem Mapped memory to expose; ownership is shared so the mapping
    ///            cannot be released while this buffer is still in use.
    explicit MmapStreamBuffer(std::shared_ptr<ov::MappedMemory> mem) : m_memory(std::move(mem)) {
        // Point the stream buffer's get area directly at the mapped region.
        pubsetbuf(m_memory->data(), m_memory->size());
    }

    // Held solely to extend the lifetime of the mapping; the bytes themselves
    // are accessed through the stream-buffer interface above.
    std::shared_ptr<ov::MappedMemory> m_memory;
};

} // namespace ov
33 changes: 23 additions & 10 deletions src/frontends/ir/src/frontend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,9 @@ inline size_t get_ir_version(pugi::xml_node& root) {
* @param model Models stream
* @return IR version, 0 if model does not represent IR
*/
size_t get_ir_version(std::istream& model) {
std::array<char, 512> header{};

model.seekg(0, model.beg);
model.read(header.data(), header.size());
model.clear();
model.seekg(0, model.beg);

size_t get_ir_version(char* header) {
pugi::xml_document doc;
auto res =
doc.load_buffer(header.data(), header.size(), pugi::parse_default | pugi::parse_fragment, pugi::encoding_utf8);
auto res = doc.load_buffer(header, 512, pugi::parse_default | pugi::parse_fragment, pugi::encoding_utf8);

if (res == pugi::status_ok) {
pugi::xml_node root = doc.document_element();
Expand All @@ -59,13 +51,25 @@ size_t get_ir_version(std::istream& model) {
return 0;
}

/**
 * @brief Reads the leading bytes of a model stream and extracts the IR version.
 * @param model Model stream; read position is restored to the beginning on exit.
 * @return IR version, 0 if the content does not represent IR
 */
size_t get_ir_version(std::istream& model) {
    // Zero-initialize: if the stream holds fewer than 512 bytes (or nothing at
    // all), the unread tail must not contain indeterminate bytes when the
    // buffer is handed to the XML parser below.
    char header[512] = {};

    model.seekg(0, model.beg);
    model.read(header, sizeof(header));
    // Clear eof/fail bits a short read may have set, then rewind so the
    // caller can re-read the stream from the start.
    model.clear();
    model.seekg(0, model.beg);

    return get_ir_version(header);
}

} // namespace

bool FrontEnd::supported_impl(const std::vector<ov::Any>& variants) const {
// Last boolean flag in `variants` (if presented) is reserved for FE configuration
size_t extra_variants_num = variants.size() > 0 && variants[variants.size() - 1].is<bool>() ? 1 : 0;
std::ifstream local_model_stream;
std::istream* provided_model_stream = nullptr;
std::shared_ptr<AlignedBuffer> model_buffer = nullptr;

if (variants.empty() || variants.size() > 3 + extra_variants_num) {
return false;
Expand All @@ -86,6 +90,8 @@ bool FrontEnd::supported_impl(const std::vector<ov::Any>& variants) const {
provided_model_stream = model_variant.as<std::istream*>();
} else if (model_variant.is<std::istringstream*>()) {
provided_model_stream = model_variant.as<std::istringstream*>();
} else if (model_variant.is<std::shared_ptr<AlignedBuffer>>()) {
model_buffer = model_variant.as<std::shared_ptr<AlignedBuffer>>();
}

if (provided_model_stream && local_model_stream.is_open()) {
Expand All @@ -98,6 +104,8 @@ bool FrontEnd::supported_impl(const std::vector<ov::Any>& variants) const {
} else if (local_model_stream.is_open()) {
version = get_ir_version(local_model_stream);
local_model_stream.close();
} else if (model_buffer) {
version = get_ir_version(model_buffer->get_ptr<char>());
} else {
return false;
}
Expand All @@ -119,6 +127,7 @@ void FrontEnd::add_extension(const ov::Extension::Ptr& ext) {
InputModel::Ptr FrontEnd::load_impl(const std::vector<ov::Any>& variants) const {
std::ifstream local_model_stream;
std::istream* provided_model_stream = nullptr;
std::shared_ptr<ov::AlignedBuffer> model_buf;
std::shared_ptr<ov::AlignedBuffer> weights;

auto create_extensions_map = [&]() -> std::unordered_map<ov::DiscreteTypeInfo, ov::BaseOpExtension::Ptr> {
Expand All @@ -137,6 +146,8 @@ InputModel::Ptr FrontEnd::load_impl(const std::vector<ov::Any>& variants) const
auto input_model = std::make_shared<InputModel>(local_model_stream, weights, create_extensions_map());
local_model_stream.close();
return input_model;
} else if (model_buf) {
return std::make_shared<InputModel>(model_buf, weights, create_extensions_map());
}
return nullptr;
};
Expand Down Expand Up @@ -168,6 +179,8 @@ InputModel::Ptr FrontEnd::load_impl(const std::vector<ov::Any>& variants) const
provided_model_stream = model_variant.as<std::istream*>();
} else if (model_variant.is<std::istringstream*>()) {
provided_model_stream = model_variant.as<std::istringstream*>();
} else if (model_variant.is<std::shared_ptr<AlignedBuffer>>()) {
model_buf = model_variant.as<std::shared_ptr<AlignedBuffer>>();
}

// Check weights and extensions
Expand Down
35 changes: 29 additions & 6 deletions src/frontends/ir/src/input_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -207,28 +207,51 @@ class InputModel::InputModelIRImpl {
pugi::xml_document m_xml_doc;

public:
InputModelIRImpl(std::istream& stream,
InputModelIRImpl(std::istream& model,
const std::shared_ptr<ov::AlignedBuffer>& weights,
const std::unordered_map<ov::DiscreteTypeInfo, ov::BaseOpExtension::Ptr>& extensions)
: m_weights(weights),
m_extensions(extensions) {
pugi::xml_parse_result res = m_xml_doc.load(stream);
pugi::xml_parse_result res = m_xml_doc.load(model);
if (res.status != pugi::status_ok) {
OPENVINO_THROW(res.description(), " at offset ", res.offset);
}
init_opset();
}

// Builds the IR input model from an in-memory xml buffer (e.g. a memory-mapped
// cache entry) instead of an input stream; throws on any XML parse failure.
// NOTE(review): pugixml's load_buffer is documented to copy the source data,
// so `model` should not need to outlive this object — confirm.
InputModelIRImpl(const std::shared_ptr<ov::AlignedBuffer>& model,
                 const std::shared_ptr<ov::AlignedBuffer>& weights,
                 const std::unordered_map<ov::DiscreteTypeInfo, ov::BaseOpExtension::Ptr>& extensions)
    : m_weights(weights),
      m_extensions(extensions) {
    auto res = m_xml_doc.load_buffer(model->get_ptr(), model->size(), pugi::parse_default, pugi::encoding_utf8);
    if (res.status != pugi::status_ok) {
        OPENVINO_THROW(res.description(), " at offset ", res.offset);
    }
    init_opset();
}

std::shared_ptr<ov::Model> convert();

private:
void init_opset() {
m_root = m_xml_doc.document_element();
for (const auto& it : ov::get_available_opsets()) {
m_opsets[it.first] = it.second();
}
}

std::shared_ptr<ov::Model> convert();
};

InputModel::InputModel(std::istream& stream,
InputModel::InputModel(std::istream& model,
const std::shared_ptr<ov::AlignedBuffer>& weights,
const std::unordered_map<ov::DiscreteTypeInfo, ov::BaseOpExtension::Ptr>& extensions) {
_impl = std::make_shared<InputModelIRImpl>(model, weights, extensions);
}

InputModel::InputModel(const std::shared_ptr<ov::AlignedBuffer>& model,
const std::shared_ptr<ov::AlignedBuffer>& weights,
const std::unordered_map<ov::DiscreteTypeInfo, ov::BaseOpExtension::Ptr>& extensions) {
_impl = std::make_shared<InputModelIRImpl>(stream, weights, extensions);
_impl = std::make_shared<InputModelIRImpl>(model, weights, extensions);
}

std::shared_ptr<ov::Model> InputModel::convert() {
Expand Down
4 changes: 4 additions & 0 deletions src/frontends/ir/src/input_model.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ class InputModel : public ov::frontend::InputModel {
const std::shared_ptr<ov::AlignedBuffer>& weights,
const std::unordered_map<ov::DiscreteTypeInfo, ov::BaseOpExtension::Ptr>& extensions);

InputModel(const std::shared_ptr<ov::AlignedBuffer>& model_buf,
const std::shared_ptr<ov::AlignedBuffer>& weights,
const std::unordered_map<ov::DiscreteTypeInfo, ov::BaseOpExtension::Ptr>& extensions);

std::shared_ptr<Model> convert();
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ class CompiledBlobHeader final {
return m_runtimeInfo;
}

friend void operator>>(const char* xml_str, CompiledBlobHeader& header);

friend std::istream& operator>>(std::istream& stream, CompiledBlobHeader& header);

friend std::ostream& operator<<(std::ostream& stream, const CompiledBlobHeader& header);
Expand Down
10 changes: 10 additions & 0 deletions src/inference/dev_api/openvino/runtime/icore.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

#include <memory>

#include "openvino/runtime/aligned_buffer.hpp"
#include "openvino/runtime/icompiled_model.hpp"
#include "openvino/runtime/properties.hpp"
#include "openvino/runtime/so_ptr.hpp"
Expand Down Expand Up @@ -45,6 +46,15 @@ class OPENVINO_RUNTIME_API ICore {
const ov::Tensor& weights,
bool frontend_mode = false) const = 0;

/**
* @brief Reads IR xml and bin from buffer
* @param model shared pointer to aligned buffer with IR
* @param weights shared pointer to aligned buffer with weights
* @return shared pointer to ov::Model
*/
virtual std::shared_ptr<ov::Model> read_model(const std::shared_ptr<AlignedBuffer>& model,
const std::shared_ptr<AlignedBuffer>& weights) const = 0;

/**
* @brief Reads IR xml and bin files
* @param model_path path to IR file
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,13 @@ static constexpr Property<std::vector<PropertyName>, PropertyMutability::RO> sup
*/
static constexpr Property<std::vector<PropertyName>, PropertyMutability::RO> caching_properties{"CACHING_PROPERTIES"};

/**
* @brief Read-only property to get a std::vector<PropertyName> of properties
* which should affect the loading time from cache
* @ingroup ov_dev_api_plugin_api
*/
static constexpr Property<std::vector<PropertyName>, PropertyMutability::RO> caching_with_mmap{"CACHING_WITH_MMAP"};

/**
* @brief Allow to create exclusive_async_requests with one executor
* @ingroup ov_dev_api_plugin_api
Expand Down
1 change: 1 addition & 0 deletions src/inference/dev_api/openvino/runtime/iplugin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "openvino/runtime/icore.hpp"
#include "openvino/runtime/iremote_context.hpp"
#include "openvino/runtime/threading/executor_manager.hpp"
#include "openvino/util/mmap_object.hpp"
#include "openvino/util/pp.hpp"

namespace ov {
Expand Down
18 changes: 14 additions & 4 deletions src/inference/src/cache_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include <string>

#include "openvino/util/file_util.hpp"
#include "openvino/util/mmap_object.hpp"

namespace ov {

Expand Down Expand Up @@ -78,7 +79,7 @@ class ICacheManager {
* @param id Id of cache (hash of the model)
* @param reader Lambda function to be called when input stream is created
*/
virtual void read_cache_entry(const std::string& id, StreamReader reader) = 0;
virtual void read_cache_entry(const std::string& id, StreamReader reader, bool mmap = false) = 0;

/**
* @brief Callback when OpenVINO intends to remove cache entry
Expand Down Expand Up @@ -129,13 +130,22 @@ class FileStorageCacheManager final : public ICacheManager {
writer(stream);
}

void read_cache_entry(const std::string& id, StreamReader reader) override {
void read_cache_entry(const std::string& id, StreamReader reader, bool mmap = false) override {
// Fix the bug caused by pugixml, which may return unexpected results if the locale is different from "C".
ScopedLocale plocal_C(LC_ALL, "C");
auto blobFileName = getBlobFile(id);
if (ov::util::file_exists(blobFileName)) {
std::ifstream stream(blobFileName, std::ios_base::binary);
reader(stream);
if (mmap) {
auto mmap_buffer = ov::load_mmap_object(blobFileName);
MmapStreamBuffer stream_buf(mmap_buffer);
std::istringstream stream;
stream.basic_ios::rdbuf(&stream_buf);

reader(stream);
} else {
std::ifstream stream(blobFileName, std::ios_base::binary);
reader(stream);
}
}
}

Expand Down
14 changes: 14 additions & 0 deletions src/inference/src/dev/compilation_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,20 @@ std::istream& operator>>(std::istream& stream, CompiledBlobHeader& header) {
return stream;
}

// Parses a compiled-blob header straight from a null-terminated XML string
// (e.g. the start of a memory-mapped blob), mirroring the istream-based
// operator>> overload; throws if the string is not valid XML.
void operator>>(const char* xml_str, CompiledBlobHeader& header) {
    pugi::xml_document doc;
    const auto parse_result = doc.load_string(xml_str);

    if (parse_result.status != pugi::status_ok) {
        OPENVINO_THROW("[COMPILATION CONTEXT] Could not read compiled blob header.");
    }

    // Populate the header fields from the attributes of the root element.
    const auto root_node = doc.document_element();
    header.m_ieVersion = ov::util::pugixml::get_str_attr(root_node, "ie_version");
    header.m_fileInfo = ov::util::pugixml::get_str_attr(root_node, "file_info");
    header.m_runtimeInfo = ov::util::pugixml::get_str_attr(root_node, "runtime_info");
}

std::ostream& operator<<(std::ostream& stream, const CompiledBlobHeader& header) {
pugi::xml_document document;
auto compiledBlobNode = document.append_child("compiled_blob");
Expand Down
75 changes: 43 additions & 32 deletions src/inference/src/dev/core_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1406,43 +1406,48 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::load_model_from_cache(
struct HeaderException {};

OPENVINO_ASSERT(cacheContent.cacheManager != nullptr);

try {
cacheContent.cacheManager->read_cache_entry(cacheContent.blobId, [&](std::istream& networkStream) {
OV_ITT_SCOPE(FIRST_INFERENCE,
ov::itt::domains::LoadTime,
"Core::load_model_from_cache::ReadStreamAndImport");
try {
ov::CompiledBlobHeader header;
networkStream >> header;
if (header.get_file_info() != ov::ModelCache::calculate_file_info(cacheContent.modelPath)) {
// Original file is changed, don't use cache
OPENVINO_THROW("Original model file is changed");
}
if (util::contains(plugin.get_property(ov::internal::supported_properties),
ov::internal::compiled_model_runtime_properties_supported.name())) {
ov::AnyMap compiled_model_runtime_properties = {
{ov::internal::compiled_model_runtime_properties.name(),
std::string(header.get_runtime_info())}};
auto res = plugin.get_property(ov::internal::compiled_model_runtime_properties_supported.name(),
compiled_model_runtime_properties);
if (!res.as<bool>()) {
OPENVINO_THROW("Original model runtime properties have been changed, not supported anymore!");
cacheContent.cacheManager->read_cache_entry(
cacheContent.blobId,
[&](std::istream& networkStream) {
OV_ITT_SCOPE(FIRST_INFERENCE,
ov::itt::domains::LoadTime,
"Core::load_model_from_cache::ReadStreamAndImport");
try {
ov::CompiledBlobHeader header;
networkStream >> header;
if (header.get_file_info() != ov::ModelCache::calculate_file_info(cacheContent.modelPath)) {
// Original file is changed, don't use cache
OPENVINO_THROW("Original model file is changed");
}
} else {
if (header.get_openvino_version() != ov::get_openvino_version().buildNumber) {
// Build number mismatch, don't use this cache
OPENVINO_THROW("Version does not match");
if (util::contains(plugin.get_property(ov::internal::supported_properties),
ov::internal::compiled_model_runtime_properties_supported.name())) {
ov::AnyMap compiled_model_runtime_properties = {
{ov::internal::compiled_model_runtime_properties.name(),
std::string(header.get_runtime_info())}};
auto res = plugin.get_property(ov::internal::compiled_model_runtime_properties_supported.name(),
compiled_model_runtime_properties);
if (!res.as<bool>()) {
OPENVINO_THROW(
"Original model runtime properties have been changed, not supported anymore!");
}
} else {
if (header.get_openvino_version() != ov::get_openvino_version().buildNumber) {
// Build number mismatch, don't use this cache
OPENVINO_THROW("Version does not match");
}
}
} catch (...) {
throw HeaderException();
}
} catch (...) {
throw HeaderException();
}

ov::AnyMap update_config = config;
update_config[ov::loaded_from_cache.name()] = true;
compiled_model = context ? plugin.import_model(networkStream, context, update_config)
: plugin.import_model(networkStream, update_config);
});
ov::AnyMap update_config = config;
update_config[ov::loaded_from_cache.name()] = true;
compiled_model = context ? plugin.import_model(networkStream, context, update_config)
: plugin.import_model(networkStream, update_config);
},
plugin.supports_model_caching_with_mmap());
} catch (const HeaderException&) {
// For these exceptions just remove old cache and set that import didn't work
cacheContent.cacheManager->remove_cache_entry(cacheContent.blobId);
Expand Down Expand Up @@ -1603,6 +1608,12 @@ std::shared_ptr<ov::Model> ov::CoreImpl::read_model(const std::string& model,
return ov::util::read_model(model, weights, extensions, frontendMode);
}

// Reads an IR model whose xml and bin contents are already held in memory
// (e.g. memory-mapped buffers), delegating to the common read_model utility.
std::shared_ptr<ov::Model> ov::CoreImpl::read_model(const std::shared_ptr<AlignedBuffer>& model,
                                                    const std::shared_ptr<AlignedBuffer>& weights) const {
    // Profiling scope: attributes model-reading time to first-inference latency.
    OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::ReadTime, "CoreImpl::read_model from memory");
    return ov::util::read_model(model, weights, extensions);
}

std::map<std::string, ov::Version> ov::CoreImpl::get_versions(const std::string& deviceName) const {
std::map<std::string, ov::Version> versions;
std::vector<std::string> deviceNames;
Expand Down
Loading
Loading