[CPU] Enable mmap for model loading from cache. #23315

Merged: 24 commits merged on Oct 12, 2024

Commits (24) — changes shown from 18 commits
b6c8c84
[CPU] Enable mmap for model loading from cache.
nshchego Mar 7, 2024
e392bef
Code style + comments.
nshchego Mar 8, 2024
3e84f33
Another API version.
nshchego Mar 11, 2024
190adab
Move import_model definition
nshchego Mar 26, 2024
9663493
Fixes as per comments
nshchego Apr 1, 2024
f092feb
Fixes as per comments
nshchego Apr 19, 2024
2e3861f
Serialize was divided on subclasses
nshchego Apr 25, 2024
fca7623
Add enable_mmap flag
nshchego Apr 28, 2024
0484ea1
Pass buffer without copy
nshchego Jul 4, 2024
fcc99fa
Pass MappedMemory via stream
nshchego Aug 6, 2024
9a11c96
Fixes as per comments
nshchego Aug 10, 2024
052dc3c
Merge remote-tracking branch 'origin/master' into cpu/compile_mmap
nshchego Sep 17, 2024
c172eed
Fixes as per comments
nshchego Sep 17, 2024
235330f
Build fix
nshchego Sep 18, 2024
6d46bac
Merge remote-tracking branch 'origin/master' into cpu/compile_mmap
nshchego Oct 2, 2024
d93f3ff
Merge remote-tracking branch 'origin/master' into cpu/compile_mmap
nshchego Oct 2, 2024
3302736
Merge remote-tracking branch 'origin/master' into cpu/compile_mmap
nshchego Oct 6, 2024
5d2c475
Merge remote-tracking branch 'origin/master' into cpu/compile_mmap
nshchego Oct 7, 2024
d5961c5
Fixes as per comments
nshchego Oct 9, 2024
cc9f589
Merge remote-tracking branch 'origin/master' into cpu/compile_mmap
nshchego Oct 9, 2024
9dad4e8
Merge remote-tracking branch 'origin/master' into cpu/compile_mmap
nshchego Oct 10, 2024
aaff252
Merge remote-tracking branch 'origin/master' into cpu/compile_mmap
nshchego Oct 11, 2024
87ab2de
Merge remote-tracking branch 'origin/master' into cpu/compile_mmap
nshchego Oct 11, 2024
44ce367
Reveret read_model due to frontends conflicts
nshchego Oct 11, 2024
14 changes: 14 additions & 0 deletions src/common/util/include/openvino/util/mmap_object.hpp
@@ -9,6 +9,7 @@

#pragma once

#include <fstream>
#include <memory>
#include <string>

@@ -50,4 +51,17 @@ std::shared_ptr<ov::MappedMemory> load_mmap_object(const std::wstring& path);

#endif // OPENVINO_ENABLE_UNICODE_PATH_SUPPORT

class MmapStream final : public std::ifstream {
public:
MmapStream(const std::string& path) : std::ifstream(path, std::ios_base::binary) {
m_memory = ov::load_mmap_object(path);
}

#ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT
MmapStream(const std::wstring& path);
#endif // OPENVINO_ENABLE_UNICODE_PATH_SUPPORT

std::shared_ptr<ov::MappedMemory> m_memory;
};

} // namespace ov
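
For context, a minimal usage sketch (hypothetical caller, not part of this diff): because MmapStream still behaves as a std::ifstream, existing readers keep working, while mmap-aware code can detect the subclass and read the mapped region directly through m_memory. The data()/size() accessors are assumed to match ov::MappedMemory as declared in this header.

// Hypothetical reader sketch: falls back to buffered reads unless the caller
// handed us an ov::MmapStream, in which case the whole blob is addressable.
#include "openvino/util/mmap_object.hpp"

#include <cstddef>
#include <istream>

void read_blob(std::istream& stream) {
    if (auto* mmap_stream = dynamic_cast<ov::MmapStream*>(&stream)) {
        // Zero-copy path: the file content is available as one contiguous region.
        const char* data = mmap_stream->m_memory->data();
        const std::size_t size = mmap_stream->m_memory->size();
        (void)data;
        (void)size;  // ... parse directly from the mapping ...
    } else {
        // Regular path: plain buffered std::ifstream reads.
        char header[16] = {};
        stream.read(header, sizeof(header));
    }
}
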
4 changes: 4 additions & 0 deletions src/common/util/src/os/win/win_mmap_object.cpp
@@ -142,6 +142,10 @@ std::shared_ptr<ov::MappedMemory> load_mmap_object(const std::wstring& path) {
return holder;
}

MmapStream::MmapStream(const std::wstring& path) : std::ifstream(path.data(), std::ios_base::binary) {
m_memory = ov::load_mmap_object(path);
}

#endif

} // namespace ov
44 changes: 35 additions & 9 deletions src/frontends/ir/src/frontend.cpp
@@ -37,35 +37,50 @@ size_t get_ir_version(const pugi::xml_document& doc) {
return 0;
}

constexpr size_t HEADER_SIZE_LIM = 512lu;

/**
* @brief Extracts IR version from model stream
* @param model Model's stream
 * @return IR version, 0 if the model does not represent an IR
*/
size_t get_ir_version(std::istream& model) {
size_t get_ir_version(const char* model, size_t model_size) {
// The IR version is an attribute of the root tag, so there is no need to parse the whole stream.
std::array<char, 512> header{};
model.seekg(0, model.beg);
model.read(header.data(), header.size());
model.clear();
model.seekg(0, model.beg);

size_t header_size = model_size > HEADER_SIZE_LIM ? HEADER_SIZE_LIM : model_size;
pugi::xml_document doc;

// For most IRs `load_buffer` returns a parsing error here, since 512 bytes are not enough for the whole
// root node. Per the pugixml manual: "If parsing failed because the source data was not a valid XML, the resulting
// tree is not destroyed - despite the fact that load function returns error, you can use the part of the tree that
// was successfully parsed." The root node is therefore still inspected, as it should be enough to read the model
// version. If the IR is small enough to fit into 512 bytes, an OK status is returned instead. Thus the returned
// value is ignored.
std::ignore =
doc.load_buffer(header.data(), header.size(), pugi::parse_default | pugi::parse_fragment, pugi::encoding_utf8);
std::ignore = doc.load_buffer(model, header_size, pugi::parse_default | pugi::parse_fragment, pugi::encoding_utf8);

auto ir_version = get_ir_version(doc);

// If an attribute with a very long name precedes the version attribute of the root node, or a long comment
// node precedes the root node, the version attribute is not accessible within the first 512 bytes, so parse the
// whole buffer and try to obtain the version value.
if (ir_version == 0) {
if (ir_version == 0lu && header_size < model_size &&
doc.load_buffer(model, model_size, pugi::parse_default | pugi::parse_fragment, pugi::encoding_utf8)) {
ir_version = get_ir_version(doc);
}

return ir_version;
}

size_t get_ir_version(std::istream& model) {
char header[HEADER_SIZE_LIM];

model.seekg(0, model.beg);
model.read(header, HEADER_SIZE_LIM);
model.clear();
model.seekg(0, model.beg);

auto ir_version = get_ir_version(header, HEADER_SIZE_LIM);
if (ir_version == 0lu) {
pugi::xml_document doc;
if (doc.load(model))
ir_version = get_ir_version(doc);

@@ -75,13 +90,15 @@ size_t get_ir_version(std::istream& model) {

return ir_version;
}

} // namespace

bool FrontEnd::supported_impl(const std::vector<ov::Any>& variants) const {
// Last boolean flag in `variants` (if present) is reserved for FE configuration
size_t extra_variants_num = variants.size() > 0 && variants[variants.size() - 1].is<bool>() ? 1 : 0;
std::ifstream local_model_stream;
std::istream* provided_model_stream = nullptr;
std::shared_ptr<AlignedBuffer> model_buffer = nullptr;

if (variants.empty() || variants.size() > 3 + extra_variants_num) {
return false;
@@ -102,6 +119,8 @@ bool FrontEnd::supported_impl(const std::vector<ov::Any>& variants) const {
provided_model_stream = model_variant.as<std::istream*>();
} else if (model_variant.is<std::istringstream*>()) {
provided_model_stream = model_variant.as<std::istringstream*>();
} else if (model_variant.is<std::shared_ptr<AlignedBuffer>>()) {
model_buffer = model_variant.as<std::shared_ptr<AlignedBuffer>>();
}

if (provided_model_stream && local_model_stream.is_open()) {
@@ -114,6 +133,8 @@ bool FrontEnd::supported_impl(const std::vector<ov::Any>& variants) const {
} else if (local_model_stream.is_open()) {
version = get_ir_version(local_model_stream);
local_model_stream.close();
} else if (model_buffer) {
version = get_ir_version(model_buffer->get_ptr<char>(), model_buffer->size());
} else {
return false;
}
@@ -135,6 +156,7 @@ void FrontEnd::add_extension(const ov::Extension::Ptr& ext) {
InputModel::Ptr FrontEnd::load_impl(const std::vector<ov::Any>& variants) const {
std::ifstream local_model_stream;
std::istream* provided_model_stream = nullptr;
std::shared_ptr<ov::AlignedBuffer> model_buf;
std::shared_ptr<ov::AlignedBuffer> weights;

auto create_extensions_map = [&]() -> std::unordered_map<ov::DiscreteTypeInfo, ov::BaseOpExtension::Ptr> {
@@ -153,6 +175,8 @@ InputModel::Ptr FrontEnd::load_impl(const std::vector<ov::Any>& variants) const
auto input_model = std::make_shared<InputModel>(local_model_stream, weights, create_extensions_map());
local_model_stream.close();
return input_model;
} else if (model_buf) {
return std::make_shared<InputModel>(model_buf, weights, create_extensions_map());
}
return nullptr;
};
@@ -184,6 +208,8 @@ InputModel::Ptr FrontEnd::load_impl(const std::vector<ov::Any>& variants) const
provided_model_stream = model_variant.as<std::istream*>();
} else if (model_variant.is<std::istringstream*>()) {
provided_model_stream = model_variant.as<std::istringstream*>();
} else if (model_variant.is<std::shared_ptr<AlignedBuffer>>()) {
model_buf = model_variant.as<std::shared_ptr<AlignedBuffer>>();
}

// Check weights and extensions
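
The two-stage probe above relies on a pugixml property: load_buffer() keeps the partially built tree even when it reports an error, so the root tag's attributes are usually recoverable from the first 512 bytes alone. Below is a standalone, illustrative sketch of that behaviour; the attribute lookup only mirrors what get_ir_version(doc) is assumed to do and is not code from this PR.

// Standalone illustration of the header-window trick: parsing a deliberately
// truncated IR fragment still yields the root node and its version attribute.
#include <pugixml.hpp>

#include <cstdio>
#include <cstring>

int main() {
    const char* ir = "<net name=\"m\" version=\"11\"><layers>";  // truncated on purpose
    pugi::xml_document doc;
    const auto res =
        doc.load_buffer(ir, std::strlen(ir), pugi::parse_default | pugi::parse_fragment, pugi::encoding_utf8);
    // res reports an error, but the partially parsed tree is still usable.
    const unsigned version = doc.document_element().attribute("version").as_uint(0);
    std::printf("parse status: %d, IR version: %u\n", static_cast<int>(res.status), version);
    return 0;
}
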
37 changes: 28 additions & 9 deletions src/frontends/ir/src/input_model.cpp
@@ -207,28 +207,47 @@ class InputModel::InputModelIRImpl {
pugi::xml_document m_xml_doc;

public:
InputModelIRImpl(std::istream& stream,
InputModelIRImpl(std::istream& model,
const std::shared_ptr<ov::AlignedBuffer>& weights,
const std::unordered_map<ov::DiscreteTypeInfo, ov::BaseOpExtension::Ptr>& extensions)
: m_weights(weights),
m_extensions(extensions) {
pugi::xml_parse_result res = m_xml_doc.load(stream);
if (res.status != pugi::status_ok) {
OPENVINO_THROW(res.description(), " at offset ", res.offset);
}
pugi::xml_parse_result res = m_xml_doc.load(model);
OPENVINO_ASSERT(res.status == pugi::status_ok, res.description(), " at offset ", res.offset);
init_opset();
}

InputModelIRImpl(const std::shared_ptr<ov::AlignedBuffer>& model,
const std::shared_ptr<ov::AlignedBuffer>& weights,
const std::unordered_map<ov::DiscreteTypeInfo, ov::BaseOpExtension::Ptr>& extensions)
: m_weights(weights),
m_extensions(extensions) {
auto res = m_xml_doc.load_buffer(model->get_ptr(), model->size(), pugi::parse_default, pugi::encoding_utf8);
OPENVINO_ASSERT(res.status == pugi::status_ok, res.description(), " at offset ", res.offset);
init_opset();
}

std::shared_ptr<ov::Model> convert();

private:
void init_opset() {
m_root = m_xml_doc.document_element();
for (const auto& it : ov::get_available_opsets()) {
m_opsets[it.first] = it.second();
}
}

std::shared_ptr<ov::Model> convert();
};

InputModel::InputModel(std::istream& stream,
InputModel::InputModel(std::istream& model,
const std::shared_ptr<ov::AlignedBuffer>& weights,
const std::unordered_map<ov::DiscreteTypeInfo, ov::BaseOpExtension::Ptr>& extensions) {
_impl = std::make_shared<InputModelIRImpl>(model, weights, extensions);
}

InputModel::InputModel(const std::shared_ptr<ov::AlignedBuffer>& model,
const std::shared_ptr<ov::AlignedBuffer>& weights,
const std::unordered_map<ov::DiscreteTypeInfo, ov::BaseOpExtension::Ptr>& extensions) {
_impl = std::make_shared<InputModelIRImpl>(stream, weights, extensions);
_impl = std::make_shared<InputModelIRImpl>(model, weights, extensions);
}

std::shared_ptr<ov::Model> InputModel::convert() {
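
The new AlignedBuffer-based constructor is what lets a memory-mapped cache blob reach the XML parser without an intermediate stream. A hedged sketch of how such a buffer could be produced follows; ov::SharedBuffer and the data()/size() accessors of ov::MappedMemory are assumed to behave as in the OpenVINO runtime headers, and the helper name is made up.

// Hypothetical helper: expose a memory mapping as an ov::AlignedBuffer so it
// can be handed to InputModel's buffer-based constructor. The SharedBuffer
// keeps the mapping alive for as long as any consumer references the buffer.
#include "openvino/runtime/aligned_buffer.hpp"
#include "openvino/runtime/shared_buffer.hpp"
#include "openvino/util/mmap_object.hpp"

#include <memory>

std::shared_ptr<ov::AlignedBuffer> wrap_mapping(const std::shared_ptr<ov::MappedMemory>& mapping) {
    using Holder = std::shared_ptr<ov::MappedMemory>;
    return std::make_shared<ov::SharedBuffer<Holder>>(mapping->data(), mapping->size(), mapping);
}
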
4 changes: 4 additions & 0 deletions src/frontends/ir/src/input_model.hpp
@@ -24,6 +24,10 @@ class InputModel : public ov::frontend::InputModel {
const std::shared_ptr<ov::AlignedBuffer>& weights,
const std::unordered_map<ov::DiscreteTypeInfo, ov::BaseOpExtension::Ptr>& extensions);

InputModel(const std::shared_ptr<ov::AlignedBuffer>& model_buf,
const std::shared_ptr<ov::AlignedBuffer>& weights,
const std::unordered_map<ov::DiscreteTypeInfo, ov::BaseOpExtension::Ptr>& extensions);

std::shared_ptr<Model> convert();
};

10 changes: 10 additions & 0 deletions src/inference/dev_api/openvino/runtime/icore.hpp
@@ -11,6 +11,7 @@

#include <memory>

#include "openvino/runtime/aligned_buffer.hpp"
#include "openvino/runtime/icompiled_model.hpp"
#include "openvino/runtime/properties.hpp"
#include "openvino/runtime/so_ptr.hpp"
@@ -45,6 +46,15 @@ class OPENVINO_RUNTIME_API ICore {
const ov::Tensor& weights,
bool frontend_mode = false) const = 0;

/**
* @brief Reads IR xml and bin from buffer
* @param model shared pointer to aligned buffer with IR
* @param weights shared pointer to aligned buffer with weights
* @return shared pointer to ov::Model
*/
virtual std::shared_ptr<ov::Model> read_model(const std::shared_ptr<AlignedBuffer>& model,
const std::shared_ptr<AlignedBuffer>& weights) const = 0;

/**
* @brief Reads IR xml and bin files
* @param model_path path to IR file
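
A short plugin-side sketch of the new overload (the wrapper function is hypothetical, and get_core() is assumed to be reachable as in ov::IPlugin): instead of reconstructing a stream from mapped memory, the plugin can hand both buffers straight to the core.

// Hypothetical plugin-side helper: read a model back from two aligned buffers
// (IR xml and weights), e.g. ones backed by a memory-mapped cache blob.
#include "openvino/runtime/icore.hpp"
#include "openvino/runtime/iplugin.hpp"

#include <memory>

std::shared_ptr<ov::Model> read_model_from_buffers(const ov::IPlugin& plugin,
                                                   const std::shared_ptr<ov::AlignedBuffer>& xml,
                                                   const std::shared_ptr<ov::AlignedBuffer>& weights) {
    return plugin.get_core()->read_model(xml, weights);
}
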
@@ -29,6 +29,13 @@ static constexpr Property<std::vector<PropertyName>, PropertyMutability::RO> sup
*/
static constexpr Property<std::vector<PropertyName>, PropertyMutability::RO> caching_properties{"CACHING_PROPERTIES"};

/**
 * @brief Read-only boolean property indicating whether the plugin supports
 * loading a model from the cache via mmap
* @ingroup ov_dev_api_plugin_api
*/
static constexpr Property<bool, PropertyMutability::RO> caching_with_mmap{"CACHING_WITH_MMAP"};

/**
* @brief Allow to create exclusive_async_requests with one executor
* @ingroup ov_dev_api_plugin_api
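
Below is a hedged sketch of how the core side might discover this capability, assuming the plugin lists the new key among its internal supported properties; the helper function and the exact wiring are assumptions, not part of this diff.

// Hypothetical capability check: enable the mmap cache path only when the
// device advertises CACHING_WITH_MMAP among its internal supported properties.
#include "openvino/runtime/internal_properties.hpp"
#include "openvino/runtime/iplugin.hpp"

#include <algorithm>
#include <vector>

bool device_supports_cache_mmap(const ov::IPlugin& plugin) {
    const auto names = plugin.get_property(ov::internal::supported_properties.name(), {})
                           .as<std::vector<ov::PropertyName>>();
    return std::any_of(names.begin(), names.end(), [](const ov::PropertyName& name) {
        return name == ov::internal::caching_with_mmap.name();
    });
}
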
18 changes: 12 additions & 6 deletions src/inference/src/cache_manager.hpp
@@ -15,6 +15,7 @@
#include <string>

#include "openvino/util/file_util.hpp"
#include "openvino/util/mmap_object.hpp"

namespace ov {

@@ -78,7 +79,7 @@ class ICacheManager {
* @param id Id of cache (hash of the model)
 * @param reader Lambda function to be called when input stream is created
 * @param mmap Whether the cache entry should be exposed through a memory-mapped ov::MmapStream
 */
virtual void read_cache_entry(const std::string& id, StreamReader reader) = 0;
virtual void read_cache_entry(const std::string& id, StreamReader reader, bool mmap = false) = 0;

/**
* @brief Callback when OpenVINO intends to remove cache entry
@@ -129,13 +130,18 @@ class FileStorageCacheManager final : public ICacheManager {
writer(stream);
}

void read_cache_entry(const std::string& id, StreamReader reader) override {
void read_cache_entry(const std::string& id, StreamReader reader, bool mmap = false) override {
// Fix the bug caused by pugixml, which may return unexpected results if the locale is different from "C".
ScopedLocale plocal_C(LC_ALL, "C");
auto blobFileName = getBlobFile(id);
if (ov::util::file_exists(blobFileName)) {
std::ifstream stream(blobFileName, std::ios_base::binary);
reader(stream);
auto blob_file_name = getBlobFile(id);
if (ov::util::file_exists(blob_file_name)) {
if (mmap) {
MmapStream stream(blob_file_name);
reader(stream);
} else {
std::ifstream stream(blob_file_name, std::ios_base::binary);
reader(stream);
}
}
}

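For illustration, a hedged caller-side sketch (assumed names; cache_manager.hpp is an internal header and ICacheManager::StreamReader is assumed to accept a std::istream& callback): the reader keeps its std::istream& signature either way, so only mmap-aware consumers need to care which concrete stream they received.

// Hypothetical caller: request a cache entry, asking for the mmap-backed
// stream when the device supports it. The callback signature is unchanged.
#include "cache_manager.hpp"  // internal header under src/inference/src

#include <istream>
#include <string>

void load_blob(ov::ICacheManager& cache, const std::string& blob_id, bool use_mmap) {
    cache.read_cache_entry(
        blob_id,
        [](std::istream& stream) {
            // An mmap-aware consumer could dynamic_cast to ov::MmapStream here
            // (see the sketch after mmap_object.hpp) and use the mapping directly.
            char magic[8] = {};
            stream.read(magic, sizeof(magic));
        },
        use_mmap);
}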