Skip to content

Commit 005152a

Browse files
authored
Enable mmap for reading model from cache (openvinotoolkit#26696)
### Details:
- Enable mmap for reading model from cache

### Tickets:
- CVS-152423
1 parent 58073ca commit 005152a

File tree

10 files changed

+83
-46
lines changed

10 files changed

+83
-46
lines changed

src/common/util/include/openvino/util/mmap_object.hpp

-14
Original file line numberDiff line numberDiff line change
@@ -50,18 +50,4 @@ std::shared_ptr<ov::MappedMemory> load_mmap_object(const std::string& path);
5050
std::shared_ptr<ov::MappedMemory> load_mmap_object(const std::wstring& path);
5151

5252
#endif // OPENVINO_ENABLE_UNICODE_PATH_SUPPORT
53-
54-
class MmapStream final : public std::ifstream {
55-
public:
56-
MmapStream(const std::string& path) : std::ifstream(path, std::ios_base::binary) {
57-
m_memory = ov::load_mmap_object(path);
58-
}
59-
60-
#ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT
61-
MmapStream(const std::wstring& path);
62-
#endif // OPENVINO_ENABLE_UNICODE_PATH_SUPPORT
63-
64-
std::shared_ptr<ov::MappedMemory> m_memory;
65-
};
66-
6753
} // namespace ov

src/common/util/src/os/win/win_mmap_object.cpp

-5
Original file line numberDiff line numberDiff line change
@@ -141,11 +141,6 @@ std::shared_ptr<ov::MappedMemory> load_mmap_object(const std::wstring& path) {
141141
holder->set(path);
142142
return holder;
143143
}
144-
145-
MmapStream::MmapStream(const std::wstring& path) : std::ifstream(path.data(), std::ios_base::binary) {
146-
m_memory = ov::load_mmap_object(path);
147-
}
148-
149144
#endif
150145

151146
} // namespace ov

src/core/dev_api/openvino/runtime/shared_buffer.hpp

+57-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
namespace ov {
1010

11-
/// \brief SharedBuffer class to store pointer to pre-allocated buffer.
11+
/// \brief SharedBuffer class to store pointer to pre-allocated buffer. Owns the shared object.
1212
template <typename T>
1313
class SharedBuffer : public ov::AlignedBuffer {
1414
public:
@@ -28,4 +28,60 @@ class SharedBuffer : public ov::AlignedBuffer {
2828
T _shared_object;
2929
};
3030

31+
/// \brief SharedStreamBuffer class to store pointer to pre-allocated buffer and provide streambuf interface.
///
/// The buffer is a read-only, non-owning view: the caller must keep `data` valid for as long as
/// the stream buffer is in use. No get area is installed, so every read goes through the
/// overridden virtual functions and the current position is tracked in m_offset.
/// Invariant maintained by all members: 0 <= m_offset <= m_size.
class SharedStreamBuffer : public std::streambuf {
public:
    /// \param data Pointer to the first byte of the pre-allocated memory.
    /// \param size Number of readable bytes starting at `data`.
    SharedStreamBuffer(char* data, size_t size) : m_data(data), m_size(size), m_offset(0) {}

protected:
    // override std::streambuf methods

    /// \brief Copies up to `count` bytes from the current position into `s`.
    /// \return Number of bytes actually copied (0 at end of buffer or for a non-positive count).
    std::streamsize xsgetn(char* s, std::streamsize count) override {
        if (count <= 0) {
            // A negative length must never reach memcpy, where it would become a huge size_t.
            return 0;
        }
        const auto real_count = std::min<std::streamsize>(m_size - m_offset, count);
        if (real_count > 0) {
            std::memcpy(s, m_data + m_offset, static_cast<size_t>(real_count));
            m_offset += static_cast<size_t>(real_count);
        }
        return real_count;
    }

    /// \brief Returns the byte at the current position without consuming it, or eof.
    int_type underflow() override {
        return (m_offset >= m_size) ? traits_type::eof() : traits_type::to_int_type(*(m_data + m_offset));
    }

    /// \brief Returns the byte at the current position and advances past it, or eof.
    int_type uflow() override {
        return (m_offset >= m_size) ? traits_type::eof() : traits_type::to_int_type(*(m_data + m_offset++));
    }

    /// \brief Number of bytes that can still be read.
    std::streamsize showmanyc() override {
        return m_size - m_offset;
    }

    /// \brief Repositions the read pointer relative to the beginning, current position or end.
    ///
    /// The target must lie inside [0, size]; otherwise the position is left untouched and
    /// pos_type(off_type(-1)) is returned, which makes std::istream::seekg set failbit.
    /// \return The new absolute position, or pos_type(off_type(-1)) on failure.
    pos_type seekoff(off_type off,
                     std::ios_base::seekdir dir,
                     std::ios_base::openmode which = std::ios_base::in) override {
        const pos_type failure = pos_type(off_type(-1));
        if (which != std::ios_base::in) {
            return failure;
        }

        off_type base = 0;
        if (dir == std::ios_base::beg) {
            base = 0;
        } else if (dir == std::ios_base::cur) {
            base = static_cast<off_type>(m_offset);
        } else if (dir == std::ios_base::end) {
            base = static_cast<off_type>(m_size);
        } else {
            return failure;
        }

        // base is within [0, size], so both bounds below are computed without overflow.
        const off_type limit = static_cast<off_type>(m_size);
        if (off < -base || off > limit - base) {
            return failure;
        }

        m_offset = static_cast<size_t>(base + off);
        return pos_type(static_cast<off_type>(m_offset));
    }

    /// \brief Repositions the read pointer to an absolute position (used by seekg(pos)).
    /// \return The new absolute position, or pos_type(off_type(-1)) on failure.
    pos_type seekpos(pos_type pos, std::ios_base::openmode which = std::ios_base::in) override {
        return seekoff(off_type(pos), std::ios_base::beg, which);
    }

    char* m_data;     ///< Start of the viewed memory (not owned).
    size_t m_size;    ///< Total number of readable bytes.
    size_t m_offset;  ///< Current read position, always <= m_size.
};
72+
73+
/// \brief OwningSharedStreamBuffer is a SharedStreamBuffer which owns its shared object.
74+
class OwningSharedStreamBuffer : public SharedStreamBuffer {
75+
public:
76+
OwningSharedStreamBuffer(std::shared_ptr<ov::AlignedBuffer> buffer)
77+
: SharedStreamBuffer(static_cast<char*>(buffer->get_ptr()), buffer->size()),
78+
m_shared_obj(buffer) {}
79+
80+
std::shared_ptr<ov::AlignedBuffer> get_buffer() {
81+
return m_shared_obj;
82+
}
83+
84+
protected:
85+
std::shared_ptr<ov::AlignedBuffer> m_shared_obj;
86+
};
3187
} // namespace ov

src/inference/src/cache_manager.hpp

+10-4
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include <memory>
1515
#include <string>
1616

17+
#include "openvino/runtime/shared_buffer.hpp"
1718
#include "openvino/util/file_util.hpp"
1819
#include "openvino/util/mmap_object.hpp"
1920

@@ -77,9 +78,10 @@ class ICacheManager {
7778
* Otherwise, model will not be read from cache and will be loaded as usual
7879
*
7980
* @param id Id of cache (hash of the model)
81+
* @param enable_mmap If true, the cached blob is read through a memory-mapped buffer; otherwise it is read via std::ifstream
8082
* @param reader Lambda function to be called when input stream is created
8183
*/
82-
virtual void read_cache_entry(const std::string& id, StreamReader reader, bool mmap = false) = 0;
84+
virtual void read_cache_entry(const std::string& id, bool enable_mmap, StreamReader reader) = 0;
8385

8486
/**
8587
* @brief Callback when OpenVINO intends to remove cache entry
@@ -130,13 +132,17 @@ class FileStorageCacheManager final : public ICacheManager {
130132
writer(stream);
131133
}
132134

133-
void read_cache_entry(const std::string& id, StreamReader reader, bool mmap = false) override {
135+
void read_cache_entry(const std::string& id, bool enable_mmap, StreamReader reader) override {
134136
// Fix the bug caused by pugixml, which may return unexpected results if the locale is different from "C".
135137
ScopedLocale plocal_C(LC_ALL, "C");
136138
auto blob_file_name = getBlobFile(id);
137139
if (ov::util::file_exists(blob_file_name)) {
138-
if (mmap) {
139-
MmapStream stream(blob_file_name);
140+
if (enable_mmap) {
141+
auto mmap = ov::load_mmap_object(blob_file_name);
142+
auto shared_buffer =
143+
std::make_shared<ov::SharedBuffer<std::shared_ptr<MappedMemory>>>(mmap->data(), mmap->size(), mmap);
144+
OwningSharedStreamBuffer buf(shared_buffer);
145+
std::istream stream(&buf);
140146
reader(stream);
141147
} else {
142148
std::ifstream stream(blob_file_name, std::ios_base::binary);

src/inference/src/dev/core_impl.cpp

+4-10
Original file line numberDiff line numberDiff line change
@@ -1397,19 +1397,12 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::compile_model_and_cache(ov::Plugin&
13971397
return compiled_model;
13981398
}
13991399

1400-
static bool does_plugin_support_model_caching_with_mmap(const ov::Plugin& plugin) {
1401-
bool supported = plugin.supports_model_caching();
1402-
supported &=
1403-
ov::util::contains(plugin.get_property(ov::internal::supported_properties), ov::internal::caching_with_mmap);
1404-
return supported;
1405-
}
1406-
14071400
ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::load_model_from_cache(
14081401
const CacheContent& cacheContent,
14091402
ov::Plugin& plugin,
14101403
const ov::AnyMap& config,
14111404
const ov::SoPtr<ov::IRemoteContext>& context,
1412-
std::function<ov::SoPtr<ov::ICompiledModel>()> compile_model_lambda) {
1405+
std::function<ov::SoPtr<ov::ICompiledModel>()> compile_model_lambda) const {
14131406
ov::SoPtr<ov::ICompiledModel> compiled_model;
14141407
struct HeaderException {};
14151408

@@ -1418,6 +1411,8 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::load_model_from_cache(
14181411
try {
14191412
cacheContent.cacheManager->read_cache_entry(
14201413
cacheContent.blobId,
1414+
coreConfig.get_enable_mmap() && ov::util::contains(plugin.get_property(ov::internal::supported_properties),
1415+
ov::internal::caching_with_mmap),
14211416
[&](std::istream& networkStream) {
14221417
OV_ITT_SCOPE(FIRST_INFERENCE,
14231418
ov::itt::domains::LoadTime,
@@ -1454,8 +1449,7 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::load_model_from_cache(
14541449
update_config[ov::loaded_from_cache.name()] = true;
14551450
compiled_model = context ? plugin.import_model(networkStream, context, update_config)
14561451
: plugin.import_model(networkStream, update_config);
1457-
},
1458-
does_plugin_support_model_caching_with_mmap(plugin));
1452+
});
14591453
} catch (const HeaderException&) {
14601454
// For these exceptions just remove old cache and set that import didn't work
14611455
cacheContent.cacheManager->remove_cache_entry(cacheContent.blobId);

src/inference/src/dev/core_impl.hpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -149,12 +149,12 @@ class CoreImpl : public ov::ICore, public std::enable_shared_from_this<ov::ICore
149149
const ov::SoPtr<ov::IRemoteContext>& context,
150150
const CacheContent& cacheContent) const;
151151

152-
static ov::SoPtr<ov::ICompiledModel> load_model_from_cache(
152+
ov::SoPtr<ov::ICompiledModel> load_model_from_cache(
153153
const CacheContent& cacheContent,
154154
ov::Plugin& plugin,
155155
const ov::AnyMap& config,
156156
const ov::SoPtr<ov::IRemoteContext>& context,
157-
std::function<ov::SoPtr<ov::ICompiledModel>()> compile_model_lambda);
157+
std::function<ov::SoPtr<ov::ICompiledModel>()> compile_model_lambda) const;
158158

159159
bool device_supports_model_caching(const ov::Plugin& plugin) const;
160160

src/plugins/intel_cpu/src/plugin.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -446,7 +446,7 @@ ov::Any Plugin::get_ro_property(const std::string& name, const ov::AnyMap& optio
446446
} else if (ov::internal::supported_properties == name) {
447447
return decltype(ov::internal::supported_properties)::value_type{
448448
ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO},
449-
#if !defined(OPENVINO_ARCH_ARM)
449+
#if !defined(OPENVINO_ARCH_ARM) && !(defined(__APPLE__) || defined(__MACOSX))
450450
ov::PropertyName{ov::internal::caching_with_mmap.name(), ov::PropertyMutability::RO},
451451
#endif
452452
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW},

src/plugins/intel_cpu/src/utils/serialize.cpp

+6-7
Original file line numberDiff line numberDiff line change
@@ -58,19 +58,20 @@ void ModelDeserializer::set_info(pugi::xml_node& root, std::shared_ptr<ov::Model
5858
}
5959

6060
void ModelDeserializer::operator>>(std::shared_ptr<ov::Model>& model) {
61-
if (auto mmap_stream = dynamic_cast<MmapStream*>(&m_istream)) {
62-
process_mmap(model, mmap_stream->m_memory);
61+
if (auto mmap_buffer = dynamic_cast<OwningSharedStreamBuffer*>(m_istream.rdbuf())) {
62+
auto buffer = mmap_buffer->get_buffer();
63+
process_mmap(model, buffer);
6364
} else {
6465
process_stream(model);
6566
}
6667
}
6768

6869
void ModelDeserializer::process_mmap(std::shared_ptr<ov::Model>& model,
69-
const std::shared_ptr<ov::MappedMemory>& mmemory) {
70+
const std::shared_ptr<ov::AlignedBuffer>& mmemory) {
7071
// Note: Don't use seekg with mmaped stream. This may affect the performance of some models.
7172
// Get file size before seek content.
7273
// Blob from cache may have other header, so need to skip this.
73-
auto buffer_base = mmemory->data();
74+
auto buffer_base = reinterpret_cast<char*>(mmemory->get_ptr());
7475
const auto file_size = mmemory->size();
7576
const size_t hdr_pos = m_istream.tellg();
7677

@@ -98,9 +99,7 @@ void ModelDeserializer::process_mmap(std::shared_ptr<ov::Model>& model,
9899
// Map blob content
99100
std::shared_ptr<ov::AlignedBuffer> weights_buf;
100101
if (hdr.consts_size) {
101-
weights_buf = std::make_shared<ov::SharedBuffer<std::shared_ptr<MappedMemory>>>(buffer_base + hdr.consts_offset,
102-
hdr.consts_size,
103-
mmemory);
102+
weights_buf = std::make_shared<ov::SharedBuffer<std::shared_ptr<ov::AlignedBuffer>>>(buffer_base + hdr.consts_offset, hdr.consts_size, mmemory);
104103
}
105104

106105
// XML content

src/plugins/intel_cpu/src/utils/serialize.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ class ModelDeserializer {
4040
protected:
4141
static void set_info(pugi::xml_node& root, std::shared_ptr<ov::Model>& model);
4242

43-
void process_mmap(std::shared_ptr<ov::Model>& model, const std::shared_ptr<ov::MappedMemory>& memory);
43+
void process_mmap(std::shared_ptr<ov::Model>& model, const std::shared_ptr<ov::AlignedBuffer>& memory);
4444

4545
void process_stream(std::shared_ptr<ov::Model>& model);
4646

src/plugins/intel_gpu/src/plugin/plugin.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -575,7 +575,8 @@ std::vector<ov::PropertyName> Plugin::get_supported_internal_properties() const
575575
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW},
576576
ov::PropertyName{ov::internal::compiled_model_runtime_properties.name(), ov::PropertyMutability::RO},
577577
ov::PropertyName{ov::internal::compiled_model_runtime_properties_supported.name(), ov::PropertyMutability::RO},
578-
ov::PropertyName{ov::internal::query_model_ratio.name(), PropertyMutability::RW}};
578+
ov::PropertyName{ov::internal::query_model_ratio.name(), PropertyMutability::RW},
579+
ov::PropertyName{ov::internal::caching_with_mmap.name(), PropertyMutability::RO}};
579580
return supported_internal_properties;
580581
}
581582

0 commit comments

Comments
 (0)