Skip to content

Commit 094f1bc

Browse files
authored
[intel_npu] [DUPLICATE] [master] [reduce memory consumption] Avoid creating a blob copy while exporting a compiled model (#26783)
### Details: - *Duplicates PR #26754* - *Add support for new L0 API 1.7* - *Change return type of `getCompiledNetwork` to new custom `CompiledNetwork` container* ### Tickets: - *[151912](https://jira.devtools.intel.com/browse/CVS-151912)*
1 parent 81bd537 commit 094f1bc

File tree

8 files changed

+150
-51
lines changed

8 files changed

+150
-51
lines changed

src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp

+30-2
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,32 @@ struct NetworkDescription final {
151151
NetworkMetadata metadata;
152152
};
153153

154+
/**
155+
* @struct CompiledNetwork
156+
* @brief Custom container for compiled network, used for export
157+
* @var CompiledNetwork::data
158+
* Pointer to the address of compiled network
159+
* @var CompiledNetwork::size
160+
* Size of the compiled network
161+
* @var CompiledNetwork::ownedStorage
162+
* Plugin owned compiled network storage that is required in case of a driver that
163+
* doesn't support graph extension 1.7, as in this case plugin must create a copy of the compiled network.
164+
* @note It's unsafe to store either data or size outside of the compiled network object as its destructor
165+
* would release the owning container
166+
*/
167+
168+
struct CompiledNetwork {
169+
const uint8_t* data;
170+
size_t size;
171+
CompiledNetwork(const uint8_t* data, size_t size, std::vector<uint8_t> storage)
172+
: data(data),
173+
size(size),
174+
ownedStorage(std::move(storage)) {}
175+
176+
private:
177+
std::vector<uint8_t> ownedStorage;
178+
};
179+
154180
/**
155181
* @interface ICompiler
156182
* @brief An interface to be implemented by a concrete compiler to provide
@@ -203,8 +229,10 @@ class ICompiler : public std::enable_shared_from_this<ICompiler> {
203229
// Driver compiler can use this to release graphHandle, if we do not have executor
204230
virtual void release([[maybe_unused]] std::shared_ptr<const NetworkDescription> networkDescription){};
205231

206-
virtual std::vector<uint8_t> getCompiledNetwork(std::shared_ptr<const NetworkDescription> networkDescription) {
207-
return networkDescription->compiledNetwork;
232+
/**
 * @brief Returns a view over the compiled network held by the given network description.
 * @param networkDescription Description whose `compiledNetwork` buffer is exposed.
 * @return A CompiledNetwork whose `data`/`size` refer to `networkDescription.compiledNetwork`.
 * @note The returned pointer refers to memory inside `networkDescription`, so the description has to
 * outlive every use of the returned object.
 */
virtual CompiledNetwork getCompiledNetwork(const NetworkDescription& networkDescription) {
    // `data` refers to the buffer stored in `networkDescription`; handing a duplicate of that buffer
    // to the returned object would only double the memory footprint, since nothing reads the duplicate.
    return CompiledNetwork(networkDescription.compiledNetwork.data(),
                           networkDescription.compiledNetwork.size(),
                           std::vector<uint8_t>{});
}
209237

210238
protected:

src/plugins/intel_npu/src/backend/include/zero_types.hpp

+7-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
/**
1717
* @brief Last version of Table of Graph Extension functions used within plugin
1818
*/
19-
using ze_graph_dditable_ext_last_t = ze_graph_dditable_ext_1_6_t;
19+
using ze_graph_dditable_ext_last_t = ze_graph_dditable_ext_1_7_t;
2020
/**
2121
* @brief Last version of the Command Queue functions used within plugin
2222
*/
@@ -155,6 +155,12 @@ struct ze_graph_dditable_ext_decorator final {
155155
throwWhenUnsupported("pfnDeviceGetGraphProperties2", ZE_GRAPH_EXT_VERSION_1_6);
156156
return _impl->pfnDeviceGetGraphProperties2(hDevice, pDeviceGraphProperties);
157157
}
158+
159+
// version 1.7
160+
// Forwards to the wrapped table's pfnGetNativeBinary2 entry point after the version gate.
// NOTE(review): throwWhenUnsupported presumably throws when the driver's graph extension is older
// than the version passed in - confirm against its definition.
ze_result_t ZE_APICALL pfnGetNativeBinary2(ze_graph_handle_t hGraph, size_t* pSize, uint8_t** pGraphNativeBinary) {
    throwWhenUnsupported("pfnGetNativeBinary2", ZE_GRAPH_EXT_VERSION_1_7);
    const ze_result_t status = _impl->pfnGetNativeBinary2(hGraph, pSize, pGraphNativeBinary);
    return status;
}
158164
};
159165

160166
/**

src/plugins/intel_npu/src/compiler/include/driver_compiler_adapter.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ class LevelZeroCompilerAdapter final : public ICompiler {
3636

3737
void release(std::shared_ptr<const NetworkDescription> networkDescription) override;
3838

39-
std::vector<uint8_t> getCompiledNetwork(std::shared_ptr<const NetworkDescription> networkDescription) override;
39+
CompiledNetwork getCompiledNetwork(const NetworkDescription& networkDescription) override;
4040

4141
private:
4242
/**

src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp

+20-1
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,11 @@ using SerializedIR = std::pair<size_t, std::shared_ptr<uint8_t>>;
4343
(std::is_same<T, ze_graph_dditable_ext_1_2_t>::value || std::is_same<T, ze_graph_dditable_ext_1_3_t>::value || \
4444
std::is_same<T, ze_graph_dditable_ext_1_4_t>::value || std::is_same<T, ze_graph_dditable_ext_1_5_t>::value)
4545

46+
// True for the graph extension tables 1.2 through 1.6. Used as an enable_if condition to pick the
// getNativeBinary overload that fills a plugin-side std::vector through pfnGetNativeBinary.
#define UseCopyForNativeBinary(T)                          \
    (std::is_same<T, ze_graph_dditable_ext_1_2_t>::value || \
     std::is_same<T, ze_graph_dditable_ext_1_3_t>::value || \
     std::is_same<T, ze_graph_dditable_ext_1_4_t>::value || \
     std::is_same<T, ze_graph_dditable_ext_1_5_t>::value || \
     std::is_same<T, ze_graph_dditable_ext_1_6_t>::value)
50+
4651
/**
4752
* Adapter to use CiD through ZeroAPI
4853
*/
@@ -100,7 +105,7 @@ class LevelZeroCompilerInDriver final : public ICompiler {
100105

101106
void release(std::shared_ptr<const NetworkDescription> networkDescription) override;
102107

103-
std::vector<uint8_t> getCompiledNetwork(std::shared_ptr<const NetworkDescription> networkDescription) override;
108+
CompiledNetwork getCompiledNetwork(const NetworkDescription& networkDescription) override;
104109

105110
private:
106111
NetworkMetadata getNetworkMeta(ze_graph_handle_t graphHandle) const;
@@ -123,6 +128,20 @@ class LevelZeroCompilerInDriver final : public ICompiler {
123128
std::vector<IODescriptor>& inputs,
124129
std::vector<IODescriptor>& outputs) const;
125130

131+
template <typename T = TableExtension, typename std::enable_if_t<UseCopyForNativeBinary(T), bool> = true>
132+
void getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
133+
ze_graph_handle_t graphHandle,
134+
std::vector<uint8_t>& blob,
135+
uint8_t*& blobPtr,
136+
size_t& blobSize) const;
137+
138+
template <typename T = TableExtension, typename std::enable_if_t<!UseCopyForNativeBinary(T), bool> = true>
139+
void getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
140+
ze_graph_handle_t graphHandle,
141+
std::vector<uint8_t>& /* unusedBlob */,
142+
uint8_t*& blobPtr,
143+
size_t& blobSize) const;
144+
126145
template <typename T = TableExtension, typename std::enable_if_t<SupportAPIGraphQueryNetworkV2(T), bool> = true>
127146
ze_result_t seriazlideIRModelAndQueryNetworkCreateV2(const std::shared_ptr<const ov::Model>& model,
128147
const Config& config,

src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp

+8-3
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,12 @@ LevelZeroCompilerAdapter::LevelZeroCompilerAdapter(std::shared_ptr<IEngineBacken
6464
zeContext,
6565
graph_ddi_table_ext);
6666
break;
67+
case ZE_GRAPH_EXT_VERSION_1_7:
68+
apiAdapter = std::make_shared<LevelZeroCompilerInDriver<ze_graph_dditable_ext_1_7_t>>(driverHandle,
69+
deviceHandle,
70+
zeContext,
71+
graph_ddi_table_ext);
72+
break;
6773
default:
6874
apiAdapter = std::make_shared<LevelZeroCompilerInDriver<ze_graph_dditable_ext_1_2_t>>(driverHandle,
6975
deviceHandle,
@@ -109,10 +115,9 @@ void LevelZeroCompilerAdapter::release(std::shared_ptr<const NetworkDescription>
109115
apiAdapter->release(std::move(networkDescription));
110116
}
111117

112-
std::vector<uint8_t> LevelZeroCompilerAdapter::getCompiledNetwork(
113-
std::shared_ptr<const NetworkDescription> networkDescription) {
118+
/**
 * @brief Logs the request and delegates it to the version-specific adapter held in `apiAdapter`.
 * @param networkDescription Description of the network whose compiled form is requested.
 * @return Whatever the underlying adapter returns for this description.
 */
CompiledNetwork LevelZeroCompilerAdapter::getCompiledNetwork(const NetworkDescription& networkDescription) {
    _logger.info("getCompiledNetwork - using adapter to perform getCompiledNetwork(networkDescription)");
    auto compiledNetwork = apiAdapter->getCompiledNetwork(networkDescription);
    return compiledNetwork;
}
117122

118123
} // namespace driverCompilerAdapter

src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp

+72-34
Original file line numberDiff line numberDiff line change
@@ -363,46 +363,83 @@ void LevelZeroCompilerInDriver<TableExtension>::release(std::shared_ptr<const Ne
363363
}
364364

365365
template <typename TableExtension>
366-
std::vector<uint8_t> LevelZeroCompilerInDriver<TableExtension>::getCompiledNetwork(
367-
std::shared_ptr<const NetworkDescription> networkDescription) {
368-
if (networkDescription->metadata.graphHandle != nullptr && networkDescription->compiledNetwork.size() == 0) {
366+
template <typename T, std::enable_if_t<UseCopyForNativeBinary(T), bool>>
367+
void LevelZeroCompilerInDriver<TableExtension>::getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
368+
ze_graph_handle_t graphHandle,
369+
std::vector<uint8_t>& blob,
370+
uint8_t*& blobPtr,
371+
size_t& blobSize) const {
372+
// Get blob size first
373+
auto result = _graphDdiTableExt.pfnGetNativeBinary(graphHandle, &blobSize, nullptr);
374+
blob.resize(blobSize);
375+
376+
OPENVINO_ASSERT(result == ZE_RESULT_SUCCESS,
377+
"Failed to compile network. L0 pfnGetNativeBinary get blob size",
378+
" result: ",
379+
ze_result_to_string(result),
380+
", code 0x",
381+
std::hex,
382+
uint64_t(result),
383+
". ",
384+
getLatestBuildError());
385+
386+
// Get blob data
387+
result = _graphDdiTableExt.pfnGetNativeBinary(graphHandle, &blobSize, blob.data());
388+
389+
OPENVINO_ASSERT(result == ZE_RESULT_SUCCESS,
390+
"Failed to compile network. L0 pfnGetNativeBinary get blob data",
391+
" result: ",
392+
ze_result_to_string(result),
393+
", code 0x",
394+
std::hex,
395+
uint64_t(result),
396+
". ",
397+
getLatestBuildError());
398+
399+
blobPtr = blob.data();
400+
}
401+
402+
template <typename TableExtension>
403+
template <typename T, std::enable_if_t<!UseCopyForNativeBinary(T), bool>>
404+
void LevelZeroCompilerInDriver<TableExtension>::getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
405+
ze_graph_handle_t graphHandle,
406+
std::vector<uint8_t>& /* unusedBlob */,
407+
uint8_t*& blobPtr,
408+
size_t& blobSize) const {
409+
// Get blob ptr and size
410+
auto result = _graphDdiTableExt.pfnGetNativeBinary2(graphHandle, &blobSize, &blobPtr);
411+
412+
OPENVINO_ASSERT(result == ZE_RESULT_SUCCESS,
413+
"Failed to compile network. L0 pfnGetNativeBinary get blob size",
414+
" result: ",
415+
ze_result_to_string(result),
416+
", code 0x",
417+
std::hex,
418+
uint64_t(result),
419+
". ",
420+
getLatestBuildError());
421+
}
422+
423+
template <typename TableExtension>
424+
CompiledNetwork LevelZeroCompilerInDriver<TableExtension>::getCompiledNetwork(
425+
const NetworkDescription& networkDescription) {
426+
if (networkDescription.metadata.graphHandle != nullptr && networkDescription.compiledNetwork.size() == 0) {
369427
_logger.info("LevelZeroCompilerInDriver getCompiledNetwork get blob from graphHandle");
370-
ze_graph_handle_t graphHandle = static_cast<ze_graph_handle_t>(networkDescription->metadata.graphHandle);
428+
ze_graph_handle_t graphHandle = static_cast<ze_graph_handle_t>(networkDescription.metadata.graphHandle);
371429

372-
// Get blob size first
430+
uint8_t* blobPtr = nullptr;
373431
size_t blobSize = -1;
432+
std::vector<uint8_t> blob;
433+
434+
getNativeBinary(_graphDdiTableExt, graphHandle, blob, blobPtr, blobSize);
374435

375-
auto result = _graphDdiTableExt.pfnGetNativeBinary(graphHandle, &blobSize, nullptr);
376-
377-
OPENVINO_ASSERT(result == ZE_RESULT_SUCCESS,
378-
"Failed to compile network. L0 pfnGetNativeBinary get blob size",
379-
" result: ",
380-
ze_result_to_string(result),
381-
", code 0x",
382-
std::hex,
383-
uint64_t(result),
384-
". ",
385-
getLatestBuildError());
386-
387-
std::vector<uint8_t> blob(blobSize);
388-
// Get blob data
389-
result = _graphDdiTableExt.pfnGetNativeBinary(graphHandle, &blobSize, blob.data());
390-
391-
OPENVINO_ASSERT(result == ZE_RESULT_SUCCESS,
392-
"Failed to compile network. L0 pfnGetNativeBinary get blob data",
393-
" result: ",
394-
ze_result_to_string(result),
395-
", code 0x",
396-
std::hex,
397-
uint64_t(result),
398-
". ",
399-
getLatestBuildError());
400436
_logger.info("LevelZeroCompilerInDriver getCompiledNetwork returning blob");
401-
return blob;
402-
} else {
403-
_logger.info("return the blob from network description");
404-
return networkDescription->compiledNetwork;
437+
return CompiledNetwork(blobPtr, blobSize, std::move(blob));
405438
}
439+
_logger.info("return the blob from network description");
440+
return CompiledNetwork(networkDescription.compiledNetwork.data(),
441+
networkDescription.compiledNetwork.size(),
442+
networkDescription.compiledNetwork);
406443
}
407444

408445
template <typename TableExtension>
@@ -1201,6 +1238,7 @@ template class LevelZeroCompilerInDriver<ze_graph_dditable_ext_1_3_t>;
12011238
template class LevelZeroCompilerInDriver<ze_graph_dditable_ext_1_4_t>;
12021239
template class LevelZeroCompilerInDriver<ze_graph_dditable_ext_1_5_t>;
12031240
template class LevelZeroCompilerInDriver<ze_graph_dditable_ext_1_6_t>;
1241+
template class LevelZeroCompilerInDriver<ze_graph_dditable_ext_1_7_t>;
12041242

12051243
} // namespace driverCompilerAdapter
12061244
} // namespace intel_npu

src/plugins/intel_npu/src/plugin/src/compiled_model.cpp

+11-8
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,11 @@ constexpr std::string_view NO_EXECUTOR_FOR_INFERENCE =
2727
"Can't create infer request!\n"
2828
"Please make sure that the device is available. Only exports can be made.";
2929

30-
std::uint32_t hash(const std::vector<uint8_t>& data) {
30+
/**
 * @brief Folds every byte of the blob into a 32-bit value, used for logging.
 * @param blob Compiled network whose `size` bytes starting at `data` are hashed.
 * @return The accumulated value; 1171117 for an empty blob.
 */
std::uint32_t hash(const intel_npu::CompiledNetwork& blob) {
    std::uint32_t digest = 1171117u;
    for (size_t idx = 0; idx < blob.size; ++idx) {
        // Unsigned arithmetic, so the accumulation wraps modulo 2^32.
        digest = ((digest << 7) + digest) + static_cast<uint32_t>(blob.data[idx]);
    }
    return digest;
}
3637

@@ -139,15 +140,17 @@ std::shared_ptr<ov::ISyncInferRequest> CompiledModel::create_sync_infer_request(
139140

140141
void CompiledModel::export_model(std::ostream& stream) const {
141142
_logger.debug("CompiledModel::export_model");
142-
const auto&& blob = _compiler->getCompiledNetwork(_networkPtr);
143-
stream.write(reinterpret_cast<const char*>(blob.data()), blob.size());
144-
std::stringstream str;
145-
str << "Blob size: " << blob.size() << ", hash: " << std::hex << hash(blob);
146-
_logger.info(str.str().c_str());
143+
const auto blob = _compiler->getCompiledNetwork(*_networkPtr);
144+
stream.write(reinterpret_cast<const char*>(blob.data), blob.size);
147145

148146
if (!stream) {
149147
_logger.error("Write blob to stream failed. Blob is broken!");
150148
} else {
149+
if (_logger.level() >= ov::log::Level::INFO) {
150+
std::stringstream str;
151+
str << "Blob size: " << blob.size << ", hash: " << std::hex << hash(blob);
152+
_logger.info(str.str().c_str());
153+
}
151154
_logger.info("Write blob to stream successfully.");
152155
}
153156
}

0 commit comments

Comments
 (0)