Skip to content

Commit 0ebff04

Browse files
authored
[NPU] Use different graph init (#26903)
### Details:
- *Use a different graph init depending on whether the graph file schema or ELF is used*

### Tickets:
- *CVS-154233*
1 parent 4a30cb8 commit 0ebff04

9 files changed

+93
-37
lines changed

src/plugins/intel_npu/src/backend/include/zero_executor.hpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ class ZeroExecutor final : public IExecutor {
6161
}
6262

6363
private:
64+
void initialize_graph_through_command_list() const;
65+
6466
const Config _config;
6567
Logger _logger;
6668

@@ -72,7 +74,6 @@ class ZeroExecutor final : public IExecutor {
7274
const uint32_t _group_ordinal;
7375

7476
ze_graph_handle_t _graph = nullptr;
75-
ze_graph_properties_t _props{};
7677

7778
std::vector<ArgumentDescriptor> _input_descriptors;
7879
std::vector<ArgumentDescriptor> _output_descriptors;

src/plugins/intel_npu/src/backend/include/zero_remote_tensor.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ class ZeroRemoteTensor : public RemoteTensor {
4545
void* _mem = nullptr;
4646
void* _data = nullptr;
4747

48-
bool _external_memory_support = true;
48+
bool _external_memory_support = false;
4949
};
5050

5151
} // namespace intel_npu

src/plugins/intel_npu/src/backend/include/zero_types.hpp

+15-2
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
/**
1717
* @brief Last version of Table of Graph Extension functions used within plugin
1818
*/
19-
using ze_graph_dditable_ext_last_t = ze_graph_dditable_ext_1_7_t;
19+
using ze_graph_dditable_ext_last_t = ze_graph_dditable_ext_1_8_t;
2020
/**
2121
* @brief Last version of the Command Queue functions used within plugin
2222
*/
@@ -157,10 +157,23 @@ struct ze_graph_dditable_ext_decorator final {
157157
}
158158

159159
// version 1.7
160-
ze_result_t ZE_APICALL pfnGetNativeBinary2(ze_graph_handle_t hGraph, size_t* pSize, uint8_t** pGraphNativeBinary) {
160+
/**
 * @brief Wrapper for the graph extension call pfnGetNativeBinary2 (extension version 1.7).
 * @details First runs the throwWhenUnsupported check for ZE_GRAPH_EXT_VERSION_1_7, then forwards
 * all arguments unchanged to the wrapped table.
 * @param hGraph Graph handle passed through to the wrapped call.
 * @param pSize Size pointer passed through to the wrapped call.
 * @param pGraphNativeBinary Pointer-to-const-data argument passed through to the wrapped call.
 * @return The ze_result_t produced by the wrapped call.
 */
ze_result_t ZE_APICALL pfnGetNativeBinary2(ze_graph_handle_t hGraph,
                                           size_t* pSize,
                                           const uint8_t** pGraphNativeBinary) {
    // Version gate comes first so the wrapped pointer is only used after the check.
    throwWhenUnsupported("pfnGetNativeBinary2", ZE_GRAPH_EXT_VERSION_1_7);

    const ze_result_t status = _impl->pfnGetNativeBinary2(hGraph, pSize, pGraphNativeBinary);
    return status;
}
166+
167+
// version 1.8
168+
/**
 * @brief Wrapper for the graph extension call pfnGetProperties2 (extension version 1.8).
 * @details First runs the throwWhenUnsupported check for ZE_GRAPH_EXT_VERSION_1_8, then forwards
 * the arguments unchanged to the wrapped table. The label given to the check is the wrapper's own
 * name, matching the convention used by pfnGetNativeBinary2.
 * @param hGraph Graph handle passed through to the wrapped call.
 * @param pGraphProperties Properties structure pointer passed through to the wrapped call.
 * @return The ze_result_t produced by the wrapped call.
 */
ze_result_t ZE_APICALL pfnGetProperties2(ze_graph_handle_t hGraph, ze_graph_properties_2_t* pGraphProperties) {
    throwWhenUnsupported("pfnGetProperties2", ZE_GRAPH_EXT_VERSION_1_8);
    return _impl->pfnGetProperties2(hGraph, pGraphProperties);
}
172+
173+
/**
 * @brief Wrapper for the graph extension call pfnGraphInitialize (extension version 1.8).
 * @details First runs the throwWhenUnsupported check for ZE_GRAPH_EXT_VERSION_1_8, then forwards
 * the handle unchanged to the wrapped table.
 * @param hGraph Graph handle passed through to the wrapped call.
 * @return The ze_result_t produced by the wrapped call.
 */
ze_result_t ZE_APICALL pfnGraphInitialize(ze_graph_handle_t hGraph) {
    // Report this function's own name; the previous label was copied from pfnGetProperties2
    // and would have pointed a diagnostic at the wrong entry point.
    throwWhenUnsupported("pfnGraphInitialize", ZE_GRAPH_EXT_VERSION_1_8);
    return _impl->pfnGraphInitialize(hGraph);
}
164177
};
165178

166179
/**

src/plugins/intel_npu/src/backend/src/zero_executor.cpp

+52-24
Original file line numberDiff line numberDiff line change
@@ -37,23 +37,6 @@ ZeroExecutor::ZeroExecutor(const std::shared_ptr<const ZeroInitStructsHolder>& i
3737
_initStructs->getCommandQueueDdiTable(),
3838
_config,
3939
group_ordinal)} {
40-
_logger.debug("ZeroExecutor::ZeroExecutor init start - create graph_command_list");
41-
OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "Executor::ZeroExecutor");
42-
CommandList graph_command_list(_initStructs->getDevice(),
43-
_initStructs->getContext(),
44-
_graph_ddi_table_ext,
45-
_config,
46-
_group_ordinal);
47-
_logger.debug("ZeroExecutor::ZeroExecutor - create graph_command_queue");
48-
CommandQueue graph_command_queue(_initStructs->getDevice(),
49-
_initStructs->getContext(),
50-
ZE_COMMAND_QUEUE_PRIORITY_NORMAL,
51-
_initStructs->getCommandQueueDdiTable(),
52-
_config,
53-
_group_ordinal);
54-
_logger.debug("ZeroExecutor::ZeroExecutor - create fence");
55-
Fence fence(graph_command_queue, _config);
56-
5740
_logger.debug("ZeroExecutor::ZeroExecutor - create graph");
5841
OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_GRAPH, itt::domains::LevelZeroBackend, "Executor::ZeroExecutor", "graphCreate");
5942

@@ -79,7 +62,10 @@ ZeroExecutor::ZeroExecutor(const std::shared_ptr<const ZeroInitStructsHolder>& i
7962

8063
OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "pfnGetProperties");
8164
_logger.debug("performing pfnGetProperties");
82-
zeroUtils::throwOnFail("pfnGetProperties", _graph_ddi_table_ext.pfnGetProperties(_graph, &_props));
65+
ze_graph_properties_t props{};
66+
props.stype = ZE_STRUCTURE_TYPE_GRAPH_PROPERTIES;
67+
68+
zeroUtils::throwOnFail("pfnGetProperties", _graph_ddi_table_ext.pfnGetProperties(_graph, &props));
8369
auto targetDriverExtVersion = _graph_ddi_table_ext.version();
8470
if (targetDriverExtVersion <= ZE_GRAPH_EXT_VERSION_1_1) {
8571
OPENVINO_THROW("Incompatibility between the NPU plugin and driver! The driver version is too old, please "
@@ -88,8 +74,9 @@ ZeroExecutor::ZeroExecutor(const std::shared_ptr<const ZeroInitStructsHolder>& i
8874

8975
OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "pfnGetArgumentProperties3");
9076
_logger.debug("performing pfnGetArgumentProperties3");
91-
for (uint32_t index = 0; index < _props.numGraphArgs; ++index) {
92-
ze_graph_argument_properties_3_t arg3;
77+
for (uint32_t index = 0; index < props.numGraphArgs; ++index) {
78+
ze_graph_argument_properties_3_t arg3{};
79+
arg3.stype = ZE_STRUCTURE_TYPE_GRAPH_ARGUMENT_PROPERTIES;
9380
zeroUtils::throwOnFail("pfnGetArgumentProperties3",
9481
_graph_ddi_table_ext.pfnGetArgumentProperties3(_graph, index, &arg3));
9582

@@ -100,6 +87,51 @@ ZeroExecutor::ZeroExecutor(const std::shared_ptr<const ZeroInitStructsHolder>& i
10087
}
10188
}
10289

90+
if (_graph_ddi_table_ext.version() < ZE_GRAPH_EXT_VERSION_1_8) {
91+
initialize_graph_through_command_list();
92+
} else {
93+
ze_graph_properties_2_t properties = {};
94+
properties.stype = ZE_STRUCTURE_TYPE_GRAPH_PROPERTIES;
95+
_graph_ddi_table_ext.pfnGetProperties2(_graph, &properties);
96+
97+
if (properties.initStageRequired & ZE_GRAPH_STAGE_INITIALIZE) {
98+
OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "pfnGraphInitialize");
99+
_graph_ddi_table_ext.pfnGraphInitialize(_graph);
100+
}
101+
102+
if (properties.initStageRequired & ZE_GRAPH_STAGE_COMMAND_LIST_INITIALIZE) {
103+
initialize_graph_through_command_list();
104+
}
105+
}
106+
107+
if (config.has<WORKLOAD_TYPE>()) {
108+
setWorkloadType(config.get<WORKLOAD_TYPE>());
109+
}
110+
}
111+
112+
void ZeroExecutor::initialize_graph_through_command_list() const {
113+
OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_GRAPH,
114+
itt::domains::LevelZeroBackend,
115+
"Executor::ZeroExecutor",
116+
"initialize_graph_through_command_list");
117+
118+
_logger.debug("ZeroExecutor::ZeroExecutor init start - create graph_command_list");
119+
OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "Executor::ZeroExecutor");
120+
CommandList graph_command_list(_initStructs->getDevice(),
121+
_initStructs->getContext(),
122+
_graph_ddi_table_ext,
123+
_config,
124+
_group_ordinal);
125+
_logger.debug("ZeroExecutor::ZeroExecutor - create graph_command_queue");
126+
CommandQueue graph_command_queue(_initStructs->getDevice(),
127+
_initStructs->getContext(),
128+
ZE_COMMAND_QUEUE_PRIORITY_NORMAL,
129+
_initStructs->getCommandQueueDdiTable(),
130+
_config,
131+
_group_ordinal);
132+
_logger.debug("ZeroExecutor::ZeroExecutor - create fence");
133+
Fence fence(graph_command_queue, _config);
134+
103135
OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "appendGraphInitialize");
104136
_logger.debug("ZeroExecutor::ZeroExecutor - performing appendGraphInitialize");
105137
graph_command_list.appendGraphInitialize(_graph);
@@ -112,10 +144,6 @@ ZeroExecutor::ZeroExecutor(const std::shared_ptr<const ZeroInitStructsHolder>& i
112144
_logger.debug("ZeroExecutor::ZeroExecutor - performing hostSynchronize");
113145
fence.hostSynchronize();
114146
_logger.debug("ZeroExecutor::ZeroExecutor - hostSynchronize completed");
115-
116-
if (config.has<WORKLOAD_TYPE>()) {
117-
setWorkloadType(config.get<WORKLOAD_TYPE>());
118-
}
119147
}
120148

121149
void ZeroExecutor::setWorkloadType(const ov::WorkloadType workloadType) const {

src/plugins/intel_npu/src/backend/src/zero_remote_tensor.cpp

+10-3
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,16 @@ ZeroRemoteTensor::ZeroRemoteTensor(std::shared_ptr<ov::IRemoteContext> context,
4444
ze_device_external_memory_properties_t desc = {};
4545
desc.stype = ZE_STRUCTURE_TYPE_DEVICE_EXTERNAL_MEMORY_PROPERTIES;
4646
auto res = zeDeviceGetExternalMemoryProperties(_init_structs->getDevice(), &desc);
47-
if (res != ZE_RESULT_SUCCESS || (desc.memoryAllocationImportTypes != ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF &&
48-
desc.memoryAllocationImportTypes != ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_WIN32)) {
49-
_external_memory_support = false;
47+
if (res == ZE_RESULT_SUCCESS) {
48+
#ifdef _WIN32
49+
if (desc.memoryAllocationImportTypes & ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_WIN32) {
50+
_external_memory_support = true;
51+
}
52+
#else
53+
if (desc.memoryAllocationImportTypes & ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF) {
54+
_external_memory_support = true;
55+
}
56+
#endif
5057
}
5158

5259
allocate(byte_size);

src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -132,14 +132,14 @@ class LevelZeroCompilerInDriver final : public ICompiler {
132132
void getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
133133
ze_graph_handle_t graphHandle,
134134
std::vector<uint8_t>& blob,
135-
uint8_t*& blobPtr,
135+
const uint8_t*& blobPtr,
136136
size_t& blobSize) const;
137137

138138
template <typename T = TableExtension, typename std::enable_if_t<!UseCopyForNativeBinary(T), bool> = true>
139139
void getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
140140
ze_graph_handle_t graphHandle,
141141
std::vector<uint8_t>& /* unusedBlob */,
142-
uint8_t*& blobPtr,
142+
const uint8_t*& blobPtr,
143143
size_t& blobSize) const;
144144

145145
template <typename T = TableExtension, typename std::enable_if_t<SupportAPIGraphQueryNetworkV2(T), bool> = true>

src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp

+6
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,12 @@ LevelZeroCompilerAdapter::LevelZeroCompilerAdapter(std::shared_ptr<IEngineBacken
7070
zeContext,
7171
graph_ddi_table_ext);
7272
break;
73+
case ZE_GRAPH_EXT_VERSION_1_8:
74+
apiAdapter = std::make_shared<LevelZeroCompilerInDriver<ze_graph_dditable_ext_1_8_t>>(driverHandle,
75+
deviceHandle,
76+
zeContext,
77+
graph_ddi_table_ext);
78+
break;
7379
default:
7480
apiAdapter = std::make_shared<LevelZeroCompilerInDriver<ze_graph_dditable_ext_1_2_t>>(driverHandle,
7581
deviceHandle,

src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp

+4-3
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,7 @@ template <typename T, std::enable_if_t<UseCopyForNativeBinary(T), bool>>
371371
void LevelZeroCompilerInDriver<TableExtension>::getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
372372
ze_graph_handle_t graphHandle,
373373
std::vector<uint8_t>& blob,
374-
uint8_t*& blobPtr,
374+
const uint8_t*& blobPtr,
375375
size_t& blobSize) const {
376376
// Get blob size first
377377
auto result = _graphDdiTableExt.pfnGetNativeBinary(graphHandle, &blobSize, nullptr);
@@ -408,7 +408,7 @@ template <typename T, std::enable_if_t<!UseCopyForNativeBinary(T), bool>>
408408
void LevelZeroCompilerInDriver<TableExtension>::getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
409409
ze_graph_handle_t graphHandle,
410410
std::vector<uint8_t>& /* unusedBlob */,
411-
uint8_t*& blobPtr,
411+
const uint8_t*& blobPtr,
412412
size_t& blobSize) const {
413413
// Get blob ptr and size
414414
auto result = _graphDdiTableExt.pfnGetNativeBinary2(graphHandle, &blobSize, &blobPtr);
@@ -431,7 +431,7 @@ CompiledNetwork LevelZeroCompilerInDriver<TableExtension>::getCompiledNetwork(
431431
_logger.info("LevelZeroCompilerInDriver getCompiledNetwork get blob from graphHandle");
432432
ze_graph_handle_t graphHandle = static_cast<ze_graph_handle_t>(networkDescription.metadata.graphHandle);
433433

434-
uint8_t* blobPtr = nullptr;
434+
const uint8_t* blobPtr = nullptr;
435435
size_t blobSize = -1;
436436
std::vector<uint8_t> blob;
437437

@@ -1243,6 +1243,7 @@ template class LevelZeroCompilerInDriver<ze_graph_dditable_ext_1_4_t>;
12431243
template class LevelZeroCompilerInDriver<ze_graph_dditable_ext_1_5_t>;
12441244
template class LevelZeroCompilerInDriver<ze_graph_dditable_ext_1_6_t>;
12451245
template class LevelZeroCompilerInDriver<ze_graph_dditable_ext_1_7_t>;
1246+
template class LevelZeroCompilerInDriver<ze_graph_dditable_ext_1_8_t>;
12461247

12471248
} // namespace driverCompilerAdapter
12481249
} // namespace intel_npu

0 commit comments

Comments
 (0)