
Commit ffc135c

[NPU] Adding support for the remote tensor feature - [Part II - leftovers] (#25572)
### Details:
- Allocate a host Level Zero buffer when the create host tensor method is used.
- Do not chain the mutable descriptors; call the update mutable command list API after each descriptor instead.
- Do not throw an error when an older ze_loader is used; fail only if the unsupported functions are actually called.
- Call updateMutableCommandList after the set_tensor method is used.
- Add extra test cases for the batching flow using MCL (mutable command lists).

### Tickets:
- EISW-131915
1 parent 56a8c7e commit ffc135c
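
The first bullet is exercised through the public remote-context API. Below is a minimal, hedged usage sketch (not part of this commit) of how an application would reach the new createHostTensor path; the model path, device name, and shape are placeholders, and ov::RemoteContext::create_host_tensor is the generic OpenVINO entry point assumed to route into the NPU plugin's host-tensor allocation.

#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    ov::CompiledModel compiled = core.compile_model("model.xml", "NPU");  // placeholder model path

    // The compiled model's remote context is backed by the NPU Level Zero context.
    // create_host_tensor() is expected to end up in ZeroDevice::createHostTensor and
    // allocate a host-visible Level Zero buffer instead of a regular ov::Tensor.
    ov::RemoteContext context = compiled.get_context();
    ov::Tensor host_tensor = context.create_host_tensor(ov::element::f32, ov::Shape{1, 3, 224, 224});

    ov::InferRequest request = compiled.create_infer_request();
    request.set_input_tensor(0, host_tensor);  // the L0 host buffer can be bound without extra staging copies
    request.infer();
    return 0;
}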


18 files changed: +331 -99 lines changed


src/plugins/intel_npu/src/al/include/npu.hpp

+5
@@ -92,6 +92,11 @@ class IDevice : public std::enable_shared_from_this<IDevice> {
                                                      ov::intel_npu::MemType mem_type = ov::intel_npu::MemType::L0_INTERNAL_BUF,
                                                      void* mem = nullptr);
 
+    virtual ov::SoPtr<ov::ITensor> createHostTensor(std::shared_ptr<ov::IRemoteContext> context,
+                                                    const ov::element::Type& element_type,
+                                                    const ov::Shape& shape,
+                                                    const Config& config);
+
 protected:
     virtual ~IDevice() = default;
 };

src/plugins/intel_npu/src/al/src/npu.cpp

+7
@@ -81,4 +81,11 @@ ov::SoPtr<ov::IRemoteTensor> IDevice::createRemoteTensor(std::shared_ptr<ov::IRe
     OPENVINO_THROW("Create Remote Tensor is not supported");
 }
 
+ov::SoPtr<ov::ITensor> IDevice::createHostTensor(std::shared_ptr<ov::IRemoteContext>,
+                                                 const ov::element::Type&,
+                                                 const ov::Shape&,
+                                                 const Config&) {
+    OPENVINO_THROW("Create Host Tensor is not supported");
+}
+
 }  // namespace intel_npu

src/plugins/intel_npu/src/backend/include/zero_device.hpp

+5
@@ -47,6 +47,11 @@ class ZeroDevice : public IDevice {
                                               ov::intel_npu::MemType mem_type = ov::intel_npu::MemType::L0_INTERNAL_BUF,
                                               void* mem = nullptr) override;
 
+    ov::SoPtr<ov::ITensor> createHostTensor(std::shared_ptr<ov::IRemoteContext> context,
+                                            const ov::element::Type& element_type,
+                                            const ov::Shape& shape,
+                                            const Config& config) override;
+
     ZeroDevice& operator=(const ZeroDevice&) = delete;
     ZeroDevice(const ZeroDevice&) = delete;

src/plugins/intel_npu/src/backend/include/zero_host_tensor.hpp

+39

@@ -0,0 +1,39 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "intel_npu/al/config/config.hpp"
+#include "openvino/runtime/itensor.hpp"
+#include "zero_init.hpp"
+#include "zero_remote_tensor.hpp"
+
+namespace intel_npu {
+
+class ZeroHostTensor : public ov::ITensor {
+public:
+    ZeroHostTensor(std::shared_ptr<ov::IRemoteContext> context,
+                   std::shared_ptr<ZeroInitStructsHolder> init_structs,
+                   const ov::element::Type element_type,
+                   const ov::Shape& shape,
+                   const Config& config);
+
+    ~ZeroHostTensor() override = default;
+
+    void* data(const ov::element::Type& element_type) const override;
+    const ov::element::Type& get_element_type() const override;
+
+    const ov::Shape& get_shape() const override;
+
+    const ov::Strides& get_strides() const override;
+
+    void set_shape(ov::Shape new_shape) override;
+
+    std::shared_ptr<ZeroRemoteTensor> get_impl() const;
+
+private:
+    std::shared_ptr<ZeroRemoteTensor> m_impl;
+};
+
+}  // namespace intel_npu

src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp

+3-3
@@ -53,8 +53,9 @@ class ZeroInferRequest final : public SyncInferRequest {
      * @brief Check the received remote tensor and copy it to the Level Zero tensor
      * @param tensor Reference to a tensor.
      * @param name Friendly name of the tensor.
+     * @param isParameter True if tensor is a parameter.
      */
-    void set_remote_tensor_data(std::shared_ptr<ZeroRemoteTensor> tensor, const std::string& name);
+    void set_remote_tensor_data(std::shared_ptr<ZeroRemoteTensor> tensor, const std::string& name, bool isParameter);
 
     void check_network_precision(const ov::element::Type_t precision) const override;
     void create_pipeline();
@@ -77,8 +78,7 @@ class ZeroInferRequest final : public SyncInferRequest {
     // specific operations on the plugin in this case.
     size_t _batchSize = DEFAULT_BATCH_SIZE;
 
-    bool _createPipeline = true;
-    bool _updateCommandList = false;
+    bool _pipelineIsCreated = false;
 };
 
 }  // namespace intel_npu

src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp

+1-2
@@ -16,7 +16,6 @@ struct TensorData {
     void* mem;
     size_t size;
     bool levelZeroTensorCreatedLocally = true;
-    bool changed = false;
 };
 
 struct Pipeline {
@@ -32,7 +31,7 @@ struct Pipeline {
     virtual void pull(size_t batch_index) = 0;
     virtual void reset(size_t batch_index) const = 0;
 
-    virtual void updateCommandList(std::unordered_map<std::string, TensorData>& tensors_data, size_t batch_size) = 0;
+    virtual void updateCommandList(const TensorData& tensors_data, uint32_t index, size_t batch_size) = 0;
 
 protected:
     zeroMemory::MemoryManagementUnit _deviceInputs;

src/plugins/intel_npu/src/backend/include/zero_wrappers.hpp

+1-4
@@ -87,7 +87,7 @@ class CommandList {
     void appendGraphInitialize(const ze_graph_handle_t& graph_handle) const;
     void appendGraphExecute(const ze_graph_handle_t& graph_handle,
                             const ze_graph_profiling_query_handle_t& profiling_query_handle) const;
-    void updateMutableCommandList(const void* pNext = nullptr) const;
+    void updateMutableCommandList(uint32_t arg_index, const void* arg_value) const;
     void appendNpuTimestamp(uint64_t* timestamp_buff) const;
     void appendBarrier() const;
     void close() const;
@@ -96,9 +96,6 @@ class CommandList {
     inline ze_command_list_handle_t handle() const {
         return _handle;
     }
-    uint64_t getCommandListId() const {
-        return _command_id;
-    }
 
 private:
     ze_command_list_handle_t _handle = nullptr;

src/plugins/intel_npu/src/backend/src/zero_device.cpp

+8
@@ -9,6 +9,7 @@
 #include "intel_npu/al/itt.hpp"
 #include "intel_npu/utils/zero/zero_api.hpp"
 #include "zero_executor.hpp"
+#include "zero_host_tensor.hpp"
 #include "zero_infer_request.hpp"
 #include "zero_remote_tensor.hpp"
 #include "zero_utils.hpp"
@@ -193,3 +194,10 @@ ov::SoPtr<ov::IRemoteTensor> ZeroDevice::createRemoteTensor(std::shared_ptr<ov::
     return {std::make_shared<
         ZeroRemoteTensor>(context, _initStructs, element_type, shape, config, tensor_type, mem_type, mem)};
 };
+
+ov::SoPtr<ov::ITensor> ZeroDevice::createHostTensor(std::shared_ptr<ov::IRemoteContext> context,
+                                                    const ov::element::Type& element_type,
+                                                    const ov::Shape& shape,
+                                                    const Config& config) {
+    return {std::make_shared<ZeroHostTensor>(context, _initStructs, element_type, shape, config)};
+};
src/plugins/intel_npu/src/backend/src/zero_host_tensor.cpp

+48

@@ -0,0 +1,48 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "zero_host_tensor.hpp"
+
+#include "openvino/runtime/intel_npu/remote_properties.hpp"
+
+namespace intel_npu {
+
+ZeroHostTensor::ZeroHostTensor(std::shared_ptr<ov::IRemoteContext> context,
+                               std::shared_ptr<ZeroInitStructsHolder> init_structs,
+                               const ov::element::Type element_type,
+                               const ov::Shape& shape,
+                               const Config& config)
+    : m_impl(std::make_shared<ZeroRemoteTensor>(context,
+                                                init_structs,
+                                                element_type,
+                                                shape,
+                                                config,
+                                                ov::intel_npu::TensorType::BINDED,
+                                                ov::intel_npu::MemType::L0_INTERNAL_BUF)) {}
+
+void* ZeroHostTensor::data(const ov::element::Type&) const {
+    return m_impl->get_properties().find(ov::intel_npu::mem_handle.name())->second.as<void*>();
+}
+
+const ov::element::Type& ZeroHostTensor::get_element_type() const {
+    return m_impl->get_element_type();
+}
+
+const ov::Shape& ZeroHostTensor::get_shape() const {
+    return m_impl->get_shape();
+}
+
+const ov::Strides& ZeroHostTensor::get_strides() const {
+    return m_impl->get_strides();
+}
+
+void ZeroHostTensor::set_shape(ov::Shape new_shape) {
+    m_impl->set_shape(new_shape);
+}
+
+std::shared_ptr<ZeroRemoteTensor> ZeroHostTensor::get_impl() const {
+    return m_impl;
+}
+
+}  // namespace intel_npu
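
ZeroHostTensor is a thin ov::ITensor facade over a ZeroRemoteTensor allocated as an L0_INTERNAL_BUF host buffer; its data() returns the pointer stored in the ov::intel_npu::mem_handle property. The snippet below is a hypothetical consumer sketch, not part of this commit (the function name is a placeholder), showing how backend code could detect such a tensor and reuse the pinned allocation instead of copying.

#include <memory>

#include "zero_host_tensor.hpp"

static void* try_get_level_zero_pointer(const std::shared_ptr<ov::ITensor>& user_tensor) {
    if (auto host_tensor = std::dynamic_pointer_cast<intel_npu::ZeroHostTensor>(user_tensor)) {
        // data() resolves the ov::intel_npu::mem_handle property of the wrapped ZeroRemoteTensor,
        // i.e. the pointer returned by the Level Zero host allocation.
        return host_tensor->data(ov::element::Type{});
    }
    return nullptr;  // not a host tensor: the caller would fall back to its copy path
}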

src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp

+36-21
@@ -402,13 +402,26 @@ void ZeroInferRequest::set_tensor_data(std::shared_ptr<ov::ITensor> tensor, cons
     if (setTensorData) {
         _tensorsData[name] = TensorData{_copyAllTensors.at(name)->data(),
                                         _copyAllTensors.at(name)->get_byte_size(),
-                                        levelZeroTensorCreatedLocally,
-                                        !_createPipeline};
-        _updateCommandList = true;
+                                        levelZeroTensorCreatedLocally};
+
+        if (_pipelineIsCreated) {
+            _logger.debug("ZeroInferRequest::infer_async - update command list");
+
+            intel_npu::ZeroExecutor::ArgumentDescriptor desc;
+            if (isParameter) {
+                desc = _executor->inputs_desc_map().at(name);
+            } else {
+                desc = _executor->outputs_desc_map().at(name);
+            }
+
+            _pipeline->updateCommandList(_tensorsData[name], desc.idx, _batchSize);
+        }
     }
 }
 
-void ZeroInferRequest::set_remote_tensor_data(std::shared_ptr<ZeroRemoteTensor> tensor, const std::string& name) {
+void ZeroInferRequest::set_remote_tensor_data(std::shared_ptr<ZeroRemoteTensor> tensor,
+                                              const std::string& name,
+                                              bool isParameter) {
     auto l0_context = reinterpret_cast<ze_context_handle_t>(
         extract_object(tensor->get_context()->get_property(), ov::intel_npu::l0_context));
     if (_initStructs->getContext() != l0_context) {
@@ -421,8 +434,20 @@ void ZeroInferRequest::set_remote_tensor_data(std::shared_ptr<ZeroRemoteTensor>
     }
 
     _copyAllTensors[name] = tensor;
-    _tensorsData[name] = TensorData{data, tensor->get_byte_size(), false, !_createPipeline};
-    _updateCommandList = true;
+    _tensorsData[name] = TensorData{data, tensor->get_byte_size(), false};
+
+    if (_pipelineIsCreated) {
+        _logger.debug("ZeroInferRequest::infer_async - update command list");
+
+        intel_npu::ZeroExecutor::ArgumentDescriptor desc;
+        if (isParameter) {
+            desc = _executor->inputs_desc_map().at(name);
+        } else {
+            desc = _executor->outputs_desc_map().at(name);
+        }
+
+        _pipeline->updateCommandList(_tensorsData[name], desc.idx, _batchSize);
+    }
 }
 
 void ZeroInferRequest::set_tensor(const ov::Output<const ov::Node>& port, const ov::SoPtr<ov::ITensor>& tensor) {
@@ -444,7 +469,9 @@ void ZeroInferRequest::set_tensor(const ov::Output<const ov::Node>& port, const
                                  ov::op::util::is_parameter(port.get_node()));
         } else {
             _logger.debug("ZeroInferRequest::set_tensor - set new remote tensor");
-            set_remote_tensor_data(remoteTensor, port.get_node()->get_friendly_name());
+            set_remote_tensor_data(remoteTensor,
+                                   port.get_node()->get_friendly_name(),
+                                   ov::op::util::is_parameter(port.get_node()));
         }
     }
 }
@@ -489,23 +516,11 @@ void ZeroInferRequest::infer_async() {
     OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "infer_async");
 
     _executor->mutexLock();
-
-    if (_createPipeline) {
+    if (!_pipelineIsCreated) {
         create_pipeline();
 
-        _createPipeline = false;
-        _updateCommandList = false;
+        _pipelineIsCreated = true;
     }
-
-    if (_initStructs->getMutableCommandListVersion()) {
-        if (_updateCommandList) {
-            _logger.debug("ZeroInferRequest::infer_async - update command list");
-            _pipeline->updateCommandList(_tensorsData, _batchSize);
-
-            _updateCommandList = false;
-        }
-    }
-
     _executor->mutexUnlock();
 
     for (const std::string& name : _inputAndStateInputNames) {
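
This change implements the "call updateMutableCommandList after set_tensor" bullet: once the pipeline exists, a newly set tensor patches the already-recorded command lists immediately instead of deferring the update to the next infer_async(). A hedged application-level sketch of the scenario this targets (not part of this commit; the model path and shape are placeholders):

#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    ov::CompiledModel compiled = core.compile_model("model.xml", "NPU");  // placeholder model path
    ov::InferRequest request = compiled.create_infer_request();

    request.infer();  // first inference: the pipeline and its command lists are created here

    // Swapping the input buffer after the first run: the Level Zero graph argument is expected
    // to be rebound via the mutable command list rather than by rebuilding the pipeline.
    ov::Tensor new_input(ov::element::f32, ov::Shape{1, 3, 224, 224});
    request.set_input_tensor(0, new_input);
    request.infer();
    return 0;
}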

src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp

+5-54
@@ -143,7 +143,7 @@ struct DiscretePipeline final : public Pipeline {
     }
     };
 
-    void updateCommandList(std::unordered_map<std::string, TensorData>&, size_t) override{};
+    void updateCommandList(const TensorData&, uint32_t, size_t) override {}
 
 private:
     const Config _config;
@@ -274,60 +274,11 @@ struct IntegratedPipeline final : public Pipeline {
         _logger.debug("IntegratedPipeline - rest() completed");
     };
 
-    void updateCommandList(std::unordered_map<std::string, TensorData>& tensors_data, size_t batch_size) override {
-        std::vector<ze_mutable_graph_argument_exp_desc_t> mutable_argument_desc;
-        int32_t changed_tensors = 0;
-
-        for (const auto& desc : tensors_data) {
-            if (desc.second.changed == true) {
-                changed_tensors++;
-            }
-        }
-
-        mutable_argument_desc.reserve(changed_tensors);
-
-        auto set_mutable_desc =
-            [&](int32_t mutable_desc_index, uint64_t command_list_id, uint32_t arg_index, const void* arg_value) {
-                mutable_argument_desc.emplace_back(ze_mutable_graph_argument_exp_desc_t{
-                    ZE_STRUCTURE_TYPE_MUTABLE_GRAPH_ARGUMENT_EXP_DESC,
-                    mutable_desc_index ? &mutable_argument_desc.at(mutable_desc_index - 1) : nullptr,
-                    command_list_id,
-                    arg_index,
-                    arg_value});
-            };
-
+    void updateCommandList(const TensorData& tensors_data, uint32_t index, size_t batch_size) override {
         for (size_t i = 0; i < batch_size; i++) {
-            int32_t mutable_argument_desc_index = -1;
-
-            for (const auto& desc : _executor->inputs_desc_map()) {
-                TensorData& inputTensorData = tensors_data.at(desc.first);
-
-                if (inputTensorData.changed == true) {
-                    set_mutable_desc(
-                        ++mutable_argument_desc_index,
-                        _command_lists.at(i)->getCommandListId(),
-                        desc.second.idx,
-                        static_cast<unsigned char*>(inputTensorData.mem) + (i * inputTensorData.size) / batch_size);
-
-                    inputTensorData.changed = false;
-                }
-            }
-
-            for (const auto& desc : _executor->outputs_desc_map()) {
-                TensorData& outputTensorData = tensors_data.at(desc.first);
-
-                if (outputTensorData.changed == true) {
-                    set_mutable_desc(
-                        ++mutable_argument_desc_index,
-                        _command_lists.at(i)->getCommandListId(),
-                        desc.second.idx,
-                        static_cast<unsigned char*>(outputTensorData.mem) + (i * outputTensorData.size) / batch_size);
-
-                    outputTensorData.changed = false;
-                }
-            }
-
-            _command_lists.at(i)->updateMutableCommandList(&mutable_argument_desc.at(mutable_argument_desc_index));
+            _command_lists.at(i)->updateMutableCommandList(
+                index,
+                static_cast<unsigned char*>(tensors_data.mem) + (i * tensors_data.size) / batch_size);
             _command_lists.at(i)->close();
        }
    };
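
The rewritten IntegratedPipeline::updateCommandList rebinds a single graph argument across all batch copies of the command list, offsetting the argument pointer for each copy. The helper below is only an illustrative sketch of that offset arithmetic (the function name is hypothetical, not part of the plugin):

#include <cstddef>
#include <cstdint>

// Command list i binds the same graph argument to the slice starting at i * (size / batch_size) bytes,
// matching the arithmetic used in IntegratedPipeline::updateCommandList above.
static unsigned char* batch_slice(void* base, size_t total_size, size_t batch_size, size_t i) {
    return static_cast<unsigned char*>(base) + (i * total_size) / batch_size;
}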

src/plugins/intel_npu/src/backend/src/zero_wrappers.cpp

+10-5
@@ -114,11 +114,16 @@ CommandList::~CommandList() {
         _log.error("zeCommandListDestroy failed %#X", uint64_t(result));
     }
 }
-void CommandList::updateMutableCommandList(const void* pNext) const {
-    ze_mutable_commands_exp_desc_t mutable_commands_exp_desc_t = {
-        static_cast<ze_structure_type_t>(ZE_MUTABLE_COMMAND_EXP_FLAG_GRAPH_ARGUMENT),
-        pNext,
-        0};
+void CommandList::updateMutableCommandList(uint32_t arg_index, const void* arg_value) const {
+    ze_mutable_graph_argument_exp_desc_t desc = {ZE_STRUCTURE_TYPE_MUTABLE_GRAPH_ARGUMENT_EXP_DESC,
+                                                 nullptr,
+                                                 _command_id,
+                                                 arg_index,
+                                                 arg_value};
+
+    ze_mutable_commands_exp_desc_t mutable_commands_exp_desc_t = {ZE_STRUCTURE_TYPE_MUTABLE_COMMANDS_EXP_DESC,
+                                                                  &desc,
+                                                                  0};
 
     zeroUtils::throwOnFail("zeCommandListUpdateMutableCommandsExp",
                            zeCommandListUpdateMutableCommandsExp(_handle, &mutable_commands_exp_desc_t));
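
With the new signature, callers rebind one argument per call and the descriptor construction moves inside the wrapper, which is how the "do not chain the mutable descriptor" bullet is realized. A hedged sketch of the resulting calling pattern (the helper, its parameters, and the omitted namespace qualification of CommandList are placeholders, not code from this commit):

#include "zero_wrappers.hpp"

// Placeholder helper: rebind two graph arguments on an already-recorded command list.
static void rebind_arguments(const CommandList& cmd_list,
                             uint32_t input_idx, const void* input_ptr,
                             uint32_t output_idx, const void* output_ptr) {
    cmd_list.updateMutableCommandList(input_idx, input_ptr);    // one L0 update call per argument
    cmd_list.updateMutableCommandList(output_idx, output_ptr);  // no pNext chaining by the caller
    cmd_list.close();  // the command list is closed again before the next execution, as in the pipeline code
}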
