Commit 75e47d7

pereanub authored and yangwang201911 committed
[NPU] Adding support for the remote tensor feature (#24584)
Below are some diagrams of the proposed implementation.

Objects and dependencies:

![Screenshot 2024-05-28 144347](https://github.com/openvinotoolkit/openvino/assets/10560145/8b52dae9-27c3-4597-a07a-7155e0131cae)

Workflow of the RemoteContext and RemoteTensor:

![Screenshot 2024-05-28 144207](https://github.com/openvinotoolkit/openvino/assets/10560145/8cb749c7-0819-4f2a-ade5-f330e21a90a5)

New workflow of the ZeroInferRequest:

![Screenshot 2024-05-28 145045](https://github.com/openvinotoolkit/openvino/assets/10560145/aa295a38-ccc1-4c83-9e82-8dab9108f915)

- *EISW-125271*
- *EISW-125272*
- *EISW-125273*
1 parent d8f0729 commit 75e47d7


42 files changed (+2581 −199 lines)
New file (144 lines): openvino/runtime/intel_npu/level_zero/level_zero.hpp

```cpp
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

/**
 * @brief A header that defines wrappers for internal NPU plugin-specific
 * LevelZero context and LevelZero shared memory tensors
 *
 * @file openvino/runtime/intel_npu/level_zero/level_zero.hpp
 */
#pragma once

#include "openvino/runtime/core.hpp"
#include "openvino/runtime/intel_npu/properties.hpp"
#include "openvino/runtime/intel_npu/remote_properties.hpp"
#include "openvino/runtime/remote_context.hpp"
#include "openvino/runtime/remote_tensor.hpp"

namespace ov {
namespace intel_npu {

/**
 * @defgroup ov_runtime_level_zero_npu_cpp_api Intel NPU LevelZero interoperability
 * @ingroup ov_runtime_cpp_api
 * Set of C++ classes and properties to work with the Remote API for the Intel NPU LevelZero plugin.
 */

/**
 * @brief Namespace with Intel NPU LevelZero specific remote objects
 */
namespace level_zero {

/**
 * @brief This class represents an abstraction for an NPU plugin remote tensor
 * which can be shared with a user-supplied LevelZero buffer.
 * The plugin object derived from this class can be obtained with a ZeroContext::create_tensor() call.
 * @note The user can obtain the LevelZero buffer handle from this class.
 * @ingroup ov_runtime_level_zero_npu_cpp_api
 */
class ZeroBufferTensor : public RemoteTensor {
public:
    /**
     * @brief Checks that the type-defined runtime parameters are present in the remote object
     * @param tensor a tensor to check
     */
    static void type_check(const Tensor& tensor) {
        RemoteTensor::type_check(
            tensor,
            {{std::string(mem_handle.name()), {}},
             {std::string(mem_type.name()),
              {ov::Any(MemType::L0_INTERNAL_BUF).as<std::string>(), ov::Any(MemType::SHARED_BUF).as<std::string>()}}});
    }

    /**
     * @brief Returns the underlying LevelZero memory object handle.
     * @return underlying void* memory object handle
     */
    void* get() {
        return get_params().at(mem_handle.name()).as<void*>();
    }
};

/**
 * @brief This class represents an abstraction for an NPU plugin remote context
 * which is shared with the LevelZero context object.
 * The plugin object derived from this class can be obtained either with
 * CompiledModel::get_context() or Core::create_context() calls.
 * @ingroup ov_runtime_level_zero_npu_cpp_api
 */
class ZeroContext : public RemoteContext {
protected:
    /**
     * @brief NPU device name
     */
    static constexpr const char* device_name = "NPU";

    /**
     * @brief Default constructor, which can be used in derived classes to avoid multiple create_context() calls
     */
    ZeroContext() = default;

public:
    // Needed to make the create_tensor overloads from the base class visible to the user
    using RemoteContext::create_tensor;

    /**
     * @brief Constructs a context object by retrieving the default NPU remote context from the Core
     * @param core A reference to the OpenVINO Runtime Core object
     */
    ZeroContext(Core& core) {
        *this = core.get_default_context(device_name).as<ZeroContext>();
    }

    /**
     * @brief Returns the underlying LevelZero context handle.
     * @return underlying void* context handle
     */
    void* get() {
        return get_params().at(l0_context.name()).as<void*>();
    }

    /**
     * @brief Obtains a remote tensor that wraps a user-supplied memory buffer
     * @param type Tensor element type
     * @param shape Tensor shape
     * @param buffer A void* buffer that should be wrapped by a remote tensor
     * @return A remote tensor instance
     */
    ZeroBufferTensor create_tensor(const element::Type type, const Shape& shape, void* buffer) {
        AnyMap params = {{mem_type.name(), MemType::SHARED_BUF}, {mem_handle.name(), buffer}};
        return create_tensor(type, shape, params).as<ZeroBufferTensor>();
    }

    /**
     * @brief Obtains a remote tensor from a user-supplied DMA-BUF System Heap object
     * @param type Tensor element type
     * @param shape Tensor shape
     * @param fd A file descriptor (int) that should be wrapped by a remote tensor
     * @return A remote tensor instance
     */
    ZeroBufferTensor create_tensor(const element::Type type, const Shape& shape, int fd) {
        AnyMap params = {{mem_type.name(), MemType::SHARED_BUF},
                         {mem_handle.name(), reinterpret_cast<void*>(static_cast<intptr_t>(fd))}};
        return create_tensor(type, shape, params).as<ZeroBufferTensor>();
    }

    /**
     * @brief Obtains a remote tensor backed by an internal LevelZero host buffer allocated by the plugin
     * @param type Tensor element type
     * @param shape Tensor shape
     * @param tensor_type Type of the tensor to be shared: input, output, or both (BINDED)
     * @return A remote tensor instance
     */
    ZeroBufferTensor create_l0_host_tensor(const element::Type type,
                                           const Shape& shape,
                                           const TensorType tensor_type = TensorType::BINDED) {
        AnyMap params = {{mem_type.name(), MemType::L0_INTERNAL_BUF},
                         {ov::intel_npu::tensor_type.name(), tensor_type}};
        return create_tensor(type, shape, params).as<ZeroBufferTensor>();
    }
};

}  // namespace level_zero
}  // namespace intel_npu
}  // namespace ov
```
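To show how these wrappers fit together, below is a minimal usage sketch based on the API above; the model path is a placeholder, and a statically shaped model is assumed:

```cpp
#include "openvino/runtime/core.hpp"
#include "openvino/runtime/intel_npu/level_zero/level_zero.hpp"

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // placeholder path
    auto compiled = core.compile_model(model, "NPU");

    // Retrieve the default LevelZero remote context for the NPU device.
    ov::intel_npu::level_zero::ZeroContext context(core);

    // Ask the plugin to allocate a LevelZero host buffer for the first input.
    auto input = compiled.input();
    auto remote = context.create_l0_host_tensor(input.get_element_type(),
                                                input.get_shape(),
                                                ov::intel_npu::TensorType::INPUT);

    // The raw LevelZero buffer handle remains accessible if needed.
    void* handle = remote.get();
    (void)handle;

    // The remote tensor can be bound to the request like any other tensor.
    auto request = compiled.create_infer_request();
    request.set_tensor(input, remote);
    request.infer();
    return 0;
}
```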
New file (88 lines): openvino/runtime/intel_npu/remote_properties.hpp

```cpp
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

/**
 * @brief A header for properties of shared device contexts and shared device memory tensors for the NPU device.
 * To be used in constructors of Remote objects.
 *
 * @file openvino/runtime/intel_npu/remote_properties.hpp
 */
#pragma once

#include "openvino/runtime/properties.hpp"

namespace ov {
namespace intel_npu {

using npu_handle_param = void*;

/**
 * @brief Enum to define the type of the shared memory buffer
 */
enum class MemType {
    L0_INTERNAL_BUF = 0,  //!< Internal L0 buffer type allocated by the plugin
    SHARED_BUF = 1,       //!< Shared buffer
};

/**
 * @brief Enum to define the type of the tensor
 */
enum class TensorType {
    INPUT = 0,   //!< Tensor is only used as input
    OUTPUT = 1,  //!< Tensor is only used as output
    BINDED = 2   //!< Tensor could be used as both input and output
};

/** @cond INTERNAL */
inline std::ostream& operator<<(std::ostream& os, const MemType& share_mem_type) {
    switch (share_mem_type) {
    case MemType::L0_INTERNAL_BUF:
        return os << "L0_INTERNAL_BUF";
    case MemType::SHARED_BUF:
        return os << "SHARED_BUF";
    default:
        OPENVINO_THROW("Unsupported memory type");
    }
}

inline std::istream& operator>>(std::istream& is, MemType& share_mem_type) {
    std::string str;
    is >> str;
    if (str == "L0_INTERNAL_BUF") {
        share_mem_type = MemType::L0_INTERNAL_BUF;
    } else if (str == "SHARED_BUF") {
        share_mem_type = MemType::SHARED_BUF;
    } else {
        OPENVINO_THROW("Unsupported memory type: ", str);
    }
    return is;
}
/** @endcond */

/**
 * @brief This key identifies the type of internal shared memory
 * in a shared memory tensor parameter map.
 */
static constexpr Property<MemType> mem_type{"MEM_TYPE"};

/**
 * @brief This key identifies the memory handle
 * in a shared memory tensor parameter map.
 */
static constexpr Property<npu_handle_param> mem_handle{"MEM_HANDLE"};

/**
 * @brief This key identifies the LevelZero context handle
 * in a shared context or shared memory tensor parameter map.
 */
static constexpr Property<npu_handle_param> l0_context{"L0_CONTEXT"};

/**
 * @brief This key identifies the type of the tensor
 * in a shared memory tensor parameter map.
 */
static constexpr Property<TensorType> tensor_type{"TENSOR_TYPE"};

}  // namespace intel_npu
}  // namespace ov
```
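For illustration, these properties can also be passed to the generic ov::RemoteContext::create_tensor() call; this sketch mirrors what ZeroContext::create_tensor(type, shape, void*) does internally (the f32 element type and shape are placeholders):

```cpp
#include "openvino/runtime/intel_npu/remote_properties.hpp"
#include "openvino/runtime/remote_context.hpp"

// Wrap an existing user buffer as a shared remote tensor via the generic API.
ov::RemoteTensor wrap_buffer(ov::RemoteContext& ctx, void* buffer) {
    ov::AnyMap params = {{ov::intel_npu::mem_type.name(), ov::intel_npu::MemType::SHARED_BUF},
                         {ov::intel_npu::mem_handle.name(), buffer}};
    return ctx.create_tensor(ov::element::f32, ov::Shape{1, 3, 224, 224}, params);
}
```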

src/plugins/intel_npu/src/al/include/npu.hpp (+13)

```diff
@@ -9,6 +9,8 @@
 #include "intel_npu/al/config/config.hpp"
 #include "intel_npu/al/icompiled_model.hpp"
 #include "intel_npu/al/icompiler.hpp"
+#include "openvino/runtime/intel_npu/remote_properties.hpp"
+#include "openvino/runtime/iremote_context.hpp"
 #include "openvino/runtime/properties.hpp"
 #include "sync_infer_request.hpp"
@@ -37,6 +39,8 @@ class IEngineBackend : public std::enable_shared_from_this<IEngineBackend> {
     virtual bool isBatchingSupported() const = 0;
     /** @brief Register backend-specific options */
     virtual void registerOptions(OptionsDesc& options) const;
+    /** @brief Get the Level Zero context */
+    virtual void* getContext() const;

 protected:
     virtual ~IEngineBackend() = default;
@@ -75,6 +79,15 @@ class IDevice : public std::enable_shared_from_this<IDevice> {
                                 const std::shared_ptr<IExecutor>& executor,
                                 const Config& config) = 0;

+    virtual ov::SoPtr<ov::IRemoteTensor> createRemoteTensor(
+        std::shared_ptr<ov::IRemoteContext> context,
+        const ov::element::Type& element_type,
+        const ov::Shape& shape,
+        const Config& config,
+        ov::intel_npu::TensorType tensor_type = ov::intel_npu::TensorType::BINDED,
+        ov::intel_npu::MemType mem_type = ov::intel_npu::MemType::L0_INTERNAL_BUF,
+        void* mem = nullptr);
+
 protected:
     virtual ~IDevice() = default;
 };
```
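A backend is expected to override the new createRemoteTensor() entry point; the following is only a sketch of what a Level Zero implementation might look like (ZeroDevice and ZeroRemoteTensor are assumed names, not confirmed by this excerpt):

```cpp
// Hypothetical override in a Level Zero backend (illustrative only).
ov::SoPtr<ov::IRemoteTensor> ZeroDevice::createRemoteTensor(
    std::shared_ptr<ov::IRemoteContext> context,
    const ov::element::Type& element_type,
    const ov::Shape& shape,
    const Config& config,
    ov::intel_npu::TensorType tensor_type,
    ov::intel_npu::MemType mem_type,
    void* mem) {
    // Forward all parameters to the backend-specific remote tensor, which
    // allocates an internal L0 buffer or wraps the user-supplied memory.
    return {std::make_shared<ZeroRemoteTensor>(context, element_type, shape, config, tensor_type, mem_type, mem)};
}
```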
New file (82 lines): remote_tensor.hpp

```cpp
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <map>
#include <memory>
#include <string>

#include "intel_npu/al/config/config.hpp"
#include "openvino/runtime/iremote_context.hpp"
#include "openvino/runtime/iremote_tensor.hpp"

namespace intel_npu {

/**
 * @brief Acts as an interface for the remote tensor structures implemented by all backends.
 * @details The operations common to all backends are implemented here.
 */
class RemoteTensor : public ov::IRemoteTensor {
public:
    RemoteTensor(std::shared_ptr<ov::IRemoteContext> context,
                 const ov::element::Type& element_type,
                 const ov::Shape& shape);

    ~RemoteTensor() override;

    /**
     * @brief Returns additional information associated with the tensor
     * @return Map of property names to properties
     */
    const ov::AnyMap& get_properties() const override;

    /**
     * @brief Returns the device name
     * @return Device name
     */
    const std::string& get_device_name() const override;

    /**
     * @brief Sets a new shape for the tensor
     * @note Memory allocation may happen
     * @param shape A new shape
     */
    void set_shape(ov::Shape shape) override;

    /**
     * @return The tensor element type
     */
    const ov::element::Type& get_element_type() const override;

    /**
     * @return The tensor shape
     */
    const ov::Shape& get_shape() const override;

    /**
     * @return The tensor's strides in bytes
     */
    const ov::Strides& get_strides() const override;

    /**
     * @return The remote context
     */
    std::shared_ptr<ov::IRemoteContext> get_context() const;

protected:
    virtual void allocate(const size_t bytes) = 0;
    virtual bool deallocate() noexcept = 0;
    void update_strides();

    std::shared_ptr<ov::IRemoteContext> _context;

    ov::element::Type _element_type;
    ov::Shape _shape;
    ov::Shape _capacity;
    ov::Strides _strides{};
    ov::AnyMap _properties;
};

}  // namespace intel_npu
```
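A concrete backend tensor derives from this base and supplies the allocate()/deallocate() hooks. Here is an illustrative subclass under assumed semantics; the class name and allocation strategy are not from this commit:

```cpp
// Illustrative subclass showing the allocate()/deallocate() contract (sketch).
class HostRemoteTensor final : public intel_npu::RemoteTensor {
public:
    using intel_npu::RemoteTensor::RemoteTensor;

protected:
    void allocate(const size_t bytes) override {
        // A real backend would allocate LevelZero device/host memory here.
        _data = ::operator new(bytes);
    }

    bool deallocate() noexcept override {
        ::operator delete(_data);
        _data = nullptr;
        return true;
    }

private:
    void* _data = nullptr;
};
```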

src/plugins/intel_npu/src/al/include/sync_infer_request.hpp (+6 −6)

```diff
@@ -30,15 +30,15 @@ class SyncInferRequest : public ov::IInferRequest {
      * @param port Port of the tensor to get.
      * @return Tensor for the port @p port.
      */
-    ov::SoPtr<ov::ITensor> get_tensor(const ov::Output<const ov::Node>& port) const override;
+    virtual ov::SoPtr<ov::ITensor> get_tensor(const ov::Output<const ov::Node>& port) const override;

     /**
      * @brief Sets an input/output tensor to infer.
      * @param port Port of the input or output tensor.
      * @param tensor Reference to a tensor. The element_type and shape of a tensor must match
      * the model's input/output element_type and size.
      */
-    void set_tensor(const ov::Output<const ov::Node>& port, const ov::SoPtr<ov::ITensor>& tensor) override;
+    virtual void set_tensor(const ov::Output<const ov::Node>& port, const ov::SoPtr<ov::ITensor>& tensor) override;

     /**
      * @brief Currently there is no support implemented for batches of tensors, thus this call is a simple redirection
@@ -160,7 +160,7 @@ class SyncInferRequest : public ov::IInferRequest {
      * otherwise.
      * @param precision The precision value to be checked.
      */
-    virtual void check_network_precision(const ov::element::Type_t precision) = 0;
+    virtual void check_network_precision(const ov::element::Type_t precision) const = 0;

     /**
      * @brief Indicates a kind of provided tensor. Marks special tensors, used for internal implementation
@@ -179,16 +179,16 @@ class SyncInferRequest : public ov::IInferRequest {
     void allocate_tensor(std::string tensorName,
                          const IONodeDescriptor& descriptor,
                          TensorType tensorType = TensorType::InputOrOutput,
-                         const ov::Allocator& allocator = {});
+                         const ov::Allocator& allocator = {}) const;

     // Mutable to return reference to ov::Tensor
     mutable std::unordered_map<std::string, std::shared_ptr<ov::ITensor>> _allTensors;
     mutable std::unordered_map<std::string, std::shared_ptr<ov::ITensor>> _shapesTensors;
     // A copy of each tensor is needed to maintain the original L0 memory allocation in case the user provides another
     // memory area for the tensor.
-    std::unordered_map<std::string, std::shared_ptr<ov::ITensor>> _copyAllTensors;
+    mutable std::unordered_map<std::string, std::shared_ptr<ov::ITensor>> _copyAllTensors;

-    std::unordered_map<std::string, std::shared_ptr<VariableState>> _variableStates;
+    mutable std::unordered_map<std::string, std::shared_ptr<VariableState>> _variableStates;

     // This is an intel_npu::ICompiledModel pointer, but we need to use the OV base class because
     // ov::IInferRequest::get_compiled_model returns a reference to a shared_ptr!
```