Skip to content

Commit 2817977

Browse files
authored
[NPU] Use zero tensor to get correct data (#27980)
### Details: - *A tensor's memory cannot be deallocated and re-allocated if updating the mutable command list is not supported: the new memory must be registered in the graph, and that can be done only through the command-list update feature* - *To check whether the memory was re-allocated, we compare the unique ID provided by the driver when the memory is created* ### Tickets: - *E#134453* --------- Signed-off-by: Bogdan Pereanu <bogdan.pereanu@intel.com>
1 parent 2b0f127 commit 2817977

File tree

16 files changed

+556
-163
lines changed

16 files changed

+556
-163
lines changed

src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp

+4-5
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "zero_pipeline.hpp"
1616
#include "zero_profiling.hpp"
1717
#include "zero_remote_tensor.hpp"
18+
#include "zero_tensor.hpp"
1819

1920
namespace intel_npu {
2021

@@ -62,8 +63,9 @@ class ZeroInferRequest final : public SyncInferRequest {
6263
std::shared_ptr<ov::ITensor>& get_level_zero_input(size_t index, size_t tensorNo = 0) const;
6364
std::vector<std::shared_ptr<ov::ITensor>>& get_level_zero_inputs(size_t index) const;
6465

65-
std::optional<TensorData>& get_input_tensor_data(size_t index, size_t tensorNo = 0) const;
66-
std::vector<std::optional<TensorData>>& get_input_tensors_data(size_t index) const;
66+
std::shared_ptr<ov::ITensor> create_tensor(ov::element::Type type,
67+
const ov::Shape& shape,
68+
const ov::Allocator& allocator = {}) const override;
6769

6870
const std::shared_ptr<ZeroInitStructsHolder> _initStructs;
6971
const std::shared_ptr<IGraph> _graph;
@@ -75,9 +77,6 @@ class ZeroInferRequest final : public SyncInferRequest {
7577
mutable std::vector<std::vector<std::shared_ptr<ov::ITensor>>> _levelZeroInputTensors;
7678
mutable std::vector<std::shared_ptr<ov::ITensor>> _levelZeroOutputTensors;
7779

78-
mutable std::vector<std::vector<std::optional<TensorData>>> _inputTensorsData;
79-
mutable std::vector<std::optional<TensorData>> _outputTensorsData;
80-
8180
ze_device_properties_t _properties = {};
8281
std::shared_ptr<const zeroMemory::HostMemAllocator> _inputAllocator;
8382
std::shared_ptr<const zeroMemory::HostMemAllocator> _outputAllocator;

src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp

+9-11
Original file line numberDiff line numberDiff line change
@@ -9,25 +9,20 @@
99
#include "intel_npu/utils/zero/zero_wrappers.hpp"
1010
#include "zero_memory.hpp"
1111
#include "zero_profiling.hpp"
12+
#include "zero_tensor.hpp"
1213

1314
namespace intel_npu {
1415

15-
struct TensorData {
16-
void* mem;
17-
size_t size;
18-
bool levelZeroTensorCreatedLocally = true;
19-
};
20-
2116
struct Pipeline {
2217
public:
2318
Pipeline(const Config& config,
24-
const std::shared_ptr<ZeroInitStructsHolder>& initStructs,
19+
const std::shared_ptr<ZeroInitStructsHolder>& init_structs,
2520
const std::shared_ptr<IGraph>& graph,
2621
zeroProfiling::ProfilingPool& profiling_pool,
2722
zeroProfiling::ProfilingQuery& profiling_query,
2823
const std::shared_ptr<zeroProfiling::NpuInferProfiling>& npu_profiling,
29-
const std::vector<std::vector<std::optional<TensorData>>>& inputTensorsData,
30-
const std::vector<std::optional<TensorData>>& outputTensorsData,
24+
const std::vector<std::vector<std::shared_ptr<ov::ITensor>>>& input_tensors,
25+
const std::vector<std::shared_ptr<ov::ITensor>>& output_tensors,
3126
uint32_t group_ordinal);
3227

3328
Pipeline(const Pipeline&) = delete;
@@ -38,8 +33,11 @@ struct Pipeline {
3833
void pull();
3934
void reset() const;
4035

41-
void updateCommandList(const TensorData& tensorsData, uint32_t index);
42-
void updateCommandList(const TensorData& tensorsData, uint32_t index, size_t commandListIndex);
36+
void updateCommandList(uint32_t arg_index, const void* arg_data, size_t byte_size);
37+
void updateCommandListIndex(uint32_t arg_index, const void* arg_data, size_t command_list_index);
38+
39+
void closeCommandList();
40+
void closeCommandListIndex(size_t command_list_index);
4341

4442
protected:
4543
std::shared_ptr<IGraph> _graph;

src/plugins/intel_npu/src/backend/include/zero_remote_tensor.hpp

+5-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
namespace intel_npu {
1616

17-
class ZeroRemoteTensor : public RemoteTensor {
17+
class ZeroRemoteTensor final : public RemoteTensor {
1818
public:
1919
ZeroRemoteTensor(const std::shared_ptr<ov::IRemoteContext>& context,
2020
const std::shared_ptr<ZeroInitStructsHolder>& init_structs,
@@ -48,4 +48,8 @@ class ZeroRemoteTensor : public RemoteTensor {
4848
bool _external_memory_support = false;
4949
};
5050

51+
inline bool is_remote_tensor(const std::shared_ptr<ov::ITensor>& tensor) {
52+
return std::dynamic_pointer_cast<ZeroRemoteTensor>(tensor) != nullptr;
53+
}
54+
5155
} // namespace intel_npu
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <memory>
#include <mutex>

#include "intel_npu/config/config.hpp"
#include "intel_npu/utils/zero/zero_init.hpp"
#include "openvino/runtime/common.hpp"
#include "openvino/runtime/itensor.hpp"
#include "openvino/runtime/so_ptr.hpp"

namespace intel_npu {

/**
 * @brief Tensor with internal host storage allocated through a custom allocator,
 * constructed from an element type and a shape.
 * @details The implementation is similar to the AllocatedTensor class from the OV namespace.
 * @note set_shape() throws when a re-allocation would be required but re-allocation is not
 * supported by the driver. Two extra methods let the consumer query whether the backing
 * memory address changed and reset that flag afterwards.
 */
class ZeroTensor final : public ov::ITensor {
public:
    ZeroTensor(const std::shared_ptr<ZeroInitStructsHolder>& init_structs,
               const ov::element::Type element_type,
               const ov::Shape& shape,
               const ov::Allocator& allocator);

    void* data(const ov::element::Type& type = {}) const override;

    const ov::element::Type& get_element_type() const override;

    const ov::Shape& get_shape() const override;

    void set_shape(ov::Shape new_shape) override;

    const ov::Strides& get_strides() const override;

    // True when set_shape() re-allocated the underlying buffer since the last reset.
    bool memory_address_changed();
    // Clears the flag reported by memory_address_changed().
    void reset_memory_flag();

    ~ZeroTensor();

private:
    // Runs placement construction over the raw buffer for non-trivial element types.
    static void initialize_elements(void* data, const ov::element::Type& element_type, const ov::Shape& shape);
    void update_strides() const;
    size_t get_capacity() const;
    size_t get_bytes_capacity() const;
    void destroy_elements(size_t begin_ind, size_t end_ind);
    void destroy_memory();

    std::shared_ptr<ZeroInitStructsHolder> _init_structs;

    ov::element::Type _element_type;
    ov::Shape _shape;
    // Shape for which storage was last allocated; may exceed _shape after a shrink.
    ov::Shape _capacity;
    mutable ov::Strides _strides;
    mutable std::once_flag _strides_once;
    ov::Allocator _allocator;
    void* _ptr = nullptr;
    bool _reset_tensor_memory = false;
};

}  // namespace intel_npu

0 commit comments

Comments
 (0)