Skip to content

Commit 2817977

Browse files
authored
[NPU] Use zero tensor to get correct data (#27980)
### Details: - *A tensor's memory cannot be deallocated and re-allocated if updating the mutable command list is not supported: the new memory must be registered in the graph, and that can be done only through the command-list update feature* - *To check whether the memory was re-allocated, we compare the unique ID provided by the driver when the memory is created* ### Tickets: - *E#134453* --------- Signed-off-by: Bogdan Pereanu <bogdan.pereanu@intel.com>
1 parent 2b0f127 commit 2817977

File tree

16 files changed

+556
-163
lines changed

16 files changed

+556
-163
lines changed

src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp

+4-5
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "zero_pipeline.hpp"
1616
#include "zero_profiling.hpp"
1717
#include "zero_remote_tensor.hpp"
18+
#include "zero_tensor.hpp"
1819

1920
namespace intel_npu {
2021

@@ -62,8 +63,9 @@ class ZeroInferRequest final : public SyncInferRequest {
6263
std::shared_ptr<ov::ITensor>& get_level_zero_input(size_t index, size_t tensorNo = 0) const;
6364
std::vector<std::shared_ptr<ov::ITensor>>& get_level_zero_inputs(size_t index) const;
6465

65-
std::optional<TensorData>& get_input_tensor_data(size_t index, size_t tensorNo = 0) const;
66-
std::vector<std::optional<TensorData>>& get_input_tensors_data(size_t index) const;
66+
std::shared_ptr<ov::ITensor> create_tensor(ov::element::Type type,
67+
const ov::Shape& shape,
68+
const ov::Allocator& allocator = {}) const override;
6769

6870
const std::shared_ptr<ZeroInitStructsHolder> _initStructs;
6971
const std::shared_ptr<IGraph> _graph;
@@ -75,9 +77,6 @@ class ZeroInferRequest final : public SyncInferRequest {
7577
mutable std::vector<std::vector<std::shared_ptr<ov::ITensor>>> _levelZeroInputTensors;
7678
mutable std::vector<std::shared_ptr<ov::ITensor>> _levelZeroOutputTensors;
7779

78-
mutable std::vector<std::vector<std::optional<TensorData>>> _inputTensorsData;
79-
mutable std::vector<std::optional<TensorData>> _outputTensorsData;
80-
8180
ze_device_properties_t _properties = {};
8281
std::shared_ptr<const zeroMemory::HostMemAllocator> _inputAllocator;
8382
std::shared_ptr<const zeroMemory::HostMemAllocator> _outputAllocator;

src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp

+9-11
Original file line numberDiff line numberDiff line change
@@ -9,25 +9,20 @@
99
#include "intel_npu/utils/zero/zero_wrappers.hpp"
1010
#include "zero_memory.hpp"
1111
#include "zero_profiling.hpp"
12+
#include "zero_tensor.hpp"
1213

1314
namespace intel_npu {
1415

15-
struct TensorData {
16-
void* mem;
17-
size_t size;
18-
bool levelZeroTensorCreatedLocally = true;
19-
};
20-
2116
struct Pipeline {
2217
public:
2318
Pipeline(const Config& config,
24-
const std::shared_ptr<ZeroInitStructsHolder>& initStructs,
19+
const std::shared_ptr<ZeroInitStructsHolder>& init_structs,
2520
const std::shared_ptr<IGraph>& graph,
2621
zeroProfiling::ProfilingPool& profiling_pool,
2722
zeroProfiling::ProfilingQuery& profiling_query,
2823
const std::shared_ptr<zeroProfiling::NpuInferProfiling>& npu_profiling,
29-
const std::vector<std::vector<std::optional<TensorData>>>& inputTensorsData,
30-
const std::vector<std::optional<TensorData>>& outputTensorsData,
24+
const std::vector<std::vector<std::shared_ptr<ov::ITensor>>>& input_tensors,
25+
const std::vector<std::shared_ptr<ov::ITensor>>& output_tensors,
3126
uint32_t group_ordinal);
3227

3328
Pipeline(const Pipeline&) = delete;
@@ -38,8 +33,11 @@ struct Pipeline {
3833
void pull();
3934
void reset() const;
4035

41-
void updateCommandList(const TensorData& tensorsData, uint32_t index);
42-
void updateCommandList(const TensorData& tensorsData, uint32_t index, size_t commandListIndex);
36+
void updateCommandList(uint32_t arg_index, const void* arg_data, size_t byte_size);
37+
void updateCommandListIndex(uint32_t arg_index, const void* arg_data, size_t command_list_index);
38+
39+
void closeCommandList();
40+
void closeCommandListIndex(size_t command_list_index);
4341

4442
protected:
4543
std::shared_ptr<IGraph> _graph;

src/plugins/intel_npu/src/backend/include/zero_remote_tensor.hpp

+5-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
namespace intel_npu {
1616

17-
class ZeroRemoteTensor : public RemoteTensor {
17+
class ZeroRemoteTensor final : public RemoteTensor {
1818
public:
1919
ZeroRemoteTensor(const std::shared_ptr<ov::IRemoteContext>& context,
2020
const std::shared_ptr<ZeroInitStructsHolder>& init_structs,
@@ -48,4 +48,8 @@ class ZeroRemoteTensor : public RemoteTensor {
4848
bool _external_memory_support = false;
4949
};
5050

51+
inline bool is_remote_tensor(const std::shared_ptr<ov::ITensor>& tensor) {
52+
return std::dynamic_pointer_cast<ZeroRemoteTensor>(tensor) != nullptr;
53+
}
54+
5155
} // namespace intel_npu
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <memory>
#include <mutex>

#include "intel_npu/config/config.hpp"
#include "intel_npu/utils/zero/zero_init.hpp"
#include "openvino/runtime/common.hpp"
#include "openvino/runtime/itensor.hpp"
#include "openvino/runtime/so_ptr.hpp"

namespace intel_npu {

/**
 * @brief Tensor with internal host storage allocated through a custom allocator,
 * constructed from an element type and a shape.
 * @details The implementation is similar to the AllocatedTensor class from the OV namespace.
 * @note set_shape() throws when a re-allocation would be required but re-allocation is not
 * supported by the driver. Two extra methods let the consumer query whether the backing
 * memory address changed and reset that flag afterwards.
 */
class ZeroTensor final : public ov::ITensor {
public:
    ZeroTensor(const std::shared_ptr<ZeroInitStructsHolder>& init_structs,
               const ov::element::Type element_type,
               const ov::Shape& shape,
               const ov::Allocator& allocator);

    void* data(const ov::element::Type& type = {}) const override;

    const ov::element::Type& get_element_type() const override;

    const ov::Shape& get_shape() const override;

    void set_shape(ov::Shape new_shape) override;

    const ov::Strides& get_strides() const override;

    // True when set_shape() re-allocated the underlying buffer since the last reset.
    bool memory_address_changed();
    // Clears the flag reported by memory_address_changed().
    void reset_memory_flag();

    ~ZeroTensor();

private:
    // Runs placement construction over the raw buffer for non-trivial element types.
    static void initialize_elements(void* data, const ov::element::Type& element_type, const ov::Shape& shape);
    void update_strides() const;
    size_t get_capacity() const;
    size_t get_bytes_capacity() const;
    void destroy_elements(size_t begin_ind, size_t end_ind);
    void destroy_memory();

    std::shared_ptr<ZeroInitStructsHolder> _init_structs;

    ov::element::Type _element_type;
    ov::Shape _shape;
    // Shape for which storage was last allocated; may exceed _shape after a shrink.
    ov::Shape _capacity;
    mutable ov::Strides _strides;
    mutable std::once_flag _strides_once;
    ov::Allocator _allocator;
    void* _ptr = nullptr;
    bool _reset_tensor_memory = false;
};

}  // namespace intel_npu

0 commit comments

Comments
 (0)