Commit 75e47d7

pereanub authored and yangwang201911 committed
[NPU] Adding support for the remote tensor feature (#24584)
Below are some diagrams of the proposed implementation.

Objects and dependencies:

![Screenshot 2024-05-28 144347](https://github.com/openvinotoolkit/openvino/assets/10560145/8b52dae9-27c3-4597-a07a-7155e0131cae)

Workflow of the RemoteContext and RemoteTensor:

![Screenshot 2024-05-28 144207](https://github.com/openvinotoolkit/openvino/assets/10560145/8cb749c7-0819-4f2a-ade5-f330e21a90a5)

New workflow of the ZeroInferRequest:

![Screenshot 2024-05-28 145045](https://github.com/openvinotoolkit/openvino/assets/10560145/aa295a38-ccc1-4c83-9e82-8dab9108f915)

- *EISW-125271*
- *EISW-125272*
- *EISW-125273*
1 parent d8f0729 commit 75e47d7


42 files changed (+2581 −199 lines)
New file (144 lines): openvino/runtime/intel_npu/level_zero/level_zero.hpp

```cpp
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

/**
 * @brief A header that defines wrappers for internal NPU plugin-specific
 * LevelZero context and LevelZero shared memory tensors
 *
 * @file openvino/runtime/intel_npu/level_zero/level_zero.hpp
 */
#pragma once

#include "openvino/runtime/core.hpp"
#include "openvino/runtime/intel_npu/properties.hpp"
#include "openvino/runtime/intel_npu/remote_properties.hpp"
#include "openvino/runtime/remote_context.hpp"
#include "openvino/runtime/remote_tensor.hpp"

namespace ov {
namespace intel_npu {

/**
 * @defgroup ov_runtime_level_zero_npu_cpp_api Intel NPU LevelZero interoperability
 * @ingroup ov_runtime_cpp_api
 * Set of C++ classes and properties to work with the Remote API for the Intel NPU LevelZero plugin.
 */

/**
 * @brief Namespace with Intel NPU LevelZero specific remote objects
 */
namespace level_zero {

/**
 * @brief This class represents an abstraction for an NPU plugin remote tensor
 * which can be shared with a user-supplied LevelZero buffer.
 * The plugin object derived from this class can be obtained with a ZeroContext::create_tensor() call.
 * @note The user can obtain the LevelZero buffer handle from this class.
 * @ingroup ov_runtime_level_zero_npu_cpp_api
 */
class ZeroBufferTensor : public RemoteTensor {
public:
    /**
     * @brief Checks that the type-defined runtime parameters are present in the remote object
     * @param tensor a tensor to check
     */
    static void type_check(const Tensor& tensor) {
        RemoteTensor::type_check(
            tensor,
            {{std::string(mem_handle.name()), {}},
             {std::string(mem_type.name()),
              {ov::Any(MemType::L0_INTERNAL_BUF).as<std::string>(), ov::Any(MemType::SHARED_BUF).as<std::string>()}}});
    }

    /**
     * @brief Returns the underlying LevelZero memory object handle.
     * @return underlying void* memory object handle
     */
    void* get() {
        return get_params().at(mem_handle.name()).as<void*>();
    }
};

/**
 * @brief This class represents an abstraction for an NPU plugin remote context
 * which is shared with the LevelZero context object.
 * The plugin object derived from this class can be obtained either with
 * CompiledModel::get_context() or Core::create_context() calls.
 * @ingroup ov_runtime_level_zero_npu_cpp_api
 */
class ZeroContext : public RemoteContext {
protected:
    /**
     * @brief NPU device name
     */
    static constexpr const char* device_name = "NPU";

    /**
     * @brief Default constructor, which can be used in derived classes to avoid multiple create_context() calls
     */
    ZeroContext() = default;

public:
    // Needed to make the create_tensor overloads from the base class visible to the user
    using RemoteContext::create_tensor;

    /**
     * @brief Constructs a context object by retrieving the default NPU remote context from the Core
     * @param core A reference to the OpenVINO Runtime Core object
     */
    ZeroContext(Core& core) {
        *this = core.get_default_context(device_name).as<ZeroContext>();
    }

    /**
     * @brief Returns the underlying LevelZero context handle.
     * @return underlying void* context handle
     */
    void* get() {
        return get_params().at(l0_context.name()).as<void*>();
    }

    /**
     * @brief Obtains a remote tensor that wraps a user-supplied memory buffer
     * @param type Tensor element type
     * @param shape Tensor shape
     * @param buffer A void* buffer that should be wrapped by a remote tensor
     * @return A remote tensor instance
     */
    ZeroBufferTensor create_tensor(const element::Type type, const Shape& shape, void* buffer) {
        AnyMap params = {{mem_type.name(), MemType::SHARED_BUF}, {mem_handle.name(), buffer}};
        return create_tensor(type, shape, params).as<ZeroBufferTensor>();
    }

    /**
     * @brief Obtains a remote tensor from a user-supplied DMA-BUF System Heap object
     * @param type Tensor element type
     * @param shape Tensor shape
     * @param fd A file descriptor (int) that should be wrapped by a remote tensor
     * @return A remote tensor instance
     */
    ZeroBufferTensor create_tensor(const element::Type type, const Shape& shape, int fd) {
        AnyMap params = {{mem_type.name(), MemType::SHARED_BUF},
                         {mem_handle.name(), reinterpret_cast<void*>(static_cast<intptr_t>(fd))}};
        return create_tensor(type, shape, params).as<ZeroBufferTensor>();
    }

    /**
     * @brief Obtains a remote tensor backed by an internal LevelZero host buffer allocated by the plugin
     * @param type Tensor element type
     * @param shape Tensor shape
     * @param tensor_type Type of the tensor to be shared: input, output, or both (BINDED)
     * @return A remote tensor instance
     */
    ZeroBufferTensor create_l0_host_tensor(const element::Type type,
                                           const Shape& shape,
                                           const TensorType tensor_type = TensorType::BINDED) {
        AnyMap params = {{mem_type.name(), MemType::L0_INTERNAL_BUF},
                         {ov::intel_npu::tensor_type.name(), tensor_type}};
        return create_tensor(type, shape, params).as<ZeroBufferTensor>();
    }
};

}  // namespace level_zero
}  // namespace intel_npu
}  // namespace ov
```
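To show how these wrappers fit together, below is a minimal usage sketch based on the API above; the model path is a placeholder, and a statically shaped model is assumed:

```cpp
#include "openvino/runtime/core.hpp"
#include "openvino/runtime/intel_npu/level_zero/level_zero.hpp"

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // placeholder path
    auto compiled = core.compile_model(model, "NPU");

    // Retrieve the default LevelZero remote context for the NPU device.
    ov::intel_npu::level_zero::ZeroContext context(core);

    // Ask the plugin to allocate a LevelZero host buffer for the first input.
    auto input = compiled.input();
    auto remote = context.create_l0_host_tensor(input.get_element_type(),
                                                input.get_shape(),
                                                ov::intel_npu::TensorType::INPUT);

    // The raw LevelZero buffer handle remains accessible if needed.
    void* handle = remote.get();
    (void)handle;

    // The remote tensor can be bound to the request like any other tensor.
    auto request = compiled.create_infer_request();
    request.set_tensor(input, remote);
    request.infer();
    return 0;
}
```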
New file (88 lines): openvino/runtime/intel_npu/remote_properties.hpp

```cpp
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

/**
 * @brief A header for properties of shared device contexts and shared device memory tensors for the NPU device.
 * To be used in constructors of Remote objects.
 *
 * @file openvino/runtime/intel_npu/remote_properties.hpp
 */
#pragma once

#include "openvino/runtime/properties.hpp"

namespace ov {
namespace intel_npu {

using npu_handle_param = void*;

/**
 * @brief Enum to define the type of the shared memory buffer
 */
enum class MemType {
    L0_INTERNAL_BUF = 0,  //!< Internal L0 buffer type allocated by the plugin
    SHARED_BUF = 1,       //!< Shared buffer
};

/**
 * @brief Enum to define the type of the tensor
 */
enum class TensorType {
    INPUT = 0,   //!< Tensor is only used as input
    OUTPUT = 1,  //!< Tensor is only used as output
    BINDED = 2   //!< Tensor could be used as both input and output
};

/** @cond INTERNAL */
inline std::ostream& operator<<(std::ostream& os, const MemType& share_mem_type) {
    switch (share_mem_type) {
    case MemType::L0_INTERNAL_BUF:
        return os << "L0_INTERNAL_BUF";
    case MemType::SHARED_BUF:
        return os << "SHARED_BUF";
    default:
        OPENVINO_THROW("Unsupported memory type");
    }
}

inline std::istream& operator>>(std::istream& is, MemType& share_mem_type) {
    std::string str;
    is >> str;
    if (str == "L0_INTERNAL_BUF") {
        share_mem_type = MemType::L0_INTERNAL_BUF;
    } else if (str == "SHARED_BUF") {
        share_mem_type = MemType::SHARED_BUF;
    } else {
        OPENVINO_THROW("Unsupported memory type: ", str);
    }
    return is;
}
/** @endcond */

/**
 * @brief This key identifies the type of internal shared memory
 * in a shared memory tensor parameter map.
 */
static constexpr Property<MemType> mem_type{"MEM_TYPE"};

/**
 * @brief This key identifies the memory handle
 * in a shared memory tensor parameter map.
 */
static constexpr Property<npu_handle_param> mem_handle{"MEM_HANDLE"};

/**
 * @brief This key identifies the LevelZero context handle
 * in a shared context or shared memory tensor parameter map.
 */
static constexpr Property<npu_handle_param> l0_context{"L0_CONTEXT"};

/**
 * @brief This key identifies the type of the tensor
 * in a shared memory tensor parameter map.
 */
static constexpr Property<TensorType> tensor_type{"TENSOR_TYPE"};

}  // namespace intel_npu
}  // namespace ov
```
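For illustration, these properties can also be passed to the generic ov::RemoteContext::create_tensor() call; this sketch mirrors what ZeroContext::create_tensor(type, shape, void*) does internally (the f32 element type and shape are placeholders):

```cpp
#include "openvino/runtime/intel_npu/remote_properties.hpp"
#include "openvino/runtime/remote_context.hpp"

// Wrap an existing user buffer as a shared remote tensor via the generic API.
ov::RemoteTensor wrap_buffer(ov::RemoteContext& ctx, void* buffer) {
    ov::AnyMap params = {{ov::intel_npu::mem_type.name(), ov::intel_npu::MemType::SHARED_BUF},
                         {ov::intel_npu::mem_handle.name(), buffer}};
    return ctx.create_tensor(ov::element::f32, ov::Shape{1, 3, 224, 224}, params);
}
```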

src/plugins/intel_npu/src/al/include/npu.hpp (+13)

```diff
@@ -9,6 +9,8 @@
 #include "intel_npu/al/config/config.hpp"
 #include "intel_npu/al/icompiled_model.hpp"
 #include "intel_npu/al/icompiler.hpp"
+#include "openvino/runtime/intel_npu/remote_properties.hpp"
+#include "openvino/runtime/iremote_context.hpp"
 #include "openvino/runtime/properties.hpp"
 #include "sync_infer_request.hpp"
@@ -37,6 +39,8 @@ class IEngineBackend : public std::enable_shared_from_this<IEngineBackend> {
     virtual bool isBatchingSupported() const = 0;
     /** @brief Register backend-specific options */
     virtual void registerOptions(OptionsDesc& options) const;
+    /** @brief Get the Level Zero context */
+    virtual void* getContext() const;

 protected:
     virtual ~IEngineBackend() = default;
@@ -75,6 +79,15 @@ class IDevice : public std::enable_shared_from_this<IDevice> {
                                 const std::shared_ptr<IExecutor>& executor,
                                 const Config& config) = 0;

+    virtual ov::SoPtr<ov::IRemoteTensor> createRemoteTensor(
+        std::shared_ptr<ov::IRemoteContext> context,
+        const ov::element::Type& element_type,
+        const ov::Shape& shape,
+        const Config& config,
+        ov::intel_npu::TensorType tensor_type = ov::intel_npu::TensorType::BINDED,
+        ov::intel_npu::MemType mem_type = ov::intel_npu::MemType::L0_INTERNAL_BUF,
+        void* mem = nullptr);
+
 protected:
     virtual ~IDevice() = default;
 };
```
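A backend is expected to override the new createRemoteTensor() entry point; the following is only a sketch of what a Level Zero implementation might look like (ZeroDevice and ZeroRemoteTensor are assumed names, not confirmed by this excerpt):

```cpp
// Hypothetical override in a Level Zero backend (illustrative only).
ov::SoPtr<ov::IRemoteTensor> ZeroDevice::createRemoteTensor(
    std::shared_ptr<ov::IRemoteContext> context,
    const ov::element::Type& element_type,
    const ov::Shape& shape,
    const Config& config,
    ov::intel_npu::TensorType tensor_type,
    ov::intel_npu::MemType mem_type,
    void* mem) {
    // Forward all parameters to the backend-specific remote tensor, which
    // allocates an internal L0 buffer or wraps the user-supplied memory.
    return {std::make_shared<ZeroRemoteTensor>(context, element_type, shape, config, tensor_type, mem_type, mem)};
}
```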
New file (82 lines): remote_tensor.hpp

```cpp
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <map>
#include <memory>
#include <string>

#include "intel_npu/al/config/config.hpp"
#include "openvino/runtime/iremote_context.hpp"
#include "openvino/runtime/iremote_tensor.hpp"

namespace intel_npu {

/**
 * @brief Acts as an interface for the remote tensor structures implemented by all backends.
 * @details The operations common to all backends are implemented here.
 */
class RemoteTensor : public ov::IRemoteTensor {
public:
    RemoteTensor(std::shared_ptr<ov::IRemoteContext> context,
                 const ov::element::Type& element_type,
                 const ov::Shape& shape);

    ~RemoteTensor() override;

    /**
     * @brief Returns additional information associated with the tensor
     * @return Map of property names to properties
     */
    const ov::AnyMap& get_properties() const override;

    /**
     * @brief Returns the device name
     * @return Device name
     */
    const std::string& get_device_name() const override;

    /**
     * @brief Sets a new shape for the tensor
     * @note Memory allocation may happen
     * @param shape A new shape
     */
    void set_shape(ov::Shape shape) override;

    /**
     * @return The tensor element type
     */
    const ov::element::Type& get_element_type() const override;

    /**
     * @return The tensor shape
     */
    const ov::Shape& get_shape() const override;

    /**
     * @return The tensor's strides in bytes
     */
    const ov::Strides& get_strides() const override;

    /**
     * @return The remote context
     */
    std::shared_ptr<ov::IRemoteContext> get_context() const;

protected:
    virtual void allocate(const size_t bytes) = 0;
    virtual bool deallocate() noexcept = 0;
    void update_strides();

    std::shared_ptr<ov::IRemoteContext> _context;

    ov::element::Type _element_type;
    ov::Shape _shape;
    ov::Shape _capacity;
    ov::Strides _strides{};
    ov::AnyMap _properties;
};

}  // namespace intel_npu
```
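A concrete backend tensor derives from this base and supplies the allocate()/deallocate() hooks. Here is an illustrative subclass under assumed semantics; the class name and allocation strategy are not from this commit:

```cpp
// Illustrative subclass showing the allocate()/deallocate() contract (sketch).
class HostRemoteTensor final : public intel_npu::RemoteTensor {
public:
    using intel_npu::RemoteTensor::RemoteTensor;

protected:
    void allocate(const size_t bytes) override {
        // A real backend would allocate LevelZero device/host memory here.
        _data = ::operator new(bytes);
    }

    bool deallocate() noexcept override {
        ::operator delete(_data);
        _data = nullptr;
        return true;
    }

private:
    void* _data = nullptr;
};
```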

src/plugins/intel_npu/src/al/include/sync_infer_request.hpp (+6 −6)

```diff
@@ -30,15 +30,15 @@ class SyncInferRequest : public ov::IInferRequest {
      * @param port Port of the tensor to get.
      * @return Tensor for the port @p port.
      */
-    ov::SoPtr<ov::ITensor> get_tensor(const ov::Output<const ov::Node>& port) const override;
+    virtual ov::SoPtr<ov::ITensor> get_tensor(const ov::Output<const ov::Node>& port) const override;

     /**
      * @brief Sets an input/output tensor to infer.
      * @param port Port of the input or output tensor.
      * @param tensor Reference to a tensor. The element_type and shape of a tensor must match
      * the model's input/output element_type and size.
      */
-    void set_tensor(const ov::Output<const ov::Node>& port, const ov::SoPtr<ov::ITensor>& tensor) override;
+    virtual void set_tensor(const ov::Output<const ov::Node>& port, const ov::SoPtr<ov::ITensor>& tensor) override;

     /**
      * @brief Currently there is no support implemented for batches of tensors, thus this call is a simple redirection
@@ -160,7 +160,7 @@ class SyncInferRequest : public ov::IInferRequest {
      * otherwise.
      * @param precision The precision value to be checked.
      */
-    virtual void check_network_precision(const ov::element::Type_t precision) = 0;
+    virtual void check_network_precision(const ov::element::Type_t precision) const = 0;

     /**
      * @brief Indicates a kind of provided tensor. Marks special tensors, used for internal implementation
@@ -179,16 +179,16 @@ class SyncInferRequest : public ov::IInferRequest {
     void allocate_tensor(std::string tensorName,
                          const IONodeDescriptor& descriptor,
                          TensorType tensorType = TensorType::InputOrOutput,
-                         const ov::Allocator& allocator = {});
+                         const ov::Allocator& allocator = {}) const;

     // Mutable to return reference to ov::Tensor
     mutable std::unordered_map<std::string, std::shared_ptr<ov::ITensor>> _allTensors;
     mutable std::unordered_map<std::string, std::shared_ptr<ov::ITensor>> _shapesTensors;
     // A copy of each tensor is needed to maintain the original L0 memory allocation in case the user provides another
     // memory area for the tensor.
-    std::unordered_map<std::string, std::shared_ptr<ov::ITensor>> _copyAllTensors;
+    mutable std::unordered_map<std::string, std::shared_ptr<ov::ITensor>> _copyAllTensors;

-    std::unordered_map<std::string, std::shared_ptr<VariableState>> _variableStates;
+    mutable std::unordered_map<std::string, std::shared_ptr<VariableState>> _variableStates;

     // This is an intel_npu::ICompiledModel pointer, but we need to use the OV base class because
     // ov::IInferRequest::get_compiled_model returns a reference to a shared_ptr!
```