openvinotoolkit
diff --git a/‎src/common/snippets/include/snippets/kernel_executor_table.hpp
+26-29 b/‎src/common/snippets/include/snippets/kernel_executor_table.hpp
+26-29
diff --git a/‎src/common/snippets/include/snippets/utils.hpp
+5-34 b/‎src/common/snippets/include/snippets/utils.hpp
+5-34
diff --git a/‎src/common/snippets/src/kernel_executor_table.cpp
+2-2 b/‎src/common/snippets/src/kernel_executor_table.cpp
+2-2
diff --git a/‎src/common/snippets/src/lowered/pass/compute_buffer_allocation_size.cpp
+3-1 b/‎src/common/snippets/src/lowered/pass/compute_buffer_allocation_size.cpp
+3-1
diff --git a/‎src/common/snippets/src/op/subgraph.cpp
+1-1 b/‎src/common/snippets/src/op/subgraph.cpp
+1-1
diff --git a/‎src/common/snippets/src/utils.cpp
+9-16 b/‎src/common/snippets/src/utils.cpp
+9-16
diff --git a/‎src/plugins/intel_cpu/src/emitters/snippets/cpu_kernel_executor_table.hpp
+11-10 b/‎src/plugins/intel_cpu/src/emitters/snippets/cpu_kernel_executor_table.hpp
+11-10
diff --git a/‎src/plugins/intel_cpu/src/emitters/snippets/x64/jit_brgemm_copy_b_emitter.cpp
+1-1 b/‎src/plugins/intel_cpu/src/emitters/snippets/x64/jit_brgemm_copy_b_emitter.cpp
+1-1
@@ -5,6 +5,7 @@
 #pragma once
 
 #include "snippets/lowered/linear_ir.hpp"
+#include <typeinfo>
 #if defined(SNIPPETS_DEBUG_CAPS) && !defined(_WIN32)
 #include <cxxabi.h>
 #endif
@@ -27,14 +28,11 @@ class KernelExecutorBase {
         virtual bool is_completed() const = 0;
 
         /*** Return deep copy of the config */
-        virtual std::shared_ptr<GenericConfig> clone() const = 0;
+        virtual std::unique_ptr<GenericConfig> get_clone_ptr() const = 0;
 
         /*** Compute hash for fast comparison operations or caching support */
         virtual size_t hash() const = 0;
 
-        bool operator==(const GenericConfig& rhs) const { return hash() == rhs.hash(); }
-        bool operator!=(const GenericConfig& rhs) const { return hash() != rhs.hash(); }
-
         virtual ~GenericConfig() = default;
         /** serialize config for debug purposes */
 #ifdef SNIPPETS_DEBUG_CAPS
@@ -45,14 +43,14 @@ class KernelExecutorBase {
     * @brief Update current kernel config in accordance with the passed expression. Corresponding kernel is recompiled if necessary.
      * This method should be called to update KernelExecutor based on runtime info (e.g. shapes) available through expression ptr
     */
-    virtual void update_by_expression(const ov::snippets::lowered::ExpressionPtr& expr) = 0;
+    virtual void update_by_expression(const lowered::ExpressionPtr& expr) = 0;
     /**
     * @brief Replace current kernel config with the provided value. Corresponding kernel is recompiled if necessary.
      * This method should be called to restore a saved state of the executor, that was configured using update_by_expression().
     */
-    virtual void update_by_config(const std::shared_ptr<const GenericConfig>& new_config) = 0;
+    virtual void update_by_config(const GenericConfig& new_config) = 0;
 
-    virtual std::shared_ptr<const GenericConfig> get_config() const = 0;
+    virtual const GenericConfig& get_config() const = 0;
     /** serialize for debug purposes */
 #ifdef SNIPPETS_DEBUG_CAPS
     virtual std::string to_string() const = 0;
@@ -67,27 +65,27 @@ class KernelExecutorBase {
 
 template<typename Conf, typename KernelType,
          typename std::enable_if<std::is_base_of<KernelExecutorBase::GenericConfig, Conf>::value, bool>::type = true>
-class KernelExecutor : public snippets::KernelExecutorBase {
+class KernelExecutor : public KernelExecutorBase {
 public:
-    explicit KernelExecutor(std::shared_ptr<Conf> c) : KernelExecutorBase(), m_config{std::move(c)} {}
+    explicit KernelExecutor(Conf c) : KernelExecutorBase(), m_config{std::move(c)} {}
 
     // Note: override when final is redundant, but needed to avoid warnings on some compilers
-    void update_by_expression(const ov::snippets::lowered::ExpressionPtr& expr) override final { // NOLINT
-        m_config = std::static_pointer_cast<Conf>(m_config->clone());
+    void update_by_expression(const lowered::ExpressionPtr& expr) override final { // NOLINT
         update_config(expr, m_config);
-        OPENVINO_ASSERT(m_config && m_config->is_completed(), "Failed to update kernel config in update_by_expression");
+        OPENVINO_ASSERT(m_config.is_completed(), "Failed to update kernel config in update_by_expression");
         update_kernel(m_config, m_kernel);
         OPENVINO_ASSERT(m_kernel, "Failed to compile kernel executor");
     }
-    void update_by_config(const std::shared_ptr<const GenericConfig>& new_config) override final { // NOLINT
-        if (*m_config == *new_config)
+    void update_by_config(const GenericConfig& new_config) override final { // NOLINT
+        if (m_config.hash() == new_config.hash())
             return;
-        m_config = std::static_pointer_cast<Conf>(std::const_pointer_cast<GenericConfig>(new_config));
-        OPENVINO_ASSERT(m_config && m_config->is_completed(), "Failed to update kernel config in get_config");
+        const auto& new_ptr = dynamic_cast<const Conf*>(&new_config);
+        OPENVINO_ASSERT(new_config.is_completed() && new_ptr, "Failed to update kernel config in get_config");
+        m_config = *new_ptr;
         update_kernel(m_config, m_kernel);
         OPENVINO_ASSERT(m_kernel, "Failed to compile kernel executor");
     }
-    std::shared_ptr<const GenericConfig> get_config() const override { return m_config; }
+    const GenericConfig& get_config() const override { return m_config; }
     std::shared_ptr<const KernelType> get_kernel() const { return m_kernel; }
 #ifdef SNIPPETS_DEBUG_CAPS
     std::string to_string() const override {
@@ -99,20 +97,20 @@ class KernelExecutor : public snippets::KernelExecutorBase {
                 std::free);
         type_name = demangled_name.get();
 #endif
-        return  "KernelExecutorType: " + std::string(type_name) + " KernelConfig: " + m_config->to_string();
+        return  "KernelExecutorType: " + std::string(type_name) + " KernelConfig: " + m_config.to_string();
     }
 #endif
 
 protected:
     /*** Updates stored kernel config based on runtime info from expression (e.g. new input shapes). */
-    virtual void update_config(const ov::snippets::lowered::ExpressionPtr& expr, std::shared_ptr<Conf>& config) const = 0;
+    virtual void update_config(const lowered::ExpressionPtr& expr, Conf& config) const = 0;
     /*** Updates stored kernel in accordance with the passed config. Recompilation of the kernel is
-     * performed only if necessary, otherwise an appropriate kernel is retrieved from cache. */
-    virtual void update_kernel(const std::shared_ptr<const Conf>& c, std::shared_ptr<KernelType>& kernel) const = 0;
+     * performed if necessary. */
+    virtual void update_kernel(const Conf& c, std::shared_ptr<KernelType>& kernel) const = 0;
 
 private:
     /** Contains all the necessary information to compile a desired kernel*/
-    std::shared_ptr<Conf> m_config = nullptr;
+    Conf m_config {};
     /** Stores pointer to compiled kernel since the last update_kernel() call */
     std::shared_ptr<KernelType> m_kernel = nullptr;
 };
@@ -122,13 +120,12 @@ class KernelExecutorTable {
     /*** Register KernelExecutor in the KernelExecutorTable so it can be later updated in runtime. */
     template<typename T, class ...C,
             typename std::enable_if<std::is_base_of<KernelExecutorBase, T>::value, bool>::type = true>
-    std::shared_ptr<T> register_kernel(const snippets::lowered::ExpressionPtr& expr, C... args) {
-        OPENVINO_ASSERT(!m_table.count(expr), "This expression already has an alterable kernel");
+    std::shared_ptr<T> register_kernel(const lowered::ExpressionPtr& expr, C... args) {
         const auto& instance = std::make_shared<T>(args...);
-        m_table[expr] = instance;
+        OPENVINO_ASSERT(m_table.insert({expr, instance}).second, "This expression already has an alterable kernel");
         return instance;
     }
-    std::shared_ptr<KernelExecutorBase> get_kernel_executor(const snippets::lowered::ExpressionPtr& expr) const {
+   const std::shared_ptr<KernelExecutorBase>& get_kernel_executor(const lowered::ExpressionPtr& expr) const {
         OPENVINO_ASSERT(m_table.count(expr), "This expression doesn't have a registered kernel executor");
         return m_table.at(expr);
     }
@@ -150,13 +147,13 @@ class KernelExecutorTable {
      * be accessible from RuntimeConfigurator. In order to replace these cloned ExpressionPtrs with the original ones,
      * we need to call this method.
     */
-    void replace_key_expression(const snippets::lowered::ExpressionPtr& from, const snippets::lowered::ExpressionPtr& to);
+    void replace_key_expression(const lowered::ExpressionPtr& from, const lowered::ExpressionPtr& to);
 
     virtual ~KernelExecutorTable() = default;
 
 protected:
-    std::unordered_map<snippets::lowered::ExpressionPtr, std::shared_ptr<KernelExecutorBase>> m_table{};
-    typedef std::vector<std::pair<snippets::lowered::ExpressionPtr, std::shared_ptr<const KernelExecutorBase::GenericConfig>>> ExecTableState;
+    std::unordered_map<lowered::ExpressionPtr, std::shared_ptr<KernelExecutorBase>> m_table{};
+    typedef std::vector<std::pair<lowered::ExpressionPtr, std::shared_ptr<const KernelExecutorBase::GenericConfig>>> ExecTableState;
 
     /*** Restore the table state previously obtained by get_state() */
     void reset_state(const ExecTableState& state);
 
@@ -247,43 +247,14 @@ std::shared_ptr<ov::Node> get_leaf_node_of_first_child_shape_infer_seq(const std
  */
 std::shared_ptr<ov::Node> get_leaf_node_of_first_parent_shape_infer_seq(const std::shared_ptr<ov::Node>& start_node);
 
-/**
- * @brief Calculate leading dimension of the shape that should be read according to the layout
- * @param shape original (not reordered) input shape
- * @param layout specifies the order in what dimensions of in the input shape should be read
- * @return stride of the dimension idx = layout[layout.size() - 2] in the original shape
-   Example:
-         Original shape (shape) = [1, 49, 2, 23]
-         Layout (transpose order) = [2, 0, 1, 3]
-
-         dim_idx = layout.size() - 2 = 2
-         // Since layout specifies the order of dimensions in which the shape should be read
-         dim = layout[dim_idx] = 1
-         stride(shape[1]) = shape[2] * shape[3] = 2 * 23
- */
-size_t get_in_leading_dim(const VectorDims& shape, const std::vector<size_t>& layout);
-inline size_t get_in_leading_dim(const lowered::PortDescriptorPtr& pd) {
-    return get_in_leading_dim(pd->get_shape(), pd->get_layout());
-}
 /**
  *
- * @param shape reordered input shape that is stored according to the layout
- * @param layout specifies the order in what the dimensions of the input shape are stored
- * @return
-     Output shape is already transposed, we need to correctly write the data with original shape by the order
-     Example:
-          Original transposed shape (shape) = [49, 2, 7, 39]
-          Layout (transpose order) = [2, 0, 1, 3]
-
-          dim_idx = layout.size() - 2 = 2
-          // Since the shape dimensions are already reordered according to the layout
-          dim = /find dim_idx index in layout/ = 0
-          stride(shape[0]) = shape[1] * shape[2] * shape[3] = 2 * 7 * 39
+ * @brief Get stride of input/output dimension
+ * @param expr_port target port that contains shape and layout info
+ * @param idx index of the target dimension starting from the shape's end (default = 1)
  */
-size_t get_out_leading_dim(const VectorDims& shape, const std::vector<size_t>& layout);
-inline size_t get_out_leading_dim(const lowered::PortDescriptorPtr& pd) {
-    return get_out_leading_dim(pd->get_shape(), pd->get_layout());
-}
+
+int64_t get_dim_stride(const lowered::ExpressionPort& expr_port, size_t idx = 1);
 
 } // namespace utils
 } // namespace snippets
 
@@ -22,7 +22,7 @@ void KernelExecutorTable::reset_state(const ExecTableState& state) {
     for (const auto& table_record : m_table) {
         const auto& state_record = *state_it++;
         OPENVINO_ASSERT(table_record.first == state_record.first, "Invalid state in restore_state: expressions mismatch");
-        table_record.second->update_by_config(state_record.second);
+        table_record.second->update_by_config(*state_record.second);
     }
 }
 
@@ -31,7 +31,7 @@ KernelExecutorTable::ExecTableState KernelExecutorTable::get_state() const {
     // Note: we need to clone configs when saving the state, since the configs still stored in the table can
     // be modified e.g. by calling update_by_expression();
     for (const auto& record : m_table)
-        result.emplace_back(std::make_pair(record.first, record.second->get_config()->clone()));
+        result.emplace_back(std::make_pair(record.first, record.second->get_config().get_clone_ptr()));
     return result;
 }
 
 
@@ -27,7 +27,9 @@ std::vector<size_t> get_parent_inner_loops(const std::vector<size_t>& parent_loo
 // Ticket: 113744
 // TODO: This logic covers only several specific cases so it should be generalized.
 size_t ComputeBufferAllocationSize::get_allocation_size(const LoopManagerPtr& loop_manager, const ExpressionPtr& buffer_expr, size_t allocation_rank) {
-    const auto& parent_port = buffer_expr->get_input_port_connector(0)->get_source();
+    // Note: Buffer expressions can have more than one parent after the loop splitting transformation, but only the last parent
+    // can be used to access valid loop ports. More info in the ticket: 146646
+    const auto& parent_port = buffer_expr->get_input_port_connector(buffer_expr->get_input_count() - 1)->get_source();
     const auto& parent_loop_ids = get_parent_inner_loops(parent_port.get_expr()->get_loop_ids(), buffer_expr->get_loop_ids());
     const auto planar_shape = utils::get_preordered_vdims(parent_port);
 
 
@@ -546,7 +546,7 @@ snippets::Schedule Subgraph::generate(const void* compile_params) const {
     auto lowering_result = m_generator->generate(linear_ir, compile_params);
 
     // Note: Since the code emission is performed on a copy of LIR, but RuntimeConfigurator works with the initial instance,
-    //  we need to replace cloned expression pointers to original ones in the KernelExecutorTable
+    //  we need to replace cloned expression pointers with the original ones in the KernelExecutorTable. Ticket: 129772
     const auto& exec_table = m_generator->get_target_machine()->get_runtime_configurator()->get_kernel_executor_table();
     for (const auto& expr : *m_linear_ir)
         exec_table->replace_key_expression(expression_map.at(expr.get()), expr);
 
@@ -291,22 +291,15 @@ std::shared_ptr<ov::Node> get_leaf_node_of_first_parent_shape_infer_seq(const st
     return leaf_node;
 }
 
-size_t get_in_leading_dim(const VectorDims& shape, const std::vector<size_t>& layout) {
-    if (layout.empty())
-        return shape.back();
-    OPENVINO_ASSERT(layout.back() == layout.size() - 1 && layout.size() == shape.size(),
-                              "detected invalid layout values: check that this shape + layout combination is schedulable");
-    const auto idx = static_cast<VectorDims::difference_type>(layout[layout.size() - 2]);
-    return std::accumulate(shape.cbegin() + idx + 1, shape.end(), 1ull, std::multiplies<size_t>());
-}
-size_t get_out_leading_dim(const VectorDims& shape, const std::vector<size_t>& layout) {
-    if (layout.empty())
-        return shape.back();
-    OPENVINO_ASSERT(layout.back() == layout.size() - 1 && layout.size() == shape.size(),
-                              "detected invalid layout values: check that this shape + layout combination is schedulable");
-    const auto idx = layout.size() - 2;
-    const auto dim = std::distance(layout.cbegin(), std::find(layout.cbegin(), layout.cend(), idx));
-    return std::accumulate(shape.cbegin() + dim + 1, shape.cend(), 1ull, std::multiplies<size_t>());
+int64_t get_dim_stride(const lowered::ExpressionPort& expr_port, size_t idx) {  // Returns the stride of the dimension selected by idx on the given expression port.
+    size_t dim_idx = 0;
+    const auto& layout = expr_port.get_descriptor_ptr()->get_layout();
+    switch (expr_port.get_type()) {  // idx is resolved to a dimension index by a port-type-specific helper
+        case lowered::ExpressionPort::Input: dim_idx = utils::get_input_dim_idx(layout, idx); break;
+        case lowered::ExpressionPort::Output: dim_idx = utils::get_output_dim_idx(layout, idx); break;
+        default: OPENVINO_THROW("Unsupported expression port type!");  // any other port type is rejected
+    }
+    return get_stride(dim_idx, expr_port.get_descriptor_ptr()->get_shape());  // stride comes from get_stride() applied to the resolved index and the descriptor's shape
 }
 
 } // namespace utils
 
@@ -13,15 +13,10 @@ namespace intel_cpu {
 template<typename Conf, typename KernelType>
 class CPUKernelExecutor : public snippets::KernelExecutor<Conf, KernelType> {
 public:
-     CPUKernelExecutor(ov::intel_cpu::MultiCacheWeakPtr kernel_cache, std::shared_ptr<Conf> c) :
-                       snippets::KernelExecutor<Conf, KernelType>(c), m_kernel_cache(std::move(kernel_cache)) {}
-     struct Key {
-         explicit Key(const std::shared_ptr<const Conf>& c) : config{c} {}
-         const std::shared_ptr<const  Conf> config;
-         size_t hash() const { return config->hash(); }
-         bool operator==(const Key& rhs) const { return *config == *rhs.config; }
-     };
-    void update_kernel(const std::shared_ptr<const Conf>& config, std::shared_ptr<KernelType>& kernel) const override final { // NOLINT
+     CPUKernelExecutor(ov::intel_cpu::MultiCacheWeakPtr kernel_cache, Conf c) :
+                       snippets::KernelExecutor<Conf, KernelType>(std::move(c)), m_kernel_cache(std::move(kernel_cache)) {}
+
+    void update_kernel(const Conf& config, std::shared_ptr<KernelType>& kernel) const override final { // NOLINT
         const auto& cache = m_kernel_cache.lock();
         OPENVINO_ASSERT(cache, "Invalid kernel cache pointer in CPUKernelExecutor::update_kernel()");
         const auto& lookup_result = cache->getOrCreate(Key(config),
@@ -32,8 +27,14 @@ class CPUKernelExecutor : public snippets::KernelExecutor<Conf, KernelType> {
     }
 
 protected:
+    struct Key {  // Cache key that wraps a kernel config by value; update_kernel() passes Key(config) to cache->getOrCreate().
+        explicit Key(Conf c) : config{std::move(c)} {}  // takes the config by value and moves it into the member
+        const  Conf config;  // the stored config copy, immutable after construction
+        size_t hash() const { return config.hash(); }  // forwards to the config's own hash()
+        bool operator==(const Key& rhs) const { return config == rhs.config; }  // relies on Conf providing operator==
+    };
     /** Compile kernel managed by KernelExecutor instance. Will be called only if Kernel is not found in the cache */
-    virtual std::shared_ptr<KernelType> compile_kernel(const std::shared_ptr<const Conf>& c) const = 0;
+    virtual std::shared_ptr<KernelType> compile_kernel(const Conf& c) const = 0;
     /** CPU plugin cache implementation is used to avoid redundant recompilations */
     ov::intel_cpu::MultiCacheWeakPtr m_kernel_cache;
 };
 
@@ -43,7 +43,7 @@ jit_brgemm_copy_b_emitter::jit_brgemm_copy_b_emitter(jit_generator* h, cpu_isa_t
     size_t leading_dimension = *(original_shape.rbegin());
     if (!layout.empty()) {
         transposed_shape = snippets::utils::get_planar_vdims(original_shape, layout);
-        leading_dimension = ov::snippets::utils::get_in_leading_dim(original_shape, layout);
+        leading_dimension = ov::snippets::utils::get_dim_stride(expr->get_input_port(0));
     }
 
     const auto& in_subtensor = in_desc->get_subtensor();
Original file line number	Diff line number	Diff line change
`@@ -22,7 +22,7 @@ void KernelExecutorTable::reset_state(const ExecTableState& state) {`
`22`	`22`	`for (const auto& table_record : m_table) {`
`23`	`23`	`const auto& state_record = *state_it++;`
`24`	`24`	`OPENVINO_ASSERT(table_record.first == state_record.first, "Invalid state in restore_state: expressions mismatch");`
`25`		`- table_record.second->update_by_config(state_record.second);`
	`25`	`+ table_record.second->update_by_config(*state_record.second);`
`26`	`26`	`}`
`27`	`27`	`}`
`28`	`28`
`@@ -31,7 +31,7 @@ KernelExecutorTable::ExecTableState KernelExecutorTable::get_state() const {`
`31`	`31`	`// Note: we need to clone configs when saving the state, since the configs still stored in the table can`
`32`	`32`	`// be modified e.g. by calling update_by_expression();`
`33`	`33`	`for (const auto& record : m_table)`
`34`		`- result.emplace_back(std::make_pair(record.first, record.second->get_config()->clone()));`
	`34`	`+ result.emplace_back(std::make_pair(record.first, record.second->get_config().get_clone_ptr()));`
`35`	`35`	`return result;`
`36`	`36`	`}`
`37`	`37`
Original file line number	Diff line number	Diff line change
`@@ -43,7 +43,7 @@ jit_brgemm_copy_b_emitter::jit_brgemm_copy_b_emitter(jit_generator* h, cpu_isa_t`
`43`	`43`	`size_t leading_dimension = *(original_shape.rbegin());`
`44`	`44`	`if (!layout.empty()) {`
`45`	`45`	`transposed_shape = snippets::utils::get_planar_vdims(original_shape, layout);`
`46`		`- leading_dimension = ov::snippets::utils::get_in_leading_dim(original_shape, layout);`
	`46`	`+ leading_dimension = ov::snippets::utils::get_dim_stride(expr->get_input_port(0));`
`47`	`47`	`}`
`48`	`48`
`49`	`49`	`const auto& in_subtensor = in_desc->get_subtensor();`