Merge branch 'master' into tensor_view_for_partial_value_propagation

jane-intel · web-flow · commit 65debd21b860 · 2025-03-13T15:04:30.000+04:00
diff --git a/src/common/transformations/src/transformations/symbolic_transformations/dereshape_matmul.cpp b/src/common/transformations/src/transformations/symbolic_transformations/dereshape_matmul.cpp
@@ -113,6 +113,7 @@ ov::Output<ov::Node> get_target_shape_from_sources(const ov::Output<ov::Node>& b
         if (curr_is_const && next_is_const) {
             dims[curr_i] = nullptr;
             dims[curr_i + 1] = ov::op::util::make_try_fold<ov::op::v0::Concat>(ov::NodeVector{curr_node, next_node}, 0);
+            ov::copy_runtime_info(copy_rt_info_from, dims[curr_i + 1]);
         }
     }
     dims.erase(std::remove_if(dims.begin(),
@@ -327,7 +328,8 @@ ov::pass::DeReshapeMatMul::DeReshapeMatMul() {
             auto other_input_reshape =
                 op::util::make_try_fold<ov::op::v1::Reshape>(add_node->input_value(non_matmul_port), pattern, true);
             add_node->input(non_matmul_port).replace_source_output(other_input_reshape->output(0));
-            ov::copy_runtime_info({in_reshape_0, in_reshape_1}, {first_batch_dim, minus_one, other_input_reshape});
+            ov::copy_runtime_info({in_reshape_0, in_reshape_1},
+                                  {first_batch_dim, minus_one, other_input_reshape, pattern});
             add_node->validate_and_infer_types();
         }
         ov::replace_output_update_name(out_reshape->output(0), out_reshape->input_value(0));
diff --git a/src/common/transformations/src/transformations/transpose_sinking/ts_slice.cpp b/src/common/transformations/src/transformations/transpose_sinking/ts_slice.cpp
@@ -43,6 +43,7 @@ TSSliceForward::TSSliceForward() {
                                                            transpose_axis_order);
         const auto& indices = main_node->input_value(4);
         auto new_axis = std::make_shared<ov::op::v8::Gather>(data, indices, axis);
+        ov::copy_runtime_info(indices.get_node_shared_ptr(), new_axis);
 
         main_node->input(4).replace_source_output(new_axis);
 
@@ -96,6 +97,7 @@ TSSliceBackward::TSSliceBackward() {
                                                            reversed_transpose_order);
         const auto& indices = main_node->input_value(4);
         auto new_axis = std::make_shared<ov::op::v8::Gather>(data, indices, axis);
+        ov::copy_runtime_info(indices.get_node_shared_ptr(), new_axis);
         main_node->input(4).replace_source_output(new_axis);
 
         main_node->validate_and_infer_types();
diff --git a/src/core/src/bound_evaluate.cpp b/src/core/src/bound_evaluate.cpp
@@ -18,36 +18,6 @@
 namespace {
 using namespace ov;
 
-void propagate_rt_info(Node* node, const Output<Node>& final_port) {
-    auto node_outputs = node->outputs();
-    bool same_outputs = std::all_of(node_outputs.begin(), node_outputs.end(), [](const Output<Node>& output) {
-        return output.get_tensor().has_and_set_bound();
-    });
-    if (same_outputs && op::util::is_constant(node))  // constant should not propagate it's rt_info
-    {
-        std::unordered_set<Node*> stop_nodes;
-        for (const auto& in : final_port.get_target_inputs())
-            stop_nodes.insert(in.get_node());
-
-        auto curr_node = node->shared_from_this();
-        for (const auto& output : node_outputs) {
-            if (output == final_port)
-                continue;
-            for (auto& in : output.get_target_inputs()) {
-                if (stop_nodes.count(in.get_node()))
-                    continue;
-                try {
-                    auto consumer = in.get_node()->shared_from_this();
-                    copy_runtime_info({curr_node, consumer}, consumer);
-                } catch (const std::bad_weak_ptr&) {
-                    // Exception can be thrown, if `shared_from_this()` was called during node creation.
-                    // Continue propagation for other nodes.
-                }
-            }
-        }
-    }
-}
-
 bool are_same_tensor(const ov::Tensor& lhs, const ov::Tensor& rhs) {
     return (lhs && rhs) && (lhs.get_element_type() == rhs.get_element_type()) && (lhs.get_shape() == rhs.get_shape()) &&
            (lhs.data() == rhs.data());
@@ -287,7 +257,6 @@ void evaluate_bound(const Output<Node>& output) {
             }
             bound_evaluator.set_bounds_and_symbols();
             invalidate_unused_values(node->input_values());
-            propagate_rt_info(node, output);
         }
     }
 }
diff --git a/src/plugins/intel_gpu/src/plugin/transformations/optimize_subsequent_reshapes.cpp b/src/plugins/intel_gpu/src/plugin/transformations/optimize_subsequent_reshapes.cpp
@@ -18,7 +18,7 @@ OptimizeSubsequentReshapes::OptimizeSubsequentReshapes() {
     using namespace ov::pass::pattern;
     using ov::pass::pattern::op::Or;
 
-    auto dynamic_batch_only = [](Output<Node> output) {
+    auto single_dynamic_dim = [](Output<Node> output) {
         const auto& shape = output.get_partial_shape();
 
         if (shape.rank().is_dynamic())
@@ -27,23 +27,23 @@ OptimizeSubsequentReshapes::OptimizeSubsequentReshapes() {
         if (shape.size() <= 1)
             return false;
 
-        if (shape[0].is_static())
-            return false;
+        auto dynamic_dims = 0;
+        for (size_t i = 0; i < shape.size(); i++)
+            dynamic_dims += shape[i].is_dynamic() ? 1 : 0;
 
-        for (size_t i = 1; i < shape.size(); i++)
-            if (shape[i].is_dynamic())
-                return false;
+        if (dynamic_dims != 1)
+            return false;
 
         return true;
     };
 
-    auto first_reshape_data = any_input(dynamic_batch_only);
+    auto first_reshape_data = any_input(single_dynamic_dim);
     auto first_reshape_pattern = ov::pass::pattern::wrap_type<ov::op::v0::Constant>();
     auto first_reshape = wrap_type<ov::op::v1::Reshape>({ first_reshape_data, first_reshape_pattern },
-                                                        dynamic_batch_only && ov::pass::pattern::consumers_count(1));
+                                                        single_dynamic_dim && ov::pass::pattern::consumers_count(1));
 
     auto second_reshape_pattern = ov::pass::pattern::wrap_type<ov::op::v0::Constant>();
-    auto second_reshape = wrap_type<ov::op::v1::Reshape>({ first_reshape, second_reshape_pattern }, dynamic_batch_only);
+    auto second_reshape = wrap_type<ov::op::v1::Reshape>({ first_reshape, second_reshape_pattern }, single_dynamic_dim);
 
     ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) {
         const auto& pattern_map = m.get_pattern_value_map();
@@ -74,14 +74,14 @@ OptimizeSubsequentReshapes::OptimizeSubsequentReshapes() {
         std::vector<int32_t> new_pattern;
         for (auto& dim : second_reshape_ps) {
             if (dim.is_dynamic()) {
-                new_pattern.push_back(0);
+                new_pattern.push_back(-1);
             } else {
                 new_pattern.push_back(dim.get_length());
             }
         }
 
         auto new_pattern_const = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{new_pattern.size()}, new_pattern);
-        auto new_reshape = std::make_shared<ov::op::v1::Reshape>(first_reshape_node->input(0).get_source_output(), new_pattern_const, true);
+        auto new_reshape = std::make_shared<ov::op::v1::Reshape>(first_reshape_node->input(0).get_source_output(), new_pattern_const, false);
         new_reshape->set_friendly_name(second_reshape_node->get_friendly_name());
 
         ov::replace_node(second_reshape_node, new_reshape);
diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp
@@ -6,7 +6,7 @@
 #ifndef NOMINMAX
 # define NOMINMAX
 #endif
-#include "gpu/intel/jit/jit_generator.hpp"
+#include "gpu/intel/jit/generator.hpp"
 #endif  // ENABLE_ONEDNN_FOR_GPU
 
 #include "ocl_device.hpp"
@@ -324,7 +324,7 @@ device_info init_device_info(const cl::Device& device, const cl::Context& contex
     using namespace dnnl::impl::gpu::intel::jit;
     ngen::HW hw = ngen::HW::Unknown;
     ngen::Product product = {ngen::ProductFamily::Unknown, 0};
-    jit_generator<ngen::HW::Unknown>::detectHWInfo(context.get(), device.get(), hw, product);
+    generator_t<ngen::HW::Unknown>::detectHWInfo(context.get(), device.get(), hw, product);
     info.arch = convert_ngen_arch(hw);
     // We change the value of this flag to avoid OneDNN usage for the platforms unknown to OneDNN
     // This is required to guarantee some level of forward compatibility for the new HW generations
diff --git a/src/plugins/intel_gpu/tests/unit/transformations/optimize_subsequent_reshapes_test.cpp b/src/plugins/intel_gpu/tests/unit/transformations/optimize_subsequent_reshapes_test.cpp
@@ -39,8 +39,8 @@ TEST_F(TransformationTestsF, OptimizeSubsequentReshapes1) {
     }
     {
         auto input = std::make_shared<ov::op::v0::Parameter>(ov::element::i64, ov::PartialShape{ -1, 1, 4096 });
-        auto reshape_pattern = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{2}, std::vector<int32_t>{ 0, 4096 });
-        auto reshape = std::make_shared<ov::op::v1::Reshape>(input, reshape_pattern, true);
+        auto reshape_pattern = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{2}, std::vector<int32_t>{ -1, 4096 });
+        auto reshape = std::make_shared<ov::op::v1::Reshape>(input, reshape_pattern, false);
         auto result = std::make_shared<ov::op::v0::Result>(reshape);
 
         model_ref = std::make_shared<ov::Model>(ov::NodeVector{ result }, ov::ParameterVector{ input });
@@ -63,8 +63,8 @@ TEST_F(TransformationTestsF, OptimizeSubsequentReshapes2) {
     }
     {
         auto input = std::make_shared<ov::op::v0::Parameter>(ov::element::i64, ov::PartialShape{ -1, 1, 4096 });
-        auto reshape_pattern = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{4}, std::vector<int32_t>{ 0, 32, 1, 128 });
-        auto reshape = std::make_shared<ov::op::v1::Reshape>(input, reshape_pattern, true);
+        auto reshape_pattern = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{4}, std::vector<int32_t>{ -1, 32, 1, 128 });
+        auto reshape = std::make_shared<ov::op::v1::Reshape>(input, reshape_pattern, false);
         auto result = std::make_shared<ov::op::v0::Result>(reshape);
 
         model_ref = std::make_shared<ov::Model>(ov::NodeVector{ result }, ov::ParameterVector{ input });
@@ -87,8 +87,56 @@ TEST_F(TransformationTestsF, OptimizeSubsequentReshapes3) {
     }
     {
         auto input = std::make_shared<ov::op::v0::Parameter>(ov::element::i64, ov::PartialShape{ -1, 32, 1, 128 });
-        auto reshape_pattern = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{2}, std::vector<int32_t>{ 0, 4096 });
-        auto reshape = std::make_shared<ov::op::v1::Reshape>(input, reshape_pattern, true);
+        auto reshape_pattern = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{2}, std::vector<int32_t>{ -1, 4096 });
+        auto reshape = std::make_shared<ov::op::v1::Reshape>(input, reshape_pattern, false);
+        auto result = std::make_shared<ov::op::v0::Result>(reshape);
+
+        model_ref = std::make_shared<ov::Model>(ov::NodeVector{ result }, ov::ParameterVector{ input });
+    }
+    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
+}
+
+TEST_F(TransformationTestsF, OptimizeSubsequentReshapes4) {
+    {
+        auto input = std::make_shared<ov::op::v0::Parameter>(ov::element::i64, ov::PartialShape{ 1, -1, 256 });
+        auto first_reshape_pattern = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{4}, std::vector<int32_t>{ 0, 0, 2, 128 });
+        auto first_reshape = std::make_shared<ov::op::v1::Reshape>(input, first_reshape_pattern, true);
+
+        auto second_reshape_pattern = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{2}, std::vector<int32_t>{ -1, 256 });
+        auto second_reshape = std::make_shared<ov::op::v1::Reshape>(first_reshape, second_reshape_pattern, false);
+        auto result = std::make_shared<ov::op::v0::Result>(second_reshape);
+
+        model = std::make_shared<ov::Model>(ov::NodeVector{ result }, ov::ParameterVector{ input });
+        manager.register_pass<OptimizeSubsequentReshapes>();
+    }
+    {
+        auto input = std::make_shared<ov::op::v0::Parameter>(ov::element::i64, ov::PartialShape{ 1, -1, 256 });
+        auto reshape_pattern = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{2}, std::vector<int32_t>{ -1, 256 });
+        auto reshape = std::make_shared<ov::op::v1::Reshape>(input, reshape_pattern, false);
+        auto result = std::make_shared<ov::op::v0::Result>(reshape);
+
+        model_ref = std::make_shared<ov::Model>(ov::NodeVector{ result }, ov::ParameterVector{ input });
+    }
+    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
+}
+
+TEST_F(TransformationTestsF, OptimizeSubsequentReshapes5) {
+    {
+        auto input = std::make_shared<ov::op::v0::Parameter>(ov::element::i64, ov::PartialShape{ 1, 256, -1 });
+        auto first_reshape_pattern = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{4}, std::vector<int32_t>{ 0, 64, 4, -1 });
+        auto first_reshape = std::make_shared<ov::op::v1::Reshape>(input, first_reshape_pattern, true);
+
+        auto second_reshape_pattern = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{4}, std::vector<int32_t>{ -1, 32, 2, 4 });
+        auto second_reshape = std::make_shared<ov::op::v1::Reshape>(first_reshape, second_reshape_pattern, true);
+        auto result = std::make_shared<ov::op::v0::Result>(second_reshape);
+
+        model = std::make_shared<ov::Model>(ov::NodeVector{ result }, ov::ParameterVector{ input });
+        manager.register_pass<OptimizeSubsequentReshapes>();
+    }
+    {
+        auto input = std::make_shared<ov::op::v0::Parameter>(ov::element::i64, ov::PartialShape{ 1, 256, -1 });
+        auto reshape_pattern = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{4}, std::vector<int32_t>{ -1, 32, 2, 4 });
+        auto reshape = std::make_shared<ov::op::v1::Reshape>(input, reshape_pattern, false);
         auto result = std::make_shared<ov::op::v0::Result>(reshape);
 
         model_ref = std::make_shared<ov::Model>(ov::NodeVector{ result }, ov::ParameterVector{ input });
diff --git a/src/plugins/intel_gpu/thirdparty/CMakeLists.txt b/src/plugins/intel_gpu/thirdparty/CMakeLists.txt
@@ -156,18 +156,13 @@ if(ENABLE_ONEDNN_FOR_GPU)
             )
         endif()
 
-        set(LIB_INCLUDE_DIRS "${ONEDNN_INSTALL_DIR}/include" "${CMAKE_CURRENT_SOURCE_DIR}/onednn_gpu/src")
+        set(LIB_INCLUDE_DIRS "${ONEDNN_INSTALL_DIR}/include" "${CMAKE_CURRENT_SOURCE_DIR}/onednn_gpu/src" "${CMAKE_CURRENT_SOURCE_DIR}/onednn_gpu/src/gpu/intel/jit/ngen" "${CMAKE_CURRENT_SOURCE_DIR}/onednn_gpu/third_party/ngen")
         set(LIB_DEFINITIONS ENABLE_ONEDNN_FOR_GPU
                             DNNL_DLL
                             DNNL_DLL_EXPORTS
                             DNNL_ENABLE_CPU_ISA_HINTS
                             DNNL_ENABLE_MAX_CPU_ISA
-                            DNNL_X64=1
-                            NGEN_CPP11
-                            NGEN_NEO_INTERFACE
-                            NGEN_NO_OP_NAMES
-                            NGEN_SAFE
-                            NGEN_WINDOWS_COMPAT)
+                            DNNL_X64=1)
         add_library(onednn_gpu_tgt INTERFACE)
         set_target_properties(onednn_gpu_tgt PROPERTIES
             INTERFACE_LINK_LIBRARIES $<BUILD_INTERFACE:${ONEDNN_GPU_LIB_PATH}>
diff --git a/src/plugins/intel_gpu/thirdparty/onednn_gpu b/src/plugins/intel_gpu/thirdparty/onednn_gpu
@@ -1 +1 @@
-Subproject commit 1584b5a36b851265d59bd723abc235e13759be2f
+Subproject commit a526909f5c62c191eb097fc19d0a98db9e13b877

Original file line number	Diff line number	Diff line change
`@@ -113,6 +113,7 @@ ov::Output<ov::Node> get_target_shape_from_sources(const ov::Output<ov::Node>& b`
`113`	`113`	`if (curr_is_const && next_is_const) {`
`114`	`114`	`dims[curr_i] = nullptr;`
`115`	`115`	`dims[curr_i + 1] = ov::op::util::make_try_fold<ov::op::v0::Concat>(ov::NodeVector{curr_node, next_node}, 0);`
	`116`	`+ ov::copy_runtime_info(copy_rt_info_from, dims[curr_i + 1]);`
`116`	`117`	`}`
`117`	`118`	`}`
`118`	`119`	`dims.erase(std::remove_if(dims.begin(),`
`@@ -327,7 +328,8 @@ ov::pass::DeReshapeMatMul::DeReshapeMatMul() {`
`327`	`328`	`auto other_input_reshape =`
`328`	`329`	`op::util::make_try_fold<ov::op::v1::Reshape>(add_node->input_value(non_matmul_port), pattern, true);`
`329`	`330`	`add_node->input(non_matmul_port).replace_source_output(other_input_reshape->output(0));`
`330`		`- ov::copy_runtime_info({in_reshape_0, in_reshape_1}, {first_batch_dim, minus_one, other_input_reshape});`
	`331`	`+ ov::copy_runtime_info({in_reshape_0, in_reshape_1},`
	`332`	`+ {first_batch_dim, minus_one, other_input_reshape, pattern});`
`331`	`333`	`add_node->validate_and_infer_types();`
`332`	`334`	`}`
`333`	`335`	`ov::replace_output_update_name(out_reshape->output(0), out_reshape->input_value(0));`