Skip to content

Commit 65debd2

Browse files
authored
Merge branch 'master' into tensor_view_for_partial_value_propagation
2 parents f244fb0 + b59e74d commit 65debd2

File tree

8 files changed: +75 -59 lines changed

8 files changed: +75 -59 lines changed

src/common/transformations/src/transformations/symbolic_transformations/dereshape_matmul.cpp

+3-1
Original file line number | Diff line number | Diff line change
@@ -113,6 +113,7 @@ ov::Output<ov::Node> get_target_shape_from_sources(const ov::Output<ov::Node>& b
113113
if (curr_is_const && next_is_const) {
114114
dims[curr_i] = nullptr;
115115
dims[curr_i + 1] = ov::op::util::make_try_fold<ov::op::v0::Concat>(ov::NodeVector{curr_node, next_node}, 0);
116+
ov::copy_runtime_info(copy_rt_info_from, dims[curr_i + 1]);
116117
}
117118
}
118119
dims.erase(std::remove_if(dims.begin(),
@@ -327,7 +328,8 @@ ov::pass::DeReshapeMatMul::DeReshapeMatMul() {
327328
auto other_input_reshape =
328329
op::util::make_try_fold<ov::op::v1::Reshape>(add_node->input_value(non_matmul_port), pattern, true);
329330
add_node->input(non_matmul_port).replace_source_output(other_input_reshape->output(0));
330-
ov::copy_runtime_info({in_reshape_0, in_reshape_1}, {first_batch_dim, minus_one, other_input_reshape});
331+
ov::copy_runtime_info({in_reshape_0, in_reshape_1},
332+
{first_batch_dim, minus_one, other_input_reshape, pattern});
331333
add_node->validate_and_infer_types();
332334
}
333335
ov::replace_output_update_name(out_reshape->output(0), out_reshape->input_value(0));

src/common/transformations/src/transformations/transpose_sinking/ts_slice.cpp

+2
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,7 @@ TSSliceForward::TSSliceForward() {
4343
transpose_axis_order);
4444
const auto& indices = main_node->input_value(4);
4545
auto new_axis = std::make_shared<ov::op::v8::Gather>(data, indices, axis);
46+
ov::copy_runtime_info(indices.get_node_shared_ptr(), new_axis);
4647

4748
main_node->input(4).replace_source_output(new_axis);
4849

@@ -96,6 +97,7 @@ TSSliceBackward::TSSliceBackward() {
9697
reversed_transpose_order);
9798
const auto& indices = main_node->input_value(4);
9899
auto new_axis = std::make_shared<ov::op::v8::Gather>(data, indices, axis);
100+
ov::copy_runtime_info(indices.get_node_shared_ptr(), new_axis);
99101
main_node->input(4).replace_source_output(new_axis);
100102

101103
main_node->validate_and_infer_types();

src/core/src/bound_evaluate.cpp

-31
Original file line number | Diff line number | Diff line change
@@ -18,36 +18,6 @@
1818
namespace {
1919
using namespace ov;
2020

21-
void propagate_rt_info(Node* node, const Output<Node>& final_port) {
22-
auto node_outputs = node->outputs();
23-
bool same_outputs = std::all_of(node_outputs.begin(), node_outputs.end(), [](const Output<Node>& output) {
24-
return output.get_tensor().has_and_set_bound();
25-
});
26-
if (same_outputs && op::util::is_constant(node)) // constant should not propagate it's rt_info
27-
{
28-
std::unordered_set<Node*> stop_nodes;
29-
for (const auto& in : final_port.get_target_inputs())
30-
stop_nodes.insert(in.get_node());
31-
32-
auto curr_node = node->shared_from_this();
33-
for (const auto& output : node_outputs) {
34-
if (output == final_port)
35-
continue;
36-
for (auto& in : output.get_target_inputs()) {
37-
if (stop_nodes.count(in.get_node()))
38-
continue;
39-
try {
40-
auto consumer = in.get_node()->shared_from_this();
41-
copy_runtime_info({curr_node, consumer}, consumer);
42-
} catch (const std::bad_weak_ptr&) {
43-
// Exception can be thrown, if `shared_from_this()` was called during node creation.
44-
// Continue propagation for other nodes.
45-
}
46-
}
47-
}
48-
}
49-
}
50-
5121
bool are_same_tensor(const ov::Tensor& lhs, const ov::Tensor& rhs) {
5222
return (lhs && rhs) && (lhs.get_element_type() == rhs.get_element_type()) && (lhs.get_shape() == rhs.get_shape()) &&
5323
(lhs.data() == rhs.data());
@@ -287,7 +257,6 @@ void evaluate_bound(const Output<Node>& output) {
287257
}
288258
bound_evaluator.set_bounds_and_symbols();
289259
invalidate_unused_values(node->input_values());
290-
propagate_rt_info(node, output);
291260
}
292261
}
293262
}

src/plugins/intel_gpu/src/plugin/transformations/optimize_subsequent_reshapes.cpp

+11-11
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ OptimizeSubsequentReshapes::OptimizeSubsequentReshapes() {
1818
using namespace ov::pass::pattern;
1919
using ov::pass::pattern::op::Or;
2020

21-
auto dynamic_batch_only = [](Output<Node> output) {
21+
auto single_dynamic_dim = [](Output<Node> output) {
2222
const auto& shape = output.get_partial_shape();
2323

2424
if (shape.rank().is_dynamic())
@@ -27,23 +27,23 @@ OptimizeSubsequentReshapes::OptimizeSubsequentReshapes() {
2727
if (shape.size() <= 1)
2828
return false;
2929

30-
if (shape[0].is_static())
31-
return false;
30+
auto dynamic_dims = 0;
31+
for (size_t i = 0; i < shape.size(); i++)
32+
dynamic_dims += shape[i].is_dynamic() ? 1 : 0;
3233

33-
for (size_t i = 1; i < shape.size(); i++)
34-
if (shape[i].is_dynamic())
35-
return false;
34+
if (dynamic_dims != 1)
35+
return false;
3636

3737
return true;
3838
};
3939

40-
auto first_reshape_data = any_input(dynamic_batch_only);
40+
auto first_reshape_data = any_input(single_dynamic_dim);
4141
auto first_reshape_pattern = ov::pass::pattern::wrap_type<ov::op::v0::Constant>();
4242
auto first_reshape = wrap_type<ov::op::v1::Reshape>({ first_reshape_data, first_reshape_pattern },
43-
dynamic_batch_only && ov::pass::pattern::consumers_count(1));
43+
single_dynamic_dim && ov::pass::pattern::consumers_count(1));
4444

4545
auto second_reshape_pattern = ov::pass::pattern::wrap_type<ov::op::v0::Constant>();
46-
auto second_reshape = wrap_type<ov::op::v1::Reshape>({ first_reshape, second_reshape_pattern }, dynamic_batch_only);
46+
auto second_reshape = wrap_type<ov::op::v1::Reshape>({ first_reshape, second_reshape_pattern }, single_dynamic_dim);
4747

4848
ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) {
4949
const auto& pattern_map = m.get_pattern_value_map();
@@ -74,14 +74,14 @@ OptimizeSubsequentReshapes::OptimizeSubsequentReshapes() {
7474
std::vector<int32_t> new_pattern;
7575
for (auto& dim : second_reshape_ps) {
7676
if (dim.is_dynamic()) {
77-
new_pattern.push_back(0);
77+
new_pattern.push_back(-1);
7878
} else {
7979
new_pattern.push_back(dim.get_length());
8080
}
8181
}
8282

8383
auto new_pattern_const = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{new_pattern.size()}, new_pattern);
84-
auto new_reshape = std::make_shared<ov::op::v1::Reshape>(first_reshape_node->input(0).get_source_output(), new_pattern_const, true);
84+
auto new_reshape = std::make_shared<ov::op::v1::Reshape>(first_reshape_node->input(0).get_source_output(), new_pattern_const, false);
8585
new_reshape->set_friendly_name(second_reshape_node->get_friendly_name());
8686

8787
ov::replace_node(second_reshape_node, new_reshape);

src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp

+2-2
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
#ifndef NOMINMAX
77
# define NOMINMAX
88
#endif
9-
#include "gpu/intel/jit/jit_generator.hpp"
9+
#include "gpu/intel/jit/generator.hpp"
1010
#endif // ENABLE_ONEDNN_FOR_GPU
1111

1212
#include "ocl_device.hpp"
@@ -324,7 +324,7 @@ device_info init_device_info(const cl::Device& device, const cl::Context& contex
324324
using namespace dnnl::impl::gpu::intel::jit;
325325
ngen::HW hw = ngen::HW::Unknown;
326326
ngen::Product product = {ngen::ProductFamily::Unknown, 0};
327-
jit_generator<ngen::HW::Unknown>::detectHWInfo(context.get(), device.get(), hw, product);
327+
generator_t<ngen::HW::Unknown>::detectHWInfo(context.get(), device.get(), hw, product);
328328
info.arch = convert_ngen_arch(hw);
329329
// We change the value of this flag to avoid OneDNN usage for the platforms unknown to OneDNN
330330
// This is required to guarantee some level of forward compatibility for the new HW generations

src/plugins/intel_gpu/tests/unit/transformations/optimize_subsequent_reshapes_test.cpp

+54-6
Original file line number | Diff line number | Diff line change
@@ -39,8 +39,8 @@ TEST_F(TransformationTestsF, OptimizeSubsequentReshapes1) {
3939
}
4040
{
4141
auto input = std::make_shared<ov::op::v0::Parameter>(ov::element::i64, ov::PartialShape{ -1, 1, 4096 });
42-
auto reshape_pattern = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{2}, std::vector<int32_t>{ 0, 4096 });
43-
auto reshape = std::make_shared<ov::op::v1::Reshape>(input, reshape_pattern, true);
42+
auto reshape_pattern = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{2}, std::vector<int32_t>{ -1, 4096 });
43+
auto reshape = std::make_shared<ov::op::v1::Reshape>(input, reshape_pattern, false);
4444
auto result = std::make_shared<ov::op::v0::Result>(reshape);
4545

4646
model_ref = std::make_shared<ov::Model>(ov::NodeVector{ result }, ov::ParameterVector{ input });
@@ -63,8 +63,8 @@ TEST_F(TransformationTestsF, OptimizeSubsequentReshapes2) {
6363
}
6464
{
6565
auto input = std::make_shared<ov::op::v0::Parameter>(ov::element::i64, ov::PartialShape{ -1, 1, 4096 });
66-
auto reshape_pattern = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{4}, std::vector<int32_t>{ 0, 32, 1, 128 });
67-
auto reshape = std::make_shared<ov::op::v1::Reshape>(input, reshape_pattern, true);
66+
auto reshape_pattern = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{4}, std::vector<int32_t>{ -1, 32, 1, 128 });
67+
auto reshape = std::make_shared<ov::op::v1::Reshape>(input, reshape_pattern, false);
6868
auto result = std::make_shared<ov::op::v0::Result>(reshape);
6969

7070
model_ref = std::make_shared<ov::Model>(ov::NodeVector{ result }, ov::ParameterVector{ input });
@@ -87,8 +87,56 @@ TEST_F(TransformationTestsF, OptimizeSubsequentReshapes3) {
8787
}
8888
{
8989
auto input = std::make_shared<ov::op::v0::Parameter>(ov::element::i64, ov::PartialShape{ -1, 32, 1, 128 });
90-
auto reshape_pattern = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{2}, std::vector<int32_t>{ 0, 4096 });
91-
auto reshape = std::make_shared<ov::op::v1::Reshape>(input, reshape_pattern, true);
90+
auto reshape_pattern = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{2}, std::vector<int32_t>{ -1, 4096 });
91+
auto reshape = std::make_shared<ov::op::v1::Reshape>(input, reshape_pattern, false);
92+
auto result = std::make_shared<ov::op::v0::Result>(reshape);
93+
94+
model_ref = std::make_shared<ov::Model>(ov::NodeVector{ result }, ov::ParameterVector{ input });
95+
}
96+
comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
97+
}
98+
99+
TEST_F(TransformationTestsF, OptimizeSubsequentReshapes4) {
100+
{
101+
auto input = std::make_shared<ov::op::v0::Parameter>(ov::element::i64, ov::PartialShape{ 1, -1, 256 });
102+
auto first_reshape_pattern = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{4}, std::vector<int32_t>{ 0, 0, 2, 128 });
103+
auto first_reshape = std::make_shared<ov::op::v1::Reshape>(input, first_reshape_pattern, true);
104+
105+
auto second_reshape_pattern = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{2}, std::vector<int32_t>{ -1, 256 });
106+
auto second_reshape = std::make_shared<ov::op::v1::Reshape>(first_reshape, second_reshape_pattern, false);
107+
auto result = std::make_shared<ov::op::v0::Result>(second_reshape);
108+
109+
model = std::make_shared<ov::Model>(ov::NodeVector{ result }, ov::ParameterVector{ input });
110+
manager.register_pass<OptimizeSubsequentReshapes>();
111+
}
112+
{
113+
auto input = std::make_shared<ov::op::v0::Parameter>(ov::element::i64, ov::PartialShape{ 1, -1, 256 });
114+
auto reshape_pattern = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{2}, std::vector<int32_t>{ -1, 256 });
115+
auto reshape = std::make_shared<ov::op::v1::Reshape>(input, reshape_pattern, false);
116+
auto result = std::make_shared<ov::op::v0::Result>(reshape);
117+
118+
model_ref = std::make_shared<ov::Model>(ov::NodeVector{ result }, ov::ParameterVector{ input });
119+
}
120+
comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
121+
}
122+
123+
TEST_F(TransformationTestsF, OptimizeSubsequentReshapes5) {
124+
{
125+
auto input = std::make_shared<ov::op::v0::Parameter>(ov::element::i64, ov::PartialShape{ 1, 256, -1 });
126+
auto first_reshape_pattern = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{4}, std::vector<int32_t>{ 0, 64, 4, -1 });
127+
auto first_reshape = std::make_shared<ov::op::v1::Reshape>(input, first_reshape_pattern, true);
128+
129+
auto second_reshape_pattern = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{4}, std::vector<int32_t>{ -1, 32, 2, 4 });
130+
auto second_reshape = std::make_shared<ov::op::v1::Reshape>(first_reshape, second_reshape_pattern, true);
131+
auto result = std::make_shared<ov::op::v0::Result>(second_reshape);
132+
133+
model = std::make_shared<ov::Model>(ov::NodeVector{ result }, ov::ParameterVector{ input });
134+
manager.register_pass<OptimizeSubsequentReshapes>();
135+
}
136+
{
137+
auto input = std::make_shared<ov::op::v0::Parameter>(ov::element::i64, ov::PartialShape{ 1, 256, -1 });
138+
auto reshape_pattern = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{4}, std::vector<int32_t>{ -1, 32, 2, 4 });
139+
auto reshape = std::make_shared<ov::op::v1::Reshape>(input, reshape_pattern, false);
92140
auto result = std::make_shared<ov::op::v0::Result>(reshape);
93141

94142
model_ref = std::make_shared<ov::Model>(ov::NodeVector{ result }, ov::ParameterVector{ input });

src/plugins/intel_gpu/thirdparty/CMakeLists.txt

+2-7
Original file line number | Diff line number | Diff line change
@@ -156,18 +156,13 @@ if(ENABLE_ONEDNN_FOR_GPU)
156156
)
157157
endif()
158158

159-
set(LIB_INCLUDE_DIRS "${ONEDNN_INSTALL_DIR}/include" "${CMAKE_CURRENT_SOURCE_DIR}/onednn_gpu/src")
159+
set(LIB_INCLUDE_DIRS "${ONEDNN_INSTALL_DIR}/include" "${CMAKE_CURRENT_SOURCE_DIR}/onednn_gpu/src" "${CMAKE_CURRENT_SOURCE_DIR}/onednn_gpu/src/gpu/intel/jit/ngen" "${CMAKE_CURRENT_SOURCE_DIR}/onednn_gpu/third_party/ngen")
160160
set(LIB_DEFINITIONS ENABLE_ONEDNN_FOR_GPU
161161
DNNL_DLL
162162
DNNL_DLL_EXPORTS
163163
DNNL_ENABLE_CPU_ISA_HINTS
164164
DNNL_ENABLE_MAX_CPU_ISA
165-
DNNL_X64=1
166-
NGEN_CPP11
167-
NGEN_NEO_INTERFACE
168-
NGEN_NO_OP_NAMES
169-
NGEN_SAFE
170-
NGEN_WINDOWS_COMPAT)
165+
DNNL_X64=1)
171166
add_library(onednn_gpu_tgt INTERFACE)
172167
set_target_properties(onednn_gpu_tgt PROPERTIES
173168
INTERFACE_LINK_LIBRARIES $<BUILD_INTERFACE:${ONEDNN_GPU_LIB_PATH}>

0 commit comments

Comments
 (0)