Skip to content

Commit 7df9c21

Browse files
committed Feb 4, 2025
[GPU] Add intermediate weight reorder including siblings without impl
yet
1 parent 3571d18 commit 7df9c21

File tree

2 files changed

+63
-0
lines changed

2 files changed

+63
-0
lines changed
 

‎src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp

+15
Original file line numberDiff line numberDiff line change
@@ -129,16 +129,31 @@ void post_optimize_weights::optimize_weights(T& node, program& p) {
129129
auto& weights_reorder_node = node.get_dependency(i);
130130
weights_reorder_node.get_output_layout(false);
131131
} else {
132+
auto siblings = prev_node.get_users();
132133
auto weights_reorder = _rf.get_weights_reorder(prev_node.id(), weights_reorder_params);
133134
// insert new weights reorder node to topology
134135
p.add_intermediate(weights_reorder.first, node, i, !weights_reorder.second);
135136
// set weights reorder's node output layout and implementation
136137
auto& weights_reorder_node = node.get_dependency(i);
137138
weights_reorder_node.get_output_layout(false);
138139

140+
// apply to other siblings
141+
for (auto sib : siblings) {
142+
auto sib_impl = sib->get_selected_impl();
143+
if (sib->id().compare(node.id()) != 0 && sib->id().compare(weights_reorder.first->id) != 0 && !sib_impl) {
144+
auto weights_reorder2 = _rf.get_weights_reorder(prev_node.id(), weights_reorder_params);
145+
p.add_intermediate(weights_reorder2.first, *sib, i, !weights_reorder2.second);
146+
147+
// auto& weights_reorder_node = node.get_dependency(i);
148+
// weights_reorder_node.get_output_layout(false);
149+
}
150+
}
151+
139152
if (!weights_reorder.second) {
140153
set_implementation(weights_reorder_node);
141154
}
155+
156+
142157
}
143158
}
144159
}

‎src/plugins/intel_gpu/tests/unit/passes/post_optimize_weights.cpp

+48
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
// SPDX-License-Identifier: Apache-2.0
33
//
44

5+
#include "intel_gpu/primitives/permute.hpp"
56
#include "test_utils.h"
7+
#include "random_generator.hpp"
68
#include "program_wrapper.h"
79
#include "fully_connected_inst.h"
810
#include "convolution_inst.h"
@@ -242,3 +244,49 @@ TEST(post_optimize_weights, onednn_group_conv_weights_reorder_test) {
242244
ASSERT_TRUE(onednn_weights_params->_out_desc.get_size() == prog->get_node("weights_weights_reorder_0").get_output_layout().bytes_count());
243245
#endif
244246
}
247+
248+
// Smoke test for weight-reorder insertion when several users share one weights chain.
//
// Topology: a constant "weights" tensor goes through permute -> reorder ("reorder_dt"), and that
// single reorder feeds two fully_connected primitives:
//   * "fc1" consumes the static-shaped "input";
//   * "fc2" consumes "input2", which is declared with a dynamic batch dimension.
// On devices reporting supports_immad, "fc1" is additionally forced to the onednn implementation.
//
// The test only checks that the network builds and executes and that the "fc1" output memory can
// be retrieved and locked; it does not validate numerical results.
// NOTE(review): presumably this guards the sibling handling in post_optimize_weights (users of the
// same weights node that have no selected impl yet) — confirm against the pass implementation.
TEST(post_optimize_weights, fuse_constant_transposes_removal_and_add_intermediate_including_siblings) {
    auto& engine = get_test_engine();

    // Dynamic layout for the second FC input: batch is unknown at build time.
    auto input2_layout_dyn = layout{ ov::PartialShape{ -1, 32 }, data_types::f16, format::bfyx };

    auto input = engine.allocate_memory({ { 2, 32 }, data_types::f16, format::bfyx });
    auto input2 = engine.allocate_memory({ { 2, 32 }, data_types::f16, format::bfyx });
    auto weights = engine.allocate_memory({{ 32, 2 }, data_types::f32, format::bfyx });

    tests::random_generator rg(GET_SUITE_NAME);
    auto input_data = rg.generate_random_2d<ov::float16>(2, 32, -1, 1);
    // Same [-1, 1] range as the other tensors (the range was previously degenerate: min == max == -1).
    auto input2_data = rg.generate_random_2d<ov::float16>(2, 32, -1, 1);
    auto weights_data = rg.generate_random_2d<float>(32, 2, -1, 1);

    // All buffers are filled from flattened 2D data so the element type written matches the
    // memory's element type.
    set_values(input, flatten_2d(format::bfyx, input_data));
    set_values(input2, flatten_2d(format::bfyx, input2_data));
    set_values(weights, flatten_2d(format::bfyx, weights_data));

    topology topology(
        input_layout("input", input->get_layout()),
        input_layout("input2", input2_layout_dyn),
        data("weights", weights),
        permute("permute_test", input_info("weights"), {1, 0}),
        reorder("reorder_dt", input_info("permute_test"), format::any, data_types::f16, std::vector<float>()),
        // Both FCs use the very same weights producer, making them siblings of one another.
        fully_connected("fc1", input_info("input"), { "reorder_dt" }, "", data_types::f16),
        fully_connected("fc2", input_info("input2"), { "reorder_dt" }, "", data_types::f16)
    );

    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::optimize_data(true));
    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));

    // Only force onednn for fc1 where the device supports it.
    if (engine.get_device_info().supports_immad) {
        ov::intel_gpu::ImplementationDesc fc_impl = { format::bfyx, "", impl_types::onednn };
        config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc1", fc_impl} }));
    }

    cldnn::network network(engine, topology, config);
    network.set_input_data("input", input);
    network.set_input_data("input2", input2);

    auto outputs = network.execute();
    auto output = outputs.at("fc1").get_memory();
    ASSERT_TRUE(output != nullptr);
    // Locking the output confirms the result buffer is accessible from the host.
    cldnn::mem_lock<ov::float16> output_ptr(output, get_test_stream());
}

0 commit comments

Comments
 (0)