Skip to content

Commit 64eb742

Browse files
[GPU][Loop] Change condition to reinterpret buffer (#25086)
### Details: - In primitive_inst.cpp, when a skippable node that is optimized out returns from realloc_if_needed(), its output memory layout should be changed. - So the layout needs to be adjusted in the respective primitive_inst (in this case, Reorder). ### Tickets: - 143848
1 parent 943a94a commit 64eb742

File tree

2 files changed

+121
-1
lines changed

2 files changed

+121
-1
lines changed

src/plugins/intel_gpu/src/graph/reorder.cpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,9 @@ void reorder_inst::update_output_memory() {
270270
if (!can_be_optimized())
271271
return;
272272

273-
if (static_cast<bool>(_outputs[0]) && _network.get_engine().is_the_same_buffer(output_memory(), input_memory()))
273+
if (static_cast<bool>(_outputs[0])
274+
&& _network.get_engine().is_the_same_buffer(output_memory(), input_memory())
275+
&& output_memory().get_layout().identical(get_output_layout()))
274276
return;
275277

276278
if (_node != nullptr)

src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp

+118
Original file line numberDiff line numberDiff line change
@@ -875,6 +875,115 @@ static void test_loop_gpu_multiple_shapes(ov::PartialShape body_input_layout,
875875
}
876876
}
877877

878+
static void test_loop_gpu_multiple_shapes_single_shared(ov::PartialShape body_input_layout,
879+
std::vector<ov::PartialShape> whole_layouts,
880+
std::vector<std::vector<float>> input_data_list,
881+
std::vector<float> expected_output_data,
882+
int32_t axis,
883+
size_t exit_value,
884+
bool is_caching_test = false) {
885+
auto& engine = get_test_engine();
886+
887+
auto b_input_layout = cldnn::layout{ body_input_layout, data_types::f32, format::bfyx };
888+
auto const_layout = cldnn::layout{ {}, data_types::i64, format::bfyx };
889+
890+
auto e_initial_condition_mem = engine.allocate_memory(const_layout);
891+
auto e_num_iteration_mem = engine.allocate_memory(const_layout);
892+
auto b_exit_value_mem = engine.allocate_memory(const_layout);
893+
auto b_index_inc_mem = engine.allocate_memory(const_layout);
894+
895+
// initialize input buffers
896+
set_values(e_initial_condition_mem, {1});
897+
set_values(b_exit_value_mem, {exit_value});
898+
set_values(b_index_inc_mem, {1});
899+
set_values(e_num_iteration_mem, {10});
900+
901+
primitive_id body_current_iteration_id = "b_index";
902+
primitive_id body_execution_condition_id = "b_cond_exit_value";
903+
904+
cldnn::topology body(
905+
input_layout(body_current_iteration_id, const_layout),
906+
input_layout("b_parameter", b_input_layout),
907+
data("b_exit_value", b_exit_value_mem),
908+
data("b_index_inc", b_index_inc_mem),
909+
eltwise("b_index_update", input_info(body_current_iteration_id), input_info("b_index_inc"), eltwise_mode::sum),
910+
eltwise("b_permute", input_info("b_parameter"), input_info("b_index_update"), eltwise_mode::sum),
911+
reorder("b_result", input_info("b_permute"), b_input_layout),
912+
eltwise(body_execution_condition_id, input_info(body_current_iteration_id), input_info("b_exit_value"), eltwise_mode::lt)
913+
);
914+
915+
primitive_id trip_count_id = "";
916+
primitive_id actual_iteration_count_id = "actual_iteration_count";
917+
primitive_id initial_condition_id = "initial_condition";
918+
int64_t num_iterations = -1;
919+
920+
std::vector<loop::io_primitive_map> input_primitive_maps {
921+
loop::io_primitive_map("input", "b_parameter", axis),
922+
loop::io_primitive_map(actual_iteration_count_id, body_current_iteration_id) };
923+
std::vector<loop::io_primitive_map> output_primitive_maps {
924+
loop::io_primitive_map(cldnn::input_info("loop"), cldnn::input_info("b_result"), axis) };
925+
std::vector<loop::backedge_mapping> back_edges {
926+
loop::backedge_mapping("b_result", "b_parameter"),
927+
loop::backedge_mapping("b_index_update", body_current_iteration_id) };
928+
929+
auto body_program = build_program(engine, body, body_execution_condition_id, output_primitive_maps, back_edges, true);
930+
931+
auto const_shape = engine.allocate_memory({ov::PartialShape{4}, data_types::i32, format::bfyx});
932+
std::vector<int32_t> body_input_layouts;
933+
for (size_t i = 0; i < body_input_layout.size(); i++) {
934+
if (body_input_layout[i].is_dynamic())
935+
body_input_layouts.push_back(-1);
936+
else
937+
body_input_layouts.push_back(body_input_layout[i].get_length());
938+
}
939+
set_values<int32_t>(const_shape, body_input_layouts);
940+
941+
cldnn::topology topology(
942+
input_layout("input_origin", b_input_layout),
943+
input_layout(initial_condition_id, e_initial_condition_mem->get_layout()),
944+
mutable_data(actual_iteration_count_id, e_num_iteration_mem),
945+
permute("input2", input_info("input_origin"), {0, 1, 2, 3}),
946+
data("const", const_shape),
947+
permute("permute1", input_info("input_origin"), {0, 1, 2, 3}),
948+
concatenation("input", {input_info("permute1"), input_info("input_origin")}, 0),
949+
loop("loop",
950+
{input_info(actual_iteration_count_id), input_info(initial_condition_id), input_info("input")},
951+
body_program, trip_count_id, initial_condition_id, actual_iteration_count_id,
952+
input_primitive_maps, output_primitive_maps, back_edges,
953+
num_iterations, body_current_iteration_id, body_execution_condition_id, 1),
954+
permute("result", input_info("loop"), {0, 1, 2, 3}));
955+
956+
ExecutionConfig config = get_test_default_config(engine);
957+
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
958+
959+
network network(engine, topology, config);
960+
for (size_t i = 0 ; i < whole_layouts.size(); i++) {
961+
auto whole_layout = whole_layouts[i];
962+
auto input_data = input_data_list[i];
963+
964+
set_values(e_initial_condition_mem, {1});
965+
set_values(b_exit_value_mem, {exit_value});
966+
set_values(b_index_inc_mem, {1});
967+
set_values(e_num_iteration_mem, {10});
968+
969+
auto e_input_layout = cldnn::layout{ whole_layout, data_types::f32, format::bfyx };
970+
auto e_input_mem = engine.allocate_memory(e_input_layout); // b,f,x,y
971+
auto expected_output_layout = whole_layout;
972+
set_values(e_input_mem, input_data);
973+
974+
network.set_input_data("input_origin", e_input_mem);
975+
network.set_input_data(initial_condition_id, e_initial_condition_mem);
976+
977+
auto outputs = network.execute();
978+
auto output_layout = outputs.begin()->second.get_layout();
979+
auto input_layout = network.get_primitive("input")->get_output_layout();
980+
981+
ASSERT_EQ(output_layout.feature(), input_layout.feature());
982+
ASSERT_EQ(output_layout.spatial(0), input_layout.spatial(0));
983+
ASSERT_EQ(output_layout.spatial(1), input_layout.spatial(1));
984+
}
985+
}
986+
878987
std::vector<float> input_data_2_4{
879988
1.0f, 2.0f,
880989
4.0f, -15.f,
@@ -919,6 +1028,15 @@ TEST(loop_gpu, support_loop_w_dynamic_input_w_various_shapes2) {
9191028
-1, 10);
9201029
}
9211030

1031+
TEST(loop_gpu, support_loop_w_dynamic_input_w_various_shapes3) {
1032+
test_loop_gpu_multiple_shapes_single_shared(
1033+
{ 1, -1, 560 },
1034+
{{ 1, 58, 560 }, { 1, 87, 560 }, { 1, 72, 560 }, { 1, 88, 560 }, { 1, 89, 560 }},
1035+
{input_data_2_4_4, input_data_2_4_4, input_data_2_4_4, input_data_2_4_4, input_data_2_4_4},
1036+
std::vector<float>(),
1037+
-1, 20);
1038+
}
1039+
9221040
static void test_loop_gpu_wo_trip_count_update_primitive_id(ov::PartialShape body_input_layout,
9231041
std::vector<ov::PartialShape> whole_layouts,
9241042
std::vector<std::vector<float>> input_data_list,

0 commit comments

Comments
 (0)