Skip to content

Commit bf7215d

Browse files
[GPU] Update loop inst ids instead of getting from prim
1 parent 3777479 commit bf7215d

File tree

3 files changed

+233
-22
lines changed

3 files changed

+233
-22
lines changed

src/plugins/intel_gpu/src/graph/impls/common/loop.cpp

+16-16
Original file line numberDiff line numberDiff line change
@@ -130,17 +130,17 @@ struct loop_impl : typed_primitive_impl<loop> {
130130
}
131131

132132
body_network->set_shape_predictor(outer_network.get_shape_predictor());
133-
OPENVINO_ASSERT(!primitive->num_iteration_id.empty(), "loop operation should have num_iteration_id");
133+
OPENVINO_ASSERT(!instance.get_num_iterations_id().empty(), "loop operation should have num_iteration_id");
134134

135135
// shortcut of execution_condition memory in body network
136136
memory::ptr body_execution_condition_mem = nullptr;
137-
if (!primitive->body_execution_condition_id.empty()) {
138-
body_execution_condition_mem = body_network->get_primitive(primitive->body_execution_condition_id)->output_memory_ptr();
137+
if (!instance.get_condition_id().empty()) {
138+
body_execution_condition_mem = body_network->get_primitive(instance.get_condition_id())->output_memory_ptr();
139139
}
140140

141141
// shortcut of current_iteration memory in body network
142-
if (!primitive->body_current_iteration_id.empty()) {
143-
memory::ptr body_current_iteration_mem = body_network->get_primitive(primitive->body_current_iteration_id)->output_memory_ptr();
142+
if (!instance.get_current_iteration_id().empty()) {
143+
memory::ptr body_current_iteration_mem = body_network->get_primitive(instance.get_current_iteration_id())->output_memory_ptr();
144144
write_scalar_value(body_current_iteration_mem, body_network->get_stream(), 0);
145145
}
146146

@@ -149,11 +149,11 @@ struct loop_impl : typed_primitive_impl<loop> {
149149

150150
// read trip_count from outer network
151151
int64_t trip_count = -1;
152-
if (!primitive->trip_count_id.empty()) {
153-
memory::ptr trip_count_mem = outer_network.get_primitive(primitive->trip_count_id)->output_memory_ptr();
152+
if (!instance.get_trip_count_id().empty()) {
153+
memory::ptr trip_count_mem = outer_network.get_primitive(instance.get_trip_count_id())->output_memory_ptr();
154154
trip_count = read_scalar_value(std::move(trip_count_mem), stream);
155155
} else {
156-
OPENVINO_ASSERT(!primitive->body_execution_condition_id.empty()
156+
OPENVINO_ASSERT(!instance.get_condition_id().empty()
157157
|| num_iterations > 0 || primitive->max_num_iterations > 0,
158158
"num_iterations should be positive when trip_count_id is not existed");
159159
// If trip_count_id does not exist, the original ngraph operation is TensorIterator.
@@ -166,19 +166,19 @@ struct loop_impl : typed_primitive_impl<loop> {
166166

167167
// read initial execution condition from outer network
168168
int64_t execution_condition = 1;
169-
if (!primitive->first_execution_condition_id.empty()) {
169+
if (!instance.get_initial_execution_id().empty()) {
170170
// Wait for completion of the execution_condition of outer_network
171-
if (outer_network.has_event(primitive->first_execution_condition_id))
172-
outer_network.get_primitive_event(primitive->first_execution_condition_id)->wait();
173-
memory::ptr first_execution_condition_mem = outer_network.get_primitive(primitive->first_execution_condition_id)->output_memory_ptr();
171+
if (outer_network.has_event(instance.get_initial_execution_id()))
172+
outer_network.get_primitive_event(instance.get_initial_execution_id())->wait();
173+
memory::ptr first_execution_condition_mem = outer_network.get_primitive(instance.get_initial_execution_id())->output_memory_ptr();
174174
execution_condition = read_scalar_value(first_execution_condition_mem, stream);
175175
}
176176
GPU_DEBUG_LOG << "execution_condition: " << execution_condition << std::endl;
177177

178178
// When execution_condition is false or trip_count is zero, return execute_impl without any body_network execution.
179179
if (!execution_condition || trip_count == 0) {
180180
// Update num_iterations (actual number of iterations)
181-
memory::ptr num_actual_iterations_mem = outer_network.get_primitive(primitive->num_iteration_id)->output_memory_ptr();
181+
memory::ptr num_actual_iterations_mem = outer_network.get_primitive(instance.get_num_iterations_id())->output_memory_ptr();
182182
write_scalar_value(num_actual_iterations_mem, stream, current_iteration_idx);
183183

184184
instance.update_output_layout();
@@ -255,7 +255,7 @@ struct loop_impl : typed_primitive_impl<loop> {
255255

256256
// execution condition is the result of body network execution
257257
if (body_execution_condition_mem != nullptr) {
258-
auto execution_id = primitive->body_execution_condition_id;
258+
auto execution_id = instance.get_condition_id();
259259
if (body_network->has_event(execution_id)) {
260260
auto ev = body_network->get_primitive_event(execution_id);
261261
if (ev) ev->wait();
@@ -275,9 +275,9 @@ struct loop_impl : typed_primitive_impl<loop> {
275275

276276
// Update actual num iteration
277277
// update num_iterations (actual number of iterations)
278-
memory::ptr num_actual_iterations_mem = outer_network.get_primitive(primitive->num_iteration_id)->output_memory_ptr();
278+
memory::ptr num_actual_iterations_mem = outer_network.get_primitive(instance.get_num_iterations_id())->output_memory_ptr();
279279
write_scalar_value(num_actual_iterations_mem, stream, current_iteration_idx);
280-
GPU_DEBUG_LOG << "current_iteration_idx(" << primitive->num_iteration_id << ", "
280+
GPU_DEBUG_LOG << "current_iteration_idx(" << instance.get_num_iterations_id() << ", "
281281
<< num_actual_iterations_mem << ") : " << current_iteration_idx << std::endl;
282282

283283
if (is_dynamic)

src/plugins/intel_gpu/src/graph/include/loop_inst.h

+41-6
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,12 @@ struct typed_program_node<loop> : public typed_program_node_base<loop> {
2222
private:
2323
using parent = typed_program_node_base<loop>;
2424

25+
primitive_id trip_count_id;
26+
primitive_id initial_execution_id;
27+
primitive_id current_iteration_id;
28+
primitive_id execution_condition_id;
29+
primitive_id num_iterations_id;
30+
2531
std::vector<loop::io_primitive_map>& input_primitive_maps;
2632
std::vector<loop::io_primitive_map>& output_primitive_maps;
2733
std::vector<loop::backedge_mapping>& back_edges;
@@ -31,21 +37,32 @@ struct typed_program_node<loop> : public typed_program_node_base<loop> {
3137
parent(prim, prog),
3238
input_primitive_maps(prim->input_primitive_maps),
3339
output_primitive_maps(prim->output_primitive_maps),
34-
back_edges(prim->back_edges) {}
40+
back_edges(prim->back_edges) {
41+
set_primitive_ids(prim);
42+
}
3543

3644
program::ptr get_body_program() const { return get_primitive()->body_program; }
3745

38-
const primitive_id& get_trip_count_id() const { return get_primitive()->trip_count_id; }
39-
const primitive_id& get_initial_execution_id() const { return get_primitive()->first_execution_condition_id; }
40-
const primitive_id& get_current_iteration_id() const { return get_primitive()->body_current_iteration_id; }
41-
const primitive_id& get_execution_condition_id() const { return get_primitive()->body_execution_condition_id; }
42-
const primitive_id& get_num_iterations_id() const { return get_primitive()->num_iteration_id; }
46+
const primitive_id& get_trip_count_id() const { return trip_count_id; }
47+
const primitive_id& get_initial_execution_id() const { return initial_execution_id; }
48+
const primitive_id& get_current_iteration_id() const { return current_iteration_id; }
49+
const primitive_id& get_execution_condition_id() const { return execution_condition_id; }
50+
const primitive_id& get_num_iterations_id() const { return num_iterations_id; }
51+
4352
const int32_t get_max_num_iteration() const { return get_primitive()->max_num_iterations; }
4453

4554
const std::vector<loop::io_primitive_map>& get_input_primitive_maps() const { return input_primitive_maps; }
4655
const std::vector<loop::io_primitive_map>& get_output_primitive_maps() const { return output_primitive_maps; }
4756
const std::vector<loop::backedge_mapping>& get_back_edges() const { return back_edges;}
4857

58+
void set_primitive_ids(std::shared_ptr<loop> prim) {
59+
trip_count_id = prim->trip_count_id;
60+
initial_execution_id = prim->first_execution_condition_id;
61+
current_iteration_id = prim->body_current_iteration_id;
62+
execution_condition_id = prim->body_execution_condition_id;
63+
num_iterations_id = prim->num_iteration_id;
64+
}
65+
4966
void update_primitive_map(const primitive_id& prevID, const primitive_id& newID, bool external_id = true) {
5067
if (external_id) {
5168
for (auto& pm : input_primitive_maps) {
@@ -78,6 +95,18 @@ struct typed_program_node<loop> : public typed_program_node_base<loop> {
7895
}
7996
}
8097
}
98+
99+
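// Update any cached loop control id (trip count, initial execution, current iteration, execution condition, num iterations) that equals prevID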
100+
if (get_trip_count_id() == prevID)
101+
trip_count_id = newID;
102+
if (get_initial_execution_id() == prevID)
103+
initial_execution_id = newID;
104+
if (get_current_iteration_id() == prevID)
105+
current_iteration_id = newID;
106+
if (get_execution_condition_id() == prevID)
107+
execution_condition_id = newID;
108+
if (get_num_iterations_id() == prevID)
109+
num_iterations_id = newID;
81110
}
82111

83112
// current_iteration is necessary to calculate output layout in dynamic shape
@@ -329,6 +358,12 @@ class typed_primitive_inst<loop> : public typed_primitive_inst_base<loop> {
329358
std::vector<event::ptr> preprocess_memory_for_body_network(int64_t current_iteration_idx);
330359
std::vector<event::ptr> postprocess_memory_for_body_network(int64_t current_iteration_idx);
331360

361+
primitive_id get_trip_count_id() { return _trip_count_id; };
362+
primitive_id get_initial_execution_id() { return _initial_execution_id; };
363+
primitive_id get_current_iteration_id() { return _current_iteration_id; };
364+
primitive_id get_condition_id() { return _condition_id; };
365+
primitive_id get_num_iterations_id() { return _num_iterations_id; };
366+
332367
private:
333368
network::ptr body_network;
334369
memory::ptr get_external_memory(const primitive_id& external_id, size_t mem_idx = 0) const;

src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp

+176
Original file line numberDiff line numberDiff line change
@@ -787,3 +787,179 @@ TEST(loop_gpu, support_loop_w_dynamic_input_w_various_shapes) {
787787
std::vector<float>(),
788788
2, 3);
789789
}
790+
791+
static void test_loop_gpu_wo_trip_count_update_primitive_id(ov::PartialShape body_input_layout,
792+
std::vector<ov::PartialShape> whole_layouts,
793+
std::vector<std::vector<float>> input_data_list,
794+
std::vector<float> expected_output_data,
795+
size_t axis,
796+
size_t exit_value,
797+
bool is_caching_test = false) {
798+
auto& engine = get_test_engine();
799+
800+
auto b_input_layout = cldnn::layout{ body_input_layout, data_types::f32, format::bfyx };
801+
802+
ov::PartialShape sliced_input_shape = body_input_layout;
803+
sliced_input_shape[axis] = 1;
804+
auto sliced_input_layout = cldnn::layout{ sliced_input_shape, data_types::f32, format::bfyx };
805+
806+
auto const_layout = cldnn::layout{ {}, data_types::i64, format::bfyx };
807+
808+
auto e_initial_condition_mem = engine.allocate_memory(const_layout);
809+
auto e_num_iteration_mem = engine.allocate_memory(const_layout);
810+
auto b_exit_value_mem = engine.allocate_memory(const_layout);
811+
auto b_index_inc_mem = engine.allocate_memory(const_layout);
812+
813+
// initialize input buffers
814+
set_values(e_initial_condition_mem, {1});
815+
set_values(b_exit_value_mem, {exit_value});
816+
set_values(b_index_inc_mem, {1});
817+
set_values(e_num_iteration_mem, {0});
818+
819+
primitive_id body_current_iteration_id = "b_index";
820+
primitive_id body_execution_condition_id = "b_cond_exit_value";
821+
822+
cldnn::topology body(
823+
input_layout(body_current_iteration_id, const_layout),
824+
input_layout("b_add_data", sliced_input_layout),
825+
input_layout("b_mul_data", sliced_input_layout),
826+
data("b_exit_value", b_exit_value_mem),
827+
data("b_index_inc", b_index_inc_mem),
828+
eltwise("b_index_update", input_info(body_current_iteration_id), input_info("b_index_inc"), eltwise_mode::sum),
829+
reorder("b_index_cast", input_info("b_index_update"),
830+
cldnn::format::any, data_types::f32, {}, cldnn::reorder_mean_mode::subtract, cldnn::padding(), true),
831+
eltwise(body_execution_condition_id, input_info("b_index"), input_info("b_exit_value"), eltwise_mode::lt),
832+
eltwise("b_add", input_info("b_add_data"), input_info("b_index_cast"), eltwise_mode::sum),
833+
eltwise("b_mul", input_info("b_mul_data"), input_info("b_index_cast"), eltwise_mode::prod));
834+
835+
primitive_id trip_count_id = "";
836+
primitive_id actual_iteration_count_id = "actual_iteration_count";
837+
primitive_id initial_mean = "initial_mean";
838+
839+
primitive_id initial_condition_id = "initial_condition";
840+
primitive_id initial_condition_id_elt = "initial_condition_elt";
841+
primitive_id initial_condition_id_reorder = "initial_condition_reorder";
842+
primitive_id initial_condition_id_reorder2 = "initial_condition_reorder2";
843+
int64_t num_iterations = -1;
844+
845+
std::vector<loop::io_primitive_map> input_primitive_maps {
846+
loop::io_primitive_map("input", "b_add_data", axis),
847+
loop::io_primitive_map("input", "b_mul_data", axis),
848+
loop::io_primitive_map(actual_iteration_count_id, body_current_iteration_id) };
849+
std::vector<loop::io_primitive_map> output_primitive_maps {
850+
loop::io_primitive_map(cldnn::input_info("loop", 0), cldnn::input_info("b_add", 0), axis),
851+
loop::io_primitive_map(cldnn::input_info("loop", 1), cldnn::input_info("b_mul", 0), axis) };
852+
std::vector<loop::backedge_mapping> back_edges {
853+
loop::backedge_mapping("b_index_update", body_current_iteration_id) };
854+
855+
auto body_program = build_program(engine, body, body_execution_condition_id, output_primitive_maps, back_edges, true);
856+
857+
auto const_shape = engine.allocate_memory({ov::PartialShape{4}, data_types::i32, format::bfyx});
858+
859+
860+
std::vector<int32_t> body_input_layouts;
861+
for (size_t i = 0; i < body_input_layout.size(); i++) {
862+
if (body_input_layout[i].is_dynamic())
863+
body_input_layouts.push_back(-1);
864+
else
865+
body_input_layouts.push_back(body_input_layout[i].get_length());
866+
}
867+
set_values<int32_t>(const_shape, body_input_layouts);
868+
const std::vector<float> values_to_subtract = {0.f};
869+
870+
cldnn::topology topology(
871+
input_layout("input_origin", b_input_layout),
872+
input_layout(initial_condition_id, e_initial_condition_mem->get_layout()),
873+
mutable_data(actual_iteration_count_id, e_num_iteration_mem),
874+
875+
reorder(initial_condition_id_reorder, input_info(initial_condition_id), cldnn::format::any, data_types::f32, values_to_subtract),
876+
reorder(initial_condition_id_reorder2, input_info(initial_condition_id_reorder), cldnn::format::any, data_types::i32), // should be fused to test updating input id of loop
877+
878+
shape_of("shape_of_input", input_info("input_origin"), data_types::i32),
879+
reduce("reduced_shape", input_info("shape_of_input"), reduce_mode::prod, {0}, true),
880+
reshape("reshape1", input_info("input_origin"), input_info("reduced_shape"), false, ov::PartialShape::dynamic(1)),
881+
data("const", const_shape),
882+
reshape("input", input_info("reshape1"), input_info("const"), false, ov::PartialShape::dynamic(4)),
883+
884+
loop("loop", { input_info(actual_iteration_count_id), input_info(initial_condition_id_reorder2), input_info("input") }, body_program,
885+
trip_count_id, initial_condition_id_reorder2, actual_iteration_count_id,
886+
input_primitive_maps, output_primitive_maps, back_edges,
887+
num_iterations, body_current_iteration_id, body_execution_condition_id, 2),
888+
eltwise("out_sum", input_info("loop", 0), input_info("loop", 1), eltwise_mode::sum));
889+
890+
ExecutionConfig config = get_test_default_config(engine);
891+
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
892+
893+
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
894+
895+
for (size_t i = 0 ; i < whole_layouts.size(); i++) {
896+
auto whole_layout = whole_layouts[i];
897+
auto input_data = input_data_list[i];
898+
899+
// initialize input buffers
900+
set_values(e_initial_condition_mem, {1});
901+
set_values(b_exit_value_mem, {exit_value});
902+
set_values(b_index_inc_mem, {1});
903+
set_values(e_num_iteration_mem, {0});
904+
905+
auto e_input_layout = cldnn::layout{ whole_layout, data_types::f32, format::bfyx };
906+
auto e_input_mem = engine.allocate_memory(e_input_layout); // b,f,x,y
907+
auto expected_output_layout = whole_layout;
908+
set_values(e_input_mem, input_data);
909+
network->set_input_data("input_origin", e_input_mem);
910+
911+
network->set_input_data(initial_condition_id, e_initial_condition_mem);
912+
913+
auto outputs = network->execute();
914+
ASSERT_EQ(outputs.size(), 1);
915+
916+
auto expected_num_iterations = (exit_value + 1);
917+
expected_output_layout[axis] = expected_num_iterations;
918+
auto e_output_layout = cldnn::layout{ expected_output_layout, data_types::f32, format::bfyx };
919+
920+
auto num_iter_mem = network->get_output_memory(actual_iteration_count_id);
921+
if (num_iter_mem != nullptr) {
922+
mem_lock<int64_t> num_iter_ptr{ num_iter_mem, get_test_stream() };
923+
ASSERT_EQ(num_iter_ptr.data()[0], expected_num_iterations);
924+
}
925+
926+
std::vector<float> expected(input_data.size());
927+
if (expected_output_data.size() == 0) {
928+
size_t unit = 1;
929+
for (size_t k = axis; k < whole_layout.size(); k++) {
930+
unit *= whole_layout[k].get_length();
931+
}
932+
933+
for (size_t j = 0; j < input_data.size(); j++) {
934+
auto val = static_cast<size_t>((j % unit) / 4) + 1;
935+
expected[j] = static_cast<float>(input_data[j] + val) + static_cast<float>(input_data[j] * val);
936+
}
937+
} else {
938+
expected = expected_output_data;
939+
}
940+
941+
auto output_mem = outputs.begin()->second.get_memory();
942+
auto output_layout = output_mem->get_layout();
943+
ASSERT_EQ(output_layout.batch(), e_output_layout.batch());
944+
ASSERT_EQ(output_layout.feature(), e_output_layout.feature());
945+
ASSERT_EQ(output_layout.spatial(0), e_output_layout.spatial(0));
946+
ASSERT_EQ(output_layout.spatial(1), e_output_layout.spatial(1));
947+
// value check
948+
{
949+
mem_lock<float> output_ptr{ output_mem, get_test_stream() };
950+
for (size_t i = 0, iend = output_layout.count(); i < iend; ++i) {
951+
ASSERT_FLOAT_EQ(output_ptr[i], expected.at(i));
952+
}
953+
}
954+
}
955+
}
956+
957+
958+
TEST(loop_gpu, support_loop_w_dynamic_input_update_primitive_id) {
959+
test_loop_gpu_wo_trip_count_update_primitive_id(
960+
{ 1, -1, 4, 4 },
961+
{{ 1, 1, 4, 4 }}, // axis value should be iter_num = (exit_value + 1)
962+
{input_data_4_4, input_data_2_4_4},
963+
std::vector<float>(),
964+
2, 3);
965+
}

0 commit comments

Comments
 (0)