
Commit de94a33

[GPU] network code cleanup (#26908)
### Details:
- Removed a few unnecessary methods
- Refactored `network::execute()`
1 parent 2167b03 commit de94a33

11 files changed (+96 / -175 lines)

src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp (-32)

@@ -116,34 +116,10 @@ struct network {
 
     std::vector<std::shared_ptr<primitive_inst>> const& get_outputs() { return _outputs; }
 
-    const std::vector<std::shared_ptr<const primitive_inst>>& get_outputs() const {
-        return reinterpret_cast<const std::vector<std::shared_ptr<const primitive_inst>>&>(_outputs);
-    }
-
-    network_output get_output(const primitive_id& output_id) {
-        event::ptr evt;
-        if (get_stream().get_queue_type() == QueueTypes::out_of_order || _enable_profiling)
-            evt = get_primitive_event(output_id);
-        return network_output(evt, get_output_memory(output_id), get_stream_ptr(), get_output_layout(output_id));
-    }
-    layout get_node_output_layout(const primitive_id& output_id) const;
     memory::ptr get_output_memory(const primitive_id& output_id);
     layout get_output_layout(const primitive_id& output_id) const;
     std::vector<layout> get_input_layouts() const;
 
-    /// @brief Returns the list of primitive ids before and after graph optimization.
-    /// @details If primitive was not optimized, the old and actual id will be the same.
-    /// @n If primitive was optimized during graph optimization, the actual id will be "_optimized_".
-    std::map<primitive_id, primitive_id> get_all_primitives() const {
-        auto primitive_ids = get_all_primitive_ids();
-        auto primitive_org_ids = get_all_primitive_org_ids();
-        std::map<primitive_id, primitive_id> result;
-        for (decltype(primitive_org_ids.size()) i = 0; i < primitive_org_ids.size(); i++) {
-            result.emplace(primitive_org_ids[i], primitive_ids[i]);
-        }
-        return result;
-    }
-
     /// @brief Returns the list of @ref event for the primitives that were executed in network.
     std::map<primitive_id, event::ptr> get_executed_primitives() const {
         auto primitive_ids = get_executed_primitive_ids();
@@ -201,7 +177,6 @@ struct network {
     void configure_primitives_second_output();
     void build_insts_deps();
     uint32_t get_id() const { return net_id; }
-    uint32_t get_local_id() const { return _local_net_id; }
     stream& get_stream() const { return *_stream; }
     stream::ptr get_stream_ptr() const { return _stream; }
     bool is_internal() const { return _internal; }
@@ -219,7 +194,6 @@ struct network {
     const ov::intel_gpu::VariableStateInfo& get_variable_info(const std::string &variable_id) const;
     const ov::intel_gpu::VariablesMap& get_variables() const;
     const ov::intel_gpu::VariablesInfoMap& get_variables_info() const;
-    std::vector<primitive_id> get_kv_cache_ids() const { return kv_cache_ids; }
 
     const ExecutionConfig& get_config() const { return _config; }
 
@@ -243,8 +217,6 @@ struct network {
     bool _is_dynamic = false;
     bool _enable_profiling = false;
     bool _reset_arguments;
-    uint32_t _local_net_id = 0; // This is for thread-safe deserialization. 'net_id' is globally unique,
-                                // but '_local_net_id' is unique only in each intel_gpu::Graph.
 
     std::unordered_map<primitive_id, std::shared_ptr<primitive_inst>> _primitives;
     std::vector<shared_mem_type> _in_out_shared_mem_types;
@@ -255,10 +227,8 @@ struct network {
 
     ov::intel_gpu::VariablesMap _variables_states;
     ov::intel_gpu::VariablesInfoMap _variables_state_info;
-    std::vector<primitive_id> kv_cache_ids;
 
     program::primitives_info _prims_info;
-    std::map<primitive_id, primitive_id> _ext_id_mapping;
     size_t _weights_cache_capacity = 1;
 
     std::unordered_map<primitive_id, event::ptr> _events;
@@ -272,9 +242,7 @@ struct network {
     void allocate_primitive_instance(program_node const& node);
     void transfer_memory_to_device(std::shared_ptr<primitive_inst> instance, program_node const& node);
    void add_to_exec_order(const primitive_id& id);
-    std::shared_ptr<primitive_inst> find_in_internal_networks(const primitive_id& id) const;
     std::shared_ptr<primitive_inst> find_primitive(const primitive_id& id) const;
-    void check_names();
     void add_default_output_chains();
     void calculate_weights_cache_capacity();
     output_chains_map::iterator add_output_chain(std::shared_ptr<primitive_inst>& p_inst);
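
Since the header drops `get_output()`, the const `get_outputs()` overload, and `get_all_primitives()`, callers now read results from the map returned by `execute()`, as the updated tests further down do. A minimal, hypothetical caller-side sketch of the migration; `"pool"` is only an example primitive id borrowed from those tests:

```cpp
#include <intel_gpu/graph/network.hpp>  // the header changed in this commit

// Hypothetical helper (not part of the commit): fetch one output after execute().
cldnn::memory::ptr read_pool_output(cldnn::network& network) {
    auto outputs = network.execute();        // std::map<primitive_id, network_output>
    return outputs.at("pool").get_memory();  // replaces the removed network.get_output("pool")
}
```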

src/plugins/intel_gpu/src/graph/kv_cache.cpp (+2 / -1)

@@ -17,7 +17,8 @@ GPU_DEFINE_PRIMITIVE_TYPE_ID(kv_cache)
 kv_cache_inst::typed_primitive_inst(network& network, const kv_cache_node& node) :
     parent{network, node, false},
     memory_state::variable{node.get_primitive()->variable_info.variable_id} {
-    kv_cache_id = network.get_kv_cache_ids().size();
+    thread_local size_t kv_cache_counter = 0;
+    kv_cache_id = kv_cache_counter++;
 }
 
 layout kv_cache_inst::calc_output_layout(const kv_cache_node& node, kernel_impl_params const& impl_param) {
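
The constructor now numbers kv_cache instances with a function-local `thread_local` counter rather than querying the network via the removed `get_kv_cache_ids()`. A standalone sketch of the idiom (illustrative only, not plugin code), to make the per-thread semantics explicit:

```cpp
#include <cstddef>

// Each thread gets its own copy of the counter, so ids are unique and
// monotonically increasing within that thread without any shared state.
std::size_t next_kv_cache_id() {
    thread_local std::size_t kv_cache_counter = 0;
    return kv_cache_counter++;
}
```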

src/plugins/intel_gpu/src/graph/network.cpp (+20 / -69)

@@ -203,8 +203,6 @@ network::network(program::ptr program, stream::ptr stream, bool is_internal, boo
     calculate_weights_cache_capacity();
     allocate_primitives();
     configure_primitives_second_output();
-    if (!_program->is_loaded_from_cache())
-        check_names();
     build_insts_deps();
     build_exec_order();
     validate_primitives();
@@ -333,11 +331,7 @@ void network::reset_execution(bool wait) {
 
 event::ptr network::set_input_data(const primitive_id& id, memory::ptr data) {
     GPU_DEBUG_TRACE_DETAIL << "Set input " << id << " " << data->get_layout().to_short_string() << std::endl;
-    std::shared_ptr<primitive_inst> primitive_inst;
-
-    primitive_inst = find_primitive(id);
-
-    OPENVINO_ASSERT(primitive_inst != nullptr, "[GPU] topology doesn't contain primitive: ", id);
+    auto primitive_inst = find_primitive(id);
 
     if (primitive_inst->type() != input_layout::type_id()) {
         CLDNN_ERROR_MESSAGE(id, "primitive " + id + " is not an input");
@@ -481,11 +475,8 @@ network::output_chains_map::iterator network::add_output_chain(std::shared_ptr<p
 
 std::vector<event::ptr> network::set_output_memory(const primitive_id& id, memory::ptr mem_new) {
     GPU_DEBUG_TRACE_DETAIL << "Set output " << id << " " << mem_new->get_layout().to_short_string() << std::endl;
-    std::shared_ptr<primitive_inst> p_inst;
     std::vector<event::ptr> ret_ev;
-    p_inst = find_primitive(id);
-
-    OPENVINO_ASSERT(p_inst != nullptr, "[GPU] topology doesn't contain primitive: ", id);
+    std::shared_ptr<primitive_inst> p_inst = find_primitive(id);
 
     auto iter = std::find(_outputs.begin(), _outputs.end(), p_inst);
     if (iter == _outputs.end())
@@ -513,35 +504,10 @@ std::vector<event::ptr> network::set_output_memory(const primitive_id& id, memor
     return ret_ev;
 }
 
-void cldnn::network::check_names() {
-    for (auto const& prim : _primitives) {
-        if (find_in_internal_networks(prim.first) != nullptr)
-            CLDNN_ERROR_MESSAGE("Network", "Found primitive with id: " + prim.first + "in anotother network.");
-    }
-}
-
 std::shared_ptr<primitive_inst> cldnn::network::find_primitive(const primitive_id& id) const {
-    if (_primitives.find(id) != _primitives.end())
-        return _primitives.at(id);
-
-    return find_in_internal_networks(id);
-}
-
-std::shared_ptr<primitive_inst> cldnn::network::find_in_internal_networks(const primitive_id& id) const {
-    std::shared_ptr<primitive_inst> ret;
-
-    for (auto const& prim : _primitives) {
-        if (prim.second->type() == condition::type_id()) { // currently only condition inst contains mini networks
-            auto cond_inst = std::static_pointer_cast<condition_inst>(prim.second);
-            ret = cond_inst->get_net_true()->find_primitive(id);
-            if (ret != nullptr)
-                return ret;
-            ret = cond_inst->get_net_false()->find_primitive(id);
-            if (ret != nullptr)
-                return ret;
-        }
-    }
-    return nullptr;
+    auto it = _primitives.find(id);
+    OPENVINO_ASSERT(it != _primitives.end(), "[GPU] Network doesn't contain primitive ", id);
+    return it->second;
 }
 
 std::string network::get_primitive_info(const primitive_id& id) const {
@@ -552,9 +518,6 @@ std::string network::get_primitive_info(const primitive_id& id) const {
 bool network::does_node_need_lockable_output(const primitive_id& id) const {
     auto prim_inst = find_primitive(id);
 
-    OPENVINO_ASSERT(prim_inst, "[GPU] Can't get implementation type, since topology ",
-                    "doesn't contain primitive with requested id: ", id);
-
     const auto& node = prim_inst->get_node();
     if (node.is_type<input_layout>()) {
         for (const auto& user : node.get_users()) {
@@ -574,15 +537,6 @@ std::string network::get_implementation_info(const primitive_id& id) const {
     return _program->get_implementation_info(id);
 }
 
-layout network::get_node_output_layout(const primitive_id& output_id) const {
-    auto res = std::find_if(_outputs.begin(), _outputs.end(), [&](const std::shared_ptr<primitive_inst>& v) {
-        return v->id() == output_id;
-    });
-    OPENVINO_ASSERT(res != _outputs.end(), "[GPU] Couldn't get output layout for ", output_id, ". Output with such name is not found in the outputs list");
-
-    return (*res)->get_node_output_layout();
-}
-
 memory::ptr network::get_output_memory(const primitive_id& output_id) {
     return get_primitive(output_id)->output_memory_ptr();
 }
@@ -729,17 +683,6 @@ void network::add_to_exec_order(const primitive_id& id) {
 }
 
 std::map<primitive_id, network_output> network::execute(const std::vector<event::ptr>& dependencies) {
-    execute_impl(dependencies);
-
-    auto output_ids = get_output_ids();
-    std::map<primitive_id, network_output> result;
-    for (auto& id : output_ids) {
-        result.emplace(id, get_output(id));
-    }
-    return result;
-}
-
-void network::execute_impl(const std::vector<event::ptr>& events) {
     OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "NetworkImpl::Execute");
     NETWORK_DEBUG(*this);
 
@@ -779,6 +722,21 @@ void network::execute_impl(const std::vector<event::ptr>& events) {
     // in some cases.
     auto surf_lock = surfaces_lock::create(get_engine().type(), in_out_mem, get_stream());
 
+    execute_impl(dependencies);
+
+    std::map<primitive_id, network_output> result;
+    for (auto& inst : _outputs) {
+        event::ptr ev = nullptr;
+        const auto& id = inst->id();
+        if (get_stream().get_queue_type() == QueueTypes::out_of_order || _enable_profiling)
+            ev = _events.at(id);
+
+        result.emplace(id, network_output(ev, inst->output_memory_ptr(0), get_stream_ptr(), inst->get_output_layout(0)));
+    }
+    return result;
+}
+
+void network::execute_impl(const std::vector<event::ptr>& events) {
     set_arguments();
 
     // This extra flush command is needed for dynamic models in both cases of out_of_order / in_order operating mode
@@ -904,10 +862,6 @@ const program::graph_optimizer_info& network::get_optimizer_passes_info() const
 }
 
 std::map<primitive_id, primitive_id> network::get_ext_id_mapping() const {
-    if (_program == nullptr) {
-        return _ext_id_mapping;
-    }
-
     std::map<primitive_id, primitive_id> result;
     for (auto& prim : _primitives) {
         result.emplace(prim.first, prim.second->get_node().get_primitive()->origin_op_name);
@@ -1008,9 +962,6 @@ void network::allocate_primitive_instance(program_node const& node) {
         if (node.is_type<data>())
            _data_outputs.push_back(inst);
     }
-    if (node.is_type<kv_cache>()) {
-        kv_cache_ids.push_back(node.id());
-    }
     if (auto state_prim = std::dynamic_pointer_cast<memory_state::variable>(inst)) {
        auto prim = inst->get_node().get_primitive();
        set_variables_state_info(state_prim->variable_id(), node.get_output_layout(0), state_prim->get_user_specified_type(), prim.get());
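
After the refactor, `execute()` runs `execute_impl()` and then packs each output into a `network_output`, attaching an event only for out-of-order queues or when profiling is enabled. A hedged caller-side sketch, assuming `network_output::get_event()` from the existing cldnn API (the event may be `nullptr` otherwise):

```cpp
auto outputs = network.execute();       // runs execute_impl() and collects the outputs
for (auto& [id, out] : outputs) {
    if (auto ev = out.get_event())      // only set for out-of-order queues or profiling
        ev->wait();                     // synchronize before reading the result
    auto mem = out.get_memory();
    // ... read results from mem ...
}
```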

src/plugins/intel_gpu/src/plugin/graph.cpp (-1)

@@ -558,7 +558,6 @@ void Graph::update_profiling_info() {
     };
 
     std::map<cldnn::primitive_id, cldnn::event::ptr> executedPrimitives = get_network()->get_executed_primitives();
-    auto allPrimitives = get_network()->get_all_primitives();
 
     // Get profiling info for all layers
     for (auto &profiledID : profilingIDs) {

src/plugins/intel_gpu/tests/unit/passes/reorder_inputs_test.cpp (+4 / -4)

@@ -183,15 +183,15 @@ TEST(reorder_inputs, impl_forcing_basic_format) {
                            7.f, 3.f, -2.f, -1.f });
 
     network.set_input_data("input", input);
-    network.execute();
+    auto outputs = network.execute();
 
     const auto& prog = network.get_program();
     auto& pool_node = prog->get_node("pool");
     auto pool_layout = pool_node.get_output_layout();
 
     ASSERT_EQ(pool_layout.format.value, format::yxfb);
 
-    auto out_mem = network.get_output("pool").get_memory();
+    auto out_mem = outputs.at("pool").get_memory();
     cldnn::mem_lock<float> out_mem_ptr(out_mem, get_test_stream());
 
     ASSERT_EQ(out_mem_ptr.size(), 4u);
@@ -239,7 +239,7 @@ TEST(reorder_inputs, impl_forcing_basic_format_kernel) {
                            7.f, 3.f, -2.f, -1.f });
 
     network.set_input_data("input", input);
-    network.execute();
+    auto outputs = network.execute();
 
     auto prog = network.get_program();
     auto& node = prog->get_node("actv");
@@ -250,7 +250,7 @@ TEST(reorder_inputs, impl_forcing_basic_format_kernel) {
     ASSERT_EQ(actv_layout.format.value, format::yxfb);
     ASSERT_EQ(kernel_name, actv_impl.kernel_name);
 
-    auto out_mem = network.get_output("actv").get_memory();
+    auto out_mem = outputs.at("actv").get_memory();
     cldnn::mem_lock<float> out_mem_ptr(out_mem, get_test_stream());
 
     ASSERT_EQ(out_mem_ptr.size(), 8u);

src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp (+12 / -12)

@@ -1031,9 +1031,9 @@ struct concat_gpu_4d : public concat_gpu {
             network.set_input_data(input_ids[i].pid, in_memory[i]);
         }
 
-        network.execute();
+        auto outputs = network.execute();
 
-        auto out_mem = network.get_output("concat").get_memory();
+        auto out_mem = outputs.at("concat").get_memory();
         cldnn::mem_lock<Type> out_ptr(out_mem, get_test_stream());
 
         for (size_t bi = 0; bi < batch_num; bi++) {
@@ -1117,9 +1117,9 @@ struct concat_gpu_4d_axis3 : public concat_axis3_gpu {
             network.set_input_data(input_ids[i].pid, in_memory[i]);
         }
 
-        network.execute();
+        auto outputs = network.execute();
 
-        auto out_mem = network.get_output("concat").get_memory();
+        auto out_mem = outputs.at("concat").get_memory();
         cldnn::mem_lock<Type> out_ptr(out_mem, get_test_stream());
 
         for (size_t bi = 0; bi < batch_num; bi++) {
@@ -1283,9 +1283,9 @@ struct concat_id_conv_gpu_4d : public concat_gpu {
             network.set_input_data(input_ids[i].pid, in_memory[i]);
         }
 
-        network.execute();
+        auto outputs = network.execute();
 
-        auto out_mem = network.get_output("conv").get_memory();
+        auto out_mem = outputs.at("conv").get_memory();
         cldnn::mem_lock<OutputT> out_ptr(out_mem, get_test_stream());
         ASSERT_EQ(out_mem->get_layout().format, fmt);
 
@@ -1420,13 +1420,13 @@ struct concat_gpu_4d_implicit : public concat_gpu {
         for (size_t i = 0; i < in_features.size(); i++) {
             concat_network->set_input_data(input_ids[i], in_memory[i]);
         }
-        concat_network->execute();
+        auto outputs = concat_network->execute();
 
         bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data);
         bool concat_opt_result = std::static_pointer_cast<concatenation_inst>(concat_network->get_primitive("concat"))->can_be_optimized();
         EXPECT_EQ(concat_opt_enabled, concat_opt_result);
 
-        return concat_network->get_output("reorder").get_memory();
+        return outputs.at("reorder").get_memory();
     }
 
     std::vector<std::vector<std::vector<std::vector<std::vector<Type>>>>> generate_input() {
@@ -1640,13 +1640,13 @@ struct concat_gpu_4d_implicit_onednn : public concat_gpu {
         for (size_t i = 0; i < in_features.size(); i++) {
             concat_network.set_input_data(input_ids[i], in_memory[i]);
         }
-        concat_network.execute();
+        auto outputs = concat_network.execute();
 
         bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data);
         bool concat_opt_result = std::static_pointer_cast<concatenation_inst>(concat_network.get_primitive("concat"))->node->can_be_optimized();
         EXPECT_EQ(concat_opt_enabled, concat_opt_result);
 
-        return concat_network.get_output("reorder").get_memory();
+        return outputs.at("reorder").get_memory();
     }
 
     std::vector<std::vector<std::vector<std::vector<std::vector<Type>>>>> generate_input() {
@@ -1803,7 +1803,7 @@ struct concat_gpu_4d_explicit : public concat_gpu {
         for (size_t i = 0; i < 4; i++) {
             concat_network.set_input_data(input_ids[i], in_memory[i]);
         }
-        concat_network.execute();
+        auto outputs = concat_network.execute();
 
         bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data);
         bool concat_opt_result = std::static_pointer_cast<concatenation_inst>(concat_network.get_primitive("concat"))->node->can_be_optimized();
@@ -1813,7 +1813,7 @@ struct concat_gpu_4d_explicit : public concat_gpu {
         if (concat_opt_enabled && batch_num > 1) concat_opt_result = !concat_opt_result;
         EXPECT_EQ(concat_opt_enabled, concat_opt_result);
 
-        return concat_network.get_output("reorder").get_memory();
+        return outputs.at("reorder").get_memory();
     }
 
     std::vector<std::vector<std::vector<std::vector<std::vector<Type>>>>> generate_input() {
