Skip to content

Commit a093512

Browse files
[GPU] update shape when current node is the fused prim of user and it… (#25410)
### Details: - *Update the user's shape when the current node is an input of one of the user's fused primitives and the user's shape has already been updated by another node* ### Tickets: - *145756*
1 parent 7d33b17 commit a093512

File tree

1 file changed

+36
-11
lines changed

1 file changed

+36
-11
lines changed

src/plugins/intel_gpu/src/graph/primitive_inst.cpp

+36-11
Original file line numberDiff line numberDiff line change
@@ -586,19 +586,44 @@ event::ptr primitive_inst::realloc_if_needed() {
586586
user_insts.size(), " and ", user_insts_origin.size());
587587
}
588588
for (auto user : user_insts) {
589+
auto is_fused_prim_of_user = [&](primitive_id id) -> bool {
590+
for (auto& p : user->get_node().get_fused_primitives()) {
591+
if (p.has_outer_dep()) {
592+
const auto start_idx = p.outer_dep_start_idx;
593+
// exclude fused_node from total_num_deps
594+
const auto end_idx = p.outer_dep_start_idx + p.total_num_deps -1;
595+
for (size_t idx = start_idx; idx < end_idx; idx++) {
596+
if (user->get_node().get_dependency(idx).id() == id) {
597+
return true;
598+
}
599+
}
600+
}
601+
}
602+
return false;
603+
};
589604
// Since fake alignment is applicable for input tensor as well, make sure we allocate enough memory
590605
// to prevent reading beyond the allocated memory bounds
591-
if (user->get_node().is_type<fully_connected>() && user->is_dynamic() && user->_deps[0].first == this) {
592-
GPU_DEBUG_TRACE_DETAIL << "Check fc user " << user->id() << "'s fake alignment-ed input size" << std::endl;
593-
user->update_shape();
594-
user->update_shape_done_by_other = true;
595-
596-
auto fc_impl_params = *user->_impl_params;
597-
auto fc_input_layout = user->get_node().type()->get_fake_aligned_params(fc_impl_params).input_layouts[0];
598-
if (fc_input_layout.bytes_count() > updated_layout.bytes_count()) {
599-
GPU_DEBUG_TRACE_DETAIL << id() << ": increase output layout allocation size from " << actual_layout.to_short_string() << " -> "
600-
<< fc_input_layout.to_short_string() << " to meet the input buffer alignment requirements for FC\n";
601-
updated_layout = fc_input_layout;
606+
if (user->get_node().is_type<fully_connected>() && user->is_dynamic()) {
607+
if (user->_deps[0].first == this
608+
|| (is_fused_prim_of_user(id()) && user->update_shape_done_by_other)) {
609+
GPU_DEBUG_TRACE_DETAIL << "Check fc user " << user->id() << "'s fake alignment-ed input size" << std::endl;
610+
// Setting update_shape_done_by_other to false before running update_shape,
611+
// since update_shape is already called in realloc_if_needed of current node's dep node
612+
// but current node's output layout is not yet propagated to this user node.
613+
user->update_shape_done_by_other = false;
614+
bool prev_shape_changed = user->shape_changed();
615+
user->update_shape();
616+
// Restore the shape_change status if the shape was changed in the previous update_shape() for this user node.
617+
if (prev_shape_changed)
618+
user->set_shape_change();
619+
user->update_shape_done_by_other = true;
620+
auto fc_impl_params = *user->_impl_params;
621+
auto fc_input_layout = user->get_node().type()->get_fake_aligned_params(fc_impl_params).input_layouts[0];
622+
if (fc_input_layout.bytes_count() > updated_layout.bytes_count()) {
623+
GPU_DEBUG_TRACE_DETAIL << id() << ": increase output layout allocation size from " << actual_layout.to_short_string() << " -> "
624+
<< fc_input_layout.to_short_string() << " to meet the input buffer alignment requirements for FC\n";
625+
updated_layout = fc_input_layout;
626+
}
602627
}
603628
}
604629
}

0 commit comments

Comments
 (0)