Skip to content

Commit 5cc808c

Browse files
authored
[GPU] release of dynamic layout from memory pool (openvinotoolkit#29148)
### Details: - part of the accuracy repair of FasterRCNN_Resnet50 - memory dependencies do not end recursion when the dependency can be optimized and is runtime skippable - fix documentation OV_GPU_Verbose=2 -> OV_VERBOSE=2 ### Tickets: - 101294
1 parent 63fc0ab commit 5cc808c

File tree

6 files changed

+23
-5
lines changed

6 files changed

+23
-5
lines changed

src/plugins/intel_gpu/docs/memory_allocation_gpu_plugin.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ calls the corresponding memory object wrapper for each allocation type: [gpu_buf
2020

2121
## Dump memory allocation history
2222

23-
The memory allocation history is being managed by the `engine`, which can be dumped by setting the environment variable `OV_GPU_Verbose=2` if OpenVINO is built with the cmake configuration `ENABLE_DEBUG_CAPS=ON`.
23+
The memory allocation history is being managed by the `engine`, which can be dumped by setting the environment variable `OV_VERBOSE=2` if OpenVINO is built with the cmake configuration `ENABLE_DEBUG_CAPS=ON`.
2424
```cpp
2525
...
2626
GPU_Debug: Allocate 58982400 bytes of usm_host allocation type (current=117969612; max=117969612)

src/plugins/intel_gpu/src/graph/include/pass_manager.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,8 @@ class memory_dependency_pass : public base_pass {
320320
return;
321321
}
322322

323-
if ((node->can_be_optimized() && !node->is_runtime_skippable()) || !dep->can_be_optimized()) {
323+
if ((!dep->can_be_optimized() || !dep->is_runtime_skippable()) && ((node->can_be_optimized() && !node->is_runtime_skippable())
324+
|| !dep->can_be_optimized())) {
324325
node->add_memory_dependency(static_cast<int32_t>(dep->get_unique_id()));
325326
} else {
326327
if (node->is_runtime_skippable() || dep->is_runtime_skippable() || dep->can_be_optimized()) {

src/plugins/intel_gpu/src/graph/primitive_inst.cpp

+5-2
Original file line numberDiff line numberDiff line change
@@ -1660,17 +1660,20 @@ void primitive_inst::do_runtime_skip_scatter_update() {
16601660
return;
16611661

16621662
GPU_DEBUG_TRACE_DETAIL << "[do_runtime_skip_scatter_update] " << id() << " : check optimizability" << std::endl;
1663+
const auto& input_layout = _impl_params->get_input_layout(0);
1664+
const auto& output_layout = _impl_params->get_output_layout(0);
16631665
const auto& idx_layout = _impl_params->get_input_layout(1);
16641666
const auto& update_layout = _impl_params->get_input_layout(2);
16651667

1666-
if (idx_layout.count() > 0 && update_layout.count() > 0) {
1668+
if ((idx_layout.count() > 0 && update_layout.count() > 0) || (get_node().is_type<scatter_elements_update>() && input_layout != output_layout)) {
16671669
// set shape_change to realloc memory for same input shapes
16681670
if (can_be_optimized()) {
16691671
set_flag(ExecutionFlags::SHAPE_CHANGED);
16701672
}
16711673
set_can_be_optimized(false);
16721674
GPU_DEBUG_TRACE_DETAIL << "--- Cannot optimize because idx_layout (" << idx_layout.to_short_string()
1673-
<< ") and update_layout(" << update_layout.to_short_string() << ") are not zero" << std::endl;
1675+
<< ") and update_layout(" << update_layout.to_short_string() << ") are not zero"
1676+
"or input layout is different than output layout" << std::endl;
16741677
return;
16751678
}
16761679

src/plugins/intel_gpu/src/kernel_selector/kernels/scatter_update/scatter_elements_update_kernel_ref.cpp

+10-1
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,15 @@ bool ScatterElementsUpdateKernelRef::Validate(const Params& p) const {
163163
return true;
164164
}
165165

166+
// Decides whether the kernel with index `kernel_id` may skip execution for `params`.
//
// For kernel 0 only: if the first output has a non-zero logical size and the first
// output tensor compares unequal to the first input tensor, the kernel must run and
// this returns false without consulting the generic rule.
// In every other case (any other kernel index, an empty output, or output equal to
// input) the decision is delegated to KernelData::SkipKernelExecution(params).
//
// NOTE(review): presumably kernel 0 is the stage that copies the input data into the
// output, which would explain why it cannot be skipped when the two tensors differ --
// confirm against the kernel source before relying on this.
bool ScatterElementsUpdateKernelRef::SkipKernelExecution(const scatter_elements_update_params& params, size_t kernel_id) const {
167+
if (kernel_id == 0) {
168+
if (params.outputs[0].LogicalSize() != 0 && params.outputs[0] != params.inputs[0]) {
169+
return false;
170+
}
171+
}
172+
return KernelData::SkipKernelExecution(params);
173+
}
174+
166175
void ScatterElementsUpdateKernelRef::GetUpdateDispatchDataFunc(KernelData& kd) const {
167176
kd.update_dispatch_data_func = [this](const Params& params, KernelData& kd) {
168177
const auto& prim_params = static_cast<const scatter_elements_update_params&>(params);
@@ -172,7 +181,7 @@ void ScatterElementsUpdateKernelRef::GetUpdateDispatchDataFunc(KernelData& kd) c
172181
auto dispatchData = SetDefault(prim_params, i == 1);
173182
kd.kernels[i].params.workGroups.global = dispatchData.gws;
174183
kd.kernels[i].params.workGroups.local = dispatchData.lws;
175-
kd.kernels[i].skip_execution = KernelData::SkipKernelExecution(prim_params);
184+
kd.kernels[i].skip_execution = SkipKernelExecution(prim_params, i);
176185
}
177186
};
178187
}

src/plugins/intel_gpu/src/kernel_selector/kernels/scatter_update/scatter_elements_update_kernel_ref.h

+1
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ class ScatterElementsUpdateKernelRef : public KernelBaseOpenCL {
3434

3535
protected:
3636
bool Validate(const Params& p) const override;
37+
bool SkipKernelExecution(const scatter_elements_update_params& params, size_t kernel_id) const;
3738
void GetUpdateDispatchDataFunc(KernelData& kd) const override;
3839
};
3940
} // namespace kernel_selector

src/plugins/intel_gpu/src/runtime/memory_pool.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,10 @@ bool memory_pool::has_conflict(const memory_set& mem_cand,
4949
void memory_pool::release_memory(memory* mem, const size_t& unique_id, primitive_id prim_id, uint32_t network_id) {
5050
// check non padded pool first
5151
auto _layout = mem->get_layout();
52+
if (_layout.is_dynamic()) {
53+
const auto max_shape = _layout.get_partial_shape().get_max_shape();
54+
_layout = _layout.clone_with_other_shape(max_shape);
55+
}
5256
auto type = mem->get_allocation_type();
5357
const auto _layout_bytes_count = _layout.bytes_count();
5458

0 commit comments

Comments
 (0)