Skip to content

Commit 71d9463

Browse files
[GPU] Fix memory reuse (#27827)
### Details: - Fixed invalid memory reuse that caused an accuracy issue for the SD3 transformer model on iGPUs ### Tickets: - *CVS-157962* Signed-off-by: Vladimir Paramuzov <vladimir.paramuzov@intel.com>
1 parent aafb3fc commit 71d9463

File tree

3 files changed

+137
-6
lines changed

3 files changed

+137
-6
lines changed

src/plugins/intel_gpu/src/graph/debug_helper.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -51,11 +51,13 @@ void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream, bool dump
5151
if (tmp_size == size) {
5252
file_stream << "shape: " << size.to_string() << " ";
5353
file_stream << "(count: " << size.count()
54+
<< ", addr: " << mem->buffer_ptr()
5455
<< ", original format: " << cldnn::fmt_to_str(mem->get_layout().format) << ")"
5556
<< (dump_raw ? " raw data" : "") << std::endl;
5657
} else {
5758
file_stream << "shape: " << tmp_size.to_string() << " ";
5859
file_stream << "(count: " << tmp_size.count()
60+
<< ", addr: " << mem->buffer_ptr()
5961
<< ", original format: " << cldnn::fmt_to_str(mem->get_layout().format)
6062
<< ", original shape: " << size.to_string() << ")"
6163
<< (dump_raw ? " raw data" : "") << std::endl;

src/plugins/intel_gpu/src/graph/include/pass_manager.h

+1-6
Original file line numberDiff line numberDiff line change
@@ -307,13 +307,8 @@ class memory_dependency_pass : public base_pass {
307307
if ((node->can_be_optimized() && !node->is_runtime_skippable()) || !dep->can_be_optimized()) {
308308
node->add_memory_dependency(static_cast<int32_t>(dep->get_unique_id()));
309309
} else {
310-
if (node->is_runtime_skippable() || dep->is_runtime_skippable()) {
310+
if (node->is_runtime_skippable() || dep->is_runtime_skippable() || dep->can_be_optimized()) {
311311
node->add_memory_dependency(static_cast<int32_t>(dep->get_unique_id()));
312-
for (const auto& subdep : dep->get_dependencies()) {
313-
add_memory_dependency(node, subdep.first);
314-
add_memory_dependency(subdep.first, node);
315-
}
316-
return;
317312
}
318313

319314
for (const auto& subdep : dep->get_dependencies()) {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
// Copyright (C) 2024 Intel Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
//
4+
#include "shared_test_classes/base/ov_subgraph.hpp"
5+
#include "common_test_utils/ov_tensor_utils.hpp"
6+
#include "common_test_utils/node_builders/eltwise.hpp"
7+
8+
#include "openvino/op/parameter.hpp"
9+
#include "openvino/op/constant.hpp"
10+
#include "openvino/op/result.hpp"
11+
#include "openvino/op/variadic_split.hpp"
12+
13+
namespace {
14+
using ov::test::InputShape;
15+
16+
typedef std::tuple<
17+
std::vector<InputShape>, // input shapes
18+
size_t, // split axis
19+
ov::element::Type, // Model type
20+
std::string // Device name
21+
> SplitReshapeEltwiseTestParams;
22+
23+
// Element types the model is built with; only f16 is exercised.
const std::vector<ov::element::Type> model_precisions{ov::element::f16};
26+
27+
// Subgraph test: a VariadicSplit whose two outputs feed element-wise Adds, one directly and one
// through a Reshape. Both sums are converted to f32 and exposed as model results.
//
//   params[0] -> VariadicSplit(axis, {5, 5}) -> out(1) ------------> Add(params[1]) -> Convert -> Result
//                                            -> out(0) -> Reshape -> Add(params[2]) -> Convert -> Result
class SplitReshapeEltwiseTest : public testing::WithParamInterface<SplitReshapeEltwiseTestParams>,
                                virtual public ov::test::SubgraphBaseTest {
public:
    // Builds the test-case name from the parameter tuple:
    // "IS=<shapes>_axis=<axis>_Precision=<type>_target_device=<device>".
    static std::string getTestCaseName(const testing::TestParamInfo<SplitReshapeEltwiseTestParams>& obj) {
        std::vector<InputShape> input_shapes;
        size_t axis = 0;
        ov::element::Type precision;
        std::string target_device;
        std::tie(input_shapes, axis, precision, target_device) = obj.param;

        std::ostringstream result;
        result << "IS=";
        for (const auto& shape : input_shapes) {
            // Partial (possibly dynamic) shape first, then every concrete shape it is run with.
            result << ov::test::utils::partialShape2str({shape.first}) << "_";
            for (const auto& actual_shape : shape.second) {
                result << ov::test::utils::partialShape2str({actual_shape}) << "_";
            }
        }
        result << "axis=" << axis << "_";
        result << "Precision=" << precision << "_";
        result << "target_device=" << target_device;
        return result.str();
    }

protected:
    // Regenerates the input tensors for the given static shapes, one tensor per model input,
    // using the fixed InputGenerateData settings below.
    void generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) override {
        inputs.clear();
        const auto& funcInputs = function->inputs();
        for (size_t i = 0; i < funcInputs.size(); ++i) {
            const auto& funcInput = funcInputs[i];

            ov::test::utils::InputGenerateData in_data;
            in_data.start_from = 0;
            in_data.range = 80;
            in_data.resolution = 8;

            ov::Tensor tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(),
                                                                        targetInputStaticShapes[i],
                                                                        in_data);
            inputs.insert({funcInput.get_node_shared_ptr(), tensor});
        }
    }

    // Builds the model under test from the current parameter tuple.
    // Expects three input shapes: [0] split input, [1] second operand of the non-reshaped Add,
    // [2] first operand of the reshaped Add (its shape is also the Reshape target).
    void SetUp() override {
        std::vector<InputShape> input_shapes;
        size_t axis = 0;
        ov::element::Type model_type;
        std::tie(input_shapes, axis, model_type, targetDevice) = this->GetParam();

        init_input_shapes(input_shapes);

        ov::ParameterVector params = {
            std::make_shared<ov::op::v0::Parameter>(model_type, inputDynamicShapes[0]),
            std::make_shared<ov::op::v0::Parameter>(model_type, inputDynamicShapes[1]),
            std::make_shared<ov::op::v0::Parameter>(model_type, inputDynamicShapes[2]),
        };

        // Use the axis from the test parameters. It was previously hard-coded to 1, so the
        // parameter only affected the generated test name and never the model.
        auto axis_op = ov::op::v0::Constant::create(ov::element::i64,
                                                    ov::Shape{},
                                                    std::vector<int64_t>{static_cast<int64_t>(axis)});
        axis_op->set_friendly_name("axis");

        // NOTE: the split lengths are fixed, so the split dimension must have extent 10.
        auto split_sizes = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{2}, {5, 5});
        split_sizes->set_friendly_name("split_sizes");

        auto split = std::make_shared<ov::op::v1::VariadicSplit>(params[0], axis_op, split_sizes);
        split->set_friendly_name("split");

        auto add_not_reshaped = std::make_shared<ov::op::v1::Add>(split->output(1), params[1]);
        add_not_reshaped->set_friendly_name("add_not_reshaped");

        // Reshape target mirrors the third input shape; dynamic dimensions become -1.
        std::vector<int64_t> target_shape;
        target_shape.reserve(static_cast<size_t>(inputDynamicShapes[2].size()));
        for (const auto& d : inputDynamicShapes[2]) {
            target_shape.push_back(d.is_dynamic() ? -1 : d.get_length());
        }
        auto target_shape_node =
            ov::op::v0::Constant::create(ov::element::i64, ov::Shape{target_shape.size()}, target_shape);
        auto reshape = std::make_shared<ov::op::v1::Reshape>(split->output(0), target_shape_node, false);

        auto add_reshaped = std::make_shared<ov::op::v1::Add>(params[2], reshape);
        add_reshaped->set_friendly_name("add_reshaped");

        auto convert1 = std::make_shared<ov::op::v0::Convert>(add_not_reshaped, ov::element::f32);
        auto convert2 = std::make_shared<ov::op::v0::Convert>(add_reshaped, ov::element::f32);

        ov::ResultVector results = {std::make_shared<ov::op::v0::Result>(convert1),
                                    std::make_shared<ov::op::v0::Result>(convert2)};
        function = std::make_shared<ov::Model>(results, params, "eltwise_add_out");
    }
};
113+
114+
// Calls the fixture's run() once for every instantiated parameter combination.
TEST_P(SplitReshapeEltwiseTest, Inference) { run(); }
117+
118+
// Shape sets for the test. Each InputShape pairs a partial shape (-1 = dynamic dimension)
// with the list of concrete shapes it is run with.
const std::vector<std::vector<ov::test::InputShape>> input_shapes = {
    {
        {{-1, 10}, {{2, 10}, {1, 10}}},       // split in shape
        {{-1, 5}, {{2, 5}, {1, 5}}},          // not reshaped add input shape
        {{-1, 1, 5}, {{2, 1, 5}, {1, 1, 5}}}  // reshaped add input shape
    },
};
125+
126+
127+
// Cartesian product of all parameter values used by the smoke instantiation.
const auto testParams_smoke =
    ::testing::Combine(::testing::ValuesIn(input_shapes),
                       ::testing::Values(1),  // axis
                       ::testing::ValuesIn(model_precisions),
                       ::testing::Values(ov::test::utils::DEVICE_GPU));
131+
132+
// Registers the smoke suite; test names come from SplitReshapeEltwiseTest::getTestCaseName.
INSTANTIATE_TEST_SUITE_P(smoke_dynamic_model,
                         SplitReshapeEltwiseTest,
                         testParams_smoke,
                         SplitReshapeEltwiseTest::getTestCaseName);
134+
} // namespace

0 commit comments

Comments
 (0)