Skip to content

Commit 11cacc9

Browse files
authored
[GPU] Add new_shape_infer support in deconvolution (openvinotoolkit#28916)
Details: Add new_shape_infer support in deconvolution. Tickets: 101276, 160258.
1 parent 6db4939 commit 11cacc9

File tree

3 files changed

+120
-16
lines changed

3 files changed

+120
-16
lines changed

src/plugins/intel_gpu/src/graph/deconvolution.cpp

+16-15
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ std::vector<layout> deconvolution_inst::calc_output_layouts(deconvolution_node c
134134
out_fmt = node.get_preferred_output_fmt();
135135
}
136136

137-
if (desc->with_output_size) {
137+
if (!node.get_program().is_new_shape_infer() && desc->with_output_size) {
138138
CLDNN_ERROR_LESS_OR_EQUAL_THAN(desc->id,
139139
"User-defined output spatial X",
140140
desc->output_size.spatial[0],
@@ -207,7 +207,7 @@ std::vector<layout> deconvolution_inst::calc_output_layouts(deconvolution_node c
207207
op.set_output_shape(output_partial_shape.to_shape());
208208
input_shapes.push_back(ov::Shape{output_partial_shape.size()});
209209
output_shapes = ov::op::v1::shape_infer(&op, input_shapes, pads_begin, pads_end);
210-
} else if (memory_deps.count(2)) {
210+
} else if ((desc->output_shape_id != "" || desc->output_partial_shape.size() > 0) && memory_deps.count(2)) {
211211
auto mem = memory_deps.at(2);
212212
auto dims = read_vector<int64_t>(mem, impl_param.get_stream());
213213
auto dims_shape = ov::Shape{dims.size()};
@@ -275,19 +275,20 @@ deconvolution_inst::typed_primitive_inst(network& network, deconvolution_node co
275275
"output size",
276276
output_layout.get_rank(),
277277
"Input/output number of dimension does not match.");
278-
CLDNN_ERROR_NOT_EQUAL(node.id(),
279-
"Stride size",
280-
stride.size(),
281-
"output size",
282-
output_layout.get_spatial_rank(),
283-
"Stride/output number of dimension does not match.");
284-
285-
CLDNN_ERROR_NOT_EQUAL(node.id(),
286-
"Input offset size",
287-
pad.size(),
288-
"input number of dimensions",
289-
output_layout.get_spatial_rank(),
290-
"");
278+
if (!node.get_program().is_new_shape_infer()) {
279+
CLDNN_ERROR_NOT_EQUAL(node.id(),
280+
"Stride size",
281+
stride.size(),
282+
"output size",
283+
output_layout.get_spatial_rank(),
284+
"Stride/output number of dimension does not match.");
285+
CLDNN_ERROR_NOT_EQUAL(node.id(),
286+
"Input offset size",
287+
pad.size(),
288+
"input number of dimensions",
289+
output_layout.get_spatial_rank(),
290+
"");
291+
}
291292

292293
auto filter_inst = node.weights().get_output_layout().convert_to_weights_layout(argument->grouped_weights_shape);
293294

src/plugins/intel_gpu/src/plugin/ops/convolution.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ static void CreateConvolutionBackpropDataOp(ProgramBuilder& p, const std::shared
132132
auto pads_end = op->get_pads_end();
133133
auto output_padding = op->get_output_padding();
134134

135-
if (!op->is_dynamic()) {
135+
if (!op->is_dynamic() && !p.use_new_shape_infer()) {
136136
// Extend 1d vectors to 2d as 1d can't be handled properly by the graph optimizer for now
137137
strides.resize(std::max<size_t>(2, strides.size()), 1);
138138
dilations.resize(std::max<size_t>(2, strides.size()), 1);

src/plugins/intel_gpu/tests/unit/fusions/deconvolution_fusion_test.cpp

+103
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,25 @@ struct deconv_eltw_test_params {
5656
size_t expected_not_fused_primitives;
5757
};
5858

59+
// Parameters for deconvolution fusing tests that run with new_shape_infer
// enabled (rank-preserving shapes, e.g. 1D deconvolution). Members are listed
// in the positional order used by the CASE_DECONV_* initializer macros, so
// their order must not change.
struct deconv_new_shape_infer_test_params {
    ov::PartialShape in_shape;                  // input tensor shape
    ov::PartialShape out_shape;                 // expected output tensor shape
    ov::PartialShape kernel;                    // weights shape
    ov::Strides stride;
    ov::CoordinateDiff pad;
    ov::Strides dilation;
    uint32_t groups;                            // number of deconvolution groups
    data_types data_type;                       // input data type
    format input_format;
    data_types weights_type;
    format weights_format;
    data_types default_type;                    // type used for bias/eltwise data
    format default_format;                      // format of the final reorder
    size_t expected_fused_primitives;           // primitive count after fusing (non-oneDNN)
    size_t expected_fused_primitives_onednn;    // primitive count after fusing on oneDNN (immad) devices
    size_t expected_not_fused_primitives;       // primitive count without fusing
};
77+
5978
class DeconvolutionFusingTest : public ::BaseFusingTest<deconv_test_params> {
6079
public:
6180
void execute(deconv_test_params& p, bool is_caching_test = false) {
@@ -94,6 +113,60 @@ class DeconvolutionFusingTest : public ::BaseFusingTest<deconv_test_params> {
94113
}
95114
};
96115

116+
// Fusing-test fixture that enables allow_new_shape_infer on both the fused and
// the non-fused network so the new (rank-preserving) deconvolution shape-infer
// path is exercised end to end.
class DeconvolutionFusingNewShapeInferTest : public ::BaseFusingTest<deconv_new_shape_infer_test_params> {
public:
    void SetUp() override {
        rg.set_seed(GET_SUITE_NAME);
        cfg_fused = get_test_default_config(engine);
        cfg_not_fused = get_test_default_config(engine);

        cfg_fused.set_property(ov::intel_gpu::optimize_data(true));
        cfg_fused.set_property(ov::intel_gpu::allow_new_shape_infer(true));
        cfg_not_fused.set_property(ov::intel_gpu::optimize_data(false));
        cfg_not_fused.set_property(ov::intel_gpu::allow_new_shape_infer(true));
        cfg_not_fused.set_property(ov::intel_gpu::allow_static_input_reorder(true));
    }

    void execute(deconv_new_shape_infer_test_params& p, bool is_caching_test = false) {
        execute(p, get_mem(get_input_layout(p)), is_caching_test);
    }

    // Builds both networks, feeds them the same input and compares results.
    // On oneDNN-capable (immad) devices the expected fused primitive count is
    // taken from the oneDNN-specific field before comparing.
    void execute(deconv_new_shape_infer_test_params& p, cldnn::memory::ptr input_prim, bool is_caching_test = false) {
        if (engine.get_device_info().supports_immad)
            p.expected_fused_primitives = p.expected_fused_primitives_onednn;

        network::ptr network_not_fused = get_network(this->engine, this->topology_non_fused, cfg_not_fused, get_test_stream_ptr(cfg_not_fused), is_caching_test);
        network::ptr network_fused = get_network(this->engine, this->topology_fused, cfg_fused, get_test_stream_ptr(cfg_fused), is_caching_test);
        network_fused->set_input_data("input", input_prim);
        network_not_fused->set_input_data("input", input_prim);

        compare(*network_not_fused, *network_fused, p);

        // Renamed the lambda parameter (was `p`, shadowing the test params `p`
        // above) and collapsed the `if (...) return true; return false;` idiom.
        auto find_deconv = [](primitive_info& prim) -> bool {
            return prim.original_id == "deconv";
        };

        // Log which kernel the (possibly fused) deconvolution was compiled to.
        auto pi_fused = network_fused->get_primitives_info();
        auto info_fused = std::find_if(pi_fused.begin(), pi_fused.end(), find_deconv);
        if (info_fused != pi_fused.end())
            std::cout << "kernel: " << info_fused->kernel_id << std::endl;
    }

    // Input layout carries the test's pad on the innermost spatial axis only.
    layout get_input_layout(deconv_new_shape_infer_test_params& p) {
        auto pad = p.pad;
        std::vector<int> pad_ = { 0, 0, static_cast<int>(pad[0]) };
        return layout{ p.in_shape, p.data_type, p.input_format, padding{ pad_ } };
    }

    layout get_weights_layout(deconv_new_shape_infer_test_params& p) {
        return layout{ p.kernel, p.weights_type, p.weights_format };
    }

    // Bias broadcasts over the output feature axis: shape {1, out_channels, 1}.
    layout get_bias_layout(deconv_new_shape_infer_test_params& p) {
        return layout{ ov::PartialShape{1, static_cast<int64_t>(p.out_shape.get_shape().at(1)), 1}, p.default_type, format::bfyx };
    }
};
169+
97170
class ConvEltwTest : public ::BaseFusingTest<deconv_eltw_test_params> {
98171
public:
99172

@@ -147,6 +220,9 @@ class ConvEltwTest : public ::BaseFusingTest<deconv_eltw_test_params> {
147220
#define CASE_DECONV_FP32_7 { 1, 16, 4, 5 }, { 1, 32, 7, 9 }, { 1, 1, 1, 1 }, { 2, 2 }, { 0, 0 }, { 1, 1 }, 1, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::is_os_yx_isv16_osv16, data_types::f32, format::bfyx
148221
#define CASE_DECONV_FP32_8 { 1, 32, 4, 5 }, { 1, 32, 7, 9 }, { 1, 1, 3, 3 }, { 2, 2 }, { 1, 1 }, { 1, 1 }, 32, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::gs_oiyx_gsv16, data_types::f32, format::bfyx
149222

223+
// 1D deconv
// Positional initializer for deconv_new_shape_infer_test_params:
//   in_shape, out_shape, kernel, stride, pad, dilation, groups,
//   data_type, input_format, weights_type, weights_format, default_type, default_format.
// The remaining expected-primitive counts are supplied at the instantiation site.
#define CASE_DECONV_1D_FP32_1 { 1, 512, 1500 }, { 1, 256, 12008 }, { 256, 512, 16 }, { 8 }, { 0 }, { 1 }, 1, data_types::f32, format::bfyx, data_types::f32, format::oiyx, data_types::f32, format::bfyx
225+
150226
#define CASE_DECONV_FP16_1 { 1, 15, 4, 5 }, { 1, 30, 6, 7 }, { 1, 1, 3, 3 }, { 1, 1 }, { 0, 0 }, { 1, 1 }, 1, data_types::f16, format::bfyx, data_types::f16, format::oiyx, data_types::f16, format::bfyx
151227
#define CASE_DECONV_FP16_2 { 1, 16, 4, 5 }, { 1, 32, 6, 7 }, { 1, 1, 3, 3 }, { 1, 1 }, { 0, 0 }, { 1, 1 }, 1, data_types::f16, format::b_fs_yx_fsv16, data_types::f16, format::is_os_yx_isv16_osv16, data_types::f16, format::bfyx
152228
#define CASE_DECONV_FP16_3 { 1, 16, 4, 5 }, { 1, 32, 4, 5 }, { 1, 1, 1, 1 }, { 1, 1 }, { 0, 0 }, { 1, 1 }, 1, data_types::f16, format::b_fs_yx_fsv16, data_types::f16, format::is_os_yx_isv16_osv16, data_types::f16, format::bfyx
@@ -390,6 +466,33 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, deconv_bias, ::testing::ValuesIn(std::vect
390466
deconv_test_params{ CASE_DECONV_S8S8_3D_3, 2, 2, 3 },
391467
}));
392468

469+
class deconv_bias_1d : public DeconvolutionFusingNewShapeInferTest {};
470+
TEST_P(deconv_bias_1d, basic) {
471+
auto p = GetParam();
472+
create_topologies(
473+
input_layout("input", get_input_layout(p)),
474+
data("weights", get_mem(get_weights_layout(p))),
475+
data("bias", get_mem(get_bias_layout(p))),
476+
deconvolution("deconv", input_info("input"), { "weights" }, p.groups, p.stride, p.pad, p.dilation),
477+
eltwise("bias_add", { input_info("deconv"), input_info("bias") }, eltwise_mode::sum),
478+
reorder("out", input_info("bias_add"), p.default_format, data_types::f32)
479+
);
480+
481+
if (engine.get_device_info().supports_immad &&
482+
p.default_type == data_types::f16 &&
483+
p.weights_format == format::is_os_yx_isv16_osv16) {
484+
GTEST_SKIP(); // Issue: 94154
485+
}
486+
487+
// Need much higher tolerance because of deconvolution -> convolution optimization
488+
tolerance = 1.f;
489+
execute(p);
490+
}
491+
492+
INSTANTIATE_TEST_SUITE_P(fusings_gpu, deconv_bias_1d, ::testing::ValuesIn(std::vector<deconv_new_shape_infer_test_params>{
493+
deconv_new_shape_infer_test_params{ CASE_DECONV_1D_FP32_1, 2, 2, 3 },
494+
}));
495+
393496
class deconv_scale : public DeconvolutionFusingTest {};
394497
TEST_P(deconv_scale, basic) {
395498
auto p = GetParam();

0 commit comments

Comments
 (0)