@@ -209,6 +209,112 @@ TEST(concat_cpu_impl, dynamic_4d_f) {
209
209
start_concat_test_dynamic (impl_types::cpu);
210
210
}
211
211
212
// Test: 2D concatenation where the first input is plain bfyx f32 and the second
// input goes through a reorder to f16 / b_fs_yx_fsv32 before reaching concat.
// The concat primitive is forced onto the OCL implementation with bfyx format,
// and the test checks both the resulting shape and every output value.
// NOTE(review): the blanks inside the string literals and around tokens look
// like an artifact of the diff rendering - confirm against the real file.
+ TEST (concat_gpu, dynamic_2d_bfyx_and_b_fs_yx_fsv32) {
213
+ auto & engine = get_test_engine ();
214
+
215
// input0 is declared with fixed dims 2x4; input1 is declared as -1 x 1
// (-1 presumably marks a dynamic dimension - TODO confirm).
+ topology topology (
216
+ input_layout (" input0" , { { 2 , 4 }, data_types::f32, format::bfyx }),
217
+ input_layout (" input1" , { { -1 , 1 }, data_types::f32, format::bfyx }),
218
+ reorder (" reorder_input1" , input_info (" input1" ), { { -1 , 1 }, data_types::f16, format::b_fs_yx_fsv32 }),
219
+ concatenation (" concat" ,
220
+ { input_info (" input0" ), input_info (" reorder_input1" ) },
221
// Third argument is presumably the concat axis; the expected shape computed
// further down sums dimension 1 of both inputs, which is consistent with that.
+ 1 ,
222
+ data_types::f32)
223
+ );
224
+
225
// Graph optimizations are switched off, new shape inference is switched on,
// and the concat node is pinned to an OCL implementation in bfyx.
+ ExecutionConfig config = get_test_default_config (engine);
226
+ config.set_property (ov::intel_gpu::optimize_data (false ));
227
+ config.set_property (ov::intel_gpu::allow_new_shape_infer (true ));
228
+ ov::intel_gpu::ImplementationDesc impl = { format::bfyx, " " , impl_types::ocl };
229
+ config.set_property (ov::intel_gpu::force_implementations (ov::intel_gpu::ImplForcingMap{ { " concat" , impl } }));
230
+
231
+ auto network = cldnn::network::build_network (engine, topology, config);
232
+
233
// Concrete layouts used for the actual input buffers: 2x4 and 2x1, both f32 bfyx.
+ layout layout0 = { { 2 , 4 }, data_types::f32, format::bfyx };
234
+ layout layout1 = { { 2 , 1 }, data_types::f32, format::bfyx };
235
+
236
+ auto input0 = engine.allocate_memory (layout0);
237
+ auto input1 = engine.allocate_memory (layout1);
238
+
239
+ set_values<float >(input0, { 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 });
240
+ set_values<float >(input1, { 8 , 9 });
// Expected result: for each of the 2 rows, the 4 values written to input0
// followed by the 1 value written to input1.
241
+ VF<float > expected_out = { 0 , 1 , 2 , 3 , 8 , 4 , 5 , 6 , 7 , 9 };
242
+
243
+ network->set_input_data (" input0" , input0);
244
+ network->set_input_data (" input1" , input1);
245
+
246
// The network must produce exactly one output, and it must be the concat node.
+ auto outputs = network->execute ();
247
+ ASSERT_EQ (outputs.size (), size_t (1 ));
248
+ ASSERT_EQ (outputs.begin ()->first , " concat" );
249
+
250
+ auto output_memory = outputs.at (" concat" ).get_memory ();
251
+ auto output_layout = outputs.at (" concat" ).get_layout ();
252
+ cldnn::mem_lock<float > output_ptr (output_memory, get_test_stream ());
253
+
254
// Expected shape = layout0's shape with dimension 1 replaced by the sum of
// dimension 1 of layout0 and layout1.
+ ov::PartialShape expected_shape = layout0.get_partial_shape ();
255
+ expected_shape[1 ] = layout0.get_partial_shape ()[1 ] +
256
+ layout1.get_partial_shape ()[1 ];
257
+
258
+ ASSERT_EQ (output_layout.get_partial_shape (), expected_shape);
259
+
260
// Element-wise comparison; the failing index is appended to the assertion message.
+ for (size_t i = 0 ; i < output_layout.count (); ++i) {
261
+ ASSERT_EQ (expected_out[i], output_ptr[i]) << " i = " << i;
262
+ }
263
+ }
264
+
265
// Test: 4D variant of the mixed-format concatenation check. Both inputs are
// declared with all four dims set to -1; the second input is reordered to
// f16 / b_fs_yx_fsv32 before concat. The concat primitive is forced onto the
// OCL implementation with bfyx format, and the test checks the resulting
// shape and every output value.
// NOTE(review): the blanks inside the string literals and around tokens look
// like an artifact of the diff rendering - confirm against the real file.
+ TEST (concat_gpu, dynamic_4d_bfyx_and_b_fs_yx_fsv32) {
266
+ auto & engine = get_test_engine ();
267
+
268
// All dims are -1 here (presumably fully dynamic shapes - TODO confirm);
// the concrete shapes are only supplied with the input buffers below.
+ topology topology (
269
+ input_layout (" input0" , { { -1 , -1 , -1 , -1 }, data_types::f32, format::bfyx }),
270
+ input_layout (" input1" , { { -1 , -1 , -1 , -1 }, data_types::f32, format::bfyx }),
271
+ reorder (" reorder_input1" , input_info (" input1" ), { { -1 , -1 , -1 , -1 }, data_types::f16, format::b_fs_yx_fsv32 }),
272
+ concatenation (" concat" ,
273
+ { input_info (" input0" ), input_info (" reorder_input1" ) },
274
// Third argument is presumably the concat axis; the expected shape computed
// further down sums dimension 1 of both inputs, which is consistent with that.
+ 1 ,
275
+ data_types::f32)
276
+ );
277
+
278
// Graph optimizations are switched off, new shape inference is switched on,
// and the concat node is pinned to an OCL implementation in bfyx.
+ ExecutionConfig config = get_test_default_config (engine);
279
+ config.set_property (ov::intel_gpu::optimize_data (false ));
280
+ config.set_property (ov::intel_gpu::allow_new_shape_infer (true ));
281
+ ov::intel_gpu::ImplementationDesc impl = { format::bfyx, " " , impl_types::ocl };
282
+ config.set_property (ov::intel_gpu::force_implementations (ov::intel_gpu::ImplForcingMap{ { " concat" , impl } }));
283
+
284
+ auto network = cldnn::network::build_network (engine, topology, config);
285
+
286
// Concrete layouts used for the actual input buffers: 2x4x1x1 and 2x1x1x1, both f32 bfyx.
+ layout layout0 = { { 2 , 4 , 1 , 1 }, data_types::f32, format::bfyx };
287
+ layout layout1 = { { 2 , 1 , 1 , 1 }, data_types::f32, format::bfyx };
288
+
289
+ auto input0 = engine.allocate_memory (layout0);
290
+ auto input1 = engine.allocate_memory (layout1);
291
+
292
+ set_values<float >(input0, { 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 });
293
+ set_values<float >(input1, { 8 , 9 });
// Expected result: for each of the 2 entries along dimension 0, the 4 values
// written to input0 followed by the 1 value written to input1.
294
+ VF<float > expected_out = { 0 , 1 , 2 , 3 , 8 , 4 , 5 , 6 , 7 , 9 };
295
+
296
+ network->set_input_data (" input0" , input0);
297
+ network->set_input_data (" input1" , input1);
298
+
299
// The network must produce exactly one output, and it must be the concat node.
+ auto outputs = network->execute ();
300
+ ASSERT_EQ (outputs.size (), size_t (1 ));
301
+ ASSERT_EQ (outputs.begin ()->first , " concat" );
302
+
303
+ auto output_memory = outputs.at (" concat" ).get_memory ();
304
+ auto output_layout = outputs.at (" concat" ).get_layout ();
305
+ cldnn::mem_lock<float > output_ptr (output_memory, get_test_stream ());
306
+
307
// Expected shape = layout0's shape with dimension 1 replaced by the sum of
// dimension 1 of layout0 and layout1.
+ ov::PartialShape expected_shape = layout0.get_partial_shape ();
308
+ expected_shape[1 ] = layout0.get_partial_shape ()[1 ] +
309
+ layout1.get_partial_shape ()[1 ];
310
+
311
+ ASSERT_EQ (output_layout.get_partial_shape (), expected_shape);
312
+
313
// Element-wise comparison; the failing index is appended to the assertion message.
+ for (size_t i = 0 ; i < output_layout.count (); ++i) {
314
+ ASSERT_EQ (expected_out[i], output_ptr[i]) << " i = " << i;
315
+ }
316
+ }
317
+
212
318
TEST (concat_gpu, dynamic_6d_f) {
213
319
auto & engine = get_test_engine ();
214
320
0 commit comments