@@ -316,6 +316,83 @@ TEST(fully_connected_gpu, no_biases_int8) {
316
316
ASSERT_EQ (-52 .0f , output_ptr[3 ]);
317
317
}
318
318
319
+ TEST (fully_connected_gpu, no_biases_fc_i32_reorder_f16) {
320
+ auto & engine = get_test_engine ();
321
+
322
+ auto input_prim = engine.allocate_memory ({ data_types::i32, format::bfyx, { 1 , 1 , 3 , 8 } });
323
+ auto weights_prim = engine.allocate_memory ({ data_types::i32, format::bfyx, { 1 , 1 , 3 , 4 } });
324
+
325
+ set_values<int32_t >(input_prim, {
326
+ 1 , 2 , 3 ,
327
+ 5 , 2 , 2 ,
328
+ 4 , 6 , 3 ,
329
+ 3 , 5 , 1 ,
330
+ 1 , 1 , 1 ,
331
+ 9 , -2 , -1 ,
332
+ -3 , -2 , -5 ,
333
+ 3 , 2 , 5
334
+ });
335
+
336
+ set_values<int32_t >(weights_prim, {
337
+ 1 , 2 , 3 ,
338
+ 4 , 5 , 6 ,
339
+ 1 , 1 , 1 ,
340
+ 4 , 2 , 0
341
+ });
342
+
343
+ cldnn::topology topology{
344
+ input_layout (" input" , input_prim->get_layout ()),
345
+ data (" weights" , weights_prim),
346
+ fully_connected (" fc_prim" , input_info (" input" ), " weights" ),
347
+ reorder (" reorder_to_f16" , input_info (" fc_prim" ), { data_types::f16, format::bfyx, { 1 , 1 , 4 , 5 } }),
348
+ activation (" output" , input_info (" reorder_to_f16" ), activation_func::floor )
349
+ };
350
+
351
+ ExecutionConfig config = get_test_default_config (engine);
352
+ config.set_property (ov::intel_gpu::optimize_data (true ));
353
+ config.set_property (ov::intel_gpu::allow_new_shape_infer (true ));
354
+
355
+ cldnn::network network (engine, topology, config);
356
+
357
+ network.set_input_data (" input" , input_prim);
358
+
359
+ auto outputs = network.execute ();
360
+ ASSERT_EQ (outputs.size (), size_t (1 ));
361
+ ASSERT_EQ (outputs.begin ()->first , " output" );
362
+
363
+ auto output_memory = outputs.begin ()->second .get_memory ();
364
+ cldnn::mem_lock<ov::float16> output_ptr (output_memory, get_test_stream ());
365
+ auto output_layout = output_memory->get_layout ();
366
+
367
+ int y_size = output_layout.spatial (1 );
368
+ int x_size = output_layout.spatial (0 );
369
+ int f_size = output_layout.feature ();
370
+ int b_size = output_layout.batch ();
371
+
372
+ ASSERT_EQ (output_layout.format , format::bfyx);
373
+ ASSERT_EQ (y_size, 8 );
374
+ ASSERT_EQ (x_size, 4 );
375
+ ASSERT_EQ (f_size, 1 );
376
+ ASSERT_EQ (b_size, 1 );
377
+
378
+ VVF<float > output_ref = {
379
+ { 210 , 173 , 152 , 108 },
380
+ { 112 , 91 , 79 , 66 },
381
+ { 47 , 18 , 2 , 0 },
382
+ { 0 , 0 , 0 , 0 },
383
+ { 0 , 0 , 0 , 0 },
384
+ { 0 , 0 , 0 , 0 },
385
+ { 0 , 0 , 0 , 0 },
386
+ { 0 , 0 , 0 , 0 },
387
+ };
388
+
389
+ for (int y = 0 ; y < y_size; ++y) {
390
+ for (int x = 0 ; x < x_size; ++x) {
391
+ ASSERT_EQ (output_ref[y][x], output_ptr[y * x_size + x]);
392
+ }
393
+ }
394
+ }
395
+
319
396
TEST (fully_connected_gpu, xb_f32_batch_1) {
320
397
// Input : 3x1
321
398
// Output : 4x1
0 commit comments