Skip to content

Commit 434cbcc

Browse files
[GPU] Add i32 output for FC
1 parent cf9aa88 commit 434cbcc

File tree

3 files changed: +84 −1 lines changed

src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bfyx_ref.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ ParamsKey FullyConnected_bfyx_Ref::GetSupportedKey() const {
1515
k.EnableInputDataType(Datatype::F32);
1616
k.EnableInputDataType(Datatype::INT8);
1717
k.EnableInputDataType(Datatype::UINT8);
18+
k.EnableOutputDataType(Datatype::INT32);
1819
k.EnableOutputDataType(Datatype::F16);
1920
k.EnableOutputDataType(Datatype::F32);
2021
k.EnableOutputDataType(Datatype::INT8);

src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/matmul.cpp

+6-1
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,11 @@ const std::vector<ov::element::Type> netPRCs {
157157
ov::element::f32,
158158
};
159159

160+
// Element types f32 and i32. In this change the list is fed through
// ::testing::ValuesIn to fullyConnectedParams3D_smoke, replacing the former
// single ::testing::Values(ov::element::f32) entry, so the 3D smoke
// combination is instantiated once per listed type.
// NOTE(review): presumably this slot is the network precision (per the
// "netPRCs" naming) - confirm against the test parameter tuple.
const std::vector<ov::element::Type> netPRCs_f32_i32 {
161+
ov::element::f32,
162+
ov::element::i32
163+
};
164+
160165

161166
/* ============= FullyConnected ============= */
162167

@@ -309,7 +314,7 @@ const std::vector<ShapeRelatedParams> IS3D_nightly = {
309314
};
310315

311316
const auto fullyConnectedParams3D_smoke = ::testing::Combine(::testing::ValuesIn(IS3D_smoke),
312-
::testing::Values(ov::element::f32),
317+
::testing::ValuesIn(netPRCs_f32_i32),
313318
::testing::Values(ov::element::undefined),
314319
::testing::Values(ov::element::undefined),
315320
::testing::Values(ov::test::utils::InputLayerType::CONSTANT),

src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp

+77
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,83 @@ TEST(fully_connected_gpu, no_biases_int8) {
316316
ASSERT_EQ(-52.0f, output_ptr[3]);
317317
}
318318

319+
// Runs a fully_connected primitive (no bias argument) on i32 input and i32
// weights, followed by a reorder to f16 and a floor activation, then checks
// the layout and values of the single network output named "output".
TEST(fully_connected_gpu, no_biases_fc_i32_reorder_f16) {
320+
auto& engine = get_test_engine();
321+
// Both buffers are i32 / bfyx; the input is allocated with tensor
// { 1, 1, 3, 8 } and the weights with tensor { 1, 1, 3, 4 }.
322+
auto input_prim = engine.allocate_memory({ data_types::i32, format::bfyx, { 1, 1, 3, 8 } });
323+
auto weights_prim = engine.allocate_memory({ data_types::i32, format::bfyx, { 1, 1, 3, 4 } });
324+
// 24 input values, written here as 8 groups of 3.
325+
set_values<int32_t>(input_prim, {
326+
1, 2, 3,
327+
5, 2, 2,
328+
4, 6, 3,
329+
3, 5, 1,
330+
1, 1, 1,
331+
9, -2, -1,
332+
-3, -2, -5,
333+
3, 2, 5
334+
});
335+
// 12 weight values, written here as 4 groups of 3.
336+
set_values<int32_t>(weights_prim, {
337+
1, 2, 3,
338+
4, 5, 6,
339+
1, 1, 1,
340+
4, 2, 0
341+
});
342+
// Graph: input -> fc_prim -> reorder_to_f16 -> output (floor activation).
// NOTE(review): the reorder requests an f16 bfyx layout with tensor
// { 1, 1, 4, 5 }, while the assertions further down expect y_size == 8 and
// x_size == 4 - confirm how this requested shape is treated when
// allow_new_shape_infer is enabled.
343+
cldnn::topology topology{
344+
input_layout("input", input_prim->get_layout()),
345+
data("weights", weights_prim),
346+
fully_connected("fc_prim", input_info("input"), "weights"),
347+
reorder("reorder_to_f16", input_info("fc_prim"), { data_types::f16, format::bfyx, { 1, 1, 4, 5 } }),
348+
activation("output", input_info("reorder_to_f16"), activation_func::floor)
349+
};
350+
// Start from the default test config and turn on optimize_data and
// allow_new_shape_infer for this network.
351+
ExecutionConfig config = get_test_default_config(engine);
352+
config.set_property(ov::intel_gpu::optimize_data(true));
353+
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
354+
355+
cldnn::network network(engine, topology, config);
356+
357+
network.set_input_data("input", input_prim);
358+
// Exactly one output is expected, and it must be the activation "output".
359+
auto outputs = network.execute();
360+
ASSERT_EQ(outputs.size(), size_t(1));
361+
ASSERT_EQ(outputs.begin()->first, "output");
362+
// The output buffer is read as ov::float16 elements.
363+
auto output_memory = outputs.begin()->second.get_memory();
364+
cldnn::mem_lock<ov::float16> output_ptr (output_memory, get_test_stream());
365+
auto output_layout = output_memory->get_layout();
366+
367+
int y_size = output_layout.spatial(1);
368+
int x_size = output_layout.spatial(0);
369+
int f_size = output_layout.feature();
370+
int b_size = output_layout.batch();
371+
// Expected output layout: bfyx with batch 1, feature 1, y 8, x 4.
372+
ASSERT_EQ(output_layout.format, format::bfyx);
373+
ASSERT_EQ(y_size, 8);
374+
ASSERT_EQ(x_size, 4);
375+
ASSERT_EQ(f_size, 1);
376+
ASSERT_EQ(b_size, 1);
377+
// Reference values, indexed as output_ref[y][x] (8 rows of 4).
// NOTE(review): rows 3..7 are all zeros and the first entry is 210, whereas a
// plain dot product of the first input group {1, 2, 3} with the first weight
// group {1, 2, 3} would be 14 - confirm these reference values are intended
// and not simply captured from an observed run.
378+
VVF<float> output_ref = {
379+
{ 210, 173, 152, 108 },
380+
{ 112, 91, 79, 66 },
381+
{ 47, 18, 2, 0 },
382+
{ 0, 0, 0, 0 },
383+
{ 0, 0, 0, 0 },
384+
{ 0, 0, 0, 0 },
385+
{ 0, 0, 0, 0 },
386+
{ 0, 0, 0, 0 },
387+
};
388+
// The locked buffer is addressed as a flat array at y * x_size + x; each
// element is compared to the float reference with ASSERT_EQ (exact equality),
// so the first mismatch aborts the test.
389+
for (int y = 0; y < y_size; ++y) {
390+
for (int x = 0; x < x_size; ++x) {
391+
ASSERT_EQ(output_ref[y][x], output_ptr[y * x_size + x]);
392+
}
393+
}
394+
}
395+
319396
TEST(fully_connected_gpu, xb_f32_batch_1) {
320397
// Input : 3x1
321398
// Output : 4x1

0 commit comments

Comments
 (0)