Skip to content

Commit bc0e01d

Browse files
[GPU] Add i32 output for FC
1 parent cf9aa88 commit bc0e01d

File tree

4 files changed

+63
-1
lines changed

4 files changed

+63
-1
lines changed

src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,7 @@ attach_fully_connected_impl::attach_fully_connected_impl() {
205205
typed_primitive_impl_ocl<fully_connected>::create<fully_connected_impl>, {
206206
std::make_tuple(data_types::f32, format::bfyx),
207207
std::make_tuple(data_types::f16, format::bfyx),
208+
std::make_tuple(data_types::i32, format::bfyx),
208209
std::make_tuple(data_types::u8, format::bfyx),
209210
std::make_tuple(data_types::i8, format::bfyx),
210211
});

src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bfyx_ref.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ ParamsKey FullyConnected_bfyx_Ref::GetSupportedKey() const {
1515
k.EnableInputDataType(Datatype::F32);
1616
k.EnableInputDataType(Datatype::INT8);
1717
k.EnableInputDataType(Datatype::UINT8);
18+
k.EnableOutputDataType(Datatype::INT32);
1819
k.EnableOutputDataType(Datatype::F16);
1920
k.EnableOutputDataType(Datatype::F32);
2021
k.EnableOutputDataType(Datatype::INT8);

src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/matmul.cpp

+6-1
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,11 @@ const std::vector<ov::element::Type> netPRCs {
157157
ov::element::f32,
158158
};
159159

160+
// Element types f32 and i32, passed via ::testing::ValuesIn to fullyConnectedParams3D_smoke.
// NOTE(review): presumably these are network precisions, going by the netPRCs naming - confirm.
const std::vector<ov::element::Type> netPRCs_f32_i32 {
161+
ov::element::f32,
162+
ov::element::i32
163+
};
164+
160165

161166
/* ============= FullyConnected ============= */
162167

@@ -309,7 +314,7 @@ const std::vector<ShapeRelatedParams> IS3D_nightly = {
309314
};
310315

311316
const auto fullyConnectedParams3D_smoke = ::testing::Combine(::testing::ValuesIn(IS3D_smoke),
312-
::testing::Values(ov::element::f32),
317+
::testing::ValuesIn(netPRCs_f32_i32),
313318
::testing::Values(ov::element::undefined),
314319
::testing::Values(ov::element::undefined),
315320
::testing::Values(ov::test::utils::InputLayerType::CONSTANT),

src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp

+55
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,61 @@ TEST(fully_connected_gpu, no_biases_int8) {
316316
ASSERT_EQ(-52.0f, output_ptr[3]);
317317
}
318318

319+
// Runs a bias-less fully_connected primitive on i32 input and i32 weights, converts the
// result to f32 with a reorder, and compares it element-wise against reference values.
TEST(fully_connected_gpu, no_biases_fc_i32) {
320+
const int32_t input_b = 1, input_f = 3, // size of the whole input buffer
321+
weight_b = 4, weight_f = 3; // size of the whole weights buffer
322+
323+
auto& engine = get_test_engine();
324+
325+
// Both buffers are allocated as i32 in bfyx format: input is 1x3x1x1, weights are 4x3x1x1.
auto input_prim = engine.allocate_memory({ data_types::i32, format::bfyx, { input_b, input_f, 1, 1 } });
326+
auto weights_prim = engine.allocate_memory({ data_types::i32, format::bfyx, { weight_b, weight_f, 1, 1 } });
327+
328+
set_values<int32_t>(input_prim, {
329+
8, 2, -4
330+
});
331+
332+
// Four groups of three weights; each group is combined with the three input values
// (see the reference values below).
set_values<int32_t>(weights_prim, {
333+
2, 1, 0,
334+
-3, -2, 1,
335+
0, -2, -4,
336+
-5, 10, 8
337+
});
338+
339+
// Topology: input -> reorder (i32 bfyx) -> fully_connected (input and weights only,
// no bias argument) -> reorder (f32 bfyx).
// "reorder_to_i32" requests the same data type, format and dimensions that the input
// memory was allocated with.
cldnn::topology topology{
340+
input_layout("input", input_prim->get_layout()),
341+
data("weights", weights_prim),
342+
reorder("reorder_to_i32", input_info("input"), { data_types::i32, format::bfyx, { input_b, input_f, 1, 1 } }),
343+
fully_connected("fc_prim", input_info("reorder_to_i32"), "weights"),
344+
reorder("reorder_to_float", input_info("fc_prim"), { data_types::f32, format::bfyx, { input_b, weight_b, 1, 1 } })
345+
};
346+
347+
// Start from the default test configuration and enable optimize_data and
// allow_new_shape_infer.
ExecutionConfig config = get_test_default_config(engine);
348+
config.set_property(ov::intel_gpu::optimize_data(true));
349+
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
350+
351+
cldnn::network network(engine, topology, config);
352+
353+
network.set_input_data("input", input_prim);
354+
355+
// The network must expose exactly one output, the final f32 reorder.
auto outputs = network.execute();
356+
ASSERT_EQ(outputs.size(), size_t(1));
357+
ASSERT_EQ(outputs.begin()->first, "reorder_to_float");
358+
359+
// Output memory is read as float because the last primitive reorders to f32.
auto output_memory = outputs.begin()->second.get_memory();
360+
cldnn::mem_lock<float> output_ptr (output_memory, get_test_stream());
361+
auto output_layout = output_memory->get_layout();
362+
363+
ASSERT_EQ(output_layout.format, format::bfyx);
364+
365+
// Reference values equal the dot product of the input {8, 2, -4} with each weight group:
//   {2, 1, 0} -> 18, {-3, -2, 1} -> -32, {0, -2, -4} -> 12, {-5, 10, 8} -> -52.
VVF<float> output_ref = {
366+
{ 18, -32, 12, -52 },
367+
};
368+
369+
// Exact equality is used on floats; the reference values are small integers.
for (int i = 0; i< weight_b; ++i) {
370+
ASSERT_EQ(output_ref[0][i], output_ptr[i]);
371+
}
372+
}
373+
319374
TEST(fully_connected_gpu, xb_f32_batch_1) {
320375
// Input : 3x1
321376
// Output : 4x1

0 commit comments

Comments
 (0)