[GPU] Add i32 output for FC

kelvinchoi-intel · kelvinchoi-intel · commit 434cbcc37938 · 2024-05-08T20:44:06.000+09:00
diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bfyx_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bfyx_ref.cpp
@@ -15,6 +15,7 @@ ParamsKey FullyConnected_bfyx_Ref::GetSupportedKey() const {
     k.EnableInputDataType(Datatype::F32);
     k.EnableInputDataType(Datatype::INT8);
     k.EnableInputDataType(Datatype::UINT8);
+    k.EnableOutputDataType(Datatype::INT32);
     k.EnableOutputDataType(Datatype::F16);
     k.EnableOutputDataType(Datatype::F32);
     k.EnableOutputDataType(Datatype::INT8);
diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/matmul.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/matmul.cpp
@@ -157,6 +157,11 @@ const std::vector<ov::element::Type> netPRCs {
     ov::element::f32,
 };
 
+const std::vector<ov::element::Type> netPRCs_f32_i32 {  // precision list with i32 added next to f32; fed to fullyConnectedParams3D_smoke below
+    ov::element::f32,
+    ov::element::i32
+};
+
 
 /* ============= FullyConnected ============= */
 
@@ -309,7 +314,7 @@ const std::vector<ShapeRelatedParams> IS3D_nightly = {
 };
 
 const auto fullyConnectedParams3D_smoke = ::testing::Combine(::testing::ValuesIn(IS3D_smoke),
-                                                       ::testing::Values(ov::element::f32),
+                                                       ::testing::ValuesIn(netPRCs_f32_i32),
                                                        ::testing::Values(ov::element::undefined),
                                                        ::testing::Values(ov::element::undefined),
                                                        ::testing::Values(ov::test::utils::InputLayerType::CONSTANT),
diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp
@@ -316,6 +316,83 @@ TEST(fully_connected_gpu, no_biases_int8) {
     ASSERT_EQ(-52.0f, output_ptr[3]);
 }
 
+TEST(fully_connected_gpu, no_biases_fc_i32_reorder_f16) {  // i32 fully_connected (no bias input) -> reorder to f16 -> floor, compared against a reference table
+    auto& engine = get_test_engine();
+
+    auto input_prim = engine.allocate_memory({ data_types::i32, format::bfyx, { 1, 1, 3, 8 } });  // holds the 24 i32 values written below as 8 rows of 3
+    auto weights_prim = engine.allocate_memory({ data_types::i32, format::bfyx, { 1, 1, 3, 4 } });  // holds the 12 i32 values written below as 4 rows of 3
+
+    set_values<int32_t>(input_prim, {
+        1, 2, 3,
+        5, 2, 2,
+        4, 6, 3,
+        3, 5, 1,
+        1, 1, 1,
+        9, -2, -1,
+        -3, -2, -5,
+        3, 2, 5
+    });
+
+    set_values<int32_t>(weights_prim, {
+        1, 2, 3,
+        4, 5, 6,
+        1, 1, 1,
+        4, 2, 0
+    });
+
+    cldnn::topology topology{
+        input_layout("input", input_prim->get_layout()),
+        data("weights", weights_prim),
+        fully_connected("fc_prim", input_info("input"), "weights"),  // only input and weights are passed; no bias id is given
+        reorder("reorder_to_f16", input_info("fc_prim"), { data_types::f16, format::bfyx, { 1, 1, 4, 5 } }),  // NOTE(review): layout dims are { 1, 1, 4, 5 } but the asserts below expect x=4, y=8 - confirm this is intended
+        activation("output", input_info("reorder_to_f16"), activation_func::floor)  // final primitive; its id "output" is what the test reads back
+    };
+
+    ExecutionConfig config = get_test_default_config(engine);
+    config.set_property(ov::intel_gpu::optimize_data(true));
+    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
+
+    cldnn::network network(engine, topology, config);
+
+    network.set_input_data("input", input_prim);
+
+    auto outputs = network.execute();
+    ASSERT_EQ(outputs.size(), size_t(1));  // exactly one network output is expected...
+    ASSERT_EQ(outputs.begin()->first, "output");  // ...and it must be the floor activation
+
+    auto output_memory = outputs.begin()->second.get_memory();
+    cldnn::mem_lock<ov::float16> output_ptr (output_memory, get_test_stream());  // output elements are read as ov::float16
+    auto output_layout = output_memory->get_layout();
+
+    int y_size = output_layout.spatial(1);  // spatial index 1 is treated as y
+    int x_size = output_layout.spatial(0);  // spatial index 0 is treated as x
+    int f_size = output_layout.feature();
+    int b_size = output_layout.batch();
+
+    ASSERT_EQ(output_layout.format, format::bfyx);
+    ASSERT_EQ(y_size, 8);
+    ASSERT_EQ(x_size, 4);
+    ASSERT_EQ(f_size, 1);
+    ASSERT_EQ(b_size, 1);
+
+    VVF<float> output_ref = {  // NOTE(review): rows 3..7 are all zero, and if fc computes input * weights^T row 0 would be { 14, 32, 6, 8 } - confirm how this table was derived
+        { 210, 173, 152, 108 },
+        { 112, 91, 79, 66 },
+        { 47, 18, 2, 0 },
+        { 0, 0, 0, 0 },
+        { 0, 0, 0, 0 },
+        { 0, 0, 0, 0 },
+        { 0, 0, 0, 0 }, 
+        { 0, 0, 0, 0 },
+    };
+
+    for (int y = 0; y < y_size; ++y) {
+        for (int x = 0; x < x_size; ++x) {
+            ASSERT_EQ(output_ref[y][x], output_ptr[y * x_size + x]);  // exact equality between the float reference and the f16 element at flat index y * x_size + x
+        }
+    }
+}
+
 TEST(fully_connected_gpu, xb_f32_batch_1) {
     //  Input  : 3x1
     //  Output : 4x1