@@ -201,6 +201,9 @@ class FullyConnectedFusingTestOneDNN : public BaseFusingTest<fully_connected_tes
201
201
// FP16 fully connected cases with u4 (int4) compressed weights.
// Each macro expands to three brace-enclosed dimension lists followed by three
// (data type, format) pairs.
// NOTE(review): presumably input / output / weights shapes, then input, weights and
// default type+format — confirm against fully_connected_test_params.
#define CASE_FC_FP16_INT4_COMP_1 \
    { 1, 128 }, { 1, 128 }, { 128, 128 }, \
    data_types::f16, format::bfyx, data_types::u4, format::oiyx, data_types::f16, format::bfyx
#define CASE_FC_FP16_INT4_COMP_2 \
    { 2, 128 }, { 2, 128 }, { 128, 128 }, \
    data_types::f16, format::bfyx, data_types::u4, format::oiyx, data_types::f16, format::bfyx
203
203
204
// FP16 fully connected cases with u8 (int8) compressed weights: one with two-element
// dimension lists and one with three-element dimension lists.
// Each macro expands to three brace-enclosed dimension lists followed by three
// (data type, format) pairs.
// NOTE(review): presumably input / output / weights shapes, then input, weights and
// default type+format — confirm against fully_connected_test_params.
#define CASE_FC_FP16_INT8_COMP_1 \
    { 1, 128 }, { 1, 128 }, { 128, 128 }, \
    data_types::f16, format::bfyx, data_types::u8, format::oiyx, data_types::f16, format::bfyx
#define CASE_FC_FP16_3D_INT8_COMP_1 \
    { 2, 32, 4 }, { 2, 32, 16 }, { 16, 4, 1 }, \
    data_types::f16, format::bfyx, data_types::u8, format::oiyx, data_types::f16, format::bfyx
206
+
204
207
// FP16 fully connected cases with u4 (int4) compressed weights used by the SwiGLU tests.
// Each macro expands to three brace-enclosed dimension lists followed by three
// (data type, format) pairs.
// NOTE(review): presumably input / output / weights shapes, then input, weights and
// default type+format — confirm against fully_connected_test_params.
#define CASE_FC_FP16_INT4_SWIGLU_1 \
    { 1, 64 }, { 1, 64 }, { 64, 64 }, \
    data_types::f16, format::bfyx, data_types::u4, format::oiyx, data_types::f16, format::bfyx
#define CASE_FC_FP16_INT4_SWIGLU_2 \
    { 1, 64 }, { 1, 128 }, { 128, 64 }, \
    data_types::f16, format::bfyx, data_types::u4, format::oiyx, data_types::f16, format::bfyx
#define CASE_FC_FP16_INT4_SWIGLU_3 \
    { 1, 312 }, { 1, 128 }, { 128, 312 }, \
    data_types::f16, format::bfyx, data_types::u4, format::oiyx, data_types::f16, format::bfyx
@@ -643,6 +646,51 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, fc_fp16_eltwise_prod_unfused_dynamic, ::te
643
646
fully_connected_test_params{ CASE_FC_FP16_4, 2 , 3 }
644
647
}));
645
648
649
+ class fc_compressed_int8_bias_eltwise_quantize_u8_onednn : public FullyConnectedFusingTestOneDNN {};
650
+ TEST_P (fc_compressed_int8_bias_eltwise_quantize_u8_onednn, basic) {
651
+ auto p = GetParam ();
652
+ auto test_input_layout = get_input_layout (p);
653
+
654
+ auto supports_immad = engine.get_device_info ().supports_immad ;
655
+ auto dcomp_zp_name = supports_immad ? " dcomp_zp" : " " ;
656
+
657
+ auto fc_prim = fully_connected (" fc_prim" , input_info (" input" ), " weights" , " " , " scale" , dcomp_zp_name, data_types::f16, get_output_dim_size (p), get_input_weights_rank (p));
658
+ fc_prim.decompression_zero_point_scalar = 8 .0f ;
659
+
660
+ // onednn FC supports scalar ZP for int4 compressed weight.
661
+ auto dcomp_zp_layout = layout{ {1 , 1 , 1 , 1 }, data_types::u8, format::bfyx };
662
+
663
+ create_topologies (
664
+ input_layout (" input" , get_input_layout (p)),
665
+ data (" weights" , get_mem (get_weights_layout (p))),
666
+ data (" scale" , get_mem (get_scale_layout (p, 128 ))),
667
+ data (" bias" , get_mem (get_bias_layout (p))),
668
+ data (" dcomp_zp" , get_mem (dcomp_zp_layout, 8 .0f )),
669
+ data (" eltwise_data" , get_mem (get_per_channel_layout (p), 1 , 9 )),
670
+ data (" in_lo" , get_mem (get_per_channel_layout (p), -2 , -2 )),
671
+ data (" in_hi" , get_mem (get_per_channel_layout (p), 2 , 2 )),
672
+ data (" out_lo" , get_mem (get_single_element_layout (p), 0 )),
673
+ data (" out_hi" , get_mem (get_single_element_layout (p), 255 )),
674
+ fc_prim,
675
+ eltwise (" bias_add" , { input_info (" fc_prim" ), input_info (" bias" ) }, eltwise_mode::sum),
676
+ eltwise (" eltwise" , { input_info (" bias_add" ), input_info (" eltwise_data" ) }, eltwise_mode::sum),
677
+ quantize (" quantize" , input_info (" eltwise" ), input_info (" in_lo" ), input_info (" in_hi" ),
678
+ input_info (" out_lo" ), input_info (" out_hi" ), 256 , data_types::u8),
679
+ reorder (" reorder_bfyx" , input_info (" quantize" ), p.default_format , data_types::f32)
680
+ );
681
+
682
+ bool is_dynamic = false ;
683
+ cfg_not_fused.set_property (ov::intel_gpu::allow_new_shape_infer (is_dynamic));
684
+ cfg_not_fused.set_property (ov::hint::dynamic_quantization_group_size (0 ));
685
+ tolerance = 1 .0f ;
686
+ execute (p, false , is_dynamic);
687
+ }
688
+
689
+ INSTANTIATE_TEST_SUITE_P (fusings_gpu, fc_compressed_int8_bias_eltwise_quantize_u8_onednn, ::testing::ValuesIn(std::vector<fully_connected_test_params>{
690
+ fully_connected_test_params{ CASE_FC_FP16_INT8_COMP_1, 2 , 5 },
691
+ fully_connected_test_params{ CASE_FC_FP16_3D_INT8_COMP_1, 2 , 5 },
692
+ }));
693
+
646
694
class fc_compressed_int8_bias_dynamic_onednn : public FullyConnectedFusingTestOneDNN {};
647
695
TEST_P (fc_compressed_int8_bias_dynamic_onednn, basic) {
648
696
auto p = GetParam ();
0 commit comments