Skip to content

Commit 7054e90

Browse files
authored
[GPU] Fix SD1.5 hybrid accuracy issue. (#29188)
A compressed oneDNN fully-connected (FC) layer with a 4D eltwise and a u8 quantize post-op caused an accuracy problem. The current FC always uses a 2D output tensor, so when the FC has a 4D eltwise post-op, the eltwise operand must be reduced to 2D for the oneDNN ocl:gemm_with_po case. ### Tickets: - *157491* Signed-off-by: hyunback <hyunback.kim@intel.com>
1 parent eebe1c2 commit 7054e90

File tree

3 files changed

+57
-15
lines changed

3 files changed

+57
-15
lines changed

src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp

+2-12
Original file line numberDiff line numberDiff line change
@@ -1066,15 +1066,6 @@ void reorder_inputs::run(program& p, reorder_factory& rf) {
10661066
if (fc_layout.is_dynamic() || data_layout.is_dynamic())
10671067
continue;
10681068

1069-
auto same_spatial = [](layout a, layout b) {
1070-
if (a.get_spatial_rank() != b.get_spatial_rank())
1071-
return false;
1072-
for (size_t i = 0; i < a.get_spatial_rank(); i++) {
1073-
if (a.spatial(i) != b.spatial(i))
1074-
return false;
1075-
}
1076-
return true;
1077-
};
10781069
// fc_b | fc_f | data_b | data_f | broadcast condition
10791070
// ---------+-----------+-----------+-----------+--------------------
10801071
// 1 | 1 | 1 | 1 | no broadcast
@@ -1090,12 +1081,11 @@ void reorder_inputs::run(program& p, reorder_factory& rf) {
10901081
// N | 1 | N | 1 | no broadcast
10911082
// N | 1 | N | N | N/A
10921083
// N | N | 1 | 1 | implicit broadcast
1093-
// N | N | 1 | N | explicit broadcast when spatial different
1094-
// N | N | N | 1 | explicit broadcast when spatial different
1084+
// N | N | 1 | N | explicit broadcast
1085+
// N | N | N | 1 | explicit broadcast
10951086
// N | N | N | N | no broadcast
10961087
if ((fc_layout.batch() == 1 || fc_layout.feature() == 1) ||
10971088
(data_layout.batch() == 1 && data_layout.feature() == 1) ||
1098-
((data_layout.batch() == 1 || data_layout.feature() == 1) && same_spatial(fc_layout, data_layout)) ||
10991089
(fc_layout.count() == data_layout.count())) {
11001090
continue;
11011091
}

src/plugins/intel_gpu/src/graph/program_node.cpp

+7-3
Original file line numberDiff line numberDiff line change
@@ -1579,15 +1579,19 @@ void program_node::create_onednn_primitive_attributes(
15791579
if (is_type<fully_connected>()) {
15801580
auto prim = this->as<fully_connected>().get_primitive();
15811581
if (prim->input_size == in_pshape.size()) {
1582-
if (prim->input_size == 3 && !fc_needs_full_tensor()) {
1582+
if (prim->input_size >= 3 && !fc_needs_full_tensor()) {
15831583
cldnn::onednn::combine_bf_with_first_spatial_dim(in);
15841584
in_pshape = in.get_partial_shape();
15851585
}
15861586
ones_to_add = std::max(out_pshape.size(), static_cast<size_t>(rank)) - in_pshape.size();
15871587
} else {
1588-
if (prim->input_size == 3)
1588+
if (prim->input_size >= 3) {
15891589
cldnn::onednn::combine_bf_with_first_spatial_dim(in);
1590-
ones_to_add = std::max(in_pshape.size(), prim->input_size) - std::min(in_pshape.size(), prim->input_size);
1590+
in_pshape = in.get_partial_shape();
1591+
ones_to_add = std::max(out_pshape.size(), static_cast<size_t>(rank)) - in_pshape.size();
1592+
} else {
1593+
ones_to_add = 2;
1594+
}
15911595
}
15921596
if (ones_to_add > 0) {
15931597
layout new_layout = in;

src/plugins/intel_gpu/tests/unit/fusions/fully_connected_fusion_test.cpp

+48
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,9 @@ class FullyConnectedFusingTestOneDNN : public BaseFusingTest<fully_connected_tes
201201
#define CASE_FC_FP16_INT4_COMP_1 { 1, 128 }, { 1, 128 }, { 128, 128 }, data_types::f16, format::bfyx, data_types::u4, format::oiyx, data_types::f16, format::bfyx
202202
#define CASE_FC_FP16_INT4_COMP_2 { 2, 128 }, { 2, 128 }, { 128, 128 }, data_types::f16, format::bfyx, data_types::u4, format::oiyx, data_types::f16, format::bfyx
203203

204+
#define CASE_FC_FP16_INT8_COMP_1 { 1, 128 }, { 1, 128 }, { 128, 128 }, data_types::f16, format::bfyx, data_types::u8, format::oiyx, data_types::f16, format::bfyx
205+
#define CASE_FC_FP16_3D_INT8_COMP_1 { 2, 32, 4 }, { 2, 32, 16 }, { 16, 4, 1 }, data_types::f16, format::bfyx, data_types::u8, format::oiyx, data_types::f16, format::bfyx
206+
204207
#define CASE_FC_FP16_INT4_SWIGLU_1 { 1, 64 }, { 1, 64 }, { 64, 64 }, data_types::f16, format::bfyx, data_types::u4, format::oiyx, data_types::f16, format::bfyx
205208
#define CASE_FC_FP16_INT4_SWIGLU_2 { 1, 64}, { 1, 128 }, { 128, 64 }, data_types::f16, format::bfyx, data_types::u4, format::oiyx, data_types::f16, format::bfyx
206209
#define CASE_FC_FP16_INT4_SWIGLU_3 { 1, 312 }, { 1, 128 }, { 128, 312 }, data_types::f16, format::bfyx, data_types::u4, format::oiyx, data_types::f16, format::bfyx
@@ -643,6 +646,51 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, fc_fp16_eltwise_prod_unfused_dynamic, ::te
643646
fully_connected_test_params{ CASE_FC_FP16_4, 2, 3 }
644647
}));
645648

649+
class fc_compressed_int8_bias_eltwise_quantize_u8_onednn : public FullyConnectedFusingTestOneDNN {};
650+
TEST_P(fc_compressed_int8_bias_eltwise_quantize_u8_onednn, basic) {
651+
auto p = GetParam();
652+
auto test_input_layout = get_input_layout(p);
653+
654+
auto supports_immad = engine.get_device_info().supports_immad;
655+
auto dcomp_zp_name = supports_immad ? "dcomp_zp" : "";
656+
657+
auto fc_prim = fully_connected("fc_prim", input_info("input"), "weights", "", "scale", dcomp_zp_name, data_types::f16, get_output_dim_size(p), get_input_weights_rank(p));
658+
fc_prim.decompression_zero_point_scalar = 8.0f;
659+
660+
// onednn FC supports scalar ZP for int4/int8 compressed weight. (NOTE: this test uses u8 weights; comment was copied from the int4 variant.)
661+
auto dcomp_zp_layout = layout{ {1, 1, 1, 1}, data_types::u8, format::bfyx };
662+
663+
create_topologies(
664+
input_layout("input", get_input_layout(p)),
665+
data("weights", get_mem(get_weights_layout(p))),
666+
data("scale", get_mem(get_scale_layout(p, 128))),
667+
data("bias", get_mem(get_bias_layout(p))),
668+
data("dcomp_zp", get_mem(dcomp_zp_layout, 8.0f)),
669+
data("eltwise_data", get_mem(get_per_channel_layout(p), 1, 9)),
670+
data("in_lo", get_mem(get_per_channel_layout(p), -2, -2)),
671+
data("in_hi", get_mem(get_per_channel_layout(p), 2, 2)),
672+
data("out_lo", get_mem(get_single_element_layout(p), 0)),
673+
data("out_hi", get_mem(get_single_element_layout(p), 255)),
674+
fc_prim,
675+
eltwise("bias_add", { input_info("fc_prim"), input_info("bias") }, eltwise_mode::sum),
676+
eltwise("eltwise", { input_info("bias_add"), input_info("eltwise_data") }, eltwise_mode::sum),
677+
quantize("quantize", input_info("eltwise"), input_info("in_lo"), input_info("in_hi"),
678+
input_info("out_lo"), input_info("out_hi"), 256, data_types::u8),
679+
reorder("reorder_bfyx", input_info("quantize"), p.default_format, data_types::f32)
680+
);
681+
682+
bool is_dynamic = false;
683+
cfg_not_fused.set_property(ov::intel_gpu::allow_new_shape_infer(is_dynamic));
684+
cfg_not_fused.set_property(ov::hint::dynamic_quantization_group_size(0));
685+
tolerance = 1.0f;
686+
execute(p, false, is_dynamic);
687+
}
688+
689+
INSTANTIATE_TEST_SUITE_P(fusings_gpu, fc_compressed_int8_bias_eltwise_quantize_u8_onednn, ::testing::ValuesIn(std::vector<fully_connected_test_params>{
690+
fully_connected_test_params{ CASE_FC_FP16_INT8_COMP_1, 2, 5 },
691+
fully_connected_test_params{ CASE_FC_FP16_3D_INT8_COMP_1, 2, 5 },
692+
}));
693+
646694
class fc_compressed_int8_bias_dynamic_onednn : public FullyConnectedFusingTestOneDNN {};
647695
TEST_P(fc_compressed_int8_bias_dynamic_onednn, basic) {
648696
auto p = GetParam();

0 commit comments

Comments
 (0)