Skip to content

Commit 493921c

Browse files
rzhang, TaoLv
rzhang
authored and committed
graph: backend: dnnl: support lnorm + q with zps!=0
1 parent a1c76b5 commit 493921c

File tree

4 files changed

+5
-3
lines changed

4 files changed

+5
-3
lines changed

src/graph/backend/dnnl/kernels/layernorm.hpp

+2 -1
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,9 @@ struct layernorm_fwd_t : public kernel_base_t {
8989

9090
BACKEND_DNNL_ADD_PASS(pipeline, lower_down);
9191
BACKEND_DNNL_ADD_PASS(pipeline, fuse_post_typecast_to_predecessor);
92-
BACKEND_DNNL_ADD_PASS(pipeline, fuse_post_ops);
9392
BACKEND_DNNL_ADD_PASS(pipeline, remove_quant_data_with_no_effect);
93+
BACKEND_DNNL_ADD_PASS(pipeline, replace_quant_data_with_binary_post_op);
94+
BACKEND_DNNL_ADD_PASS(pipeline, fuse_post_ops);
9495
BACKEND_DNNL_ADD_PASS(pipeline, convert_to_runtime_dst_scales);
9596
BACKEND_DNNL_ADD_PASS(pipeline, fuse_dst_scales);
9697
BACKEND_DNNL_ADD_PASS(pipeline, infer_shape);

src/graph/backend/dnnl/patterns/layernorm_fusion.cpp

-1
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,6 @@ DNNL_BACKEND_REGISTER_PATTERN_MATCHER_PASS(dnnl, layernorm_post_ops_fusion_cpu)
7979
auto q_graph = std::make_shared<pb_graph_t>();
8080
pm::pb_op_t *pquantize
8181
= q_graph->append_op(graph::op_kind::Quantize);
82-
pquantize->append_decision_function(check_zps_values<0>);
8382
q_graph->create_input_port(0, pquantize, 0);
8483
q_graph->create_output_port(0, pquantize, 0);
8584
pgraph->append_optional(

tests/benchdnn/inputs/graph/pattern/harness_int8_all

+2
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,8 @@
114114
--reset --in-shapes=0:1x128x150x150*acdb+1:1x128x150x150*acdb+2:1x128x150x150*acdb --op-attrs=3:axis:0 --case=pattern/int8/int8_concat_fusion_3.json
115115
#layernorm
116116
--reset --case=pattern/int8/int8_lnorm_gelu_quantize.json
117+
# layernorm with zp != 0
118+
--reset --op-attrs=2:zps:1 --case=pattern/int8/int8_lnorm_gelu_quantize.json
117119
--reset --case=pattern/int8/int8_lnorm_multiply_quantize.json
118120
--reset --case=pattern/int8/int8_lnorm_tc_multiply_quantize.json
119121
#softmax

tests/gtests/graph/unit/backend/dnnl/test_pass.cpp

+1 -1
Original file line numberDiff line numberDiff line change
@@ -11836,7 +11836,7 @@ TEST(test_pass_pass_system, FuseLayernormTypecastQuantize_CPU) {
1183611836
ASSERT_EQ(agraph.get_partitions()[0]->get_outputs()[0].id, 5U);
1183711837
}
1183811838

11839-
TEST(test_pass_pass_system, NotFuseLayernormTypecast) {
11839+
TEST(test_pass_pass_system, NotFuseLayernormTypecast_GPU) {
1184011840
/*
1184111841
| (bf16)
1184211842
layernorm

0 commit comments

Comments
 (0)