Skip to content

Commit b2e573e

Browse files
author
dmitrygo
committed
[FORK][FIX] Restore choose_loop_order logic for weights decompression case
1 parent dc69ce5 commit b2e573e

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

src/cpu/x64/jit_brgemm_inner_product_utils.cpp

+4-1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818

1919
#include "cpu/x64/brgemm/brgemm.hpp"
2020

21+
#include <iostream>
22+
2123
namespace dnnl {
2224
namespace impl {
2325
namespace cpu {
@@ -1885,7 +1887,8 @@ void jit_brgemm_ip_fwd_conf_t::choose_loop_order() {
18851887
const bool is_int8 = one_of(src_dt, u8, s8) && wei_dt == s8;
18861888
const bool is_compute_amx = (is_xf16 || is_int8) && is_amx;
18871889

1888-
if ((os_block < 32 || do_occ_osc) && (is_compute_amx || is_f32_avx2))
1890+
// For the weights_decompression case it is better to keep the ocb loop outermost, because unpacking the weights into an intermediate buffer adds overhead
1891+
if ((os_block < 32 || do_occ_osc) && (is_compute_amx || is_f32_avx2) && !weights_decompression)
18891892
loop_order = icc_occ_osc_ocb_osb;
18901893
}
18911894

0 commit comments

Comments
 (0)