Skip to content

Commit 8a97fbb

Browse files
committed
cpu: x64: jit_binary_kernel: pass src2_dt unconditionally
> cl --version
Microsoft (R) C/C++ Optimizing Compiler Version 19.29.30158 for x64

When building as C++17 (C++11 is fine), this version of the compiler generates wrong code for non-ternary algorithms because of the branch in the constructor. It inverts which objects must be created and which must not:

io_=0000028F599311D8
io_.at(conf_.src0_type)=0000000000000000
io_.at(conf_.src1_type)=0000000000000000
io_.at(conf_.src2_type)=0000028F59929C00
io_.at(conf_.dst_type)=0000000000000000

The solution is to pass src2_type unconditionally; access to the corresponding io_ injector must be guarded by a check for is_ternary().
1 parent 93b395f commit 8a97fbb

File tree

1 file changed

+13
-16
lines changed

1 file changed

+13
-16
lines changed

src/cpu/x64/jit_uni_binary_kernel.cpp

+13-16
Original file line numberDiff line numberDiff line change
@@ -93,22 +93,19 @@ jit_uni_binary_kernel_t<isa, Vmm>::jit_uni_binary_kernel_t(
9393
: binary_kernel_t(vreg_traits<Vmm>::vlen, pd, conf, jit_name(), tail_kernel)
9494
, offt_src0_(vlen_ / ((conf_.is_bf16 || conf_.is_f16) ? 2 : 1))
9595
, offt_src1_(conf_.use_stride_src1 ? offt_src0_ : 0)
96-
, offt_src2_(offt_src0_)
97-
, io_(this, isa,
98-
conf_.is_ternary_op
99-
? std::initializer_list<data_type_t> {conf_.src0_type,
100-
conf_.src1_type, conf_.src2_type, conf_.dst_type}
101-
: std::initializer_list<data_type_t> {conf_.src0_type,
102-
conf_.src1_type, conf_.dst_type},
103-
{false},
104-
io::io_tail_conf_t {simd_w_, tail_size_, tail_opmask_,
105-
vmm_tail_vmask_.getIdx(), reg_tmp_},
106-
io::io_emu_bf16_conf_t {vreg_bf16_emu_1_, vreg_bf16_emu_2_,
107-
vreg_bf16_emu_3_, reg_tmp_, vreg_bf16_emu_4_},
108-
create_saturation_vmm_map(),
109-
io::io_gather_conf_t {simd_w_, full_mask_,
110-
vmm_full_mask_.getIdx(), reg_tmp_, reg_tmp1_,
111-
vmm_tmp_gather_.getIdx()}) {
96+
, offt_src2_(offt_src0_) {
97+
typename io::jit_io_multi_dt_helper_t<Vmm>::data_types_t dts
98+
= {conf_.src0_type, conf_.src1_type, conf_.dst_type};
99+
if (conf.is_ternary_op) dts.emplace(conf_.src2_type);
100+
101+
io_ = io::jit_io_multi_dt_helper_t<Vmm>(this, isa, dts, {false},
102+
io::io_tail_conf_t {simd_w_, tail_size_, tail_opmask_,
103+
vmm_tail_vmask_.getIdx(), reg_tmp_},
104+
io::io_emu_bf16_conf_t {vreg_bf16_emu_1_, vreg_bf16_emu_2_,
105+
vreg_bf16_emu_3_, reg_tmp_, vreg_bf16_emu_4_},
106+
create_saturation_vmm_map(),
107+
io::io_gather_conf_t {simd_w_, full_mask_, vmm_full_mask_.getIdx(),
108+
reg_tmp_, reg_tmp1_, vmm_tmp_gather_.getIdx()});
112109
init();
113110
}
114111

0 commit comments

Comments
 (0)