From 9496130ee43c7dcc24f74d415f8b70f25b72c0da Mon Sep 17 00:00:00 2001 From: Dmitrii Zarukin Date: Fri, 28 Feb 2025 12:14:29 -0800 Subject: [PATCH] cpu: x64: jit_binary_kernel: pass src2_dt unconditionally >cl --version Microsoft (R) C/C++ Optimizing Compiler Version 19.29.30158 for x64 When targeting C++17 (C++11 is fine), this version of the compiler generates wrong code for non-ternary algorithms because of the branch in the constructor. It inverts which objects are created and which are not: io_=0000028F599311D8 io_.at(conf_.src0_type)=0000000000000000 io_.at(conf_.src1_type)=0000000000000000 io_.at(conf_.src2_type)=0000028F59929C00 io_.at(conf_.dst_type)=0000000000000000 The solution is to pass src2_type unconditionally; access to that io_injector must be guarded by checking is_ternary(). --- src/cpu/x64/jit_uni_binary_kernel.cpp | 29 ++++++++++++--------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/src/cpu/x64/jit_uni_binary_kernel.cpp b/src/cpu/x64/jit_uni_binary_kernel.cpp index 7497bcd5862..949e9247139 100644 --- a/src/cpu/x64/jit_uni_binary_kernel.cpp +++ b/src/cpu/x64/jit_uni_binary_kernel.cpp @@ -93,22 +93,19 @@ jit_uni_binary_kernel_t::jit_uni_binary_kernel_t( : binary_kernel_t(vreg_traits::vlen, pd, conf, jit_name(), tail_kernel) , offt_src0_(vlen_ / ((conf_.is_bf16 || conf_.is_f16) ? 2 : 1)) , offt_src1_(conf_.use_stride_src1 ? offt_src0_ : 0) - , offt_src2_(offt_src0_) - , io_(this, isa, - conf_.is_ternary_op - ? 
std::initializer_list {conf_.src0_type, - conf_.src1_type, conf_.src2_type, conf_.dst_type} - : std::initializer_list {conf_.src0_type, - conf_.src1_type, conf_.dst_type}, - {false}, - io::io_tail_conf_t {simd_w_, tail_size_, tail_opmask_, - vmm_tail_vmask_.getIdx(), reg_tmp_}, - io::io_emu_bf16_conf_t {vreg_bf16_emu_1_, vreg_bf16_emu_2_, - vreg_bf16_emu_3_, reg_tmp_, vreg_bf16_emu_4_}, - create_saturation_vmm_map(), - io::io_gather_conf_t {simd_w_, full_mask_, - vmm_full_mask_.getIdx(), reg_tmp_, reg_tmp1_, - vmm_tmp_gather_.getIdx()}) { + , offt_src2_(offt_src0_) { + typename io::jit_io_multi_dt_helper_t::data_types_t dts + = {conf_.src0_type, conf_.src1_type, conf_.dst_type}; + if (conf.is_ternary_op) dts.emplace(conf_.src2_type); + + io_ = io::jit_io_multi_dt_helper_t(this, isa, dts, {false}, + io::io_tail_conf_t {simd_w_, tail_size_, tail_opmask_, + vmm_tail_vmask_.getIdx(), reg_tmp_}, + io::io_emu_bf16_conf_t {vreg_bf16_emu_1_, vreg_bf16_emu_2_, + vreg_bf16_emu_3_, reg_tmp_, vreg_bf16_emu_4_}, + create_saturation_vmm_map(), + io::io_gather_conf_t {simd_w_, full_mask_, vmm_full_mask_.getIdx(), + reg_tmp_, reg_tmp1_, vmm_tmp_gather_.getIdx()}); init(); }