diff --git a/src/cpu/cpu_primitive.hpp b/src/cpu/cpu_primitive.hpp index d82b9ae0d92..baea88d774a 100644 --- a/src/cpu/cpu_primitive.hpp +++ b/src/cpu/cpu_primitive.hpp @@ -29,6 +29,9 @@ #include "cpu/ref_io_helper.hpp" +//NOLINTBEGIN(bugprone-macro-parentheses) +// These macros are actual pieces of code, can't put certain pieces into `()`. +// TODO: consider making them functions. #define DEFINE_ARG_SCALES_BUFFER_ATTR(attr, scales, arg) \ alignas(16) float CONCAT2(scales, _buf16)[16] = {0}; \ const float *scales {nullptr}; \ @@ -37,10 +40,11 @@ utils::array_set(CONCAT2(scales, _buf16), 1.0f, 16); \ scales = CONCAT2(scales, _buf16); \ } else { \ - scales = CTX_IN_MEM(const float *, DNNL_ARG_ATTR_SCALES | arg); \ + scales = CTX_IN_MEM(const float *, DNNL_ARG_ATTR_SCALES | (arg)); \ VCHECK_ATTR(scales != nullptr, \ - "Scales buffer for arg %d is missing", arg); \ - const auto scales_d = ctx.memory_mdw(DNNL_ARG_ATTR_SCALES | arg); \ + "Scales buffer for arg %d is missing", (arg)); \ + const auto scales_d \ + = ctx.memory_mdw(DNNL_ARG_ATTR_SCALES | (arg)); \ VCHECK_ATTR( \ utils::one_of(scales_d.data_type(), data_type::f32, \ data_type::f16, data_type::bf16, data_type::e8m0), \ @@ -48,7 +52,7 @@ if (scales_d.nelems() == 1) { \ const float s = cpu::io::load_float_value( \ scales_d.data_type(), scales, 0); \ - if (utils::one_of(arg, DNNL_ARG_DST, \ + if (utils::one_of((arg), DNNL_ARG_DST, \ DNNL_ARG_ATTR_POST_OP_DW | DNNL_ARG_DST)) { \ utils::array_set(CONCAT2(scales, _buf16), 1.f / s, 16); \ } else { \ @@ -61,7 +65,7 @@ MAYBE_UNUSED(scales); #define DEFINE_ARG_SCALES_BUFFER(scales, arg) \ - DEFINE_ARG_SCALES_BUFFER_ATTR(pd()->attr(), scales, arg) + DEFINE_ARG_SCALES_BUFFER_ATTR(pd()->attr(), scales, (arg)) #define DEFINE_ZERO_POINTS_BUFFER_ATTR(attr, zero_points_ptr, arg) \ int32_t CONCAT2(default_zero_point_, arg) = 0; \ @@ -74,11 +78,11 @@ * Accessing `zero_points_ptr` by index will lead to a crash for * datatypes different from s32. */ \ zero_points_ptr = CTX_IN_MEM( \ - const int32_t *, DNNL_ARG_ATTR_ZERO_POINTS | arg); \ + const int32_t *, DNNL_ARG_ATTR_ZERO_POINTS | (arg)); \ VCHECK_ATTR(zero_points_ptr != nullptr, \ - "Zero points buffer for arg %d is missing", arg); \ + "Zero points buffer for arg %d is missing", (arg)); \ const auto zero_points_d \ - = ctx.memory_mdw(DNNL_ARG_ATTR_ZERO_POINTS | arg); \ + = ctx.memory_mdw(DNNL_ARG_ATTR_ZERO_POINTS | (arg)); \ VCHECK_ATTR(utils::one_of(zero_points_d.data_type(), \ data_type::s32, data_type::s8, data_type::u8, \ data_type::s4, data_type::u4), \ @@ -132,4 +136,6 @@ #define DEFINE_ZERO_POINT_VALUE(zero_point, mem_arg) \ DEFINE_ZERO_POINT_VALUE_ATTR(pd()->attr(), zero_point, mem_arg) +//NOLINTEND(bugprone-macro-parentheses) + #endif // CPU_CPU_PRIMITIVE_HPP diff --git a/src/cpu/cpu_stream.hpp b/src/cpu/cpu_stream.hpp index 30d5a6e058b..7bf2cac3a44 100644 --- a/src/cpu/cpu_stream.hpp +++ b/src/cpu/cpu_stream.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,7 +34,7 @@ namespace cpu { struct cpu_stream_t : public stream_t { cpu_stream_t(engine_t *engine, impl::stream_impl_t *stream_impl) : stream_t(engine, stream_impl) {} - virtual ~cpu_stream_t() = default; + ~cpu_stream_t() override = default; dnnl::impl::status_t wait() override { // CPU execution is synchronous so return immediately diff --git a/src/cpu/platform.cpp b/src/cpu/platform.cpp index 8dbbeffa56b..51abf9b4e90 100644 --- a/src/cpu/platform.cpp +++ b/src/cpu/platform.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * Copyright 2020-2024 FUJITSU LIMITED * Copyright 2022-2024 Arm Ltd. and affiliates * @@ -258,9 +258,9 @@ unsigned get_max_threads_to_use() { int get_vector_register_size() { #if DNNL_X64 using namespace x64; - if (mayiuse(avx512_core)) return cpu_isa_traits::vlen; - if (mayiuse(avx)) return cpu_isa_traits::vlen; - if (mayiuse(sse41)) return cpu_isa_traits::vlen; + if (mayiuse(avx512_core)) return cpu_isa_traits_t::vlen; + if (mayiuse(avx)) return cpu_isa_traits_t::vlen; + if (mayiuse(sse41)) return cpu_isa_traits_t::vlen; #elif DNNL_AARCH64 using namespace aarch64; if (mayiuse(asimd)) return cpu_isa_traits::vlen; diff --git a/src/cpu/ref_batch_normalization.cpp b/src/cpu/ref_batch_normalization.cpp index 0e4a23e8d7e..6ab3c20742f 100644 --- a/src/cpu/ref_batch_normalization.cpp +++ b/src/cpu/ref_batch_normalization.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2016-2024 Intel Corporation +* Copyright 2016-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -158,8 +158,8 @@ status_t ref_batch_normalization_fwd_t::execute_forward( } } if (d_type == s8) - dst[d_off] - = q10n::qz_a1b0()(maybe_post_op(bn_res)); + dst[d_off] = q10n::qz_a1b0_t()( + maybe_post_op(bn_res)); else dst[d_off] = maybe_post_op(bn_res); } diff --git a/src/cpu/ref_concat.hpp b/src/cpu/ref_concat.hpp index 090fbb6863d..6b87295dfcf 100644 --- a/src/cpu/ref_concat.hpp +++ b/src/cpu/ref_concat.hpp @@ -109,7 +109,7 @@ struct ref_concat_t : public primitive_t { return status::success; } - ~ref_concat_t() = default; + ~ref_concat_t() override = default; status_t execute(const exec_ctx_t &ctx) const override { using namespace memory_tracking::names; diff --git a/src/cpu/ref_deconvolution.hpp b/src/cpu/ref_deconvolution.hpp index a41c871bc6e..63cc0c37fcc 100644 --- a/src/cpu/ref_deconvolution.hpp +++ b/src/cpu/ref_deconvolution.hpp @@ -102,8 +102,6 @@ struct ref_deconvolution_fwd_t : public primitive_t { , dst_tag_(other.dst_tag_) , name_(other.name_) {} - ~pd_t() = default; - DECLARE_COMMON_PD_T(name_.c_str(), ref_deconvolution_fwd_t); status_t init_convolution(engine_t *engine) { @@ -335,8 +333,6 @@ struct ref_deconvolution_bwd_data_t : public primitive_t { , conv_pd_(other.conv_pd_->clone()) , name_(other.name_) {} - ~pd_t() = default; - DECLARE_COMMON_PD_T(name_.c_str(), ref_deconvolution_bwd_data_t); status_t init_convolution(engine_t *engine) { @@ -446,8 +442,6 @@ struct ref_deconvolution_bwd_weights_t : public primitive_t { , dst_tag_(other.dst_tag_) , name_(other.name_) {} - ~pd_t() = default; - DECLARE_COMMON_PD_T(name_.c_str(), ref_deconvolution_bwd_weights_t); status_t init_convolution(engine_t *engine) { diff --git a/src/cpu/ref_resampling.hpp b/src/cpu/ref_resampling.hpp index bb0c4e63465..cc6941ca58e 100644 --- a/src/cpu/ref_resampling.hpp +++ b/src/cpu/ref_resampling.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -67,7 +67,8 @@ struct ref_resampling_fwd_t : public primitive_t { }; ref_resampling_fwd_t(const pd_t *apd); - ~ref_resampling_fwd_t(); + + ~ref_resampling_fwd_t() override; status_t init(engine_t *engine) override { ref_post_ops_ @@ -114,7 +115,8 @@ struct ref_resampling_bwd_t : public primitive_t { }; ref_resampling_bwd_t(const pd_t *apd); - ~ref_resampling_bwd_t(); + + ~ref_resampling_bwd_t() override; status_t execute(const exec_ctx_t &ctx) const override { execute_backward(ctx); diff --git a/src/cpu/ref_shuffle.hpp b/src/cpu/ref_shuffle.hpp index 5d2adf13407..168c7cd6170 100644 --- a/src/cpu/ref_shuffle.hpp +++ b/src/cpu/ref_shuffle.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2018-2024 Intel Corporation +* Copyright 2018-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -92,7 +92,7 @@ struct ref_shuffle_t : public primitive_t { return dnnl_success; } - ~ref_shuffle_t() { free(rev_transposed_); } + ~ref_shuffle_t() override { free(rev_transposed_); } status_t execute(const exec_ctx_t &ctx) const override { const memory_desc_wrapper src_d( diff --git a/src/cpu/reorder/simple_reorder.hpp b/src/cpu/reorder/simple_reorder.hpp index fc89168ba2a..65aaa195413 100644 --- a/src/cpu/reorder/simple_reorder.hpp +++ b/src/cpu/reorder/simple_reorder.hpp @@ -46,10 +46,10 @@ template using data_t = typename prec_traits_t::type; template -using _qz_a1b0 = q10n::qz_a1b0, data_t>; +using _qz_a1b0 = q10n::qz_a1b0_t, data_t>; template -using _qz = q10n::qz, data_t>; +using _qz = q10n::qz_t, data_t>; namespace fmt_order { const bool keep = true; @@ -343,7 +343,7 @@ struct simple_reorder_impl, data_t>()( + o = q10n::qz_b0_t, data_t>()( i, s * adj_scale * d); if (req_comp) cp[g * OC + oc] -= (int32_t)o; if (has_asymmetric_comp) zp[g * OC + oc] -= (int32_t)o; @@ -547,7 +547,7 @@ struct simple_reorder_impl, data_t>()( + = q10n::qz_b0_t, data_t>()( inp[plain_off], src_scale * adj_scale * dst_scale); if (req_comp) c[oc] -= (128 * (int32_t)(out[index(oc, ic)])); @@ -710,7 +710,7 @@ struct simple_reorder_impl, data_t>()( + out[oc] = q10n::qz_b0_t, data_t>()( inp[plain_off], s[oc] * adj_scale * d[oc]); if (has_asymmetric_comp) zp[oc] -= (int32_t)(out[oc]); } @@ -904,7 +904,7 @@ struct simple_reorder_impl::inner_blks>( oc, ic); - out[index] = q10n::qz_b0, data_t>()( + out[index] = q10n::qz_b0_t, data_t>()( inp[plain_off], s[oc] * adj_scale * d[oc]); if (has_asymmetric_comp) zp[oc] -= (int32_t)(out[index]); @@ -1077,8 +1077,9 @@ struct simple_reorder_impl::inner_blks>( d0, d1); - out[index] = q10n::qz_b0, data_t>()( - inp[plain_off], s[0] * adj_scale * d[0]); + out[index] + = q10n::qz_b0_t, data_t>()( + inp[plain_off], s[0] * adj_scale * d[0]); auto o = static_cast(out[index]); if (req_comp) cp[d1] -= (128 * o); @@ -1088,8 +1089,9 @@ struct simple_reorder_impl::inner_blks>( d0, d1); - out[index] = q10n::qz_b0, data_t>()( - 0, s[0] * adj_scale * d[0]); + out[index] + = q10n::qz_b0_t, data_t>()( + 0, s[0] * adj_scale * d[0]); } } @@ -1097,7 +1099,7 @@ struct simple_reorder_impl::inner_blks>( d0, d1); - out[index] = q10n::qz_b0, data_t>()( + out[index] = q10n::qz_b0_t, data_t>()( 0, s[0] * adj_scale * d[0]); } }; @@ -1265,7 +1267,7 @@ struct simple_reorder_impl, data_t>()( + out[g] = q10n::qz_b0_t, data_t>()( inp[i_off], src_scale * adj_scale * dst_scale); } }; @@ -2094,25 +2096,26 @@ struct simple_reorder_impl, data_t>()( - input[e]); + output[e] + = q10n::qz_a1b0_t, data_t>()( + input[e]); } } else if (alpha == 1.0) { PRAGMA_OMP_SIMD() for (size_t e = start; e < end; ++e) { - output[e] = q10n::qz_a1, data_t>()( + output[e] = q10n::qz_a1_t, data_t>()( input[e], output[e], beta); } } else if (beta == 0.0) { PRAGMA_OMP_SIMD() for (size_t e = start; e < end; ++e) { - output[e] = q10n::qz_b0, data_t>()( + output[e] = q10n::qz_b0_t, data_t>()( input[e], alpha); } } else { PRAGMA_OMP_SIMD() for (size_t e = start; e < end; ++e) { - output[e] = q10n::qz, data_t>()( + output[e] = q10n::qz_t, data_t>()( input[e], output[e], alpha, beta); } } @@ -2121,28 +2124,27 @@ struct simple_reorder_impl, + output[e] = q10n::qz_a1b0_t, data_t>()(input[e]); } } else if (alpha == 1.0) { PRAGMA_OMP_SIMD() for (size_t e = nelems - rem_elems; e < nelems; ++e) { - output[e] - = q10n::qz_a1, data_t>()( - input[e], output[e], beta); + output[e] = q10n::qz_a1_t, + data_t>()(input[e], output[e], beta); } } else if (beta == 0.0) { PRAGMA_OMP_SIMD() for (size_t e = nelems - rem_elems; e < nelems; ++e) { - output[e] - = q10n::qz_b0, data_t>()( - input[e], alpha); + output[e] = q10n::qz_b0_t, + data_t>()(input[e], alpha); } } else { PRAGMA_OMP_SIMD() for (size_t e = nelems - rem_elems; e < nelems; ++e) { - output[e] = q10n::qz, data_t>()( - input[e], output[e], alpha, beta); + output[e] + = q10n::qz_t, data_t>()( + input[e], output[e], alpha, beta); } } } diff --git a/src/cpu/rnn/postgemm_dispatcher.hpp b/src/cpu/rnn/postgemm_dispatcher.hpp index 0d2637c1f86..be43824f7aa 100644 --- a/src/cpu/rnn/postgemm_dispatcher.hpp +++ b/src/cpu/rnn/postgemm_dispatcher.hpp @@ -253,20 +253,25 @@ struct rnn_postgemm_dispatcher { && !mayiuse(avx512_core)) return status::success; +//NOLINTBEGIN(bugprone-macro-parentheses) +// Can't put types into `()`: +// error: expected type-specifier before ‘)’ token #define CREATE_WITH_DIR(k, ker_t) \ do { \ if (mayiuse(avx512_core)) \ - k.reset(new ker_t(rnn, pd_)); \ + (k).reset( \ + new ker_t(rnn, pd_)); \ else if (mayiuse(avx2)) \ - k.reset(new ker_t(rnn, pd_)); \ + (k).reset(new ker_t(rnn, pd_)); \ else \ - k.reset(new ker_t(rnn, pd_)); \ + (k).reset(new ker_t(rnn, pd_)); \ } while (0) #define CREATE(k, ker_t) \ do { \ - if (jit_fwd) CREATE_WITH_DIR(k, CONCAT2(ker_t, _fwd)); \ - if (jit_bwd) CREATE_WITH_DIR(k, CONCAT2(ker_t, _bwd)); \ + if (jit_fwd) CREATE_WITH_DIR((k), CONCAT2(ker_t, _fwd)); \ + if (jit_bwd) CREATE_WITH_DIR((k), CONCAT2(ker_t, _bwd)); \ } while (0) + //NOLINTEND(bugprone-macro-parentheses) if (pd_->cell_kind() == alg_kind::vanilla_lstm) { CREATE(rnn_postgemm_, jit_uni_lstm_cell_postgemm); diff --git a/src/cpu/rnn/ref_postgemm_lstm.cpp b/src/cpu/rnn/ref_postgemm_lstm.cpp index 8fef036b710..d452eb39fc2 100644 --- a/src/cpu/rnn/ref_postgemm_lstm.cpp +++ b/src/cpu/rnn/ref_postgemm_lstm.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2018-2024 Intel Corporation +* Copyright 2018-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -188,7 +188,7 @@ rnn_postgemm_sig(rnn_postgemm_fwd_u8_t::lstm_postgemm) { const auto quantize_f32_u8 = [&](float f) { float qf = f * data_scale + data_shift; - return q10n::qz_a1b0()(qf); + return q10n::qz_a1b0_t()(qf); }; const auto dequantize_s32_f32 = [&](gemm_acc_t s, int gate, int j) { @@ -229,7 +229,7 @@ rnn_postgemm_sig(rnn_postgemm_fwd_s8_t::lstm_postgemm) { const auto quantize_f32_s8 = [&](float f) { float qf = f * data_scale + data_shift; - return q10n::qz_a1b0()(qf); + return q10n::qz_a1b0_t()(qf); }; const auto dequantize_s32_f32 = [&](gemm_acc_t s, int gate, int j) { diff --git a/src/cpu/rnn/ref_postgemm_lstm_projection.cpp b/src/cpu/rnn/ref_postgemm_lstm_projection.cpp index 153603ecafe..5a3c728d8db 100644 --- a/src/cpu/rnn/ref_postgemm_lstm_projection.cpp +++ b/src/cpu/rnn/ref_postgemm_lstm_projection.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -104,7 +104,7 @@ rnn_postgemm_sig(rnn_postgemm_fwd_u8_t::lstm_projection_postgemm) { float qf = f * data_scale + data_shift; qf = nstl::min(qf, 255.0f); qf = nstl::max(qf, 0.0f); - return q10n::qz_a1b0()(qf); + return q10n::qz_a1b0_t()(qf); }; const auto dequantize_s32_f32 = [&](gemm_acc_t s, int j) { @@ -149,7 +149,7 @@ rnn_postgemm_sig(rnn_postgemm_fwd_s8_t::lstm_projection_postgemm) { const auto quantize_f32_s8 = [&](float f) { const float qf = f * data_scale + data_shift; - return q10n::qz_a1b0()(qf); + return q10n::qz_a1b0_t()(qf); }; const auto dequantize_s32_f32 = [&](gemm_acc_t s, int j) { diff --git a/src/cpu/rnn/ref_rnn.cpp b/src/cpu/rnn/ref_rnn.cpp index c4a44f1a04f..21ccbd20d54 100644 --- a/src/cpu/rnn/ref_rnn.cpp +++ b/src/cpu/rnn/ref_rnn.cpp @@ -1423,7 +1423,7 @@ void copy_init_iter_fwd_template(const rnn_conf_t &rnn, const rnn_pd_t *pd, const auto maybe_q = [&](input_data_t f) { if (quantize) { float qf = f * data_scale + data_shift; - return q10n::qz_a1b0()(qf); + return q10n::qz_a1b0_t()(qf); } else return (src_data_t)f; }; @@ -1589,7 +1589,7 @@ void copy_res_layer_fwd_template(const rnn_conf_t &rnn, const rnn_pd_t *pd, PRAGMA_OMP_SIMD() for (int s = 0; s < rnn.dlc; s++) { float val = (float)ss[s] + dd[s]; - val = q10n::qz_a1b0()(val); + val = q10n::qz_a1b0_t()(val); dd[s] = (dst_layer_dt)((val - 2 * shift) / scale); } } else if (rnn_u8u8_case diff --git a/src/cpu/rnn/ref_rnn.hpp b/src/cpu/rnn/ref_rnn.hpp index d5315b0fe4b..45abcd83a77 100644 --- a/src/cpu/rnn/ref_rnn.hpp +++ b/src/cpu/rnn/ref_rnn.hpp @@ -172,7 +172,7 @@ struct _ref_rnn_common_t : public primitive_t { : primitive_t(apd), rnn_postgemm_(nullptr) {} status_t init(engine_t *engine) override; - virtual ~_ref_rnn_common_t() { delete rnn_postgemm_; } + ~_ref_rnn_common_t() override { delete rnn_postgemm_; } status_t execute(const exec_ctx_t &ctx) const override; diff --git a/src/cpu/rnn/rnn_reorders.hpp b/src/cpu/rnn/rnn_reorders.hpp index 04878bdf2b8..79b1ff21e93 100644 --- a/src/cpu/rnn/rnn_reorders.hpp +++ b/src/cpu/rnn/rnn_reorders.hpp @@ -76,7 +76,7 @@ static inline void quantize_igo(int8_t *scratch_quantized, for (int go = 0; go < G * O; go++) { const float s = scales[(mask == 0) ? 0 : go]; scratch_quantized[ldi * G * O + go] - = q10n::qz_b0()( + = q10n::qz_b0_t()( src[ldi * G * O + go], s); } } @@ -100,7 +100,7 @@ static inline void quantize_goi(int8_t *scratch_quantized, PRAGMA_OMP_SIMD() for (dim_t i = 0; i < I; i++) { scratch_quantized[ld * I * G * O + i * G * O + go] - = q10n::qz_b0()( + = q10n::qz_b0_t()( src[ld * G * O * I + go * I + i], s); } }); @@ -271,7 +271,7 @@ struct rnn_data_reorder_t : public primitive_t { PRAGMA_OMP_SIMD() for (int j = 0; j < inner_dim; ++j) { const float in = (float)i_[j] * scale + shift; - o_[j] = q10n::qz_a1b0()(in); + o_[j] = q10n::qz_a1b0_t()(in); } } }); @@ -288,7 +288,8 @@ struct rnn_data_reorder_t : public primitive_t { const size_t nelems = input_d.nelems(); parallel_nd(nelems, [&](size_t i) { const float in = (float)input[input_d.off_l(i)] * scale + shift; - output[output_d.off_l(i)] = q10n::qz_a1b0()(in); + output[output_d.off_l(i)] + = q10n::qz_a1b0_t()(in); }); return status::success; } diff --git a/src/cpu/rnn/rnn_utils.hpp b/src/cpu/rnn/rnn_utils.hpp index 826cbd13044..f120e733cd3 100644 --- a/src/cpu/rnn/rnn_utils.hpp +++ b/src/cpu/rnn/rnn_utils.hpp @@ -316,7 +316,7 @@ struct rnn_conf_t { size_t weights_iter_comp_offset = 0, weights_iter_pack_size = 0; size_t weights_projection_comp_offset = 0, weights_projection_pack_size = 0; - bool copy_bias = 0; + bool copy_bias = false; int weights_layer_ld = 0, weights_layer_nld = 0; int diff_weights_layer_ld = 0, diff_weights_layer_nld = 0; int weights_iter_ld = 0, weights_iter_nld = 0; @@ -347,9 +347,10 @@ struct rnn_conf_t { int dst_iter_c_ld_ = 0, dst_iter_c_nld_ = 0; int weights_iter_compensation_size = 0, weights_layer_compensation_size = 0; - bool is_fwd = 0, is_training = 0, is_lbr = 0, is_lstm_peephole = 0, - is_lstm_projection = 0, is_augru = 0, is_orig_gru = 0; - bool use_workspace = 0; + bool is_fwd = false, is_training = false, is_lbr = false, + is_lstm_peephole = false, is_lstm_projection = false, is_augru = false, + is_orig_gru = false; + bool use_workspace = false; // Size of workspace for each tensor in bytes // Notes: diff --git a/src/cpu/simple_q10n.hpp b/src/cpu/simple_q10n.hpp index 7e4289e663e..10f2ca62a06 100644 --- a/src/cpu/simple_q10n.hpp +++ b/src/cpu/simple_q10n.hpp @@ -82,33 +82,33 @@ inline out_t saturate_and_round(acc_t f) { /* Quantization with alpha == 1 and beta == 0 */ template -struct qz_a1b0 { +struct qz_a1b0_t { out_t operator()(in_t in) { return saturate_and_round((float)in); } }; template -struct qz_a1b0::value && !is_subset::value>::type> { out_t operator()(in_t in) { return saturate(in); } }; template -struct qz_a1b0::value>::type> { out_t operator()(in_t in) { return (out_t)in; } }; /* Quantization with alpha == 1 */ template -struct qz_a1 { +struct qz_a1_t { out_t operator()(in_t in, out_t out, float beta) { return saturate_and_round((float)in + beta * out); } }; template -struct qz_a1 { +struct qz_a1_t { float operator()(in_t in, float out, float beta) { return (float)in + beta * out; } @@ -116,55 +116,55 @@ struct qz_a1 { /* Quantization with beta == 0 */ template -struct qz_b0 { +struct qz_b0_t { out_t operator()(in_t in, float alpha) { return saturate_and_round(alpha * in); } }; template -struct qz_b0 { +struct qz_b0_t { float operator()(in_t in, float alpha) { return alpha * in; } }; /* Quantization */ template -struct qz { +struct qz_t { out_t operator()(in_t in, out_t out, float alpha, float beta) { return saturate_and_round(alpha * in + (beta ? beta * out : 0)); } }; template -struct qz { +struct qz_t { float operator()(in_t in, float out, float alpha, float beta) { return alpha * in + (beta ? beta * out : 0); } }; template <> -struct qz { +struct qz_t { float operator()(bfloat16_t in, bfloat16_t out, float alpha, float beta) { return (bfloat16_t)(alpha * (float)in + (beta ? beta * (float)out : 0)); } }; template <> -struct qz { +struct qz_t { float operator()(float in, bfloat16_t out, float alpha, float beta) { return (bfloat16_t)(alpha * in + (beta ? beta * out : 0)); } }; template <> -struct qz { +struct qz_t { float operator()(float16_t in, float16_t out, float alpha, float beta) { return (float16_t)(alpha * (float)in + (beta ? beta * (float)out : 0)); } }; template <> -struct qz { +struct qz_t { float operator()(float in, float16_t out, float alpha, float beta) { return (float16_t)(alpha * in + (beta ? beta * out : 0)); } diff --git a/src/cpu/simple_resampling.hpp b/src/cpu/simple_resampling.hpp index f632baa27a4..a9ccef95af2 100644 --- a/src/cpu/simple_resampling.hpp +++ b/src/cpu/simple_resampling.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -103,7 +103,8 @@ struct simple_resampling_fwd_t : public primitive_t { simple_resampling_fwd_t(const pd_t *apd); status_t init(engine_t *engine) override; - ~simple_resampling_fwd_t() = default; + + ~simple_resampling_fwd_t() override = default; status_t execute(const exec_ctx_t &ctx) const override; @@ -149,7 +150,8 @@ struct simple_resampling_bwd_t : public primitive_t { simple_resampling_bwd_t(const pd_t *apd); status_t init(engine_t *engine) override; - ~simple_resampling_bwd_t() = default; + + ~simple_resampling_bwd_t() override = default; status_t execute(const exec_ctx_t &ctx) const override; diff --git a/src/cpu/x64/amx_tile_configure.cpp b/src/cpu/x64/amx_tile_configure.cpp index 410ea05bc91..9464c604617 100644 --- a/src/cpu/x64/amx_tile_configure.cpp +++ b/src/cpu/x64/amx_tile_configure.cpp @@ -22,12 +22,12 @@ namespace impl { namespace cpu { namespace x64 { -struct jit_amx_tilecfg_t : public jit_generator { +struct jit_amx_tilecfg_t : public jit_generator_t { DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_amx_tilecfg_t) // TODO: Need to check status jit_amx_tilecfg_t(bool lazy = false) - : jit_generator(jit_name(), avx512_core_amx), is_lazy_(lazy) { + : jit_generator_t(jit_name(), avx512_core_amx), is_lazy_(lazy) { create_kernel(); } @@ -72,11 +72,11 @@ struct jit_amx_tilecfg_t : public jit_generator { } }; -struct jit_amx_tilerelease_t : public jit_generator { +struct jit_amx_tilerelease_t : public jit_generator_t { DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_amx_tilerelease_t) // TODO: Need to check status - jit_amx_tilerelease_t() : jit_generator(jit_name(), avx512_core_amx) { + jit_amx_tilerelease_t() : jit_generator_t(jit_name(), avx512_core_amx) { create_kernel(); } diff --git a/src/cpu/x64/brgemm/brgemm_containers.hpp b/src/cpu/x64/brgemm/brgemm_containers.hpp index bc4f7ec78c6..5f5a7c67177 100644 --- a/src/cpu/x64/brgemm/brgemm_containers.hpp +++ b/src/cpu/x64/brgemm/brgemm_containers.hpp @@ -34,7 +34,7 @@ namespace brgemm_containers { struct brgemm_desc_container_t { public: - brgemm_desc_container_t() {} + brgemm_desc_container_t() = default; brgemm_desc_container_t(size_t ns) { resize(ns); } void resize(size_t ns) { refs_.resize(ns); } inline const brgemm_desc_t *operator[](int idx) const { return refs_[idx]; } @@ -71,7 +71,7 @@ struct brgemm_desc_container_t { // #define BRGEMM_KERNEL_GLOBAL_STORAGE struct brgemm_kernel_container_t { - brgemm_kernel_container_t() {} + brgemm_kernel_container_t() = default; brgemm_kernel_container_t(size_t ns) { resize(ns); } void resize(size_t ns) { refs_.resize(ns); } inline const brgemm_kernel_t *operator[](int idx) const { @@ -113,7 +113,7 @@ struct brgemm_kernel_container_t { struct brgemm_palette_container_t { using S_t = std::array; - brgemm_palette_container_t() {} + brgemm_palette_container_t() = default; brgemm_palette_container_t(size_t ns) { resize(ns); } void resize(size_t ns) { refs_.resize(ns); } diff --git a/src/cpu/x64/brgemm/brgemm_types.hpp b/src/cpu/x64/brgemm/brgemm_types.hpp index 1b86897858f..a904e029ae4 100644 --- a/src/cpu/x64/brgemm/brgemm_types.hpp +++ b/src/cpu/x64/brgemm/brgemm_types.hpp @@ -97,11 +97,7 @@ struct brgemm_prf_t { }; struct brgemm_batch_element_t { - brgemm_batch_element_t() { - ptr.A = ptr.B = nullptr; - vvpad.top = vvpad.bottom = 0; - has_s8s8_comp_batch_pad = 0; - } + brgemm_batch_element_t() { ptr.A = ptr.B = nullptr; } union { struct { const void *A; @@ -113,14 +109,14 @@ struct brgemm_batch_element_t { } offset; }; struct { - dim_t top; - dim_t bottom; + dim_t top = 0; + dim_t bottom = 0; } vvpad; // w.r.t. M dimension // Used to calculate compensation when batch padding is present. // Note: batch_pad represent the overlap between weights and the height // dimension w.r.t. convolution dimensions. - dim_t has_s8s8_comp_batch_pad; + dim_t has_s8s8_comp_batch_pad = 0; }; struct DNNL_API brgemm_attr_t { @@ -206,7 +202,7 @@ struct DNNL_API brgemm_attr_t { }; struct brgemm_desc_t { - brgemm_desc_t() {} + brgemm_desc_t() = default; brgemm_desc_t(const brgemm_desc_t &other); DNNL_API ~brgemm_desc_t(); @@ -569,32 +565,32 @@ struct jit_brgemm_kernel_t; struct jit_brgemm_amx_uker_base_t; template struct jit_brdgmm_kernel_base_t; -class jit_generator; +class jit_generator_t; struct brgemm_kernel_t { - brgemm_kernel_t() {}; - virtual ~brgemm_kernel_t() {}; + brgemm_kernel_t() = default; + virtual ~brgemm_kernel_t() = default; virtual status_t create_kernel() = 0; virtual void operator()(brgemm_kernel_params_t *) const = 0; - virtual const jit_generator *get_jit_generator() const = 0; + virtual const jit_generator_t *get_jit_generator() const = 0; virtual const brgemm_desc_t &get_brg() const = 0; }; -struct jit_base_brgemm_kernel_t : public jit_generator { +struct jit_base_brgemm_kernel_t : public jit_generator_t { jit_base_brgemm_kernel_t(const char *impl_name, cpu_isa_t isa_impl) - : jit_generator(impl_name, isa_impl) {} + : jit_generator_t(impl_name, isa_impl) {} virtual const brgemm_desc_t &get_brg() const = 0; }; template struct brgemm_kernel_common_t : public brgemm_kernel_t { brgemm_kernel_common_t(const brgemm_desc_t &abrd); - ~brgemm_kernel_common_t(); + ~brgemm_kernel_common_t() override; status_t create_kernel() override; void operator()(brgemm_kernel_params_t *) const override; - virtual const jit_generator *get_jit_generator() const override; - virtual const brgemm_desc_t &get_brg() const override { + const jit_generator_t *get_jit_generator() const override; + const brgemm_desc_t &get_brg() const override { return ((jit_base_brgemm_kernel_t *)brgemm_kernel_)->get_brg(); } @@ -606,12 +602,12 @@ struct brgemm_kernel_common_t : public brgemm_kernel_t { struct brgemm_amx_uker_t : public brgemm_kernel_t { brgemm_amx_uker_t(const brgemm_desc_t &abrd); - ~brgemm_amx_uker_t(); + ~brgemm_amx_uker_t() override; status_t create_kernel() override; void operator()(brgemm_kernel_params_t *) const override; - virtual const jit_generator *get_jit_generator() const override; - virtual const brgemm_desc_t &get_brg() const override { + const jit_generator_t *get_jit_generator() const override; + const brgemm_desc_t &get_brg() const override { return ((jit_base_brgemm_kernel_t *)brgemm_kernel_)->get_brg(); } @@ -624,12 +620,12 @@ struct brgemm_amx_uker_t : public brgemm_kernel_t { template struct brdgmm_kernel_t : public brgemm_kernel_t { brdgmm_kernel_t(const brgemm_desc_t &abrd); - ~brdgmm_kernel_t(); + ~brdgmm_kernel_t() override; status_t create_kernel() override; void operator()(brgemm_kernel_params_t *) const override; - virtual const jit_generator *get_jit_generator() const override; - virtual const brgemm_desc_t &get_brg() const override { + const jit_generator_t *get_jit_generator() const override; + const brgemm_desc_t &get_brg() const override { return ((jit_base_brgemm_kernel_t *)brgemm_kernel_)->get_brg(); } diff --git a/src/cpu/x64/brgemm/jit_brdgmm_kernel.cpp b/src/cpu/x64/brgemm/jit_brdgmm_kernel.cpp index 461e9142bd1..7a8aaf66445 100644 --- a/src/cpu/x64/brgemm/jit_brdgmm_kernel.cpp +++ b/src/cpu/x64/brgemm/jit_brdgmm_kernel.cpp @@ -40,7 +40,7 @@ jit_brdgmm_kernel_base_t::jit_brdgmm_kernel_base_t( const brgemm_desc_t &abrd) : jit_base_brgemm_kernel_t(jit_name(), abrd.isa_impl) , brg(abrd) - , simd_w_(vreg_traits::vlen / brg.typesize_C) + , simd_w_(vreg_traits_t::vlen / brg.typesize_C) , max_vmms_(isa_num_vregs(brg.isa_impl)) , compute_dst_zp_(brg.zp_type_c != brgemm_broadcast_t::none) , compute_src_zp_(brg.zp_type_a != brgemm_broadcast_t::none) @@ -238,8 +238,8 @@ void jit_brdgmm_kernel_base_t::cvt2ps(data_type_t type_in, bool store) { const int tail_size = tail_length(); const bool is_load_tail = op.isMEM() && mask_flag && tail_size > 0 - && (tail_size - < static_cast(vreg_traits::vlen / sizeof(float))); + && (tail_size < static_cast( + vreg_traits_t::vlen / sizeof(float))); if (IMPLICATION(is_load_tail, isa_has_masks(brg.isa_impl))) { const Vmm vmm = maybe_mask(vmm_in, is_load_tail, store); switch (type_in) { @@ -1439,7 +1439,7 @@ void brdgmm_kernel_t::operator()(brgemm_kernel_params_t *params) const { } template -const jit_generator *brdgmm_kernel_t::get_jit_generator() const { +const jit_generator_t *brdgmm_kernel_t::get_jit_generator() const { return brgemm_kernel_; } diff --git a/src/cpu/x64/brgemm/jit_brdgmm_kernel.hpp b/src/cpu/x64/brgemm/jit_brdgmm_kernel.hpp index 66932f9ef09..236d027de56 100644 --- a/src/cpu/x64/brgemm/jit_brdgmm_kernel.hpp +++ b/src/cpu/x64/brgemm/jit_brdgmm_kernel.hpp @@ -168,7 +168,7 @@ struct jit_brdgmm_kernel_base_t : public jit_base_brgemm_kernel_t { using Vmm = typename utils::conditional::value, Xbyak::Zmm, Wmm>::type; - using Vmm_low_t = typename vreg_traits::Vmm_lower_t; + using Vmm_low_t = typename vreg_traits_t::Vmm_lower_t; using po_injector_t = injector::jit_uni_postops_injector_base_t; std::unique_ptr postops_injector_; std::unique_ptr bf16_emu_; diff --git a/src/cpu/x64/brgemm/jit_brgemm_amx_uker.cpp b/src/cpu/x64/brgemm/jit_brgemm_amx_uker.cpp index c5165b8a3a9..a8641db38ed 100644 --- a/src/cpu/x64/brgemm/jit_brgemm_amx_uker.cpp +++ b/src/cpu/x64/brgemm/jit_brgemm_amx_uker.cpp @@ -146,7 +146,7 @@ struct jit_brgemm_amx_uker_base_t : public jit_base_brgemm_kernel_t { using reg64_t = const Xbyak::Reg64; enum { simd_w = 16, - zmm_width_in_bytes = cpu_isa_traits::vlen, + zmm_width_in_bytes = cpu_isa_traits_t::vlen, }; // Register decomposition @@ -2747,7 +2747,7 @@ void brgemm_amx_uker_t::operator()(brgemm_kernel_params_t *params) const { (*brgemm_kernel_)(params); } -const jit_generator *brgemm_amx_uker_t::get_jit_generator() const { +const jit_generator_t *brgemm_amx_uker_t::get_jit_generator() const { return brgemm_kernel_; } diff --git a/src/cpu/x64/brgemm/jit_brgemm_kernel.cpp b/src/cpu/x64/brgemm/jit_brgemm_kernel.cpp index ef87f1c4c84..79eb9f0e40c 100644 --- a/src/cpu/x64/brgemm/jit_brgemm_kernel.cpp +++ b/src/cpu/x64/brgemm/jit_brgemm_kernel.cpp @@ -136,11 +136,11 @@ struct jit_brgemm_kernel_t : public jit_base_brgemm_kernel_t { enum matrix_kind_t { matrix_A, matrix_B }; static constexpr int zmm_width_in_bytes_ - = cpu_isa_traits::vlen; + = cpu_isa_traits_t::vlen; using Vmm = typename utils::conditional::value, Xbyak::Zmm, Wmm>::type; - using Vmm_lower_t = typename vreg_traits::Vmm_lower_t; + using Vmm_lower_t = typename vreg_traits_t::Vmm_lower_t; using po_injector_t = injector::jit_uni_postops_injector_base_t; std::unique_ptr postops_injector_; std::unique_ptr bf16_emu_; @@ -669,8 +669,8 @@ void jit_brgemm_kernel_t::cvt2ps(data_type_t type_in, const Vmm vmm_in, const Xbyak::Operand &op, bool mask_flag, bool store, Xbyak::Opmask ktail_mask, dim_t tail_size) { Vmm vmm = vmm_in; - const bool has_tail - = op.isMEM() && tail_size != vreg_traits::vlen / sizeof(float); + const bool has_tail = op.isMEM() + && tail_size != vreg_traits_t::vlen / sizeof(float); if (IMPLICATION(has_tail, is_superset(brg.isa_impl, avx512_core))) { vmm = vmm_mask(vmm_in, mask_flag, store, ktail_mask); } else { @@ -1993,7 +1993,7 @@ bool jit_brgemm_kernel_t::maybe_pre_process_k_tail(bool last_bdb, && brg.rdb_tail != 0 && last_bdb && is_rd_tail; if (!need_k_tail_processing) return false; - const auto zmm_width_in_bytes = cpu_isa_traits::vlen; + const auto zmm_width_in_bytes = cpu_isa_traits_t::vlen; auto transform_offset = brg.get_num_C_tiles() * brgemm_desc_t::tilesize + brg.get_convert_wsp_buffer_size(); @@ -2862,7 +2862,7 @@ void jit_brgemm_kernel_t::generate() { postamble(); align(32); - const dim_t simd = vreg_traits::vlen / sizeof(float); + const dim_t simd = vreg_traits_t::vlen / sizeof(float); if (!isa_has_masks(brg.isa_impl) && brg.ldb_tail > 0) { L(avx_tail_mask_); for (dim_t i = 0; i < brg.ldb_tail; ++i) @@ -2948,7 +2948,7 @@ void brgemm_kernel_common_t::operator()( } template -const jit_generator *brgemm_kernel_common_t::get_jit_generator() const { +const jit_generator_t *brgemm_kernel_common_t::get_jit_generator() const { return brgemm_kernel_; } diff --git a/src/cpu/x64/cpu_barrier.cpp b/src/cpu/x64/cpu_barrier.cpp index 24ab6515b02..2ab3bb5c4a5 100644 --- a/src/cpu/x64/cpu_barrier.cpp +++ b/src/cpu/x64/cpu_barrier.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2017-2022 Intel Corporation +* Copyright 2017-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,7 +26,7 @@ namespace x64 { namespace simple_barrier { void generate( - jit_generator &code, Xbyak::Reg64 reg_ctx, Xbyak::Reg64 reg_nthr) { + jit_generator_t &code, Xbyak::Reg64 reg_ctx, Xbyak::Reg64 reg_nthr) { #define BAR_CTR_OFF offsetof(ctx_t, ctr) #define BAR_SENSE_OFF offsetof(ctx_t, sense) using namespace Xbyak; @@ -81,7 +81,7 @@ void generate( } /** jit barrier generator */ -struct jit_t : public jit_generator { +struct jit_t : public jit_generator_t { void generate() override { simple_barrier::generate(*this, abi_param1, abi_param2); @@ -89,7 +89,7 @@ struct jit_t : public jit_generator { } // TODO: Need to check status - jit_t() : jit_generator(jit_name()) { create_kernel(); } + jit_t() : jit_generator_t(jit_name()) { create_kernel(); } DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_t) }; diff --git a/src/cpu/x64/cpu_barrier.hpp b/src/cpu/x64/cpu_barrier.hpp index c76d57911af..f5cd7966ac9 100644 --- a/src/cpu/x64/cpu_barrier.hpp +++ b/src/cpu/x64/cpu_barrier.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2017-2020 Intel Corporation +* Copyright 2017-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -68,11 +68,12 @@ void barrier(ctx_t *ctx, int nthr); /** injects actual barrier implementation into another jitted code * @params: - * code -- jit_generator object where the barrier is to be injected + * code -- jit_generator_t object where the barrier is to be injected * reg_ctx -- read-only register with pointer to the barrier context * reg_nnthr -- read-only register with the # of synchronizing threads */ -void generate(jit_generator &code, Xbyak::Reg64 reg_ctx, Xbyak::Reg64 reg_nthr); +void generate( + jit_generator_t &code, Xbyak::Reg64 reg_ctx, Xbyak::Reg64 reg_nthr); } // namespace simple_barrier diff --git a/src/cpu/x64/cpu_isa_traits.cpp b/src/cpu/x64/cpu_isa_traits.cpp index 931f13a8c2b..c9d718e1132 100644 --- a/src/cpu/x64/cpu_isa_traits.cpp +++ b/src/cpu/x64/cpu_isa_traits.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -46,7 +46,7 @@ cpu_isa_t init_max_cpu_isa() { if (!isa_val.empty()) { #define IF_HANDLE_CASE(cpu_isa) \ - if (isa_val.compare(cpu_isa_traits::user_option_env) == 0) \ + if (isa_val.compare(cpu_isa_traits_t::user_option_env) == 0) \ max_cpu_isa_val = cpu_isa #define ELSEIF_HANDLE_CASE(cpu_isa) else IF_HANDLE_CASE(cpu_isa) @@ -206,7 +206,9 @@ status_t set_max_cpu_isa(dnnl_cpu_isa_t isa) { cpu_isa_t isa_to_set = isa_undef; #define HANDLE_CASE(cpu_isa) \ - case cpu_isa_traits::user_option_val: isa_to_set = cpu_isa; break; + case cpu_isa_traits_t::user_option_val: \ + isa_to_set = cpu_isa; \ + break; switch (isa) { HANDLE_CASE(isa_all); HANDLE_CASE(sse41); diff --git a/src/cpu/x64/cpu_isa_traits.hpp b/src/cpu/x64/cpu_isa_traits.hpp index 189e030ac31..7fc472335c8 100644 --- a/src/cpu/x64/cpu_isa_traits.hpp +++ b/src/cpu/x64/cpu_isa_traits.hpp @@ -209,28 +209,28 @@ static inline bool is_superset(cpu_isa_t isa_1, cpu_isa_t isa_2) { } template -struct vreg_traits {}; +struct vreg_traits_t {}; template <> -struct vreg_traits { +struct vreg_traits_t { using Vmm_lower_t = Xbyak::Ymm; static constexpr size_t vlen = 64; }; template <> -struct vreg_traits { +struct vreg_traits_t { using Vmm_lower_t = Xbyak::Xmm; static constexpr size_t vlen = 32; }; template <> -struct vreg_traits { +struct vreg_traits_t { using Vmm_lower_t = Xbyak::Xmm; static constexpr size_t vlen = 16; }; template -struct cpu_isa_traits {}; /* ::vlen -> 32 (for avx2) */ +struct cpu_isa_traits_t {}; /* ::vlen -> 32 (for avx2) */ // pack struct so it can fit into a single 64-byte cache line #pragma pack(push, 1) @@ -244,90 +244,92 @@ struct palette_config_t { #pragma pack(pop) template <> -struct cpu_isa_traits { +struct cpu_isa_traits_t { static constexpr dnnl_cpu_isa_t user_option_val = dnnl_cpu_isa_default; static constexpr const char *user_option_env = "default"; }; template <> -struct cpu_isa_traits { +struct cpu_isa_traits_t { using Vmm = Xbyak::Xmm; static constexpr int vlen_shift = 4; - static constexpr int vlen = vreg_traits::vlen; + static constexpr int vlen = vreg_traits_t::vlen; static constexpr int n_vregs = 16; static constexpr dnnl_cpu_isa_t user_option_val = dnnl_cpu_isa_sse41; static constexpr const char *user_option_env = "sse41"; }; template <> -struct cpu_isa_traits { +struct cpu_isa_traits_t { using Vmm = Xbyak::Ymm; static constexpr int vlen_shift = 5; - static constexpr int vlen = vreg_traits::vlen; + static constexpr int vlen = vreg_traits_t::vlen; static constexpr int n_vregs = 16; static constexpr dnnl_cpu_isa_t user_option_val = dnnl_cpu_isa_avx; static constexpr const char *user_option_env = "avx"; }; template <> -struct cpu_isa_traits : public cpu_isa_traits { +struct cpu_isa_traits_t : public cpu_isa_traits_t { static constexpr dnnl_cpu_isa_t user_option_val = dnnl_cpu_isa_avx2; static constexpr const char *user_option_env = "avx2"; }; template <> -struct cpu_isa_traits : public cpu_isa_traits { +struct cpu_isa_traits_t : public cpu_isa_traits_t { static constexpr dnnl_cpu_isa_t user_option_val = dnnl_cpu_isa_avx2_vnni; static constexpr const char *user_option_env = "avx2_vnni"; }; template <> -struct cpu_isa_traits : public cpu_isa_traits { +struct cpu_isa_traits_t : public cpu_isa_traits_t { static constexpr dnnl_cpu_isa_t user_option_val = dnnl_cpu_isa_avx2_vnni_2; static constexpr const char *user_option_env = "avx2_vnni_2"; }; template <> -struct cpu_isa_traits { +struct cpu_isa_traits_t { using Vmm = Xbyak::Zmm; static constexpr int vlen_shift = 6; - static constexpr int vlen = vreg_traits::vlen; + static constexpr int vlen = vreg_traits_t::vlen; static constexpr int n_vregs = 32; static constexpr dnnl_cpu_isa_t user_option_val = dnnl_cpu_isa_avx512_core; static constexpr const char *user_option_env = "avx512_core"; }; template <> -struct cpu_isa_traits : public cpu_isa_traits { +struct cpu_isa_traits_t + : public cpu_isa_traits_t { static constexpr dnnl_cpu_isa_t user_option_val = dnnl_cpu_isa_avx512_core_vnni; static constexpr const char *user_option_env = "avx512_core_vnni"; }; template <> -struct cpu_isa_traits : public cpu_isa_traits { +struct cpu_isa_traits_t + : public cpu_isa_traits_t { static constexpr dnnl_cpu_isa_t user_option_val = dnnl_cpu_isa_avx512_core_bf16; static constexpr const char *user_option_env = "avx512_core_bf16"; }; template <> -struct cpu_isa_traits { +struct cpu_isa_traits_t { using Vmm = Xbyak::Zmm; - static constexpr int vlen = vreg_traits::vlen; + static constexpr int vlen = vreg_traits_t::vlen; static constexpr dnnl_cpu_isa_t user_option_val = dnnl_cpu_isa_avx10_1_512_amx; static constexpr const char *user_option_env = "avx10_1_512_amx"; }; template <> -struct cpu_isa_traits : public cpu_isa_traits { +struct cpu_isa_traits_t : public cpu_isa_traits_t { static constexpr dnnl_cpu_isa_t user_option_val = dnnl_cpu_isa_avx10_1_512; static constexpr const char *user_option_env = "avx10_1_512"; }; template <> -struct cpu_isa_traits { +struct cpu_isa_traits_t { using Vmm = Xbyak::Zmm; static constexpr dnnl_cpu_isa_t user_option_val = dnnl_cpu_isa_avx10_1_512_amx_fp16; @@ -354,7 +356,7 @@ bool DNNL_API is_available(); namespace { -static inline bool mayiuse(const cpu_isa_t cpu_isa, bool soft = false) { +inline bool mayiuse(const cpu_isa_t cpu_isa, bool soft = false) { using namespace Xbyak::util; unsigned cpu_isa_mask = x64::get_max_cpu_isa_mask(soft); @@ -418,23 +420,23 @@ static inline bool mayiuse(const cpu_isa_t cpu_isa, bool soft = false) { return false; } -static inline bool isa_has_int8_vnni(cpu_isa_t isa) { +inline bool isa_has_int8_vnni(cpu_isa_t isa) { return is_superset(isa, avx512_core_vnni) || is_superset(isa, avx2_vnni); } -static inline bool isa_has_s8s8(cpu_isa_t isa) { +inline bool isa_has_s8s8(cpu_isa_t isa) { return is_superset(isa, amx_int8) || is_superset(isa, avx2_vnni_2); } -static inline bool isa_has_bf16(cpu_isa_t isa) { +inline bool isa_has_bf16(cpu_isa_t isa) { return is_superset(isa, avx512_core_bf16); } -static inline bool isa_has_masks(cpu_isa_t isa) { +inline bool isa_has_masks(cpu_isa_t isa) { return is_superset(isa, avx512_core); } -static inline int isa_max_vlen(cpu_isa_t isa) { +inline int isa_max_vlen(cpu_isa_t isa) { const bool is_avx512 = is_superset(isa, avx512_core); const bool is_avx = is_superset(isa, avx); const bool is_sse41 = is_superset(isa, sse41); @@ -443,14 +445,14 @@ static inline int isa_max_vlen(cpu_isa_t isa) { MAYBE_UNUSED(is_sse41); if (is_avx512) - return cpu_isa_traits::vlen; + return cpu_isa_traits_t::vlen; else if (is_avx) - return cpu_isa_traits::vlen; + return cpu_isa_traits_t::vlen; else - return cpu_isa_traits::vlen; + return cpu_isa_traits_t::vlen; } -static inline int isa_num_vregs(cpu_isa_t isa) { +inline int isa_num_vregs(cpu_isa_t isa) { const bool is_avx512 = is_superset(isa, avx512_core); const bool is_avx = is_superset(isa, avx); const bool is_sse41 = is_superset(isa, sse41); @@ -459,11 +461,11 @@ static inline int isa_num_vregs(cpu_isa_t isa) { MAYBE_UNUSED(is_sse41); if (is_avx512) - return cpu_isa_traits::n_vregs; + return cpu_isa_traits_t::n_vregs; else if (is_avx) - return cpu_isa_traits::n_vregs; + return cpu_isa_traits_t::n_vregs; else - return cpu_isa_traits::n_vregs; + return cpu_isa_traits_t::n_vregs; } } // namespace diff --git a/src/cpu/x64/cpu_reducer.cpp b/src/cpu/x64/cpu_reducer.cpp index b75cc838613..86397f0881c 100644 --- a/src/cpu/x64/cpu_reducer.cpp +++ b/src/cpu/x64/cpu_reducer.cpp @@ -97,12 +97,12 @@ void reduce_balancer_t::balance() { using namespace Xbyak; template -struct reducer_2d_driver_t : public jit_generator { +struct reducer_2d_driver_t : public jit_generator_t { using data_t = typename prec_traits_t::type; reducer_2d_driver_t(int n_src, size_t src_ld, size_t src_step, size_t dst_step, bool nullify_dst, const char *name) - : jit_generator(name) + : jit_generator_t(name) , n_src_(n_src) , src_ld_(src_ld) , src_step_(src_step) @@ -126,7 +126,7 @@ struct reducer_2d_driver_f_s_32_t : public reducer_2d_driver_t { void operator()( data_t *dst, const data_t *srcs, size_t ny, size_t nx) override { - jit_generator::operator()(dst, srcs, ny, nx); + jit_generator_t::operator()(dst, srcs, ny, nx); } /* cpu specific part */ @@ -145,7 +145,7 @@ struct reducer_2d_driver_f_s_32_t : public reducer_2d_driver_t { this->paddd(x1, op); } - const int vlen = cpu_isa_traits::vlen; + const int vlen = cpu_isa_traits_t::vlen; const int typesize = sizeof(typename dnnl::impl::prec_traits_t::type); Xbyak::Reg64 reg_dst = abi_param1; @@ -205,7 +205,7 @@ struct reducer_2d_driver_f_s_32_t : public reducer_2d_driver_t { } void loop_x() { - const int nloads[] = {cpu_isa_traits::n_vregs, 1, 1}; + const int nloads[] = {cpu_isa_traits_t::n_vregs, 1, 1}; const int nbranches = sizeof(nloads) / sizeof(nloads[0]); const int load_len[nbranches] = {vlen, vlen, typesize}; diff --git a/src/cpu/x64/gemm/amx/jit_avx512_core_amx_copy_kern.cpp b/src/cpu/x64/gemm/amx/jit_avx512_core_amx_copy_kern.cpp index 7f9b09824d2..81771dab6a8 100644 --- a/src/cpu/x64/gemm/amx/jit_avx512_core_amx_copy_kern.cpp +++ b/src/cpu/x64/gemm/amx/jit_avx512_core_amx_copy_kern.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -42,7 +42,7 @@ static inline Zmm make_zmm(const Xmm &v) { return Zmm(v.getIdx()); } -void jit_avx512_core_amx_copy_kern::transpose(int s, const Ymm &dst1, +void jit_avx512_core_amx_copy_kern_t::transpose(int s, const Ymm &dst1, const Ymm &dst2, const Ymm &src1, const Ymm &src2) { switch (s) { case 32: @@ -91,8 +91,9 @@ void jit_avx512_core_amx_copy_kern::transpose(int s, const Ymm &dst1, } } -void jit_avx512_core_amx_copy_kern::amxtrans8(const Ymm &dst1, const Ymm &dst2, - const Ymm &src1, const Ymm &src2, const Ymm &src3, const Ymm &src4) { +void jit_avx512_core_amx_copy_kern_t::amxtrans8(const Ymm &dst1, + const Ymm &dst2, const Ymm &src1, const Ymm &src2, const Ymm &src3, + const Ymm &src4) { vpunpcklbw(dst1, src1, src2); vpunpckhbw(dst2, src1, src2); vpunpcklbw(src1, src3, src4); @@ -107,7 +108,7 @@ void jit_avx512_core_amx_copy_kern::amxtrans8(const Ymm &dst1, const Ymm &dst2, vshufi32x4(src4, dst1, dst2, 0x03); } -void jit_avx512_core_amx_copy_kern::amxtrans16( +void jit_avx512_core_amx_copy_kern_t::amxtrans16( const Ymm &dst1, const Ymm &dst2, const Ymm &src1, const Ymm &src2) { vpunpcklwd(dst1, src1, src2); vpunpckhwd(dst2, src1, src2); @@ -117,7 +118,7 @@ void jit_avx512_core_amx_copy_kern::amxtrans16( vshufi32x4(src2, src2, src2, 0xd8); } -void jit_avx512_core_amx_copy_kern::load( +void jit_avx512_core_amx_copy_kern_t::load( const Xmm &dst, const Address &src, bool corner) { if (!corner && isize_ == 1) vmovdqu8(dst, src); @@ -129,14 +130,15 @@ void jit_avx512_core_amx_copy_kern::load( vmovdqu16(dst | k1 | T_z, src); } -void jit_avx512_core_amx_copy_kern::store(const Address &dst, const Xmm &src) { +void jit_avx512_core_amx_copy_kern_t::store( + const Address &dst, const Xmm &src) { if (size_ == 1) vmovdqu8(dst, src); else vmovdqu16(dst, src); } -void jit_avx512_core_amx_copy_kern::kernel_AN( +void jit_avx512_core_amx_copy_kern_t::kernel_AN( int unroll_x, int unroll_y, int step, Reg64 A, Reg64 B, bool corner) { // Transpose data. int u[] = {32, 16, 8, 4}; @@ -170,7 +172,7 @@ void jit_avx512_core_amx_copy_kern::kernel_AN( } } -void jit_avx512_core_amx_copy_kern::kernel_BN( +void jit_avx512_core_amx_copy_kern_t::kernel_BN( int unroll_x, int unroll_y, int step, Reg64 A, Reg64 B, bool corner) { // Store data. for (int i = 0; i < 16; i++) @@ -179,7 +181,7 @@ void jit_avx512_core_amx_copy_kern::kernel_BN( src_[i]); } -void jit_avx512_core_amx_copy_kern::kernel_AT( +void jit_avx512_core_amx_copy_kern_t::kernel_AT( int unroll_x, int unroll_y, int step, Reg64 A, Reg64 B, bool corner) { Ymm v[16]; @@ -258,7 +260,7 @@ void jit_avx512_core_amx_copy_kern::kernel_AT( } } -void jit_avx512_core_amx_copy_kern::kernel_BT( +void jit_avx512_core_amx_copy_kern_t::kernel_BT( int unroll_x, int unroll_y, int step, Reg64 A, Reg64 B, bool corner) { // Transpose data. int u[] = {16, 8, 4, 2, 1}; @@ -297,7 +299,7 @@ void jit_avx512_core_amx_copy_kern::kernel_BT( L(store_end); } -void jit_avx512_core_amx_copy_kern::kernel( +void jit_avx512_core_amx_copy_kern_t::kernel( int unroll_x, int unroll_y, int step, Reg64 A, Reg64 B, bool corner) { // Load matrix. @@ -326,7 +328,7 @@ void jit_avx512_core_amx_copy_kern::kernel( kernel_BT(unroll_x, unroll_y, step, A, B, corner); } -void jit_avx512_core_amx_copy_kern::copy_m(int unroll_m, int unroll_n) { +void jit_avx512_core_amx_copy_kern_t::copy_m(int unroll_m, int unroll_n) { if (is_trans_) { mov(B1_, B_); add(B_, unroll_m * unroll_n * size_); @@ -378,7 +380,7 @@ void jit_avx512_core_amx_copy_kern::copy_m(int unroll_m, int unroll_n) { L_aligned(kernel_tail_end); } -void jit_avx512_core_amx_copy_kern::copy_ns(int unroll_n, Label &epilogue) { +void jit_avx512_core_amx_copy_kern_t::copy_ns(int unroll_n, Label &epilogue) { if (unroll_n > 0) { copy_ns(unroll_n - 1, epilogue); @@ -393,7 +395,7 @@ void jit_avx512_core_amx_copy_kern::copy_ns(int unroll_n, Label &epilogue) { } } -void jit_avx512_core_amx_copy_kern::copy_n(int unroll_n, Label &epilogue) { +void jit_avx512_core_amx_copy_kern_t::copy_n(int unroll_n, Label &epilogue) { Label copy_m_loop, copy_m_end; @@ -422,7 +424,7 @@ void jit_avx512_core_amx_copy_kern::copy_n(int unroll_n, Label &epilogue) { copy_ns(unroll_n - 1, epilogue); } -void jit_avx512_core_amx_copy_kern::generate() { +void jit_avx512_core_amx_copy_kern_t::generate() { // Prologue preamble(); sub(rsp, stack_alloc_size_); @@ -494,9 +496,9 @@ void jit_avx512_core_amx_copy_kern::generate() { postamble(); } -jit_avx512_core_amx_copy_kern::jit_avx512_core_amx_copy_kern( +jit_avx512_core_amx_copy_kern_t::jit_avx512_core_amx_copy_kern_t( bool is_a, bool is_trans, int isize) - : jit_generator(jit_name()) + : jit_generator_t(jit_name()) , is_a_(is_a) , is_trans_(is_trans) , size_(isize) diff --git a/src/cpu/x64/gemm/amx/jit_avx512_core_amx_copy_kern.hpp b/src/cpu/x64/gemm/amx/jit_avx512_core_amx_copy_kern.hpp index 1e51de69e43..db74267baef 100644 --- a/src/cpu/x64/gemm/amx/jit_avx512_core_amx_copy_kern.hpp +++ b/src/cpu/x64/gemm/amx/jit_avx512_core_amx_copy_kern.hpp @@ -24,10 +24,10 @@ namespace impl { namespace cpu { namespace x64 { -class jit_avx512_core_amx_copy_kern : public jit_generator { +class jit_avx512_core_amx_copy_kern_t : public jit_generator_t { public: - jit_avx512_core_amx_copy_kern(bool is_a, bool is_trans, int isize); - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_amx_copy_kern); + jit_avx512_core_amx_copy_kern_t(bool is_a, bool is_trans, int isize); + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_amx_copy_kern_t); protected: bool is_a_; diff --git a/src/cpu/x64/gemm/amx/jit_avx512_core_amx_gemm_kern.cpp b/src/cpu/x64/gemm/amx/jit_avx512_core_amx_gemm_kern.cpp index f9005d6ea6e..c92560cb70c 100644 --- a/src/cpu/x64/gemm/amx/jit_avx512_core_amx_gemm_kern.cpp +++ b/src/cpu/x64/gemm/amx/jit_avx512_core_amx_gemm_kern.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -59,7 +59,7 @@ namespace x64 { #define TILED(X) dword[rsp + ((X) + 0xc0)] #define TILEQ(X) qword[rsp + ((X) + 0xc0)] -void jit_avx512_core_amx_gemm_kern::generate() { +void jit_avx512_core_amx_gemm_kern_t::generate() { int kerneltype = ((typea << 1) | typeb); @@ -455,9 +455,9 @@ void jit_avx512_core_amx_gemm_kern::generate() { ret(); } -jit_avx512_core_amx_gemm_kern::jit_avx512_core_amx_gemm_kern( +jit_avx512_core_amx_gemm_kern_t::jit_avx512_core_amx_gemm_kern_t( int typea, int typeb, int typec, int betaZero) - : jit_generator(jit_name(), avx512_core_amx) + : jit_generator_t(jit_name(), avx512_core_amx) , typea(typea) , typeb(typeb) , typec(typec) diff --git a/src/cpu/x64/gemm/amx/jit_avx512_core_amx_gemm_kern.hpp b/src/cpu/x64/gemm/amx/jit_avx512_core_amx_gemm_kern.hpp index 5df4ac140c9..fab208e61cf 100644 --- a/src/cpu/x64/gemm/amx/jit_avx512_core_amx_gemm_kern.hpp +++ b/src/cpu/x64/gemm/amx/jit_avx512_core_amx_gemm_kern.hpp @@ -24,11 +24,11 @@ namespace impl { namespace cpu { namespace x64 { -class jit_avx512_core_amx_gemm_kern : public jit_generator { +class jit_avx512_core_amx_gemm_kern_t : public jit_generator_t { public: - jit_avx512_core_amx_gemm_kern( + jit_avx512_core_amx_gemm_kern_t( int typea, int typeb, int typec, int betaZero); - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_amx_gemm_kern); + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_amx_gemm_kern_t); protected: void generate() override; diff --git a/src/cpu/x64/gemm/bf16/common_s16.hpp b/src/cpu/x64/gemm/bf16/common_s16.hpp index 5a9d221b48f..c61e44190eb 100644 --- a/src/cpu/x64/gemm/bf16/common_s16.hpp +++ b/src/cpu/x64/gemm/bf16/common_s16.hpp @@ -24,68 +24,68 @@ namespace impl { namespace cpu { namespace x64 { -class jit_avx512_core_s16_48x8_copy_an_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_48x8_copy_an_kern); +class jit_avx512_core_s16_48x8_copy_an_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_48x8_copy_an_kern_t); void generate() override; public: - jit_avx512_core_s16_48x8_copy_an_kern(); + jit_avx512_core_s16_48x8_copy_an_kern_t(); }; -class jit_avx512_core_s16_48x8_copy_at_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_48x8_copy_at_kern); +class jit_avx512_core_s16_48x8_copy_at_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_48x8_copy_at_kern_t); void generate() override; public: - jit_avx512_core_s16_48x8_copy_at_kern(); + jit_avx512_core_s16_48x8_copy_at_kern_t(); }; -class jit_avx512_core_s16_48x8_copy_bn_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_48x8_copy_bn_kern); +class jit_avx512_core_s16_48x8_copy_bn_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_48x8_copy_bn_kern_t); void generate() override; public: - jit_avx512_core_s16_48x8_copy_bn_kern(); + jit_avx512_core_s16_48x8_copy_bn_kern_t(); }; -class jit_avx512_core_s16_48x8_copy_bt_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_48x8_copy_bt_kern); +class jit_avx512_core_s16_48x8_copy_bt_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_48x8_copy_bt_kern_t); void generate() override; public: - jit_avx512_core_s16_48x8_copy_bt_kern(); + jit_avx512_core_s16_48x8_copy_bt_kern_t(); }; -class jit_avx512_core_s16_24x8_copy_an_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_24x8_copy_an_kern); +class jit_avx512_core_s16_24x8_copy_an_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_24x8_copy_an_kern_t); void generate() override; public: - jit_avx512_core_s16_24x8_copy_an_kern(); + jit_avx512_core_s16_24x8_copy_an_kern_t(); }; -class jit_avx512_core_s16_24x8_copy_at_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_24x8_copy_at_kern); +class jit_avx512_core_s16_24x8_copy_at_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_24x8_copy_at_kern_t); void generate() override; public: - jit_avx512_core_s16_24x8_copy_at_kern(); + jit_avx512_core_s16_24x8_copy_at_kern_t(); }; -class jit_avx512_core_s16_24x8_copy_bn_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_24x8_copy_bn_kern); +class jit_avx512_core_s16_24x8_copy_bn_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_24x8_copy_bn_kern_t); void generate() override; public: - jit_avx512_core_s16_24x8_copy_bn_kern(); + jit_avx512_core_s16_24x8_copy_bn_kern_t(); }; -class jit_avx512_core_s16_24x8_copy_bt_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_24x8_copy_bt_kern); +class jit_avx512_core_s16_24x8_copy_bt_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_24x8_copy_bt_kern_t); void generate() override; public: - jit_avx512_core_s16_24x8_copy_bt_kern(); + jit_avx512_core_s16_24x8_copy_bt_kern_t(); }; } // namespace x64 diff --git a/src/cpu/x64/gemm/bf16/jit_avx512_core_gemm_bf16bf16f32_kern.cpp b/src/cpu/x64/gemm/bf16/jit_avx512_core_gemm_bf16bf16f32_kern.cpp index 069dae9d902..17f1a27c19d 100644 --- a/src/cpu/x64/gemm/bf16/jit_avx512_core_gemm_bf16bf16f32_kern.cpp +++ b/src/cpu/x64/gemm/bf16/jit_avx512_core_gemm_bf16bf16f32_kern.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -46,7 +46,7 @@ static inline Zmm make_zmm(const Xmm &v) { } // Load from or store to C. -void jit_avx512_core_gemm_bf16bf16f32_kern::c_load( +void jit_avx512_core_gemm_bf16bf16f32_kern_t::c_load( const Xbyak::Xmm &dst, const Xbyak::Address &src, int nelems) { switch (nelems) { case 1: vmovss(make_xmm(dst), src); break; @@ -60,7 +60,7 @@ void jit_avx512_core_gemm_bf16bf16f32_kern::c_load( } } -void jit_avx512_core_gemm_bf16bf16f32_kern::c_store( +void jit_avx512_core_gemm_bf16bf16f32_kern_t::c_store( const Xbyak::Address &dst, const Xbyak::Xmm &src, int nelems) { switch (nelems) { case 1: vmovss(dst, make_xmm(src)); break; @@ -76,7 +76,7 @@ void jit_avx512_core_gemm_bf16bf16f32_kern::c_store( // Perform length-2 dot product accumulations of bfloat16 in parallel. // Use vdpbf16ps if available, otherwise emulate. -void jit_avx512_core_gemm_bf16bf16f32_kern::dot_product( +void jit_avx512_core_gemm_bf16bf16f32_kern_t::dot_product( const Xmm &dst, const Xmm &src1, const Xmm &src2) { if (bfloat16_) vdpbf16ps(dst, src1, src2); @@ -85,7 +85,7 @@ void jit_avx512_core_gemm_bf16bf16f32_kern::dot_product( } // Inner kernel. -void jit_avx512_core_gemm_bf16bf16f32_kern::kernel_loop( +void jit_avx512_core_gemm_bf16bf16f32_kern_t::kernel_loop( int unroll_m, int unroll_n, bool cfetch) { int um_vecs = utils::div_up(unroll_m, c_nelems_); Label label_kernel_loop; @@ -147,7 +147,7 @@ void jit_avx512_core_gemm_bf16bf16f32_kern::kernel_loop( } // k remainder loop for kernel. -void jit_avx512_core_gemm_bf16bf16f32_kern::remainder_kernel( +void jit_avx512_core_gemm_bf16bf16f32_kern_t::remainder_kernel( int unroll_m, int unroll_n, int unroll_k, int bwidth) { int um_vecs = utils::div_up(unroll_m, c_nelems_); @@ -181,7 +181,7 @@ void jit_avx512_core_gemm_bf16bf16f32_kern::remainder_kernel( } // Inner loop. -void jit_avx512_core_gemm_bf16bf16f32_kern::innerloop( +void jit_avx512_core_gemm_bf16bf16f32_kern_t::innerloop( int unroll_m, int unroll_n) { int um_vecs = utils::div_up(unroll_m, c_nelems_); int stage1 = unroll_n, stage2 = unroll_n; @@ -311,7 +311,7 @@ void jit_avx512_core_gemm_bf16bf16f32_kern::innerloop( } // Outer loop. -void jit_avx512_core_gemm_bf16bf16f32_kern::outerloop( +void jit_avx512_core_gemm_bf16bf16f32_kern_t::outerloop( int unroll_x, int unroll_y, Label *&cur_outerloop_label) { Label label_m_loop, label_n_loop, label_n_remainder_loops[6]; @@ -375,7 +375,7 @@ void jit_avx512_core_gemm_bf16bf16f32_kern::outerloop( align(16); } -void jit_avx512_core_gemm_bf16bf16f32_kern::generate() { +void jit_avx512_core_gemm_bf16bf16f32_kern_t::generate() { // Prologue preamble(); sub(rsp, stack_alloc_size_); @@ -423,9 +423,10 @@ void jit_avx512_core_gemm_bf16bf16f32_kern::generate() { postamble(); } -jit_avx512_core_gemm_bf16bf16f32_kern::jit_avx512_core_gemm_bf16bf16f32_kern( - bool beta_zero, bool alpha_one, bool use_zmm) - : jit_generator(jit_name()) +jit_avx512_core_gemm_bf16bf16f32_kern_t:: + jit_avx512_core_gemm_bf16bf16f32_kern_t( + bool beta_zero, bool alpha_one, bool use_zmm) + : jit_generator_t(jit_name()) , beta_zero_(beta_zero) , alpha_one_(alpha_one) , bfloat16_(mayiuse(avx512_core_bf16)) @@ -507,7 +508,8 @@ jit_avx512_core_gemm_bf16bf16f32_kern::jit_avx512_core_gemm_bf16bf16f32_kern( this, one_, even_, selector_, scratch_, zmm_tmp0_, zmm_tmp1_); } -jit_avx512_core_gemm_bf16bf16f32_kern::~jit_avx512_core_gemm_bf16bf16f32_kern() +jit_avx512_core_gemm_bf16bf16f32_kern_t:: + ~jit_avx512_core_gemm_bf16bf16f32_kern_t() = default; } // namespace x64 } // namespace cpu diff --git a/src/cpu/x64/gemm/bf16/jit_avx512_core_gemm_bf16bf16f32_kern.hpp b/src/cpu/x64/gemm/bf16/jit_avx512_core_gemm_bf16bf16f32_kern.hpp index 076b49c0501..bc176fa9467 100644 --- a/src/cpu/x64/gemm/bf16/jit_avx512_core_gemm_bf16bf16f32_kern.hpp +++ b/src/cpu/x64/gemm/bf16/jit_avx512_core_gemm_bf16bf16f32_kern.hpp @@ -25,12 +25,12 @@ namespace impl { namespace cpu { namespace x64 { -class jit_avx512_core_gemm_bf16bf16f32_kern : public jit_generator { +class jit_avx512_core_gemm_bf16bf16f32_kern_t : public jit_generator_t { public: - jit_avx512_core_gemm_bf16bf16f32_kern( + jit_avx512_core_gemm_bf16bf16f32_kern_t( bool beta_zero, bool alpha_one, bool use_zmm); - ~jit_avx512_core_gemm_bf16bf16f32_kern(); - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_gemm_bf16bf16f32_kern); + ~jit_avx512_core_gemm_bf16bf16f32_kern_t() override; + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_gemm_bf16bf16f32_kern_t); protected: bool beta_zero_; @@ -98,7 +98,7 @@ class jit_avx512_core_gemm_bf16bf16f32_kern : public jit_generator { Xbyak::Zmm zmm_tmp0_; Xbyak::Zmm zmm_tmp1_; - DNNL_DISALLOW_COPY_AND_ASSIGN(jit_avx512_core_gemm_bf16bf16f32_kern); + DNNL_DISALLOW_COPY_AND_ASSIGN(jit_avx512_core_gemm_bf16bf16f32_kern_t); }; } // namespace x64 diff --git a/src/cpu/x64/gemm/bf16/jit_avx512_core_gemv_bf16bf16f32_kern.cpp b/src/cpu/x64/gemm/bf16/jit_avx512_core_gemv_bf16bf16f32_kern.cpp index 4d77805f9ff..42b0430e9a1 100644 --- a/src/cpu/x64/gemm/bf16/jit_avx512_core_gemv_bf16bf16f32_kern.cpp +++ b/src/cpu/x64/gemm/bf16/jit_avx512_core_gemv_bf16bf16f32_kern.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -49,7 +49,7 @@ static inline Zmm make_zmm(const Xmm &v) { // Perform length-2 dot product accumulations of bfloat16 in parallel. // Use vdpbf16ps if available, otherwise emulate. -void jit_avx512_core_gemv_bf16bf16f32_kern::dot_product( +void jit_avx512_core_gemv_bf16bf16f32_kern_t::dot_product( const Xmm &dst, const Xmm &src1, const Xmm &src2) { if (bfloat16_) vdpbf16ps(dst, src1, src2); @@ -58,7 +58,7 @@ void jit_avx512_core_gemv_bf16bf16f32_kern::dot_product( } // Vector load for 16-bit values. -void jit_avx512_core_gemv_bf16bf16f32_kern::v_load( +void jit_avx512_core_gemv_bf16bf16f32_kern_t::v_load( const Xbyak::Xmm &dst, const Xbyak::Address &src, int nelems) { if (nelems >= 32) vmovdqu16(dst, src); @@ -82,7 +82,7 @@ void jit_avx512_core_gemv_bf16bf16f32_kern::v_load( vmovdqu16(make_xmm(dst) | k1 | T_z, src); } -void jit_avx512_core_gemv_bf16bf16f32_kern::y_load( +void jit_avx512_core_gemv_bf16bf16f32_kern_t::y_load( const Xbyak::Xmm &dst, const Xbyak::Address &src, int nelems) { if (nelems >= 16) vmovups(dst, src); @@ -102,7 +102,7 @@ void jit_avx512_core_gemv_bf16bf16f32_kern::y_load( vmovss(make_xmm(dst), src); } -void jit_avx512_core_gemv_bf16bf16f32_kern::y_store( +void jit_avx512_core_gemv_bf16bf16f32_kern_t::y_store( const Xbyak::Address &dst, const Xbyak::Xmm &src, int nelems) { if (nelems >= 16) vmovups(dst, src); @@ -122,7 +122,7 @@ void jit_avx512_core_gemv_bf16bf16f32_kern::y_store( vmovss(dst, make_xmm(src)); } -void jit_avx512_core_gemv_bf16bf16f32_kern::kernel_loop_n( +void jit_avx512_core_gemv_bf16bf16f32_kern_t::kernel_loop_n( int unroll_m, int unroll_n, bool fetch, bool last) { int zmm_vecs = utils::div_up(unroll_m, 32); @@ -203,7 +203,7 @@ void jit_avx512_core_gemv_bf16bf16f32_kern::kernel_loop_n( } // Inner loop for A non-transposed. -void jit_avx512_core_gemv_bf16bf16f32_kern::innerloop_n(int unroll_n) { +void jit_avx512_core_gemv_bf16bf16f32_kern_t::innerloop_n(int unroll_n) { mov(A1_, A_); if (unroll_n > 4) { lea(A2_, ptr[A1_ + LDA_ * 4]); @@ -283,7 +283,7 @@ void jit_avx512_core_gemv_bf16bf16f32_kern::innerloop_n(int unroll_n) { L_aligned(label_m_tail_end); } -void jit_avx512_core_gemv_bf16bf16f32_kern::kernel_loop_t( +void jit_avx512_core_gemv_bf16bf16f32_kern_t::kernel_loop_t( int unroll_m, int unroll_n, bool fetch, bool last) { // Load x. @@ -312,7 +312,7 @@ void jit_avx512_core_gemv_bf16bf16f32_kern::kernel_loop_t( } // Inner loop for A transposed. -void jit_avx512_core_gemv_bf16bf16f32_kern::innerloop_t(int unroll_n) { +void jit_avx512_core_gemv_bf16bf16f32_kern_t::innerloop_t(int unroll_n) { mov(A1_, A_); if (unroll_n > 4) { lea(A2_, ptr[A1_ + LDA_ * 4]); @@ -431,7 +431,7 @@ void jit_avx512_core_gemv_bf16bf16f32_kern::innerloop_t(int unroll_n) { } // Outer loop. -void jit_avx512_core_gemv_bf16bf16f32_kern::outerloop(int unroll_y, +void jit_avx512_core_gemv_bf16bf16f32_kern_t::outerloop(int unroll_y, Label *&cur_outerloop_label, Label *&outerloop_end_label) { bool is_tail = unroll_y < UNROLL_N_; @@ -464,7 +464,7 @@ void jit_avx512_core_gemv_bf16bf16f32_kern::outerloop(int unroll_y, } } -void jit_avx512_core_gemv_bf16bf16f32_kern::generate() { +void jit_avx512_core_gemv_bf16bf16f32_kern_t::generate() { // Prologue preamble(); @@ -513,9 +513,9 @@ void jit_avx512_core_gemv_bf16bf16f32_kern::generate() { } // Function signature: gemv(*m, *n, *alpha, *a, *lda, *x, *incx, *y, *incy) -jit_avx512_core_gemv_bf16bf16f32_kern::jit_avx512_core_gemv_bf16bf16f32_kern( - bool trans) - : jit_generator(jit_name()) +jit_avx512_core_gemv_bf16bf16f32_kern_t:: + jit_avx512_core_gemv_bf16bf16f32_kern_t(bool trans) + : jit_generator_t(jit_name()) , trans_(trans) , bfloat16_(mayiuse(avx512_core_bf16)) , arg_lda_(0) @@ -605,8 +605,8 @@ jit_avx512_core_gemv_bf16bf16f32_kern::jit_avx512_core_gemv_bf16bf16f32_kern( this, one_, even_, selector_, gpr_, zmm_tmp0_, zmm_tmp1_); } -jit_avx512_core_gemv_bf16bf16f32_kern:: - ~jit_avx512_core_gemv_bf16bf16f32_kern() { +jit_avx512_core_gemv_bf16bf16f32_kern_t:: + ~jit_avx512_core_gemv_bf16bf16f32_kern_t() { delete bf16_emu_; } diff --git a/src/cpu/x64/gemm/bf16/jit_avx512_core_gemv_bf16bf16f32_kern.hpp b/src/cpu/x64/gemm/bf16/jit_avx512_core_gemv_bf16bf16f32_kern.hpp index e370d95f683..c108d6afc83 100644 --- a/src/cpu/x64/gemm/bf16/jit_avx512_core_gemv_bf16bf16f32_kern.hpp +++ b/src/cpu/x64/gemm/bf16/jit_avx512_core_gemv_bf16bf16f32_kern.hpp @@ -25,11 +25,11 @@ namespace impl { namespace cpu { namespace x64 { -class jit_avx512_core_gemv_bf16bf16f32_kern : public jit_generator { +class jit_avx512_core_gemv_bf16bf16f32_kern_t : public jit_generator_t { public: - jit_avx512_core_gemv_bf16bf16f32_kern(bool trans); - ~jit_avx512_core_gemv_bf16bf16f32_kern(); - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_gemv_bf16bf16f32_kern); + jit_avx512_core_gemv_bf16bf16f32_kern_t(bool trans); + ~jit_avx512_core_gemv_bf16bf16f32_kern_t() override; + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_gemv_bf16bf16f32_kern_t); protected: bool trans_; diff --git a/src/cpu/x64/gemm/bf16/jit_avx512_core_s16_24x8_copy_an_kern_autogen.cpp b/src/cpu/x64/gemm/bf16/jit_avx512_core_s16_24x8_copy_an_kern_autogen.cpp index 22f089dc8b0..491a2a51c52 100644 --- a/src/cpu/x64/gemm/bf16/jit_avx512_core_s16_24x8_copy_an_kern_autogen.cpp +++ b/src/cpu/x64/gemm/bf16/jit_avx512_core_s16_24x8_copy_an_kern_autogen.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,10 +23,11 @@ namespace impl { namespace cpu { namespace x64 { -jit_avx512_core_s16_24x8_copy_an_kern::jit_avx512_core_s16_24x8_copy_an_kern() - : jit_generator(jit_name()) {} +jit_avx512_core_s16_24x8_copy_an_kern_t:: + jit_avx512_core_s16_24x8_copy_an_kern_t() + : jit_generator_t(jit_name()) {} -void jit_avx512_core_s16_24x8_copy_an_kern::generate() { +void jit_avx512_core_s16_24x8_copy_an_kern_t::generate() { #ifndef _WIN32 #define M rdi diff --git a/src/cpu/x64/gemm/bf16/jit_avx512_core_s16_24x8_copy_at_kern_autogen.cpp b/src/cpu/x64/gemm/bf16/jit_avx512_core_s16_24x8_copy_at_kern_autogen.cpp index 9a6032745f7..69f0d00e129 100644 --- a/src/cpu/x64/gemm/bf16/jit_avx512_core_s16_24x8_copy_at_kern_autogen.cpp +++ b/src/cpu/x64/gemm/bf16/jit_avx512_core_s16_24x8_copy_at_kern_autogen.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,10 +23,11 @@ namespace impl { namespace cpu { namespace x64 { -jit_avx512_core_s16_24x8_copy_at_kern::jit_avx512_core_s16_24x8_copy_at_kern() - : jit_generator(jit_name()) {} +jit_avx512_core_s16_24x8_copy_at_kern_t:: + jit_avx512_core_s16_24x8_copy_at_kern_t() + : jit_generator_t(jit_name()) {} -void jit_avx512_core_s16_24x8_copy_at_kern::generate() { +void jit_avx512_core_s16_24x8_copy_at_kern_t::generate() { #ifndef _WIN32 #define M rdi diff --git a/src/cpu/x64/gemm/bf16/jit_avx512_core_s16_24x8_copy_bn_kern_autogen.cpp b/src/cpu/x64/gemm/bf16/jit_avx512_core_s16_24x8_copy_bn_kern_autogen.cpp index be61df11e29..01db091bf68 100644 --- a/src/cpu/x64/gemm/bf16/jit_avx512_core_s16_24x8_copy_bn_kern_autogen.cpp +++ b/src/cpu/x64/gemm/bf16/jit_avx512_core_s16_24x8_copy_bn_kern_autogen.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,10 +23,11 @@ namespace impl { namespace cpu { namespace x64 { -jit_avx512_core_s16_24x8_copy_bn_kern::jit_avx512_core_s16_24x8_copy_bn_kern() - : jit_generator(jit_name()) {} +jit_avx512_core_s16_24x8_copy_bn_kern_t:: + jit_avx512_core_s16_24x8_copy_bn_kern_t() + : jit_generator_t(jit_name()) {} -void jit_avx512_core_s16_24x8_copy_bn_kern::generate() { +void jit_avx512_core_s16_24x8_copy_bn_kern_t::generate() { #ifndef _WIN32 #define M rdi diff --git a/src/cpu/x64/gemm/bf16/jit_avx512_core_s16_24x8_copy_bt_kern_autogen.cpp b/src/cpu/x64/gemm/bf16/jit_avx512_core_s16_24x8_copy_bt_kern_autogen.cpp index cd62ed88dbd..5164dff7cb8 100644 --- a/src/cpu/x64/gemm/bf16/jit_avx512_core_s16_24x8_copy_bt_kern_autogen.cpp +++ b/src/cpu/x64/gemm/bf16/jit_avx512_core_s16_24x8_copy_bt_kern_autogen.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2024 Intel Corporation +* Copyright 2020-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,10 +23,11 @@ namespace impl { namespace cpu { namespace x64 { -jit_avx512_core_s16_24x8_copy_bt_kern::jit_avx512_core_s16_24x8_copy_bt_kern() - : jit_generator(jit_name()) {} +jit_avx512_core_s16_24x8_copy_bt_kern_t:: + jit_avx512_core_s16_24x8_copy_bt_kern_t() + : jit_generator_t(jit_name()) {} -void jit_avx512_core_s16_24x8_copy_bt_kern::generate() { +void jit_avx512_core_s16_24x8_copy_bt_kern_t::generate() { #ifndef _WIN32 #define M rdi diff --git a/src/cpu/x64/gemm/bf16/jit_avx512_core_s16_48x8_copy_an_kern_autogen.cpp b/src/cpu/x64/gemm/bf16/jit_avx512_core_s16_48x8_copy_an_kern_autogen.cpp index 3a936e6a280..c6d3c901c04 100644 --- a/src/cpu/x64/gemm/bf16/jit_avx512_core_s16_48x8_copy_an_kern_autogen.cpp +++ b/src/cpu/x64/gemm/bf16/jit_avx512_core_s16_48x8_copy_an_kern_autogen.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,10 +23,11 @@ namespace impl { namespace cpu { namespace x64 { -jit_avx512_core_s16_48x8_copy_an_kern::jit_avx512_core_s16_48x8_copy_an_kern() - : jit_generator(jit_name()) {} +jit_avx512_core_s16_48x8_copy_an_kern_t:: + jit_avx512_core_s16_48x8_copy_an_kern_t() + : jit_generator_t(jit_name()) {} -void jit_avx512_core_s16_48x8_copy_an_kern::generate() { +void jit_avx512_core_s16_48x8_copy_an_kern_t::generate() { #ifndef _WIN32 #define M rdi diff --git a/src/cpu/x64/gemm/bf16/jit_avx512_core_s16_48x8_copy_at_kern_autogen.cpp b/src/cpu/x64/gemm/bf16/jit_avx512_core_s16_48x8_copy_at_kern_autogen.cpp index ced7abdd837..815d72b437b 100644 --- a/src/cpu/x64/gemm/bf16/jit_avx512_core_s16_48x8_copy_at_kern_autogen.cpp +++ b/src/cpu/x64/gemm/bf16/jit_avx512_core_s16_48x8_copy_at_kern_autogen.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,10 +23,11 @@ namespace impl { namespace cpu { namespace x64 { -jit_avx512_core_s16_48x8_copy_at_kern::jit_avx512_core_s16_48x8_copy_at_kern() - : jit_generator(jit_name()) {} +jit_avx512_core_s16_48x8_copy_at_kern_t:: + jit_avx512_core_s16_48x8_copy_at_kern_t() + : jit_generator_t(jit_name()) {} -void jit_avx512_core_s16_48x8_copy_at_kern::generate() { +void jit_avx512_core_s16_48x8_copy_at_kern_t::generate() { #ifndef _WIN32 #define M rdi diff --git a/src/cpu/x64/gemm/bf16/jit_avx512_core_s16_48x8_copy_bn_kern_autogen.cpp b/src/cpu/x64/gemm/bf16/jit_avx512_core_s16_48x8_copy_bn_kern_autogen.cpp index 196039ad816..da6d516438d 100644 --- a/src/cpu/x64/gemm/bf16/jit_avx512_core_s16_48x8_copy_bn_kern_autogen.cpp +++ b/src/cpu/x64/gemm/bf16/jit_avx512_core_s16_48x8_copy_bn_kern_autogen.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,10 +23,11 @@ namespace impl { namespace cpu { namespace x64 { -jit_avx512_core_s16_48x8_copy_bn_kern::jit_avx512_core_s16_48x8_copy_bn_kern() - : jit_generator(jit_name()) {} +jit_avx512_core_s16_48x8_copy_bn_kern_t:: + jit_avx512_core_s16_48x8_copy_bn_kern_t() + : jit_generator_t(jit_name()) {} -void jit_avx512_core_s16_48x8_copy_bn_kern::generate() { +void jit_avx512_core_s16_48x8_copy_bn_kern_t::generate() { #ifndef _WIN32 #define M rdi diff --git a/src/cpu/x64/gemm/bf16/jit_avx512_core_s16_48x8_copy_bt_kern_autogen.cpp b/src/cpu/x64/gemm/bf16/jit_avx512_core_s16_48x8_copy_bt_kern_autogen.cpp index d448a2e121a..2f5918a5748 100644 --- a/src/cpu/x64/gemm/bf16/jit_avx512_core_s16_48x8_copy_bt_kern_autogen.cpp +++ b/src/cpu/x64/gemm/bf16/jit_avx512_core_s16_48x8_copy_bt_kern_autogen.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,10 +23,11 @@ namespace impl { namespace cpu { namespace x64 { -jit_avx512_core_s16_48x8_copy_bt_kern::jit_avx512_core_s16_48x8_copy_bt_kern() - : jit_generator(jit_name()) {} +jit_avx512_core_s16_48x8_copy_bt_kern_t:: + jit_avx512_core_s16_48x8_copy_bt_kern_t() + : jit_generator_t(jit_name()) {} -void jit_avx512_core_s16_48x8_copy_bt_kern::generate() { +void jit_avx512_core_s16_48x8_copy_bt_kern_t::generate() { #ifndef _WIN32 #define M rdi diff --git a/src/cpu/x64/gemm/f32/common_f32.hpp b/src/cpu/x64/gemm/f32/common_f32.hpp index 03e07360f89..ed632c06c06 100644 --- a/src/cpu/x64/gemm/f32/common_f32.hpp +++ b/src/cpu/x64/gemm/f32/common_f32.hpp @@ -24,173 +24,173 @@ namespace impl { namespace cpu { namespace x64 { -class jit_avx512_core_f32_copy_an_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_f32_copy_an_kern); +class jit_avx512_core_f32_copy_an_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_f32_copy_an_kern_t); void generate() override; public: - jit_avx512_core_f32_copy_an_kern(); + jit_avx512_core_f32_copy_an_kern_t(); }; -class jit_avx512_core_f32_copy_at_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_f32_copy_at_kern); +class jit_avx512_core_f32_copy_at_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_f32_copy_at_kern_t); void generate() override; void generate_part1(const Xbyak::Label &, const Xbyak::Label &, const Xbyak::Label &, const Xbyak::Label &); void generate_part2(Xbyak::Label, Xbyak::Label, Xbyak::Label, Xbyak::Label); public: - jit_avx512_core_f32_copy_at_kern(); + jit_avx512_core_f32_copy_at_kern_t(); }; -class jit_avx512_core_f32_copy_bn_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_f32_copy_bn_kern); +class jit_avx512_core_f32_copy_bn_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_f32_copy_bn_kern_t); void generate() override; public: - jit_avx512_core_f32_copy_bn_kern(); + jit_avx512_core_f32_copy_bn_kern_t(); }; -class jit_avx512_core_f32_copy_bt_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_f32_copy_bt_kern); +class jit_avx512_core_f32_copy_bt_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_f32_copy_bt_kern_t); void generate() override; public: - jit_avx512_core_f32_copy_bt_kern(); + jit_avx512_core_f32_copy_bt_kern_t(); }; -class jit_avx2_f32_copy_an_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_f32_copy_an_kern); +class jit_avx2_f32_copy_an_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_f32_copy_an_kern_t); void generate() override; public: - jit_avx2_f32_copy_an_kern(); + jit_avx2_f32_copy_an_kern_t(); }; -class jit_avx2_f32_copy_at_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_f32_copy_at_kern); +class jit_avx2_f32_copy_at_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_f32_copy_at_kern_t); void generate() override; public: - jit_avx2_f32_copy_at_kern(); + jit_avx2_f32_copy_at_kern_t(); }; -class jit_avx2_f32_copy_bn_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_f32_copy_bn_kern); +class jit_avx2_f32_copy_bn_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_f32_copy_bn_kern_t); void generate() override; public: - jit_avx2_f32_copy_bn_kern(); + jit_avx2_f32_copy_bn_kern_t(); }; -class jit_avx2_f32_copy_bt_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_f32_copy_bt_kern); +class jit_avx2_f32_copy_bt_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_f32_copy_bt_kern_t); void generate() override; public: - jit_avx2_f32_copy_bt_kern(); + jit_avx2_f32_copy_bt_kern_t(); }; -class jit_avx_f32_copy_an_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_f32_copy_an_kern); +class jit_avx_f32_copy_an_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_f32_copy_an_kern_t); void generate() override; public: - jit_avx_f32_copy_an_kern(); + jit_avx_f32_copy_an_kern_t(); }; -class jit_avx_f32_copy_at_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_f32_copy_at_kern); +class jit_avx_f32_copy_at_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_f32_copy_at_kern_t); void generate() override; public: - jit_avx_f32_copy_at_kern(); + jit_avx_f32_copy_at_kern_t(); }; -class jit_avx_f32_copy_bn_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_f32_copy_bn_kern); +class jit_avx_f32_copy_bn_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_f32_copy_bn_kern_t); void generate() override; public: - jit_avx_f32_copy_bn_kern(); + jit_avx_f32_copy_bn_kern_t(); }; -class jit_avx_f32_copy_bt_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_f32_copy_bt_kern); +class jit_avx_f32_copy_bt_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_f32_copy_bt_kern_t); void generate() override; public: - jit_avx_f32_copy_bt_kern(); + jit_avx_f32_copy_bt_kern_t(); }; -class jit_avx_kernel_b0_sgemm_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_b0_sgemm_kern); +class jit_avx_kernel_b0_sgemm_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_b0_sgemm_kern_t); void generate() override; void generate_part1(const Xbyak::Label &, const Xbyak::Label &, const Xbyak::Label &, const Xbyak::Label &); void generate_part2(Xbyak::Label, Xbyak::Label, Xbyak::Label, Xbyak::Label); public: - jit_avx_kernel_b0_sgemm_kern(); + jit_avx_kernel_b0_sgemm_kern_t(); }; -class jit_avx_kernel_sgemm_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_sgemm_kern); +class jit_avx_kernel_sgemm_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_sgemm_kern_t); void generate() override; void generate_part1( const Xbyak::Label &, const Xbyak::Label &, const Xbyak::Label &); void generate_part2(Xbyak::Label &, Xbyak::Label &, Xbyak::Label &); public: - jit_avx_kernel_sgemm_kern(); + jit_avx_kernel_sgemm_kern_t(); }; -class jit_sse41_f32_copy_an_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_f32_copy_an_kern); +class jit_sse41_f32_copy_an_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_f32_copy_an_kern_t); void generate() override; public: - jit_sse41_f32_copy_an_kern(); + jit_sse41_f32_copy_an_kern_t(); }; -class jit_sse41_f32_copy_at_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_f32_copy_at_kern); +class jit_sse41_f32_copy_at_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_f32_copy_at_kern_t); void generate() override; public: - jit_sse41_f32_copy_at_kern(); + jit_sse41_f32_copy_at_kern_t(); }; -class jit_sse41_f32_copy_bn_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_f32_copy_bn_kern); +class jit_sse41_f32_copy_bn_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_f32_copy_bn_kern_t); void generate() override; public: - jit_sse41_f32_copy_bn_kern(); + jit_sse41_f32_copy_bn_kern_t(); }; -class jit_sse41_f32_copy_bt_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_f32_copy_bt_kern); +class jit_sse41_f32_copy_bt_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_f32_copy_bt_kern_t); void generate() override; public: - jit_sse41_f32_copy_bt_kern(); + jit_sse41_f32_copy_bt_kern_t(); }; -class jit_sse41_kernel_b0_sgemm_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_b0_sgemm_kern); +class jit_sse41_kernel_b0_sgemm_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_b0_sgemm_kern_t); void generate() override; public: - jit_sse41_kernel_b0_sgemm_kern(); + jit_sse41_kernel_b0_sgemm_kern_t(); }; -class jit_sse41_kernel_sgemm_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_sgemm_kern); +class jit_sse41_kernel_sgemm_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_sgemm_kern_t); void generate() override; public: - jit_sse41_kernel_sgemm_kern(); + jit_sse41_kernel_sgemm_kern_t(); }; } // namespace x64 diff --git a/src/cpu/x64/gemm/f32/jit_avx2_f32_copy_an_kern_autogen.cpp b/src/cpu/x64/gemm/f32/jit_avx2_f32_copy_an_kern_autogen.cpp index 3b14fe68440..ba136908bfa 100644 --- a/src/cpu/x64/gemm/f32/jit_avx2_f32_copy_an_kern_autogen.cpp +++ b/src/cpu/x64/gemm/f32/jit_avx2_f32_copy_an_kern_autogen.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,10 +23,10 @@ namespace impl { namespace cpu { namespace x64 { -jit_avx2_f32_copy_an_kern::jit_avx2_f32_copy_an_kern() - : jit_generator(jit_name()) {} +jit_avx2_f32_copy_an_kern_t::jit_avx2_f32_copy_an_kern_t() + : jit_generator_t(jit_name()) {} -void jit_avx2_f32_copy_an_kern::generate() { +void jit_avx2_f32_copy_an_kern_t::generate() { #ifndef _WIN32 #define M rdi diff --git a/src/cpu/x64/gemm/f32/jit_avx2_f32_copy_at_kern_autogen.cpp b/src/cpu/x64/gemm/f32/jit_avx2_f32_copy_at_kern_autogen.cpp index 8f9205dfca5..daa3ece4b9c 100644 --- a/src/cpu/x64/gemm/f32/jit_avx2_f32_copy_at_kern_autogen.cpp +++ b/src/cpu/x64/gemm/f32/jit_avx2_f32_copy_at_kern_autogen.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,10 +23,10 @@ namespace impl { namespace cpu { namespace x64 { -jit_avx2_f32_copy_at_kern::jit_avx2_f32_copy_at_kern() - : jit_generator(jit_name()) {} +jit_avx2_f32_copy_at_kern_t::jit_avx2_f32_copy_at_kern_t() + : jit_generator_t(jit_name()) {} -void jit_avx2_f32_copy_at_kern::generate() { +void jit_avx2_f32_copy_at_kern_t::generate() { #ifndef _WIN32 #define M rdi diff --git a/src/cpu/x64/gemm/f32/jit_avx2_f32_copy_bn_kern_autogen.cpp b/src/cpu/x64/gemm/f32/jit_avx2_f32_copy_bn_kern_autogen.cpp index 1b086a5e4de..f3e17a76a87 100644 --- a/src/cpu/x64/gemm/f32/jit_avx2_f32_copy_bn_kern_autogen.cpp +++ b/src/cpu/x64/gemm/f32/jit_avx2_f32_copy_bn_kern_autogen.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,10 +23,10 @@ namespace impl { namespace cpu { namespace x64 { -jit_avx2_f32_copy_bn_kern::jit_avx2_f32_copy_bn_kern() - : jit_generator(jit_name()) {} +jit_avx2_f32_copy_bn_kern_t::jit_avx2_f32_copy_bn_kern_t() + : jit_generator_t(jit_name()) {} -void jit_avx2_f32_copy_bn_kern::generate() { +void jit_avx2_f32_copy_bn_kern_t::generate() { #ifndef _WIN32 #define M rdi diff --git a/src/cpu/x64/gemm/f32/jit_avx2_f32_copy_bt_kern_autogen.cpp b/src/cpu/x64/gemm/f32/jit_avx2_f32_copy_bt_kern_autogen.cpp index 9fd7218234b..461d24d51e4 100644 --- a/src/cpu/x64/gemm/f32/jit_avx2_f32_copy_bt_kern_autogen.cpp +++ b/src/cpu/x64/gemm/f32/jit_avx2_f32_copy_bt_kern_autogen.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,10 +23,10 @@ namespace impl { namespace cpu { namespace x64 { -jit_avx2_f32_copy_bt_kern::jit_avx2_f32_copy_bt_kern() - : jit_generator(jit_name()) {} +jit_avx2_f32_copy_bt_kern_t::jit_avx2_f32_copy_bt_kern_t() + : jit_generator_t(jit_name()) {} -void jit_avx2_f32_copy_bt_kern::generate() { +void jit_avx2_f32_copy_bt_kern_t::generate() { #ifndef _WIN32 #define M rdi diff --git a/src/cpu/x64/gemm/f32/jit_avx2_kernel_sgemm_kern.cpp b/src/cpu/x64/gemm/f32/jit_avx2_kernel_sgemm_kern.cpp index d0fb52fa6c3..0a8dd0ddbaf 100644 --- a/src/cpu/x64/gemm/f32/jit_avx2_kernel_sgemm_kern.cpp +++ b/src/cpu/x64/gemm/f32/jit_avx2_kernel_sgemm_kern.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,14 +27,14 @@ namespace impl { namespace cpu { namespace x64 { -int jit_avx2_kernel_sgemm_kern::next_acc(int idx, int um, int un) const { +int jit_avx2_kernel_sgemm_kern_t::next_acc(int idx, int um, int un) const { while (!(((idx / unroll_n_) < std::max(1, um / nelt_per_vecreg_)) || ((idx % unroll_n_) < un))) idx++; return idx; } -void jit_avx2_kernel_sgemm_kern::prefetchB_beforeBload( +void jit_avx2_kernel_sgemm_kern_t::prefetchB_beforeBload( int um, int un, int k_idx, int n_idx) { if (!(mayiuse(avx512_core) && __BUILD_GEMM_AVX512)) { if ((n_idx == 0) && (k_idx == 0) && (un == unroll_n_) && (um != 16)) { @@ -44,7 +44,7 @@ void jit_avx2_kernel_sgemm_kern::prefetchB_beforeBload( } } -void jit_avx2_kernel_sgemm_kern::prefetchB_beforeFMA( +void jit_avx2_kernel_sgemm_kern_t::prefetchB_beforeFMA( int um, int un, int k_idx, int n_idx, int m_idx) { if (!(mayiuse(avx512_core) && __BUILD_GEMM_AVX512)) { if ((um == 16) || (un < unroll_n_)) { @@ -61,7 +61,7 @@ void jit_avx2_kernel_sgemm_kern::prefetchB_beforeFMA( } } -void jit_avx2_kernel_sgemm_kern::prefetchA_afterFMA( +void jit_avx2_kernel_sgemm_kern_t::prefetchA_afterFMA( int um, int un, int k_idx, int n_idx, int m_idx) { if (mayiuse(avx512_core) && __BUILD_GEMM_AVX512) { if ((um < unroll_m_) && (m_idx == 0)) { @@ -85,7 +85,7 @@ void jit_avx2_kernel_sgemm_kern::prefetchA_afterFMA( } } -void jit_avx2_kernel_sgemm_kern::prefetchA_afterBload( +void jit_avx2_kernel_sgemm_kern_t::prefetchA_afterBload( int um, int un, int k_idx, int n_idx) { if (!(mayiuse(avx512_core) && __BUILD_GEMM_AVX512)) { if ((um == unroll_m_) && (un == 2)) { @@ -109,7 +109,7 @@ void jit_avx2_kernel_sgemm_kern::prefetchA_afterBload( } } -void jit_avx2_kernel_sgemm_kern::prefetchB_afterFMA( +void jit_avx2_kernel_sgemm_kern_t::prefetchB_afterFMA( int k_idx, int n_idx, int m_idx) { if (mayiuse(avx512_core) && __BUILD_GEMM_AVX512) { if (((m_idx + (k_idx % (nb_zmm_a_ / unroll_m_reg_)) * unroll_m_reg_) @@ -124,7 +124,7 @@ void jit_avx2_kernel_sgemm_kern::prefetchB_afterFMA( } } -void jit_avx2_kernel_sgemm_kern::prefetchA_beforeFMA( +void jit_avx2_kernel_sgemm_kern_t::prefetchA_beforeFMA( int um, int un, int k_idx, int n_idx, int m_idx) { if (!(mayiuse(avx512_core) && __BUILD_GEMM_AVX512)) { if ((um == unroll_m_) && (un == unroll_n_)) { @@ -158,7 +158,7 @@ void jit_avx2_kernel_sgemm_kern::prefetchA_beforeFMA( } } -void jit_avx2_kernel_sgemm_kern::prefetchC_afterBload( +void jit_avx2_kernel_sgemm_kern_t::prefetchC_afterBload( int um, int un, int k_idx, int n_idx) { if (mayiuse(avx512_core) && __BUILD_GEMM_AVX512) { if (um == unroll_m_) { @@ -172,7 +172,7 @@ void jit_avx2_kernel_sgemm_kern::prefetchC_afterBload( } } -void jit_avx2_kernel_sgemm_kern::prefetchC_beforeKloop(int um) { +void jit_avx2_kernel_sgemm_kern_t::prefetchC_beforeKloop(int um) { if (mayiuse(avx512_core) && __BUILD_GEMM_AVX512) { if (um < unroll_m_) { prefetchw(ptr[CO2_ + elt_size_ * 0]); @@ -199,7 +199,7 @@ void jit_avx2_kernel_sgemm_kern::prefetchC_beforeKloop(int um) { } } -void jit_avx2_kernel_sgemm_kern::generate() { +void jit_avx2_kernel_sgemm_kern_t::generate() { int i, unroll_x, unroll_y, uy_bin, ux_bin; int C_off = is_windows ? 56 : 8; @@ -435,8 +435,8 @@ void jit_avx2_kernel_sgemm_kern::generate() { postamble(); } -jit_avx2_kernel_sgemm_kern::jit_avx2_kernel_sgemm_kern(bool beta_zero) - : jit_generator(jit_name()), beta_zero_(beta_zero) {} +jit_avx2_kernel_sgemm_kern_t::jit_avx2_kernel_sgemm_kern_t(bool beta_zero) + : jit_generator_t(jit_name()), beta_zero_(beta_zero) {} } // namespace x64 } // namespace cpu diff --git a/src/cpu/x64/gemm/f32/jit_avx2_kernel_sgemm_kern.hpp b/src/cpu/x64/gemm/f32/jit_avx2_kernel_sgemm_kern.hpp index b18b34686d5..60b97371367 100644 --- a/src/cpu/x64/gemm/f32/jit_avx2_kernel_sgemm_kern.hpp +++ b/src/cpu/x64/gemm/f32/jit_avx2_kernel_sgemm_kern.hpp @@ -29,9 +29,9 @@ namespace impl { namespace cpu { namespace x64 { -class jit_avx2_kernel_sgemm_kern : public jit_generator { +class jit_avx2_kernel_sgemm_kern_t : public jit_generator_t { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_kernel_sgemm_kern); + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_kernel_sgemm_kern_t); const int elt_size_ = 4; const int elt_size_bin_ = 2; int nelt_per_vecreg_ = mayiuse(avx512_core) && __BUILD_GEMM_AVX512 ? 16 : 8; @@ -701,7 +701,7 @@ class jit_avx2_kernel_sgemm_kern : public jit_generator { } public: - jit_avx2_kernel_sgemm_kern(bool beta_zero); + jit_avx2_kernel_sgemm_kern_t(bool beta_zero); }; } // namespace x64 } // namespace cpu diff --git a/src/cpu/x64/gemm/f32/jit_avx512_common_gemm_f32.cpp b/src/cpu/x64/gemm/f32/jit_avx512_common_gemm_f32.cpp index 7fd8e81bdb4..85e9e4aec69 100644 --- a/src/cpu/x64/gemm/f32/jit_avx512_common_gemm_f32.cpp +++ b/src/cpu/x64/gemm/f32/jit_avx512_common_gemm_f32.cpp @@ -59,11 +59,11 @@ namespace x64 { namespace avx512_common_gemm_f32 { using namespace gemm_utils; -struct xbyak_gemm_t : public jit_generator { +struct xbyak_gemm_t : public jit_generator_t { DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_common_gemm_f32_xbyak_gemm) xbyak_gemm_t(char isTransA, char isTransB, float beta, bool hasBias = false) - : jit_generator(jit_name()) + : jit_generator_t(jit_name()) , isTransA(isTransA) , isTransB(isTransB) , beta(beta) diff --git a/src/cpu/x64/gemm/f32/jit_avx512_core_f32_copy_an_kern_autogen.cpp b/src/cpu/x64/gemm/f32/jit_avx512_core_f32_copy_an_kern_autogen.cpp index bca29715498..75b38090dcb 100644 --- a/src/cpu/x64/gemm/f32/jit_avx512_core_f32_copy_an_kern_autogen.cpp +++ b/src/cpu/x64/gemm/f32/jit_avx512_core_f32_copy_an_kern_autogen.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,10 +23,10 @@ namespace impl { namespace cpu { namespace x64 { -jit_avx512_core_f32_copy_an_kern::jit_avx512_core_f32_copy_an_kern() - : jit_generator(jit_name()) {} +jit_avx512_core_f32_copy_an_kern_t::jit_avx512_core_f32_copy_an_kern_t() + : jit_generator_t(jit_name()) {} -void jit_avx512_core_f32_copy_an_kern::generate() { +void jit_avx512_core_f32_copy_an_kern_t::generate() { #ifndef _WIN32 #define M rdi diff --git a/src/cpu/x64/gemm/f32/jit_avx512_core_f32_copy_at_kern_part1_autogen.cpp b/src/cpu/x64/gemm/f32/jit_avx512_core_f32_copy_at_kern_part1_autogen.cpp index 63bb212c563..d7230690c63 100644 --- a/src/cpu/x64/gemm/f32/jit_avx512_core_f32_copy_at_kern_part1_autogen.cpp +++ b/src/cpu/x64/gemm/f32/jit_avx512_core_f32_copy_at_kern_part1_autogen.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,10 +24,10 @@ namespace impl { namespace cpu { namespace x64 { -jit_avx512_core_f32_copy_at_kern::jit_avx512_core_f32_copy_at_kern() - : jit_generator(jit_name()) {} +jit_avx512_core_f32_copy_at_kern_t::jit_avx512_core_f32_copy_at_kern_t() + : jit_generator_t(jit_name()) {} -void jit_avx512_core_f32_copy_at_kern::generate() { +void jit_avx512_core_f32_copy_at_kern_t::generate() { Xbyak::Label l1f80; Xbyak::Label l22b8; Xbyak::Label l2a5c; @@ -48,9 +48,9 @@ void jit_avx512_core_f32_copy_at_kern::generate() { postamble(); } -void jit_avx512_core_f32_copy_at_kern::generate_part1(const Xbyak::Label &l4000, - const Xbyak::Label &l2a5c, const Xbyak::Label &l22b8, - const Xbyak::Label &l1f80) { +void jit_avx512_core_f32_copy_at_kern_t::generate_part1( + const Xbyak::Label &l4000, const Xbyak::Label &l2a5c, + const Xbyak::Label &l22b8, const Xbyak::Label &l1f80) { Xbyak::Label l1d30; Xbyak::Label l1d0c; Xbyak::Label l1cfc; diff --git a/src/cpu/x64/gemm/f32/jit_avx512_core_f32_copy_at_kern_part2_autogen.cpp b/src/cpu/x64/gemm/f32/jit_avx512_core_f32_copy_at_kern_part2_autogen.cpp index 51c776f1989..379a632bb1a 100644 --- a/src/cpu/x64/gemm/f32/jit_avx512_core_f32_copy_at_kern_part2_autogen.cpp +++ b/src/cpu/x64/gemm/f32/jit_avx512_core_f32_copy_at_kern_part2_autogen.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2021 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,7 +24,7 @@ namespace impl { namespace cpu { namespace x64 { -void jit_avx512_core_f32_copy_at_kern::generate_part2(Xbyak::Label l4000, +void jit_avx512_core_f32_copy_at_kern_t::generate_part2(Xbyak::Label l4000, Xbyak::Label l2a5c, Xbyak::Label l22b8, Xbyak::Label l1f80) { std::vector labels(62); L(l1f80); diff --git a/src/cpu/x64/gemm/f32/jit_avx512_core_f32_copy_bn_kern_autogen.cpp b/src/cpu/x64/gemm/f32/jit_avx512_core_f32_copy_bn_kern_autogen.cpp index c49dbb2f743..ab581f6a2ad 100644 --- a/src/cpu/x64/gemm/f32/jit_avx512_core_f32_copy_bn_kern_autogen.cpp +++ b/src/cpu/x64/gemm/f32/jit_avx512_core_f32_copy_bn_kern_autogen.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,10 +23,10 @@ namespace impl { namespace cpu { namespace x64 { -jit_avx512_core_f32_copy_bn_kern::jit_avx512_core_f32_copy_bn_kern() - : jit_generator(jit_name()) {} +jit_avx512_core_f32_copy_bn_kern_t::jit_avx512_core_f32_copy_bn_kern_t() + : jit_generator_t(jit_name()) {} -void jit_avx512_core_f32_copy_bn_kern::generate() { +void jit_avx512_core_f32_copy_bn_kern_t::generate() { #ifndef _WIN32 #define M rdi diff --git a/src/cpu/x64/gemm/f32/jit_avx512_core_f32_copy_bt_kern_autogen.cpp b/src/cpu/x64/gemm/f32/jit_avx512_core_f32_copy_bt_kern_autogen.cpp index 24d3145349f..99e101a7525 100644 --- a/src/cpu/x64/gemm/f32/jit_avx512_core_f32_copy_bt_kern_autogen.cpp +++ b/src/cpu/x64/gemm/f32/jit_avx512_core_f32_copy_bt_kern_autogen.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,10 +23,10 @@ namespace impl { namespace cpu { namespace x64 { -jit_avx512_core_f32_copy_bt_kern::jit_avx512_core_f32_copy_bt_kern() - : jit_generator(jit_name()) {} +jit_avx512_core_f32_copy_bt_kern_t::jit_avx512_core_f32_copy_bt_kern_t() + : jit_generator_t(jit_name()) {} -void jit_avx512_core_f32_copy_bt_kern::generate() { +void jit_avx512_core_f32_copy_bt_kern_t::generate() { #ifndef _WIN32 #define M rdi diff --git a/src/cpu/x64/gemm/f32/jit_avx512_core_gemm_smalln_tn_f32_kern.cpp b/src/cpu/x64/gemm/f32/jit_avx512_core_gemm_smalln_tn_f32_kern.cpp index 8f1f9e299c3..3787430d2bf 100644 --- a/src/cpu/x64/gemm/f32/jit_avx512_core_gemm_smalln_tn_f32_kern.cpp +++ b/src/cpu/x64/gemm/f32/jit_avx512_core_gemm_smalln_tn_f32_kern.cpp @@ -43,11 +43,11 @@ static inline Xbyak::Ymm make_ymm(const Xbyak::Zmm &v) { namespace avx512_core_gemm_smalln_tn_f32 { -struct xbyak_gemm_smalln_tn_t : public jit_generator { +struct xbyak_gemm_smalln_tn_t : public jit_generator_t { DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_gemm_smalln_tn_xbyak_gemm) xbyak_gemm_smalln_tn_t(int N, float beta, float alpha) - : jit_generator(jit_name()), N(N), beta(beta), alpha(alpha) {} + : jit_generator_t(jit_name()), N(N), beta(beta), alpha(alpha) {} void generate() override { using namespace Xbyak; diff --git a/src/cpu/x64/gemm/f32/jit_avx_f32_copy_an_kern_autogen.cpp b/src/cpu/x64/gemm/f32/jit_avx_f32_copy_an_kern_autogen.cpp index 117de225946..4354e22db58 100644 --- a/src/cpu/x64/gemm/f32/jit_avx_f32_copy_an_kern_autogen.cpp +++ b/src/cpu/x64/gemm/f32/jit_avx_f32_copy_an_kern_autogen.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,10 +23,10 @@ namespace impl { namespace cpu { namespace x64 { -jit_avx_f32_copy_an_kern::jit_avx_f32_copy_an_kern() - : jit_generator(jit_name()) {} +jit_avx_f32_copy_an_kern_t::jit_avx_f32_copy_an_kern_t() + : jit_generator_t(jit_name()) {} -void jit_avx_f32_copy_an_kern::generate() { +void jit_avx_f32_copy_an_kern_t::generate() { #ifndef _WIN32 #define M rdi diff --git a/src/cpu/x64/gemm/f32/jit_avx_f32_copy_at_kern_autogen.cpp b/src/cpu/x64/gemm/f32/jit_avx_f32_copy_at_kern_autogen.cpp index 20e8c67d6be..700ff542285 100644 --- a/src/cpu/x64/gemm/f32/jit_avx_f32_copy_at_kern_autogen.cpp +++ b/src/cpu/x64/gemm/f32/jit_avx_f32_copy_at_kern_autogen.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,10 +23,10 @@ namespace impl { namespace cpu { namespace x64 { -jit_avx_f32_copy_at_kern::jit_avx_f32_copy_at_kern() - : jit_generator(jit_name()) {} +jit_avx_f32_copy_at_kern_t::jit_avx_f32_copy_at_kern_t() + : jit_generator_t(jit_name()) {} -void jit_avx_f32_copy_at_kern::generate() { +void jit_avx_f32_copy_at_kern_t::generate() { #ifndef _WIN32 #define M rdi diff --git a/src/cpu/x64/gemm/f32/jit_avx_f32_copy_bn_kern_autogen.cpp b/src/cpu/x64/gemm/f32/jit_avx_f32_copy_bn_kern_autogen.cpp index 277144c5fbd..ed0494c469b 100644 --- a/src/cpu/x64/gemm/f32/jit_avx_f32_copy_bn_kern_autogen.cpp +++ b/src/cpu/x64/gemm/f32/jit_avx_f32_copy_bn_kern_autogen.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,10 +23,10 @@ namespace impl { namespace cpu { namespace x64 { -jit_avx_f32_copy_bn_kern::jit_avx_f32_copy_bn_kern() - : jit_generator(jit_name()) {} +jit_avx_f32_copy_bn_kern_t::jit_avx_f32_copy_bn_kern_t() + : jit_generator_t(jit_name()) {} -void jit_avx_f32_copy_bn_kern::generate() { +void jit_avx_f32_copy_bn_kern_t::generate() { #ifndef _WIN32 #define M rdi diff --git a/src/cpu/x64/gemm/f32/jit_avx_f32_copy_bt_kern_autogen.cpp b/src/cpu/x64/gemm/f32/jit_avx_f32_copy_bt_kern_autogen.cpp index a7d9fe4fa04..e59bb0a5d8b 100644 --- a/src/cpu/x64/gemm/f32/jit_avx_f32_copy_bt_kern_autogen.cpp +++ b/src/cpu/x64/gemm/f32/jit_avx_f32_copy_bt_kern_autogen.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,10 +23,10 @@ namespace impl { namespace cpu { namespace x64 { -jit_avx_f32_copy_bt_kern::jit_avx_f32_copy_bt_kern() - : jit_generator(jit_name()) {} +jit_avx_f32_copy_bt_kern_t::jit_avx_f32_copy_bt_kern_t() + : jit_generator_t(jit_name()) {} -void jit_avx_f32_copy_bt_kern::generate() { +void jit_avx_f32_copy_bt_kern_t::generate() { #ifndef _WIN32 #define M rdi diff --git a/src/cpu/x64/gemm/f32/jit_avx_gemm_f32.cpp b/src/cpu/x64/gemm/f32/jit_avx_gemm_f32.cpp index 8fe514cc0d0..38a01ce662e 100644 --- a/src/cpu/x64/gemm/f32/jit_avx_gemm_f32.cpp +++ b/src/cpu/x64/gemm/f32/jit_avx_gemm_f32.cpp @@ -58,11 +58,11 @@ namespace avx_gemm_f32 { using namespace gemm_utils; using namespace Xbyak; -struct xbyak_gemm_t : public jit_generator { +struct xbyak_gemm_t : public jit_generator_t { DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_gemm_f32_xbyak_gemm) xbyak_gemm_t(char isTransA, char isTransB, float beta, bool hasBias = false) - : jit_generator(jit_name()) + : jit_generator_t(jit_name()) , isTransA(isTransA) , isTransB(isTransB) , hasBias(hasBias) diff --git a/src/cpu/x64/gemm/f32/jit_avx_gemv_t_f32_kern.cpp b/src/cpu/x64/gemm/f32/jit_avx_gemv_t_f32_kern.cpp index d85f65fb581..394eb40f2e7 100644 --- a/src/cpu/x64/gemm/f32/jit_avx_gemv_t_f32_kern.cpp +++ b/src/cpu/x64/gemm/f32/jit_avx_gemv_t_f32_kern.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,7 +38,7 @@ static inline Xmm make_xmm(const Xmm &v) { } // Load vector register data for x, y or A. -void jit_avx_gemv_t_f32_kern::v_load( +void jit_avx_gemv_t_f32_kern_t::v_load( const Xbyak::Xmm &dst, const Xbyak::Address &src, int nelems) { switch (nelems) { case 1: vmovss(make_xmm(dst), src); break; @@ -52,7 +52,7 @@ void jit_avx_gemv_t_f32_kern::v_load( } // Store vector register data for x, y or A. -void jit_avx_gemv_t_f32_kern::v_store( +void jit_avx_gemv_t_f32_kern_t::v_store( const Xbyak::Address &dst, const Xbyak::Xmm &src, int nelems) { switch (nelems) { case 1: vmovss(dst, make_xmm(src)); break; @@ -67,7 +67,7 @@ void jit_avx_gemv_t_f32_kern::v_store( // Perform Hadamard product of 2 vectors and accumulate. // Use FMA instruction, otherwise emulate. -void jit_avx_gemv_t_f32_kern::dot_product( +void jit_avx_gemv_t_f32_kern_t::dot_product( const Xmm &dst, const Xmm &src1, const Xmm &src2) { if (is_avx2_) vfmadd231ps(dst, src1, src2); @@ -78,7 +78,7 @@ void jit_avx_gemv_t_f32_kern::dot_product( } // Inner loop. -void jit_avx_gemv_t_f32_kern::innerloop(int unroll_m, int unroll_n) { +void jit_avx_gemv_t_f32_kern_t::innerloop(int unroll_m, int unroll_n) { if ((unroll_m > M_UNROLL_) || (unroll_n > N_UNROLL_) || (unroll_m < 0) || (unroll_n < 0)) return; @@ -119,7 +119,7 @@ void jit_avx_gemv_t_f32_kern::innerloop(int unroll_m, int unroll_n) { } // Outer loop. -void jit_avx_gemv_t_f32_kern::outerloop( +void jit_avx_gemv_t_f32_kern_t::outerloop( int unroll_x, int unroll_y, Label *&cur_outerloop_label) { if ((unroll_x > M_UNROLL_) || (unroll_y > N_UNROLL_) || (unroll_y < 0) || (unroll_x < 0)) @@ -259,7 +259,7 @@ void jit_avx_gemv_t_f32_kern::outerloop( align(16); } -void jit_avx_gemv_t_f32_kern::generate() { +void jit_avx_gemv_t_f32_kern_t::generate() { // Prologue preamble(); @@ -301,8 +301,8 @@ void jit_avx_gemv_t_f32_kern::generate() { } // Function signature: gemv(*m, *n, *alpha, *a, *lda, *x, *incx, *y, *incy) -jit_avx_gemv_t_f32_kern::jit_avx_gemv_t_f32_kern() - : jit_generator(jit_name()) +jit_avx_gemv_t_f32_kern_t::jit_avx_gemv_t_f32_kern_t() + : jit_generator_t(jit_name()) , is_avx2_(mayiuse(avx2)) , LDA_(is_windows ? rdi : r8) , X_(is_windows ? rsi : r9) diff --git a/src/cpu/x64/gemm/f32/jit_avx_gemv_t_f32_kern.hpp b/src/cpu/x64/gemm/f32/jit_avx_gemv_t_f32_kern.hpp index 3b9bf70b462..d4b07183ed5 100644 --- a/src/cpu/x64/gemm/f32/jit_avx_gemv_t_f32_kern.hpp +++ b/src/cpu/x64/gemm/f32/jit_avx_gemv_t_f32_kern.hpp @@ -24,10 +24,10 @@ namespace impl { namespace cpu { namespace x64 { -class jit_avx_gemv_t_f32_kern : public jit_generator { +class jit_avx_gemv_t_f32_kern_t : public jit_generator_t { public: - jit_avx_gemv_t_f32_kern(void); - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_gemv_t_f32_kern); + jit_avx_gemv_t_f32_kern_t(void); + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_gemv_t_f32_kern_t); protected: bool is_avx2_; diff --git a/src/cpu/x64/gemm/f32/jit_avx_kernel_b0_sgemm_kern_part1_autogen.cpp b/src/cpu/x64/gemm/f32/jit_avx_kernel_b0_sgemm_kern_part1_autogen.cpp index 52fccd21619..32a2f5860dd 100644 --- a/src/cpu/x64/gemm/f32/jit_avx_kernel_b0_sgemm_kern_part1_autogen.cpp +++ b/src/cpu/x64/gemm/f32/jit_avx_kernel_b0_sgemm_kern_part1_autogen.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,10 +24,10 @@ namespace impl { namespace cpu { namespace x64 { -jit_avx_kernel_b0_sgemm_kern::jit_avx_kernel_b0_sgemm_kern() - : jit_generator(jit_name()) {} +jit_avx_kernel_b0_sgemm_kern_t::jit_avx_kernel_b0_sgemm_kern_t() + : jit_generator_t(jit_name()) {} -void jit_avx_kernel_b0_sgemm_kern::generate() { +void jit_avx_kernel_b0_sgemm_kern_t::generate() { Xbyak::Label l259c; Xbyak::Label l2774; Xbyak::Label l2834; @@ -52,7 +52,7 @@ void jit_avx_kernel_b0_sgemm_kern::generate() { postamble(); } -void jit_avx_kernel_b0_sgemm_kern::generate_part1(const Xbyak::Label &l2cf4, +void jit_avx_kernel_b0_sgemm_kern_t::generate_part1(const Xbyak::Label &l2cf4, const Xbyak::Label &l2834, const Xbyak::Label &l2774, const Xbyak::Label &l259c) { std::vector labels(55); diff --git a/src/cpu/x64/gemm/f32/jit_avx_kernel_b0_sgemm_kern_part2_autogen.cpp b/src/cpu/x64/gemm/f32/jit_avx_kernel_b0_sgemm_kern_part2_autogen.cpp index 74d2c82cbc4..35a9ea2f626 100644 --- a/src/cpu/x64/gemm/f32/jit_avx_kernel_b0_sgemm_kern_part2_autogen.cpp +++ b/src/cpu/x64/gemm/f32/jit_avx_kernel_b0_sgemm_kern_part2_autogen.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2021 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,7 +24,7 @@ namespace impl { namespace cpu { namespace x64 { -void jit_avx_kernel_b0_sgemm_kern::generate_part2(Xbyak::Label l2cf4, +void jit_avx_kernel_b0_sgemm_kern_t::generate_part2(Xbyak::Label l2cf4, Xbyak::Label l2834, Xbyak::Label l2774, Xbyak::Label l259c) { std::vector labels(57); L(labels[56]); diff --git a/src/cpu/x64/gemm/f32/jit_avx_kernel_sgemm_kern_part1_autogen.cpp b/src/cpu/x64/gemm/f32/jit_avx_kernel_sgemm_kern_part1_autogen.cpp index 8ea5bd9a729..daeba0781ea 100644 --- a/src/cpu/x64/gemm/f32/jit_avx_kernel_sgemm_kern_part1_autogen.cpp +++ b/src/cpu/x64/gemm/f32/jit_avx_kernel_sgemm_kern_part1_autogen.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,10 +23,10 @@ namespace impl { namespace cpu { namespace x64 { -jit_avx_kernel_sgemm_kern::jit_avx_kernel_sgemm_kern() - : jit_generator(jit_name()) {} +jit_avx_kernel_sgemm_kern_t::jit_avx_kernel_sgemm_kern_t() + : jit_generator_t(jit_name()) {} -void jit_avx_kernel_sgemm_kern::generate() { +void jit_avx_kernel_sgemm_kern_t::generate() { Xbyak::Label l1efc; Xbyak::Label l1f44; Xbyak::Label l1f48; @@ -40,13 +40,13 @@ void jit_avx_kernel_sgemm_kern::generate() { mov(C, ptr[OLD_C]); mov(LDC, ptr[OLD_LDC]); - jit_avx_kernel_sgemm_kern::generate_part1(l1efc, l1f44, l1f48); - jit_avx_kernel_sgemm_kern::generate_part2(l1efc, l1f44, l1f48); + jit_avx_kernel_sgemm_kern_t::generate_part1(l1efc, l1f44, l1f48); + jit_avx_kernel_sgemm_kern_t::generate_part2(l1efc, l1f44, l1f48); postamble(); } -void jit_avx_kernel_sgemm_kern::generate_part1(const Xbyak::Label &l1efc, +void jit_avx_kernel_sgemm_kern_t::generate_part1(const Xbyak::Label &l1efc, const Xbyak::Label &l1f44, const Xbyak::Label &l1f48) { std::vector labels(44); diff --git a/src/cpu/x64/gemm/f32/jit_avx_kernel_sgemm_kern_part2_autogen.cpp b/src/cpu/x64/gemm/f32/jit_avx_kernel_sgemm_kern_part2_autogen.cpp index a8154c7a1c8..e1ff79875f0 100644 --- a/src/cpu/x64/gemm/f32/jit_avx_kernel_sgemm_kern_part2_autogen.cpp +++ b/src/cpu/x64/gemm/f32/jit_avx_kernel_sgemm_kern_part2_autogen.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2021 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,7 +23,7 @@ namespace impl { namespace cpu { namespace x64 { -void jit_avx_kernel_sgemm_kern::generate_part2( +void jit_avx_kernel_sgemm_kern_t::generate_part2( Xbyak::Label &l1efc, Xbyak::Label &l1f44, Xbyak::Label &l1f48) { std::vector labels(69); L(l1efc); diff --git a/src/cpu/x64/gemm/f32/jit_sse41_f32_copy_an_kern_autogen.cpp b/src/cpu/x64/gemm/f32/jit_sse41_f32_copy_an_kern_autogen.cpp index 57039cba5b0..9fe1c3a386b 100644 --- a/src/cpu/x64/gemm/f32/jit_sse41_f32_copy_an_kern_autogen.cpp +++ b/src/cpu/x64/gemm/f32/jit_sse41_f32_copy_an_kern_autogen.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,10 +23,10 @@ namespace impl { namespace cpu { namespace x64 { -jit_sse41_f32_copy_an_kern::jit_sse41_f32_copy_an_kern() - : jit_generator(jit_name()) {} +jit_sse41_f32_copy_an_kern_t::jit_sse41_f32_copy_an_kern_t() + : jit_generator_t(jit_name()) {} -void jit_sse41_f32_copy_an_kern::generate() { +void jit_sse41_f32_copy_an_kern_t::generate() { #ifndef _WIN32 #define M rdi diff --git a/src/cpu/x64/gemm/f32/jit_sse41_f32_copy_at_kern_autogen.cpp b/src/cpu/x64/gemm/f32/jit_sse41_f32_copy_at_kern_autogen.cpp index b1381469d1b..d52a86a0726 100644 --- a/src/cpu/x64/gemm/f32/jit_sse41_f32_copy_at_kern_autogen.cpp +++ b/src/cpu/x64/gemm/f32/jit_sse41_f32_copy_at_kern_autogen.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,10 +23,10 @@ namespace impl { namespace cpu { namespace x64 { -jit_sse41_f32_copy_at_kern::jit_sse41_f32_copy_at_kern() - : jit_generator(jit_name()) {} +jit_sse41_f32_copy_at_kern_t::jit_sse41_f32_copy_at_kern_t() + : jit_generator_t(jit_name()) {} -void jit_sse41_f32_copy_at_kern::generate() { +void jit_sse41_f32_copy_at_kern_t::generate() { #ifndef _WIN32 #define M rdi diff --git a/src/cpu/x64/gemm/f32/jit_sse41_f32_copy_bn_kern_autogen.cpp b/src/cpu/x64/gemm/f32/jit_sse41_f32_copy_bn_kern_autogen.cpp index f095bf750e9..36c56697f43 100644 --- a/src/cpu/x64/gemm/f32/jit_sse41_f32_copy_bn_kern_autogen.cpp +++ b/src/cpu/x64/gemm/f32/jit_sse41_f32_copy_bn_kern_autogen.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,10 +23,10 @@ namespace impl { namespace cpu { namespace x64 { -jit_sse41_f32_copy_bn_kern::jit_sse41_f32_copy_bn_kern() - : jit_generator(jit_name()) {} +jit_sse41_f32_copy_bn_kern_t::jit_sse41_f32_copy_bn_kern_t() + : jit_generator_t(jit_name()) {} -void jit_sse41_f32_copy_bn_kern::generate() { +void jit_sse41_f32_copy_bn_kern_t::generate() { #ifndef _WIN32 #define M rdi diff --git a/src/cpu/x64/gemm/f32/jit_sse41_f32_copy_bt_kern_autogen.cpp b/src/cpu/x64/gemm/f32/jit_sse41_f32_copy_bt_kern_autogen.cpp index 3f509e5dcef..b985134391e 100644 --- a/src/cpu/x64/gemm/f32/jit_sse41_f32_copy_bt_kern_autogen.cpp +++ b/src/cpu/x64/gemm/f32/jit_sse41_f32_copy_bt_kern_autogen.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,10 +23,10 @@ namespace impl { namespace cpu { namespace x64 { -jit_sse41_f32_copy_bt_kern::jit_sse41_f32_copy_bt_kern() - : jit_generator(jit_name()) {} +jit_sse41_f32_copy_bt_kern_t::jit_sse41_f32_copy_bt_kern_t() + : jit_generator_t(jit_name()) {} -void jit_sse41_f32_copy_bt_kern::generate() { +void jit_sse41_f32_copy_bt_kern_t::generate() { #ifndef _WIN32 #define M rdi diff --git a/src/cpu/x64/gemm/f32/jit_sse41_gemv_n_f32_kern.cpp b/src/cpu/x64/gemm/f32/jit_sse41_gemv_n_f32_kern.cpp index cb195f55006..83b171466aa 100644 --- a/src/cpu/x64/gemm/f32/jit_sse41_gemv_n_f32_kern.cpp +++ b/src/cpu/x64/gemm/f32/jit_sse41_gemv_n_f32_kern.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2021-2024 Intel Corporation +* Copyright 2021-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -58,7 +58,7 @@ static inline int log2_of_pow2(int n) { } // Load vector register data for x, y or A. -void jit_sse41_gemv_n_f32_kern::v_load( +void jit_sse41_gemv_n_f32_kern_t::v_load( const Xmm &dst, const Address &src, int nelems) { if (nelems >= v_nelems_) { uni_vmovups(dst, src); @@ -82,7 +82,7 @@ void jit_sse41_gemv_n_f32_kern::v_load( } // Store vector register data for x, y or A. -void jit_sse41_gemv_n_f32_kern::v_store( +void jit_sse41_gemv_n_f32_kern_t::v_store( const Address &dst, const Xmm &src, int nelems) { if (nelems >= v_nelems_) { uni_vmovups(dst, src); @@ -107,7 +107,7 @@ void jit_sse41_gemv_n_f32_kern::v_store( // Perform Hadamard product of 2 vectors and accumulate. // Use FMA instruction, otherwise emulate. -void jit_sse41_gemv_n_f32_kern::dot_product( +void jit_sse41_gemv_n_f32_kern_t::dot_product( const Xmm &dst, const Xmm &src1, const Xmm &src2) { if (has_avx2_) vfmadd231ps(dst, src1, src2); @@ -120,7 +120,7 @@ void jit_sse41_gemv_n_f32_kern::dot_product( } } -void jit_sse41_gemv_n_f32_kern::kernel_loop( +void jit_sse41_gemv_n_f32_kern_t::kernel_loop( int unroll_m, int unroll_n, bool fetch, bool last) { int um_vecs = utils::div_up(unroll_m, v_nelems_); @@ -168,7 +168,7 @@ void jit_sse41_gemv_n_f32_kern::kernel_loop( } // Inner loop for A non-transposed. -void jit_sse41_gemv_n_f32_kern::innerloop(int unroll_m, int unroll_n) { +void jit_sse41_gemv_n_f32_kern_t::innerloop(int unroll_m, int unroll_n) { mov(Y1_, Y_); // Load x and scale by alpha. @@ -237,7 +237,7 @@ void jit_sse41_gemv_n_f32_kern::innerloop(int unroll_m, int unroll_n) { L_aligned(label_m_loop_end); } -void jit_sse41_gemv_n_f32_kern::outerloop(int unroll_x, int unroll_y, +void jit_sse41_gemv_n_f32_kern_t::outerloop(int unroll_x, int unroll_y, Label *&cur_outerloop_label, Label *&outerloop_end_label) { bool is_tail = unroll_y < unroll_n_; @@ -270,7 +270,7 @@ void jit_sse41_gemv_n_f32_kern::outerloop(int unroll_x, int unroll_y, } } -void jit_sse41_gemv_n_f32_kern::generate() { +void jit_sse41_gemv_n_f32_kern_t::generate() { // Prologue preamble(); @@ -313,8 +313,8 @@ void jit_sse41_gemv_n_f32_kern::generate() { } // Function signature: gemv(*m, *n, *alpha, *a, *lda, *x, *incx, *y, *incy) -jit_sse41_gemv_n_f32_kern::jit_sse41_gemv_n_f32_kern(void) - : jit_generator(jit_name()) +jit_sse41_gemv_n_f32_kern_t::jit_sse41_gemv_n_f32_kern_t(void) + : jit_generator_t(jit_name()) , has_avx512_(mayiuse(avx512_core) && __BUILD_GEMM_AVX512) , has_avx2_(mayiuse(avx2) && __BUILD_GEMM_AVX2) , has_avx_(mayiuse(avx) && __BUILD_GEMM_AVX2) diff --git a/src/cpu/x64/gemm/f32/jit_sse41_gemv_n_f32_kern.hpp b/src/cpu/x64/gemm/f32/jit_sse41_gemv_n_f32_kern.hpp index 1a3b60ac2b3..8058122ea18 100644 --- a/src/cpu/x64/gemm/f32/jit_sse41_gemv_n_f32_kern.hpp +++ b/src/cpu/x64/gemm/f32/jit_sse41_gemv_n_f32_kern.hpp @@ -24,10 +24,10 @@ namespace impl { namespace cpu { namespace x64 { -class jit_sse41_gemv_n_f32_kern : public jit_generator { +class jit_sse41_gemv_n_f32_kern_t : public jit_generator_t { public: - jit_sse41_gemv_n_f32_kern(); - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_gemv_n_f32_kern); + jit_sse41_gemv_n_f32_kern_t(); + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_gemv_n_f32_kern_t); protected: bool has_avx512_; diff --git a/src/cpu/x64/gemm/f32/jit_sse41_gemv_t_f32_kern.cpp b/src/cpu/x64/gemm/f32/jit_sse41_gemv_t_f32_kern.cpp index b3b578975fc..da67ca31e78 100644 --- a/src/cpu/x64/gemm/f32/jit_sse41_gemv_t_f32_kern.cpp +++ b/src/cpu/x64/gemm/f32/jit_sse41_gemv_t_f32_kern.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,7 +30,7 @@ namespace x64 { using namespace Xbyak; // Load vector register data for x, y or A. -void jit_sse41_gemv_t_f32_kern::v_load( +void jit_sse41_gemv_t_f32_kern_t::v_load( const Xbyak::Xmm &dst, const Xbyak::Address &src, int nelems) { switch (nelems) { case 1: movss(dst, src); break; @@ -43,7 +43,7 @@ void jit_sse41_gemv_t_f32_kern::v_load( } // Store vector register data for x, y or A. -void jit_sse41_gemv_t_f32_kern::v_store( +void jit_sse41_gemv_t_f32_kern_t::v_store( const Xbyak::Address &dst, const Xbyak::Xmm &src, int nelems) { switch (nelems) { case 1: movss(dst, src); break; @@ -56,14 +56,14 @@ void jit_sse41_gemv_t_f32_kern::v_store( } // Perform Hadamard product of 2 vectors and accumulate. -void jit_sse41_gemv_t_f32_kern::dot_product( +void jit_sse41_gemv_t_f32_kern_t::dot_product( const Xmm &dst, const Xmm &src1, const Xmm &src2) { mulps(src2, src1); addps(dst, src2); } // Inner loop. -void jit_sse41_gemv_t_f32_kern::innerloop(int unroll_m, int unroll_n) { +void jit_sse41_gemv_t_f32_kern_t::innerloop(int unroll_m, int unroll_n) { if ((unroll_m > M_UNROLL_) || (unroll_n > N_UNROLL_) || (unroll_m < 0) || (unroll_n < 0)) return; @@ -104,7 +104,7 @@ void jit_sse41_gemv_t_f32_kern::innerloop(int unroll_m, int unroll_n) { } // Outer loop. -void jit_sse41_gemv_t_f32_kern::outerloop( +void jit_sse41_gemv_t_f32_kern_t::outerloop( int unroll_x, int unroll_y, Label *&cur_outerloop_label) { if ((unroll_x > M_UNROLL_) || (unroll_y > N_UNROLL_) || (unroll_y < 0) || unroll_x < 0) @@ -230,7 +230,7 @@ void jit_sse41_gemv_t_f32_kern::outerloop( align(16); } -void jit_sse41_gemv_t_f32_kern::generate() { +void jit_sse41_gemv_t_f32_kern_t::generate() { // Prologue preamble(); @@ -272,8 +272,8 @@ void jit_sse41_gemv_t_f32_kern::generate() { } // Function signature: gemv(*m, *n, *alpha, *a, *lda, *x, *incx, *y, *incy) -jit_sse41_gemv_t_f32_kern::jit_sse41_gemv_t_f32_kern() - : jit_generator(jit_name()) +jit_sse41_gemv_t_f32_kern_t::jit_sse41_gemv_t_f32_kern_t() + : jit_generator_t(jit_name()) , LDA_(is_windows ? rdi : r8) , X_(is_windows ? rsi : r9) , INCY_(r10) diff --git a/src/cpu/x64/gemm/f32/jit_sse41_gemv_t_f32_kern.hpp b/src/cpu/x64/gemm/f32/jit_sse41_gemv_t_f32_kern.hpp index 36fadc0d4bb..9f79643ab8e 100644 --- a/src/cpu/x64/gemm/f32/jit_sse41_gemv_t_f32_kern.hpp +++ b/src/cpu/x64/gemm/f32/jit_sse41_gemv_t_f32_kern.hpp @@ -24,10 +24,10 @@ namespace impl { namespace cpu { namespace x64 { -class jit_sse41_gemv_t_f32_kern : public jit_generator { +class jit_sse41_gemv_t_f32_kern_t : public jit_generator_t { public: - jit_sse41_gemv_t_f32_kern(void); - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_gemv_t_f32_kern); + jit_sse41_gemv_t_f32_kern_t(void); + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_gemv_t_f32_kern_t); protected: void v_load(const Xbyak::Xmm &dst, const Xbyak::Address &src, int nelems); diff --git a/src/cpu/x64/gemm/f32/jit_sse41_kernel_b0_sgemm_kern_autogen.cpp b/src/cpu/x64/gemm/f32/jit_sse41_kernel_b0_sgemm_kern_autogen.cpp index ae734d720b7..a2d2934f144 100644 --- a/src/cpu/x64/gemm/f32/jit_sse41_kernel_b0_sgemm_kern_autogen.cpp +++ b/src/cpu/x64/gemm/f32/jit_sse41_kernel_b0_sgemm_kern_autogen.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,10 +23,10 @@ namespace impl { namespace cpu { namespace x64 { -jit_sse41_kernel_b0_sgemm_kern::jit_sse41_kernel_b0_sgemm_kern() - : jit_generator(jit_name()) {} +jit_sse41_kernel_b0_sgemm_kern_t::jit_sse41_kernel_b0_sgemm_kern_t() + : jit_generator_t(jit_name()) {} -void jit_sse41_kernel_b0_sgemm_kern::generate() { +void jit_sse41_kernel_b0_sgemm_kern_t::generate() { #ifndef _WIN32 diff --git a/src/cpu/x64/gemm/f32/jit_sse41_kernel_sgemm_kern_autogen.cpp b/src/cpu/x64/gemm/f32/jit_sse41_kernel_sgemm_kern_autogen.cpp index ba6a36882ed..6d900e70fb4 100644 --- a/src/cpu/x64/gemm/f32/jit_sse41_kernel_sgemm_kern_autogen.cpp +++ b/src/cpu/x64/gemm/f32/jit_sse41_kernel_sgemm_kern_autogen.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2024 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,10 +23,10 @@ namespace impl { namespace cpu { namespace x64 { -jit_sse41_kernel_sgemm_kern::jit_sse41_kernel_sgemm_kern() - : jit_generator(jit_name()) {} +jit_sse41_kernel_sgemm_kern_t::jit_sse41_kernel_sgemm_kern_t() + : jit_generator_t(jit_name()) {} -void jit_sse41_kernel_sgemm_kern::generate() { +void jit_sse41_kernel_sgemm_kern_t::generate() { #ifndef _WIN32 diff --git a/src/cpu/x64/gemm/gemm_driver.cpp b/src/cpu/x64/gemm/gemm_driver.cpp index f07533587b5..aaacd0931e2 100644 --- a/src/cpu/x64/gemm/gemm_driver.cpp +++ b/src/cpu/x64/gemm/gemm_driver.cpp @@ -80,15 +80,15 @@ int get_vector_length() { //dummy if #if __BUILD_GEMM_AVX512 } else if (mayiuse(avx512_core)) { - v_bytes = cpu_isa_traits::vlen; + v_bytes = cpu_isa_traits_t::vlen; #endif #if __BUILD_GEMM_AVX2 } else if (mayiuse(avx)) { - v_bytes = cpu_isa_traits::vlen; + v_bytes = cpu_isa_traits_t::vlen; #endif #if __BUILD_GEMM_SSE41 } else if (mayiuse(sse41)) { - v_bytes = cpu_isa_traits::vlen; + v_bytes = cpu_isa_traits_t::vlen; #endif } else { assert(!"not supposed to be reached."); diff --git a/src/cpu/x64/gemm/gemm_driver.hpp b/src/cpu/x64/gemm/gemm_driver.hpp index 650d1775a01..163349b1101 100644 --- a/src/cpu/x64/gemm/gemm_driver.hpp +++ b/src/cpu/x64/gemm/gemm_driver.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2018-2020 Intel Corporation +* Copyright 2018-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -36,7 +36,7 @@ dnnl_status_t gemm_driver(const char *transA, const char *transB, const b_type *b, const dim_t *ldb, const b_type *ob, const float *beta, c_type *c, const dim_t *ldc, const c_type *oc, const bool force_jit_nocopy_gemm, pack_type packing = pack_type::none, - gemm_pack_storage_t *pack_dst = NULL, bool measure_only = false); + gemm_pack_storage_t *pack_dst = nullptr, bool measure_only = false); void prep_ref_gemm_s8u8s32_pack( bool do_a, dim_t rows, dim_t cols, gemm_pack_storage_t *pack_dst); diff --git a/src/cpu/x64/gemm/gemm_info.cpp b/src/cpu/x64/gemm/gemm_info.cpp index 6489bfca792..18904d2ebb5 100644 --- a/src/cpu/x64/gemm/gemm_info.cpp +++ b/src/cpu/x64/gemm/gemm_info.cpp @@ -404,8 +404,8 @@ void gemm_info_t::jit_init(void) { = is_bf16 && mayiuse(avx512_core_amx) && __BUILD_GEMM_AMX; bool is_amx = is_int8_amx || is_bf16_amx; - static maybe_unique_ptr copy_a[2][2] = {{nullptr}}; - static maybe_unique_ptr copy_b[2][2] = {{nullptr}}; + static maybe_unique_ptr copy_a[2][2] = {{nullptr}}; + static maybe_unique_ptr copy_b[2][2] = {{nullptr}}; switch (data_traits_t::data_type) { case data_type::s8: @@ -415,123 +415,123 @@ void gemm_info_t::jit_init(void) { } else if (mayiuse(amx_int8)) { for (int isTrans : {no_trans, do_trans}) { copy_a[isTrans][no_sum].reset( - new jit_avx512_core_amx_copy_kern( + new jit_avx512_core_amx_copy_kern_t( true, !isTrans, sizeof(a_t))); copy_b[isTrans][no_sum].reset( - new jit_avx512_core_amx_copy_kern( + new jit_avx512_core_amx_copy_kern_t( false, isTrans, sizeof(b_t))); } #endif #if __BUILD_GEMM_AVX512 } else if (mayiuse(avx512_core)) { copy_a[no_trans][no_sum].reset( - new jit_avx512_core_u8_copy_an_kern()); + new jit_avx512_core_u8_copy_an_kern_t()); copy_a[do_trans][no_sum].reset( - new jit_avx512_core_u8_copy_at_kern()); + new jit_avx512_core_u8_copy_at_kern_t()); copy_b[no_trans][no_sum].reset( - new jit_avx512_core_u8_copy_bn_kern(b_is_s8)); + new jit_avx512_core_u8_copy_bn_kern_t(b_is_s8)); copy_b[do_trans][no_sum].reset( - new jit_avx512_core_u8_copy_bt_kern(b_is_s8)); + new jit_avx512_core_u8_copy_bt_kern_t(b_is_s8)); copy_a[no_trans][do_sum].reset( - new jit_avx512_core_u8_copy_sum_an_kern()); + new jit_avx512_core_u8_copy_sum_an_kern_t()); copy_a[do_trans][do_sum].reset( - new jit_avx512_core_u8_copy_sum_at_kern()); + new jit_avx512_core_u8_copy_sum_at_kern_t()); copy_b[no_trans][do_sum].reset( - new jit_avx512_core_u8_copy_sum_bn_kern(b_is_s8)); + new jit_avx512_core_u8_copy_sum_bn_kern_t(b_is_s8)); copy_b[do_trans][do_sum].reset( - new jit_avx512_core_u8_copy_sum_bt_kern(b_is_s8)); + new jit_avx512_core_u8_copy_sum_bt_kern_t(b_is_s8)); #endif #if __BUILD_GEMM_AVX2 } else if (mayiuse(avx2_vnni)) { copy_a[no_trans][no_sum].reset( - new jit_avx2_vnni_u8_copy_an_kern()); + new jit_avx2_vnni_u8_copy_an_kern_t()); copy_a[do_trans][no_sum].reset( - new jit_avx2_vnni_u8_copy_at_kern()); + new jit_avx2_vnni_u8_copy_at_kern_t()); copy_b[no_trans][no_sum].reset( - new jit_avx2_vnni_u8_copy_bn_kern()); + new jit_avx2_vnni_u8_copy_bn_kern_t()); copy_b[do_trans][no_sum].reset( - new jit_avx2_vnni_u8_copy_bt_kern()); + new jit_avx2_vnni_u8_copy_bt_kern_t()); copy_a[no_trans][do_sum].reset( - new jit_avx2_vnni_u8_copy_sum_an_kern()); + new jit_avx2_vnni_u8_copy_sum_an_kern_t()); copy_a[do_trans][do_sum].reset( - new jit_avx2_vnni_u8_copy_sum_at_kern()); + new jit_avx2_vnni_u8_copy_sum_at_kern_t()); copy_b[no_trans][do_sum].reset( - new jit_avx2_vnni_u8_copy_sum_bn_kern()); + new jit_avx2_vnni_u8_copy_sum_bn_kern_t()); copy_b[do_trans][do_sum].reset( - new jit_avx2_vnni_u8_copy_sum_bt_kern()); + new jit_avx2_vnni_u8_copy_sum_bt_kern_t()); #endif #if __BUILD_GEMM_AVX2 } else if (mayiuse(avx2)) { copy_a[no_trans][no_sum].reset( - new jit_avx2_u8_copy_an_kern()); + new jit_avx2_u8_copy_an_kern_t()); copy_a[do_trans][no_sum].reset( - new jit_avx2_u8_copy_at_kern()); + new jit_avx2_u8_copy_at_kern_t()); copy_b[no_trans][no_sum].reset( - new jit_avx2_u8_copy_bn_kern()); + new jit_avx2_u8_copy_bn_kern_t()); copy_b[do_trans][no_sum].reset( - new jit_avx2_u8_copy_bt_kern()); + new jit_avx2_u8_copy_bt_kern_t()); copy_a[no_trans][do_sum].reset( - new jit_avx2_u8_copy_sum_an_kern()); + new jit_avx2_u8_copy_sum_an_kern_t()); copy_a[do_trans][do_sum].reset( - new jit_avx2_u8_copy_sum_at_kern()); + new jit_avx2_u8_copy_sum_at_kern_t()); copy_b[no_trans][do_sum].reset( - new jit_avx2_u8_copy_sum_bn_kern()); + new jit_avx2_u8_copy_sum_bn_kern_t()); copy_b[do_trans][do_sum].reset( - new jit_avx2_u8_copy_sum_bt_kern()); + new jit_avx2_u8_copy_sum_bt_kern_t()); #endif #if __BUILD_GEMM_AVX2 } else if (mayiuse(avx)) { copy_a[no_trans][no_sum].reset( - new jit_avx_u8_copy_an_kern()); + new jit_avx_u8_copy_an_kern_t()); copy_a[do_trans][no_sum].reset( - new jit_avx_u8_copy_at_kern()); + new jit_avx_u8_copy_at_kern_t()); copy_b[no_trans][no_sum].reset( - new jit_avx_u8_copy_bn_kern()); + new jit_avx_u8_copy_bn_kern_t()); copy_b[do_trans][no_sum].reset( - new jit_avx_u8_copy_bt_kern()); + new jit_avx_u8_copy_bt_kern_t()); copy_a[no_trans][do_sum].reset( - new jit_avx_u8_copy_sum_an_kern()); + new jit_avx_u8_copy_sum_an_kern_t()); copy_a[do_trans][do_sum].reset( - new jit_avx_u8_copy_sum_at_kern()); + new jit_avx_u8_copy_sum_at_kern_t()); copy_b[no_trans][do_sum].reset( - new jit_avx_u8_copy_sum_bn_kern()); + new jit_avx_u8_copy_sum_bn_kern_t()); copy_b[do_trans][do_sum].reset( - new jit_avx_u8_copy_sum_bt_kern()); + new jit_avx_u8_copy_sum_bt_kern_t()); #endif #if __BUILD_GEMM_SSE41 } else if (mayiuse(sse41)) { copy_a[no_trans][no_sum].reset( - new jit_sse41_u8_copy_an_kern()); + new jit_sse41_u8_copy_an_kern_t()); copy_a[do_trans][no_sum].reset( - new jit_sse41_u8_copy_at_kern()); + new jit_sse41_u8_copy_at_kern_t()); copy_b[no_trans][no_sum].reset( - new jit_sse41_u8_copy_bn_kern()); + new jit_sse41_u8_copy_bn_kern_t()); copy_b[do_trans][no_sum].reset( - new jit_sse41_u8_copy_bt_kern()); + new jit_sse41_u8_copy_bt_kern_t()); copy_a[no_trans][do_sum].reset( - new jit_sse41_u8_copy_sum_an_kern()); + new jit_sse41_u8_copy_sum_an_kern_t()); copy_a[do_trans][do_sum].reset( - new jit_sse41_u8_copy_sum_at_kern()); + new jit_sse41_u8_copy_sum_at_kern_t()); copy_b[no_trans][do_sum].reset( - new jit_sse41_u8_copy_sum_bn_kern()); + new jit_sse41_u8_copy_sum_bn_kern_t()); copy_b[do_trans][do_sum].reset( - new jit_sse41_u8_copy_sum_bt_kern()); + new jit_sse41_u8_copy_sum_bt_kern_t()); #endif } break; @@ -543,37 +543,37 @@ void gemm_info_t::jit_init(void) { } else if (mayiuse(amx_bf16)) { for (int isTrans : {no_trans, do_trans}) { copy_a[isTrans][no_sum].reset( - new jit_avx512_core_amx_copy_kern( + new jit_avx512_core_amx_copy_kern_t( true, !isTrans, sizeof(a_t))); copy_b[isTrans][no_sum].reset( - new jit_avx512_core_amx_copy_kern( + new jit_avx512_core_amx_copy_kern_t( false, isTrans, sizeof(b_t))); } #endif #if __BUILD_GEMM_AVX512 } else if (mayiuse(avx512_core) && !use_bf16_ymm) { copy_a[no_trans][no_sum].reset( - new jit_avx512_core_s16_48x8_copy_an_kern()); + new jit_avx512_core_s16_48x8_copy_an_kern_t()); copy_a[do_trans][no_sum].reset( - new jit_avx512_core_s16_48x8_copy_at_kern()); + new jit_avx512_core_s16_48x8_copy_at_kern_t()); copy_b[no_trans][no_sum].reset( - new jit_avx512_core_s16_48x8_copy_bn_kern()); + new jit_avx512_core_s16_48x8_copy_bn_kern_t()); copy_b[do_trans][no_sum].reset( - new jit_avx512_core_s16_48x8_copy_bt_kern()); + new jit_avx512_core_s16_48x8_copy_bt_kern_t()); #endif #if __BUILD_GEMM_AVX512 } else if (mayiuse(avx512_core) && use_bf16_ymm) { copy_a[no_trans][no_sum].reset( - new jit_avx512_core_s16_24x8_copy_an_kern()); + new jit_avx512_core_s16_24x8_copy_an_kern_t()); copy_a[do_trans][no_sum].reset( - new jit_avx512_core_s16_24x8_copy_at_kern()); + new jit_avx512_core_s16_24x8_copy_at_kern_t()); copy_b[no_trans][no_sum].reset( - new jit_avx512_core_s16_24x8_copy_bn_kern()); + new jit_avx512_core_s16_24x8_copy_bn_kern_t()); copy_b[do_trans][no_sum].reset( - new jit_avx512_core_s16_24x8_copy_bt_kern()); + new jit_avx512_core_s16_24x8_copy_bt_kern_t()); #endif } break; @@ -584,50 +584,50 @@ void gemm_info_t::jit_init(void) { #if __BUILD_GEMM_AVX512 } else if (mayiuse(avx512_core)) { copy_a[no_trans][no_sum].reset( - new jit_avx512_core_f32_copy_an_kern()); + new jit_avx512_core_f32_copy_an_kern_t()); copy_a[do_trans][no_sum].reset( - new jit_avx512_core_f32_copy_at_kern()); + new jit_avx512_core_f32_copy_at_kern_t()); copy_b[no_trans][no_sum].reset( - new jit_avx512_core_f32_copy_bn_kern()); + new jit_avx512_core_f32_copy_bn_kern_t()); copy_b[do_trans][no_sum].reset( - new jit_avx512_core_f32_copy_bt_kern()); + new jit_avx512_core_f32_copy_bt_kern_t()); #endif #if __BUILD_GEMM_AVX2 } else if (mayiuse(avx2)) { copy_a[no_trans][no_sum].reset( - new jit_avx2_f32_copy_an_kern()); + new jit_avx2_f32_copy_an_kern_t()); copy_a[do_trans][no_sum].reset( - new jit_avx2_f32_copy_at_kern()); + new jit_avx2_f32_copy_at_kern_t()); copy_b[no_trans][no_sum].reset( - new jit_avx2_f32_copy_bn_kern()); + new jit_avx2_f32_copy_bn_kern_t()); copy_b[do_trans][no_sum].reset( - new jit_avx2_f32_copy_bt_kern()); + new jit_avx2_f32_copy_bt_kern_t()); #endif #if __BUILD_GEMM_AVX2 } else if (mayiuse(avx)) { copy_a[no_trans][no_sum].reset( - new jit_avx_f32_copy_an_kern()); + new jit_avx_f32_copy_an_kern_t()); copy_a[do_trans][no_sum].reset( - new jit_avx_f32_copy_at_kern()); + new jit_avx_f32_copy_at_kern_t()); copy_b[no_trans][no_sum].reset( - new jit_avx_f32_copy_bn_kern()); + new jit_avx_f32_copy_bn_kern_t()); copy_b[do_trans][no_sum].reset( - new jit_avx_f32_copy_bt_kern()); + new jit_avx_f32_copy_bt_kern_t()); #endif #if __BUILD_GEMM_SSE41 } else if (mayiuse(sse41)) { copy_a[no_trans][no_sum].reset( - new jit_sse41_f32_copy_an_kern()); + new jit_sse41_f32_copy_an_kern_t()); copy_a[do_trans][no_sum].reset( - new jit_sse41_f32_copy_at_kern()); + new jit_sse41_f32_copy_at_kern_t()); copy_b[no_trans][no_sum].reset( - new jit_sse41_f32_copy_bn_kern()); + new jit_sse41_f32_copy_bn_kern_t()); copy_b[do_trans][no_sum].reset( - new jit_sse41_f32_copy_bt_kern()); + new jit_sse41_f32_copy_bt_kern_t()); #endif } break; @@ -643,7 +643,7 @@ void gemm_info_t::jit_init(void) { UNUSED(is_b_s8); UNUSED(is_c_s32); - static maybe_unique_ptr kernel[2][2][2][2] + static maybe_unique_ptr kernel[2][2][2][2] = {{{{nullptr}}}}; switch (data_traits_t::data_type) { case data_type::s8: @@ -653,7 +653,7 @@ void gemm_info_t::jit_init(void) { } else if (mayiuse(avx512_core_amx)) { for (int isBeta0 : {no_beta0, do_beta0}) { kernel[isBeta0][do_alpha1][no_sum][no_sum].reset( - new jit_avx512_core_amx_gemm_kern( + new jit_avx512_core_amx_gemm_kern_t( is_a_s8, is_b_s8, is_c_s32, isBeta0)); } #endif @@ -663,7 +663,7 @@ void gemm_info_t::jit_init(void) { for (int doColSum : {no_sum, do_sum}) for (int doRowSum : {no_sum, do_sum}) { kernel[isBeta0][do_alpha1][doColSum][doRowSum].reset( - new jit_avx512_core_gemm_s8u8s32_kern( + new jit_avx512_core_gemm_s8u8s32_kern_t( isBeta0, doColSum, doRowSum)); } #endif @@ -673,7 +673,7 @@ void gemm_info_t::jit_init(void) { for (int doColSum : {no_sum, do_sum}) for (int doRowSum : {no_sum, do_sum}) { kernel[isBeta0][do_alpha1][doColSum][doRowSum] - .reset(new jit_avx2_gemm_s8u8s32_kern( + .reset(new jit_avx2_gemm_s8u8s32_kern_t( isBeta0, doColSum, doRowSum, um)); } @@ -681,42 +681,42 @@ void gemm_info_t::jit_init(void) { #if __BUILD_GEMM_AVX2 } else if (mayiuse(avx)) { kernel[no_beta0][do_alpha1][no_sum][no_sum].reset( - new jit_avx_kernel_gemm_s8u8s32_kern()); + new jit_avx_kernel_gemm_s8u8s32_kern_t()); kernel[no_beta0][do_alpha1][do_sum][no_sum].reset( - new jit_avx_kernel_c_gemm_s8u8s32_kern()); + new jit_avx_kernel_c_gemm_s8u8s32_kern_t()); kernel[no_beta0][do_alpha1][no_sum][do_sum].reset( - new jit_avx_kernel_r_gemm_s8u8s32_kern()); + new jit_avx_kernel_r_gemm_s8u8s32_kern_t()); kernel[no_beta0][do_alpha1][do_sum][do_sum].reset( - new jit_avx_kernel_b_gemm_s8u8s32_kern()); + new jit_avx_kernel_b_gemm_s8u8s32_kern_t()); kernel[do_beta0][do_alpha1][no_sum][no_sum].reset( - new jit_avx_kernel_b0_gemm_s8u8s32_kern()); + new jit_avx_kernel_b0_gemm_s8u8s32_kern_t()); kernel[do_beta0][do_alpha1][do_sum][no_sum].reset( - new jit_avx_kernel_b0_c_gemm_s8u8s32_kern()); + new jit_avx_kernel_b0_c_gemm_s8u8s32_kern_t()); kernel[do_beta0][do_alpha1][no_sum][do_sum].reset( - new jit_avx_kernel_b0_r_gemm_s8u8s32_kern()); + new jit_avx_kernel_b0_r_gemm_s8u8s32_kern_t()); kernel[do_beta0][do_alpha1][do_sum][do_sum].reset( - new jit_avx_kernel_b0_b_gemm_s8u8s32_kern()); + new jit_avx_kernel_b0_b_gemm_s8u8s32_kern_t()); #endif #if __BUILD_GEMM_SSE41 } else if (mayiuse(sse41)) { kernel[no_beta0][do_alpha1][no_sum][no_sum].reset( - new jit_sse41_kernel_gemm_s8u8s32_kern()); + new jit_sse41_kernel_gemm_s8u8s32_kern_t()); kernel[no_beta0][do_alpha1][do_sum][no_sum].reset( - new jit_sse41_kernel_c_gemm_s8u8s32_kern()); + new jit_sse41_kernel_c_gemm_s8u8s32_kern_t()); kernel[no_beta0][do_alpha1][no_sum][do_sum].reset( - new jit_sse41_kernel_r_gemm_s8u8s32_kern()); + new jit_sse41_kernel_r_gemm_s8u8s32_kern_t()); kernel[no_beta0][do_alpha1][do_sum][do_sum].reset( - new jit_sse41_kernel_b_gemm_s8u8s32_kern()); + new jit_sse41_kernel_b_gemm_s8u8s32_kern_t()); kernel[do_beta0][do_alpha1][no_sum][no_sum].reset( - new jit_sse41_kernel_b0_gemm_s8u8s32_kern()); + new jit_sse41_kernel_b0_gemm_s8u8s32_kern_t()); kernel[do_beta0][do_alpha1][do_sum][no_sum].reset( - new jit_sse41_kernel_b0_c_gemm_s8u8s32_kern()); + new jit_sse41_kernel_b0_c_gemm_s8u8s32_kern_t()); kernel[do_beta0][do_alpha1][no_sum][do_sum].reset( - new jit_sse41_kernel_b0_r_gemm_s8u8s32_kern()); + new jit_sse41_kernel_b0_r_gemm_s8u8s32_kern_t()); kernel[do_beta0][do_alpha1][do_sum][do_sum].reset( - new jit_sse41_kernel_b0_b_gemm_s8u8s32_kern()); + new jit_sse41_kernel_b0_b_gemm_s8u8s32_kern_t()); #endif } break; @@ -728,7 +728,7 @@ void gemm_info_t::jit_init(void) { } else if (mayiuse(avx512_core_amx)) { for (int isBeta0 : {no_beta0, do_beta0}) { kernel[isBeta0][do_alpha1][no_sum][no_sum].reset( - new jit_avx512_core_amx_gemm_kern( + new jit_avx512_core_amx_gemm_kern_t( is_a_s8, is_b_s8, is_c_s32, isBeta0)); } #endif @@ -737,7 +737,7 @@ void gemm_info_t::jit_init(void) { for (int isBeta0 : {no_beta0, do_beta0}) for (int isAlpha1 : {no_alpha1, do_alpha1}) { kernel[isBeta0][isAlpha1][no_sum][no_sum].reset( - new jit_avx512_core_gemm_bf16bf16f32_kern( + new jit_avx512_core_gemm_bf16bf16f32_kern_t( isBeta0, isAlpha1, !use_bf16_ymm)); } #endif @@ -751,22 +751,22 @@ void gemm_info_t::jit_init(void) { } else if (mayiuse(avx2)) { for (int isBeta0 : {no_beta0, do_beta0}) { kernel[isBeta0][do_alpha1][no_sum][no_sum].reset( - new jit_avx2_kernel_sgemm_kern(isBeta0)); + new jit_avx2_kernel_sgemm_kern_t(isBeta0)); } #endif #if __BUILD_GEMM_AVX2 } else if (mayiuse(avx)) { kernel[no_beta0][do_alpha1][no_sum][no_sum].reset( - new jit_avx_kernel_sgemm_kern()); + new jit_avx_kernel_sgemm_kern_t()); kernel[do_beta0][do_alpha1][no_sum][no_sum].reset( - new jit_avx_kernel_b0_sgemm_kern()); + new jit_avx_kernel_b0_sgemm_kern_t()); #endif #if __BUILD_GEMM_SSE41 } else if (mayiuse(sse41)) { kernel[no_beta0][do_alpha1][no_sum][no_sum].reset( - new jit_sse41_kernel_sgemm_kern()); + new jit_sse41_kernel_sgemm_kern_t()); kernel[do_beta0][do_alpha1][no_sum][no_sum].reset( - new jit_sse41_kernel_b0_sgemm_kern()); + new jit_sse41_kernel_b0_sgemm_kern_t()); #endif } break; @@ -774,10 +774,10 @@ void gemm_info_t::jit_init(void) { default: break; } - static maybe_unique_ptr gemv_kernel[2] = {nullptr}; - static maybe_unique_ptr gemv_s8s8s32_kernel = nullptr; - static maybe_unique_ptr gemv_s8u8s32_kernel = nullptr; - static maybe_unique_ptr gemv_u8s8s32_kernel = nullptr; + static maybe_unique_ptr gemv_kernel[2] = {nullptr}; + static maybe_unique_ptr gemv_s8s8s32_kernel = nullptr; + static maybe_unique_ptr gemv_s8u8s32_kernel = nullptr; + static maybe_unique_ptr gemv_u8s8s32_kernel = nullptr; switch (data_traits_t::data_type) { case data_type::s8: if (false) { @@ -785,11 +785,14 @@ void gemm_info_t::jit_init(void) { #if __BUILD_GEMM_AVX512 } else if (mayiuse(avx512_core)) { gemv_s8s8s32_kernel.reset( - new jit_avx512_core_gemv_s8x8s32_kern(ver_t::s8s8)); + new jit_avx512_core_gemv_s8x8s32_kern_t( + ver_t::s8s8)); gemv_s8u8s32_kernel.reset( - new jit_avx512_core_gemv_s8x8s32_kern(ver_t::s8u8)); + new jit_avx512_core_gemv_s8x8s32_kern_t( + ver_t::s8u8)); gemv_u8s8s32_kernel.reset( - new jit_avx512_core_gemv_s8x8s32_kern(ver_t::u8s8)); + new jit_avx512_core_gemv_s8x8s32_kern_t( + ver_t::u8s8)); #endif } break; @@ -801,7 +804,7 @@ void gemm_info_t::jit_init(void) { } else if (mayiuse(avx512_core)) { for (int isTrans : {no_trans, do_trans}) gemv_kernel[isTrans].reset( - new jit_avx512_core_gemv_bf16bf16f32_kern( + new jit_avx512_core_gemv_bf16bf16f32_kern_t( isTrans)); #endif } @@ -813,15 +816,16 @@ void gemm_info_t::jit_init(void) { #if __BUILD_GEMM_AVX2 } else if (mayiuse(avx)) { gemv_kernel[no_trans].reset( - new jit_sse41_gemv_n_f32_kern()); - gemv_kernel[do_trans].reset(new jit_avx_gemv_t_f32_kern()); + new jit_sse41_gemv_n_f32_kern_t()); + gemv_kernel[do_trans].reset( + new jit_avx_gemv_t_f32_kern_t()); #endif #if __BUILD_GEMM_SSE41 } else if (mayiuse(sse41)) { gemv_kernel[no_trans].reset( - new jit_sse41_gemv_n_f32_kern()); + new jit_sse41_gemv_n_f32_kern_t()); gemv_kernel[do_trans].reset( - new jit_sse41_gemv_t_f32_kern()); + new jit_sse41_gemv_t_f32_kern_t()); #endif } break; diff --git a/src/cpu/x64/gemm/gemm_pack_storage.hpp b/src/cpu/x64/gemm/gemm_pack_storage.hpp index 2f92e445c0a..73111f73c7d 100644 --- a/src/cpu/x64/gemm/gemm_pack_storage.hpp +++ b/src/cpu/x64/gemm/gemm_pack_storage.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2023 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -108,14 +108,14 @@ struct gemm_pack_storage_t { template data_type *row_sums(int ithr, dim_t r0, dim_t cblock) const { - if (!has_row_sums()) return NULL; + if (!has_row_sums()) return nullptr; auto id = thread_to_slice(ithr); return get_block(sums_header->slice[id], r0, cblock); } template data_type *col_sums(int ithr, dim_t rblock, dim_t c0) const { - if (!has_col_sums()) return NULL; + if (!has_col_sums()) return nullptr; auto id = thread_to_slice(ithr); return get_block(sums_header->slice[id], rblock, c0); } diff --git a/src/cpu/x64/gemm/gemm_threading.hpp b/src/cpu/x64/gemm/gemm_threading.hpp index 3915dd54f12..b0af2760095 100644 --- a/src/cpu/x64/gemm/gemm_threading.hpp +++ b/src/cpu/x64/gemm/gemm_threading.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2019-2022 Intel Corporation +* Copyright 2019-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,7 +43,7 @@ struct gemm_slice_t { }; struct gemm_threading_t { - gemm_threading_t() {}; + gemm_threading_t() = default; int nthrs_m, nthrs_n, nthrs_k; dim_t block_m, block_n, block_k; // Blocking sizes (-1 = default) diff --git a/src/cpu/x64/gemm/s8x8s32/common_u8.hpp b/src/cpu/x64/gemm/s8x8s32/common_u8.hpp index c083378627e..575f73a52a9 100644 --- a/src/cpu/x64/gemm/s8x8s32/common_u8.hpp +++ b/src/cpu/x64/gemm/s8x8s32/common_u8.hpp @@ -22,464 +22,464 @@ #include "cpu/x64/jit_generator.hpp" #define PADD_BYTESIZE_ONPAGE(x, size) \ - (((x) * (size) + PAGE_4K - 1) / PAGE_4K) * PAGE_4K -#define NEXT_THR_STRIDE(x, size) (PADD_BYTESIZE_ONPAGE(x, size)) / size + ((((x) * (size) + PAGE_4K - 1) / PAGE_4K) * PAGE_4K) +#define NEXT_THR_STRIDE(x, size) (PADD_BYTESIZE_ONPAGE(x, (size)) / (size)) namespace dnnl { namespace impl { namespace cpu { namespace x64 { -class jit_avx512_core_u8_copy_an_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_an_kern); +class jit_avx512_core_u8_copy_an_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_an_kern_t); void generate() override; public: - jit_avx512_core_u8_copy_an_kern(); + jit_avx512_core_u8_copy_an_kern_t(); }; -class jit_avx512_core_u8_copy_at_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_at_kern); +class jit_avx512_core_u8_copy_at_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_at_kern_t); void generate() override; public: - jit_avx512_core_u8_copy_at_kern(); + jit_avx512_core_u8_copy_at_kern_t(); }; -class jit_avx512_core_u8_copy_bn_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_bn_kern); +class jit_avx512_core_u8_copy_bn_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_bn_kern_t); void generate() override; bool s8_case; public: - jit_avx512_core_u8_copy_bn_kern(bool s8 = false); + jit_avx512_core_u8_copy_bn_kern_t(bool s8 = false); }; -class jit_avx512_core_u8_copy_bt_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_bt_kern); +class jit_avx512_core_u8_copy_bt_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_bt_kern_t); void generate() override; bool s8_case; public: - jit_avx512_core_u8_copy_bt_kern(bool s8 = false); + jit_avx512_core_u8_copy_bt_kern_t(bool s8 = false); }; -class jit_avx512_core_u8_copy_sum_an_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_sum_an_kern); +class jit_avx512_core_u8_copy_sum_an_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_sum_an_kern_t); void generate() override; public: - jit_avx512_core_u8_copy_sum_an_kern(); + jit_avx512_core_u8_copy_sum_an_kern_t(); }; -class jit_avx512_core_u8_copy_sum_at_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_sum_at_kern); +class jit_avx512_core_u8_copy_sum_at_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_sum_at_kern_t); void generate() override; public: - jit_avx512_core_u8_copy_sum_at_kern(); + jit_avx512_core_u8_copy_sum_at_kern_t(); }; -class jit_avx512_core_u8_copy_sum_bn_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_sum_bn_kern); +class jit_avx512_core_u8_copy_sum_bn_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_sum_bn_kern_t); void generate() override; bool s8_case; public: - jit_avx512_core_u8_copy_sum_bn_kern(bool s8 = false); + jit_avx512_core_u8_copy_sum_bn_kern_t(bool s8 = false); }; -class jit_avx512_core_u8_copy_sum_bt_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_sum_bt_kern); +class jit_avx512_core_u8_copy_sum_bt_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_sum_bt_kern_t); void generate() override; bool s8_case; public: - jit_avx512_core_u8_copy_sum_bt_kern(bool s8 = false); + jit_avx512_core_u8_copy_sum_bt_kern_t(bool s8 = false); }; -class jit_avx2_vnni_u8_copy_an_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_vnni_u8_copy_an_kern); +class jit_avx2_vnni_u8_copy_an_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_vnni_u8_copy_an_kern_t); void generate() override; public: - jit_avx2_vnni_u8_copy_an_kern(); + jit_avx2_vnni_u8_copy_an_kern_t(); }; -class jit_avx2_vnni_u8_copy_at_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_vnni_u8_copy_at_kern); +class jit_avx2_vnni_u8_copy_at_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_vnni_u8_copy_at_kern_t); void generate() override; public: - jit_avx2_vnni_u8_copy_at_kern(); + jit_avx2_vnni_u8_copy_at_kern_t(); }; -class jit_avx2_vnni_u8_copy_bn_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_vnni_u8_copy_bn_kern); +class jit_avx2_vnni_u8_copy_bn_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_vnni_u8_copy_bn_kern_t); void generate() override; public: - jit_avx2_vnni_u8_copy_bn_kern(); + jit_avx2_vnni_u8_copy_bn_kern_t(); }; -class jit_avx2_vnni_u8_copy_bt_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_vnni_u8_copy_bt_kern); +class jit_avx2_vnni_u8_copy_bt_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_vnni_u8_copy_bt_kern_t); void generate() override; public: - jit_avx2_vnni_u8_copy_bt_kern(); + jit_avx2_vnni_u8_copy_bt_kern_t(); }; -class jit_avx2_vnni_u8_copy_sum_an_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_vnni_u8_copy_sum_an_kern); +class jit_avx2_vnni_u8_copy_sum_an_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_vnni_u8_copy_sum_an_kern_t); void generate() override; public: - jit_avx2_vnni_u8_copy_sum_an_kern(); + jit_avx2_vnni_u8_copy_sum_an_kern_t(); }; -class jit_avx2_vnni_u8_copy_sum_at_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_vnni_u8_copy_sum_at_kern); +class jit_avx2_vnni_u8_copy_sum_at_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_vnni_u8_copy_sum_at_kern_t); void generate() override; public: - jit_avx2_vnni_u8_copy_sum_at_kern(); + jit_avx2_vnni_u8_copy_sum_at_kern_t(); }; -class jit_avx2_vnni_u8_copy_sum_bn_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_vnni_u8_copy_sum_bn_kern); +class jit_avx2_vnni_u8_copy_sum_bn_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_vnni_u8_copy_sum_bn_kern_t); void generate() override; public: - jit_avx2_vnni_u8_copy_sum_bn_kern(); + jit_avx2_vnni_u8_copy_sum_bn_kern_t(); }; -class jit_avx2_vnni_u8_copy_sum_bt_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_vnni_u8_copy_sum_bt_kern); +class jit_avx2_vnni_u8_copy_sum_bt_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_vnni_u8_copy_sum_bt_kern_t); void generate() override; public: - jit_avx2_vnni_u8_copy_sum_bt_kern(); + jit_avx2_vnni_u8_copy_sum_bt_kern_t(); }; -class jit_avx2_u8_copy_an_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_u8_copy_an_kern); +class jit_avx2_u8_copy_an_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_u8_copy_an_kern_t); void generate() override; public: - jit_avx2_u8_copy_an_kern(); + jit_avx2_u8_copy_an_kern_t(); }; -class jit_avx2_u8_copy_at_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_u8_copy_at_kern); +class jit_avx2_u8_copy_at_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_u8_copy_at_kern_t); void generate() override; public: - jit_avx2_u8_copy_at_kern(); + jit_avx2_u8_copy_at_kern_t(); }; -class jit_avx2_u8_copy_bn_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_u8_copy_bn_kern); +class jit_avx2_u8_copy_bn_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_u8_copy_bn_kern_t); void generate() override; public: - jit_avx2_u8_copy_bn_kern(); + jit_avx2_u8_copy_bn_kern_t(); }; -class jit_avx2_u8_copy_bt_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_u8_copy_bt_kern); +class jit_avx2_u8_copy_bt_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_u8_copy_bt_kern_t); void generate() override; public: - jit_avx2_u8_copy_bt_kern(); + jit_avx2_u8_copy_bt_kern_t(); }; -class jit_avx2_u8_copy_sum_an_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_u8_copy_sum_an_kern); +class jit_avx2_u8_copy_sum_an_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_u8_copy_sum_an_kern_t); void generate() override; public: - jit_avx2_u8_copy_sum_an_kern(); + jit_avx2_u8_copy_sum_an_kern_t(); }; -class jit_avx2_u8_copy_sum_at_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_u8_copy_sum_at_kern); +class jit_avx2_u8_copy_sum_at_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_u8_copy_sum_at_kern_t); void generate() override; public: - jit_avx2_u8_copy_sum_at_kern(); + jit_avx2_u8_copy_sum_at_kern_t(); }; -class jit_avx2_u8_copy_sum_bn_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_u8_copy_sum_bn_kern); +class jit_avx2_u8_copy_sum_bn_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_u8_copy_sum_bn_kern_t); void generate() override; public: - jit_avx2_u8_copy_sum_bn_kern(); + jit_avx2_u8_copy_sum_bn_kern_t(); }; -class jit_avx2_u8_copy_sum_bt_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_u8_copy_sum_bt_kern); +class jit_avx2_u8_copy_sum_bt_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_u8_copy_sum_bt_kern_t); void generate() override; public: - jit_avx2_u8_copy_sum_bt_kern(); + jit_avx2_u8_copy_sum_bt_kern_t(); }; -class jit_avx_u8_copy_an_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_u8_copy_an_kern); +class jit_avx_u8_copy_an_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_u8_copy_an_kern_t); void generate() override; public: - jit_avx_u8_copy_an_kern(); + jit_avx_u8_copy_an_kern_t(); }; -class jit_avx_u8_copy_at_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_u8_copy_at_kern); +class jit_avx_u8_copy_at_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_u8_copy_at_kern_t); void generate() override; public: - jit_avx_u8_copy_at_kern(); + jit_avx_u8_copy_at_kern_t(); }; -class jit_avx_u8_copy_bn_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_u8_copy_bn_kern); +class jit_avx_u8_copy_bn_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_u8_copy_bn_kern_t); void generate() override; public: - jit_avx_u8_copy_bn_kern(); + jit_avx_u8_copy_bn_kern_t(); }; -class jit_avx_u8_copy_bt_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_u8_copy_bt_kern); +class jit_avx_u8_copy_bt_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_u8_copy_bt_kern_t); void generate() override; public: - jit_avx_u8_copy_bt_kern(); + jit_avx_u8_copy_bt_kern_t(); }; -class jit_avx_u8_copy_sum_an_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_u8_copy_sum_an_kern); +class jit_avx_u8_copy_sum_an_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_u8_copy_sum_an_kern_t); void generate() override; public: - jit_avx_u8_copy_sum_an_kern(); + jit_avx_u8_copy_sum_an_kern_t(); }; -class jit_avx_u8_copy_sum_at_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_u8_copy_sum_at_kern); +class jit_avx_u8_copy_sum_at_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_u8_copy_sum_at_kern_t); void generate() override; public: - jit_avx_u8_copy_sum_at_kern(); + jit_avx_u8_copy_sum_at_kern_t(); }; -class jit_avx_u8_copy_sum_bn_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_u8_copy_sum_bn_kern); +class jit_avx_u8_copy_sum_bn_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_u8_copy_sum_bn_kern_t); void generate() override; public: - jit_avx_u8_copy_sum_bn_kern(); + jit_avx_u8_copy_sum_bn_kern_t(); }; -class jit_avx_u8_copy_sum_bt_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_u8_copy_sum_bt_kern); +class jit_avx_u8_copy_sum_bt_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_u8_copy_sum_bt_kern_t); void generate() override; public: - jit_avx_u8_copy_sum_bt_kern(); + jit_avx_u8_copy_sum_bt_kern_t(); }; -class jit_avx_kernel_b0_gemm_s8u8s32_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_b0_gemm_s8u8s32_kern); +class jit_avx_kernel_b0_gemm_s8u8s32_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_b0_gemm_s8u8s32_kern_t); void generate() override; public: - jit_avx_kernel_b0_gemm_s8u8s32_kern(); + jit_avx_kernel_b0_gemm_s8u8s32_kern_t(); }; -class jit_avx_kernel_b0_b_gemm_s8u8s32_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_b0_b_gemm_s8u8s32_kern); +class jit_avx_kernel_b0_b_gemm_s8u8s32_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_b0_b_gemm_s8u8s32_kern_t); void generate() override; public: - jit_avx_kernel_b0_b_gemm_s8u8s32_kern(); + jit_avx_kernel_b0_b_gemm_s8u8s32_kern_t(); }; -class jit_avx_kernel_b0_r_gemm_s8u8s32_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_b0_r_gemm_s8u8s32_kern); +class jit_avx_kernel_b0_r_gemm_s8u8s32_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_b0_r_gemm_s8u8s32_kern_t); void generate() override; public: - jit_avx_kernel_b0_r_gemm_s8u8s32_kern(); + jit_avx_kernel_b0_r_gemm_s8u8s32_kern_t(); }; -class jit_avx_kernel_b0_c_gemm_s8u8s32_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_b0_c_gemm_s8u8s32_kern); +class jit_avx_kernel_b0_c_gemm_s8u8s32_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_b0_c_gemm_s8u8s32_kern_t); void generate() override; public: - jit_avx_kernel_b0_c_gemm_s8u8s32_kern(); + jit_avx_kernel_b0_c_gemm_s8u8s32_kern_t(); }; -class jit_avx_kernel_gemm_s8u8s32_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_gemm_s8u8s32_kern); +class jit_avx_kernel_gemm_s8u8s32_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_gemm_s8u8s32_kern_t); void generate() override; public: - jit_avx_kernel_gemm_s8u8s32_kern(); + jit_avx_kernel_gemm_s8u8s32_kern_t(); }; -class jit_avx_kernel_b_gemm_s8u8s32_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_b_gemm_s8u8s32_kern); +class jit_avx_kernel_b_gemm_s8u8s32_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_b_gemm_s8u8s32_kern_t); void generate() override; public: - jit_avx_kernel_b_gemm_s8u8s32_kern(); + jit_avx_kernel_b_gemm_s8u8s32_kern_t(); }; -class jit_avx_kernel_r_gemm_s8u8s32_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_r_gemm_s8u8s32_kern); +class jit_avx_kernel_r_gemm_s8u8s32_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_r_gemm_s8u8s32_kern_t); void generate() override; public: - jit_avx_kernel_r_gemm_s8u8s32_kern(); + jit_avx_kernel_r_gemm_s8u8s32_kern_t(); }; -class jit_avx_kernel_c_gemm_s8u8s32_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_c_gemm_s8u8s32_kern); +class jit_avx_kernel_c_gemm_s8u8s32_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_c_gemm_s8u8s32_kern_t); void generate() override; public: - jit_avx_kernel_c_gemm_s8u8s32_kern(); + jit_avx_kernel_c_gemm_s8u8s32_kern_t(); }; -class jit_sse41_u8_copy_an_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_u8_copy_an_kern); +class jit_sse41_u8_copy_an_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_u8_copy_an_kern_t); void generate() override; public: - jit_sse41_u8_copy_an_kern(); + jit_sse41_u8_copy_an_kern_t(); }; -class jit_sse41_u8_copy_at_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_u8_copy_at_kern); +class jit_sse41_u8_copy_at_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_u8_copy_at_kern_t); void generate() override; public: - jit_sse41_u8_copy_at_kern(); + jit_sse41_u8_copy_at_kern_t(); }; -class jit_sse41_u8_copy_bn_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_u8_copy_bn_kern); +class jit_sse41_u8_copy_bn_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_u8_copy_bn_kern_t); void generate() override; public: - jit_sse41_u8_copy_bn_kern(); + jit_sse41_u8_copy_bn_kern_t(); }; -class jit_sse41_u8_copy_bt_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_u8_copy_bt_kern); +class jit_sse41_u8_copy_bt_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_u8_copy_bt_kern_t); void generate() override; public: - jit_sse41_u8_copy_bt_kern(); + jit_sse41_u8_copy_bt_kern_t(); }; -class jit_sse41_u8_copy_sum_an_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_u8_copy_sum_an_kern); +class jit_sse41_u8_copy_sum_an_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_u8_copy_sum_an_kern_t); void generate() override; public: - jit_sse41_u8_copy_sum_an_kern(); + jit_sse41_u8_copy_sum_an_kern_t(); }; -class jit_sse41_u8_copy_sum_at_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_u8_copy_sum_at_kern); +class jit_sse41_u8_copy_sum_at_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_u8_copy_sum_at_kern_t); void generate() override; public: - jit_sse41_u8_copy_sum_at_kern(); + jit_sse41_u8_copy_sum_at_kern_t(); }; -class jit_sse41_u8_copy_sum_bn_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_u8_copy_sum_bn_kern); +class jit_sse41_u8_copy_sum_bn_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_u8_copy_sum_bn_kern_t); void generate() override; public: - jit_sse41_u8_copy_sum_bn_kern(); + jit_sse41_u8_copy_sum_bn_kern_t(); }; -class jit_sse41_u8_copy_sum_bt_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_u8_copy_sum_bt_kern); +class jit_sse41_u8_copy_sum_bt_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_u8_copy_sum_bt_kern_t); void generate() override; public: - jit_sse41_u8_copy_sum_bt_kern(); + jit_sse41_u8_copy_sum_bt_kern_t(); }; -class jit_sse41_kernel_b0_gemm_s8u8s32_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_b0_gemm_s8u8s32_kern); +class jit_sse41_kernel_b0_gemm_s8u8s32_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_b0_gemm_s8u8s32_kern_t); void generate() override; public: - jit_sse41_kernel_b0_gemm_s8u8s32_kern(); + jit_sse41_kernel_b0_gemm_s8u8s32_kern_t(); }; -class jit_sse41_kernel_b0_b_gemm_s8u8s32_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_b0_b_gemm_s8u8s32_kern); +class jit_sse41_kernel_b0_b_gemm_s8u8s32_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_b0_b_gemm_s8u8s32_kern_t); void generate() override; public: - jit_sse41_kernel_b0_b_gemm_s8u8s32_kern(); + jit_sse41_kernel_b0_b_gemm_s8u8s32_kern_t(); }; -class jit_sse41_kernel_b0_r_gemm_s8u8s32_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_b0_r_gemm_s8u8s32_kern); +class jit_sse41_kernel_b0_r_gemm_s8u8s32_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_b0_r_gemm_s8u8s32_kern_t); void generate() override; public: - jit_sse41_kernel_b0_r_gemm_s8u8s32_kern(); + jit_sse41_kernel_b0_r_gemm_s8u8s32_kern_t(); }; -class jit_sse41_kernel_b0_c_gemm_s8u8s32_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_b0_c_gemm_s8u8s32_kern); +class jit_sse41_kernel_b0_c_gemm_s8u8s32_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_b0_c_gemm_s8u8s32_kern_t); void generate() override; public: - jit_sse41_kernel_b0_c_gemm_s8u8s32_kern(); + jit_sse41_kernel_b0_c_gemm_s8u8s32_kern_t(); }; -class jit_sse41_kernel_gemm_s8u8s32_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_gemm_s8u8s32_kern); +class jit_sse41_kernel_gemm_s8u8s32_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_gemm_s8u8s32_kern_t); void generate() override; public: - jit_sse41_kernel_gemm_s8u8s32_kern(); + jit_sse41_kernel_gemm_s8u8s32_kern_t(); }; -class jit_sse41_kernel_b_gemm_s8u8s32_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_b_gemm_s8u8s32_kern); +class jit_sse41_kernel_b_gemm_s8u8s32_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_b_gemm_s8u8s32_kern_t); void generate() override; public: - jit_sse41_kernel_b_gemm_s8u8s32_kern(); + jit_sse41_kernel_b_gemm_s8u8s32_kern_t(); }; -class jit_sse41_kernel_r_gemm_s8u8s32_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_r_gemm_s8u8s32_kern); +class jit_sse41_kernel_r_gemm_s8u8s32_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_r_gemm_s8u8s32_kern_t); void generate() override; public: - jit_sse41_kernel_r_gemm_s8u8s32_kern(); + jit_sse41_kernel_r_gemm_s8u8s32_kern_t(); }; -class jit_sse41_kernel_c_gemm_s8u8s32_kern : public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_c_gemm_s8u8s32_kern); +class jit_sse41_kernel_c_gemm_s8u8s32_kern_t : public jit_generator_t { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_c_gemm_s8u8s32_kern_t); void generate() override; public: - jit_sse41_kernel_c_gemm_s8u8s32_kern(); + jit_sse41_kernel_c_gemm_s8u8s32_kern_t(); }; } // namespace x64 diff --git a/src/cpu/x64/gemm/s8x8s32/jit_avx2_gemm_s8u8s32_kern.cpp b/src/cpu/x64/gemm/s8x8s32/jit_avx2_gemm_s8u8s32_kern.cpp index d15e3cc71b6..a214221860a 100644 --- a/src/cpu/x64/gemm/s8x8s32/jit_avx2_gemm_s8u8s32_kern.cpp +++ b/src/cpu/x64/gemm/s8x8s32/jit_avx2_gemm_s8u8s32_kern.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2018-2024 Intel Corporation +* Copyright 2018-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,7 +38,7 @@ static inline Xmm make_xmm(const Xmm &v) { } // Load from or store to C. -void jit_avx2_gemm_s8u8s32_kern::c_load( +void jit_avx2_gemm_s8u8s32_kern_t::c_load( const Xbyak::Xmm &dst, const Xbyak::Address &src, int nelems) { switch (nelems) { case 1: vmovss(make_xmm(dst), src); break; @@ -51,7 +51,7 @@ void jit_avx2_gemm_s8u8s32_kern::c_load( } } -void jit_avx2_gemm_s8u8s32_kern::c_store( +void jit_avx2_gemm_s8u8s32_kern_t::c_store( const Xbyak::Address &dst, const Xbyak::Xmm &src, int nelems) { switch (nelems) { case 1: vmovss(dst, make_xmm(src)); break; @@ -67,7 +67,7 @@ void jit_avx2_gemm_s8u8s32_kern::c_store( // Perform length-4 dot product accumulations of unsigned and signed bytes // in parallel. // Use VEX vpdpbusd if avx2-vnni available, otherwise emulate. -void jit_avx2_gemm_s8u8s32_kern::dot_product( +void jit_avx2_gemm_s8u8s32_kern_t::dot_product( const Xmm &dst, const Xmm &src1, const Xmm &src2) { if (vnni_) { vpdpbusd(dst, src1, src2, VexEncoding); @@ -79,7 +79,7 @@ void jit_avx2_gemm_s8u8s32_kern::dot_product( } // Inner kernel. -void jit_avx2_gemm_s8u8s32_kern::kernel_loop( +void jit_avx2_gemm_s8u8s32_kern_t::kernel_loop( int unroll_m, int unroll_n, bool cfetch) { int um_vecs = (unroll_m + 7) >> 3; Label label_kernel_loop; @@ -137,7 +137,7 @@ void jit_avx2_gemm_s8u8s32_kern::kernel_loop( } // k remainder loop for kernel. -void jit_avx2_gemm_s8u8s32_kern::remainder_kernel( +void jit_avx2_gemm_s8u8s32_kern_t::remainder_kernel( int unroll_m, int unroll_n, int unroll_k, int bwidth) { Ymm b = b_regs_[0]; @@ -165,7 +165,7 @@ void jit_avx2_gemm_s8u8s32_kern::remainder_kernel( } // Inner loop. -void jit_avx2_gemm_s8u8s32_kern::innerloop(int unroll_m, int unroll_n) { +void jit_avx2_gemm_s8u8s32_kern_t::innerloop(int unroll_m, int unroll_n) { int um_vecs = (unroll_m + 7) >> 3; int stage1 = unroll_n, stage2 = mayiuse(avx2_vnni) ? 32 : 16; @@ -308,7 +308,7 @@ void jit_avx2_gemm_s8u8s32_kern::innerloop(int unroll_m, int unroll_n) { } // Outer loop. -void jit_avx2_gemm_s8u8s32_kern::outerloop( +void jit_avx2_gemm_s8u8s32_kern_t::outerloop( int unroll_x, int unroll_y, Label *&cur_outerloop_label) { Label label_m_loop, label_n_loop; std::vector