From f994b8579898af5f658c5aea37db5100d2f417e9 Mon Sep 17 00:00:00 2001
From: "Kassen, Andrew"
Date: Thu, 6 Mar 2025 17:33:38 -0800
Subject: [PATCH 1/5] xe: avoid copies

---
 src/gpu/gpu_reorder_pd.cpp | 2 +-
 src/gpu/intel/jit/codegen/codegen.cpp | 2 +-
 src/gpu/intel/jit/codegen/reduce.hpp | 2 +-
 src/gpu/intel/jit/conv/config.cpp | 9 +++++----
 src/gpu/intel/jit/conv/ir_builder.cpp | 6 +++---
 src/gpu/intel/jit/conv/message_patterns.hpp | 6 +++---
 src/gpu/intel/jit/conv/pipeline.cpp | 2 +-
 src/gpu/intel/jit/conv/plan.cpp | 10 +++++-----
 src/gpu/intel/jit/conv/tiler.cpp | 6 +++---
 src/gpu/intel/jit/conv/zp_plan.cpp | 6 +++---
 .../intel/jit/gemm/generator/pieces/c_update.cxx | 6 +++---
 src/gpu/intel/jit/ir/blocking.cpp | 2 +-
 src/gpu/intel/jit/ir/epilogue.cpp | 4 ++--
 src/gpu/intel/jit/ir/ir.cpp | 2 +-
 src/gpu/intel/jit/ir/linear_expr.cpp | 5 ++---
 src/gpu/intel/jit/ir/message.cpp | 4 ++--
 src/gpu/intel/jit/ir/message_patterns.hpp | 2 +-
 src/gpu/intel/jit/ir/problem.cpp | 6 ++----
 src/gpu/intel/jit/ir/send_plan.cpp | 2 +-
 src/gpu/intel/jit/ir/slm_reduce_builder.cpp | 2 +-
 src/gpu/intel/jit/ir/tensor.hpp | 4 ++--
 src/gpu/intel/jit/pass/alloc.cpp | 2 +-
 src/gpu/intel/jit/pass/bank_conflict.cpp | 6 +++---
 src/gpu/intel/jit/pass/dpasw.cpp | 2 +-
 src/gpu/intel/jit/pass/hoist.cpp | 2 +-
 src/gpu/intel/jit/pass/overflow.cpp | 2 +-
 src/gpu/intel/jit/pass/send.cpp | 4 ++--
 src/gpu/intel/jit/pass/simplify.cpp | 10 +++++-----
 src/gpu/intel/jit/reorder/config.cpp | 2 +-
 src/gpu/intel/jit/reorder/ir_builder.cpp | 2 +-
 src/gpu/intel/jit/reorder/tiler.cpp | 2 +-
 src/gpu/intel/jit/v2/conv/builder.cpp | 6 +++---
 src/gpu/intel/jit/v2/conv/kernel_desc.cpp | 2 +-
 src/gpu/intel/jit/v2/conv/kernel_desc_2d_reqs.cpp | 6 +++---
 src/gpu/intel/jit/v2/conv/plan.cpp | 10 +++++-----
 src/gpu/intel/jit/v2/conv/plan_registry.cpp | 4 ++--
 src/gpu/intel/jit/v2/conv/planner/bench.cpp | 2 +-
 src/gpu/intel/jit/v2/conv/planner/search.cpp | 12 ++++++------
 src/gpu/intel/jit/v2/conv/tensor_utils.cpp | 4 ++--
 src/gpu/intel/jit/v2/ir/builder.cpp | 2 +-
 src/gpu/intel/jit/v2/ir/builder.hpp | 4 ++--
 src/gpu/intel/jit/v2/ir/send.hpp | 4 ++--
 src/gpu/intel/jit/v2/ir/tensor.cpp | 8 ++++----
 src/gpu/intel/logging.hpp | 2 +-
 src/gpu/intel/microkernels/shim.cpp | 4 ++--
 src/gpu/intel/ocl/bnorm/bnorm_lookup_table.cpp | 4 ++--
 src/gpu/intel/ocl/bnorm/nhwc_reusable.hpp | 4 ++--
 src/gpu/intel/ocl/micro_sdpa.cpp | 2 +-
 src/gpu/intel/ocl/reduction/atomic_reduction.cpp | 5 +++--
 src/gpu/intel/ocl/reduction/atomic_reduction.hpp | 2 +-
 src/gpu/intel/ocl/reduction/combined_reduction.hpp | 13 +++++--------
 .../intel/ocl/reduction/reusable_ref_reduction.hpp | 2 +-
 src/gpu/intel/ocl/reusable_lnorm.cpp | 2 +-
 src/gpu/intel/ocl/rnn/rnn_grid.cpp | 2 +-
 54 files changed, 113 insertions(+), 117 deletions(-)
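Note on the recurring pattern in this patch (an illustrative sketch, not oneDNN code; registry_t, get_registry, and collect are invented names): where a value is only read, binding it with const auto & avoids the copy that plain auto makes, and where a local is dead after the call, std::move lets push_back steal its storage instead of copying it.

    #include <string>
    #include <utility>
    #include <vector>

    struct registry_t {
        std::string name;
    };

    // Stand-in for an accessor like scratchpad_registry() below: it returns
    // a reference to an object that outlives the caller.
    const registry_t &get_registry() {
        static registry_t reg {"default"};
        return reg;
    }

    std::vector<registry_t> collect(int n) {
        // Read-only use: const auto & binds to the existing object; plain
        // auto here would copy registry_t (and its std::string) on each call.
        const auto &reg = get_registry();

        std::vector<registry_t> out;
        out.reserve(n);
        for (int i = 0; i < n; i++) {
            registry_t r {reg.name + std::to_string(i)};
            // r is not used after this statement, so its buffer can be
            // moved into the vector instead of copied.
            out.push_back(std::move(r));
        }
        return out;
    }

Both rewrites are behavior-preserving; they only drop redundant copies of temporaries.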
diff --git a/src/gpu/gpu_reorder_pd.cpp b/src/gpu/gpu_reorder_pd.cpp
index ca293db5c89..a5609fca26c 100644
--- a/src/gpu/gpu_reorder_pd.cpp
+++ b/src/gpu/gpu_reorder_pd.cpp
@@ -47,7 +47,7 @@ status_t gpu_reorder_pd_t::maybe_create_zp_precompute_conv_pd(
     auto gpu_align = utils::downcast(dst_engine)
                              ->get_buffer_alignment();
     auto scratchpad = scratchpad_registry().registrar();
-    auto registry = zp_precomp_conv_pd_->scratchpad_registry();
+    const auto &registry = zp_precomp_conv_pd_->scratchpad_registry();
     memory_desc_wrapper wspace((is_bwd_d) ? zp_precomp_conv_pd_->diff_dst_md()
                                           : zp_precomp_conv_pd_->src_md());
     scratchpad.book(key_conv_tr_src, wspace.size(), 1, gpu_align);
diff --git a/src/gpu/intel/jit/codegen/codegen.cpp b/src/gpu/intel/jit/codegen/codegen.cpp
index fb5f845803f..a2146128d92 100644
--- a/src/gpu/intel/jit/codegen/codegen.cpp
+++ b/src/gpu/intel/jit/codegen/codegen.cpp
@@ -755,7 +755,7 @@ class ir_to_ngen_t : public ir_visitor_t {
         auto do_eltwise = [&](const reg_buf_data_t &r, const int count) {
             if (func.alg_kind == alg_kind::eltwise_stochastic_round) {
                 gpu_assert(args.size() == 3);
-                auto seed = args[2].reg_buf_data();
+                const auto &seed = args[2].reg_buf_data();
                 inj.compute(ngen::GRFRange(r.base(), count),
                         seed.reg_data().getBase(), seed.reg_data().getOffset(),
                         func.dst_dt);
diff --git a/src/gpu/intel/jit/codegen/reduce.hpp b/src/gpu/intel/jit/codegen/reduce.hpp
index d8afcd6c460..8b2f899afac 100644
--- a/src/gpu/intel/jit/codegen/reduce.hpp
+++ b/src/gpu/intel/jit/codegen/reduce.hpp
@@ -133,7 +133,7 @@ class reduce_impl_t {
             auto a_blocks = a.blocks();
             a_blocks.erase(a_blocks.begin());
             a = layout_t(a.type(), a.ndims(), 0, a_blocks);
-            return find_1d_tile(a, b);
+            return find_1d_tile(std::move(a), std::move(b));
         }
         return tensor_t(std::vector(b.ndims(), 1));
     }
diff --git a/src/gpu/intel/jit/conv/config.cpp b/src/gpu/intel/jit/conv/config.cpp
index 3a29ca4026e..1d24070e35c 100644
--- a/src/gpu/intel/jit/conv/config.cpp
+++ b/src/gpu/intel/jit/conv/config.cpp
@@ -1118,7 +1118,7 @@ bool post_op_layouts_ok(const conv_problem_t &prb) {
                         po.binary.src1_desc.dims, prb.ndims, true);
                 // These cases don't have message-related limitations.
                 if ((mask & (1 << 1)) == 0 || mask == (1 << 1)) continue;
-                auto rhs_layout = po.is_prelu()
+                const auto &rhs_layout = po.is_prelu()
                         ? layout_t(type_t::f32(), 0,
                                 get_prelu_weights_dims(po.prelu.mask, output_md))
                         : layout_t(po.binary.src1_desc);
@@ -1568,12 +1568,13 @@ class mn_walker_t {
         : prb_(prb) {
         for (auto &d : tile) {
             auto bmnk = to_gemm(d, prb);
-            entry_t e;
+            if (!utils::one_of(bmnk, pvars::m, pvars::n)) continue;
+
+            entries_.emplace_back();
+            entry_t &e = entries_.back();
             e.dim = d;
             e.tile_size = tile[d];
-            if (!utils::one_of(bmnk, pvars::m, pvars::n)) continue;
             e.mn_kind = (bmnk == pvars::m ? 'm' : 'n');
-            entries_.push_back(e);
         }
         // Put through spatial dimensions first and order spatial accordingly
         // (WHD, width is first).
diff --git a/src/gpu/intel/jit/conv/ir_builder.cpp b/src/gpu/intel/jit/conv/ir_builder.cpp
index cece13b94bf..babafde59f1 100644
--- a/src/gpu/intel/jit/conv/ir_builder.cpp
+++ b/src/gpu/intel/jit/conv/ir_builder.cpp
@@ -644,13 +644,13 @@ stmt_t inject_compute_loop_label(const stmt_t &s) {
 }
 
 void conv_ir_builder_t::build() {
-    auto &prb = cfg_.prb();
+    const auto &prb = cfg_.prb();
 
     trace_reset();
 
     std::vector init_stmts;
-    auto &plan = cfg_.plan();
-    auto gemm_schedule = plan.gemm_schedule;
+    const auto &plan = cfg_.plan();
+    const auto &gemm_schedule = plan.gemm_schedule;
     auto init_cset = plan.init_cset;
     init_kernel_grid(cfg_.kernel_grid(), cfg_.thread_group_grid(), cfg_.simd(),
             init_cset, init_stmts);
diff --git a/src/gpu/intel/jit/conv/message_patterns.hpp b/src/gpu/intel/jit/conv/message_patterns.hpp
index 78627a3b004..e5df00e9115 100644
--- a/src/gpu/intel/jit/conv/message_patterns.hpp
+++ b/src/gpu/intel/jit/conv/message_patterns.hpp
@@ -137,7 +137,7 @@ struct conv_stride_layout_t : public stride_layout_t {
                     || prb.id < prb.od * prb.sd + (prb.kd - 1) * (prb.dd + 1));
-            auto x_dim = !prb.is_bwd_d ?
pvars::od : pvars::id; + const auto &x_dim = !prb.is_bwd_d ? pvars::od : pvars::id; auto x = !prb.is_bwd_d ? prb.od : prb.id; auto xas = !prb.is_bwd_d ? prb.sd : prb.sd == 1; auto kx = prb.kd; @@ -151,7 +151,7 @@ struct conv_stride_layout_t : public stride_layout_t { || prb.ih < prb.oh * prb.sh + (prb.kh - 1) * (prb.dh + 1)); - auto x_dim = !prb.is_bwd_d ? pvars::oh : pvars::ih; + const auto &x_dim = !prb.is_bwd_d ? pvars::oh : pvars::ih; auto x = !prb.is_bwd_d ? prb.oh : prb.ih; auto xas = !prb.is_bwd_d ? prb.sh : prb.sh == 1; auto kx = prb.kh; @@ -163,7 +163,7 @@ struct conv_stride_layout_t : public stride_layout_t { && (prb.pw || prb.iw < prb.ow * prb.sw + (prb.kw - 1) * (prb.dw + 1)); - auto x_dim = !prb.is_bwd_d ? pvars::ow : pvars::iw; + const auto &x_dim = !prb.is_bwd_d ? pvars::ow : pvars::iw; auto x = !prb.is_bwd_d ? prb.ow : prb.iw; auto xas = !prb.is_bwd_d ? prb.sw : prb.sw == 1; auto kx = prb.kw; diff --git a/src/gpu/intel/jit/conv/pipeline.cpp b/src/gpu/intel/jit/conv/pipeline.cpp index fcdfc5644ed..c223100c35f 100644 --- a/src/gpu/intel/jit/conv/pipeline.cpp +++ b/src/gpu/intel/jit/conv/pipeline.cpp @@ -278,7 +278,7 @@ class compute_step_t { } auto let_info = create_let_info( let, is_preload_let(_let), is_mul_let(_let)); - let_infos.push_back(let_info); + let_infos.push_back(std::move(let_info)); seen.insert(_let); }; for (auto &_let : inner_let_stmts_) diff --git a/src/gpu/intel/jit/conv/plan.cpp b/src/gpu/intel/jit/conv/plan.cpp index 8b93870af53..ef888ed6562 100644 --- a/src/gpu/intel/jit/conv/plan.cpp +++ b/src/gpu/intel/jit/conv/plan.cpp @@ -282,8 +282,8 @@ void init_fwd(const conv_config_t &cfg_, gemm_schedule_t &gemm_schedule, std::vector kernel_grid_vars; kernel_grid_vars.push_back(oc_tile.grid_idx()); - kernel_grid_vars.push_back(od); - kernel_grid_vars.push_back(oh); + kernel_grid_vars.push_back(std::move(od)); + kernel_grid_vars.push_back(std::move(oh)); kernel_grid_vars.push_back(ow_tile.grid_idx()); kernel_grid_vars.push_back(g_tile.grid_idx()); kernel_grid_vars.push_back(mb_tile.grid_idx()); @@ -661,8 +661,8 @@ void init_bwd_w(const conv_config_t &cfg_, gemm_schedule_t &gemm_schedule, kernel_grid_vars.push_back(od_tile.grid_idx()); kernel_grid_vars.push_back(oh_tile.grid_idx()); kernel_grid_vars.push_back(ow_tile.grid_idx()); - kernel_grid_vars.push_back(kd); - kernel_grid_vars.push_back(kh); + kernel_grid_vars.push_back(std::move(kd)); + kernel_grid_vars.push_back(std::move(kh)); kernel_grid_vars.push_back(kw_tile.grid_idx()); kernel_grid_vars.push_back(ic_tile.grid_idx()); kernel_grid_vars.push_back(mb_tile.grid_idx()); @@ -2167,7 +2167,7 @@ class plan_builder_t { auto &direct_view = (abc == abc_kind_t::a ? a_direct_view_ : b_direct_view_); - auto load_view = direct_view ? direct_view.get() : gmem_view; + const auto &load_view = direct_view ? 
direct_view.get() : gmem_view; auto params = get_send_params(cfg_.exec_cfg(), send_op_t::load, send_address_t::a64, cfg_.fma_kind(), abc, load_view, diff --git a/src/gpu/intel/jit/conv/tiler.cpp b/src/gpu/intel/jit/conv/tiler.cpp index 82980040562..fdf1bf46013 100644 --- a/src/gpu/intel/jit/conv/tiler.cpp +++ b/src/gpu/intel/jit/conv/tiler.cpp @@ -360,7 +360,7 @@ class conv_blocking_scheme_t : public blocking_scheme_t { x2_info.set_iter_unit(unit); x2_info.d0 = info0.div_info; x2_info.d1 = info1.div_info; - x2_tile_infos_.push_back(x2_info); + x2_tile_infos_.push_back(std::move(x2_info)); } void finalize_loop_dims(const conv_config_t &cfg) { @@ -395,7 +395,7 @@ class conv_blocking_scheme_t : public blocking_scheme_t { ld.size = shape.get(d, 1); if (iter_.has(d)) ld.size = utils::div_up(ld.size, iter_dim_hint); - loop_dims.push_back(ld); + loop_dims.push_back(std::move(ld)); } std::sort(loop_dims.begin(), loop_dims.end(), [&](const loop_dim_t &a, const loop_dim_t &b) { @@ -1111,7 +1111,7 @@ conv_blocking_scheme_list_t get_blocking_schemes_fwd(const conv_config_t &cfg) { conv_blocking_scheme_list_t get_blocking_schemes_bwd_d( const conv_config_t &cfg) { conv_blocking_scheme_list_t ret(conv_tune_level()); - auto m_iter_dim = cfg.prb().ab_swap_transpose + const auto &m_iter_dim = cfg.prb().ab_swap_transpose ? pvars::ic : select_iter_dim(cfg, {pvars::mb, pvars::iw}); bool m_is_mb = (m_iter_dim == pvars::mb); diff --git a/src/gpu/intel/jit/conv/zp_plan.cpp b/src/gpu/intel/jit/conv/zp_plan.cpp index 88855f9e08b..c0a478f2f12 100644 --- a/src/gpu/intel/jit/conv/zp_plan.cpp +++ b/src/gpu/intel/jit/conv/zp_plan.cpp @@ -766,7 +766,7 @@ class zp_comp_init_plan_t : public base_plan_t { buf_mgr, zp_type, real_zp, (zp_stride) ? simd_ : 1); auto mad = mad_t::make( hw, comp_type, simd_, zp_type, zp_stride, wei_type, wei_stride); - return ret.append(mad.call({comp, comp, real_zp, wei})); + return ret.append(mad.call({comp, comp, std::move(real_zp), wei})); } stmt_t maybe_typecast_zp_src(buffer_manager_t &buf_mgr, type_t &type, @@ -937,7 +937,7 @@ struct texpr_t { auto s = "_" + std::to_string(vidxs[i]); // NOLINTNEXTLINE(performance-inefficient-string-concatenation) if (vstrides[i] != 1) s = std::to_string(vstrides[i]) + " x " + s; - parts.push_back(s); + parts.push_back(std::move(s)); } for (int i = 0; i < (int)parts.size(); i++) { if (i > 0) oss << " + "; @@ -1123,7 +1123,7 @@ class zp_mask_init_plan_t : public base_plan_t { for (auto &m : mask_descs_) { auto e_m = m.normalize( vvars_, vstart_, start, simd_, simd_dim_idx_); - e_masks.push_back(e_m); + e_masks.push_back(std::move(e_m)); } auto cond = e_masks[0]; for (int i = 1; i < (int)e_masks.size(); i++) diff --git a/src/gpu/intel/jit/gemm/generator/pieces/c_update.cxx b/src/gpu/intel/jit/gemm/generator/pieces/c_update.cxx index 8fe4ae8215d..4e3a6d182f5 100644 --- a/src/gpu/intel/jit/gemm/generator/pieces/c_update.cxx +++ b/src/gpu/intel/jit/gemm/generator/pieces/c_update.cxx @@ -1078,9 +1078,9 @@ void BLASKernelGenerator::updateCLayout(const vector &layoutE break; case 1: { - C_accs.push_back(C_acc); - C_accSwaps.push_back(C_accSwap); - C_loads.push_back(C_load); + C_accs.push_back(std::move(C_acc)); + C_accSwaps.push_back(std::move(C_accSwap)); + C_loads.push_back(std::move(C_load)); } break; case 2: diff --git a/src/gpu/intel/jit/ir/blocking.cpp b/src/gpu/intel/jit/ir/blocking.cpp index 73b054cb348..11fe1772dc0 100644 --- a/src/gpu/intel/jit/ir/blocking.cpp +++ b/src/gpu/intel/jit/ir/blocking.cpp @@ -196,7 +196,7 @@ std::vector 
level_tile_set_t::sample(int target, set(blk, dims_[i], try_tiles[i]); } if (!is_ok(blk)) continue; - ret.push_back(blk); + ret.push_back(std::move(blk)); if ((int)ret.size() >= target) break; } return ret; diff --git a/src/gpu/intel/jit/ir/epilogue.cpp b/src/gpu/intel/jit/ir/epilogue.cpp index 45ff6b4ca98..7838b2e236a 100644 --- a/src/gpu/intel/jit/ir/epilogue.cpp +++ b/src/gpu/intel/jit/ir/epilogue.cpp @@ -367,7 +367,7 @@ class post_op_tensor_t { reg_layout_ = slm_reduce_builder_.reg_layout(); - auto new_tile = slm_reduce_builder_.thr_tile(); + const auto &new_tile = slm_reduce_builder_.thr_tile(); info_ = info_.create_sub_tensor(new_tile); auto &slm_allocs = slm_reduce_builder_.allocs(); @@ -740,7 +740,7 @@ class epilogue_builder_t { gpu_assert(c_po_idx_ == -1); c_po_idx_ = tensor_idx; } - post_op_tensors_.push_back(po_tensor); + post_op_tensors_.push_back(std::move(po_tensor)); tensor_idx++; } diff --git a/src/gpu/intel/jit/ir/ir.cpp b/src/gpu/intel/jit/ir/ir.cpp index c89bf96ff27..8ec87816239 100644 --- a/src/gpu/intel/jit/ir/ir.cpp +++ b/src/gpu/intel/jit/ir/ir.cpp @@ -473,7 +473,7 @@ class alloc_injector_t : public ir_mutator_t { j = _find(j); parent[j] = i; }; - std::vector new_stmt_seq = stmt_vec; + std::vector new_stmt_seq = std::move(stmt_vec); for (auto &buf : bufs) { auto &e = entries.at(buf); stmt_t stmt; diff --git a/src/gpu/intel/jit/ir/linear_expr.cpp b/src/gpu/intel/jit/ir/linear_expr.cpp index 0ce3bb1ee2b..755b8fd42ee 100644 --- a/src/gpu/intel/jit/ir/linear_expr.cpp +++ b/src/gpu/intel/jit/ir/linear_expr.cpp @@ -244,7 +244,7 @@ class linear_coef_t { for (auto &f : factors_) { if (f.is()) new_factors.push_back(f); } - factors_ = new_factors; + factors_ = std::move(new_factors); } linear_coef_t &operator/=(int64_t factor) { @@ -372,8 +372,7 @@ expr_t linear_div(const expr_t &e, int64_t factor) { auto &linear = _linear.as(); auto c = linear_coef_t::div(linear.c, factor); auto u_vec = linear_coef_t::div(linear.u_vec, factor); - auto v_vec = linear.v_vec; - return linear_t::to_expr(c, u_vec, v_vec); + return linear_t::to_expr(c, u_vec, linear.v_vec); } expr_t simplify_linear_mod_reduce(const expr_t &e, int64_t factor) { diff --git a/src/gpu/intel/jit/ir/message.cpp b/src/gpu/intel/jit/ir/message.cpp index 2debda0ac3a..a0d89f9825a 100644 --- a/src/gpu/intel/jit/ir/message.cpp +++ b/src/gpu/intel/jit/ir/message.cpp @@ -336,7 +336,7 @@ class memory_walker_t { int64_t f = get_max_const_factor(off, cset); int alignment = f ? 
ir_utils::max_pow2_divisor(f) : base_alignment; - block_offs_.push_back(off); + block_offs_.push_back(std::move(off)); block_alignments_.push_back(alignment); }); } @@ -990,7 +990,7 @@ stmt_t access_builder_t::create_send_stmt( } else if (!off_base.is_equal(off_base0)) { is_same_base = false; } - off_vec.push_back(off); + off_vec.push_back(std::move(off)); off_const_vec.emplace_back(off_const - off_const0); } expr_t off; diff --git a/src/gpu/intel/jit/ir/message_patterns.hpp b/src/gpu/intel/jit/ir/message_patterns.hpp index c9cf838016d..6adeae74d2f 100644 --- a/src/gpu/intel/jit/ir/message_patterns.hpp +++ b/src/gpu/intel/jit/ir/message_patterns.hpp @@ -56,7 +56,7 @@ struct stride_layout_t { struct stride_dim_t { stride_dim_t() = default; - stride_dim_t(dim_type_t dim, dim_t size, dim_t stride, + stride_dim_t(const dim_type_t &dim, dim_t size, dim_t stride, bool can_overflow, bool is_complex) : dim(dim) , size(size) diff --git a/src/gpu/intel/jit/ir/problem.cpp b/src/gpu/intel/jit/ir/problem.cpp index 4c95a765982..332779c426c 100644 --- a/src/gpu/intel/jit/ir/problem.cpp +++ b/src/gpu/intel/jit/ir/problem.cpp @@ -42,10 +42,8 @@ std::string to_string(tensor_kind_t tensor) { const expr_t &pvar_t::index_var() const { static thread_local pvar_map_t vars; - if (!vars.has(*this)) { - auto var = var_t::make(type_t::s32(), name_ + "_idx"); - vars[*this] = var; - } + if (!vars.has(*this)) + vars[*this] = var_t::make(type_t::s32(), name_ + "_idx"); return vars[*this]; } diff --git a/src/gpu/intel/jit/ir/send_plan.cpp b/src/gpu/intel/jit/ir/send_plan.cpp index eb5761af6b4..5e965615e16 100644 --- a/src/gpu/intel/jit/ir/send_plan.cpp +++ b/src/gpu/intel/jit/ir/send_plan.cpp @@ -1109,7 +1109,7 @@ struct send_group_t { if (bounds.contains(subtile_idx, b.reg_off)) { auto bb = b; bb.reg_off = bounds.normalize_reg_off(subtile_idx, b.reg_off); - new_blocks.push_back(bb); + new_blocks.push_back(std::move(bb)); } } diff --git a/src/gpu/intel/jit/ir/slm_reduce_builder.cpp b/src/gpu/intel/jit/ir/slm_reduce_builder.cpp index 7a99a838688..effdc4a7952 100644 --- a/src/gpu/intel/jit/ir/slm_reduce_builder.cpp +++ b/src/gpu/intel/jit/ir/slm_reduce_builder.cpp @@ -102,7 +102,7 @@ void slm_reduce_builder_t::build() { read_start[i] = local_thr_tile.start(i); auto cond = read_start[i] < slm_layout.dims()[i]; if (reduce_cond_.is_empty()) - reduce_cond_ = cond; + reduce_cond_ = std::move(cond); else reduce_cond_ &= cond; } diff --git a/src/gpu/intel/jit/ir/tensor.hpp b/src/gpu/intel/jit/ir/tensor.hpp index 2852b845bef..9b0d79db598 100644 --- a/src/gpu/intel/jit/ir/tensor.hpp +++ b/src/gpu/intel/jit/ir/tensor.hpp @@ -495,8 +495,8 @@ class layout_t { bool operator!=(const layout_t &other) const { return !operator==(other); } bool operator<=(const layout_t &other) const { if (!type_.is_equal(other.type_)) return false; - const auto other_blocks = other.normalize().blocks(); - const auto self_blocks = normalize().blocks(); + auto other_blocks = other.normalize().blocks(); + auto self_blocks = normalize().blocks(); if (self_blocks.size() > other_blocks.size()) return false; if (self_blocks.empty()) return true; diff --git a/src/gpu/intel/jit/pass/alloc.cpp b/src/gpu/intel/jit/pass/alloc.cpp index 7f2c32432be..526f4ebbcd6 100644 --- a/src/gpu/intel/jit/pass/alloc.cpp +++ b/src/gpu/intel/jit/pass/alloc.cpp @@ -35,7 +35,7 @@ class alloc_lifter_t : public ir_mutator_t { if (!is_func_call(c)) continue; auto header_buf = send_t::arg_mem_off(c); gpu_assert(is_var(header_buf)) << header_buf; - header_bufs_.insert(header_buf); + 
header_bufs_.insert(std::move(header_buf)); } } diff --git a/src/gpu/intel/jit/pass/bank_conflict.cpp b/src/gpu/intel/jit/pass/bank_conflict.cpp index cbaca5679fc..ad8a7aaa523 100644 --- a/src/gpu/intel/jit/pass/bank_conflict.cpp +++ b/src/gpu/intel/jit/pass/bank_conflict.cpp @@ -78,9 +78,9 @@ class bank_conflict_attribute_injector_t : public ir_mutator_t { auto src2_buf = ptr_base(obj.args[3]); // src0 may be null in some cases, skip it. - if (!src0_buf.is_empty()) bufs_.insert(src0_buf); - bufs_.insert(src1_buf); - bufs_.insert(src2_buf); + if (!src0_buf.is_empty()) bufs_.insert(std::move(src0_buf)); + bufs_.insert(std::move(src1_buf)); + bufs_.insert(std::move(src2_buf)); instructions_.emplace_back(obj); } else if (is_load) { diff --git a/src/gpu/intel/jit/pass/dpasw.cpp b/src/gpu/intel/jit/pass/dpasw.cpp index f96fba637e0..886e5ef021f 100644 --- a/src/gpu/intel/jit/pass/dpasw.cpp +++ b/src/gpu/intel/jit/pass/dpasw.cpp @@ -280,7 +280,7 @@ class dpasw_injector_t { auto it = buf2send.find(buf); if (it != buf2send.end()) prev_send = it->second; buf2send[buf] = s; - send_infos_.push_back(send_info); + send_infos_.push_back(std::move(send_info)); if (!prev_send.is_empty()) { send_infos_.back().set_prev_send(prev_send); } diff --git a/src/gpu/intel/jit/pass/hoist.cpp b/src/gpu/intel/jit/pass/hoist.cpp index 37201986260..ecc91a1622a 100644 --- a/src/gpu/intel/jit/pass/hoist.cpp +++ b/src/gpu/intel/jit/pass/hoist.cpp @@ -232,7 +232,7 @@ class hoist_exprs_mutator_t : public ir_mutator_t { return inv_var; } - other_args.push_back(inv_var); + other_args.push_back(std::move(inv_var)); cur_expr = sum_expr_t::make_add(other_args, type); } return cur_expr.expr(); diff --git a/src/gpu/intel/jit/pass/overflow.cpp b/src/gpu/intel/jit/pass/overflow.cpp index 56b1e36c549..55d913e4647 100644 --- a/src/gpu/intel/jit/pass/overflow.cpp +++ b/src/gpu/intel/jit/pass/overflow.cpp @@ -231,7 +231,7 @@ class overflow_fixer_t : public ir_mutator_t { auto value_i = scalarizer.mutate(obj.value); auto lo_hi = ctx_.bound_finder.find_bounds(value_i); ctx_.bound_finder.set_var_bounds(var_i, lo_hi); - ctx_.vec_vars[obj.var].push_back(var_i); + ctx_.vec_vars[obj.var].push_back(std::move(var_i)); } } expr_t var = obj.var; diff --git a/src/gpu/intel/jit/pass/send.cpp b/src/gpu/intel/jit/pass/send.cpp index 7302c03879b..1f312c0847c 100644 --- a/src/gpu/intel/jit/pass/send.cpp +++ b/src/gpu/intel/jit/pass/send.cpp @@ -104,7 +104,7 @@ class send_injector_t : public ir_mutator_t { auto new_call = func_call_t::make(obj.func, {mem_buf, header_buf, reg_buf, mask, expr_t(), expr_t(), - pattern}, + std::move(pattern)}, obj.attr); auto body = stmt_seq_t::make(off_store, new_call); @@ -147,7 +147,7 @@ class send_2d_header_store_lifter_t : public ir_mutator_t { if (!c.as().func.as().is_2d()) continue; auto header_buf = send_t::arg_mem_off(c); gpu_assert(is_var(header_buf)) << header_buf; - header_bufs_.insert(header_buf); + header_bufs_.insert(std::move(header_buf)); } } diff --git a/src/gpu/intel/jit/pass/simplify.cpp b/src/gpu/intel/jit/pass/simplify.cpp index 3a75f2a46ef..68fc1915e20 100644 --- a/src/gpu/intel/jit/pass/simplify.cpp +++ b/src/gpu/intel/jit/pass/simplify.cpp @@ -702,7 +702,7 @@ void fold_const_nary_op_args(op_kind_t op_kind, const std::vector &args, } if (op_kind == op_kind_t::_mul && is_one(c)) return; if (op_kind == op_kind_t::_add && is_zero(c)) return; - new_args.push_back(c); + new_args.push_back(std::move(c)); } expr_t cvt_mul_to_nary_op(const expr_t &a, const expr_t &b) { @@ -808,7 +808,7 @@ class 
mul_nary_op_expander_t : public nary_op_flattener_t { } } if (new_args.empty()) { - new_args = i_args; + new_args = std::move(i_args); continue; } std::vector next_args; @@ -1095,7 +1095,7 @@ class factored_expr_t : public expr_impl_t { e_const = e_const * abs(e); } if (sign) e_const = -e_const; - factors.push_back(e_const); + factors.push_back(std::move(e_const)); } void init_factors(const expr_t &e) { @@ -2235,8 +2235,8 @@ expr_t simplify_propagate_shuffle(const expr_t &e) { ok = false; break; } - a.push_back(op_a); - b.push_back(op_b); + a.push_back(std::move(op_a)); + b.push_back(std::move(op_b)); } else if (op_kind == op_kind_t::_and) { // Replace with expression true elem to allow matching // this op against future binary operation. diff --git a/src/gpu/intel/jit/reorder/config.cpp b/src/gpu/intel/jit/reorder/config.cpp index b04cc7f98b8..ce559dddb4d 100644 --- a/src/gpu/intel/jit/reorder/config.cpp +++ b/src/gpu/intel/jit/reorder/config.cpp @@ -46,7 +46,7 @@ reorder_config_t::reorder_config_t( tiles_.assign(rev_tiles.rbegin(), rev_tiles.rend()); dim_idx_t ndims = src.ndims(); - auto thr_tile = tiles_.front(); + const auto &thr_tile = tiles_.front(); pvar_tile_t iter_tile; pvar_tile_t loop_tile; diff --git a/src/gpu/intel/jit/reorder/ir_builder.cpp b/src/gpu/intel/jit/reorder/ir_builder.cpp index d43fa782a5d..5c5ee11c416 100644 --- a/src/gpu/intel/jit/reorder/ir_builder.cpp +++ b/src/gpu/intel/jit/reorder/ir_builder.cpp @@ -161,7 +161,7 @@ bool reorder_ir_builder_t::try_build( ordered.insert(ordered.begin(), outer); } schedule.reorder(ordered); - fused_idxs[grid_idx].push_back(v); + fused_idxs[grid_idx].push_back(std::move(v)); } for (dim_idx_t i = 0; i < into(fused_idxs.size()); i++) { diff --git a/src/gpu/intel/jit/reorder/tiler.cpp b/src/gpu/intel/jit/reorder/tiler.cpp index 67bd581a5c9..ec649a9c10b 100644 --- a/src/gpu/intel/jit/reorder/tiler.cpp +++ b/src/gpu/intel/jit/reorder/tiler.cpp @@ -269,7 +269,7 @@ std::vector tiles(const hw_t &hw, layout_t a, layout_t b) { if (tile.elems() > max_elems) break; if (get_grf_layout_size(tile) > max_layout_size) continue; if (candidate_tiles.empty() || !tile.is_equal(candidate_tiles.back())) - candidate_tiles.push_back(tile); + candidate_tiles.push_back(std::move(tile)); } gpu_assert(!candidate_tiles.empty()); diff --git a/src/gpu/intel/jit/v2/conv/builder.cpp b/src/gpu/intel/jit/v2/conv/builder.cpp index 4be837f1f28..aa4809bd286 100644 --- a/src/gpu/intel/jit/v2/conv/builder.cpp +++ b/src/gpu/intel/jit/v2/conv/builder.cpp @@ -530,7 +530,7 @@ class post_op_builder_t : public ir_builder_t { if (rhs.type() != type_t::f32()) { auto rhs_f32 = _rhs.retype(type_t::f32(), /*dense=*/true); rhs_buf = reorder(_rhs, rhs_f32, _rhs_buf); - rhs = rhs_f32; + rhs = std::move(rhs_f32); } if (zero_point != 0) { auto func = eltwise_t::make( @@ -558,7 +558,7 @@ class post_op_builder_t : public ir_builder_t { elems = (elems < 8 ? 1 : elems); pvar_tile_t tile; tile[lhs0.dim] = elems; - for_each(lhs.int_dim_sizes(), tile, + for_each(lhs.int_dim_sizes(), std::move(tile), [&](const pvar_coord_t &coord) { auto lhs_off = lhs.offset_in_bytes(coord); auto rhs_off = rhs.offset_in_bytes(coord); @@ -702,7 +702,7 @@ class epilogue_tile_builder_t : public ir_builder_t { } // Apply dst scales. 
build_scale(DNNL_ARG_DST); - out_layout = f32_layout; + out_layout = std::move(f32_layout); return buf; } diff --git a/src/gpu/intel/jit/v2/conv/kernel_desc.cpp b/src/gpu/intel/jit/v2/conv/kernel_desc.cpp index 052da8730c4..641c74348f6 100644 --- a/src/gpu/intel/jit/v2/conv/kernel_desc.cpp +++ b/src/gpu/intel/jit/v2/conv/kernel_desc.cpp @@ -213,7 +213,7 @@ int estimate_grf_usage_bytes(const kernel_desc_t &desc) { dim_t a_elems = b_iter * m_iter * k_iter; dim_t b_elems = b_iter * k_iter * n_iter; dim_t c_elems = m_iter * n_iter; - auto iter_outer_dim + const auto &iter_outer_dim = (desc.iter_outer_tile.is_empty() ? pvar_t() : *desc.iter_outer_tile.begin()); auto bmnk = to_gemm(iter_outer_dim, desc.prop); diff --git a/src/gpu/intel/jit/v2/conv/kernel_desc_2d_reqs.cpp b/src/gpu/intel/jit/v2/conv/kernel_desc_2d_reqs.cpp index 37b0b23326c..5c4358cad0e 100644 --- a/src/gpu/intel/jit/v2/conv/kernel_desc_2d_reqs.cpp +++ b/src/gpu/intel/jit/v2/conv/kernel_desc_2d_reqs.cpp @@ -84,7 +84,7 @@ struct stride_t { } if (found) new_pvars.push_back(a); } - pvars = new_pvars; + pvars = std::move(new_pvars); } }; @@ -125,10 +125,10 @@ block_2d_params_t to_block_2d_params(const prop_kind_t &prop, if (d.is_undef()) continue; if (strides.at(d).is_one()) { gpu_assert(params.w_dim.is_undef()); - params.w_dim = d; + params.w_dim = std::move(d); } else { gpu_assert(params.h_dim.is_undef()); - params.h_dim = d; + params.h_dim = std::move(d); } } gpu_assert(!params.w_dim.is_undef()); diff --git a/src/gpu/intel/jit/v2/conv/plan.cpp b/src/gpu/intel/jit/v2/conv/plan.cpp index 679d9e87483..957e39e8170 100644 --- a/src/gpu/intel/jit/v2/conv/plan.cpp +++ b/src/gpu/intel/jit/v2/conv/plan.cpp @@ -326,9 +326,9 @@ class multiply_info_t { a_inner_ = to_v2_layout(dpas.b_layout(), a_desc, std::vector {k_dim, m_dim}); b_inner_ = to_v2_layout(dpas.a_layout(), b_desc, - std::vector {n_dim, k_dim}); + std::vector {n_dim, std::move(k_dim)}); c_inner_ = to_v2_layout(dpas.c_layout(), c_desc, - std::vector {n_dim, m_dim}); + std::vector {std::move(n_dim), std::move(m_dim)}); return true; } @@ -816,10 +816,10 @@ class plan_builder_t { gpu_check( init_slm_reduce_plan(c_fma_layout, virt_grid, plan.slm_reduce)); auto &c_mapper = dim_mapper_manager_.mapper(tensor_kind_t::c); - auto c_reg_layout + const auto &c_reg_layout = (plan.slm_reduce ? plan.slm_reduce.c_layout : c_fma_layout); - auto c_coord = (plan.slm_reduce ? plan.slm_reduce.c_coord - : coord_info_.iter_coord()); + const auto &c_coord = (plan.slm_reduce ? 
plan.slm_reduce.c_coord + : coord_info_.iter_coord()); auto c_tile = c_reg_layout.int_dim_sizes(); auto c_mem_view = view_t(c_mapper, c_layout_, c_coord, c_tile); plan.c_reg_layout = c_reg_layout; diff --git a/src/gpu/intel/jit/v2/conv/plan_registry.cpp b/src/gpu/intel/jit/v2/conv/plan_registry.cpp index 57ee03040d8..dd7cc021207 100644 --- a/src/gpu/intel/jit/v2/conv/plan_registry.cpp +++ b/src/gpu/intel/jit/v2/conv/plan_registry.cpp @@ -43,7 +43,7 @@ plan_registry_t::plan_registry_t(const char **entries) { } } #endif - entries_.push_back(e); + entries_.push_back(std::move(e)); entries++; } } @@ -63,7 +63,7 @@ kernel_desc_t plan_registry_t::find_best(const problem_t &prb) const { time = e.model_set.time(prb, desc); if (time < min_time) { min_time = time; - best = desc; + best = std::move(desc); } } return best; diff --git a/src/gpu/intel/jit/v2/conv/planner/bench.cpp b/src/gpu/intel/jit/v2/conv/planner/bench.cpp index 57bba00c538..594fe7f9723 100644 --- a/src/gpu/intel/jit/v2/conv/planner/bench.cpp +++ b/src/gpu/intel/jit/v2/conv/planner/bench.cpp @@ -569,7 +569,7 @@ std::vector generate_problems(const bench_input_params_t ¶ms) { auto prb = params.problem(); prb.set_shape(shape); if (!params.reqs.fits(prb.shape())) continue; - ret.push_back(prb); + ret.push_back(std::move(prb)); if ((int)ret.size() >= params.nprbs) break; } if ((int)ret.size() < params.nprbs) { diff --git a/src/gpu/intel/jit/v2/conv/planner/search.cpp b/src/gpu/intel/jit/v2/conv/planner/search.cpp index 8c6b848bc44..19bb4fd64c4 100644 --- a/src/gpu/intel/jit/v2/conv/planner/search.cpp +++ b/src/gpu/intel/jit/v2/conv/planner/search.cpp @@ -409,15 +409,15 @@ class kernel_search_manager_t { if (!params_.is_iter_set) d.iter_tile = td.iter; auto d_key = jit::stringify(d); if (seen.count(d_key) > 0) continue; - seen.insert(d_key); + seen.insert(std::move(d_key)); if (!create_conv_plan(d, bench_mger_.hw())) { std::cout << d.brief_str() << ": \033[1;31mFAIL\033[0m" << std::endl; continue; } - descs.push_back(d); std::cout << d.brief_str() << ": \033[1;32mOK\033[0m" << std::endl; + descs.push_back(std::move(d)); } } gpu_info() << "gen_desc_groups(): descs.size() = " << descs.size(); @@ -465,7 +465,7 @@ class kernel_search_manager_t { if (desc.iter_tile.at(d) % outer != 0) continue; pvar_tile_t tile_outer; tile_outer[d] = outer; - tiles.push_back(tile_outer); + tiles.push_back(std::move(tile_outer)); } } return tiles; @@ -487,11 +487,11 @@ class search_sequence_t { std::vector d_tiles; auto iter = to_gemm(d.iter_tile, d.prop); auto tg = to_gemm(d.thread_group_tile, d.prop); - d_tiles.push_back(iter); - d_tiles.push_back(tg); + d_tiles.push_back(std::move(iter)); + d_tiles.push_back(std::move(tg)); pvar_tile_t prefetch_tile; prefetch_tile[prefetch_dim] = d.prefetch.dist; - d_tiles.push_back(prefetch_tile); + d_tiles.push_back(std::move(prefetch_tile)); tiles.push_back(std::move(d_tiles)); } tile_to_vec_ = tile_to_vec_t(tiles); diff --git a/src/gpu/intel/jit/v2/conv/tensor_utils.cpp b/src/gpu/intel/jit/v2/conv/tensor_utils.cpp index 33682cbd57f..a60a5595949 100644 --- a/src/gpu/intel/jit/v2/conv/tensor_utils.cpp +++ b/src/gpu/intel/jit/v2/conv/tensor_utils.cpp @@ -111,7 +111,7 @@ layout_tag_t append_groups( bool is_dst = (tensor_kind == tensor_kind_t::dst); bool is_bias = (tensor_kind == tensor_kind_t::bias); if (!is_src && !is_dst && !is_bias) return layout_tag; - auto xc_dim = (is_src ? pvars::ic : pvars::oc); + const auto &xc_dim = (is_src ? 
pvars::ic : pvars::oc); auto xc_letter = dim_idx::as_tag(layout_tag.desc().dim_index(xc_dim)); auto new_g_letter = xc_letter; auto new_xc_letter = into(xc_letter + 1); @@ -406,7 +406,7 @@ std::vector skip_mask( // Check if the mask can be proven with known dimension requirements. if (!reqs.can_prove(dim_sizes.at(dim) % tile.at(dim) == 0)) continue; // Mask is not required for this dimension. - ret.push_back(dim); + ret.push_back(std::move(dim)); } return ret; } diff --git a/src/gpu/intel/jit/v2/ir/builder.cpp b/src/gpu/intel/jit/v2/ir/builder.cpp index 1e48b34a989..4cf8fbfdd2d 100644 --- a/src/gpu/intel/jit/v2/ir/builder.cpp +++ b/src/gpu/intel/jit/v2/ir/builder.cpp @@ -110,7 +110,7 @@ offset_t offset_scope_t::get_offset(int version, const expr_t &base0, for (size_t i = 0; i < loop_nest.nloops(); i++) { auto inc_value = simplify(_loop_incs[i] - comp_value); auto inc = to_simple_expr(inc_value); - ret.loop_incs.push_back(inc); + ret.loop_incs.push_back(std::move(inc)); if (i == loop_nest.nloops() - 1) break; comp_value = to_simple_expr(_loop_incs[i] * loop_nest[i].bound); } diff --git a/src/gpu/intel/jit/v2/ir/builder.hpp b/src/gpu/intel/jit/v2/ir/builder.hpp index 2266c6c9c12..4d243fdb90c 100644 --- a/src/gpu/intel/jit/v2/ir/builder.hpp +++ b/src/gpu/intel/jit/v2/ir/builder.hpp @@ -493,13 +493,13 @@ class ir_builder_t { const offset_scope_t &off_scope() const { return *off_scope_; } const offset_ctx_t &off_ctx() const { return off_ctx_; } expr_t alloc(const std::string &_name, int size) { - auto name = (buf_mgr_->has(_name) + const auto &name = (buf_mgr_->has(_name) ? buf_mgr_->ir_ctx().create_tmp_name(_name) : _name); return buf_mgr_->get(name, size); } var_ref_t alloc_var(const type_t &type, const std::string &_name) { - auto name = (buf_mgr_->has(_name) + const auto &name = (buf_mgr_->has(_name) ? buf_mgr_->ir_ctx().create_tmp_name(_name) : _name); auto buf = alloc(name, type.size()); diff --git a/src/gpu/intel/jit/v2/ir/send.hpp b/src/gpu/intel/jit/v2/ir/send.hpp index 3ec451407bb..ab65f4ec059 100644 --- a/src/gpu/intel/jit/v2/ir/send.hpp +++ b/src/gpu/intel/jit/v2/ir/send.hpp @@ -779,13 +779,13 @@ class send_plan_builder_t { auto outer_begin = end(layout); if (is_scattered) { // Add blocks to fill up slots in the scattered message. 
- for (auto it = inner_end; it != end(layout); ++it) { + for (auto it = std::move(inner_end); it != end(layout); ++it) { int it_slots = ir_utils::safe_div(it.elems(), elems_per_slot); int entry_reg_size = utils::rnd_up(it_slots * slot_stride, grf_size); if (it_slots > max_slots || entry_reg_size > params.max_entry_reg_size) { - outer_begin = it; + outer_begin = std::move(it); break; } slots = it_slots; diff --git a/src/gpu/intel/jit/v2/ir/tensor.cpp b/src/gpu/intel/jit/v2/ir/tensor.cpp index 3a7b1b1c92c..45708c9c228 100644 --- a/src/gpu/intel/jit/v2/ir/tensor.cpp +++ b/src/gpu/intel/jit/v2/ir/tensor.cpp @@ -714,8 +714,8 @@ struct try_div_mod_t { if (!a_mod.is_empty()) return false; a_mod = v; } - div = a_div; - mod = a_mod; + div = std::move(a_div); + mod = std::move(a_mod); return true; } }; @@ -771,8 +771,8 @@ layout_t layout_t::map(const dim_mapper_t &dim_mapper, T mod = T(); if (try_div_mod_t::call(idxs[dim], b.int_size(), var_range_info, div, mod)) { - idxs[dim] = div; - off = mod; + idxs[dim] = std::move(div); + off = std::move(mod); is_final = false; } } diff --git a/src/gpu/intel/logging.hpp b/src/gpu/intel/logging.hpp index 405c4059bc5..81367497bc9 100644 --- a/src/gpu/intel/logging.hpp +++ b/src/gpu/intel/logging.hpp @@ -83,7 +83,7 @@ class logger_t { print_helper_t::call(oss, obj); auto lines = gpu_utils::split(oss.str(), "\n"); if (lines_.empty() || lines.empty()) { - lines_ = lines; + lines_ = std::move(lines); return *this; } lines_.back() += lines[0]; diff --git a/src/gpu/intel/microkernels/shim.cpp b/src/gpu/intel/microkernels/shim.cpp index d1edf9eb7cc..49d3b2638c4 100644 --- a/src/gpu/intel/microkernels/shim.cpp +++ b/src/gpu/intel/microkernels/shim.cpp @@ -438,7 +438,7 @@ std::string generateShim(const Package &package, HostLanguage language, varg.copy = options.copyScalarArgs; varg.name = pargs[i].name; if (byPtr) varg.name = '*' + varg.name; - vargList.push_back(varg); + vargList.push_back(std::move(varg)); } } @@ -537,7 +537,7 @@ std::string generateShim(const Package &package, HostLanguage language, clobber.name = (vargs[i].copy ? "COPY" : "%") + std::to_string(i); clobber.arg = false; // Reuse 'arg' field as flag clobber.preclobbered = vargs[i].copy && vargs[i].in; - vargClobbers.push_back(clobber); + vargClobbers.push_back(std::move(clobber)); } std::sort(vargClobbers.begin(), vargClobbers.end(), diff --git a/src/gpu/intel/ocl/bnorm/bnorm_lookup_table.cpp b/src/gpu/intel/ocl/bnorm/bnorm_lookup_table.cpp index e6495b38262..3714bed2c87 100644 --- a/src/gpu/intel/ocl/bnorm/bnorm_lookup_table.cpp +++ b/src/gpu/intel/ocl/bnorm/bnorm_lookup_table.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2022-2024 Intel Corporation +* Copyright 2022-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -602,7 +602,7 @@ std::string get_nhwc_desc_str(const params_t &conf) { const char *bnorm_lookup_table_t::find( const params_t &conf, const gpu_arch_t &gpu_arch) const { - auto key + const auto &key = is_nhwc_impl(conf) ? 
get_nhwc_desc_str(conf) : get_desc_str(conf); auto it = map_.find(key); if (it == map_.end()) return nullptr; diff --git a/src/gpu/intel/ocl/bnorm/nhwc_reusable.hpp b/src/gpu/intel/ocl/bnorm/nhwc_reusable.hpp index 5407c5c4aaf..f5655208b9f 100644 --- a/src/gpu/intel/ocl/bnorm/nhwc_reusable.hpp +++ b/src/gpu/intel/ocl/bnorm/nhwc_reusable.hpp @@ -189,7 +189,7 @@ struct nhwc_reusable_batch_normalization_fwd_t : public gpu_primitive_t { status_t init(impl::engine_t *engine) override { if (pd()->has_zero_dim_memory()) return status::success; - auto kernel_names = pd()->cmpl_conf.get_kernel_names(); + const auto &kernel_names = pd()->cmpl_conf.get_kernel_names(); CHECK(create_kernels(engine, kernels_, kernel_names, pd()->cmpl_conf)); return status::success; } @@ -271,7 +271,7 @@ struct nhwc_reusable_batch_normalization_bwd_t : public gpu_primitive_t { status_t init(impl::engine_t *engine) override { if (pd()->has_zero_dim_memory()) return status::success; - auto kernel_names = pd()->cmpl_conf.get_kernel_names(); + const auto &kernel_names = pd()->cmpl_conf.get_kernel_names(); CHECK(create_kernels(engine, kernels_, kernel_names, pd()->cmpl_conf)); return status::success; } diff --git a/src/gpu/intel/ocl/micro_sdpa.cpp b/src/gpu/intel/ocl/micro_sdpa.cpp index a224f67fb47..9b987f2834b 100644 --- a/src/gpu/intel/ocl/micro_sdpa.cpp +++ b/src/gpu/intel/ocl/micro_sdpa.cpp @@ -435,7 +435,7 @@ status_t micro_sdpa_t::pd_t::init_microkernels(impl::engine_t *engine) { opts_vs.slmPtr = true; /* Update for second GEMM: V*S */ - auto problem_vs = problem; + auto problem_vs = std::move(problem); problem_vs.Ta_ext = jit::convert_dnnl_to_kernel_type(val_md()->data_type); problem_vs.A.layout = convert_dnnl_to_kernel_layout(val_md()); if (with_value_scales() && !vs_common_scales) { diff --git a/src/gpu/intel/ocl/reduction/atomic_reduction.cpp b/src/gpu/intel/ocl/reduction/atomic_reduction.cpp index 502f8f5f379..1547e825c42 100644 --- a/src/gpu/intel/ocl/reduction/atomic_reduction.cpp +++ b/src/gpu/intel/ocl/reduction/atomic_reduction.cpp @@ -212,7 +212,7 @@ atomic_reduction_conf_t::atomic_reduction_conf_t( status_t atomic_reduction_conf_t::init_dispatcher( const compute::compute_engine_t *engine, const gpu_primitive_attr_t *gpu_attr) { - const std::vector dispatch_dims = { + std::vector dispatch_dims = { reduction_dims::outer, reduction_dims::local, reduction_dims::global, @@ -262,7 +262,8 @@ status_t atomic_reduction_conf_t::init_dispatcher( = inner_block.block / conf.vect_size; // Create the dispatcher - compute::reusable_dispatch_config_t config(engine, dispatch_dims); + compute::reusable_dispatch_config_t config( + engine, std::move(dispatch_dims)); CHECK(config.register_buffer(src)); CHECK(config.register_buffer(dst)); CHECK(config.define_dim_index( diff --git a/src/gpu/intel/ocl/reduction/atomic_reduction.hpp b/src/gpu/intel/ocl/reduction/atomic_reduction.hpp index 6e912ec20d9..161b6516c61 100644 --- a/src/gpu/intel/ocl/reduction/atomic_reduction.hpp +++ b/src/gpu/intel/ocl/reduction/atomic_reduction.hpp @@ -125,7 +125,7 @@ struct atomic_reduction_t : public gpu_primitive_t { for (auto &phase : phases) { compute::kernel_t kernel; CHECK(create_kernel(engine, kernel, "atomic_reduce", phase.conf)); - kernels_.push_back(kernel); + kernels_.push_back(std::move(kernel)); } if (pd()->needs_finalization) { diff --git a/src/gpu/intel/ocl/reduction/combined_reduction.hpp b/src/gpu/intel/ocl/reduction/combined_reduction.hpp index a1ee98479e8..8b9256e5d35 100644 --- a/src/gpu/intel/ocl/reduction/combined_reduction.hpp 
+++ b/src/gpu/intel/ocl/reduction/combined_reduction.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2021-2024 Intel Corporation
+* Copyright 2021-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -84,16 +84,13 @@ struct combined_reduction_t : public gpu_primitive_t {
 
     status_t init(impl::engine_t *engine) override {
         auto &phases = pd()->phases;
-        status_t status;
 
         for (auto &phase : phases) {
             compute::kernel_ctx_t kernel_ctx(pd()->attr());
-            status = pd()->init_kernel_ctx(kernel_ctx, phase);
-            CHECK(status);
+            CHECK(pd()->init_kernel_ctx(kernel_ctx, phase));
             compute::kernel_t kernel;
-            status = create_kernel(
-                    engine, &kernel, "combined_reduce", kernel_ctx);
-            CHECK(status);
-            kernels_.push_back(kernel);
+            CHECK(create_kernel(
+                    engine, &kernel, "combined_reduce", kernel_ctx));
+            kernels_.push_back(std::move(kernel));
         }
 
         return status::success;
diff --git a/src/gpu/intel/ocl/reduction/reusable_ref_reduction.hpp b/src/gpu/intel/ocl/reduction/reusable_ref_reduction.hpp
index 505d24da67e..5f4e32b0ffa 100644
--- a/src/gpu/intel/ocl/reduction/reusable_ref_reduction.hpp
+++ b/src/gpu/intel/ocl/reduction/reusable_ref_reduction.hpp
@@ -114,7 +114,7 @@ struct reusable_ref_reduction_t : public gpu_primitive_t {
             compute::kernel_t kernel;
             CHECK(create_kernel(engine, kernel,
                     phase.conf.get_kernel_names()[0], phase.conf));
-            kernels_.push_back(kernel);
+            kernels_.push_back(std::move(kernel));
         }
         return status::success;
     }
diff --git a/src/gpu/intel/ocl/reusable_lnorm.cpp b/src/gpu/intel/ocl/reusable_lnorm.cpp
index 49ff39e680f..6b5bd0df958 100644
--- a/src/gpu/intel/ocl/reusable_lnorm.cpp
+++ b/src/gpu/intel/ocl/reusable_lnorm.cpp
@@ -48,7 +48,7 @@ static status_t init_conf_common(const layer_normalization_pd_t *pd,
     conf->src_dt = src_buf.data_type;
     conf->dst_dt = dst_buf.data_type;
 
-    auto scales = pd->attr()->scales_;
+    const auto &scales = pd->attr()->scales_;
     conf->with_src_scale = !scales.has_default_values(DNNL_ARG_SRC);
     conf->with_dst_scale = !scales.has_default_values(DNNL_ARG_DST);
 
diff --git a/src/gpu/intel/ocl/rnn/rnn_grid.cpp b/src/gpu/intel/ocl/rnn/rnn_grid.cpp
index dd235c42536..c70b5854807 100644
--- a/src/gpu/intel/ocl/rnn/rnn_grid.cpp
+++ b/src/gpu/intel/ocl/rnn/rnn_grid.cpp
@@ -980,7 +980,7 @@ status_t simple_rnn_common_t::init(impl::engine_t *engine) {
     rnn_utils::set_workspace_offsets(rnn, ws_gates_offset_, ws_states_offset_,
             ws_c_states_offset_, ws_grid_comp_offset_, ws_bias_offset_);
 
-    auto kernel_names = pd()->ocl_conf.get_kernel_names();
+    const auto &kernel_names = pd()->ocl_conf.get_kernel_names();
     CHECK(create_kernels(engine, kernels_, kernel_names, pd()->ocl_conf));
 
     bool gemm_ok = utils::everyone_is(status::success,

From d4bcf8f2505d5ac9c99bb6afb68c7c1eba97747b Mon Sep 17 00:00:00 2001
From: "Kassen, Andrew"
Date: Thu, 6 Mar 2025 17:37:28 -0800
Subject: [PATCH 2/5] xe: add missing ctors/dtors/assignment operators

---
 src/gpu/gpu_resource.hpp | 3 ++-
 src/gpu/intel/compute/kernel.hpp | 1 +
 src/gpu/intel/compute/kernel_arg_list.hpp | 1 +
 src/gpu/intel/jit/ir/core.hpp | 3 +++
 src/gpu/intel/jit/ir/kernel_info.hpp | 2 ++
 src/gpu/intel/jit/ir/send_plan.cpp | 2 ++
 src/gpu/intel/jit/v2/conv/debug.hpp | 4 ++++
 src/gpu/intel/logging.hpp | 4 ++++
 8 files changed, 19 insertions(+), 1 deletion(-)
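The classes touched below gain the special members that clang-tidy's rule-of-five checks expect: once one copy, move, or destructor member is declared, declaring the rest makes the intent explicit. A minimal sketch of the resulting shape (illustrative only; buffer_t is an invented stand-in, not a oneDNN class):

    #include <utility>

    class buffer_t {
    public:
        buffer_t() = default;
        // Owning a raw resource makes copying ambiguous, so both copy
        // members are deleted explicitly rather than left to suppression
        // rules that are easy to misread.
        buffer_t(const buffer_t &) = delete;
        buffer_t &operator=(const buffer_t &) = delete;
        // Moves transfer ownership; swap keeps assignment self-safe.
        buffer_t(buffer_t &&other) noexcept : data_(other.data_) {
            other.data_ = nullptr;
        }
        buffer_t &operator=(buffer_t &&other) noexcept {
            if (&other != this) std::swap(data_, other.data_);
            return *this;
        }
        ~buffer_t() { delete[] data_; }

    private:
        char *data_ = nullptr;
    };

Defaulted destructors such as ~gpu_resource_t() override = default below are the degenerate case: they change nothing at runtime but complete the set.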
diff --git a/src/gpu/gpu_resource.hpp b/src/gpu/gpu_resource.hpp
index 44489aa1295..1be9983facd 100644
--- a/src/gpu/gpu_resource.hpp
+++ b/src/gpu/gpu_resource.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2020-2024 Intel Corporation
+* Copyright 2020-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -33,6 +33,7 @@ struct gpu_resource_t : public resource_t {
     using mapped_memory_t = std::unique_ptr;
 
     gpu_resource_t() = default;
+    ~gpu_resource_t() override = default;
 
     void add_memory_storage(key_memory_t idx, mapped_memory_t &&m) {
         assert(idx_to_memory_storage_.count(idx) == 0);
diff --git a/src/gpu/intel/compute/kernel.hpp b/src/gpu/intel/compute/kernel.hpp
index b94150bb5a9..77d4bbe1778 100644
--- a/src/gpu/intel/compute/kernel.hpp
+++ b/src/gpu/intel/compute/kernel.hpp
@@ -246,6 +246,7 @@ class kernel_bundle_t {
     kernel_bundle_t &operator=(const kernel_bundle_t &other) = delete;
     kernel_bundle_t(kernel_bundle_t &&other) = default;
     kernel_bundle_t &operator=(kernel_bundle_t &&other) = default;
+    ~kernel_bundle_t() = default;
 
     status_t get_kernels(std::vector &kernels,
             const std::vector &kernel_names) const {
diff --git a/src/gpu/intel/compute/kernel_arg_list.hpp b/src/gpu/intel/compute/kernel_arg_list.hpp
index 9862ec2fa53..9a3c9ce6081 100644
--- a/src/gpu/intel/compute/kernel_arg_list.hpp
+++ b/src/gpu/intel/compute/kernel_arg_list.hpp
@@ -229,6 +229,7 @@ class kernel_arg_t {
 class kernel_arg_list_t {
 public:
     kernel_arg_list_t() { args_.reserve(512); }
+    ~kernel_arg_list_t() = default;
 
     void append(const memory_storage_t &storage) {
         args_.emplace_back();
diff --git a/src/gpu/intel/jit/ir/core.hpp b/src/gpu/intel/jit/ir/core.hpp
index 4783a0657ab..d1ee91705cb 100644
--- a/src/gpu/intel/jit/ir/core.hpp
+++ b/src/gpu/intel/jit/ir/core.hpp
@@ -600,6 +600,8 @@ class ref_count_t {
 public:
     ref_count_t() : value_(0) {}
     ref_count_t(const ref_count_t &) = delete;
+    ref_count_t &operator=(const ref_count_t &) = delete;
+    ~ref_count_t() = default;
 
     uint32_t increment() { return ++value_; }
     uint32_t decrement() { return --value_; }
@@ -627,6 +629,7 @@ class object_impl_t {
     object_impl_t(type_info_t type_info) : type_info_(type_info) {};
 
     object_impl_t(const object_impl_t &) = delete;
+    object_impl_t &operator=(const object_impl_t &) = delete;
 
     virtual ~object_impl_t() = default;
 
diff --git a/src/gpu/intel/jit/ir/kernel_info.hpp b/src/gpu/intel/jit/ir/kernel_info.hpp
index 4ab6386a700..7a738c62e77 100644
--- a/src/gpu/intel/jit/ir/kernel_info.hpp
+++ b/src/gpu/intel/jit/ir/kernel_info.hpp
@@ -40,6 +40,8 @@ class memory_storage_ptr_t {
         : unique_ptr_(std::move(ptr)) {}
     memory_storage_ptr_t(const memory_storage_t *ptr) : raw_ptr_(ptr) {}
     memory_storage_ptr_t(const memory_storage_ptr_t &) = delete;
+    memory_storage_ptr_t &operator=(const memory_storage_ptr_t &) = delete;
+    ~memory_storage_ptr_t() = default;
 
     const memory_storage_t *get() const {
         if (unique_ptr_) return unique_ptr_.get();
diff --git a/src/gpu/intel/jit/ir/send_plan.cpp b/src/gpu/intel/jit/ir/send_plan.cpp
index 5e965615e16..ede9220e7a0 100644
--- a/src/gpu/intel/jit/ir/send_plan.cpp
+++ b/src/gpu/intel/jit/ir/send_plan.cpp
@@ -2278,6 +2278,8 @@ class ir_send_plan_t final : public send_plan_impl_t {
     }
 
     ir_send_plan_t(const ir_send_plan_t &) = delete;
+    ir_send_plan_t &operator=(const ir_send_plan_t &) = delete;
+    ~ir_send_plan_t() override = default;
 
     const send_params_t &send_params() const override { return send_params_; }
 
diff --git a/src/gpu/intel/jit/v2/conv/debug.hpp b/src/gpu/intel/jit/v2/conv/debug.hpp
index 25e65280be7..4308049b0d6 100644
--- a/src/gpu/intel/jit/v2/conv/debug.hpp
+++ b/src/gpu/intel/jit/v2/conv/debug.hpp
@@ -52,6 +52,10 @@ class debug_t {
             desc_ptr_ = other.desc_ptr_;
             other.desc_ptr_ = nullptr;
         }
+        kernel_desc_setter_t &operator=(kernel_desc_setter_t &&other) {
+            if (&other != this) std::swap(desc_ptr_, other.desc_ptr_);
+            return *this;
+        }
         kernel_desc_setter_t(const kernel_desc_setter_t &) = delete;
         kernel_desc_setter_t &operator=(const kernel_desc_setter_t &) = delete;
 
diff --git a/src/gpu/intel/logging.hpp b/src/gpu/intel/logging.hpp
index 81367497bc9..929ff54ba7a 100644
--- a/src/gpu/intel/logging.hpp
+++ b/src/gpu/intel/logging.hpp
@@ -55,6 +55,10 @@ class logger_t {
     logger_t(const char *file_name, int line, std::ostream &out = std::cout)
         : file_path_(file_name + std::string(":") + std::to_string(line))
         , out_(out) {}
+
+    logger_t(const logger_t &) = delete;
+    logger_t &operator=(const logger_t &) = delete;
+
     ~logger_t() {
         add_header(true);
         if (lines_.size() == 1) {

From 50591cce5d01de0db82615810a9683b0fff24363 Mon Sep 17 00:00:00 2001
From: "Kassen, Andrew"
Date: Thu, 6 Mar 2025 17:38:18 -0800
Subject: [PATCH 3/5] xe: remove unnecessary/dangerous moves

---
 src/gpu/intel/jit/conv/config.cpp | 4 ++--
 src/gpu/intel/jit/v2/conv/builder.cpp | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)
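Both removals below correspond to two well-known clang-tidy findings. A sketch of each hazard with invented names (pick_tag, consume, and run are not oneDNN functions):

    #include <string>
    #include <utility>
    #include <vector>

    void consume(const std::vector<int> &v) { (void)v.size(); }

    std::string pick_tag(bool use_default, std::string req) {
        // Dangerous: writing (use_default ? "axb" : std::move(req)) would
        // leave req moved-from on one branch, turning the read below into a
        // potential use-after-move (bugprone-use-after-move).
        std::string tag = use_default ? "axb" : req;
        if (tag.empty()) tag = req;
        return tag;
    }

    void run() {
        std::vector<int> tile {1, 2, 3};
        // Unnecessary: the callee binds a const reference, so std::move(tile)
        // would move nothing and only suggest, wrongly, that tile is dead
        // afterwards (performance-move-const-arg).
        consume(tile);
        consume(tile); // still valid either way, but the move was misleading
    }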
diff --git a/src/gpu/intel/jit/conv/config.cpp b/src/gpu/intel/jit/conv/config.cpp
index 1d24070e35c..5f89526c266 100644
--- a/src/gpu/intel/jit/conv/config.cpp
+++ b/src/gpu/intel/jit/conv/config.cpp
@@ -646,9 +646,9 @@ void init_data_tags(const conv_config_t &cfg, const memory_desc_t &src_md,
 
     // Use plain tags for user-facing activations for small-channel tensors.
     if (!matches_tag(src_md, src_tag) && is_small_ic_g1)
-        user_src_tag = (user_src_req.empty() ? "axb" : std::move(user_src_req));
+        user_src_tag = (user_src_req.empty() ? "axb" : user_src_req);
     if (!matches_tag(dst_md, dst_tag) && is_small_oc_g1)
-        user_dst_tag = (user_dst_req.empty() ? "axb" : std::move(user_dst_req));
+        user_dst_tag = (user_dst_req.empty() ? "axb" : user_dst_req);
 
     // Avoid reorder for small shapes
     if (!user_src_tag.empty() && !user_dst_tag.empty() && prb.g == 1
diff --git a/src/gpu/intel/jit/v2/conv/builder.cpp b/src/gpu/intel/jit/v2/conv/builder.cpp
index aa4809bd286..f1fe66c265c 100644
--- a/src/gpu/intel/jit/v2/conv/builder.cpp
+++ b/src/gpu/intel/jit/v2/conv/builder.cpp
@@ -558,7 +558,7 @@ class post_op_builder_t : public ir_builder_t {
         elems = (elems < 8 ? 1 : elems);
         pvar_tile_t tile;
         tile[lhs0.dim] = elems;
-        for_each(lhs.int_dim_sizes(), std::move(tile),
+        for_each(lhs.int_dim_sizes(), tile,
                 [&](const pvar_coord_t &coord) {
                     auto lhs_off = lhs.offset_in_bytes(coord);
                     auto rhs_off = rhs.offset_in_bytes(coord);

From 55825ba4e719f46f3b59b9a4bff8e0f15328947a Mon Sep 17 00:00:00 2001
From: "Kassen, Andrew"
Date: Thu, 6 Mar 2025 17:38:57 -0800
Subject: [PATCH 4/5] xe: remove unused code

---
 src/gpu/intel/jit/v2/conv/kernel_desc_2d_reqs.cpp | 1 -
 src/gpu/intel/jit/v2/conv/planner/bench.cpp | 3 +--
 src/gpu/intel/jit/v2/conv/planner/search.cpp | 1 -
 src/gpu/intel/ocl/reusable_vectorized_lnorm.cpp | 2 --
 4 files changed, 1 insertion(+), 6 deletions(-)
diff --git a/src/gpu/intel/jit/v2/conv/kernel_desc_2d_reqs.cpp b/src/gpu/intel/jit/v2/conv/kernel_desc_2d_reqs.cpp
index 5c4358cad0e..d4db4c23f0e 100644
--- a/src/gpu/intel/jit/v2/conv/kernel_desc_2d_reqs.cpp
+++ b/src/gpu/intel/jit/v2/conv/kernel_desc_2d_reqs.cpp
@@ -90,7 +90,6 @@ struct stride_t {
 
 struct block_2d_params_t {
     stride_t base_stride;
-    dim_t x_stride;
     expr_t y_stride;
     pvar_t w_dim;
     pvar_t h_dim;
diff --git a/src/gpu/intel/jit/v2/conv/planner/bench.cpp b/src/gpu/intel/jit/v2/conv/planner/bench.cpp
index 594fe7f9723..3d352e18c44 100644
--- a/src/gpu/intel/jit/v2/conv/planner/bench.cpp
+++ b/src/gpu/intel/jit/v2/conv/planner/bench.cpp
@@ -648,9 +648,8 @@ class bench_runner_impl_t {
         }
     }
 
-    bench_data_t bench(const kernel_desc_t &_kernel_desc) {
+    bench_data_t bench(const kernel_desc_t &kernel_desc) {
         if (tasks_.empty()) return bench_data_t();
-        const auto &kernel_desc = _kernel_desc;
         if (!create_conv_plan(kernel_desc, bench_mger_.hw())) return {};
         return planner::bench(bench_mger_, kernel_desc, tasks_, &mem_pool_);
     }
diff --git a/src/gpu/intel/jit/v2/conv/planner/search.cpp b/src/gpu/intel/jit/v2/conv/planner/search.cpp
index 19bb4fd64c4..222bf8a3d55 100644
--- a/src/gpu/intel/jit/v2/conv/planner/search.cpp
+++ b/src/gpu/intel/jit/v2/conv/planner/search.cpp
@@ -556,7 +556,6 @@ class search_sequence_t {
 
 bench_data_set_t bench_kernel_desc_group(const bench_manager_t &bench_mger,
         const search_kernel_desc_group_t &desc_group, int nprbs, int max_descs) {
-    auto eng = bench_mger.get_engine();
     bench_runner_t runner(
             bench_mger, desc_group.bench_input_params(nprbs, bench_mger.hw()));
     bench_data_set_t bd_set;
diff --git a/src/gpu/intel/ocl/reusable_vectorized_lnorm.cpp b/src/gpu/intel/ocl/reusable_vectorized_lnorm.cpp
index e7aad4d3a41..5614c630410 100644
--- a/src/gpu/intel/ocl/reusable_vectorized_lnorm.cpp
+++ b/src/gpu/intel/ocl/reusable_vectorized_lnorm.cpp
@@ -97,8 +97,6 @@ static status_t init_conf_common(const layer_normalization_pd_t *pd,
     conf->calculate_stats = !pd->stats_are_src();
     conf->save_stats = pd->is_training();
 
-    auto scales = pd->attr()->scales_;
-
     // We require that the lnorm axis is a single dense block, so that it can
     // be represented by a stride + size alone.
     size_t ndims = into(input_buf.ndims);

From 89155b5a3e3dc3619ee9c557abf336ad8140eb02 Mon Sep 17 00:00:00 2001
From: "Kassen, Andrew"
Date: Thu, 6 Mar 2025 19:40:07 -0800
Subject: [PATCH 5/5] xe: jit: address clang-tidy complaints

---
 src/gpu/intel/jit/conv/pipeline.cpp | 6 +++---
 src/gpu/intel/jit/conv/plan.cpp | 21 +++++++++----------
 src/gpu/intel/jit/conv/tiler.cpp | 4 ++--
 src/gpu/intel/jit/ir/core.hpp | 1 +
 src/gpu/intel/jit/ir/epilogue.cpp | 1 +
 src/gpu/intel/jit/ir/ir.cpp | 2 +-
 src/gpu/intel/jit/ir/message_patterns.hpp | 2 +-
 src/gpu/intel/jit/ir/send_plan.cpp | 10 +++++----
 src/gpu/intel/jit/pass/hoist.cpp | 3 ++-
 src/gpu/intel/jit/pass/send.cpp | 6 +++---
 src/gpu/intel/jit/pass/simplify.cpp | 3 ++-
 src/gpu/intel/jit/v2/conv/bridge.hpp | 2 +-
 src/gpu/intel/jit/v2/conv/debug.hpp | 6 +++---
 src/gpu/intel/jit/v2/conv/kernel_desc.hpp | 2 +-
 src/gpu/intel/jit/v2/conv/plan_registry.hpp | 2 +-
 src/gpu/intel/jit/v2/conv/planner/bench.hpp | 19 +++++++++--------
 src/gpu/intel/jit/v2/conv/planner/search.cpp | 1 +
 src/gpu/intel/jit/v2/conv/problem.hpp | 2 +-
 src/gpu/intel/jit/v2/conv/tensor_utils.cpp | 4 ++--
 src/gpu/intel/ocl/gemm/gemm_with_post_ops.hpp | 2 +-
 .../ocl/reduction/combined_reduction.cpp | 1 +
 21 files changed, 54 insertions(+), 46 deletions(-)
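Two of the mechanical rewrites below, condensed into one sketch (config_t, fma_context_t as shown here, and squares are invented stand-ins, not the oneDNN classes): cppcoreguidelines-prefer-member-initializer moves assignments out of constructor bodies into the initializer list, and the reserve() additions pre-size vectors that are filled by a known number of push_back calls.

    #include <vector>

    struct config_t {
        int simd;
        int vec_size;
    };

    struct fma_context_t {
        // Members are initialized directly, in declaration order, instead of
        // being default-constructed and then assigned in the body.
        fma_context_t(const config_t &cfg)
            : simd(cfg.simd), vec_size(cfg.vec_size) {}

        int simd;
        int vec_size;
    };

    std::vector<int> squares(const std::vector<int> &v) {
        std::vector<int> out;
        out.reserve(v.size()); // one allocation instead of repeated regrowth
        for (int x : v)
            out.push_back(x * x);
        return out;
    }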
diff --git a/src/gpu/intel/jit/conv/pipeline.cpp b/src/gpu/intel/jit/conv/pipeline.cpp
index c223100c35f..def57b54440 100644
--- a/src/gpu/intel/jit/conv/pipeline.cpp
+++ b/src/gpu/intel/jit/conv/pipeline.cpp
@@ -622,9 +622,9 @@ struct compute_params_t {
         : slm_bufs(slm_bufs)
         , gmem_bufs(gmem_bufs)
         , slm_buf_size(slm_buf_size)
-        , prefetch_bufs(prefetch_bufs) {
-        use_slm = (slm_buf_size > 0);
-        use_prefetch = (prefetch_bufs > 0);
+        , prefetch_bufs(prefetch_bufs)
+        , use_slm(slm_buf_size > 0)
+        , use_prefetch(prefetch_bufs > 0) {
         gpu_assert(!use_slm || !use_prefetch)
                 << "Can't have both SLM buffering and prefetch enabled.";
         if (use_slm) {
diff --git a/src/gpu/intel/jit/conv/plan.cpp b/src/gpu/intel/jit/conv/plan.cpp
index ef888ed6562..8fb03f3a434 100644
--- a/src/gpu/intel/jit/conv/plan.cpp
+++ b/src/gpu/intel/jit/conv/plan.cpp
@@ -1263,17 +1263,16 @@ struct fma_layout_hint_t {
 };
 
 struct fma_context_t {
-    fma_context_t(const conv_config_t &cfg) {
-        hw = cfg.hw();
-        simd = cfg.simd();
-        vec_size = cfg.vec_size();
-        fma = cfg.fma_kind();
-        a_type = type_t(cfg.prb().a_data_type);
-        b_type = type_t(cfg.prb().b_data_type);
-        acc_type = get_accumulation_type(cfg, a_type, b_type);
-        is_src1_broadcast = !cfg.prb().is_dw;
-        ab_swap_transpose_ = cfg.prb().ab_swap_transpose;
-    }
+    fma_context_t(const conv_config_t &cfg)
+        : hw(cfg.hw())
+        , simd(cfg.simd())
+        , vec_size(cfg.vec_size())
+        , fma(cfg.fma_kind())
+        , a_type(cfg.prb().a_data_type)
+        , b_type(cfg.prb().b_data_type)
+        , acc_type(get_accumulation_type(cfg, a_type, b_type))
+        , is_src1_broadcast(!cfg.prb().is_dw)
+        , ab_swap_transpose_(cfg.prb().ab_swap_transpose) {}
 
     fma_layout_hint_t &layout_hint(abc_kind_t abc) {
         return (abc == abc_kind_t::a) ? a_layout_hint : b_layout_hint;
diff --git a/src/gpu/intel/jit/conv/tiler.cpp b/src/gpu/intel/jit/conv/tiler.cpp
index fdf1bf46013..16a980b8f36 100644
--- a/src/gpu/intel/jit/conv/tiler.cpp
+++ b/src/gpu/intel/jit/conv/tiler.cpp
@@ -597,7 +597,8 @@ class conv_blocking_checker_t : public blocking_checker_t {
 
 private:
     struct context_t {
-        context_t(const blocking_t &blk, const conv_config_t &cfg) : blk(blk) {
+        context_t(const blocking_t &blk, const conv_config_t &cfg)
+            : blk(blk), dpas_2x_depth(get_dpas_2x_depth(blk, cfg)) {
             auto &prb = cfg.prb();
             auto gemm_iter = to_gemm(blk.iter(), prb);
             auto gemm_loop = to_gemm(blk.loop(), prb);
@@ -611,7 +612,6 @@ class conv_blocking_checker_t : public blocking_checker_t {
             m_tg = gemm_tg.get(pvars::m, 1);
             n_tg = gemm_tg.get(pvars::n, 1);
             k_tg = gemm_tg.get(pvars::k, 1);
-            dpas_2x_depth = get_dpas_2x_depth(blk, cfg);
         }
 
         bool get_dpas_2x_depth(
diff --git a/src/gpu/intel/jit/ir/core.hpp b/src/gpu/intel/jit/ir/core.hpp
index d1ee91705cb..54fee11a00e 100644
--- a/src/gpu/intel/jit/ir/core.hpp
+++ b/src/gpu/intel/jit/ir/core.hpp
@@ -869,6 +869,7 @@ class ir_mutator_t {
     template
     std::vector mutate(const std::vector &v) {
         std::vector new_v;
+        new_v.reserve(v.size());
         for (auto &e : v)
             new_v.push_back(mutate(e));
         return new_v;
diff --git a/src/gpu/intel/jit/ir/epilogue.cpp b/src/gpu/intel/jit/ir/epilogue.cpp
index 7838b2e236a..c62fe878087 100644
--- a/src/gpu/intel/jit/ir/epilogue.cpp
+++ b/src/gpu/intel/jit/ir/epilogue.cpp
@@ -1030,6 +1030,7 @@ class epilogue_builder_t {
 
         // Create sub-tensors for post-ops.
         std::vector sub_po_tensors;
+        sub_po_tensors.reserve(post_op_tensors_.size());
         for (auto &t : post_op_tensors_)
             sub_po_tensors.push_back(t.create_sub_tensor(tile));
 
diff --git a/src/gpu/intel/jit/ir/ir.cpp b/src/gpu/intel/jit/ir/ir.cpp
index 8ec87816239..2bdd851e3d7 100644
--- a/src/gpu/intel/jit/ir/ir.cpp
+++ b/src/gpu/intel/jit/ir/ir.cpp
@@ -387,7 +387,7 @@ class alloc_injector_t : public ir_mutator_t {
         buf_total_refs_ = buf_cur_refs_;
         for (auto &kv : buf_cur_refs_)
             kv.second = 0;
-        in_ctor_ = false;
+        in_ctor_ = false; // NOLINT(cppcoreguidelines-prefer-member-initializer)
     }
 
 #define HANDLE_IR_OBJECT(type) \
diff --git a/src/gpu/intel/jit/ir/message_patterns.hpp b/src/gpu/intel/jit/ir/message_patterns.hpp
index 6adeae74d2f..a353d32ef2e 100644
--- a/src/gpu/intel/jit/ir/message_patterns.hpp
+++ b/src/gpu/intel/jit/ir/message_patterns.hpp
@@ -500,7 +500,7 @@ struct uniform_send_idiom_t final {
                 ret.begin(), ret.end(), [&](const hint_t &a, const hint_t &b) {
                     return a.size() > b.size();
                 });
-        if (ret.size() && filtered_ret.size()
+        if (!ret.empty() && !filtered_ret.empty()
                 && ret[0].size() > filtered_ret[0].size())
             gpu_warning() << "Optimal send hint disabled: " << ret[0];
 
diff --git a/src/gpu/intel/jit/ir/send_plan.cpp b/src/gpu/intel/jit/ir/send_plan.cpp
index ede9220e7a0..22c1f83dd62 100644
--- a/src/gpu/intel/jit/ir/send_plan.cpp
+++ b/src/gpu/intel/jit/ir/send_plan.cpp
@@ -348,9 +348,11 @@ class tdim_info_t {
     tdim_info_t() = default;
     tdim_info_t(
             int tidx, const tdim_t &tdim, const view_t &view, int64_t block = 1)
-        : tidx_(tidx), block_(block), dim_(&tdim) {
-        base_mod_ = to_base(tdim, view.vvars());
-        size_ = view.tlayout().dim(tidx);
+        : tidx_(tidx)
+        , size_(view.tlayout().dim(tidx))
+        , base_mod_(to_base(tdim, view.vvars()))
+        , block_(block)
+        , dim_(&tdim) {
         for (dim_idx_t i = 0; i < tdim.nvargs(); i++) {
             vidxs_[i] = tdim.vidx(i);
             vstrides_[i] = tdim.vstride(i);
@@ -1810,10 +1812,10 @@ class view_iterator_t {
 public:
     view_iterator_t(const view_info_t
diff --git a/src/gpu/intel/jit/ir/send_plan.cpp b/src/gpu/intel/jit/ir/send_plan.cpp
index ede9220e7a0..22c1f83dd62 100644
--- a/src/gpu/intel/jit/ir/send_plan.cpp
+++ b/src/gpu/intel/jit/ir/send_plan.cpp
@@ -348,9 +348,11 @@ class tdim_info_t {
     tdim_info_t() = default;
     tdim_info_t(
             int tidx, const tdim_t &tdim, const view_t &view, int64_t block = 1)
-        : tidx_(tidx), block_(block), dim_(&tdim) {
-        base_mod_ = to_base(tdim, view.vvars());
-        size_ = view.tlayout().dim(tidx);
+        : tidx_(tidx)
+        , size_(view.tlayout().dim(tidx))
+        , base_mod_(to_base(tdim, view.vvars()))
+        , block_(block)
+        , dim_(&tdim) {
         for (dim_idx_t i = 0; i < tdim.nvargs(); i++) {
             vidxs_[i] = tdim.vidx(i);
             vstrides_[i] = tdim.vstride(i);
@@ -1810,10 +1812,10 @@ class view_iterator_t {
 public:
     view_iterator_t(const view_info_t &info)
         : info_(info)
+        , inner_elems_(1)
         , block_off_(nblocks())
         , block_dims_(nblocks())
         , off_(info.vlayout().ndims()) {
-        inner_elems_ = 1;
         for (int i = 0; i < info_.inner_idx(); i++) {
             inner_elems_ *= (int)blocks()[i].block;
         }
diff --git a/src/gpu/intel/jit/pass/hoist.cpp b/src/gpu/intel/jit/pass/hoist.cpp
index ecc91a1622a..8f6f5c1d59a 100644
--- a/src/gpu/intel/jit/pass/hoist.cpp
+++ b/src/gpu/intel/jit/pass/hoist.cpp
@@ -83,6 +83,7 @@ class hoist_exprs_mutator_t : public ir_mutator_t {
         if (!obj.func.is<send_t>()) return ir_mutator_t::_mutate(obj);
 
         std::vector<expr_t> new_args;
+        new_args.reserve(obj.args.size());
         for (auto &e : obj.args) {
             new_args.push_back(hoist_expr(e));
         }
@@ -379,7 +380,7 @@ class hoist_send_masks_mutator_t : public ir_mutator_t {
         return loop_deps_.count(v) != 0;
     }
 
-    bool can_hoist(const expr_t &expr) {
+    bool can_hoist(const expr_t &expr) const {
         return expr.type().size() <= max_hoist_size_ - current_hoist_size_;
     }
diff --git a/src/gpu/intel/jit/pass/send.cpp b/src/gpu/intel/jit/pass/send.cpp
index 1f312c0847c..a576a74838c 100644
--- a/src/gpu/intel/jit/pass/send.cpp
+++ b/src/gpu/intel/jit/pass/send.cpp
@@ -27,7 +27,7 @@ namespace jit {
 
 class buffer_offset_lifter_t : public ir_mutator_t {
 public:
-    object_t _mutate(const func_call_t &obj) {
+    object_t _mutate(const func_call_t &obj) override {
         if (!obj.func.is<send_t>()) return ir_mutator_t::_mutate(obj);
 
         auto &mem_buf = send_t::arg_mem_buf(obj);
@@ -56,7 +56,7 @@ class send_injector_t : public ir_mutator_t {
 public:
     send_injector_t(ir_context_t &ir_ctx) : ir_ctx_(ir_ctx) {}
 
-    object_t _mutate(const func_call_t &obj) {
+    object_t _mutate(const func_call_t &obj) override {
         auto *send = obj.func.as_ptr<send_t>();
         if (!send) return ir_mutator_t::_mutate(obj);
 
@@ -176,7 +176,7 @@ class send_2d_header_store_lifter_t : public ir_mutator_t {
                         off, send_t::header_2d_off_x(), send_t::header_2d_off_y())
                 && !is_const(obj.value))
             return obj;
-        stores_[obj.buf].push_back(obj);
+        stores_[obj.buf].emplace_back(obj);
         return stmt_t();
     }
diff --git a/src/gpu/intel/jit/pass/simplify.cpp b/src/gpu/intel/jit/pass/simplify.cpp
index 68fc1915e20..093158bec6c 100644
--- a/src/gpu/intel/jit/pass/simplify.cpp
+++ b/src/gpu/intel/jit/pass/simplify.cpp
@@ -1565,6 +1565,7 @@ expr_t simplify_with_nary(const expr_t &_e, const constraint_set_t &cset) {
     return e;
 }
 
+// NOLINTNEXTLINE(readability-identifier-naming)
 class _64_bit_add_optimizer_t : public nary_op_mutator_t {
 public:
     object_t _mutate(const nary_op_t &obj) override {
@@ -1770,7 +1771,7 @@ struct op_traits_t {};
     static auto compute(T a, T b) -> decltype(a op b) { \
         return a op b; \
     } \
-    template  \
+    template ::type> \
     static bool compute(bool a, bool b) { \
diff --git a/src/gpu/intel/jit/v2/conv/bridge.hpp b/src/gpu/intel/jit/v2/conv/bridge.hpp
index 02f6d6470ff..26bb5f15091 100644
--- a/src/gpu/intel/jit/v2/conv/bridge.hpp
+++ b/src/gpu/intel/jit/v2/conv/bridge.hpp
@@ -105,7 +105,7 @@ inline jit::layout_t to_conv_layout(const layout_tag_t &_tag,
 inline jit::layout_t to_conv_layout(
         const layout_tag_t &_tag, const pvar_tile_t &shape) {
     int ndims = _tag.desc().ndims();
-    auto tag = _tag.raw_tag();
+    const auto &tag = _tag.raw_tag();
     std::vector<dim_t> dims(ndims);
     for (int i = 0; i < ndims; i++) {
         auto d = _tag.desc().prb_dim(i);
diff --git a/src/gpu/intel/jit/v2/conv/debug.hpp b/src/gpu/intel/jit/v2/conv/debug.hpp
index 4308049b0d6..2a72aa8610c 100644
--- a/src/gpu/intel/jit/v2/conv/debug.hpp
+++ b/src/gpu/intel/jit/v2/conv/debug.hpp
@@ -48,9 +48,9 @@ class debug_t {
         *desc_ptr_ = desc;
     }
 
-    kernel_desc_setter_t(kernel_desc_setter_t &&other) {
-        desc_ptr_ = other.desc_ptr_;
-        other.desc_ptr_ = nullptr;
+    kernel_desc_setter_t(kernel_desc_setter_t &&other)
+        : desc_ptr_(nullptr) {
+        std::swap(desc_ptr_, other.desc_ptr_);
     }
     kernel_desc_setter_t &operator=(kernel_desc_setter_t &&other) {
         if (&other != this) std::swap(desc_ptr_, other.desc_ptr_);
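The tdim_info_t and view_iterator_t constructors above keep the member-initializer list in declaration order. That matters beyond style: C++ initializes members in declaration order regardless of how the list is written, so an initializer may only read members declared before it. A standalone sketch of the failure mode this discipline prevents; extent_t is a made-up type, not from the patch:

    #include <cstddef>

    struct extent_t {
        // half_ is declared first, so it is initialized first even though
        // the list below names n_(n) first: half_(n_ / 2) reads n_ before
        // n_ has been initialized -- undefined behavior.
        std::size_t half_;
        std::size_t n_;
        explicit extent_t(std::size_t n) : n_(n), half_(n_ / 2) {}
    };

The fix is to declare members in dependency order and write the initializer list to match, which is what the send_plan.cpp hunks do.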
diff --git a/src/gpu/intel/jit/v2/conv/kernel_desc.hpp b/src/gpu/intel/jit/v2/conv/kernel_desc.hpp
index 6313a437eac..59eb71d5d19 100644
--- a/src/gpu/intel/jit/v2/conv/kernel_desc.hpp
+++ b/src/gpu/intel/jit/v2/conv/kernel_desc.hpp
@@ -78,7 +78,7 @@ static auto specialization_mode_names = nstl::to_array({
 GPU_DEFINE_PARSE_ENUM(specialization_mode_t, specialization_mode_names)
 
 struct specialization_t {
-    specialization_mode_t mode;
+    specialization_mode_t mode = specialization_mode_t::none;
     // Dimension values to specialize (e.g. kw1).
     pvar_tile_t dim_values;
     // Dimension modulus to specialize (e.g. oc@64)
diff --git a/src/gpu/intel/jit/v2/conv/plan_registry.hpp b/src/gpu/intel/jit/v2/conv/plan_registry.hpp
index 88e60589979..62aaad03c71 100644
--- a/src/gpu/intel/jit/v2/conv/plan_registry.hpp
+++ b/src/gpu/intel/jit/v2/conv/plan_registry.hpp
@@ -55,7 +55,7 @@ class plan_registry_t {
     void stringify(std::ostream &out) const;
     void parse(std::istream &out);
 
-public:
+private:
     std::vector<entry_t> entries_;
 };
diff --git a/src/gpu/intel/jit/v2/conv/planner/bench.hpp b/src/gpu/intel/jit/v2/conv/planner/bench.hpp
index e16a64b1cb1..d933ca267f3 100644
--- a/src/gpu/intel/jit/v2/conv/planner/bench.hpp
+++ b/src/gpu/intel/jit/v2/conv/planner/bench.hpp
@@ -71,15 +71,16 @@ struct bench_input_params_t {
     bench_input_params_t() = default;
     bench_input_params_t(const kernel_desc_t &kernel_desc, const hw_t &hw,
             int nprbs = default_nprbs)
-        : hw(hw), nprbs(nprbs) {
-        prop = kernel_desc.prop;
-        src_tag = kernel_desc.src_tag;
-        wei_tag = kernel_desc.wei_tag;
-        dst_tag = kernel_desc.dst_tag;
-        reqs = kernel_desc.reqs();
-        is_dw = kernel_desc.is_dw;
-        bias_type = kernel_desc.bias_type;
-        tile = kernel_desc.iter_tile;
+        : hw(hw)
+        , prop(kernel_desc.prop)
+        , src_tag(kernel_desc.src_tag)
+        , wei_tag(kernel_desc.wei_tag)
+        , dst_tag(kernel_desc.dst_tag)
+        , reqs(kernel_desc.reqs())
+        , is_dw(kernel_desc.is_dw)
+        , bias_type(kernel_desc.bias_type)
+        , tile(kernel_desc.iter_tile)
+        , nprbs(nprbs) {
         for (auto &d : kernel_desc.thread_group_tile) {
             tile[d] = tile.get(d, 1) * kernel_desc.thread_group_tile[d];
         }
diff --git a/src/gpu/intel/jit/v2/conv/planner/search.cpp b/src/gpu/intel/jit/v2/conv/planner/search.cpp
index 222bf8a3d55..b394f5b0f60 100644
--- a/src/gpu/intel/jit/v2/conv/planner/search.cpp
+++ b/src/gpu/intel/jit/v2/conv/planner/search.cpp
@@ -447,6 +447,7 @@ class kernel_search_manager_t {
             }
         }
         std::vector<kernel_desc_t> ret;
+        ret.reserve(desc_groups.size());
         for (auto &kv : desc_groups) {
             ret.push_back(kv.second);
         }
diff --git a/src/gpu/intel/jit/v2/conv/problem.hpp b/src/gpu/intel/jit/v2/conv/problem.hpp
index e828e4dbd58..a5dd93c75b3 100644
--- a/src/gpu/intel/jit/v2/conv/problem.hpp
+++ b/src/gpu/intel/jit/v2/conv/problem.hpp
@@ -104,7 +104,7 @@ class problem_t {
     layout_tag_t dst_tag_;
     type_t bias_type_;
     pvar_tile_t shape_;
-    std::array<int, 3> dhw_map_;
+    std::array<int, 3> dhw_map_ = {0};
     bool with_groups_ = false;
     bool with_scales_ = false;
     bool with_post_ops_ = false;
diff --git a/src/gpu/intel/jit/v2/conv/tensor_utils.cpp b/src/gpu/intel/jit/v2/conv/tensor_utils.cpp
index a60a5595949..1fcbf2079ae 100644
--- a/src/gpu/intel/jit/v2/conv/tensor_utils.cpp
+++ b/src/gpu/intel/jit/v2/conv/tensor_utils.cpp
@@ -188,7 +188,7 @@ std::string blocked_to_str_tag(const memory_desc_t &md) {
     for (int i = blk.inner_nblks - 1; i >= 0; i--) {
         dim_idx_t idx = into<dim_idx_t>(blk.inner_idxs[i]);
         dim_t block = blk.inner_blks[i];
-        parts.push_back(std::string(1, dim_idx::as_tag(idx)));
+        parts.emplace_back(1, dim_idx::as_tag(idx));
         parts.push_back(std::to_string(block));
         full_inner_blks[idx] *= block;
         stride *= block;
@@ -211,7 +211,7 @@ std::string blocked_to_str_tag(const memory_desc_t &md) {
             // Size-one blocks have to be added first.
             if (min_dim == 1 && rem_dims[j] != min_dim) continue;
             bool is_blocked = (full_inner_blks[j] != 1);
-            parts.push_back(std::string(1, dim_idx::as_tag(j, is_blocked)));
+            parts.emplace_back(1, dim_idx::as_tag(j, is_blocked));
             stride *= rem_dims[j];
             seen[j] = true;
             found = true;
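On the emplace_back changes in tensor_utils.cpp above: push_back(std::string(1, c)) constructs a temporary string and then moves it into the vector, while emplace_back(1, c) forwards the arguments to std::string's (count, char) constructor and builds the element in place. A minimal illustration, not from the patch:

    #include <string>
    #include <vector>

    int main() {
        std::vector<std::string> parts;
        char tag = 'a';
        parts.push_back(std::string(1, tag)); // temporary, then move into place
        parts.emplace_back(1, tag); // constructed directly inside the vector
        return 0;
    }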
diff --git a/src/gpu/intel/ocl/gemm/gemm_with_post_ops.hpp b/src/gpu/intel/ocl/gemm/gemm_with_post_ops.hpp
index 82ca1d1f88c..08b00060f5e 100644
--- a/src/gpu/intel/ocl/gemm/gemm_with_post_ops.hpp
+++ b/src/gpu/intel/ocl/gemm/gemm_with_post_ops.hpp
@@ -48,7 +48,7 @@ struct gemm_with_post_ops_t : public gpu_gemm_t {
         bool use_reorder = false;
         compute::dispatch_t dispatch_;
         attr_info_t attr_info_;
-        bool subbyte_pack_;
+        bool subbyte_pack_ = false;
     };
 
     status_t init(impl::engine_t *engine) override {
diff --git a/src/gpu/intel/ocl/reduction/combined_reduction.cpp b/src/gpu/intel/ocl/reduction/combined_reduction.cpp
index 4852a66921a..fc1193e9ea3 100644
--- a/src/gpu/intel/ocl/reduction/combined_reduction.cpp
+++ b/src/gpu/intel/ocl/reduction/combined_reduction.cpp
@@ -85,6 +85,7 @@ reduction_phase_conf_t::reduction_phase_conf_t(
     gpu_assert(reduction_block.block != 0) << "Reducing over 0 elements";
     if (outer_block.block == 0 || inner_block.block == 0) {
         nd_range = compute::nd_range_t({0}, {into<size_t>(subgroup_size)});
+        with_block_reads = false;
         return;
     }
     with_block_reads = can_use_block_reads();
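The last two hunks close the same kind of hole from different directions: subbyte_pack_ gains an in-class default, and the early-return path in reduction_phase_conf_t now assigns with_block_reads explicitly. In-class initializers run for every constructor that does not override them, so no control-flow path can leave a flag indeterminate. A minimal sketch; phase_conf_t is a made-up name, not a type from this patch:

    struct phase_conf_t {
        // Every constructor path starts from false unless it explicitly
        // assigns something else, so an early return cannot leak an
        // uninitialized flag to the caller.
        bool with_block_reads = false;
        bool subbyte_pack = false;
    };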