@@ -192,8 +192,8 @@ class ir_kernel_base_t : public ngen_generator_t {
192
192
const impl::engine_t *engine, ngen_generator_args... args)
193
193
: ngen_generator_t (std::forward<ngen_generator_args>(args)...)
194
194
, exec_cfg_(desc.exec_cfg(engine))
195
- , ra_(hw ())
196
- , emu_strategy(hw (), exec_cfg_.hw().stepping_id()) {
195
+ , ra_(getHardware ())
196
+ , emu_strategy(getHardware (), exec_cfg_.hw().stepping_id()) {
197
197
desc.init_kernel_iface (kernel_iface_);
198
198
ra_.setRegisterCount (exec_cfg_.regs ());
199
199
}
@@ -204,13 +204,12 @@ class ir_kernel_base_t : public ngen_generator_t {
204
204
: ngen_generator_t (std::forward<ngen_generator_args>(args)...)
205
205
, kernel_iface_(kernel_iface)
206
206
, exec_cfg_(exec_cfg)
207
- , ra_(hw ())
208
- , emu_strategy(hw (), exec_cfg.hw().stepping_id()) {
207
+ , ra_(getHardware ())
208
+ , emu_strategy(getHardware (), exec_cfg.hw().stepping_id()) {
209
209
ngen_generator_t::setStepping (exec_cfg.hw ().stepping_id ());
210
210
ra_.setRegisterCount (exec_cfg_.regs ());
211
211
}
212
212
213
- ngen::HW hw () const { return ngen_generator_t::getHardware (); }
214
213
// Read-only accessor: returns a const reference to the kernel_iface_ member.
const kernel_iface_t &kernel_iface () const { return kernel_iface_; }
215
214
// Read-only accessor: returns a const reference to the exec_cfg_ member.
const exec_config_t &exec_cfg () const { return exec_cfg_; }
216
215
@@ -308,7 +307,7 @@ class ir_kernel_base_t : public ngen_generator_t {
308
307
const ngen::Subregister &src1, uint32_t src2) {
309
308
bool is_src2_16_bit
310
309
= (src2 <= std::numeric_limits<uint16_t >::max ());
311
- if (hw () >= ngen::HW::XeLP && is_src2_16_bit && false ) {
310
+ if (getHardware () >= ngen::HW::XeLP && is_src2_16_bit && false ) {
312
311
mad (1 , dst, src0, src1, src2);
313
312
} else {
314
313
auto tmp = ra_.alloc_sub <uint64_t >();
@@ -552,7 +551,7 @@ class ir_kernel_base_t : public ngen_generator_t {
552
551
auto scope = ngen_register_scope_t (ra_);
553
552
align_src_dst_offset (this , scope, mod, dst, src0);
554
553
align_src_dst_offset (this , scope, mod, dst, src1);
555
- if (hw () >= ngen::HW::XeHP) {
554
+ if (getHardware () >= ngen::HW::XeHP) {
556
555
if (src2.is_reg_data ()) {
557
556
align_src_dst_offset (this , scope, mod, dst, src2);
558
557
add3 (mod, dst.reg_data (), fixup_ternary_rgn (src0.reg_data ()),
@@ -585,7 +584,7 @@ class ir_kernel_base_t : public ngen_generator_t {
585
584
align_src_dst_offset (this , scope, mod, dst, src2);
586
585
mad (mod, dst.reg_data (), fixup_ternary_rgn (src0.reg_data ()),
587
586
fixup_ternary_rgn (src1.reg_data ()), src2.reg_data ());
588
- } else if (hw () < ngen::HW::XeLP) {
587
+ } else if (getHardware () < ngen::HW::XeLP) {
589
588
align_src_dst_offset (this , scope, mod, dst, src0);
590
589
mul (mod, dst.reg_data (), src1.reg_data (), src2.immediate ());
591
590
add (mod, dst.reg_data (), dst.reg_data (), src0.reg_data ());
@@ -608,10 +607,11 @@ class ir_kernel_base_t : public ngen_generator_t {
608
607
const ngen_operand_t &src0, const ngen_operand_t &src1) {
609
608
if (!src1.is_immediate ()) {
610
609
// Immediate src0 is not supported with fdiv_ieee.
611
- if (src0.is_immediate () && hw () >= ngen::HW::XeHPC) {
610
+ if (src0.is_immediate () && getHardware () >= ngen::HW::XeHPC) {
612
611
auto tmp_src0 = ra_.alloc_sub (src0.type ());
613
612
mov (mod, tmp_src0, src0.immediate ());
614
- efdiv (mod, dst, ngen_operand_t (reg_buf_data_t (hw (), tmp_src0)),
613
+ efdiv (mod, dst,
614
+ ngen_operand_t (reg_buf_data_t (getHardware (), tmp_src0)),
615
615
src1);
616
616
ra_.safeRelease (tmp_src0);
617
617
} else {
@@ -635,7 +635,7 @@ class ir_kernel_base_t : public ngen_generator_t {
635
635
void efdiv (const ngen::InstructionModifier &mod, const ngen_operand_t &dst,
636
636
const ngen_operand_t &src0, const ngen_operand_t &src1) {
637
637
int esize = mod.getExecSize ();
638
- int grf_size = ngen::GRF::bytes (hw ());
638
+ int grf_size = ngen::GRF::bytes (getHardware ());
639
639
int div_esize = std::min (esize, grf_size / int (sizeof (float )));
640
640
641
641
gpu_assert (dst.type () == ngen::DataType::f);
@@ -656,10 +656,11 @@ class ir_kernel_base_t : public ngen_generator_t {
656
656
}
657
657
658
658
// fdiv_ieee() is not supported in XeHPG so we use a less precise, inv-based sequence.
659
- if (hw () < ngen::HW::XeHPC) {
659
+ if (getHardware () < ngen::HW::XeHPC) {
660
660
auto tmp = ra_.alloc_sub <float >();
661
661
inv (1 , tmp, src1.reg_data ());
662
- emul (mod, dst, src0, ngen_operand_t (reg_buf_data_t (hw (), tmp)));
662
+ emul (mod, dst, src0,
663
+ ngen_operand_t (reg_buf_data_t (getHardware (), tmp)));
663
664
ra_.safeRelease (tmp);
664
665
return ;
665
666
}
@@ -952,7 +953,7 @@ class ir_kernel_base_t : public ngen_generator_t {
952
953
// rem = x - qot * y
953
954
bool y_is_16_bit = (y <= static_cast <uint32_t >(
954
955
std::numeric_limits<int16_t >::max ()));
955
- if (hw () >= ngen::HW::XeLP && y_is_16_bit) {
956
+ if (getHardware () >= ngen::HW::XeLP && y_is_16_bit) {
956
957
mad (mod, rem, x, _qot, -int16_t (y));
957
958
} else {
958
959
auto tmp = ra_.alloc_sub <uint64_t >();
@@ -1043,7 +1044,7 @@ class ir_kernel_base_t : public ngen_generator_t {
1043
1044
int w = rd.getWidth ();
1044
1045
int hs = rd.getHS ();
1045
1046
int vs = rd.getVS ();
1046
- int grf_size = ngen::GRF::bytes (host->hw ());
1047
+ int grf_size = ngen::GRF::bytes (host->getHardware ());
1047
1048
int regs = utils::div_up (
1048
1049
std::max (esize * hs, 1 ) * rd.getBytes (), grf_size);
1049
1050
tmp_range_ = host_->ra_ .alloc_range (regs);
@@ -1106,7 +1107,7 @@ class ir_kernel_base_t : public ngen_generator_t {
1106
1107
1107
1108
bool overlaps (
1108
1109
int esize, const ngen::RegData &a, const ngen::RegData &b) const {
1109
- int grf_size = ngen::GRF::bytes (hw ());
1110
+ int grf_size = ngen::GRF::bytes (getHardware ());
1110
1111
int a_beg = a.getBase () * grf_size + a.getByteOffset ();
1111
1112
int b_beg = b.getBase () * grf_size + b.getByteOffset ();
1112
1113
int a_end = a_beg + std::max (esize * a.getHS (), 1 ) * a.getBytes () - 1 ;
@@ -1141,11 +1142,10 @@ template <ngen::HW hw>
1141
1142
class ir_kernel_t : public ir_kernel_base_t <generator_t <hw>> {
1142
1143
public:
1143
1144
using base = ir_kernel_base_t <generator_t <hw>>;
1145
+ using elf_generator_t = ngen::ELFCodeGenerator<hw>;
1144
1146
friend class expr_evaluator_t <ir_kernel_t >;
1145
1147
friend class ir_to_ngen_t <ir_kernel_t >;
1146
1148
1147
- NGEN_FORWARD_ELF (hw)
1148
-
1149
1149
ir_kernel_t (const kernel_desc_base_t &desc, const impl::engine_t *engine,
1150
1150
const debug_config_t &debug_config)
1151
1151
: base(desc, engine, debug_config)
@@ -1162,31 +1162,33 @@ class ir_kernel_t : public ir_kernel_base_t<generator_t<hw>> {
1162
1162
, local_range_(local_range) {}
1163
1163
1164
1164
// Exposes the ELF code generator's interface_ object as a read-only
// ngen::NEOInterfaceHandler reference.
const ngen::NEOInterfaceHandler &neo_interface () const {
1165
- return ngen::ELFCodeGenerator<hw> ::interface_;
1165
+ return elf_generator_t ::interface_;
1166
1166
}
1167
1167
1168
1168
// Assigns `kernel_iface` to the kernel interface stored in the base class
// (base::kernel_iface_), replacing whatever was held there before.
void set_kernel_iface (const kernel_iface_t &kernel_iface) {
1169
1169
base::kernel_iface_ = kernel_iface;
1170
1170
}
1171
1171
1172
1172
void setup_interface (const stmt_t &kernel_body = stmt_t ()) {
1173
- externalName (kernel_name_);
1174
- requireLocalID (3 );
1175
- requireLocalSize ();
1176
- requireGRF (base::exec_cfg ().regs ());
1177
- requireSIMD (base::exec_cfg ().simd ());
1178
- requireBarrier ();
1179
- if (require_dpas_) requireDPAS ();
1180
- if (has_send_atomics (kernel_body)) requireGlobalAtomics ();
1173
+ elf_generator_t::externalName (kernel_name_);
1174
+ elf_generator_t::requireLocalID (3 );
1175
+ elf_generator_t::requireLocalSize ();
1176
+ elf_generator_t::requireGRF (base::exec_cfg ().regs ());
1177
+ elf_generator_t::requireSIMD (base::exec_cfg ().simd ());
1178
+ elf_generator_t::requireBarrier ();
1179
+ if (require_dpas_) elf_generator_t::requireDPAS ();
1180
+ if (has_send_atomics (kernel_body))
1181
+ elf_generator_t::requireGlobalAtomics ();
1181
1182
1182
1183
for (int i = 0 ; i < base::kernel_iface ().nargs (); i++) {
1183
1184
auto &name = base::kernel_iface ().arg_name (i);
1184
1185
auto &type = base::kernel_iface ().arg_type (i);
1185
1186
if (type.is_ptr ()) {
1186
- newArgument (name, ngen::ExternalArgumentType::GlobalPtr,
1187
+ elf_generator_t::newArgument (name,
1188
+ ngen::ExternalArgumentType::GlobalPtr,
1187
1189
ngen::GlobalAccessType::Stateless);
1188
1190
} else {
1189
- newArgument (name, to_ngen (type));
1191
+ elf_generator_t:: newArgument (name, to_ngen (type));
1190
1192
}
1191
1193
}
1192
1194
@@ -1200,10 +1202,10 @@ class ir_kernel_t : public ir_kernel_base_t<generator_t<hw>> {
1200
1202
// TODO: Use status code for this check.
1201
1203
gpu_except_not_implemented (" SLM size limit is exceeded." );
1202
1204
}
1203
- requireSLM (slm_size);
1205
+ elf_generator_t:: requireSLM (slm_size);
1204
1206
}
1205
1207
1206
- finalizeInterface ();
1208
+ elf_generator_t:: finalizeInterface ();
1207
1209
}
1208
1210
1209
1211
int thread_group_size () const {
0 commit comments