Skip to content

Commit 88a0cc2

Browse files
committed
xe: jit: codegen: work around Windows build issue
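The MSVC compiler fails with an error stating that hw is not a compile-time constant. This appears to be caused by a name clash between the hw template parameter of ir_kernel_t and the hw() member function declared in the base class ir_kernel_base_t. This patch removes hw() and calls getHardware() directly instead, which allows the code to compile with MSVC.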
1 parent 25f5b89 commit 88a0cc2

File tree

1 file changed

+33
-31
lines changed

1 file changed

+33
-31
lines changed

src/gpu/intel/jit/codegen/kernel.hpp

+33-31
Original file line numberDiff line numberDiff line change
@@ -192,8 +192,8 @@ class ir_kernel_base_t : public ngen_generator_t {
192192
const impl::engine_t *engine, ngen_generator_args... args)
193193
: ngen_generator_t(std::forward<ngen_generator_args>(args)...)
194194
, exec_cfg_(desc.exec_cfg(engine))
195-
, ra_(hw())
196-
, emu_strategy(hw(), exec_cfg_.hw().stepping_id()) {
195+
, ra_(getHardware())
196+
, emu_strategy(getHardware(), exec_cfg_.hw().stepping_id()) {
197197
desc.init_kernel_iface(kernel_iface_);
198198
ra_.setRegisterCount(exec_cfg_.regs());
199199
}
@@ -204,13 +204,12 @@ class ir_kernel_base_t : public ngen_generator_t {
204204
: ngen_generator_t(std::forward<ngen_generator_args>(args)...)
205205
, kernel_iface_(kernel_iface)
206206
, exec_cfg_(exec_cfg)
207-
, ra_(hw())
208-
, emu_strategy(hw(), exec_cfg.hw().stepping_id()) {
207+
, ra_(getHardware())
208+
, emu_strategy(getHardware(), exec_cfg.hw().stepping_id()) {
209209
ngen_generator_t::setStepping(exec_cfg.hw().stepping_id());
210210
ra_.setRegisterCount(exec_cfg_.regs());
211211
}
212212

213-
ngen::HW hw() const { return ngen_generator_t::getHardware(); }
214213
const kernel_iface_t &kernel_iface() const { return kernel_iface_; }
215214
const exec_config_t &exec_cfg() const { return exec_cfg_; }
216215

@@ -308,7 +307,7 @@ class ir_kernel_base_t : public ngen_generator_t {
308307
const ngen::Subregister &src1, uint32_t src2) {
309308
bool is_src2_16_bit
310309
= (src2 <= std::numeric_limits<uint16_t>::max());
311-
if (hw() >= ngen::HW::XeLP && is_src2_16_bit && false) {
310+
if (getHardware() >= ngen::HW::XeLP && is_src2_16_bit && false) {
312311
mad(1, dst, src0, src1, src2);
313312
} else {
314313
auto tmp = ra_.alloc_sub<uint64_t>();
@@ -552,7 +551,7 @@ class ir_kernel_base_t : public ngen_generator_t {
552551
auto scope = ngen_register_scope_t(ra_);
553552
align_src_dst_offset(this, scope, mod, dst, src0);
554553
align_src_dst_offset(this, scope, mod, dst, src1);
555-
if (hw() >= ngen::HW::XeHP) {
554+
if (getHardware() >= ngen::HW::XeHP) {
556555
if (src2.is_reg_data()) {
557556
align_src_dst_offset(this, scope, mod, dst, src2);
558557
add3(mod, dst.reg_data(), fixup_ternary_rgn(src0.reg_data()),
@@ -585,7 +584,7 @@ class ir_kernel_base_t : public ngen_generator_t {
585584
align_src_dst_offset(this, scope, mod, dst, src2);
586585
mad(mod, dst.reg_data(), fixup_ternary_rgn(src0.reg_data()),
587586
fixup_ternary_rgn(src1.reg_data()), src2.reg_data());
588-
} else if (hw() < ngen::HW::XeLP) {
587+
} else if (getHardware() < ngen::HW::XeLP) {
589588
align_src_dst_offset(this, scope, mod, dst, src0);
590589
mul(mod, dst.reg_data(), src1.reg_data(), src2.immediate());
591590
add(mod, dst.reg_data(), dst.reg_data(), src0.reg_data());
@@ -608,10 +607,11 @@ class ir_kernel_base_t : public ngen_generator_t {
608607
const ngen_operand_t &src0, const ngen_operand_t &src1) {
609608
if (!src1.is_immediate()) {
610609
// Immediate src0 is not supported with fdiv_ieee.
611-
if (src0.is_immediate() && hw() >= ngen::HW::XeHPC) {
610+
if (src0.is_immediate() && getHardware() >= ngen::HW::XeHPC) {
612611
auto tmp_src0 = ra_.alloc_sub(src0.type());
613612
mov(mod, tmp_src0, src0.immediate());
614-
efdiv(mod, dst, ngen_operand_t(reg_buf_data_t(hw(), tmp_src0)),
613+
efdiv(mod, dst,
614+
ngen_operand_t(reg_buf_data_t(getHardware(), tmp_src0)),
615615
src1);
616616
ra_.safeRelease(tmp_src0);
617617
} else {
@@ -635,7 +635,7 @@ class ir_kernel_base_t : public ngen_generator_t {
635635
void efdiv(const ngen::InstructionModifier &mod, const ngen_operand_t &dst,
636636
const ngen_operand_t &src0, const ngen_operand_t &src1) {
637637
int esize = mod.getExecSize();
638-
int grf_size = ngen::GRF::bytes(hw());
638+
int grf_size = ngen::GRF::bytes(getHardware());
639639
int div_esize = std::min(esize, grf_size / int(sizeof(float)));
640640

641641
gpu_assert(dst.type() == ngen::DataType::f);
@@ -656,10 +656,11 @@ class ir_kernel_base_t : public ngen_generator_t {
656656
}
657657

658658
// fdiv_ieee() is not supported in XeHPG so we use a less precise, inv-based sequence.
659-
if (hw() < ngen::HW::XeHPC) {
659+
if (getHardware() < ngen::HW::XeHPC) {
660660
auto tmp = ra_.alloc_sub<float>();
661661
inv(1, tmp, src1.reg_data());
662-
emul(mod, dst, src0, ngen_operand_t(reg_buf_data_t(hw(), tmp)));
662+
emul(mod, dst, src0,
663+
ngen_operand_t(reg_buf_data_t(getHardware(), tmp)));
663664
ra_.safeRelease(tmp);
664665
return;
665666
}
@@ -952,7 +953,7 @@ class ir_kernel_base_t : public ngen_generator_t {
952953
// rem = x - qot * y
953954
bool y_is_16_bit = (y <= static_cast<uint32_t>(
954955
std::numeric_limits<int16_t>::max()));
955-
if (hw() >= ngen::HW::XeLP && y_is_16_bit) {
956+
if (getHardware() >= ngen::HW::XeLP && y_is_16_bit) {
956957
mad(mod, rem, x, _qot, -int16_t(y));
957958
} else {
958959
auto tmp = ra_.alloc_sub<uint64_t>();
@@ -1043,7 +1044,7 @@ class ir_kernel_base_t : public ngen_generator_t {
10431044
int w = rd.getWidth();
10441045
int hs = rd.getHS();
10451046
int vs = rd.getVS();
1046-
int grf_size = ngen::GRF::bytes(host->hw());
1047+
int grf_size = ngen::GRF::bytes(host->getHardware());
10471048
int regs = utils::div_up(
10481049
std::max(esize * hs, 1) * rd.getBytes(), grf_size);
10491050
tmp_range_ = host_->ra_.alloc_range(regs);
@@ -1106,7 +1107,7 @@ class ir_kernel_base_t : public ngen_generator_t {
11061107

11071108
bool overlaps(
11081109
int esize, const ngen::RegData &a, const ngen::RegData &b) const {
1109-
int grf_size = ngen::GRF::bytes(hw());
1110+
int grf_size = ngen::GRF::bytes(getHardware());
11101111
int a_beg = a.getBase() * grf_size + a.getByteOffset();
11111112
int b_beg = b.getBase() * grf_size + b.getByteOffset();
11121113
int a_end = a_beg + std::max(esize * a.getHS(), 1) * a.getBytes() - 1;
@@ -1141,11 +1142,10 @@ template <ngen::HW hw>
11411142
class ir_kernel_t : public ir_kernel_base_t<generator_t<hw>> {
11421143
public:
11431144
using base = ir_kernel_base_t<generator_t<hw>>;
1145+
using elf_generator_t = ngen::ELFCodeGenerator<hw>;
11441146
friend class expr_evaluator_t<ir_kernel_t>;
11451147
friend class ir_to_ngen_t<ir_kernel_t>;
11461148

1147-
NGEN_FORWARD_ELF(hw)
1148-
11491149
ir_kernel_t(const kernel_desc_base_t &desc, const impl::engine_t *engine,
11501150
const debug_config_t &debug_config)
11511151
: base(desc, engine, debug_config)
@@ -1162,31 +1162,33 @@ class ir_kernel_t : public ir_kernel_base_t<generator_t<hw>> {
11621162
, local_range_(local_range) {}
11631163

11641164
const ngen::NEOInterfaceHandler &neo_interface() const {
1165-
return ngen::ELFCodeGenerator<hw>::interface_;
1165+
return elf_generator_t::interface_;
11661166
}
11671167

11681168
void set_kernel_iface(const kernel_iface_t &kernel_iface) {
11691169
base::kernel_iface_ = kernel_iface;
11701170
}
11711171

11721172
void setup_interface(const stmt_t &kernel_body = stmt_t()) {
1173-
externalName(kernel_name_);
1174-
requireLocalID(3);
1175-
requireLocalSize();
1176-
requireGRF(base::exec_cfg().regs());
1177-
requireSIMD(base::exec_cfg().simd());
1178-
requireBarrier();
1179-
if (require_dpas_) requireDPAS();
1180-
if (has_send_atomics(kernel_body)) requireGlobalAtomics();
1173+
elf_generator_t::externalName(kernel_name_);
1174+
elf_generator_t::requireLocalID(3);
1175+
elf_generator_t::requireLocalSize();
1176+
elf_generator_t::requireGRF(base::exec_cfg().regs());
1177+
elf_generator_t::requireSIMD(base::exec_cfg().simd());
1178+
elf_generator_t::requireBarrier();
1179+
if (require_dpas_) elf_generator_t::requireDPAS();
1180+
if (has_send_atomics(kernel_body))
1181+
elf_generator_t::requireGlobalAtomics();
11811182

11821183
for (int i = 0; i < base::kernel_iface().nargs(); i++) {
11831184
auto &name = base::kernel_iface().arg_name(i);
11841185
auto &type = base::kernel_iface().arg_type(i);
11851186
if (type.is_ptr()) {
1186-
newArgument(name, ngen::ExternalArgumentType::GlobalPtr,
1187+
elf_generator_t::newArgument(name,
1188+
ngen::ExternalArgumentType::GlobalPtr,
11871189
ngen::GlobalAccessType::Stateless);
11881190
} else {
1189-
newArgument(name, to_ngen(type));
1191+
elf_generator_t::newArgument(name, to_ngen(type));
11901192
}
11911193
}
11921194

@@ -1200,10 +1202,10 @@ class ir_kernel_t : public ir_kernel_base_t<generator_t<hw>> {
12001202
// TODO: Use status code for this check.
12011203
gpu_except_not_implemented("SLM size limit is exceeded.");
12021204
}
1203-
requireSLM(slm_size);
1205+
elf_generator_t::requireSLM(slm_size);
12041206
}
12051207

1206-
finalizeInterface();
1208+
elf_generator_t::finalizeInterface();
12071209
}
12081210

12091211
int thread_group_size() const {

0 commit comments

Comments
 (0)