Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ngen: downstream nGEN #2942

Merged
merged 1 commit into from
Mar 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions src/gpu/intel/ocl/hw_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,12 @@ status_t init_gpu_hw_info(impl::engine_t *engine, cl_device_id device,
int &gpu_product_family, int &stepping_id, uint64_t &native_extensions,
bool &mayiuse_systolic, bool &mayiuse_ngen_kernels) {
using namespace ngen;
HW hw = HW::Unknown;
Product product = {ProductFamily::Unknown, 0};
jit::generator_t<HW::Unknown>::detectHWInfo(context, device, hw, product);
Product product = ngen::OpenCLCodeGenerator<HW::Unknown>::detectHWInfo(
context, device);
bool is_xelpg = (product.family == ngen::ProductFamily::ARL
|| product.family == ngen::ProductFamily::MTL);

gpu_arch = jit::convert_ngen_arch_to_dnnl(hw);
gpu_arch = jit::convert_ngen_arch_to_dnnl(ngen::getCore(product.family));
gpu_product_family = static_cast<int>(product.family);
stepping_id = product.stepping;

Expand Down
8 changes: 3 additions & 5 deletions src/gpu/intel/sycl/l0/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -386,12 +386,10 @@ status_t init_gpu_hw_info(impl::engine_t *engine, ze_device_handle_t device,
int &stepping_id, uint64_t &native_extensions, bool &mayiuse_systolic,
bool &mayiuse_ngen_kernels) {
using namespace ngen;
HW hw = HW::Unknown;
Product product = {ProductFamily::Unknown, 0};
LevelZeroCodeGenerator<HW::Unknown>::detectHWInfo(
context, device, hw, product);
Product product = LevelZeroCodeGenerator<HW::Unknown>::detectHWInfo(
context, device);

gpu_arch = jit::convert_ngen_arch_to_dnnl(hw);
gpu_arch = jit::convert_ngen_arch_to_dnnl(ngen::getCore(product.family));
gpu_product_family = static_cast<int>(product.family);
stepping_id = product.stepping;

Expand Down
18 changes: 10 additions & 8 deletions third_party/ngen/ngen.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@
#ifndef NGEN_HPP
#define NGEN_HPP

#ifdef ENABLE_LLVM_WCONVERSION
#if defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wimplicit-int-conversion"
#endif

#include "ngen_config.hpp"
#include "ngen_config_internal.hpp"

#include <array>
#include <cstring>
Expand All @@ -42,16 +42,13 @@
#include "ngen_core.hpp"
#include "ngen_auto_swsb.hpp"
#include "ngen_debuginfo.hpp"

// -----------------------------------------------------------------------
// Binary formats, split between pre-Gen12 and post-Gen12.
#include "ngen_gen8.hpp"
#include "ngen_gen12.hpp"
// -----------------------------------------------------------------------

#ifdef NGEN_ASM
#include "ngen_asm.hpp"
#endif

namespace NGEN_NAMESPACE {

Expand Down Expand Up @@ -299,7 +296,7 @@ class BinaryCodeGenerator
pushStream(rootStream);
}

explicit BinaryCodeGenerator(int stepping_ = 0, DebugConfig debugConfig = {}) : BinaryCodeGenerator({genericProductFamily(hw), stepping_}, debugConfig) {}
explicit BinaryCodeGenerator(int stepping_ = 0, DebugConfig debugConfig = {}) : BinaryCodeGenerator({genericProductFamily(hw), stepping_, PlatformType::Unknown}, debugConfig) {}

~BinaryCodeGenerator() {
for (size_t sn = 1; sn < streamStack.size(); sn++)
Expand Down Expand Up @@ -684,7 +681,7 @@ class BinaryCodeGenerator
// Single-label convenience overload of halt: forwards to the two-label
// overload, passing `jip` for both label arguments and handing `loc` through
// unchanged.
void halt(const InstructionModifier &mod, Label &jip, SourceLocation loc = {}) {
halt(mod, jip, jip, loc);
}
void if_(InstructionModifier mod, Label &jip, Label &uip, bool branchCtrl = false, SourceLocation loc = {}) {
void if_(InstructionModifier mod, Label &jip, Label &uip, bool branchCtrl, SourceLocation loc = {}) {
mod.setBranchCtrl(branchCtrl);
opBranch(Opcode::if_, mod, null, jip, uip, loc);
}
Expand Down Expand Up @@ -1506,10 +1503,15 @@ int getStepping() const { return scope::getStepping(); } \
void setProduct(NGEN_NAMESPACE::Product product_) { scope::setProduct(product_); } \
void setProductFamily(NGEN_NAMESPACE::ProductFamily family_) { scope::setProductFamily(family_); } \
void setStepping(int stepping_) { scope::setStepping(stepping_); } \
NGEN_FORWARD_SCOPE_EXTRA(scope) \
NGEN_FORWARD_SCOPE_OP_NAMES(scope) \
NGEN_FORWARD_SCOPE_MIN_MAX(scope) \
NGEN_FORWARD_SCOPE_REGISTERS(scope)

// Both macros are defined with an empty replacement list here, so every use
// expands to nothing. NGEN_FORWARD_SCOPE_EXTRA is referenced from the
// scope-forwarding macro body that precedes these definitions.
// NOTE(review): these look like extension hooks for builds that supply
// additional forwarding declarations -- confirm intended usage.
#define NGEN_FORWARD_SCOPE_EXTRA(scope)
#define NGEN_FORWARD_SCOPE_EXTRA_ELF_OVERRIDES(hw)


#ifdef NGEN_NO_OP_NAMES
#define NGEN_FORWARD_SCOPE_OP_NAMES(scope)
#else
Expand Down Expand Up @@ -2798,7 +2800,7 @@ void BinaryCodeGenerator<hw>::opNop(Opcode op, SourceLocation loc)

} /* namespace NGEN_NAMESPACE */

#ifdef ENABLE_LLVM_WCONVERSION
#if defined(__clang__)
#pragma clang diagnostic pop
#endif

Expand Down
46 changes: 37 additions & 9 deletions third_party/ngen/ngen_asm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,18 @@

#ifndef NGEN_ASM_HPP
#define NGEN_ASM_HPP
#ifdef NGEN_ASM

#include "ngen_config.hpp"
#include "ngen_config_internal.hpp"

#ifdef NGEN_ASM

#include <array>
#include <cstdint>
#include <sstream>
#include <string>

#include "ngen_core.hpp"
#include "ngen_debuginfo.hpp"
#include "ngen_gen12.hpp"

namespace NGEN_NAMESPACE {
Expand Down Expand Up @@ -210,6 +213,7 @@ struct AsmInstruction {
explicit AsmInstruction(uint32_t inum_, const std::string &comment_)
: op(Opcode::illegal), ext(0), inum(inum_), mod{}, dst{}, src{}, labelManager{nullptr}, comment{comment_} {}
inline AsmInstruction(const autoswsb::SyncInsertion &si);
inline AsmInstruction(const autoswsb::DummyMovInsertion &mi);

bool isLabel() const { return (op == Opcode::illegal) && (dst.type == AsmOperand::Type::label); }
bool isComment() const { return (op == Opcode::illegal) && !comment.empty(); }
Expand Down Expand Up @@ -278,6 +282,21 @@ AsmInstruction::AsmInstruction(const autoswsb::SyncInsertion &si)
src[0] = NullRegister();
}

// Builds the assembler-side instruction record for a dummy mov described by
// an auto-SWSB DummyMovInsertion: a mov_gen12 writing the null register
// (retyped to mi.dt), carrying the SWSB info from `mi`.
AsmInstruction::AsmInstruction(const autoswsb::DummyMovInsertion &mi)
{
    op = Opcode::mov_gen12;
    ext = 0;

    // Literal 1 combined with mask-control = true.
    // NOTE(review): presumably exec size 1 + NoMask -- confirm against
    // InstructionModifier.
    mod = 1 | InstructionModifier::createMaskCtrl(true);
    mod.setSWSB(mi.swsb);

    dst = NullRegister().retype(mi.dt);

    // Only src[0] is populated; the remaining source slots are empty.
    src[1] = NoOperand();
    src[2] = NoOperand();
    src[3] = NoOperand();

    // Source is either a zero immediate of type mi.dt, or element 0 of the
    // GRF selected by mi.grf viewed as mi.dt.
    if (mi.constant)
        src[0] = Immediate::zero(mi.dt);
    else
        src[0] = GRF(mi.grf).sub(0, mi.dt);
}

unsigned AsmInstruction::getTypecode(const AsmOperand &op)
{
DataType dt = DataType::invalid;
Expand Down Expand Up @@ -416,7 +435,7 @@ class AsmCodeGenerator {
streamStack.push_back(new InstructionStream());
}

explicit AsmCodeGenerator(HW hardware_, int stepping_ = 0) : AsmCodeGenerator({genericProductFamily(hardware_), 0}) {}
// Convenience constructor: delegates to the Product-based constructor using
// the generic product family for `hardware_`, the caller-supplied stepping,
// and an unknown platform type.
// The stepping argument is forwarded (it was previously dropped in favour of
// a hard-coded 0), so the ostream overload that delegates here with its own
// `stepping_` now actually takes effect.
explicit AsmCodeGenerator(HW hardware_, int stepping_ = 0) : AsmCodeGenerator({genericProductFamily(hardware_), stepping_, PlatformType::Unknown}) {}

AsmCodeGenerator(HW hardware_, std::ostream &defaultOutput_, int stepping_ = 0) : AsmCodeGenerator(hardware_, stepping_) {
defaultOutput = &defaultOutput_;
Expand Down Expand Up @@ -488,7 +507,6 @@ class AsmCodeGenerator {
LabelManager labelManager;
std::vector<InstructionStream*> streamStack;


inline void unsupported();

// Output functions.
Expand Down Expand Up @@ -555,7 +573,7 @@ class AsmCodeGenerator {
src0.fixup(hardware, 1, 0, defaultType, 0, 3);
src1.fixup(hardware, 1, 0, defaultType, 1, 3);
src2.fixup(hardware, 1, 0, defaultType, 2, 3);
(void) streamStack.back()->append(op, static_cast<uint16_t>((sdepth << 8) | rcount), mod | defaultModifier, &labelManager, dst, src0, src1, src2);
(void) streamStack.back()->append(op, (sdepth << 8) | rcount, mod | defaultModifier, &labelManager, dst, src0, src1, src2);
}
template <typename D, typename S0> void opCall(Opcode op, const InstructionModifier &mod, D dst, S0 src0) {
(void) streamStack.back()->append(op, 0, mod | defaultModifier | NoMask, &labelManager, dst, src0);
Expand All @@ -582,7 +600,6 @@ class AsmCodeGenerator {
// Accessors for flags stored in the generator's default instruction modifier.
// getDefaultNoMask reports defaultModifier.isWrEn().
bool getDefaultNoMask() const { return defaultModifier.isWrEn(); }
// getDefaultAutoSWSB reports defaultModifier.isAutoSWSB().
bool getDefaultAutoSWSB() const { return defaultModifier.isAutoSWSB(); }


// Stream handling.
// Allocates a fresh InstructionStream with `new` and forwards it to the
// pointer-taking pushStream overload.
// NOTE(review): who frees this allocation is not visible here -- confirm.
void pushStream() { pushStream(new InstructionStream()); }
// Forwards the address of a caller-provided stream to the pointer-taking
// pushStream overload.
void pushStream(InstructionStream &s) { pushStream(&s); }
Expand Down Expand Up @@ -917,7 +934,7 @@ class AsmCodeGenerator {
// Single-label convenience overload of halt: forwards to the two-label
// overload with `jip` used for both label arguments.
// `loc` is accepted for signature parity but is not forwarded by this
// overload.
void halt(const InstructionModifier &mod, Label &jip, SourceLocation loc = {}) {
halt(mod, jip, jip);
}
void if_(InstructionModifier mod, Label &jip, Label &uip, bool branchCtrl = false, SourceLocation loc = {}) {
void if_(InstructionModifier mod, Label &jip, Label &uip, bool branchCtrl, SourceLocation loc = {}) {
(void) jip.getID(labelManager);
(void) uip.getID(labelManager);
opX(Opcode::if_, DataType::invalid, mod, NoOperand(), jip, uip, NoOperand(), branchCtrl);
Expand Down Expand Up @@ -1073,6 +1090,9 @@ class AsmCodeGenerator {
}
template <typename DT = void>
void movi(const InstructionModifier &mod, const RegData &dst, const RegData &src0, SourceLocation loc = {}) {
#ifdef NGEN_SAFE
if (!src0.isIndirect()) throw invalid_address_mode_exception();
#endif
if (hardware >= HW::Gen10)
movi<DT>(mod, dst, src0, null);
else
Expand All @@ -1082,6 +1102,7 @@ class AsmCodeGenerator {
void movi(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1, SourceLocation loc = {}) {
#ifdef NGEN_SAFE
if (hardware < HW::Gen10) throw unsupported_instruction();
if (!src0.isIndirect()) throw invalid_address_mode_exception();
#endif
opX(isGen12 ? Opcode::movi_gen12 : Opcode::movi, getDataType<DT>(), mod, dst, src0, src1);
}
Expand Down Expand Up @@ -1585,17 +1606,24 @@ void AsmCodeGenerator::getCode(std::ostream &out)

autoswsb::BasicBlockList analysis = autoswsb::autoSWSB(hardware, declaredGRFs, streamStack.back()->buffer);
std::multimap<int32_t, autoswsb::SyncInsertion*> syncs; // Syncs inserted by auto-SWSB.
std::multimap<int32_t, autoswsb::DummyMovInsertion*> movs; // Dummy moves inserted by auto-SWSB.

for (auto &bb : analysis)
for (auto &sync : bb.syncs)
for (auto &bb : analysis) {
for (auto &sync: bb.syncs)
syncs.insert(std::make_pair(sync.inum, &sync));
for (auto &mov: bb.movs)
movs.insert(std::make_pair(mov.inum, &mov));
}

auto nextSync = syncs.begin();
auto nextMov = movs.begin();
int lineNo = 0;

for (auto &i : streamStack.back()->buffer) {
while ((nextSync != syncs.end()) && (nextSync->second->inum == i.inum))
outX(out, *(nextSync++)->second, lineNo++);
while ((nextMov != movs.end()) && (nextMov->second->inum == i.inum))
outX(out, *(nextMov++)->second, lineNo++);

if (i.isLabel()) {
i.dst.label.outputText(out, PrintDetail::full, labelManager);
Expand Down
11 changes: 2 additions & 9 deletions third_party/ngen/ngen_auto_swsb.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,6 @@
#ifndef NGEN_AUTO_SWSB_HPP
#define NGEN_AUTO_SWSB_HPP

#ifdef ENABLE_LLVM_WCONVERSION
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wimplicit-int-conversion"
#endif

#if defined(NGEN_DEBUG) || defined(NGEN_DEBUG_PROPAGATE) || defined(NGEN_DEBUG_BB)
#include <iomanip>
#include <iostream>
Expand All @@ -35,6 +30,8 @@
#include <list>
#include <map>

#include "ngen_core.hpp"

namespace NGEN_NAMESPACE {
namespace autoswsb {

Expand Down Expand Up @@ -2630,8 +2627,4 @@ inline BasicBlockList autoSWSB(HW hw, int grfCount, Program &program)
// Instruction operator[](int inum);
// size_t size() const;

#ifdef ENABLE_LLVM_WCONVERSION
#pragma clang diagnostic pop
#endif

#endif /* NGEN_AUTOSWSB_HPP */
44 changes: 44 additions & 0 deletions third_party/ngen/ngen_config_internal.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*******************************************************************************
* Copyright 2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

#ifndef NGEN_CONFIG_INTERNAL_HPP
#define NGEN_CONFIG_INTERNAL_HPP

// Selects the nGEN configuration: a user-supplied ngen_config.hpp when one is
// requested (NGEN_CONFIG defined) or discoverable via __has_include, otherwise
// the default settings below.
//
// The __has_include probe is nested inside its own `defined(__has_include)`
// guard on purpose. The preprocessor parses an entire #if expression without
// short-circuiting, so `defined(__has_include) && __has_include("...")` on a
// single line is a hard error on toolchains that lack __has_include.
//
// Drop NGEN_CONFIG define once C++11/14 support dropped
#if defined(NGEN_CONFIG)
#define NGEN_DETAIL_USE_CONFIG_HEADER
#elif defined(__has_include)
#if __has_include("ngen_config.hpp")
#define NGEN_DETAIL_USE_CONFIG_HEADER
#endif
#endif

#ifdef NGEN_DETAIL_USE_CONFIG_HEADER
// Helper macro is private to this header; remove it before pulling in user code.
#undef NGEN_DETAIL_USE_CONFIG_HEADER
#include "ngen_config.hpp"
#else
// Default config settings

#ifndef NGEN_NAMESPACE
#define NGEN_NAMESPACE ngen
#endif

#ifndef NGEN_ASM
#define NGEN_ASM
#endif

// Enable source-location support only when building as C++20 or later and the
// standard library advertises std::source_location. __has_include is
// guaranteed from C++17 on, so it can be used unguarded inside this check.
#if (__cplusplus >= 202002L || _MSVC_LANG >= 202002L)
#if __has_include(<version>)
#include <version>
#if __cpp_lib_source_location >= 201907L
#define NGEN_ENABLE_SOURCE_LOCATION true
#endif
#endif
#endif

#endif
#endif /* header guard */
Loading
Loading