Skip to content

Commit b63debf

Browse files
committed
gpu: intel: sycl: use only l0 queries for l0 devices
1 parent 6620bfd commit b63debf

File tree

3 files changed

+148
-22
lines changed

3 files changed

+148
-22
lines changed

src/gpu/intel/sycl/device_info.cpp

+11-20
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*******************************************************************************
2-
* Copyright 2020-2024 Intel Corporation
2+
* Copyright 2020-2025 Intel Corporation
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -19,6 +19,7 @@
1919
#include "gpu/intel/sycl/compat.hpp"
2020
#include "gpu/intel/sycl/device_info.hpp"
2121
#include "gpu/intel/sycl/engine.hpp"
22+
#include "gpu/intel/sycl/l0/utils.hpp"
2223
#include "gpu/intel/sycl/utils.hpp"
2324

2425
#include "gpu/intel/ocl/ocl_gpu_hw_info.hpp"
@@ -34,6 +35,7 @@ status_t device_info_t::init_arch(impl::engine_t *engine) {
3435
auto *sycl_engine
3536
= utils::downcast<const gpu::intel::sycl::engine_t *>(engine);
3637
auto &device = sycl_engine->device();
38+
auto &ctx = sycl_engine->context();
3739

3840
// skip cpu engines
3941
if (!device.is_gpu()) return status::success;
@@ -43,34 +45,23 @@ status_t device_info_t::init_arch(impl::engine_t *engine) {
4345

4446
auto be = xpu::sycl::get_backend(device);
4547
if (be == xpu::sycl::backend_t::opencl) {
46-
cl_int err = CL_SUCCESS;
47-
4848
auto ocl_dev = xpu::sycl::compat::get_native<cl_device_id>(device);
4949
auto ocl_dev_wrapper = xpu::ocl::make_wrapper(ocl_dev);
5050

51-
auto ocl_ctx_wrapper = xpu::ocl::make_wrapper(
52-
clCreateContext(nullptr, 1, &ocl_dev, nullptr, nullptr, &err));
53-
OCL_CHECK(err);
51+
auto ocl_ctx = xpu::sycl::compat::get_native<cl_context>(ctx);
52+
auto ocl_ctx_wrapper = xpu::ocl::make_wrapper(ocl_ctx);
5453

5554
gpu::intel::ocl::init_gpu_hw_info(engine, ocl_dev_wrapper,
5655
ocl_ctx_wrapper, ip_version_, gpu_arch_, gpu_product_family_,
5756
stepping_id_, native_extensions_, mayiuse_systolic_,
5857
mayiuse_ngen_kernels_);
5958
} else if (be == xpu::sycl::backend_t::level0) {
60-
// TODO: add support for L0 binary ngen check
61-
// XXX: query from ocl_engine for now
62-
std::unique_ptr<gpu::intel::ocl::ocl_gpu_engine_t, engine_deleter_t>
63-
ocl_engine;
64-
CHECK(gpu::intel::sycl::create_ocl_engine(&ocl_engine, sycl_engine));
65-
66-
auto *dev_info = ocl_engine->device_info();
67-
ip_version_ = dev_info->ip_version();
68-
gpu_arch_ = dev_info->gpu_arch();
69-
gpu_product_family_ = dev_info->gpu_product_family();
70-
stepping_id_ = dev_info->stepping_id();
71-
native_extensions_ = dev_info->native_extensions();
72-
mayiuse_systolic_ = dev_info->mayiuse_systolic();
73-
mayiuse_ngen_kernels_ = dev_info->mayiuse_ngen_kernels();
59+
auto ze_dev = xpu::sycl::compat::get_native<ze_device_handle_t>(device);
60+
auto ze_ctx = xpu::sycl::compat::get_native<ze_context_handle_t>(ctx);
61+
62+
gpu::intel::sycl::init_gpu_hw_info(engine, ze_dev, ze_ctx, ip_version_,
63+
gpu_arch_, gpu_product_family_, stepping_id_,
64+
native_extensions_, mayiuse_systolic_, mayiuse_ngen_kernels_);
7465
} else {
7566
assert(!"not_expected");
7667
}

src/gpu/intel/sycl/l0/utils.cpp

+130-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*******************************************************************************
2-
* Copyright 2020-2024 Intel Corporation
2+
* Copyright 2020-2025 Intel Corporation
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -17,6 +17,10 @@
1717
#include "gpu/intel/sycl/l0/utils.hpp"
1818
#include "oneapi/dnnl/dnnl_config.h"
1919

20+
#include "gpu/intel/jit/binary_format.hpp"
21+
#include "gpu/intel/jit/ngen/ngen_level_zero.hpp"
22+
#include "gpu/intel/jit/utils/ngen_type_bridge.hpp"
23+
2024
#if defined(__linux__)
2125
#include <dlfcn.h>
2226
#elif defined(_WIN32)
@@ -26,6 +30,7 @@
2630
#endif
2731

2832
#include "gpu/intel/sycl/l0/level_zero/ze_api.h"
33+
#include "gpu/intel/sycl/l0/level_zero/ze_intel_gpu.h"
2934

3035
#if !defined(__SYCL_COMPILER_VERSION)
3136
#error "Unsupported compiler"
@@ -173,6 +178,21 @@ status_t func_zeDeviceGetProperties(
173178
return status::success;
174179
}
175180

181+
// Thin wrapper around the Level Zero entry point zeDeviceGetModuleProperties.
//
// The entry point is looked up by name through find_ze_symbol() the first time
// this function runs and the resulting pointer is kept in a function-local
// static for later calls.
//
// hDevice           - device handle forwarded to the driver call.
// pDeviceProperties - properties structure forwarded to the driver call.
//
// Returns status::runtime_error when the symbol lookup yields a null pointer.
// Otherwise the driver call result is passed through ZE_CHECK and
// status::success is returned once that check passes.
status_t func_zeDeviceGetModuleProperties(ze_device_handle_t hDevice,
        ze_device_module_properties_t *pDeviceProperties) {
    using ze_entry_point_t = decltype(&zeDeviceGetModuleProperties);
    static auto f
            = find_ze_symbol<ze_entry_point_t>("zeDeviceGetModuleProperties");

    if (f) {
        ZE_CHECK(f(hDevice, pDeviceProperties));
        return status::success;
    }

    // Symbol is not available: report it and fail the query.
    VERROR(common, level_zero,
            "failed to find systolic query extension (maybe update the "
            "driver?)");
    return status::runtime_error;
}
195+
176196
} // namespace
177197

178198
// This function is called from compatibility layer that ensures compatibility
@@ -272,6 +292,115 @@ bool compare_ze_devices(const ::sycl::device &lhs, const ::sycl::device &rhs) {
272292
return lhs_ze_handle == rhs_ze_handle;
273293
}
274294

295+
// Reads the IP version reported for a Level Zero device.
//
// device     - device handle to query.
// ip_version - receives ze_device_ip_version_ext_t::ipVersion on success; it is
//              not written when the properties query fails.
//
// Returns the status of the properties query, or status::success.
status_t get_device_ip(ze_device_handle_t device, uint32_t &ip_version) {
    // The IP version lives in an extension structure that is chained to the
    // base device properties through pNext, so one query fills both.
    ze_device_ip_version_ext_t ip_ext
            = {ZE_STRUCTURE_TYPE_DEVICE_IP_VERSION_EXT};
    ze_device_properties_t base_props = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
    base_props.pNext = &ip_ext;

    CHECK(func_zeDeviceGetProperties(device, &base_props));

    ip_version = ip_ext.ipVersion;
    return status::success;
}
304+
305+
// Reports whether the device advertises the DPAS flag in the Intel
// dot-product module properties extension.
//
// device           - device handle to query.
// mayiuse_systolic - set to true when ZE_INTEL_DEVICE_MODULE_EXP_FLAG_DPAS is
//                    present in the returned flags, false otherwise; it is not
//                    written when the query fails.
//
// Returns the status of the module properties query, or status::success.
status_t get_l0_device_enabled_systolic_intel(
        ze_device_handle_t device, bool &mayiuse_systolic) {
    // Note: supported by Intel Driver 24.05 and onwards
    ze_intel_device_module_dp_exp_properties_t dp_props
            = {ZE_STRUCTURE_INTEL_DEVICE_MODULE_DP_EXP_PROPERTIES};

    // The extension structure is chained to the base module properties.
    ze_device_module_properties_t module_props
            = {ZE_STRUCTURE_TYPE_DEVICE_MODULE_PROPERTIES};
    module_props.pNext = &dp_props;

    CHECK(func_zeDeviceGetModuleProperties(device, &module_props));

    const auto dpas_bits
            = dp_props.flags & ZE_INTEL_DEVICE_MODULE_EXP_FLAG_DPAS;
    mayiuse_systolic = (dpas_bits != 0);
    return status::success;
}
319+
320+
// Detects which native floating-point atomic operations the device supports
// and records them in a native_ext_t bit mask.
//
// device            - device handle to query.
// native_extensions - bit mask that the detected native_ext_t::fp*_atomic_*
//                     bits are OR-ed into. Taken by reference so the result
//                     reaches the caller; bits already set are preserved. It
//                     is not modified when the query fails.
//
// An extension bit is set only when the capability is reported for BOTH the
// global and the local address space.
//
// Returns the status of the module properties query, or status::success.
status_t get_l0_device_enabled_native_float_atomics(
        ze_device_handle_t device, uint64_t &native_extensions) {
    using namespace gpu::intel::compute;

    // Per the Level Zero float atomics extension,
    // ze_float_atomic_ext_properties_t is returned through the pNext chain of
    // ze_device_module_properties_t (zeDeviceGetModuleProperties), not of
    // ze_device_properties_t.
    ze_float_atomic_ext_properties_t fltAtom
            = {ZE_STRUCTURE_TYPE_FLOAT_ATOMIC_EXT_PROPERTIES};
    ze_device_module_properties_t deviceModProps
            = {ZE_STRUCTURE_TYPE_DEVICE_MODULE_PROPERTIES};
    deviceModProps.pNext = &fltAtom;
    CHECK(func_zeDeviceGetModuleProperties(device, &deviceModProps));

    const ze_device_fp_atomic_ext_flags_t atomic_load_store
            = ZE_DEVICE_FP_ATOMIC_EXT_FLAG_GLOBAL_LOAD_STORE
            | ZE_DEVICE_FP_ATOMIC_EXT_FLAG_LOCAL_LOAD_STORE;
    const ze_device_fp_atomic_ext_flags_t atomic_add
            = ZE_DEVICE_FP_ATOMIC_EXT_FLAG_GLOBAL_ADD
            | ZE_DEVICE_FP_ATOMIC_EXT_FLAG_LOCAL_ADD;
    const ze_device_fp_atomic_ext_flags_t atomic_min_max
            = ZE_DEVICE_FP_ATOMIC_EXT_FLAG_GLOBAL_MIN_MAX
            | ZE_DEVICE_FP_ATOMIC_EXT_FLAG_LOCAL_MIN_MAX;

    // Sets `ext` when every bit of `required` is present in `reported`. The
    // same mask is used for both the AND and the comparison, so a capability
    // can never be tested against the wrong bit group.
    const auto enable_if_supported
            = [&native_extensions](ze_device_fp_atomic_ext_flags_t reported,
                      ze_device_fp_atomic_ext_flags_t required,
                      native_ext_t ext) {
                  if ((reported & required) == required)
                      native_extensions |= static_cast<uint64_t>(ext);
              };

    enable_if_supported(fltAtom.fp16Flags, atomic_load_store,
            native_ext_t::fp16_atomic_load_store);
    enable_if_supported(
            fltAtom.fp16Flags, atomic_add, native_ext_t::fp16_atomic_add);
    enable_if_supported(fltAtom.fp16Flags, atomic_min_max,
            native_ext_t::fp16_atomic_min_max);

    enable_if_supported(fltAtom.fp32Flags, atomic_load_store,
            native_ext_t::fp32_atomic_load_store);
    enable_if_supported(
            fltAtom.fp32Flags, atomic_add, native_ext_t::fp32_atomic_add);
    enable_if_supported(fltAtom.fp32Flags, atomic_min_max,
            native_ext_t::fp32_atomic_min_max);

    enable_if_supported(fltAtom.fp64Flags, atomic_load_store,
            native_ext_t::fp64_atomic_load_store);
    enable_if_supported(
            fltAtom.fp64Flags, atomic_add, native_ext_t::fp64_atomic_add);
    enable_if_supported(fltAtom.fp64Flags, atomic_min_max,
            native_ext_t::fp64_atomic_min_max);

    return status::success;
}
363+
364+
// Reads the total number of execution units reported for a device.
//
// device   - device handle to query.
// eu_count - receives ze_eu_count_ext_t::numTotalEUs on success; it is not
//            written when the properties query fails.
//
// Returns the status of the properties query, or status::success.
status_t get_l0_device_eu_count(ze_device_handle_t device, int &eu_count) {
    // The extension structure must carry its own structure type, otherwise
    // the driver cannot identify it in the pNext chain and leaves it unfilled.
    ze_eu_count_ext_t eucnt = {ZE_STRUCTURE_TYPE_EU_COUNT_EXT};
    ze_device_properties_t deviceProps = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
    deviceProps.pNext = &eucnt;

    CHECK(func_zeDeviceGetProperties(device, &deviceProps));

    // numTotalEUs is unsigned in the Level Zero API; convert explicitly.
    eu_count = static_cast<int>(eucnt.numTotalEUs);
    return status::success;
}
373+
374+
// Fills the hardware description of a Level Zero device using Level Zero and
// nGEN queries only.
//
// engine               - engine handed to the binary format support check.
// device, context      - Level Zero handles of the device being described.
// ip_version           - reset to 0, then filled by get_device_ip().
// gpu_arch             - nGEN hardware generation converted to the dnnl enum.
// gpu_product_family   - nGEN product family as an int.
// stepping_id          - nGEN product stepping.
// native_extensions    - passed to the native float atomics query.
// mayiuse_systolic     - reset to false, then filled by the systolic query.
// mayiuse_ngen_kernels - result of the binary format check; forced to false
//                        when that check does not return status::success.
//
// The function returns nothing: statuses of the individual capability queries
// are intentionally not propagated.
void init_gpu_hw_info(impl::engine_t *engine, ze_device_handle_t device,
        ze_context_handle_t context, uint32_t &ip_version,
        compute::gpu_arch_t &gpu_arch, int &gpu_product_family,
        int &stepping_id, uint64_t &native_extensions, bool &mayiuse_systolic,
        bool &mayiuse_ngen_kernels) {
    using namespace ngen;

    // Hardware generation, product family and stepping come from nGEN.
    HW detected_hw = HW::Unknown;
    Product detected_product = {ProductFamily::Unknown, 0};
    LevelZeroCodeGenerator<HW::Unknown>::detectHWInfo(
            context, device, detected_hw, detected_product);

    gpu_arch = jit::convert_ngen_arch_to_dnnl(detected_hw);
    gpu_product_family = static_cast<int>(detected_product.family);
    stepping_id = detected_product.stepping;

    // Capability queries: their statuses are recorded but not acted upon.
    mayiuse_systolic = false;
    status_t query_status
            = get_l0_device_enabled_systolic_intel(device, mayiuse_systolic);
    // TODO: xelpg has no f64 support; check that the query handles that
    // properly.
    query_status = get_l0_device_enabled_native_float_atomics(
            device, native_extensions);
    MAYBE_UNUSED(query_status);

    // nGEN kernels are usable only if the binary format check itself succeeds.
    const status_t binary_format_status
            = jit::gpu_supports_binary_format(&mayiuse_ngen_kernels, engine);
    if (binary_format_status != status::success) mayiuse_ngen_kernels = false;

    // The returned status is not inspected here; ip_version is zeroed first.
    ip_version = 0;
    get_device_ip(device, ip_version);
}
403+
275404
} // namespace sycl
276405
} // namespace intel
277406
} // namespace gpu

src/gpu/intel/sycl/l0/utils.hpp

+7-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*******************************************************************************
2-
* Copyright 2020-2024 Intel Corporation
2+
* Copyright 2020-2025 Intel Corporation
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -45,6 +45,12 @@ bool compare_ze_devices(const ::sycl::device &lhs, const ::sycl::device &rhs);
4545
status_t func_zeModuleGetNativeBinary(ze_module_handle_t hModule, size_t *pSize,
4646
uint8_t *pModuleNativeBinary);
4747

48+
void init_gpu_hw_info(impl::engine_t *engine, ze_device_handle_t device,
49+
ze_context_handle_t context, uint32_t &ip_version,
50+
compute::gpu_arch_t &gpu_arch, int &gpu_product_family,
51+
int &stepping_id, uint64_t &native_extensions, bool &mayiuse_systolic,
52+
bool &mayiuse_ngen_kernels);
53+
4854
} // namespace sycl
4955
} // namespace intel
5056
} // namespace gpu

0 commit comments

Comments
 (0)