Skip to content

Commit 42e2d88

Browse files
committed
gpu:intel:sycl: use only l0 queries for l0 devices
1 parent 63f72a4 commit 42e2d88

File tree

3 files changed

+140
-19
lines changed

3 files changed

+140
-19
lines changed

src/gpu/intel/sycl/device_info.cpp

+10-19
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "gpu/intel/sycl/compat.hpp"
2020
#include "gpu/intel/sycl/device_info.hpp"
2121
#include "gpu/intel/sycl/engine.hpp"
22+
#include "gpu/intel/sycl/l0/utils.hpp"
2223
#include "gpu/intel/sycl/utils.hpp"
2324

2425
#include "gpu/intel/ocl/ocl_gpu_hw_info.hpp"
@@ -34,6 +35,7 @@ status_t device_info_t::init_arch(impl::engine_t *engine) {
3435
auto *sycl_engine
3536
= utils::downcast<const gpu::intel::sycl::engine_t *>(engine);
3637
auto &device = sycl_engine->device();
38+
auto &ctx = sycl_engine->context();
3739

3840
// skip cpu engines
3941
if (!device.is_gpu()) return status::success;
@@ -43,34 +45,23 @@ status_t device_info_t::init_arch(impl::engine_t *engine) {
4345

4446
auto be = xpu::sycl::get_backend(device);
4547
if (be == xpu::sycl::backend_t::opencl) {
46-
cl_int err = CL_SUCCESS;
47-
4848
auto ocl_dev = xpu::sycl::compat::get_native<cl_device_id>(device);
4949
auto ocl_dev_wrapper = xpu::ocl::make_wrapper(ocl_dev);
5050

51-
auto ocl_ctx_wrapper = xpu::ocl::make_wrapper(
52-
clCreateContext(nullptr, 1, &ocl_dev, nullptr, nullptr, &err));
53-
OCL_CHECK(err);
51+
auto ocl_ctx = xpu::sycl::compat::get_native<cl_context>(ctx);
52+
auto ocl_ctx_wrapper = xpu::ocl::make_wrapper(ocl_ctx);
5453

5554
gpu::intel::ocl::init_gpu_hw_info(engine, ocl_dev_wrapper,
5655
ocl_ctx_wrapper, ip_version_, gpu_arch_, gpu_product_family_,
5756
stepping_id_, native_extensions_, mayiuse_systolic_,
5857
mayiuse_ngen_kernels_);
5958
} else if (be == xpu::sycl::backend_t::level0) {
60-
// TODO: add support for L0 binary ngen check
61-
// XXX: query from ocl_engine for now
62-
std::unique_ptr<gpu::intel::ocl::ocl_gpu_engine_t, engine_deleter_t>
63-
ocl_engine;
64-
CHECK(gpu::intel::sycl::create_ocl_engine(&ocl_engine, sycl_engine));
65-
66-
auto *dev_info = ocl_engine->device_info();
67-
ip_version_ = dev_info->ip_version();
68-
gpu_arch_ = dev_info->gpu_arch();
69-
gpu_product_family_ = dev_info->gpu_product_family();
70-
stepping_id_ = dev_info->stepping_id();
71-
native_extensions_ = dev_info->native_extensions();
72-
mayiuse_systolic_ = dev_info->mayiuse_systolic();
73-
mayiuse_ngen_kernels_ = dev_info->mayiuse_ngen_kernels();
59+
auto ze_dev = xpu::sycl::compat::get_native<ze_device_handle_t>(device);
60+
auto ze_ctx = xpu::sycl::compat::get_native<ze_context_handle_t>(ctx);
61+
62+
gpu::intel::sycl::init_gpu_hw_info(engine, ze_dev, ze_ctx, ip_version_,
63+
gpu_arch_, gpu_product_family_, stepping_id_,
64+
native_extensions_, mayiuse_systolic_, mayiuse_ngen_kernels_);
7465
} else {
7566
assert(!"not_expected");
7667
}

src/gpu/intel/sycl/l0/utils.cpp

+124
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@
1717
#include "gpu/intel/sycl/l0/utils.hpp"
1818
#include "oneapi/dnnl/dnnl_config.h"
1919

20+
#include "gpu/intel/jit/binary_format.hpp"
21+
#include "gpu/intel/jit/ngen/ngen_level_zero.hpp"
22+
#include "gpu/intel/jit/utils/ngen_type_bridge.hpp"
23+
2024
#if defined(__linux__)
2125
#include <dlfcn.h>
2226
#elif defined(_WIN32)
@@ -26,6 +30,7 @@
2630
#endif
2731

2832
#include "gpu/intel/sycl/l0/level_zero/ze_api.h"
33+
#include "gpu/intel/sycl/l0/ze_intel_gpu.h"
2934

3035
#if !defined(__SYCL_COMPILER_VERSION)
3136
#error "Unsupported compiler"
@@ -173,6 +178,16 @@ status_t func_zeDeviceGetProperties(
173178
return status::success;
174179
}
175180

181+
status_t func_zeDeviceGetModuleProperties(ze_device_handle_t hDevice,
182+
ze_device_module_properties_t *pDeviceProperties) {
183+
static auto f = find_ze_symbol<decltype(&zeDeviceGetModuleProperties)>(
184+
"zeDeviceGetModuleProperties");
185+
186+
if (!f) return status::runtime_error;
187+
ZE_CHECK(f(hDevice, pDeviceProperties));
188+
return status::success;
189+
}
190+
176191
} // namespace
177192

178193
// This function is called from compatibility layer that ensures compatibility
@@ -272,6 +287,115 @@ bool compare_ze_devices(const ::sycl::device &lhs, const ::sycl::device &rhs) {
272287
return lhs_ze_handle == rhs_ze_handle;
273288
}
274289

290+
status_t get_device_ip(ze_device_handle_t device, uint32_t &ip_version) {
291+
ze_device_properties_t deviceProps = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
292+
ze_device_ip_version_ext_t devicePropsIP
293+
= {ZE_STRUCTURE_TYPE_DEVICE_IP_VERSION_EXT};
294+
deviceProps.pNext = &devicePropsIP;
295+
CHECK(func_zeDeviceGetProperties(device, &deviceProps));
296+
ip_version = devicePropsIP.ipVersion;
297+
return status::success;
298+
}
299+
300+
status_t get_l0_device_enabled_systolic_intel(
301+
ze_device_handle_t device, bool &mayiuse_systolic) {
302+
ze_device_module_properties_t deviceModProps
303+
= {ZE_STRUCTURE_TYPE_DEVICE_MODULE_PROPERTIES};
304+
// Note: supported by Intel Driver 24.05 and onwards
305+
ze_intel_device_module_dp_exp_properties_t deviceModPropsExt
306+
= {ZE_STRUCTURE_INTEL_DEVICE_MODULE_DP_EXP_PROPERTIES};
307+
deviceModProps.pNext = &deviceModPropsExt;
308+
309+
CHECK(func_zeDeviceGetModuleProperties(device, &deviceModProps));
310+
mayiuse_systolic
311+
= deviceModPropsExt.flags & ZE_INTEL_DEVICE_MODULE_EXP_FLAG_DPAS;
312+
return status::success;
313+
}
314+
315+
status_t get_l0_device_enabled_native_float_atomics(
316+
ze_device_handle_t device, uint64_t native_extensions) {
317+
using namespace gpu::intel::compute;
318+
319+
ze_device_properties_t deviceProps = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
320+
ze_float_atomic_ext_properties_t fltAtom
321+
= {ZE_STRUCTURE_TYPE_FLOAT_ATOMIC_EXT_PROPERTIES};
322+
deviceProps.pNext = &fltAtom;
323+
CHECK(func_zeDeviceGetProperties(device, &deviceProps));
324+
325+
ze_device_fp_atomic_ext_flags_t atomic_load_store
326+
= ZE_DEVICE_FP_ATOMIC_EXT_FLAG_GLOBAL_LOAD_STORE
327+
| ZE_DEVICE_FP_ATOMIC_EXT_FLAG_LOCAL_LOAD_STORE;
328+
ze_device_fp_atomic_ext_flags_t atomic_add
329+
= ZE_DEVICE_FP_ATOMIC_EXT_FLAG_GLOBAL_ADD
330+
| ZE_DEVICE_FP_ATOMIC_EXT_FLAG_LOCAL_ADD;
331+
ze_device_fp_atomic_ext_flags_t atomic_min_max
332+
= ZE_DEVICE_FP_ATOMIC_EXT_FLAG_GLOBAL_MIN_MAX
333+
| ZE_DEVICE_FP_ATOMIC_EXT_FLAG_LOCAL_MIN_MAX;
334+
335+
if ((fltAtom.fp16Flags & atomic_load_store) == atomic_load_store)
336+
native_extensions |= (uint64_t)native_ext_t::fp16_atomic_load_store;
337+
if ((fltAtom.fp16Flags & atomic_add) == atomic_add)
338+
native_extensions |= (uint64_t)native_ext_t::fp16_atomic_add;
339+
if ((fltAtom.fp16Flags & atomic_add) == atomic_min_max)
340+
native_extensions |= (uint64_t)native_ext_t::fp16_atomic_min_max;
341+
342+
if ((fltAtom.fp32Flags & atomic_load_store) == atomic_load_store)
343+
native_extensions |= (uint64_t)native_ext_t::fp32_atomic_load_store;
344+
if ((fltAtom.fp32Flags & atomic_add) == atomic_add)
345+
native_extensions |= (uint64_t)native_ext_t::fp32_atomic_add;
346+
if ((fltAtom.fp32Flags & atomic_add) == atomic_min_max)
347+
native_extensions |= (uint64_t)native_ext_t::fp32_atomic_min_max;
348+
349+
if ((fltAtom.fp64Flags & atomic_load_store) == atomic_load_store)
350+
native_extensions |= (uint64_t)native_ext_t::fp64_atomic_load_store;
351+
if ((fltAtom.fp64Flags & atomic_add) == atomic_add)
352+
native_extensions |= (uint64_t)native_ext_t::fp64_atomic_add;
353+
if ((fltAtom.fp64Flags & atomic_add) == atomic_min_max)
354+
native_extensions |= (uint64_t)native_ext_t::fp64_atomic_min_max;
355+
356+
return status::success;
357+
}
358+
359+
status_t get_l0_device_eu_count(ze_device_handle_t device, int &eucount) {
360+
ze_device_properties_t deviceProps = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
361+
ze_eu_count_ext_t eucnt = ze_eu_count_ext_t();
362+
deviceProps.pNext = &eucnt;
363+
364+
CHECK(func_zeDeviceGetProperties(device, &deviceProps));
365+
eucount = eucnt.numTotalEUs;
366+
return status::success;
367+
}
368+
369+
void init_gpu_hw_info(impl::engine_t *engine, ze_device_handle_t device,
370+
ze_context_handle_t context, uint32_t &ip_version,
371+
compute::gpu_arch_t &gpu_arch, int &gpu_product_family,
372+
int &stepping_id, uint64_t &native_extensions, bool &mayiuse_systolic,
373+
bool &mayiuse_ngen_kernels) {
374+
using namespace ngen;
375+
HW hw = HW::Unknown;
376+
Product product = {ProductFamily::Unknown, 0};
377+
LevelZeroCodeGenerator<HW::Unknown>::detectHWInfo(
378+
context, device, hw, product);
379+
380+
gpu_arch = jit::convert_ngen_arch_to_dnnl(hw);
381+
gpu_product_family = static_cast<int>(product.family);
382+
stepping_id = product.stepping;
383+
384+
mayiuse_systolic = false;
385+
status_t ret
386+
= get_l0_device_enabled_systolic_intel(device, mayiuse_systolic);
387+
// TODO: xelpg has no f64 support. check that the query properly handle that
388+
ret = get_l0_device_enabled_native_float_atomics(device, native_extensions);
389+
MAYBE_UNUSED(ret);
390+
391+
auto status
392+
= jit::gpu_supports_binary_format(&mayiuse_ngen_kernels, engine);
393+
if (status != status::success) mayiuse_ngen_kernels = false;
394+
395+
ip_version = 0;
396+
get_device_ip(device, ip_version);
397+
}
398+
275399
} // namespace sycl
276400
} // namespace intel
277401
} // namespace gpu

src/gpu/intel/sycl/l0/utils.hpp

+6
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,12 @@ bool compare_ze_devices(const ::sycl::device &lhs, const ::sycl::device &rhs);
4545
status_t func_zeModuleGetNativeBinary(ze_module_handle_t hModule, size_t *pSize,
4646
uint8_t *pModuleNativeBinary);
4747

48+
void init_gpu_hw_info(impl::engine_t *engine, ze_device_handle_t device,
49+
ze_context_handle_t context, uint32_t &ip_version,
50+
compute::gpu_arch_t &gpu_arch, int &gpu_product_family,
51+
int &stepping_id, uint64_t &native_extensions, bool &mayiuse_systolic,
52+
bool &mayiuse_ngen_kernels);
53+
4854
} // namespace sycl
4955
} // namespace intel
5056
} // namespace gpu

0 commit comments

Comments
 (0)