|
1 | 1 | /*******************************************************************************
|
2 |
| -* Copyright 2020-2024 Intel Corporation |
| 2 | +* Copyright 2020-2025 Intel Corporation |
3 | 3 | *
|
4 | 4 | * Licensed under the Apache License, Version 2.0 (the "License");
|
5 | 5 | * you may not use this file except in compliance with the License.
|
|
17 | 17 | #include "gpu/intel/sycl/l0/utils.hpp"
|
18 | 18 | #include "oneapi/dnnl/dnnl_config.h"
|
19 | 19 |
|
| 20 | +#include "gpu/intel/jit/binary_format.hpp" |
| 21 | +#include "gpu/intel/jit/ngen/ngen_level_zero.hpp" |
| 22 | +#include "gpu/intel/jit/utils/ngen_type_bridge.hpp" |
| 23 | + |
20 | 24 | #if defined(__linux__)
|
21 | 25 | #include <dlfcn.h>
|
22 | 26 | #elif defined(_WIN32)
|
|
26 | 30 | #endif
|
27 | 31 |
|
28 | 32 | #include "gpu/intel/sycl/l0/level_zero/ze_api.h"
|
| 33 | +#include "gpu/intel/sycl/l0/level_zero/ze_intel_gpu.h" |
29 | 34 |
|
30 | 35 | #if !defined(__SYCL_COMPILER_VERSION)
|
31 | 36 | #error "Unsupported compiler"
|
@@ -173,6 +178,21 @@ status_t func_zeDeviceGetProperties(
|
173 | 178 | return status::success;
|
174 | 179 | }
|
175 | 180 |
|
| 181 | +status_t func_zeDeviceGetModuleProperties(ze_device_handle_t hDevice, |
| 182 | + ze_device_module_properties_t *pDeviceProperties) { |
| 183 | + static auto f = find_ze_symbol<decltype(&zeDeviceGetModuleProperties)>( |
| 184 | + "zeDeviceGetModuleProperties"); |
| 185 | + |
| 186 | + if (!f) { |
| 187 | + VERROR(common, level_zero, |
| 188 | + "failed to find systolic query extension (maybe update the " |
| 189 | + "driver?)"); |
| 190 | + return status::runtime_error; |
| 191 | + } |
| 192 | + ZE_CHECK(f(hDevice, pDeviceProperties)); |
| 193 | + return status::success; |
| 194 | +} |
| 195 | + |
176 | 196 | } // namespace
|
177 | 197 |
|
178 | 198 | // This function is called from compatibility layer that ensures compatibility
|
@@ -272,6 +292,115 @@ bool compare_ze_devices(const ::sycl::device &lhs, const ::sycl::device &rhs) {
|
272 | 292 | return lhs_ze_handle == rhs_ze_handle;
|
273 | 293 | }
|
274 | 294 |
|
| 295 | +status_t get_device_ip(ze_device_handle_t device, uint32_t &ip_version) { |
| 296 | + ze_device_properties_t deviceProps = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; |
| 297 | + ze_device_ip_version_ext_t devicePropsIP |
| 298 | + = {ZE_STRUCTURE_TYPE_DEVICE_IP_VERSION_EXT}; |
| 299 | + deviceProps.pNext = &devicePropsIP; |
| 300 | + CHECK(func_zeDeviceGetProperties(device, &deviceProps)); |
| 301 | + ip_version = devicePropsIP.ipVersion; |
| 302 | + return status::success; |
| 303 | +} |
| 304 | + |
| 305 | +status_t get_l0_device_enabled_systolic_intel( |
| 306 | + ze_device_handle_t device, bool &mayiuse_systolic) { |
| 307 | + ze_device_module_properties_t deviceModProps |
| 308 | + = {ZE_STRUCTURE_TYPE_DEVICE_MODULE_PROPERTIES}; |
| 309 | + // Note: supported by Intel Driver 24.05 and onwards |
| 310 | + ze_intel_device_module_dp_exp_properties_t deviceModPropsExt |
| 311 | + = {ZE_STRUCTURE_INTEL_DEVICE_MODULE_DP_EXP_PROPERTIES}; |
| 312 | + deviceModProps.pNext = &deviceModPropsExt; |
| 313 | + |
| 314 | + CHECK(func_zeDeviceGetModuleProperties(device, &deviceModProps)); |
| 315 | + mayiuse_systolic |
| 316 | + = deviceModPropsExt.flags & ZE_INTEL_DEVICE_MODULE_EXP_FLAG_DPAS; |
| 317 | + return status::success; |
| 318 | +} |
| 319 | + |
| 320 | +status_t get_l0_device_enabled_native_float_atomics( |
| 321 | + ze_device_handle_t device, uint64_t native_extensions) { |
| 322 | + using namespace gpu::intel::compute; |
| 323 | + |
| 324 | + ze_device_properties_t deviceProps = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; |
| 325 | + ze_float_atomic_ext_properties_t fltAtom |
| 326 | + = {ZE_STRUCTURE_TYPE_FLOAT_ATOMIC_EXT_PROPERTIES}; |
| 327 | + deviceProps.pNext = &fltAtom; |
| 328 | + CHECK(func_zeDeviceGetProperties(device, &deviceProps)); |
| 329 | + |
| 330 | + ze_device_fp_atomic_ext_flags_t atomic_load_store |
| 331 | + = ZE_DEVICE_FP_ATOMIC_EXT_FLAG_GLOBAL_LOAD_STORE |
| 332 | + | ZE_DEVICE_FP_ATOMIC_EXT_FLAG_LOCAL_LOAD_STORE; |
| 333 | + ze_device_fp_atomic_ext_flags_t atomic_add |
| 334 | + = ZE_DEVICE_FP_ATOMIC_EXT_FLAG_GLOBAL_ADD |
| 335 | + | ZE_DEVICE_FP_ATOMIC_EXT_FLAG_LOCAL_ADD; |
| 336 | + ze_device_fp_atomic_ext_flags_t atomic_min_max |
| 337 | + = ZE_DEVICE_FP_ATOMIC_EXT_FLAG_GLOBAL_MIN_MAX |
| 338 | + | ZE_DEVICE_FP_ATOMIC_EXT_FLAG_LOCAL_MIN_MAX; |
| 339 | + |
| 340 | + if ((fltAtom.fp16Flags & atomic_load_store) == atomic_load_store) |
| 341 | + native_extensions |= (uint64_t)native_ext_t::fp16_atomic_load_store; |
| 342 | + if ((fltAtom.fp16Flags & atomic_add) == atomic_add) |
| 343 | + native_extensions |= (uint64_t)native_ext_t::fp16_atomic_add; |
| 344 | + if ((fltAtom.fp16Flags & atomic_add) == atomic_min_max) |
| 345 | + native_extensions |= (uint64_t)native_ext_t::fp16_atomic_min_max; |
| 346 | + |
| 347 | + if ((fltAtom.fp32Flags & atomic_load_store) == atomic_load_store) |
| 348 | + native_extensions |= (uint64_t)native_ext_t::fp32_atomic_load_store; |
| 349 | + if ((fltAtom.fp32Flags & atomic_add) == atomic_add) |
| 350 | + native_extensions |= (uint64_t)native_ext_t::fp32_atomic_add; |
| 351 | + if ((fltAtom.fp32Flags & atomic_add) == atomic_min_max) |
| 352 | + native_extensions |= (uint64_t)native_ext_t::fp32_atomic_min_max; |
| 353 | + |
| 354 | + if ((fltAtom.fp64Flags & atomic_load_store) == atomic_load_store) |
| 355 | + native_extensions |= (uint64_t)native_ext_t::fp64_atomic_load_store; |
| 356 | + if ((fltAtom.fp64Flags & atomic_add) == atomic_add) |
| 357 | + native_extensions |= (uint64_t)native_ext_t::fp64_atomic_add; |
| 358 | + if ((fltAtom.fp64Flags & atomic_add) == atomic_min_max) |
| 359 | + native_extensions |= (uint64_t)native_ext_t::fp64_atomic_min_max; |
| 360 | + |
| 361 | + return status::success; |
| 362 | +} |
| 363 | + |
| 364 | +status_t get_l0_device_eu_count(ze_device_handle_t device, int &eu_count) { |
| 365 | + ze_device_properties_t deviceProps = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; |
| 366 | + ze_eu_count_ext_t eucnt = ze_eu_count_ext_t(); |
| 367 | + deviceProps.pNext = &eucnt; |
| 368 | + |
| 369 | + CHECK(func_zeDeviceGetProperties(device, &deviceProps)); |
| 370 | + eu_count = eucnt.numTotalEUs; |
| 371 | + return status::success; |
| 372 | +} |
| 373 | + |
| 374 | +void init_gpu_hw_info(impl::engine_t *engine, ze_device_handle_t device, |
| 375 | + ze_context_handle_t context, uint32_t &ip_version, |
| 376 | + compute::gpu_arch_t &gpu_arch, int &gpu_product_family, |
| 377 | + int &stepping_id, uint64_t &native_extensions, bool &mayiuse_systolic, |
| 378 | + bool &mayiuse_ngen_kernels) { |
| 379 | + using namespace ngen; |
| 380 | + HW hw = HW::Unknown; |
| 381 | + Product product = {ProductFamily::Unknown, 0}; |
| 382 | + LevelZeroCodeGenerator<HW::Unknown>::detectHWInfo( |
| 383 | + context, device, hw, product); |
| 384 | + |
| 385 | + gpu_arch = jit::convert_ngen_arch_to_dnnl(hw); |
| 386 | + gpu_product_family = static_cast<int>(product.family); |
| 387 | + stepping_id = product.stepping; |
| 388 | + |
| 389 | + mayiuse_systolic = false; |
| 390 | + status_t ret |
| 391 | + = get_l0_device_enabled_systolic_intel(device, mayiuse_systolic); |
| 392 | + // TODO: xelpg has no f64 support. check that the query properly handle that |
| 393 | + ret = get_l0_device_enabled_native_float_atomics(device, native_extensions); |
| 394 | + MAYBE_UNUSED(ret); |
| 395 | + |
| 396 | + auto status |
| 397 | + = jit::gpu_supports_binary_format(&mayiuse_ngen_kernels, engine); |
| 398 | + if (status != status::success) mayiuse_ngen_kernels = false; |
| 399 | + |
| 400 | + ip_version = 0; |
| 401 | + get_device_ip(device, ip_version); |
| 402 | +} |
| 403 | + |
275 | 404 | } // namespace sycl
|
276 | 405 | } // namespace intel
|
277 | 406 | } // namespace gpu
|
|
0 commit comments