uxlfoundation · echeresh · Feb 27, 2025 · rjoursler · Feb 27, 2025 · vpirogov
@@ -0,0 +1,48 @@
+/*******************************************************************************
+* Copyright 2025 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "gpu/intel/compute/utils.hpp"
+#include "common/verbose.hpp"
+
+#include <limits>
+
+namespace dnnl {
+namespace impl {
+namespace gpu {
+namespace intel {
+namespace compute {
+
+// Scans every dimension of the global work range and emits a single VERROR
+// message if any of them is larger than the maximum uint32_t value. The
+// function only reports the condition; it returns nothing to the caller.
+void check_global_range(const compute::range_t &range) {
+    constexpr size_t max_u32 = std::numeric_limits<uint32_t>::max();
+    for (size_t dim = 0; dim < range.ndims(); ++dim) {
+        // Dimensions that fit into 32 bits are fine, keep scanning.
+        if (range[dim] <= max_u32) continue;
+
+        // One report is enough: stop at the first offending dimension.
+        VERROR(common, runtime,
+                "global work size exceeds the 32-bit limit. Potential "
+                "correctness issues may arise due to driver limitation");
+        return;
+    }
+}
+
+} // namespace compute
+} // namespace intel
+} // namespace gpu
+} // namespace impl
+} // namespace dnnl
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -155,6 +155,8 @@ class nd_range_t {
     range_t local_range_;
 };
 
+// Reports (via VERROR) when any dimension of the given global range exceeds
+// the maximum value representable in uint32_t. Returns nothing, so the check
+// is purely diagnostic.
+void check_global_range(const compute::range_t &range);
+
 } // namespace compute
 } // namespace intel
 } // namespace gpu

@@ -228,6 +228,7 @@ status_t kernel_t::parallel_for(impl::stream_t &stream,
     cl_uint ndims = static_cast<cl_uint>(range.ndims());
     if (range.is_zero()) { return status::success; }
 
+    check_global_range(range.global_range());
     xpu::ocl::wrapper_t<cl_event> event;
     if (ocl_stream->flags() & stream_flags::out_of_order) {
         const auto &event_wrappers = xpu::ocl::event_t::from(deps).events;

@@ -160,6 +160,7 @@ status_t sycl_interop_gpu_kernel_t::parallel_for(impl::stream_t &stream,
                 set_scalar_arg(cgh, (int)i, arg.scalar_type(), arg.value());
             }
         }
+        check_global_range(range.global_range());
         if (range.local_range()) {
             auto sycl_nd_range = gpu::intel::sycl::to_sycl_nd_range(range);
             cgh.parallel_for(sycl_nd_range, *sycl_kernel_);