From db382e06b8ff411b3002cc6719810821e83bdfde Mon Sep 17 00:00:00 2001 From: John Osorio Date: Wed, 9 Oct 2024 14:46:23 +0100 Subject: [PATCH 1/2] gpu: generic: sycl: lnorm and bnorm Intel GPU precision issues --- src/gpu/generic/sycl/ref_batch_normalization.cpp | 4 ++++ src/gpu/generic/sycl/ref_layer_normalizations.cpp | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/src/gpu/generic/sycl/ref_batch_normalization.cpp b/src/gpu/generic/sycl/ref_batch_normalization.cpp index ae2b8552589..e584c6b10d8 100644 --- a/src/gpu/generic/sycl/ref_batch_normalization.cpp +++ b/src/gpu/generic/sycl/ref_batch_normalization.cpp @@ -84,9 +84,13 @@ status_t ref_batch_normalization_fwd_t::init(impl::engine_t *engine) { = ::sycl::get_kernel_id(); CHECK(create_kernel(engine, kid, &kernel_)); } else { + // Enabling the IEEE div compliant implementation + setenv("SYCL_PROGRAM_COMPILE_OPTIONS", + "-cl-fp32-correctly-rounded-divide-sqrt", 1); const auto kid = ::sycl::get_kernel_id< batch_normalization_fwd_kernel_vec_t1>(); CHECK(create_kernel(engine, kid, &kernel_)); + unsetenv("SYCL_PROGRAM_COMPILE_OPTIONS"); } return status::success; } diff --git a/src/gpu/generic/sycl/ref_layer_normalizations.cpp b/src/gpu/generic/sycl/ref_layer_normalizations.cpp index 18ed304e826..a6cbff92af6 100644 --- a/src/gpu/generic/sycl/ref_layer_normalizations.cpp +++ b/src/gpu/generic/sycl/ref_layer_normalizations.cpp @@ -82,9 +82,13 @@ status_t ref_layer_normalization_fwd_t::init(impl::engine_t *engine) { = ::sycl::get_kernel_id(); CHECK(create_kernel(engine, kid, &kernel_)); } else { + // Enabling the IEEE div compliant implementation + setenv("SYCL_PROGRAM_COMPILE_OPTIONS", + "-cl-fp32-correctly-rounded-divide-sqrt", 1); const auto kid = ::sycl::get_kernel_id< layer_normalization_fwd_kernel_vec1_t>(); CHECK(create_kernel(engine, kid, &kernel_)); + unsetenv("SYCL_PROGRAM_COMPILE_OPTIONS"); } return status::success; } From b6b135593c5d0e9b69d029fc54d8e2e6b29fe928 Mon Sep 17 00:00:00 2001 From: John 
Osorio Date: Mon, 30 Sep 2024 12:13:10 +0100 Subject: [PATCH 2/2] gpu: generic: sycl: lnorm: Intel GPU precision issues --- tests/benchdnn/lnorm/lnorm.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/benchdnn/lnorm/lnorm.cpp b/tests/benchdnn/lnorm/lnorm.cpp index 477fb636808..efa3fb94e4b 100644 --- a/tests/benchdnn/lnorm/lnorm.cpp +++ b/tests/benchdnn/lnorm/lnorm.cpp @@ -450,7 +450,17 @@ void setup_cmp(compare::compare_t &cmp, const prb_t *prb, data_kind_t kind, // exceeds `digits_f32`. const int safe_digits = MAX2(0, digits_dt(dnnl_f32) - digits_dt(dt)); const float trh_coeff = (1 << safe_digits); + // The following code is a workaround for a SYCL compiler bug + // that causes precision issues. +#ifdef DNNL_WITH_SYCL + float trh = trh_coeff + * ((kind == SRC || kind == DST + || (kind == VAR && prb->dir & FLAG_FWD)) + ? 5e-7 + : 0); +#else float trh = trh_coeff * ((kind == SRC || kind == DST) ? 5e-7 : 0); +#endif if ((kind == SC || kind == SH) && prb->dir & FLAG_BWD) trh = trh_coeff * 5e-6; cmp.set_threshold(trh);