Skip to content

Commit db382e0

Browse files
committed
gpu: generic: sycl: fix lnorm and bnorm precision issues on Intel GPUs
1 parent cfe12d8 commit db382e0

File tree

2 files changed

+8
-0
lines changed

2 files changed

+8
-0
lines changed

src/gpu/generic/sycl/ref_batch_normalization.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -84,9 +84,13 @@ status_t ref_batch_normalization_fwd_t::init(impl::engine_t *engine) {
8484
= ::sycl::get_kernel_id<batch_normalization_fwd_kernel_vec_t>();
8585
CHECK(create_kernel(engine, kid, &kernel_));
8686
} else {
87+
// Enable the IEEE-compliant (correctly rounded) fp32 divide and sqrt implementation
88+
setenv("SYCL_PROGRAM_COMPILE_OPTIONS",
89+
"-cl-fp32-correctly-rounded-divide-sqrt", 1);
8790
const auto kid = ::sycl::get_kernel_id<
8891
batch_normalization_fwd_kernel_vec_t1>();
8992
CHECK(create_kernel(engine, kid, &kernel_));
93+
unsetenv("SYCL_PROGRAM_COMPILE_OPTIONS");
9094
}
9195
return status::success;
9296
}

src/gpu/generic/sycl/ref_layer_normalizations.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,13 @@ status_t ref_layer_normalization_fwd_t::init(impl::engine_t *engine) {
8282
= ::sycl::get_kernel_id<layer_normalization_fwd_kernel_vec_t>();
8383
CHECK(create_kernel(engine, kid, &kernel_));
8484
} else {
85+
// Enable the IEEE-compliant (correctly rounded) fp32 divide and sqrt implementation
86+
setenv("SYCL_PROGRAM_COMPILE_OPTIONS",
87+
"-cl-fp32-correctly-rounded-divide-sqrt", 1);
8588
const auto kid = ::sycl::get_kernel_id<
8689
layer_normalization_fwd_kernel_vec1_t>();
8790
CHECK(create_kernel(engine, kid, &kernel_));
91+
unsetenv("SYCL_PROGRAM_COMPILE_OPTIONS");
8892
}
8993
return status::success;
9094
}

0 commit comments

Comments
 (0)