Skip to content

Commit 75290af

Browse files
committed
gpu: nvidia: Added missing bf16 threshold for softmax
1 parent a760af4 commit 75290af

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

tests/benchdnn/softmax/softmax.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ void setup_cmp(compare::compare_t &cmp, const prb_t *prb, data_kind_t kind,
232232
const float trh_coeff_bwd = (prb->dir & FLAG_FWD) ? 1.f : 4.f;
233233
const float trh_f32 = trh_coeff_log * trh_coeff_bwd * trh_coeff_f32
234234
* epsilon_dt(trh_dt);
235-
#if DNNL_AARCH64 || defined(DNNL_SYCL_HIP)
235+
#if DNNL_AARCH64 || defined(DNNL_SYCL_HIP) || defined(DNNL_SYCL_CUDA)
236236
// MIOpen and ACL softmax accumulate in F16, but oneDNN now expects accumulation in
237237
// F32, this partially reverts 6727bbe8. For more information on ACL softmax, see
238238
// https://github.com/oneapi-src/oneDNN/issues/1819
@@ -243,7 +243,7 @@ void setup_cmp(compare::compare_t &cmp, const prb_t *prb, data_kind_t kind,
243243
const bool is_strict_acc
244244
= prb->attr.acc_mode == dnnl_accumulation_mode_strict;
245245
// Relaxed fp16 computation can get an ulp difference with f32 ref values.
246-
const float trh = is_flt_or_dbl || (trh_dt == dnnl_f16 && !is_strict_acc)
246+
const float trh = is_flt_or_dbl || ((trh_dt == dnnl_f16) && !is_strict_acc)
247247
? trh_f32
248248
: 0.f;
249249
#endif

0 commit comments

Comments (0)