Skip to content

Commit a7f3ef9

Browse files
committed
gpu: nvidia: Added missing bf16 threshold for softmax
1 parent a760af4 commit a7f3ef9

File tree

1 file changed

+5
-3
lines changed

1 file changed

+5
-3
lines changed

tests/benchdnn/softmax/softmax.cpp

+5 -3
Original file line number | Diff line number | Diff line change
@@ -240,10 +240,12 @@ void setup_cmp(compare::compare_t &cmp, const prb_t *prb, data_kind_t kind,
240 240
// minor accuracy drops observed compared to f32
241 241
const float trh = trh_f32;
242 242
#else
243-
const bool is_strict_acc
244-
= prb->attr.acc_mode == dnnl_accumulation_mode_strict;
243+
const bool is_strict_acc = !is_nvidia_gpu()
244+
&& prb->attr.acc_mode == dnnl_accumulation_mode_strict;
245 245
// Relaxed fp16 computation can get an ulp difference with f32 ref values.
246-
const float trh = is_flt_or_dbl || (trh_dt == dnnl_f16 && !is_strict_acc)
246+
const float trh = is_flt_or_dbl
247+
|| ((trh_dt == dnnl_f16 || trh_dt == dnnl_bf16)
248+
&& !is_strict_acc)
247 249
? trh_f32
248 250
: 0.f;
249 251
#endif

0 commit comments

Comments
 (0)