@@ -1238,10 +1238,17 @@ void ScaledDotProductAttention::createPrimitive() {
1238
1238
std::shared_ptr<Executor> executor = nullptr ;
1239
1239
#ifdef OPENVINO_ARCH_X86_64
1240
1240
if (rtPrecision == ov::element::bf16) {
1241
- executor = std::make_shared<AttentionExecutor<KT_ONEDNN, ov::bfloat16>>(context,
1242
- m_key_quant_param.groupSize ,
1243
- m_value_quant_param.groupSize ,
1244
- m_key_quant_param.isByChannel );
1241
+ if (ov::with_cpu_x86_bfloat16 ()) {
1242
+ executor = std::make_shared<AttentionExecutor<KT_ONEDNN, ov::bfloat16>>(context,
1243
+ m_key_quant_param.groupSize ,
1244
+ m_value_quant_param.groupSize ,
1245
+ m_key_quant_param.isByChannel );
1246
+ } else {
1247
+ executor = std::make_shared<AttentionExecutor<KT_REF, ov::bfloat16>>(context,
1248
+ m_key_quant_param.groupSize ,
1249
+ m_value_quant_param.groupSize ,
1250
+ m_key_quant_param.isByChannel );
1251
+ }
1245
1252
} else if (rtPrecision == ov::element::f16) {
1246
1253
if (with_cpu_x86_avx512_core_fp16 ()) {
1247
1254
executor = std::make_shared<AttentionExecutor<KT_ONEDNN, ov::float16>>(context,
0 commit comments