extend ov::with_cpu_x86_bfloat16() to cover avx2_vnni_2 and introduce ov::with_cpu_x86_float16()

liubo-intel · liubo-intel · commit 6bec04b71308 · 2025-03-14T08:52:09.000-04:00
diff --git a/src/inference/dev_api/openvino/runtime/system_conf.hpp b/src/inference/dev_api/openvino/runtime/system_conf.hpp
@@ -142,10 +142,17 @@ OPENVINO_RUNTIME_API bool with_cpu_x86_avx512_core_vnni();
 /**
  * @brief      Checks whether CPU supports BFloat16 capability
  * @ingroup    ov_dev_api_system_conf
- * @return     `True` is tAVX512_BF16 instructions are available, `false` otherwise
+ * @return     `True` if tAVX512_BF16 or AVX2_VNNI_2 instructions are available, `false` otherwise
  */
 OPENVINO_RUNTIME_API bool with_cpu_x86_bfloat16();
 
+/**
+ * @brief      Checks whether CPU supports Float16 capability
+ * @ingroup    ov_dev_api_system_conf
+ * @return     `True` if tAVX512_FP16 or AVX2_VNNI_2 instructions are available, `false` otherwise
+ */
+OPENVINO_RUNTIME_API bool with_cpu_x86_float16();
+
 /**
  * @brief      Checks whether CPU supports fp16 capability
  * @ingroup    ov_dev_api_system_conf
diff --git a/src/inference/src/system_conf.cpp b/src/inference/src/system_conf.cpp
@@ -93,13 +93,17 @@ bool with_cpu_x86_avx512_core_vnni() {
 }
 
 bool with_cpu_x86_bfloat16() {
-    return get_cpu_info().has(Xbyak::util::Cpu::tAVX512_BF16);
+    return get_cpu_info().has(Xbyak::util::Cpu::tAVX512_BF16) || with_cpu_x86_avx2_vnni_2();
 }
 
 bool with_cpu_x86_avx512_core_fp16() {
     return get_cpu_info().has(Xbyak::util::Cpu::tAVX512_FP16);
 }
 
+bool with_cpu_x86_float16() {
+    return with_cpu_x86_avx512_core_fp16() || with_cpu_x86_avx2_vnni_2();
+}
+
 bool with_cpu_x86_avx512_core_amx_int8() {
     return get_cpu_info().has(Xbyak::util::Cpu::tAMX_INT8);
 }
@@ -156,6 +160,9 @@ bool with_cpu_x86_bfloat16() {
 bool with_cpu_x86_avx512_core_fp16() {
     return false;
 }
+bool with_cpu_x86_float16() {
+    return false;
+}
 bool with_cpu_x86_avx512_core_amx_int8() {
     return false;
 }
diff --git a/src/plugins/intel_cpu/src/nodes/paged_attn.cpp b/src/plugins/intel_cpu/src/nodes/paged_attn.cpp
@@ -276,7 +276,7 @@ bool PagedAttention::isSupportedOperation(const std::shared_ptr<const ov::Node>&
 ov::element::Type PagedAttention::getRuntimePrecision() const {
     auto rtPrecision = getOriginalInputPrecisionAtPort(0);
     // bf16 should be enabled only when platform supports
-    if (rtPrecision == ov::element::bf16 && ov::with_cpu_x86_bfloat16()) {
+    if (rtPrecision == ov::element::bf16 && ov::with_cpu_x86_bfloat16() && !ov::with_cpu_x86_avx2_vnni_2()) {
         rtPrecision = ov::element::bf16;
     } else if (rtPrecision == ov::element::f16 && ov::with_cpu_x86_avx512_core_fp16()) {
         rtPrecision = ov::element::f16;
diff --git a/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp b/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp
@@ -1238,7 +1238,7 @@ void ScaledDotProductAttention::createPrimitive() {
         std::shared_ptr<Executor> executor = nullptr;
 #ifdef OPENVINO_ARCH_X86_64
         if (rtPrecision == ov::element::bf16) {
-            if (ov::with_cpu_x86_bfloat16()) {
+            if (ov::with_cpu_x86_bfloat16() && !ov::with_cpu_x86_avx2_vnni_2()) {
                 executor = std::make_shared<AttentionExecutor<KT_ONEDNN, ov::bfloat16>>(context,
                                                                                         m_key_quant_param.groupSize,
                                                                                         m_value_quant_param.groupSize,
@@ -2082,7 +2082,7 @@ const ScaledDotProductAttention::SDPAQuantParam& ScaledDotProductAttention::getV
 ov::element::Type ScaledDotProductAttention::getRuntimePrecision() const {
     auto rtPrecision = getOriginalInputPrecisionAtPort(0);
     // bf16 should be enabled only when platform supports
-    if (rtPrecision == ov::element::bf16 && (ov::with_cpu_x86_bfloat16() || with_cpu_x86_avx2_vnni_2())) {
+    if (rtPrecision == ov::element::bf16 && ov::with_cpu_x86_bfloat16()) {
         rtPrecision = ov::element::bf16;
     } else if (rtPrecision == ov::element::f16 && ov::intel_cpu::hasHardwareSupport(ov::element::f16)) {
         rtPrecision = ov::element::f16;
diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp
@@ -156,7 +156,8 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigStreamsNum) {
 #if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64)
     const auto expected_precision_for_performance_mode = ov::intel_cpu::hasHardwareSupport(ov::element::f16) ? ov::element::f16 : ov::element::f32;
 #else
-    const auto expected_precision_for_performance_mode = ov::with_cpu_x86_bfloat16() ? ov::element::bf16 : ov::element::f32;
+const auto expected_precision_for_performance_mode =
+    (ov::with_cpu_x86_bfloat16() && !ov::with_cpu_x86_avx2_vnni_2()) ? ov::element::bf16 : ov::element::f32;
 #endif
 
 TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigHintInferencePrecision) {
diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/random_uniform.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/random_uniform.cpp
@@ -79,7 +79,7 @@ void RandomUniformLayerTestCPU::SetUp() {
             updateSelectedType(getPrimitiveType(), ElementType::f32, configuration);
         }
     } else if (output_prc == ElementType::bf16) {
-        if (ov::with_cpu_x86_bfloat16()) {
+        if (ov::with_cpu_x86_bfloat16() && !ov::with_cpu_x86_avx2_vnni_2()) {
             updateSelectedType(getPrimitiveType(), ElementType::bf16, configuration);
         } else {
             updateSelectedType("ref_any", ElementType::bf16, configuration);
diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/group_convolution.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/group_convolution.cpp
@@ -275,7 +275,7 @@ std::vector<groupConvLayerCPUTestParamsSet> filterParamsSetForDevice(
         auto additionalConfig = std::get<configIndex>(param);
         if (additionalConfig.count(ov::hint::inference_precision.name()) &&
             ov::element::bf16 == additionalConfig[ov::hint::inference_precision.name()].as<ov::element::Type>() &&
-            !ov::with_cpu_x86_bfloat16()) {
+            (!ov::with_cpu_x86_bfloat16() || ov::with_cpu_x86_avx2_vnni_2())) {
             continue;
         }
         resParamsSet.push_back(param);
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp
@@ -579,7 +579,7 @@ std::vector<std::string> disabledTestPatterns() {
         // TODO: Issue 92895
         // on platforms which do not support AMX, we are disabling I8 input tests
         retVector.emplace_back(R"(smoke_LPT/FakeQuantizeWithNotOptimalTransformation.CompareWithRefImpl.*CPU.*i8.*)");
-    if (!ov::with_cpu_x86_avx512_core_amx_bf16() && !ov::with_cpu_x86_bfloat16()) {
+    if ((!ov::with_cpu_x86_avx512_core_amx_bf16() && !ov::with_cpu_x86_bfloat16()) || ov::with_cpu_x86_avx2_vnni_2()) {
         // ignored for not supported bf16 platforms
         retVector.emplace_back(R"(.*smoke_Snippets_EnforcePrecision_bf16.*)");
         retVector.emplace_back(R"(.*smoke_Snippets_MHAWOTransposeEnforceBF16.*)");
@@ -638,7 +638,7 @@ std::vector<std::string> disabledTestPatterns() {
         retVector.emplace_back(R"(.*smoke_Deconv_(2|3)D_NSPC_INT8_AMX/DeconvolutionLayerCPUTest.*)");
     }
 
-    if (ov::with_cpu_x86_avx512_core_fp16() || ov::with_cpu_x86_avx2_vnni_2()) {
+    if (ov::with_cpu_x86_float16()) {
         // Issue: 143852
         retVector.emplace_back(R"(smoke_ConvertRangeSubgraphCPUTest/ConvertRangeSubgraphCPUTest\.CompareWithRefs.*Prc=f16.*)");
         retVector.emplace_back(R"((smoke|nightly)_FC_3D_FP16/.*_Fused=Multiply\(PerChannel\).*)");
@@ -655,6 +655,12 @@ std::vector<std::string> disabledTestPatterns() {
         retVector.emplace_back(
             R"(smoke_GroupConv_.*D_Gemm_BF16/GroupConvolutionLayerCPUTest.CompareWithRefs.*primitive=jit_gemm.*)");
         retVector.emplace_back(R"(smoke_.*MatMulLayerCPUTest.*INFERENCE_PRECISION_HINT=bf16.*_primitive=jit_gemm.*)");
+        // Issue: 163147
+        retVector.emplace_back(
+            R"(smoke_CompareWithRefs_4D.*[Ff]using.*EltwiseLayerCPUTest\.CompareWithRefs.*INFERENCE_PRECISION_HINT=f16.*enforceSnippets=1.*)");
+        // Issue: 163144
+        retVector.emplace_back(
+            R"(smoke_ScaledAttn_CPU/ScaledAttnLayerCPUTest.CompareWithRefs/netPRC=bf16.*_TS=\(2\.8\.16\.32\)_\(2\.8\.16\.32\)_\(2\.8\.16\.32\)_\(1\.8\.48\.32\)_\(1\.8\.48\.32\)_\(1\.8\.48\.32\)_\(16\.48\)_\(16\.1\)_\(1\.48\).*)");
     }
 
     return retVector;
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/fully_connected.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/fully_connected.cpp
@@ -31,7 +31,8 @@ static inline std::vector<std::vector<element::Type>> precisions(bool only_fp32
         auto quant = quantized_precisions();
         std::copy(quant.begin(), quant.end(), std::back_inserter(prc));
         // In Snippets MatMul BF16 is supported only on bf16/AMX platforms
-        if (ov::with_cpu_x86_bfloat16() || ov::with_cpu_x86_avx512_core_amx_bf16()) {
+        if ((ov::with_cpu_x86_bfloat16() && !ov::with_cpu_x86_avx2_vnni_2()) ||
+            ov::with_cpu_x86_avx512_core_amx_bf16()) {
             prc.emplace_back(std::vector<element::Type>{element::bf16, element::bf16});
         }
     }
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/transpose_matmul.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/transpose_matmul.cpp
@@ -179,7 +179,8 @@ static inline std::vector<std::vector<element::Type>> precisions(bool only_fp32
             prc.emplace_back(std::vector<element::Type>{element::u8, element::i8});
         }
         // In Snippets MatMul BF16 is supported only on bf16/AMX platforms
-        if (ov::with_cpu_x86_bfloat16() || ov::with_cpu_x86_avx512_core_amx_bf16()) {
+        if ((ov::with_cpu_x86_bfloat16() && !ov::with_cpu_x86_avx2_vnni_2()) ||
+            ov::with_cpu_x86_avx512_core_amx_bf16()) {
             prc.emplace_back(std::vector<element::Type>{element::bf16, element::bf16});
         }
     }
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/utils.hpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/utils.hpp
@@ -13,7 +13,7 @@ namespace snippets {
 #define SNIPPETS_TESTS_STATIC_SHAPES(...) static_shapes_to_test_representation(std::vector<std::vector<ov::Shape>>{__VA_ARGS__})
 
 static inline bool is_bf16_supported_by_brgemm() {
-    return ov::with_cpu_x86_bfloat16() || ov::with_cpu_x86_avx512_core_amx_bf16();
+    return (ov::with_cpu_x86_bfloat16() && !ov::with_cpu_x86_avx2_vnni_2()) || ov::with_cpu_x86_avx512_core_amx_bf16();
 }
 
 static inline bool is_fp16_supported_by_brgemm() {
diff --git a/src/plugins/intel_cpu/tests/functional/utils/cpu_test_utils.cpp b/src/plugins/intel_cpu/tests/functional/utils/cpu_test_utils.cpp
@@ -474,8 +474,7 @@ CPUTestsBase::deduce_expected_precision(const ov::element::Type& opPrecision,
         inferencePrecisionSetExplicitly = true;
         if ((inferencePrecisionConfig == ov::element::bf16 &&
              (ov::with_cpu_x86_avx512_core() || ov::with_cpu_x86_avx2_vnni_2())) ||
-            (inferencePrecisionConfig == ov::element::f16 &&
-             (ov::with_cpu_x86_avx512_core_fp16() || ov::with_cpu_x86_avx2_vnni_2())) ||
+            (inferencePrecisionConfig == ov::element::f16 && ov::with_cpu_x86_float16()) ||
             (inferencePrecisionConfig == ov::element::f32) || (inferencePrecisionConfig == ov::element::dynamic)) {
             inferencePrecision = inferencePrecisionConfig;
         }
@@ -485,7 +484,7 @@ CPUTestsBase::deduce_expected_precision(const ov::element::Type& opPrecision,
         const auto& configIt = configuration.find(executionModeKey);
         if (configIt != configuration.end() && configIt->second.as<ov::hint::ExecutionMode>() == ov::hint::ExecutionMode::PERFORMANCE) {
             inferencePrecision = ov::element::f32;
-            if (ov::with_cpu_x86_bfloat16()) {
+            if (ov::with_cpu_x86_bfloat16() && !ov::with_cpu_x86_avx2_vnni_2()) {
                 inferencePrecision = ov::element::bf16;
             }
         } else {
diff --git a/src/plugins/intel_cpu/tests/unit/brgemm_executor_test.cpp b/src/plugins/intel_cpu/tests/unit/brgemm_executor_test.cpp
@@ -70,7 +70,7 @@ void run_test(ov::element::Type rtPrec) {
 
 TEST_P(BrgemmKernelTest, simpleGemmTest) {
     ov::element::Type rtPrec = this->GetParam();
-    if (rtPrec == ov::element::bf16 && !ov::with_cpu_x86_bfloat16())
+    if (rtPrec == ov::element::bf16 && (!ov::with_cpu_x86_bfloat16() || ov::with_cpu_x86_avx2_vnni_2()))
         GTEST_SKIP();
     if (rtPrec == ov::element::f32 && !ov::with_cpu_x86_avx512_core())
         GTEST_SKIP();

Original file line number	Diff line number	Diff line change
`@@ -93,13 +93,17 @@ bool with_cpu_x86_avx512_core_vnni() {`
`93`	`93`	`}`
`94`	`94`
`95`	`95`	`bool with_cpu_x86_bfloat16() {`
`96`		`- return get_cpu_info().has(Xbyak::util::Cpu::tAVX512_BF16);`
	`96`	`+ return get_cpu_info().has(Xbyak::util::Cpu::tAVX512_BF16) \|\| with_cpu_x86_avx2_vnni_2();`
`97`	`97`	`}`
`98`	`98`
`99`	`99`	`bool with_cpu_x86_avx512_core_fp16() {`
`100`	`100`	`return get_cpu_info().has(Xbyak::util::Cpu::tAVX512_FP16);`
`101`	`101`	`}`
`102`	`102`
	`103`	`+bool with_cpu_x86_float16() {`
	`104`	`+ return with_cpu_x86_avx512_core_fp16() \|\| with_cpu_x86_avx2_vnni_2();`
	`105`	`+}`
	`106`	`+`
`103`	`107`	`bool with_cpu_x86_avx512_core_amx_int8() {`
`104`	`108`	`return get_cpu_info().has(Xbyak::util::Cpu::tAMX_INT8);`
`105`	`109`	`}`
`@@ -156,6 +160,9 @@ bool with_cpu_x86_bfloat16() {`
`156`	`160`	`bool with_cpu_x86_avx512_core_fp16() {`
`157`	`161`	`return false;`
`158`	`162`	`}`
	`163`	`+bool with_cpu_x86_float16() {`
	`164`	`+ return false;`
	`165`	`+}`
`159`	`166`	`bool with_cpu_x86_avx512_core_amx_int8() {`
`160`	`167`	`return false;`
`161`	`168`	`}`
Original file line number	Diff line number	Diff line change
`@@ -79,7 +79,7 @@ void RandomUniformLayerTestCPU::SetUp() {`
`79`	`79`	`updateSelectedType(getPrimitiveType(), ElementType::f32, configuration);`
`80`	`80`	`}`
`81`	`81`	`} else if (output_prc == ElementType::bf16) {`
`82`		`- if (ov::with_cpu_x86_bfloat16()) {`
	`82`	`+ if (ov::with_cpu_x86_bfloat16() && !ov::with_cpu_x86_avx2_vnni_2()) {`
`83`	`83`	`updateSelectedType(getPrimitiveType(), ElementType::bf16, configuration);`
`84`	`84`	`} else {`
`85`	`85`	`updateSelectedType("ref_any", ElementType::bf16, configuration);`
Original file line number	Diff line number	Diff line change
`@@ -275,7 +275,7 @@ std::vector<groupConvLayerCPUTestParamsSet> filterParamsSetForDevice(`
`275`	`275`	`auto additionalConfig = std::get<configIndex>(param);`
`276`	`276`	`if (additionalConfig.count(ov::hint::inference_precision.name()) &&`
`277`	`277`	`ov::element::bf16 == additionalConfig[ov::hint::inference_precision.name()].as<ov::element::Type>() &&`
`278`		`- !ov::with_cpu_x86_bfloat16()) {`
	`278`	`+ (!ov::with_cpu_x86_bfloat16() \|\| ov::with_cpu_x86_avx2_vnni_2())) {`
`279`	`279`	`continue;`
`280`	`280`	`}`
`281`	`281`	`resParamsSet.push_back(param);`
Original file line number	Diff line number	Diff line change
`@@ -31,7 +31,8 @@ static inline std::vector<std::vector<element::Type>> precisions(bool only_fp32`
`31`	`31`	`auto quant = quantized_precisions();`
`32`	`32`	`std::copy(quant.begin(), quant.end(), std::back_inserter(prc));`
`33`	`33`	`// In Snippets MatMul BF16 is supported only on bf16/AMX platforms`
`34`		`- if (ov::with_cpu_x86_bfloat16() \|\| ov::with_cpu_x86_avx512_core_amx_bf16()) {`
	`34`	`+ if ((ov::with_cpu_x86_bfloat16() && !ov::with_cpu_x86_avx2_vnni_2()) \|\|`
	`35`	`+ ov::with_cpu_x86_avx512_core_amx_bf16()) {`
`35`	`36`	`prc.emplace_back(std::vector<element::Type>{element::bf16, element::bf16});`
`36`	`37`	`}`
`37`	`38`	`}`
Original file line number	Diff line number	Diff line change
`@@ -179,7 +179,8 @@ static inline std::vector<std::vector<element::Type>> precisions(bool only_fp32`
`179`	`179`	`prc.emplace_back(std::vector<element::Type>{element::u8, element::i8});`
`180`	`180`	`}`
`181`	`181`	`// In Snippets MatMul BF16 is supported only on bf16/AMX platforms`
`182`		`- if (ov::with_cpu_x86_bfloat16() \|\| ov::with_cpu_x86_avx512_core_amx_bf16()) {`
	`182`	`+ if ((ov::with_cpu_x86_bfloat16() && !ov::with_cpu_x86_avx2_vnni_2()) \|\|`
	`183`	`+ ov::with_cpu_x86_avx512_core_amx_bf16()) {`
`183`	`184`	`prc.emplace_back(std::vector<element::Type>{element::bf16, element::bf16});`
`184`	`185`	`}`
`185`	`186`	`}`
Original file line number	Diff line number	Diff line change
`@@ -13,7 +13,7 @@ namespace snippets {`
`13`	`13`	`#define SNIPPETS_TESTS_STATIC_SHAPES(...) static_shapes_to_test_representation(std::vector<std::vector<ov::Shape>>{__VA_ARGS__})`
`14`	`14`
`15`	`15`	`static inline bool is_bf16_supported_by_brgemm() {`
`16`		`- return ov::with_cpu_x86_bfloat16() \|\| ov::with_cpu_x86_avx512_core_amx_bf16();`
	`16`	`+ return (ov::with_cpu_x86_bfloat16() && !ov::with_cpu_x86_avx2_vnni_2()) \|\| ov::with_cpu_x86_avx512_core_amx_bf16();`
`17`	`17`	`}`
`18`	`18`
`19`	`19`	`static inline bool is_fp16_supported_by_brgemm() {`