Skip to content

Commit 9b01da1

Browse files
committed
Extend cpu functional testing on LNL+ systems
1 parent b1a07c7 commit 9b01da1

File tree

6 files changed

+47
-8
lines changed

6 files changed

+47
-8
lines changed

src/inference/dev_api/openvino/runtime/system_conf.hpp

+7
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,13 @@ OPENVINO_RUNTIME_API bool with_cpu_x86_avx2();
111111
*/
112112
OPENVINO_RUNTIME_API bool with_cpu_x86_avx2_vnni();
113113

114+
/**
115+
* @brief Checks whether CPU supports AVX2_VNNI_2 capability
116+
* @ingroup ov_dev_api_system_conf
117+
* @return `True` if AVX2_VNNI_2 instructions are available, `false` otherwise
118+
*/
119+
OPENVINO_RUNTIME_API bool with_cpu_x86_avx2_vnni_2();
120+
114121
/**
115122
* @brief Checks whether CPU supports AVX 512 capability
116123
* @ingroup ov_dev_api_system_conf

src/inference/src/system_conf.cpp

+8
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,11 @@ bool with_cpu_x86_avx2_vnni() {
7575
return get_cpu_info().has(Xbyak::util::Cpu::tAVX2 | Xbyak::util::Cpu::tAVX_VNNI);
7676
}
7777

78+
bool with_cpu_x86_avx2_vnni_2() {
79+
return with_cpu_x86_avx2_vnni() && get_cpu_info().has(Xbyak::util::Cpu::tAVX_VNNI_INT8) &&
80+
get_cpu_info().has(Xbyak::util::Cpu::tAVX_NE_CONVERT);
81+
}
82+
7883
bool with_cpu_x86_avx512f() {
7984
return get_cpu_info().has(Xbyak::util::Cpu::tAVX512F);
8085
}
@@ -133,6 +138,9 @@ bool with_cpu_x86_avx2() {
133138
bool with_cpu_x86_avx2_vnni() {
134139
return false;
135140
}
141+
bool with_cpu_x86_avx2_vnni_2() {
142+
return false;
143+
}
136144
bool with_cpu_x86_avx512f() {
137145
return false;
138146
}

src/plugins/intel_cpu/src/nodes/scaled_attn.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -2075,7 +2075,7 @@ const ScaledDotProductAttention::SDPAQuantParam& ScaledDotProductAttention::getV
20752075
ov::element::Type ScaledDotProductAttention::getRuntimePrecision() const {
20762076
auto rtPrecision = getOriginalInputPrecisionAtPort(0);
20772077
// bf16 should be enabled only when the platform supports it
2078-
if (rtPrecision == ov::element::bf16 && ov::with_cpu_x86_bfloat16()) {
2078+
if (rtPrecision == ov::element::bf16 && (ov::with_cpu_x86_bfloat16() || with_cpu_x86_avx2_vnni_2())) {
20792079
rtPrecision = ov::element::bf16;
20802080
} else if (rtPrecision == ov::element::f16 && ov::intel_cpu::hasHardwareSupport(ov::element::f16)) {
20812081
rtPrecision = ov::element::f16;

src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp

+17-2
Original file line numberDiff line numberDiff line change
@@ -492,6 +492,13 @@ std::vector<std::string> disabledTestPatterns() {
492492
retVector.emplace_back(R"(.*smoke_RDFT_CPU_2D/RDFTTestCPU.CompareWithRefs/prec=f32_IS0=\[\]_TS0=\(\(16.38\)\)_constAxes=true_axes=\(\(0.1\)\)_isInverse=false.*)");
493493
#endif
494494
if (!ov::with_cpu_x86_avx512_core()) {
495+
// Issue: MFDNN-12818
496+
retVector.emplace_back(
497+
R"(.*smoke_LPT/RecurrentCellTransformation.CompareWithRefImpl/f32_\[1,1,3\]_CPU_f32FQ_X_level=256_.*_FQ_W_level=255.*)");
498+
retVector.emplace_back(
499+
R"(.*smoke_static/ConvertFqRnnToQuantizedRnn.CompareWithRefs/Type=GRUSequence.*2.5.10.*2.1.4.*2.1.4.*)");
500+
}
501+
if (!ov::intel_cpu::hasHardwareSupport(ov::element::bf16)) {
495502
// on platforms which do not support bfloat16, we are disabling bf16 tests since there are no bf16 primitives,
496503
// tests are useless on such platforms
497504
retVector.emplace_back(R"(.*(BF|bf)16.*)");
@@ -504,7 +511,7 @@ std::vector<std::string> disabledTestPatterns() {
504511
retVector.emplace_back(R"(.*Snippets.*MHA.*)");
505512
retVector.emplace_back(R"(.*Snippets.*(MatMul|Matmul).*)");
506513
}
507-
if (!ov::with_cpu_x86_avx512_core_fp16()) {
514+
if (!ov::intel_cpu::hasHardwareSupport(ov::element::f16)) {
508515
// Skip fp16 tests for platforms that don't support fp16 precision
509516
retVector.emplace_back(R"(.*INFERENCE_PRECISION_HINT=(F|f)16.*)");
510517
retVector.emplace_back(R"(.*ConcatMultiQuerySDPTest.*f16.*)");
@@ -638,7 +645,7 @@ std::vector<std::string> disabledTestPatterns() {
638645
retVector.emplace_back(R"(.*smoke_Deconv_(2|3)D_NSPC_INT8_AMX/DeconvolutionLayerCPUTest.*)");
639646
}
640647

641-
if (ov::with_cpu_x86_avx512_core_fp16()) {
648+
if (ov::with_cpu_x86_avx512_core_fp16() || ov::with_cpu_x86_avx2_vnni_2()) {
642649
// Issue: 143852
643650
retVector.emplace_back(R"(smoke_ConvertRangeSubgraphCPUTest/ConvertRangeSubgraphCPUTest\.CompareWithRefs.*Prc=f16.*)");
644651
retVector.emplace_back(R"((smoke|nightly)_FC_3D_FP16/.*_Fused=Multiply\(PerChannel\).*)");
@@ -649,5 +656,13 @@ std::vector<std::string> disabledTestPatterns() {
649656
retVector.emplace_back(R"(smoke_Conv_Sum_Broadcast_FP16/ConvSumInPlaceTest.*Relu\.Multiply\(PerChannel\)\.Add\(PerChannel\).*)");
650657
}
651658

659+
if (ov::with_cpu_x86_avx2_vnni_2()) {
660+
// jit_gemm_BF16 kernels are not supported for conv, inner_product, matmul on avx2_vnni_2 platforms
661+
retVector.emplace_back(R"(smoke_Conv_.*D_GEMM_BF16.*)");
662+
retVector.emplace_back(
663+
R"(smoke_GroupConv_.*D_Gemm_BF16/GroupConvolutionLayerCPUTest.CompareWithRefs.*primitive=jit_gemm.*)");
664+
retVector.emplace_back(R"(smoke_.*MatMulLayerCPUTest.*INFERENCE_PRECISION_HINT=bf16.*_primitive=jit_gemm.*)");
665+
}
666+
652667
return retVector;
653668
}

src/plugins/intel_cpu/tests/functional/utils/cpu_test_utils.cpp

+8-5
Original file line numberDiff line numberDiff line change
@@ -472,9 +472,10 @@ CPUTestsBase::deduce_expected_precision(const ov::element::Type& opPrecision,
472472
if (it != configuration.end()) {
473473
auto inferencePrecisionConfig = it->second.as<ov::element::Type>();
474474
inferencePrecisionSetExplicitly = true;
475-
// TODO also need to check (dnnl::impl::cpu::x64::avx2_vnni_2)
476-
if ((inferencePrecisionConfig == ov::element::bf16 && ov::with_cpu_x86_avx512_core()) ||
477-
(inferencePrecisionConfig == ov::element::f16 && ov::with_cpu_x86_avx512_core_fp16()) ||
475+
if ((inferencePrecisionConfig == ov::element::bf16 &&
476+
(ov::with_cpu_x86_avx512_core() || ov::with_cpu_x86_avx2_vnni_2())) ||
477+
(inferencePrecisionConfig == ov::element::f16 &&
478+
(ov::with_cpu_x86_avx512_core_fp16() || ov::with_cpu_x86_avx2_vnni_2())) ||
478479
(inferencePrecisionConfig == ov::element::f32) || (inferencePrecisionConfig == ov::element::dynamic)) {
479480
inferencePrecision = inferencePrecisionConfig;
480481
}
@@ -495,7 +496,8 @@ CPUTestsBase::deduce_expected_precision(const ov::element::Type& opPrecision,
495496
ov::element::Type deducedType = opPrecision;
496497
// enforceInferPrecision stage
497498
if (inferencePrecision == ov::element::bf16) {
498-
deducedType = ov::with_cpu_x86_avx512_core() ? ov::element::bf16 : ov::element::f32;
499+
deducedType =
500+
(ov::with_cpu_x86_avx512_core() || ov::with_cpu_x86_avx2_vnni_2()) ? ov::element::bf16 : ov::element::f32;
499501
}
500502

501503
// ngraph transform pipeline stage
@@ -505,7 +507,8 @@ CPUTestsBase::deduce_expected_precision(const ov::element::Type& opPrecision,
505507
}
506508
}
507509
if (deducedType == ov::element::bf16) {
508-
deducedType = ov::with_cpu_x86_avx512_core() ? ov::element::bf16 : ov::element::f32;
510+
deducedType =
511+
(ov::with_cpu_x86_avx512_core() || ov::with_cpu_x86_avx2_vnni_2()) ? ov::element::bf16 : ov::element::f32;
509512
} else if (deducedType == ov::element::f16) {
510513
if (inferencePrecision != ov::element::f16 && inferencePrecision != ov::element::dynamic) {
511514
deducedType = ov::element::f32;

src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp

+6
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,12 @@ void SubgraphBaseTest::compile_model() {
307307
}
308308
try {
309309
inference_precision = core->get_property(targetDevice, ov::hint::inference_precision);
310+
// for avx2_vnni_2 platforms, use the explicitly configured inference_precision hint so that a higher threshold can be generated during the
311+
// following calculate_thresholds stage
312+
if (ov::with_cpu_x86_avx2_vnni_2() &&
313+
configuration.find(ov::hint::inference_precision.name()) != configuration.end()) {
314+
inference_precision = configuration.at(ov::hint::inference_precision.name()).as<ov::element::Type>();
315+
}
310316
} catch (std::exception& e) {
311317
std::cout << "[ WARNING ] Impossible to get Inference Precision with exception: " << e.what() << std::endl;
312318
}

0 commit comments

Comments
 (0)