Skip to content

Commit 6bec04b

Browse files
committed
extend ov::with_cpu_x86_bfloat16() to cover avx2_vnni_2 and introduce ov::with_cpu_x86_float16()
1 parent 93d94a1 commit 6bec04b

File tree

13 files changed

+39
-17
lines changed

13 files changed

+39
-17
lines changed

src/inference/dev_api/openvino/runtime/system_conf.hpp

+8-1
Original file line numberDiff line numberDiff line change
@@ -142,10 +142,17 @@ OPENVINO_RUNTIME_API bool with_cpu_x86_avx512_core_vnni();
142142
/**
143143
* @brief Checks whether CPU supports BFloat16 capability
144144
* @ingroup ov_dev_api_system_conf
145-
* @return `True` is tAVX512_BF16 instructions are available, `false` otherwise
145+
* @return `true` if tAVX512_BF16 or AVX2_VNNI_2 instructions are available, `false` otherwise
146146
*/
147147
OPENVINO_RUNTIME_API bool with_cpu_x86_bfloat16();
148148

149+
/**
150+
* @brief Checks whether CPU supports Float16 capability
151+
* @ingroup ov_dev_api_system_conf
152+
* @return `true` if tAVX512_FP16 or AVX2_VNNI_2 instructions are available, `false` otherwise
153+
*/
154+
OPENVINO_RUNTIME_API bool with_cpu_x86_float16();
155+
149156
/**
150157
* @brief Checks whether CPU supports fp16 capability
151158
* @ingroup ov_dev_api_system_conf

src/inference/src/system_conf.cpp

+8-1
Original file line numberDiff line numberDiff line change
@@ -93,13 +93,17 @@ bool with_cpu_x86_avx512_core_vnni() {
9393
}
9494

9595
bool with_cpu_x86_bfloat16() {
96-
return get_cpu_info().has(Xbyak::util::Cpu::tAVX512_BF16);
96+
return get_cpu_info().has(Xbyak::util::Cpu::tAVX512_BF16) || with_cpu_x86_avx2_vnni_2();
9797
}
9898

9999
bool with_cpu_x86_avx512_core_fp16() {
100100
return get_cpu_info().has(Xbyak::util::Cpu::tAVX512_FP16);
101101
}
102102

103+
bool with_cpu_x86_float16() {
104+
return with_cpu_x86_avx512_core_fp16() || with_cpu_x86_avx2_vnni_2();
105+
}
106+
103107
bool with_cpu_x86_avx512_core_amx_int8() {
104108
return get_cpu_info().has(Xbyak::util::Cpu::tAMX_INT8);
105109
}
@@ -156,6 +160,9 @@ bool with_cpu_x86_bfloat16() {
156160
bool with_cpu_x86_avx512_core_fp16() {
157161
return false;
158162
}
163+
bool with_cpu_x86_float16() {
164+
return false;
165+
}
159166
bool with_cpu_x86_avx512_core_amx_int8() {
160167
return false;
161168
}

src/plugins/intel_cpu/src/nodes/paged_attn.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,7 @@ bool PagedAttention::isSupportedOperation(const std::shared_ptr<const ov::Node>&
276276
ov::element::Type PagedAttention::getRuntimePrecision() const {
277277
auto rtPrecision = getOriginalInputPrecisionAtPort(0);
278278
// bf16 should be enabled only when the platform supports it
279-
if (rtPrecision == ov::element::bf16 && ov::with_cpu_x86_bfloat16()) {
279+
if (rtPrecision == ov::element::bf16 && ov::with_cpu_x86_bfloat16() && !ov::with_cpu_x86_avx2_vnni_2()) {
280280
rtPrecision = ov::element::bf16;
281281
} else if (rtPrecision == ov::element::f16 && ov::with_cpu_x86_avx512_core_fp16()) {
282282
rtPrecision = ov::element::f16;

src/plugins/intel_cpu/src/nodes/scaled_attn.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -1238,7 +1238,7 @@ void ScaledDotProductAttention::createPrimitive() {
12381238
std::shared_ptr<Executor> executor = nullptr;
12391239
#ifdef OPENVINO_ARCH_X86_64
12401240
if (rtPrecision == ov::element::bf16) {
1241-
if (ov::with_cpu_x86_bfloat16()) {
1241+
if (ov::with_cpu_x86_bfloat16() && !ov::with_cpu_x86_avx2_vnni_2()) {
12421242
executor = std::make_shared<AttentionExecutor<KT_ONEDNN, ov::bfloat16>>(context,
12431243
m_key_quant_param.groupSize,
12441244
m_value_quant_param.groupSize,
@@ -2082,7 +2082,7 @@ const ScaledDotProductAttention::SDPAQuantParam& ScaledDotProductAttention::getV
20822082
ov::element::Type ScaledDotProductAttention::getRuntimePrecision() const {
20832083
auto rtPrecision = getOriginalInputPrecisionAtPort(0);
20842084
// bf16 should be enabled only when the platform supports it
2085-
if (rtPrecision == ov::element::bf16 && (ov::with_cpu_x86_bfloat16() || with_cpu_x86_avx2_vnni_2())) {
2085+
if (rtPrecision == ov::element::bf16 && ov::with_cpu_x86_bfloat16()) {
20862086
rtPrecision = ov::element::bf16;
20872087
} else if (rtPrecision == ov::element::f16 && ov::intel_cpu::hasHardwareSupport(ov::element::f16)) {
20882088
rtPrecision = ov::element::f16;

src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,8 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigStreamsNum) {
156156
#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64)
157157
const auto expected_precision_for_performance_mode = ov::intel_cpu::hasHardwareSupport(ov::element::f16) ? ov::element::f16 : ov::element::f32;
158158
#else
159-
const auto expected_precision_for_performance_mode = ov::with_cpu_x86_bfloat16() ? ov::element::bf16 : ov::element::f32;
159+
const auto expected_precision_for_performance_mode =
160+
(ov::with_cpu_x86_bfloat16() && !ov::with_cpu_x86_avx2_vnni_2()) ? ov::element::bf16 : ov::element::f32;
160161
#endif
161162

162163
TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigHintInferencePrecision) {

src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/random_uniform.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ void RandomUniformLayerTestCPU::SetUp() {
7979
updateSelectedType(getPrimitiveType(), ElementType::f32, configuration);
8080
}
8181
} else if (output_prc == ElementType::bf16) {
82-
if (ov::with_cpu_x86_bfloat16()) {
82+
if (ov::with_cpu_x86_bfloat16() && !ov::with_cpu_x86_avx2_vnni_2()) {
8383
updateSelectedType(getPrimitiveType(), ElementType::bf16, configuration);
8484
} else {
8585
updateSelectedType("ref_any", ElementType::bf16, configuration);

src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/group_convolution.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,7 @@ std::vector<groupConvLayerCPUTestParamsSet> filterParamsSetForDevice(
275275
auto additionalConfig = std::get<configIndex>(param);
276276
if (additionalConfig.count(ov::hint::inference_precision.name()) &&
277277
ov::element::bf16 == additionalConfig[ov::hint::inference_precision.name()].as<ov::element::Type>() &&
278-
!ov::with_cpu_x86_bfloat16()) {
278+
(!ov::with_cpu_x86_bfloat16() || ov::with_cpu_x86_avx2_vnni_2())) {
279279
continue;
280280
}
281281
resParamsSet.push_back(param);

src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp

+8-2
Original file line numberDiff line numberDiff line change
@@ -579,7 +579,7 @@ std::vector<std::string> disabledTestPatterns() {
579579
// TODO: Issue 92895
580580
// on platforms which do not support AMX, we are disabling I8 input tests
581581
retVector.emplace_back(R"(smoke_LPT/FakeQuantizeWithNotOptimalTransformation.CompareWithRefImpl.*CPU.*i8.*)");
582-
if (!ov::with_cpu_x86_avx512_core_amx_bf16() && !ov::with_cpu_x86_bfloat16()) {
582+
if ((!ov::with_cpu_x86_avx512_core_amx_bf16() && !ov::with_cpu_x86_bfloat16()) || ov::with_cpu_x86_avx2_vnni_2()) {
583583
// ignored for not supported bf16 platforms
584584
retVector.emplace_back(R"(.*smoke_Snippets_EnforcePrecision_bf16.*)");
585585
retVector.emplace_back(R"(.*smoke_Snippets_MHAWOTransposeEnforceBF16.*)");
@@ -638,7 +638,7 @@ std::vector<std::string> disabledTestPatterns() {
638638
retVector.emplace_back(R"(.*smoke_Deconv_(2|3)D_NSPC_INT8_AMX/DeconvolutionLayerCPUTest.*)");
639639
}
640640

641-
if (ov::with_cpu_x86_avx512_core_fp16() || ov::with_cpu_x86_avx2_vnni_2()) {
641+
if (ov::with_cpu_x86_float16()) {
642642
// Issue: 143852
643643
retVector.emplace_back(R"(smoke_ConvertRangeSubgraphCPUTest/ConvertRangeSubgraphCPUTest\.CompareWithRefs.*Prc=f16.*)");
644644
retVector.emplace_back(R"((smoke|nightly)_FC_3D_FP16/.*_Fused=Multiply\(PerChannel\).*)");
@@ -655,6 +655,12 @@ std::vector<std::string> disabledTestPatterns() {
655655
retVector.emplace_back(
656656
R"(smoke_GroupConv_.*D_Gemm_BF16/GroupConvolutionLayerCPUTest.CompareWithRefs.*primitive=jit_gemm.*)");
657657
retVector.emplace_back(R"(smoke_.*MatMulLayerCPUTest.*INFERENCE_PRECISION_HINT=bf16.*_primitive=jit_gemm.*)");
658+
// Issue: 163147
659+
retVector.emplace_back(
660+
R"(smoke_CompareWithRefs_4D.*[Ff]using.*EltwiseLayerCPUTest\.CompareWithRefs.*INFERENCE_PRECISION_HINT=f16.*enforceSnippets=1.*)");
661+
// Issue: 163144
662+
retVector.emplace_back(
663+
R"(smoke_ScaledAttn_CPU/ScaledAttnLayerCPUTest.CompareWithRefs/netPRC=bf16.*_TS=\(2\.8\.16\.32\)_\(2\.8\.16\.32\)_\(2\.8\.16\.32\)_\(1\.8\.48\.32\)_\(1\.8\.48\.32\)_\(1\.8\.48\.32\)_\(16\.48\)_\(16\.1\)_\(1\.48\).*)");
658664
}
659665

660666
return retVector;

src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/fully_connected.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@ static inline std::vector<std::vector<element::Type>> precisions(bool only_fp32
3131
auto quant = quantized_precisions();
3232
std::copy(quant.begin(), quant.end(), std::back_inserter(prc));
3333
// In Snippets MatMul BF16 is supported only on bf16/AMX platforms
34-
if (ov::with_cpu_x86_bfloat16() || ov::with_cpu_x86_avx512_core_amx_bf16()) {
34+
if ((ov::with_cpu_x86_bfloat16() && !ov::with_cpu_x86_avx2_vnni_2()) ||
35+
ov::with_cpu_x86_avx512_core_amx_bf16()) {
3536
prc.emplace_back(std::vector<element::Type>{element::bf16, element::bf16});
3637
}
3738
}

src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/transpose_matmul.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,8 @@ static inline std::vector<std::vector<element::Type>> precisions(bool only_fp32
179179
prc.emplace_back(std::vector<element::Type>{element::u8, element::i8});
180180
}
181181
// In Snippets MatMul BF16 is supported only on bf16/AMX platforms
182-
if (ov::with_cpu_x86_bfloat16() || ov::with_cpu_x86_avx512_core_amx_bf16()) {
182+
if ((ov::with_cpu_x86_bfloat16() && !ov::with_cpu_x86_avx2_vnni_2()) ||
183+
ov::with_cpu_x86_avx512_core_amx_bf16()) {
183184
prc.emplace_back(std::vector<element::Type>{element::bf16, element::bf16});
184185
}
185186
}

src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/utils.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ namespace snippets {
1313
#define SNIPPETS_TESTS_STATIC_SHAPES(...) static_shapes_to_test_representation(std::vector<std::vector<ov::Shape>>{__VA_ARGS__})
1414

1515
static inline bool is_bf16_supported_by_brgemm() {
16-
return ov::with_cpu_x86_bfloat16() || ov::with_cpu_x86_avx512_core_amx_bf16();
16+
return (ov::with_cpu_x86_bfloat16() && !ov::with_cpu_x86_avx2_vnni_2()) || ov::with_cpu_x86_avx512_core_amx_bf16();
1717
}
1818

1919
static inline bool is_fp16_supported_by_brgemm() {

src/plugins/intel_cpu/tests/functional/utils/cpu_test_utils.cpp

+2-3
Original file line numberDiff line numberDiff line change
@@ -474,8 +474,7 @@ CPUTestsBase::deduce_expected_precision(const ov::element::Type& opPrecision,
474474
inferencePrecisionSetExplicitly = true;
475475
if ((inferencePrecisionConfig == ov::element::bf16 &&
476476
(ov::with_cpu_x86_avx512_core() || ov::with_cpu_x86_avx2_vnni_2())) ||
477-
(inferencePrecisionConfig == ov::element::f16 &&
478-
(ov::with_cpu_x86_avx512_core_fp16() || ov::with_cpu_x86_avx2_vnni_2())) ||
477+
(inferencePrecisionConfig == ov::element::f16 && ov::with_cpu_x86_float16()) ||
479478
(inferencePrecisionConfig == ov::element::f32) || (inferencePrecisionConfig == ov::element::dynamic)) {
480479
inferencePrecision = inferencePrecisionConfig;
481480
}
@@ -485,7 +484,7 @@ CPUTestsBase::deduce_expected_precision(const ov::element::Type& opPrecision,
485484
const auto& configIt = configuration.find(executionModeKey);
486485
if (configIt != configuration.end() && configIt->second.as<ov::hint::ExecutionMode>() == ov::hint::ExecutionMode::PERFORMANCE) {
487486
inferencePrecision = ov::element::f32;
488-
if (ov::with_cpu_x86_bfloat16()) {
487+
if (ov::with_cpu_x86_bfloat16() && !ov::with_cpu_x86_avx2_vnni_2()) {
489488
inferencePrecision = ov::element::bf16;
490489
}
491490
} else {

src/plugins/intel_cpu/tests/unit/brgemm_executor_test.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ void run_test(ov::element::Type rtPrec) {
7070

7171
TEST_P(BrgemmKernelTest, simpleGemmTest) {
7272
ov::element::Type rtPrec = this->GetParam();
73-
if (rtPrec == ov::element::bf16 && !ov::with_cpu_x86_bfloat16())
73+
if (rtPrec == ov::element::bf16 && (!ov::with_cpu_x86_bfloat16() || ov::with_cpu_x86_avx2_vnni_2()))
7474
GTEST_SKIP();
7575
if (rtPrec == ov::element::f32 && !ov::with_cpu_x86_avx512_core())
7676
GTEST_SKIP();

0 commit comments

Comments
 (0)