Skip to content

Commit 277c0b7

Browse files
authored
[CPU][ARM] Fix Reduce NHWC conversion (#25212)
### Details: Two issues have been fixed: - ACL supports tensor rank up to 4 for both Reduce kernels, NEReduceMean and NEReductionOperation (https://github.com/ARM-software/ComputeLibrary/blob/505adb91d40e05b3f80a075a4467a78a253395e1/src/runtime/NEON/functions/NEReductionOperation.cpp#L78). The fix checks the reduction axis for both NEReduceMean and NEReductionOperation. - The `axisCast` method is updated to support NDHWC-to-NCDHW conversion and vice versa. ### Tickets: - *ticket-id*
1 parent 2805775 commit 277c0b7

File tree

6 files changed

+129
-33
lines changed

6 files changed

+129
-33
lines changed

src/plugins/intel_cpu/src/nodes/executors/acl/acl_reduce.hpp

+12-9
Original file line number | Diff line number | Diff line change
@@ -69,14 +69,17 @@ class AclReduceExecutorBuilder : public ReduceExecutorBuilder {
6969
" dimensions maximum. src[0] shape rank is ", srcDescs[0]->getShape().getRank());
7070
return false;
7171
}
72-
if (reduceAttrs.operation == Algorithm::ReduceMean) {
73-
arm_compute::Coordinates axesMean;
74-
for (size_t i = 0; i < reduceAttrs.axes.size(); ++i) {
75-
auto axe = axisCast(reduceAttrs.axes[i], srcDescs[0]->getShape().getRank());
76-
if (axe > 3) {
77-
DEBUG_LOG("ACL supports tensor rank up to 4 for ReduceMean operation. Tensor rank: ", axe);
78-
return false;
79-
}
72+
auto srcShapeRank = srcDescs[0]->getShape().getRank();
73+
bool hasSrcNspcLayout = srcDescs[0]->hasLayoutType(LayoutType::nspc);
74+
for (size_t i = 0; i < reduceAttrs.axes.size(); ++i) {
75+
int axis = axisCast(reduceAttrs.axes[i], srcShapeRank, hasSrcNspcLayout ? NHWC_TO_NCHW : NO_LAYOUT_CONVERSION);
76+
if (axis == -1) {
77+
DEBUG_LOG("Layout conversion to NHWC has failed");
78+
return false;
79+
}
80+
if (axis > 3) {
81+
DEBUG_LOG("ACL supports reduction axis 0, 1, 2, 3. Unsupported reduction axis specified: ", axis);
82+
return false;
8083
}
8184
}
8285
if ((reduceAttrs.operation == Algorithm::ReduceSum ||
@@ -97,4 +100,4 @@ class AclReduceExecutorBuilder : public ReduceExecutorBuilder {
97100
};
98101

99102
} // namespace intel_cpu
100-
} // namespace ov
103+
} // namespace ov

src/plugins/intel_cpu/src/nodes/executors/acl/acl_utils.hpp

+14-5
Original file line number | Diff line number | Diff line change
@@ -82,17 +82,26 @@ enum ACLAxisCastMode {
8282
*/
8383
inline int axisCast(const std::size_t axis, const std::size_t shapeSize, ACLAxisCastMode axisCastMode = NO_LAYOUT_CONVERSION) {
8484
// CWHN (reverted NHWC) (0, 1, 2, 3) into WHCN (reverted NCHW) (1, 2, 0, 3)
85-
static std::vector<size_t> nhwcToNchw = {1, 2, 0, 3};
85+
static const std::array<size_t, 4> nhwcToNchw = {1, 2, 0, 3};
8686
// WHCN (reverted NCHW) (0, 1, 2, 3) into CWHN (reverted NHWC) (2, 0, 1, 3)
87-
static std::vector<size_t> nchwToNhwc = {2, 0, 1, 3};
87+
static const std::array<size_t, 4> nchwToNhwc = {2, 0, 1, 3};
88+
// CWHDN (reverted NDHWC) (0, 1, 2, 3, 4) into WHDCN (reverted NCDHW) (1, 2, 3, 0, 4)
89+
static const std::array<size_t, 5> ndhwcToNcdhw = {1, 2, 3, 0, 4};
90+
// WHDCN (reverted NCDHW) (0, 1, 2, 3, 4) into CWHDN (reverted NDHWC) (3, 0, 1, 2, 4)
91+
static const std::array<size_t, 5> ncdhwToNdhwc = {3, 0, 1, 2, 4};
92+
8893
size_t revertedAxis = shapeSize - axis - 1;
8994
switch (axisCastMode) {
95+
case NO_LAYOUT_CONVERSION:
96+
return revertedAxis;
9097
case NHWC_TO_NCHW:
91-
return revertedAxis > 3 ? -1 : nhwcToNchw[revertedAxis];
98+
if (shapeSize == 4) return nhwcToNchw[revertedAxis];
99+
if (shapeSize == 5) return ndhwcToNcdhw[revertedAxis];
92100
case NCHW_TO_NHWC:
93-
return revertedAxis > 3 ? -1 : nchwToNhwc[revertedAxis];
101+
if (shapeSize == 4) return nchwToNhwc[revertedAxis];
102+
if (shapeSize == 5) return ncdhwToNdhwc[revertedAxis];
94103
default:
95-
return revertedAxis;
104+
return -1;
96105
}
97106
}
98107

src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/reduce.cpp

+3-8
Original file line number | Diff line number | Diff line change
@@ -139,11 +139,9 @@ void ReduceCPULayerTest::SetUp() {
139139

140140
function = makeNgraphFunction(netPrecision, params, reduce, "Reduce");
141141

142-
if (ov::with_cpu_x86_avx512_core_amx()) {
143-
if (netPrecision == ov::element::f32 && configuration.count(ov::hint::inference_precision.name()) &&
144-
configuration.at(ov::hint::inference_precision.name()) == ov::element::f16) {
145-
abs_threshold = 5e-3;
146-
}
142+
if (netPrecision == ov::element::f32 && configuration.count(ov::hint::inference_precision.name()) &&
143+
configuration.at(ov::hint::inference_precision.name()) == ov::element::f16) {
144+
abs_threshold = 5e-3;
147145
}
148146
}
149147

@@ -254,10 +252,7 @@ const std::vector<std::map<std::string, ov::element::Type>> additionalConfig() {
254252
static const std::vector<std::map<std::string, ov::element::Type>> additionalConfig = {
255253
{{ov::hint::inference_precision.name(), ov::element::f32}},
256254
{{ov::hint::inference_precision.name(), ov::element::bf16}},
257-
// ARM doesn't support FP16 for now
258-
#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64)
259255
{{ov::hint::inference_precision.name(), ov::element::f16}},
260-
#endif
261256
};
262257
return additionalConfig;
263258
}
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,92 @@
1+
// Copyright (C) 2024 Intel Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
//
4+
5+
#include "custom/single_layer_tests/classes/reduce.hpp"
6+
#include "utils/cpu_test_utils.hpp"
7+
#include "utils/fusing_test_utils.hpp"
8+
#include "ov_lpt_models/common/builders.hpp"
9+
#include "common_test_utils/node_builders/fake_quantize.hpp"
10+
11+
using namespace CPUTestUtils;
12+
13+
namespace ov {
14+
namespace test {
15+
namespace Reduce {
16+
namespace {
17+
18+
std::vector<std::vector<ov::test::InputShape>> inputShapes_5D = {
19+
{{{}, {{2, 19, 2, 2, 9}}}},
20+
};
21+
22+
const std::vector<std::vector<int>> axes5D = {
23+
{2, 4},
24+
{1, 2, 4},
25+
};
26+
27+
std::vector<CPUSpecificParams> cpuParams_5D = {
28+
CPUSpecificParams({ndhwc}, {ndhwc}, {}, {}),
29+
CPUSpecificParams({ncdhw}, {ncdhw}, {}, {}),
30+
};
31+
32+
const auto params_MultiAxis_5D = testing::Combine(
33+
testing::Combine(
34+
testing::ValuesIn(axes5D),
35+
testing::Values(ov::test::utils::OpType::VECTOR),
36+
testing::Values(true),
37+
testing::ValuesIn(reductionTypes()),
38+
testing::ValuesIn(inpOutPrc()),
39+
testing::Values(ElementType::undefined),
40+
testing::Values(ElementType::undefined),
41+
testing::ValuesIn(inputShapes_5D)),
42+
testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D)),
43+
testing::Values(emptyFusingSpec),
44+
testing::ValuesIn(additionalConfig()));
45+
46+
const std::vector<std::vector<int>> axes5D_ref = {
47+
{0}
48+
};
49+
50+
std::vector<CPUSpecificParams> cpuParams_5D_ref = {
51+
CPUSpecificParams({ncdhw}, {ncdhw}, {"ref"}, {"ref"}),
52+
};
53+
54+
std::vector<std::map<std::string, ov::element::Type>> config_infer_prec_f32 = {
55+
{{ov::hint::inference_precision.name(), ov::element::f32}}
56+
};
57+
58+
const auto params_MultiAxis_5D_ref = testing::Combine(
59+
testing::Combine(
60+
testing::ValuesIn(axes5D_ref),
61+
testing::Values(ov::test::utils::OpType::VECTOR),
62+
testing::Values(true),
63+
testing::ValuesIn(reductionTypes()),
64+
testing::ValuesIn(inpOutPrc()),
65+
testing::Values(ElementType::undefined),
66+
testing::Values(ElementType::undefined),
67+
testing::ValuesIn(inputShapes_5D)),
68+
testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D_ref)),
69+
testing::Values(emptyFusingSpec),
70+
testing::ValuesIn(config_infer_prec_f32));
71+
72+
//There are dedicated instances of smoke_Reduce_MultiAxis_5D_CPU test in arm and x64 folders
73+
//because ACL does not support 0 as reduction axis
74+
INSTANTIATE_TEST_SUITE_P(
75+
smoke_Reduce_MultiAxis_5D_CPU,
76+
ReduceCPULayerTest,
77+
params_MultiAxis_5D,
78+
ReduceCPULayerTest::getTestCaseName
79+
);
80+
81+
// Reference implementation testing of ACL unsupported case
82+
INSTANTIATE_TEST_SUITE_P(
83+
smoke_Reduce_MultiAxis_5D_CPU_ref,
84+
ReduceCPULayerTest,
85+
params_MultiAxis_5D_ref,
86+
ReduceCPULayerTest::getTestCaseName
87+
);
88+
89+
} // namespace
90+
} // namespace Reduce
91+
} // namespace test
92+
} // namespace ov

src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/common/reduce.cpp

-4
Original file line number | Diff line number | Diff line change
@@ -24,10 +24,6 @@ std::vector<std::vector<ov::test::InputShape>> inputShapes_dynamic_2dims = {
2424
{{{2, 19, {1, 5}, {1, 10}}, {{2, 19, 2, 2}, {2, 19, 2, 9}}}},
2525
};
2626

27-
std::vector<std::vector<ov::test::InputShape>> inputShapes_5D = {
28-
{{{}, {{2, 19, 2, 2, 9}}}},
29-
};
30-
3127
std::vector<std::vector<ov::test::InputShape>> inputShapes_6D = {
3228
{{{}, {{2, 19, 2, 2, 2, 2}}}},
3329
};

src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/reduce.cpp

+8-7
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,8 @@ std::vector<std::vector<ov::test::InputShape>> inputShapes_3D_fuse_dyn = {
2323
{{{{1, 5}, 19, {1, 10}}, {{1, 19, 2}, {1, 19, 9}, {1, 19, 2}}}},
2424
};
2525

26-
std::vector<std::vector<ov::test::InputShape>> inputShapes_5D_dyn = {
26+
std::vector<std::vector<ov::test::InputShape>> inputShapes_5D = {
27+
{{{}, {{2, 19, 2, 2, 9}}}},
2728
{{{{1, 5}, 19, {1, 5}, {1, 5}, {1, 5}}, {{2, 19, 2, 2, 2}, {2, 19, 3, 2, 2}}}},
2829
};
2930

@@ -210,7 +211,7 @@ const auto params_MultiAxis_5D = testing::Combine(
210211
testing::ValuesIn(inpOutPrc()),
211212
testing::Values(ElementType::undefined),
212213
testing::Values(ElementType::undefined),
213-
testing::ValuesIn(inputShapes_5D_dyn)),
214+
testing::ValuesIn(inputShapes_5D)),
214215
testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D)),
215216
testing::Values(emptyFusingSpec),
216217
testing::ValuesIn(additionalConfig()));
@@ -238,7 +239,7 @@ const auto params_MultiAxis_5D_Hybrid = testing::Combine(
238239
testing::ValuesIn(inpOutPrc()),
239240
testing::Values(ElementType::undefined),
240241
testing::Values(ElementType::undefined),
241-
testing::ValuesIn(inputShapes_5D_dyn)),
242+
testing::ValuesIn(inputShapes_5D)),
242243
testing::ValuesIn(filterCPUSpecificParams(cpuParams_HybridLayout_5D)),
243244
testing::Values(emptyFusingSpec),
244245
testing::ValuesIn(additionalConfigFP32()));
@@ -463,7 +464,7 @@ const auto params_MultiAxis_5D_Logical = testing::Combine(
463464
testing::Values(ElementType::boolean),
464465
testing::Values(ElementType::undefined),
465466
testing::Values(ElementType::undefined),
466-
testing::ValuesIn(inputShapes_5D_dyn)),
467+
testing::ValuesIn(inputShapes_5D)),
467468
testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D)),
468469
testing::Values(emptyFusingSpec),
469470
testing::ValuesIn(additionalConfigFP32()));
@@ -491,7 +492,7 @@ const auto params_MultiAxis_5D_Hybrid_Logical = testing::Combine(
491492
testing::Values(ElementType::boolean),
492493
testing::Values(ElementType::undefined),
493494
testing::Values(ElementType::undefined),
494-
testing::ValuesIn(inputShapes_5D_dyn)),
495+
testing::ValuesIn(inputShapes_5D)),
495496
testing::ValuesIn(filterCPUSpecificParams(cpuParams_HybridLayout_5D)),
496497
testing::Values(emptyFusingSpec),
497498
testing::ValuesIn(additionalConfigFP32()));
@@ -604,7 +605,7 @@ const auto params_MultiAxis_5D_fusing = testing::Combine(
604605
testing::ValuesIn(inpOutPrc()),
605606
testing::Values(ElementType::undefined),
606607
testing::Values(ElementType::undefined),
607-
testing::ValuesIn(inputShapes_5D_dyn)),
608+
testing::ValuesIn(inputShapes_5D)),
608609
testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D)),
609610
testing::ValuesIn(fusingParamsSet),
610611
testing::ValuesIn(additionalConfig()));
@@ -696,7 +697,7 @@ const auto params_MultiAxis_5D_Hybrid_fusing_KeepNoDims = testing::Combine(
696697
testing::ValuesIn(inpOutPrc()),
697698
testing::Values(ElementType::undefined),
698699
testing::Values(ElementType::undefined),
699-
testing::ValuesIn(inputShapes_5D_dyn)),
700+
testing::ValuesIn(inputShapes_5D)),
700701
testing::ValuesIn(filterCPUSpecificParams(cpuParams_HybridLayout_5D)),
701702
testing::ValuesIn(fusingParamsSet_KeepNoDims),
702703
testing::ValuesIn(additionalConfigFP32()));

0 commit comments

Comments
 (0)