Skip to content

Commit 0c598f4

Browse files
[CPU] only allow per-oc or per-tensor FQ fusing into FC (openvinotoolkit#25530)
### Details:
- Add a check to reject non-supported FakeQuantize from fusing into FC node, so they can run in standalone mode w/o causing exceptions when composing oneDNN postOps.
- port from openvinotoolkit#23009
- add test case

### Tickets:
- *CVS-131890*

---------

Signed-off-by: HU Yuan2 <yuan2.hu@intel.com>
Co-authored-by: Li, Tingqian <tingqian.li@intel.com>
1 parent b9d98cb commit 0c598f4

File tree

4 files changed

+86
-1
lines changed

4 files changed

+86
-1
lines changed

src/plugins/intel_cpu/src/nodes/fullyconnected.cpp

+21
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
#include "utils/debug_capabilities.h"
2626
#include "utils/general_utils.h"
2727

28+
#include "fake_quantize.h"
29+
2830
using namespace dnnl;
2931
using namespace ov::element;
3032

@@ -94,6 +96,25 @@ bool FullyConnected::canFuse(const NodePtr& node) const {
9496
#if defined(OV_CPU_WITH_SHL)
9597
return false;
9698
#endif
99+
if (node->getType() == Type::FakeQuantize) {
100+
auto* fq = dynamic_cast<FakeQuantize*>(node.get());
101+
if (fq->getBroadcastingPolicy() != FakeQuantize::BroadcastingPolicy::PerTensor) {
102+
const auto& dstShape = getOutputShapeAtPort(0);
103+
auto dataRanks = dstShape.getRank();
104+
// only per-OC or per-Tensor fakequantize can be postOps
105+
if (fq->getAxis() != dataRanks - 1) {
106+
DEBUG_LOG("reject FakeQuantize ",
107+
fq->getName(),
108+
"(axis=",
109+
fq->getAxis(),
110+
") from fusing into ",
111+
getName(),
112+
" with dst shape ",
113+
dstShape);
114+
return false;
115+
}
116+
}
117+
}
97118
return canFuseSimpleOperation(node);
98119
}
99120

src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/matmul.cpp

+39
Original file line numberDiff line numberDiff line change
@@ -1108,6 +1108,45 @@ INSTANTIATE_TEST_SUITE_P(
11081108
testParamsDynamicFusingFullUndefShapes,
11091109
MatMulLayerCPUTest::getTestCaseName);
11101110

1111+
class FCNotFuseFQCPUTest : public MatMulLayerCPUTest {
1112+
void SetUp() override {
1113+
MatMulLayerCPUTest::SetUp();
1114+
expectPostOpsToBeFused = false;
1115+
}
1116+
};
1117+
1118+
TEST_P(FCNotFuseFQCPUTest, CompareWithRefs) {
1119+
run();
1120+
CheckPluginRelatedResults(compiledModel, cpuNodeType);
1121+
}
1122+
1123+
const std::vector<ShapeRelatedParams>& notFuseSmoke() {
1124+
static const std::vector<ShapeRelatedParams> params = {
1125+
{static_shapes_to_test_representation({{59, 1}, {1, 120}}), {false, true}},
1126+
{static_shapes_to_test_representation({{59, 1}, {1, 120}}), {true, true}},
1127+
1128+
{static_shapes_to_test_representation({{59, 120}, {120, 1}}), {false, false}},
1129+
{static_shapes_to_test_representation({{59, 120}, {120, 1}}), {true, true}},
1130+
1131+
{static_shapes_to_test_representation({{71, 128}, {128, 20}}), {true, false}},
1132+
{static_shapes_to_test_representation({{71, 128}, {128, 20}}), {false, true}},
1133+
};
1134+
return params;
1135+
}
1136+
1137+
const auto notFuseTestParamsSmoke = ::testing::Combine(::testing::Combine(::testing::ValuesIn(notFuseSmoke()),
1138+
::testing::Values(ElementType::f32),
1139+
::testing::Values(ElementType::undefined),
1140+
::testing::Values(ElementType::undefined),
1141+
::testing::Values(utils::InputLayerType::CONSTANT),
1142+
::testing::Values(ov::test::utils::DEVICE_CPU),
1143+
::testing::Values(emptyAdditionalConfig())),
1144+
::testing::Values(MatMulNodeType::FullyConnected),
1145+
::testing::ValuesIn({fusingFakeQuantizePerBatch, fusingFakeQuantizeFullTensor}),
1146+
::testing::ValuesIn({CPUSpecificParams{{}, {}, {""}, "any_type"}}));
1147+
1148+
INSTANTIATE_TEST_SUITE_P(smoke_FC, FCNotFuseFQCPUTest, notFuseTestParamsSmoke, FCNotFuseFQCPUTest::getTestCaseName);
1149+
11111150
} // namespace
11121151
} // namespace MatMul
11131152
} // namespace test

src/plugins/intel_cpu/tests/functional/utils/fusing_test_utils.cpp

+5-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,11 @@ void CpuTestWithFusing::CheckFusingResults(const std::shared_ptr<const ov::Model
5858
size_t pos = 0;
5959
for (const auto& fusedOp : fusedOps) {
6060
pos = originalLayersNames.find(fusedOp, checkFusingPosition ? pos : 0);
61-
ASSERT_TRUE(pos != std::string::npos) << "Fused op " << fusedOp << " has not been found!";
61+
if (expectPostOpsToBeFused) {
62+
ASSERT_TRUE(pos != std::string::npos) << "Fused op " << fusedOp << " has not been found!";
63+
} else {
64+
ASSERT_TRUE(pos == std::string::npos) << "op" << fusedOp << " should not be fused!";
65+
}
6266
}
6367
}
6468
}

src/plugins/intel_cpu/tests/functional/utils/fusing_test_utils.hpp

+21
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ class CpuTestWithFusing : public CPUTestsBase {
9393
std::shared_ptr<postOpMgr> postOpMgrPtr;
9494
std::vector<std::string> fusedOps;
9595
bool checkFusingPosition = true;
96+
bool expectPostOpsToBeFused = true;
9697
};
9798

9899
static int getChannelAxis(const ov::AxisSet &axes, bool keep_dims) {
@@ -304,6 +305,26 @@ const auto fusingFakeQuantizePerChannel = fusingSpecificParams{std::make_shared<
304305
return ov::test::utils::make_fake_quantize(cfg.input, localPrc, 256, newShape);
305306
}, "FakeQuantize(PerChannel)"}}), {"FakeQuantize"}};
306307

308+
const auto fusingFakeQuantizePerBatch = fusingSpecificParams{std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
309+
{[](postNodeConfig& cfg){
310+
auto localPrc = cfg.input->get_element_type();
311+
const auto shape = cfg.input->get_output_partial_shape(0);
312+
ov::Shape perBatchSize(shape.size(), 1);
313+
perBatchSize[0] = shape[0].get_length();
314+
return ov::test::utils::make_fake_quantize(cfg.input, localPrc, 256, perBatchSize);
315+
}, "FakeQuantize(PerBatch)"}}), {"FakeQuantize"}};
316+
317+
const auto fusingFakeQuantizeFullTensor = fusingSpecificParams{std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
318+
{[](postNodeConfig& cfg){
319+
auto localPrc = cfg.input->get_element_type();
320+
const auto shape = cfg.input->get_output_partial_shape(0);
321+
ov::Shape fullTensorShape(shape.size(), 1);
322+
for (size_t axis = 0; axis < shape.size(); axis++) {
323+
fullTensorShape[axis] = shape[axis].get_length();
324+
}
325+
return ov::test::utils::make_fake_quantize(cfg.input, localPrc, 256, fullTensorShape);
326+
}, "FakeQuantize(FullTensor)"}}), {"FakeQuantize"}};
327+
307328
const auto fusingFakeQuantizePerChannelRelu = fusingSpecificParams{std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
308329
{[](postNodeConfig& cfg){
309330
auto localPrc = cfg.input->get_element_type();

0 commit comments

Comments
 (0)