From b0db7415f43543cf9515a0a2be3deac3d870fdc8 Mon Sep 17 00:00:00 2001 From: xipingya Date: Thu, 11 Apr 2024 13:20:37 +0800 Subject: [PATCH 01/17] Update select prefer PD score algorithm to fix ZOOM decoder network regression issue. Signed-off-by: xipingya --- src/plugins/intel_cpu/src/node.cpp | 42 +++++- .../subgraph_tests/src/subgraph_select_pd.cpp | 125 ++++++++++++++++++ 2 files changed, 164 insertions(+), 3 deletions(-) create mode 100644 src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/subgraph_select_pd.cpp diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index 60f6206818783b..28ce0ae1f2ceaa 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -266,7 +266,7 @@ void Node::selectPreferPrimitiveDescriptor(const std::vector& pr // We don't take into account constant edges since reorders on them will be executed on load network stage if (ignoreConstInputs && j > 0 && parentPtr->isConstant()) { - equalsLocalFormatCount++; + equalsLocalFormatCount += 100; continue; } @@ -282,8 +282,44 @@ void Node::selectPreferPrimitiveDescriptor(const std::vector& pr const bool isCompatible = curDesc->isCompatible(*parentDesc); - if (isCompatible) { - equalsLocalFormatCount++; + /* + Heuristics: + SShape=Shape(all elements = 1 or only 1 position is not 1) + Reorder is inserted after the SShape node, and such reorder have less cost of calculation. + If multiple inputs of a node have different layouts, try to have the reorder occur on the SShape + node. + + Select Alogrithm: + Condition 1: Precision and shape keep same. + Condition 2: Static shape. + 1: Compatible + not SShape: score=100 + 2: Compatible + SShape: score=2 + 3: Not Compatible + SShape: score=1 + 4: Others: score=0 + */ + if ((curDesc->getPrecision() == parentDesc->getPrecision()) && curDesc->getShape().isStatic() && + parentDesc->getShape().isStatic() && curDesc->getShape() == parentDesc->getShape()) { + bool isSShape = false; + auto curDims = curDesc->getShape().getDims(); + size_t noneOneNum = 0; + for (size_t d = 0; d < curDims.size(); d++) { + if (curDims[d] != 1u) { + noneOneNum++; + } + } + isSShape = (noneOneNum == 1u) || (noneOneNum == 0); + + if (isCompatible && (!isSShape)) { + equalsLocalFormatCount += 100; + } else if (isCompatible && isSShape) { + equalsLocalFormatCount += 2; + } else if (!isCompatible && isSShape) { + equalsLocalFormatCount += 1; + } + } else { + if (isCompatible) { + equalsLocalFormatCount += 100; + } } DEBUG_LOG(getName(), " pd[", i, "].inConfs[", j, "]" diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/subgraph_select_pd.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/subgraph_select_pd.cpp new file mode 100644 index 00000000000000..221237fee70536 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/subgraph_select_pd.cpp @@ -0,0 +1,125 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/opsets/opset8.hpp" +#include "ov_models/builders.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "test_utils/cpu_test_utils.hpp" + +namespace ov { +namespace test { + +/* + input1(f32_abcd_{1,64,32,32}) input2(f16_abcd_{1,128,1,1}) + | | + Reorder(f32_acdb_{1,64,32,32}) const Convert(f32_abcd_{1,128,1,1}) + | / | + | / | + Convolution(f32_acdb_{1,1,30,30}) Range_1520 VariadicSplit(f32_abcd_{1,64,1,1}, f32_abcd_{1,64,1,1}) + | / \ / + | / \ / + | / \ / + | / \ / + 
MVN(f32_acdb_{1,1,30,30}) Reorder1(f32_acdb_{1,64,1,1}) Reorder2(f32_acdb_{1,64,1,1}) + \ / / + \ / / + \ / / + \ / / + Subgraph(f32_acdb_{1,64,30,30}) + | + | + Convolution(f32_acdb_{1,1,28,28}) + | + Result + The Subgraph node have 3 inputs: they don't have same layout. + Expected: Reorder is inserted after VariadicSplit[0] and VariadicSplit[1], not inserted after MVN. + Because VariadicSplit's output layout is [1,64,1,1], it' reorder almost have calculation. +*/ + +class SubgraphSelectPD : virtual public SubgraphBaseStaticTest { +protected: + void SetUp() override { + targetDevice = ov::test::utils::DEVICE_CPU; + abs_threshold = 1e-2; + + auto type = element::f32; +#define CONST1 128 + auto input1 = std::make_shared(type, Shape{1, CONST1 / 2, 32, 32}); + input1->set_friendly_name("input1"); + auto input2 = std::make_shared(ov::element::f16, Shape{1, CONST1, 1, 1}); + input2->set_friendly_name("input2"); + + auto variadicSplit = std::make_shared( + input2, + ov::opset8::Constant::create(element::i64, Shape{1}, {1}), + ov::opset8::Constant::create(element::i64, Shape{2}, {CONST1 / 2, CONST1 / 2})); + variadicSplit->set_friendly_name("variadicSplit"); + + auto add1 = std::make_shared(variadicSplit->output(0), + ov::opset8::Constant::create(element::f16, Shape{1}, {1})); + add1->set_friendly_name("add1"); + auto convert_input1 = std::make_shared(input1, ov::element::f16); + convert_input1->set_friendly_name("convert_input1"); + auto shapeof = std::make_shared(convert_input1); + auto rankof = std::make_shared(shapeof); + auto squeeze = + std::make_shared(rankof, ov::opset8::Constant::create(element::i64, Shape{1}, {0})); + + auto range = std::make_shared(ov::opset8::Constant::create(element::i64, Shape{}, {2}), + squeeze, + ov::opset8::Constant::create(element::i64, Shape{}, {1}), + ov::element::i64); + auto create_conv = [](const std::shared_ptr& input_node) { + auto conv = + std::make_shared(input_node, + ngraph::builder::makeConstant(ov::element::f16, + Shape{1, CONST1 / 2, 3, 3}, + std::vector{}, + true, + 0.1f, + 0.9f), + Strides{1, 1}, + CoordinateDiff{0, 0}, + CoordinateDiff{0, 0}, + Strides{1, 1}); + conv->get_rt_info() = + CPUTestUtils::CPUTestsBase::makeCPUInfo({CPUTestUtils::nhwc}, {CPUTestUtils::nhwc}, {}); + return conv; + }; + auto conv1 = create_conv(convert_input1); + auto mvn = std::make_shared(conv1, range, false, 0.1, op::MVNEpsMode::INSIDE_SQRT); + auto mul = std::make_shared(add1, mvn); + auto add2 = std::make_shared(variadicSplit->output(1), mul); + auto LeakyRelu = + std::make_shared(add2, ov::opset8::Constant::create(element::f32, Shape{1}, {1})); + LeakyRelu->set_friendly_name("LeakyRelu"); + + auto conv2 = create_conv(LeakyRelu); + conv2->set_friendly_name("conv2"); + + function = std::make_shared(conv2, ParameterVector{input1, input2}); + } + + void TearDown() override { + auto runtime_function = compiledModel.get_runtime_model(); + int nodes_found = 0; + for (const auto& n : runtime_function->get_ordered_ops()) { + auto layer_type = n->get_rt_info().at(ov::exec_model_info::LAYER_TYPE).as(); + if (layer_type == "Subgraph") { + nodes_found++; + auto output_layout = n->get_rt_info().at(ov::exec_model_info::OUTPUT_LAYOUTS).as(); + // The optimal choose should be: 'nhwc'. 
+ ASSERT_EQ(output_layout, "acdb"); + } + } + ASSERT_GT(nodes_found, 0); + } +}; + +TEST_F(SubgraphSelectPD, smoke_CompareWithRefs) { + run(); +} + +} // namespace test +} // namespace ov From 98eccf4e02a68fa5f8f0bcd2e802b607fa1342d9 Mon Sep 17 00:00:00 2001 From: xipingya Date: Thu, 11 Apr 2024 13:41:21 +0800 Subject: [PATCH 02/17] replace makeConst with ov::test::utils::deprecated::make_constant Signed-off-by: xipingya --- .../subgraph_tests/src/subgraph_select_pd.cpp | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/subgraph_select_pd.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/subgraph_select_pd.cpp index 221237fee70536..0ebe71f6447933 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/subgraph_select_pd.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/subgraph_select_pd.cpp @@ -2,10 +2,10 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "common_test_utils/node_builders/constant.hpp" #include "openvino/opsets/opset8.hpp" -#include "ov_models/builders.hpp" #include "shared_test_classes/base/ov_subgraph.hpp" -#include "test_utils/cpu_test_utils.hpp" +#include "utils/cpu_test_utils.hpp" namespace ov { namespace test { @@ -71,18 +71,18 @@ class SubgraphSelectPD : virtual public SubgraphBaseStaticTest { ov::opset8::Constant::create(element::i64, Shape{}, {1}), ov::element::i64); auto create_conv = [](const std::shared_ptr& input_node) { - auto conv = - std::make_shared(input_node, - ngraph::builder::makeConstant(ov::element::f16, - Shape{1, CONST1 / 2, 3, 3}, - std::vector{}, - true, - 0.1f, - 0.9f), - Strides{1, 1}, - CoordinateDiff{0, 0}, - CoordinateDiff{0, 0}, - Strides{1, 1}); + auto conv = std::make_shared( + input_node, + ov::test::utils::deprecated::make_constant(ov::element::f16, + Shape{1, CONST1 / 2, 3, 3}, + std::vector{}, + true, + 0.1f, + 0.9f), + Strides{1, 1}, + CoordinateDiff{0, 0}, + CoordinateDiff{0, 0}, + Strides{1, 1}); conv->get_rt_info() = CPUTestUtils::CPUTestsBase::makeCPUInfo({CPUTestUtils::nhwc}, {CPUTestUtils::nhwc}, {}); return conv; From 2d2ee67b027a82588c97a1adef0d81493736cee0 Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Fri, 12 Apr 2024 17:10:58 +0800 Subject: [PATCH 03/17] Update subgraph_select_pd.cpp --- .../functional/custom/subgraph_tests/src/subgraph_select_pd.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/subgraph_select_pd.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/subgraph_select_pd.cpp index 0ebe71f6447933..9ecd5038d13aac 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/subgraph_select_pd.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/subgraph_select_pd.cpp @@ -34,7 +34,7 @@ namespace test { Result The Subgraph node have 3 inputs: they don't have same layout. Expected: Reorder is inserted after VariadicSplit[0] and VariadicSplit[1], not inserted after MVN. - Because VariadicSplit's output layout is [1,64,1,1], it' reorder almost have calculation. + Because VariadicSplit's output layout is [1,64,1,1], it' reorder almost have no calculation. 
*/ class SubgraphSelectPD : virtual public SubgraphBaseStaticTest { From d3d10181f77078a8fca2c61986d58dcf362c4240 Mon Sep 17 00:00:00 2001 From: Yan Date: Mon, 15 Apr 2024 09:22:35 +0800 Subject: [PATCH 04/17] Replace define with constexpr Signed-off-by: Yan --- .../custom/subgraph_tests/src/subgraph_select_pd.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/subgraph_select_pd.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/subgraph_select_pd.cpp index 9ecd5038d13aac..e8270b9769a69e 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/subgraph_select_pd.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/subgraph_select_pd.cpp @@ -44,16 +44,16 @@ class SubgraphSelectPD : virtual public SubgraphBaseStaticTest { abs_threshold = 1e-2; auto type = element::f32; -#define CONST1 128 - auto input1 = std::make_shared(type, Shape{1, CONST1 / 2, 32, 32}); + constexpr int const1 = 128; + auto input1 = std::make_shared(type, Shape{1, const1 / 2, 32, 32}); input1->set_friendly_name("input1"); - auto input2 = std::make_shared(ov::element::f16, Shape{1, CONST1, 1, 1}); + auto input2 = std::make_shared(ov::element::f16, Shape{1, const1, 1, 1}); input2->set_friendly_name("input2"); auto variadicSplit = std::make_shared( input2, ov::opset8::Constant::create(element::i64, Shape{1}, {1}), - ov::opset8::Constant::create(element::i64, Shape{2}, {CONST1 / 2, CONST1 / 2})); + ov::opset8::Constant::create(element::i64, Shape{2}, {const1 / 2, const1 / 2})); variadicSplit->set_friendly_name("variadicSplit"); auto add1 = std::make_shared(variadicSplit->output(0), @@ -74,7 +74,7 @@ class SubgraphSelectPD : virtual public SubgraphBaseStaticTest { auto conv = std::make_shared( input_node, ov::test::utils::deprecated::make_constant(ov::element::f16, - Shape{1, CONST1 / 2, 3, 3}, + Shape{1, const1 / 2, 3, 3}, std::vector{}, true, 0.1f, From f831ffb46ea67365a8fe962429745d0509ad39de Mon Sep 17 00:00:00 2001 From: xipingya Date: Thu, 18 Apr 2024 15:07:00 +0800 Subject: [PATCH 05/17] Fix Windows build issue. Signed-off-by: xipingya --- .../custom/subgraph_tests/src/subgraph_select_pd.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/subgraph_select_pd.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/subgraph_select_pd.cpp index e8270b9769a69e..528af9896be080 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/subgraph_select_pd.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/subgraph_select_pd.cpp @@ -70,11 +70,11 @@ class SubgraphSelectPD : virtual public SubgraphBaseStaticTest { squeeze, ov::opset8::Constant::create(element::i64, Shape{}, {1}), ov::element::i64); - auto create_conv = [](const std::shared_ptr& input_node) { + auto create_conv = [&](const std::shared_ptr& input_node) { auto conv = std::make_shared( input_node, ov::test::utils::deprecated::make_constant(ov::element::f16, - Shape{1, const1 / 2, 3, 3}, + Shape{1, const1 / 2u, 3, 3}, std::vector{}, true, 0.1f, From c022b3d1ff629b3003dbb77e9f441ff2fe412626 Mon Sep 17 00:00:00 2001 From: xipingya Date: Thu, 27 Jun 2024 13:32:16 +0800 Subject: [PATCH 06/17] I found 3 subgraph nodes after rebasing. Fix test fail bug. 
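For context, the "SShape" (scalar shape) idea that this commit's comment change spells out can be reduced to a small standalone sketch. The code below only illustrates the scoring table from PATCH 01 with plain integer dims and an invented scoreInput() helper; it is not the plugin's Node/MemoryDesc code.

    // Standalone sketch of the scalar-shape scoring heuristic (illustrative only).
    #include <cassert>
    #include <cstddef>
    #include <vector>

    // A shape is "SShape" (scalar-like) when at most one dimension differs from 1,
    // e.g. {1,64,1,1} or {1,1,1,1}. Reordering such a tensor is cheap.
    static bool isScalarLikeShape(const std::vector<size_t>& dims) {
        size_t notOne = 0;
        for (size_t d : dims)
            if (d != 1)
                ++notOne;
        return notOne <= 1;
    }

    // Score one input edge the way the PATCH 01 comment describes:
    //   compatible + not scalar-like -> 100, compatible + scalar-like -> 2,
    //   incompatible + scalar-like   -> 1,   anything else            -> 0.
    static int scoreInput(bool layoutsCompatible, const std::vector<size_t>& dims) {
        const bool scalarLike = isScalarLikeShape(dims);
        if (layoutsCompatible && !scalarLike) return 100;
        if (layoutsCompatible && scalarLike)  return 2;
        if (!layoutsCompatible && scalarLike) return 1;
        return 0;
    }

    int main() {
        // {1,64,1,1} is scalar-like, {1,64,30,30} is not.
        assert(isScalarLikeShape({1, 64, 1, 1}));
        assert(!isScalarLikeShape({1, 64, 30, 30}));
        // A compatible full-size input outweighs a compatible scalar-like one,
        // so the reorder ends up on the cheap {1,64,1,1} edge.
        assert(scoreInput(true, {1, 64, 30, 30}) > scoreInput(true, {1, 64, 1, 1}));
        return 0;
    }
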
Signed-off-by: xipingya --- src/plugins/intel_cpu/src/node.cpp | 2 +- .../custom/subgraph_tests/src/subgraph_select_pd.cpp | 9 ++------- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index 28ce0ae1f2ceaa..0fceacbbe6e640 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -284,7 +284,7 @@ void Node::selectPreferPrimitiveDescriptor(const std::vector& pr /* Heuristics: - SShape=Shape(all elements = 1 or only 1 position is not 1) + SShape=Scalar Shape(all elements = 1 or only 1 position is not 1) Reorder is inserted after the SShape node, and such reorder have less cost of calculation. If multiple inputs of a node have different layouts, try to have the reorder occur on the SShape node. diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/subgraph_select_pd.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/subgraph_select_pd.cpp index 528af9896be080..be04898b4b5a91 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/subgraph_select_pd.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/subgraph_select_pd.cpp @@ -73,12 +73,7 @@ class SubgraphSelectPD : virtual public SubgraphBaseStaticTest { auto create_conv = [&](const std::shared_ptr& input_node) { auto conv = std::make_shared( input_node, - ov::test::utils::deprecated::make_constant(ov::element::f16, - Shape{1, const1 / 2u, 3, 3}, - std::vector{}, - true, - 0.1f, - 0.9f), + ov::test::utils::make_constant(ov::element::f16, Shape{1, const1 / 2u, 3, 3}), Strides{1, 1}, CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, @@ -106,7 +101,7 @@ class SubgraphSelectPD : virtual public SubgraphBaseStaticTest { int nodes_found = 0; for (const auto& n : runtime_function->get_ordered_ops()) { auto layer_type = n->get_rt_info().at(ov::exec_model_info::LAYER_TYPE).as(); - if (layer_type == "Subgraph") { + if (layer_type == "Subgraph" && n->get_input_size() == 3u) { nodes_found++; auto output_layout = n->get_rt_info().at(ov::exec_model_info::OUTPUT_LAYOUTS).as(); // The optimal choose should be: 'nhwc'. From 6f3f8417da581448b2e38e7c98d77116a8299df4 Mon Sep 17 00:00:00 2001 From: xipingya Date: Fri, 12 Jul 2024 02:19:37 +0000 Subject: [PATCH 07/17] Enable "selectPreferPrimitiveDescriptorWithShape", It only works for Subgraph currently. 
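In short, the new routine estimates, for every supported descriptor, how expensive the Reorder nodes forced by incompatible input layouts would be (roughly the element count of each such input, and about 1 for scalar-like shapes) and picks the cheapest candidate per implementation-priority type. A minimal standalone sketch of that idea, using simplified stand-in types rather than the real NodeDesc/MemoryDesc API:

    // Standalone sketch of cost-based primitive-descriptor selection (illustrative only).
    #include <cstddef>
    #include <functional>
    #include <iostream>
    #include <limits>
    #include <numeric>
    #include <vector>

    // Simplified stand-in for one input port of a candidate descriptor.
    struct InputPort {
        bool layoutCompatibleWithParent;  // would a Reorder be needed on this edge?
        std::vector<size_t> dims;         // static shape of the tensor on this edge
    };

    struct Candidate {
        std::vector<InputPort> inputs;
    };

    static size_t elementCount(const std::vector<size_t>& dims) {
        return std::accumulate(dims.begin(), dims.end(), size_t{1}, std::multiplies<size_t>());
    }

    static bool isScalarLikeShape(const std::vector<size_t>& dims) {
        size_t notOne = 0;
        for (size_t d : dims)
            if (d != 1)
                ++notOne;
        return notOne <= 1;
    }

    // Estimated reorder overhead of a candidate: 0 for compatible inputs,
    // ~1 for scalar-like incompatible inputs, element count otherwise.
    static size_t estimateReorderOverhead(const Candidate& c) {
        size_t cost = 0;
        for (const auto& in : c.inputs) {
            if (in.layoutCompatibleWithParent)
                continue;
            cost += isScalarLikeShape(in.dims) ? 1 : elementCount(in.dims);
        }
        return cost;
    }

    // Pick the candidate with the smallest estimated overhead (first wins on ties).
    static int selectCandidate(const std::vector<Candidate>& candidates) {
        int best = -1;
        size_t bestCost = std::numeric_limits<size_t>::max();
        for (size_t i = 0; i < candidates.size(); ++i) {
            const size_t cost = estimateReorderOverhead(candidates[i]);
            if (cost < bestCost) {
                bestCost = cost;
                best = static_cast<int>(i);
            }
        }
        return best;
    }

    int main() {
        // Candidate 0 forces a reorder on a {1,64,30,30} input;
        // candidate 1 only needs reorders on two {1,64,1,1} inputs.
        std::vector<Candidate> candidates = {
            {{{false, {1, 64, 30, 30}}, {true, {1, 64, 1, 1}}, {true, {1, 64, 1, 1}}}},
            {{{true, {1, 64, 30, 30}}, {false, {1, 64, 1, 1}}, {false, {1, 64, 1, 1}}}},
        };
        std::cout << "selected candidate: " << selectCandidate(candidates) << "\n";  // prints 1
        return 0;
    }

This is why, in the accompanying test, the reorders are expected after the VariadicSplit outputs rather than after the MVN output.
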
Signed-off-by: xipingya --- src/plugins/intel_cpu/src/node.cpp | 135 ++++++++++++------ src/plugins/intel_cpu/src/node.h | 2 + src/plugins/intel_cpu/src/nodes/subgraph.cpp | 25 +++- .../subgraph_tests/src/subgraph_select_pd.cpp | 40 +++--- 4 files changed, 141 insertions(+), 61 deletions(-) diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index 0fceacbbe6e640..8b535d03600722 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -266,7 +266,7 @@ void Node::selectPreferPrimitiveDescriptor(const std::vector& pr // We don't take into account constant edges since reorders on them will be executed on load network stage if (ignoreConstInputs && j > 0 && parentPtr->isConstant()) { - equalsLocalFormatCount += 100; + equalsLocalFormatCount++; continue; } @@ -281,45 +281,8 @@ void Node::selectPreferPrimitiveDescriptor(const std::vector& pr auto parentDesc = parent_spd->getConfig().outConfs[inNum].getMemDesc(); const bool isCompatible = curDesc->isCompatible(*parentDesc); - - /* - Heuristics: - SShape=Scalar Shape(all elements = 1 or only 1 position is not 1) - Reorder is inserted after the SShape node, and such reorder have less cost of calculation. - If multiple inputs of a node have different layouts, try to have the reorder occur on the SShape - node. - - Select Alogrithm: - Condition 1: Precision and shape keep same. - Condition 2: Static shape. - 1: Compatible + not SShape: score=100 - 2: Compatible + SShape: score=2 - 3: Not Compatible + SShape: score=1 - 4: Others: score=0 - */ - if ((curDesc->getPrecision() == parentDesc->getPrecision()) && curDesc->getShape().isStatic() && - parentDesc->getShape().isStatic() && curDesc->getShape() == parentDesc->getShape()) { - bool isSShape = false; - auto curDims = curDesc->getShape().getDims(); - size_t noneOneNum = 0; - for (size_t d = 0; d < curDims.size(); d++) { - if (curDims[d] != 1u) { - noneOneNum++; - } - } - isSShape = (noneOneNum == 1u) || (noneOneNum == 0); - - if (isCompatible && (!isSShape)) { - equalsLocalFormatCount += 100; - } else if (isCompatible && isSShape) { - equalsLocalFormatCount += 2; - } else if (!isCompatible && isSShape) { - equalsLocalFormatCount += 1; - } - } else { - if (isCompatible) { - equalsLocalFormatCount += 100; - } + if (isCompatible) { + equalsLocalFormatCount++; } DEBUG_LOG(getName(), " pd[", i, "].inConfs[", j, "]" @@ -352,6 +315,98 @@ void Node::selectPreferPrimitiveDescriptor(const std::vector& pr selectPrimitiveDescriptorByIndex(0); } +bool Node::checkScalarShape(const ov::PartialShape& pshape) { + int oneNum = 0; + int sz = static_cast(pshape.size()); + for (auto s : pshape) { + if (s.is_static() && s.get_length() == 1) { + oneNum++; + } + } + return oneNum >= sz - 1; +} + +void Node::selectPreferPrimitiveDescriptorWithShape(const std::vector& priority, bool ignoreConstInputs) { + for (auto& type : priority) { + int selectedPrimitive = -1; + int reorderCostScore = std::numeric_limits::max(); + for (size_t i = 0; i < getSupportedPrimitiveDescriptors().size(); i++) { + const auto& supportedPrimitiveDesc = getSupportedPrimitiveDescriptors()[i]; + const impl_desc_type supportedType = supportedPrimitiveDesc.getImplementationType(); + if (supportedType != type) { + continue; + } + + int reorderLocalCostScore = 0; + const size_t descInConfSize = supportedPrimitiveDesc.getConfig().inConfs.size(); + + if (descInConfSize > getParentEdges().size()) { + OPENVINO_THROW(getName(), + " Desc ", + i, + " with type: ", + supportedType, + " has more input 
ports than node: ", + descInConfSize, + " vs ", + getParentEdges().size()); + continue; + } + + for (size_t j = 0; j < descInConfSize; j++) { + auto parentEdge = getParentEdgeAt(j); + auto parentPtr = parentEdge->getParent(); + + // We don't take into account constant edges since reorders on them will be executed on load network stage + if (ignoreConstInputs && j > 0 && parentPtr->isConstant()) { + continue; + } + + auto parent_spd = parentPtr->getSelectedPrimitiveDescriptor(); + if (parent_spd != nullptr && !parent_spd->getConfig().outConfs.empty()) { + int inNum = parentEdge->getInputNum(); + if (inNum < 0 || inNum >= static_cast(parent_spd->getConfig().outConfs.size())) { + inNum = 0; + } + auto curDesc = supportedPrimitiveDesc.getConfig().inConfs[j].getMemDesc(); + auto parentDesc = parent_spd->getConfig().outConfs[inNum].getMemDesc(); + + const bool isCompatible = curDesc->isCompatible(*parentDesc); + bool isScalarShape = checkScalarShape(curDesc->getShape().toPartialShape()); + if (!isCompatible && !isScalarShape) { + reorderLocalCostScore += + ov::shape_size(curDesc->getShape().getStaticDims()); + } + + DEBUG_LOG(getName(), " pd[", i, "].inConfs[", j, "]" + " is ", (isCompatible ? "compatible" : "not compatible"), + " with parent ", parentPtr->getName(), + " outConfs[", inNum, "], reorderLocalCostScore add to ", reorderLocalCostScore); + } + } + if (reorderLocalCostScore < reorderCostScore) { + reorderCostScore = reorderLocalCostScore; + selectedPrimitive = static_cast(i); + DEBUG_LOG(getName(), " Select primitive desc: ", i, " ", supportedPrimitiveDesc); + } + } + + if (selectedPrimitive >= 0) { + selectPrimitiveDescriptorByIndex(selectedPrimitive); + return; + } + } + + OPENVINO_ASSERT(!getSupportedPrimitiveDescriptors().empty(), + "Supported primitive descriptors list is empty for node: ", + getName(), + " type: ", + NameFromType(getType())); + + // fallback. 
If there are no primitives from priority list just select a first + selectPrimitiveDescriptorByIndex(0); +} + bool Node::canBeInPlace() const { // TODO [DS]: enable inPlace for dynamic shapes if (isDynamicNode()) { diff --git a/src/plugins/intel_cpu/src/node.h b/src/plugins/intel_cpu/src/node.h index d442c0280ab03c..4fab58b97a9c27 100644 --- a/src/plugins/intel_cpu/src/node.h +++ b/src/plugins/intel_cpu/src/node.h @@ -708,6 +708,8 @@ class Node { friend class GraphOptimizer; void selectPreferPrimitiveDescriptor(const std::vector& priority, bool ignoreConstInputs); + void selectPreferPrimitiveDescriptorWithShape(const std::vector& priority, bool ignoreConstInputs); + bool checkScalarShape(const ov::PartialShape& pshape); bool isConfigDefined(const NodeConfig &config) const; virtual bool canBeInPlace() const; diff --git a/src/plugins/intel_cpu/src/nodes/subgraph.cpp b/src/plugins/intel_cpu/src/nodes/subgraph.cpp index eac50bf04dbd82..7d6e49a8886453 100644 --- a/src/plugins/intel_cpu/src/nodes/subgraph.cpp +++ b/src/plugins/intel_cpu/src/nodes/subgraph.cpp @@ -514,7 +514,30 @@ void Subgraph::initSupportedPrimitiveDescriptors() { } void Subgraph::selectOptimalPrimitiveDescriptor() { - selectPreferPrimitiveDescriptor(getImplPriority(), true); + selectPreferPrimitiveDescriptorWithShape(getImplPriority(), true); + // if (getName() == "PRelu_658") { + // std::cout << "== this->getTypeStr()=" << this->getTypeStr() << ", " << getName() << std::endl; + // auto parent_size = this->getParentEdges().size(); + // for (size_t p = 0; p < parent_size; p++) { + // auto parentEdge = getParentEdgeAt(p); + // auto parentPtr = parentEdge->getParent(); + // auto parent_spd = parentPtr->getSelectedPrimitiveDescriptor(); + // auto parentDesc = parent_spd->getConfig().outConfs[0].getMemDesc(); + // if (parentDesc->getShape().isStatic()) { + // std::cout << " " << p << " " << *parentDesc << std::endl; + // } + // } + + // if (std::getenv("WITHSHAPE")) { + // std::cout << "== WITHSHAPE\n"; + // selectPreferPrimitiveDescriptorWithShape(getImplPriority(), true); + // } else { + // selectPreferPrimitiveDescriptor(getImplPriority(), true); + // } + // std::cout << " selected:" << *(this->getSelectedPrimitiveDescriptor()) << std::endl; + // } else { + // selectPreferPrimitiveDescriptor(getImplPriority(), true); + // } } ov::element::Type Subgraph::getRuntimePrecision() const { diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/subgraph_select_pd.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/subgraph_select_pd.cpp index be04898b4b5a91..edbb8b8ef8832a 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/subgraph_select_pd.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/subgraph_select_pd.cpp @@ -41,13 +41,13 @@ class SubgraphSelectPD : virtual public SubgraphBaseStaticTest { protected: void SetUp() override { targetDevice = ov::test::utils::DEVICE_CPU; - abs_threshold = 1e-2; + abs_threshold = 2e-2; auto type = element::f32; - constexpr int const1 = 128; - auto input1 = std::make_shared(type, Shape{1, const1 / 2, 32, 32}); + constexpr int const1 = 32; + auto input1 = std::make_shared(type, Shape{1, const1 / 2, 8, 8}); input1->set_friendly_name("input1"); - auto input2 = std::make_shared(ov::element::f16, Shape{1, const1, 1, 1}); + auto input2 = std::make_shared(type, Shape{1, const1, 1, 1}); input2->set_friendly_name("input2"); auto variadicSplit = std::make_shared( @@ -57,11 +57,9 @@ class SubgraphSelectPD : virtual 
public SubgraphBaseStaticTest { variadicSplit->set_friendly_name("variadicSplit"); auto add1 = std::make_shared(variadicSplit->output(0), - ov::opset8::Constant::create(element::f16, Shape{1}, {1})); + ov::opset8::Constant::create(type, Shape{1}, {0})); add1->set_friendly_name("add1"); - auto convert_input1 = std::make_shared(input1, ov::element::f16); - convert_input1->set_friendly_name("convert_input1"); - auto shapeof = std::make_shared(convert_input1); + auto shapeof = std::make_shared(input1); auto rankof = std::make_shared(shapeof); auto squeeze = std::make_shared(rankof, ov::opset8::Constant::create(element::i64, Shape{1}, {0})); @@ -71,26 +69,28 @@ class SubgraphSelectPD : virtual public SubgraphBaseStaticTest { ov::opset8::Constant::create(element::i64, Shape{}, {1}), ov::element::i64); auto create_conv = [&](const std::shared_ptr& input_node) { + ov::test::utils::InputGenerateData in_gen_data(0, 1); auto conv = std::make_shared( input_node, - ov::test::utils::make_constant(ov::element::f16, Shape{1, const1 / 2u, 3, 3}), + ov::test::utils::make_constant(type, Shape{1, const1 / 2u, 3, 3}, ov::test::utils::InputGenerateData(0, 1)), Strides{1, 1}, - CoordinateDiff{0, 0}, - CoordinateDiff{0, 0}, + CoordinateDiff{1, 1}, + CoordinateDiff{1, 1}, Strides{1, 1}); conv->get_rt_info() = CPUTestUtils::CPUTestsBase::makeCPUInfo({CPUTestUtils::nhwc}, {CPUTestUtils::nhwc}, {}); return conv; }; - auto conv1 = create_conv(convert_input1); - auto mvn = std::make_shared(conv1, range, false, 0.1, op::MVNEpsMode::INSIDE_SQRT); - auto mul = std::make_shared(add1, mvn); + auto create_relu = [&](const std::shared_ptr& input_node) { + return std::make_shared(input_node, + ov::opset8::Constant::create(element::f32, Shape{1}, {1})); + }; + auto conv1 = create_conv(input1); + auto mvn = + std::make_shared(create_relu(conv1), range, false, 0.1, op::MVNEpsMode::INSIDE_SQRT); + auto mul = std::make_shared(create_relu(add1), mvn); auto add2 = std::make_shared(variadicSplit->output(1), mul); - auto LeakyRelu = - std::make_shared(add2, ov::opset8::Constant::create(element::f32, Shape{1}, {1})); - LeakyRelu->set_friendly_name("LeakyRelu"); - - auto conv2 = create_conv(LeakyRelu); + auto conv2 = create_conv(create_relu(add2)); conv2->set_friendly_name("conv2"); function = std::make_shared(conv2, ParameterVector{input1, input2}); @@ -101,7 +101,7 @@ class SubgraphSelectPD : virtual public SubgraphBaseStaticTest { int nodes_found = 0; for (const auto& n : runtime_function->get_ordered_ops()) { auto layer_type = n->get_rt_info().at(ov::exec_model_info::LAYER_TYPE).as(); - if (layer_type == "Subgraph" && n->get_input_size() == 3u) { + if (layer_type == "Subgraph") { nodes_found++; auto output_layout = n->get_rt_info().at(ov::exec_model_info::OUTPUT_LAYOUTS).as(); // The optimal choose should be: 'nhwc'. From bf579c9b08bcf555ae18ee9b31cee35938aa93fd Mon Sep 17 00:00:00 2001 From: xipingya Date: Fri, 12 Jul 2024 02:28:17 +0000 Subject: [PATCH 08/17] Remove debug log. 
Signed-off-by: xipingya --- src/plugins/intel_cpu/src/nodes/subgraph.cpp | 23 -------------------- 1 file changed, 23 deletions(-) diff --git a/src/plugins/intel_cpu/src/nodes/subgraph.cpp b/src/plugins/intel_cpu/src/nodes/subgraph.cpp index 7d6e49a8886453..61a9c8fc51db5f 100644 --- a/src/plugins/intel_cpu/src/nodes/subgraph.cpp +++ b/src/plugins/intel_cpu/src/nodes/subgraph.cpp @@ -515,29 +515,6 @@ void Subgraph::initSupportedPrimitiveDescriptors() { void Subgraph::selectOptimalPrimitiveDescriptor() { selectPreferPrimitiveDescriptorWithShape(getImplPriority(), true); - // if (getName() == "PRelu_658") { - // std::cout << "== this->getTypeStr()=" << this->getTypeStr() << ", " << getName() << std::endl; - // auto parent_size = this->getParentEdges().size(); - // for (size_t p = 0; p < parent_size; p++) { - // auto parentEdge = getParentEdgeAt(p); - // auto parentPtr = parentEdge->getParent(); - // auto parent_spd = parentPtr->getSelectedPrimitiveDescriptor(); - // auto parentDesc = parent_spd->getConfig().outConfs[0].getMemDesc(); - // if (parentDesc->getShape().isStatic()) { - // std::cout << " " << p << " " << *parentDesc << std::endl; - // } - // } - - // if (std::getenv("WITHSHAPE")) { - // std::cout << "== WITHSHAPE\n"; - // selectPreferPrimitiveDescriptorWithShape(getImplPriority(), true); - // } else { - // selectPreferPrimitiveDescriptor(getImplPriority(), true); - // } - // std::cout << " selected:" << *(this->getSelectedPrimitiveDescriptor()) << std::endl; - // } else { - // selectPreferPrimitiveDescriptor(getImplPriority(), true); - // } } ov::element::Type Subgraph::getRuntimePrecision() const { From a77372913e3352ccd42ded70ff7ccb966141e810 Mon Sep 17 00:00:00 2001 From: xipingya Date: Mon, 15 Jul 2024 10:08:44 +0800 Subject: [PATCH 09/17] Fix CI fail. 1: Reorder itself should have low cost for scalar shape,(assume it is 1); Signed-off-by: xipingya --- src/plugins/intel_cpu/src/node.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index 8b535d03600722..1ebc1312e6436a 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -373,13 +373,18 @@ void Node::selectPreferPrimitiveDescriptorWithShape(const std::vectorisCompatible(*parentDesc); bool isScalarShape = checkScalarShape(curDesc->getShape().toPartialShape()); - if (!isCompatible && !isScalarShape) { - reorderLocalCostScore += - ov::shape_size(curDesc->getShape().getStaticDims()); + if (!isCompatible) { + if (isScalarShape) { + reorderLocalCostScore += 1; + } else { + reorderLocalCostScore += + ov::shape_size(curDesc->getShape().getStaticDims()); + } } DEBUG_LOG(getName(), " pd[", i, "].inConfs[", j, "]" " is ", (isCompatible ? "compatible" : "not compatible"), + " shape is ", (isScalarShape ? "scalar" : "not scalar"), " with parent ", parentPtr->getName(), " outConfs[", inNum, "], reorderLocalCostScore add to ", reorderLocalCostScore); } From 8f6d76b052c39b729b67330f44752c4976c2a341 Mon Sep 17 00:00:00 2001 From: xipingya Date: Tue, 13 Aug 2024 06:09:54 +0000 Subject: [PATCH 10/17] Update descriptions of test. 
Signed-off-by: xipingya --- .../functional/custom/subgraph_tests/src/subgraph_select_pd.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/subgraph_select_pd.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/subgraph_select_pd.cpp index edbb8b8ef8832a..2d44492c13c106 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/subgraph_select_pd.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/subgraph_select_pd.cpp @@ -34,7 +34,7 @@ namespace test { Result The Subgraph node have 3 inputs: they don't have same layout. Expected: Reorder is inserted after VariadicSplit[0] and VariadicSplit[1], not inserted after MVN. - Because VariadicSplit's output layout is [1,64,1,1], it' reorder almost have no calculation. + Because VariadicSplit's output layout is scalar shape([1,64,1,1]), its reorder has less computation. */ class SubgraphSelectPD : virtual public SubgraphBaseStaticTest { From eee48a2e034be28a6e6e9c4c8735ed3a95c41215 Mon Sep 17 00:00:00 2001 From: xipingya Date: Tue, 13 Aug 2024 06:12:30 +0000 Subject: [PATCH 11/17] Improve code readability, using lambda function replace loop; Signed-off-by: xipingya --- src/plugins/intel_cpu/src/node.cpp | 91 +++++++++++++++++------------- 1 file changed, 53 insertions(+), 38 deletions(-) diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index 1ebc1312e6436a..57e0ea830f8a77 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -327,9 +327,50 @@ bool Node::checkScalarShape(const ov::PartialShape& pshape) { } void Node::selectPreferPrimitiveDescriptorWithShape(const std::vector& priority, bool ignoreConstInputs) { - for (auto& type : priority) { - int selectedPrimitive = -1; - int reorderCostScore = std::numeric_limits::max(); + auto loopEdges = [&](size_t i, + const size_t inputNodesNum, + const ov::intel_cpu::NodeDesc& supportedPrimitiveDesc, + int& reorderLocalCostScore) { + for (size_t j = 0; j < inputNodesNum; j++) { + auto parentEdge = getParentEdgeAt(j); + auto parentPtr = parentEdge->getParent(); + + // We don't take into account constant edges since reorders on them will be executed on load network + // stage + if (ignoreConstInputs && j > 0 && parentPtr->isConstant()) { + continue; + } + + auto parent_spd = parentPtr->getSelectedPrimitiveDescriptor(); + if (parent_spd != nullptr && !parent_spd->getConfig().outConfs.empty()) { + int inNum = parentEdge->getInputNum(); + if (inNum < 0 || inNum >= static_cast(parent_spd->getConfig().outConfs.size())) { + inNum = 0; + } + auto curDesc = supportedPrimitiveDesc.getConfig().inConfs[j].getMemDesc(); + auto parentDesc = parent_spd->getConfig().outConfs[inNum].getMemDesc(); + + const bool isCompatible = curDesc->isCompatible(*parentDesc); + bool isScalarShape = checkScalarShape(curDesc->getShape().toPartialShape()); + if (!isCompatible) { + if (isScalarShape) { + reorderLocalCostScore += 1; + } else { + reorderLocalCostScore += + ov::shape_size(curDesc->getShape().getStaticDims()); + } + } + + DEBUG_LOG(getName(), " pd[", i, "].inConfs[", j, "]" + " is ", (isCompatible ? "compatible" : "not compatible"), + " shape is ", (isScalarShape ? 
"scalar" : "not scalar"), + " with parent ", parentPtr->getName(), + " outConfs[", inNum, "], reorderLocalCostScore add to ", reorderLocalCostScore); + } + } + }; + + auto loopSPD = [&](const impl_desc_type type, int& selectedPrimitive, int& reorderCostScore) { for (size_t i = 0; i < getSupportedPrimitiveDescriptors().size(); i++) { const auto& supportedPrimitiveDesc = getSupportedPrimitiveDescriptors()[i]; const impl_desc_type supportedType = supportedPrimitiveDesc.getImplementationType(); @@ -353,48 +394,22 @@ void Node::selectPreferPrimitiveDescriptorWithShape(const std::vectorgetParent(); - - // We don't take into account constant edges since reorders on them will be executed on load network stage - if (ignoreConstInputs && j > 0 && parentPtr->isConstant()) { - continue; - } - - auto parent_spd = parentPtr->getSelectedPrimitiveDescriptor(); - if (parent_spd != nullptr && !parent_spd->getConfig().outConfs.empty()) { - int inNum = parentEdge->getInputNum(); - if (inNum < 0 || inNum >= static_cast(parent_spd->getConfig().outConfs.size())) { - inNum = 0; - } - auto curDesc = supportedPrimitiveDesc.getConfig().inConfs[j].getMemDesc(); - auto parentDesc = parent_spd->getConfig().outConfs[inNum].getMemDesc(); - - const bool isCompatible = curDesc->isCompatible(*parentDesc); - bool isScalarShape = checkScalarShape(curDesc->getShape().toPartialShape()); - if (!isCompatible) { - if (isScalarShape) { - reorderLocalCostScore += 1; - } else { - reorderLocalCostScore += - ov::shape_size(curDesc->getShape().getStaticDims()); - } - } + loopEdges(i, descInConfSize, supportedPrimitiveDesc, reorderLocalCostScore); - DEBUG_LOG(getName(), " pd[", i, "].inConfs[", j, "]" - " is ", (isCompatible ? "compatible" : "not compatible"), - " shape is ", (isScalarShape ? "scalar" : "not scalar"), - " with parent ", parentPtr->getName(), - " outConfs[", inNum, "], reorderLocalCostScore add to ", reorderLocalCostScore); - } - } if (reorderLocalCostScore < reorderCostScore) { reorderCostScore = reorderLocalCostScore; selectedPrimitive = static_cast(i); DEBUG_LOG(getName(), " Select primitive desc: ", i, " ", supportedPrimitiveDesc); } } + }; + + // loop kernel priority + for (auto& type : priority) { + int selectedPrimitive = -1; + int reorderCostScore = std::numeric_limits::max(); + + loopSPD(type, selectedPrimitive, reorderCostScore); if (selectedPrimitive >= 0) { selectPrimitiveDescriptorByIndex(selectedPrimitive); From 61a23f05f5256e8df596e3691826b6abb97002f7 Mon Sep 17 00:00:00 2001 From: xipingya Date: Thu, 22 Aug 2024 02:57:06 +0000 Subject: [PATCH 12/17] Update lambda function name. 
Signed-off-by: xipingya --- src/plugins/intel_cpu/src/node.cpp | 47 ++++++++++++++---------------- src/plugins/intel_cpu/src/node.h | 2 +- 2 files changed, 23 insertions(+), 26 deletions(-) diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index 1e40fbb7b0d65d..c40c0fe2ca4515 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -315,22 +315,21 @@ void Node::selectPreferPrimitiveDescriptor(const std::vector& pr selectPrimitiveDescriptorByIndex(0); } -bool Node::checkScalarShape(const ov::PartialShape& pshape) { - int oneNum = 0; +bool Node::isScalarShape(const ov::PartialShape& pshape) { + int value_1_num = 0; int sz = static_cast(pshape.size()); for (auto s : pshape) { if (s.is_static() && s.get_length() == 1) { - oneNum++; + value_1_num++; } } - return oneNum >= sz - 1; + return value_1_num >= sz - 1; } void Node::selectPreferPrimitiveDescriptorWithShape(const std::vector& priority, bool ignoreConstInputs) { - auto loopEdges = [&](size_t i, - const size_t inputNodesNum, - const ov::intel_cpu::NodeDesc& supportedPrimitiveDesc, - int& reorderLocalCostScore) { + auto estimateReorderOverhead = [&](const ov::intel_cpu::NodeDesc& supportedPrimitiveDesc, size_t i) { + int estimate = 0; + auto inputNodesNum = supportedPrimitiveDesc.getConfig().inConfs.size(); for (size_t j = 0; j < inputNodesNum; j++) { auto parentEdge = getParentEdgeAt(j); auto parentPtr = parentEdge->getParent(); @@ -351,26 +350,28 @@ void Node::selectPreferPrimitiveDescriptorWithShape(const std::vectorgetConfig().outConfs[inNum].getMemDesc(); const bool isCompatible = curDesc->isCompatible(*parentDesc); - bool isScalarShape = checkScalarShape(curDesc->getShape().toPartialShape()); + bool scalarShape = isScalarShape(curDesc->getShape().toPartialShape()); if (!isCompatible) { - if (isScalarShape) { - reorderLocalCostScore += 1; + if (scalarShape) { + estimate += 1; } else { - reorderLocalCostScore += - ov::shape_size(curDesc->getShape().getStaticDims()); + estimate += ov::shape_size(curDesc->getShape().getStaticDims()); } } DEBUG_LOG(getName(), " pd[", i, "].inConfs[", j, "]" " is ", (isCompatible ? "compatible" : "not compatible"), - " shape is ", (isScalarShape ? "scalar" : "not scalar"), + " shape is ", (scalarShape ? 
"scalar" : "not scalar"), " with parent ", parentPtr->getName(), - " outConfs[", inNum, "], reorderLocalCostScore add to ", reorderLocalCostScore); + " outConfs[", inNum, "], estimate add to ", estimate); } } + return estimate; }; - auto loopSPD = [&](const impl_desc_type type, int& selectedPrimitive, int& reorderCostScore) { + auto selectSPDwithType = [&](const impl_desc_type type) { + int selectedPrimitive = -1; + int reorderCostScore = std::numeric_limits::max(); for (size_t i = 0; i < getSupportedPrimitiveDescriptors().size(); i++) { const auto& supportedPrimitiveDesc = getSupportedPrimitiveDescriptors()[i]; const impl_desc_type supportedType = supportedPrimitiveDesc.getImplementationType(); @@ -378,7 +379,6 @@ void Node::selectPreferPrimitiveDescriptorWithShape(const std::vector getParentEdges().size()) { @@ -394,23 +394,20 @@ void Node::selectPreferPrimitiveDescriptorWithShape(const std::vector(i); DEBUG_LOG(getName(), " Select primitive desc: ", i, " ", supportedPrimitiveDesc); } } + return selectedPrimitive; }; // loop kernel priority for (auto& type : priority) { - int selectedPrimitive = -1; - int reorderCostScore = std::numeric_limits::max(); - - loopSPD(type, selectedPrimitive, reorderCostScore); - + int selectedPrimitive = selectSPDwithType(type); if (selectedPrimitive >= 0) { selectPrimitiveDescriptorByIndex(selectedPrimitive); return; diff --git a/src/plugins/intel_cpu/src/node.h b/src/plugins/intel_cpu/src/node.h index 289d984fcc686a..408e5d08ced069 100644 --- a/src/plugins/intel_cpu/src/node.h +++ b/src/plugins/intel_cpu/src/node.h @@ -709,7 +709,7 @@ class Node { void selectPreferPrimitiveDescriptor(const std::vector& priority, bool ignoreConstInputs); void selectPreferPrimitiveDescriptorWithShape(const std::vector& priority, bool ignoreConstInputs); - bool checkScalarShape(const ov::PartialShape& pshape); + bool isScalarShape(const ov::PartialShape& pshape); bool isConfigDefined(const NodeConfig &config) const; virtual bool canBeInPlace() const; From c218d23ceae091f1a14e2b2e068852e7886ca78c Mon Sep 17 00:00:00 2001 From: xipingya Date: Thu, 22 Aug 2024 03:05:39 +0000 Subject: [PATCH 13/17] Rename reorderCostScore to bestEstimate Signed-off-by: xipingya --- src/plugins/intel_cpu/src/node.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index 07027baf6b9c4b..cd26fee3b08c6d 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -371,7 +371,7 @@ void Node::selectPreferPrimitiveDescriptorWithShape(const std::vector::max(); + int bestEstimate = std::numeric_limits::max(); for (size_t i = 0; i < getSupportedPrimitiveDescriptors().size(); i++) { const auto& supportedPrimitiveDesc = getSupportedPrimitiveDescriptors()[i]; const impl_desc_type supportedType = supportedPrimitiveDesc.getImplementationType(); @@ -396,8 +396,8 @@ void Node::selectPreferPrimitiveDescriptorWithShape(const std::vector(i); DEBUG_LOG(getName(), " Select primitive desc: ", i, " ", supportedPrimitiveDesc); } From a0f30811fe8b265ad7c28074dffc71d22e22c6bd Mon Sep 17 00:00:00 2001 From: xipingya Date: Tue, 27 Aug 2024 01:54:06 +0000 Subject: [PATCH 14/17] Remove scalar shape check, replace with isReorderRequired Signed-off-by: xipingya --- src/plugins/intel_cpu/src/node.cpp | 14 ++++++++++---- src/plugins/intel_cpu/src/node.h | 3 ++- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index 
cd26fee3b08c6d..44de9342580456 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -315,7 +315,7 @@ void Node::selectPreferPrimitiveDescriptor(const std::vector& pr selectPrimitiveDescriptorByIndex(0); } -bool Node::isScalarShape(const ov::PartialShape& pshape) { +bool Node::isOneDimShape(const ov::PartialShape& pshape) { int value_1_num = 0; int sz = static_cast(pshape.size()); for (auto s : pshape) { @@ -326,6 +326,13 @@ bool Node::isScalarShape(const ov::PartialShape& pshape) { return value_1_num >= sz - 1; } +bool Node::isReorderRequired(ov::intel_cpu::MemoryDescPtr desc1, ov::intel_cpu::MemoryDescPtr desc2) { + bool samePrec = desc1->getPrecision() == desc2->getPrecision(); + bool isOneDimShape1 = isOneDimShape(desc1->getShape().toPartialShape()); + bool isOneDimShape2 = isOneDimShape(desc2->getShape().toPartialShape()); + return !(isOneDimShape1 && isOneDimShape2 && samePrec); +} + void Node::selectPreferPrimitiveDescriptorWithShape(const std::vector& priority, bool ignoreConstInputs) { auto estimateReorderOverhead = [&](const ov::intel_cpu::NodeDesc& supportedPrimitiveDesc, size_t i) { int estimate = 0; @@ -350,9 +357,8 @@ void Node::selectPreferPrimitiveDescriptorWithShape(const std::vectorgetConfig().outConfs[inNum].getMemDesc(); const bool isCompatible = curDesc->isCompatible(*parentDesc); - bool scalarShape = isScalarShape(curDesc->getShape().toPartialShape()); if (!isCompatible) { - if (scalarShape) { + if (!isReorderRequired(parentDesc, curDesc)) { estimate += 1; } else { estimate += ov::shape_size(curDesc->getShape().getStaticDims()); @@ -361,7 +367,7 @@ void Node::selectPreferPrimitiveDescriptorWithShape(const std::vectorgetShape().toPartialShape()) ? "one dim shape" : "not one dim shape"), " with parent ", parentPtr->getName(), " outConfs[", inNum, "], estimate add to ", estimate); } diff --git a/src/plugins/intel_cpu/src/node.h b/src/plugins/intel_cpu/src/node.h index 9611de4acc6854..a8709df4ece962 100644 --- a/src/plugins/intel_cpu/src/node.h +++ b/src/plugins/intel_cpu/src/node.h @@ -708,7 +708,8 @@ class Node { void selectPreferPrimitiveDescriptor(const std::vector& priority, bool ignoreConstInputs); void selectPreferPrimitiveDescriptorWithShape(const std::vector& priority, bool ignoreConstInputs); - bool isScalarShape(const ov::PartialShape& pshape); + bool isOneDimShape(const ov::PartialShape& pshape); + bool isReorderRequired(ov::intel_cpu::MemoryDescPtr desc1, ov::intel_cpu::MemoryDescPtr desc2); bool isConfigDefined(const NodeConfig &config) const; virtual bool canBeInPlace() const; From b5312b1421f5be1e23ca880da7ddc31c51e6c94d Mon Sep 17 00:00:00 2001 From: xipingya Date: Mon, 9 Sep 2024 13:15:12 +0800 Subject: [PATCH 15/17] fix potential dynamic case exception issue, just call getMinDims --- src/plugins/intel_cpu/src/node.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index cb0889b4770624..ee6782a4ca049d 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -361,7 +361,7 @@ void Node::selectPreferPrimitiveDescriptorWithShape(const std::vector(curDesc->getShape().getStaticDims()); + estimate += ov::shape_size(curDesc->getShape().getMinDims()); } } From a044a5a644c856ddbabfd66ad797671dc48d3427 Mon Sep 17 00:00:00 2001 From: xipingya Date: Wed, 25 Sep 2024 14:01:24 +0800 Subject: [PATCH 16/17] Still adopt old logical if current node has dynamic input. 
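The intent here is that the element-count estimate only makes sense for static shapes, so a node with any dynamic input should keep the original format-count based selection. A hypothetical standalone sketch of that guard (the selectBy* helpers are invented stand-ins for the two selection routines):

    // Standalone sketch of the dynamic-shape guard (illustrative only).
    #include <iostream>
    #include <vector>

    // Stand-in for a shape: a dynamic dimension is encoded as -1.
    using Dims = std::vector<long>;

    static bool hasDynamicDim(const Dims& dims) {
        for (long d : dims)
            if (d < 0)
                return true;
        return false;
    }

    // Hypothetical stand-ins for the two selection strategies.
    static void selectByFormatCount() { std::cout << "format-count based selection\n"; }
    static void selectByReorderCost() { std::cout << "reorder-cost based selection\n"; }

    // The guard: reorder cost is estimated from element counts, which only make
    // sense for static shapes, so dynamic nodes keep the original behaviour.
    static void selectOptimal(const std::vector<Dims>& inputShapes) {
        for (const auto& s : inputShapes) {
            if (hasDynamicDim(s)) {
                selectByFormatCount();
                return;
            }
        }
        selectByReorderCost();
    }

    int main() {
        selectOptimal({{1, 64, 32, 32}});  // static  -> cost-based selection
        selectOptimal({{1, 64, -1, -1}});  // dynamic -> original selection
        return 0;
    }
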
Signed-off-by: xipingya --- src/plugins/intel_cpu/src/node.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index ee6782a4ca049d..f7ac63a7e8868e 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -334,6 +334,13 @@ bool Node::isReorderRequired(ov::intel_cpu::MemoryDescPtr desc1, ov::intel_cpu:: } void Node::selectPreferPrimitiveDescriptorWithShape(const std::vector& priority, bool ignoreConstInputs) { + // Filter out dynamic shape. + for (size_t i = 0; i < this->getOriginalInputsNumber(); i++) { + if (this->getInputShapeAtPort(i).isDynamic()) { + return selectPreferPrimitiveDescriptor(priority, ignoreConstInputs); + } + } + auto estimateReorderOverhead = [&](const ov::intel_cpu::NodeDesc& supportedPrimitiveDesc, size_t i) { int estimate = 0; auto inputNodesNum = supportedPrimitiveDesc.getConfig().inConfs.size(); From 16ae796988bc45647372d57e8b52fe719f2dead2 Mon Sep 17 00:00:00 2001 From: xipingya Date: Thu, 26 Sep 2024 13:47:04 +0800 Subject: [PATCH 17/17] call isDynamic directly. Signed-off-by: xipingya --- src/plugins/intel_cpu/src/node.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index f7ac63a7e8868e..21f38743d32679 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -335,10 +335,8 @@ bool Node::isReorderRequired(ov::intel_cpu::MemoryDescPtr desc1, ov::intel_cpu:: void Node::selectPreferPrimitiveDescriptorWithShape(const std::vector& priority, bool ignoreConstInputs) { // Filter out dynamic shape. - for (size_t i = 0; i < this->getOriginalInputsNumber(); i++) { - if (this->getInputShapeAtPort(i).isDynamic()) { - return selectPreferPrimitiveDescriptor(priority, ignoreConstInputs); - } + if (isDynamic) { + return selectPreferPrimitiveDescriptor(priority, ignoreConstInputs); } auto estimateReorderOverhead = [&](const ov::intel_cpu::NodeDesc& supportedPrimitiveDesc, size_t i) {
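Taken together, the final form of the heuristic decides whether a layout mismatch on an edge is a "real" reorder: it is treated as essentially free only when both descriptors are one-dimensional-like (at most one dim differs from 1) and share the same precision. A standalone illustration with simplified stand-in types, not the MemoryDesc API:

    // Standalone sketch of the final isReorderRequired()/isOneDimShape() logic (illustrative only).
    #include <cassert>
    #include <cstddef>
    #include <string>
    #include <vector>

    struct TensorDesc {
        std::string precision;     // e.g. "f32", "f16"
        std::vector<size_t> dims;  // static dims for this sketch
    };

    // True when at most one dimension differs from 1, e.g. {1,64,1,1} or {128}.
    static bool isOneDimShape(const std::vector<size_t>& dims) {
        size_t notOne = 0;
        for (size_t d : dims)
            if (d != 1)
                ++notOne;
        return notOne <= 1;
    }

    // A layout change between two one-dimensional-like tensors of the same
    // precision is a no-op in practice, so only other cases count as a "real"
    // reorder when estimating the cost of a candidate descriptor.
    static bool isReorderRequired(const TensorDesc& a, const TensorDesc& b) {
        const bool samePrecision = a.precision == b.precision;
        return !(isOneDimShape(a.dims) && isOneDimShape(b.dims) && samePrecision);
    }

    int main() {
        TensorDesc vec1{"f32", {1, 64, 1, 1}};
        TensorDesc vec2{"f32", {1, 64, 1, 1}};
        TensorDesc img{"f32", {1, 64, 30, 30}};
        TensorDesc vecHalf{"f16", {1, 64, 1, 1}};

        assert(!isReorderRequired(vec1, vec2));    // both one-dim-like, same precision
        assert(isReorderRequired(vec1, img));      // full-size tensor involved
        assert(isReorderRequired(vec1, vecHalf));  // precision change
        return 0;
    }
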