Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CPU]The shape of the memory descriptor is considered in selectPreferPrimitiveDescriptor of Subgraph #23971

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
b0db741
Update select prefer PD score algorithm to fix ZOOM decoder network r…
xipingyan Apr 11, 2024
98eccf4
replace makeConst with ov::test::utils::deprecated::make_constant
xipingyan Apr 11, 2024
2d2ee67
Update subgraph_select_pd.cpp
xipingyan Apr 12, 2024
d3d1018
Replace define with constexpr
xipingyan Apr 15, 2024
f831ffb
Fix Windows build issue.
xipingyan Apr 18, 2024
c022b3d
I found 3 subgraph nodes after rebasing.
xipingyan Jun 27, 2024
6f3f841
Enable "selectPreferPrimitiveDescriptorWithShape",
xipingyan Jul 12, 2024
bf579c9
Remove debug log.
xipingyan Jul 12, 2024
99c7790
Merge branch 'master' into xp/fix_zoom_regression_to_master
xipingyan Jul 12, 2024
a773729
Fix CI fail.
xipingyan Jul 15, 2024
8f6d76b
Update descriptions of test.
xipingyan Aug 13, 2024
eee48a2
Improve code readability by using a lambda function to replace the loop;
xipingyan Aug 13, 2024
7b6fafd
Merge branch 'master' into xp/fix_zoom_regression_to_master
xipingyan Aug 13, 2024
8af13ec
Merge branch 'master' into xp/fix_zoom_regression_to_master
xipingyan Aug 15, 2024
36805ef
Merge branch 'master' into xp/fix_zoom_regression_to_master
xipingyan Aug 16, 2024
880d4fd
Merge branch 'master' into xp/fix_zoom_regression_to_master
xipingyan Aug 16, 2024
61a23f0
Update lambda function name.
xipingyan Aug 22, 2024
52f517d
Merge branch 'master' into xp/fix_zoom_regression_to_master
xipingyan Aug 22, 2024
c218d23
Rename reorderCostScore to bestEstimate
xipingyan Aug 22, 2024
a0f3081
Remove scalar shape check, replace with isReorderRequired
xipingyan Aug 27, 2024
f6bfd4c
Merge branch 'master' into xp/fix_zoom_regression_to_master
xipingyan Sep 9, 2024
b5312b1
fix potential dynamic case exception issue, just call getMinDims
xipingyan Sep 9, 2024
a044a5a
Still adopt old logic if the current node has dynamic input.
xipingyan Sep 25, 2024
16ae796
call isDynamic directly.
xipingyan Sep 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 120 additions & 1 deletion src/plugins/intel_cpu/src/node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,6 @@ void Node::selectPreferPrimitiveDescriptor(const std::vector<impl_desc_type>& pr
auto parentDesc = parent_spd->getConfig().outConfs[inNum].getMemDesc();

const bool isCompatible = curDesc->isCompatible(*parentDesc);

if (isCompatible) {
equalsLocalFormatCount++;
}
Expand Down Expand Up @@ -316,6 +315,126 @@ void Node::selectPreferPrimitiveDescriptor(const std::vector<impl_desc_type>& pr
selectPrimitiveDescriptorByIndex(0);
}

bool Node::isOneDimShape(const ov::PartialShape& pshape) {
    // A shape counts as "one dimensional" when at most one of its dimensions
    // differs from 1. A dynamic dimension is treated as a non-1 dimension.
    int nonUnitDims = 0;
    for (const auto& dim : pshape) {
        if (!dim.is_static() || dim.get_length() != 1) {
            nonUnitDims++;
        }
    }
    return nonUnitDims <= 1;
}

bool Node::isReorderRequired(ov::intel_cpu::MemoryDescPtr desc1, ov::intel_cpu::MemoryDescPtr desc2) {
    // A reorder between two descriptors is treated as "not required" (i.e. cheap)
    // only when both shapes are effectively one dimensional and the precision
    // matches; in every other case real data movement is needed.
    if (desc1->getPrecision() != desc2->getPrecision()) {
        return true;
    }
    const bool bothOneDim = isOneDimShape(desc1->getShape().toPartialShape()) &&
                            isOneDimShape(desc2->getShape().toPartialShape());
    return !bothOneDim;
}

void Node::selectPreferPrimitiveDescriptorWithShape(const std::vector<impl_desc_type>& priority, bool ignoreConstInputs) {
    // Shape-aware variant of selectPreferPrimitiveDescriptor(): among the
    // supported descriptors of each priority type, pick the one whose input
    // reorders would move the fewest elements. The estimation below needs
    // concrete dims, so dynamic-shaped nodes fall back to the original logic.
    if (isDynamic) {
        return selectPreferPrimitiveDescriptor(priority, ignoreConstInputs);
    }

    // Estimates the cost of the reorders that would be inserted on the input
    // edges if the i-th supported primitive descriptor were selected.
    // A "cheap" reorder (see isReorderRequired) contributes 1; a real reorder
    // contributes the element count of the input shape.
    // NOTE: the accumulator is size_t (not int) because ov::shape_size returns
    // size_t and large shapes could overflow a 32-bit signed sum.
    auto estimateReorderOverhead = [&](const ov::intel_cpu::NodeDesc& supportedPrimitiveDesc, size_t i) {
        size_t estimate = 0;
        auto inputNodesNum = supportedPrimitiveDesc.getConfig().inConfs.size();
        for (size_t j = 0; j < inputNodesNum; j++) {
            auto parentEdge = getParentEdgeAt(j);
            auto parentPtr = parentEdge->getParent();

            // We don't take into account constant edges since reorders on them will be executed on load network
            // stage
            if (ignoreConstInputs && j > 0 && parentPtr->isConstant()) {
                continue;
            }

            auto parent_spd = parentPtr->getSelectedPrimitiveDescriptor();
            if (parent_spd != nullptr && !parent_spd->getConfig().outConfs.empty()) {
                int inNum = parentEdge->getInputNum();
                // Guard against an out-of-range output port index from the parent.
                if (inNum < 0 || inNum >= static_cast<int>(parent_spd->getConfig().outConfs.size())) {
                    inNum = 0;
                }
                auto curDesc = supportedPrimitiveDesc.getConfig().inConfs[j].getMemDesc();
                auto parentDesc = parent_spd->getConfig().outConfs[inNum].getMemDesc();

                const bool isCompatible = curDesc->isCompatible(*parentDesc);
                if (!isCompatible) {
                    if (!isReorderRequired(parentDesc, curDesc)) {
                        estimate += 1;
                    } else {
                        estimate += ov::shape_size<ov::intel_cpu::VectorDims>(curDesc->getShape().getMinDims());
                    }
                }

                DEBUG_LOG(getName(), " pd[", i, "].inConfs[", j, "]"
                          " is ", (isCompatible ? "compatible" : "not compatible"),
                          " shape is ", (isOneDimShape(curDesc->getShape().toPartialShape()) ? "one dim shape" : "not one dim shape"),
                          " with parent ", parentPtr->getName(),
                          " outConfs[", inNum, "], estimate add to ", estimate);
            }
        }
        return estimate;
    };

    // Returns the index of the supported descriptor of the given type with the
    // smallest reorder overhead, or -1 when no descriptor of that type exists.
    auto selectSPDwithType = [&](const impl_desc_type type) {
        int selectedPrimitive = -1;
        size_t bestEstimate = std::numeric_limits<size_t>::max();
        for (size_t i = 0; i < getSupportedPrimitiveDescriptors().size(); i++) {
            const auto& supportedPrimitiveDesc = getSupportedPrimitiveDescriptors()[i];
            const impl_desc_type supportedType = supportedPrimitiveDesc.getImplementationType();
            if (supportedType != type) {
                continue;
            }

            const size_t descInConfSize = supportedPrimitiveDesc.getConfig().inConfs.size();

            if (descInConfSize > getParentEdges().size()) {
                // Malformed descriptor: it declares more input ports than the node
                // has parent edges. OPENVINO_THROW does not return, so no further
                // statement is needed after it (the old 'continue' was dead code).
                OPENVINO_THROW(getName(),
                               " Desc ",
                               i,
                               " with type: ",
                               supportedType,
                               " has more input ports than node: ",
                               descInConfSize,
                               " vs ",
                               getParentEdges().size());
            }

            auto estimate = estimateReorderOverhead(supportedPrimitiveDesc, i);

            if (estimate < bestEstimate) {
                bestEstimate = estimate;
                selectedPrimitive = static_cast<int>(i);
                DEBUG_LOG(getName(), " Select primitive desc: ", i, " ", supportedPrimitiveDesc);
            }
        }
        return selectedPrimitive;
    };

    // loop kernel priority
    for (auto& type : priority) {
        int selectedPrimitive = selectSPDwithType(type);
        if (selectedPrimitive >= 0) {
            selectPrimitiveDescriptorByIndex(selectedPrimitive);
            return;
        }
    }

    OPENVINO_ASSERT(!getSupportedPrimitiveDescriptors().empty(),
                    "Supported primitive descriptors list is empty for node: ",
                    getName(),
                    " type: ",
                    NameFromType(getType()));

    // fallback. If there are no primitives from priority list just select a first
    selectPrimitiveDescriptorByIndex(0);
}

bool Node::canBeInPlace() const {
// TODO [DS]: enable inPlace for dynamic shapes
if (isDynamicNode()) {
Expand Down
3 changes: 3 additions & 0 deletions src/plugins/intel_cpu/src/node.h
Original file line number Diff line number Diff line change
Expand Up @@ -707,6 +707,9 @@ class Node {
friend class GraphOptimizer;

void selectPreferPrimitiveDescriptor(const std::vector<impl_desc_type>& priority, bool ignoreConstInputs);
void selectPreferPrimitiveDescriptorWithShape(const std::vector<impl_desc_type>& priority, bool ignoreConstInputs);
bool isOneDimShape(const ov::PartialShape& pshape);
bool isReorderRequired(ov::intel_cpu::MemoryDescPtr desc1, ov::intel_cpu::MemoryDescPtr desc2);
bool isConfigDefined(const NodeConfig &config) const;
virtual bool canBeInPlace() const;

Expand Down
2 changes: 1 addition & 1 deletion src/plugins/intel_cpu/src/nodes/subgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,7 @@ void Subgraph::initSupportedPrimitiveDescriptors() {
}

void Subgraph::selectOptimalPrimitiveDescriptor() {
    // Use the shape-aware selection so reorder cost on the subgraph's inputs
    // is taken into account; constant inputs are ignored in the estimation.
    const bool ignoreConstInputs = true;
    selectPreferPrimitiveDescriptorWithShape(getImplPriority(), ignoreConstInputs);
}

ov::element::Type Subgraph::getRuntimePrecision() const {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "common_test_utils/node_builders/constant.hpp"
#include "openvino/opsets/opset8.hpp"
#include "shared_test_classes/base/ov_subgraph.hpp"
#include "utils/cpu_test_utils.hpp"

namespace ov {
namespace test {

/*
input1(f32_abcd_{1,64,32,32}) input2(f16_abcd_{1,128,1,1})
| |
Reorder(f32_acdb_{1,64,32,32}) const Convert(f32_abcd_{1,128,1,1})
| / |
| / |
Convolution(f32_acdb_{1,1,30,30}) Range_1520 VariadicSplit(f32_abcd_{1,64,1,1}, f32_abcd_{1,64,1,1})
| / \ /
| / \ /
| / \ /
| / \ /
MVN(f32_acdb_{1,1,30,30}) Reorder1(f32_acdb_{1,64,1,1}) Reorder2(f32_acdb_{1,64,1,1})
\ / /
\ / /
\ / /
\ / /
Subgraph(f32_acdb_{1,64,30,30})
|
|
Convolution(f32_acdb_{1,1,28,28})
|
Result
The Subgraph node has 3 inputs: they don't have the same layout.
Expected: Reorder is inserted after VariadicSplit[0] and VariadicSplit[1], not after MVN,
because VariadicSplit's outputs have a near-scalar shape ([1,64,1,1]), so reordering them requires less computation.
*/

// Fixture that builds a model where the CPU plugin's Subgraph node receives
// inputs with mismatching layouts, then verifies (in TearDown) that the
// shape-aware PD selection picked the 'acdb' (nhwc) output layout.
class SubgraphSelectPD : virtual public SubgraphBaseStaticTest {
protected:
    void SetUp() override {
        targetDevice = ov::test::utils::DEVICE_CPU;
        abs_threshold = 2e-2;

        auto type = element::f32;
        constexpr int const1 = 32;
        auto input1 = std::make_shared<ov::opset8::Parameter>(type, Shape{1, const1 / 2, 8, 8});
        input1->set_friendly_name("input1");
        auto input2 = std::make_shared<ov::opset8::Parameter>(type, Shape{1, const1, 1, 1});
        input2->set_friendly_name("input2");

        // Split input2's channel axis in half; both halves later feed the Subgraph.
        auto variadicSplit = std::make_shared<ov::op::v1::VariadicSplit>(
            input2,
            ov::opset8::Constant::create(element::i64, Shape{1}, {1}),
            ov::opset8::Constant::create(element::i64, Shape{2}, {const1 / 2, const1 / 2}));
        variadicSplit->set_friendly_name("variadicSplit");

        auto add1 = std::make_shared<ov::opset8::Add>(variadicSplit->output(0),
                                                      ov::opset8::Constant::create(type, Shape{1}, {0}));
        add1->set_friendly_name("add1");
        // Range over axes [2, rank) drives the MVN reduction axes.
        auto shapeof = std::make_shared<ov::opset8::ShapeOf>(input1);
        auto rankof = std::make_shared<ov::opset8::ShapeOf>(shapeof);
        auto squeeze =
            std::make_shared<ov::opset8::Squeeze>(rankof, ov::opset8::Constant::create(element::i64, Shape{1}, {0}));

        auto range = std::make_shared<ov::opset8::Range>(ov::opset8::Constant::create(element::i64, Shape{}, {2}),
                                                         squeeze,
                                                         ov::opset8::Constant::create(element::i64, Shape{}, {1}),
                                                         ov::element::i64);
        // Builds a 3x3 convolution whose in/out layouts are pinned to nhwc via CPU rt_info,
        // which forces the layout mismatch the test exercises.
        auto create_conv = [&](const std::shared_ptr<ov::Node>& input_node) {
            auto conv = std::make_shared<ov::opset8::Convolution>(
                input_node,
                ov::test::utils::make_constant(type, Shape{1, const1 / 2u, 3, 3}, ov::test::utils::InputGenerateData(0, 1)),
                Strides{1, 1},
                CoordinateDiff{1, 1},
                CoordinateDiff{1, 1},
                Strides{1, 1});
            conv->get_rt_info() =
                CPUTestUtils::CPUTestsBase::makeCPUInfo({CPUTestUtils::nhwc}, {CPUTestUtils::nhwc}, {});
            return conv;
        };
        auto create_relu = [&](const std::shared_ptr<ov::Node>& input_node) {
            return std::make_shared<ov::opset8::PRelu>(input_node,
                                                       ov::opset8::Constant::create(element::f32, Shape{1}, {1}));
        };
        auto conv1 = create_conv(input1);
        auto mvn =
            std::make_shared<ov::opset8::MVN>(create_relu(conv1), range, false, 0.1, op::MVNEpsMode::INSIDE_SQRT);
        auto mul = std::make_shared<ov::opset8::Multiply>(create_relu(add1), mvn);
        auto add2 = std::make_shared<ov::opset8::Add>(variadicSplit->output(1), mul);
        auto conv2 = create_conv(create_relu(add2));
        conv2->set_friendly_name("conv2");

        function = std::make_shared<ov::Model>(conv2, ParameterVector{input1, input2});
    }

    void TearDown() override {
        // Inspect the executed graph: every Subgraph node must have chosen the
        // 'acdb' (nhwc) output layout, and at least one Subgraph must exist.
        auto runtime_function = compiledModel.get_runtime_model();
        int nodes_found = 0;
        for (const auto& n : runtime_function->get_ordered_ops()) {
            auto layer_type = n->get_rt_info().at(ov::exec_model_info::LAYER_TYPE).as<std::string>();
            if (layer_type == "Subgraph") {
                nodes_found++;
                auto output_layout = n->get_rt_info().at(ov::exec_model_info::OUTPUT_LAYOUTS).as<std::string>();
                // The optimal choose should be: 'nhwc'.
                ASSERT_EQ(output_layout, "acdb");
            }
        }
        ASSERT_GT(nodes_found, 0);
    }
};

// Compiles and runs the model; the layout assertions live in the fixture's
// TearDown(), which executes after run() completes.
TEST_F(SubgraphSelectPD, smoke_CompareWithRefs) {
run();
}

} // namespace test
} // namespace ov
Loading