Commit 2e6d061

[CPU] [LPT] CPU limitation (openvinotoolkit#22522)
* [CPU] [LPT] CPU limitation
* tests
* comments fixes
* tests fix
* tests refactoring
1 parent db4b33c commit 2e6d061

File tree

12 files changed: +101 −49 lines

src/common/low_precision_transformations/include/low_precision/layer_transformation.hpp

+1 −10

@@ -41,16 +41,7 @@ namespace precision_set {
     LP_TRANSFORMATIONS_API const std::vector<element::Type>& get_int8_support();
     LP_TRANSFORMATIONS_API const std::vector<element::Type>& get_int8_int16_int32_support();
 } // namespace precision_set
-enum levels : size_t {
-    int4 = 16,
-    int4_narrow_range = 15,
-    int8 = 256,
-    int8_narrow_range = 255,
-    int16 = 65536,
-    int16_narrow_range = 65535,
-    int32 = size_t(4294967296), // for ARM and ia32 platforms where this number bigger than size_t but never used
-    int32_narrow_range = 4294967295
-};
+
 class LP_TRANSFORMATIONS_API DataPrecision {
 public:
     DataPrecision() : precision(element::undefined), min(0.f), max(0.f), hasZeroPoint(false) {}

src/common/low_precision_transformations/include/low_precision/low_precision.hpp

+5 −2

@@ -6,6 +6,7 @@

 #include <vector>
 #include <memory>
+#include <unordered_set>

 // one place to include all Low Precision Transformations from ov::pass::low_precision
 #include "low_precision/rt_info/intervals_alignment_attribute.hpp"
@@ -59,7 +60,7 @@ class ov::pass::low_precision::TypeRelaxedReplacer : public ov::pass::GraphRewrite {
     TypeRelaxedReplacer();
 };

-class ov::pass::low_precision::LowPrecision : public ov::pass::ModelPass {
+class LP_TRANSFORMATIONS_API ov::pass::low_precision::LowPrecision : public ov::pass::ModelPass {
 public:
     OPENVINO_RTTI("LowPrecision", "0");
     LowPrecision(
@@ -68,7 +69,9 @@ class ov::pass::low_precision::LowPrecision : public ov::pass::ModelPass {
         const LayerTransformation::Params = LayerTransformation::Params());
     bool run_on_model(const std::shared_ptr<ov::Model>& m) override;

-    static bool isFunctionQuantized(const std::shared_ptr<const ov::Model>& model);
+    static bool isFunctionQuantized(
+        const std::shared_ptr<const ov::Model>& model,
+        const std::set<levels>& supported_levels = all_levels);
     static bool isFQLevelsPresent(const std::shared_ptr<const ov::Model>& model, const std::set<size_t>& levels);

     template <typename T, class... Args>
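
The new optional argument lets a plugin state which FakeQuantize level counts it is willing to treat as quantization. A minimal usage sketch (the model variable and the level choice are illustrative, not taken from this commit):

    // Treat the model as quantized only if it contains int8 FakeQuantize operations.
    using namespace ov::pass::low_precision;
    const std::set<levels> int8_only = {levels::int8, levels::int8_narrow_range};
    const bool quantized = LowPrecision::isFunctionQuantized(model, int8_only);
    // Without the second argument, all_levels is used, preserving the previous behavior.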

src/common/low_precision_transformations/include/low_precision/quantization_details.hpp

+22 −1

@@ -5,6 +5,7 @@
 #pragma once

 #include <memory>
+#include <unordered_set>
 #include <ostream>
 #include <vector>

@@ -15,6 +16,24 @@ namespace ov {
 namespace pass {
 namespace low_precision {

+enum levels : size_t {
+    int4 = 16,
+    int4_narrow_range = 15,
+    int8 = 256,
+    int8_narrow_range = 255,
+    int16 = 65536,
+    int16_narrow_range = 65535,
+    int32 = size_t(4294967296), // for ARM and ia32 platforms where this number bigger than size_t but never used
+    int32_narrow_range = 4294967295
+};
+
+static std::set<levels> all_levels = {
+    levels::int4, levels::int4_narrow_range,
+    levels::int8, levels::int8_narrow_range,
+    levels::int16, levels::int16_narrow_range,
+    levels::int32, levels::int32_narrow_range
+};
+
 class LP_TRANSFORMATIONS_API QuantizationDetails {
 public:
     QuantizationDetails();
@@ -50,7 +69,9 @@ class LP_TRANSFORMATIONS_API QuantizationDetails {

     bool empty() const noexcept;

-    static bool isSupportedLevel(const size_t level);
+    static bool isSupportedLevel(
+        const size_t level,
+        const std::set<levels>& supported_levels = all_levels);

     const size_t levels;
     const std::vector<float> inputLowValues;
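
The enum values encode level counts rather than bit widths: a full-range n-bit quantization has 2^n levels, and the narrow-range variant drops one level (2^n − 1). A quick sanity check of that relationship (a sketch, not part of the commit):

    #include "low_precision/quantization_details.hpp"
    static_assert(ov::pass::low_precision::int4 == (1u << 4), "int4: 2^4 = 16 levels");
    static_assert(ov::pass::low_precision::int8_narrow_range == (1u << 8) - 1, "narrow range: one level fewer");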

src/common/low_precision_transformations/src/low_precision.cpp

+4 −2

@@ -297,7 +297,9 @@ bool ov::pass::low_precision::LowPrecision::run_on_model(const std::shared_ptr<ov::Model>& m) {
     return false;
 }

-bool ov::pass::low_precision::LowPrecision::isFunctionQuantized(const std::shared_ptr<const ov::Model>& model) {
+bool ov::pass::low_precision::LowPrecision::isFunctionQuantized(
+    const std::shared_ptr<const ov::Model>& model,
+    const std::set<levels>& supported_levels) {
     std::set<std::shared_ptr<ov::Node>> handledNodes;
     std::deque<std::shared_ptr<ov::Node>> nodes;
     for (const auto& result : model->get_results()) {
@@ -316,7 +318,7 @@ bool ov::pass::low_precision::LowPrecision::isFunctionQuantized(const std::shared_ptr<const ov::Model>& model) {

         if (const auto fakeQuantize = ov::as_type_ptr<ov::opset1::FakeQuantize>(parent)) {
             if (QuantizationDetails::outputLayoutIsSupported(fakeQuantize, true) &&
-                QuantizationDetails::isSupportedLevel(fakeQuantize->get_levels())) {
+                QuantizationDetails::isSupportedLevel(fakeQuantize->get_levels(), supported_levels)) {
                 return true;
             }
         } else if (const auto multiSubGraph = ov::as_type_ptr<ov::op::util::MultiSubGraphOp>(parent)) {

src/common/low_precision_transformations/src/quantization_details.cpp

+4 −9

@@ -170,15 +170,10 @@ bool QuantizationDetails::empty() const noexcept {
     return (levels == 0ul) && inputLowValues.empty() && inputHighValues.empty() && outputLowValues.empty() && outputHighValues.empty();
 }

-bool QuantizationDetails::isSupportedLevel(const size_t level) {
-    using ov::pass::low_precision::levels;
-    static const std::unordered_set<size_t> supported_levels = {
-        levels::int4, levels::int4_narrow_range,
-        levels::int8, levels::int8_narrow_range,
-        levels::int16, levels::int16_narrow_range,
-        levels::int32, levels::int32_narrow_range
-    };
-    return supported_levels.find(level) != supported_levels.end();
+bool QuantizationDetails::isSupportedLevel(
+    const size_t quantization_level,
+    const std::set<ov::pass::low_precision::levels>& supported_levels) {
+    return supported_levels.find(static_cast<ov::pass::low_precision::levels>(quantization_level)) != supported_levels.end();
 }

 } // namespace low_precision
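
Design-wise, the hard-coded unordered_set gives way to a caller-supplied std::set<levels>, which requires casting the raw size_t level count to the enum before the lookup. A hedged usage sketch:

    using namespace ov::pass::low_precision;
    QuantizationDetails::isSupportedLevel(256);                    // true: int8 is in the default all_levels
    QuantizationDetails::isSupportedLevel(65536, {levels::int8});  // false: int16 is not in the given set

A level count that matches no enumerator casts to an out-of-range enum value and simply fails the set lookup, so unknown levels remain unsupported.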

src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp

+13 −24

@@ -210,28 +210,24 @@ bool Transformations::fuse_type_to_convert(const std::shared_ptr<ov::Node>& node) {
 }

 void Transformations::UpToLpt() {
+    using namespace ov::pass::low_precision;
+    static const std::set<levels>& supported_fq_levels = {
+        levels::int4,
+        levels::int4_narrow_range,
+        levels::int8,
+        levels::int8_narrow_range
+    };
+
     const bool useLpt = enableLpt &&
-        ov::pass::low_precision::LowPrecision::isFunctionQuantized(model) &&
+        LowPrecision::isFunctionQuantized(model, supported_fq_levels) &&
         CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(config.debugCaps, Lpt);

-    auto defaultPrecisions = useLpt ? ov::pass::low_precision::precision_set::get_int8_support() : std::vector<ov::element::Type>{};
-    bool hasINT16orINT32Levels = false;
-
-    if (useLpt) {
-        CPU_LPT_SCOPE(LowPrecisionTransformations_Part1);
-        hasINT16orINT32Levels = ov::pass::low_precision::LowPrecision::isFQLevelsPresent(
-            model,
-            {ov::pass::low_precision::levels::int16, ov::pass::low_precision::levels::int16_narrow_range,
-             ov::pass::low_precision::levels::int32, ov::pass::low_precision::levels::int32_narrow_range});
-        if (hasINT16orINT32Levels) {
-            defaultPrecisions = ov::pass::low_precision::precision_set::get_int8_int16_int32_support();
-        }
-    }
+    const auto defaultPrecisions = useLpt ? precision_set::get_int8_support() : std::vector<ov::element::Type>{};

     PreLpt(defaultPrecisions, isLegacyApi);

     if (useLpt)
-        Lpt(hasINT16orINT32Levels, defaultPrecisions);
+        Lpt(defaultPrecisions);
 }

 void Transformations::CpuSpecificOpSet(void) {
@@ -512,7 +508,7 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecisions, const bool isLegacyApi) {
     manager.run_passes(model);
 }

-void Transformations::Lpt(const bool hasINT16orINT32Levels, const std::vector<ov::element::Type>& defaultPrecisions) {
+void Transformations::Lpt(const std::vector<ov::element::Type>& defaultPrecisions) {
     CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(this, Lpt);

     using namespace ov::pass::low_precision;
@@ -571,18 +567,11 @@ void Transformations::Lpt(const bool hasINT16orINT32Levels, const std::vector<ov::element::Type>& defaultPrecisions) {
         QuantizationGranularityRestriction::create<ov::opset1::ConvolutionBackpropData>({0})
     });

-    // for GNA networks reference execution
-    bool updatePrecision = true;
-    if (hasINT16orINT32Levels) {
-        updatePrecision = false;
-        supportedPrecisions = std::vector<PrecisionsRestriction>({});
-    }
-
     ov::pass::Manager lptManager;
     CPU_REGISTER_PASS_COMMON(lptManager, LowPrecision,
         supportedPrecisions,
         quantizationRestrictions,
-        LayerTransformation::Params(updatePrecision, ov::element::f32, defaultPrecisions));
+        LayerTransformation::Params(true, ov::element::f32, defaultPrecisions));

     CPU_SET_CALLBACK_COMMON(lptManager, [](const_node_ptr& node) -> bool {
         return ov::is_type<ov::opset1::Multiply>(node) &&
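
The net effect on the CPU plugin: models whose FakeQuantize operations use only int16/int32 levels are no longer considered quantized, so LPT is skipped entirely and the GNA-oriented special case (updatePrecision = false with cleared precision restrictions) becomes dead code. An illustrative trace under that assumption:

    using namespace ov::pass::low_precision;
    // A model whose only FakeQuantize has 65536 (int16) levels:
    const bool quantized = LowPrecision::isFunctionQuantized(model, supported_fq_levels);
    // quantized == false, so useLpt is false, defaultPrecisions stays empty,
    // and Lpt() is never invoked; the model runs in its original precision.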

src/plugins/intel_cpu/src/transformations/transformation_pipeline.h

+1 −1

@@ -50,7 +50,7 @@ class Transformations {

     void PreLpt(const std::vector<ov::element::Type>& defaultPrecisions, const bool isLegacyApi);

-    void Lpt(const bool hasINT16orINT32Levels, const std::vector<ov::element::Type>& defaultPrecisions);
+    void Lpt(const std::vector<ov::element::Type>& defaultPrecisions);

     void MainSnippets(void);
src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/reshape_transformation.cpp

+10 −0

@@ -163,6 +163,16 @@ const std::vector<ReshapeTransformationParam> params = {
         "Reshape",
         "f32"
     },
+
+    // int16 is not supported: no dequantization after Reshape: Reshape => Output
+    {
+        { 1, 3, 32 },
+        { 1, 3, 4, 8 },
+        { 65536ul, ov::Shape{ 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 25.5f } },
+        "Reshape",
+        "f32",
+        { "Reshape", "Output" }
+    },
 };

 INSTANTIATE_TEST_SUITE_P(smoke_LPT, ReshapeTransformation,
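
In the added test case, 65536ul selects int16 levels, which the CPU plugin no longer handles, so LPT leaves the FakeQuantize unquantized: Reshape executes in f32 and is followed directly by Output, with no dequantization Multiply in between. The { "Reshape", "Output" } entry encodes that expected execution order, verified by check_execution_order (added below).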

src/tests/functional/plugin/shared/include/low_precision_transformations/reshape_transformation.hpp

+1 −0

@@ -19,6 +19,7 @@ class ReshapeTransformationParam {
     ov::builder::subgraph::FakeQuantizeOnData fakeQuantize;
     std::string layerType;
     std::string expectedKernelType;
+    std::vector<std::string> executionOrder;
 };

 typedef std::tuple<

src/tests/functional/plugin/shared/src/low_precision_transformations/reshape_transformation.cpp

+3 −0

@@ -51,6 +51,9 @@ void ReshapeTransformation::run() {
     LayerTransformation::run();

     const auto params = std::get<3>(GetParam());
+
+    EXPECT_TRUE(check_execution_order(params.executionOrder));
+
     auto actualPrecision = get_runtime_precision_by_type(params.layerType);
     const auto expectedPrecision = params.expectedKernelType;
     if ((expectedPrecision == "FP32") && (actualPrecision == "FP16")) {
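
Because check_execution_order returns true for an empty sequence, the new EXPECT_TRUE is a no-op for every existing parameter set that leaves executionOrder default-constructed; only tests that specify an order opt in to the check.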

src/tests/functional/shared_test_classes/include/shared_test_classes/base/low_precision_transformations/layer_transformation.hpp

+4 −0

@@ -53,6 +53,10 @@ class LayerTransformation : virtual public ov::test::SubgraphBaseTest {
     // get runtime precision by operation friendly name which can be fused
     std::string get_runtime_precision_by_fused_name(const std::string& layerName);

+    // check that orderedOpsTypes occurs as an operation sequence in the execution graph;
+    // orderedOpsTypes may list only the operations of interest (fewer than exist in the execution graph)
+    bool check_execution_order(const std::vector<std::string>& orderedOpsTypes);
+
     std::map<std::string, ov::Node::RTMap> get_runtime_info();

     void init_input_shapes(const ov::PartialShape& shape);

src/tests/functional/shared_test_classes/src/base/low_precision_transformations/layer_transformation.cpp

+33 −0

@@ -127,6 +127,39 @@ std::string LayerTransformation::get_runtime_precision_by_fused_name(const std::string& layerName) {
     return find_node_by_runtime_precision(compiledModel, is_node_f);
 }

+bool LayerTransformation::check_execution_order(const std::vector<std::string>& orderedOpsTypes) {
+    if (orderedOpsTypes.empty()) {
+        return true;
+    }
+
+    size_t comparisonIndex = 0;
+    const std::shared_ptr<const ov::Model>& execFunction = compiledModel.get_runtime_model();
+    for (const auto& op : execFunction->get_ordered_ops()) {
+        const auto& rtInfo = op->get_rt_info();
+        const auto& typeIt = rtInfo.find("layerType");
+        OPENVINO_ASSERT(typeIt != rtInfo.end(), "layerType is not found");
+
+        const auto layerType = typeIt->second.as<std::string>();
+        if (orderedOpsTypes[comparisonIndex] == layerType) {
+            // if comparisonIndex == 0, the comparison starts here;
+            // otherwise it is in progress: check the next operation type in the sequence
+            comparisonIndex++;
+
+            if (comparisonIndex >= orderedOpsTypes.size()) {
+                // all operation types in the sequence were matched; the comparison is finished
+                return true;
+            }
+        } else if (comparisonIndex != 0) {
+            // the comparison has started and the operation type does not match: exit
+            return false;
+        }
+    }
+
+    // we can get here only if the operation sequence is too long
+    // (the execution graph is missing some operations from the sequence)
+    return comparisonIndex == orderedOpsTypes.size();
+}
+
 std::map<std::string, ov::Node::RTMap> LayerTransformation::get_runtime_info() {
     const ov::CompiledModel& execNet = compiledModel;
     const std::shared_ptr<const ov::Model>& function = execNet.get_runtime_model();
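
A minimal usage sketch of the matching semantics (hypothetical layer types): the checker looks for orderedOpsTypes as one contiguous run starting at the first occurrence of its first element.

    // Suppose the execution graph's ordered ops are: Input -> Reshape -> Output
    check_execution_order({});                      // true: nothing to verify
    check_execution_order({"Reshape", "Output"});   // true: contiguous run found
    check_execution_order({"Input", "Output"});     // false: "Reshape" interrupts the run after "Input"
    check_execution_order({"Output", "Extra"});     // false: the graph ends before the run completes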
