
Commit 6f4b52f (committed Jul 17, 2023, 1 parent: 3958f77)

[CPU] I64 transformation & config.

13 files changed: +244 −74 lines

src/inference/dev_api/cpp_interfaces/interface/ie_internal_plugin_config.hpp (+5)

```diff
@@ -110,6 +110,11 @@ INFERENCE_ENGINE_1_0_DEPRECATED DECLARE_CONFIG_VALUE(ENABLE);
 INFERENCE_ENGINE_1_0_DEPRECATED DECLARE_CONFIG_VALUE(IGNORE_CALLBACK);
 INFERENCE_ENGINE_1_0_DEPRECATED DECLARE_CONFIG_VALUE(DISABLE);
 
+/**
+ * @brief Enables inference with INT64 data type in the CPU plugin if it is present in the original model.
+ */
+DECLARE_CONFIG_KEY(CPU_NATIVE_I64);
+
 }  // namespace PluginConfigInternalParams
 
 }  // namespace InferenceEngine
```
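For reference, a minimal sketch of how this internal key could be enabled through the InferenceEngine 1.0 API. The literal string "CPU_NATIVE_I64" (the conventional expansion of DECLARE_CONFIG_KEY) and the model path are illustrative assumptions, not part of this commit:

```cpp
#include <ie_core.hpp>
#include <ie_plugin_config.hpp>

int main() {
    InferenceEngine::Core core;
    auto network = core.ReadNetwork("model.xml");  // hypothetical model path

    // DECLARE_CONFIG_KEY(CPU_NATIVE_I64) is assumed to expand to the literal
    // key "CPU_NATIVE_I64"; Config::readProperties() accepts only YES or NO.
    auto execNet = core.LoadNetwork(network, "CPU",
        {{"CPU_NATIVE_I64", InferenceEngine::PluginConfigParams::YES}});
    return 0;
}
```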

src/plugins/intel_cpu/src/config.cpp (+10 −1)

```diff
@@ -230,6 +230,15 @@ void Config::readProperties(const std::map<std::string, std::string> &prop) {
             IE_THROW() << "Wrong value for property key " << ov::hint::execution_mode.name()
                        << ". Supported values: PERFORMANCE, ACCURACY";
         }
+    } else if (key == PluginConfigInternalParams::KEY_CPU_NATIVE_I64) {
+        if (val == PluginConfigParams::YES) {
+            enableNativeI64 = true;
+        } else if (val == PluginConfigParams::NO) {
+            enableNativeI64 = false;
+        } else {
+            IE_THROW() << "Wrong value for property key " << PluginConfigInternalParams::KEY_CPU_NATIVE_I64 << ": " << val
+                       << ". Expected only YES or NO values.";
+        }
     } else {
         IE_THROW(NotFound) << "Unsupported property " << key << " by CPU plugin";
     }
@@ -314,4 +323,4 @@ void Config::updateProperties() {
 }
 
 }  // namespace intel_cpu
-}  // namespace ov
+}  // namespace ov
```

src/plugins/intel_cpu/src/config.h (+1)

```diff
@@ -57,6 +57,7 @@ struct Config {
     // TODO: Executor cache may leads to incorrect behavior on oneDNN ACL primitives
     size_t rtCacheCapacity = 0ul;
 #endif
+    bool enableNativeI64 = false;
     InferenceEngine::IStreamsExecutor::Config streamExecutorConfig;
     InferenceEngine::PerfHintsConfig perfHintsConfig;
     bool enableCpuPinning = true;
```

src/plugins/intel_cpu/src/dnnl_extension_utils.cpp (+29 −27)

```diff
@@ -4,45 +4,43 @@
 
 #include "dnnl_extension_utils.h"
 
-#include "utils/general_utils.h"
 #include <oneapi/dnnl/dnnl.hpp>
 #include "memory_desc/dnnl_blocked_memory_desc.h"
-#include "onednn/iml_type_mapper.h"
-#include <common/primitive_desc.hpp>
 #include <common/primitive_desc_iface.hpp>
 
-#include <vector>
-
 using namespace dnnl;
 
 namespace ov {
 namespace intel_cpu {
 
-uint8_t DnnlExtensionUtils::sizeOfDataType(dnnl::memory::data_type dataType) {
+uint8_t DnnlExtensionUtils::sizeOfDataType(memory::data_type dataType) {
     switch (dataType) {
-    case dnnl::memory::data_type::f32:
-        return 4;
-    case dnnl::memory::data_type::s32:
+    case memory::data_type::f64:
+    case memory::data_type::s64:
+        return 8;
+    case memory::data_type::f32:
+    case memory::data_type::s32:
         return 4;
-    case dnnl::memory::data_type::bf16:
+    case memory::data_type::bf16:
+    case memory::data_type::f16:
         return 2;
-    case dnnl::memory::data_type::s8:
-        return 1;
-    case dnnl::memory::data_type::u8:
+    case memory::data_type::s8:
+    case memory::data_type::u8:
+    case memory::data_type::bin:
         return 1;
-    case dnnl::memory::data_type::bin:
-        return 1;
-    case dnnl::memory::data_type::f16:
-        return 2;
-    case dnnl::memory::data_type::undef:
+    case memory::data_type::undef:
         return 0;
     default:
-        IE_THROW() << "Unsupported data type.";
+        IE_THROW() << "Unsupported data type: " << DataTypeToIEPrecision(dataType);
     }
 }
 
 memory::data_type DnnlExtensionUtils::IEPrecisionToDataType(const InferenceEngine::Precision& prec) {
     switch (prec) {
+        case InferenceEngine::Precision::FP64:
+            return memory::data_type::f64;
+        case InferenceEngine::Precision::I64:
+            return memory::data_type::s64;
         case InferenceEngine::Precision::FP32:
             return memory::data_type::f32;
         case InferenceEngine::Precision::I32:
@@ -68,6 +66,10 @@ memory::data_type DnnlExtensionUtils::IEPrecisionToDataType(const InferenceEngin
 
 InferenceEngine::Precision DnnlExtensionUtils::DataTypeToIEPrecision(memory::data_type dataType) {
     switch (dataType) {
+        case memory::data_type::f64:
+            return InferenceEngine::Precision::FP64;
+        case memory::data_type::s64:
+            return InferenceEngine::Precision::I64;
         case memory::data_type::f32:
             return InferenceEngine::Precision::FP32;
         case memory::data_type::s32:
@@ -90,11 +92,11 @@ InferenceEngine::Precision DnnlExtensionUtils::DataTypeToIEPrecision(memory::dat
     }
 }
 
-Dim DnnlExtensionUtils::convertToDim(const dnnl::memory::dim &dim) {
+Dim DnnlExtensionUtils::convertToDim(const memory::dim &dim) {
     return dim == DNNL_RUNTIME_DIM_VAL ? Shape::UNDEFINED_DIM : static_cast<size_t>(dim);
 }
-dnnl::memory::dim DnnlExtensionUtils::convertToDnnlDim(const Dim &dim) {
-    return dim == Shape::UNDEFINED_DIM ? DNNL_RUNTIME_DIM_VAL : static_cast<dnnl::memory::dim>(dim);
+memory::dim DnnlExtensionUtils::convertToDnnlDim(const Dim &dim) {
+    return dim == Shape::UNDEFINED_DIM ? DNNL_RUNTIME_DIM_VAL : static_cast<memory::dim>(dim);
 }
 
 VectorDims DnnlExtensionUtils::convertToVectorDims(const memory::dims& dims) {
@@ -133,19 +135,19 @@ memory::format_tag DnnlExtensionUtils::GetPlainFormatByRank(size_t rank) {
     }
 }
 
-DnnlMemoryDescPtr DnnlExtensionUtils::makeDescriptor(const dnnl::memory::desc &desc) {
+DnnlMemoryDescPtr DnnlExtensionUtils::makeDescriptor(const memory::desc &desc) {
     return makeDescriptor(desc.get());
 }
 
 DnnlMemoryDescPtr DnnlExtensionUtils::makeDescriptor(const_dnnl_memory_desc_t desc) {
-    if (desc->format_kind == dnnl::impl::format_kind_t::dnnl_blocked) {
+    if (desc->format_kind == impl::format_kind_t::dnnl_blocked) {
         return std::shared_ptr<DnnlBlockedMemoryDesc>(new DnnlBlockedMemoryDesc(desc));
     } else {
         return std::shared_ptr<DnnlMemoryDesc>(new DnnlMemoryDesc(desc));
     }
 }
 
-size_t DnnlExtensionUtils::getMemSizeForDnnlDesc(const dnnl::memory::desc& desc) {
+size_t DnnlExtensionUtils::getMemSizeForDnnlDesc(const memory::desc& desc) {
     auto tmpDesc = desc;
 
     const auto offset0 = tmpDesc.get()->offset0;
@@ -167,8 +169,8 @@ std::shared_ptr<DnnlBlockedMemoryDesc> DnnlExtensionUtils::makeUndefinedDesc(con
     }
 }
 
-DnnlMemoryDescPtr DnnlExtensionUtils::query_md(const const_dnnl_primitive_desc_t& pd, const dnnl::query& what, int idx) {
-    auto query = dnnl::convert_to_c(what);
+DnnlMemoryDescPtr DnnlExtensionUtils::query_md(const const_dnnl_primitive_desc_t& pd, const query& what, int idx) {
+    auto query = convert_to_c(what);
     const auto* cdesc = dnnl_primitive_desc_query_md(pd, query, idx);
 
     if (!cdesc)
```
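The added s64/f64 cases mean 64-bit tensors now round-trip cleanly through the oneDNN type mapping instead of being rejected; a small assert-style sketch of the invariants the new code establishes (illustrative, not part of the commit):

```cpp
#include <cassert>
#include "dnnl_extension_utils.h"  // ov::intel_cpu::DnnlExtensionUtils

void checkI64Mapping() {
    using namespace ov::intel_cpu;
    // I64 now maps to oneDNN s64 instead of hitting the "Unsupported" branch.
    const auto dt = DnnlExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::I64);
    assert(dt == dnnl::memory::data_type::s64);
    // s64 is an 8-byte type and maps back to I64.
    assert(DnnlExtensionUtils::sizeOfDataType(dt) == 8);
    assert(DnnlExtensionUtils::DataTypeToIEPrecision(dt) == InferenceEngine::Precision::I64);
}
```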

src/plugins/intel_cpu/src/plugin.cpp (+18 −8)

```diff
@@ -9,23 +9,19 @@
 
 #include "transformations/transformation_pipeline.h"
 #include "itt.h"
-#include "extension_mngr.h"
 #include "extension.h"
 #include "serialize.h"
 #include "threading/ie_executor_manager.hpp"
 
 #include "ie_icore.hpp"
 #include "ie_plugin_config.hpp"
 #include "ie_system_conf.h"
-#include "threading/ie_cpu_streams_info.hpp"
 #include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
 #include "openvino/runtime/intel_cpu/properties.hpp"
 
-#include <transformations/utils/utils.hpp>
 #include <ie_ngraph_utils.hpp>
 
 #include "performance_heuristics.hpp"
-#include "openvino/runtime/properties.hpp"
 #include "weights_cache.hpp"
 #include "utils/denormals.hpp"
 
@@ -36,7 +32,6 @@
 #endif
 
 #include <cpu/x64/cpu_isa_traits.hpp>
-#include <itt.h>
 
 using namespace InferenceEngine;
 
@@ -155,7 +150,7 @@ static bool streamsSet(const std::map<std::string, std::string>& config) {
            config.count(ov::num_streams.name());
 }
 
-void Engine::ApplyPerformanceHints(std::map<std::string, std::string> &config, const std::shared_ptr<ngraph::Function>& ngraphFunc) const {
+void Engine::ApplyPerformanceHints(std::map<std::string, std::string> &config, const std::shared_ptr<ov::Model>& ngraphFunc) const {
     auto getNumStreamsLatency = [&]() {
         return std::pair<std::string, std::string>(CONFIG_VALUE(CPU_THROUGHPUT_NUMA), ov::util::to_string(ov::streams::NUMA));
     };
@@ -272,7 +267,7 @@ void Engine::ApplyPerformanceHints(std::map<std::string, std::string> &config, c
     }
 }
 
-void Engine::GetPerformanceStreams(Config& config, const std::shared_ptr<ngraph::Function>& ngraphFunc) {
+void Engine::GetPerformanceStreams(Config& config, const std::shared_ptr<ov::Model>& ngraphFunc) {
     const auto perf_hint_name = config.perfHintsConfig.ovPerfHint;
     // save hints parameters to model rt_info
     ov::AnyMap hints_props;
@@ -421,6 +416,19 @@ static Config::SnippetsMode getSnippetsMode(const std::map<std::string, std::str
     IE_THROW() << "Wrong value for property key SNIPPETS_MODE. Expected values: ENABLE/DISABLE/IGNORE_CALLBACK";
 }
 
+static void setI64Mode(const std::map<std::string, std::string>& modelConfig, Config& engineConfig) {
+    engineConfig.enableNativeI64 = false;
+    const auto i64prop = modelConfig.find(InferenceEngine::PluginConfigInternalParams::KEY_CPU_NATIVE_I64);
+    if (i64prop != modelConfig.end()) {
+        if (i64prop->second == PluginConfigParams::YES) {
+            engineConfig.enableNativeI64 = true;
+        } else if (i64prop->second != PluginConfigParams::NO) {
+            IE_THROW() << "Wrong value for property key " << PluginConfigInternalParams::KEY_CPU_NATIVE_I64 << ": " << i64prop->second
+                       << ". Expected only YES or NO values.";
+        }
+    }
+}
+
 InferenceEngine::IExecutableNetworkInternal::Ptr
 Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std::map<std::string, std::string> &orig_config) {
     OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "Engine::LoadExeNetworkImpl");
@@ -454,6 +462,7 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std
     const bool enableLPT = shouldEnableLPT(config, engConfig);
     ov::element::Type inferencePrecision = getInferencePrecision(config, engConfig);
     const Config::SnippetsMode snippetsMode = getSnippetsMode(config, engConfig);
+    setI64Mode(config, engConfig);
 
     auto nGraphFunc = clonedNetwork.getFunction();
 
@@ -729,6 +738,7 @@ QueryNetworkResult Engine::QueryNetwork(const CNNNetwork& network, const std::ma
     const bool enableLPT = (lptProp != config.end() && lptProp->second == PluginConfigParams::YES) /* enabled in the orig_config*/
             || Config::LPTransformsMode::On == engConfig.lpTransformsMode /* or already enabled */;
     const Config::SnippetsMode snippetsMode = getSnippetsMode(config, conf);
+    setI64Mode(config, conf);
 
     auto model = network.getFunction();
     if (model == nullptr) {
@@ -744,7 +754,7 @@ QueryNetworkResult Engine::QueryNetwork(const CNNNetwork& network, const std::ma
             transformation.UpToCpuSpecificOpSet();
             transformation.CpuSpecificOpSet();
         },
-        [&](const std::shared_ptr<ngraph::Node>& op) {
+        [&](const std::shared_ptr<ov::Node>& op) {
             std::unique_ptr<Node> ptr;
             try {
                 ptr.reset(Node::factory().create(op, context));
```

src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp (+10 −16)

```diff
@@ -2,34 +2,29 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include <ngraph/pass/constant_folding.hpp>
-#include "ngraph/op/fake_quantize.hpp"
-#include "ngraph/pass/manager.hpp"
 #include "common/pass/reshape_fc_fusion.hpp"
 #include "common/pass/align_matmul_input_ranks.hpp"
-#include "transformations/common_optimizations/reshape_prelu.hpp"
 #include "common/pass/convert_broadcast_to_tiles.hpp"
 #include "common/pass/convert_tile_to_seq_tiles.hpp"
 #include "common/pass/convert_matmul_to_fc.hpp"
 #include "common/pass/convert_to_power_static.hpp"
 #include "common/pass/convert_to_leaky_relu.hpp"
 #include "common/pass/convert_to_swish_cpu.hpp"
-#include "transformations/convert_precision.hpp"
-#include "transformations/utils/utils.hpp"
 #include "common/pass/rnn_sequences_optimization.hpp"
-#include "transformations/common_optimizations/reshape_sequence_fusion.hpp"
 #include "common/pass/ngram_fusion.hpp"
-#include "transformations/defs.hpp"
+#include <openvino/pass/constant_folding.hpp>
+#include "openvino/pass/manager.hpp"
+#include "transformations/common_optimizations/reshape_sequence_fusion.hpp"
 
 #include "itt.hpp"
 
 namespace ov {
 namespace intel_cpu {
 
-inline void ConvertToCPUSpecificOpset(std::shared_ptr<ngraph::Function> &nGraphFunc) {
+inline void ConvertToCPUSpecificOpset(std::shared_ptr<ov::Model> &model) {
     RUN_ON_FUNCTION_SCOPE(ConvertToCPUSpecificOpset);
 
-    ngraph::pass::Manager manager;
+    ov::pass::Manager manager;
     manager.set_per_pass_validation(false);
     CPU_REGISTER_PASS_COMMON(manager, ConvertMatMulToFC);
     CPU_REGISTER_PASS_COMMON(manager, AlignMatMulInputRanks);
@@ -38,17 +33,16 @@ inline void ConvertToCPUSpecificOpset(std::shared_ptr<ngraph::Function> &nGraphF
     CPU_REGISTER_PASS_COMMON(manager, ConvertToLeakyRelu);
     CPU_REGISTER_PASS_COMMON(manager, ConvertToSwishCPU);
     CPU_REGISTER_PASS_COMMON(manager, OptimizeSequenceTransposes);
-    if (!ov::op::util::has_op_with_type<ngraph::op::FakeQuantize>(nGraphFunc)) {
+    if (!op::util::has_op_with_type<ngraph::op::FakeQuantize>(model)) {
         CPU_REGISTER_PASS_COMMON(manager, ReshapeFullyConnectedFusion);
     }
     // after transformation "MoveEltwiseUpThroughDataMov" there can be reshaped sequences that should be eliminated or fused
-    CPU_REGISTER_PASS_COMMON(manager, ov::pass::ReshapeSequenceFusion);
-    CPU_REGISTER_PASS_COMMON(manager, ov::pass::ConstantFolding);
-    CPU_REGISTER_PASS_COMMON(manager, ov::pass::ConvertPrecision, precisions_map {{ ngraph::element::i64, ngraph::element::i32 }});
+    CPU_REGISTER_PASS_COMMON(manager, pass::ReshapeSequenceFusion);
+    CPU_REGISTER_PASS_COMMON(manager, pass::ConstantFolding);
     CPU_REGISTER_PASS_COMMON(manager, NgramFusion);
-    CPU_REGISTER_PASS_COMMON(manager, ov::pass::Validate);
+    CPU_REGISTER_PASS_COMMON(manager, pass::Validate);
 
-    manager.run_passes(nGraphFunc);
+    manager.run_passes(model);
 }
 
 } // namespace intel_cpu
```
src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/convert_precision_i64_i32.cpp (new file, +105)

```diff
@@ -0,0 +1,105 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+
+#include "convert_precision_i64_i32.hpp"
+#include <openvino/opsets/opset12.hpp>
+#include "transformations/utils/utils.hpp"
+#include "cpu_types.h"
+
+#include <unordered_set>
+
+// Returns true if the operation type natively supports i64.
+bool isNativelySupported(const ov::Node::type_info_t &type) {
+    static const std::unordered_set<ov::Node::type_info_t> i64Ops = {
+    };
+
+    return i64Ops.find(type) != i64Ops.end();
+}
+
+std::shared_ptr<ov::Node> changeConstantPrecision(std::shared_ptr<ov::op::v0::Constant>& constant) {
+    const auto* srcData = constant->get_data_ptr<int64_t>();
+    const auto size = shape_size(constant->get_shape());
+
+    auto newConstant = std::make_shared<ov::op::v0::Constant>(ov::element::i32, constant->get_shape());
+    newConstant->output(0).set_names(constant->output(0).get_names());
+    auto* dstData = const_cast<int32_t*>(reinterpret_cast<const int32_t*>(newConstant->get_data_ptr()));
+    if (dstData == nullptr) {
+        throw ngraph::ngraph_error("Can't get destination data pointer");
+    }
+
+    for (size_t i = 0; i < size; ++i) {
+        if (srcData[i] >= std::numeric_limits<int32_t>::max()) {
+            dstData[i] = std::numeric_limits<int32_t>::max();
+        } else if (srcData[i] <= std::numeric_limits<int32_t>::lowest()) {
+            dstData[i] = std::numeric_limits<int32_t>::lowest();
+        } else {
+            dstData[i] = static_cast<int32_t>(srcData[i]);
+        }
+    }
+    return newConstant;
+}
+
+bool ov::intel_cpu::ConvertPrecisionI64ToI32::run_on_model(const std::shared_ptr<ov::Model> &model) {
+    const auto orderedOps = model->get_ordered_ops();
+    for (const auto& op : orderedOps) {
+        if (isNativelySupported(op->get_type_info()) || TypeFromName(op->get_type_name()) == Type::Unknown) {
+            continue;
+        }
+
+        bool convertForOutputsRequired = false;
+        for (const auto& input : op->inputs()) {
+            if (input.get_element_type() == ov::element::i64) {
+                auto parentOutput = input.get_source_output();
+                auto parentNode = parentOutput.get_node_shared_ptr();
+                if (ov::is_type<ov::opset12::Convert>(parentNode) &&
+                        parentNode->get_rt_info().find("convert_i32_i64") != parentNode->get_rt_info().end()) {
+                    input.replace_source_output(parentNode->input_value(0));
+                } else if (auto constOp = ov::as_type_ptr<ov::op::v0::Constant>(parentNode)) {
+                    auto newConst = changeConstantPrecision(constOp);
+                    input.replace_source_output(newConst);
+                    newConst->set_friendly_name(constOp->get_friendly_name());
+                } else {
+                    auto convert = std::make_shared<ov::opset12::Convert>(input.get_source_output(), ov::element::i32);
+                    convert->output(0).add_names(parentOutput.get_names());
+                    input.replace_source_output(convert);
+                }
+                convertForOutputsRequired = true;
+            }
+        }
+
+        if (convertForOutputsRequired) {
+            // Propagate i32 precision into outputs.
+            op->validate_and_infer_types();
+            for (auto& output : op->outputs()) {
+                if (output.get_element_type() == ov::element::i32) {
+                    auto targetInputs = output.get_target_inputs();
+                    auto convert = std::make_shared<ov::opset12::Convert>(output, ov::element::i64);
+
+                    auto& rt_info = convert->get_rt_info();
+                    rt_info["convert_i32_i64"] = "";
+                    for (const auto& targetInput : targetInputs) {
+                        targetInput.replace_source_output(convert);
+                    }
+
+                    auto& convertTensor = convert->output(0).get_tensor();
+                    const std::string newName = ov::op::util::get_ie_output_name(output);
+                    if (ov::descriptor::get_ov_tensor_legacy_name(convertTensor).empty()) {
+                        ov::descriptor::set_ov_tensor_legacy_name(convertTensor, newName);
+                    }
+                    if (!output.get_names().empty()) {
+                        convertTensor.set_names(output.get_names());
+                    }
+                }
+            }
+        }
+
+        if (auto multisubgraph_op = ov::as_type_ptr<ov::op::util::MultiSubGraphOp>(op)) {
+            for (size_t idx = 0; idx < multisubgraph_op->get_internal_subgraphs_size(); ++idx) {
+                run_on_model(multisubgraph_op->get_function(static_cast<int>(idx)));
+            }
+        }
+    }
+
+    return true;
+}
```
src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/convert_precision_i64_i32.hpp (new file, +21)

```diff
@@ -0,0 +1,21 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/pass/pass.hpp"
+
+namespace ov {
+namespace intel_cpu {
+class ConvertPrecisionI64ToI32: public ov::pass::ModelPass {
+public:
+    OPENVINO_RTTI("ConvertPrecisionI64ToI32", "0");
+
+    ConvertPrecisionI64ToI32() = default;
+
+    bool run_on_model(const std::shared_ptr<ov::Model>& model) override;
+};
+
+}   // namespace intel_cpu
+}   // namespace ov
```
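Outside the plugin pipeline, the new pass can be exercised on its own through the standard OpenVINO pass manager; a minimal sketch (model construction omitted):

```cpp
#include "openvino/pass/manager.hpp"
#include "transformations/cpu_opset/x64/pass/convert_precision_i64_i32.hpp"

// Sketch: apply the new pass to an arbitrary ov::Model.
void demoteI64(const std::shared_ptr<ov::Model>& model) {
    ov::pass::Manager manager;
    manager.register_pass<ov::intel_cpu::ConvertPrecisionI64ToI32>();
    manager.run_passes(model);
    // i64 inputs of ops without native i64 support are now fed as i32, and
    // Convert nodes tagged "convert_i32_i64" restore i64 on their outputs.
}
```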

src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp (+24 −16)

```diff
@@ -1,4 +1,4 @@
-// Copyright (C) 2022 Intel Corporation
+// Copyright (C) 2022-2023 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -25,6 +25,7 @@
 #include "transformations/common_optimizations/fq_mul_fusion.hpp"
 #include "transformations/common_optimizations/mul_fake_quantize_fusion.hpp"
 #include "transformations/common_optimizations/nop_elimination.hpp"
+#include "transformations/common_optimizations/reshape_prelu.hpp"
 #include "transformations/common_optimizations/transpose_sinking.hpp"
 #include "transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp"
 #include "transformations/common_optimizations/augru_cell_fusion.hpp"
@@ -53,8 +54,6 @@
 #include "transformations/op_conversions/convert_slice_to_strided_slice.hpp"
 #include "transformations/op_conversions/convert_space_to_batch.hpp"
 #include "transformations/op_conversions/convert_space_to_depth.hpp"
-#include "transformations/op_conversions/convert_subtract.hpp"
-#include "transformations/op_conversions/convert_ti_to_sequences.hpp"
 #include "transformations/op_conversions/detection_output_downgrade.hpp"
 #include "transformations/op_conversions/detection_output_upgrade.hpp"
 #include "transformations/op_conversions/eye_decomposition.hpp"
@@ -98,11 +97,7 @@
 #include "transformations/snippets/x64/pass/snippets_mark_skipped.hpp"
 #include "transformations/cpu_opset/x64/pass/mha_fusion.hpp"
 #include "transformations/cpu_opset/x64/pass/convert_to_interaction.hpp"
-#include "transformations/cpu_opset/arm/pass/convert_group_conv.hpp"
-#include "transformations/cpu_opset/arm/pass/convert_group_conv1d.hpp"
-#include "transformations/cpu_opset/arm/pass/convert_reduce_multi_axis.hpp"
-#include "transformations/cpu_opset/arm/pass/mish_decomposition.hpp"
-#include "transformations/cpu_opset/common/pass/decompose_integer_divide.hpp"
+#include "transformations/cpu_opset/x64/pass/convert_precision_i64_i32.hpp"
 #include "transformations/cpu_opset/common/pass/convert_fq_rnn_to_quantized_rnn.hpp"
 #include "transformations/cpu_opset/common/pass/insert_convert_after_extension.hpp"
 #include "transformations/cpu_opset/common/pass/move_eltwise_up_data_movement.hpp"
@@ -127,7 +122,7 @@ namespace intel_cpu {
 
 using const_node_ptr = const std::shared_ptr<const ov::Node>;
 
-bool Transformations::fuse_type_to_convert(const std::shared_ptr<ngraph::Node>& node, const precisions_map& precisions) {
+bool Transformations::fuse_type_to_convert(const std::shared_ptr<ov::Node>& node, const precisions_map& precisions) {
     const auto& from = node->get_output_element_type(0);
     auto it = precisions.find(from);
     if (it == precisions.end())
@@ -139,7 +134,7 @@ bool Transformations::fuse_type_to_convert(const std::shared_ptr<ngraph::Node>&
     // is converted to be 1 for boolean, but 0 for u8. Thus an Abs and Ceil node should be added before the
     // Convert node for this scenario.
     if (convert->input(0).get_element_type().is_real() &&
-        convert->get_convert_element_type() == ngraph::element::boolean && to.is_integral_number()) {
+        convert->get_convert_element_type() == ov::element::boolean && to.is_integral_number()) {
         auto abs = std::make_shared<ov::opset10::Abs>(convert->input_value(0).get_node_shared_ptr());
         auto ceil = std::make_shared<ov::opset10::Ceiling>(abs);
         auto new_convert = std::make_shared<ov::opset10::Convert>(ceil, to);
@@ -208,11 +203,10 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
     if (useLpt) {
         CPU_REGISTER_PASS_COMMON(manager, ov::pass::MarkDequantizationSubgraph, defaultPrecisions);
     }
+    bool supportI64 = config.enableNativeI64;
 
-    auto get_convert_precisions = []() {
+    auto get_convert_precisions = [&]() {
         precisions_map map = {
-            {ov::element::i64, ov::element::i32},
-            {ov::element::u64, ov::element::i32},
             {ov::element::i16, ov::element::i32},
             {ov::element::u16, ov::element::i32},
             {ov::element::u32, ov::element::i32},
@@ -223,12 +217,21 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
             {ov::element::u4, ov::element::u8}
         };
 
-        if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core))
+        if (supportI64) {
+            map.insert({ov::element::u64, ov::element::i64});
+        } else {
+            map.insert({ov::element::u64, ov::element::i32});
+            map.insert({ov::element::i64, ov::element::i32});
+        }
+
+        if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core)) {
             map.insert({ov::element::bf16, ov::element::f32});
+        }
 
         return map;
     };
-    static const auto precisions = get_convert_precisions();
+
+    const auto precisions = get_convert_precisions();
     type_to_fuse_map type_to_fuse = {{ov::opset10::Convert::get_type_info_static(), fuse_type_to_convert}};
 
     CPU_REGISTER_PASS_COMMON(manager, ov::pass::AUGRUCellFusion);
@@ -263,8 +266,13 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
     // Common ConvertPrecision pass handles only a limited set of OpenVINO operations to match the list of precisions supported by the plugin.
     // However, if the extension operation produces an output precision that is not natively supported, this may lead to inconsistency during
     // element type propagation. This transformation is called before the ConvertPrecision pass to align the actual precisions with the list of supported ones.
-    CPU_REGISTER_PASS_COMMON(manager, ov::pass::InsertConvertAfterExtension);
+    if (!supportI64) {
+        CPU_REGISTER_PASS_COMMON(manager, ov::pass::InsertConvertAfterExtension);
+    }
     CPU_REGISTER_PASS_COMMON(manager, ov::pass::ConvertPrecision, precisions, type_to_fuse);
+    if (supportI64) {
+        CPU_REGISTER_PASS_X64(manager, ConvertPrecisionI64ToI32);
+    }
 
     CPU_REGISTER_PASS_COMMON(manager, ov::pass::EliminateConvert);
     CPU_REGISTER_PASS_COMMON(manager, SwapConvertTranspose);
```
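The branch added to get_convert_precisions is the heart of the feature: with the flag off, the old blanket i64/u64 to i32 demotion stays; with it on, only u64 is rewritten (to i64) and the per-operation ConvertPrecisionI64ToI32 pass cleans up after the common ConvertPrecision pass. A condensed sketch of just that branch (precisions_map is assumed to come from transformations/convert_precision.hpp; other entries omitted):

```cpp
#include "transformations/convert_precision.hpp"  // precisions_map

// Mirror of the new 64-bit handling in get_convert_precisions.
precisions_map make64BitConversions(bool supportI64) {
    precisions_map map;
    if (supportI64) {
        map.insert({ov::element::u64, ov::element::i64});  // keep 64-bit width
    } else {
        map.insert({ov::element::u64, ov::element::i32});  // legacy demotion
        map.insert({ov::element::i64, ov::element::i32});
    }
    return map;
}
```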

src/plugins/intel_cpu/src/transformations/transformation_pipeline.h (+1 −1)

```diff
@@ -62,7 +62,7 @@ class Transformations {
 
     void Snippets(void);
 
-    static bool fuse_type_to_convert(const std::shared_ptr<ngraph::Node>& node, const precisions_map& precisions);
+    static bool fuse_type_to_convert(const std::shared_ptr<ov::Node>& node, const precisions_map& precisions);
 };
 
 }   // namespace intel_cpu
```

src/plugins/intel_cpu/src/utils/blob_dump.cpp (+12)

```diff
@@ -166,6 +166,12 @@ void BlobDumper::dumpAsTxt(std::ostream &stream) const {
     const void *ptr = memory->getData();
 
     switch (desc.getPrecision()) {
+        case Precision::FP64 : {
+            auto *blob_ptr = reinterpret_cast<const double*>(ptr);
+            for (size_t i = 0; i < data_size; i++)
+                stream << blob_ptr[desc.getElementOffset(i)] << std::endl;
+            break;
+        }
         case Precision::FP32 : {
             auto *blob_ptr = reinterpret_cast<const float*>(ptr);
             for (size_t i = 0; i < data_size; i++)
@@ -180,6 +186,12 @@ void BlobDumper::dumpAsTxt(std::ostream &stream) const {
             }
             break;
         }
+        case Precision::I64: {
+            auto *blob_ptr = reinterpret_cast<const int64_t*>(ptr);
+            for (size_t i = 0; i < data_size; i++)
+                stream << blob_ptr[desc.getElementOffset(i)] << std::endl;
+            break;
+        }
         case Precision::I32: {
             auto *blob_ptr = reinterpret_cast<const int32_t*>(ptr);
             for (size_t i = 0; i < data_size; i++)
```

src/plugins/intel_cpu/src/utils/cpu_utils.hpp (+7 −4)

```diff
@@ -102,7 +102,8 @@ inline InferenceEngine::Precision normalizeToSupportedPrecision(InferenceEngine:
     case InferenceEngine::Precision::I8:
     case InferenceEngine::Precision::I32:
     case InferenceEngine::Precision::BF16:
-    case InferenceEngine::Precision::FP32: {
+    case InferenceEngine::Precision::FP32:
+    case InferenceEngine::Precision::I64: {
         break;
     }
     case InferenceEngine::Precision::FP64: {
@@ -113,11 +114,13 @@ inline InferenceEngine::Precision normalizeToSupportedPrecision(InferenceEngine:
         precision = InferenceEngine::Precision::U8;
         break;
     }
+    case InferenceEngine::Precision::U64: {
+        precision = InferenceEngine::Precision::I64;
+        break;
+    }
     case InferenceEngine::Precision::U16:
     case InferenceEngine::Precision::I16:
-    case InferenceEngine::Precision::U32:
-    case InferenceEngine::Precision::I64:
-    case InferenceEngine::Precision::U64: {
+    case InferenceEngine::Precision::U32: {
         precision = InferenceEngine::Precision::I32;
         break;
     }
```
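Net effect on precision normalization: I64 now passes through unchanged and U64 narrows to I64 rather than I32; an illustrative assert-style check (not part of the commit):

```cpp
#include <cassert>
#include "utils/cpu_utils.hpp"  // ov::intel_cpu::normalizeToSupportedPrecision

void checkI64Normalization() {
    using namespace ov::intel_cpu;
    using InferenceEngine::Precision;
    assert(normalizeToSupportedPrecision(Precision::I64) == Precision::I64);  // pass-through now
    assert(normalizeToSupportedPrecision(Precision::U64) == Precision::I64);  // was I32 before
    assert(normalizeToSupportedPrecision(Precision::U16) == Precision::I32);  // unchanged
}
```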
