
Commit 3ae4725 · 1 parent e67850a

[CPU][I64] Transformation & config.

16 files changed: +265 −83 lines

src/common/transformations/src/transformations/convert_precision.cpp

+11 −1

@@ -871,6 +871,14 @@ inline int32_t convert_value<uint32_t, int32_t>(uint32_t val) {
     return static_cast<int32_t>(val);
 }
 
+template <>
+inline int64_t convert_value<uint64_t, int64_t>(uint64_t val) {
+    if (val > static_cast<uint64_t>(std::numeric_limits<int64_t>::max())) {
+        return std::numeric_limits<int64_t>::max();
+    }
+    return static_cast<int64_t>(val);
+}
+
 namespace {
 template <ov::element::Type_t PREC_FROM, ov::element::Type_t PREC_TO>
 std::shared_ptr<ngraph::Node> change_constant_precision(std::shared_ptr<opset4::Constant>& constant) {

@@ -1110,7 +1118,9 @@ bool fuse_type_to_constant(const std::shared_ptr<ngraph::Node>& node,
     const auto& to = it->second;
     if (auto constant = ov::as_type_ptr<opset4::Constant>(node)) {
         std::shared_ptr<ngraph::Node> new_const;
-        if (from == ov::element::u64 && to == ov::element::i32) {
+        if (from == ov::element::u64 && to == ov::element::i64) {
+            new_const = change_constant_precision<ov::element::Type_t::u64, ov::element::Type_t::i64>(constant);
+        } else if (from == ov::element::u64 && to == ov::element::i32) {
             new_const = change_constant_precision<ov::element::Type_t::u64, ov::element::Type_t::i32>(constant);
         } else if (from == ov::element::i64 && to == ov::element::i32) {
             new_const = change_constant_precision<ov::element::Type_t::i64, ov::element::Type_t::i32>(constant);
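For intuition, here is a self-contained sketch of the saturating u64→i64 behavior the new `convert_value` specialization implements (a re-statement for illustration, not the transformation code itself):

```cpp
#include <cstdint>
#include <iostream>
#include <limits>

// Mirrors the new convert_value<uint64_t, int64_t> specialization: values
// above INT64_MAX saturate to INT64_MAX instead of wrapping negative.
int64_t saturate_u64_to_i64(uint64_t val) {
    if (val > static_cast<uint64_t>(std::numeric_limits<int64_t>::max())) {
        return std::numeric_limits<int64_t>::max();
    }
    return static_cast<int64_t>(val);
}

int main() {
    std::cout << saturate_u64_to_i64(42u) << '\n';         // 42
    std::cout << saturate_u64_to_i64(UINT64_MAX) << '\n';  // 9223372036854775807
}
```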

src/inference/dev_api/cpp_interfaces/interface/ie_internal_plugin_config.hpp

+5 −0

@@ -110,6 +110,11 @@ INFERENCE_ENGINE_1_0_DEPRECATED DECLARE_CONFIG_VALUE(ENABLE);
 INFERENCE_ENGINE_1_0_DEPRECATED DECLARE_CONFIG_VALUE(IGNORE_CALLBACK);
 INFERENCE_ENGINE_1_0_DEPRECATED DECLARE_CONFIG_VALUE(DISABLE);
 
+/**
+ * @brief Enables inference with the INT64 data type in the CPU plugin if it is present in the original model.
+ */
+DECLARE_CONFIG_KEY(CPU_NATIVE_I64);
+
 }  // namespace PluginConfigInternalParams
 
 }  // namespace InferenceEngine
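As a hedged sketch of how this internal key could be passed when loading a model on the CPU plugin: `DECLARE_CONFIG_KEY(CPU_NATIVE_I64)` follows the usual convention of defining `KEY_CPU_NATIVE_I64` as the string `"CPU_NATIVE_I64"`; the model path is hypothetical, and passing an internal key through the public API is an assumption for illustration:

```cpp
#include <ie_core.hpp>
#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>

int main() {
    InferenceEngine::Core core;
    auto network = core.ReadNetwork("model.xml");  // hypothetical model path
    // Request native I64 execution via the internal config key (YES/NO).
    auto execNet = core.LoadNetwork(network, "CPU",
        {{InferenceEngine::PluginConfigInternalParams::KEY_CPU_NATIVE_I64,
          InferenceEngine::PluginConfigParams::YES}});
}
```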

src/plugins/intel_cpu/src/config.cpp

+10 −1

@@ -230,6 +230,15 @@ void Config::readProperties(const std::map<std::string, std::string> &prop) {
             IE_THROW() << "Wrong value for property key " << ov::hint::execution_mode.name()
                        << ". Supported values: PERFORMANCE, ACCURACY";
         }
+    } else if (key == PluginConfigInternalParams::KEY_CPU_NATIVE_I64) {
+        if (val == PluginConfigParams::YES) {
+            enableNativeI64 = true;
+        } else if (val == PluginConfigParams::NO) {
+            enableNativeI64 = false;
+        } else {
+            IE_THROW() << "Wrong value for property key " << PluginConfigInternalParams::KEY_CPU_NATIVE_I64 << ": " << val
+                       << ". Expected only YES or NO values.";
+        }
     } else {
         IE_THROW(NotFound) << "Unsupported property " << key << " by CPU plugin";
     }

@@ -314,4 +323,4 @@ void Config::updateProperties() {
 }
 
 }   // namespace intel_cpu
-}   // namespace ov
+}   // namespace ov
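A quick illustration of the new branch (hypothetical direct use; assumes the plugin's internal `config.h` is available, and the key string follows the `DECLARE_CONFIG_KEY` convention):

```cpp
#include "config.h"  // internal intel_cpu plugin header (assumption)

void demo() {
    ov::intel_cpu::Config cfg;
    cfg.readProperties({{"CPU_NATIVE_I64", "YES"}});  // cfg.enableNativeI64 == true
    cfg.readProperties({{"CPU_NATIVE_I64", "NO"}});   // cfg.enableNativeI64 == false
    // Any other value (e.g. "MAYBE") makes readProperties throw via IE_THROW.
}
```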

src/plugins/intel_cpu/src/config.h

+1 −0

@@ -57,6 +57,7 @@ struct Config {
     // TODO: Executor cache may leads to incorrect behavior on oneDNN ACL primitives
     size_t rtCacheCapacity = 0ul;
 #endif
+    bool enableNativeI64 = false;
     InferenceEngine::IStreamsExecutor::Config streamExecutorConfig;
     InferenceEngine::PerfHintsConfig perfHintsConfig;
     bool enableCpuPinning = true;

src/plugins/intel_cpu/src/dnnl_extension_utils.cpp

+29 −27

@@ -4,45 +4,43 @@
 
 #include "dnnl_extension_utils.h"
 
-#include "utils/general_utils.h"
 #include <oneapi/dnnl/dnnl.hpp>
 #include "memory_desc/dnnl_blocked_memory_desc.h"
-#include "onednn/iml_type_mapper.h"
-#include <common/primitive_desc.hpp>
 #include <common/primitive_desc_iface.hpp>
 
-#include <vector>
-
 using namespace dnnl;
 
 namespace ov {
 namespace intel_cpu {
 
-uint8_t DnnlExtensionUtils::sizeOfDataType(dnnl::memory::data_type dataType) {
+uint8_t DnnlExtensionUtils::sizeOfDataType(memory::data_type dataType) {
     switch (dataType) {
-    case dnnl::memory::data_type::f32:
-        return 4;
-    case dnnl::memory::data_type::s32:
+    case memory::data_type::f64:
+    case memory::data_type::s64:
+        return 8;
+    case memory::data_type::f32:
+    case memory::data_type::s32:
         return 4;
-    case dnnl::memory::data_type::bf16:
+    case memory::data_type::bf16:
+    case memory::data_type::f16:
         return 2;
-    case dnnl::memory::data_type::s8:
-        return 1;
-    case dnnl::memory::data_type::u8:
+    case memory::data_type::s8:
+    case memory::data_type::u8:
+    case memory::data_type::bin:
         return 1;
-    case dnnl::memory::data_type::bin:
-        return 1;
-    case dnnl::memory::data_type::f16:
-        return 2;
-    case dnnl::memory::data_type::undef:
+    case memory::data_type::undef:
        return 0;
    default:
-        IE_THROW() << "Unsupported data type.";
+        IE_THROW() << "Unsupported data type: " << DataTypeToIEPrecision(dataType);
    }
 }
 
 memory::data_type DnnlExtensionUtils::IEPrecisionToDataType(const InferenceEngine::Precision& prec) {
     switch (prec) {
+    case InferenceEngine::Precision::FP64:
+        return memory::data_type::f64;
+    case InferenceEngine::Precision::I64:
+        return memory::data_type::s64;
     case InferenceEngine::Precision::FP32:
         return memory::data_type::f32;
     case InferenceEngine::Precision::I32:

@@ -68,6 +66,10 @@ memory::data_type DnnlExtensionUtils::IEPrecisionToDataType(const InferenceEngin
 
 InferenceEngine::Precision DnnlExtensionUtils::DataTypeToIEPrecision(memory::data_type dataType) {
     switch (dataType) {
+    case memory::data_type::f64:
+        return InferenceEngine::Precision::FP64;
+    case memory::data_type::s64:
+        return InferenceEngine::Precision::I64;
     case memory::data_type::f32:
         return InferenceEngine::Precision::FP32;
     case memory::data_type::s32:

@@ -90,11 +92,11 @@ InferenceEngine::Precision DnnlExtensionUtils::DataTypeToIEPrecision(memory::dat
     }
 }
 
-Dim DnnlExtensionUtils::convertToDim(const dnnl::memory::dim &dim) {
+Dim DnnlExtensionUtils::convertToDim(const memory::dim &dim) {
     return dim == DNNL_RUNTIME_DIM_VAL ? Shape::UNDEFINED_DIM : static_cast<size_t>(dim);
 }
-dnnl::memory::dim DnnlExtensionUtils::convertToDnnlDim(const Dim &dim) {
-    return dim == Shape::UNDEFINED_DIM ? DNNL_RUNTIME_DIM_VAL : static_cast<dnnl::memory::dim>(dim);
+memory::dim DnnlExtensionUtils::convertToDnnlDim(const Dim &dim) {
+    return dim == Shape::UNDEFINED_DIM ? DNNL_RUNTIME_DIM_VAL : static_cast<memory::dim>(dim);
 }
 
 VectorDims DnnlExtensionUtils::convertToVectorDims(const memory::dims& dims) {

@@ -133,19 +135,19 @@ memory::format_tag DnnlExtensionUtils::GetPlainFormatByRank(size_t rank) {
     }
 }
 
-DnnlMemoryDescPtr DnnlExtensionUtils::makeDescriptor(const dnnl::memory::desc &desc) {
+DnnlMemoryDescPtr DnnlExtensionUtils::makeDescriptor(const memory::desc &desc) {
     return makeDescriptor(desc.get());
 }
 
 DnnlMemoryDescPtr DnnlExtensionUtils::makeDescriptor(const_dnnl_memory_desc_t desc) {
-    if (desc->format_kind == dnnl::impl::format_kind_t::dnnl_blocked) {
+    if (desc->format_kind == impl::format_kind_t::dnnl_blocked) {
         return std::shared_ptr<DnnlBlockedMemoryDesc>(new DnnlBlockedMemoryDesc(desc));
     } else {
         return std::shared_ptr<DnnlMemoryDesc>(new DnnlMemoryDesc(desc));
     }
 }
 
-size_t DnnlExtensionUtils::getMemSizeForDnnlDesc(const dnnl::memory::desc& desc) {
+size_t DnnlExtensionUtils::getMemSizeForDnnlDesc(const memory::desc& desc) {
     auto tmpDesc = desc;
 
     const auto offset0 = tmpDesc.get()->offset0;

@@ -167,8 +169,8 @@ std::shared_ptr<DnnlBlockedMemoryDesc> DnnlExtensionUtils::makeUndefinedDesc(con
     }
 }
 
-DnnlMemoryDescPtr DnnlExtensionUtils::query_md(const const_dnnl_primitive_desc_t& pd, const dnnl::query& what, int idx) {
-    auto query = dnnl::convert_to_c(what);
+DnnlMemoryDescPtr DnnlExtensionUtils::query_md(const const_dnnl_primitive_desc_t& pd, const query& what, int idx) {
+    auto query = convert_to_c(what);
     const auto* cdesc = dnnl_primitive_desc_query_md(pd, query, idx);
 
     if (!cdesc)
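A few hedged spot-checks of the extended mappings (a sketch that assumes a oneDNN build exposing the f64/s64 data types, as this change requires, and that the plugin's internal headers are on the include path):

```cpp
#include <cassert>
#include "dnnl_extension_utils.h"  // internal intel_cpu plugin header (assumption)

int main() {
    using namespace ov::intel_cpu;
    using dnnl::memory;
    // New 8-byte types introduced by this change.
    assert(DnnlExtensionUtils::sizeOfDataType(memory::data_type::s64) == 8);
    assert(DnnlExtensionUtils::sizeOfDataType(memory::data_type::f64) == 8);
    // Round-trip between IE precision and oneDNN data type for I64.
    assert(DnnlExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::I64) == memory::data_type::s64);
    assert(DnnlExtensionUtils::DataTypeToIEPrecision(memory::data_type::s64) == InferenceEngine::Precision::I64);
    return 0;
}
```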

src/plugins/intel_cpu/src/graph.cpp

+15 −10

@@ -45,7 +45,6 @@
 #include "memory_desc/cpu_memory_desc_utils.h"
 
 #include <openvino/core/model.hpp>
-#include <openvino/core/node.hpp>
 #include <openvino/op/ops.hpp>
 #include <transformations/utils/utils.hpp>
 #include <low_precision/low_precision.hpp>

@@ -306,7 +305,10 @@ void Graph::Replicate(const CNNNetwork &network) {
     // change precision for input/output nodes to avoid extra data conversion when set input/output blobs
     // also we need to change input/output precisions for consumers/producers to avoid inserting reorder
     for (auto &input : inputNodesMap) {
-        const auto precToSet = normalizeToSupportedPrecision(inputsInfo.at(input.first)->getPrecision());
+        auto precToSet = normalizeToSupportedPrecision(inputsInfo.at(input.first)->getPrecision());
+        if (!getConfig().enableNativeI64 && precToSet == Precision::I64) {
+            precToSet = Precision::I32;
+        }
         input.second->setOriginalOutputPrecisionAtPort(0, precToSet);
         const auto childEdges = input.second->getChildEdgesAtPort(0);
         for (size_t i = 0; i < childEdges.size(); i++) {

@@ -319,7 +321,10 @@ void Graph::Replicate(const CNNNetwork &network) {
     }
 
     for (auto &output : outputNodesMap) {
-        const auto precToSet = normalizeToSupportedPrecision(outputsInfo.at(output.first)->getPrecision());
+        auto precToSet = normalizeToSupportedPrecision(outputsInfo.at(output.first)->getPrecision());
+        if (!getConfig().enableNativeI64 && precToSet == Precision::I64) {
+            precToSet = Precision::I32;
+        }
         output.second->setOriginalInputPrecisionAtPort(0, precToSet);
         const auto parentEdges = output.second->getParentEdgesAtPort(0);
         for (size_t i = 0; i < parentEdges.size(); i++) {

@@ -977,7 +982,7 @@ void Graph::PushInputData(const std::string& name, const InferenceEngine::Blob::
 
     // todo: make sure 'name' exists in this map...
     if (_normalizePreprocMap.find(name) != _normalizePreprocMap.end()) {
-        if (inTensorDesc.getPrecision() == InferenceEngine::Precision::FP32) {
+        if (inTensorDesc.getPrecision() == Precision::FP32) {
             _normalizePreprocMap[name].NormalizeImage(outDims, reinterpret_cast<float *>(inter_data_ptr),
                                                       inTensorDesc.getLayout());
         } else {

@@ -1424,16 +1429,16 @@ void Graph::SortTopologically() {
     }
 }
 
-void Graph::GetPerfData(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &perfMap) const {
+void Graph::GetPerfData(std::map<std::string, InferenceEngineProfileInfo> &perfMap) const {
     unsigned i = 0;
-    std::function<void(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &, const NodePtr&)>
-            getPerfMapFor = [&](std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &perfMap, const NodePtr& node) {
-                InferenceEngine::InferenceEngineProfileInfo &pc = perfMap[node->getName()];
+    std::function<void(std::map<std::string, InferenceEngineProfileInfo> &, const NodePtr&)>
+            getPerfMapFor = [&](std::map<std::string, InferenceEngineProfileInfo> &perfMap, const NodePtr& node) {
+                InferenceEngineProfileInfo &pc = perfMap[node->getName()];
                 pc.execution_index = i++;
                 // TODO: Why time counter is signed?
                 pc.cpu_uSec = pc.realTime_uSec = (long long) node->PerfCounter().avg();
-                pc.status = pc.cpu_uSec > 0 ? InferenceEngine::InferenceEngineProfileInfo::EXECUTED
-                                            : InferenceEngine::InferenceEngineProfileInfo::NOT_RUN;
+                pc.status = pc.cpu_uSec > 0 ? InferenceEngineProfileInfo::EXECUTED
+                                            : InferenceEngineProfileInfo::NOT_RUN;
                 std::string pdType = node->getPrimitiveDescriptorType();
                 size_t typeLen = sizeof(pc.exec_type) / sizeof(pc.exec_type[0]);
                 pdType.copy(pc.exec_type, typeLen, 0);
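The I/O precision fallback added in Graph::Replicate boils down to the following rule (a minimal standalone sketch, not the plugin's actual helper):

```cpp
#include <ie_precision.hpp>

// When native I64 execution is disabled, I64 network inputs/outputs are
// downgraded to I32 so the rest of the graph runs with a supported type.
InferenceEngine::Precision applyI64Fallback(InferenceEngine::Precision prec, bool enableNativeI64) {
    if (!enableNativeI64 && prec == InferenceEngine::Precision::I64)
        return InferenceEngine::Precision::I32;
    return prec;
}
```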

src/plugins/intel_cpu/src/graph_optimizer.cpp

+0 −1

@@ -18,7 +18,6 @@
 #include "nodes/mvn.h"
 #include "nodes/transpose.h"
 #include "nodes/interpolate.h"
-#include "nodes/reduce.h"
 #include "nodes/input.h"
 #include "nodes/rnn.h"
 #include "nodes/common/cpu_convert.h"

src/plugins/intel_cpu/src/plugin.cpp

+18 −8

@@ -9,23 +9,19 @@
 
 #include "transformations/transformation_pipeline.h"
 #include "itt.h"
-#include "extension_mngr.h"
 #include "extension.h"
 #include "serialize.h"
 #include "threading/ie_executor_manager.hpp"
 
 #include "ie_icore.hpp"
 #include "ie_plugin_config.hpp"
 #include "ie_system_conf.h"
-#include "threading/ie_cpu_streams_info.hpp"
 #include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
 #include "openvino/runtime/intel_cpu/properties.hpp"
 
-#include <transformations/utils/utils.hpp>
 #include <ie_ngraph_utils.hpp>
 
 #include "performance_heuristics.hpp"
-#include "openvino/runtime/properties.hpp"
 #include "weights_cache.hpp"
 #include "utils/denormals.hpp"
 

@@ -36,7 +32,6 @@
 #endif
 
 #include <cpu/x64/cpu_isa_traits.hpp>
-#include <itt.h>
 
 using namespace InferenceEngine;
 

@@ -155,7 +150,7 @@ static bool streamsSet(const std::map<std::string, std::string>& config) {
            config.count(ov::num_streams.name());
 }
 
-void Engine::ApplyPerformanceHints(std::map<std::string, std::string> &config, const std::shared_ptr<ngraph::Function>& ngraphFunc) const {
+void Engine::ApplyPerformanceHints(std::map<std::string, std::string> &config, const std::shared_ptr<ov::Model>& ngraphFunc) const {
     auto getNumStreamsLatency = [&]() {
         return std::pair<std::string, std::string>(CONFIG_VALUE(CPU_THROUGHPUT_NUMA), ov::util::to_string(ov::streams::NUMA));
     };

@@ -272,7 +267,7 @@ void Engine::ApplyPerformanceHints(std::map<std::string, std::string> &config, c
     }
 }
 
-void Engine::GetPerformanceStreams(Config& config, const std::shared_ptr<ngraph::Function>& ngraphFunc) {
+void Engine::GetPerformanceStreams(Config& config, const std::shared_ptr<ov::Model>& ngraphFunc) {
     const auto perf_hint_name = config.perfHintsConfig.ovPerfHint;
     // save hints parameters to model rt_info
     ov::AnyMap hints_props;

@@ -421,6 +416,19 @@ static Config::SnippetsMode getSnippetsMode(const std::map<std::string, std::str
     IE_THROW() << "Wrong value for property key SNIPPETS_MODE. Expected values: ENABLE/DISABLE/IGNORE_CALLBACK";
 }
 
+static void setI64Mode(const std::map<std::string, std::string>& modelConfig, Config& engineConfig) {
+    engineConfig.enableNativeI64 = false;
+    const auto i64prop = modelConfig.find(InferenceEngine::PluginConfigInternalParams::KEY_CPU_NATIVE_I64);
+    if (i64prop != modelConfig.end()) {
+        if (i64prop->second == PluginConfigParams::YES) {
+            engineConfig.enableNativeI64 = true;
+        } else if (i64prop->second != PluginConfigParams::NO) {
+            IE_THROW() << "Wrong value for property key " << PluginConfigInternalParams::KEY_CPU_NATIVE_I64 << ": " << i64prop->second
+                       << ". Expected only YES or NO values.";
+        }
+    }
+}
+
 InferenceEngine::IExecutableNetworkInternal::Ptr
 Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std::map<std::string, std::string> &orig_config) {
     OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "Engine::LoadExeNetworkImpl");

@@ -454,6 +462,7 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std
     const bool enableLPT = shouldEnableLPT(config, engConfig);
     ov::element::Type inferencePrecision = getInferencePrecision(config, engConfig);
     const Config::SnippetsMode snippetsMode = getSnippetsMode(config, engConfig);
+    setI64Mode(config, engConfig);
 
     auto nGraphFunc = clonedNetwork.getFunction();
 

@@ -729,6 +738,7 @@ QueryNetworkResult Engine::QueryNetwork(const CNNNetwork& network, const std::ma
     const bool enableLPT = (lptProp != config.end() && lptProp->second == PluginConfigParams::YES) /* enabled in the orig_config*/
                            || Config::LPTransformsMode::On == engConfig.lpTransformsMode /* or already enabled */;
     const Config::SnippetsMode snippetsMode = getSnippetsMode(config, conf);
+    setI64Mode(config, conf);
 
     auto model = network.getFunction();
     if (model == nullptr) {

@@ -744,7 +754,7 @@ QueryNetworkResult Engine::QueryNetwork(const CNNNetwork& network, const std::ma
             transformation.UpToCpuSpecificOpSet();
             transformation.CpuSpecificOpSet();
         },
-        [&](const std::shared_ptr<ngraph::Node>& op) {
+        [&](const std::shared_ptr<ov::Node>& op) {
             std::unique_ptr<Node> ptr;
             try {
                 ptr.reset(Node::factory().create(op, context));
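For illustration, a minimal standalone sketch of the contract `setI64Mode` enforces (a re-statement for demonstration, not the plugin's code; the key string `"CPU_NATIVE_I64"` is assumed per the `DECLARE_CONFIG_KEY` convention):

```cpp
#include <map>
#include <stdexcept>
#include <string>

// Default is false; "YES" enables native I64, "NO" leaves it disabled,
// and anything else is rejected with an error.
static bool resolveI64Mode(const std::map<std::string, std::string>& modelConfig) {
    bool enableNativeI64 = false;
    const auto it = modelConfig.find("CPU_NATIVE_I64");
    if (it != modelConfig.end()) {
        if (it->second == "YES") {
            enableNativeI64 = true;
        } else if (it->second != "NO") {
            throw std::runtime_error("Wrong value for property key CPU_NATIVE_I64: " + it->second);
        }
    }
    return enableNativeI64;
}
```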
