
Commit fef6788

Add FullyConnected ACL executor
1 parent f19282f commit fef6788

16 files changed (+969, -127 lines)
src/plugins/intel_cpu/src/nodes/executors/acl/acl_common_executor.cpp
@@ -0,0 +1,134 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "acl_common_executor.hpp"
+#include "acl_utils.hpp"
+#include "nodes/executors/memory_arguments.hpp"
+#include "utils/debug_capabilities.h"
+
+namespace ov {
+namespace intel_cpu {
+
+static const std::unordered_map<int, ACLArgs> argConvert = {
+    {ARG_SRC_0, ACL_SRC_0},
+    {ARG_SRC_1, ACL_SRC_1},
+    {ARG_SRC_2, ACL_SRC_2},
+    {ARG_BIAS,  ACL_BIAS},
+    {ARG_WEI,   ACL_WEI},
+    {ARG_DST,   ACL_DST},
+};
+
+using ACLTypes   = std::array<arm_compute::DataType, ACLArgs::COUNT_OF_ARGS>;
+using ACLLayouts = std::array<arm_compute::DataLayout, ACLArgs::COUNT_OF_ARGS>;
+
+static void initACLTensorParams(const MemoryPtr& memoryPtr,
+                                const ACLTensorAttrs& attrs,
+                                arm_compute::TensorShape& tensorShape,
+                                arm_compute::DataType& dataType,
+                                arm_compute::DataLayout& dataLayout) {
+    dataType = precisionToAclDataType(memoryPtr->getPrecision());
+    dataLayout = getAclDataLayoutByMemoryDesc(memoryPtr->getDescPtr());
+    if (dataType != arm_compute::DataType::UNKNOWN) {
+        auto collapsed_dims = collapse_dims_to_max_rank(memoryPtr->getStaticDims(), attrs.maxDimsShape);
+        tensorShape = shapeCast(collapsed_dims);
+        if (attrs.hasLayoutTypeNHWC) {
+            changeLayoutToNH_C({&tensorShape});
+        }
+    }
+}
+
+static std::shared_ptr<arm_compute::TensorInfo> initTensorInfo(const arm_compute::TensorShape& tensorShape,
+                                                               const arm_compute::DataType& dataType,
+                                                               const arm_compute::DataLayout& dataLayout) {
+    std::shared_ptr<arm_compute::TensorInfo> aclMemoryInfo = nullptr;
+    if (dataType != arm_compute::DataType::UNKNOWN) {
+        aclMemoryInfo = std::make_shared<arm_compute::TensorInfo>(
+                tensorShape, 1,
+                dataType,
+                dataLayout);
+    }
+    return aclMemoryInfo;
+}
+
+static std::shared_ptr<arm_compute::Tensor> initTensor(const std::shared_ptr<arm_compute::TensorInfo>& aclMemoryInfo) {
+    std::shared_ptr<arm_compute::Tensor> aclMemory = nullptr;
+    if (aclMemoryInfo) {
+        aclMemory = std::make_shared<arm_compute::Tensor>();
+        aclMemory->allocator()->init(*aclMemoryInfo);
+    }
+    return aclMemory;
+}
+
+ACLCommonExecutor::ACLCommonExecutor() {
+    for (int i = 0; i < ACLArgs::COUNT_OF_ARGS; ++i) {
+        aclTensorAttrs.memoryUsageIndicator[i] = false;
+    }
+}
+
+bool ACLCommonExecutor::update(const MemoryArgs& memory) {
+    // Initialize ACL tensors params
+    ACLShapes aclMemoryShapes;
+    ACLTypes aclDataType{};
+    ACLLayouts aclDataLayout{};
+    for (auto& cpu_mem_ptr : memory) {
+        const ACLArgs index = argConvert.at(cpu_mem_ptr.first);
+        initACLTensorParams(cpu_mem_ptr.second, aclTensorAttrs,
+                            aclMemoryShapes[index],
+                            aclDataType[index],
+                            aclDataLayout[index]);
+    }
+
+    // Update ACL tensors shapes
+    updateTensorsShapes(aclMemoryShapes);
+
+    // Initialize arm_compute::TensorInfo objects
+    ACLInfos aclMemoryInfos;
+    for (int i = 0; i < ACLArgs::COUNT_OF_ARGS; i++) {
+        aclMemoryInfos[i] = initTensorInfo(aclMemoryShapes[i], aclDataType[i], aclDataLayout[i]);
+    }
+
+    // Validate arm_compute::TensorInfo objects for specific ACL function
+    auto tensorsInfoValidateStatus = validateTensorsInfo(aclMemoryInfos);
+    if (!tensorsInfoValidateStatus) {
+        DEBUG_LOG("ACL operator validation failed: ", tensorsInfoValidateStatus.error_description());
+        return false;
+    }
+
+    // Initialize arm_compute::Tensor objects
+    for (int i = 0; i < ACLArgs::COUNT_OF_ARGS; i++) {
+        aclMemoryTensors[i] = initTensor(aclMemoryInfos[i]);
+        // Indicate that arm_compute::Tensor object can use import_memory function
+        if (aclMemoryTensors[i]) {
+            aclTensorAttrs.memoryUsageIndicator[i] = true;
+        }
+    }
+
+    // Configure arm_compute::IFunction object
+    configureThreadSafe([&] {
+        iFunction = configureFunction(aclMemoryTensors);
+    });
+    return true;
+}
+
+void ACLCommonExecutor::execute(const MemoryArgs& memory) {
+    // TODO: Move import_memory() to update() function - CVS-145871
+    for (auto& cpu_mem_ptr : memory) {
+        const ACLArgs index = argConvert.at(cpu_mem_ptr.first);
+        if (aclTensorAttrs.memoryUsageIndicator[index]) {
+            aclMemoryTensors[index]->allocator()->import_memory(memory.at(cpu_mem_ptr.first)->getData());
+        }
+    }
+    iFunction->run();
+}
+
+ACLCommonExecutor::~ACLCommonExecutor() {
+    for (int i = 0; i < ACLArgs::COUNT_OF_ARGS; i++) {
+        if (aclTensorAttrs.memoryUsageIndicator[i]) {
+            aclMemoryTensors[i]->allocator()->free();
+        }
+    }
+}
+
+}  // namespace intel_cpu
+}  // namespace ov
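
Taken together, update() and execute() split the executor lifecycle in two: a shape-dependent configuration phase (build TensorInfo objects, validate, configure the IFunction) and a lightweight run phase that only re-imports the current CPU pointers. A minimal caller-side sketch, assuming a concrete subclass and a pre-populated MemoryArgs map (both names below are illustrative, not part of this commit):

// Hypothetical driver code; SomeDerivedACLExecutor stands in for any subclass.
MemoryArgs memory;  // assumed filled with ARG_SRC_0 / ARG_WEI / ARG_DST entries
auto executor = std::make_shared<SomeDerivedACLExecutor>();
if (executor->update(memory)) {  // build TensorInfos, validate, configure the IFunction
    executor->execute(memory);   // import_memory() of CPU pointers, then IFunction::run()
}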
src/plugins/intel_cpu/src/nodes/executors/acl/acl_common_executor.hpp
@@ -0,0 +1,58 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "cpu_memory.h"
+#include "nodes/executors/executor.hpp"
+#include "arm_compute/runtime/NEON/NEFunctions.h"
+
+namespace ov {
+namespace intel_cpu {
+
+enum ACLArgs {
+    ACL_SRC_0,
+    ACL_SRC_1,
+    ACL_SRC_2,
+    ACL_BIAS,
+    ACL_WEI,
+    ACL_DST,
+    COUNT_OF_ARGS
+};
+
+using ACLFunction = std::unique_ptr<arm_compute::IFunction>;
+using ACLShapes   = std::array<arm_compute::TensorShape, ACLArgs::COUNT_OF_ARGS>;
+using ACLInfos    = std::array<std::shared_ptr<arm_compute::TensorInfo>, ACLArgs::COUNT_OF_ARGS>;
+using ACLTensors  = std::array<std::shared_ptr<arm_compute::Tensor>, ACLArgs::COUNT_OF_ARGS>;
+
+struct ACLTensorAttrs {
+    bool hasLayoutTypeNHWC = false;
+    size_t maxDimsShape = arm_compute::MAX_DIMS;
+    std::array<bool, ACLArgs::COUNT_OF_ARGS> memoryUsageIndicator;
+};
+
+class ACLCommonExecutor : public Executor {
+public:
+    ACLCommonExecutor();
+    virtual void updateTensorsShapes(ACLShapes& aclMemoryShapes) = 0;
+    virtual arm_compute::Status validateTensorsInfo(const ACLInfos& aclMemoryInfos) = 0;
+    virtual ACLFunction configureFunction(const ACLTensors& aclMemoryTensors) = 0;
+    impl_desc_type implType() const override {
+        return impl_desc_type::acl;
+    }
+    void execute(const MemoryArgs& memory) override;
+    bool update(const MemoryArgs& memory) override;
+    ~ACLCommonExecutor();
+
+protected:
+    ACLTensorAttrs aclTensorAttrs;
+
+private:
+    ACLTensors aclMemoryTensors;
+    ACLFunction iFunction = nullptr;
+};
+
+using ACLCommonExecutorPtr = std::shared_ptr<ACLCommonExecutor>;
+
+}  // namespace intel_cpu
+}  // namespace ov
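
Concrete executors, such as the FullyConnected one this commit adds, implement only the three pure-virtual hooks; all tensor bookkeeping stays in ACLCommonExecutor. A minimal illustrative subclass, wrapping ACL's NEAbsLayer rather than the commit's actual fully-connected code (a sketch, assuming #include "acl_common_executor.hpp"):

// Sketch only: shows the three customization points of ACLCommonExecutor.
class ACLAbsExecutorSketch : public ACLCommonExecutor {
public:
    void updateTensorsShapes(ACLShapes& aclMemoryShapes) override {
        // An elementwise op needs no shape rewriting; a FullyConnected
        // executor would presumably collapse src/weights shapes to 2D here.
    }
    arm_compute::Status validateTensorsInfo(const ACLInfos& aclMemoryInfos) override {
        return arm_compute::NEAbsLayer::validate(aclMemoryInfos[ACL_SRC_0].get(),
                                                 aclMemoryInfos[ACL_DST].get());
    }
    ACLFunction configureFunction(const ACLTensors& aclMemoryTensors) override {
        auto function = std::make_unique<arm_compute::NEAbsLayer>();
        function->configure(aclMemoryTensors[ACL_SRC_0].get(),
                            aclMemoryTensors[ACL_DST].get());
        return function;
    }
};

With this split, update() owns everything shape- and type-dependent while the subclass supplies only the ACL function choice, which is what lets the base class own import_memory() and freeing in one place.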

src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.cpp

+17 -102
@@ -361,66 +361,6 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs &eltwiseAttrs, const std::vecto
             return acl_op;
         };
         break;
-    case Algorithm::EltwiseRelu:
-        if (aclEltwiseAttrs.alpha == 0) {
-            if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0],
-                                             ActivationLayerInfo::ActivationFunction::RELU))
-                return false;
-        } else {
-            if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0],
-                                             {ActivationLayerInfo::ActivationFunction::LEAKY_RELU, aclEltwiseAttrs.alpha}))
-                return false;
-        }
-        exec_func = [this]() -> std::unique_ptr<IFunction> {
-            auto acl_op = std::make_unique<NEActivationLayer>();
-            if (aclEltwiseAttrs.alpha == 0) {
-                acl_op->configure(&srcTensors[0], &dstTensors[0], ActivationLayerInfo::ActivationFunction::RELU);
-            } else {
-                acl_op->configure(&srcTensors[0], &dstTensors[0],
-                                  {ActivationLayerInfo::ActivationFunction::LEAKY_RELU, aclEltwiseAttrs.alpha});
-            }
-            return acl_op;
-        };
-        break;
-    case Algorithm::EltwiseGeluErf:
-        if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0], ActivationLayerInfo::ActivationFunction::GELU))
-            return false;
-        exec_func = [this]() -> std::unique_ptr<IFunction> {
-            auto acl_op = std::make_unique<NEActivationLayer>();
-            acl_op->configure(&srcTensors[0], &dstTensors[0], ActivationLayerInfo::ActivationFunction::GELU);
-            return acl_op;
-        };
-        break;
-    case Algorithm::EltwiseElu:
-        if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0],
-                                         {ActivationLayerInfo::ActivationFunction::ELU, aclEltwiseAttrs.alpha}))
-            return false;
-        exec_func = [this]() -> std::unique_ptr<IFunction> {
-            auto acl_op = std::make_unique<NEActivationLayer>();
-            acl_op->configure(&srcTensors[0], &dstTensors[0], {ActivationLayerInfo::ActivationFunction::ELU, aclEltwiseAttrs.alpha});
-            return acl_op;
-        };
-        break;
-    case Algorithm::EltwiseTanh:
-        if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0],
-                                         {ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f}))
-            return false;
-        exec_func = [this]() -> std::unique_ptr<IFunction> {
-            auto acl_op = std::make_unique<NEActivationLayer>();
-            acl_op->configure(&srcTensors[0], &dstTensors[0],
-                              {ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f});
-            return acl_op;
-        };
-        break;
-    case Algorithm::EltwiseSigmoid:
-        if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0], ActivationLayerInfo::ActivationFunction::LOGISTIC))
-            return false;
-        exec_func = [this]() -> std::unique_ptr<IFunction> {
-            auto acl_op = std::make_unique<NEActivationLayer>();
-            acl_op->configure(&srcTensors[0], &dstTensors[0], ActivationLayerInfo::ActivationFunction::LOGISTIC);
-            return acl_op;
-        };
-        break;
     case Algorithm::EltwiseAbs:
         if (!NEAbsLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0]))
             return false;
@@ -430,24 +370,6 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs &eltwiseAttrs, const std::vecto
             return acl_op;
         };
         break;
-    case Algorithm::EltwiseSqrt:
-        if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0], ActivationLayerInfo::ActivationFunction::SQRT))
-            return false;
-        exec_func = [this]() -> std::unique_ptr<IFunction> {
-            auto acl_op = std::make_unique<NEActivationLayer>();
-            acl_op->configure(&srcTensors[0], &dstTensors[0], ActivationLayerInfo::ActivationFunction::SQRT);
-            return acl_op;
-        };
-        break;
-    case Algorithm::EltwiseSoftRelu:
-        if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0], ActivationLayerInfo::ActivationFunction::SOFT_RELU))
-            return false;
-        exec_func = [this]() -> std::unique_ptr<IFunction> {
-            auto acl_op = std::make_unique<NEActivationLayer>();
-            acl_op->configure(&srcTensors[0], &dstTensors[0], ActivationLayerInfo::ActivationFunction::SOFT_RELU);
-            return acl_op;
-        };
-        break;
     case Algorithm::EltwiseExp:
         if (!NEExpLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0]))
             return false;
@@ -457,28 +379,6 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs &eltwiseAttrs, const std::vecto
             return acl_op;
         };
         break;
-    case Algorithm::EltwiseClamp:
-        if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0],
-                                         {ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, aclEltwiseAttrs.beta, aclEltwiseAttrs.alpha}))
-            return false;
-        exec_func = [this]() -> std::unique_ptr<IFunction> {
-            auto acl_op = std::make_unique<NEActivationLayer>();
-            acl_op->configure(&srcTensors[0], &dstTensors[0],
-                              {ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, aclEltwiseAttrs.beta, aclEltwiseAttrs.alpha});
-            return acl_op;
-        };
-        break;
-    case Algorithm::EltwiseSwish:
-        if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0],
-                                         {ActivationLayerInfo::ActivationFunction::SWISH, aclEltwiseAttrs.alpha}))
-            return false;
-        exec_func = [this]() -> std::unique_ptr<IFunction> {
-            auto acl_op = std::make_unique<NEActivationLayer>();
-            acl_op->configure(&srcTensors[0], &dstTensors[0],
-                              {ActivationLayerInfo::ActivationFunction::SWISH, aclEltwiseAttrs.alpha});
-            return acl_op;
-        };
-        break;
     case Algorithm::EltwisePrelu:
         if (!NEPReluLayer::validate(&srcTensorsInfo[0], &srcTensorsInfo[1], &dstTensorsInfo[0]))
             return false;
@@ -488,12 +388,27 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs &eltwiseAttrs, const std::vecto
             return acl_op;
         };
         break;
+    case Algorithm::EltwiseRelu:
+    case Algorithm::EltwiseGeluErf:
+    case Algorithm::EltwiseElu:
+    case Algorithm::EltwiseTanh:
+    case Algorithm::EltwiseSigmoid:
+    case Algorithm::EltwiseSqrt:
+    case Algorithm::EltwiseSoftRelu:
+    case Algorithm::EltwiseClamp:
+    case Algorithm::EltwiseSwish:
     case Algorithm::EltwiseHswish:
-        if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0], ActivationLayerInfo::ActivationFunction::HARD_SWISH))
+        if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0], getActivationLayerInfo(aclEltwiseAttrs.algorithm,
+                                                                                                        aclEltwiseAttrs.alpha,
+                                                                                                        aclEltwiseAttrs.beta,
+                                                                                                        aclEltwiseAttrs.gamma)))
            return false;
        exec_func = [this]() -> std::unique_ptr<IFunction> {
            auto acl_op = std::make_unique<NEActivationLayer>();
-            acl_op->configure(&srcTensors[0], &dstTensors[0], ActivationLayerInfo::ActivationFunction::HARD_SWISH);
+            acl_op->configure(&srcTensors[0], &dstTensors[0], getActivationLayerInfo(aclEltwiseAttrs.algorithm,
+                                                                                     aclEltwiseAttrs.alpha,
+                                                                                     aclEltwiseAttrs.beta,
+                                                                                     aclEltwiseAttrs.gamma));
            return acl_op;
        };
        break;
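
The nine removed activation cases differed only in the ActivationLayerInfo they passed to NEActivationLayer, so the refactor folds them into a single fall-through case that delegates the mapping to getActivationLayerInfo. That helper is added elsewhere in this commit and is not shown in this hunk; the following is a hedged sketch, reconstructed from the deleted cases above, of the mapping it would have to perform (the name carries a Sketch suffix to avoid claiming it is the commit's actual code):

// Sketch only: reconstructs the Algorithm -> ActivationLayerInfo mapping implied
// by the deleted switch cases; the helper the commit actually adds may differ.
static arm_compute::ActivationLayerInfo getActivationLayerInfoSketch(Algorithm algorithm,
                                                                     float alpha,
                                                                     float beta,
                                                                     float gamma) {
    using AF = arm_compute::ActivationLayerInfo::ActivationFunction;
    (void)gamma;  // accepted for parity with the call site; unused by these mappings
    switch (algorithm) {
    case Algorithm::EltwiseRelu:
        if (alpha == 0)
            return {AF::RELU};
        return {AF::LEAKY_RELU, alpha};
    case Algorithm::EltwiseGeluErf:  return {AF::GELU};
    case Algorithm::EltwiseElu:      return {AF::ELU, alpha};
    case Algorithm::EltwiseTanh:     return {AF::TANH, 1.f, 1.f};
    case Algorithm::EltwiseSigmoid:  return {AF::LOGISTIC};
    case Algorithm::EltwiseSqrt:     return {AF::SQRT};
    case Algorithm::EltwiseSoftRelu: return {AF::SOFT_RELU};
    case Algorithm::EltwiseClamp:    return {AF::LU_BOUNDED_RELU, beta, alpha};  // as in the deleted case: upper=beta, lower=alpha
    case Algorithm::EltwiseSwish:    return {AF::SWISH, alpha};
    case Algorithm::EltwiseHswish:   return {AF::HARD_SWISH};
    default:                         return {};
    }
}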

src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.hpp

+1 -1
@@ -16,7 +16,7 @@ class AclEltwiseExecutor : public EltwiseExecutor {
     explicit AclEltwiseExecutor(const ExecutorContext::CPtr context);
     static bool isEltwiseAlgorithmSupported(Algorithm algorithm);
 
-    bool init(const EltwiseAttrs& eltwiseAttrs,
+    bool init(const EltwiseAttrs& attrs,
               const std::vector<MemoryDescPtr>& srcDescs,
               const std::vector<MemoryDescPtr>& dstDescs,
               const std::vector<EltwisePostOp>& postOps) override;
