
Commit e0b96ec

Add FullyConnected ACL executor

1 parent 83f6d21 · commit e0b96ec
12 files changed: +538 -118 lines
src/plugins/intel_cpu/src/nodes/executors/acl/acl_common_executor.cpp (new file)
@@ -0,0 +1,81 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "acl_common_executor.hpp"
#include "acl_utils.hpp"
#include "nodes/executors/memory_arguments.hpp"
#include "utils/debug_capabilities.h"

namespace ov {
namespace intel_cpu {

ACLMemoryInfo ACLCommonExecutor::initTensorInfo(const MemoryPtr& memoryPtr, const ACLTensorAttrs& attrs) {
    auto acl_tensor_type = precisionToAclDataType(memoryPtr->getPrecision());
    auto acl_tensor_layout = getAclDataLayoutByMemoryDesc(memoryPtr->getDescPtr());

    ACLMemoryInfo aclMemoryInfo = nullptr;
    if (acl_tensor_type != arm_compute::DataType::UNKNOWN) {
        auto collapsed_dims = collapse_dims_to_max_rank(memoryPtr->getStaticDims(), attrs.maxDimsShape);
        auto acl_tensor_shape = shapeCast(collapsed_dims);
        if (attrs.hasLayoutTypeNHWC) {
            changeLayoutToNH_C({&acl_tensor_shape});
        }
        aclMemoryInfo = std::make_shared<arm_compute::TensorInfo>(
                acl_tensor_shape, 1,
                acl_tensor_type,
                acl_tensor_layout);
    }
    return aclMemoryInfo;
}

ACLMemory ACLCommonExecutor::initTensor(const ACLMemoryInfo& aclMemoryInfo) {
    ACLMemory aclMemory = nullptr;
    if (aclMemoryInfo) {
        aclMemory = std::make_shared<arm_compute::Tensor>();
        aclMemory->allocator()->init(*aclMemoryInfo);
    }
    return aclMemory;
}

bool ACLCommonExecutor::update(const MemoryArgs& memory) {
    for (auto& cpu_mem_ptr : memory) {
        // Initialize arm_compute::TensorInfo object
        auto aclTensorInfo = initTensorInfo(cpu_mem_ptr.second, aclTensorAttrs);
        // Initialize arm_compute::Tensor object
        aclMemoryMap[cpu_mem_ptr.first] = initTensor(aclTensorInfo);
    }

    // Update arm_compute::TensorInfo objects for the specific ACL function
    auto tensorsInfoValidateStatus = updateTensorsInfo(aclMemoryMap);
    if (!tensorsInfoValidateStatus) {
        DEBUG_LOG("ACL operator validation failed: ", tensorsInfoValidateStatus.error_description());
        return false;
    }

    // Configure the arm_compute::IFunction object
    configureThreadSafe([&] {
        iFunction = configureFunction(aclMemoryMap);
    });
    return true;
}

void ACLCommonExecutor::execute(const MemoryArgs& memory) {
    for (auto& acl_tensor : aclMemoryMap) {
        if (acl_tensor.second) {
            acl_tensor.second->allocator()->import_memory(memory.at(acl_tensor.first)->getData());
        }
    }
    iFunction->run();
}

ACLCommonExecutor::~ACLCommonExecutor() {
    for (auto& acl_tensor : aclMemoryMap) {
        if (acl_tensor.second) {
            acl_tensor.second->allocator()->free();
        }
    }
}

}  // namespace intel_cpu
}  // namespace ov
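
A note on the memory model above: `import_memory` wraps the plugin's existing buffers in the ACL tensors without copying, and ownership of those buffers stays with the plugin. That is why `execute()` re-imports the pointers on every call (the underlying `MemoryPtr` data may have been reallocated between inferences), and why the destructor's `free()` releases only the ACL-side allocations while the plugin's buffers are left untouched.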
src/plugins/intel_cpu/src/nodes/executors/acl/acl_common_executor.hpp (new file)
@@ -0,0 +1,52 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "cpu_memory.h"
#include "nodes/executors/executor.hpp"
#include "arm_compute/runtime/NEON/NEFunctions.h"

namespace ov {
namespace intel_cpu {

using ACLMemoryInfo = std::shared_ptr<arm_compute::TensorInfo>;
using ACLMemory = std::shared_ptr<arm_compute::Tensor>;
using ACLMemoryMap = std::unordered_map<int, ACLMemory>;
using ACLFunction = std::unique_ptr<arm_compute::IFunction>;

struct ACLTensorAttrs {
    bool hasLayoutTypeNHWC = false;
    size_t maxDimsShape = arm_compute::MAX_DIMS;
};

class ACLCommonExecutor : public Executor {
public:
    virtual arm_compute::Status updateTensorsInfo(const ACLMemoryMap& acl_memory) {
        OPENVINO_THROW_NOT_IMPLEMENTED("This version of the 'updateTensorsInfo' method is not implemented by the executor");
    }
    virtual ACLFunction configureFunction(const ACLMemoryMap& acl_memory) {
        OPENVINO_THROW_NOT_IMPLEMENTED("This version of the 'configureFunction' method is not implemented by the executor");
    }
    impl_desc_type implType() const override {
        return impl_desc_type::acl;
    }
    void execute(const MemoryArgs& memory) final;
    bool update(const MemoryArgs& memory) final;
    ~ACLCommonExecutor();

protected:
    ACLTensorAttrs aclTensorAttrs;

private:
    ACLMemoryMap aclMemoryMap;
    ACLFunction iFunction = nullptr;
    static ACLMemoryInfo initTensorInfo(const MemoryPtr& memoryPtr, const ACLTensorAttrs& attrs);
    static ACLMemory initTensor(const ACLMemoryInfo& aclMemoryInfo);
};

using ACLCommonExecutorPtr = std::shared_ptr<ACLCommonExecutor>;

}  // namespace intel_cpu
}  // namespace ov
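
The contract here is a template method: a concrete executor overrides `updateTensorsInfo` (validate and adjust the prepared `TensorInfo` objects) and `configureFunction` (build the `IFunction`), and the base class drives everything else from `update()`/`execute()`. For orientation, a minimal hypothetical subclass is sketched below; an `ACLAbsExecutor` is not part of this commit, but `NEAbsLayer` and the `ARG_*` keys appear elsewhere in the changeset:

// Hypothetical minimal subclass -- illustration only, not part of this commit.
class ACLAbsExecutor : public ACLCommonExecutor {
public:
    arm_compute::Status updateTensorsInfo(const ACLMemoryMap& acl_memory) override {
        // Validate the op against the TensorInfo objects the base class prepared.
        return arm_compute::NEAbsLayer::validate(acl_memory.at(ARG_SRC)->info(),
                                                 acl_memory.at(ARG_DST)->info());
    }
    ACLFunction configureFunction(const ACLMemoryMap& acl_memory) override {
        // Build the ACL function against the tensors the base class will feed in execute().
        auto acl_op = std::make_unique<arm_compute::NEAbsLayer>();
        acl_op->configure(acl_memory.at(ARG_SRC).get(), acl_memory.at(ARG_DST).get());
        return acl_op;
    }
};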

src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.cpp (+19 -102)
@@ -361,66 +361,6 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs &eltwiseAttrs, const std::vecto
             return acl_op;
         };
         break;
-    case Algorithm::EltwiseRelu:
-        if (aclEltwiseAttrs.alpha == 0) {
-            if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0],
-                                             ActivationLayerInfo::ActivationFunction::RELU))
-                return false;
-        } else {
-            if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0],
-                                             {ActivationLayerInfo::ActivationFunction::LEAKY_RELU, aclEltwiseAttrs.alpha}))
-                return false;
-        }
-        exec_func = [this]() -> std::unique_ptr<IFunction> {
-            auto acl_op = std::make_unique<NEActivationLayer>();
-            if (aclEltwiseAttrs.alpha == 0) {
-                acl_op->configure(&srcTensors[0], &dstTensors[0], ActivationLayerInfo::ActivationFunction::RELU);
-            } else {
-                acl_op->configure(&srcTensors[0], &dstTensors[0],
-                                  {ActivationLayerInfo::ActivationFunction::LEAKY_RELU, aclEltwiseAttrs.alpha});
-            }
-            return acl_op;
-        };
-        break;
-    case Algorithm::EltwiseGeluErf:
-        if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0], ActivationLayerInfo::ActivationFunction::GELU))
-            return false;
-        exec_func = [this]() -> std::unique_ptr<IFunction> {
-            auto acl_op = std::make_unique<NEActivationLayer>();
-            acl_op->configure(&srcTensors[0], &dstTensors[0], ActivationLayerInfo::ActivationFunction::GELU);
-            return acl_op;
-        };
-        break;
-    case Algorithm::EltwiseElu:
-        if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0],
-                                         {ActivationLayerInfo::ActivationFunction::ELU, aclEltwiseAttrs.alpha}))
-            return false;
-        exec_func = [this]() -> std::unique_ptr<IFunction> {
-            auto acl_op = std::make_unique<NEActivationLayer>();
-            acl_op->configure(&srcTensors[0], &dstTensors[0], {ActivationLayerInfo::ActivationFunction::ELU, aclEltwiseAttrs.alpha});
-            return acl_op;
-        };
-        break;
-    case Algorithm::EltwiseTanh:
-        if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0],
-                                         {ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f}))
-            return false;
-        exec_func = [this]() -> std::unique_ptr<IFunction> {
-            auto acl_op = std::make_unique<NEActivationLayer>();
-            acl_op->configure(&srcTensors[0], &dstTensors[0],
-                              {ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f});
-            return acl_op;
-        };
-        break;
-    case Algorithm::EltwiseSigmoid:
-        if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0], ActivationLayerInfo::ActivationFunction::LOGISTIC))
-            return false;
-        exec_func = [this]() -> std::unique_ptr<IFunction> {
-            auto acl_op = std::make_unique<NEActivationLayer>();
-            acl_op->configure(&srcTensors[0], &dstTensors[0], ActivationLayerInfo::ActivationFunction::LOGISTIC);
-            return acl_op;
-        };
-        break;
     case Algorithm::EltwiseAbs:
         if (!NEAbsLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0]))
             return false;
@@ -430,24 +370,6 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs &eltwiseAttrs, const std::vecto
             return acl_op;
         };
         break;
-    case Algorithm::EltwiseSqrt:
-        if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0], ActivationLayerInfo::ActivationFunction::SQRT))
-            return false;
-        exec_func = [this]() -> std::unique_ptr<IFunction> {
-            auto acl_op = std::make_unique<NEActivationLayer>();
-            acl_op->configure(&srcTensors[0], &dstTensors[0], ActivationLayerInfo::ActivationFunction::SQRT);
-            return acl_op;
-        };
-        break;
-    case Algorithm::EltwiseSoftRelu:
-        if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0], ActivationLayerInfo::ActivationFunction::SOFT_RELU))
-            return false;
-        exec_func = [this]() -> std::unique_ptr<IFunction> {
-            auto acl_op = std::make_unique<NEActivationLayer>();
-            acl_op->configure(&srcTensors[0], &dstTensors[0], ActivationLayerInfo::ActivationFunction::SOFT_RELU);
-            return acl_op;
-        };
-        break;
     case Algorithm::EltwiseExp:
         if (!NEExpLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0]))
             return false;
@@ -457,28 +379,6 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs &eltwiseAttrs, const std::vecto
             return acl_op;
         };
         break;
-    case Algorithm::EltwiseClamp:
-        if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0],
-                                         {ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, aclEltwiseAttrs.beta, aclEltwiseAttrs.alpha}))
-            return false;
-        exec_func = [this]() -> std::unique_ptr<IFunction> {
-            auto acl_op = std::make_unique<NEActivationLayer>();
-            acl_op->configure(&srcTensors[0], &dstTensors[0],
-                              {ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, aclEltwiseAttrs.beta, aclEltwiseAttrs.alpha});
-            return acl_op;
-        };
-        break;
-    case Algorithm::EltwiseSwish:
-        if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0],
-                                         {ActivationLayerInfo::ActivationFunction::SWISH, aclEltwiseAttrs.alpha}))
-            return false;
-        exec_func = [this]() -> std::unique_ptr<IFunction> {
-            auto acl_op = std::make_unique<NEActivationLayer>();
-            acl_op->configure(&srcTensors[0], &dstTensors[0],
-                              {ActivationLayerInfo::ActivationFunction::SWISH, aclEltwiseAttrs.alpha});
-            return acl_op;
-        };
-        break;
     case Algorithm::EltwisePrelu:
         if (!NEPReluLayer::validate(&srcTensorsInfo[0], &srcTensorsInfo[1], &dstTensorsInfo[0]))
             return false;
@@ -488,12 +388,29 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs &eltwiseAttrs, const std::vecto
             return acl_op;
         };
         break;
+    case Algorithm::EltwiseRelu:
+    case Algorithm::EltwiseGeluErf:
+    case Algorithm::EltwiseElu:
+    case Algorithm::EltwiseTanh:
+    case Algorithm::EltwiseSigmoid:
+    case Algorithm::EltwiseSqrt:
+    case Algorithm::EltwiseSoftRelu:
+    case Algorithm::EltwiseClamp:
+    case Algorithm::EltwiseSwish:
     case Algorithm::EltwiseHswish:
-        if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0], ActivationLayerInfo::ActivationFunction::HARD_SWISH))
+        if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0],
+                                         getActivationLayerInfo(aclEltwiseAttrs.algorithm,
+                                                                aclEltwiseAttrs.alpha,
+                                                                aclEltwiseAttrs.beta,
+                                                                aclEltwiseAttrs.gamma)))
            return false;
         exec_func = [this]() -> std::unique_ptr<IFunction> {
             auto acl_op = std::make_unique<NEActivationLayer>();
-            acl_op->configure(&srcTensors[0], &dstTensors[0], ActivationLayerInfo::ActivationFunction::HARD_SWISH);
+            acl_op->configure(&srcTensors[0], &dstTensors[0],
+                              getActivationLayerInfo(aclEltwiseAttrs.algorithm,
+                                                     aclEltwiseAttrs.alpha,
+                                                     aclEltwiseAttrs.beta,
+                                                     aclEltwiseAttrs.gamma));
            return acl_op;
         };
         break;
src/plugins/intel_cpu/src/nodes/executors/acl/acl_fullyconnected.cpp (new file)
@@ -0,0 +1,92 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "acl_fullyconnected.hpp"
#include "acl_utils.hpp"
#include "nodes/executors/executor.hpp"
#include "nodes/executors/memory_arguments.hpp"
#include "utils/debug_capabilities.h"
#include "nodes/executors/debug_messages.hpp"
#include "nodes/executors/implementation_utils.hpp"

namespace ov {
namespace intel_cpu {

ACLFullyConnectedExecutor::ACLFullyConnectedExecutor(const FCAttrs& attrs, const PostOps& postOps,
                                                     const MemoryArgs& memory,
                                                     const ExecutorContext::CPtr context) {
    aclTensorAttrs.hasLayoutTypeNHWC = memory.at(ARG_SRC)->getDescPtr()->hasLayoutType(LayoutType::nspc);
    fullyConnectedLayerInfo.weights_trained_layout = getAclDataLayoutByMemoryDesc(memory.at(ARG_WEI)->getDescPtr());
    fullyConnectedLayerInfo.transpose_weights = !attrs.weightsNonTransposed;
    if (memory.at(ARG_SRC)->getPrecision() == ov::element::f16) {
        fullyConnectedLayerInfo.fp_mixed_precision = true;
    }

    // Add postops
    if (postOps.size() == 1) {
        if (const auto activation = std::dynamic_pointer_cast<ActivationPostOp>(postOps[0])) {
            fullyConnectedLayerInfo.activation_info = getActivationLayerInfo(convertToEltwiseAlgorithm(activation->type()),
                                                                             activation->alpha(),
                                                                             activation->beta(),
                                                                             activation->gamma());
        }
    }
}

bool ACLFullyConnectedExecutor::supports(const FCConfig& config) {
    VERIFY(one_of(srcType(config), ov::element::f16, ov::element::f32), UNSUPPORTED_SRC_PRECISIONS);
    VERIFY(postOpsNumbers(config) < 2, UNSUPPORTED_NUMBER_OF_POSTOPS);
    VERIFY(one_of(srcRank(config), 2U, 3U, 4U), UNSUPPORTED_SRC_RANK);
    VERIFY(one_of(weiRank(config), 2U, 3U), UNSUPPORTED_WEI_RANK);
    return true;
}

arm_compute::Status ACLFullyConnectedExecutor::updateTensorsInfo(const ACLMemoryMap& acl_memory) {
    auto wei_shape = acl_memory.at(ARG_WEI)->info()->tensor_shape();
    if (wei_shape.num_dimensions() == 3U) {
        acl_memory.at(ARG_WEI)->info()->set_tensor_shape({wei_shape[0] * wei_shape[1], wei_shape[2]});
    }

    auto src_shape = acl_memory.at(ARG_SRC)->info()->tensor_shape();
    if (one_of(src_shape.num_dimensions(), 3U, 4U)) {
        acl_memory.at(ARG_SRC)->info()->set_tensor_shape({
            acl_memory.at(ARG_WEI)->info()->tensor_shape()[0],
            src_shape.total_size() / acl_memory.at(ARG_WEI)->info()->tensor_shape()[0]});
    }

    if (one_of(acl_memory.at(ARG_DST)->info()->tensor_shape().num_dimensions(), 3U, 4U)) {
        acl_memory.at(ARG_DST)->info()->set_tensor_shape({
            acl_memory.at(ARG_WEI)->info()->tensor_shape()[1],
            acl_memory.at(ARG_SRC)->info()->tensor_shape()[1]});
    }

    if (!fullyConnectedLayerInfo.transpose_weights) {
        arm_compute::TensorShape temp_weights_shape = acl_memory.at(ARG_WEI)->info()->tensor_shape();
        std::swap(temp_weights_shape[0], temp_weights_shape[1]);
        acl_memory.at(ARG_WEI)->info()->set_tensor_shape(temp_weights_shape);
    }

    return arm_compute::NEFullyConnectedLayer::validate(
        acl_memory.at(ARG_SRC)->info(),
        acl_memory.at(ARG_WEI)->info(),
        acl_memory.at(ARG_BIAS) ? acl_memory.at(ARG_BIAS)->info() : nullptr,
        acl_memory.at(ARG_DST)->info(),
        fullyConnectedLayerInfo,
        weightsInfo);
}

ACLFunction ACLFullyConnectedExecutor::configureFunction(const ACLMemoryMap& acl_memory) {
    auto neFC = std::make_unique<arm_compute::NEFullyConnectedLayer>();
    neFC->configure(
        acl_memory.at(ARG_SRC).get(),
        acl_memory.at(ARG_WEI).get(),
        acl_memory.at(ARG_BIAS).get(),
        acl_memory.at(ARG_DST).get(),
        fullyConnectedLayerInfo,
        weightsInfo);
    return neFC;
}

}  // namespace intel_cpu
}  // namespace ov
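
Putting the pieces together, this executor follows the base-class lifecycle: construction captures the `FullyConnectedLayerInfo`, `update()` validates and configures, `execute()` runs. A condensed sketch of the node-side call sequence (the surrounding `attrs`, `postOps`, `memory`, and `context` objects are assumed to be prepared by the caller):

// Hedged sketch of the call sequence; node-side setup is assumed.
auto executor = std::make_shared<ACLFullyConnectedExecutor>(attrs, postOps, memory, context);
// update(): the base class builds a TensorInfo/Tensor pair per argument, then calls
// updateTensorsInfo() above (shape collapsing + NEFullyConnectedLayer::validate) and,
// on success, configureFunction() to create the NEFullyConnectedLayer.
if (executor->update(memory)) {
    // execute(): imports the CPU buffers into the ACL tensors and runs the layer.
    executor->execute(memory);
}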
