Skip to content

Commit bf97f82

Browse files
alvoronazhai219
authored and committed
[ARM] Added ARM32 ACL kernels calls
1 parent 03ea468 commit bf97f82

File tree

76 files changed

+333
-304
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

76 files changed

+333
-304
lines changed

.github/automation/build.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ CMAKE_OPTIONS="${CMAKE_OPTIONS}
7979
# NOTE: only for AArch64 builds.
8080
if [ ! -z ${ACL_DIR} ]; then
8181
export ACL_ROOT_DIR=$ACL_DIR
82-
CMAKE_OPTIONS="${CMAKE_OPTIONS} -DDNNL_AARCH64_USE_ACL=ON"
82+
CMAKE_OPTIONS="${CMAKE_OPTIONS} -DDNNL_USE_ACL=ON"
8383
echo "Info: Building with Arm Compute Library backend for Aarch64..."
8484
fi
8585

cmake/ACL.cmake

+2-2
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ if(NOT DNNL_TARGET_ARCH MATCHES "^(AARCH64|ARM)$")
2525
return()
2626
endif()
2727

28-
if(NOT DNNL_AARCH64_USE_ACL)
28+
if(NOT DNNL_USE_ACL)
2929
return()
3030
endif()
3131

@@ -67,7 +67,7 @@ if(ACL_FOUND)
6767
message(STATUS "Arm Compute Library: ${ACL_LIBRARIES}")
6868
message(STATUS "Arm Compute Library headers: ${ACL_INCLUDE_DIRS}")
6969

70-
add_definitions(-DDNNL_AARCH64_USE_ACL)
70+
add_definitions(-DDNNL_USE_ACL)
7171
set(CMAKE_CXX_STANDARD 14)
7272
set(CMAKE_CXX_EXTENSIONS "OFF")
7373
endif()

cmake/options.cmake

+1-1
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,7 @@ set(DNNL_BLAS_VENDOR "NONE" CACHE STRING
419419
# AArch64 optimizations with Arm Compute Library
420420
# ==============================================
421421

422-
option(DNNL_AARCH64_USE_ACL "Enables use of AArch64 optimised functions
422+
option(DNNL_USE_ACL "Enables use of ARM optimised functions
423423
from Arm Compute Library.
424424
This is only supported on AArch64 builds and assumes there is a
425425
functioning Compute Library build available at the location specified by the

doc/build/build.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ make -j
117117
~~~sh
118118
export ACL_ROOT_DIR=<path/to/Compute Library>
119119
cmake .. \
120-
-DDNNL_AARCH64_USE_ACL=ON \
120+
-DDNNL_USE_ACL=ON \
121121
<extra build options>
122122
~~~
123123

src/cpu/CMakeLists.txt

+3
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,9 @@ endif()
138138
if (DNNL_TARGET_ARCH STREQUAL "AARCH64")
139139
add_subdirectory(aarch64)
140140
endif()
141+
if (DNNL_USE_ACL)
142+
add_subdirectory(acl)
143+
endif()
141144
if (DNNL_TARGET_ARCH STREQUAL "PPC64")
142145
add_subdirectory(ppc64)
143146
endif()

src/cpu/aarch64/CMakeLists.txt

-8
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,6 @@ file(GLOB XBYAK_AARCH64_FILES
2727

2828
list(REMOVE_ITEM SOURCES ${XBYAK_AARCH64_FILES})
2929

30-
if(NOT DNNL_AARCH64_USE_ACL)
31-
file(GLOB_RECURSE ACL_FILES
32-
${CMAKE_CURRENT_SOURCE_DIR}/acl_*.[ch]
33-
${CMAKE_CURRENT_SOURCE_DIR}/acl_*.[ch]pp
34-
)
35-
list(REMOVE_ITEM SOURCES ${ACL_FILES})
36-
endif()
37-
3830
# If the runtime is not THREADPOOL remove threadpool_scheduler sources.
3931
if(NOT DNNL_CPU_RUNTIME STREQUAL "THREADPOOL")
4032
list(APPEND ACL_THREADPOOL_FILES

src/cpu/aarch64/acl_reorder.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
namespace dnnl {
2020
namespace impl {
2121
namespace cpu {
22-
namespace aarch64 {
22+
namespace acl {
2323

2424
status_t acl_reorder_fwd_t::execute_forward(const exec_ctx_t &ctx) const {
2525
// Lock here is needed because resource_mapper does not support
@@ -46,7 +46,7 @@ status_t acl_reorder_fwd_t::execute_forward(const exec_ctx_t &ctx) const {
4646
return status::success;
4747
}
4848

49-
} // namespace aarch64
49+
} // namespace acl
5050
} // namespace cpu
5151
} // namespace impl
5252
} // namespace dnnl

src/cpu/aarch64/acl_reorder.hpp

+7-7
Original file line numberDiff line numberDiff line change
@@ -13,19 +13,19 @@
1313
* See the License for the specific language governing permissions and
1414
* limitations under the License.
1515
*******************************************************************************/
16-
#ifndef CPU_AARCH64_ACL_REORDER_HPP
17-
#define CPU_AARCH64_ACL_REORDER_HPP
16+
#ifndef CPU_ACL_REORDER_HPP
17+
#define CPU_ACL_REORDER_HPP
1818

1919
#include "arm_compute/core/Types.h"
2020
#include "common/utils.hpp"
21-
#include "cpu/aarch64/acl_utils.hpp"
21+
#include "cpu/acl/acl_utils.hpp"
2222
#include "cpu/aarch64/cpu_isa_traits.hpp"
2323
#include "cpu/reorder/cpu_reorder_pd.hpp"
2424

2525
namespace dnnl {
2626
namespace impl {
2727
namespace cpu {
28-
namespace aarch64 {
28+
namespace acl {
2929

3030
struct acl_reorder_obj_t {
3131
arm_compute::NEReorderLayer reorder;
@@ -131,7 +131,7 @@ struct acl_reorder_fwd_t : public primitive_t {
131131
if (dst_tag == format_tag::BA4b4a || dst_tag == format_tag::Acdb4a
132132
|| dst_tag == format_tag::Ab4a) {
133133
_pd->app_.dst_wf = arm_compute::WeightFormat::OHWIo4;
134-
} else if (mayiuse(sve_256)
134+
} else if (aarch64::mayiuse(aarch64::sve_256)
135135
&& (dst_tag == format_tag::BA8b4a
136136
|| dst_tag == format_tag::Acdb8a
137137
|| dst_tag == format_tag::Ab8a)) {
@@ -239,9 +239,9 @@ struct acl_reorder_fwd_t : public primitive_t {
239239

240240
}; // acl_reorder_fwd_t
241241

242-
} // namespace aarch64
242+
} // namespace acl
243243
} // namespace cpu
244244
} // namespace impl
245245
} // namespace dnnl
246246

247-
#endif // CPU_AARCH64_ACL_REORDER_HPP
247+
#endif // CPU_ACL_REORDER_HPP

src/cpu/acl/CMakeLists.txt

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#*******************************************************************************
2+
# Copyright 2020-2022 Arm Ltd. and affiliates
3+
# Copyright 2020-2021 FUJITSU LIMITED
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#*******************************************************************************
17+
file(GLOB_RECURSE SOURCES
18+
${CMAKE_CURRENT_SOURCE_DIR}/*.[ch]
19+
${CMAKE_CURRENT_SOURCE_DIR}/*.[ch]pp
20+
)
21+
# If the runtime is not THREADPOOL remove threadpool_scheduler sources.
22+
if(NOT DNNL_CPU_RUNTIME STREQUAL "THREADPOOL")
23+
list(APPEND ACL_THREADPOOL_FILES
24+
${CMAKE_CURRENT_SOURCE_DIR}/acl_threadpool_scheduler.cpp
25+
${CMAKE_CURRENT_SOURCE_DIR}/acl_threadpool_scheduler.hpp
26+
)
27+
list(REMOVE_ITEM SOURCES ${ACL_THREADPOOL_FILES})
28+
endif()
29+
set(OBJ_LIB ${DNNL_LIBRARY_NAME}_cpu_acl)
30+
add_library(${OBJ_LIB} OBJECT ${SOURCES})
31+
set_property(GLOBAL APPEND PROPERTY DNNL_LIB_DEPS
32+
$<TARGET_OBJECTS:${OBJ_LIB}>)
33+
enable_conditional_compilation4(${OBJ_LIB})

src/cpu/aarch64/acl_batch_normalization.cpp src/cpu/acl/acl_batch_normalization.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,12 @@
1414
* limitations under the License.
1515
*******************************************************************************/
1616

17-
#include "cpu/aarch64/acl_batch_normalization.hpp"
17+
#include "cpu/acl/acl_batch_normalization.hpp"
1818

1919
namespace dnnl {
2020
namespace impl {
2121
namespace cpu {
22-
namespace aarch64 {
22+
namespace acl {
2323

2424
status_t acl_batch_normalization_fwd_t::execute_forward(
2525
const exec_ctx_t &ctx) const {
@@ -72,7 +72,7 @@ status_t acl_batch_normalization_fwd_t::execute_forward(
7272
return status::success;
7373
}
7474

75-
} // namespace aarch64
75+
} // namespace acl
7676
} // namespace cpu
7777
} // namespace impl
7878
} // namespace dnnl

src/cpu/aarch64/acl_batch_normalization.hpp src/cpu/acl/acl_batch_normalization.hpp

+6-6
Original file line numberDiff line numberDiff line change
@@ -14,18 +14,18 @@
1414
* limitations under the License.
1515
*******************************************************************************/
1616

17-
#ifndef CPU_AARCH64_ACL_BATCH_NORMALIZATION_HPP
18-
#define CPU_AARCH64_ACL_BATCH_NORMALIZATION_HPP
17+
#ifndef CPU_ACL_BATCH_NORMALIZATION_HPP
18+
#define CPU_ACL_BATCH_NORMALIZATION_HPP
1919

2020
#include "cpu/cpu_batch_normalization_pd.hpp"
2121

22-
#include "cpu/aarch64/acl_post_ops.hpp"
23-
#include "cpu/aarch64/acl_utils.hpp"
22+
#include "cpu/acl/acl_post_ops.hpp"
23+
#include "cpu/acl/acl_utils.hpp"
2424

2525
namespace dnnl {
2626
namespace impl {
2727
namespace cpu {
28-
namespace aarch64 {
28+
namespace acl {
2929

3030
struct acl_batch_normalization_obj_t {
3131
arm_compute::NEBatchNormalizationLayer bnorm;
@@ -272,7 +272,7 @@ struct acl_batch_normalization_fwd_t : public primitive_t {
272272
const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); }
273273
}; // acl_batch_normalization_fwd_t
274274

275-
} // namespace aarch64
275+
} // namespace acl
276276
} // namespace cpu
277277
} // namespace impl
278278
} // namespace dnnl

src/cpu/aarch64/acl_benchmark_scheduler.cpp src/cpu/acl/acl_benchmark_scheduler.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,13 @@
1414
* limitations under the License.
1515
*******************************************************************************/
1616

17-
#include "cpu/aarch64/acl_benchmark_scheduler.hpp"
17+
#include "cpu/acl/acl_benchmark_scheduler.hpp"
1818
#include "common/verbose.hpp"
1919

2020
namespace dnnl {
2121
namespace impl {
2222
namespace cpu {
23-
namespace aarch64 {
23+
namespace acl {
2424
using namespace arm_compute;
2525

2626
BenchmarkScheduler::BenchmarkScheduler(IScheduler &real_scheduler)
@@ -72,7 +72,7 @@ void BenchmarkScheduler::run_workloads(std::vector<Workload> &workloads) {
7272
ARM_COMPUTE_ERROR("Can't be reached");
7373
}
7474

75-
} // namespace aarch64
75+
} // namespace acl
7676
} // namespace cpu
7777
} // namespace impl
7878
} // namespace dnnl

src/cpu/aarch64/acl_benchmark_scheduler.hpp src/cpu/acl/acl_benchmark_scheduler.hpp

+5-5
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,16 @@
1313
* See the License for the specific language governing permissions and
1414
* limitations under the License.
1515
*******************************************************************************/
16-
#ifndef CPU_AARCH64_ACL_BENCHMARK_SCHEDULER_HPP
17-
#define CPU_AARCH64_ACL_BENCHMARK_SCHEDULER_HPP
16+
#ifndef CPU_ACL_BENCHMARK_SCHEDULER_HPP
17+
#define CPU_ACL_BENCHMARK_SCHEDULER_HPP
1818

1919
#include "arm_compute/core/CPP/ICPPKernel.h"
2020
#include "arm_compute/runtime/IScheduler.h"
2121

2222
namespace dnnl {
2323
namespace impl {
2424
namespace cpu {
25-
namespace aarch64 {
25+
namespace acl {
2626
// BenchmarkScheduler implements ACL's IScheduler interface and acts as an interceptor scheduler
2727
// when DNNL_VERBOSE=profile,profile_externals. It intercepts calls made by the actual scheduler used by ACL and adds
2828
// timers to benchmark execution time of ACL kernels and store kernel information.
@@ -52,9 +52,9 @@ class BenchmarkScheduler final : public arm_compute::IScheduler {
5252
IScheduler &_real_scheduler;
5353
};
5454

55-
#endif // CPU_AARCH64_ACL_BENCHMARK_SCHEDULER_HPP
55+
#endif // CPU_ACL_BENCHMARK_SCHEDULER_HPP
5656

57-
} // namespace aarch64
57+
} // namespace acl
5858
} // namespace cpu
5959
} // namespace impl
6060
} // namespace dnnl

src/cpu/aarch64/acl_binary.cpp src/cpu/acl/acl_binary.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
namespace dnnl {
2828
namespace impl {
2929
namespace cpu {
30-
namespace aarch64 {
30+
namespace acl {
3131

3232
status_t acl_binary_t::pd_t::init(engine_t *engine) {
3333
using namespace acl_utils;
@@ -229,7 +229,7 @@ const acl_binary_t::pd_t *acl_binary_t::pd() const {
229229
return static_cast<const pd_t *>(primitive_t::pd().get());
230230
}
231231

232-
} // namespace aarch64
232+
} // namespace acl
233233
} // namespace cpu
234234
} // namespace impl
235235
} // namespace dnnl

src/cpu/aarch64/acl_binary.hpp src/cpu/acl/acl_binary.hpp

+4-4
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414
* limitations under the License.
1515
*******************************************************************************/
1616

17-
#ifndef CPU_AARCH64_ACL_BINARY_HPP
18-
#define CPU_AARCH64_ACL_BINARY_HPP
17+
#ifndef CPU_ACL_BINARY_HPP
18+
#define CPU_ACL_BINARY_HPP
1919

2020
#include "acl_utils.hpp"
2121
#include "cpu/cpu_binary_pd.hpp"
@@ -28,7 +28,7 @@
2828
namespace dnnl {
2929
namespace impl {
3030
namespace cpu {
31-
namespace aarch64 {
31+
namespace acl {
3232

3333
struct acl_binary_conf_t {
3434
arm_compute::TensorInfo src0_info;
@@ -73,7 +73,7 @@ struct acl_binary_t : public primitive_t {
7373

7474
}; // acl_binary_t
7575

76-
} // namespace aarch64
76+
} // namespace acl
7777
} // namespace cpu
7878
} // namespace impl
7979
} // namespace dnnl

src/cpu/aarch64/acl_convolution_utils.cpp src/cpu/acl/acl_convolution_utils.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
namespace dnnl {
2323
namespace impl {
2424
namespace cpu {
25-
namespace aarch64 {
25+
namespace acl {
2626

2727
namespace acl_convolution_utils {
2828

@@ -285,7 +285,7 @@ status_t acl_init_conf(acl_conv_conf_t &acp, memory_desc_t &src_md,
285285
}
286286
} // namespace acl_convolution_utils
287287

288-
} // namespace aarch64
288+
} // namespace acl
289289
} // namespace cpu
290290
} // namespace impl
291291
} // namespace dnnl

src/cpu/aarch64/acl_convolution_utils.hpp src/cpu/acl/acl_convolution_utils.hpp

+5-5
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414
* limitations under the License.
1515
*******************************************************************************/
1616

17-
#ifndef CPU_AARCH64_ACL_CONVOLUTION_UTILS_HPP
18-
#define CPU_AARCH64_ACL_CONVOLUTION_UTILS_HPP
17+
#ifndef CPU_ACL_CONVOLUTION_UTILS_HPP
18+
#define CPU_ACL_CONVOLUTION_UTILS_HPP
1919

2020
#include <map>
2121
#include "acl_post_ops.hpp"
@@ -26,7 +26,7 @@
2626
namespace dnnl {
2727
namespace impl {
2828
namespace cpu {
29-
namespace aarch64 {
29+
namespace acl {
3030

3131
template <typename ConvOp>
3232
struct acl_obj_t {
@@ -175,9 +175,9 @@ status_t execute_forward_conv_acl(const exec_ctx_t &ctx,
175175
return status::success;
176176
}
177177

178-
} // namespace aarch64
178+
} // namespace acl
179179
} // namespace cpu
180180
} // namespace impl
181181
} // namespace dnnl
182182

183-
#endif // CPU_AARCH64_ACL_CONVOLUTION_UTILS_HPP
183+
#endif // CPU_ACL_CONVOLUTION_UTILS_HPP

0 commit comments

Comments
 (0)