Skip to content

Commit 7297b1b

Browse files
ilya-lavrenov authored and azhai219 committed
[ARM] ARM 32bits support for oneDNN
[ARM] double matches in cmake fix
1 parent 887c081 commit 7297b1b

File tree

10 files changed

+52
-13
lines changed

10 files changed

+52
-13
lines changed

CMakeLists.txt

+2
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,8 @@ endif()
8989
if(NOT DNNL_TARGET_ARCH)
9090
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)")
9191
set(DNNL_TARGET_ARCH "AARCH64")
92+
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)")
93+
set(DNNL_TARGET_ARCH "ARM")
9294
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(ppc64.*|PPC64.*|powerpc64.*)")
9395
set(DNNL_TARGET_ARCH "PPC64")
9496
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(s390x.*|S390X.*)")

cmake/ACL.cmake

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ endif()
2121
set(acl_cmake_included true)
2222
include("cmake/options.cmake")
2323

24-
if(NOT DNNL_TARGET_ARCH STREQUAL "AARCH64")
24+
if(NOT DNNL_TARGET_ARCH MATCHES "^(AARCH64|ARM)$")
2525
return()
2626
endif()
2727

cmake/platform.cmake

+2-2
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ elseif(UNIX OR MINGW)
234234
platform_unix_and_mingw_noexcept_ccxx_flags(CMAKE_CMAKE_CCXX_NOEXCEPT_FLAGS)
235235
# compiler specific settings
236236
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
237-
if(DNNL_TARGET_ARCH STREQUAL "AARCH64")
237+
if(DNNL_TARGET_ARCH MATCHES "^(AARCH64|ARM)$")
238238
if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
239239
set(DEF_ARCH_OPT_FLAGS "-O3")
240240
endif()
@@ -339,7 +339,7 @@ elseif(UNIX OR MINGW)
339339
append(CMAKE_CCXX_FLAGS "-Wno-attributes")
340340
endif()
341341

342-
if(DNNL_TARGET_ARCH STREQUAL "AARCH64")
342+
if(DNNL_TARGET_ARCH MATCHES "^(AARCH64|ARM)$")
343343
if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
344344
set(DEF_ARCH_OPT_FLAGS "-O3")
345345
endif()

include/oneapi/dnnl/dnnl_types.h

+5
Original file line numberDiff line numberDiff line change
@@ -2292,7 +2292,12 @@ typedef enum {
22922292
/// A `size_t` counterpart of the DNNL_RUNTIME_DIM_VAL.
22932293
/// For instance, this value is returned by dnnl_memory_desc_get_size() if
22942294
/// any of the dimensions or strides is equal to #DNNL_RUNTIME_DIM_VAL.
2295+
2296+
#if INTPTR_MAX == INT64_MAX
22952297
#define DNNL_RUNTIME_SIZE_VAL ((size_t)DNNL_RUNTIME_DIM_VAL)
2298+
#else
2299+
#define DNNL_RUNTIME_SIZE_VAL ((size_t)INT32_MIN)
2300+
#endif
22962301

22972302
/// @cond DO_NOT_DOCUMENT_THIS
22982303
/// Hex representation for a **special** quiet NAN (!= NAN from math.h)

src/common/utils.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ bool get_jit_dump() {
230230
return jit_dump.get();
231231
}
232232

233-
#if defined(DNNL_AARCH64) && (DNNL_AARCH64 == 1)
233+
#if defined(DNNL_AARCH64) && (DNNL_AARCH64 == 1) || defined(DNNL_ARM) && (DNNL_ARM == 1)
234234
static setting_t<unsigned> jit_profiling_flags {DNNL_JIT_PROFILE_LINUX_PERFMAP};
235235
#else
236236
static setting_t<unsigned> jit_profiling_flags {DNNL_JIT_PROFILE_VTUNE};

src/cpu/README.md

+2
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,9 @@ architecture. Hence, for portability reasons [`cpu/platform.hpp`](platform.hpp)
4444
header file provides a set of helper macros that could help conditionally
4545
enable or disable parts of code. The following macros are defined:
4646
- `DNNL_X64` is 1 on x64 architecture;
47+
- `DNNL_X86` is 1 on x86 architecture;
4748
- `DNNL_AARCH64` is 1 on Arm AArch64 architecture;
49+
- `DNNL_ARM` is 1 on Arm 32-bit (AArch32) architecture;
4850
- `DNNL_PPC64` is 1 on OpenPOWER / IBM Power architecture;
4951
- `DNNL_S390X` is 1 on IBMz / s390x architecture;
5052
- `DNNL_RV64` is 1 on RISC-V architecture;

src/cpu/cpu_engine.hpp

+5-2
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929

3030
#include "cpu/platform.hpp"
3131

32-
#if DNNL_AARCH64 && DNNL_AARCH64_USE_ACL
32+
#if (DNNL_AARCH64 || DNNL_ARM) && DNNL_AARCH64_USE_ACL
3333
#include "cpu/aarch64/acl_thread.hpp"
3434
#endif
3535

@@ -46,8 +46,11 @@
4646
#define CPU_INSTANCE_AVX512(...) REG_AVX512_ISA(CPU_INSTANCE(__VA_ARGS__))
4747
#define CPU_INSTANCE_AMX(...) REG_AMX_ISA(CPU_INSTANCE(__VA_ARGS__))
4848
#define CPU_INSTANCE_AARCH64(...) DNNL_AARCH64_ONLY(CPU_INSTANCE(__VA_ARGS__))
49+
#define CPU_INSTANCE_ARM(...) DNNL_ARM_ONLY(CPU_INSTANCE(__VA_ARGS__))
4950
#define CPU_INSTANCE_AARCH64_ACL(...) \
5051
DNNL_AARCH64_ACL_ONLY(CPU_INSTANCE(__VA_ARGS__))
52+
#define CPU_INSTANCE_ARM_ACL(...) \
53+
DNNL_ARM_ACL_ONLY(CPU_INSTANCE(__VA_ARGS__))
5154
#define CPU_INSTANCE_RV64GCV(...) DNNL_RV64GCV_ONLY(CPU_INSTANCE(__VA_ARGS__))
5255

5356
namespace dnnl {
@@ -160,7 +163,7 @@ class cpu_engine_factory_t : public engine_factory_t {
160163
*engine = new cpu_engine_t(new impl::engine_impl_t(
161164
engine_kind::cpu, get_cpu_native_runtime(), 0));
162165

163-
#if DNNL_AARCH64 && DNNL_AARCH64_USE_ACL
166+
#if (DNNL_AARCH64 || DNNL_ARM) && DNNL_AARCH64_USE_ACL
164167
dnnl::impl::cpu::aarch64::acl_thread_utils::set_acl_threading();
165168
#endif
166169
return status::success;

src/cpu/platform.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -205,8 +205,8 @@ unsigned get_per_core_cache_size(int level) {
205205
unsigned get_num_cores() {
206206
#if DNNL_X64
207207
return x64::cpu().getNumCores(Xbyak::util::CoreLevel);
208-
#elif DNNL_AARCH64_USE_ACL
209-
return aarch64::cpu().getNumCores(Xbyak_aarch64::util::CoreLevel);
208+
#elif (DNNL_AARCH64 || DNNL_ARM) && DNNL_AARCH64_USE_ACL
209+
return arm_compute::cpuinfo::num_threads_hint();
210210
#else
211211
return 1;
212212
#endif

src/cpu/platform.hpp

+27-2
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,9 @@
2626

2727
// Possible architectures:
2828
// - DNNL_X64
29+
// - DNNL_X86
2930
// - DNNL_AARCH64
31+
// - DNNL_ARM
3032
// - DNNL_PPC64
3133
// - DNNL_S390X
3234
// - DNNL_RV64
@@ -35,12 +37,19 @@
3537

3638
#if defined(DNNL_X64) + defined(DNNL_AARCH64) + defined(DNNL_PPC64) \
3739
+ defined(DNNL_S390X) + defined(DNNL_RV64) \
40+
+ defined(DNNL_ARM) + defined(DNNL_X86) \
3841
+ defined(DNNL_ARCH_GENERIC) \
3942
== 0
40-
#if defined(__x86_64__) || defined(_M_X64)
43+
#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || \
44+
defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
4145
#define DNNL_X64 1
42-
#elif defined(__aarch64__)
46+
#elif defined(i386) || defined(__i386) || defined(__i386__) || defined(__IA32__) || defined(_M_I86) || \
47+
defined(_M_IX86) || defined(__X86__) || defined(_X86_) || defined(__I86__) || defined(__386)
48+
#define DNNL_X86 1
49+
#elif defined(__aarch64__) || defined(_M_ARM64)
4350
#define DNNL_AARCH64 1
51+
#elif defined(__arm__) || defined(_M_ARM) || defined(__ARMEL__)
52+
#define DNNL_ARM 1
4453
#elif defined(__powerpc64__) || defined(__PPC64__) || defined(_ARCH_PPC64)
4554
#define DNNL_PPC64 1
4655
#elif defined(__s390x__)
@@ -54,6 +63,7 @@
5463

5564
#if defined(DNNL_X64) + defined(DNNL_AARCH64) + defined(DNNL_PPC64) \
5665
+ defined(DNNL_S390X) + defined(DNNL_RV64) \
66+
+ defined(DNNL_ARM) + defined(DNNL_X86) \
5767
+ defined(DNNL_ARCH_GENERIC) \
5868
!= 1
5969
#error One and only one architecture should be defined at a time
@@ -62,9 +72,15 @@
6272
#if !defined(DNNL_X64)
6373
#define DNNL_X64 0
6474
#endif
75+
#if !defined(DNNL_X86)
76+
#define DNNL_X86 0
77+
#endif
6578
#if !defined(DNNL_AARCH64)
6679
#define DNNL_AARCH64 0
6780
#endif
81+
#if !defined(DNNL_ARM)
82+
#define DNNL_ARM 0
83+
#endif
6884
#if !defined(DNNL_PPC64)
6985
#define DNNL_PPC64 0
7086
#endif
@@ -84,6 +100,7 @@
84100
#define DNNL_PPC64_ONLY(...) Z_CONDITIONAL_DO(DNNL_PPC64_ONLY, __VA_ARGS__)
85101
#define DNNL_S390X_ONLY(...) Z_CONDITIONAL_DO(DNNL_S390X_ONLY, __VA_ARGS__)
86102
#define DNNL_AARCH64_ONLY(...) Z_CONDITIONAL_DO(DNNL_AARCH64, __VA_ARGS__)
103+
#define DNNL_ARM_ONLY(...) Z_CONDITIONAL_DO(DNNL_ARM, __VA_ARGS__)
87104

88105
// Using RISC-V implementations optimized with RVV Intrinsics is optional for RISC-V builds
89106
// and can be enabled with DNNL_ARCH_OPT_FLAGS="-march=<ISA-string>" option, where <ISA-string>
@@ -105,6 +122,14 @@
105122
#define DNNL_AARCH64_ACL_ONLY(...)
106123
#endif
107124

125+
// Using Arm Compute Library kernels is optional for ARM builds
126+
// and can be enabled with the DNNL_AARCH64_USE_ACL CMake option
127+
#if defined(DNNL_ARM) && defined(DNNL_AARCH64_USE_ACL)
128+
#define DNNL_ARM_ACL_ONLY(...) __VA_ARGS__
129+
#else
130+
#define DNNL_ARM_ACL_ONLY(...)
131+
#endif
132+
108133
// Primitive ISA section for configuring knobs.
109134
// Note: the MSVC preprocessor for some reason "eats" symbols it's not supposed to
110135
// if __VA_ARGS__ is passed as empty. Then things happen like this for non-x64:

src/cpu/x64/cpu_isa_traits.hpp

+5-3
Original file line numberDiff line numberDiff line change
@@ -374,11 +374,13 @@ namespace {
374374
static inline bool mayiuse(const cpu_isa_t cpu_isa, bool soft = false) {
375375
using namespace Xbyak::util;
376376
#if DNNL_X64
377-
unsigned cpu_isa_mask = x64::get_max_cpu_isa_mask(soft);
377+
const unsigned cpu_isa_mask = x64::get_max_cpu_isa_mask(soft);
378+
#elif DNNL_X86
379+
const unsigned cpu_isa_mask = isa_undef;
378380
#else
379-
unsigned cpu_isa_mask = isa_all;
381+
const unsigned cpu_isa_mask = isa_all;
380382
#endif
381-
unsigned cpu_isa_no_hints = cpu_isa & ~cpu_isa_hints_utils::hints_mask;
383+
const unsigned cpu_isa_no_hints = cpu_isa & ~cpu_isa_hints_utils::hints_mask;
382384

383385
if ((cpu_isa_mask & cpu_isa_no_hints) != cpu_isa_no_hints) return false;
384386

0 commit comments

Comments
 (0)