Skip to content

Commit 710d159

Browse files
alexey-varyzgin authored and azhai219 committed
[FORK][FEATURE] TBB_AUTO was enabled
1 parent 1bbd997 commit 710d159

10 files changed

+14
-6
lines changed

cmake/TBB.cmake

+1-1
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ macro(handle_tbb_target)
5959
add_definitions(-DTBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION=1)
6060
endmacro()
6161

62-
if(NOT DNNL_CPU_THREADING_RUNTIME STREQUAL "TBB")
62+
if(NOT "${DNNL_CPU_THREADING_RUNTIME}" MATCHES "^(TBB|TBB_AUTO)$")
6363
return()
6464
endif()
6565

cmake/options.cmake

+1-1
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,7 @@ set(DNNL_CPU_RUNTIME "OMP" CACHE STRING
262262
To use Threading Building Blocks (TBB) one should also
263263
set TBBROOT (either environment variable or CMake option) to the library
264264
location.")
265-
if(NOT "${DNNL_CPU_RUNTIME}" MATCHES "^(NONE|OMP|TBB|SEQ|THREADPOOL|DPCPP|SYCL)$")
265+
if(NOT "${DNNL_CPU_RUNTIME}" MATCHES "^(NONE|OMP|TBB|TBB_AUTO|SEQ|THREADPOOL|DPCPP|SYCL)$")
266266
message(FATAL_ERROR "Unsupported CPU runtime: ${DNNL_CPU_RUNTIME}")
267267
endif()
268268

include/oneapi/dnnl/dnnl_config.h.in

+3
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@
7070
/// TBB runtime (CPU only)
7171
#define DNNL_RUNTIME_TBB 4u
7272

73+
/// TBB runtime with auto partitioning (CPU only)
74+
#define DNNL_RUNTIME_TBB_AUTO 5u
75+
7376
/// Threadpool runtime (CPU only)
7477
#define DNNL_RUNTIME_THREADPOOL 8u
7578

src/CMakeLists.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,7 @@ if(UNIX AND DNNL_INSTALL_MODE STREQUAL "BUNDLE_V2")
273273
endif()
274274

275275
# Install custom find modules for transitive dependencies
276-
if(DNNL_CPU_THREADING_RUNTIME STREQUAL "TBB")
276+
if("${DNNL_CPU_THREADING_RUNTIME}" MATCHES "^(TBB|TBB_AUTO)$")
277277
if(WIN32)
278278
install(FILES "../cmake/win/TBBConfig.cmake" RENAME "FindTBB.cmake"
279279
DESTINATION ${LIB_CONFIG_INSTALL_DIR})

src/common/c_types_map.hpp

+2
Original file line numberDiff line numberDiff line change
@@ -1914,6 +1914,7 @@ enum runtime_kind_t {
19141914
dnnl_runtime_seq,
19151915
dnnl_runtime_omp,
19161916
dnnl_runtime_tbb,
1917+
dnnl_runtime_tbb_auto,
19171918
dnnl_runtime_threadpool,
19181919
dnnl_runtime_ocl,
19191920
dnnl_runtime_sycl,
@@ -1924,6 +1925,7 @@ const runtime_kind_t none = dnnl_runtime_none;
19241925
const runtime_kind_t seq = dnnl_runtime_seq;
19251926
const runtime_kind_t omp = dnnl_runtime_omp;
19261927
const runtime_kind_t tbb = dnnl_runtime_tbb;
1928+
const runtime_kind_t tbb_auto = dnnl_runtime_tbb_auto;
19271929
const runtime_kind_t threadpool = dnnl_runtime_threadpool;
19281930
const runtime_kind_t ocl = dnnl_runtime_ocl;
19291931
const runtime_kind_t sycl = dnnl_runtime_sycl;

src/common/dnnl_debug.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ const char *dnnl_runtime2str(unsigned runtime) {
3333
case DNNL_RUNTIME_SEQ: return "sequential";
3434
case DNNL_RUNTIME_OMP: return "OpenMP";
3535
case DNNL_RUNTIME_TBB: return "TBB";
36+
case DNNL_RUNTIME_TBB_AUTO: return "TBB_AUTO";
3637
case DNNL_RUNTIME_OCL: return "OpenCL";
3738
case DNNL_RUNTIME_THREADPOOL: return "threadpool";
3839
#ifdef DNNL_WITH_SYCL

src/common/dnnl_thread.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ inline void dnnl_thr_barrier() {
7575
#pragma omp barrier
7676
}
7777

78-
#elif DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_TBB
78+
#elif (DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_TBB || DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_TBB_AUTO)
7979
#include "tbb/parallel_for.h"
8080
#include "tbb/task_arena.h"
8181
#define DNNL_THR_SYNC 0

src/common/engine.hpp

+2
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,8 @@ inline runtime_kind_t get_default_runtime(engine_kind_t kind) {
187187
return runtime_kind::omp;
188188
#elif DNNL_CPU_RUNTIME == DNNL_RUNTIME_TBB
189189
return runtime_kind::tbb;
190+
#elif DNNL_CPU_RUNTIME == DNNL_RUNTIME_TBB_AUTO
191+
return runtime_kind::tbb_auto;
190192
#elif DNNL_CPU_RUNTIME == DNNL_RUNTIME_THREADPOOL
191193
return runtime_kind::threadpool;
192194
#elif DNNL_CPU_RUNTIME == DNNL_RUNTIME_SYCL

src/common/utils.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -752,7 +752,7 @@ struct set_once_before_first_get_setting_t {
752752

753753
inline bool is_native_runtime(runtime_kind_t kind) {
754754
return utils::one_of(kind, runtime_kind::seq, runtime_kind::omp,
755-
runtime_kind::tbb, runtime_kind::threadpool);
755+
runtime_kind::tbb, runtime_kind::tbb_auto, runtime_kind::threadpool);
756756
}
757757

758758
// Convenience wrapper to choose at compile-time between std::unique_ptr's

src/cpu/rnn/ref_rnn.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ void gates_reduction(const rnn_utils::rnn_conf_t &rnn,
6262
// @todo block k on simd-width to enable vectorization in
6363
// parallel_nd path
6464
#if DNNL_CPU_RUNTIME == DNNL_RUNTIME_OMP && _OPENMP >= 201307 \
65-
&& (!defined(__INTEL_COMPILER) || __INTEL_COMPILER < 1910)
65+
&& defined __INTEL_COMPILER && __INTEL_COMPILER < 1910
6666
#pragma omp parallel for simd collapse(2)
6767
for (int i = 0; i < rnn.n_gates; i++)
6868
for (int k = 0; k < rnn.dhc; k++)

0 commit comments

Comments
 (0)