Skip to content

Commit 2d8b065

Browse files
EgorDuplensky authored and azhai219 committed
[FIX] Fix seg fault in parallel function with ITT build
Inline and gcc optimizations lead to crash in parallel function
1 parent e56978c commit 2d8b065

File tree

2 files changed

+153
-131
lines changed

2 files changed

+153
-131
lines changed

src/common/dnnl_thread.cpp

+102
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,102 @@
1+
#include <functional>
2+
3+
#include "dnnl_thread.hpp"
4+
5+
#if defined(DNNL_ENABLE_ITT_TASKS)
6+
#include "common/ittnotify.hpp"
7+
#endif
8+
9+
#if DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_THREADPOOL
10+
#include "counting_barrier.hpp"
11+
#endif
12+
13+
namespace dnnl {
14+
namespace impl {
15+
16+
// Invokes f(ithr, nthr) once for every ithr in [0, nthr), dispatching the
// calls through the threading runtime selected at compile time by
// DNNL_CPU_THREADING_RUNTIME.
//
// nthr: requested number of threads; it is first passed through
//       adjust_num_threads(nthr, INT64_MAX) and the adjusted value is used.
// f:    callable receiving (thread index, number of threads).
//
// When DNNL_ENABLE_ITT_TASKS is defined, the primitive kind current on the
// calling thread is read up front and used to open/close ITT tasks around f
// in the OMP, TBB and THREADPOOL branches (see the per-branch conditions).
void parallel(int nthr, const std::function<void(int, int)> &f) {
17+
nthr = adjust_num_threads(nthr, INT64_MAX);
18+
#if DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_SEQ
19+
// Sequential runtime: run every index in order on the calling thread.
for (int i = 0; i < nthr; ++i) {
20+
f(i, nthr);
21+
}
22+
#else
23+
#if defined(DNNL_ENABLE_ITT_TASKS)
24+
// Both values are read here, on the calling thread, before any work is
// dispatched; the branches below reuse them inside the worker bodies.
auto task_primitive_kind = itt::primitive_task_get_current_kind();
25+
bool itt_enable = itt::get_itt(itt::__itt_task_level_high);
26+
#endif
27+
// Single-thread shortcut: call f directly and skip the runtime entirely.
if (nthr == 1) {
28+
f(0, 1);
29+
return;
30+
}
31+
#if DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_OMP
32+
#pragma omp parallel num_threads(nthr)
33+
{
34+
int nthr_ = omp_get_num_threads();
35+
int ithr_ = omp_get_thread_num();
36+
// The region is expected to get exactly the requested team size.
assert(nthr_ == nthr);
37+
#if defined(DNNL_ENABLE_ITT_TASKS)
38+
// An ITT task is opened only on threads with a non-zero index.
if (ithr_ && itt_enable) itt::primitive_task_start(task_primitive_kind);
39+
#endif
40+
f(ithr_, nthr_);
41+
#if defined(DNNL_ENABLE_ITT_TASKS)
42+
if (ithr_ && itt_enable) itt::primitive_task_end();
43+
#endif
44+
}
45+
#elif DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_TBB
46+
// TBB runtime: one iteration per thread index, using static_partitioner.
tbb::parallel_for(
47+
0, nthr,
48+
[&](int ithr) {
49+
#if defined(DNNL_ENABLE_ITT_TASKS)
50+
// Open an ITT task only if the executing thread currently reports
// primitive_kind::undefined as its task kind.
bool mark_task = itt::primitive_task_get_current_kind()
51+
== primitive_kind::undefined;
52+
if (mark_task && itt_enable)
53+
itt::primitive_task_start(task_primitive_kind);
54+
#endif
55+
f(ithr, nthr);
56+
#if defined(DNNL_ENABLE_ITT_TASKS)
57+
if (mark_task && itt_enable) itt::primitive_task_end();
58+
#endif
59+
},
60+
tbb::static_partitioner());
61+
#elif DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_TBB_AUTO
62+
// TBB_AUTO runtime: no explicit partitioner and no ITT task marking.
tbb::parallel_for(
63+
0, nthr, [&](int ithr) { f(ithr, nthr); });
64+
#elif DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_THREADPOOL
65+
using namespace dnnl::impl::threadpool_utils;
66+
dnnl::threadpool_interop::threadpool_iface *tp = get_active_threadpool();
67+
// No active threadpool, or dnnl_in_parallel() is true: run all indices
// sequentially on this thread with the threadpool deactivated, then
// re-activate the previously fetched tp.
if (!tp || dnnl_in_parallel()) {
68+
threadpool_utils::deactivate_threadpool();
69+
for (int ithr = 0; ithr < nthr; ithr++) {
70+
f(ithr, nthr);
71+
}
72+
threadpool_utils::activate_threadpool(tp);
73+
} else {
74+
bool async = tp->get_flags()
75+
& dnnl::threadpool_interop::threadpool_iface::ASYNCHRONOUS;
76+
// The barrier is only initialized and used when the ASYNCHRONOUS flag is
// set. NOTE(review): presumably b.wait() blocks until nthr notify() calls
// have happened -- confirm against counting_barrier_t.
counting_barrier_t b;
77+
if (async) b.init(nthr);
78+
// The lambda's own nthr parameter shadows the outer nthr, so f receives
// the value supplied by the threadpool's parallel_for.
tp->parallel_for(nthr, [&, tp](int ithr, int nthr) {
79+
// A thread whose active threadpool is already tp is treated as the
// master; any other thread activates tp (and opens an ITT task) around f.
bool is_master = threadpool_utils::get_active_threadpool() == tp;
80+
if (!is_master) {
81+
threadpool_utils::activate_threadpool(tp);
82+
#if defined(DNNL_ENABLE_ITT_TASKS)
83+
if (itt_enable) itt::primitive_task_start(task_primitive_kind);
84+
#endif
85+
}
86+
f(ithr, nthr);
87+
if (!is_master) {
88+
#if defined(DNNL_ENABLE_ITT_TASKS)
89+
if (itt_enable) itt::primitive_task_end();
90+
#endif
91+
threadpool_utils::deactivate_threadpool();
92+
}
93+
if (async) b.notify();
94+
});
95+
if (async) b.wait();
96+
}
97+
#endif
98+
#endif
99+
}
100+
101+
} // namespace impl
102+
} // namespace dnnl

src/common/dnnl_thread.hpp

+51-131
Original file line number | Diff line number | Diff line change
@@ -289,87 +289,7 @@ inline int adjust_num_threads(int nthr, dim_t work_amount) {
289289
#endif
290290
}
291291

292-
static inline void parallel(int nthr, const std::function<void(int, int)> &f) {
293-
nthr = adjust_num_threads(nthr, INT64_MAX);
294-
#if DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_SEQ
295-
for (int i = 0; i < nthr; ++i) {
296-
f(i, nthr);
297-
}
298-
#else
299-
#if defined(DNNL_ENABLE_ITT_TASKS)
300-
auto task_primitive_kind = itt::primitive_task_get_current_kind();
301-
bool itt_enable = itt::get_itt(itt::__itt_task_level_high);
302-
#endif
303-
if (nthr == 1) {
304-
f(0, 1);
305-
return;
306-
}
307-
#if DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_OMP
308-
#pragma omp parallel num_threads(nthr)
309-
{
310-
int nthr_ = omp_get_num_threads();
311-
int ithr_ = omp_get_thread_num();
312-
assert(nthr_ == nthr);
313-
#if defined(DNNL_ENABLE_ITT_TASKS)
314-
if (ithr_ && itt_enable) itt::primitive_task_start(task_primitive_kind);
315-
#endif
316-
f(ithr_, nthr_);
317-
#if defined(DNNL_ENABLE_ITT_TASKS)
318-
if (ithr_ && itt_enable) itt::primitive_task_end();
319-
#endif
320-
}
321-
#elif DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_TBB
322-
tbb::parallel_for(
323-
0, nthr,
324-
[&](int ithr) {
325-
#if defined(DNNL_ENABLE_ITT_TASKS)
326-
bool mark_task = itt::primitive_task_get_current_kind()
327-
== primitive_kind::undefined;
328-
if (mark_task && itt_enable)
329-
itt::primitive_task_start(task_primitive_kind);
330-
#endif
331-
f(ithr, nthr);
332-
#if defined(DNNL_ENABLE_ITT_TASKS)
333-
if (mark_task && itt_enable) itt::primitive_task_end();
334-
#endif
335-
},
336-
tbb::static_partitioner());
337-
#elif DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_THREADPOOL
338-
using namespace dnnl::impl::threadpool_utils;
339-
dnnl::threadpool_interop::threadpool_iface *tp = get_active_threadpool();
340-
if (!tp || dnnl_in_parallel()) {
341-
threadpool_utils::deactivate_threadpool();
342-
for (int ithr = 0; ithr < nthr; ithr++) {
343-
f(ithr, nthr);
344-
}
345-
threadpool_utils::activate_threadpool(tp);
346-
} else {
347-
bool async = tp->get_flags()
348-
& dnnl::threadpool_interop::threadpool_iface::ASYNCHRONOUS;
349-
counting_barrier_t b;
350-
if (async) b.init(nthr);
351-
tp->parallel_for(nthr, [&, tp](int ithr, int nthr) {
352-
bool is_master = threadpool_utils::get_active_threadpool() == tp;
353-
if (!is_master) {
354-
threadpool_utils::activate_threadpool(tp);
355-
#if defined(DNNL_ENABLE_ITT_TASKS)
356-
if (itt_enable) itt::primitive_task_start(task_primitive_kind);
357-
#endif
358-
}
359-
f(ithr, nthr);
360-
if (!is_master) {
361-
#if defined(DNNL_ENABLE_ITT_TASKS)
362-
if (itt_enable) itt::primitive_task_end();
363-
#endif
364-
threadpool_utils::deactivate_threadpool();
365-
}
366-
if (async) b.notify();
367-
});
368-
if (async) b.wait();
369-
}
370-
#endif
371-
#endif
372-
}
292+
void DNNL_API parallel(int nthr, const std::function<void(int, int)> &f);
373293

374294
// XXX: IMPORTANT!!!
375295
// Keep the functions below static.
@@ -664,82 +584,82 @@ void parallel_legacy(int nthr, F f) {
664584
nthr = adjust_num_threads(nthr, INT64_MAX);
665585
#if DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_SEQ
666586
assert(nthr == 1);
667-
f(0, 1);
587+
f(0, 1);
668588
#else
669589
#if defined(DNNL_ENABLE_ITT_TASKS)
670590
auto task_primitive_kind = itt::primitive_task_get_current_kind();
671-
bool itt_enable = itt::get_itt(itt::__itt_task_level_high);
591+
bool itt_enable = itt::get_itt(itt::__itt_task_level_high);
672592
#endif
673593
if (nthr == 1) {
674594
f(0, 1);
675595
return;
676596
}
677597
#if DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_OMP
678-
#pragma omp parallel num_threads(nthr)
679-
{
680-
int nthr_ = omp_get_num_threads();
681-
int ithr_ = omp_get_thread_num();
682-
assert(nthr_ == nthr);
598+
#pragma omp parallel num_threads(nthr)
599+
{
600+
int nthr_ = omp_get_num_threads();
601+
int ithr_ = omp_get_thread_num();
602+
assert(nthr_ == nthr);
683603
#if defined(DNNL_ENABLE_ITT_TASKS)
684-
if (ithr_ && itt_enable) itt::primitive_task_start(task_primitive_kind);
604+
if (ithr_ && itt_enable) itt::primitive_task_start(task_primitive_kind);
685605
#endif
686-
f(ithr_, nthr_);
606+
f(ithr_, nthr_);
687607
#if defined(DNNL_ENABLE_ITT_TASKS)
688-
if (ithr_ && itt_enable) itt::primitive_task_end();
608+
if (ithr_ && itt_enable) itt::primitive_task_end();
689609
#endif
690-
}
610+
}
691611
#elif DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_TBB
692612
tbb::parallel_for(
693-
0, nthr,
694-
[&](int ithr) {
613+
0, nthr,
614+
[&](int ithr) {
695615
#if defined(DNNL_ENABLE_ITT_TASKS)
696-
bool mark_task = itt::primitive_task_get_current_kind()
697-
== primitive_kind::undefined;
698-
if (mark_task && itt_enable)
699-
itt::primitive_task_start(task_primitive_kind);
616+
bool mark_task = itt::primitive_task_get_current_kind()
617+
== primitive_kind::undefined;
618+
if (mark_task && itt_enable)
619+
itt::primitive_task_start(task_primitive_kind);
700620
#endif
701-
f(ithr, nthr);
621+
f(ithr, nthr);
702622
#if defined(DNNL_ENABLE_ITT_TASKS)
703-
if (mark_task && itt_enable) itt::primitive_task_end();
623+
if (mark_task && itt_enable) itt::primitive_task_end();
704624
#endif
705-
},
706-
tbb::static_partitioner());
625+
},
626+
tbb::static_partitioner());
707627
#elif DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_TBB_AUTO
708628
tbb::parallel_for(
709-
0, nthr, [&](int ithr) { f(ithr, nthr); });
629+
0, nthr, [&](int ithr) { f(ithr, nthr); });
710630
#elif DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_THREADPOOL
711-
using namespace dnnl::impl::threadpool_utils;
712-
dnnl::threadpool_interop::threadpool_iface *tp = get_active_threadpool();
713-
if (!tp || dnnl_in_parallel()) {
714-
threadpool_utils::deactivate_threadpool();
715-
for (int ithr = 0; ithr < nthr; ithr++) {
716-
f(ithr, nthr);
717-
}
718-
threadpool_utils::activate_threadpool(tp);
719-
} else {
720-
bool async = tp->get_flags()
721-
& dnnl::threadpool_interop::threadpool_iface::ASYNCHRONOUS;
722-
counting_barrier_t b;
723-
if (async) b.init(nthr);
724-
tp->parallel_for(nthr, [&, tp](int ithr, int nthr) {
725-
bool is_master = threadpool_utils::get_active_threadpool() == tp;
726-
if (!is_master) {
727-
threadpool_utils::activate_threadpool(tp);
631+
using namespace dnnl::impl::threadpool_utils;
632+
dnnl::threadpool_interop::threadpool_iface *tp = get_active_threadpool();
633+
if (!tp || dnnl_in_parallel()) {
634+
threadpool_utils::deactivate_threadpool();
635+
for (int ithr = 0; ithr < nthr; ithr++) {
636+
f(ithr, nthr);
637+
}
638+
threadpool_utils::activate_threadpool(tp);
639+
} else {
640+
bool async = tp->get_flags()
641+
& dnnl::threadpool_interop::threadpool_iface::ASYNCHRONOUS;
642+
counting_barrier_t b;
643+
if (async) b.init(nthr);
644+
tp->parallel_for(nthr, [&, tp](int ithr, int nthr) {
645+
bool is_master = threadpool_utils::get_active_threadpool() == tp;
646+
if (!is_master) {
647+
threadpool_utils::activate_threadpool(tp);
728648
#if defined(DNNL_ENABLE_ITT_TASKS)
729-
if (itt_enable) itt::primitive_task_start(task_primitive_kind);
649+
if (itt_enable) itt::primitive_task_start(task_primitive_kind);
730650
#endif
731-
}
732-
f(ithr, nthr);
733-
if (!is_master) {
651+
}
652+
f(ithr, nthr);
653+
if (!is_master) {
734654
#if defined(DNNL_ENABLE_ITT_TASKS)
735-
if (itt_enable) itt::primitive_task_end();
655+
if (itt_enable) itt::primitive_task_end();
736656
#endif
737-
threadpool_utils::deactivate_threadpool();
738-
}
739-
if (async) b.notify();
740-
});
741-
if (async) b.wait();
742-
}
657+
threadpool_utils::deactivate_threadpool();
658+
}
659+
if (async) b.notify();
660+
});
661+
if (async) b.wait();
662+
}
743663
#endif
744664
#endif
745665
}

0 commit comments

Comments
 (0)