@@ -289,87 +289,7 @@ inline int adjust_num_threads(int nthr, dim_t work_amount) {
289
289
#endif
290
290
}
291
291
292
- static inline void parallel (int nthr, const std::function<void (int , int )> &f) {
293
- nthr = adjust_num_threads (nthr, INT64_MAX);
294
- #if DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_SEQ
295
- for (int i = 0 ; i < nthr; ++i) {
296
- f (i, nthr);
297
- }
298
- #else
299
- #if defined(DNNL_ENABLE_ITT_TASKS)
300
- auto task_primitive_kind = itt::primitive_task_get_current_kind ();
301
- bool itt_enable = itt::get_itt (itt::__itt_task_level_high);
302
- #endif
303
- if (nthr == 1 ) {
304
- f (0 , 1 );
305
- return ;
306
- }
307
- #if DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_OMP
308
- #pragma omp parallel num_threads(nthr)
309
- {
310
- int nthr_ = omp_get_num_threads ();
311
- int ithr_ = omp_get_thread_num ();
312
- assert (nthr_ == nthr);
313
- #if defined(DNNL_ENABLE_ITT_TASKS)
314
- if (ithr_ && itt_enable) itt::primitive_task_start (task_primitive_kind);
315
- #endif
316
- f (ithr_, nthr_);
317
- #if defined(DNNL_ENABLE_ITT_TASKS)
318
- if (ithr_ && itt_enable) itt::primitive_task_end ();
319
- #endif
320
- }
321
- #elif DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_TBB
322
- tbb::parallel_for (
323
- 0 , nthr,
324
- [&](int ithr) {
325
- #if defined(DNNL_ENABLE_ITT_TASKS)
326
- bool mark_task = itt::primitive_task_get_current_kind ()
327
- == primitive_kind::undefined;
328
- if (mark_task && itt_enable)
329
- itt::primitive_task_start (task_primitive_kind);
330
- #endif
331
- f (ithr, nthr);
332
- #if defined(DNNL_ENABLE_ITT_TASKS)
333
- if (mark_task && itt_enable) itt::primitive_task_end ();
334
- #endif
335
- },
336
- tbb::static_partitioner ());
337
- #elif DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_THREADPOOL
338
- using namespace dnnl ::impl::threadpool_utils;
339
- dnnl::threadpool_interop::threadpool_iface *tp = get_active_threadpool ();
340
- if (!tp || dnnl_in_parallel ()) {
341
- threadpool_utils::deactivate_threadpool ();
342
- for (int ithr = 0 ; ithr < nthr; ithr++) {
343
- f (ithr, nthr);
344
- }
345
- threadpool_utils::activate_threadpool (tp);
346
- } else {
347
- bool async = tp->get_flags ()
348
- & dnnl::threadpool_interop::threadpool_iface::ASYNCHRONOUS;
349
- counting_barrier_t b;
350
- if (async) b.init (nthr);
351
- tp->parallel_for (nthr, [&, tp](int ithr, int nthr) {
352
- bool is_master = threadpool_utils::get_active_threadpool () == tp;
353
- if (!is_master) {
354
- threadpool_utils::activate_threadpool (tp);
355
- #if defined(DNNL_ENABLE_ITT_TASKS)
356
- if (itt_enable) itt::primitive_task_start (task_primitive_kind);
357
- #endif
358
- }
359
- f (ithr, nthr);
360
- if (!is_master) {
361
- #if defined(DNNL_ENABLE_ITT_TASKS)
362
- if (itt_enable) itt::primitive_task_end ();
363
- #endif
364
- threadpool_utils::deactivate_threadpool ();
365
- }
366
- if (async) b.notify ();
367
- });
368
- if (async) b.wait ();
369
- }
370
- #endif
371
- #endif
372
- }
292
+ void DNNL_API parallel (int nthr, const std::function<void (int , int )> &f);
373
293
374
294
// XXX: IMPORTANT!!!
375
295
// Keep the functions below static.
@@ -664,82 +584,82 @@ void parallel_legacy(int nthr, F f) {
664
584
nthr = adjust_num_threads (nthr, INT64_MAX);
665
585
#if DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_SEQ
666
586
assert (nthr == 1 );
667
- f (0 , 1 );
587
+ f (0 , 1 );
668
588
#else
669
589
#if defined(DNNL_ENABLE_ITT_TASKS)
670
590
auto task_primitive_kind = itt::primitive_task_get_current_kind ();
671
- bool itt_enable = itt::get_itt (itt::__itt_task_level_high);
591
+ bool itt_enable = itt::get_itt (itt::__itt_task_level_high);
672
592
#endif
673
593
if (nthr == 1 ) {
674
594
f (0 , 1 );
675
595
return ;
676
596
}
677
597
#if DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_OMP
678
- #pragma omp parallel num_threads(nthr)
679
- {
680
- int nthr_ = omp_get_num_threads ();
681
- int ithr_ = omp_get_thread_num ();
682
- assert (nthr_ == nthr);
598
+ #pragma omp parallel num_threads(nthr)
599
+ {
600
+ int nthr_ = omp_get_num_threads ();
601
+ int ithr_ = omp_get_thread_num ();
602
+ assert (nthr_ == nthr);
683
603
#if defined(DNNL_ENABLE_ITT_TASKS)
684
- if (ithr_ && itt_enable) itt::primitive_task_start (task_primitive_kind);
604
+ if (ithr_ && itt_enable) itt::primitive_task_start (task_primitive_kind);
685
605
#endif
686
- f (ithr_, nthr_);
606
+ f (ithr_, nthr_);
687
607
#if defined(DNNL_ENABLE_ITT_TASKS)
688
- if (ithr_ && itt_enable) itt::primitive_task_end ();
608
+ if (ithr_ && itt_enable) itt::primitive_task_end ();
689
609
#endif
690
- }
610
+ }
691
611
#elif DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_TBB
692
612
tbb::parallel_for (
693
- 0 , nthr,
694
- [&](int ithr) {
613
+ 0 , nthr,
614
+ [&](int ithr) {
695
615
#if defined(DNNL_ENABLE_ITT_TASKS)
696
- bool mark_task = itt::primitive_task_get_current_kind ()
697
- == primitive_kind::undefined;
698
- if (mark_task && itt_enable)
699
- itt::primitive_task_start (task_primitive_kind);
616
+ bool mark_task = itt::primitive_task_get_current_kind ()
617
+ == primitive_kind::undefined;
618
+ if (mark_task && itt_enable)
619
+ itt::primitive_task_start (task_primitive_kind);
700
620
#endif
701
- f (ithr, nthr);
621
+ f (ithr, nthr);
702
622
#if defined(DNNL_ENABLE_ITT_TASKS)
703
- if (mark_task && itt_enable) itt::primitive_task_end ();
623
+ if (mark_task && itt_enable) itt::primitive_task_end ();
704
624
#endif
705
- },
706
- tbb::static_partitioner ());
625
+ },
626
+ tbb::static_partitioner ());
707
627
#elif DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_TBB_AUTO
708
628
tbb::parallel_for (
709
- 0 , nthr, [&](int ithr) { f (ithr, nthr); });
629
+ 0 , nthr, [&](int ithr) { f (ithr, nthr); });
710
630
#elif DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_THREADPOOL
711
- using namespace dnnl ::impl::threadpool_utils;
712
- dnnl::threadpool_interop::threadpool_iface *tp = get_active_threadpool ();
713
- if (!tp || dnnl_in_parallel ()) {
714
- threadpool_utils::deactivate_threadpool ();
715
- for (int ithr = 0 ; ithr < nthr; ithr++) {
716
- f (ithr, nthr);
717
- }
718
- threadpool_utils::activate_threadpool (tp);
719
- } else {
720
- bool async = tp->get_flags ()
721
- & dnnl::threadpool_interop::threadpool_iface::ASYNCHRONOUS;
722
- counting_barrier_t b;
723
- if (async) b.init (nthr);
724
- tp->parallel_for (nthr, [&, tp](int ithr, int nthr) {
725
- bool is_master = threadpool_utils::get_active_threadpool () == tp;
726
- if (!is_master) {
727
- threadpool_utils::activate_threadpool (tp);
631
+ using namespace dnnl ::impl::threadpool_utils;
632
+ dnnl::threadpool_interop::threadpool_iface *tp = get_active_threadpool ();
633
+ if (!tp || dnnl_in_parallel ()) {
634
+ threadpool_utils::deactivate_threadpool ();
635
+ for (int ithr = 0 ; ithr < nthr; ithr++) {
636
+ f (ithr, nthr);
637
+ }
638
+ threadpool_utils::activate_threadpool (tp);
639
+ } else {
640
+ bool async = tp->get_flags ()
641
+ & dnnl::threadpool_interop::threadpool_iface::ASYNCHRONOUS;
642
+ counting_barrier_t b;
643
+ if (async) b.init (nthr);
644
+ tp->parallel_for (nthr, [&, tp](int ithr, int nthr) {
645
+ bool is_master = threadpool_utils::get_active_threadpool () == tp;
646
+ if (!is_master) {
647
+ threadpool_utils::activate_threadpool (tp);
728
648
#if defined(DNNL_ENABLE_ITT_TASKS)
729
- if (itt_enable) itt::primitive_task_start (task_primitive_kind);
649
+ if (itt_enable) itt::primitive_task_start (task_primitive_kind);
730
650
#endif
731
- }
732
- f (ithr, nthr);
733
- if (!is_master) {
651
+ }
652
+ f (ithr, nthr);
653
+ if (!is_master) {
734
654
#if defined(DNNL_ENABLE_ITT_TASKS)
735
- if (itt_enable) itt::primitive_task_end ();
655
+ if (itt_enable) itt::primitive_task_end ();
736
656
#endif
737
- threadpool_utils::deactivate_threadpool ();
738
- }
739
- if (async) b.notify ();
740
- });
741
- if (async) b.wait ();
742
- }
657
+ threadpool_utils::deactivate_threadpool ();
658
+ }
659
+ if (async) b.notify ();
660
+ });
661
+ if (async) b.wait ();
662
+ }
743
663
#endif
744
664
#endif
745
665
}
0 commit comments