@@ -463,6 +463,20 @@ class kernel_search_manager_t {
463
463
auto ret = desc_groups.emplace (
464
464
d.reqs .str (), search_kernel_desc_group_t (d.reqs ));
465
465
ret.first ->second .add_desc (d);
466
+ for (int dist : {1 , 3 }) {
467
+ auto _d = d;
468
+ _d.prefetch = prefetch_desc_t {dist, true , true };
469
+ reset_reqs (_d);
470
+ _d.is_finalized = false ;
471
+ if (!finalize_conv_desc (_d, bench_mger_.hw ())) {
472
+ std::cout << d.brief_str () << " : \033 [1;31mFAIL\033 [0m"
473
+ << std::endl;
474
+ continue ;
475
+ }
476
+ std::cout << _d.brief_str () << " : \033 [1;32mOK\033 [0m"
477
+ << std::endl;
478
+ ret.first ->second .add_desc (_d);
479
+ }
466
480
}
467
481
std::vector<search_kernel_desc_group_t > ret;
468
482
for (auto &kv : desc_groups) {
@@ -518,6 +532,7 @@ class search_sequence_t {
518
532
search_sequence_t (const std::vector<kernel_desc_t > &descs, int max_entries)
519
533
: max_entries_(max_entries) {
520
534
std::vector<std::vector<pvar_tile_t >> tiles;
535
+ pvar_t prefetch_dim (" p" );
521
536
for (int i = 0 ; i < (int )descs.size (); i++) {
522
537
auto &d = descs[i];
523
538
entries_.emplace_back (i, d);
@@ -526,6 +541,9 @@ class search_sequence_t {
526
541
auto tg = to_gemm (d.thread_group_tile , d.prop );
527
542
d_tiles.push_back (iter);
528
543
d_tiles.push_back (tg);
544
+ pvar_tile_t prefetch_tile;
545
+ prefetch_tile[prefetch_dim] = d.prefetch .dist ;
546
+ d_tiles.push_back (prefetch_tile);
529
547
tiles.push_back (std::move (d_tiles));
530
548
}
531
549
tile_to_vec_ = tile_to_vec_t (tiles);
0 commit comments