Skip to content

Commit 3f604ab

Browse files
committed
xe: jit: reorder: prevent scalar mov in 2d impl
1 parent 5da3526 commit 3f604ab

File tree

1 file changed

+35
-21
lines changed

1 file changed

+35
-21
lines changed

src/gpu/intel/jit/codegen/reorder.hpp

+35 -21
Original file line number | Diff line number | Diff line change
@@ -1341,47 +1341,53 @@ void align_src_dst_offset(GeneratorT *host, ngen_register_scope_t &scope,
13411341
// Reorder may require several steps, in this case a temporary buffer T is
13421342
// allocated. For example: A -> T -> B or A -> B -> T -> B
13431343
class reorder_2d_impl_t {
1344+
struct reorder_step_t;
1345+
13441346
public:
13451347
reorder_2d_impl_t(ngen::HW hw, tensor_t tile, const layout_t &src_layout,
13461348
const layout_t &dst_layout)
1347-
: hw_(hw), src_(src_layout), dst_(dst_layout), tile_(std::move(tile)) {
1348-
gpu_assert(src_.type() == dst_.type());
1349-
}
1349+
: hw_(hw), tile_(std::move(tile)) {
1350+
gpu_assert(src_layout.type() == dst_layout.type());
13501351

1351-
const tensor_t &tile() const { return tile_; }
1352-
1353-
template <typename GeneratorT>
1354-
void emit(GeneratorT *host, ngen_register_scope_t &scope,
1355-
const reg_buf_data_t &src_rd, const reg_buf_data_t &dst_rd) {
13561352
dim_idx_t a_idx, b_idx;
13571353
int tile_a, tile_b;
13581354
tile_to_2d_dims(tile_, a_idx, b_idx, tile_a, tile_b);
13591355

13601356
// Convert src/dst to 2D layouts.
1361-
dim_assignment_t to_ab(src_.ndims(), 2);
1357+
dim_assignment_t to_ab(src_layout.ndims(), 2);
13621358
to_ab.assign(a_idx, 0);
13631359
to_ab.assign(b_idx, 1);
1364-
auto src_ab = to_ab.map(src_);
1365-
auto dst_ab = to_ab.map(dst_);
1360+
auto src_ab = to_ab.map(src_layout);
1361+
auto dst_ab = to_ab.map(dst_layout);
13661362

1363+
src_ = src_ab;
1364+
dst_ = dst_ab;
13671365
// Find minimal cost reorder path between layouts.
1368-
auto path = find_min_cost_path(hw_, src_ab, dst_ab, tile_a, tile_b);
1366+
path_ = find_min_cost_path(hw_, src_ab, dst_ab, tile_a, tile_b);
1367+
}
1368+
1369+
const tensor_t &tile() const { return tile_; }
1370+
const std::vector<reorder_step_t> &path() const { return path_; }
1371+
1372+
template <typename GeneratorT>
1373+
void emit(GeneratorT *host, ngen_register_scope_t &scope,
1374+
const reg_buf_data_t &src_rd, const reg_buf_data_t &dst_rd) {
1375+
auto &orig_type = src_.type();
13691376

13701377
// Allocate a temporary GRF buffer if needed.
13711378
reg_buf_data_t tmp;
1372-
if (path.size() > 1) {
1379+
if (path_.size() > 1) {
13731380
const int grf_size = ngen::GRF::bytes(hw_);
13741381
tmp = scope.alloc_reg_buf_data(
1375-
utils::div_up(dst_ab.size(), grf_size));
1382+
utils::div_up(dst_.size(), grf_size));
13761383
}
13771384

13781385
// Iterate through found reorders.
1379-
auto *prev_layout = &src_ab;
1386+
auto *prev_layout = &src_;
13801387
auto prev_rd = src_rd;
1381-
int path_len = int(path.size());
1382-
auto &orig_type = src_ab.type();
1388+
int path_len = int(path_.size());
13831389
for (int i = 0; i < path_len; i++) {
1384-
auto &step = path[i];
1390+
auto &step = path_[i];
13851391
auto &tile = step.tile;
13861392
auto &type = step.type;
13871393
auto *next_layout = &step.layout;
@@ -1777,11 +1783,10 @@ class reorder_2d_impl_t {
17771783
}
17781784

17791785
ngen::HW hw_;
1780-
1786+
tensor_t tile_;
17811787
layout_t src_;
17821788
layout_t dst_;
1783-
1784-
tensor_t tile_;
1789+
std::vector<reorder_step_t> path_;
17851790
};
17861791

17871792
class reorder_impl_t {
@@ -1914,6 +1919,15 @@ class reorder_impl_t {
19141919
scope.safeRelease(dummy);
19151920

19161921
reorder_2d_impl_t r(hw_, tile, src_tile_layout, dst_tile_layout);
1922+
bool tile_ok = true;
1923+
for (auto &step : r.path())
1924+
if (step.tile.elems() < 2) {
1925+
tile_ok = false;
1926+
break;
1927+
}
1928+
// Skip any 2d reorder that attempts scalar moves
1929+
if (!tile_ok) continue;
1930+
19171931
src_layout_.for_each_tile(
19181932
tile, [&](const std::vector<dim_t> &start) {
19191933
auto src_off = src_layout_.offset<dim_t>(start);

0 commit comments

Comments
 (0)