Skip to content
This repository was archived by the owner on Aug 30, 2024. It is now read-only.

Commit 18cc4e1

Browse files
sunjiweiswift authored and DDEle committed
save
1 parent 1bd0290 commit 18cc4e1

File tree

2 files changed

+14
-9
lines changed

2 files changed

+14
-9
lines changed

include/subgroup/tile/impl/load_xe.hpp

+9-9
Original file line number | Diff line number | Diff line change
@@ -118,8 +118,8 @@ tile_load(tile_t& tile, payload_t& payload) {
118118
static constexpr uint32_t max_load_width_in_elem =
119119
load_store_attr::max_load_width_in_bytes / sizeof(dtype);
120120

121-
// static constexpr uint32_t max_trans_load_height_in_elem =
122-
// load_store_attr::max_trans_load_height_in_elem;
121+
// static constexpr uint32_t max_trans_load_height_in_elem =
122+
// load_store_attr::max_trans_load_height_in_elem;
123123
static constexpr uint32_t max_load_height_in_elem =
124124
load_store_attr::max_load_height_in_elem;
125125

@@ -206,6 +206,11 @@ tile_load(tile_t& tile, payload_t& payload) {
206206
#pragma unroll
207207
for (uint32_t ii = 0; ii < block_size_y / ld_blk_size_y; ++ii) {
208208
constexpr uint32_t load_elems = ld_blk_size_y * block_size_x * arr_len;
209+
uint32_t address_offset_x =
210+
(mem_transpose ? (offset_y + ii * ld_blk_size_y) : offset_x) /
211+
scale_factor;
212+
uint32_t address_offset_y =
213+
mem_transpose ? offset_x : (offset_y + ii * ld_blk_size_y);
209214
reg_tmp.xetla_format<native_type_t<load_dtype>>() = xetla_load_global<
210215
native_type_t<load_dtype>,
211216
(trans ? ld_blk_size_y : block_size_x) / scale_factor,
@@ -222,13 +227,8 @@ tile_load(tile_t& tile, payload_t& payload) {
222227
payload.surface_width,
223228
payload.surface_height,
224229
payload.surface_pitch,
225-
payload.offset_x +
226-
(mem_transpose ? (offset_y / (int)scale_factor +
227-
ii * ld_blk_size_y / (int)scale_factor)
228-
: (offset_x / scale_factor)),
229-
230-
payload.offset_y +
231-
(mem_transpose ? offset_x : (offset_y + ii * ld_blk_size_y)));
230+
payload.offset_x + address_offset_x,
231+
payload.offset_y + address_offset_y);
232232

233233
if constexpr (reg_transpose && trans) {
234234
reg_blk.xetla_select<load_elems, 1>(ii * load_elems)

include/subgroup/tile/impl/payload_xe.hpp

+5
Original file line number | Diff line number | Diff line change
@@ -76,9 +76,14 @@ struct mem_payload_t<
7676
!(std::is_same_v<dtype_, int4x2> || std::is_same_v<dtype_, int4x8>);
7777

7878
// Transformed and Transposed cannot be set to true at the same time.
79+
// If Transformed is true then:
80+
// sizeof(T) must be 1- or 2-byte (bytes or words).
7981
static constexpr bool mem_transform = (sizeof(dtype) <= 2) && !trans &&
8082
(register_layout == reg_layout::vnni_tiled ||
8183
register_layout == reg_layout::vnni_tiled_col_major);
84+
85+
// If Transposed is true then:
86+
// sizeof(T) must be 4- or 8-byte (dwords or qwords).
8287
static constexpr bool mem_transpose_dtype_less4bytes =
8388
(sizeof(dtype) < 4) && trans;
8489

0 commit comments

Comments
 (0)