@@ -106,19 +106,31 @@ tile_load(tile_t& tile, payload_t& payload) {
106
106
static constexpr bool mem_transform = payload_t ::mem_transform;
107
107
108
108
using load_store_attr = load_store_attr_t <msg_type::block_2d, arch_tag>;
109
+
110
+ // static constexpr uint32_t max_load_width_in_elem = trans
111
+ // ? load_store_attr::max_trans_load_width_in_bytes / sizeof(dtype)
112
+ // : load_store_attr::max_load_width_in_bytes / sizeof(dtype);
113
+ // static constexpr uint32_t max_load_height_in_elem = trans
114
+ // ? load_store_attr::max_trans_load_height_in_elem
115
+ // : load_store_attr::max_load_height_in_elem;
116
+ static constexpr uint32_t max_trans_load_width_in_elem =
117
+ load_store_attr::max_trans_load_width_in_bytes / sizeof (dtype);
118
+ static constexpr uint32_t max_load_width_in_elem =
119
+ load_store_attr::max_load_width_in_bytes / sizeof (dtype);
120
+
121
+ // static constexpr uint32_t max_trans_load_height_in_elem =
122
+ // load_store_attr::max_trans_load_height_in_elem;
123
+ static constexpr uint32_t max_load_height_in_elem =
124
+ load_store_attr::max_load_height_in_elem;
125
+
109
126
static constexpr uint32_t elems_per_CL =
110
127
load_store_attr::cache_line_size_in_bytes / sizeof (dtype);
128
+
111
129
static constexpr uint32_t elems_per_reg =
112
130
register_bytes_t <arch_tag>::reg_in_bytes / sizeof (dtype);
113
- static constexpr int32_t max_load_block_height =
114
- load_store_attr::max_load_height_in_elem;
115
- static constexpr int32_t max_block_width =
116
- load_store_attr::max_load_width_in_bytes / sizeof (dtype);
117
- static constexpr int32_t max_trans_block_width =
118
- load_store_attr::max_trans_load_width_in_bytes / sizeof (dtype);
119
131
120
132
static constexpr uint32_t ld_blk_size_y_limit =
121
- mem_transpose ? max_trans_block_width : max_load_block_height ;
133
+ mem_transpose ? max_trans_load_width_in_elem : max_load_height_in_elem ;
122
134
static constexpr uint32_t ld_blk_size_y = reg_transpose
123
135
? block_size_y
124
136
: std::min (ld_blk_size_y_limit, block_size_y);
@@ -150,20 +162,21 @@ tile_load(tile_t& tile, payload_t& payload) {
150
162
151
163
static_assert (
152
164
reg_transpose || mem_transpose ||
153
- (!mem_transpose && (block_size_x * arr_len) <= max_block_width),
165
+ (!mem_transpose &&
166
+ (block_size_x * arr_len) <= max_load_width_in_elem),
154
167
" When reg_transpose was disabled, check 2d block width "
155
168
" restriction" );
156
169
static_assert (
157
170
!reg_transpose ||
158
171
(!mem_transpose &&
159
- (block_size_x * arr_len) <= max_trans_block_width ) ||
160
- (mem_transpose && (block_size_y * arr_len) <= max_block_width ),
172
+ (block_size_x * arr_len) <= max_trans_load_width_in_elem ) ||
173
+ (mem_transpose && (block_size_y * arr_len) <= max_load_width_in_elem ),
161
174
" When reg_transpose was enabled, check 2d block width "
162
175
" restriction" );
163
176
static_assert (
164
177
!reg_transpose ||
165
- (!mem_transpose && (block_size_y <= max_load_block_height )) ||
166
- (mem_transpose && (block_size_x) <= max_load_block_height ),
178
+ (!mem_transpose && (block_size_y <= max_load_height_in_elem )) ||
179
+ (mem_transpose && (block_size_x) <= max_load_height_in_elem ),
167
180
" When reg_transpose was enabled, check 2d block height "
168
181
" restriction" );
169
182
static_assert (
0 commit comments