|
163 | 163 | #define AS_DATA4_T as_float4
|
164 | 164 | #define AS_DATA8_T as_float8
|
165 | 165 |
|
166 |
| -#define AS_UINT_T as_uint |
167 |
| -#define AS_UINT2_T as_uint2 |
168 |
| -#define AS_UINT4_T as_uint4 |
169 |
| -#define AS_UINT8_T as_uint8 |
170 |
| - |
171 | 166 | #define BLOCK_DATA_T uint
|
172 | 167 | #define BLOCK_DATA2_T uint2
|
173 | 168 | #define BLOCK_DATA4_T uint4
|
|
286 | 281 | #define AS_DATA4_T as_half4
|
287 | 282 | #define AS_DATA8_T as_half8
|
288 | 283 |
|
289 |
| -#define AS_UINT_T as_ushort |
290 |
| -#define AS_UINT2_T as_ushort2 |
291 |
| -#define AS_UINT4_T as_ushort4 |
292 |
| -#define AS_UINT8_T as_ushort8 |
293 |
| - |
294 | 284 | #define BLOCK_DATA_T ushort
|
295 | 285 | #define BLOCK_DATA2_T ushort2
|
296 | 286 | #define BLOCK_DATA4_T ushort4
|
|
347 | 337 | #define AS_DATA4_T as_ushort4
|
348 | 338 | #define AS_DATA8_T as_ushort8
|
349 | 339 |
|
350 |
| -#define AS_UINT_T as_ushort |
351 |
| -#define AS_UINT2_T as_ushort2 |
352 |
| -#define AS_UINT4_T as_ushort4 |
353 |
| -#define AS_UINT8_T as_ushort8 |
354 |
| - |
355 | 340 | #define BLOCK_DATA_T ushort
|
356 | 341 | #define BLOCK_DATA2_T ushort2
|
357 | 342 | #define BLOCK_DATA4_T ushort4
|
|
408 | 393 | #define AS_DATA8_T as_uchar8
|
409 | 394 | #define AS_DATA16_T as_uchar16
|
410 | 395 |
|
411 |
| -#define AS_UINT_T as_uchar |
412 |
| -#define AS_UINT2_T as_uchar2 |
413 |
| -#define AS_UINT4_T as_uchar4 |
414 |
| -#define AS_UINT8_T as_uchar8 |
415 |
| -#define AS_INT8_T as_uint8 |
416 |
| - |
417 | 396 | #define BLOCK_DATA_T uchar
|
418 | 397 | #define BLOCK_DATA2_T uchar2
|
419 | 398 | #define BLOCK_DATA4_T uchar4
|
|
476 | 455 | #define AS_DATA8_T as_uchar8
|
477 | 456 | #define AS_DATA16_T as_uchar16
|
478 | 457 |
|
479 |
| -#define AS_UINT_T as_uchar |
480 |
| -#define AS_UINT2_T as_uchar2 |
481 |
| -#define AS_UINT4_T as_uchar4 |
482 |
| -#define AS_UINT8_T as_uchar8 |
483 |
| -#define AS_INT8_T as_uint8 |
484 |
| - |
485 | 458 | #define BLOCK_DATA_T uchar
|
486 | 459 | #define BLOCK_DATA2_T uchar2
|
487 | 460 | #define BLOCK_DATA4_T uchar4
|
|
607 | 580 | #define AS_DATA8_T as_char8
|
608 | 581 | #define AS_DATA16_T as_char16
|
609 | 582 |
|
610 |
| -#define AS_UINT_T as_uchar |
611 |
| -#define AS_UINT2_T as_uchar2 |
612 |
| -#define AS_UINT4_T as_uchar4 |
613 |
| -#define AS_UINT8_T as_uchar8 |
614 |
| -#define AS_INT8_T as_int8 |
615 |
| - |
616 | 583 | #define BLOCK_DATA_T uchar
|
617 | 584 | #define BLOCK_DATA2_T uchar2
|
618 | 585 | #define BLOCK_DATA4_T uchar4
|
|
670 | 637 | #define AS_DATA8_T as_uchar8
|
671 | 638 | #define AS_DATA16_T as_uchar16
|
672 | 639 |
|
673 |
| -#define AS_UINT_T as_uchar |
674 |
| -#define AS_UINT2_T as_uchar2 |
675 |
| -#define AS_UINT4_T as_uchar4 |
676 |
| -#define AS_UINT8_T as_uchar8 |
677 |
| -#define AS_INT8_T as_uint8 |
678 |
| - |
679 | 640 | #define BLOCK_DATA_T uchar
|
680 | 641 | #define BLOCK_DATA2_T uchar2
|
681 | 642 | #define BLOCK_DATA4_T uchar4
|
|
736 | 697 | #define AS_DATA4_T as_int4
|
737 | 698 | #define AS_DATA8_T as_int8
|
738 | 699 |
|
739 |
| -#define AS_UINT_T as_uint |
740 |
| -#define AS_UINT2_T as_uint2 |
741 |
| -#define AS_UINT4_T as_uint4 |
742 |
| -#define AS_UINT8_T as_uint8 |
743 |
| - |
744 | 700 | #define BLOCK_DATA_T uint
|
745 | 701 | #define BLOCK_DATA2_T uint2
|
746 | 702 | #define BLOCK_DATA4_T uint4
|
|
941 | 897 | #endif
|
942 | 898 |
|
943 | 899 | #ifdef A_DATA_T
|
944 |
| -#define A_DATA8_T CONCAT2(A_DATA_T, 8) |
945 | 900 | #if A_DT_BF16
|
946 | 901 | #define A_TO_REF(x) cvt_bf16_to_f32(x)
|
947 | 902 | #define A_TO_REF8(x) cvt_bf16_to_f32(x)
|
|
1232 | 1187 | #define BLOCK_WRITE_DST4(ptr, v) \
|
1233 | 1188 | intel_sub_group_block_write_uc4((__global uchar *)ptr, as_uchar4(v))
|
1234 | 1189 |
|
1235 |
| -#define BLOCK_READ_DST8(ptr) \ |
1236 |
| - AS_DST_DATA8_T(intel_sub_group_block_read_uc8((__global uchar *)ptr)) |
1237 | 1190 | #define BLOCK_WRITE_DST8(ptr, v) \
|
1238 | 1191 | intel_sub_group_block_write_uc8((__global uchar *)ptr, as_uchar8(v))
|
1239 | 1192 |
|
1240 |
| -#define BLOCK_READ_DST16(ptr) \ |
1241 |
| - AS_DST_DATA16_T(intel_sub_group_block_read_uc16((__global uchar *)ptr)) |
1242 | 1193 | #define BLOCK_WRITE_DST16(ptr, v) \
|
1243 | 1194 | intel_sub_group_block_write_uc16((__global uchar *)ptr, as_uchar16(v))
|
1244 | 1195 |
|
1245 | 1196 | #elif DST_DT_F16 || DST_DT_BF16
|
1246 |
| -#define BLOCK_READ_DST(ptr) \ |
1247 |
| - AS_DST_DATA_T(intel_sub_group_block_read_us((__global ushort *)ptr)) |
1248 | 1197 | #define BLOCK_WRITE_DST(ptr, v) \
|
1249 | 1198 | intel_sub_group_block_write_us((__global ushort *)ptr, as_ushort(v))
|
1250 | 1199 |
|
1251 |
| -#define BLOCK_READ_DST2(ptr) \ |
1252 |
| - AS_DST_DATA2_T(intel_sub_group_block_read_us2((__global ushort *)ptr)) |
1253 | 1200 | #define BLOCK_WRITE_DST2(ptr, v) \
|
1254 | 1201 | intel_sub_group_block_write_us2((__global ushort *)ptr, as_ushort2(v))
|
1255 | 1202 |
|
1256 |
| -#define BLOCK_READ_DST4(ptr) \ |
1257 |
| - AS_DST_DATA4_T(intel_sub_group_block_read_us4((__global ushort *)ptr)) |
1258 | 1203 | #define BLOCK_WRITE_DST4(ptr, v) \
|
1259 | 1204 | intel_sub_group_block_write_us4((__global ushort *)ptr, as_ushort4(v))
|
1260 | 1205 |
|
1261 |
| -#define BLOCK_READ_DST8(ptr) \ |
1262 |
| - AS_DST_DATA8_T(intel_sub_group_block_read_us8((__global ushort *)ptr)) |
1263 | 1206 | #define BLOCK_WRITE_DST8(ptr, v) \
|
1264 | 1207 | intel_sub_group_block_write_us8((__global ushort *)ptr, as_ushort8(v))
|
1265 | 1208 |
|
1266 |
| -#define BLOCK_READ_DST16(ptr) \ |
1267 |
| - (DST_DATA16_T)( \ |
1268 |
| - BLOCK_READ_DST8(ptr), BLOCK_READ_DST8(ptr + 8 * SUB_GROUP_SIZE)) |
1269 | 1209 | #define BLOCK_WRITE_DST16(ptr, v) \
|
1270 | 1210 | do { \
|
1271 | 1211 | BLOCK_WRITE_DST8(ptr, (v).s01234567); \
|
|
1274 | 1214 |
|
1275 | 1215 | #elif DST_DT_S32 || DST_DT_F32
|
1276 | 1216 |
|
1277 |
| -#define BLOCK_READ_DST(ptr) \ |
1278 |
| - AS_DST_DATA_T(intel_sub_group_block_read((__global uint *)ptr)) |
1279 | 1217 | #define BLOCK_WRITE_DST(ptr, v) \
|
1280 | 1218 | intel_sub_group_block_write((__global uint *)ptr, as_uint(v))
|
1281 | 1219 |
|
1282 |
| -#define BLOCK_READ_DST2(ptr) \ |
1283 |
| - AS_DST_DATA2_T(intel_sub_group_block_read2((__global uint *)ptr)) |
1284 | 1220 | #define BLOCK_WRITE_DST2(ptr, v) \
|
1285 | 1221 | intel_sub_group_block_write2((__global uint *)ptr, as_uint2(v))
|
1286 | 1222 |
|
1287 |
| -#define BLOCK_READ_DST4(ptr) \ |
1288 |
| - AS_DST_DATA4_T(intel_sub_group_block_read4((__global uint *)ptr)) |
1289 | 1223 | #define BLOCK_WRITE_DST4(ptr, v) \
|
1290 | 1224 | intel_sub_group_block_write4((__global uint *)ptr, as_uint4(v))
|
1291 | 1225 |
|
1292 |
| -#define BLOCK_READ_DST8(ptr) \ |
1293 |
| - AS_DST_DATA8_T(intel_sub_group_block_read8((__global uint *)ptr)) |
1294 | 1226 | #define BLOCK_WRITE_DST8(ptr, v) \
|
1295 | 1227 | intel_sub_group_block_write8((__global uint *)ptr, as_uint8(v))
|
1296 | 1228 |
|
1297 |
| -#define BLOCK_READ_DST16(ptr) \ |
1298 |
| - (DST_DATA16_T)( \ |
1299 |
| - BLOCK_READ_DST8(ptr), BLOCK_READ_DST8(ptr + 8 * SUB_GROUP_SIZE)) |
1300 | 1229 | #define BLOCK_WRITE_DST16(ptr, v) \
|
1301 | 1230 | do { \
|
1302 | 1231 | BLOCK_WRITE_DST8(ptr, (v).s01234567); \
|
|
1305 | 1234 |
|
1306 | 1235 | #elif DST_DT_F16 || DST_DT_BF16
|
1307 | 1236 |
|
1308 |
| -#define BLOCK_READ_DST(ptr) \ |
1309 |
| - AS_DST_DATA_T(intel_sub_group_block_read_us((__global ushort *)ptr)) |
1310 | 1237 | #define BLOCK_WRITE_DST(ptr, v) \
|
1311 | 1238 | intel_sub_group_block_write_us((__global ushort *)ptr, as_ushort(v))
|
1312 | 1239 |
|
1313 |
| -#define BLOCK_READ_DST2(ptr) \ |
1314 |
| - AS_DST_DATA2_T(intel_sub_group_block_read_us2((__global ushort *)ptr)) |
1315 | 1240 | #define BLOCK_WRITE_DST2(ptr, v) \
|
1316 | 1241 | intel_sub_group_block_write_us2((__global ushort *)ptr, as_short2(v))
|
1317 | 1242 |
|
1318 |
| -#define BLOCK_READ_DST4(ptr) \ |
1319 |
| - AS_DST_DATA4_T(intel_sub_group_block_read_us4((__global ushort *)ptr)) |
1320 | 1243 | #define BLOCK_WRITE_DST4(ptr, v) \
|
1321 | 1244 | intel_sub_group_block_write_us4((__global ushort *)ptr, as_ushort4(v))
|
1322 | 1245 |
|
1323 |
| -#define BLOCK_READ_DST8(ptr) \ |
1324 |
| - AS_DST_DATA8_T(intel_sub_group_block_read_us8((__global ushort *)ptr)) |
1325 | 1246 | #define BLOCK_WRITE_DST8(ptr, v) \
|
1326 | 1247 | intel_sub_group_block_write_us8((__global ushort *)ptr, as_ushort8(v))
|
1327 | 1248 |
|
1328 |
| -#define BLOCK_READ_DST16(ptr) \ |
1329 |
| - (DST_DATA16_T)( \ |
1330 |
| - BLOCK_READ_DST8(ptr), BLOCK_READ_DST8(ptr + 8 * SUB_GROUP_SIZE)) |
1331 | 1249 | #define BLOCK_WRITE_DST16(ptr, v) \
|
1332 | 1250 | do { \
|
1333 | 1251 | BLOCK_WRITE_DST8(ptr, (v).s01234567); \
|
|
0 commit comments