@@ -167,7 +167,7 @@ auto _CATALOG_ = kcatalog::toFlatCatalog({
167
167
{{'E', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, 2, -1}, {-1, 8, -1}, {-1, 2, -1}, {-1, 8, -1}, {4, 4, 1}, "Iqxy"}, "sB64 sS16 aS wg 2x1x16 ikr af vav sr bk0 bm0 sys pab grf256", {8, (LoopType) 0, 256, {(LoopType) 0, (LoopType) 1, (LoopType) 2}, {16777216, 8192, 16777216}, {8192, 8192, 16777216}, {16, 8, 64}, {2, 1, 16}, 1, (WGType) 0, 4357, 0, 1024, {4, 4, 4}, {false, false, true}}, {'W', 1, {128}}},
168
168
{{'E', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, 1, -1}, {-1, 1, -1}, {-1, 1, -1}, {-1, 1, -1}, {4, 4, 1}, "hxyI"}, "sB64 sS16 aS wg 2x1x16 ikr af vav sr bk0 bm0 sys pab grf256", {8, (LoopType) 0, 256, {(LoopType) 0, (LoopType) 1, (LoopType) 2}, {16777216, 8192, 16777216}, {8192, 8192, 16777216}, {16, 8, 64}, {2, 1, 16}, 1, (WGType) 0, 4357, 0, 1024, {4, 4, 4}, {false, false, true}}, {'W', 1, {128}}},
169
169
{{'E', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "ixy"}, "sB4 sB4 aB wg 4x8 kc4 cab4 ks8 nse bo sr bk0 sm sn l4 pab", {8, (LoopType) 0, 128, {(LoopType) 128, (LoopType) 255, (LoopType) 255}, {524288, 262144, 16777216}, {524288, 262144, 16777216}, {32, 16, 8}, {4, 8, 1}, 1, (WGType) 1, 257, 32768, 0, {1, 2, 4}, {false, false, true}}, {'W', 1, {512}}},
170
- {{'E', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, 1, -1}, {512, 1, -1}, {-1, 1, -1}, {512, 1, -1}, {4, 4, 1}, "H"}, "aB128x2 aB128x2 aB wg 2x1x8 ikr ar sb128 bk0 dot wt", {8, (LoopType) 0, 128, {(LoopType) 160, (LoopType) 255, (LoopType) 2}, {16384, 16384, 16777216}, {16384, 16384, 16777216}, {1, 1, 128}, {2, 1, 8}, 1, (WGType) 0, 4198661, 0, 64, {4, 4, 4}, {true, true, true}}, {'W', 1, {1}}},
170
+ {{'E', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, 1, -1}, {512, 1, -1}, {-1, 1, -1}, {512, 1, -1}, {4, 4, 1}, "H"}, "aB128x2 aB64x2 aB wg 2x1x8 ikr ar sb128 bk0 dot wt", {8, (LoopType) 0, 128, {(LoopType) 160, (LoopType) 255, (LoopType) 2}, {16384, 16384, 16777216}, {16384, 16384, 16777216}, {1, 1, 128}, {2, 1, 8}, 1, (WGType) 0, 4198661, 0, 64, {4, 4, 4}, {true, true, true}}, {'W', 1, {1}}},
171
171
{{'E', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {513, 1, -1}, {1024, 1, -1}, {513, 1, -1}, {1024, 1, -1}, {4, 4, 1}, "H"}, "aB64 aB32x2 aB wg 2x1x8 ikr nse hi ar sb64 bk0 dot wt", {8, (LoopType) 0, 128, {(LoopType) 144, (LoopType) 255, (LoopType) 2}, {65536, 16384, 16777216}, {65536, 16384, 16777216}, {4, 1, 64}, {2, 1, 8}, 1, (WGType) 0, 4198661, 0, 64, {4, 4, 4}, {true, true, true}}, {'W', 1, {4}}},
172
172
{{'E', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {4097, 1, 2049}, {-1, 1, -1}, {4097, 1, 2049}, {-1, 1, -1}, {4, 4, 1}, "H"}, "aB16 aB128 aB wg 2x1x16 ikr nse hi ar sb128 bk0 dot wt", {8, (LoopType) 0, 128, {(LoopType) 144, (LoopType) 255, (LoopType) 2}, {65536, 16384, 16777216}, {65536, 16384, 16777216}, {4, 1, 128}, {2, 1, 16}, 1, (WGType) 0, 4198661, 0, 64, {4, 4, 4}, {true, true, true}}, {'W', 1, {4}}},
173
173
{{'E', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {4097, 1, -1}, {-1, 1, 2048}, {4097, 1, -1}, {-1, 1, 2048}, {4, 4, 1}, "H"}, "aB64 aB32x2 aB wg 2x1x8 ikr nse hi ar sb64 bk0 dot wt", {8, (LoopType) 0, 128, {(LoopType) 144, (LoopType) 255, (LoopType) 2}, {65536, 16384, 16777216}, {65536, 16384, 16777216}, {4, 1, 64}, {2, 1, 8}, 1, (WGType) 0, 4198661, 0, 64, {4, 4, 4}, {true, true, true}}, {'W', 1, {4}}},
@@ -287,7 +287,7 @@ auto _CATALOG_ = kcatalog::toFlatCatalog({
287
287
{{'E', "gemm", {"O", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, 1, 513}, {1024, 1, -1}, {-1, 1, 513}, {1024, 1, -1}, {1, 1, 1}, "H"}, "aB128 aB64x2 aB wg 8x1x2 ikr nse hi ar sb128 bk0 grf256 dot wt", {8, (LoopType) 0, 256, {(LoopType) 144, (LoopType) 255, (LoopType) 2}, {16384, 16384, 16777216}, {16384, 16384, 16777216}, {1, 1, 128}, {8, 1, 2}, 1, (WGType) 0, 4198661, 0, 256, {1, 2, 4}, {true, true, true}}, {'W', 1, {1}}},
288
288
{{'E', "gemm", {"O", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {1025, 1, 4097}, {-1, 1, -1}, {1025, 1, 4097}, {-1, 1, -1}, {1, 1, 1}, "H"}, "aB128 aB64x2 aB wg 4x1x8 ikr nse hi ar sb128 bk0 dot wt", {8, (LoopType) 0, 128, {(LoopType) 144, (LoopType) 255, (LoopType) 2}, {32768, 16384, 16777216}, {32768, 16384, 16777216}, {2, 1, 128}, {4, 1, 8}, 1, (WGType) 0, 4198661, 0, 128, {1, 2, 4}, {true, true, true}}, {'W', 1, {2}}},
289
289
{{'E', "gemm", {"O", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {1025, 1, 513}, {-1, 1, 4096}, {1025, 1, 513}, {-1, 1, 4096}, {1, 1, 1}, "H"}, "aB128x2 aB128x2 aB wg 16x1 wx2 nse hi ar sb128 bk0 dot wt", {8, (LoopType) 0, 128, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {16384, 16384, 16777216}, {16384, 16384, 16777216}, {1, 1, 128}, {16, 1, 1}, 2, (WGType) 1, 4194561, 0, 0, {1, 2, 4}, {true, true, true}}, {'W', 1, {1}}},
290
- {{'E', "gemm", {"O", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, 2, -1}, {4095, 2, 4095}, {-1, 2, -1}, {4095, 2, 4095}, {4, 4, 1}, "xyH "}, "sB128 sB64 aB wg 8x1x4 ikr nse hi ar sb128 bk0 dot wt", {8, (LoopType) 0, 128, {(LoopType) 144, (LoopType) 255, (LoopType) 2}, {32768, 32768, 16777216}, {32768, 32768, 16777216}, {2, 2, 128}, {8, 1, 4}, 1, (WGType) 0, 4198661, 0, 256, {4, 4, 4}, {false, false, true}}, {'W', 1, {128}}},
290
+ {{'E', "gemm", {"O", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, 2, -1}, {4095, 2, 4095}, {-1, 2, -1}, {4095, 2, 4095}, {4, 4, 1}, "Hpxy "}, "sB128 sB64 aB wg 8x1x4 ikr nse hi ar sb128 bk0 dot wt", {8, (LoopType) 0, 128, {(LoopType) 144, (LoopType) 255, (LoopType) 2}, {32768, 32768, 16777216}, {32768, 32768, 16777216}, {2, 2, 128}, {8, 1, 4}, 1, (WGType) 0, 4198661, 0, 256, {4, 4, 4}, {false, false, true}}, {'W', 1, {128}}},
291
291
{{'E', "gemm", {"O", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4, 1}, "I"}, "aB32 aB16 aB ca3 ks64 wg 2x4x4 kr sys dw af k192 grf256 sm vav di dm sr cc fm", {8, (LoopType) 0, 256, {(LoopType) 0, (LoopType) 1, (LoopType) 2}, {8192, 8192, 16777216}, {8192, 8192, 16777216}, {16, 16, 192}, {2, 4, 4}, 1, (WGType) 1, 261, 12288, 12288, {4, 4, 4}, {true, true, true}}, {'E', 17, {4.488e+06, 120208, 122348, 327.657, 0, 0, 3.41911, 6.33998, 2.77024, 6.84323, 0.0582208, 0.0145417, 0.0574819, 0.511227, 1.20562, 1.20018, 5.11052e-14}}},
292
292
{{'E', "gemm", {"O", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4, 1}, "Isxy"}, "sS32x2 sB16 aB wg 16x2 cb4 ks32 xaf dw vav bo sr bk0 sn dm grf256 sys pab", {8, (LoopType) 0, 256, {(LoopType) 128, (LoopType) 255, (LoopType) 255}, {262144, 786432, 16777216}, {262144, 786432, 16777216}, {16, 48, 32}, {16, 2, 1}, 1, (WGType) 1, 257, 24576, 0, {4, 4, 4}, {false, false, true}}, {'E', 17, {995629, 475244, 0, 0, 0, 0, 2.57281, 4.9973, 6.41839, 16.8374, 0.018098, 0.0100962, 0.0118445, 0.996594, 1.41614, 1.19695, 9.60059e-13}}},
293
293
{{'E', "gemm", {"O", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, 2048}, {-1, 1024, -1}, {4, 4, 1}, "Ixy"}, "sS32x2 sB16 aB wg 16x2 cb4 ks32 xaf dw vav bo sr bk0 sn dm grf256 sys pab", {8, (LoopType) 0, 256, {(LoopType) 128, (LoopType) 255, (LoopType) 255}, {262144, 524288, 16777216}, {262144, 524288, 16777216}, {16, 32, 32}, {16, 2, 1}, 1, (WGType) 1, 257, 16384, 0, {4, 4, 4}, {false, false, true}}, {'E', 17, {985865, 449373, 0, 0, 0, 0, 2.52021, 5.18432, 6.41111, 16.2959, 0.0196104, 0.0078899, 0.0176455, 0.975623, 1.37619, 1.19224, 7.58759e-13}}},
0 commit comments