@@ -543,7 +543,7 @@ auto _CATALOG_ = kcatalog::toArray({
543
543
{{'F', "gemm", {"B", "F", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "I"}, "aS32+S32@48 aB32+S16@48 aB wg 4x1 af vav nmk li pt br sr sb256 bk0 sm grf256 sys l4 kd", {16, (LoopType) 255, 256, {(LoopType) 225, (LoopType) 255, (LoopType) 255}, {262144, 16384, 16777216}, {262144, 16384, 16777216}, {16, 1, 32}, {4, 1, 1}, 1, (WGType) 1, 257, 0, 0, {2, 2, 4}, {true, true, true}}, {'E', 17, {1.20164e+06, 54541, 0, 0, 0, 0, 0.422505, 7.31873, 4.22063, 10.9759, 0.192755, 0.0407408, 0.156727, 0.693649, 1.00294, 0, 0}}},
544
544
{{'F', "gemm", {"B", "O", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, 16, -1}, {1, 1, 1}, "#I"}, "aB16+m32@32 aB32 aB wg 2x8 af vav li nmk pt sr br ca3 bk0 sys kv dm afb l4", {16, (LoopType) 255, 128, {(LoopType) 225, (LoopType) 255, (LoopType) 255}, {1048576, 32768, 16777216}, {1048576, 32768, 32}, {64, 2, 32}, {2, 8, 1}, 1, (WGType) 1, 409, 24576, 0, {4, 2, 4}, {true, true, true}}, {'E', 17, {1.32162e+06, 161954, 0, 0, 2.32817e+06, 0, 0.71806, 4.15517, 0.786689, 1.40778, 0.0341164, 0.0131941, 0.0256486, 0.947188, 1.39057, 0.987284, 5.0128e-12}}},
545
545
{{'F', "gemm", {"B", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 8, 1}, "ABI"}, "at16x2+m32@48 am32+m16@64 aB wg 4x2x4 kr xaf st vav hi pt sr br sb64 bk0 sm sn grf256 sys kv afb", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {262144, 262144, 16777216}, {262144, 262144, 32}, {16, 16, 32}, {4, 2, 4}, 1, (WGType) 1, 445, 0, 8192, {4, 8, 4}, {true, true, true}}, {'E', 17, {1.15757e+06, -134939, -26216.3, 226647, 2.94093e+06, 1.99475e+06, 0.459989, 0.701382, 0.869118, 1.51365, 0.0198203, 0.0153374, 0.00626234, 0.558687, 1.30133, 0.935668, 6.0334e-12}}},
546
- {{'F', "gemm", {"B", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "I"}, "aS32+S32@48 aB32+S16@48 aB wg 4x1 af vav nmk li pt br sr sb256 bk0 sm grf256 sys l4 kd", {16, (LoopType) 255, 256, {(LoopType) 225, (LoopType) 255, (LoopType) 255}, {262144, 16384, 16777216}, {262144, 16384, 16777216}, {16, 1, 32}, {4, 1, 1}, 1, (WGType) 1, 257, 0, 0, {2, 2 , 4}, {true, true, true}}, {'E', 17, {1.20164e+06, 54541, 0, 0, 0, 0, 0.422505, 7.31873, 4.22063, 10.9759, 0.192755, 0.0407408, 0.156727, 0.693649, 1.00294, 0, 0}}},
546
+ {{'F', "gemm", {"B", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "I"}, "aS32+S32@48 aB32+S16@48 aB wg 4x1 af vav nmk li pt br sr sb256 bk0 sm grf256 sys l4 kd", {16, (LoopType) 255, 256, {(LoopType) 225, (LoopType) 255, (LoopType) 255}, {262144, 16384, 16777216}, {262144, 16384, 16777216}, {16, 1, 32}, {4, 1, 1}, 1, (WGType) 1, 257, 0, 0, {2, 1 , 4}, {true, true, true}}, {'E', 17, {1.20164e+06, 54541, 0, 0, 0, 0, 0.422505, 7.31873, 4.22063, 10.9759, 0.192755, 0.0407408, 0.156727, 0.693649, 1.00294, 0, 0}}},
547
547
{{'F', "gemm", {"D", "D", "D"}, {"A", "B", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "aB8+B8@16 aB8+B8@16 aB nse grf256 wg 4x8 bo pt kc8 sb256 bk0 br sr", {16, (LoopType) 255, 256, {(LoopType) 192, (LoopType) 255, (LoopType) 255}, {524288, 524288, 16777216}, {8192, 8192, 16777216}, {32, 32, 8}, {4, 8, 1}, 1, (WGType) 1, 256, 0, 0, {128, 128, 8}, {true, true, true}}, {'W', 1, {1024}}},
548
548
{{'F', "gemm", {"D", "D", "D"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "aB8x2+B8@24 aS8x2+S8@24 aB wg 4x8 kc8 nse hi pt sb256 bk0 sn grf256 br sr", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 131072, 16777216}, {8192, 8192, 16777216}, {32, 8, 8}, {4, 8, 1}, 1, (WGType) 1, 257, 0, 0, {8, 8, 8}, {true, true, true}}, {'E', 17, {915642, 488057, 0, 0, 0, 0, 2.62682, 4.67056, 1.01353, 1.76192, 0.0687398, 0.0687398, 0, 0.998364, 1.80644, 1.08579, 3.08664e-11}}},
549
549
{{'F', "gemm", {"D", "D", "D"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "aB32+B16@32 aS16+S8@32 aB wg 4x8 kc16 nse hi pt sb256 bk0 sn grf256 br sr", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {262144, 131072, 16777216}, {8192, 8192, 16777216}, {16, 8, 32}, {4, 8, 1}, 1, (WGType) 1, 257, 0, 0, {8, 8, 8}, {true, true, true}}, {'E', 17, {887309, 574758, 0, 0, 0, 0, 4.77569, 4.82861, 0.536993, 1.65054, 0.0889844, 0.0889844, 0, 1, 1.65309, 1.06232, 1.19911e-11}}},
@@ -905,7 +905,7 @@ auto _CATALOG_ = kcatalog::toArray({
905
905
{{'F', "gemm", {"Q", "Q", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 8, 1}, "ABI"}, "at32+m32@48 am16x2+m64@32 aB wg 4x4x2 kr xaf st vav hi pt sr br sb64 bk0 sm sn grf256 sys kv afb", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {262144, 524288, 16777216}, {262144, 524288, 32}, {16, 32, 32}, {4, 4, 2}, 1, (WGType) 1, 445, 0, 32768, {4, 8, 4}, {true, true, true}}, {'E', 17, {1.03742e+06, -452509, 25423.9, 695882, 3.92397e+06, 3.94035e+06, 0.820319, 0.785135, 0.84902, 1.56923, 0.00809246, 0.00116078, 0.00745441, 0.526504, 1.60176, 1.07294, 4.15601e-12}}},
906
906
{{'F', "gemm", {"Q", "Q", "S"}, {"T", "T", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "aS8+S1,16@16 aB8+B8@16 aU vav wg 8x4 bo pt sm sb256 grf256 bk0 sr", {16, (LoopType) 255, 256, {(LoopType) 192, (LoopType) 255, (LoopType) 255}, {262144, 262144, 16777216}, {262144, 262144, 16777216}, {16, 16, 8}, {8, 4, 1}, 1, (WGType) 1, 257, 0, 0, {1, 1, 4}, {true, true, true}}, {'E', 17, {875898, 864492, 0, 0, 0, 0, 2.55527, 2.12966, 0.857629, 1.85569, 0.0625314, 0.0625314, 0, 1, 1.01096, 1.00724, 3.06523e-14}}},
907
907
{{'F', "gemm", {"S", "F", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "aB8+B8@16 aB8+m32@24 aB wg 1x4 kc8 nse hi pt sr sb256 bk0 sn grf256 l4", {16, (LoopType) 255, 128, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 131072, 16777216}, {524288, 131072, 16777216}, {1, 1, 1}, {1, 4, 1}, 1, (WGType) 1, 257, 0, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.16538e+06, 40635.2, 0, 0, 0, 0, 1.30731, 1.53858, 0.584971, 1.42067, 0.0634061, 0.0581975, 0.0161667, 1, 1.44276, 1.00478, 2.34818e-11}}},
908
- {{'F', "gemm", {"S", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "aB8+B8@16 aB8+m32@24 aB wg 1x4 kc8 nse hi pt sr sb256 bk0 sn grf256 l4", {16, (LoopType) 255, 128 , {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 131072 , 16777216}, {524288, 131072 , 16777216}, {1, 1, 1 }, {1, 4, 1}, 1, (WGType) 1, 257, 0, 0, {4, 4 , 4}, {true, true, true}}, {'E', 17, {1.16538e+06, 40635.2, 0, 0, 0, 0, 1.30731, 1.53858, 0.584971, 1.42067, 0.0634061, 0.0581975, 0.0161667, 1, 1.44276, 1.00478, 2.34818e-11}}},
908
+ {{'F', "gemm", {"S", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "aB8+B8@16 aB8+m32@24 aB wg 1x4 kc8 nse hi pt sr sb256 bk0 sn grf256 l4", {16, (LoopType) 255, 256 , {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {16384, 16384 , 16777216}, {16384, 16384 , 16777216}, {1, 1, 8 }, {1, 4, 1}, 1, (WGType) 1, 257, 0, 0, {4, 1 , 4}, {true, true, true}}, {'E', 17, {1.16538e+06, 40635.2, 0, 0, 0, 0, 1.30731, 1.53858, 0.584971, 1.42067, 0.0634061, 0.0581975, 0.0161667, 1, 1.44276, 1.00478, 2.34818e-11}}},
909
909
{{'F', "gemm", {"S", "S", "S"}, {"A", "B", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "aB8+B8@8 aB8+B8@8 aB nse wg 4x8 bo pt sb256 kc8 bk0 sr", {16, (LoopType) 255, 128, {(LoopType) 192, (LoopType) 255, (LoopType) 255}, {524288, 524288, 16777216}, {8192, 8192, 16777216}, {32, 32, 8}, {4, 8, 1}, 1, (WGType) 1, 256, 0, 0, {128, 128, 4}, {true, true, true}}, {'W', 1, {1024}}},
910
910
{{'F', "gemm", {"S", "S", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "aB16+m8@32 aS32+m16@40 aB wg 4x4 kc16 nse hi pt sb256 bk0 sn grf256 sr", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 262144, 16777216}, {524288, 262144, 16777216}, {32, 16, 32}, {4, 4, 1}, 1, (WGType) 1, 257, 0, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.08792e+06, 260070, 0, 0, 0, 0, 1.27159, 2.25336, 0.633711, 1.35704, 0.0632943, 0.00105479, 0.0694168, 0.543903, 1.15915, 0.195161, 2.93818e-11}}},
911
911
{{'F', "gemm", {"S", "S", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "aB8x2+B8@24 am/S16+S32@32 aB wg 4x8 kc8 nse hi pt sb256 bk0 sn grf256 sr l2d", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 131072, 16777216}, {524288, 131072, 16777216}, {32, 8, 16}, {4, 8, 1}, 1, (WGType) 1, 257, 0, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {933004, 470490, 0, 0, 0, 0, 2.49562, 3.97982, 0.810184, 1.38841, 0.0630776, 0.0630776, 0, 1, 1.22055, -0.309162, 2.6504e-11}}},
0 commit comments