15
15
*******************************************************************************/
16
16
17
17
/*@kcatalog@*/
18
- auto _CATALOG_ = kcatalog::toFlatCatalog ({
18
+ auto _CATALOG_ = kcatalog::toArray ({
19
19
{{'C', "gemm", {"B", "B", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "ab4 as8 ab l4 ca1 wg 2x8 int sr", {8, (LoopType) 0, 128, {(LoopType) 0, (LoopType) 1, (LoopType) 255}, {4096, 4096, 2048}, {4096, 4096, 2048}, {32, 16, 8}, {2, 8, 1}, 1, (WGType) 1, 1, 2048, 0, {2, 2, 4}, {true, true, true}}, {'W', 1, {512}}},
20
20
{{'C', "gemm", {"B", "B", "S"}, {"N", "T", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "ab2 ab8 ab l4 cab1 wg 4x4 int sr", {8, (LoopType) 0, 128, {(LoopType) 0, (LoopType) 1, (LoopType) 255}, {4096, 4096, 2048}, {4096, 4096, 2048}, {32, 16, 8}, {4, 4, 1}, 1, (WGType) 1, 1, 6144, 0, {2, 2, 4}, {true, true, true}}, {'W', 1, {512}}},
21
21
{{'C', "gemm", {"B", "B", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "as8x2 ab16 ab l4 cb1 wg 8x2 vnc nmk sr", {8, (LoopType) 1, 128, {(LoopType) 1, (LoopType) 0, (LoopType) 255}, {4096, 4096, 2048}, {4096, 4096, 2048}, {16, 16, 16}, {8, 2, 1}, 1, (WGType) 1, 1, 2048, 0, {2, 2, 4}, {true, true, true}}, {'W', 1, {256}}},
@@ -907,7 +907,7 @@ auto _CATALOG_ = kcatalog::toFlatCatalog({
907
907
{{'F', "gemm", {"S", "F", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "aB8+B8@16 aB8+m32@24 aB wg 1x4 kc8 nse hi pt sr sb256 bk0 sn grf256 l4", {16, (LoopType) 255, 128, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 131072, 16777216}, {524288, 131072, 16777216}, {1, 1, 1}, {1, 4, 1}, 1, (WGType) 1, 257, 0, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.16538e+06, 40635.2, 0, 0, 0, 0, 1.30731, 1.53858, 0.584971, 1.42067, 0.0634061, 0.0581975, 0.0161667, 1, 1.44276, 1.00478, 2.34818e-11}}},
908
908
{{'F', "gemm", {"S", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "aB8+B8@16 aB8+m32@24 aB wg 1x4 kc8 nse hi pt sr sb256 bk0 sn grf256 l4", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {16384, 16384, 16777216}, {16384, 16384, 16777216}, {1, 1, 8}, {1, 4, 1}, 1, (WGType) 1, 257, 0, 0, {4, 1, 4}, {true, true, true}}, {'E', 17, {1.16538e+06, 40635.2, 0, 0, 0, 0, 1.30731, 1.53858, 0.584971, 1.42067, 0.0634061, 0.0581975, 0.0161667, 1, 1.44276, 1.00478, 2.34818e-11}}},
909
909
{{'F', "gemm", {"S", "S", "S"}, {"A", "B", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "aB8+B8@8 aB8+B8@8 aB nse wg 4x8 bo pt sb256 kc8 bk0 sr", {16, (LoopType) 255, 128, {(LoopType) 192, (LoopType) 255, (LoopType) 255}, {524288, 524288, 16777216}, {8192, 8192, 16777216}, {32, 32, 8}, {4, 8, 1}, 1, (WGType) 1, 256, 0, 0, {128, 128, 4}, {true, true, true}}, {'W', 1, {1024}}},
910
- {{'F', "gemm", {"S", "S", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1 , 1}, ""}, "aB16+m8@32 aS32+m16@40 aB wg 4x4 kc16 nse hi pt sb256 bk0 sn grf256 sr", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 262144, 16777216}, {524288, 262144, 16777216}, {32, 16, 32}, {4, 4, 1}, 1, (WGType) 1, 257, 0, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.08792e+06, 260070, 0, 0, 0, 0, 1.27159, 2.25336, 0.633711, 1.35704, 0.0632943, 0.00105479, 0.0694168, 0.543903, 1.15915, 0.195161, 2.93818e-11}}},
910
+ {{'F', "gemm", {"S", "S", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4 , 1}, ""}, "aB16+m8@32 aS32+m16@40 aB wg 4x4 kc16 nse hi pt sb256 bk0 sn grf256 sr", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 262144, 16777216}, {524288, 262144, 16777216}, {32, 16, 32}, {4, 4, 1}, 1, (WGType) 1, 257, 0, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.08792e+06, 260070, 0, 0, 0, 0, 1.27159, 2.25336, 0.633711, 1.35704, 0.0632943, 0.00105479, 0.0694168, 0.543903, 1.15915, 0.195161, 2.93818e-11}}},
911
911
{{'F', "gemm", {"S", "S", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "aB8x2+B8@24 am/S16+S32@32 aB wg 4x8 kc8 nse hi pt sb256 bk0 sn grf256 sr l2d", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 131072, 16777216}, {524288, 131072, 16777216}, {32, 8, 16}, {4, 8, 1}, 1, (WGType) 1, 257, 0, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {933004, 470490, 0, 0, 0, 0, 2.49562, 3.97982, 0.810184, 1.38841, 0.0630776, 0.0630776, 0, 1, 1.22055, -0.309162, 2.6504e-11}}},
912
912
{{'F', "gemm", {"S", "S", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {0, 0, 0}, {0, 0, 0}, {1, 1, 1}, ""}, "am16+m16@64 am/S32+m32@64 aB wg 4x8 kc16 nse hi pt sb256 bk0 sn grf256 sr l2d", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {262144, 65536, 16777216}, {262144, 65536, 16777216}, {16, 4, 32}, {4, 8, 1}, 1, (WGType) 1, 257, 0, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {895520, 534803, 0, 0, 0, 0, 1.20137, 3.44786, 1.88791, 3.67536, 0.0747548, 0.0747548, 0, 1, 1.37174, 0.989936, 2.27372e-12}}},
913
913
{{'F', "gemm", {"S", "S", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {256, 256, 256}, {8, 8, 1}, ""}, "am16+m32@64 am/S32x2+m16@32 aB wg 2x16 kc16 nse hi pt sb256 bk0 sn grf256 sr l2d", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {262144, 32768, 16777216}, {262144, 32768, 16777216}, {16, 2, 32}, {2, 16, 1}, 1, (WGType) 1, 257, 0, 0, {8, 8, 4}, {true, true, true}}, {'E', 17, {938708, 543839, 0, 0, 0, 0, 2.39026, 3.22437, 2.56416, 5.86786, 0.101672, 0.101672, 0, 0.999145, 1.33995, 1.00257, 1.83338e-12}}},
@@ -919,16 +919,16 @@ auto _CATALOG_ = kcatalog::toFlatCatalog({
919
919
{{'F', "gemm", {"S", "S", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {63, -1, -1}, {1, 1, 1}, ""}, "am16x2+m16@16 aS16+m16@16 aB wg 1x16x2 kr kc16 nse li pt sr kv sb256 bk0 sn grf256 afb l2d", {16, (LoopType) 255, 256, {(LoopType) 224, (LoopType) 255, (LoopType) 2}, {262144, 262144, 16777216}, {8192, 8192, 0}, {16, 16, 16}, {1, 16, 2}, 1, (WGType) 1, 413, 0, 16384, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.11666e+06, -375280, -55079.8, 590017, 2.65421e+06, 0, 6.85937, 1.10986, 0.561901, 1.18544, 0.0718347, 0.0118681, 0.0644105, 0.645395, 1.19384, 0.859778, 5.37885e-12}}},
920
920
{{'F', "gemm", {"S", "S", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, -1, -1}, {1, 1, 1}, ""}, "am16x2+S32@16 aS32+S16@32 aS wg 1x4x8 kr kc16 nse li pt sr kv sb256 bk0 sn grf256 afb l2d", {16, (LoopType) 255, 256, {(LoopType) 224, (LoopType) 255, (LoopType) 2}, {65536, 262144, 16777216}, {8192, 8192, 0}, {4, 16, 32}, {1, 4, 8}, 1, (WGType) 1, 413, 0, 1024, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.21263e+06, -101219, -23787.5, 154376, 2.3765e+06, 0, 4.80141, 0.952132, 2.62236, 5.17717, 0.130559, 0.263753, 0.0519091, 1, 1.25383, 1.14072, -8.09176e-14}}},
921
921
{{'F', "gemm", {"S", "S", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, -1, -1}, {1, 1, 1}, ""}, "aB8x2+S32@24 aS8+S16@32 aS wg 1x8x4 kr kc8 nse li pt sr kv sb256 bk0 sn grf256 afb", {16, (LoopType) 255, 256, {(LoopType) 224, (LoopType) 255, (LoopType) 2}, {16384, 262144, 16777216}, {8192, 8192, 0}, {1, 16, 8}, {1, 8, 4}, 1, (WGType) 1, 413, 0, 512, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.16307e+06, -117224, -49945.9, 211601, 2.21184e+06, 0, 27.752, 0.924823, 7.95811, 19.7907, 0.498836, 0.333805, 0.307337, 1, 1.19347, 0, 0}}},
922
- {{'F', "gemm", {"S", "S", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1 , 1}, ""}, "aB16+m16@8 aS16+m32@8 aB wg 2x4x4 kr kc16 nse hi pt sr sb256 bk0 sn grf256 kv afb", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {524288, 262144, 16777216}, {524288, 262144, 32}, {32, 16, 16}, {2, 4, 4}, 1, (WGType) 1, 413, 0, 16384, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.22732e+06, -192672, -38121.5, 322384, 3.32595e+06, 0, 2.49763, 1.54907, 0.628927, 1.39358, 0.0627876, 0.0504407, 0.0208002, 0.589308, 1.19034, 0.60621, 4.79866e-11}}},
923
- {{'F', "gemm", {"S", "S", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1 , 1}, ""}, "aB16+m16@32 aS16+m16@32 aB wg 2x4x4 kr kc16 nse hi pt sr sb256 bk0 sn grf256 kv afb", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {524288, 131072, 16777216}, {524288, 131072, 32}, {32, 8, 16}, {2, 4, 4}, 1, (WGType) 1, 413, 0, 8192, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.24447e+06, -148653, -56383, 266371, 2.85901e+06, 0, 1.66468, 2.34448, 0.802059, 1.33917, 0.0632256, 0.0464224, 0.0241823, 0.804536, 1.32602, 0.945203, 1.61386e-11}}},
922
+ {{'F', "gemm", {"S", "S", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4 , 1}, ""}, "aB16+m16@8 aS16+m32@8 aB wg 2x4x4 kr kc16 nse hi pt sr sb256 bk0 sn grf256 kv afb", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {524288, 262144, 16777216}, {524288, 262144, 32}, {32, 16, 16}, {2, 4, 4}, 1, (WGType) 1, 413, 0, 16384, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.22732e+06, -192672, -38121.5, 322384, 3.32595e+06, 0, 2.49763, 1.54907, 0.628927, 1.39358, 0.0627876, 0.0504407, 0.0208002, 0.589308, 1.19034, 0.60621, 4.79866e-11}}},
923
+ {{'F', "gemm", {"S", "S", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4 , 1}, ""}, "aB16+m16@32 aS16+m16@32 aB wg 2x4x4 kr kc16 nse hi pt sr sb256 bk0 sn grf256 kv afb", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {524288, 131072, 16777216}, {524288, 131072, 32}, {32, 8, 16}, {2, 4, 4}, 1, (WGType) 1, 413, 0, 8192, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.24447e+06, -148653, -56383, 266371, 2.85901e+06, 0, 1.66468, 2.34448, 0.802059, 1.33917, 0.0632256, 0.0464224, 0.0241823, 0.804536, 1.32602, 0.945203, 1.61386e-11}}},
924
924
{{'F', "gemm", {"S", "S", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, 4, -1}, {1, 1, 1}, ""}, "am8+B8@8 at8x2 aB wg 4x1x16 kr kc8 nse li nmk pt sr bk0 sn kv afb sb32 l2d", {16, (LoopType) 255, 128, {(LoopType) 225, (LoopType) 255, (LoopType) 2}, {524288, 65536, 16777216}, {8192, 8192, 0}, {32, 4, 8}, {4, 1, 16}, 1, (WGType) 1, 413, 0, 2048, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.21115e+06, -59718.2, -11600.6, 104303, 3.06381e+06, 0, 0.860506, 9.14342, 0.769527, 1.14843, 0.0733058, 0.0350639, 0.04512, 0.901895, 1.307, 0.986093, 2.06541e-11}}},
925
925
{{'F', "gemm", {"S", "S", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {1024, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "aB16+m8@24 aS16+m16@24 aB wg 1x2x16 kr kc16 nse hi pt sr br sb32 bk0 sn grf256 kv afb", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {524288, 262144, 16777216}, {524288, 262144, 32}, {32, 16, 16}, {1, 2, 16}, 1, (WGType) 1, 413, 0, 4096, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.19256e+06, -99702.7, -5250.1, 135470, 4.21888e+06, 0, 0.924905, 1.15646, 0.601147, 1.43162, 0.0628943, 0.0635026, 0.0173549, 1, 1.36155, 1.02238, 5.68694e-12}}},
926
926
{{'F', "gemm", {"S", "S", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "aB8+m8@16 aS16+m8@24 aB wg 1x4x8 kr kc8 nse hi pt sb256 bk0 sn grf256 sr", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {524288, 131072, 16777216}, {524288, 131072, 16777216}, {32, 8, 16}, {1, 4, 8}, 1, (WGType) 1, 261, 0, 4096, {4, 4, 4}, {true, true, true}}, {'E', 17, {973570, 471411, 10282.9, 75820.8, 0, 0, 1.45806, 1.71929, 3.09927, 7.18832, 0.0643004, 0.0643004, 0, 0.97308, 1.43932, 0.981141, 7.50692e-12}}},
927
927
{{'F', "gemm", {"S", "S", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1023, -1, -1}, {1, 1, 1}, ""}, "aB8+m8@16 aS16+m8@24 aB wg 1x4x8 kr kc8 nse hi pt sb256 bk0 sn grf256 sr kv afb", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {524288, 131072, 16777216}, {524288, 131072, 32}, {32, 8, 16}, {1, 4, 8}, 1, (WGType) 1, 413, 0, 4096, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.21899e+06, -88481.9, -25272.1, 146166, 3.03923e+06, 0, 1.52455, 1.45603, 0.792316, 1.37923, 0.0633789, 0.0507551, 0.0245627, 1, 1.38276, 0.904775, 1.71039e-11}}},
928
928
{{'F', "gemm", {"S", "S", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {256, 256, 64}, {1, 1, 1}, ""}, "aB8+m16@8 aS8x2 aB wg 1x4x4 kr kc8 nse hi pt ar sb32 bk0", {16, (LoopType) 255, 128, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {524288, 32768, 16777216}, {524288, 32768, 16777216}, {32, 2, 8}, {1, 4, 4}, 1, (WGType) 1, 261, 0, 1024, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.19515e+06, 151613, 62335.9, 17126.5, 0, 0, 1.28521, 4.53132, 1.51228, 3.55467, 0.145838, 0.0796081, 0.0700619, 0.527675, 1.35925, 1.00198, 1.34953e-11}}},
929
929
{{'F', "gemm", {"S", "S", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "aB8x2+B8@16 aS8+S8@24 aB wg 1x4 kc8 nse hi pt sr sb256 bk0 sn", {16, (LoopType) 255, 128, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 131072, 16777216}, {524288, 131072, 16777216}, {32, 8, 8}, {1, 4, 1}, 1, (WGType) 1, 257, 0, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.16538e+06, 40635.2, 0, 0, 0, 0, 1.30731, 1.53858, 0.584971, 1.42067, 0.0634061, 0.0581975, 0.0161667, 1, 1.44276, 1.00478, 2.34818e-11}}},
930
930
{{'F', "gemm", {"S", "S", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, -1, -1}, {1, 1, 1}, ""}, "am16x2+m32@16 aS32+m16@32 aS wg 1x4 kc16 nse li pt sr sb256 bk0 sn grf256 l2d", {16, (LoopType) 255, 256, {(LoopType) 224, (LoopType) 255, (LoopType) 255}, {65536, 262144, 16777216}, {65536, 262144, 16777216}, {4, 16, 32}, {1, 4, 1}, 1, (WGType) 1, 257, 0, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.15385e+06, 54677.4, 0, 0, 0, 0, 4.66453, 0.94861, 2.51588, 5.10353, 0.11684, 0.263033, 0.0520785, 1, 1.35189, 1.13162, 7.09638e-15}}},
931
- {{'F', "gemm", {"S", "S", "S"}, {"N", "T", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1 , 1}, ""}, "aB8+m8@16 aB16+m16@16 aB wg 4x4 kc8 nse hi pt sb256 bk0 grf256 sr", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 262144, 16777216}, {524288, 262144, 16777216}, {32, 16, 16}, {4, 4, 1}, 1, (WGType) 1, 257, 0, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.10018e+06, 251905, 0, 0, 0, 0, 1.56408, 2.85947, 0.648851, 1.37611, 0.0629702, 0.000146865, 0.0632313, 0.517444, 1.16754, -0.0884205, 2.14696e-11}}},
931
+ {{'F', "gemm", {"S", "S", "S"}, {"N", "T", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4 , 1}, ""}, "aB8+m8@16 aB16+m16@16 aB wg 4x4 kc8 nse hi pt sb256 bk0 grf256 sr", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 262144, 16777216}, {524288, 262144, 16777216}, {32, 16, 16}, {4, 4, 1}, 1, (WGType) 1, 257, 0, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.10018e+06, 251905, 0, 0, 0, 0, 1.56408, 2.85947, 0.648851, 1.37611, 0.0629702, 0.000146865, 0.0632313, 0.517444, 1.16754, -0.0884205, 2.14696e-11}}},
932
932
{{'F', "gemm", {"S", "S", "S"}, {"N", "T", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, 16, -1}, {1, 1, 1}, ""}, "am16+m16@64 am16+m32@64 aB wg 4x4 kc16 nse hi pt sb256 bk0 grf256 sr l2d", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {262144, 65536, 16777216}, {262144, 65536, 16777216}, {16, 4, 16}, {4, 4, 1}, 1, (WGType) 1, 257, 0, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.10359e+06, 253001, 0, 0, 0, 0, 1.23463, 4.97057, 1.98081, 3.72207, 0.0797284, 0.00775855, 0.0752955, 0.909553, 1.43847, 0.946385, 6.92721e-12}}},
933
933
{{'F', "gemm", {"S", "S", "S"}, {"N", "T", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "am16+m32@32 am16+m16@32 aB wg 2x8 kc16 nse hi pt sb256 bk0 grf256 sr l2d", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {262144, 32768, 16777216}, {262144, 32768, 16777216}, {16, 2, 16}, {2, 8, 1}, 1, (WGType) 1, 257, 0, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.09369e+06, 252258, 0, 0, 0, 0, 1.95015, 5.48836, 2.57025, 5.95009, 0.14056, 0.0114998, 0.12796, 0.974789, 1.2964, 0.960056, 4.31831e-12}}},
934
934
{{'F', "gemm", {"S", "S", "S"}, {"N", "T", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "qp"}, "aB8/4x2+m8@28 aB8/4x2+m8@28 aP nse wg 4x8 bo pt sb256 kc8 grf256 bk0 sr kv afb", {16, (LoopType) 255, 256, {(LoopType) 192, (LoopType) 255, (LoopType) 255}, {1048576, 524288, 16777216}, {1048576, 524288, 32}, {64, 32, 8}, {4, 8, 1}, 1, (WGType) 1, 409, 0, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {866662, 832973, 0, 0, 6.62733e+06, 0, 2.31399, 2.18462, 0.879335, 1.58093, 0.0624816, 0.0624816, 0, 1, 1.01086, 1.00539, 3.62849e-14}}},
@@ -940,7 +940,7 @@ auto _CATALOG_ = kcatalog::toFlatCatalog({
940
940
{{'F', "gemm", {"S", "S", "S"}, {"N", "T", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, 16, -1}, {1, 1, 1}, ""}, "aB8x2+m8@16 aB16+m8@24 aB wg 2x2x8 kr kc8 nse nmk li pt sr sb32 bk0 grf256 kv afb", {16, (LoopType) 255, 256, {(LoopType) 225, (LoopType) 255, (LoopType) 2}, {1048576, 131072, 16777216}, {1048576, 131072, 32}, {64, 8, 16}, {2, 2, 8}, 1, (WGType) 1, 413, 0, 8192, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.16702e+06, -121012, -8196.35, 178120, 2.6026e+06, 0, 1.04823, 5.51776, 0.693341, 1.46543, 0.0628152, 0.058718, 0.0176573, 0.998417, 1.16326, 0.364617, 1.16605e-11}}},
941
941
{{'F', "gemm", {"S", "S", "S"}, {"N", "T", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, 1, -1}, {1, 1, 1}, ""}, "aB8x2+S8@24 at8x2+S32@24 aB wg 4x1x8 kr kc8 nse nmk li pt sr sb256 bk0 grf256 kv afb l2d", {16, (LoopType) 255, 256, {(LoopType) 225, (LoopType) 255, (LoopType) 2}, {262144, 16384, 16777216}, {8192, 8192, 0}, {16, 1, 8}, {4, 1, 8}, 1, (WGType) 1, 413, 0, 256, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.1701e+06, -71179.7, -3197.37, 120149, 2.58458e+06, 0, 0.911003, 23.7279, 3.34366, 10.235, 1.37481, 0.288391, 0.23596, 0.333333, 1.09898, 0, 0}}},
942
942
{{'F', "gemm", {"S", "S", "S"}, {"N", "T", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {16, -1, -1}, {1, 1, 1}, ""}, "am8x2+m32@8 aB8x2+m8@8 aB wg 1x4x16 kr kc8 nse li pt sr sb256 bk0 kv afb l2d", {16, (LoopType) 255, 128, {(LoopType) 224, (LoopType) 255, (LoopType) 2}, {262144, 262144, 16777216}, {262144, 262144, 32}, {16, 16, 8}, {1, 4, 16}, 1, (WGType) 1, 413, 0, 4096, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.13268e+06, -103657, 246.583, 142575, 3.21126e+06, 0, 1.60336, 0.933443, 0.504957, 0.918636, 0.0712742, 0.0670609, 0.015172, 0.992002, 1.14631, 0.0718492, 1.8422e-11}}},
943
- {{'F', "gemm", {"S", "S", "S"}, {"N", "T", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, -1, -1}, {1, 1 , 1}, ""}, "am16x2+m16@16 aB8x2+m8@24 aS wg 1x8x4 kr kc8 nse li pt sr sb256 bk0 grf256 kv afb l2d", {16, (LoopType) 255, 256, {(LoopType) 224, (LoopType) 255, (LoopType) 2}, {65536, 262144, 16777216}, {65536, 262144, 32}, {4, 16, 16}, {1, 8, 4}, 1, (WGType) 1, 413, 0, 2048, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.26743e+06, -130651, -75631.7, 238106, 2.415e+06, 0, 13.4604, 0.983422, 2.77777, 5.15729, 0.127127, 0.144409, 0.0797807, 1, 1.29538, 1.06434, 1.41696e-13}}},
943
+ {{'F', "gemm", {"S", "S", "S"}, {"N", "T", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, -1, -1}, {4, 4 , 1}, ""}, "am16x2+m16@16 aB8x2+m8@24 aS wg 1x8x4 kr kc8 nse li pt sr sb256 bk0 grf256 kv afb l2d", {16, (LoopType) 255, 256, {(LoopType) 224, (LoopType) 255, (LoopType) 2}, {65536, 262144, 16777216}, {65536, 262144, 32}, {4, 16, 16}, {1, 8, 4}, 1, (WGType) 1, 413, 0, 2048, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.26743e+06, -130651, -75631.7, 238106, 2.415e+06, 0, 13.4604, 0.983422, 2.77777, 5.15729, 0.127127, 0.144409, 0.0797807, 1, 1.29538, 1.06434, 1.41696e-13}}},
944
944
{{'F', "gemm", {"S", "S", "S"}, {"N", "T", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, -1, -1}, {1, 1, 1}, ""}, "at8+m16@8 aB16x2+m16@8 aS wg 1x8x8 kr kc8 nse li pt sr sb256 bk0 kv afb l2d", {16, (LoopType) 255, 128, {(LoopType) 224, (LoopType) 255, (LoopType) 2}, {16384, 262144, 16777216}, {16384, 262144, 32}, {1, 16, 16}, {1, 8, 8}, 1, (WGType) 1, 413, 0, 512, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.10613e+06, -79156.7, -1736.09, 153649, 2.00704e+06, 0, 47.8566, 0.896699, 10.8519, 21.2763, 1.082, 0.293987, 0.241183, 0.333333, 1.10378, 0, 0}}},
945
945
{{'F', "gemm", {"S", "S", "S"}, {"N", "T", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "am8x2+B8@8 am16x2+S16@8 aB wg 4x8 kc8 nse hi pt sb256 bk0 grf256 sr l2d", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 131072, 16777216}, {8192, 8192, 16777216}, {32, 8, 16}, {4, 8, 1}, 1, (WGType) 1, 257, 0, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {869157, 563214, 0, 0, 0, 0, 2.39412, 4.22276, 0.853271, 1.30754, 0.0725087, 0.0725087, 0, 1, 1.25715, 0.942426, 4.35208e-12}}},
946
946
{{'F', "gemm", {"S", "S", "S"}, {"N", "T", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "am8+m8@32 am16x2+m16@16 aB wg 2x8 kc8 nse hi pt sr sb256 bk0 grf256 l2d", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 65536, 16777216}, {524288, 65536, 16777216}, {32, 4, 16}, {2, 8, 1}, 1, (WGType) 1, 257, 0, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.09546e+06, 254169, 0, 0, 0, 0, 1.63097, 3.84728, 1.50643, 2.37923, 0.0742032, 0.0154716, 0.0594372, 0.827878, 1.42273, 0.913754, 1.12977e-11}}},
0 commit comments