diff --git a/src/gpu/intel/jit/gemm/selector/db/kernel.db b/src/gpu/intel/jit/gemm/selector/db/kernel.db index 38ec5270b60..a9f48d77f4c 100644 --- a/src/gpu/intel/jit/gemm/selector/db/kernel.db +++ b/src/gpu/intel/jit/gemm/selector/db/kernel.db @@ -178,10 +178,12 @@ auto _CATALOG_ = kcatalog::toArray({ {{'E', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4, 1}, "Ixyz"}, "sS32x2 sB16 sB wg 16x2 cb4 ks32 xaf dw vav bo sr bk0 sn dm grf256 sys pab", {8, (LoopType) 0, 256, {(LoopType) 128, (LoopType) 255, (LoopType) 255}, {262144, 1048576, 16777216}, {262144, 1048576, 16777216}, {16, 64, 32}, {16, 2, 1}, 1, (WGType) 1, 257, 32768, 0, {4, 4, 4}, {false, false, false}}, {'E', 17, {982213, 473301, 0, 0, 0, 0, 1.74644, 5.1767, 6.10829, 17.1708, 0.0167439, 0.0136956, 0.00599404, 0.999577, 1.37511, 1.22059, 7.53689e-13}}}, {{'E', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4, 1}, "Isxyz"}, "sS64 sB16x2 sB wg 16x2 cb4 ks64 xaf fx dw vav bo sr bk0 sn dm grf256 sys pab", {8, (LoopType) 0, 256, {(LoopType) 128, (LoopType) 255, (LoopType) 255}, {262144, 786432, 16777216}, {262144, 786432, 16777216}, {16, 48, 64}, {16, 2, 1}, 1, (WGType) 1, 257, 49152, 0, {4, 4, 4}, {false, false, false}}, {'E', 17, {981842, 458926, 0, 0, 0, 0, 1.5015, 5.00498, 6.31005, 16.9024, 0.0169639, 0.0400974, 0, 0.719651, 1.35848, 1.1845, 9.89658e-13}}}, {{'E', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, 33, -1}, {-1, -1, -1}, {4, 4, 1}, "xyIs"}, "sB16 sB32 aB wg 4x8 cab3x2 ks32 xaf st dw vav bo sr bk0 dm grf256 sys pab", {8, (LoopType) 0, 256, {(LoopType) 128, (LoopType) 255, (LoopType) 255}, {524288, 393216, 16777216}, {524288, 393216, 16777216}, {32, 24, 32}, {4, 8, 1}, 1, (WGType) 1, 257, 61440, 0, {4, 4, 4}, {false, false, true}}, {'E', 17, {1.01302e+06, 570829, 0, 0, 0, 0, 3.67307, 6.66635, 6.86396, 18.2302, 0.0202076, 0.0155595, 0.00597746, 1, 1.56109, 1.12816, 4.46535e-12}}}, -{{'E', "gemm", {"F", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, 8, -1}, {-1, -1, -1}, {-1, 8, -1}, {4, 4, 1}, "Iqxy"}, "sB64 sS16 aS wg 2x1x8 ikr af acb sr bk0 bm0 sys pab grf256 rc0", {8, (LoopType) 0, 256, {(LoopType) 0, (LoopType) 1, (LoopType) 2}, {16777216, 8192, 16777216}, {8192, 8192, 16777216}, {16, 8, 64}, {2, 1, 8}, 1, (WGType) 0, 4357, 0, 1024, {4, 4, 4}, {false, false, true}}, {'E', 17, {3.5449e+06, 60571.4, -243099, 15595.1, 0, 0, 1.78243, 2.8889, 2.76679, 6.10171, 0.051381, 0.0216118, 0.0510683, 1, 1.21576, 1.21633, -9.23968e-14}}}, -{{'E', "gemm", {"F", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, 8, -1}, {-1, -1, -1}, {-1, 8, -1}, {4, 4, 1}, "IQxy"}, "sS64 sB32 aB wg 2x1x8 ikr ki64 sys af k64 grf256 acb di sr nch fm pab rc0", {8, (LoopType) 0, 256, {(LoopType) 0, (LoopType) 1, (LoopType) 2}, {8192, 8192, 16777216}, {8192, 8192, 16777216}, {16, 8, 64}, {2, 1, 8}, 1, (WGType) 0, 4357, 0, 1024, {4, 4, 4}, {false, false, true}}, {'W', 1, {128}}}, +{{'E', "gemm", {"F", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, 2, -1}, {-1, 8, -1}, {-1, 2, -1}, {-1, 8, -1}, {4, 4, 1}, "Iqxy"}, "sB64 sS16 aS wg 2x1x8 ikr af acb sr bk0 bm0 sys pab grf256 rc0", {8, (LoopType) 0, 256, {(LoopType) 0, (LoopType) 1, (LoopType) 2}, {16777216, 8192, 16777216}, {8192, 8192, 16777216}, {16, 8, 64}, {2, 1, 8}, 1, (WGType) 0, 4357, 0, 1024, {4, 4, 4}, {false, false, true}}, {'E', 17, {3.5449e+06, 60571.4, -243099, 15595.1, 0, 0, 1.78243, 2.8889, 2.76679, 6.10171, 0.051381, 0.0216118, 0.0510683, 1, 1.21576, 1.21633, -9.23968e-14}}}, +{{'E', "gemm", {"F", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, 2, -1}, {-1, 8, -1}, {-1, 2, -1}, {-1, 8, -1}, {4, 4, 1}, "IQxy"}, "sS64 sB32 aB wg 2x1x8 ikr ki64 sys af k64 grf256 acb di sr nch fm pab rc0", {8, (LoopType) 0, 256, {(LoopType) 0, (LoopType) 1, (LoopType) 2}, {8192, 8192, 16777216}, {8192, 8192, 16777216}, {16, 8, 64}, {2, 1, 8}, 1, (WGType) 0, 4357, 0, 1024, {4, 4, 4}, {false, false, true}}, {'W', 1, {128}}}, {{'E', "gemm", {"F", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {16, 16, 1}, "Ixyz"}, "sB64 sB32x2 sB wg 4x8 ca4x2 ks64 af dw nse hi sr sm dm grf256 cr0 sys pab bk0", {8, (LoopType) 0, 256, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {524288, 262144, 16777216}, {524288, 262144, 16777216}, {32, 16, 64}, {4, 8, 1}, 1, (WGType) 1, 257, 16384, 0, {16, 16, 4}, {false, false, false}}, {'E', 17, {930230, 383972, 0, 0, 0, 0, 1.36662, 2.39816, 6.07666, 16.7056, 0.00930946, 0.00736716, 0.0110739, 1, 1.22963, 1.21426, 6.39235e-14}}}, {{'E', "gemm", {"F", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4, 1}, "Ixyz"}, "sB64 sB32x2 sB wg 4x8 ca4x2 ks64 af dw nse hi sr sm dm grf256 cr0 sys pab bk0", {8, (LoopType) 0, 256, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {524288, 262144, 16777216}, {524288, 262144, 16777216}, {32, 16, 64}, {4, 8, 1}, 1, (WGType) 1, 257, 16384, 0, {4, 4, 4}, {false, false, false}}, {'E', 17, {928487, 383872, 0, 0, 0, 0, 1.36666, 2.39635, 6.08292, 16.706, 0.00967946, 0.0275317, 0.0124194, 0.709751, 1.30644, 1.2369, 3.12589e-14}}}, +{{'E', "gemm", {"F", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, 1, 8192}, {-1, 1, -1}, {-1, 1, 8192}, {-1, 1, -1}, {16, 16, 1}, "I"}, "aS64 aB128 aB wg 2x1x32 ikr af nse bo sr sb128 dm sys bk0", {8, (LoopType) 0, 128, {(LoopType) 128, (LoopType) 255, (LoopType) 2}, {524288, 16384, 16777216}, {524288, 16384, 16777216}, {32, 1, 128}, {2, 1, 32}, 1, (WGType) 0, 4357, 0, 256, {16, 16, 4}, {true, true, true}}, {'W', 1, {32}}}, +{{'E', "gemm", {"F", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, 1, -1}, {-1, 1, 8191}, {-1, 1, -1}, {-1, 1, 8191}, {16, 16, 1}, "I"}, "aS64 aB128 aB wg 2x1x32 kr af nse bo sr sb128 dm sys bk0", {8, (LoopType) 0, 128, {(LoopType) 128, (LoopType) 255, (LoopType) 2}, {524288, 16384, 16777216}, {524288, 16384, 16777216}, {32, 1, 128}, {2, 1, 32}, 1, (WGType) 0, 261, 0, 256, {16, 16, 4}, {true, true, true}}, {'W', 1, {32}}}, {{'E', "gemm", {"F", "S", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "ixy"}, "sB2x2 sB2x2 aB wg 4x8 kc2 cab4 ks8 nse bo sr bk0 sn l4 pab", {8, (LoopType) 0, 128, {(LoopType) 128, (LoopType) 255, (LoopType) 255}, {524288, 262144, 16777216}, {524288, 262144, 16777216}, {32, 16, 8}, {4, 8, 1}, 1, (WGType) 1, 257, 32768, 0, {1, 2, 4}, {false, false, true}}, {'W', 1, {512}}}, {{'E', "gemm", {"F", "S", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "ixy"}, "sB4 sB4 aB wg 4x8 kc4 cab4 ks8 nse bo sr bk0 sm sn l4 pab", {8, (LoopType) 0, 128, {(LoopType) 128, (LoopType) 255, (LoopType) 255}, {524288, 262144, 16777216}, {524288, 262144, 16777216}, {32, 16, 8}, {4, 8, 1}, 1, (WGType) 1, 257, 32768, 0, {1, 2, 4}, {false, false, true}}, {'W', 1, {512}}}, {{'E', "gemm", {"H", "F", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "Ixy"}, "sB32 sB16 aB wg 8x4 cab4 ks32 af dw vav bo bk0 sm sn grf256 sys pab l4 sr", {8, (LoopType) 0, 256, {(LoopType) 128, (LoopType) 255, (LoopType) 255}, {262144, 262144, 16777216}, {8192, 8192, 16777216}, {16, 16, 32}, {8, 4, 1}, 1, (WGType) 1, 257, 49152, 0, {2, 2, 4}, {false, false, true}}, {'E', 17, {1.03504e+06, 483759, 0, 0, 0, 0, 5.14655, 5.4381, 5.95309, 14.8916, 0.0340942, 0.0152122, 0.0258839, 1, 1.27788, 1.19062, 3.54023e-13}}}, @@ -405,8 +407,10 @@ auto _CATALOG_ = kcatalog::toArray({ {{'E', "gemm", {"[FO]", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 4, 1}, "Ipsxy"}, "sB32 sB32 aB wg 8x8 cab4 ks32 af dw vav hi sr bk0 sm dm grf256 sys acb pab", {8, (LoopType) 0, 256, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {524288, 393216, 16777216}, {524288, 393216, 16777216}, {32, 24, 128}, {8, 8, 1}, 1, (WGType) 1, 257, 57344, 0, {4, 4, 4}, {false, false, true}}, {'E', 17, {1.72221e+06, 2.4081e+06, 0, 0, 0, 0, 1.90767, 2.47059, 4.96037, 15.1649, 0.0137524, 0.0137524, 0, 1, 1.25118, 1.20908, -4.599e-14}}}, {{'E', "gemm", {"[FO]", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, 1024, -1}, {4, 4, 1}, "Ipsxy"}, "sB32 sB32 aB wg 8x8 cab4 ks32 af dw vav hi sr bk0 sm dm grf256 sys acb cr0 pab rc0", {8, (LoopType) 0, 256, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {524288, 393216, 16777216}, {524288, 393216, 16777216}, {32, 16, 128}, {8, 8, 1}, 1, (WGType) 1, 257, 57344, 0, {4, 4, 4}, {false, false, true}}, {'E', 17, {1.72221e+06, 2.4081e+06, 0, 0, 0, 0, 1.90767, 2.47059, 4.96037, 15.1649, 0.0137524, 0.0137524, 0, 1, 1.25118, 1.20908, -4.599e-14}}}, {{'E', "gemm", {"[FO]", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, 1024, -1}, {4, 4, 1}, "Ipsxy"}, "sB32 sB32 aB wg 8x4 cab4 ks32 af dw vav hi sr bk0 sm dm grf256 sys acb cr0 pab rc0", {8, (LoopType) 0, 256, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {524288, 393216, 16777216}, {524288, 393216, 16777216}, {32, 8, 128}, {8, 8, 1}, 1, (WGType) 1, 257, 57344, 0, {4, 4, 4}, {false, false, true}}, {'E', 17, {120968, 1.77814e+06, 0, 0, 0, 0, 0.93955, 4.60294, 4.76347, 13.3704, 0.0275881, 0.0275881, 0, 0.900385, 1.23903, 1.20423, -9.89412e-15}}}, -{{'E', "gemm", {"[FO]", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, 8, -1}, {-1, -1, -1}, {-1, 8, -1}, {4, 4, 1}, "Inqxy"}, "sB64 sS16 aS wg 2x1x8 ikr af acb sr bk0 bm0 sys pab grf256 rc0", {8, (LoopType) 0, 256, {(LoopType) 0, (LoopType) 1, (LoopType) 2}, {16777216, 8192, 16777216}, {8192, 8192, 16777216}, {16, 8, 64}, {2, 1, 8}, 1, (WGType) 0, 4357, 0, 1024, {4, 4, 4}, {false, false, true}}, {'E', 17, {2.83863e+06, 74855, -154928, 13616, 0, 0, 1.71054, 2.96521, 2.65327, 6.42273, 0.0516787, 0.0149917, 0.0568116, 1, 1.22262, 1.22456, -1.25262e-13}}}, -{{'E', "gemm", {"[FO]", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, 8, -1}, {-1, -1, -1}, {-1, 8, -1}, {4, 4, 1}, "IQxy"}, "sS64 sB32 aB wg 2x1x8 ikr ki64 sys af k64 grf256 acb di sr nch fm pab", {8, (LoopType) 0, 256, {(LoopType) 0, (LoopType) 1, (LoopType) 2}, {8192, 8192, 2048}, {8192, 8192, 2048}, {16, 8, 64}, {2, 1, 8}, 1, (WGType) 0, 4357, 0, 1024, {4, 4, 4}, {false, false, true}}, {'W', 1, {128}}}, +{{'E', "gemm", {"[FO]", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, 2, -1}, {-1, 8, -1}, {-1, 2, -1}, {-1, 8, -1}, {4, 4, 1}, "Inqxy"}, "sB64 sS16 aS wg 2x1x8 ikr af acb sr bk0 bm0 sys pab grf256 rc0", {8, (LoopType) 0, 256, {(LoopType) 0, (LoopType) 1, (LoopType) 2}, {16777216, 8192, 16777216}, {8192, 8192, 16777216}, {16, 8, 64}, {2, 1, 8}, 1, (WGType) 0, 4357, 0, 1024, {4, 4, 4}, {false, false, true}}, {'E', 17, {2.83863e+06, 74855, -154928, 13616, 0, 0, 1.71054, 2.96521, 2.65327, 6.42273, 0.0516787, 0.0149917, 0.0568116, 1, 1.22262, 1.22456, -1.25262e-13}}}, +{{'E', "gemm", {"[FO]", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, 2, -1}, {-1, 8, -1}, {-1, 2, -1}, {-1, 8, -1}, {4, 4, 1}, "IQxy"}, "sS64 sB32 aB wg 2x1x8 ikr ki64 sys af k64 grf256 acb di sr nch fm pab", {8, (LoopType) 0, 256, {(LoopType) 0, (LoopType) 1, (LoopType) 2}, {8192, 8192, 2048}, {8192, 8192, 2048}, {16, 8, 64}, {2, 1, 8}, 1, (WGType) 0, 4357, 0, 1024, {4, 4, 4}, {false, false, true}}, {'W', 1, {128}}}, +{{'E', "gemm", {"[FO]", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, 1, -1}, {-1, 1, 8191}, {-1, 1, -1}, {-1, 1, 8191}, {16, 16, 1}, "I"}, "aS64 aB128 aB wg 2x1x32 kr af nse bo sr sb128 dm sys bk0", {8, (LoopType) 0, 128, {(LoopType) 128, (LoopType) 255, (LoopType) 2}, {262144, 16384, 16777216}, {262144, 16384, 16777216}, {16, 1, 128}, {2, 1, 32}, 1, (WGType) 0, 261, 0, 128, {16, 16, 4}, {true, true, true}}, {'W', 1, {16}}}, +{{'E', "gemm", {"[FO]", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, 1, 8192}, {-1, 1, -1}, {-1, 1, 8192}, {-1, 1, -1}, {16, 16, 1}, "I"}, "aS64 aB128 aB wg 2x1x32 ikr af nse bo sr sb128 dm sys bk0", {8, (LoopType) 0, 128, {(LoopType) 128, (LoopType) 255, (LoopType) 2}, {262144, 16384, 16777216}, {262144, 16384, 16777216}, {16, 1, 128}, {2, 1, 32}, 1, (WGType) 0, 4357, 0, 128, {16, 16, 4}, {true, true, true}}, {'W', 1, {16}}}, {{'E', "gemm", {"[SB]", "[SB]", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "xyI"}, "sB16 sB16 aB wg 16x4 cab4 ks16 af dw vav hi bk0 sn grf256 sys pab sr", {8, (LoopType) 0, 256, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {262144, 1048576, 16777216}, {8192, 8192, 16777216}, {16, 64, 16}, {16, 4, 1}, 1, (WGType) 1, 257, 65536, 0, {4, 4, 4}, {false, false, true}}, {'E', 17, {555289, 1.12387e+06, 0, 0, 0, 0, 11.0812, 12.062, 6.31783, 16.9321, 0.033502, 0.033502, 0, 0.924238, 1.20788, 1.20316, -1.03588e-14}}}, {{'E', "gemm", {"[SB]", "[SB]", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "xypIn"}, "sB32 sB32 aB wg 8x8 cab4 ks32 af dw vav hi bk0 sn sys pab sr", {8, (LoopType) 0, 128, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {262144, 262144, 16777216}, {8192, 8192, 16777216}, {16, 16, 32}, {8, 8, 1}, 1, (WGType) 1, 257, 65536, 0, {4, 4, 4}, {false, false, true}}, {'E', 17, {1.01712e+06, 501677, 0, 0, 0, 0, 11.2352, 11.0061, 6.00586, 15.9727, 0.0368901, 0.0329381, 0.0158743, 0.872583, 1.26733, 1.18539, 7.97223e-13}}}, {{'E', "gemm", {"[SB]", "[SB]", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "xyI"}, "sB16x2 sB16x2 aB wg 8x8 cab4 ks32 af dw vav hi bk0 sn grf256 sys pab sr", {8, (LoopType) 0, 256, {(LoopType) 144, (LoopType) 255, (LoopType) 255}, {262144, 131072, 16777216}, {8192, 8192, 16777216}, {16, 8, 32}, {8, 8, 1}, 1, (WGType) 1, 257, 49152, 0, {4, 4, 4}, {false, false, true}}, {'E', 17, {538723, 893443, 0, 0, 0, 0, 10.7801, 9.8182, 5.03684, 13.5344, 0.0761836, 0.0761836, 0, 0.79624, 1.20514, 1.20124, -4.36232e-15}}}, @@ -1069,9 +1073,10 @@ auto _CATALOG_ = kcatalog::toArray({ {{'G', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, 25, -1}, {-1, 32, -1}, {-1, 25, -1}, {-1, 32, -1}, {16, 16, 1}, "IAB"}, "at32+m128@80 am32+m128@80 aB wg 8x1x4 ikr wx2 xaf vav hi pt sr br sb128 bk0 sm sn bm0 nmk sys", {16, (LoopType) 255, 128, {(LoopType) 209, (LoopType) 255, (LoopType) 2}, {16777216, 262144, 16777216}, {262144, 262144, 16777216}, {16, 16, 128}, {8, 1, 4}, 2, (WGType) 1, 4357, 0, 8192, {16, 16, 4}, {true, true, true}}, {'W', 1, {256}}}, {{'G', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, 33, -1}, {-1, 48, -1}, {-1, 33, -1}, {-1, 48, -1}, {16, 16, 1}, "ABI"}, "at64+m64@48 am32+m16@48 aB wg 4x1 xaf rr vav hi pt sr br sb64 bk0 sm grf256 sys np", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 524288, 16777216}, {524288, 524288, 16777216}, {32, 32, 64}, {4, 1, 1}, 1, (WGType) 1, 257, 0, 0, {16, 16, 4}, {true, true, true}}, {'W', 1, {1024}}}, {{'G', "gemm", {"F", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {16, 16, 1}, "ABI"}, "at32+m128@96 am32x2+m64@96 aB wg 2x16 vav hi pt sr br sb128 bk0 grf256 sys acb cr16", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {2097152, 262144, 16777216}, {2097152, 262144, 16777216}, {128, 16, 32}, {2, 16, 1}, 1, (WGType) 1, 257, 0, 0, {16, 16, 4}, {true, true, true}}, {'E', 17, {879529, 62860.9, 0, 0, 0, 0, 1.12572, 1.9182, 3.81465, 7.84556, 0.00532516, 0.00532516, 0, 1, 1.01261, 1.00705, -3.00232e-14}}}, -{{'G', "gemm", {"F", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, 1, -1}, {-1, 1, -1}, {-1, -1, -1}, {-1, -1, -1}, {16, 16, 1}, "ABI"}, "at128 am128 ab wg 2x1x16 sys ikr sr br", {16, (LoopType) 255, 128, {(LoopType) 0, (LoopType) 1, (LoopType) 2}, {8192, 8192, 16777216}, {8192, 8192, 16777216}, {32, 1, 128}, {2, 1, 16}, 1, (WGType) 0, 4357, 0, 256, {16, 16, 4}, {true, true, true}}, {'E', 17, {533005, 706.931, 0, 0, 0, 0, 1.03522, 1.49979, 2.9056, 6.09078, 0.0666521, -0.0162066, 0.0674277, 0.261398, 1.07943, 0, 0}}}, {{'G', "gemm", {"F", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {16, 16, 1}, "ABI"}, "at128+m128@96 am32+m64@96 aB wg 2x1x16 sys ikr k128 sr br li pt nmk sb128 np bk0", {16, (LoopType) 255, 128, {(LoopType) 225, (LoopType) 255, (LoopType) 2}, {524288, 16384, 16777216}, {524288, 16384, 16777216}, {32, 1, 128}, {2, 1, 16}, 1, (WGType) 1, 4357, 0, 256, {16, 16, 4}, {true, true, true}}, {'E', 17, {764990, 1268.09, 0, 0, 0, 0, 1.01617, 2.03052, 3.46533, 8.79028, 0.0171141, 0.00829374, 0.00596437, 0.150105, 1.78232, -3.38813, 6.20049e-11}}}, {{'G', "gemm", {"F", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {16, 16, 1}, "ABI"}, "at64+m128@32 am128+m64@32 aB wg 2x8 xaf st hi pt sr br sb128 sn grf256 cr0 sys", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {1048576, 262144, 16777216}, {1048576, 262144, 16777216}, {64, 16, 128}, {2, 8, 1}, 1, (WGType) 1, 257, 0, 0, {16, 16, 4}, {true, true, true}}, {'E', 17, {525002, 18498.2, 0, 0, 0, 0, 0.485217, 0.854072, 1.96694, 5.31108, 0.00356541, 0.00161949, 0.00452935, 0.938224, 1.01441, 1.01414, -4.57758e-14}}}, +{{'G', "gemm", {"F", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, 1, -1}, {-1, 1, 8191}, {-1, 1, -1}, {-1, 1, 8191}, {16, 16, 1}, "ABI"}, "at128+m64@48 am128+m32@48 aB wg 2x1x4 ikr af hi pt sr br sb128 grf256 sys bk0 acb", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {524288, 16384, 16777216}, {524288, 16384, 16777216}, {32, 1, 128}, {2, 1, 4}, 1, (WGType) 1, 4357, 0, 256, {16, 16, 4}, {true, true, true}}, {'W', 1, {32}}}, +{{'G', "gemm", {"F", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, 1, 8192}, {-1, 1, -1}, {-1, 1, 8192}, {-1, 1, -1}, {16, 16, 1}, "ABI"}, "at64+m128@64 am128+m128@64 aB wg 1x1x8 ikr af rr hi pt sr br sb128 sn grf256 sys bk0 acb", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {524288, 16384, 16777216}, {524288, 16384, 16777216}, {32, 1, 128}, {1, 1, 8}, 1, (WGType) 1, 4357, 0, 128, {16, 16, 4}, {true, true, true}}, {'W', 1, {32}}}, {{'G', "gemm", {"O", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, 8, -1}, {16, 16, 1}, "IAB"}, "at32x2+m64@16 am64x2 aB wg 4x1x4 ikr af vav sr br sb64 bm0 bk0 sys nmk np", {16, (LoopType) 255, 128, {(LoopType) 0, (LoopType) 1, (LoopType) 2}, {8192, 8192, 16777216}, {8192, 8192, 16777216}, {16, 8, 64}, {4, 1, 4}, 1, (WGType) 1, 261, 0, 2048, {16, 16, 4}, {true, true, true}}, {'W', 1, {128}}}, {{'G', "gemm", {"O", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, 9, -1}, {-1, 64, -1}, {16, 16, 1}, "IAB"}, "at32x2+m64@16 am64x2 aB wg 4x1x4 ikr af vav sr br sb64 bm0 bk0 sys nmk grf256 np", {16, (LoopType) 255, 128, {(LoopType) 0, (LoopType) 1, (LoopType) 2}, {8192, 8192, 16777216}, {8192, 8192, 16777216}, {16, 16, 64}, {4, 1, 4}, 1, (WGType) 1, 261, 0, 2048, {16, 16, 4}, {true, true, true}}, {'W', 1, {128}}}, {{'G', "gemm", {"[FO]", "O", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {16, 16, 1}, "ABIs"}, "am32+m32@112 am32x2+m32@112 aB wg 4x8 ca4 ks32 af st rr vav hi pt sr br bk0 sn nb 4x0 grf256 sys acb cr16", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {1048576, 458752, 16777216}, {1048576, 458752, 16777216}, {64, 28, 32}, {4, 8, 1}, 1, (WGType) 1, 257, 32768, 0, {16, 16, 4}, {true, true, true}}, {'E', 17, {855191, 60448.1, 0, 0, 0, 0, 0.945598, 1.89384, 4.01425, 7.25708, 0.00522328, 0.00522328, 0, 1, 1.04077, 1.00307, 1.26521e-13}}}, @@ -1079,6 +1084,8 @@ auto _CATALOG_ = kcatalog::toArray({ {{'G', "gemm", {"[FO]", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, 511, -1}, {16, 16, 1}, "ABI"}, "at32+m32@32 am32+m64@32 aB wg 4x4 ca3 ks32 xaf st vav hi pt sr br bk0 sm sn nb 4x0 grf256 sys acb cr0 ", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {1048576, 524288, 16777216}, {1048576, 524288, 16777216}, {32, 32, 128}, {4, 8, 1}, 1, (WGType) 1, 257, 24576, 0, {16, 16, 4}, {true, true, true}}, {'E', 17, {758684, 49729.1, 0, 0, 0, 0, 0.934748, 1.93997, 3.75236, 8.71198, 0.00639545, 0.00639545, 0, 1, 1.01136, 1.00459, 2.53456e-14}}}, {{'G', "gemm", {"[FO]", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, 8, -1}, {-1, -1, -1}, {-1, 8, -1}, {16, 16, 1}, "ABIpq"}, "at32 am128 aB wg 2x1x8 ikr xaf st acb hi pt sr br sb128 bk0 bm0 nmk sys", {16, (LoopType) 255, 128, {(LoopType) 209, (LoopType) 255, (LoopType) 2}, {16777216, 131072, 16777216}, {262144, 131072, 16777216}, {16, 8, 128}, {2, 1, 8}, 1, (WGType) 0, 4357, 0, 1024, {16, 16, 4}, {true, true, true}}, {'E', 17, {688345, 14101.4, 16156.7, 499.636, 0, 0, 0.759369, 1.91298, 1.29685, 8.23773, 0.0203254, 0.0334888, 0.021689, 1, 1.90691, 0.897029, 6.06249e-12}}}, {{'G', "gemm", {"[FO]", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, 9, -1}, {-1, 32, -1}, {-1, 9, -1}, {-1, 32, -1}, {16, 16, 1}, "ABIpq"}, "at32+m128@80 am32+m128@80 aB wg 8x1x4 ikr wx2 xaf acb hi pt sr br sb128 bk0 sm sn bm0 nmk sys", {16, (LoopType) 255, 128, {(LoopType) 209, (LoopType) 255, (LoopType) 2}, {16777216, 262144, 16777216}, {262144, 262144, 16777216}, {16, 16, 128}, {8, 1, 4}, 2, (WGType) 1, 4357, 0, 8192, {16, 16, 4}, {true, true, true}}, {'E', 17, {735000, 35051.3, 20105.7, 10419.3, 0, 0, 1.02806, 2.00271, 0.717545, 5.45873, 0.0120863, 0.0120863, 0, 1, 1.00881, 1.00341, 2.10507e-13}}}, -{{'G', "gemm", {"[FO]", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, 8, -1}, {-1, -1, -1}, {-1, 8, -1}, {16, 16, 1}, "IAB"}, "at32 am128 aB wg 2x1x8 ikr xaf st acb li sr br sb128 bk0 bm0 nmk sys", {16, (LoopType) 255, 128, {(LoopType) 161, (LoopType) 255, (LoopType) 2}, {16777216, 131072, 16777216}, {262144, 131072, 16777216}, {16, 8, 128}, {2, 1, 8}, 1, (WGType) 0, 4357, 0, 1024, {16, 16, 4}, {true, true, true}}, {'E', 17, {683920, 12729, 10055.6, 676.527, 0, 0, 1.01772, 1.93175, -0.0239557, 5.55272, 0.0286059, 0.0303289, 0.0227933, 0.973783, 1.00914, 1.00487, 3.30544e-14}}}, -{{'G', "gemm", {"[FO]", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, 25, -1}, {-1, 32, -1}, {-1, 25, -1}, {-1, 32, -1}, {16, 16, 1}, "IAB"}, "at32+m128@80 am32+m128@80 aB wg 8x1x4 ikr wx2 xaf acb hi pt sr br sb128 bk0 sm sn bm0 nmk sys", {16, (LoopType) 255, 128, {(LoopType) 209, (LoopType) 255, (LoopType) 2}, {16777216, 262144, 16777216}, {262144, 262144, 16777216}, {16, 16, 128}, {8, 1, 4}, 2, (WGType) 1, 4357, 0, 8192, {16, 16, 4}, {true, true, true}}, {'E', 17, {739630, 35298.1, 22639.2, 10401.8, 0, 0, 1.02641, 1.90486, 0.696072, 5.49548, 0.0121039, 0.0121039, 0, 1, 1.00848, 1.00061, 2.96932e-13}}} +{{'G', "gemm", {"[FO]", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, 2, -1}, {-1, 8, -1}, {-1, 2, -1}, {-1, 8, -1}, {16, 16, 1}, "IAB"}, "at32 am128 aB wg 2x1x8 ikr xaf st acb li sr br sb128 bk0 bm0 nmk sys", {16, (LoopType) 255, 128, {(LoopType) 161, (LoopType) 255, (LoopType) 2}, {16777216, 131072, 16777216}, {262144, 131072, 16777216}, {16, 8, 128}, {2, 1, 8}, 1, (WGType) 0, 4357, 0, 1024, {16, 16, 4}, {true, true, true}}, {'E', 17, {683920, 12729, 10055.6, 676.527, 0, 0, 1.01772, 1.93175, -0.0239557, 5.55272, 0.0286059, 0.0303289, 0.0227933, 0.973783, 1.00914, 1.00487, 3.30544e-14}}}, +{{'G', "gemm", {"[FO]", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, 25, -1}, {-1, 32, -1}, {-1, 25, -1}, {-1, 32, -1}, {16, 16, 1}, "IAB"}, "at32+m128@80 am32+m128@80 aB wg 8x1x4 ikr wx2 xaf acb hi pt sr br sb128 bk0 sm sn bm0 nmk sys", {16, (LoopType) 255, 128, {(LoopType) 209, (LoopType) 255, (LoopType) 2}, {16777216, 262144, 16777216}, {262144, 262144, 16777216}, {16, 16, 128}, {8, 1, 4}, 2, (WGType) 1, 4357, 0, 8192, {16, 16, 4}, {true, true, true}}, {'E', 17, {739630, 35298.1, 22639.2, 10401.8, 0, 0, 1.02641, 1.90486, 0.696072, 5.49548, 0.0121039, 0.0121039, 0, 1, 1.00848, 1.00061, 2.96932e-13}}}, +{{'G', "gemm", {"[FO]", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, 1, 8192}, {-1, 1, -1}, {-1, 1, 8192}, {-1, 1, -1}, {16, 16, 1}, "ABI"}, "at64+m128@64 am128+m128@64 aB wg 1x1x8 ikr af rr hi pt sr br sb128 sn grf256 sys bk0 acb", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {524288, 16384, 16777216}, {524288, 16384, 16777216}, {32, 1, 128}, {1, 1, 8}, 1, (WGType) 1, 4357, 0, 128, {16, 16, 4}, {true, true, true}}, {'W', 1, {32}}}, +{{'G', "gemm", {"[FO]", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, 1, -1}, {-1, 1, 8191}, {-1, 1, -1}, {-1, 1, 8191}, {16, 16, 1}, "ABI"}, "at128+m64@48 am128+m32@48 aB wg 2x1x4 ikr af hi pt sr br sb128 grf256 sys bk0 acb", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {524288, 16384, 16777216}, {524288, 16384, 16777216}, {32, 1, 128}, {2, 1, 4}, 1, (WGType) 1, 4357, 0, 256, {16, 16, 4}, {true, true, true}}, {'W', 1, {32}}} });