Skip to content

Commit 23f153f

Browse files
committed
xe: jit: gemm: reduce grf consumption for fp4 strategies
1 parent a157fbe commit 23f153f

File tree

1 file changed

+9
-9
lines changed

1 file changed

+9
-9
lines changed

src/gpu/intel/jit/gemm/selector/db/kernel.db

+9-9
Original file line numberDiff line numberDiff line change
@@ -595,15 +595,15 @@ auto _CATALOG_ = kcatalog::toArray({
595595
{{'F', "gemm", {"D", "D", "D"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "aS8x2+S16@24 aS16+S16@32 aB wg 4x2x4 kr kc8 nse hi pt sr br bk0 sm sn grf256 kv afb sb32", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {131072, 131072, 16777216}, {8192, 8192, 0}, {8, 8, 16}, {4, 2, 4}, 1, (WGType) 1, 413, 0, 4096, {8, 8, 8}, {true, true, true}}, {'E', 17, {1.2692e+06, -138155, -69946.2, 247166, 2.48218e+06, 0, 2.22535, 2.9076, -0.226323, 2.36118, 0.179437, 0.0955498, 0.0965902, 0.986833, 1.48781, 1.14074, 6.5259e-12}}},
596596
{{'F', "gemm", {"D", "D", "D"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "aS16+S8@32 aS8+S1,8@32 aB wg 16x1x2 kr kc8 nse li nmk pt sr br bk0 sm sn grf256 sb32 kv afb", {16, (LoopType) 255, 256, {(LoopType) 225, (LoopType) 255, (LoopType) 2}, {131072, 262144, 16777216}, {8192, 8192, 0}, {8, 16, 16}, {16, 1, 2}, 1, (WGType) 1, 413, 0, 16384, {8, 8, 8}, {true, true, true}}, {'E', 17, {1.08982e+06, -365235, -57570.3, 563017, 2.44122e+06, 0, 1.97404, 6.45479, 0.729323, 2.0721, 0.109576, 0.0188555, 0.121551, 0.995776, 1.6362, 1.09808, 8.84348e-12}}},
597597
{{'F', "gemm", {"D", "D", "D"}, {"T", "T", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "aS8+S1,8@16 aB8+B8@16 aS nse wg 8x4 bo pt kc8 sm sb256 grf256 bk0 sr", {16, (LoopType) 255, 256, {(LoopType) 192, (LoopType) 255, (LoopType) 255}, {524288, 524288, 16777216}, {8192, 8192, 16777216}, {32, 32, 8}, {8, 4, 1}, 1, (WGType) 1, 257, 0, 0, {8, 8, 8}, {true, true, true}}, {'E', 17, {863507, 843091, 0, 0, 0, 0, 2.99578, 2.42156, 4.42824, 4.74906, 0.0650645, 0.0650645, 0, 0.993213, 1.59693, 1.16972, 1.1178e-11}}},
598-
{{'F', "gemm", {"E", "E", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "#I"}, "aB16+m32@32 aB32 aB wg 2x8 af vav li nmk pt sr br ca3 bk0 sys kv dm afb l4", {16, (LoopType) 255, 128, {(LoopType) 225, (LoopType) 255, (LoopType) 255}, {1048576, 32768, 16777216}, {1048576, 32768, 32}, {32, 2, 32}, {2, 8, 1}, 1, (WGType) 1, 441, 24576, 0, {4, 2, 4}, {true, true, true}}, {'E', 17, {1.32162e+06, 161954, 0, 0, 2.32817e+06, 0, 0.71806, 4.15517, 0.786689, 1.40778, 0.0341164, 0.0131941, 0.0256486, 0.947188, 1.39057, 0.987284, 5.0128e-12}}},
599-
{{'F', "gemm", {"E", "E", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {8, 8, 1}, "ABI"}, "av64+m64@64 am32x2+m64@32 aB wg 4x8 xaf vav hi pt sr br sb64 bk0 sn grf256 sys", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {262144, 131072, 16777216}, {262144, 131072, 16777216}, {32, 32, 64}, {4, 8, 1}, 1, (WGType) 1, 257, 0, 0, {16, 16, 4}, {true, true, true}}, {'E', 17, {888453, 388680, 0, 0, 0, 0, 1.36981, 1.36706, 1.57389, 2.50026, 0.0230892, 0.0230892, 0, 0.779666, 1.13216, 0.970824, 1.11098e-12}}},
600-
{{'F', "gemm", {"E", "E", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {8, 8, 1}, "ABI"}, "av16+m64@64 am32+m64@64 aB wg 4x8 af vav hi pt sr br sb256 bk0 sn grf256 sys", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 262144, 16777216}, {524288, 262144, 16777216}, {32, 16, 32}, {4, 8, 1}, 1, (WGType) 1, 257, 0, 0, {8, 8, 4}, {true, true, true}}, {'E', 17, {872440, 433979, 0, 0, 0, 0, 0.692755, 0.929392, 0.682568, 1.28977, 0.00829318, 0.00829318, 0, 0.933146, 1.44966, 1.06633, 2.17433e-12}}},
601-
{{'F', "gemm", {"E", "E", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {8, 8, 1}, "ABI"}, "av16+m64@64 am16 aB wg 8x4 cb4x2 ks32 xaf vav hi pt sr br bk0 sn grf256 sys", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 524288, 16777216}, {524288, 524288, 16777216}, {32, 32, 32}, {8, 4, 1}, 1, (WGType) 1, 257, 32768, 0, {8, 8, 4}, {true, true, true}}, {'E', 17, {1.00706e+06, 522382, 0, 0, 0, 0, 0.725659, 1.44632, 0.970408, 1.74134, 0.0067111, 0.0067111, 0, 0.90349, 1.42986, 1.13348, 2.91269e-12}}},
602-
{{'F', "gemm", {"E", "E", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {8, 8, 1}, "ABIps"}, "av32+m16@64 am32+m32@72 aB wg 8x4 xaf vav hi pt sr br sb64 bk0 sn grf256 sys kv afb", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 786432, 16777216}, {524288, 786432, 64}, {32, 16, 32}, {8, 4, 1}, 1, (WGType) 1, 441, 0, 0, {8, 8, 4}, {true, true, true}}, {'E', 17, {876646, 564122, 0, 0, 6.5151e+06, 7.83974e+06, 0.629669, 0.87362, 0.885543, 1.48097, 0.00440774, 0.00440774, 0, 1, 1.66234, 1.24996, 2.85794e-12}}},
603-
{{'F', "gemm", {"E", "E", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {8, 8, 1}, "ABI"}, "av16+m16@32 am16x2 aB wg 4x4x2 kr cb4 ks16 xaf st vav hi pt sr br bk0 sn grf256 sys", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {1048576, 524288, 16777216}, {1048576, 524288, 16777216}, {64, 32, 16}, {4, 4, 2}, 1, (WGType) 1, 261, 16384, 65536, {8, 8, 4}, {true, true, true}}, {'E', 17, {1.02495e+06, 13797.9, 15430.1, 758509, 0, 0, 0.535333, 1.20812, 0.912657, 1.84068, 0.00529983, 0.00529983, 0, 1, 1.60581, 1.15873, 3.51036e-12}}},
604-
{{'F', "gemm", {"E", "E", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {8, 8, 1}, "ABIqps"}, "av16+m32@72 am32+m32@64 aB wg 4x8 xaf vav hi pt sr br sb64 bk0 sn grf256 sys kv afb", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {1048576, 655360, 16777216}, {1048576, 655360, 32}, {32, 16, 32}, {4, 8, 1}, 1, (WGType) 1, 441, 0, 0, {8, 8, 4}, {true, true, true}}, {'E', 17, {903365, 697556, 0, 0, 8.2903e+06, 1.21651e+07, 0.724506, 0.722081, 0.92287, 1.55416, 0.00402055, 0.00402055, 0, 0.997691, 1.6726, 1.18622, 5.18793e-12}}},
605-
{{'F', "gemm", {"E", "E", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {8, 8, 1}, "ABI"}, "av16+m32@40 am32+m32@32 aB wg 2x8x2 kr xaf st vav hi pt sr br sb64 bk0 sn grf256 sys", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {1048576, 131072, 16777216}, {1048576, 131072, 16777216}, {64, 8, 32}, {2, 8, 2}, 1, (WGType) 1, 261, 0, 32768, {8, 8, 4}, {true, true, true}}, {'E', 17, {1.06324e+06, 220443, 364.907, 276934, 0, 0, 0.524524, 1.25881, 0.793843, 2.21167, 0.00974309, 0.00974309, 0, 0.984682, 1.55809, 1.03396, 4.08729e-12}}},
606-
{{'F', "gemm", {"F", "B", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "#I"}, "aB16x2 aB16x2 aB wg 4x8 cab4 ks64 af vav hi pt sr br bk0 sn nb 4x8 dm grf256 sys l4", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 131072, 16777216}, {524288, 131072, 16777216}, {32, 8, 64}, {4, 8, 1}, 1, (WGType) 1, 257, 98304, 0, {2, 2, 4}, {true, true, true}}, {'E', 17, {1.07581e+06, 764320, 0, 0, 0, 0, 0.804535, 1.46469, 0.96438, 2.27185, 0.0120677, 0.0120677, 0, 1, 1.38109, 0.955498, 2.48341e-12}}},
598+
{{'F', "gemm", {"E", "E", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "#I"}, "aB16+m32@32 aB16 aB wg 2x8 af li nmk pt sr br ca3 bk0 sys kv dm afb l4", {16, (LoopType) 255, 128, {(LoopType) 225, (LoopType) 255, (LoopType) 255}, {262144, 131072, 16777216}, {262144, 131072, 64}, {16, 8, 64}, {2, 8, 1}, 1, (WGType) 1, 441, 12288, 0, {1, 1, 4}, {true, true, true}}, {'E', 17, {1.32162e+06, 161954, 0, 0, 2.32817e+06, 0, 0.71806, 4.15517, 0.786689, 1.40778, 0.0341164, 0.0131941, 0.0256486, 0.947188, 1.39057, 0.987284, 5.0128e-12}}},
599+
{{'F', "gemm", {"E", "E", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {8, 8, 1}, "ABI"}, "av64+m64@64 am32x2+m64@32 aB wg 4x8 xaf hi pt sr br sb64 bk0 sn grf256 sys", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 262144, 16777216}, {524288, 262144, 16777216}, {32, 16, 64}, {4, 8, 1}, 1, (WGType) 1, 257, 0, 0, {8, 8, 4}, {true, true, true}}, {'E', 17, {888453, 388680, 0, 0, 0, 0, 1.36981, 1.36706, 1.57389, 2.50026, 0.0230892, 0.0230892, 0, 0.779666, 1.13216, 0.970824, 1.11098e-12}}},
600+
{{'F', "gemm", {"E", "E", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {8, 8, 1}, "ABI"}, "av16+m64@64 am32+m64@64 aB wg 4x8 af hi pt sr br sb256 bk0 sn grf256 sys", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 262144, 16777216}, {524288, 262144, 16777216}, {32, 16, 64}, {4, 8, 1}, 1, (WGType) 1, 257, 0, 0, {8, 8, 4}, {true, true, true}}, {'E', 17, {872440, 433979, 0, 0, 0, 0, 0.692755, 0.929392, 0.682568, 1.28977, 0.00829318, 0.00829318, 0, 0.933146, 1.44966, 1.06633, 2.17433e-12}}},
601+
{{'F', "gemm", {"E", "E", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {8, 8, 1}, "ABI"}, "av16+m64@64 am16 aB wg 8x4 cb4x2 ks32 xaf hi pt sr br bk0 sn grf256 sys", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 524288, 16777216}, {524288, 524288, 16777216}, {32, 32, 64}, {8, 4, 1}, 1, (WGType) 1, 257, 32768, 0, {8, 8, 4}, {true, true, true}}, {'E', 17, {1.00706e+06, 522382, 0, 0, 0, 0, 0.725659, 1.44632, 0.970408, 1.74134, 0.0067111, 0.0067111, 0, 0.90349, 1.42986, 1.13348, 2.91269e-12}}},
602+
{{'F', "gemm", {"E", "E", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {8, 8, 1}, "ABIps"}, "av32+m16@64 am32+m32@72 aB wg 8x4 xaf hi pt sr br sb64 bk0 sn grf256 sys kv afb", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 262144, 16777216}, {524288, 262144, 64}, {32, 16, 64}, {8, 4, 1}, 1, (WGType) 1, 441, 0, 0, {8, 8, 4}, {true, true, true}}, {'E', 17, {876646, 564122, 0, 0, 6.5151e+06, 7.83974e+06, 0.629669, 0.87362, 0.885543, 1.48097, 0.00440774, 0.00440774, 0, 1, 1.66234, 1.24996, 2.85794e-12}}},
603+
{{'F', "gemm", {"E", "E", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {8, 8, 1}, "ABI"}, "av16+m16@32 am16x2 aB wg 4x4x2 kr cb4 ks16 xaf st hi pt sr br bk0 sn grf256 sys", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {524288, 524288, 16777216}, {524288, 524288, 16777216}, {32, 32, 64}, {4, 4, 2}, 1, (WGType) 1, 261, 16384, 65536, {8, 8, 4}, {true, true, true}}, {'E', 17, {1.02495e+06, 13797.9, 15430.1, 758509, 0, 0, 0.535333, 1.20812, 0.912657, 1.84068, 0.00529983, 0.00529983, 0, 1, 1.60581, 1.15873, 3.51036e-12}}},
604+
{{'F', "gemm", {"E", "E", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {8, 8, 1}, "ABIqps"}, "av16+m32@72 am32+m32@64 aB wg 4x8 xaf hi pt sr br sb64 bk0 sn grf256 sys kv afb", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 393216, 16777216}, {524288, 393216, 64}, {32, 24, 64}, {4, 8, 1}, 1, (WGType) 1, 441, 0, 0, {8, 8, 4}, {true, true, true}}, {'E', 17, {903365, 697556, 0, 0, 8.2903e+06, 1.21651e+07, 0.724506, 0.722081, 0.92287, 1.55416, 0.00402055, 0.00402055, 0, 0.997691, 1.6726, 1.18622, 5.18793e-12}}},
605+
{{'F', "gemm", {"E", "E", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {8, 8, 1}, "ABI"}, "av16+m32@40 am32+m32@32 aB wg 2x8x2 kr xaf st hi pt sr br sb64 bk0 sn grf256 sys", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {524288, 131072, 16777216}, {524288, 131072, 16777216}, {32, 8, 64}, {2, 8, 2}, 1, (WGType) 1, 261, 0, 16384, {8, 8, 4}, {true, true, true}}, {'E', 17, {1.06324e+06, 220443, 364.907, 276934, 0, 0, 0.524524, 1.25881, 0.793843, 2.21167, 0.00974309, 0.00974309, 0, 0.984682, 1.55809, 1.03396, 4.08729e-12}}},
606+
{{'F', "gemm", {"F", "B", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "#I"}, "aB16x2 aB16x2 aB wg 4x8 cab4 ks64 af hi pt sr br bk0 sn nb 4x8 dm grf256 sys l4", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 131072, 16777216}, {524288, 131072, 16777216}, {32, 8, 64}, {4, 8, 1}, 1, (WGType) 1, 257, 98304, 0, {2, 2, 4}, {true, true, true}}, {'E', 17, {1.07581e+06, 764320, 0, 0, 0, 0, 0.804535, 1.46469, 0.96438, 2.27185, 0.0120677, 0.0120677, 0, 1, 1.38109, 0.955498, 2.48341e-12}}},
607607
{{'F', "gemm", {"F", "B", "S"}, {"N", "T", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "#I"}, "aB16 at16 aB wg 1x2x16 kr cab3x2 ks16 af vav hi pt sr br bk0 grf256 kv afb sys sn l4 l2d", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {524288, 262144, 16777216}, {524288, 262144, 32}, {32, 16, 16}, {1, 2, 16}, 1, (WGType) 1, 445, 6144, 6144, {2, 2, 4}, {true, true, true}}, {'E', 17, {1.44116e+06, -85531, -20925.3, 122459, 4.34995e+06, 2.28557e+06, 0.844362, 0.875124, 0.584811, 1.33561, 0.0318181, 0.0286306, 0.00945064, 0.972737, 1.204, 0.897885, 9.01622e-12}}},
608608
{{'F', "gemm", {"F", "B", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {8, 4, 1}, "IB"}, "aB64 at32 aS wg 1x1x16 ikr af vav sr sb256 bk0 bm0 sys rr dm", {16, (LoopType) 255, 128, {(LoopType) 0, (LoopType) 1, (LoopType) 2}, {16777216, 8192, 16777216}, {8192, 8192, 16777216}, {16, 16, 64}, {1, 1, 16}, 1, (WGType) 0, 4357, 0, 1024, {8, 4, 4}, {true, true, true}}, {'E', 17, {1.21597e+06, 165524, 28763.4, 9411.33, 0, 0, 0.278476, 0.599765, 0.920572, 5.21577, 0.021695, 0.0325353, 0.0148497, 1, 1.30279, 0.785158, 1.47381e-11}}},
609609
{{'F', "gemm", {"F", "H", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "#I"}, "aB16x2 aB16x2 aB wg 4x8 cab4 ks64 af vav hi pt sr br bk0 sn nb 4x8 dm grf256 sys l4", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 131072, 16777216}, {524288, 131072, 16777216}, {32, 8, 64}, {4, 8, 1}, 1, (WGType) 1, 257, 98304, 0, {2, 2, 4}, {true, true, true}}, {'E', 17, {1.08073e+06, 763490, 0, 0, 0, 0, 0.80246, 1.46921, 0.963229, 2.26732, 0.0122976, 0.0122976, 0, 0.986616, 1.49449, 0.966336, 3.40072e-12}}},

0 commit comments

Comments
 (0)