Skip to content

Commit 3dd4f43

Browse files
Simonsays095 authored and karturov committed
xe: jit: gemm: workaround slow OOB check
1 parent 280bd28 commit 3dd4f43

File tree

1 file changed

+10
-0
lines changed

1 file changed

+10
-0
lines changed

src/gpu/intel/jit/gemm/generator/pieces/layout_setup.cxx

+10
Original file line number | Diff line number | Diff line change
@@ -680,6 +680,16 @@ bool BLASKernelGenerator<hw>::getBlockInfo(Type T, const MatrixAddressing &atype
680680
block.byteGlue = true;
681681
block.crosspack /= T.perByte();
682682
}
683+
684+
// Xe2: manually mask in the height dimension to work around slow LSC
685+
// out-of-bounds checks.
686+
bool remainderH = memCM ? remainderC : remainderR;
687+
if (hw >= HW::Xe2 && remainderH) {
688+
auto &vymask = memCM ? block.colMask.variable : block.rowMask.variable;
689+
vymask.isFixed = false;
690+
vymask.bitRep = vymask.maskRep = vymask.rsize = 1;
691+
vymask.rshift = 0;
692+
}
683693
break;
684694
}
685695
case AccessType::CacheLine: {

0 commit comments

Comments (0)