Skip to content

Commit 4c8fb2c

Browse files
Simonsays095 authored and karturov committed
xe: gemm: skip locking flag register on no-load blocks
1 parent 3dd4f43 commit 4c8fb2c

File tree

4 files changed: +12 −3 lines changed

src/gpu/intel/jit/gemm/generator/pieces/allocators.cpp

+7
Original file line number | Diff line number | Diff line change
@@ -99,6 +99,13 @@ FlagRegister VirtualFlagAllocator::assignPhysical(VirtualFlag vflag)
9999
return pflag.toPhysical();
100100
}
101101

102+
// Lock the flag bits selected by mask(vflag) by OR-ing them into `locked`.
//
// vflag:              flag whose mask bits are to be locked.
// allowAlreadyLocked: when false, finding vflag already locked (per
//                     isLocked) is reported through stub(); when true, the
//                     call re-locks without complaint.
// Returns the value isLocked(vflag) had before this call.
//
// NOTE(review): stub() is presumably fatal (throws or aborts) -- confirm. If
// it can return, the mask is still OR-ed into `locked` afterwards.
// NOTE(review): the already-locked check goes through isLocked(); if that is
// true only when *every* bit of mask(vflag) is set, then a flag whose mask
// merely overlaps locked bits is not reported -- confirm this is intended.
bool VirtualFlagAllocator::lock(VirtualFlag vflag, bool allowAlreadyLocked) {
103+
bool wasLocked = isLocked(vflag);
104+
if (wasLocked && !allowAlreadyLocked) stub("Illegally locking an already-locked flag register");
105+
locked |= mask(vflag);
106+
return wasLocked;
107+
}
108+
102109
bool VirtualFlagAllocator::canLock(int n) const
103110
{
104111
uint8_t unlocked = ~locked & ((1 << nflag) - 1);

src/gpu/intel/jit/gemm/generator/pieces/allocators.hpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,7 @@ class VirtualFlagAllocator {
7878

7979
// True when vflag.idx is at or beyond nflag.
// NOTE(review): presumably nflag is the count of physical flag registers, so
// such indices have no physical register of their own -- confirm.
bool isVirtual(VirtualFlag vflag) { return (vflag.idx >= nflag); }
8080

81-
bool lock(VirtualFlag vflag) { bool wasLocked = isLocked(vflag); locked |= mask(vflag); return wasLocked; }
81+
bool lock(VirtualFlag vflag, bool allowAlreadyLocked = false);
8282
// Clear every bit of mask(vflag) from `locked`; no check is made that the
// flag was locked beforehand.
void unlock(VirtualFlag vflag) { locked &= ~mask(vflag); }
8383
// True only when no bit of mask(vflag) is clear in `locked`, i.e. all of the
// flag's mask bits are currently locked (a partial overlap yields false).
bool isLocked(VirtualFlag vflag) const { return !(~locked & mask(vflag)); }
8484
bool canLock(int n = 1) const;

src/gpu/intel/jit/gemm/generator/pieces/copy.cxx

+1-1
Original file line number | Diff line number | Diff line change
@@ -172,7 +172,7 @@ void BLASKernelGenerator<hw>::copyExecute(CopyPlan &&plan, CommonState &state)
172172
if (!state.vflagsEnabled())
173173
for (int i = 0; i < nflag; i++)
174174
if (!raVFlag0.isFree(VirtualFlag{i}))
175-
raVFlag0.lock(VirtualFlag{i});
175+
raVFlag0.lock(VirtualFlag{i}, true);
176176
auto raVFlag = raVFlag0;
177177

178178
// If we have enough free flags, use those.

src/gpu/intel/jit/gemm/generator/pieces/matrix_access.cxx

+3-1
Original file line number | Diff line number | Diff line change
@@ -621,6 +621,8 @@ void BLASKernelGenerator<hw>::prepareSeriesRegisterBlockMasking(const vector<Reg
621621
for (int startPreload = start; startPreload < nblocks; startPreload++) {
622622
auto &block = layout[startPreload];
623623

624+
if (!block.isLoadBlock()) continue;
625+
624626
bool plFlag[2];
625627
for (int i = 0; i <= 1; i++)
626628
plFlag[i] = block.flag[i] && (block.flag[i] != state.blockEMask);
@@ -630,7 +632,7 @@ void BLASKernelGenerator<hw>::prepareSeriesRegisterBlockMasking(const vector<Reg
630632

631633
auto &flag = block.flag[plFlag[0] ? 0 : 1];
632634
if (!state.raVFlag.canLock(flag.n)) break;
633-
state.raVFlag.lock(getPhysicalFlag(flag, state));
635+
state.raVFlag.lock(getPhysicalFlag(flag, state), true);
634636
}
635637
}
636638
}

0 commit comments

Comments
 (0)