Skip to content

Commit 4d17765

Browse files
[GPU] Add only necessary LOOP unroll params for jit constants
1 parent f0ebba0 commit 4d17765

5 files changed

+19 -14 lines changed

src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_base.cpp

+13 -7
Original file line number | Diff line number | Diff line change
@@ -83,14 +83,20 @@ JitConstants ConvolutionKernelBase::GetJitConstants(const convolution_params& pa
8383
}
8484
}
8585

86+
return mem_consts;
87+
}
88+
89+
JitConstants ConvolutionKernelBase::GetJitConstantsWithLoopUnroll(const convolution_params& params, const DispatchData& dispatchData) const {
90+
JitConstants mem_consts = ConvolutionKernelBase::GetJitConstants(params, dispatchData);
91+
8692
std::vector<uint32_t> unrollLoopParams{params.filterSize.x,
87-
params.filterSize.y,
88-
(uint32_t)dispatchData.gemmStyle.globalWorkSizeDX,
89-
(uint32_t)dispatchData.gemmStyle.globalWorkSizeDY,
90-
(uint32_t)dispatchData.gemmStyle.globalWorkSizeDZ,
91-
(uint32_t)dispatchData.gemmStyle.subBlockDimM,
92-
(uint32_t)dispatchData.gemmStyle.subBlockDimK,
93-
(uint32_t)dispatchData.gemmStyle.subBlockDimN};
93+
params.filterSize.y,
94+
(uint32_t)dispatchData.gemmStyle.globalWorkSizeDX,
95+
(uint32_t)dispatchData.gemmStyle.globalWorkSizeDY,
96+
(uint32_t)dispatchData.gemmStyle.globalWorkSizeDZ,
97+
(uint32_t)dispatchData.gemmStyle.subBlockDimM,
98+
(uint32_t)dispatchData.gemmStyle.subBlockDimK,
99+
(uint32_t)dispatchData.gemmStyle.subBlockDimN};
94100

95101
auto loopCount = *std::max_element(unrollLoopParams.begin(), unrollLoopParams.end());
96102

src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_base.h

+3 -4
Original file line number | Diff line number | Diff line change
@@ -36,10 +36,8 @@ class ConvolutionKernelBase : public WeightBiasKernelBase {
3636
size_t globalWorkSizeDZ;
3737
};
3838

39-
union {
40-
CLDNNStyle cldnnStyle;
41-
GEMMStyle gemmStyle;
42-
};
39+
CLDNNStyle cldnnStyle;
40+
GEMMStyle gemmStyle;
4341
};
4442

4543
std::string GetAutoTuneOptions(int autoTuneIndex) const;
@@ -53,6 +51,7 @@ class ConvolutionKernelBase : public WeightBiasKernelBase {
5351
virtual bool NeedPaddedInput() const { return false; }
5452
bool Validate(const Params& p) const override;
5553
using WeightBiasKernelBase::GetJitConstants;
54+
JitConstants GetJitConstantsWithLoopUnroll(const convolution_params& params, const DispatchData& dispatchData) const;
5655
virtual JitConstants GetJitConstants(const convolution_params& params, const DispatchData& dispatchData) const;
5756
virtual JitConstants GetFusedPrimitivesJitConstants(const convolution_params& params, const DispatchData& dispatchData) const;
5857
virtual DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const;

src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_bfyx_direct_10_12_16.cpp

+1 -1
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ DeviceFeaturesKey ConvolutionKernel_bfyx_Direct_10_10_12::get_required_device_fe
3333

3434
JitConstants ConvolutionKernel_bfyx_Direct_10_10_12::GetJitConstants(const convolution_params& cp,
3535
const DispatchData& dispatchData) const {
36-
JitConstants jit = Parent::GetJitConstants(cp, dispatchData);
36+
JitConstants jit = Parent::GetJitConstantsWithLoopUnroll(cp, dispatchData);
3737

3838
jit.AddConstants({
3939
MakeJitConstant("ALIGNED_OFM", RoundUp(cp.outputs[0].Feature().v / cp.groups, dispatchData.gemmStyle.subBlockDimN) * cp.groups),

src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_bfyx_gemm_like.cpp

+1 -1
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@ std::string ConvolutionKernel_bfyx_GEMMLike::GetKernelName(const convolution_par
4444

4545
JitConstants ConvolutionKernel_bfyx_GEMMLike::GetJitConstants(const convolution_params& params,
4646
const DispatchData& dispatchData) const {
47-
JitConstants jit = Parent::GetJitConstants(params, dispatchData);
47+
JitConstants jit = Parent::GetJitConstantsWithLoopUnroll(params, dispatchData);
4848

4949
jit.AddConstants({
5050
MakeJitConstant("ALIGNED_OFM_PER_GROUP", RoundUp(params.outputs[0].Feature().v / params.groups, dispatchData.gemmStyle.subBlockDimN)),

src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16.cpp

+1 -1
Original file line number | Diff line number | Diff line change
@@ -226,7 +226,7 @@ JitConstants ConvolutionKernel_bfyx_os_iyx_osv16::GetJitConstants(const convolut
226226
const size_t of_threads_per_batch = RoundUp(of_maps_per_group, sub_group_size);
227227
size_t leftovers = of_threads_per_batch - of_maps_per_group;
228228

229-
auto jit = Parent::GetJitConstants(params, dispatchData);
229+
auto jit = Parent::GetJitConstantsWithLoopUnroll(params, dispatchData);
230230

231231
if (!params.fused_ops.empty()) {
232232
auto input_dt = GetUnitType(params);

0 commit comments

Comments (0)