Commit e2a71d1 (1 parent: 77827e7)
Committed Oct 7, 2024

Apply dynamic padding kernel code if it has any dynamic padding

3 files changed: +14 -18 lines
 

src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gemm_tiled_opt.cl (+7 -7)
@@ -181,7 +181,7 @@ KERNEL(gemm_tiled_opt)(
     // Start pointers offsets
 #if TRANSPOSE_INPUT0 == TRANSPOSE_X_LAST
     const __global INPUT0_TYPE* a_ptr = input0 + batch_offset_input0;
-#if HAS_DYNAMIC_K_PADDING || INPUT0_HAS_PADDING
+#if INPUT0_HAS_DYNAMIC_PADDING || INPUT0_HAS_PADDING
     const uint input0_offset = FUNC_CALL(get_input0_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, (y+1), 0) - batch_offset_input0;
     const uint input0_offset1 = FUNC_CALL(get_input0_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, y, (TILE_K)) - batch_offset_input0;
 #else
@@ -190,7 +190,7 @@ KERNEL(gemm_tiled_opt)(
 #endif
 #elif TRANSPOSE_INPUT0 == TRANSPOSE_Y_LAST
     const __global INPUT0_TYPE* a_ptr = input0 + batch_offset_input0;
-#if HAS_DYNAMIC_K_PADDING || INPUT0_HAS_PADDING
+#if INPUT0_HAS_DYNAMIC_PADDING || INPUT0_HAS_PADDING
     const uint input0_offset = FUNC_CALL(get_input0_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, y, 1) - batch_offset_input0;
     const uint input0_offset1 = FUNC_CALL(get_input0_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, y, (TILE_K)) - batch_offset_input0;
 #else
@@ -200,14 +200,14 @@ KERNEL(gemm_tiled_opt)(
 #endif // TRANSPOSE_INPUT0
 #if TRANSPOSE_INPUT1 == TRANSPOSE_X_LAST
     const __global INPUT1_TYPE* b_ptr = input1 + batch_offset_input1;
-#if HAS_DYNAMIC_K_PADDING || INPUT1_HAS_PADDING
+#if INPUT1_HAS_DYNAMIC_PADDING || INPUT1_HAS_PADDING
     const uint input1_offset = FUNC_CALL(get_input1_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, 1, tile_n_offset) - batch_offset_input1;
 #else
     const uint input1_offset = FUNC_CALL(get_input1_index)(OPTIONAL_SHAPE_INFO_TENSOR 0, 0, 0, 0, 1, 0);
 #endif
 #elif TRANSPOSE_INPUT1 == TRANSPOSE_Y_LAST
     const __global INPUT1_TYPE* b_ptr = input1 + batch_offset_input1;
-#if HAS_DYNAMIC_K_PADDING || INPUT1_HAS_PADDING
+#if INPUT1_HAS_DYNAMIC_PADDING || INPUT1_HAS_PADDING
     const uint input1_offset = FUNC_CALL(get_input1_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, 0, (tile_n_offset + 1)) - batch_offset_input1;
     const uint input1_offset1 = FUNC_CALL(get_input1_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, (TILE_K), tile_n_offset) - batch_offset_input1;
 #else
@@ -386,7 +386,7 @@ KERNEL(gemm_tiled_opt)(
 #endif // TRANSPOSE_INPUT1 == TRANSPOSE_Y_LAST

     // Loading A tile and tile C calculation
-#if IS_DYNAMIC && !INDIRECT_INPUT0 && !HAS_DYNAMIC_K_PADDING && TRANSPOSE_INPUT0 == TRANSPOSE_X_LAST
+#if IS_DYNAMIC && !INDIRECT_INPUT0 && !INPUT0_HAS_DYNAMIC_PADDING && !INPUT1_HAS_DYNAMIC_PADDING && TRANSPOSE_INPUT0 == TRANSPOSE_X_LAST
     A_FLOATN a_read = (TILE_K_NOT_DIVISIBLE == 0 || K_IS_ALIGNED_4BYTE) ? BLOCK_READ_A(a_ptr, 0): a_ptr[sglid];
 #endif
     unroll_for (uint dot_id = 0; dot_id < tile_m_iterations; dot_id++) {
@@ -395,7 +395,7 @@ KERNEL(gemm_tiled_opt)(
 #if INDIRECT_INPUT0
         uint a_idx = FUNC_CALL(get_input0_indirect_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, (y + dot_id), (k * TILE_K + sglid), beam_table);
         A_FLOATN a_read = input0[a_idx];
-#elif HAS_DYNAMIC_K_PADDING || INPUT0_HAS_PADDING
+#elif INPUT0_HAS_DYNAMIC_PADDING || INPUT1_HAS_DYNAMIC_PADDING || INPUT0_HAS_PADDING
        // In case of dynamic padding we can't guarantee memory access alignment for
        // block reads (4 bytes), so use scattered read
        uint a_idx = FUNC_CALL(get_input0_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, (y + dot_id), (k * TILE_K + sglid));
@@ -431,7 +431,7 @@ KERNEL(gemm_tiled_opt)(
 #endif // TILE_K > SIMD_WIDTH
             }
         }
-#if IS_DYNAMIC && !INDIRECT_INPUT0 && !HAS_DYNAMIC_K_PADDING
+#if IS_DYNAMIC && !INDIRECT_INPUT0 && !INPUT0_HAS_DYNAMIC_PADDING && !INPUT1_HAS_DYNAMIC_PADDING
         // Read A for next dot_id
         a_read = (dot_id + 1 < tile_m_iterations) ? (TILE_K_NOT_DIVISIBLE == 0 || K_IS_ALIGNED_4BYTE) ? BLOCK_READ_A(a_ptr, 0) : a_ptr[sglid] : 0;
 #endif
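
The kernel comment in the @@ -395,7 hunk states the underlying constraint: block reads assume 4-byte-aligned, contiguous rows, and a dynamically padded input only reveals its row pitch at runtime (through shape_info), so the kernel has to fall back to a per-lane "scattered" read. A minimal host-side C++ sketch of the two read styles follows; SIMD, runtime_pitch, and both function names are illustrative stand-ins, not kernel_selector or OpenCL API:

#include <cstddef>

constexpr std::size_t SIMD = 8;  // stand-in for the subgroup width

// Block-read style: one contiguous, aligned row segment per subgroup.
// Only legal when the row pitch is known at kernel-build time, so the
// 4-byte alignment of every row start can be proven (no dynamic pad).
void block_read_row(const float* row_ptr, float* out) {
    for (std::size_t lane = 0; lane < SIMD; ++lane)
        out[lane] = row_ptr[lane];
}

// Scattered style: with dynamic padding the pitch is a runtime value,
// so each lane recomputes its own element index, mirroring what
// get_input0_index() does in gemm_tiled_opt.cl.
void scattered_read_row(const float* base, std::size_t y, std::size_t k,
                        std::size_t runtime_pitch, float* out) {
    for (std::size_t lane = 0; lane < SIMD; ++lane)
        out[lane] = base[y * runtime_pitch + (k + lane)];
}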

src/plugins/intel_gpu/src/kernel_selector/kernel_selector_utils.h (+3 -7)
@@ -42,23 +42,19 @@ struct DimensionAccessHelperJit : virtual DimensionAccessHelperBase {
         : DimensionAccessHelperBase(t) {
         size_t dyn_shape_offset = t.get_dynamic_shape_offset();
         size_t dyn_pad_offset = dyn_shape_offset + DataTensor::max_rank();
+        has_dynamic_pad = false;
         for (auto d : dims) {
             dims_sizes.push_back(toCodeString(d, dyn_shape_offset, padded, d.pad.is_dynamic, dyn_pad_offset));
             dyn_shape_offset++;
             if (padded) {
                 if (d.pad.is_dynamic) {
                     pad_before_after_sizes.push_back("(shape_info[" + std::to_string(dyn_pad_offset++) + "])");
                     pad_before_after_sizes.push_back("(shape_info[" + std::to_string(dyn_pad_offset++) + "])");
+                    has_dynamic_pad = true;
                 } else {
                     pad_before_after_sizes.push_back(toCodeString(d.pad.before));
                     pad_before_after_sizes.push_back(toCodeString(d.pad.after));
                 }
-
-                if (d.pad.is_dynamic || d.is_dynamic) {
-                    dims_padded.push_back(true);
-                } else {
-                    dims_padded.push_back(false);
-                }
             }
         }
     }
@@ -82,7 +78,7 @@ struct DimensionAccessHelperJit : virtual DimensionAccessHelperBase {

     std::vector<std::string> dims_sizes;
     std::vector<std::string> pad_before_after_sizes;
-    std::vector<bool> dims_padded;
+    bool has_dynamic_pad;
 };

 std::vector<size_t> GetImageSizes(const kernel_selector::WeightsTensor& dimensions, const WeightsLayout layout);
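
In short, DimensionAccessHelperJit no longer tracks a per-dimension dims_padded vector that callers had to index by transposed dimension id; it records a single flag that is set when any dimension carries a dynamic pad. A reduced C++ sketch of the new constructor logic, with Pad and Dim as simplified stand-ins for the real kernel_selector tensor types (as in the real code, the flag is only raised when the helper is built with padded == true):

#include <vector>

struct Pad { bool is_dynamic = false; };
struct Dim { Pad pad; };

struct DimHelperSketch {
    bool has_dynamic_pad = false;

    DimHelperSketch(const std::vector<Dim>& dims, bool padded) {
        for (const auto& d : dims) {
            // Any dynamically padded dimension flips the single flag;
            // no per-dimension bookkeeping is kept anymore.
            if (padded && d.pad.is_dynamic)
                has_dynamic_pad = true;
        }
    }
};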

src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_tiled_opt.cpp (+4 -4)
@@ -205,10 +205,10 @@ JitConstants GemmKernelTiledOpt::GetJitConstants(const gemm_params& params) cons
         else
             jit.AddConstant(MakeJitConstant("TRANSPOSE_OUTPUT", 0 /* set as TRANSPOSE_X_LAST */));

-        bool has_dynamic_k_padding = params.transpose_input0 ? dims0_padded.dims_padded[input0_dims[6]]
-                                                             : dims0_padded.dims_padded[input0_dims[7]];
-        if (has_dynamic_k_padding)
-            jit.AddConstant(MakeJitConstant("HAS_DYNAMIC_K_PADDING", 1));
+        if (dims0_padded.has_dynamic_pad)
+            jit.AddConstant(MakeJitConstant("INPUT0_HAS_DYNAMIC_PADDING", 1));
+        if (dims1_padded.has_dynamic_pad)
+            jit.AddConstant(MakeJitConstant("INPUT1_HAS_DYNAMIC_PADDING", 1));
     } else {
         auto get_transposed_dim_size = [](const kernel_selector::DataTensor &data_tensor,
                                           const std::vector<int64_t>& dims_order, const std::string dim) {
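
Taken together: the old gating inspected a single dims_padded entry, chosen by input0's transposed K dimension, so dynamic padding anywhere else (including on input1) left the kernel on a block-read path it could not safely use. The new gating is coarser but safe: one define per input whenever that input has any dynamic pad, and the kernel hunks above show that either define also disables the block-read prefetch of the A tile. A condensed C++ sketch of the host-side decision; HelperSketch and padding_defines are hypothetical, while the define names match the diff:

#include <string>
#include <utility>
#include <vector>

struct HelperSketch { bool has_dynamic_pad = false; };

// Returns the padding-related defines the host would inject into the
// kernel's JIT constants, as (name, value) pairs.
std::vector<std::pair<std::string, int>> padding_defines(
        const HelperSketch& dims0_padded, const HelperSketch& dims1_padded) {
    std::vector<std::pair<std::string, int>> defines;
    if (dims0_padded.has_dynamic_pad)
        defines.emplace_back("INPUT0_HAS_DYNAMIC_PADDING", 1);
    if (dims1_padded.has_dynamic_pad)
        defines.emplace_back("INPUT1_HAS_DYNAMIC_PADDING", 1);
    return defines;
}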
