Commit 019c7ce

Utilize padded info transposed by input_order for dynamic padding

1 parent a93ebb0 commit 019c7ce

4 files changed (+26, -33 lines)

src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gemm_tiled_opt.cl (+2 -2)

@@ -200,14 +200,14 @@ KERNEL(gemm_tiled_opt)(
 #endif // TRANSPOSE_INPUT0
 #if TRANSPOSE_INPUT1 == TRANSPOSE_X_LAST
     const __global INPUT1_TYPE* b_ptr = input1 + batch_offset_input1;
-#if HAS_DYNAMIC_N_PADDING || INPUT1_HAS_PADDING
+#if HAS_DYNAMIC_K_PADDING || INPUT1_HAS_PADDING
     const uint input1_offset = FUNC_CALL(get_input1_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, 1, tile_n_offset) - batch_offset_input1;
 #else
     const uint input1_offset = FUNC_CALL(get_input1_index)(OPTIONAL_SHAPE_INFO_TENSOR 0, 0, 0, 0, 1, 0);
 #endif
 #elif TRANSPOSE_INPUT1 == TRANSPOSE_Y_LAST
     const __global INPUT1_TYPE* b_ptr = input1 + batch_offset_input1;
-#if HAS_DYNAMIC_N_PADDING || INPUT1_HAS_PADDING
+#if HAS_DYNAMIC_K_PADDING || INPUT1_HAS_PADDING
     const uint input1_offset = FUNC_CALL(get_input1_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, 0, (tile_n_offset + 1)) - batch_offset_input1;
     const uint input1_offset1 = FUNC_CALL(get_input1_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, (TILE_K), tile_n_offset) - batch_offset_input1;
 #else

src/plugins/intel_gpu/src/kernel_selector/kernel_selector_utils.h (+7)

@@ -53,6 +53,12 @@ struct DimensionAccessHelperJit : virtual DimensionAccessHelperBase {
                 pad_before_after_sizes.push_back(toCodeString(d.pad.before));
                 pad_before_after_sizes.push_back(toCodeString(d.pad.after));
             }
+
+            if (d.pad.is_dynamic || d.is_dynamic) {
+                dims_padded.push_back(true);
+            } else {
+                dims_padded.push_back(false);
+            }
         }
     }
 }
@@ -76,6 +82,7 @@ struct DimensionAccessHelperJit : virtual DimensionAccessHelperBase {
 
     std::vector<std::string> dims_sizes;
     std::vector<std::string> pad_before_after_sizes;
+    std::vector<bool> dims_padded;
 };
 
 std::vector<size_t> GetImageSizes(const kernel_selector::WeightsTensor& dimensions, const WeightsLayout layout);
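For reference, dims_padded records one flag per dimension, true when either the dimension's padding or the dimension itself is dynamic, in the same positional order as the other per-dimension vectors kept by the helper. A minimal standalone sketch of that collection step, using a simplified stand-in struct rather than the actual kernel_selector types:

    #include <vector>

    // Simplified stand-in for a per-dimension descriptor (hypothetical; only
    // the fields this sketch needs).
    struct Dim {
        struct Pad { bool is_dynamic = false; } pad;
        bool is_dynamic = false;
    };

    // Mirror of the logic added above: one bool per dimension, set when either
    // the padding or the extent of that dimension is dynamic.
    std::vector<bool> collect_dims_padded(const std::vector<Dim>& dims) {
        std::vector<bool> dims_padded;
        dims_padded.reserve(dims.size());
        for (const auto& d : dims) {
            dims_padded.push_back(d.pad.is_dynamic || d.is_dynamic);
        }
        return dims_padded;
    }

Because the flags are positional, a caller can index them through a transposed dimension order, which is what the GEMM kernel change in the next file relies on.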

src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_tiled_opt.cpp (+2 -19)

@@ -205,27 +205,10 @@ JitConstants GemmKernelTiledOpt::GetJitConstants(const gemm_params& params) const
         else
             jit.AddConstant(MakeJitConstant("TRANSPOSE_OUTPUT", 0 /* set as TRANSPOSE_X_LAST */));
 
-        bool has_dynamic_k_padding = params.transpose_input0 ? params.inputs[0].Y().pad.is_dynamic
-                                                             : params.inputs[0].X().pad.is_dynamic;
-        bool has_dynamic_n_padding = params.transpose_input1 ? params.inputs[1].Y().pad.is_dynamic
-                                                             : params.inputs[1].X().pad.is_dynamic;
+        bool has_dynamic_k_padding = params.transpose_input0 ? dims0_padded.dims_padded[input0_dims[6]]
+                                                             : dims0_padded.dims_padded[input0_dims[7]];
         if (has_dynamic_k_padding)
             jit.AddConstant(MakeJitConstant("HAS_DYNAMIC_K_PADDING", 1));
-        if (has_dynamic_n_padding)
-            jit.AddConstant(MakeJitConstant("HAS_DYNAMIC_N_PADDING", 1));
-
-        auto hasDynamicPad = [](DataTensor dt) -> bool {
-            auto dims = dt.GetDims();
-            for (auto d : dims) {
-                if (d.pad.is_dynamic)
-                    return true;
-            }
-            return false;
-        };
-        if (hasDynamicPad(params.inputs[0]))
-            jit.AddConstant(MakeJitConstant("INPUT0_HAS_PADDING", 1));
-        if (hasDynamicPad(params.inputs[1]))
-            jit.AddConstant(MakeJitConstant("INPUT1_HAS_PADDING", 1));
     } else {
         auto get_transposed_dim_size = [](const kernel_selector::DataTensor &data_tensor,
                                           const std::vector<int64_t>& dims_order, const std::string dim) {
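The removed logic assumed the K axis of input0 always maps to the tensor's physical Y (when transposed) or X dimension, which no longer holds once an arbitrary input_order is applied; the padded flag is therefore looked up through the transposed dimension indices instead (input0_dims[6] and [7], i.e. the positions of the Y and X axes in the 8-dim order). A rough sketch of that lookup with simplified types and hypothetical helper names, not the plugin's actual API:

    #include <cstddef>
    #include <vector>

    // Hypothetical helper: dims_padded holds per-dimension flags in physical
    // order; input_order maps a logical axis position to the physical
    // dimension index it was taken from.
    bool logical_dim_is_padded(const std::vector<bool>& dims_padded,
                               const std::vector<size_t>& input_order,
                               size_t logical_pos) {
        return dims_padded[input_order[logical_pos]];
    }

    // Matches the ternary in the diff: with transpose_input0 the K axis sits
    // at logical position 6 (Y), otherwise at position 7 (X).
    bool has_dynamic_k_padding(bool transpose_input0,
                               const std::vector<bool>& dims_padded,
                               const std::vector<size_t>& input0_dims) {
        return transpose_input0 ? logical_dim_is_padded(dims_padded, input0_dims, 6)
                                : logical_dim_is_padded(dims_padded, input0_dims, 7);
    }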

src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp (+15 -12)

@@ -575,34 +575,34 @@ class gemm_gpu_tests: public ::testing::Test {
         ov::Shape in2_shape_aligned = { aligned_batch1_size, aligned_batch2_size, aligned_n_size, aligned_k_size };
 
         // Use dynamic padding for all BFYX dimensions
-        tensor dyn_pad_dims_input1({0, 0, 0, 0}, 0);
-        tensor dyn_pad_dims_input2({0, 0, 0, 0}, 0);
+        padding::DynamicDimsMask dyn_pad_dims_input1;
+        padding::DynamicDimsMask dyn_pad_dims_input2;
 
         if (n_dim_only) {
-            dyn_pad_dims_input1 = tensor({0, 0, 0, 0}, 0);
-            dyn_pad_dims_input2 = tensor({0, 0, 1, 0}, 0);
+            dyn_pad_dims_input1 = 0ul;
+            dyn_pad_dims_input2 = padding::DynamicDimsMask("1000");
         } else {
-            dyn_pad_dims_input1 = tensor({1, 1, 1, 1}, 0);
-            dyn_pad_dims_input2 = tensor({1, 1, 1, 1}, 0);
+            dyn_pad_dims_input1 = padding::DynamicDimsMask("1111");
+            dyn_pad_dims_input2 = padding::DynamicDimsMask("1111");
         }
 
-        auto in1_layout = layout{ {-1, -1, -1, -1}, data_types::f16, format::bfyx, padding({0, 0, 0, 0}, {0, 0, 0, 0}, 0.0f, dyn_pad_dims_input1)};
-        auto in2_layout = layout{ {-1, -1, -1, -1}, data_types::f16, format::bfyx, padding({0, 0, 0, 0}, {0, 0, 0, 0}, 0.0f, dyn_pad_dims_input2)};
+        auto in1_layout = layout{ {-1, -1, -1, -1}, data_types::f16, format::bfyx, padding({0, 0, 0, 0}, {0, 0, 0, 0}, dyn_pad_dims_input1)};
+        auto in2_layout = layout{ {-1, -1, -1, -1}, data_types::f16, format::bfyx, padding({0, 0, 0, 0}, {0, 0, 0, 0}, dyn_pad_dims_input2)};
 
         auto aligned_input1_mem = engine.allocate_memory({ov::PartialShape(in1_shape_aligned), data_types::f16, format::bfyx});
         auto aligned_input2_mem = engine.allocate_memory({ov::PartialShape(in2_shape_aligned), data_types::f16, format::bfyx});
 
         auto input1_mem = engine.reinterpret_buffer(*aligned_input1_mem, layout{ov::PartialShape(in1_shape),
                                                     data_types::f16,
                                                     format::bfyx,
-                                                    n_dim_only ? padding({0, 0, 0, 0}, {0, 0, 0, 0}, 0.0f, dyn_pad_dims_input1) :
-                                                                 padding({padding_size_batch1, 0, 0, 0}, {0, padding_size_batch2, padding_size_m, padding_size_k}, 0.0f, dyn_pad_dims_input1)});
+                                                    n_dim_only ? padding({0, 0, 0, 0}, {0, 0, 0, 0}, dyn_pad_dims_input1) :
+                                                                 padding({padding_size_batch1, 0, 0, 0}, {0, padding_size_batch2, padding_size_m, padding_size_k}, dyn_pad_dims_input1)});
 
         auto input2_mem = engine.reinterpret_buffer(*aligned_input2_mem, layout{ov::PartialShape(in2_shape),
                                                     data_types::f16,
                                                     format::bfyx,
-                                                    n_dim_only ? padding({0, 0, 0, 0}, {0, 0, padding_size_n, 0}, 0.0f, dyn_pad_dims_input2) :
-                                                                 padding({0, padding_size_batch2, 0, 0}, {padding_size_batch1, 0, padding_size_n, padding_size_k }, 0.0f, dyn_pad_dims_input2)});
+                                                    n_dim_only ? padding({0, 0, 0, 0}, {0, 0, 0, padding_size_n}, dyn_pad_dims_input2) :
+                                                                 padding({0, padding_size_batch2, 0, 0}, {padding_size_batch1, 0, padding_size_n, padding_size_k }, dyn_pad_dims_input2)});
 
         auto input_1_data = rg.generate_random_1d<ov::float16>(ov::shape_size(in1_shape), -2, 2);
         auto input_2_data = rg.generate_random_1d<ov::float16>(ov::shape_size(in2_shape), -2, 2);
@@ -1595,13 +1595,16 @@ TEST_F(gemm_gpu_tests, dynamic_padding_n_dim_only) {
     this->test_dynamic_padding(false, true);
 }
 
+#ifndef ENABLE_ONEDNN_FOR_GPU
+// Disable onednn test because onednn does not support format_tag::cbda, format_tag::badc.
 TEST_F(gemm_gpu_tests, dynamic_padding_w_transpose_order_all_dim) {
     this->test_dynamic_padding_w_transpose_order(false, false);
 }
 
 TEST_F(gemm_gpu_tests, dynamic_padding_w_transpose_order_n_dim_only) {
     this->test_dynamic_padding_w_transpose_order(false, true);
 }
+#endif
 
 TEST_F(gemm_gpu_tests, dynamic_multi_inference_same_shape) {
     this->test_dynamic_multi_inference_same_shape(false);
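The test now describes which dimensions carry dynamic padding with a padding::DynamicDimsMask built from a bit string ("1111" for all dimensions, "1000" for a single one) instead of a tensor of 0/1 flags, and the pad sizes are passed without the old 0.0f fill-value argument. As an illustration of the mask idea only, using std::bitset as a stand-in (the real DynamicDimsMask defines its own bit-to-dimension mapping):

    #include <bitset>
    #include <cassert>

    int main() {
        // Stand-in for a per-dimension "dynamically padded" mask; bit
        // positions here are arbitrary, unlike the plugin's real type.
        std::bitset<4> all_dims("1111");   // every dimension flagged
        std::bitset<4> one_dim("1000");    // a single dimension flagged

        assert(all_dims.all());
        assert(one_dim.count() == 1);
        return 0;
    }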
