
Commit 05d59b0

[GPU] Add weight size limit for conv opt kernels
1 parent: 1468630

3 files changed: +95, -0 lines


src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_bfyx_gemm_like.cpp (+6)

@@ -110,6 +110,12 @@ bool ConvolutionKernel_bfyx_GEMMLike::Validate(const Params& p) const {
         return false;
     }

+    // Reject overly large filters, which cause very long CL build times.
+    const size_t acceptable_filter_x_size = 64;  // This limit was chosen heuristically.
+    if (params.filterSize.x > acceptable_filter_x_size) {
+        return false;
+    }
+
     return true;
 }


src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16.cpp (+8)

@@ -205,6 +205,14 @@ bool ConvolutionKernel_bfyx_os_iyx_osv16::Validate(const Params& p) const {
         return false;
     }

+    // Reject overly large filters, which cause very long CL build times.
+    const size_t acceptable_filter_size = 1024;  // This limit was chosen heuristically.
+    const auto& params = static_cast<const convolution_params&>(p);
+    auto filter_size = params.filterSize.x * params.filterSize.y;
+    if (filter_size > acceptable_filter_size) {
+        return false;
+    }
+
     return true;
 }

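Together, the two checks act as a gate in kernel selection: when Validate() returns false, that optimized kernel is dropped from the candidate list and selection falls through to the next implementation, ending at the reference kernel. That fall-through is what the new unit test below asserts. The sketch that follows models the flow under the two limits added above; it is illustrative only (FilterDims, Candidate, and pick_kernel are stand-ins, not the real kernel_selector API), with the candidate order implied by the test's expectations.

// Illustrative stand-ins only -- not the actual kernel_selector classes.
#include <cstddef>
#include <functional>
#include <initializer_list>
#include <iostream>
#include <string>
#include <vector>

struct FilterDims { size_t x; size_t y; };

struct Candidate {
    std::string name;
    std::function<bool(const FilterDims&)> validate;  // stands in for KernelBase::Validate()
};

// Return the first candidate whose validate() accepts the filter: a rejected
// optimized kernel simply makes selection fall through to the next entry.
std::string pick_kernel(const FilterDims& f) {
    const std::vector<Candidate> candidates = {
        // Rejected when x * y > 1024 (limit added to convolution_kernel_bfyx_os_iyx_osv16).
        { "convolution_gpu_bfyx_os_iyx_osv16__f32", [](const FilterDims& d) { return d.x * d.y <= 1024; } },
        // Rejected when filterSize.x > 64 (limit added to convolution_kernel_bfyx_gemm_like).
        { "convolution_gpu_bfyx_gemm_like__f32",    [](const FilterDims& d) { return d.x <= 64; } },
        // The reference kernel accepts everything, so selection never fails outright.
        { "convolution_gpu_ref__f32",               [](const FilterDims&)   { return true; } },
    };
    for (const auto& c : candidates) {
        if (c.validate(f)) {
            return c.name;
        }
    }
    return "";
}

int main() {
    // The three filter shapes used by the unit test below hit all three outcomes.
    for (FilterDims f : { FilterDims{32, 32}, FilterDims{33, 32}, FilterDims{65, 64} }) {
        std::cout << f.x << "x" << f.y << " -> " << pick_kernel(f) << "\n";
    }
    return 0;
}

In the actual plugin the candidate set and priority also depend on the device and on other Validate() conditions; the sketch only captures the effect of the two new size limits.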

src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp (+81)

@@ -1638,6 +1638,87 @@ TEST(convolution_f32_fw_gpu, basic_convolution) {
     }
 }

+TEST(convolution_f32_fw_gpu, convolution_big_size_weights) {
+    auto& engine = get_test_engine();
+
+    const std::vector<int> filter_size_data = {
+        32, 32,
+        33, 32,
+        65, 64,
+    };
+
+    const std::vector<std::string> impl_kernel_data = {
+        "convolution_gpu_bfyx_os_iyx_osv16__f32",
+        "convolution_gpu_bfyx_gemm_like__f32",
+        "convolution_gpu_ref__f32"
+    };
+
+    for (size_t m = 0; m < filter_size_data.size() / 2; m++) {
+        const int in_y = filter_size_data[m * 2];
+        const int in_x = filter_size_data[m * 2 + 1];
+
+        auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, in_y, in_x } });
+        auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, in_y, in_x } });
+        auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
+
+        tests::random_generator rg(GET_SUITE_NAME);
+        VVVVF<float> input_rnd = rg.generate_random_4d<float>(1, 1, in_y, in_x, -10, 10);
+        VF<float> input_rnd_vec = flatten_4d<float>(format::yxfb, input_rnd);
+        VVVVF<float> filter_rnd = rg.generate_random_4d<float>(1, 1, in_y, in_x, -10, 10);
+        VF<float> filter_rnd_vec = flatten_4d<float>(format::bfyx, filter_rnd);
+
+        set_values(biases, { 0.0f });
+        set_values(input, input_rnd_vec);
+        set_values(weights, filter_rnd_vec);
+
+        float output_sum = 0.f;
+        size_t idx = 0;
+        for (int i = 0; i < in_y; i++) {
+            for (int k = 0; k < in_x; k++) {
+                idx = i * in_x + k;
+                output_sum += input_rnd_vec[idx] * filter_rnd_vec[idx];
+            }
+        }
+
+        topology topology(
+            input_layout("input", input->get_layout()),
+            data("weights", weights),
+            data("biases", biases),
+            convolution("conv", input_info("input"), "weights", "biases", 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, false));
+
+        ExecutionConfig config = get_test_default_config(engine);
+        config.set_property(ov::intel_gpu::optimize_data(true));
+
+        network network(engine, topology, config);
+
+        auto impl_info = network.get_implementation_info("conv");
+        ASSERT_EQ(impl_info, impl_kernel_data[m]);
+
+        network.set_input_data("input", input);
+
+        auto outputs = network.execute();
+        ASSERT_EQ(outputs.size(), size_t(1));
+        ASSERT_EQ(outputs.begin()->first, "conv");
+
+        auto output_memory = outputs.at("conv").get_memory();
+        auto output_layout = output_memory->get_layout();
+        cldnn::mem_lock<float> output_ptr(output_memory, get_test_stream());
+
+        int y_size = output_layout.spatial(1);
+        int x_size = output_layout.spatial(0);
+        int f_size = output_layout.feature();
+        int b_size = output_layout.batch();
+
+        ASSERT_EQ(y_size, 1);
+        ASSERT_EQ(x_size, 1);
+        ASSERT_EQ(f_size, 1);
+        ASSERT_EQ(b_size, 1);
+
+        ASSERT_EQ(output_sum, output_ptr[0]);
+    }
+
+}
+
 TEST(convolution_f32_fw_gpu, basic_convolution_bfyx_weights_as_input_layout) {
     //Same params as convolution_f32_fw_gpu, basic_convolution but with bfyx optimized data and weights set as input_layout
     auto& engine = get_test_engine();