@@ -1638,6 +1638,87 @@ TEST(convolution_f32_fw_gpu, basic_convolution) {
1638
1638
}
1639
1639
}
1640
1640
1641
+ TEST(convolution_f32_fw_gpu, convolution_big_size_weights) {
1642
+ auto& engine = get_test_engine();
1643
+
1644
+ const std::vector<int> filter_size_data = {
1645
+ 32, 32,
1646
+ 33, 32,
1647
+ 65, 64,
1648
+ };
1649
+
1650
+ const std::vector<std::string> impl_kernel_data = {
1651
+ "convolution_gpu_bfyx_os_iyx_osv16__f32",
1652
+ "convolution_gpu_bfyx_gemm_like__f32",
1653
+ "convolution_gpu_ref__f32"
1654
+ };
1655
+
1656
+ for (size_t m = 0 ; m < filter_size_data.size() / 2; m++) {
1657
+ const int in_y = filter_size_data[m * 2];
1658
+ const int in_x = filter_size_data[m * 2 + 1];
1659
+
1660
+ auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, in_y, in_x } });
1661
+ auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, in_y, in_x } });
1662
+ auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
1663
+
1664
+ tests::random_generator rg(GET_SUITE_NAME);
1665
+ VVVVF<float> input_rnd = rg.generate_random_4d<float>(1, 1, in_y, in_x, -10, 10);
1666
+ VF<float> input_rnd_vec = flatten_4d<float>(format::yxfb, input_rnd);
1667
+ VVVVF<float> filter_rnd = rg.generate_random_4d<float>(1, 1, in_y, in_x, -10, 10);
1668
+ VF<float> filter_rnd_vec = flatten_4d<float>(format::bfyx, filter_rnd);
1669
+
1670
+ set_values(biases, { 0.0f });
1671
+ set_values(input, input_rnd_vec);
1672
+ set_values(weights, filter_rnd_vec);
1673
+
1674
+ float output_sum = 0.f;
1675
+ size_t idx = 0;
1676
+ for (int i = 0 ; i < in_y; i++) {
1677
+ for (int k = 0 ; k < in_x; k++) {
1678
+ idx = i * in_x + k;
1679
+ output_sum += input_rnd_vec[idx] * filter_rnd_vec[idx];
1680
+ }
1681
+ }
1682
+
1683
+ topology topology(
1684
+ input_layout("input", input->get_layout()),
1685
+ data("weights", weights),
1686
+ data("biases", biases),
1687
+ convolution( "conv", input_info("input"), "weights", "biases", 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, false));
1688
+
1689
+ ExecutionConfig config = get_test_default_config(engine);
1690
+ config.set_property(ov::intel_gpu::optimize_data(true));
1691
+
1692
+ network network(engine, topology, config);
1693
+
1694
+ auto impl_info = network.get_implementation_info("conv");
1695
+ ASSERT_EQ(impl_info, impl_kernel_data[m]);
1696
+
1697
+ network.set_input_data("input", input);
1698
+
1699
+ auto outputs = network.execute();
1700
+ ASSERT_EQ(outputs.size(), size_t(1));
1701
+ ASSERT_EQ(outputs.begin()->first, "conv");
1702
+
1703
+ auto output_memory = outputs.at("conv").get_memory();
1704
+ auto output_layout = output_memory->get_layout();
1705
+ cldnn::mem_lock<float> output_ptr(output_memory, get_test_stream());
1706
+
1707
+ int y_size = output_layout.spatial(1);
1708
+ int x_size = output_layout.spatial(0);
1709
+ int f_size = output_layout.feature();
1710
+ int b_size = output_layout.batch();
1711
+
1712
+ ASSERT_EQ(y_size, 1);
1713
+ ASSERT_EQ(x_size, 1);
1714
+ ASSERT_EQ(f_size, 1);
1715
+ ASSERT_EQ(b_size, 1);
1716
+
1717
+ ASSERT_EQ(output_sum, output_ptr[0]);
1718
+ }
1719
+
1720
+ }
1721
+
1641
1722
TEST(convolution_f32_fw_gpu, basic_convolution_bfyx_weights_as_input_layout) {
1642
1723
//Same params as convolution_f32_fw_gpu, basic_convolution but with bfyx optimized data and weights set as input_layout
1643
1724
auto& engine = get_test_engine();
0 commit comments