@@ -1638,6 +1638,83 @@ TEST(convolution_f32_fw_gpu, basic_convolution) {
1638
1638
}
1639
1639
}
1640
1640
1641
+ TEST(convolution_f32_fw_gpu, convolution_big_size_weights) {
1642
+ auto& engine = get_test_engine();
1643
+
1644
+ const std::vector<int> filter_size_data = {
1645
+ 65, 65,
1646
+ };
1647
+
1648
+ const std::vector<std::string> impl_kernel_data = {
1649
+ "convolution_gpu_ref__f32"
1650
+ };
1651
+
1652
+ for (size_t m = 0 ; m < filter_size_data.size() / 2; m++) {
1653
+ const int in_y = filter_size_data[m * 2];
1654
+ const int in_x = filter_size_data[m * 2 + 1];
1655
+
1656
+ auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, in_y, in_x } });
1657
+ auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, in_y, in_x } });
1658
+ auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
1659
+
1660
+ tests::random_generator rg(GET_SUITE_NAME);
1661
+ VVVVF<float> input_rnd = rg.generate_random_4d<float>(1, 1, in_y, in_x, -10, 10);
1662
+ VF<float> input_rnd_vec = flatten_4d<float>(format::yxfb, input_rnd);
1663
+ VVVVF<float> filter_rnd = rg.generate_random_4d<float>(1, 1, in_y, in_x, -10, 10);
1664
+ VF<float> filter_rnd_vec = flatten_4d<float>(format::bfyx, filter_rnd);
1665
+
1666
+ set_values(biases, { 0.0f });
1667
+ set_values(input, input_rnd_vec);
1668
+ set_values(weights, filter_rnd_vec);
1669
+
1670
+ float output_sum = 0.f;
1671
+ size_t idx = 0;
1672
+ for (int i = 0 ; i < in_y; i++) {
1673
+ for (int k = 0 ; k < in_x; k++) {
1674
+ idx = i * in_x + k;
1675
+ output_sum += input_rnd_vec[idx] * filter_rnd_vec[idx];
1676
+ }
1677
+ }
1678
+
1679
+ topology topology(
1680
+ input_layout("input", input->get_layout()),
1681
+ data("weights", weights),
1682
+ data("biases", biases),
1683
+ convolution( "conv", input_info("input"), "weights", "biases", 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, false));
1684
+
1685
+ ExecutionConfig config = get_test_default_config(engine);
1686
+ config.set_property(ov::intel_gpu::optimize_data(true));
1687
+
1688
+ network network(engine, topology, config);
1689
+
1690
+ auto impl_info = network.get_implementation_info("conv");
1691
+ ASSERT_EQ(impl_info, impl_kernel_data[m]);
1692
+
1693
+ network.set_input_data("input", input);
1694
+
1695
+ auto outputs = network.execute();
1696
+ ASSERT_EQ(outputs.size(), size_t(1));
1697
+ ASSERT_EQ(outputs.begin()->first, "conv");
1698
+
1699
+ auto output_memory = outputs.at("conv").get_memory();
1700
+ auto output_layout = output_memory->get_layout();
1701
+ cldnn::mem_lock<float> output_ptr(output_memory, get_test_stream());
1702
+
1703
+ int y_size = output_layout.spatial(1);
1704
+ int x_size = output_layout.spatial(0);
1705
+ int f_size = output_layout.feature();
1706
+ int b_size = output_layout.batch();
1707
+
1708
+ ASSERT_EQ(y_size, 1);
1709
+ ASSERT_EQ(x_size, 1);
1710
+ ASSERT_EQ(f_size, 1);
1711
+ ASSERT_EQ(b_size, 1);
1712
+
1713
+ ASSERT_EQ(output_sum, output_ptr[0]);
1714
+ }
1715
+
1716
+ }
1717
+
1641
1718
TEST(convolution_f32_fw_gpu, basic_convolution_bfyx_weights_as_input_layout) {
1642
1719
//Same params as convolution_f32_fw_gpu, basic_convolution but with bfyx optimized data and weights set as input_layout
1643
1720
auto& engine = get_test_engine();
0 commit comments