|
2 | 2 | // SPDX-License-Identifier: Apache-2.0
|
3 | 3 | //
|
4 | 4 |
|
| 5 | +#include "intel_gpu/primitives/permute.hpp" |
5 | 6 | #include "test_utils.h"
|
| 7 | +#include "random_generator.hpp" |
6 | 8 | #include "program_wrapper.h"
|
7 | 9 | #include "fully_connected_inst.h"
|
8 | 10 | #include "convolution_inst.h"
|
@@ -242,3 +244,49 @@ TEST(post_optimize_weights, onednn_group_conv_weights_reorder_test) {
|
242 | 244 | ASSERT_TRUE(onednn_weights_params->_out_desc.get_size() == prog->get_node("weights_weights_reorder_0").get_output_layout().bytes_count());
|
243 | 245 | #endif
|
244 | 246 | }
|
| 247 | + |
| 248 | +TEST(post_optimize_weights, fuse_constant_transposes_removal_and_add_intermediate_including_siblings) { |
| 249 | + auto& engine = get_test_engine(); |
| 250 | + |
| 251 | + auto input2_layout_dyn = layout{ ov::PartialShape{ -1, 32 }, data_types::f16, format::bfyx }; |
| 252 | + |
| 253 | + auto input = engine.allocate_memory({ { 2, 32 }, data_types::f16, format::bfyx }); |
| 254 | + auto input2 = engine.allocate_memory({ { 2, 32 }, data_types::f16, format::bfyx }); |
| 255 | + auto weights = engine.allocate_memory({{ 32, 2 }, data_types::f32, format::bfyx }); |
| 256 | + |
| 257 | + tests::random_generator rg(GET_SUITE_NAME); |
| 258 | + auto input_data = rg.generate_random_2d<ov::float16>(2, 32, -1, 1); |
| 259 | + auto input2_data = rg.generate_random_2d<ov::float16>(2, 32, -1, -1); |
| 260 | + auto weights_data = rg.generate_random_2d<float>(32, 2, -1, 1); |
| 261 | + |
| 262 | + set_values(input, flatten_2d(format::bfyx, input_data)); |
| 263 | + set_values(input2, input2_data); |
| 264 | + set_values(weights, flatten_2d(format::bfyx, weights_data)); |
| 265 | + |
| 266 | + topology topology( |
| 267 | + input_layout("input", input->get_layout()), |
| 268 | + input_layout("input2", input2_layout_dyn), |
| 269 | + data("weights", weights), |
| 270 | + permute("permute_test", input_info("weights"), {1, 0}), |
| 271 | + reorder("reorder_dt", input_info("permute_test"), format::any, data_types::f16, std::vector<float>()), |
| 272 | + fully_connected("fc1", input_info("input"), { "reorder_dt" }, "", data_types::f16), |
| 273 | + fully_connected("fc2", input_info("input2"), { "reorder_dt" }, "", data_types::f16) |
| 274 | + ); |
| 275 | + |
| 276 | + ExecutionConfig config = get_test_default_config(engine); |
| 277 | + config.set_property(ov::intel_gpu::optimize_data(true)); |
| 278 | + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); |
| 279 | + |
| 280 | + if (engine.get_device_info().supports_immad) { |
| 281 | + ov::intel_gpu::ImplementationDesc fc_impl = { format::bfyx, "", impl_types::onednn }; |
| 282 | + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc1", fc_impl} })); |
| 283 | + } |
| 284 | + |
| 285 | + cldnn::network network(engine, topology, config); |
| 286 | + network.set_input_data("input", input); |
| 287 | + network.set_input_data("input2", input2); |
| 288 | + |
| 289 | + auto outputs = network.execute(); |
| 290 | + auto output = outputs.at("fc1").get_memory(); |
| 291 | + cldnn::mem_lock<ov::float16> output_ptr(output, get_test_stream()); |
| 292 | +} |
0 commit comments