diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp
index 435b67cafaf7e5..f0aab7ebc4677a 100644
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp
@@ -110,6 +110,9 @@ void propagate_constants::run(program& p) {
                 [](program_node* node) { return node->is_constant(); }),
                 curr_node.users.end());
             p.replace(curr_node, new_node);
+            // Recompute new_node's output layout now that it has replaced curr_node in the graph.
+            // NOTE(review): `false` presumably skips invalidating the users' layouts - confirm.
+            new_node.recalc_output_layout(false);
         }
     }
 
diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/propagate_constants_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/propagate_constants_gpu_test.cpp
index 206c93c6af39ed..8e98aa43a6a800 100644
--- a/src/plugins/intel_gpu/tests/unit/test_cases/propagate_constants_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/test_cases/propagate_constants_gpu_test.cpp
@@ -3,9 +3,12 @@
 //
 
 #include "test_utils.h"
+#include "random_generator.hpp"
 
 #include <intel_gpu/primitives/input_layout.hpp>
 #include <intel_gpu/primitives/concatenation.hpp>
+#include <intel_gpu/primitives/fully_connected.hpp>
+#include <intel_gpu/primitives/permute.hpp>
 #include <intel_gpu/primitives/reorder.hpp>
 #include <intel_gpu/primitives/data.hpp>
 #include <intel_gpu/primitives/reshape.hpp>
@@ -57,3 +60,77 @@ TEST(propagate_constants, copy_dependecies_from_nodes) {
 TEST(propagate_constants, copy_dependecies_from_nodes_cached) {
     test_copy_dependecies_from_nodes<ov::float16>(true);
 }
+
+// Verifies constant propagation through a `data -> permute({1, 0}) -> reorder` weights
+// path shared by two fully_connected nodes: fc1 reads a static-shaped input, fc2 an
+// input declared with a dynamic first dimension. The fc1 result of the optimized network
+// must be identical to the one produced by the same topology with optimize_data(false).
+TEST(propagate_constants, permute_1_0_reorder_fc) {
+    auto& engine = get_test_engine();
+
+    auto input2_layout_dyn = layout{ ov::PartialShape{ -1, 32 }, data_types::f16, format::bfyx };
+
+    auto input = engine.allocate_memory({ { 2, 32 }, data_types::f16, format::bfyx });
+    auto input2 = engine.allocate_memory({ { 2, 32 }, data_types::f16, format::bfyx });
+    auto weights = engine.allocate_memory({{ 32, 2 }, data_types::f32, format::bfyx });
+
+    tests::random_generator rg(GET_SUITE_NAME);
+    auto input_data = rg.generate_random_2d<ov::float16>(2, 32, -1, 1);
+    auto input2_data = rg.generate_random_2d<ov::float16>(2, 32, -1, 1);
+    auto weights_data = rg.generate_random_2d<float>(32, 2, -1, 1);
+
+    // All three buffers are filled the same way: flatten the generated 2D data first.
+    set_values(input, flatten_2d(format::bfyx, input_data));
+    set_values(input2, flatten_2d(format::bfyx, input2_data));
+    set_values(weights, flatten_2d(format::bfyx, weights_data));
+
+    topology topology(
+        input_layout("input", input->get_layout()),
+        input_layout("input2", input2_layout_dyn),
+        data("weights", weights),
+        permute("permute_test", input_info("weights"), {1, 0}),
+        reorder("reorder_dt", input_info("permute_test"), format::any, data_types::f16, std::vector<float>()),
+        fully_connected("fc1", input_info("input"), { "reorder_dt" }, "", data_types::f16),
+        fully_connected("fc2", input_info("input2"), { "reorder_dt" }, "", data_types::f16)
+    );
+
+    // Builds the execution config for one run; the optimized and the reference run
+    // differ only in the optimize_data flag.
+    auto make_config = [&engine](bool optimize) {
+        ExecutionConfig cfg = get_test_default_config(engine);
+        cfg.set_property(ov::intel_gpu::optimize_data(optimize));
+        cfg.set_property(ov::intel_gpu::allow_new_shape_infer(true));
+
+        if (engine.get_device_info().supports_immad) {
+            ov::intel_gpu::ImplementationDesc fc_impl = { format::bfyx, "", impl_types::onednn };
+            cfg.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc1", fc_impl} }));
+        }
+
+        return cfg;
+    };
+
+    ExecutionConfig config = make_config(true);
+    cldnn::network network(engine, topology, config);
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+
+    auto outputs = network.execute();
+    auto output = outputs.at("fc1").get_memory();
+    cldnn::mem_lock<ov::float16> output_ptr(output, get_test_stream());
+
+    // Reference run: same topology and inputs, built with optimize_data(false).
+    ExecutionConfig config_ref = make_config(false);
+    cldnn::network network_ref(engine, topology, config_ref);
+    network_ref.set_input_data("input", input);
+    network_ref.set_input_data("input2", input2);
+
+    auto outputs_ref = network_ref.execute();
+    auto output_ref = outputs_ref.at("fc1").get_memory();
+    cldnn::mem_lock<ov::float16> output_ref_ptr(output_ref, get_test_stream());
+
+    // Sizes must agree before the element-wise loop indexes both buffers with one bound.
+    ASSERT_EQ(output_ptr.size(), output_ref_ptr.size());
+    for (size_t i = 0; i < output_ref_ptr.size(); ++i) {
+        ASSERT_EQ(output_ptr[i], output_ref_ptr[i]);
+    }
+}