Skip to content

Commit 6e02445

Browse files
authored Dec 21, 2024
[GPU] Skip reorder opt when its dependency is crop (openvinotoolkit#27547)
### Details:
- *Skip reorder opt when its dependency is crop*

### Tickets:
- *155068*
1 parent 574c1a2 commit 6e02445

File tree

4 files changed

+102
-3
lines changed

4 files changed

+102
-3
lines changed
 

‎src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp

+6
Original file line number | Diff line number | Diff line change
@@ -295,6 +295,12 @@ void remove_redundant_reorders::run(program& p) {
295295
auto o_layout = r_node.get_output_layout();
296296
const auto& i_layout = r_node.get_input_layout(0);
297297

298+
auto is_r_node_rank_changed = r_node.get_output_layout().get_rank() != r_node.get_dependency(0).get_output_layout().get_rank();
299+
if (is_r_node_rank_changed &&
300+
((!update_implementations && r_node.get_dependency(0).is_type<crop>()) ||
301+
(r_node.get_dependency(0).is_type<crop>() && r_node.get_dependency(0).can_be_optimized())))
302+
continue;
303+
298304
// Optimize reorder b_fs_yx_fsv16 -> bfyx when spatials are equal to 1. In this case we can reinterpret buffer,
299305
// but pads need to be handled correctly.
300306
if (i_layout.format == format::b_fs_yx_fsv16 && o_layout.format == format::bfyx && !r_node.is_output() &&

‎src/plugins/intel_gpu/tests/unit/passes/add_required_reorders_test.cpp

+2-2
Original file line number | Diff line number | Diff line change
@@ -192,9 +192,9 @@ TEST(add_required_reorders, skip_adding_reorder_batch_axis_padding) {
192192
crop_prim = network.get_primitive("crop2");
193193
ASSERT_EQ(crop_prim->can_be_optimized(), true);
194194
auto reorder_prim = network.get_primitive("crop1_reorder");
195-
ASSERT_EQ(reorder_prim->can_be_optimized(), true);
195+
ASSERT_EQ(reorder_prim->can_be_optimized(), false);
196196
reorder_prim = network.get_primitive("crop2_reorder");
197-
ASSERT_EQ(reorder_prim->can_be_optimized(), true);
197+
ASSERT_EQ(reorder_prim->can_be_optimized(), false);
198198
auto concate = network.get_primitive("concat");
199199
ASSERT_EQ(concate->can_be_optimized(), false);
200200
}

‎src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -1224,7 +1224,7 @@ TEST(prepare_buffer_fusing, test_implicit_crop_and_outerpadding) {
12241224
auto reorder_prim = network.get_primitive("gather1_reorder");
12251225
ASSERT_EQ(reorder_prim->can_be_optimized(), true);
12261226
reorder_prim = network.get_primitive("gather2_reorder");
1227-
ASSERT_EQ(reorder_prim->can_be_optimized(), true);
1227+
ASSERT_EQ(reorder_prim->can_be_optimized(), false);
12281228
auto reshape_prim = network.get_primitive("reshape1");
12291229
ASSERT_EQ(reshape_prim->can_be_optimized(), true);
12301230
}

‎src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp

+93
Original file line number | Diff line number | Diff line change
@@ -2467,6 +2467,99 @@ TEST(reorder_gpu_f32, bfzyx_to_bsv16_fsv16_padded)
24672467
}
24682468
}
24692469

2470+
TEST(reorder_gpu_f32, bfzyx_to_bfyx_padded) {
2471+
tests::random_generator rg(GET_SUITE_NAME);
2472+
auto& engine = get_test_engine();
2473+
2474+
const int32_t b_in = 1024;
2475+
const int32_t f_in = 64;
2476+
const int32_t x_in = 72;
2477+
const int32_t y_in = 2;
2478+
const int32_t z_in = 3;
2479+
2480+
const int32_t b_crop = 1024;
2481+
const int32_t f_crop = 64;
2482+
const int32_t x_crop = 72;
2483+
const int32_t y_crop = 2;
2484+
const int32_t z_crop = 1;
2485+
2486+
const int32_t z0_off = 0;
2487+
const int32_t z1_off = 1;
2488+
const int32_t z2_off = 2;
2489+
2490+
auto input = engine.allocate_memory({ data_types::f32,format::bfzyx,{ b_in, f_in, x_in, y_in, z_in } });
2491+
2492+
topology topology;
2493+
topology.add(input_layout("input", input->get_layout()));
2494+
topology.add(crop("crop0", input_info("input"), { b_crop, f_crop, x_crop, y_crop, z_crop }, { 0, 0, 0, 0, z0_off }));
2495+
topology.add(crop("crop1", input_info("input"), { b_crop, f_crop, x_crop, y_crop, z_crop }, { 0, 0, 0, 0, z1_off }));
2496+
topology.add(crop("crop2", input_info("input"), { b_crop, f_crop, x_crop, y_crop, z_crop }, { 0, 0, 0, 0, z2_off }));
2497+
topology.add(reorder("reorder0", input_info("crop0"), format::bfyx, data_types::f32));
2498+
topology.add(reorder("reorder1", input_info("crop1"), format::bfyx, data_types::f32));
2499+
topology.add(reorder("reorder2", input_info("crop2"), format::bfyx, data_types::f32));
2500+
topology.add(reshape("reshape0", input_info("reorder0"), tensor(batch(b_in), feature(y_in), spatial(x_in, f_in))));
2501+
topology.add(reshape("reshape1", input_info("reorder1"), tensor(batch(b_in), feature(y_in), spatial(x_in, f_in))));
2502+
topology.add(reshape("reshape2", input_info("reorder2"), tensor(batch(b_in), feature(y_in), spatial(x_in, f_in))));
2503+
2504+
std::vector<float> input_vec = rg.generate_random_1d<float>(input->count(), -10, 10);
2505+
set_values(input, input_vec);
2506+
2507+
ExecutionConfig config = get_test_default_config(engine);
2508+
config.set_property(ov::intel_gpu::optimize_data(true));
2509+
network network(engine, topology, config);
2510+
2511+
network.set_input_data("input", input);
2512+
auto outputs = network.execute();
2513+
auto output0 = outputs.at("reshape0").get_memory();
2514+
auto output1 = outputs.at("reshape1").get_memory();
2515+
auto output2 = outputs.at("reshape2").get_memory();
2516+
2517+
cldnn::mem_lock<float> output_ptr0(output0, get_test_stream());
2518+
for (int b = 0; b < b_crop; ++b) {
2519+
for (int f = 0; f < f_crop; ++f) {
2520+
for (int z = 0; z < z_crop; ++z) {
2521+
for (int y = 0; y < y_crop; ++y) {
2522+
for (int x = 0; x < x_crop; ++x) {
2523+
int linear_id = x + x_in * (y + y_in * (z + z0_off + z_in * (f + f_in * b)));
2524+
int output_linear_id = x + x_crop * (y + y_crop * (z + z_crop * (f + f_crop * b)));
2525+
ASSERT_EQ(output_ptr0[output_linear_id], input_vec[linear_id]);
2526+
}
2527+
}
2528+
}
2529+
}
2530+
}
2531+
2532+
cldnn::mem_lock<float> output_ptr1(output1, get_test_stream());
2533+
for (int b = 0; b < b_crop; ++b) {
2534+
for (int f = 0; f < f_crop; ++f) {
2535+
for (int z = 0; z < z_crop; ++z) {
2536+
for (int y = 0; y < y_crop; ++y) {
2537+
for (int x = 0; x < x_crop; ++x) {
2538+
int linear_id = x + x_in * (y + y_in * (z + z1_off + z_in * (f + f_in * b)));
2539+
int output_linear_id = x + x_crop * (y + y_crop * (z + z_crop * (f + f_crop * b)));
2540+
ASSERT_EQ(output_ptr1[output_linear_id], input_vec[linear_id]);
2541+
}
2542+
}
2543+
}
2544+
}
2545+
}
2546+
2547+
cldnn::mem_lock<float> output_ptr2(output2, get_test_stream());
2548+
for (int b = 0; b < b_crop; ++b) {
2549+
for (int f = 0; f < f_crop; ++f) {
2550+
for (int z = 0; z < z_crop; ++z) {
2551+
for (int y = 0; y < y_crop; ++y) {
2552+
for (int x = 0; x < x_crop; ++x) {
2553+
int linear_id = x + x_in * (y + y_in * (z + z2_off + z_in * (f + f_in * b)));
2554+
int output_linear_id = x + x_crop * (y + y_crop * (z + z_crop * (f + f_crop * b)));
2555+
ASSERT_EQ(output_ptr2[output_linear_id], input_vec[linear_id]);
2556+
}
2557+
}
2558+
}
2559+
}
2560+
}
2561+
}
2562+
24702563
TEST(reorder_gpu_f32, b_fs_yx_fsv16_to_bfyx_opt_allowed)
24712564
{
24722565
auto& engine = get_test_engine();

0 commit comments

Comments
 (0)