@@ -7,6 +7,7 @@
#include <intel_gpu/runtime/memory.hpp>
#include <intel_gpu/runtime/engine.hpp>
#include <intel_gpu/graph/network.hpp>
+#include "intel_gpu/plugin/common_utils.hpp"
#include <intel_gpu/primitives/input_layout.hpp>
#include "intel_gpu/primitives/eltwise.hpp"
#include <intel_gpu/primitives/data.hpp>
@@ -1212,3 +1213,108 @@ TEST(loop_gpu, support_loop_w_dynamic_input_update_primitive_id) {
        std::vector<float>(),
        2, 3);
}
+
+template <typename T>
+void test_loop_gpu_zero_bytes_layout(bool is_caching_test)
+{
+    auto& engine = get_test_engine();
+
+    // trip_count uses a zero-element shape, i.e. a layout that occupies zero bytes
+    auto trip_count_mem = ov::intel_gpu::allocate_memory_evenif_zero_bytes(engine, { cldnn::layout{ ov::PartialShape({0}), data_types::i32, format::bfyx } });
+
+    auto input_mem = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 4, 5 } });
+    auto operand_mem = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 4, 5 } });
+    auto initial_condition_mem = engine.allocate_memory({ data_types::i32, format::bfyx, { 1, 1, 1, 1 } });
+    auto num_iteration_mem = engine.allocate_memory({ data_types::i32, format::bfyx, { 1, 1, 1, 1 } });
+
+    std::vector<T> input_data{
+        1.0f, 2.0f, -15.f, 3.0f, 4.0f, -15.f, 5.0f, 6.0f, -15.f, 7.0f,
+        -15.f, 0.0f, 0.0f, -15.f, 0.5f, -0.5f, -15.f, 8.0f, 1.5f, 5.2f
+    };
+    std::vector<T> eltwise_operand {
+        1.f, -2.f, 3.f, -4.f, 3.0f, -2.0f, 1.f, -2.f, 3.0f, -4.0f,
+        3.f, -2.f, 1.f, -2.f, 3.5f, -4.5f, 5.f, -4.f, 3.5f, -2.2f
+    };
+    int trip_count = 8;
+    int initial_condition = 1;
+
+    // initialize input buffers
+    set_values(input_mem, input_data);
+    set_values(operand_mem, eltwise_operand);
+    set_values(trip_count_mem, { trip_count });
+    set_values(initial_condition_mem, { initial_condition });
+
+    topology body(
+        input_layout("input", input_mem->get_layout()),
+        data("eltwise_operand", operand_mem),
+        eltwise("eltwise", input_info("input"), input_info("eltwise_operand"), eltwise_mode::sum)
+    );
+
+    std::vector<loop::io_primitive_map> input_primitive_maps { loop::io_primitive_map("input", "input") };
+    std::vector<loop::io_primitive_map> output_primitive_maps { loop::io_primitive_map("loop", "eltwise") };
+    std::vector<loop::backedge_mapping> back_edges { loop::backedge_mapping("eltwise", "input") };
+
+    auto body_program = build_program(engine, body, "", output_primitive_maps, back_edges);
+
+    topology topology(
+        input_layout("input", input_mem->get_layout()),
+        input_layout("trip_count", trip_count_mem->get_layout()),
+        input_layout("initial_condition", initial_condition_mem->get_layout()),
+        mutable_data("num_iteration", num_iteration_mem),
+        loop("loop", { input_info("num_iteration"), input_info("trip_count"), input_info("initial_condition"), input_info("input") }, body_program,
+             "trip_count", "initial_condition", "num_iteration",
+             input_primitive_maps, output_primitive_maps, back_edges, 8)
+    );
+
+    cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test);
+
+    network->set_input_data("input", input_mem);
+    network->set_input_data("trip_count", trip_count_mem);
+    network->set_input_data("initial_condition", initial_condition_mem);
+
+    auto outputs = network->execute();
+    ASSERT_EQ(outputs.size(), 1);
+    auto output = outputs.begin()->second.get_memory();
+    auto output_layout = output->get_layout();
+
+    ASSERT_EQ(output_layout.batch(), 1);
+    ASSERT_EQ(output_layout.feature(), 1);
+    ASSERT_EQ(output_layout.spatial(0), 4);
+    ASSERT_EQ(output_layout.spatial(1), 5);
+
+    // value check
+    {
+        mem_lock<T> output_ptr{ output, get_test_stream() };
+        ASSERT_EQ(output_ptr.size(), input_data.size());
+        for (size_t i = 0, iend = input_data.size(); i < iend; ++i) {
+            ASSERT_FLOAT_EQ(output_ptr[i], input_data[i] + eltwise_operand[i] * trip_count);
+        }
+    }
+
+    // allocate new output memory
+    layout loop_l = network->get_output_memory("loop")->get_layout();
+    auto output_mem = engine.allocate_memory(loop_l);
+    network->set_output_memory("loop", output_mem);
+
+    // one more execute
+    set_values(input_mem, input_data);
+    set_values(operand_mem, eltwise_operand);
+    set_values(trip_count_mem, { trip_count });
+    set_values(initial_condition_mem, { initial_condition });
+    outputs = network->execute();
+
+    // check everything once again
+    ASSERT_EQ(outputs.size(), 1);
+    auto output2 = outputs.begin()->second.get_memory();
+    {
+        mem_lock<T> output_ptr2{ output2, get_test_stream() };
+        ASSERT_EQ(output_ptr2.size(), input_data.size());
+        for (size_t i = 0, iend = input_data.size(); i < iend; ++i) {
+            ASSERT_FLOAT_EQ(output_ptr2[i], input_data[i] + eltwise_operand[i] * trip_count);
+        }
+    }
+}
+
+TEST(loop_gpu, zero_bytes_layout) {
+    test_loop_gpu_zero_bytes_layout<float>(false);
+}
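Note: the helper already accepts an is_caching_test flag, so a cached (export/import) variant would only need a second instantiation along these lines. This is a sketch, not part of this diff; the test name is illustrative:

TEST(loop_gpu, zero_bytes_layout_cached) {
    // same scenario, but the network is serialized and reloaded by get_network()
    test_loop_gpu_zero_bytes_layout<float>(true);
}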