@@ -1212,3 +1212,108 @@ TEST(loop_gpu, support_loop_w_dynamic_input_update_primitive_id) {
1212
1212
std::vector<float >(),
1213
1213
2 , 3 );
1214
1214
}
1215
+ // Runs a loop network whose trip_count input is backed by a memory allocated with shape {0}, executes it twice (the second time with a replaced output buffer) and checks the output values; is_caching_test is forwarded to get_network.
1216
+ template <typename T> // T: element type of the host-side data vectors and of the mem_lock used to read the results
1217
+ void test_loop_gpu_zero_bytes_layout (bool is_caching_test)
1218
+ {
1219
+ auto & engine = get_test_engine ();
1220
+
1221
+ // trip_count buffer is allocated from a layout with shape {0}: this is the zero-bytes layout the test is about
1222
+ auto trip_count_mem = engine.allocate_memory ({ cldnn::layout{ ov::PartialShape ({0 }), data_types::i32, format::bfyx } });
1223
+ // NOTE(review): the data buffers below are always f32 regardless of T; only T = float is instantiated in the visible code
1224
+ auto input_mem = engine.allocate_memory ({ data_types::f32, format::bfyx, { 1 , 1 , 4 , 5 } });
1225
+ auto operand_mem = engine.allocate_memory ({ data_types::f32, format::bfyx, { 1 , 1 , 4 , 5 } });
1226
+ auto initial_condition_mem = engine.allocate_memory ({ data_types::i32, format::bfyx, { 1 , 1 , 1 , 1 } });
1227
+ auto num_iteration_mem = engine.allocate_memory ({ data_types::i32, format::bfyx, { 1 , 1 , 1 , 1 } });
1228
+ // 20 host-side values each, matching the 1x1x4x5 buffers above
1229
+ std::vector<T> input_data{
1230
+ 1 .0f , 2 .0f , -15 .f , 3 .0f , 4 .0f , -15 .f , 5 .0f , 6 .0f , -15 .f , 7 .0f ,
1231
+ -15 .f , 0 .0f , 0 .0f , -15 .f , 0 .5f , -0 .5f , -15 .f , 8 .0f , 1 .5f , 5 .2f
1232
+ };
1233
+ std::vector<T> eltwise_operand {
1234
+ 1 .f , -2 .f , 3 .f , -4 .f , 3 .0f , -2 .0f , 1 .f , -2 .f , 3 .0f , -4 .0f ,
1235
+ 3 .f , -2 .f , 1 .f , -2 .f , 3 .5f , -4 .5f , 5 .f , -4 .f , 3 .5f , -2 .2f
1236
+ };
1237
+ int trip_count = 8 ; // also used as the multiplier in the expected-value checks below
1238
+ int initial_condition = 1 ;
1239
+
1240
+ // fill the device buffers from the host-side data
1241
+ set_values (input_mem, input_data);
1242
+ set_values (operand_mem, eltwise_operand);
1243
+ set_values (trip_count_mem, { trip_count }); // NOTE(review): writes one i32 into the shape-{0} buffer; confirm set_values/allocate_memory tolerate this
1244
+ set_values (initial_condition_mem, {initial_condition});
1245
+ // loop body: eltwise = input + eltwise_operand
1246
+ topology body (
1247
+ input_layout (" input" , input_mem->get_layout ()),
1248
+ data (" eltwise_operand" , operand_mem),
1249
+ eltwise (" eltwise" , input_info (" input" ), input_info (" eltwise_operand" ), eltwise_mode::sum)
1250
+ );
1251
+ // maps: outer input -> body input, body eltwise -> loop output, and a back-edge feeding eltwise into the body input
1252
+ std::vector<loop::io_primitive_map> input_primitive_maps { loop::io_primitive_map (" input" , " input" ) };
1253
+ std::vector<loop::io_primitive_map> output_primitive_maps { loop::io_primitive_map (" loop" , " eltwise" ) };
1254
+ std::vector<loop::backedge_mapping> back_edges { loop::backedge_mapping (" eltwise" , " input" ) };
1255
+
1256
+ auto body_program = build_program (engine, body, " " , output_primitive_maps, back_edges); // NOTE(review): meaning of the third (blank string) argument is not visible here; confirm against build_program
1257
+ // outer network: trip_count is declared with the layout of the shape-{0} memory
1258
+ topology topology (
1259
+ input_layout (" input" , input_mem->get_layout ()),
1260
+ input_layout (" trip_count" , trip_count_mem->get_layout ()),
1261
+ input_layout (" initial_condition" , initial_condition_mem->get_layout ()),
1262
+ mutable_data (" num_iteration" , num_iteration_mem),
1263
+ loop (" loop" , { input_info (" num_iteration" ), input_info (" trip_count" ), input_info (" initial_condition" ), input_info (" input" ) }, body_program,
1264
+ " trip_count" , " initial_condition" , " num_iteration" ,
1265
+ input_primitive_maps, output_primitive_maps, back_edges, 8 ) // NOTE(review): trailing 8 is presumably the maximum iteration count (equals trip_count); confirm against the loop primitive
1266
+ );
1267
+
1268
+ cldnn::network::ptr network = get_network (engine, topology, get_test_default_config (engine), get_test_stream_ptr (), is_caching_test);
1269
+
1270
+ network->set_input_data (" input" , input_mem);
1271
+ network->set_input_data (" trip_count" , trip_count_mem);
1272
+ network->set_input_data (" initial_condition" , initial_condition_mem);
1273
+ // first execution: exactly one output is expected
1274
+ auto outputs = network->execute ();
1275
+ ASSERT_EQ (outputs.size (), 1 );
1276
+ auto output = outputs.begin ()->second .get_memory ();
1277
+ auto output_layout = output->get_layout ();
1278
+ // output dimensions must match the 1x1x4x5 input
1279
+ ASSERT_EQ (output_layout.batch (), 1 );
1280
+ ASSERT_EQ (output_layout.feature (), 1 );
1281
+ ASSERT_EQ (output_layout.spatial (0 ), 4 );
1282
+ ASSERT_EQ (output_layout.spatial (1 ), 5 );
1283
+
1284
+ // value check: every element must equal input + operand * trip_count
1285
+ {
1286
+ mem_lock<T> output_ptr{ output, get_test_stream () };
1287
+ ASSERT_EQ (output_ptr.size (), input_data.size ());
1288
+ for (size_t i = 0 , iend = input_data.size (); i < iend; ++i) {
1289
+ ASSERT_FLOAT_EQ (output_ptr[i], input_data[i] + eltwise_operand[i] * trip_count);
1290
+ }
1291
+ }
1292
+
1293
+ // allocate a fresh buffer with the current "loop" output layout and install it as the network's output memory
1294
+ layout loop_l = network->get_output_memory (" loop" )->get_layout ();
1295
+ auto output_mem = engine.allocate_memory (loop_l);
1296
+ network->set_output_memory (" loop" , output_mem);
1297
+
1298
+ // re-fill all inputs and execute a second time with the replaced output buffer
1299
+ set_values (input_mem, input_data);
1300
+ set_values (operand_mem, eltwise_operand);
1301
+ set_values (trip_count_mem, { trip_count });
1302
+ set_values (initial_condition_mem, { initial_condition });
1303
+ outputs = network->execute ();
1304
+
1305
+ // second run must produce the same values; only output count, element count and values are re-checked (not the layout dimensions)
1306
+ ASSERT_EQ (outputs.size (), 1 );
1307
+ auto output2 = outputs.begin ()->second .get_memory ();
1308
+ {
1309
+ mem_lock<T> output_ptr2{ output2, get_test_stream () };
1310
+ ASSERT_EQ (output_ptr2.size (), input_data.size ());
1311
+ for (size_t i = 0 , iend = input_data.size (); i < iend; ++i) {
1312
+ ASSERT_FLOAT_EQ (output_ptr2[i], input_data[i] + eltwise_operand[i] * trip_count);
1313
+ }
1314
+ }
1315
+ }
1316
+ // Runs the zero-bytes-layout loop scenario with float host data and is_caching_test = false.
1317
+ TEST (loop_gpu, zero_bytes_layout) {
1318
+ test_loop_gpu_zero_bytes_layout<float >(false );
1319
+ }
0 commit comments