@@ -787,3 +787,182 @@ TEST(loop_gpu, support_loop_w_dynamic_input_w_various_shapes) {
787
787
std::vector<float >(),
788
788
2 , 3 );
789
789
}
790
+
791
+ static void test_loop_gpu_wo_trip_count_update_primitive_id (ov::PartialShape body_input_layout,
792
+ std::vector<ov::PartialShape> whole_layouts,
793
+ std::vector<std::vector<float >> input_data_list,
794
+ std::vector<float > expected_output_data,
795
+ size_t axis,
796
+ size_t exit_value,
797
+ bool is_caching_test = false ) {
798
+ auto & engine = get_test_engine ();
799
+
800
+ auto b_input_layout = cldnn::layout{ body_input_layout, data_types::f32, format::bfyx };
801
+
802
+ ov::PartialShape sliced_input_shape = body_input_layout;
803
+ sliced_input_shape[axis] = 1 ;
804
+ auto sliced_input_layout = cldnn::layout{ sliced_input_shape, data_types::f32, format::bfyx };
805
+
806
+ auto const_layout = cldnn::layout{ {}, data_types::i64, format::bfyx };
807
+
808
+ auto e_initial_condition_mem = engine.allocate_memory (const_layout);
809
+ auto e_num_iteration_mem = engine.allocate_memory (const_layout);
810
+ auto b_exit_value_mem = engine.allocate_memory (const_layout);
811
+ auto b_index_inc_mem = engine.allocate_memory (const_layout);
812
+ auto init_mean = engine.allocate_memory (const_layout);
813
+
814
+ // initialize input buffers
815
+ set_values (e_initial_condition_mem, {1 });
816
+ set_values (b_exit_value_mem, {exit_value});
817
+ set_values (b_index_inc_mem, {1 });
818
+ set_values (e_num_iteration_mem, {0 });
819
+ set_values (init_mean, {1 });
820
+
821
+ primitive_id body_current_iteration_id = " b_index" ;
822
+ primitive_id body_execution_condition_id = " b_cond_exit_value" ;
823
+
824
+ cldnn::topology body (
825
+ input_layout (body_current_iteration_id, const_layout),
826
+ input_layout (" b_add_data" , sliced_input_layout),
827
+ input_layout (" b_mul_data" , sliced_input_layout),
828
+ data (" b_exit_value" , b_exit_value_mem),
829
+ data (" b_index_inc" , b_index_inc_mem),
830
+ eltwise (" b_index_update" , input_info (body_current_iteration_id), input_info (" b_index_inc" ), eltwise_mode::sum),
831
+ reorder (" b_index_cast" , input_info (" b_index_update" ),
832
+ cldnn::format::any, data_types::f32, {}, cldnn::reorder_mean_mode::subtract, cldnn::padding (), true ),
833
+ eltwise (body_execution_condition_id, input_info (" b_index" ), input_info (" b_exit_value" ), eltwise_mode::lt),
834
+ eltwise (" b_add" , input_info (" b_add_data" ), input_info (" b_index_cast" ), eltwise_mode::sum),
835
+ eltwise (" b_mul" , input_info (" b_mul_data" ), input_info (" b_index_cast" ), eltwise_mode::prod));
836
+
837
+ primitive_id trip_count_id = " " ;
838
+ primitive_id actual_iteration_count_id = " actual_iteration_count" ;
839
+ primitive_id initial_mean = " initial_mean" ;
840
+
841
+ primitive_id initial_condition_id = " initial_condition" ;
842
+ primitive_id initial_condition_id_elt = " initial_condition_elt" ;
843
+ primitive_id initial_condition_id_reorder = " initial_condition_reorder" ;
844
+ primitive_id initial_condition_id_reorder2 = " initial_condition_reorder2" ;
845
+ int64_t num_iterations = -1 ;
846
+
847
+ std::vector<loop::io_primitive_map> input_primitive_maps {
848
+ loop::io_primitive_map (" input" , " b_add_data" , axis),
849
+ loop::io_primitive_map (" input" , " b_mul_data" , axis),
850
+ loop::io_primitive_map (actual_iteration_count_id, body_current_iteration_id) };
851
+ std::vector<loop::io_primitive_map> output_primitive_maps {
852
+ loop::io_primitive_map (cldnn::input_info (" loop" , 0 ), cldnn::input_info (" b_add" , 0 ), axis),
853
+ loop::io_primitive_map (cldnn::input_info (" loop" , 1 ), cldnn::input_info (" b_mul" , 0 ), axis) };
854
+ std::vector<loop::backedge_mapping> back_edges {
855
+ loop::backedge_mapping (" b_index_update" , body_current_iteration_id) };
856
+
857
+ auto body_program = build_program (engine, body, body_execution_condition_id, output_primitive_maps, back_edges, true );
858
+
859
+ auto const_shape = engine.allocate_memory ({ov::PartialShape{4 }, data_types::i32, format::bfyx});
860
+
861
+
862
+ std::vector<int32_t > body_input_layouts;
863
+ for (size_t i = 0 ; i < body_input_layout.size (); i++) {
864
+ if (body_input_layout[i].is_dynamic ())
865
+ body_input_layouts.push_back (-1 );
866
+ else
867
+ body_input_layouts.push_back (body_input_layout[i].get_length ());
868
+ }
869
+ set_values<int32_t >(const_shape, body_input_layouts);
870
+
871
+
872
+ cldnn::topology topology (
873
+ input_layout (" input_origin" , b_input_layout),
874
+ input_layout (initial_condition_id, e_initial_condition_mem->get_layout ()),
875
+ mutable_data (actual_iteration_count_id, e_num_iteration_mem),
876
+ data (initial_mean, init_mean),
877
+
878
+ reorder (initial_condition_id_reorder, input_info (initial_condition_id), cldnn::format::any, data_types::f32/* , initial_mean*/ ),
879
+ reorder (initial_condition_id_reorder2, input_info (initial_condition_id_reorder), cldnn::format::any, data_types::i32), // should be fused to test updating input id of loop
880
+
881
+ shape_of (" shape_of_input" , input_info (" input_origin" ), data_types::i32),
882
+ reduce (" reduced_shape" , input_info (" shape_of_input" ), reduce_mode::prod, {0 }, true ),
883
+ reshape (" reshape1" , input_info (" input_origin" ), input_info (" reduced_shape" ), false , ov::PartialShape::dynamic (1 )),
884
+ data (" const" , const_shape),
885
+ reshape (" input" , input_info (" reshape1" ), input_info (" const" ), false , ov::PartialShape::dynamic (4 )),
886
+
887
+ loop (" loop" , { input_info (actual_iteration_count_id), input_info (initial_condition_id_reorder2), input_info (" input" ) }, body_program,
888
+ trip_count_id, initial_condition_id_reorder2, actual_iteration_count_id,
889
+ input_primitive_maps, output_primitive_maps, back_edges,
890
+ num_iterations, body_current_iteration_id, body_execution_condition_id, 2 ),
891
+ eltwise (" out_sum" , input_info (" loop" , 0 ), input_info (" loop" , 1 ), eltwise_mode::sum));
892
+
893
+ ExecutionConfig config = get_test_default_config (engine);
894
+ config.set_property (ov::intel_gpu::allow_new_shape_infer (true ));
895
+
896
+ cldnn::network::ptr network = get_network (engine, topology, config, get_test_stream_ptr (), is_caching_test);
897
+
898
+ for (size_t i = 0 ; i < whole_layouts.size (); i++) {
899
+ auto whole_layout = whole_layouts[i];
900
+ auto input_data = input_data_list[i];
901
+
902
+ // initialize input buffers
903
+ set_values (e_initial_condition_mem, {1 });
904
+ set_values (b_exit_value_mem, {exit_value});
905
+ set_values (b_index_inc_mem, {1 });
906
+ set_values (e_num_iteration_mem, {0 });
907
+
908
+ auto e_input_layout = cldnn::layout{ whole_layout, data_types::f32, format::bfyx };
909
+ auto e_input_mem = engine.allocate_memory (e_input_layout); // b,f,x,y
910
+ auto expected_output_layout = whole_layout;
911
+ set_values (e_input_mem, input_data);
912
+ network->set_input_data (" input_origin" , e_input_mem);
913
+
914
+ network->set_input_data (initial_condition_id, e_initial_condition_mem);
915
+
916
+ auto outputs = network->execute ();
917
+ ASSERT_EQ (outputs.size (), 2 );
918
+
919
+ auto expected_num_iterations = (exit_value + 1 );
920
+ expected_output_layout[axis] = expected_num_iterations;
921
+ auto e_output_layout = cldnn::layout{ expected_output_layout, data_types::f32, format::bfyx };
922
+
923
+ auto num_iter_mem = network->get_output_memory (actual_iteration_count_id);
924
+ if (num_iter_mem != nullptr ) {
925
+ mem_lock<int64_t > num_iter_ptr{ num_iter_mem, get_test_stream () };
926
+ ASSERT_EQ (num_iter_ptr.data ()[0 ], expected_num_iterations);
927
+ }
928
+
929
+ std::vector<float > expected (input_data.size ());
930
+ if (expected_output_data.size () == 0 ) {
931
+ size_t unit = 1 ;
932
+ for (size_t k = axis; k < whole_layout.size (); k++) {
933
+ unit *= whole_layout[k].get_length ();
934
+ }
935
+
936
+ for (size_t j = 0 ; j < input_data.size (); j++) {
937
+ auto val = static_cast <size_t >((j % unit) / 4 ) + 1 ;
938
+ expected[j] = static_cast <float >(input_data[j] + val) + static_cast <float >(input_data[j] * val);
939
+ }
940
+ } else {
941
+ expected = expected_output_data;
942
+ }
943
+
944
+ auto output_mem = outputs.begin ()->second .get_memory ();
945
+ auto output_layout = output_mem->get_layout ();
946
+ ASSERT_EQ (output_layout.batch (), e_output_layout.batch ());
947
+ ASSERT_EQ (output_layout.feature (), e_output_layout.feature ());
948
+ ASSERT_EQ (output_layout.spatial (0 ), e_output_layout.spatial (0 ));
949
+ ASSERT_EQ (output_layout.spatial (1 ), e_output_layout.spatial (1 ));
950
+ // value check
951
+ {
952
+ mem_lock<float > output_ptr{ output_mem, get_test_stream () };
953
+ for (size_t i = 0 , iend = output_layout.count (); i < iend; ++i) {
954
+ ASSERT_FLOAT_EQ (output_ptr[i], expected.at (i));
955
+ }
956
+ }
957
+ }
958
+ }
959
+
960
+
961
+ TEST (loop_gpu, support_loop_w_dynamic_input_update_primitive_id) {
962
+ test_loop_gpu_wo_trip_count_update_primitive_id (
963
+ { 1 , -1 , 4 , 4 },
964
+ {{ 1 , 1 , 4 , 4 }}, // axis value should be iter_num = (exit_value + 1)
965
+ {input_data_4_4, input_data_2_4_4},
966
+ std::vector<float >(),
967
+ 2 , 3 );
968
+ }
0 commit comments