@@ -787,3 +787,179 @@ TEST(loop_gpu, support_loop_w_dynamic_input_w_various_shapes) {
787
787
std::vector<float >(),
788
788
2 , 3 );
789
789
}
790
+
791
+ static void test_loop_gpu_wo_trip_count_update_primitive_id (ov::PartialShape body_input_layout,
792
+ std::vector<ov::PartialShape> whole_layouts,
793
+ std::vector<std::vector<float >> input_data_list,
794
+ std::vector<float > expected_output_data,
795
+ size_t axis,
796
+ size_t exit_value,
797
+ bool is_caching_test = false ) {
798
+ auto & engine = get_test_engine ();
799
+
800
+ auto b_input_layout = cldnn::layout{ body_input_layout, data_types::f32, format::bfyx };
801
+
802
+ ov::PartialShape sliced_input_shape = body_input_layout;
803
+ sliced_input_shape[axis] = 1 ;
804
+ auto sliced_input_layout = cldnn::layout{ sliced_input_shape, data_types::f32, format::bfyx };
805
+
806
+ auto const_layout = cldnn::layout{ {}, data_types::i64, format::bfyx };
807
+
808
+ auto e_initial_condition_mem = engine.allocate_memory (const_layout);
809
+ auto e_num_iteration_mem = engine.allocate_memory (const_layout);
810
+ auto b_exit_value_mem = engine.allocate_memory (const_layout);
811
+ auto b_index_inc_mem = engine.allocate_memory (const_layout);
812
+
813
+ // initialize input buffers
814
+ set_values (e_initial_condition_mem, {1 });
815
+ set_values (b_exit_value_mem, {exit_value});
816
+ set_values (b_index_inc_mem, {1 });
817
+ set_values (e_num_iteration_mem, {0 });
818
+
819
+ primitive_id body_current_iteration_id = " b_index" ;
820
+ primitive_id body_execution_condition_id = " b_cond_exit_value" ;
821
+
822
+ cldnn::topology body (
823
+ input_layout (body_current_iteration_id, const_layout),
824
+ input_layout (" b_add_data" , sliced_input_layout),
825
+ input_layout (" b_mul_data" , sliced_input_layout),
826
+ data (" b_exit_value" , b_exit_value_mem),
827
+ data (" b_index_inc" , b_index_inc_mem),
828
+ eltwise (" b_index_update" , input_info (body_current_iteration_id), input_info (" b_index_inc" ), eltwise_mode::sum),
829
+ reorder (" b_index_cast" , input_info (" b_index_update" ),
830
+ cldnn::format::any, data_types::f32, {}, cldnn::reorder_mean_mode::subtract, cldnn::padding (), true ),
831
+ eltwise (body_execution_condition_id, input_info (" b_index" ), input_info (" b_exit_value" ), eltwise_mode::lt),
832
+ eltwise (" b_add" , input_info (" b_add_data" ), input_info (" b_index_cast" ), eltwise_mode::sum),
833
+ eltwise (" b_mul" , input_info (" b_mul_data" ), input_info (" b_index_cast" ), eltwise_mode::prod));
834
+
835
+ primitive_id trip_count_id = " " ;
836
+ primitive_id actual_iteration_count_id = " actual_iteration_count" ;
837
+ primitive_id initial_mean = " initial_mean" ;
838
+
839
+ primitive_id initial_condition_id = " initial_condition" ;
840
+ primitive_id initial_condition_id_elt = " initial_condition_elt" ;
841
+ primitive_id initial_condition_id_reorder = " initial_condition_reorder" ;
842
+ primitive_id initial_condition_id_reorder2 = " initial_condition_reorder2" ;
843
+ int64_t num_iterations = -1 ;
844
+
845
+ std::vector<loop::io_primitive_map> input_primitive_maps {
846
+ loop::io_primitive_map (" input" , " b_add_data" , axis),
847
+ loop::io_primitive_map (" input" , " b_mul_data" , axis),
848
+ loop::io_primitive_map (actual_iteration_count_id, body_current_iteration_id) };
849
+ std::vector<loop::io_primitive_map> output_primitive_maps {
850
+ loop::io_primitive_map (cldnn::input_info (" loop" , 0 ), cldnn::input_info (" b_add" , 0 ), axis),
851
+ loop::io_primitive_map (cldnn::input_info (" loop" , 1 ), cldnn::input_info (" b_mul" , 0 ), axis) };
852
+ std::vector<loop::backedge_mapping> back_edges {
853
+ loop::backedge_mapping (" b_index_update" , body_current_iteration_id) };
854
+
855
+ auto body_program = build_program (engine, body, body_execution_condition_id, output_primitive_maps, back_edges, true );
856
+
857
+ auto const_shape = engine.allocate_memory ({ov::PartialShape{4 }, data_types::i32, format::bfyx});
858
+
859
+
860
+ std::vector<int32_t > body_input_layouts;
861
+ for (size_t i = 0 ; i < body_input_layout.size (); i++) {
862
+ if (body_input_layout[i].is_dynamic ())
863
+ body_input_layouts.push_back (-1 );
864
+ else
865
+ body_input_layouts.push_back (body_input_layout[i].get_length ());
866
+ }
867
+ set_values<int32_t >(const_shape, body_input_layouts);
868
+ const std::vector<float > values_to_subtract = {0 .f };
869
+
870
+ cldnn::topology topology (
871
+ input_layout (" input_origin" , b_input_layout),
872
+ input_layout (initial_condition_id, e_initial_condition_mem->get_layout ()),
873
+ mutable_data (actual_iteration_count_id, e_num_iteration_mem),
874
+
875
+ reorder (initial_condition_id_reorder, input_info (initial_condition_id), cldnn::format::any, data_types::f32, values_to_subtract),
876
+ reorder (initial_condition_id_reorder2, input_info (initial_condition_id_reorder), cldnn::format::any, data_types::i32), // should be fused to test updating input id of loop
877
+
878
+ shape_of (" shape_of_input" , input_info (" input_origin" ), data_types::i32),
879
+ reduce (" reduced_shape" , input_info (" shape_of_input" ), reduce_mode::prod, {0 }, true ),
880
+ reshape (" reshape1" , input_info (" input_origin" ), input_info (" reduced_shape" ), false , ov::PartialShape::dynamic (1 )),
881
+ data (" const" , const_shape),
882
+ reshape (" input" , input_info (" reshape1" ), input_info (" const" ), false , ov::PartialShape::dynamic (4 )),
883
+
884
+ loop (" loop" , { input_info (actual_iteration_count_id), input_info (initial_condition_id_reorder2), input_info (" input" ) }, body_program,
885
+ trip_count_id, initial_condition_id_reorder2, actual_iteration_count_id,
886
+ input_primitive_maps, output_primitive_maps, back_edges,
887
+ num_iterations, body_current_iteration_id, body_execution_condition_id, 2 ),
888
+ eltwise (" out_sum" , input_info (" loop" , 0 ), input_info (" loop" , 1 ), eltwise_mode::sum));
889
+
890
+ ExecutionConfig config = get_test_default_config (engine);
891
+ config.set_property (ov::intel_gpu::allow_new_shape_infer (true ));
892
+
893
+ cldnn::network::ptr network = get_network (engine, topology, config, get_test_stream_ptr (), is_caching_test);
894
+
895
+ for (size_t i = 0 ; i < whole_layouts.size (); i++) {
896
+ auto whole_layout = whole_layouts[i];
897
+ auto input_data = input_data_list[i];
898
+
899
+ // initialize input buffers
900
+ set_values (e_initial_condition_mem, {1 });
901
+ set_values (b_exit_value_mem, {exit_value});
902
+ set_values (b_index_inc_mem, {1 });
903
+ set_values (e_num_iteration_mem, {0 });
904
+
905
+ auto e_input_layout = cldnn::layout{ whole_layout, data_types::f32, format::bfyx };
906
+ auto e_input_mem = engine.allocate_memory (e_input_layout); // b,f,x,y
907
+ auto expected_output_layout = whole_layout;
908
+ set_values (e_input_mem, input_data);
909
+ network->set_input_data (" input_origin" , e_input_mem);
910
+
911
+ network->set_input_data (initial_condition_id, e_initial_condition_mem);
912
+
913
+ auto outputs = network->execute ();
914
+ ASSERT_EQ (outputs.size (), 1 );
915
+
916
+ auto expected_num_iterations = (exit_value + 1 );
917
+ expected_output_layout[axis] = expected_num_iterations;
918
+ auto e_output_layout = cldnn::layout{ expected_output_layout, data_types::f32, format::bfyx };
919
+
920
+ auto num_iter_mem = network->get_output_memory (actual_iteration_count_id);
921
+ if (num_iter_mem != nullptr ) {
922
+ mem_lock<int64_t > num_iter_ptr{ num_iter_mem, get_test_stream () };
923
+ ASSERT_EQ (num_iter_ptr.data ()[0 ], expected_num_iterations);
924
+ }
925
+
926
+ std::vector<float > expected (input_data.size ());
927
+ if (expected_output_data.size () == 0 ) {
928
+ size_t unit = 1 ;
929
+ for (size_t k = axis; k < whole_layout.size (); k++) {
930
+ unit *= whole_layout[k].get_length ();
931
+ }
932
+
933
+ for (size_t j = 0 ; j < input_data.size (); j++) {
934
+ auto val = static_cast <size_t >((j % unit) / 4 ) + 1 ;
935
+ expected[j] = static_cast <float >(input_data[j] + val) + static_cast <float >(input_data[j] * val);
936
+ }
937
+ } else {
938
+ expected = expected_output_data;
939
+ }
940
+
941
+ auto output_mem = outputs.begin ()->second .get_memory ();
942
+ auto output_layout = output_mem->get_layout ();
943
+ ASSERT_EQ (output_layout.batch (), e_output_layout.batch ());
944
+ ASSERT_EQ (output_layout.feature (), e_output_layout.feature ());
945
+ ASSERT_EQ (output_layout.spatial (0 ), e_output_layout.spatial (0 ));
946
+ ASSERT_EQ (output_layout.spatial (1 ), e_output_layout.spatial (1 ));
947
+ // value check
948
+ {
949
+ mem_lock<float > output_ptr{ output_mem, get_test_stream () };
950
+ for (size_t i = 0 , iend = output_layout.count (); i < iend; ++i) {
951
+ ASSERT_FLOAT_EQ (output_ptr[i], expected.at (i));
952
+ }
953
+ }
954
+ }
955
+ }
956
+
957
+
958
+ TEST (loop_gpu, support_loop_w_dynamic_input_update_primitive_id) {
959
+ test_loop_gpu_wo_trip_count_update_primitive_id (
960
+ { 1 , -1 , 4 , 4 },
961
+ {{ 1 , 1 , 4 , 4 }}, // axis value should be iter_num = (exit_value + 1)
962
+ {input_data_4_4, input_data_2_4_4},
963
+ std::vector<float >(),
964
+ 2 , 3 );
965
+ }
0 commit comments