@@ -601,3 +601,165 @@ TEST(loop_gpu, support_dynamic_tensoriterator_outer_axis) {
     test_loop_gpu_wo_trip_count({ 2, 1, 1, 2 }, { 2, 5, 1, 2 }, input_data_5_4, output_data_5_4, 1, 4);
 }
+
+static void test_loop_gpu_wo_trip_count_w_multiple_shapes(ov::PartialShape body_input_layout,
+                                                          std::vector<ov::PartialShape> whole_layouts,
+                                                          std::vector<std::vector<float>> input_data_list,
+                                                          std::vector<float> expected_output_data,
+                                                          size_t axis,
+                                                          size_t exit_value,
+                                                          bool is_caching_test = false) {
+    auto& engine = get_test_engine();
+
+    auto b_input_layout = cldnn::layout{ body_input_layout, data_types::f32, format::bfyx };
+    auto const_layout = cldnn::layout{ {}, data_types::i64, format::bfyx };
+
+    auto e_initial_condition_mem = engine.allocate_memory(const_layout);
+    auto e_num_iteration_mem = engine.allocate_memory(const_layout);
+    auto b_exit_value_mem = engine.allocate_memory(const_layout);
+    auto b_index_inc_mem = engine.allocate_memory(const_layout);
+
+    // initialize input buffers
+    set_values(e_initial_condition_mem, {1});
+    set_values(b_exit_value_mem, {exit_value});
+    set_values(b_index_inc_mem, {1});
+    set_values(e_num_iteration_mem, {0});
+
+    primitive_id body_current_iteration_id = "b_index";
+    primitive_id body_execution_condition_id = "b_cond_exit_value";
+
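+    // loop body: b_index_update = b_index + 1 is fed back as the next iteration's index; the loop keeps
+    // iterating while b_index < b_exit_value, and each iteration computes b_add = b_add_data + index and
+    // b_mul = b_mul_data * index, using the incremented index cast to f32 (b_index_cast)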
+    cldnn::topology body(
+        input_layout(body_current_iteration_id, const_layout),
+        input_layout("b_add_data", b_input_layout),
+        input_layout("b_mul_data", b_input_layout),
+        data("b_exit_value", b_exit_value_mem),
+        data("b_index_inc", b_index_inc_mem),
+        eltwise("b_index_update", input_info(body_current_iteration_id), input_info("b_index_inc"), eltwise_mode::sum),
+        reorder("b_index_cast", input_info("b_index_update"),
+                cldnn::format::any, data_types::f32, {}, cldnn::reorder_mean_mode::subtract, cldnn::padding(), true),
+        eltwise(body_execution_condition_id, input_info("b_index"), input_info("b_exit_value"), eltwise_mode::lt),
+        eltwise("b_add", input_info("b_add_data"), input_info("b_index_cast"), eltwise_mode::sum),
+        eltwise("b_mul", input_info("b_mul_data"), input_info("b_index_cast"), eltwise_mode::prod));
+
+    primitive_id trip_count_id = "";
+    primitive_id actual_iteration_count_id = "actual_iteration_count";
+    primitive_id initial_condition_id = "initial_condition";
+    int64_t num_iterations = -1;
+
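+    // "input" is sliced along `axis` into both body inputs, and the body outputs are concatenated back
+    // along the same axis; the back edge carries b_index_update into the next iteration's b_index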
+    std::vector<loop::io_primitive_map> input_primitive_maps {
+        loop::io_primitive_map("input", "b_add_data", axis),
+        loop::io_primitive_map("input", "b_mul_data", axis),
+        loop::io_primitive_map(actual_iteration_count_id, body_current_iteration_id) };
+    std::vector<loop::io_primitive_map> output_primitive_maps {
+        loop::io_primitive_map(cldnn::input_info("loop", 0), cldnn::input_info("b_add", 0), axis),
+        loop::io_primitive_map(cldnn::input_info("loop", 1), cldnn::input_info("b_mul", 0), axis) };
+    std::vector<loop::backedge_mapping> back_edges {
+        loop::backedge_mapping("b_index_update", body_current_iteration_id) };
+
+    auto body_program = build_program(engine, body, body_execution_condition_id, output_primitive_maps, back_edges, true);
+
+    cldnn::topology topology(
+        input_layout("input", b_input_layout),
+        input_layout(initial_condition_id, e_initial_condition_mem->get_layout()),
+        mutable_data(actual_iteration_count_id, e_num_iteration_mem),
+        loop("loop", { input_info(actual_iteration_count_id), input_info(initial_condition_id), input_info("input") }, body_program,
+             trip_count_id, initial_condition_id, actual_iteration_count_id,
+             input_primitive_maps, output_primitive_maps, back_edges,
+             num_iterations, body_current_iteration_id, body_execution_condition_id, 2),
+        eltwise("out_sum", input_info("loop", 0), input_info("loop", 1), eltwise_mode::sum));
+
+    ExecutionConfig config = get_test_default_config(engine);
+    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
+
+    cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
+
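+    // execute the same compiled network once per input shape to exercise dynamic-shape handling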
+    for (size_t i = 0; i < whole_layouts.size(); i++) {
+        auto whole_layout = whole_layouts[i];
+        auto input_data = input_data_list[i];
+
+        // initialize input buffers
+        set_values(e_initial_condition_mem, {1});
+        set_values(b_exit_value_mem, {exit_value});
+        set_values(b_index_inc_mem, {1});
+        set_values(e_num_iteration_mem, {0});
+
+        auto e_input_layout = cldnn::layout{ whole_layout, data_types::f32, format::bfyx };
+        auto e_input_mem = engine.allocate_memory(e_input_layout); // b,f,x,y
+        auto expected_output_layout = whole_layout;
+        set_values(e_input_mem, input_data);
+        network->set_input_data("input", e_input_mem);
+
+        network->set_input_data(initial_condition_id, e_initial_condition_mem);
+
+        auto outputs = network->execute();
+        ASSERT_EQ(outputs.size(), 1);
+
+        auto expected_num_iterations = (exit_value + 1);
+        expected_output_layout[axis] = expected_num_iterations;
+        auto e_output_layout = cldnn::layout{ expected_output_layout, data_types::f32, format::bfyx };
+
+        auto num_iter_mem = network->get_output_memory(actual_iteration_count_id);
+        if (num_iter_mem != nullptr) {
+            mem_lock<int64_t> num_iter_ptr{ num_iter_mem, get_test_stream() };
+            ASSERT_EQ(num_iter_ptr.data()[0], expected_num_iterations);
+        }
+
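+        // when no reference data is given, derive it from the input: with an innermost dimension of 4,
+        // val is the 1-based slice index along `axis`, and each element becomes (input + val) + (input * val)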
+        std::vector<float> expected(input_data.size());
+        if (expected_output_data.size() == 0) {
+            size_t unit = 1;
+            for (size_t k = axis; k < whole_layout.size(); k++) {
+                unit *= whole_layout[k].get_length();
+            }
+
+            for (size_t j = 0; j < input_data.size(); j++) {
+                auto val = static_cast<size_t>((j % unit) / 4) + 1;
+                expected[j] = static_cast<float>(input_data[j] + val) + static_cast<float>(input_data[j] * val);
+            }
+        } else {
+            expected = expected_output_data;
+        }
+
+        auto output_mem = outputs.begin()->second.get_memory();
+        auto output_layout = output_mem->get_layout();
+        ASSERT_EQ(output_layout.batch(), e_output_layout.batch());
+        ASSERT_EQ(output_layout.feature(), e_output_layout.feature());
+        ASSERT_EQ(output_layout.spatial(0), e_output_layout.spatial(0));
+        ASSERT_EQ(output_layout.spatial(1), e_output_layout.spatial(1));
+        // value check
+        {
+            mem_lock<float> output_ptr{ output_mem, get_test_stream() };
+            for (size_t i = 0, iend = output_layout.count(); i < iend; ++i) {
+                ASSERT_FLOAT_EQ(output_ptr[i], expected.at(i));
+            }
+        }
+    }
+}
+
+std::vector<float> input_data_4_4{
+    1.0f, 2.0f, -15.f, 3.0f,
+    4.0f, -15.f, 5.0f, 6.0f,
+    -15.f, 7.0f, -15.f, 0.0f,
+    0.0f, -15.f, 0.5f, -0.5f,
+};
+
+std::vector<float> input_data_2_4_4{
+    1.0f, 2.0f, -15.f, 3.0f,
+    4.0f, -15.f, 5.0f, 6.0f,
+    -15.f, 7.0f, -15.f, 0.0f,
+    0.0f, -15.f, 0.5f, -0.5f,
+
+    1.0f, 2.0f, -15.f, 3.0f,
+    4.0f, -15.f, 5.0f, 6.0f,
+    -15.f, 7.0f, -15.f, 0.0f,
+    0.0f, -15.f, 0.5f, -0.5f,
+};
+
+TEST(loop_gpu, support_loop_w_dynamic_input_w_various_shapes) {
+    test_loop_gpu_wo_trip_count_w_multiple_shapes(
+        { 1, -1, 1, 4 },
+        {{ 1, 1, 4, 4 }, { 1, 2, 4, 4 }},  // axis value should be iter_num = (exit_value + 1)
+        {input_data_4_4, input_data_2_4_4},
+        std::vector<float>(),
+        2, 3);
+}