11 | 11 | #include "intel_gpu/primitives/eltwise.hpp"
12 | 12 | #include <intel_gpu/primitives/data.hpp>
13 | 13 | #include <intel_gpu/primitives/loop.hpp>
| 14 | +#include <intel_gpu/primitives/reshape.hpp> |
| 15 | +#include <intel_gpu/primitives/reduce.hpp> |
| 16 | +#include <intel_gpu/primitives/shape_of.hpp> |
14 | 17 | #include <intel_gpu/primitives/mutable_data.hpp>
15 | 18 | #include <intel_gpu/primitives/data.hpp>
16 | 19 | #include <intel_gpu/graph/program.hpp>
@@ -601,3 +604,186 @@ TEST(loop_gpu, support_dynamic_tensoriterator_outer_axis) {
601 | 604 |
602 | 605 |     test_loop_gpu_wo_trip_count({ 2, 1, 1, 2}, { 2, 5, 1, 2}, input_data_5_4, output_data_5_4, 1, 4);
603 | 606 | }
| 607 | + |
| 608 | +static void test_loop_gpu_wo_trip_count_w_multiple_shapes(ov::PartialShape body_input_layout, |
| 609 | + std::vector<ov::PartialShape> whole_layouts, |
| 610 | + std::vector<std::vector<float>> input_data_list, |
| 611 | + std::vector<float> expected_output_data, |
| 612 | + size_t axis, |
| 613 | + size_t exit_value, |
| 614 | + bool is_caching_test = false) { |
| 615 | + auto& engine = get_test_engine(); |
| 616 | + |
| 617 | + auto b_input_layout = cldnn::layout{ body_input_layout, data_types::f32, format::bfyx }; |
| 618 | + |
| 619 | + ov::PartialShape sliced_input_shape = body_input_layout; |
| 620 | + sliced_input_shape[axis] = 1; |
| 621 | + auto sliced_input_layout = cldnn::layout{ sliced_input_shape, data_types::f32, format::bfyx }; |
| 622 | + |
| 623 | + auto const_layout = cldnn::layout{ {}, data_types::i64, format::bfyx }; |
| 624 | + |
| 625 | + auto e_initial_condition_mem = engine.allocate_memory(const_layout); |
| 626 | + auto e_num_iteration_mem = engine.allocate_memory(const_layout); |
| 627 | + auto b_exit_value_mem = engine.allocate_memory(const_layout); |
| 628 | + auto b_index_inc_mem = engine.allocate_memory(const_layout); |
| 629 | + |
| 630 | + // initialize input buffers |
| 631 | + set_values(e_initial_condition_mem, {1}); |
| 632 | + set_values(b_exit_value_mem, {exit_value}); |
| 633 | + set_values(b_index_inc_mem, {1}); |
| 634 | + set_values(e_num_iteration_mem, {0}); |
| 635 | + |
| 636 | + primitive_id body_current_iteration_id = "b_index"; |
| 637 | + primitive_id body_execution_condition_id = "b_cond_exit_value"; |
| 638 | + |
| 639 | + cldnn::topology body( |
| 640 | + input_layout(body_current_iteration_id, const_layout), |
| 641 | + input_layout("b_add_data", sliced_input_layout), |
| 642 | + input_layout("b_mul_data", sliced_input_layout), |
| 643 | + data("b_exit_value", b_exit_value_mem), |
| 644 | + data("b_index_inc", b_index_inc_mem), |
| 645 | + eltwise("b_index_update", input_info(body_current_iteration_id), input_info("b_index_inc"), eltwise_mode::sum), |
| 646 | + reorder("b_index_cast", input_info("b_index_update"), |
| 647 | + cldnn::format::any, data_types::f32, {}, cldnn::reorder_mean_mode::subtract, cldnn::padding(), true), |
| 648 | + eltwise(body_execution_condition_id, input_info("b_index"), input_info("b_exit_value"), eltwise_mode::lt), |
| 649 | + eltwise("b_add", input_info("b_add_data"), input_info("b_index_cast"), eltwise_mode::sum), |
| 650 | + eltwise("b_mul", input_info("b_mul_data"), input_info("b_index_cast"), eltwise_mode::prod)); |
| 651 | + |
| 652 | + primitive_id trip_count_id = ""; |
| 653 | + primitive_id actual_iteration_count_id = "actual_iteration_count"; |
| 654 | + primitive_id initial_condition_id = "initial_condition"; |
| 655 | + int64_t num_iterations = -1; |
| 656 | + |
| 657 | + std::vector<loop::io_primitive_map> input_primitive_maps { |
| 658 | + loop::io_primitive_map("input", "b_add_data", axis), |
| 659 | + loop::io_primitive_map("input", "b_mul_data", axis), |
| 660 | + loop::io_primitive_map(actual_iteration_count_id, body_current_iteration_id) }; |
| 661 | + std::vector<loop::io_primitive_map> output_primitive_maps { |
| 662 | + loop::io_primitive_map(cldnn::input_info("loop", 0), cldnn::input_info("b_add", 0), axis), |
| 663 | + loop::io_primitive_map(cldnn::input_info("loop", 1), cldnn::input_info("b_mul", 0), axis) }; |
| 664 | + std::vector<loop::backedge_mapping> back_edges { |
| 665 | + loop::backedge_mapping("b_index_update", body_current_iteration_id) }; |
| 666 | + |
| 667 | + auto body_program = build_program(engine, body, body_execution_condition_id, output_primitive_maps, back_edges, true); |
| 668 | + |
| 669 | + auto const_shape = engine.allocate_memory({ov::PartialShape{4}, data_types::i32, format::bfyx}); |
| 670 | + std::vector<int32_t> body_input_layouts; |
| 671 | + for (size_t i = 0; i < body_input_layout.size(); i++) { |
| 672 | + if (body_input_layout[i].is_dynamic()) |
| 673 | + body_input_layouts.push_back(-1); |
| 674 | + else |
| 675 | + body_input_layouts.push_back(body_input_layout[i].get_length()); |
| 676 | + } |
| 677 | + set_values<int32_t>(const_shape, body_input_layouts); |
| 678 | + |
| 679 | + cldnn::topology topology( |
| 680 | + input_layout("input_origin", b_input_layout), |
| 681 | + input_layout(initial_condition_id, e_initial_condition_mem->get_layout()), |
| 682 | + mutable_data(actual_iteration_count_id, e_num_iteration_mem), |
| 683 | + |
| 684 | + shape_of("shape_of_input", input_info("input_origin"), data_types::i32), |
| 685 | + reduce("reduced_shape", input_info("shape_of_input"), reduce_mode::prod, {0}, true), |
| 686 | + reshape("reshape1", input_info("input_origin"), input_info("reduced_shape"), false, ov::PartialShape::dynamic(1)), |
| 687 | + data("const", const_shape), |
| 688 | + reshape("input", input_info("reshape1"), input_info("const"), false, ov::PartialShape::dynamic(4)), |
| 689 | + |
| 690 | + loop("loop", { input_info(actual_iteration_count_id), input_info(initial_condition_id), input_info("input") }, body_program, |
| 691 | + trip_count_id, initial_condition_id, actual_iteration_count_id, |
| 692 | + input_primitive_maps, output_primitive_maps, back_edges, |
| 693 | + num_iterations, body_current_iteration_id, body_execution_condition_id, 2), |
| 694 | + eltwise("out_sum", input_info("loop", 0), input_info("loop", 1), eltwise_mode::sum)); |
| 695 | + |
| 696 | + ExecutionConfig config = get_test_default_config(engine); |
| 697 | + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); |
| 698 | + |
| 699 | + cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); |
| 700 | + |
| 701 | +    for (size_t i = 0; i < whole_layouts.size(); i++) {   // run the same compiled network once per input shape |
| 702 | + auto whole_layout = whole_layouts[i]; |
| 703 | + auto input_data = input_data_list[i]; |
| 704 | + |
| 705 | + // initialize input buffers |
| 706 | + set_values(e_initial_condition_mem, {1}); |
| 707 | + set_values(b_exit_value_mem, {exit_value}); |
| 708 | + set_values(b_index_inc_mem, {1}); |
| 709 | + set_values(e_num_iteration_mem, {0}); |
| 710 | + |
| 711 | + auto e_input_layout = cldnn::layout{ whole_layout, data_types::f32, format::bfyx }; |
| 712 | + auto e_input_mem = engine.allocate_memory(e_input_layout); // b,f,x,y |
| 713 | + auto expected_output_layout = whole_layout; |
| 714 | + set_values(e_input_mem, input_data); |
| 715 | + network->set_input_data("input_origin", e_input_mem); |
| 716 | + |
| 717 | + network->set_input_data(initial_condition_id, e_initial_condition_mem); |
| 718 | + |
| 719 | + auto outputs = network->execute(); |
| 720 | + ASSERT_EQ(outputs.size(), 1); |
| 721 | + |
| 722 | + auto expected_num_iterations = (exit_value + 1); |
| 723 | + expected_output_layout[axis] = expected_num_iterations; |
| 724 | + auto e_output_layout = cldnn::layout{ expected_output_layout, data_types::f32, format::bfyx }; |
| 725 | + |
| 726 | + auto num_iter_mem = network->get_output_memory(actual_iteration_count_id); |
| 727 | + if (num_iter_mem != nullptr) { |
| 728 | + mem_lock<int64_t> num_iter_ptr{ num_iter_mem, get_test_stream() }; |
| 729 | + ASSERT_EQ(num_iter_ptr.data()[0], expected_num_iterations); |
| 730 | + } |
| 731 | + |
| 732 | + std::vector<float> expected(input_data.size()); |
| 733 | + if (expected_output_data.size() == 0) { |
| 734 | + size_t unit = 1; |
| 735 | + for (size_t k = axis; k < whole_layout.size(); k++) { |
| 736 | + unit *= whole_layout[k].get_length(); |
| 737 | + } |
| 738 | + |
| 739 | + for (size_t j = 0; j < input_data.size(); j++) { |
| 740 | +                auto val = static_cast<size_t>((j % unit) / 4) + 1;   // iteration index for this element: position along axis (4 = product of dims after axis here) + 1 |
| 741 | + expected[j] = static_cast<float>(input_data[j] + val) + static_cast<float>(input_data[j] * val); |
| 742 | + } |
| 743 | + } else { |
| 744 | + expected = expected_output_data; |
| 745 | + } |
| 746 | + |
| 747 | + auto output_mem = outputs.begin()->second.get_memory(); |
| 748 | + auto output_layout = output_mem->get_layout(); |
| 749 | + ASSERT_EQ(output_layout.batch(), e_output_layout.batch()); |
| 750 | + ASSERT_EQ(output_layout.feature(), e_output_layout.feature()); |
| 751 | + ASSERT_EQ(output_layout.spatial(0), e_output_layout.spatial(0)); |
| 752 | + ASSERT_EQ(output_layout.spatial(1), e_output_layout.spatial(1)); |
| 753 | + // value check |
| 754 | + { |
| 755 | + mem_lock<float> output_ptr{ output_mem, get_test_stream() }; |
| 756 | +            for (size_t k = 0, kend = output_layout.count(); k < kend; ++k) { |
| 757 | +                ASSERT_FLOAT_EQ(output_ptr[k], expected.at(k)); |
| 758 | + } |
| 759 | + } |
| 760 | + } |
| 761 | +} |
| 762 | + |
| 763 | +std::vector<float> input_data_4_4{ |
| 764 | + 1.0f, 2.0f, -15.f, 3.0f, |
| 765 | + 4.0f, -15.f, 5.0f, 6.0f, |
| 766 | + -15.f, 7.0f, -15.f, 0.0f, |
| 767 | + 0.0f, -15.f, 0.5f, -0.5f, |
| 768 | +}; |
| 769 | + |
| 770 | +std::vector<float> input_data_2_4_4{ |
| 771 | + 1.0f, 2.0f, -15.f, 3.0f, |
| 772 | + 4.0f, -15.f, 5.0f, 6.0f, |
| 773 | + -15.f, 7.0f, -15.f, 0.0f, |
| 774 | + 0.0f, -15.f, 0.5f, -0.5f, |
| 775 | + |
| 776 | + 1.0f, 2.0f, -15.f, 3.0f, |
| 777 | + 4.0f, -15.f, 5.0f, 6.0f, |
| 778 | + -15.f, 7.0f, -15.f, 0.0f, |
| 779 | + 0.0f, -15.f, 0.5f, -0.5f, |
| 780 | +}; |
| 781 | + |
| 782 | +TEST(loop_gpu, support_loop_w_dynamic_input_w_various_shapes) { |
| 783 | + test_loop_gpu_wo_trip_count_w_multiple_shapes( |
| 784 | + { 1, -1, 4, 4 }, |
| 785 | +        {{ 1, 1, 4, 4 }, { 1, 2, 4, 4 }}, // the dimension at axis must equal iter_num = (exit_value + 1) |
| 786 | + {input_data_4_4, input_data_2_4_4}, |
| 787 | + std::vector<float>(), |
| 788 | + 2, 3); |
| 789 | +} |
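
Note: the helper above also takes an is_caching_test flag (default false) that is forwarded to get_network(). A cached-network variant of the same case is not part of this diff; assuming the existing harness, a minimal sketch could look like the following (the test name is illustrative).

TEST(loop_gpu, support_loop_w_dynamic_input_w_various_shapes_cached) {
    // Same shapes and data as the test above; the only difference is the trailing
    // is_caching_test = true, intended to exercise the caching path in get_network().
    test_loop_gpu_wo_trip_count_w_multiple_shapes(
        { 1, -1, 4, 4 },
        {{ 1, 1, 4, 4 }, { 1, 2, 4, 4 }},
        {input_data_4_4, input_data_2_4_4},
        std::vector<float>(),
        2, 3, true);
}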