@@ -522,6 +522,168 @@ class gemm_gpu_tests: public ::testing::Test {
522
522
}
523
523
}
524
524
525
+ void test_dynamic_padding_w_transpose_order (bool is_caching_test, bool n_dim_only) {
526
+ tests::random_generator rg;
527
+ rg.set_seed (GET_SUITE_NAME);
528
+
529
+ auto & engine = get_test_engine ();
530
+
531
+ const unsigned long BATCH_SIZE = 128 ;
532
+ const unsigned long M_SIZE = 12 ;
533
+ const unsigned long K_SIZE = 64 ;
534
+ const unsigned long N_SIZE = 12 ;
535
+
536
+ auto fill_mem = [&](cldnn::memory_ptr mem, std::vector<ov::float16>& data) {
537
+ cldnn::mem_lock<ov::float16> mem_ptr (mem, get_test_stream ());
538
+ auto && l = mem->get_layout ();
539
+ auto data_idx = 0 ;
540
+ for (cldnn::tensor::value_type b = 0 ; b < l.batch (); ++b) {
541
+ for (cldnn::tensor::value_type f = 0 ; f < l.feature (); ++f) {
542
+ for (cldnn::tensor::value_type y = 0 ; y < l.spatial (1 ); ++y) {
543
+ for (cldnn::tensor::value_type x = 0 ; x < l.spatial (0 ); ++x) {
544
+ auto tensor_coord = cldnn::tensor{{b, f, x, y}, 0 };
545
+ auto buffer_idx = l.get_linear_offset (tensor_coord);
546
+ mem_ptr[buffer_idx] = data[data_idx++];
547
+ }
548
+ }
549
+ }
550
+ }
551
+ };
552
+
553
+ const auto align_size_m = 13 ;
554
+ const auto align_size_k = 16 ;
555
+ const auto align_size_n = 15 ;
556
+ const auto align_size_b1 = 3 ;
557
+ const auto align_size_b2 = 19 ;
558
+
559
+ const auto aligned_batch1_size = align_to (1ul , align_size_b1);
560
+ auto padding_size_batch1 = static_cast <int >(aligned_batch1_size - 1 );
561
+
562
+ const auto aligned_batch2_size = align_to (BATCH_SIZE, align_size_b2);
563
+ auto padding_size_batch2 = static_cast <int >(aligned_batch2_size - BATCH_SIZE);
564
+
565
+ const auto aligned_m_size = align_to (M_SIZE, align_size_m);
566
+ auto padding_size_m = static_cast <int >(aligned_m_size - M_SIZE);
567
+ const auto aligned_k_size = align_to (K_SIZE, align_size_k);
568
+ auto padding_size_k = static_cast <int >(aligned_k_size - K_SIZE);
569
+ const auto aligned_n_size = align_to (N_SIZE, align_size_n);
570
+ auto padding_size_n = static_cast <int >(aligned_n_size - N_SIZE);
571
+
572
+ ov::Shape in1_shape = { 1 , BATCH_SIZE, M_SIZE, K_SIZE };
573
+ ov::Shape in2_shape = { 1 , BATCH_SIZE, N_SIZE, K_SIZE };
574
+ ov::Shape in1_shape_aligned = { aligned_batch1_size, aligned_batch2_size, aligned_m_size, aligned_k_size };
575
+ ov::Shape in2_shape_aligned = { aligned_batch1_size, aligned_batch2_size, aligned_n_size, aligned_k_size };
576
+
577
+ // Use dynamic padding for all BFYX dimensions
578
+ tensor dyn_pad_dims_input1 ({0 , 0 , 0 , 0 }, 0 );
579
+ tensor dyn_pad_dims_input2 ({0 , 0 , 0 , 0 }, 0 );
580
+
581
+ if (n_dim_only) {
582
+ dyn_pad_dims_input1 = tensor ({0 , 0 , 0 , 0 }, 0 );
583
+ dyn_pad_dims_input2 = tensor ({0 , 0 , 1 , 0 }, 0 );
584
+ } else {
585
+ dyn_pad_dims_input1 = tensor ({1 , 1 , 1 , 1 }, 0 );
586
+ dyn_pad_dims_input2 = tensor ({1 , 1 , 1 , 1 }, 0 );
587
+ }
588
+
589
+ auto in1_layout = layout{ {-1 , -1 , -1 , -1 }, data_types::f16, format::bfyx, padding ({0 , 0 , 0 , 0 }, {0 , 0 , 0 , 0 }, 0 .0f , dyn_pad_dims_input1)};
590
+ auto in2_layout = layout{ {-1 , -1 , -1 , -1 }, data_types::f16, format::bfyx, padding ({0 , 0 , 0 , 0 }, {0 , 0 , 0 , 0 }, 0 .0f , dyn_pad_dims_input2)};
591
+
592
+ auto aligned_input1_mem = engine.allocate_memory ({ov::PartialShape (in1_shape_aligned), data_types::f16, format::bfyx});
593
+ auto aligned_input2_mem = engine.allocate_memory ({ov::PartialShape (in2_shape_aligned), data_types::f16, format::bfyx});
594
+
595
+ auto input1_mem = engine.reinterpret_buffer (*aligned_input1_mem, layout{ov::PartialShape (in1_shape),
596
+ data_types::f16,
597
+ format::bfyx,
598
+ n_dim_only ? padding ({0 , 0 , 0 , 0 }, {0 , 0 , 0 , 0 }, 0 .0f , dyn_pad_dims_input1) :
599
+ padding ({padding_size_batch1, 0 , 0 , 0 }, {0 , padding_size_batch2, padding_size_m, padding_size_k}, 0 .0f , dyn_pad_dims_input1)});
600
+
601
+ auto input2_mem = engine.reinterpret_buffer (*aligned_input2_mem, layout{ov::PartialShape (in2_shape),
602
+ data_types::f16,
603
+ format::bfyx,
604
+ n_dim_only ? padding ({0 , 0 , 0 , 0 }, {0 , 0 , padding_size_n, 0 }, 0 .0f , dyn_pad_dims_input2) :
605
+ padding ({0 , padding_size_batch2, 0 , 0 }, {padding_size_batch1, 0 , padding_size_n, padding_size_k }, 0 .0f , dyn_pad_dims_input2)});
606
+
607
+ auto input_1_data = rg.generate_random_1d <ov::float16>(ov::shape_size (in1_shape), -2 , 2 );
608
+ auto input_2_data = rg.generate_random_1d <ov::float16>(ov::shape_size (in2_shape), -2 , 2 );
609
+
610
+ fill_mem (input1_mem, input_1_data);
611
+ fill_mem (input2_mem, input_2_data);
612
+
613
+ auto get_ref_results = [&]() {
614
+ ov::Shape in1_shape = { 1 , BATCH_SIZE, M_SIZE, K_SIZE };
615
+ ov::Shape in2_shape = { 1 , BATCH_SIZE, N_SIZE, K_SIZE };
616
+ auto in1_layout = layout{ {-1 , -1 , -1 , -1 }, data_types::f16, format::bfyx};
617
+ auto in2_layout = layout{ {-1 , -1 , -1 , -1 }, data_types::f16, format::bfyx};
618
+
619
+ auto input1_mem = engine.allocate_memory (layout{ov::PartialShape (in1_shape), data_types::f16, format::bfyx});
620
+ auto input2_mem = engine.allocate_memory (layout{ov::PartialShape (in2_shape), data_types::f16, format::bfyx});
621
+
622
+ fill_mem (input1_mem, input_1_data);
623
+ fill_mem (input2_mem, input_2_data);
624
+
625
+ topology topology;
626
+ topology.add (input_layout (" input1" , in1_layout),
627
+ input_layout (" input2" , in2_layout),
628
+ gemm (" gemm_ref" , { input_info (" input1" ), input_info (" input2" ) }, data_types::f16,
629
+ {0 , 2 , 1 , 3 }, {0 , 2 , 3 , 1 }, {0 , 1 , 2 , 3 })
630
+ );
631
+
632
+ auto config = get_test_default_config (engine);
633
+ config.set_property (ov::intel_gpu::optimize_data (true ));
634
+ config.set_property (ov::intel_gpu::allow_new_shape_infer (true ));
635
+
636
+ network network (engine, topology, config);
637
+ network.set_input_data (" input1" , input1_mem);
638
+ network.set_input_data (" input2" , input2_mem);
639
+
640
+ auto outputs = network.execute ();
641
+ OPENVINO_ASSERT (outputs.size () == 1 );
642
+ OPENVINO_ASSERT (outputs.begin ()->first == " gemm_ref" );
643
+
644
+ auto inst = network.get_primitive (" gemm_ref" );
645
+
646
+ auto output_mem = outputs.at (" gemm_ref" ).get_memory ();
647
+ auto output_layout = outputs.at (" gemm_ref" ).get_layout ();
648
+
649
+ return engine.reinterpret_buffer (*output_mem, output_layout);
650
+ };
651
+
652
+ topology topology;
653
+ topology.add (input_layout (" input1" , in1_layout),
654
+ input_layout (" input2" , in2_layout),
655
+ gemm (" gemm" , { input_info (" input1" ), input_info (" input2" ) }, data_types::f16,
656
+ {0 , 2 , 1 , 3 }, {0 , 2 , 3 , 1 }, {0 , 1 , 2 , 3 })
657
+ );
658
+
659
+ ExecutionConfig config = get_test_default_config (engine);
660
+ config.set_property (ov::intel_gpu::optimize_data (true ));
661
+ config.set_property (ov::intel_gpu::allow_new_shape_infer (true ));
662
+ network::ptr network = get_network (engine, topology, config, get_test_stream_ptr (), is_caching_test);
663
+ network->set_input_data (" input1" , input1_mem);
664
+ network->set_input_data (" input2" , input2_mem);
665
+
666
+ auto inst = network->get_primitive (" gemm" );
667
+ auto impl = inst->get_impl ();
668
+ ASSERT_TRUE (impl != nullptr );
669
+ ASSERT_TRUE (impl->is_dynamic ());
670
+
671
+ auto outputs = network->execute ();
672
+
673
+ auto output_mem = outputs.at (" gemm" ).get_memory ();
674
+ auto output_layout = outputs.at (" gemm" ).get_layout ();
675
+
676
+ auto res = engine.reinterpret_buffer (*output_mem, output_layout);
677
+
678
+ auto ref_res = get_ref_results ();
679
+
680
+ mem_lock<ov::float16> res_lock (res, get_test_stream ());
681
+ mem_lock<ov::float16> res_ref_lock (ref_res, get_test_stream ());
682
+ for (size_t i = 0 ; i < res->count (); i++) {
683
+ ASSERT_EQ (res_lock[i], res_ref_lock[i]) << i;
684
+ }
685
+ }
686
+
525
687
void test_dynamic_multi_inference_same_shape (bool is_caching_test) {
526
688
auto & engine = get_test_engine ();
527
689
@@ -1433,6 +1595,13 @@ TEST_F(gemm_gpu_tests, dynamic_padding_n_dim_only) {
1433
1595
this ->test_dynamic_padding (false , true );
1434
1596
}
1435
1597
1598
+ TEST_F (gemm_gpu_tests, dynamic_padding_w_transpose_order_all_dim) {
1599
+ this ->test_dynamic_padding_w_transpose_order (false , false );
1600
+ }
1601
+
1602
+ TEST_F (gemm_gpu_tests, dynamic_padding_w_transpose_order_n_dim_only) {
1603
+ this ->test_dynamic_padding_w_transpose_order (false , true );
1604
+ }
1436
1605
1437
1606
TEST_F (gemm_gpu_tests, dynamic_multi_inference_same_shape) {
1438
1607
this ->test_dynamic_multi_inference_same_shape (false );
0 commit comments