@@ -1316,7 +1316,7 @@ class gemm_gpu_tests: public ::testing::Test {
1316
1316
}
1317
1317
}
1318
1318
1319
- void test_transpose_matmul_f16 (size_t num_dims, bool is_input_dynamic, bool is_caching_test, std::vector<size_t > BMKN, std::vector<int64_t > input0_order, std::vector<int64_t > input1_order) {
1319
+ void test_transpose_matmul_f16 (size_t num_dims, bool is_input_dynamic, bool is_caching_test, std::vector<size_t > BMKN, std::vector<int64_t > input0_order, std::vector<int64_t > input1_order, const double abs_error = 0.0001 ) {
1320
1320
tests::random_generator rg;
1321
1321
rg.set_seed (GET_SUITE_NAME);
1322
1322
@@ -1411,7 +1411,6 @@ class gemm_gpu_tests: public ::testing::Test {
1411
1411
1412
1412
ASSERT_EQ (output_ptr.size (), ref_out_data.size ());
1413
1413
1414
- const auto abs_error = 0.0001 ;
1415
1414
for (uint32_t i = 0 ; i < ref_out_data.size (); ++i) {
1416
1415
ASSERT_NEAR (output_ptr[i], ref_out_data[i], abs_error) << " at " << i;
1417
1416
}
@@ -1487,10 +1486,22 @@ TEST_F(gemm_gpu_tests, transpose_matmul_static_3d_f16) {
1487
1486
this ->test_transpose_matmul_f16 (3 , false , false , /* BMKN*/ {19 , 37 , 23 , 29 }, /* input0_order*/ {0 , 2 , 1 }, /* input1_order*/ {1 , 2 , 0 });
1488
1487
}
1489
1488
1489
+ TEST_F (gemm_gpu_tests, transpose_matmul_static_3d_f16_n32) {
1490
+ this ->test_transpose_matmul_f16 (3 , false , false , /* BMKN*/ {1 , 256 , 32 , 128 }, /* input0_order*/ {0 , 1 , 2 }, /* input1_order*/ {0 , 2 , 1 }, 0.1 );
1491
+ }
1492
+
1490
1493
// 3D f32 transpose-matmul case using the fixture's f32 helper.
// NOTE(review): the f32 helper's signature is not visible here; the leading
// (3, false, false) arguments presumably mirror the f16 helper's
// (num_dims, is_input_dynamic, is_caching_test) - confirm against the helper.
TEST_F(gemm_gpu_tests, transpose_matmul_static_3d_f32) {
    this->test_transpose_matmul_f32(
        3,
        false,
        false,
        /* BMKN */ {19, 37, 23, 29},
        /* input0_order */ {0, 2, 1},
        /* input1_order */ {1, 2, 0});
}
1493
1496
1497
+ TEST_F (gemm_gpu_tests, transpose_matmul_static_3d_f32_n32) {
1498
+ this ->test_transpose_matmul_f32 (3 , false , false , /* BMKN*/ {2 , 128 , 16 , 256 }, /* input0_order*/ {0 , 1 , 2 }, /* input1_order*/ {0 , 2 , 1 });
1499
+ }
1500
+
1501
+ TEST_F (gemm_gpu_tests, transpose_matmul_static_3d_f32_n32_k_remainder) {
1502
+ this ->test_transpose_matmul_f32 (3 , false , false , /* BMKN*/ {2 , 128 , 17 , 256 }, /* input0_order*/ {0 , 1 , 2 }, /* input1_order*/ {0 , 2 , 1 });
1503
+ }
1504
+
1494
1505
// 4D f16 transpose-matmul case with is_input_dynamic set and caching disabled.
// No tolerance is passed, so the helper's default abs_error (0.0001) applies.
TEST_F(gemm_gpu_tests, transpose_matmul_dynamic_4d_f16_unaligned) {
    const std::vector<size_t> bmkn{19, 37, 23, 29};
    const std::vector<int64_t> order_in0{0, 2, 3, 1};
    const std::vector<int64_t> order_in1{1, 2, 3, 0};

    this->test_transpose_matmul_f16(/* num_dims */ 4,
                                    /* is_input_dynamic */ true,
                                    /* is_caching_test */ false,
                                    bmkn,
                                    order_in0,
                                    order_in1);
}
0 commit comments