@@ -1255,7 +1255,7 @@ class fully_connected_gpu_tests: public ::testing::Test {
1255
1255
}
1256
1256
}
1257
1257
1258
- void test_compressed_int4_scale_dyn_quan (bool is_caching_test, bool is_dynamic, int batch = 1 ) {
1258
+ void test_compressed_int4_scale_dyn_quan (bool is_caching_test, bool is_dynamic, int batch = 1 , bool is_wei_dyn = false ) {
1259
1259
tests::random_generator rg (GET_SUITE_NAME);
1260
1260
auto & engine = get_test_engine ();
1261
1261
@@ -1285,6 +1285,11 @@ class fully_connected_gpu_tests: public ::testing::Test {
1285
1285
auto scale_data = rg.generate_random_1d <ov::float16>(ofm_num * ifm_num / scales_group_size, -4 .0f , 4 .0f );
1286
1286
set_values (scale_mem, scale_data);
1287
1287
1288
+ if (is_wei_dyn) {
1289
+ // ifm_num is dynamic
1290
+ dyn_input_ps = is_3d ? ov::PartialShape{ -1 , -1 , -1 } : ov::PartialShape{ -1 , -1 };
1291
+ }
1292
+
1288
1293
auto in_layout = is_dynamic ? layout{ dyn_input_ps, data_types::f16, format::bfyx }
1289
1294
: layout{ input_ps, data_types::f16, format::bfyx };
1290
1295
@@ -1302,7 +1307,8 @@ class fully_connected_gpu_tests: public ::testing::Test {
1302
1307
1303
1308
auto config = get_test_default_config (engine);
1304
1309
config.set_property (ov::intel_gpu::allow_new_shape_infer (true ));
1305
- config.set_property (ov::intel_gpu::optimize_data (true ));
1310
+ ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, " fully_connected_gpu_bfyx_ref" , impl_types::ocl };
1311
+ config.set_property (ov::intel_gpu::force_implementations (ov::intel_gpu::ImplForcingMap{ {" fc_prim" , fc_impl_desc} }));
1306
1312
1307
1313
network network (engine, topology, config);
1308
1314
network.set_input_data (" input" , input_mem);
@@ -1365,13 +1371,13 @@ class fully_connected_gpu_tests: public ::testing::Test {
1365
1371
}
1366
1372
1367
1373
1368
- void test_compressed_int4_scale (bool is_caching_test, bool is_dynamic, long int batch_num, long int scales_group_size = 128 ) {
1374
+ void test_compressed_int4_scale (bool is_caching_test, bool is_dynamic, long int batch_num, long int scales_group_size = 128 , bool is_wei_dyn = false ) {
1369
1375
tests::random_generator rg (GET_SUITE_NAME);
1370
1376
auto & engine = get_test_engine ();
1371
1377
auto supports_immad = engine.get_device_info ().supports_immad ;
1372
1378
1373
1379
long int ifm_num = 256 ;
1374
- long int ofm_num = 256 ;
1380
+ long int ofm_num = 512 ;
1375
1381
1376
1382
auto input_mem = engine.allocate_memory ({ { batch_num, ifm_num}, data_types::f16, format::bfyx });
1377
1383
auto weights_mem = engine.allocate_memory ({ {ofm_num, ifm_num}, data_types::u4, format::bfyx });
@@ -1392,6 +1398,11 @@ class fully_connected_gpu_tests: public ::testing::Test {
1392
1398
auto in_layout = is_dynamic ? layout{ {-1 , ifm_num}, data_types::f16, format::bfyx }
1393
1399
: layout{ {batch_num, ifm_num}, data_types::f16, format::bfyx };
1394
1400
1401
+ if (is_dynamic && is_wei_dyn) {
1402
+ // ifm_num is dynamic
1403
+ in_layout = layout{ {-1 , -1 }, data_types::f16, format::bfyx };
1404
+ }
1405
+
1395
1406
auto dcomp_zp_name = supports_immad ? " dcomp_zp" : " " ;
1396
1407
1397
1408
auto fc_prim = fully_connected (" fc_prim" , input_info (" input" ), " weights" , " " , " scale" , dcomp_zp_name, data_types::f16, 2 , 2 );
@@ -1409,6 +1420,8 @@ class fully_connected_gpu_tests: public ::testing::Test {
1409
1420
1410
1421
auto config = get_test_default_config (engine);
1411
1422
config.set_property (ov::intel_gpu::allow_new_shape_infer (true ));
1423
+ ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, " fully_connected_gpu_bfyx_ref" , impl_types::ocl };
1424
+ config.set_property (ov::intel_gpu::force_implementations (ov::intel_gpu::ImplForcingMap{ {" fc_prim" , fc_impl_desc} }));
1412
1425
1413
1426
network network (engine, topology, config);
1414
1427
network.set_input_data (" input" , input_mem);
@@ -3387,6 +3400,32 @@ TEST_F(fully_connected_gpu_tests, compressed_int4_scale_dyn_cache_dynamic) {
3387
3400
this ->test_compressed_int4_scale_dyn_quan (true , true , 512 );
3388
3401
}
3389
3402
3403
+ TEST_F (fully_connected_gpu_tests, compressed_int4_scale_dynamic_f_input) {
3404
+ this ->test_compressed_int4_scale (false , true , 256 , true );
3405
+ }
3406
+
3407
+ TEST_F (fully_connected_gpu_tests, compressed_int4_scale_dynamic_f_input_cached) {
3408
+ this ->test_compressed_int4_scale (true , true , 260 , true );
3409
+ }
3410
+ TEST_F (fully_connected_gpu_tests, compressed_int4_scale_dynamic_f_input_b1g64) {
3411
+ this ->test_compressed_int4_scale (false , true , 1 , 64 , true );
3412
+ }
3413
+
3414
+ TEST_F (fully_connected_gpu_tests, compressed_int4_scale_dynamic_f_input_b1g128) {
3415
+ this ->test_compressed_int4_scale (false , true , 1 , 128 , true );
3416
+ }
3417
+
3418
+ TEST_F (fully_connected_gpu_tests, compressed_int4_scale_dyn_quan_dynamic_f_input_single_batch) {
3419
+ this ->test_compressed_int4_scale_dyn_quan (false , true , 1 , true );
3420
+ }
3421
+
3422
+ TEST_F (fully_connected_gpu_tests, compressed_int4_scale_dyn_quan_dynamic_f_input) {
3423
+ this ->test_compressed_int4_scale_dyn_quan (false , true , 512 , true );
3424
+ }
3425
+
3426
+ TEST_F (fully_connected_gpu_tests, compressed_int4_scale_dyn_quan_dynamic_f_input_unaligned) {
3427
+ this ->test_compressed_int4_scale_dyn_quan (false , true , 511 , true );
3428
+ }
3390
3429
3391
3430
3392
3431
TEST_F (fully_connected_gpu_tests, compressed_scale_bias) {
0 commit comments