@@ -1255,7 +1255,7 @@ class fully_connected_gpu_tests: public ::testing::Test {
1255
1255
}
1256
1256
}
1257
1257
1258
- void test_compressed_int4_scale_dyn_quan (bool is_caching_test, bool is_dynamic, int batch = 1 ) {
1258
+ void test_compressed_int4_scale_dyn_quan (bool is_caching_test, bool is_dynamic, int batch = 1 , bool is_wei_dyn = false ) {
1259
1259
tests::random_generator rg (GET_SUITE_NAME);
1260
1260
auto & engine = get_test_engine ();
1261
1261
@@ -1285,6 +1285,11 @@ class fully_connected_gpu_tests: public ::testing::Test {
1285
1285
auto scale_data = rg.generate_random_1d <ov::float16>(ofm_num * ifm_num / scales_group_size, -4 .0f , 4 .0f );
1286
1286
set_values (scale_mem, scale_data);
1287
1287
1288
+ if (is_wei_dyn) {
1289
+ // ifm_num is dynamic
1290
+ dyn_input_ps = is_3d ? ov::PartialShape{ -1 , -1 , -1 } : ov::PartialShape{ -1 , -1 };
1291
+ }
1292
+
1288
1293
auto in_layout = is_dynamic ? layout{ dyn_input_ps, data_types::f16, format::bfyx }
1289
1294
: layout{ input_ps, data_types::f16, format::bfyx };
1290
1295
@@ -1302,7 +1307,8 @@ class fully_connected_gpu_tests: public ::testing::Test {
1302
1307
1303
1308
auto config = get_test_default_config (engine);
1304
1309
config.set_property (ov::intel_gpu::allow_new_shape_infer (true ));
1305
- config.set_property (ov::intel_gpu::optimize_data (true ));
1310
+ ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, " fully_connected_gpu_bfyx_ref" , impl_types::ocl };
1311
+ config.set_property (ov::intel_gpu::force_implementations (ov::intel_gpu::ImplForcingMap{ {" fc_prim" , fc_impl_desc} }));
1306
1312
1307
1313
network network (engine, topology, config);
1308
1314
network.set_input_data (" input" , input_mem);
@@ -1365,13 +1371,13 @@ class fully_connected_gpu_tests: public ::testing::Test {
1365
1371
}
1366
1372
1367
1373
1368
- void test_compressed_int4_scale (bool is_caching_test, bool is_dynamic, long int batch_num, long int scales_group_size = 128 ) {
1374
+ void test_compressed_int4_scale (bool is_caching_test, bool is_dynamic, long int batch_num, long int scales_group_size = 128 , bool is_wei_dyn = false ) {
1369
1375
tests::random_generator rg (GET_SUITE_NAME);
1370
1376
auto & engine = get_test_engine ();
1371
1377
auto supports_immad = engine.get_device_info ().supports_immad ;
1372
1378
1373
1379
long int ifm_num = 256 ;
1374
- long int ofm_num = 256 ;
1380
+ long int ofm_num = 512 ;
1375
1381
1376
1382
auto input_mem = engine.allocate_memory ({ { batch_num, ifm_num}, data_types::f16, format::bfyx });
1377
1383
auto weights_mem = engine.allocate_memory ({ {ofm_num, ifm_num}, data_types::u4, format::bfyx });
@@ -1392,6 +1398,11 @@ class fully_connected_gpu_tests: public ::testing::Test {
1392
1398
auto in_layout = is_dynamic ? layout{ {-1 , ifm_num}, data_types::f16, format::bfyx }
1393
1399
: layout{ {batch_num, ifm_num}, data_types::f16, format::bfyx };
1394
1400
1401
+ if (is_dynamic && is_wei_dyn) {
1402
+ // ifm_num is dynamic
1403
+ in_layout = layout{ {-1 , -1 }, data_types::f16, format::bfyx };
1404
+ }
1405
+
1395
1406
auto dcomp_zp_name = supports_immad ? " dcomp_zp" : " " ;
1396
1407
1397
1408
auto fc_prim = fully_connected (" fc_prim" , input_info (" input" ), " weights" , " " , " scale" , dcomp_zp_name, data_types::f16, padding (), 2 , 2 );
@@ -1409,6 +1420,8 @@ class fully_connected_gpu_tests: public ::testing::Test {
1409
1420
1410
1421
auto config = get_test_default_config (engine);
1411
1422
config.set_property (ov::intel_gpu::allow_new_shape_infer (true ));
1423
+ ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, " fully_connected_gpu_bfyx_ref" , impl_types::ocl };
1424
+ config.set_property (ov::intel_gpu::force_implementations (ov::intel_gpu::ImplForcingMap{ {" fc_prim" , fc_impl_desc} }));
1412
1425
1413
1426
network network (engine, topology, config);
1414
1427
network.set_input_data (" input" , input_mem);
@@ -3324,6 +3337,32 @@ TEST_F(fully_connected_gpu_tests, compressed_int4_scale_dyn_cache_dynamic) {
3324
3337
this ->test_compressed_int4_scale_dyn_quan (true , true , 512 );
3325
3338
}
3326
3339
3340
+ TEST_F (fully_connected_gpu_tests, compressed_int4_scale_dynamic_f_input) {
3341
+ this ->test_compressed_int4_scale (false , true , 256 , 128 , true ); // args: is_caching_test, is_dynamic, batch_num, scales_group_size, is_wei_dyn. The group size (128, same as the default) is spelled out so that `true` binds to is_wei_dyn instead of being converted to scales_group_size == 1.
3342
+ }
3343
+
3344
+ TEST_F (fully_connected_gpu_tests, compressed_int4_scale_dynamic_f_input_cached) {
3345
+ this ->test_compressed_int4_scale (true , true , 260 , 128 , true ); // args: is_caching_test, is_dynamic, batch_num, scales_group_size, is_wei_dyn. The group size (128, same as the default) is spelled out so that `true` binds to is_wei_dyn instead of being converted to scales_group_size == 1.
3346
+ }
3347
+ TEST_F (fully_connected_gpu_tests, compressed_int4_scale_dynamic_f_input_b1g64) {
3348
+ this ->test_compressed_int4_scale (false , true , 1 , 64 , true ); // is_caching_test=false, is_dynamic=true, batch_num=1, scales_group_size=64, is_wei_dyn=true
3349
+ }
3350
+
3351
+ TEST_F (fully_connected_gpu_tests, compressed_int4_scale_dynamic_f_input_b1g128) {
3352
+ this ->test_compressed_int4_scale (false , true , 1 , 128 , true ); // is_caching_test=false, is_dynamic=true, batch_num=1, scales_group_size=128, is_wei_dyn=true
3353
+ }
3354
+
3355
+ TEST_F (fully_connected_gpu_tests, compressed_int4_scale_dyn_quan_dynamic_f_input_single_batch) {
3356
+ this ->test_compressed_int4_scale_dyn_quan (false , true , 1 , true ); // is_caching_test=false, is_dynamic=true, batch=1, is_wei_dyn=true
3357
+ }
3358
+
3359
+ TEST_F (fully_connected_gpu_tests, compressed_int4_scale_dyn_quan_dynamic_f_input) {
3360
+ this ->test_compressed_int4_scale_dyn_quan (false , true , 512 , true ); // is_caching_test=false, is_dynamic=true, batch=512, is_wei_dyn=true
3361
+ }
3362
+
3363
+ TEST_F (fully_connected_gpu_tests, compressed_int4_scale_dyn_quan_dynamic_f_input_unaligned) {
3364
+ this ->test_compressed_int4_scale_dyn_quan (false , true , 511 , true ); // is_caching_test=false, is_dynamic=true, batch=511, is_wei_dyn=true
3365
+ }
3327
3366
3328
3367
3329
3368
TEST_F (fully_connected_gpu_tests, compressed_scale_bias) {
0 commit comments