@@ -518,6 +518,135 @@ TEST(quantize_gpu, quantize_levels_256_3d_unsigned) {
518
518
}
519
519
}
520
520
521
// Builds one topology with two branches and checks the quantize branch:
//   1) an f16 bfyx input reordered to fs_b_yx_fsv32 and pushed through a chain
//      of twelve 1x1 convolutions (filter_xy = 1, so pad = 0), then reordered;
//   2) eltwise(prod) of two inputs -> quantize -> reorder, whose output is
//      compared element by element against ref_data.
// The convolution branch is only built and executed; its result is never read.
+ TEST (quantize_gpu, eltwise_quantize_fs_b_yx_fsv32) {
522
+ tests::random_generator rg (GET_SUITE_NAME);
523
+ auto & engine = get_test_engine ();
524
+
525
+ // conv to enable 'fs_b_yx_fsv32_network'
526
+ const int batch_num = 2 ;
527
+ const int input_xy = 5 ;
528
+ const int input_f = 32 ;
529
+ const int output_f = 32 ;
530
+ const int filter_xy = 1 ;
531
+ const int pad = filter_xy / 2 ;
532
+
533
+ auto input_size = tensor (batch_num, input_f, input_xy, input_xy);
534
+ auto input_data = rg.generate_random_4d <ov::float16>(batch_num, input_f, input_xy, input_xy, -1 , 1 );
535
+ auto input_data_bfyx = flatten_4d (format::bfyx, input_data);
536
+ auto input_mem = engine.allocate_memory ({ data_types::f16, format::bfyx, input_size });
537
+ set_values (input_mem, input_data_bfyx);
538
+
539
+ auto weights_size = tensor (output_f, input_f, filter_xy, filter_xy);
540
+ auto weights_data = rg.generate_random_4d <ov::float16>(output_f, input_f, filter_xy, filter_xy, -1 , 1 );
541
+ auto weights_data_bfyx = flatten_4d (format::bfyx, weights_data);
542
+ auto weights_mem = engine.allocate_memory ({ data_types::f16, format::bfyx, weights_size });
543
+ set_values (weights_mem, weights_data_bfyx);
544
+
545
+ topology topology (
546
+ input_layout (" input_conv" , input_mem->get_layout ()),
547
+ data (" weights_fsv" , weights_mem));
548
+
549
+ // Reorder input to fs_b_yx_fsv32
550
+ topology.add (reorder (" input_fsv" , input_info (" input_conv" ), { data_types::f16, format::fs_b_yx_fsv32, input_size }));
551
+
// Chain of twelve convolutions (conv0 .. conv11): each one consumes the
// previous primitive and all of them share the same weights data primitive.
552
+ topology.add (convolution (" conv0" , input_info (" input_fsv" ), " weights_fsv" , " " , 1 , {1 , 1 }, {1 , 1 }, { pad, pad }, { pad, pad }, false ));
553
+ topology.add (convolution (" conv1" , input_info (" conv0" ), " weights_fsv" , " " , 1 , {1 , 1 }, {1 , 1 }, { pad, pad }, { pad, pad }, false ));
554
+ topology.add (convolution (" conv2" , input_info (" conv1" ), " weights_fsv" , " " , 1 , {1 , 1 }, {1 , 1 }, { pad, pad }, { pad, pad }, false ));
555
+ topology.add (convolution (" conv3" , input_info (" conv2" ), " weights_fsv" , " " , 1 , {1 , 1 }, {1 , 1 }, { pad, pad }, { pad, pad }, false ));
556
+ topology.add (convolution (" conv4" , input_info (" conv3" ), " weights_fsv" , " " , 1 , {1 , 1 }, {1 , 1 }, { pad, pad }, { pad, pad }, false ));
557
+ topology.add (convolution (" conv5" , input_info (" conv4" ), " weights_fsv" , " " , 1 , {1 , 1 }, {1 , 1 }, { pad, pad }, { pad, pad }, false ));
558
+ topology.add (convolution (" conv6" , input_info (" conv5" ), " weights_fsv" , " " , 1 , {1 , 1 }, {1 , 1 }, { pad, pad }, { pad, pad }, false ));
559
+ topology.add (convolution (" conv7" , input_info (" conv6" ), " weights_fsv" , " " , 1 , {1 , 1 }, {1 , 1 }, { pad, pad }, { pad, pad }, false ));
560
+ topology.add (convolution (" conv8" , input_info (" conv7" ), " weights_fsv" , " " , 1 , {1 , 1 }, {1 , 1 }, { pad, pad }, { pad, pad }, false ));
561
+ topology.add (convolution (" conv9" , input_info (" conv8" ), " weights_fsv" , " " , 1 , {1 , 1 }, {1 , 1 }, { pad, pad }, { pad, pad }, false ));
562
+ topology.add (convolution (" conv10" , input_info (" conv9" ), " weights_fsv" , " " , 1 , {1 , 1 }, {1 , 1 }, { pad, pad }, { pad, pad }, false ));
563
+ topology.add (convolution (" conv11" , input_info (" conv10" ), " weights_fsv" , " " , 1 , {1 , 1 }, {1 , 1 }, { pad, pad }, { pad, pad }, false ));
564
+
// Terminates the convolution branch; this primitive's output is not inspected below.
565
+ topology.add (reorder (" reorder_conv" , input_info (" conv11" ), format::b_fs_yx_fsv16, data_types::f32));
566
+
567
+ // eltwise + quantize pattern
568
+ auto in_layout = layout{ ov::PartialShape{2 , 16 , 1 , 2 }, data_types::f16, format::b_fs_yx_fsv16 };
569
+ auto input = engine.allocate_memory (in_layout);
570
+ auto input_low = engine.allocate_memory ({ data_types::f32,format::bfyx,{ 1 , 16 , 1 , 1 } });
571
+ auto input_high = engine.allocate_memory ({ data_types::f32,format::bfyx,{ 1 , 16 , 1 , 1 } });
572
+ auto output_low = engine.allocate_memory ({ data_types::f32,format::bfyx,{ 1 , 1 , 1 , 1 } });
573
+ auto output_high = engine.allocate_memory ({ data_types::f32,format::bfyx,{ 1 , 1 , 1 , 1 } });
574
+
// NOTE(review): `input` is allocated with data_types::f16, but the initializer
// list below holds 64 float literals. Confirm how set_values handles the
// element-type mismatch (conversion vs. raw float bytes written into an f16
// buffer); ref_data may depend on that behavior.
575
+ set_values (input, { -1 .0f , 2 .0f , 3 .0f , 4 .0f ,
576
+ 5 .0f , 2 .0f , 2 .0f , 3 .0f ,
577
+ 4 .0f , 6 .0f , 3 .0f , 3 .0f ,
578
+ 3 .0f , 5 .0f , 1 .0f , 1 .0f ,
579
+
580
+ 1 .0f , 1 .0f , 1 .0f , 1 .0f ,
581
+ 4 .0f , 6 .0f , 3 .0f , 3 .0f ,
582
+ 3 .0f , 5 .0f , 1 .0f , 1 .0f ,
583
+ 1 .0f , 1 .0f , 1 .0f , 1 .0f ,
584
+
585
+ -1 .0f , 2 .0f , 3 .0f , 4 .0f ,
586
+ 5 .0f , 2 .0f , 2 .0f , 3 .0f ,
587
+ 4 .0f , 6 .0f , 3 .0f , 3 .0f ,
588
+ 3 .0f , 5 .0f , 1 .0f , 1 .0f ,
589
+
590
+ 1 .0f , 1 .0f , 1 .0f , 1 .0f ,
591
+ 4 .0f , 6 .0f , 3 .0f , 3 .0f ,
592
+ 3 .0f , 5 .0f , 1 .0f , 1 .0f ,
593
+ 1 .0f , 1 .0f , 1 .0f , 1 .0f });
594
+
// input_low and input_high are deliberately filled with the same 16 values.
595
+ set_values (input_low, { 0 .0f , 1 .0f , 2 .0f , 3 .0f ,
596
+ 4 .0f , 5 .0f , 6 .0f , 7 .0f ,
597
+ 7 .0f , 6 .0f , 5 .0f , 4 .0f ,
598
+ 3 .0f , 2 .0f , 1 .0f , 0 .0f });
599
+ set_values (input_high, { 0 .0f , 1 .0f , 2 .0f , 3 .0f ,
600
+ 4 .0f , 5 .0f , 6 .0f , 7 .0f ,
601
+ 7 .0f , 6 .0f , 5 .0f , 4 .0f ,
602
+ 3 .0f , 2 .0f , 1 .0f , 0 .0f });
603
+ set_values (output_low, { -1 .0f });
604
+ set_values (output_high, { 1 .0f });
605
+
// Expected contents of the final reorder output: 64 values, each either -1 or 1
// (matching the output_low / output_high values set above).
606
+ std::vector<float > ref_data = { -1 , 1 , -1 , 1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 ,
607
+ -1 , 1 , -1 , 1 , -1 , 1 , -1 , 1 , -1 , -1 , -1 , -1 ,
608
+ -1 , -1 , -1 , 1 , -1 , 1 , -1 , 1 , -1 , 1 , -1 , 1 ,
609
+ -1 , -1 , -1 , -1 , -1 , -1 , -1 , 1 , -1 , 1 , -1 , 1 ,
610
+ -1 , 1 , -1 , 1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 ,
611
+ -1 , 1 , -1 , 1 };
612
+
// Second branch: eltwise product -> quantize -> reorder to b_fs_yx_fsv16 / f32.
// The literal 2 passed to quantize is presumably the number of levels - confirm
// against the quantize primitive's constructor.
613
+ topology.add (
614
+ input_layout (" input1" , in_layout),
615
+ input_layout (" input2" , in_layout),
616
+ eltwise (" multiply" , input_info (" input1" ), input_info (" input2" ), eltwise_mode::prod),
617
+ data (" input_low" , input_low),
618
+ data (" input_high" , input_high),
619
+ data (" output_low" , output_low),
620
+ data (" output_high" , output_high),
621
+ quantize (" quantize" , input_info (" multiply" ), input_info (" input_low" ), input_info (" input_high" ), input_info (" output_low" ), input_info (" output_high" ), 2 , data_types::f32),
622
+ reorder (" reorder" , input_info (" quantize" ), format::b_fs_yx_fsv16, data_types::f32)
623
+ );
624
+
// Force the quantize primitive onto the reference kernel in b_fs_yx_fsv16 and
// turn on optimize_data for the network build.
625
+ ExecutionConfig config = get_test_default_config (engine);
626
+ ov::intel_gpu::ImplementationDesc quantize_impl = { format::b_fs_yx_fsv16, " quantize_gpu_ref" };
627
+ config.set_property (ov::intel_gpu::force_implementations (ov::intel_gpu::ImplForcingMap{ { " quantize" , quantize_impl } }));
628
+ config.set_property (ov::intel_gpu::optimize_data (true ));
629
+
630
+ network network (engine, topology, config);
631
+ network.set_input_data (" input_conv" , input_mem);
// Both eltwise inputs are bound to the same buffer, so the product multiplies
// every element by itself.
632
+ network.set_input_data (" input1" , input);
633
+ network.set_input_data (" input2" , input);
634
+ auto outputs = network.execute ();
635
+
636
+ auto output = outputs.at (" reorder" ).get_memory ();
637
+ cldnn::mem_lock<float > output_ptr (output, get_test_stream ());
638
+
639
+ // Check that the layout and the memory both contain the logical size of the tensor
640
+ ASSERT_EQ (output->count (), (size_t )64 );
641
+ ASSERT_EQ (output->get_layout ().count (), (size_t )64 );
642
+
// NOTE(review): the output is read through mem_lock<float>, yet the byte-size
// check uses sizeof(uint32_t); presumably chosen because both are 4 bytes -
// sizeof(float) would state the intent directly.
643
+ ASSERT_EQ (output->size (), ref_data.size () * sizeof (uint32_t ));
644
+
645
+ for (size_t i = 0 ; i < ref_data.size (); ++i) {
646
+ ASSERT_EQ (output_ptr[i], ref_data[i]) << " index = " << i;
647
+ }
648
+ }
649
+
521
650
TEST (quantize_gpu, dynamic) {
522
651
auto & engine = get_test_engine ();
523
652
0 commit comments