Skip to content

Commit 1e3160d

Browse files
authored
[GPU] enabling more layer fusions (openvinotoolkit#26592)
### Details:
- added `gelu_tanh` to the list of activations supported by OneDNN
- allowed `quantize` to be fused into `mvn`

### Tickets:
- 151419
1 parent aa6ae5d commit 1e3160d

File tree

3 files changed

+20
-21
lines changed

3 files changed

+20
-21
lines changed

src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -534,11 +534,11 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
534534
return does_support_fusings;
535535
};
536536

537-
auto mvn_supports_fusings = [](mvn_node& node, bool for_eltwise = false) -> bool {
537+
auto mvn_supports_fusings = [](mvn_node& node) -> bool {
538538
auto in_layout = node.get_input_layout(0);
539539
if (node.get_primitive()->requires_alignment(in_layout.get_partial_shape()))
540540
return false;
541-
return data_type_traits::is_i8_u8(in_layout.data_type) || for_eltwise;
541+
return true;
542542
};
543543

544544
auto dts_supports_fusings = [](depth_to_space_node& node) -> bool {
@@ -896,7 +896,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
896896
can_fuse_parents[i] = (parents[i].first->is_type<convolution>() &&
897897
conv_supports_fusings(parents[i].first->as<convolution>())) ||
898898
(parents[i].first->is_type<mvn>() &&
899-
mvn_supports_fusings(parents[i].first->as<mvn>(), true)) ||
899+
mvn_supports_fusings(parents[i].first->as<mvn>())) ||
900900
(parents[i].first->is_type<group_normalization>()) ||
901901
(parents[i].first->is_type<deconvolution>()) ||
902902
(parents[i].first->is_type<permute>()) ||

src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,7 @@ dnnl::algorithm convert_activation_func(cldnn::activation_func func) {
447447
case cldnn::activation_func::relu: return dnnl::algorithm::eltwise_relu;
448448
case cldnn::activation_func::relu_negative_slope: return dnnl::algorithm::eltwise_relu;
449449
case cldnn::activation_func::gelu: return dnnl::algorithm::eltwise_gelu_erf;
450+
case cldnn::activation_func::gelu_tanh: return dnnl::algorithm::eltwise_gelu_tanh;
450451
case cldnn::activation_func::elu: return dnnl::algorithm::eltwise_elu;
451452
case cldnn::activation_func::mish: return dnnl::algorithm::eltwise_mish;
452453
case cldnn::activation_func::swish: return dnnl::algorithm::eltwise_swish;

src/plugins/intel_gpu/tests/unit/fusions/mvn_fusion_test.cpp

+16-18
Original file line numberDiff line numberDiff line change
@@ -158,15 +158,14 @@ TEST_P(mvn_scale_quantize_i8, basic) {
158158
}
159159

160160
INSTANTIATE_TEST_SUITE_P(fusings_gpu, mvn_scale_quantize_i8, ::testing::ValuesIn(std::vector<mvn_test_params>{
161-
// Full fusing for fp input not supported yet, it may lead to output padding and non-optimal kernel
162-
// mvn_test_params{ CASE_MVN_F32_1, 2, 4 },
163-
// mvn_test_params{ CASE_MVN_F32_2, 2, 4 },
164-
// mvn_test_params{ CASE_MVN_3D_F32_1, 2, 4 },
165-
// mvn_test_params{ CASE_MVN_3D_F32_2, 2, 4 },
166-
// mvn_test_params{ CASE_MVN_F16_1, 2, 4 },
167-
// mvn_test_params{ CASE_MVN_F16_2, 2, 4 },
168-
// mvn_test_params{ CASE_MVN_3D_F16_1, 2, 4 },
169-
// mvn_test_params{ CASE_MVN_3D_F16_2, 2, 4 },
161+
mvn_test_params{ CASE_MVN_F32_1, 2, 2, 4 },
162+
mvn_test_params{ CASE_MVN_F32_2, 2, 2, 4 },
163+
mvn_test_params{ CASE_MVN_3D_F32_1, 2, 2, 4 },
164+
mvn_test_params{ CASE_MVN_3D_F32_2, 2, 2, 4 },
165+
mvn_test_params{ CASE_MVN_F16_1, 2, 2, 4 },
166+
mvn_test_params{ CASE_MVN_F16_2, 2, 2, 4 },
167+
mvn_test_params{ CASE_MVN_3D_F16_1, 2, 2, 4 },
168+
mvn_test_params{ CASE_MVN_3D_F16_2, 2, 2, 4 },
170169
mvn_test_params{ CASE_MVN_I8_1, 2, 2, 4 },
171170
mvn_test_params{ CASE_MVN_I8_2, 2, 2, 4 },
172171
mvn_test_params{ CASE_MVN_I8_3, 2, 2, 4 },
@@ -207,15 +206,14 @@ TEST_P(mvn_scale_activation_eltwise_fp32_quantize_i8, basic) {
207206
}
208207

209208
INSTANTIATE_TEST_SUITE_P(fusings_gpu, mvn_scale_activation_eltwise_fp32_quantize_i8, ::testing::ValuesIn(std::vector<mvn_test_params>{
210-
// Full using for fp input not supported yet, it may lead to output padding and non-optimal kernel
211-
// mvn_test_params{ CASE_MVN_F32_1, 2, 7 },
212-
// mvn_test_params{ CASE_MVN_F32_2, 2, 7 },
213-
// mvn_test_params{ CASE_MVN_3D_F32_1, 2, 7 },
214-
// mvn_test_params{ CASE_MVN_3D_F32_2, 2, 7 },
215-
// mvn_test_params{ CASE_MVN_F16_1, 2, 7 },
216-
// mvn_test_params{ CASE_MVN_F16_2, 2, 7 },
217-
// mvn_test_params{ CASE_MVN_3D_F16_1, 2, 7 },
218-
// mvn_test_params{ CASE_MVN_3D_F16_2, 2, 7 },
209+
mvn_test_params{ CASE_MVN_F32_1, 2, 4, 6 },
210+
mvn_test_params{ CASE_MVN_F32_2, 2, 4, 6 },
211+
mvn_test_params{ CASE_MVN_3D_F32_1, 2, 4, 6 },
212+
mvn_test_params{ CASE_MVN_3D_F32_2, 2, 4, 6 },
213+
mvn_test_params{ CASE_MVN_F16_1, 2, 4, 6 },
214+
mvn_test_params{ CASE_MVN_F16_2, 2, 4, 6 },
215+
mvn_test_params{ CASE_MVN_3D_F16_1, 2, 4, 6 },
216+
mvn_test_params{ CASE_MVN_3D_F16_2, 2, 4, 6 },
219217
mvn_test_params{ CASE_MVN_I8_1, 2, 4, 6 },
220218
mvn_test_params{ CASE_MVN_I8_2, 2, 4, 6 },
221219
mvn_test_params{ CASE_MVN_I8_3, 2, 4, 6 },

0 commit comments

Comments (0)