From 1e3160d9461591b3555b030fb54cf68777358a00 Mon Sep 17 00:00:00 2001
From: Eddy Kim
Date: Thu, 26 Sep 2024 02:34:28 +0900
Subject: [PATCH] [GPU] enabling more layer fusions (#26592)

### Details:
- added `gelu_tanh` to the list of activations supported by OneDNN
- allowed `quantize` to be fused into `mvn`

### Tickets:
- 151419
---
 .../prepare_primitive_fusing.cpp                   |  6 ++--
 .../src/graph/impls/onednn/utils.cpp               |  1 +
 .../tests/unit/fusions/mvn_fusion_test.cpp         | 34 +++++++++----------
 3 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp
index 8e7cdd0337874c..b42ab89eafd61a 100644
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp
@@ -534,11 +534,11 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
         return does_support_fusings;
     };
 
-    auto mvn_supports_fusings = [](mvn_node& node, bool for_eltwise = false) -> bool {
+    auto mvn_supports_fusings = [](mvn_node& node) -> bool {
         auto in_layout = node.get_input_layout(0);
         if (node.get_primitive()->requires_alignment(in_layout.get_partial_shape()))
             return false;
-        return data_type_traits::is_i8_u8(in_layout.data_type) || for_eltwise;
+        return true;
     };
 
     auto dts_supports_fusings = [](depth_to_space_node& node) -> bool {
@@ -896,7 +896,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
             can_fuse_parents[i] = (parents[i].first->is_type<convolution>() && conv_supports_fusings(parents[i].first->as<convolution>())) ||
                                   (parents[i].first->is_type<mvn>() &&
-                                   mvn_supports_fusings(parents[i].first->as<mvn>(), true)) ||
+                                   mvn_supports_fusings(parents[i].first->as<mvn>())) ||
                                   (parents[i].first->is_type()) ||
                                   (parents[i].first->is_type()) ||
                                   (parents[i].first->is_type()) ||
diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp
index 4776417b3146fc..b8ff112cead147 100644
--- a/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp
+++ b/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp
@@ -447,6 +447,7 @@ dnnl::algorithm convert_activation_func(cldnn::activation_func func) {
         case cldnn::activation_func::relu: return dnnl::algorithm::eltwise_relu;
         case cldnn::activation_func::relu_negative_slope: return dnnl::algorithm::eltwise_relu;
         case cldnn::activation_func::gelu: return dnnl::algorithm::eltwise_gelu_erf;
+        case cldnn::activation_func::gelu_tanh: return dnnl::algorithm::eltwise_gelu_tanh;
         case cldnn::activation_func::elu: return dnnl::algorithm::eltwise_elu;
         case cldnn::activation_func::mish: return dnnl::algorithm::eltwise_mish;
         case cldnn::activation_func::swish: return dnnl::algorithm::eltwise_swish;
diff --git a/src/plugins/intel_gpu/tests/unit/fusions/mvn_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/fusions/mvn_fusion_test.cpp
index cda17515190f42..24aa5d31e1d76e 100644
--- a/src/plugins/intel_gpu/tests/unit/fusions/mvn_fusion_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/fusions/mvn_fusion_test.cpp
@@ -158,15 +158,14 @@ TEST_P(mvn_scale_quantize_i8, basic) {
 }
 
 INSTANTIATE_TEST_SUITE_P(fusings_gpu, mvn_scale_quantize_i8, ::testing::ValuesIn(std::vector<mvn_test_params>{
-    // Full fusing for fp input not supported yet, it may lead to output padding and non-optimal kernel
-    // mvn_test_params{ CASE_MVN_F32_1, 2, 4 },
-    // mvn_test_params{ CASE_MVN_F32_2, 2, 4 },
-    // mvn_test_params{ CASE_MVN_3D_F32_1, 2, 4 },
-    // mvn_test_params{ CASE_MVN_3D_F32_2, 2, 4 },
-    // mvn_test_params{ CASE_MVN_F16_1, 2, 4 },
-    // mvn_test_params{ CASE_MVN_F16_2, 2, 4 },
-    // mvn_test_params{ CASE_MVN_3D_F16_1, 2, 4 },
-    // mvn_test_params{ CASE_MVN_3D_F16_2, 2, 4 },
+    mvn_test_params{ CASE_MVN_F32_1, 2, 2, 4 },
+    mvn_test_params{ CASE_MVN_F32_2, 2, 2, 4 },
+    mvn_test_params{ CASE_MVN_3D_F32_1, 2, 2, 4 },
+    mvn_test_params{ CASE_MVN_3D_F32_2, 2, 2, 4 },
+    mvn_test_params{ CASE_MVN_F16_1, 2, 2, 4 },
+    mvn_test_params{ CASE_MVN_F16_2, 2, 2, 4 },
+    mvn_test_params{ CASE_MVN_3D_F16_1, 2, 2, 4 },
+    mvn_test_params{ CASE_MVN_3D_F16_2, 2, 2, 4 },
     mvn_test_params{ CASE_MVN_I8_1, 2, 2, 4 },
     mvn_test_params{ CASE_MVN_I8_2, 2, 2, 4 },
     mvn_test_params{ CASE_MVN_I8_3, 2, 2, 4 },
@@ -207,15 +206,14 @@ TEST_P(mvn_scale_activation_eltwise_fp32_quantize_i8, basic) {
 }
 
 INSTANTIATE_TEST_SUITE_P(fusings_gpu, mvn_scale_activation_eltwise_fp32_quantize_i8, ::testing::ValuesIn(std::vector<mvn_test_params>{
-    // Full using for fp input not supported yet, it may lead to output padding and non-optimal kernel
-    // mvn_test_params{ CASE_MVN_F32_1, 2, 7 },
-    // mvn_test_params{ CASE_MVN_F32_2, 2, 7 },
-    // mvn_test_params{ CASE_MVN_3D_F32_1, 2, 7 },
-    // mvn_test_params{ CASE_MVN_3D_F32_2, 2, 7 },
-    // mvn_test_params{ CASE_MVN_F16_1, 2, 7 },
-    // mvn_test_params{ CASE_MVN_F16_2, 2, 7 },
-    // mvn_test_params{ CASE_MVN_3D_F16_1, 2, 7 },
-    // mvn_test_params{ CASE_MVN_3D_F16_2, 2, 7 },
+    mvn_test_params{ CASE_MVN_F32_1, 2, 4, 6 },
+    mvn_test_params{ CASE_MVN_F32_2, 2, 4, 6 },
+    mvn_test_params{ CASE_MVN_3D_F32_1, 2, 4, 6 },
+    mvn_test_params{ CASE_MVN_3D_F32_2, 2, 4, 6 },
+    mvn_test_params{ CASE_MVN_F16_1, 2, 4, 6 },
+    mvn_test_params{ CASE_MVN_F16_2, 2, 4, 6 },
+    mvn_test_params{ CASE_MVN_3D_F16_1, 2, 4, 6 },
+    mvn_test_params{ CASE_MVN_3D_F16_2, 2, 4, 6 },
     mvn_test_params{ CASE_MVN_I8_1, 2, 4, 6 },
     mvn_test_params{ CASE_MVN_I8_2, 2, 4, 6 },
     mvn_test_params{ CASE_MVN_I8_3, 2, 4, 6 },
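
For context, and not part of the patch itself: oneDNN defines the erf-based GELU (`eltwise_gelu_erf`) and its tanh approximation (`eltwise_gelu_tanh`) as distinct eltwise algorithms, which is why `gelu_tanh` gets its own case in `convert_activation_func` instead of falling back to the existing `gelu` mapping. A minimal standalone sketch of the two formulas; the helper names here are ours, only the math follows the standard GELU definitions:

```cpp
#include <cmath>
#include <cstdio>

// Erf-based GELU, the formula behind dnnl::algorithm::eltwise_gelu_erf:
// 0.5 * x * (1 + erf(x / sqrt(2)))
static float gelu_erf(float x) {
    return 0.5f * x * (1.0f + std::erf(x / std::sqrt(2.0f)));
}

// Tanh approximation, the formula behind dnnl::algorithm::eltwise_gelu_tanh:
// 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
static float gelu_tanh(float x) {
    const float k = 0.7978845608f;  // sqrt(2 / pi)
    return 0.5f * x * (1.0f + std::tanh(k * (x + 0.044715f * x * x * x)));
}

int main() {
    // The two variants agree at 0 and diverge slightly elsewhere, so
    // silently fusing gelu_tanh as eltwise_gelu_erf would change results.
    for (float x : {-2.0f, -0.5f, 0.0f, 0.5f, 2.0f})
        std::printf("x=%5.2f  gelu_erf=%.6f  gelu_tanh=%.6f\n",
                    x, gelu_erf(x), gelu_tanh(x));
    return 0;
}
```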
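The `mvn` change is behavioral rather than a new mapping: with the `data_type_traits::is_i8_u8` check removed, a `quantize` following `mvn` can be fused for fp16/fp32 inputs as well, so normalized values are quantized inside the same kernel instead of being written out as an intermediate floating-point tensor. A rough sketch of the fused arithmetic over one normalization set, assuming a simplified symmetric scale-only quantizer (cldnn's real `quantize` primitive is parameterized by input/output ranges and levels):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

// Fused mvn -> quantize sketch: the real fusion happens inside the generated
// GPU kernel; this only illustrates the single-pass arithmetic.
std::vector<int8_t> mvn_then_quantize_i8(const std::vector<float>& x,
                                         float scale, float eps = 1e-9f) {
    const float n = static_cast<float>(x.size());

    float mean = 0.0f;
    for (float v : x) mean += v;
    mean /= n;

    float var = 0.0f;
    for (float v : x) var += (v - mean) * (v - mean);
    var /= n;
    const float inv_std = 1.0f / std::sqrt(var + eps);

    std::vector<int8_t> out(x.size());
    for (size_t i = 0; i < x.size(); ++i) {
        const float normalized = (x[i] - mean) * inv_std;              // mvn
        const float q = std::round(normalized / scale);                // quantize
        out[i] = static_cast<int8_t>(std::clamp(q, -128.0f, 127.0f));  // saturate
    }
    return out;
}
```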