diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp
index 84fc32ad9d7c49..9dabf5f51ecc4b 100644
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp
@@ -5,12 +5,10 @@
 #include "pass_manager.h"
 #include "program_node.h"
-#include "mutable_data_inst.h"
 #include "convert_color_inst.h"
 #include "fully_connected_inst.h"
 #include "assign_inst.h"
 #include "mvn_inst.h"
-#include "tensor_type.h"
 
 #include
 #include
@@ -64,6 +62,10 @@ void add_required_reorders::run(program& p) {
         if (usr->is_type<data>())
             continue;
 
+        if (!usr->is_all_valid_output_layouts()) {
+            usr->recalc_output_layouts(false);
+        }
+
         // If usr is assign and input and output data types are different
         // add reorder with usr's output data type between dep and usr
         if (usr->is_type<assign>()) {
@@ -75,7 +77,7 @@
                 auto new_reorder = std::make_shared<reorder>(dep.id() + "_reorder_" + usr->id(), dep.id(), out_layout.format, out_layout.data_type);
                 auto& new_reorder_node = p.get_or_create(new_reorder);
                 p.add_intermediate(new_reorder_node, *usr, dep);
-                new_reorder_node.recalc_output_layout(false);
+                new_reorder_node.recalc_output_layouts(false);
             }
         }
@@ -92,7 +94,7 @@
                 auto new_reorder = std::make_shared<reorder>(dep.id() + "_reorder_" + usr->id(), dep.id(), out_layout.format, out_layout.data_type);
                 auto& new_reorder_node = p.get_or_create(new_reorder);
                 p.add_intermediate(new_reorder_node, *usr, dep);
-                new_reorder_node.recalc_output_layout(false);
+                new_reorder_node.recalc_output_layouts(false);
             }
         }
     }
@@ -193,7 +195,7 @@
                 auto new_reorder = std::make_shared<reorder>(input.id() + "_padding_reorder_" + usr->id(), input.id(), layout_wo_padding);
                 auto& new_reorder_node = p.get_or_create(new_reorder);
                 p.add_intermediate(new_reorder_node, *usr, idx);
-                new_reorder_node.recalc_output_layout(false);
+                new_reorder_node.recalc_output_layouts(false);
             } else {
                 continue;
             }
@@ -222,42 +224,6 @@
             if (usr->type()->does_possible_implementation_exist(*usr)) {
                 correct_layout_selected = true;
                 break;
-            } else if (original_layout.data_type == data_types::i64) {
-                // goal of this section is to use int32 implementation
-                // if int64 is not available for usr primitive
-                current_layout = original_layout;
-                current_layout.data_type = data_types::i32;
-                usr->set_output_layout(current_layout, false);
-                if (usr->type()->does_possible_implementation_exist(*usr)) {
-                    correct_layout_selected = true;
-                } else {
-                    current_layout = original_layout;
-                    current_layout.data_type = data_types::i32;
-                    current_layout.format = node.first->get_output_layout().format;
-                    usr->set_output_layout(current_layout, false);
-                    if (usr->type()->does_possible_implementation_exist(*usr)) {
-                        correct_layout_selected = true;
-                    }
-                }
-
-                if (correct_layout_selected) {
-                    // change output_data_type field in usr to i32
-                    if ((static_cast<bool>(usr->get_primitive()->output_data_types[0]) == true) &&
-                        (*(usr->get_primitive()->output_data_types[0]) == data_types::i64)) {
-                        std::const_pointer_cast<primitive>(usr->get_primitive())->output_data_types[0] = data_types::i32;
-                    }
-                    // add reorders between usr int32 output and inputs of its users
-                    auto next_usr_itr = usr->get_users().begin();
-                    while (next_usr_itr != usr->get_users().end()) {
-                        auto next_usr = *next_usr_itr++;
-                        if (!next_usr->is_type<reorder>()) {
-                            if ((next_usr->get_output_layout() != usr->get_output_layout())) {
-                                add_reorder(p, usr, next_usr);
-                            }
-                        }
-                    }
-                    break;
-                }
             }
         }
@@ -310,54 +276,6 @@ void add_required_reorders::run(program& p) {
                 }
             }
         }
-
-        if (!correct_layout_selected) {
-            // goal of this section is to use int32 implementation
-            // if int64 is not available for usr primitive
-            if (original_layout.data_type == data_types::i64) {
-                layout original_layout_i32(original_layout.get_partial_shape(),
-                                           data_types::i32,
-                                           original_layout.format);
-                usr->set_output_layout(original_layout_i32, false);
-                if (usr->type()->does_possible_implementation_exist(*usr)) {
-                    correct_layout_selected = true;
-                }
-
-                if (!correct_layout_selected) {
-                    for (auto new_layout_format : preferred_layout_formats) {
-                        layout current_layout_i32(original_layout_i32.get_partial_shape(),
-                                                  original_layout_i32.data_type,
-                                                  new_layout_format);
-                        usr->set_output_layout(current_layout_i32, false);
-                        if (usr->type()->does_possible_implementation_exist(*usr)) {
-                            correct_layout_selected = true;
-                            break;
-                        }
-                    }
-                }
-                if (!correct_layout_selected) {
-                    throw std::runtime_error("Internal Error: no implementation for " + usr->id() +
-                                             " kernel which satisfies output format dependecies.");
-                }
-
-                // change output_data_type field in usr to i32
-                if ((static_cast<bool>(usr->get_primitive()->output_data_types[0]) == true) &&
-                    (*(usr->get_primitive()->output_data_types[0]) == data_types::i64)) {
-                    std::const_pointer_cast<primitive>(usr->get_primitive())->output_data_types[0] = data_types::i32;
-                }
-
-                // add reorders between usr int32 output and inputs of its users
-                auto next_usr_itr = usr->get_users().begin();
-                while (next_usr_itr != usr->get_users().end()) {
-                    auto next_usr = *next_usr_itr++;
-                    if (!next_usr->is_type<reorder>()) {
-                        if ((next_usr->get_output_layout() != usr->get_output_layout())) {
-                            add_reorder(p, usr, next_usr);
-                        }
-                    }
-                }
-            }
-        }
     }
 
     // layout is selected now add required reorders
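Note on the new early guard above: with multi-output primitives, recalc_output_layout (singular) refreshed only the first output, so the pass now verifies that every output layout is valid before inspecting them and recalculates all of them otherwise. A minimal sketch of what such a check plausibly looks like — the loop body and the per-output validity accessor are assumptions for illustration, not code from this patch:

    // Hypothetical sketch: a node is "all valid" only when every output slot
    // has an up-to-date calculated layout.
    bool program_node::is_all_valid_output_layouts() const {
        for (size_t idx = 0; idx < get_outputs_count(); ++idx) {
            if (!is_valid_output_layout(idx))  // assumed per-output validity flag
                return false;
        }
        return true;
    }
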
diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/handle_reshape.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/handle_reshape.cpp
index 125be3f9b938a6..ca8b781f8d9e48 100644
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/handle_reshape.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/handle_reshape.cpp
@@ -181,6 +181,8 @@ void handle_reshape::run(program& p) {
             }
 
             reorder_reshape_nodes.push_back(&new_reshape_node);
+            new_reshape_node.recalc_output_layouts(false);
+            node->recalc_output_layouts(false);
         }
     }
@@ -208,7 +210,8 @@ void handle_reshape::run(program& p) {
                                    0,
                                    reshape_input_node.get_dependencies().empty());
                 reshape_reorder_id++;
-                reshape_input_node.recalc_output_layout();
+                reshape_input_node.recalc_output_layouts(false);
+                node->recalc_output_layouts(false);
             }
         }
@@ -233,7 +236,8 @@ void handle_reshape::run(program& p) {
                                    << " input_info : " << reshape_input->dependencies().front().to_string() << std::endl;
             auto& reshape_input_node = p.get_or_create(reshape_input);
             p.add_intermediate(reshape_input_node, *node, 0, reshape_input_node.get_dependencies().empty());
-            reshape_input_node.recalc_output_layout();
+            reshape_input_node.recalc_output_layouts(false);
+            node->recalc_output_layouts(false);
         }
 
         // Check whether output reorder is required for format change
@@ -251,9 +255,9 @@ void handle_reshape::run(program& p) {
                                        *user,
                                        *node,
                                        reshape_output_node.get_dependencies().empty());
-                reshape_output_node.recalc_output_layout();
+                reshape_output_node.recalc_output_layouts(false);
             }
-            node->recalc_output_layout();
+            node->recalc_output_layouts(false);
         }
     }
 }
diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_padding.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_padding.cpp
index 05a34a1584e5e6..9a97885e2601f9 100644
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_padding.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_padding.cpp
@@ -41,6 +41,7 @@ void prepare_padding::run(program& p) {
                 auto new_reorder = std::make_shared<reorder>(node.id() + "_padding_reorder_for_" + input.id(), input.id(), input.get_output_layout());
                 auto& new_reorder_node = p.get_or_create(new_reorder);
                 p.add_intermediate(new_reorder_node, node, input);
+                new_reorder_node.recalc_output_layouts(false);
             }
 
             p.apply_needed_padding(node, node.get_dependency(0), needed_padding);
@@ -209,6 +210,7 @@ void prepare_padding::run(program& p) {
             auto new_reorder = std::make_shared<reorder>(node.id() + "_padding_reorder_for_" + input.id(), input.id(), input.get_output_layout());
             auto& new_reorder_node = p.get_or_create(new_reorder);
             p.add_intermediate(new_reorder_node, node, input);
+            new_reorder_node.recalc_output_layouts(false);
         }
 
         p.apply_needed_padding(node, node.get_dependency(0), needed_padding);
diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp
index 328629479b9d21..e4725ace72441b 100644
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp
@@ -31,6 +31,19 @@ using namespace cldnn;
 #define LOG_NODE_REMOVAL(id)     GPU_DEBUG_LOG_PASS << __func__ << ":" << __LINE__ << ": remove node: " << (id) << std::endl;
 #define LOG_NODE_REPLACEMENT(id) GPU_DEBUG_LOG_PASS << __func__ << ":" << __LINE__ << ": replace node: " << (id) << std::endl;
 
+namespace {
+
+bool does_any_user_have_impl_type(program_node& node, impl_types impl) {
+    for (auto& user : node.get_users()) {
+        if (user->get_preferred_impl_type() == impl)
+            return true;
+    }
+
+    return false;
+}
+
+}  // namespace
+
 remove_redundant_reorders::remove_redundant_reorders(bool enable_reorder_fusing, bool update_implementations, bool remove_output_reorders)
     : base_pass("remove_redundant_reorders"), enable_reorder_fusing(enable_reorder_fusing), update_implementations(update_implementations),
@@ -290,7 +303,7 @@ void remove_redundant_reorders::run(program& p) {
             i_layout.data_padding._upper_size[3] == 0 && i_layout.data_padding._lower_size[3] == 0 &&
             !o_layout.data_padding &&
             i_layout.data_type == o_layout.data_type &&
-            !layout_optimizer::onednn_check_preferred_impl_type_of_users(r_node)) {
+            !does_any_user_have_impl_type(r_node, impl_types::onednn)) {
             // If the newly aligned pad is merged into output layout during post_optimize_graph phase
             // and then buffer is reinterpreted, user node cannot handle pad properly for kernel execution
             if (!update_implementations || (i_layout.feature() % 16 == 0 &&
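Note: the file-local does_any_user_have_impl_type helper replaces layout_optimizer::onednn_check_preferred_impl_type_of_users (removed further down), so this pass no longer reaches into layout_optimizer for the check; the old empty-users early-out also becomes unnecessary, since the loop simply finds no match. Because the backend is a parameter, the predicate is reusable — a hypothetical guard on CPU-implemented users (not part of this patch) would read:

    // Hypothetical reuse of the helper with a different backend:
    if (does_any_user_have_impl_type(r_node, impl_types::cpu)) {
        // a CPU-implemented user may rely on the reorder's exact output layout,
        // so the removal would be skipped in that case
    }
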
diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp
index 2557be62621ba0..213da8cb0ab606 100644
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp
@@ -756,6 +756,7 @@ void reorder_inputs::run(program& p, reorder_factory& rf) {
 
             if (new_input.first) {
                 p.add_intermediate(new_input.first, detection_output_node, i, !new_input.second);
+                detection_output_node.recalc_output_layouts();
             }
         }
     }
@@ -770,6 +771,7 @@ void reorder_inputs::run(program& p, reorder_factory& rf) {
                                       layout{ input_layout.get_partial_shape(), input_layout.data_type, new_format });
         if (reorder.first) {
             p.add_intermediate(reorder.first, deconv_node, 0, !reorder.second);
+            deconv_node.recalc_output_layouts();
         }
     }
@@ -893,6 +895,7 @@ void reorder_inputs::run(program& p, reorder_factory& rf) {
             auto new_input = rf.get_reorder(input.id(), input_layout, new_layout);
             if (new_input.first) {
                 p.add_intermediate(new_input.first, fc_node, 0, !new_input.second);
+                fc_node.recalc_output_layouts();
             }
         }
@@ -919,6 +922,7 @@ void reorder_inputs::run(program& p, reorder_factory& rf) {
             auto new_input = rf.get_reorder(input->id(), dep.second, input_layout, new_layout);
             if (new_input.first) {
                 p.add_intermediate(new_input.first, pooling_node, 0);
+                pooling_node.recalc_output_layouts();
             }
         }
     };
diff --git a/src/plugins/intel_gpu/src/graph/include/layout_optimizer.h b/src/plugins/intel_gpu/src/graph/include/layout_optimizer.h
index 351dd50d6ae9a6..ee3801d8eb944c 100644
--- a/src/plugins/intel_gpu/src/graph/include/layout_optimizer.h
+++ b/src/plugins/intel_gpu/src/graph/include/layout_optimizer.h
@@ -186,7 +186,6 @@ class layout_optimizer {
     static bool onednn_check_data_types_for_convolution(data_types in_dt, data_types wei_dt, data_types out_dt);
     static bool onednn_check_data_types_for_deconvolution(data_types in_dt, data_types wei_dt, data_types out_dt);
     static bool onednn_check_data_types_for_fc_gemm(data_types in_dt, data_types wei_dt, data_types out_dt);
-    static bool onednn_check_preferred_impl_type_of_users(program_node& node);
 
     bool is_primitive_implemented_for_onednn(program_node& node);
     bool is_format_supported(program_node& node, format::type fmt);
diff --git a/src/plugins/intel_gpu/src/graph/include/paged_attention_inst.h b/src/plugins/intel_gpu/src/graph/include/paged_attention_inst.h
index 16650d0411d50d..a7918ba9c3719c 100644
--- a/src/plugins/intel_gpu/src/graph/include/paged_attention_inst.h
+++ b/src/plugins/intel_gpu/src/graph/include/paged_attention_inst.h
@@ -66,8 +66,6 @@ class typed_primitive_inst<paged_attention> : public typed_primitive_inst_base<paged_attention> {
-    void update_shape_info_tensor(const kernel_impl_params& params) override;
-
diff --git a/src/plugins/intel_gpu/src/graph/include/primitive_inst.h b/src/plugins/intel_gpu/src/graph/include/primitive_inst.h
index 7efbaa0fe4ba1a..6efb2c4c03644f 100644
--- a/src/plugins/intel_gpu/src/graph/include/primitive_inst.h
+++ b/src/plugins/intel_gpu/src/graph/include/primitive_inst.h
@@ -386,6 +386,7 @@ class primitive_inst {
                                         bool reset_mem = true,
                                         bool runtime_alloc = false);
     memory::ptr allocate_internal_buffer(size_t idx, bool reset = true);
+    void allocate_shape_info_memory();
     static std::vector build_exec_deps(
         std::vector> const& mem_deps);
     int32_t get_index_in_deps(memory::cptr arg) const;
diff --git a/src/plugins/intel_gpu/src/graph/kv_cache.cpp b/src/plugins/intel_gpu/src/graph/kv_cache.cpp
index 1927054faa9bf0..95cdd587cdf175 100644
--- a/src/plugins/intel_gpu/src/graph/kv_cache.cpp
+++ b/src/plugins/intel_gpu/src/graph/kv_cache.cpp
@@ -83,6 +83,9 @@ int32_t kv_cache_inst::get_prealloc_iter_num() {
 }
 
 void kv_cache_inst::update_shape_info_tensor(const kernel_impl_params& params) {
+    if (!_shape_info_memory) {
+        allocate_shape_info_memory();
+    }
     mem_lock<int32_t> lock(_shape_info_memory, _network.get_stream());
     auto shape_info_ptr = lock.data();
     size_t offset = 0;
diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp
index f0be15f586f67b..6439ae570cd8c5 100644
--- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp
+++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp
@@ -1434,18 +1434,6 @@ bool layout_optimizer::is_primitive_implemented_for_onednn(program_node& node) {
     return false;
 }
 
-bool layout_optimizer::onednn_check_preferred_impl_type_of_users(program_node& node) {
-    if (node.get_users().size() == 0)
-        return false;
-
-    for (auto& user : node.get_users()) {
-        if (user->get_preferred_impl_type() == impl_types::onednn)
-            return true;
-    }
-
-    return false;
-}
-
 impl_types layout_optimizer::get_forced_impl_type_by_config(program_node& node) {
 #ifdef GPU_DEBUG_CONFIG
     GPU_DEBUG_GET_INSTANCE(debug_config);
diff --git a/src/plugins/intel_gpu/src/graph/paged_attention.cpp b/src/plugins/intel_gpu/src/graph/paged_attention.cpp
index 9a186604770801..037a6a1e8b04aa 100644
--- a/src/plugins/intel_gpu/src/graph/paged_attention.cpp
+++ b/src/plugins/intel_gpu/src/graph/paged_attention.cpp
@@ -143,10 +143,6 @@ void paged_attention_inst::on_execute() {
     }
 }
 
-void paged_attention_inst::update_shape_info_tensor(const kernel_impl_params& params) {
-    parent::update_shape_info_tensor(params);
-}
-
 paged_attention_inst::typed_primitive_inst(network& network, const paged_attention_node& node) : parent(network, node) {
     const auto desc = node.get_primitive();
diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
index 3e6b766ba1212a..ad1541177b7dd6 100644
--- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
+++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
@@ -945,7 +945,16 @@ void primitive_inst::fill_shape_info_data(const layout& runtime_layout, const la
     }
 }
 
+void primitive_inst::allocate_shape_info_memory() {
+    int64_t shape_elements = _node->get_total_shape_info_size();
+    _shape_info_memory = _network.get_engine().allocate_memory(layout{{shape_elements}, data_types::i32, format::bfyx}, false);
+}
+
 void primitive_inst::update_shape_info_tensor(const kernel_impl_params& params) {
+    if (!_shape_info_memory) {
+        allocate_shape_info_memory();
+    }
+
     mem_lock<int32_t> lock(_shape_info_memory, _network.get_stream());
     auto shape_info_ptr = lock.data();
     size_t offset = 0;
@@ -1858,8 +1867,6 @@ primitive_inst::primitive_inst(network & network, program_node const& node, bool
     if (_impl->is_dynamic() && !_impl->is_cpu()) {
         GPU_DEBUG_TRACE_DETAIL << id() << ": initialize impl with dynamic impl " << _impl->get_kernel_name() << std::endl;
         _dynamic_impl = _impl->clone();
-        const int64_t shape_elements = node.get_total_shape_info_size();
-        _shape_info_memory = _network.get_engine().allocate_memory(layout{{shape_elements}, data_types::i32, format::bfyx});
         }
     }
     _impl_params->strm = _network.get_stream_ptr();
diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp
index 699671a1c4c614..4f18facce3e820 100644
--- a/src/plugins/intel_gpu/src/graph/program.cpp
+++ b/src/plugins/intel_gpu/src/graph/program.cpp
@@ -813,6 +813,7 @@ void program::apply_needed_padding(program_node& node, program_node& prev_node,
         auto r_prim = std::make_shared<reorder>("reorder_input_" + node.id(), prev_node.id(), target_layout);
         add_intermediate(r_prim, node, 0);
+        get_or_create(r_prim).recalc_output_layouts(false);
         return;
     }
diff --git a/src/plugins/intel_gpu/src/graph/slice.cpp b/src/plugins/intel_gpu/src/graph/slice.cpp
index 66bc59458425e0..6f0ca337414b69 100644
--- a/src/plugins/intel_gpu/src/graph/slice.cpp
+++ b/src/plugins/intel_gpu/src/graph/slice.cpp
@@ -96,6 +96,10 @@ std::string slice_inst::to_string(slice_node const& node) {
 }
 
 void slice_inst::update_shape_info_tensor(const kernel_impl_params& params) {
+    if (!_shape_info_memory) {
+        allocate_shape_info_memory();
+    }
+
     mem_lock<int32_t> lock(_shape_info_memory, _network.get_stream());
     auto shape_info_ptr = lock.data();
     size_t offset = 0;
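Note: the shape-info buffer is now allocated lazily. The primitive_inst constructor no longer creates it for every dynamic primitive; allocation is deferred to the first update_shape_info_tensor call, so instances whose implementation never reads shape info skip the allocation entirely. The cost is that every override that locks the buffer directly must repeat the same guard, which is why kv_cache, slice, and the base class all gain the identical two lines. The pattern, reduced to its core (some_inst stands in for any of the three classes; mem_lock<int32_t> matches the buffer's i32 layout as in the patch):

    void some_inst::update_shape_info_tensor(const kernel_impl_params& params) {
        if (!_shape_info_memory) {         // first use: nothing allocated yet
            allocate_shape_info_memory();  // deferred engine allocation
        }
        mem_lock<int32_t> lock(_shape_info_memory, _network.get_stream());
        // ... write per-port shape data through lock.data() at increasing offsets ...
    }
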
diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/gru_sequence.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/gru_sequence.cpp
index 254fdc0d6ca306..cfe7d4ccab651a 100644
--- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/gru_sequence.cpp
+++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/gru_sequence.cpp
@@ -2,20 +2,45 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
+#include <vector>
 #include "single_op_tests/gru_sequence.hpp"
+#include "common_test_utils/test_constants.hpp"
+#include "common_test_utils/test_enums.hpp"
 
-namespace {
-    using ov::test::GRUSequenceTest;
+using ov::test::GRUSequenceTest;
+using ov::test::utils::InputLayerType;
+using ov::test::utils::SequenceTestsMode;
 
-    std::vector<ov::test::utils::SequenceTestsMode> mode{ov::test::utils::SequenceTestsMode::CONVERT_TO_TI_MAX_SEQ_LEN_CONST,
-                                                         ov::test::utils::SequenceTestsMode::CONVERT_TO_TI_RAND_SEQ_LEN_CONST,
-                                                         ov::test::utils::SequenceTestsMode::CONVERT_TO_TI_RAND_SEQ_LEN_PARAM,
-                                                         ov::test::utils::SequenceTestsMode::PURE_SEQ_RAND_SEQ_LEN_CONST,
-                                                         ov::test::utils::SequenceTestsMode::PURE_SEQ_RAND_SEQ_LEN_PARAM,
-                                                         ov::test::utils::SequenceTestsMode::PURE_SEQ};
+namespace {
+    std::vector<SequenceTestsMode> mode{SequenceTestsMode::CONVERT_TO_TI_MAX_SEQ_LEN_CONST,
+                                        SequenceTestsMode::CONVERT_TO_TI_RAND_SEQ_LEN_CONST,
+                                        SequenceTestsMode::CONVERT_TO_TI_RAND_SEQ_LEN_PARAM,
+                                        SequenceTestsMode::PURE_SEQ_RAND_SEQ_LEN_PARAM,
+                                        SequenceTestsMode::PURE_SEQ_RAND_SEQ_LEN_CONST,
+                                        SequenceTestsMode::PURE_SEQ};
     // output values increase rapidly without clip, so use only seq_lengths = 2
-    std::vector<size_t> seq_lengths_zero_clip{2};
-    std::vector<size_t> seq_lengths_clip_non_zero{20};
+
+    const std::vector<std::vector<ov::Shape>> input_shapes_zero_clip_static = {
+        // {batch, seq_lengths, input_size}, {batch, num_directions, hidden_size}, {batch},
+        {{ 10, 2, 1}, { 10, 1, 1 }, { 10 }},
+        {{ 10, 2, 1}, { 10, 1, 10 }, { 10 }},
+    };
+    const std::vector<std::vector<ov::Shape>> input_shapes_bidirect_zero_clip_static = {
+        {{ 10, 2, 1}, { 10, 2, 1 }, { 10 }},
+        {{ 10, 2, 1}, { 10, 2, 10 }, { 10 }},
+    };
+    const std::vector<std::vector<ov::Shape>> input_shapes_non_zero_clip_static = {
+        {{ 10, 20, 1}, { 10, 1, 1 }, { 10 }},
+        {{ 10, 20, 1}, { 10, 1, 10 }, { 10 }},
+    };
+    const std::vector<std::vector<ov::Shape>> input_shapes_bidirect_non_zero_clip_static = {
+        {{ 10, 20, 1}, { 10, 2, 1 }, { 10 }},
+        {{ 10, 20, 1}, { 10, 2, 10 }, { 10 }},
+    };
+    std::vector<size_t> seq_lengths_zero_clip{2};
+    std::vector<size_t> seq_lengths_clip_non_zero{20};
+    std::vector<size_t> batch{10};
+    std::vector<size_t> hidden_size{1, 10};
     // std::vector<size_t> input_size{10};
     std::vector<std::vector<std::string>> activations = {{"relu", "tanh"}, {"tanh", "sigmoid"}, {"sigmoid", "tanh"},
                                                          {"tanh", "relu"}};
@@ -23,50 +48,64 @@ namespace {
     std::vector<float> clip{0.f};
     std::vector<float> clip_non_zeros{0.7f};
     std::vector<ov::op::RecurrentSequenceDirection> direction = {ov::op::RecurrentSequenceDirection::FORWARD,
-                                                                 ov::op::RecurrentSequenceDirection::REVERSE,
-                                                                 ov::op::RecurrentSequenceDirection::BIDIRECTIONAL
-    };
+                                                                 ov::op::RecurrentSequenceDirection::REVERSE};
+    std::vector<ov::op::RecurrentSequenceDirection> direction_bi = {ov::op::RecurrentSequenceDirection::BIDIRECTIONAL};
+
     std::vector<ov::element::Type> netPrecisions = {ov::element::f32, ov::element::f16};
 
-    INSTANTIATE_TEST_SUITE_P(GRUSequenceCommonZeroClip, GRUSequenceTest,
+    INSTANTIATE_TEST_SUITE_P(smoke_GRUSequenceCommonZeroClip, GRUSequenceTest,
                             ::testing::Combine(
                                 ::testing::ValuesIn(mode),
-                                ::testing::Values(seq_lengths_zero_clip),
+                                ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(input_shapes_zero_clip_static)),
                                 // ::testing::ValuesIn(input_size), // hardcoded to 10 due to Combine supports up to 10 args
                                 ::testing::ValuesIn(activations),
                                 ::testing::ValuesIn(clip),
                                 ::testing::ValuesIn(linear_before_reset),
                                 ::testing::ValuesIn(direction),
-                                ::testing::Values(ov::test::utils::InputLayerType::CONSTANT),
+                                ::testing::Values(InputLayerType::CONSTANT),
                                 ::testing::ValuesIn(netPrecisions),
                                 ::testing::Values(ov::test::utils::DEVICE_GPU)),
                             GRUSequenceTest::getTestCaseName);
 
-    INSTANTIATE_TEST_SUITE_P(GRUSequenceCommonZeroClipNonConstantWRB, GRUSequenceTest,
+    INSTANTIATE_TEST_SUITE_P(smoke_GRUSequenceCommonZeroClipBidirect, GRUSequenceTest,
                             ::testing::Combine(
-                                ::testing::Values(ov::test::utils::SequenceTestsMode::PURE_SEQ),
-                                ::testing::Values(seq_lengths_zero_clip),
+                                ::testing::ValuesIn(mode),
+                                ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(input_shapes_bidirect_zero_clip_static)),
                                 // ::testing::ValuesIn(input_size), // hardcoded to 10 due to Combine supports up to 10 args
                                 ::testing::ValuesIn(activations),
                                 ::testing::ValuesIn(clip),
                                 ::testing::ValuesIn(linear_before_reset),
-                                ::testing::ValuesIn(direction),
-                                ::testing::Values(ov::test::utils::InputLayerType::PARAMETER),
+                                ::testing::ValuesIn(direction_bi),
+                                ::testing::Values(InputLayerType::CONSTANT),
                                 ::testing::ValuesIn(netPrecisions),
                                 ::testing::Values(ov::test::utils::DEVICE_GPU)),
                             GRUSequenceTest::getTestCaseName);
 
-    INSTANTIATE_TEST_SUITE_P(GRUSequenceCommonClip, GRUSequenceTest,
+    INSTANTIATE_TEST_SUITE_P(smoke_GRUSequenceCommonClip, GRUSequenceTest,
                             ::testing::Combine(
                                 ::testing::ValuesIn(mode),
-                                ::testing::Values(seq_lengths_clip_non_zero),
+                                ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(input_shapes_non_zero_clip_static)),
                                 // ::testing::ValuesIn(input_size), // hardcoded to 10 due to Combine supports up to 10 args
                                 ::testing::ValuesIn(activations),
                                 ::testing::ValuesIn(clip_non_zeros),
                                 ::testing::ValuesIn(linear_before_reset),
                                 ::testing::ValuesIn(direction),
-                                ::testing::Values(ov::test::utils::InputLayerType::CONSTANT),
+                                ::testing::Values(InputLayerType::CONSTANT),
+                                ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(ov::test::utils::DEVICE_GPU)),
+                            GRUSequenceTest::getTestCaseName);
+
+    INSTANTIATE_TEST_SUITE_P(smoke_GRUSequenceCommonClipBidirect, GRUSequenceTest,
+                            ::testing::Combine(
+                                ::testing::ValuesIn(mode),
+                                ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(input_shapes_bidirect_non_zero_clip_static)),
+                                // ::testing::ValuesIn(input_size), // hardcoded to 10 due to Combine supports up to 10 args
+                                ::testing::ValuesIn(activations),
+                                ::testing::ValuesIn(clip_non_zeros),
+                                ::testing::ValuesIn(linear_before_reset),
+                                ::testing::ValuesIn(direction_bi),
+                                ::testing::Values(InputLayerType::CONSTANT),
                                 ::testing::ValuesIn(netPrecisions),
                                 ::testing::Values(ov::test::utils::DEVICE_GPU)),
                             GRUSequenceTest::getTestCaseName);
diff --git a/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp
index e65d5d542e8e48..3c73842742c451 100644
--- a/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp
@@ -350,9 +350,7 @@ class WeightsPrimitiveFusingTestOneDNN : public BaseFusingTest<convolution_test_params> {
     layout get_input_layout(convolution_test_params& p) {
         auto pad = p.pad;
-        std::vector<int32_t> pad_ = { 0, 0, static_cast<int32_t>(pad[1]), static_cast<int32_t>(pad[0]) };
-        return layout{ p.in_shape, p.data_type, p.input_format, padding{ pad_ } };
+        return layout{ p.in_shape, p.data_type, p.input_format };
     }
 
     layout get_output_layout(convolution_test_params& p) {
@@ -408,9 +406,7 @@ class ConvActivationTestOnednn : public BaseFusingTest<conv_activation_onednn_test_params> {
     layout get_input_layout(conv_activation_onednn_test_params& p) {
         auto pad = p.pad;
-        std::vector<int32_t> pad_ = { 0, 0, static_cast<int32_t>(pad[1]), static_cast<int32_t>(pad[0]) };
-        return layout{ p.in_shape, p.data_type, p.input_format, padding{ pad_ } };
+        return layout{ p.in_shape, p.data_type, p.input_format };
     }
 
     layout get_output_layout(conv_activation_onednn_test_params& p) {
@@ -1034,7 +1030,7 @@ TEST_P(conv_fp32_prelu_eltwise, vector_ops) {
         reorder("reorder_bfyx", input_info("eltwise"), p.default_format, data_types::f32)
     );
 
-    ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "" };
+    ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "", impl_types::ocl };
     cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } }));
 
     tolerance = default_tolerance(p.data_type);
@@ -1055,7 +1051,7 @@ TEST_P(conv_fp32_prelu_eltwise, vector_ops_slope_2) {
         reorder("reorder_bfyx", input_info("eltwise"), p.default_format, data_types::f32)
     );
 
-    ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "" };
+    ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "", impl_types::ocl };
     cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } }));
 
     tolerance = default_tolerance(p.data_type);
@@ -1077,7 +1073,7 @@ TEST_P(conv_fp32_prelu_eltwise, vector_ops_mixed_types) {
         reorder("reorder_bfyx", input_info("eltwise"), p.default_format, data_types::f32)
     );
 
-    ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "" };
+    ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "", impl_types::ocl };
     cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } }));
 
     tolerance = default_tolerance(p.data_type);
@@ -1099,7 +1095,7 @@ TEST_P(conv_fp32_prelu_eltwise, vector_ops_mixed_types_slope_2) {
         reorder("reorder_bfyx", input_info("eltwise"), p.default_format, data_types::f32)
     );
 
-    ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "" };
+    ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "", impl_types::ocl };
     cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } }));
 
     tolerance = default_tolerance(p.data_type);
@@ -2762,7 +2758,7 @@ TEST_P(conv_int8_scale_prelu_quantize_i8_eltwise_fp32_quantize_i8_vec, vector_op
         reorder("reorder_bfyx", input_info("quantize_1"), p.default_format, data_types::f32)
     );
 
-    ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv4, "convolution_gpu_b_fs_yx_fsv4_1x1" };
+    ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv4, "convolution_gpu_b_fs_yx_fsv4_1x1", impl_types::ocl };
     cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } }));
 
     tolerance = 1.f;
@@ -2797,7 +2793,7 @@ TEST_P(conv_int8_scale_prelu_quantize_i8_eltwise_fp32_quantize_i8_vec, vector_op
         reorder("reorder_bfyx", input_info("quantize_1"), p.default_format, data_types::f32)
     );
 
-    ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv4, "convolution_gpu_b_fs_yx_fsv4_1x1" };
+    ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv4, "convolution_gpu_b_fs_yx_fsv4_1x1", impl_types::ocl };
    cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } }));
 
     tolerance = 1.f;
@@ -2930,7 +2926,7 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_basic, basic) {
         reorder("reorder_out", input_info("activation"), format::bfyx, data_types::f32)
     );
 
-    ov::intel_gpu::ImplementationDesc conv_impl = { format::fs_b_yx_fsv32, "" };
+    ov::intel_gpu::ImplementationDesc conv_impl = { format::fs_b_yx_fsv32, "", impl_types::ocl };
     cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } }));
 
     execute(p);
@@ -2955,7 +2951,7 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_mean, have_mean) {
         activation("activation", input_info("conv_prim"), activation_func::abs)
     );
 
-    ov::intel_gpu::ImplementationDesc conv_impl = { format::fs_b_yx_fsv32, "" };
+    ov::intel_gpu::ImplementationDesc conv_impl = { format::fs_b_yx_fsv32, "", impl_types::ocl };
     cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } }));
 
     execute(p);
@@ -2987,7 +2983,7 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_subtract, have_subtract_per_feature)
         convolution("conv_output", input_info("reorder_fsv32"), "weights_dw", "", p.out_shape[1].get_length(), dw_stride, p.dilation, p.pad, p.pad, true)
     );
 
-    ov::intel_gpu::ImplementationDesc conv_impl = { format::fs_b_yx_fsv32, "" };
+    ov::intel_gpu::ImplementationDesc conv_impl = { format::fs_b_yx_fsv32, "", impl_types::ocl };
     cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } }));
 
     execute(p);
@@ -3016,7 +3012,7 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_fused_activation, have_fused_activat
         activation("activation", input_info("conv_prim2"), activation_func::abs)
     );
 
-    ov::intel_gpu::ImplementationDesc conv_impl = { format::fs_b_yx_fsv32, "" };
+    ov::intel_gpu::ImplementationDesc conv_impl = { format::fs_b_yx_fsv32, "", impl_types::ocl };
     cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim2", conv_impl } }));
     cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "activation", conv_impl } }));
@@ -3046,7 +3042,7 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_fused_through_activation, have_fused
         activation("activation", input_info("conv_prim2"), activation_func::abs)
     );
 
-    ov::intel_gpu::ImplementationDesc conv_impl = { format::fs_b_yx_fsv32, "" };
+    ov::intel_gpu::ImplementationDesc conv_impl = { format::fs_b_yx_fsv32, "", impl_types::ocl };
     cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim2", conv_impl } }));
     cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "activation", conv_impl } }));
@@ -3075,7 +3071,7 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_data_padding, have_data_padding) {
         reorder("reorder_out", input_info("conv_prim2"), format::fs_b_yx_fsv32, data_types::f32)
     );
 
-    ov::intel_gpu::ImplementationDesc conv_impl = { format::fs_b_yx_fsv32, "" };
+    ov::intel_gpu::ImplementationDesc conv_impl = { format::fs_b_yx_fsv32, "", impl_types::ocl };
     cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim2", conv_impl } }));
 
     execute(p);
@@ -3943,9 +3939,7 @@ class EltwiseSumFusingTestOneDNN : public BaseFusingTest<convolution_eltw_sum_test_params> {
     layout get_input_layout(convolution_eltw_sum_test_params& p) {
         auto pad = p.pad;
-        std::vector<int32_t> pad_ = { 0, 0, static_cast<int32_t>(pad[0]), static_cast<int32_t>(pad[1]) };
-        return layout{ p.in_shape, p.data_type, p.input_format, padding{ pad_ } };
+        return layout{ p.in_shape, p.data_type, p.input_format };
     }
 
     layout get_per_channel_layout(convolution_eltw_sum_test_params& p) {
@@ -4074,9 +4068,7 @@ class ImplicitCropConcatTestOneDNN: public BaseFusingTest<implicit_crop_concat_convolution_test_params> {
     layout get_input_layout(implicit_crop_concat_convolution_test_params& p) {
         auto pad = p.pad;
-        std::vector<int32_t> pad_ = { 0, 0, static_cast<int32_t>(pad[0]), static_cast<int32_t>(pad[1]) };
-        return layout{ p.in_shape, p.data_type, p.input_format, padding{ pad_ } };
+        return layout{ p.in_shape, p.data_type, p.input_format };
     }
 
     layout get_per_channel_layout(implicit_crop_concat_convolution_test_params& p) {
@@ -4189,9 +4181,7 @@ class PermuteOptimizingTestOnednn : public BaseFusingTest<convolution_test_params> {
     layout get_input_layout(convolution_test_params& p) {
         auto pad = p.pad;
-        std::vector<int32_t> pad_ = { 0, 0, static_cast<int32_t>(pad[1]), static_cast<int32_t>(pad[0]) };
-        return layout{ p.in_shape, p.data_type, p.input_format, padding{ pad_ } };
+        return layout{ p.in_shape, p.data_type, p.input_format };
     }
 
     layout get_per_channel_layout(convolution_test_params& p) {
@@ -4320,9 +4310,7 @@ class EltwiseSumWithConstantFullTensorFusingTestOneDNN : public BaseFusingTest<convolution_eltw_sum_test_params> {
     layout get_input_layout(convolution_eltw_sum_test_params& p) {
         auto pad = p.pad;
-        std::vector<int32_t> pad_ = { 0, 0, static_cast<int32_t>(pad[0]), static_cast<int32_t>(pad[1]) };
-        return layout{ p.in_shape, p.data_type, p.input_format, padding{ pad_ } };
+        return layout{ p.in_shape, p.data_type, p.input_format };
     }
 
     layout get_weights_layout(convolution_eltw_sum_test_params& p) {
diff --git a/src/plugins/intel_gpu/tests/unit/fusions/deconvolution_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/fusions/deconvolution_fusion_test.cpp
index 71ba6d0703248e..c76a0fe2eb1bdd 100644
--- a/src/plugins/intel_gpu/tests/unit/fusions/deconvolution_fusion_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/fusions/deconvolution_fusion_test.cpp
@@ -826,6 +826,9 @@ TEST_P(deconv_scale_activation_quantize_i8_eltwise_quantize_u8, basic) {
         reorder("reorder_bfyx", input_info("quant2"), p.default_format, data_types::f32)
     );
 
+    ov::intel_gpu::ImplementationDesc gemmv_impl = { cldnn::format::type::any, "", impl_types::ocl };
+    cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "deconv_prim", gemmv_impl } }));
+
     tolerance = 1.f;
     execute(p);
 }
diff --git a/src/plugins/intel_gpu/tests/unit/fusions/gemm_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/fusions/gemm_fusion_test.cpp
index 8a2c75369a25df..799916fd61c345 100644
--- a/src/plugins/intel_gpu/tests/unit/fusions/gemm_fusion_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/fusions/gemm_fusion_test.cpp
@@ -53,8 +53,8 @@ class GemmFusingTest : public ::BaseFusingTest<gemm_test_params> {
         auto input1_prim = get_mem(get_input_layout(p, 1));
 
         if (!p.kernel_name.empty()) {
-            ov::intel_gpu::ImplementationDesc gemm_ref_impl = { format::bfyx, "gemm_ref" };
-            ov::intel_gpu::ImplementationDesc gemm_target_impl = { format::bfyx, p.kernel_name };
+            ov::intel_gpu::ImplementationDesc gemm_ref_impl = { format::bfyx, "gemm_ref", impl_types::ocl };
+            ov::intel_gpu::ImplementationDesc gemm_target_impl = { format::bfyx, p.kernel_name, impl_types::ocl };
             cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"gemm_prim", gemm_target_impl} }));
             cfg_not_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"gemm_prim", gemm_ref_impl} }));
         }
@@ -198,6 +198,9 @@ TEST_P(gemm_2in_quantize_u8, basic) {
         reorder("reorder_bfyx", input_info("quantize"), p.default_format, data_types::f32)
     );
 
+    ov::intel_gpu::ImplementationDesc gemm_impl = { format::bfyx, "", impl_types::ocl };
+    cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"gemm_prim", gemm_impl} }));
+
     tolerance = default_tolerance(data_types::u8);
     execute(p, false);
 }
@@ -569,6 +572,10 @@ TEST_P(gemm_2in_act_scale_eltwise, broadcast_eltwise) {
         reorder("reorder_bfyx", input_info("sum"), p.default_format, data_types::f32)
    );
 
+    // Onednn impl gives different results for some reason (looks like missing saturation somewhere)
+    ov::intel_gpu::ImplementationDesc gemm_impl = { format::bfyx, "", impl_types::ocl };
+    cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "gemm_prim", gemm_impl } }));
+
     tolerance = default_tolerance(p.default_type);
     if (p.default_type == data_types::f16 && p.kernel_name == "gemm_tiled_opt") {
         tolerance *= 2.1f;  // Issue: 94154
diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/graph_manipulation_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/graph_manipulation_gpu_test.cpp
index 5f2295e372bffa..e3e3001f0fb646 100644
--- a/src/plugins/intel_gpu/tests/unit/module_tests/graph_manipulation_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/module_tests/graph_manipulation_gpu_test.cpp
@@ -155,7 +155,6 @@ TEST(add_intermediate_gpu, test2)
     prog->add_intermediate(new_conv, prog->get_node("conv2a"), 0, true, true);
     program_wrapper::add_connection(*prog, prog->get_or_create(weights_node), prog->get_or_create(new_conv));
 
-    prog->dump_program("custom_dump", true);
 
     program_wrapper::build(*prog);
diff --git a/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp b/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp
index 6640db80ac5168..5449ba1f7fb45b 100644
--- a/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp
@@ -408,6 +408,12 @@ TEST(prepare_buffer_fusing, in_place_concat_dynamic_onednn_batch2) {
     ExecutionConfig config;
     config.set_property(ov::intel_gpu::optimize_data(true));
     config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
+    ov::intel_gpu::ImplForcingMap forcing_map = {
+        {"reorder1", ov::intel_gpu::ImplementationDesc{format::any, "", impl_types::onednn}},
+        {"reorder2", ov::intel_gpu::ImplementationDesc{format::any, "", impl_types::onednn}}
+    };
+    config.set_property(ov::intel_gpu::force_implementations(forcing_map));
+
     auto prog = program::build_program(engine, topology, config, false, false);
     ASSERT_NE(prog, nullptr);
     auto& concat_node_p = prog->get_node("concat");
diff --git a/src/plugins/intel_gpu/tests/unit/passes/select_preferred_formats_test.cpp b/src/plugins/intel_gpu/tests/unit/passes/select_preferred_formats_test.cpp
index 6eaa3cf5699e17..39151ce1306c56 100644
--- a/src/plugins/intel_gpu/tests/unit/passes/select_preferred_formats_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/passes/select_preferred_formats_test.cpp
@@ -39,6 +39,9 @@ TEST(test_select_preferred_formats, setting_target_conv_format) {
     config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"conv1", impl} }));
 
     auto prog = program::build_program(engine, topology, config, false, true);
+    if (engine.get_device_info().supports_immad) {
+        prog->get_layout_optimizer().set_optimization_attribute(layout_optimizer::optimization_attributes_type::use_onednn_impls, 1);
+    }
 
     // It initializes output_layout.
     // It's necessary because this test runs select_preferred_formats pass alone.
@@ -85,6 +88,9 @@ TEST(test_select_preferred_formats, fsv2_fallback_to_byxf) {
     config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"conv1", impl} }));
 
     auto prog = program::build_program(engine, topology, config, false, true);
+    if (engine.get_device_info().supports_immad) {
+        prog->get_layout_optimizer().set_optimization_attribute(layout_optimizer::optimization_attributes_type::use_onednn_impls, 1);
+    }
 
     // It initializes output_layout.
     // It's necessary because this test runs select_preferred_formats pass alone.
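Note: a recurring change across these tests is that ImplementationDesc now names the implementation type explicitly (usually impl_types::ocl) instead of leaving it unspecified, so a oneDNN kernel cannot be auto-selected on immad-capable devices and silently change the numbers a test compares against. The pattern, as used throughout the diff ("conv_prim" stands in for the primitive id under test):

    // Pin a primitive to the OCL implementation for deterministic results:
    ov::intel_gpu::ImplementationDesc conv_impl = { format::any, "", impl_types::ocl };
    config.set_property(ov::intel_gpu::force_implementations(
        ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } }));
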
diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp
index 198bc7a7d08dab..5d7ff14bd177f2 100644
--- a/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp
@@ -1752,6 +1752,8 @@ TEST(convolution_f16_fw_gpu, convolution_big_size_weights) {
 
     ExecutionConfig config = get_test_default_config(engine);
     config.set_property(ov::intel_gpu::optimize_data(true));
+    ov::intel_gpu::ImplementationDesc conv_impl = {format::any, "", impl_types::ocl};
+    config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"conv", conv_impl} }));
 
     network network(engine, topology, config);
@@ -5722,7 +5724,7 @@ TEST_P(convolution_gpu_fs_byx_fsv32_crop, fs_byx_fsv32_crop)
     }
 
     ExecutionConfig config = get_test_default_config(engine);
-    ov::intel_gpu::ImplementationDesc conv_impl = { format::fs_b_yx_fsv32, "convolution_gpu_bfyx_to_fs_byx_fsv32" };
+    ov::intel_gpu::ImplementationDesc conv_impl = { format::fs_b_yx_fsv32, "convolution_gpu_bfyx_to_fs_byx_fsv32", impl_types::ocl };
     config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_fsv", conv_impl } }));
     config.set_property(ov::intel_gpu::optimize_data(true));
 
     network network(engine, topology, config);
@@ -10649,6 +10651,9 @@ TEST_P(conv_dyn_test, convolution_gpu_fsv16_1x1_no_bias) {
         data("weights", weights),
         convolution("conv", input_info("input"), "weights", no_bias, groups_num, p.stride, p.dilation, p.pad_begin, p.pad_end, is_grouped));
 
+    ov::intel_gpu::ImplementationDesc conv_impl = { in_layout.format, "convolution_gpu_ref", impl_types::ocl };
+    config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", conv_impl } }));
+
     network network_ref(engine, topology_ref, config);
     network_ref.set_input_data("input", input);
diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp
index 681a0ebbe9e05b..30b15f0c25a08b 100644
--- a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp
@@ -1620,6 +1620,8 @@ class fully_connected_gpu_tests: public ::testing::Test {
 
         auto config = get_test_default_config(engine);
         config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
+        ov::intel_gpu::ImplementationDesc fc_impl = { in_layout.format, "", impl_types::ocl };
+        config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "fc_prim1", fc_impl }, { "fc_prim2", fc_impl } }));
 
         network network(engine, topology, config);
         network.set_input_data("input", input_mem);
@@ -1856,7 +1858,7 @@ class fully_connected_gpu_tests: public ::testing::Test {
         auto input_mem = engine.allocate_memory({ {1, 2, 4}, data_types::f32, format::bfyx });
         auto weights_mem = engine.allocate_memory({ {8, 4}, data_types::u8, format::bfyx });
         auto bias_mem = engine.allocate_memory({ {1, 1, 8}, data_types::f32, format::bfyx });
-        auto scale_mem = engine.allocate_memory({ {1, 1, 8}, data_types::f32, format::bfyx });
+        auto scale_mem = engine.allocate_memory({ {8, 1}, data_types::f32, format::bfyx });
 
         set_values(input_mem, { -0.5f, 2.0f, 0.5f, 1.0f,
                                 0.5f, -2.0f, -0.5f, -1.0f });
@@ -1897,7 +1899,7 @@ class fully_connected_gpu_tests: public ::testing::Test {
         ov::PartialShape expected_shape{1, 2, 8};
         ASSERT_EQ(expected_shape, output_mem->get_layout().get_partial_shape());
 
-        std::vector<float> expected_result = {19.f, 40.f, 69.f, 54.f, 83.f, 48.f, 37.f, -2.f, -17.f, -44.f, -63.f, -62.f, -73.f, -60.f, -23.f, -14.f };
+        std::vector<float> expected_result = {19.f, 82.f, -63.f, -120.f, 24.5f, -19.5f, 37.f, -5.f, -17.f, -86.f, 69.f, 112.f, -14.5f, 7.5f, -23.f, -11.f };
 
         for (size_t i = 0; i < expected_result.size(); i++) {
             ASSERT_EQ(expected_result[i], output_ptr[i]) << "i = " << i;
diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp
index 7d62a2d437a35e..b61d05767fee66 100644
--- a/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp
@@ -1331,7 +1331,7 @@ class gemm_gpu_tests: public ::testing::Test {
         set_default_shapes(num_dims, BMKN, input0_shape_default, input1_shape_default, output_shape_default);
         ov::Shape input0_shape(input0_shape_default.size());
         ov::Shape input1_shape(input1_shape_default.size());
-        
+
         for (size_t dim = 0; dim < input0_shape_default.size(); ++dim) {
             input0_shape[input0_order[dim]] = input0_shape_default[dim];
         }
@@ -2551,36 +2551,34 @@ class gemm_onednn: public ::testing::Test {
         ov::Shape in1_shape = { 1, 1, 3, 4 };
         ov::Shape in2_shape = { 1, 4 };
-        auto in1_layout = layout{ov::PartialShape::dynamic(in1_shape.size()), data_types::f32, format::bfyx};
-        auto in2_layout = layout{ov::PartialShape::dynamic(in2_shape.size()), data_types::f32, format::bfyx};
-        auto input1 = engine.allocate_memory(layout{ov::PartialShape(in1_shape), data_types::f32, format::bfyx});
-        auto input2 = engine.allocate_memory(layout{ov::PartialShape(in2_shape), data_types::f32, format::bfyx});
+        auto in1_layout = layout{ov::PartialShape::dynamic(in1_shape.size()), data_types::f16, format::bfyx};
+        auto in2_layout = layout{ov::PartialShape::dynamic(in2_shape.size()), data_types::f16, format::bfyx};
+        auto input1 = engine.allocate_memory(layout{ov::PartialShape(in1_shape), data_types::f16, format::bfyx});
+        auto input2 = engine.allocate_memory(layout{ov::PartialShape(in2_shape), data_types::f16, format::bfyx});
 
-        std::vector<float> input1_data = {
+        std::vector<ov::float16> input1_data = {
             1.f, -2.f, 3.f, -4.f,
             5.f, 6.f, 1.f, 2.f,
             3.f, 3.f, 2.f, -1.f,
         };
 
-        std::vector<float> input2_data = {
+        std::vector<ov::float16> input2_data = {
             2.f, 5.f, -4.f, -7.f,
         };
         set_values(input1, input1_data);
         set_values(input2, input2_data);
 
-        std::vector<float> out_data = {
+        std::vector<ov::float16> out_data = {
             8.f, 22.f, 20.f
         };
 
         topology topology;
         topology.add(input_layout("input1", in1_layout),
                      input_layout("input2", in2_layout),
                      gemm("gemm", { input_info("input1"), input_info("input2") }, data_types::f16, false, true, 1.0f, 0.0f, 4, 2)
         );
 
-        ov::intel_gpu::ImplementationDesc fc_impl = { format::bfyx, "", impl_types::onednn };
         ExecutionConfig cfg{ ov::intel_gpu::queue_type(QueueTypes::in_order),
-                             ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"gemm", fc_impl} }),
                              ov::intel_gpu::optimize_data(true),
                              ov::intel_gpu::allow_new_shape_infer(true) };
@@ -2596,7 +2594,7 @@ class gemm_onednn: public ::testing::Test {
         auto outputs = network.execute();
         auto output = outputs.at("gemm").get_memory();
 
-        cldnn::mem_lock<float> output_ptr(output, get_test_stream());
+        cldnn::mem_lock<ov::float16> output_ptr(output, get_test_stream());
 
         ASSERT_EQ(output_ptr.size(), (uint32_t)3);
         for (uint32_t i = 0; i < out_data.size(); ++i) {
@@ -2764,7 +2762,7 @@ class gemm_onednn: public ::testing::Test {
 
         ov::intel_gpu::ImplementationDesc gemm_impl = { format::bfyx, std::string(""), impl_types::onednn };
         ExecutionConfig cfg{ ov::intel_gpu::queue_type(QueueTypes::in_order),
-                             ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"gemm", gemm_impl} }),
+                             ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"gemm_ref", gemm_impl} }),
                              ov::intel_gpu::optimize_data(true),
                              ov::intel_gpu::allow_new_shape_infer(true) };
diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/pooling_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/pooling_gpu_test.cpp
index 9de124918f44a8..42c0ede306823a 100644
--- a/src/plugins/intel_gpu/tests/unit/test_cases/pooling_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/test_cases/pooling_gpu_test.cpp
@@ -1944,7 +1944,7 @@ class pooling_test_base {
             pad.insert(pad.begin(), offset_z());
         }
 
-        topo.add(pooling("pool", input_info("input"), pool_mode(), kernel, stride, pad));
+        topo.add(pooling("pool", input_info("input"), pool_mode(), kernel, stride, pad, pad));
 
         return topo;
     }
diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/reduce_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/reduce_gpu_test.cpp
index 9707b0319e0d6f..fde0c0e1e31a16 100644
--- a/src/plugins/intel_gpu/tests/unit/test_cases/reduce_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/test_cases/reduce_gpu_test.cpp
@@ -1936,7 +1936,7 @@ class ReduceXYWithBigTensorTestBase : public ::testing::TestWithParam
         network->set_input_data("input", input_mem);