From 004c0fffd1a70a8e07510d3bf34ac856e3e4d46f Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Wed, 17 Apr 2024 12:18:57 +0400 Subject: [PATCH 1/2] [GPU] Global migration to new shape infer and multi-out approach --- .../include/intel_gpu/graph/program.hpp | 3 - .../intel_gpu/plugin/program_builder.hpp | 3 - .../intel_gpu/primitives/adaptive_pooling.hpp | 29 - .../intel_gpu/primitives/arg_max_min.hpp | 6 - .../intel_gpu/primitives/batch_to_space.hpp | 35 +- .../intel_gpu/primitives/broadcast.hpp | 38 +- .../intel_gpu/primitives/embedding_bag.hpp | 5 +- ...xperimental_detectron_detection_output.hpp | 57 +- ...ectron_generate_proposals_single_image.hpp | 33 +- .../primitives/extract_image_patches.hpp | 10 +- .../include/intel_gpu/primitives/eye.hpp | 7 +- .../primitives/generate_proposals.hpp | 67 +- .../intel_gpu/primitives/matrix_nms.hpp | 11 +- .../intel_gpu/primitives/multiclass_nms.hpp | 36 +- .../primitives/non_max_suppression.hpp | 68 +- .../include/intel_gpu/primitives/one_hot.hpp | 25 - .../include/intel_gpu/primitives/pooling.hpp | 18 +- .../include/intel_gpu/primitives/reverse.hpp | 12 +- .../include/intel_gpu/primitives/roll.hpp | 23 +- .../intel_gpu/primitives/space_to_batch.hpp | 34 +- .../include/intel_gpu/primitives/swiglu.hpp | 9 +- .../intel_gpu/runtime/internal_properties.hpp | 1 - .../intel_gpu/src/graph/activation.cpp | 29 - .../intel_gpu/src/graph/adaptive_pooling.cpp | 6 - .../intel_gpu/src/graph/arg_max_min.cpp | 52 - src/plugins/intel_gpu/src/graph/assign.cpp | 4 - .../intel_gpu/src/graph/batch_to_space.cpp | 73 +- src/plugins/intel_gpu/src/graph/border.cpp | 17 - src/plugins/intel_gpu/src/graph/broadcast.cpp | 101 +- src/plugins/intel_gpu/src/graph/bucketize.cpp | 6 - .../intel_gpu/src/graph/concatenation.cpp | 29 - src/plugins/intel_gpu/src/graph/condition.cpp | 42 +- .../intel_gpu/src/graph/convert_color.cpp | 26 - .../intel_gpu/src/graph/convolution.cpp | 4 - src/plugins/intel_gpu/src/graph/crop.cpp | 25 +- .../src/graph/ctc_greedy_decoder.cpp | 8 - src/plugins/intel_gpu/src/graph/ctc_loss.cpp | 7 - src/plugins/intel_gpu/src/graph/cum_sum.cpp | 4 - .../intel_gpu/src/graph/deconvolution.cpp | 83 - .../intel_gpu/src/graph/depth_to_space.cpp | 35 - .../intel_gpu/src/graph/detection_output.cpp | 43 - src/plugins/intel_gpu/src/graph/dft.cpp | 26 - src/plugins/intel_gpu/src/graph/eltwise.cpp | 93 +- .../intel_gpu/src/graph/embedding_bag.cpp | 11 - ...xperimental_detectron_detection_output.cpp | 8 - ...tectron_generate_proposal_single_image.cpp | 8 - ...imental_detectron_prior_grid_generator.cpp | 18 - ...mental_detectron_roi_feature_extractor.cpp | 24 +- .../experimental_detectron_topk_rois.cpp | 11 - .../src/graph/extract_image_patches.cpp | 10 - src/plugins/intel_gpu/src/graph/eye.cpp | 7 +- .../intel_gpu/src/graph/fully_connected.cpp | 127 +- src/plugins/intel_gpu/src/graph/gather.cpp | 55 - .../intel_gpu/src/graph/gather_elements.cpp | 18 - src/plugins/intel_gpu/src/graph/gather_nd.cpp | 60 - .../intel_gpu/src/graph/gather_tree.cpp | 7 - src/plugins/intel_gpu/src/graph/gemm.cpp | 92 +- .../src/graph/generate_proposals.cpp | 9 +- .../graph_optimizer/add_required_reorders.cpp | 18 - .../graph/graph_optimizer/handle_reshape.cpp | 3 +- .../mark_shape_of_subgraphs.cpp | 6 +- .../graph/graph_optimizer/prepare_padding.cpp | 91 +- .../prepare_primitive_fusing_through.cpp | 2 +- .../remove_redundant_reorders.cpp | 3 +- .../intel_gpu/src/graph/grid_sample.cpp | 14 - src/plugins/intel_gpu/src/graph/grn.cpp | 7 - .../src/graph/group_normalization.cpp | 12 - .../intel_gpu/src/graph/impls/common/loop.cpp | 1 - .../graph/impls/cpu/non_max_suppression.cpp | 12 - .../src/graph/impls/ocl/adaptive_pooling.cpp | 21 +- .../src/graph/impls/ocl/arg_max_min.cpp | 22 +- .../src/graph/impls/ocl/batch_to_space.cpp | 7 +- .../src/graph/impls/ocl/broadcast.cpp | 63 +- .../graph/impls/ocl/ctc_greedy_decoder.cpp | 25 +- .../intel_gpu/src/graph/impls/ocl/dft.cpp | 39 +- .../intel_gpu/src/graph/impls/ocl/eltwise.cpp | 3 +- .../src/graph/impls/ocl/embedding_bag.cpp | 50 +- ...xperimental_detectron_detection_output.cpp | 35 +- ...ectron_generate_proposals_single_image.cpp | 41 +- ...mental_detectron_roi_feature_extractor.cpp | 1 + .../src/graph/impls/ocl/fully_connected.cpp | 10 +- .../graph/impls/ocl/generate_proposals.cpp | 38 +- .../graph/impls/ocl/kernel_selector_helper.h | 19 +- .../src/graph/impls/ocl/matrix_nms.cpp | 23 +- .../src/graph/impls/ocl/multiclass_nms.cpp | 27 +- .../graph/impls/ocl/non_max_suppression.cpp | 31 +- .../intel_gpu/src/graph/impls/ocl/pooling.cpp | 29 +- .../intel_gpu/src/graph/impls/ocl/reverse.cpp | 4 +- .../intel_gpu/src/graph/impls/ocl/roll.cpp | 70 +- .../src/graph/impls/ocl/space_to_batch.cpp | 8 +- .../impls/onednn/primitive_onednn_base.h | 4 - .../src/graph/include/activation_inst.h | 1 - .../src/graph/include/adaptive_pooling_inst.h | 1 - .../src/graph/include/arg_max_min_inst.h | 1 - .../intel_gpu/src/graph/include/assign_inst.h | 2 - .../src/graph/include/batch_to_space_inst.h | 1 - .../intel_gpu/src/graph/include/border_inst.h | 1 - .../src/graph/include/broadcast_inst.h | 1 - .../src/graph/include/bucketize_inst.hpp | 1 - .../src/graph/include/concatenation_inst.h | 1 - .../src/graph/include/condition_inst.h | 1 - .../src/graph/include/convert_color_inst.h | 3 +- .../src/graph/include/convolution_inst.h | 1 - .../intel_gpu/src/graph/include/crop_inst.h | 1 - .../graph/include/ctc_greedy_decoder_inst.h | 4 - .../src/graph/include/ctc_loss_inst.hpp | 2 +- .../src/graph/include/cum_sum_inst.h | 1 - .../graph/include/custom_gpu_primitive_inst.h | 14 - .../intel_gpu/src/graph/include/data_inst.h | 4 - .../src/graph/include/deconvolution_inst.h | 1 - .../src/graph/include/depth_to_space_inst.h | 1 - .../src/graph/include/detection_output_inst.h | 1 - .../intel_gpu/src/graph/include/dft_inst.h | 1 - .../src/graph/include/eltwise_inst.h | 1 - .../src/graph/include/embedding_bag_inst.h | 1 - ...mental_detectron_detection_output_inst.hpp | 15 - ...n_generate_proposals_single_image_inst.hpp | 1 - ...ntal_detectron_prior_grid_generator_inst.h | 1 - ...l_detectron_roi_feature_extractor_inst.hpp | 2 - .../experimental_detectron_topk_rois_inst.h | 1 - .../include/extract_image_patches_inst.h | 1 - .../intel_gpu/src/graph/include/eye_inst.h | 2 +- .../src/graph/include/fully_connected_inst.h | 1 - .../src/graph/include/gather_elements_inst.h | 1 - .../intel_gpu/src/graph/include/gather_inst.h | 1 - .../src/graph/include/gather_nd_inst.h | 1 - .../src/graph/include/gather_tree_inst.h | 1 - .../intel_gpu/src/graph/include/gemm_inst.h | 1 - .../graph/include/generate_proposals_inst.h | 1 - .../src/graph/include/grid_sample_inst.hpp | 1 - .../intel_gpu/src/graph/include/grn_inst.h | 1 - .../graph/include/group_normalization_inst.h | 1 - .../src/graph/include/input_layout_inst.h | 3 - .../src/graph/include/kv_cache_inst.h | 1 - .../intel_gpu/src/graph/include/loop_inst.h | 1 - .../intel_gpu/src/graph/include/lrn_inst.h | 1 - .../src/graph/include/lstm_elt_inst.h | 1 - .../src/graph/include/matrix_nms_inst.h | 8 - .../src/graph/include/multiclass_nms_inst.h | 27 +- .../src/graph/include/multinomial_inst.h | 1 - .../src/graph/include/mutable_data_inst.h | 4 - .../intel_gpu/src/graph/include/mvn_inst.h | 1 - .../graph/include/non_max_suppression_inst.h | 60 +- .../src/graph/include/non_zero_inst.h | 2 - .../src/graph/include/normalize_inst.h | 1 - .../src/graph/include/one_hot_inst.h | 1 - .../src/graph/include/permute_inst.h | 1 - .../src/graph/include/pooling_inst.h | 1 - .../src/graph/include/primitive_type.h | 1 - .../src/graph/include/primitive_type_base.h | 11 - .../src/graph/include/prior_box_inst.h | 1 - .../src/graph/include/program_node.h | 4 - .../src/graph/include/proposal_inst.h | 1 - .../src/graph/include/quantize_inst.h | 1 - .../src/graph/include/random_uniform_inst.h | 1 - .../intel_gpu/src/graph/include/range_inst.h | 1 - .../src/graph/include/read_value_inst.h | 2 - .../intel_gpu/src/graph/include/reduce_inst.h | 1 - .../src/graph/include/region_yolo_inst.h | 1 - .../src/graph/include/reorder_inst.h | 1 - .../src/graph/include/reorg_yolo_inst.h | 1 - .../src/graph/include/resample_inst.h | 1 - .../src/graph/include/reshape_inst.h | 1 - .../src/graph/include/reverse_inst.h | 1 - .../src/graph/include/reverse_sequence_inst.h | 1 - .../intel_gpu/src/graph/include/rms_inst.h | 1 - .../src/graph/include/roi_align_inst.h | 1 - .../src/graph/include/roi_pooling_inst.h | 1 - .../intel_gpu/src/graph/include/roll_inst.hpp | 1 - .../include/scatter_elements_update_inst.h | 1 - .../graph/include/scatter_nd_update_inst.h | 1 - .../src/graph/include/scatter_update_inst.h | 1 - .../intel_gpu/src/graph/include/select_inst.h | 1 - .../src/graph/include/shape_of_inst.h | 1 - .../src/graph/include/shuffle_channels_inst.h | 1 - .../intel_gpu/src/graph/include/slice_inst.h | 3 +- .../src/graph/include/softmax_inst.h | 1 - .../src/graph/include/space_to_batch_inst.h | 1 - .../src/graph/include/space_to_depth_inst.h | 1 - .../src/graph/include/strided_slice_inst.h | 1 - .../intel_gpu/src/graph/include/swiglu_inst.h | 1 - .../intel_gpu/src/graph/include/tile_inst.h | 1 - .../src/graph/include/unique_inst.hpp | 2 - src/plugins/intel_gpu/src/graph/kv_cache.cpp | 4 - .../intel_gpu/src/graph/layout_optimizer.cpp | 81 +- src/plugins/intel_gpu/src/graph/loop.cpp | 49 +- src/plugins/intel_gpu/src/graph/lrn.cpp | 16 - src/plugins/intel_gpu/src/graph/lstm_elt.cpp | 17 - .../intel_gpu/src/graph/matrix_nms.cpp | 28 +- .../intel_gpu/src/graph/multiclass_nms.cpp | 35 +- .../intel_gpu/src/graph/multinomial.cpp | 17 - src/plugins/intel_gpu/src/graph/mvn.cpp | 13 - .../src/graph/non_max_suppression.cpp | 13 +- src/plugins/intel_gpu/src/graph/non_zero.cpp | 18 - src/plugins/intel_gpu/src/graph/normalize.cpp | 15 - src/plugins/intel_gpu/src/graph/one_hot.cpp | 65 +- src/plugins/intel_gpu/src/graph/permute.cpp | 33 - src/plugins/intel_gpu/src/graph/pooling.cpp | 161 -- .../intel_gpu/src/graph/primitive_inst.cpp | 1 - src/plugins/intel_gpu/src/graph/prior_box.cpp | 72 - src/plugins/intel_gpu/src/graph/program.cpp | 37 +- .../intel_gpu/src/graph/program_node.cpp | 37 +- src/plugins/intel_gpu/src/graph/proposal.cpp | 11 - src/plugins/intel_gpu/src/graph/quantize.cpp | 12 - .../intel_gpu/src/graph/random_uniform.cpp | 7 - src/plugins/intel_gpu/src/graph/range.cpp | 4 - .../intel_gpu/src/graph/read_value.cpp | 4 - src/plugins/intel_gpu/src/graph/reduce.cpp | 78 - .../intel_gpu/src/graph/region_yolo.cpp | 24 - src/plugins/intel_gpu/src/graph/reorder.cpp | 157 -- .../intel_gpu/src/graph/reorg_yolo.cpp | 17 - src/plugins/intel_gpu/src/graph/resample.cpp | 18 - src/plugins/intel_gpu/src/graph/reshape.cpp | 40 +- src/plugins/intel_gpu/src/graph/reverse.cpp | 7 +- .../intel_gpu/src/graph/reverse_sequence.cpp | 7 - src/plugins/intel_gpu/src/graph/rms.cpp | 9 - src/plugins/intel_gpu/src/graph/roi_align.cpp | 11 - .../intel_gpu/src/graph/roi_pooling.cpp | 16 +- src/plugins/intel_gpu/src/graph/roll.cpp | 5 - .../src/graph/scatter_elements_update.cpp | 22 - .../intel_gpu/src/graph/scatter_nd_update.cpp | 14 - .../intel_gpu/src/graph/scatter_update.cpp | 16 - src/plugins/intel_gpu/src/graph/select.cpp | 68 - src/plugins/intel_gpu/src/graph/shape_of.cpp | 9 - .../intel_gpu/src/graph/shuffle_channels.cpp | 29 - src/plugins/intel_gpu/src/graph/slice.cpp | 4 - src/plugins/intel_gpu/src/graph/softmax.cpp | 33 +- .../intel_gpu/src/graph/space_to_batch.cpp | 63 +- .../intel_gpu/src/graph/space_to_depth.cpp | 60 - .../intel_gpu/src/graph/strided_slice.cpp | 17 - src/plugins/intel_gpu/src/graph/swiglu.cpp | 9 - src/plugins/intel_gpu/src/graph/tile.cpp | 17 - src/plugins/intel_gpu/src/graph/unique.cpp | 8 - .../matrix_nms/matrix_nms_kernel_ref.cpp | 7 + src/plugins/intel_gpu/src/plugin/graph.cpp | 9 +- .../src/plugin/ops/adaptive_pooling.cpp | 72 +- .../src/plugin/ops/batch_to_space.cpp | 29 +- .../intel_gpu/src/plugin/ops/broadcast.cpp | 38 - .../intel_gpu/src/plugin/ops/bucketize.cpp | 1 - .../intel_gpu/src/plugin/ops/concat.cpp | 1 - .../intel_gpu/src/plugin/ops/condition.cpp | 1 - .../intel_gpu/src/plugin/ops/constant.cpp | 213 +-- .../intel_gpu/src/plugin/ops/convert.cpp | 1 - .../src/plugin/ops/convert_color.cpp | 1 - .../intel_gpu/src/plugin/ops/convolution.cpp | 8 - .../src/plugin/ops/ctc_greedy_decoder.cpp | 131 +- .../intel_gpu/src/plugin/ops/cum_sum.cpp | 1 - .../intel_gpu/src/plugin/ops/custom.cpp | 1 - src/plugins/intel_gpu/src/plugin/ops/dft.cpp | 2 +- .../intel_gpu/src/plugin/ops/eltwise.cpp | 44 +- .../src/plugin/ops/embedding_bag.cpp | 79 +- ...xperimental_detectron_detection_output.cpp | 95 +- ...ectron_generate_proposals_single_image.cpp | 36 +- ...mental_detectron_roi_feature_extractor.cpp | 51 +- .../src/plugin/ops/extract_image_patches.cpp | 3 +- src/plugins/intel_gpu/src/plugin/ops/eye.cpp | 12 - .../src/plugin/ops/fully_connected.cpp | 31 - .../intel_gpu/src/plugin/ops/gather.cpp | 51 +- .../src/plugin/ops/gather_elements.cpp | 13 +- .../src/plugin/ops/generate_proposals.cpp | 61 +- .../intel_gpu/src/plugin/ops/interpolate.cpp | 146 +- src/plugins/intel_gpu/src/plugin/ops/loop.cpp | 115 +- .../intel_gpu/src/plugin/ops/matmul.cpp | 29 - .../intel_gpu/src/plugin/ops/matrix_nms.cpp | 57 +- .../src/plugin/ops/multiclass_nms.cpp | 65 +- .../intel_gpu/src/plugin/ops/multinomial.cpp | 5 +- .../src/plugin/ops/non_max_suppression.cpp | 165 +- .../intel_gpu/src/plugin/ops/normalize_l2.cpp | 2 +- .../intel_gpu/src/plugin/ops/one_hot.cpp | 5 - src/plugins/intel_gpu/src/plugin/ops/pad.cpp | 1 - .../intel_gpu/src/plugin/ops/parameter.cpp | 4 - .../intel_gpu/src/plugin/ops/pooling.cpp | 96 +- .../intel_gpu/src/plugin/ops/proposal.cpp | 149 +- .../src/plugin/ops/random_uniform.cpp | 31 +- .../intel_gpu/src/plugin/ops/range.cpp | 15 +- .../intel_gpu/src/plugin/ops/reduce.cpp | 46 - .../intel_gpu/src/plugin/ops/region_yolo.cpp | 1 - .../intel_gpu/src/plugin/ops/reorg_yolo.cpp | 1 - .../intel_gpu/src/plugin/ops/reshape.cpp | 86 +- .../intel_gpu/src/plugin/ops/result.cpp | 5 - .../intel_gpu/src/plugin/ops/reverse.cpp | 7 +- .../src/plugin/ops/reverse_sequence.cpp | 1 - src/plugins/intel_gpu/src/plugin/ops/rnn.cpp | 181 +-- .../intel_gpu/src/plugin/ops/roi_align.cpp | 1 - .../intel_gpu/src/plugin/ops/roi_pooling.cpp | 1 - src/plugins/intel_gpu/src/plugin/ops/roll.cpp | 39 +- .../plugin/ops/scatter_elements_update.cpp | 1 - .../src/plugin/ops/scatter_nd_update.cpp | 4 - .../src/plugin/ops/scatter_update.cpp | 1 - .../intel_gpu/src/plugin/ops/select.cpp | 50 - .../intel_gpu/src/plugin/ops/shape_of.cpp | 5 +- .../src/plugin/ops/shuffle_channels.cpp | 1 - .../intel_gpu/src/plugin/ops/slice.cpp | 1 - .../intel_gpu/src/plugin/ops/softmax.cpp | 1 - .../src/plugin/ops/space_to_batch.cpp | 28 +- .../src/plugin/ops/space_to_depth.cpp | 1 - .../intel_gpu/src/plugin/ops/split.cpp | 101 +- .../src/plugin/ops/strided_slice.cpp | 248 +-- .../intel_gpu/src/plugin/ops/swiglu.cpp | 29 +- src/plugins/intel_gpu/src/plugin/ops/tile.cpp | 27 - src/plugins/intel_gpu/src/plugin/ops/topk.cpp | 89 +- .../intel_gpu/src/plugin/ops/variable.cpp | 1 - .../intel_gpu/src/plugin/program_builder.cpp | 22 +- .../src/plugin/sync_infer_request.cpp | 13 - .../src/runtime/execution_config.cpp | 1 - .../intel_gpu/src/runtime/kernels_cache.cpp | 3 +- .../concurrency/gpu_concurrency_tests.cpp | 12 +- .../gpu_dyn_batch_shape_tests.cpp | 4 +- ...nvolution_backprop_data_transformation.cpp | 10 +- .../convolution_transformation.cpp | 22 +- ...ntwise_branch_selection_transformation.cpp | 12 +- .../fq_transformation.cpp | 12 +- .../fq_with_dq_not_optimal_transformation.cpp | 8 +- .../group_convolution_transformation.cpp | 14 +- .../groupconvolution_qdq_transformation.cpp | 20 +- .../mat_mul_with_constant_transformation.cpp | 16 +- .../move_fake_quantize_transformation.cpp | 16 +- ...ly_to_group_convolution_transformation.cpp | 6 +- .../pull_reshape_through_dequantization.cpp | 4 +- .../recurrent_cell_transformation.cpp | 8 +- .../reshape_transformation.cpp | 32 +- .../dynamic/read_value_assign.cpp | 1 + .../dynamic/rms_norm_decomposition.cpp | 1 + .../subgraph_tests/dynamic/swiglu_fusion.cpp | 1 + .../bounded_shape_mem_alloc.cpp | 2 - .../is_valid_fusion_test.cpp | 2 +- .../dynamic_execution/memory_realloc_test.cpp | 7 +- .../optimized_out_execution_test.cpp | 2 +- .../unit/dynamic_execution/priorbox_test.cpp | 2 +- .../skip_redundant_reorder_at_runtime.cpp | 4 +- .../unit/dynamic_execution/stateful_model.cpp | 8 +- .../fake_alignment/fc_fake_alignment_test.cpp | 1 - .../fusions/batch_to_space_fusion_test.cpp | 25 +- .../fusions/fully_connected_fusion_test.cpp | 5 - .../tests/unit/fusions/gather_fusion_test.cpp | 2 - .../tests/unit/fusions/gemm_fusion_test.cpp | 6 - .../tests/unit/fusions/reduce_fusion_test.cpp | 3 - .../tests/unit/fusions/select_fusion_test.cpp | 4 +- .../fusions/space_to_batch_fusion_test.cpp | 25 +- .../kernel_impl_params_relevance_test.cpp | 3 +- .../tests/unit/module_tests/network_test.cpp | 12 +- .../weights_reorder_factory_test.cpp | 4 +- ...dd_onednn_optimization_attributes_test.cpp | 2 +- .../passes/add_required_reorders_test.cpp | 2 +- .../unit/passes/clamp_fp16_output_test.cpp | 4 +- .../tests/unit/passes/handle_reshape.cpp | 12 +- .../tests/unit/passes/kernels_cache_test.cpp | 2 +- .../passes/mark_shape_of_subgraphs_test.cpp | 16 +- .../unit/passes/post_optimize_weights.cpp | 11 +- .../passes/prepare_buffer_fusing_test.cpp | 18 +- .../unit/passes/prepare_padding_test.cpp | 4 +- .../passes/prepare_primitive_fusing_test.cpp | 30 +- .../remove_redundant_reorders_tests.cpp | 12 +- .../tests/unit/passes/reorder_inputs_test.cpp | 18 +- .../tests/unit/passes/reorder_transfer.cpp | 2 +- .../passes/select_preferred_formats_test.cpp | 2 +- .../unit/shape_infer/broadcast_si_test.cpp | 2 +- .../tests/unit/shape_infer/matmul_si_test.cpp | 4 +- .../non_max_suppression_si_test.cpp | 12 +- .../unit/shape_infer/one_hot_si_test.cpp | 2 +- .../unit/shape_infer/reverse_si_test.cpp | 12 +- .../tests/unit/shape_infer/roll_si_test.cpp | 2 +- .../test_cases/activation_simple_gpu_test.cpp | 7 +- .../adaptive_avg_pooling_gpu_test.cpp | 66 +- .../adaptive_max_pooling_gpu_test.cpp | 312 ---- .../unit/test_cases/add_reorders_gpu_test.cpp | 1 - .../unit/test_cases/arg_max_gpu_test.cpp | 6 +- .../test_cases/batch_to_space_gpu_test.cpp | 70 +- .../tests/unit/test_cases/border_gpu_test.cpp | 10 +- .../unit/test_cases/broadcast_gpu_test.cpp | 1204 ++++---------- .../test_cases/canonicalization_gpu_test.cpp | 3 +- .../test_cases/concatenation_gpu_test.cpp | 6 +- .../unit/test_cases/condition_gpu_test.cpp | 8 +- .../unit/test_cases/convolution_gpu_test.cpp | 11 +- .../tests/unit/test_cases/crop_gpu_test.cpp | 5 +- .../unit/test_cases/cum_sum_gpu_test.cpp | 2 +- .../tests/unit/test_cases/data_gpu_test.cpp | 4 +- .../test_cases/deconvolution_gpu_test.cpp | 2 +- .../unit/test_cases/eltwise_gpu_test.cpp | 18 +- .../test_cases/embedding_bag_gpu_test.cpp | 1427 ----------------- .../unit/test_cases/empty_tensor_gpu_test.cpp | 2 +- ...al_detectron_detection_output_gpu_test.cpp | 456 ------ ...nerate_proposals_single_image_gpu_test.cpp | 359 ----- ...etectron_prior_grid_generator_gpu_test.cpp | 239 --- ...tectron_roi_feature_extractor_gpu_test.cpp | 306 ---- ...erimental_detectron_topk_rois_gpu_test.cpp | 176 -- .../extract_image_patches_gpu_test.cpp | 571 ------- .../intel_gpu/tests/unit/test_cases/eye.cpp | 229 --- .../test_cases/fully_connected_gpu_test.cpp | 40 +- .../test_cases/gather_elements_gpu_test.cpp | 2 +- .../tests/unit/test_cases/gather_gpu_test.cpp | 17 +- .../unit/test_cases/gather_nd_gpu_test.cpp | 4 +- .../tests/unit/test_cases/gemm_gpu_test.cpp | 38 +- .../generate_proposals_gpu_test.cpp | 478 ------ .../unit/test_cases/grid_sample_gpu_test.cpp | 2 +- .../tests/unit/test_cases/loop_gpu_test.cpp | 24 +- .../unit/test_cases/matrix_nms_gpu_test.cpp | 695 -------- .../tests/unit/test_cases/memory_test.cpp | 8 +- .../test_cases/multiclass_nms_gpu_test.cpp | 877 ---------- .../test_cases/multiple_streams_gpu_test.cpp | 2 +- .../tests/unit/test_cases/mvn_gpu_test.cpp | 4 +- .../test_cases/non_max_suppression_test.cpp | 105 +- .../unit/test_cases/non_zero_gpu_test.cpp | 8 +- .../unit/test_cases/one_hot_gpu_test.cpp | 53 +- .../unit/test_cases/permute_gpu_test.cpp | 10 +- .../unit/test_cases/quantize_gpu_test.cpp | 2 +- .../tests/unit/test_cases/range_gpu_test.cpp | 28 +- .../tests/unit/test_cases/reduce_gpu_test.cpp | 6 +- .../unit/test_cases/reorder_gpu_test.cpp | 7 +- .../unit/test_cases/resample_gpu_test.cpp | 30 +- .../unit/test_cases/reshape_gpu_test.cpp | 16 +- .../unit/test_cases/reverse_gpu_test.cpp | 56 +- .../tests/unit/test_cases/rms_gpu_test.cpp | 3 - .../tests/unit/test_cases/roll_gpu_test.cpp | 2 +- .../test_cases/scatter_nd_update_gpu_test.cpp | 6 +- .../test_cases/scatter_update_gpu_test.cpp | 6 +- .../tests/unit/test_cases/select_gpu_test.cpp | 2 +- .../unit/test_cases/shape_of_gpu_test.cpp | 4 +- .../tests/unit/test_cases/slice_gpu_test.cpp | 10 +- .../unit/test_cases/softmax_gpu_test.cpp | 6 +- .../test_cases/space_to_batch_gpu_test.cpp | 70 +- .../test_cases/strided_slice_gpu_test.cpp | 12 +- .../tests/unit/test_cases/swiglu_gpu_test.cpp | 4 +- .../tests/unit/test_cases/tile_gpu_test.cpp | 2 +- .../tests/unit/test_cases/unique_gpu_test.cpp | 2 +- 425 files changed, 1716 insertions(+), 13984 deletions(-) delete mode 100644 src/plugins/intel_gpu/tests/unit/test_cases/adaptive_max_pooling_gpu_test.cpp delete mode 100644 src/plugins/intel_gpu/tests/unit/test_cases/embedding_bag_gpu_test.cpp delete mode 100644 src/plugins/intel_gpu/tests/unit/test_cases/experimental_detectron_detection_output_gpu_test.cpp delete mode 100644 src/plugins/intel_gpu/tests/unit/test_cases/experimental_detectron_generate_proposals_single_image_gpu_test.cpp delete mode 100644 src/plugins/intel_gpu/tests/unit/test_cases/experimental_detectron_prior_grid_generator_gpu_test.cpp delete mode 100644 src/plugins/intel_gpu/tests/unit/test_cases/experimental_detectron_roi_feature_extractor_gpu_test.cpp delete mode 100644 src/plugins/intel_gpu/tests/unit/test_cases/experimental_detectron_topk_rois_gpu_test.cpp delete mode 100644 src/plugins/intel_gpu/tests/unit/test_cases/extract_image_patches_gpu_test.cpp delete mode 100644 src/plugins/intel_gpu/tests/unit/test_cases/eye.cpp delete mode 100644 src/plugins/intel_gpu/tests/unit/test_cases/generate_proposals_gpu_test.cpp delete mode 100644 src/plugins/intel_gpu/tests/unit/test_cases/matrix_nms_gpu_test.cpp delete mode 100644 src/plugins/intel_gpu/tests/unit/test_cases/multiclass_nms_gpu_test.cpp diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp index 379d7b3b64a222..245d6e1a0f1767 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp @@ -287,7 +287,6 @@ struct program { void load(cldnn::BinaryInputBuffer& ib); bool is_loaded_from_cache() const { return _loaded_from_cache; } - bool is_new_shape_infer() const { return new_shape_infer; } layout_optimizer& get_layout_optimizer() const { return *_layout_optimizer; } private: @@ -313,8 +312,6 @@ struct program { std::shared_ptr _compilation_context; bool _loaded_from_cache = false; - bool new_shape_infer = false; - std::map> nodes_map; std::list optimized_out; diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp index 5cede62fd17e69..e0b94070249524 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp @@ -138,7 +138,6 @@ class ProgramBuilder final { void add_primitive(const ov::Node& op, std::shared_ptr prim, std::vector aliases = {}); - bool use_new_shape_infer() const { return allow_new_shape_infer; } bool requires_new_shape_infer(const std::shared_ptr& op) const; bool is_inner_program() const { return m_is_inner_program; } bool is_query_mode() { return queryMode; } @@ -157,8 +156,6 @@ class ProgramBuilder final { std::shared_ptr m_topology; CustomLayerMap m_custom_layers; - bool allow_new_shape_infer = false; - bool queryMode; std::shared_ptr m_task_executor; diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/adaptive_pooling.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/adaptive_pooling.hpp index b6a80edd2b6d14..d04cd2ceec8968 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/adaptive_pooling.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/adaptive_pooling.hpp @@ -20,35 +20,6 @@ struct adaptive_pooling : public primitive_base { mode{adaptive_pooling_mode::average}, output_size{} {} - /// @brief Constructs AdaptiveAvgPooling primitive. - /// @param id This primitive id. - /// @param input Input primitive id. - /// @param output_size Output data size of the primitive - adaptive_pooling(const primitive_id &id, - const input_info &input, - tensor output_size) - : primitive_base(id, {input}), - mode{adaptive_pooling_mode::average}, - output_size{output_size} {} - - /// @brief Constructs AdaptiveMaxPooling primitive. - /// @param id This primitive id. - /// @param input Input primitive id. - /// @param output_shape Output shape primitive id. - /// @param output_size Output data size of the primitive - /// @param indices_output Indices output primitive id. - /// @param index_element_type Data type of indices output. - adaptive_pooling(const primitive_id &id, - const input_info &input, - tensor output_size, - const primitive_id &indices_output, - data_types index_element_type) - : primitive_base(id, {input, indices_output}), - mode{adaptive_pooling_mode::max}, - output_size{output_size}, - indices_output{indices_output}, - index_element_type{index_element_type} {} - /// @brief Constructs AdaptiveAvgPooling primitive for dynamic shape. /// @param id This primitive id. /// @param input Input primitive id. diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/arg_max_min.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/arg_max_min.hpp index 978b77d9ea03f5..181427ecec3430 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/arg_max_min.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/arg_max_min.hpp @@ -111,12 +111,6 @@ struct arg_max_min : public primitive_base { stable == rhs_casted.stable; } - size_t get_output_nums() const { - return (input_size() == 3 ? 2 : output_size()); - } - bool has_second_output() const { return get_output_nums() == 2; } - bool use_multiple_outputs() const { return input_size() != 3; } - void save(BinaryOutputBuffer& ob) const override { primitive_base::save(ob); ob << make_data(&mode, sizeof(ov::op::TopKMode)); diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/batch_to_space.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/batch_to_space.hpp index 090bcf44a140a3..6ee60ae3a41cdd 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/batch_to_space.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/batch_to_space.hpp @@ -50,38 +50,33 @@ struct batch_to_space : public primitive_base { /// @param crops_end Amount to crop from the ending along each axis of data input batch_to_space(const primitive_id& id, const input_info& input, - const tensor& block_shape, - const tensor& crops_begin, - const tensor& crops_end, - const tensor& out_size) + const std::vector& block_shape, + const std::vector& crops_begin, + const std::vector& crops_end) : primitive_base(id, {input}), block_shape(block_shape), crops_begin(crops_begin), crops_end(crops_end), - out_size(out_size), shape_constant(1) {} batch_to_space(const primitive_id& id, - const std::vector& inputs, - const tensor& out_size) - : primitive_base(id, inputs), - block_shape(tensor()), - crops_begin(tensor()), - crops_end(tensor()), - out_size(out_size), + const std::vector& inputs) + : primitive_base(id, inputs, {}), + block_shape({}), + crops_begin({}), + crops_end({}), shape_constant(0) {} - tensor block_shape; - tensor crops_begin; - tensor crops_end; - tensor out_size; + std::vector block_shape; + std::vector crops_begin; + std::vector crops_end; int64_t shape_constant; size_t hash() const override { size_t seed = primitive::hash(); - seed = hash_combine(seed, block_shape.hash()); - seed = hash_combine(seed, crops_begin.hash()); - seed = hash_combine(seed, crops_end.hash()); + seed = hash_range(seed, block_shape.begin(), block_shape.end()); + seed = hash_range(seed, crops_begin.begin(), crops_begin.end()); + seed = hash_range(seed, crops_end.begin(), crops_end.end()); seed = hash_combine(seed, shape_constant); return seed; } @@ -102,7 +97,6 @@ struct batch_to_space : public primitive_base { ob << block_shape; ob << crops_begin; ob << crops_end; - ob << out_size; ob << shape_constant; } @@ -111,7 +105,6 @@ struct batch_to_space : public primitive_base { ib >> block_shape; ib >> crops_begin; ib >> crops_end; - ib >> out_size; ib >> shape_constant; } }; diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/broadcast.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/broadcast.hpp index 4ce31e2b118e9b..00d26d974099fb 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/broadcast.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/broadcast.hpp @@ -57,25 +57,6 @@ struct broadcast : public primitive_base { broadcast() : primitive_base("", {}) {} - /// @brief Constructs broadcast primitive / layer. - /// - /// @param id An identifier of new primitive. - /// @param input An identifier of primitive which is an input for newly created - /// broadcast primitive. - /// @param broadcast_sizes Sizes of broadcast. Output size of current primitive - /// will match broadcast sizes (layout type will not change). - /// @param broadcast_axes Axes positions (0-based, from left to right) in output_shape - /// that are being broadcast. Values of broadcast_axes on remaining - /// axes must be greater (dividable) or equal to corresponding input - /// dimension values. - broadcast(const primitive_id& id, - const input_info& input, - const tensor& broadcast_sizes, - const std::vector& broadcast_axes = {}) - : primitive_base(id, {input}), - broadcast_sizes(broadcast_sizes), - broadcast_axes(broadcast_axes) {} - /// @brief Constructs broadcast primitive / layer with static target_shape. /// /// @param id An identifier of new primitive. @@ -99,9 +80,7 @@ struct broadcast : public primitive_base { : primitive_base(id, {input}), target_shape(target_shape), axes_mapping(axes_mapping), - broadcast_mode(broadcast_spec), - broadcast_sizes(target_shape.empty() ? tensor(1) : tensor(0)), - broadcast_axes({}) {} + broadcast_mode(broadcast_spec) {} /// @brief Constructs broadcast primitive / layer with dynamic target_shape. broadcast(const primitive_id& id, @@ -112,9 +91,7 @@ struct broadcast : public primitive_base { : primitive_base(id, {input, target_shape_id}), target_shape({}), axes_mapping(axes_mapping), - broadcast_mode(broadcast_spec), - broadcast_sizes({}), - broadcast_axes({}) {} + broadcast_mode(broadcast_spec) {} /// @brief The shape of the output tensor. ov::Shape target_shape; @@ -122,17 +99,11 @@ struct broadcast : public primitive_base { ov::AxisSet axes_mapping; /// @brief Broadcast mode to use for determining broadcast axes. ov::op::BroadcastModeSpec broadcast_mode; - /// @brief Expected sizes of output from broadcast primitive. - tensor broadcast_sizes; - /// @brief Array of axes positions from output shape (0-based, from left to right) - /// along which broadcast should happen. - std::vector broadcast_axes; ov::PartialShape output_pshape = ov::PartialShape::dynamic(); size_t hash() const override { size_t seed = primitive::hash(); - seed = hash_range(seed, broadcast_axes.begin(), broadcast_axes.end()); seed = hash_range(seed, axes_mapping.begin(), axes_mapping.end()); return seed; } @@ -145,7 +116,6 @@ struct broadcast : public primitive_base { return axes_mapping == rhs_casted.axes_mapping && broadcast_mode == rhs_casted.broadcast_mode && - broadcast_sizes == rhs_casted.broadcast_sizes && output_pshape == rhs_casted.output_pshape; } @@ -154,8 +124,6 @@ struct broadcast : public primitive_base { ob << target_shape; ob << axes_mapping; ob << make_data(&broadcast_mode, sizeof(ov::op::BroadcastModeSpec)); - ob << broadcast_sizes; - ob << broadcast_axes; ob << output_pshape; } @@ -164,8 +132,6 @@ struct broadcast : public primitive_base { ib >> target_shape; ib >> axes_mapping; ib >> make_data(&broadcast_mode, sizeof(ov::op::BroadcastModeSpec)); - ib >> broadcast_sizes; - ib >> broadcast_axes; ib >> output_pshape; } }; diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/embedding_bag.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/embedding_bag.hpp index ba85b5a7840cf9..5131fb803f63c8 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/embedding_bag.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/embedding_bag.hpp @@ -29,9 +29,8 @@ struct embedding_bag : public primitive_base { embedding_bag(const primitive_id& id, const std::vector& inputs, const embedding_bag_type& type, - const tensor& output_shape, const int32_t default_index = -1) - : primitive_base(id, inputs), type(type), output_shape(output_shape), default_index(default_index) {} + : primitive_base(id, inputs), type(type), default_index(default_index) {} /// @brief Type of EmbeddingBag operation embedding_bag_type type; @@ -60,14 +59,12 @@ struct embedding_bag : public primitive_base { void save(BinaryOutputBuffer& ob) const override { primitive_base::save(ob); ob << make_data(&type, sizeof(embedding_bag_type)); - ob << output_shape; ob << default_index; } void load(BinaryInputBuffer& ib) override { primitive_base::load(ib); ib >> make_data(&type, sizeof(embedding_bag_type)); - ib >> output_shape; ib >> default_index; } }; diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/experimental_detectron_detection_output.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/experimental_detectron_detection_output.hpp index 081acee527fe87..7bed9155ca9718 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/experimental_detectron_detection_output.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/experimental_detectron_detection_output.hpp @@ -37,8 +37,6 @@ struct experimental_detectron_detection_output : public primitive_base deltas_weights) - : primitive_base{id, - {input_rois, input_deltas, input_scores, input_im_info, output_classes, output_scores}}, - output_classes{output_classes.pid}, - output_scores{output_scores.pid}, + : primitive_base{id, {input_rois, input_deltas, input_scores, input_im_info}}, score_threshold{score_threshold}, nms_threshold{nms_threshold}, num_classes{num_classes}, @@ -60,34 +55,6 @@ struct experimental_detectron_detection_output : public primitive_base deltas_weights) - : primitive_base{id, - {input_rois, input_deltas, input_scores, input_im_info}}, - output_classes{}, - output_scores{}, - score_threshold{score_threshold}, - nms_threshold{nms_threshold}, - num_classes{num_classes}, - post_nms_count{post_nms_count}, - max_detections_per_image{max_detections_per_image}, - class_agnostic_box_regression{class_agnostic_box_regression}, - max_delta_log_wh{max_delta_log_wh}, - deltas_weights{std::move(deltas_weights)} {} - - primitive_id output_classes; - primitive_id output_scores; float score_threshold = 0.0f; float nms_threshold = 0.0f; int num_classes = 0; @@ -107,8 +74,6 @@ struct experimental_detectron_detection_output : public primitive_base::save(ob); - ob << output_classes; - ob << output_scores; ob << score_threshold; ob << nms_threshold; ob << num_classes; @@ -148,8 +109,6 @@ struct experimental_detectron_detection_output : public primitive_base::load(ib); - ib >> output_classes; - ib >> output_scores; ib >> score_threshold; ib >> nms_threshold; ib >> num_classes; @@ -159,17 +118,5 @@ struct experimental_detectron_detection_output : public primitive_base> max_delta_log_wh; ib >> deltas_weights; } - -protected: - std::vector get_dependencies() const override { - std::vector ret; - if (!output_classes.empty()) - ret.emplace_back(output_classes); - - if (!output_scores.empty()) - ret.emplace_back(output_scores); - - return ret; - } }; } // namespace cldnn diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/experimental_detectron_generate_proposals_single_image.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/experimental_detectron_generate_proposals_single_image.hpp index 99817ba14cbb07..e179b76ddebfb2 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/experimental_detectron_generate_proposals_single_image.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/experimental_detectron_generate_proposals_single_image.hpp @@ -26,23 +26,6 @@ struct experimental_detectron_generate_proposals_single_image /// @param nms_threshold threshold to be used in NonMaxSuppression stage /// @param pre_nms_count number of top-n proposals before NMS /// @param post_nms_count number of top-n proposals after NMS - experimental_detectron_generate_proposals_single_image(const primitive_id& id, - const input_info& input_im_info, - const input_info& input_anchors, - const input_info& input_deltas, - const input_info& input_scores, - const input_info& output_roi_scores, - float min_size, - float nms_threshold, - int64_t pre_nms_count, - int64_t post_nms_count) : - primitive_base{id, {input_im_info, input_anchors, input_deltas, input_scores, output_roi_scores}}, - output_roi_scores{output_roi_scores.pid}, - min_size{min_size}, - nms_threshold{nms_threshold}, - pre_nms_count{pre_nms_count}, - post_nms_count{post_nms_count} {} - experimental_detectron_generate_proposals_single_image(const primitive_id& id, const input_info& input_im_info, const input_info& input_anchors, @@ -53,13 +36,11 @@ struct experimental_detectron_generate_proposals_single_image int64_t pre_nms_count, int64_t post_nms_count) : primitive_base{id, {input_im_info, input_anchors, input_deltas, input_scores}}, - output_roi_scores{}, min_size{min_size}, nms_threshold{nms_threshold}, pre_nms_count{pre_nms_count}, post_nms_count{post_nms_count} {} - primitive_id output_roi_scores; float min_size = 0.0f; float nms_threshold = 0.0f; int64_t pre_nms_count = 0; @@ -71,7 +52,6 @@ struct experimental_detectron_generate_proposals_single_image seed = hash_combine(seed, nms_threshold); seed = hash_combine(seed, pre_nms_count); seed = hash_combine(seed, post_nms_count); - seed = hash_combine(seed, output_roi_scores.empty()); return seed; } @@ -84,13 +64,11 @@ struct experimental_detectron_generate_proposals_single_image return min_size == rhs_casted.min_size && nms_threshold == rhs_casted.nms_threshold && pre_nms_count == rhs_casted.pre_nms_count && - post_nms_count == rhs_casted.post_nms_count && - output_roi_scores.empty() == rhs_casted.output_roi_scores.empty(); + post_nms_count == rhs_casted.post_nms_count; } void save(BinaryOutputBuffer& ob) const override { primitive_base::save(ob); - ob << output_roi_scores; ob << min_size; ob << nms_threshold; ob << pre_nms_count; @@ -99,19 +77,10 @@ struct experimental_detectron_generate_proposals_single_image void load(BinaryInputBuffer& ib) override { primitive_base::load(ib); - ib >> output_roi_scores; ib >> min_size; ib >> nms_threshold; ib >> pre_nms_count; ib >> post_nms_count; } - -protected: - std::vector get_dependencies() const override { - std::vector ret; - if (!output_roi_scores.empty()) - ret.push_back(output_roi_scores); - return ret; - } }; } // namespace cldnn diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/extract_image_patches.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/extract_image_patches.hpp index cd5ded9872c91c..49c6795f1e78f0 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/extract_image_patches.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/extract_image_patches.hpp @@ -28,14 +28,12 @@ struct extract_image_patches : public primitive_base { const ov::Shape& sizes, const ov::Strides& strides, const ov::Shape& rates, - const ov::op::PadType& auto_pad, - const tensor& output_shape = tensor{}) + const ov::op::PadType& auto_pad) : primitive_base(id, {input}), sizes(sizes), strides(strides), rates(rates), - auto_pad(auto_pad), - output_shape(output_shape) {} + auto_pad(auto_pad) {} /// @brief Vector with sizes ov::Shape sizes; @@ -45,8 +43,6 @@ struct extract_image_patches : public primitive_base { ov::Shape rates; /// @brief Mode how the padding is calculated ov::op::PadType auto_pad; - /// @brief Shape of output layout - tensor output_shape; size_t hash() const override { size_t seed = primitive::hash(); @@ -75,7 +71,6 @@ struct extract_image_patches : public primitive_base { ob << strides; ob << rates; ob << make_data(&auto_pad, sizeof(ov::op::PadType)); - ob << output_shape; } void load(BinaryInputBuffer& ib) override { @@ -84,7 +79,6 @@ struct extract_image_patches : public primitive_base { ib >> strides; ib >> rates; ib >> make_data(&auto_pad, sizeof(ov::op::PadType)); - ib >> output_shape; } }; } // namespace cldnn diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/eye.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/eye.hpp index 40b8905f16d495..85e6d59531d24b 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/eye.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/eye.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2022-2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -17,17 +17,14 @@ struct eye : public primitive_base { /// @brief Constructs eye primitive. /// @param id This primitive id. /// @param inputs List of primitive ids. - /// @param output_shape Tensor output shape /// @param ext_prim_id Primitive extra id (friendly name) /// @param shift Eye diagonal /// @param output_type Tensor output type eye(const primitive_id& id, const std::vector& inputs, - const tensor& output_shape, const int32_t shift, const cldnn::data_types output_type) : primitive_base{id, inputs, 1, {optional_data_type(output_type)}}, - output_shape{output_shape}, shift{shift} {} tensor output_shape; @@ -50,13 +47,11 @@ struct eye : public primitive_base { void save(BinaryOutputBuffer& ob) const override { primitive_base::save(ob); - ob << output_shape; ob << shift; } void load(BinaryInputBuffer& ib) override { primitive_base::load(ib); - ib >> output_shape; ib >> shift; } }; diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/generate_proposals.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/generate_proposals.hpp index 2c056c7c21e274..643333cffb9582 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/generate_proposals.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/generate_proposals.hpp @@ -3,6 +3,7 @@ // #pragma once +#include "openvino/op/generate_proposals.hpp" #include "primitive.hpp" #include "openvino/op/generate_proposals.hpp" #include @@ -18,52 +19,16 @@ struct generate_proposals /// @brief Constructs generate_proposals primitive /// @param id This primitive id - /// @param input_im_info image size info - /// @param input_anchors anchors - /// @param input_deltas deltas for anchors - /// @param input_scores proposal scores - /// @param output_rois_scores ROIs scores - /// @param output_rois_num number of proposed ROIs - /// @param min_size minimum box width and height - /// @param nms_threshold threshold to be used in NonMaxSuppression stage - /// @param pre_nms_count number of top-n proposals before NMS - /// @param post_nms_count number of top-n proposals after NMS - /// @param normalized indicates whether proposal bboxes are normalized - /// @param nms_eta eta parameter for adaptive NMS - /// @param roi_num_type type of 3rd output elements - generate_proposals(const primitive_id& id, - const std::vector& inputs, - float min_size, - float nms_threshold, - int64_t pre_nms_count, - int64_t post_nms_count, - bool normalized, - float nms_eta, - const data_types roi_num_type) : - primitive_base{id, inputs}, - output_rois_scores{inputs[4].pid}, - output_rois_num{inputs[5].pid}, - roi_num_type{roi_num_type} { - attrs.min_size = min_size; - attrs.nms_threshold = nms_threshold; - attrs.pre_nms_count = pre_nms_count; - attrs.post_nms_count = post_nms_count; - attrs.normalized = normalized; - attrs.nms_eta = nms_eta; - } - + /// @param inputs input primitive ids + /// @param attr Attributes of GenerateProposal op generate_proposals(const primitive_id& id, const std::vector& inputs, const ov::op::v9::GenerateProposals::Attributes& attrs) : - primitive_base{id, inputs, {}}, + primitive_base{id, inputs}, attrs{attrs} {} ov::op::v9::GenerateProposals::Attributes attrs; - primitive_id output_rois_scores; - primitive_id output_rois_num; - data_types roi_num_type = data_types::undefined; - size_t hash() const override { size_t seed = primitive::hash(); seed = hash_combine(seed, attrs.min_size); @@ -72,9 +37,6 @@ struct generate_proposals seed = hash_combine(seed, attrs.post_nms_count); seed = hash_combine(seed, attrs.normalized); seed = hash_combine(seed, attrs.nms_eta); - seed = hash_combine(seed, roi_num_type); - seed = hash_combine(seed, output_rois_scores.empty()); - seed = hash_combine(seed, output_rois_num.empty()); return seed; } @@ -90,47 +52,28 @@ struct generate_proposals cmp_fields(attrs.pre_nms_count) && cmp_fields(attrs.post_nms_count) && cmp_fields(attrs.normalized) && - cmp_fields(attrs.nms_eta) && - cmp_fields(roi_num_type) && - cmp_fields(output_rois_scores.empty()) && - cmp_fields(output_rois_num.empty()); + cmp_fields(attrs.nms_eta); #undef cmp_fields } void save(BinaryOutputBuffer& ob) const override { primitive_base::save(ob); - ob << output_rois_scores; - ob << output_rois_num; ob << attrs.min_size; ob << attrs.nms_threshold; ob << attrs.pre_nms_count; ob << attrs.post_nms_count; ob << attrs.normalized; ob << attrs.nms_eta; - ob << make_data(&roi_num_type, sizeof(data_types)); } void load(BinaryInputBuffer& ib) override { primitive_base::load(ib); - ib >> output_rois_scores; - ib >> output_rois_num; ib >> attrs.min_size; ib >> attrs.nms_threshold; ib >> attrs.pre_nms_count; ib >> attrs.post_nms_count; ib >> attrs.normalized; ib >> attrs.nms_eta; - ib >> make_data(&roi_num_type, sizeof(data_types)); - } - -protected: - std::vector get_dependencies() const override { - std::vector ret; - if (!output_rois_scores.empty()) - ret.push_back(output_rois_scores); - if (!output_rois_num.empty()) - ret.push_back(output_rois_num); - return ret; } }; } // namespace cldnn diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/matrix_nms.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/matrix_nms.hpp index 475922183b24a1..2543c9ef131f1e 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/matrix_nms.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/matrix_nms.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2022-2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -23,15 +23,6 @@ struct matrix_nms : public primitive_base { /// @param second_output primitive id. /// @param third_output primitive id. /// @param attrs operation attributes. - matrix_nms(const primitive_id& id, - const input_info& boxes, - const input_info& scores, - const input_info& second_output, - const input_info& third_output, - const ov::op::v8::MatrixNms::Attributes& attrs) - : primitive_base(id, {boxes, scores, second_output, third_output}), - attribs(attrs) {} - matrix_nms(const primitive_id& id, const input_info& boxes, const input_info& scores, diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/multiclass_nms.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/multiclass_nms.hpp index 910d57021bbddd..89a95f9cd4d065 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/multiclass_nms.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/multiclass_nms.hpp @@ -19,28 +19,20 @@ struct multiclass_nms : public primitive_base { multiclass_nms() : primitive_base("", {}) {} + /// @brief Constructs multiclass_nms primitive + /// @param id This primitive id + /// @param boxes Boxes coordinates + /// @param scores Box scores + /// @param roisnum Number of boxes in each batch for MulticlassNMS-9 (empty string for MulticlassNMS-8) + /// @param attrs Attributes multiclass_nms(const primitive_id& id, const std::vector inputs, - const ov::op::util::MulticlassNmsBase::Attributes& attrs) + const ov::op::util::MulticlassNmsBase::Attributes& attrs, + const padding& output_padding = {}) : primitive_base{id, inputs}, - attrs(attrs) { - // Legacy multi-output - if (inputs.size() == 5) { - output_selected_indices = inputs[InputIdx::OutputSelectedIndices].pid; - output_selected_num = inputs[InputIdx::OutputSelectedNum].pid; - has_roisnum = !inputs[InputIdx::RoisNum].pid.empty(); - if (inputs[InputIdx::RoisNum].pid.empty()) { - this->input.erase(this->input.begin() + 2); - } - } else { - has_roisnum = inputs.size() == 3; - } - } + attrs(attrs) {} - primitive_id output_selected_indices{}; - primitive_id output_selected_num{}; ov::op::util::MulticlassNmsBase::Attributes attrs; - bool has_roisnum{false}; size_t hash() const override { size_t seed = primitive::hash(); @@ -101,16 +93,6 @@ struct multiclass_nms : public primitive_base { ib >> attrs.nms_eta; } -protected: - std::vector get_dependencies() const override { - std::vector ret; - if (!output_selected_indices.empty()) - ret.emplace_back(output_selected_indices); - if (!output_selected_num.empty()) - ret.emplace_back(output_selected_num); - return ret; - } - private: enum InputIdx : size_t { Boxes = 0, diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/non_max_suppression.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/non_max_suppression.hpp index b2497c6d711d7b..f18314386ebd25 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/non_max_suppression.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/non_max_suppression.hpp @@ -4,7 +4,6 @@ #pragma once #include "primitive.hpp" -#include "intel_gpu/graph/serialization/string_serializer.hpp" #include @@ -42,50 +41,25 @@ struct non_max_suppression : public primitive_base { /// @param second_output Id of primitive specifying output for scores for each selected box. /// @param third_output Id of primitive specifying output for total number of selected boxes. non_max_suppression(const primitive_id& id, - const input_info& boxes_positions, - const input_info& boxes_score, + const std::vector& inputs, int selected_indices_num, bool center_point_box = false, bool sort_result_descending = true, - const primitive_id& num_select_per_class = primitive_id(), - const primitive_id& iou_threshold = primitive_id(), - const primitive_id& score_threshold = primitive_id(), - const primitive_id& soft_nms_sigma = primitive_id(), - const primitive_id& second_output = primitive_id(), - const primitive_id& third_output = primitive_id(), const size_t num_outputs = 1) - : primitive_base(id, {boxes_positions, boxes_score}, num_outputs, {optional_data_type()}) + : primitive_base(id, inputs, num_outputs) , selected_indices_num(selected_indices_num) , center_point_box(center_point_box) - , sort_result_descending(sort_result_descending) - , num_select_per_class(num_select_per_class) - , iou_threshold(iou_threshold) - , score_threshold(score_threshold) - , soft_nms_sigma(soft_nms_sigma) - , second_output(second_output) - , third_output(third_output) {} + , sort_result_descending(sort_result_descending) {} int selected_indices_num; bool center_point_box; bool sort_result_descending; - primitive_id num_select_per_class; - primitive_id iou_threshold; - primitive_id score_threshold; - primitive_id soft_nms_sigma; - primitive_id second_output; - primitive_id third_output; Rotation rotation{Rotation::NONE}; size_t hash() const override { size_t seed = primitive::hash(); seed = hash_combine(seed, center_point_box); seed = hash_combine(seed, sort_result_descending); - seed = hash_combine(seed, num_select_per_class.empty()); - seed = hash_combine(seed, iou_threshold.empty()); - seed = hash_combine(seed, score_threshold.empty()); - seed = hash_combine(seed, soft_nms_sigma.empty()); - seed = hash_combine(seed, second_output.empty()); - seed = hash_combine(seed, third_output.empty()); seed = hash_combine(seed, rotation); return seed; } @@ -100,45 +74,15 @@ struct non_max_suppression : public primitive_base { return cmp_fields(selected_indices_num) && cmp_fields(center_point_box) && cmp_fields(sort_result_descending) && - cmp_fields(num_select_per_class.empty()) && - cmp_fields(iou_threshold.empty()) && - cmp_fields(score_threshold.empty()) && - cmp_fields(soft_nms_sigma.empty()) && - cmp_fields(second_output.empty()) && - cmp_fields(third_output.empty()) && cmp_fields(rotation); #undef cmp_fields } - std::vector get_dependencies() const override { - std::vector ret; - if (!num_select_per_class.empty()) - ret.push_back(num_select_per_class); - if (!iou_threshold.empty()) - ret.push_back(iou_threshold); - if (!score_threshold.empty()) - ret.push_back(score_threshold); - if (!soft_nms_sigma.empty()) - ret.push_back(soft_nms_sigma); - if (!second_output.empty()) - ret.push_back(second_output); - if (!third_output.empty()) - ret.push_back(third_output); - - return ret; - } - void save(BinaryOutputBuffer& ob) const override { primitive_base::save(ob); ob << selected_indices_num; ob << center_point_box; ob << sort_result_descending; - ob << num_select_per_class; - ob << iou_threshold; - ob << score_threshold; - ob << soft_nms_sigma; - ob << second_output; - ob << third_output; ob << make_data(&rotation, sizeof(rotation)); } @@ -147,12 +91,6 @@ struct non_max_suppression : public primitive_base { ib >> selected_indices_num; ib >> center_point_box; ib >> sort_result_descending; - ib >> num_select_per_class; - ib >> iou_threshold; - ib >> score_threshold; - ib >> soft_nms_sigma; - ib >> second_output; - ib >> third_output; ib >> make_data(&rotation, sizeof(rotation)); } }; diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/one_hot.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/one_hot.hpp index c66bbedc80ee0c..7dbe6f062094fe 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/one_hot.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/one_hot.hpp @@ -35,25 +35,6 @@ struct one_hot : public primitive_base { one_hot() : primitive_base("", {}) {} - /// @brief Constructs one-hot primitive layer. - /// @param id An identifier of new primitive. - /// @param input An identifier of primitive which is an input for newly created one-hot primitive. - /// @param shape Size of the output primitive. - /// @param one_hot_axis One-hot axis position (0-based, from left to right) in shape. - one_hot(const primitive_id& id, - const input_info& input, - const tensor& shape, - const int64_t& one_hot_axis, - const int64_t& depth, - const float& on_value = 1.0f, - const float& off_value = 0.0f) - : primitive_base(id, {input}) - , shape(shape) - , one_hot_axis(one_hot_axis) - , depth(depth) - , on_value(on_value) - , off_value(off_value) {} - /// @brief Constructs one-hot primitive layer. /// @param id An identifier of new primitive. /// @param input An identifier of primitive which is an input for newly created one-hot primitive. @@ -62,21 +43,17 @@ struct one_hot : public primitive_base { /// @param one_hot_axis One-hot axis position (0-based, from left to right) in shape. one_hot(const primitive_id& id, const input_info& input, - const tensor& shape, const data_types output_dt, const int64_t& one_hot_axis, const int64_t& depth, const float& on_value = 1.0f, const float& off_value = 0.0f) : primitive_base(id, {input}, 1, {optional_data_type{output_dt}}) - , shape(shape) , one_hot_axis(one_hot_axis) , depth(depth) , on_value(on_value) , off_value(off_value) {} - /// @brief Output size reference. - tensor shape; /// @brief One-hot axis position in output shape (0-based, from left to right). int64_t one_hot_axis = 0; /// @brief The number of classes and thus the size of the one-hot dimension @@ -108,7 +85,6 @@ struct one_hot : public primitive_base { void save(BinaryOutputBuffer& ob) const override { primitive_base::save(ob); - ob << shape; ob << one_hot_axis; ob << depth; ob << on_value; @@ -117,7 +93,6 @@ struct one_hot : public primitive_base { void load(BinaryInputBuffer& ib) override { primitive_base::load(ib); - ib >> shape; ib >> one_hot_axis; ib >> depth; ib >> on_value; diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/pooling.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/pooling.hpp index 5cecc8638b19c9..689713d5871948 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/pooling.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/pooling.hpp @@ -57,8 +57,7 @@ struct pooling : public primitive_base { pads_begin(pads_begin), pads_end(pads_end), auto_pad(auto_pad), - rounding_type(rounding_type), - with_output_size(false) {} + rounding_type(rounding_type) {} /// @brief Constructs pooling primitive with known output shape. /// @param id This primitive id. @@ -84,9 +83,7 @@ struct pooling : public primitive_base { pads_begin(pads_begin), pads_end(pads_end), auto_pad(ov::op::PadType::EXPLICIT), - rounding_type(ov::op::RoundingType::CEIL), - with_output_size(true), - output_size(output_size) {} + rounding_type(ov::op::RoundingType::CEIL) {} /// @brief Constructs pooling primitive that supports MaxPool features from opset8 (dilation and indices output). /// @param id This primitive id. @@ -112,7 +109,6 @@ struct pooling : public primitive_base { ov::op::RoundingType rounding_type, int64_t axis, data_types index_element_type, - tensor output_size, const data_types output_data_type) : primitive_base(id, {input, indices_output}, 1, {optional_data_type{output_data_type}}), indices_output(indices_output.pid), @@ -125,8 +121,6 @@ struct pooling : public primitive_base { auto_pad(auto_pad), rounding_type(rounding_type), axis(axis), - with_output_size(true), - output_size(output_size), index_element_type(index_element_type), maxPoolOpset8Features(true) {} @@ -150,10 +144,6 @@ struct pooling : public primitive_base { ov::op::RoundingType rounding_type = ov::op::RoundingType::CEIL; /// @brief first dimension of input that should be used to calculate the upper bound of index output. int64_t axis = 0; - /// @brief Indicates that the primitive has user-defined output size (non-zero value). - bool with_output_size = true; - /// @brief User-defined output data size of the primitive (w/o padding). - tensor output_size; /// @brief type of index output data_types index_element_type = data_types::i32; bool maxPoolOpset8Features{false}; @@ -209,8 +199,6 @@ struct pooling : public primitive_base { ob << make_data(&auto_pad, sizeof(ov::op::PadType)); ob << make_data(&rounding_type, sizeof(ov::op::RoundingType)); ob << axis; - ob << with_output_size; - ob << output_size; ob << make_data(&index_element_type, sizeof(data_types)); ob << maxPoolOpset8Features; } @@ -227,8 +215,6 @@ struct pooling : public primitive_base { ib >> make_data(&auto_pad, sizeof(ov::op::PadType)); ib >> make_data(&rounding_type, sizeof(ov::op::RoundingType)); ib >> axis; - ib >> with_output_size; - ib >> output_size; ib >> make_data(&index_element_type, sizeof(data_types)); ib >> maxPoolOpset8Features; } diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/reverse.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/reverse.hpp index aaa11b219d76d8..45ec3f43e1a435 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/reverse.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/reverse.hpp @@ -6,9 +6,9 @@ #include "primitive.hpp" -namespace cldnn { +#include "openvino/op/reverse.hpp" -enum class reverse_mode : uint32_t { index, mask }; +namespace cldnn { struct reverse : public primitive_base { CLDNN_DECLARE_PRIMITIVE(reverse) @@ -23,11 +23,11 @@ struct reverse : public primitive_base { reverse(const primitive_id& id, const input_info& input, const input_info& axes, - const reverse_mode mode) + const ov::op::v1::Reverse::Mode& mode) : primitive_base{id, {input, axes}}, mode{mode} {} - reverse_mode mode{reverse_mode::index}; + ov::op::v1::Reverse::Mode mode; size_t hash() const override { size_t seed = primitive::hash(); @@ -46,12 +46,12 @@ struct reverse : public primitive_base { void save(BinaryOutputBuffer& ob) const override { primitive_base::save(ob); - ob << make_data(&mode, sizeof(reverse_mode)); + ob << make_data(&mode, sizeof(ov::op::v1::Reverse::Mode)); } void load(BinaryInputBuffer& ib) override { primitive_base::load(ib); - ib >> make_data(&mode, sizeof(reverse_mode)); + ib >> make_data(&mode, sizeof(ov::op::v1::Reverse::Mode)); } }; } // namespace cldnn diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/roll.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/roll.hpp index 671c60e57688bf..8b9a8f4a888ecc 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/roll.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/roll.hpp @@ -15,16 +15,6 @@ struct roll : primitive_base { roll() : primitive_base("", {}) {} - /// @brief Constructs roll primitive. - /// @param id This primitive id. - /// @param input Input primitive id. - /// @param shift Tensor which specifies the number of places by which the elements are shifted. - roll(const primitive_id& id, - const input_info& input, - const tensor& shift) - : primitive_base(id, {input}), - shift(shift) {} - /// @brief Constructs roll primitive for dynamic shape. /// @param id This primitive id. /// @param input Input primitive id. @@ -37,16 +27,12 @@ struct roll : primitive_base { : primitive_base(id, {input}), raw_shift(raw_shift), raw_axes(raw_axes) {} - /// @brief Tensor which specifies the number of places by which the elements are shifted. - tensor shift; - /// @brief Raw shift/axes vector to calculate normalized shift when input shape becomes static std::vector raw_shift; std::vector raw_axes; size_t hash() const override { size_t seed = primitive::hash(); - seed = hash_combine(seed, shift.hash()); seed = hash_range(seed, raw_shift.begin(), raw_shift.end()); seed = hash_range(seed, raw_axes.begin(), raw_axes.end()); return seed; @@ -58,19 +44,20 @@ struct roll : primitive_base { auto rhs_casted = downcast(rhs); - return shift == rhs_casted.shift && - raw_shift == rhs_casted.raw_shift && + return raw_shift == rhs_casted.raw_shift && raw_axes == rhs_casted.raw_axes; } void save(BinaryOutputBuffer& ob) const override { primitive_base::save(ob); - ob << shift; + ob << raw_shift; + ob << raw_axes; } void load(BinaryInputBuffer& ib) override { primitive_base::load(ib); - ib >> shift; + ib >> raw_shift; + ib >> raw_axes; } }; diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/space_to_batch.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/space_to_batch.hpp index 900804883f1a64..abe1e8643dcc9e 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/space_to_batch.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/space_to_batch.hpp @@ -3,6 +3,7 @@ // #pragma once +#include "intel_gpu/runtime/utils.hpp" #include "primitive.hpp" namespace cldnn { @@ -47,38 +48,33 @@ struct space_to_batch : public primitive_base { /// @param out_size Size of output tensor. space_to_batch(const primitive_id& id, const input_info& input, - const tensor& block_shape, - const tensor& pads_begin, - const tensor& pads_end, - const tensor& out_size) + const std::vector& block_shape, + const std::vector& pads_begin, + const std::vector& pads_end) : primitive_base(id, {input}), block_shape(block_shape), pads_begin(pads_begin), pads_end(pads_end), - out_size(out_size), shape_constant(1) {} space_to_batch(const primitive_id& id, - const std::vector& inputs, - const tensor& out_size) + const std::vector& inputs) : primitive_base(id, inputs), - block_shape(tensor()), - pads_begin(tensor()), - pads_end(tensor()), - out_size(out_size), + block_shape({}), + pads_begin({}), + pads_end({}), shape_constant(0) {} - tensor block_shape; - tensor pads_begin; - tensor pads_end; - tensor out_size; + std::vector block_shape; + std::vector pads_begin; + std::vector pads_end; int64_t shape_constant; size_t hash() const override { size_t seed = primitive::hash(); - seed = hash_combine(seed, block_shape.hash()); - seed = hash_combine(seed, pads_begin.hash()); - seed = hash_combine(seed, pads_end.hash()); + seed = hash_range(seed, block_shape.begin(), block_shape.end()); + seed = hash_range(seed, pads_begin.begin(), pads_begin.end()); + seed = hash_range(seed, pads_end.begin(), pads_end.end()); seed = hash_combine(seed, shape_constant); return seed; } @@ -100,7 +96,6 @@ struct space_to_batch : public primitive_base { ob << block_shape; ob << pads_begin; ob << pads_end; - ob << out_size; ob << shape_constant; } @@ -109,7 +104,6 @@ struct space_to_batch : public primitive_base { ib >> block_shape; ib >> pads_begin; ib >> pads_end; - ib >> out_size; ib >> shape_constant; } }; diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/swiglu.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/swiglu.hpp index 0aa30c619e65a7..7be2f98338a886 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/swiglu.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/swiglu.hpp @@ -26,20 +26,17 @@ struct swiglu : public primitive_base { const int64_t& axis, const int64_t& split_lengths, const ov::intel_gpu::op::SwiGLU::GluType glu_type, - const size_t split_to_glu_idx, - const tensor output_size) + const size_t split_to_glu_idx) : primitive_base(id, {input}), axis(axis), split_lengths(split_lengths), glu_type(glu_type), - split_to_glu_idx(split_to_glu_idx), - output_size(output_size) {} + split_to_glu_idx(split_to_glu_idx) {} int64_t axis = 0; int64_t split_lengths = 0; ov::intel_gpu::op::SwiGLU::GluType glu_type = ov::intel_gpu::op::SwiGLU::GluType::Swish; size_t split_to_glu_idx = 0; - tensor output_size; size_t hash() const override { size_t seed = primitive::hash(); @@ -63,7 +60,6 @@ struct swiglu : public primitive_base { primitive_base::save(ob); ob << axis; ob << split_lengths; - ob << output_size; ob << make_data(&glu_type, sizeof(glu_type)); ob << split_to_glu_idx; } @@ -72,7 +68,6 @@ struct swiglu : public primitive_base { primitive_base::load(ib); ib >> axis; ib >> split_lengths; - ib >> output_size; ib >> make_data(&glu_type, sizeof(glu_type)); ib >> split_to_glu_idx; } diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp index febcabd57efba0..6a8da44dca315d 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/internal_properties.hpp @@ -45,7 +45,6 @@ static constexpr Property enable_memory_pool{"GPU_ static constexpr Property optimize_data{"GPU_OPTIMIZE_DATA"}; static constexpr Property allow_static_input_reorder{"GPU_ALLOW_STATIC_INPUT_REORDER"}; static constexpr Property partial_build_program{"GPU_PARTIAL_BUILD"}; -static constexpr Property allow_new_shape_infer{"GPU_ALLOW_NEW_SHAPE_INFER"}; static constexpr Property use_only_static_kernels_for_dynamic_shape{"GPU_USE_ONLY_STATIC_KERNELS_FOR_DYNAMIC_SHAPE"}; static constexpr Property dump_graphs{"GPU_DUMP_GRAPHS"}; static constexpr Property, PropertyMutability::RW> custom_outputs{"GPU_CUSTOM_OUTPUTS"}; diff --git a/src/plugins/intel_gpu/src/graph/activation.cpp b/src/plugins/intel_gpu/src/graph/activation.cpp index 6c1a57f8759478..e356bee74855a7 100644 --- a/src/plugins/intel_gpu/src/graph/activation.cpp +++ b/src/plugins/intel_gpu/src/graph/activation.cpp @@ -12,35 +12,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(activation) -layout activation_inst::calc_output_layout(activation_node const& node, kernel_impl_params const& impl_param) { - assert(static_cast(impl_param.desc->output_data_types[0]) == false && - "Output data type forcing is not supported for activation_node!"); - - auto input_node_layout = impl_param.get_non_padded_input_layout(); - auto desc = impl_param.typed_desc(); - auto func = desc->activation_function; - - std::vector activations_int8 = { - activation_func::none, - activation_func::negative, - activation_func::negation, - activation_func::relu, - activation_func::floor, - activation_func::clamp }; - - if (input_node_layout.data_type == data_types::i8 || input_node_layout.data_type == data_types::u8 || - input_node_layout.data_type == data_types::i32) { - if (std::find(activations_int8.begin(), activations_int8.end(), func) == activations_int8.end()) - CLDNN_ERROR_MESSAGE(desc->id, "Requested activation is not supported for integer type."); - } - - if (impl_param.has_fused_primitives()) { - input_node_layout.data_type = impl_param.get_output_element_type(); - } - - return input_node_layout; -} - std::string activation_inst::to_string(activation_node const& node) { auto node_info = node.desc_to_json(); auto desc = node.get_primitive(); diff --git a/src/plugins/intel_gpu/src/graph/adaptive_pooling.cpp b/src/plugins/intel_gpu/src/graph/adaptive_pooling.cpp index f003d1a44b62a2..a86486adb4fc00 100644 --- a/src/plugins/intel_gpu/src/graph/adaptive_pooling.cpp +++ b/src/plugins/intel_gpu/src/graph/adaptive_pooling.cpp @@ -13,12 +13,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(adaptive_pooling) -layout adaptive_pooling_inst::calc_output_layout(adaptive_pooling_node const& node, kernel_impl_params const& impl_param) { - const auto data_layout = impl_param.get_input_layout(); - const auto prim = impl_param.typed_desc(); - return {data_layout.data_type, data_layout.format, prim->output_size}; -} - template std::vector adaptive_pooling_inst::calc_output_layouts(adaptive_pooling_node const& /*node*/, const kernel_impl_params& impl_param) { std::vector layouts; diff --git a/src/plugins/intel_gpu/src/graph/arg_max_min.cpp b/src/plugins/intel_gpu/src/graph/arg_max_min.cpp index 63df47850c061d..94f3fc4ef0d657 100644 --- a/src/plugins/intel_gpu/src/graph/arg_max_min.cpp +++ b/src/plugins/intel_gpu/src/graph/arg_max_min.cpp @@ -14,58 +14,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(arg_max_min) -layout arg_max_min_inst::calc_output_layout(arg_max_min_node const& node, kernel_impl_params const& impl_param) { - auto desc = impl_param.typed_desc(); - auto input_layout = impl_param.get_input_layout(); - bool values_first = desc->values_first; - data_types output_data_type; - data_types output_idx_type; - output_data_type = desc->output_data_types[0].value_or(input_layout.data_type); - if (impl_param.input_layouts.size() == 3) { - output_idx_type = impl_param.get_input_layout(2).data_type; - } else { - output_idx_type = *(desc->output_data_types[0]); - } - auto size_check = [&](size_t tensor_size) { - if (desc->input.size() == 1 && values_first) - return; - size_t max_size; - // lowest integer not representable in floating point type = 2^(mantissa_bits + 1) + 1 - // https://stackoverflow.com/questions/3793838/which-is-the-first-integer-that-an-ieee-754-float-is-incapable-of-representing-e - if (output_idx_type == data_types::f32) { - max_size = (1 << std::numeric_limits::digits); - } else if (output_idx_type == data_types::f16) { - // mantissa_bits for fp16 = 10 - max_size = (1 << 11); - } else if (output_idx_type == data_types::u8) { - max_size = std::numeric_limits::max(); - } else if (output_idx_type == data_types::i32) { - max_size = std::numeric_limits::max(); - } else { - max_size = std::numeric_limits::max(); - } - - if (tensor_size > max_size) { - CLDNN_ERROR_GREATER_THAN(desc->id, - "Reduced tensor size", - tensor_size, - "Maximum output data type value", - max_size, - "Current output data type is unable to hold maximum index of a tensor."); - } - }; - for (auto dim : input_layout.get_dims()) { - size_check(dim); - } - auto format = input_layout.format; - auto sizes = input_layout.get_dims(); - if (desc->axis >= static_cast(sizes.size()) || desc->axis < 0) { - OPENVINO_THROW("Incorrect arg_max_min axis."); - } - sizes[desc->axis] = desc->top_k; - return layout{output_data_type, format, tensor(format::get_default_format(input_layout.get_rank()), sizes)}; -} - template std::vector arg_max_min_inst::calc_output_layouts(arg_max_min_node const& /*node*/, const kernel_impl_params& impl_param) { std::vector layouts; diff --git a/src/plugins/intel_gpu/src/graph/assign.cpp b/src/plugins/intel_gpu/src/graph/assign.cpp index 1e8e71dc34c988..3002172110d1da 100644 --- a/src/plugins/intel_gpu/src/graph/assign.cpp +++ b/src/plugins/intel_gpu/src/graph/assign.cpp @@ -15,10 +15,6 @@ assign_inst::typed_primitive_inst(network& network, const assign_node& node) : memory_state::variable{node.get_primitive()->variable_id, node.get_primitive()->user_specified_type} { } -layout assign_inst::calc_output_layout(const assign_node& node, kernel_impl_params const& impl_param) { - return impl_param.typed_desc()->output_layout; -} - std::string assign_inst::to_string(const assign_node& node) { auto node_info = node.desc_to_json(); json_composite assign_info; diff --git a/src/plugins/intel_gpu/src/graph/batch_to_space.cpp b/src/plugins/intel_gpu/src/graph/batch_to_space.cpp index c620b8e6c30f82..68031944f637d8 100644 --- a/src/plugins/intel_gpu/src/graph/batch_to_space.cpp +++ b/src/plugins/intel_gpu/src/graph/batch_to_space.cpp @@ -5,7 +5,6 @@ #include "batch_to_space_inst.h" #include "primitive_type_base.h" -#include "intel_gpu/runtime/error_handler.hpp" #include "json_object.h" #include #include @@ -15,68 +14,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(batch_to_space) -layout batch_to_space_inst::calc_output_layout(batch_to_space_node const& node, kernel_impl_params const& impl_param) { - auto desc = impl_param.typed_desc(); - - auto input_layout = impl_param.get_input_layout(); - auto input_format = input_layout.format; - - auto output_type = desc->output_data_types[0].value_or(input_layout.data_type); - - if (impl_param.has_fused_primitives()) - output_type = impl_param.get_output_element_type(); - - const size_t spatial_num = format::spatial_num(input_format); - - const auto& block_shape = desc->block_shape; - const auto& crops_begin = desc->crops_begin; - const auto& crops_end = desc->crops_end; - - if (block_shape.batch[0] != 1) - CLDNN_ERROR_MESSAGE(desc->id, - "block_shape[0] is expected to be 1. Actual block_shape[0] is " + - std::to_string(block_shape.batch[0])); - - if (crops_begin.batch[0] != 0) - CLDNN_ERROR_MESSAGE(desc->id, - "crops_begin[0] is expected to be 0. Actual crops_begin[0] is " + - std::to_string(crops_begin.batch[0])); - - if (crops_end.batch[0] != 0) - CLDNN_ERROR_MESSAGE(desc->id, - "crops_end[0] is expected to be 0. Actual crops_end[0] is " + - std::to_string(crops_end.batch[0])); - - size_t block_sizes_multiplied = block_shape.feature[0]; - for (size_t i = 0; i < spatial_num; ++i) - block_sizes_multiplied *= block_shape.spatial[i]; - - if (input_layout.batch() % block_sizes_multiplied != 0) - CLDNN_ERROR_MESSAGE(desc->id, - "The batch of the input tensor must be divisible by multiplied block sizes = " + - std::to_string(block_sizes_multiplied)); - - if (crops_begin.feature[0] + crops_end.feature[0] >= block_shape.feature[0] * input_layout.feature()) - CLDNN_ERROR_MESSAGE(desc->id, - "Output dimensions must be positive"); - - for (size_t i = 0; i < spatial_num; ++i) - if (crops_begin.spatial[i] + crops_end.spatial[i] >= block_shape.spatial[i] * input_layout.spatial(i)) - CLDNN_ERROR_MESSAGE(desc->id, - "Output dimensions must be positive"); - - return layout{output_type, input_format, desc->out_size}; -} - -static std::vector tensor_to_vec(const tensor& t, const format f) { - std::vector vec(cldnn::format::dimension(f)); - for (size_t i = 0; i < vec.size(); ++i) { - vec[i] = t.sizes()[i]; - } - std::reverse(vec.begin() + 2, vec.end()); - return vec; -} - template std::vector batch_to_space_inst::calc_output_layouts(batch_to_space_node const& /*node*/, const kernel_impl_params& impl_param) { auto desc = impl_param.typed_desc(); @@ -114,13 +51,9 @@ std::vector batch_to_space_inst::calc_output_layouts(batch_to_space_node std::unordered_map const_data; if (desc->shape_constant) { - auto block_sizes = tensor_to_vec(block_data, input0_format); - auto begin_sizes = tensor_to_vec(begin_data, input0_format); - auto end_sizes = tensor_to_vec(end_data, input0_format); - - auto block_values = static_cast(block_sizes.data()); - auto begin_values = static_cast(begin_sizes.data()); - auto end_values = static_cast(end_sizes.data()); + auto block_values = static_cast(block_data.data()); + auto begin_values = static_cast(begin_data.data()); + auto end_values = static_cast(end_data.data()); auto block_tensor = make_tensor({ block_shape, data_types::i32, input0_format }, block_values); auto begin_tensor = make_tensor({ begin_shape, data_types::i32, input0_format }, begin_values); diff --git a/src/plugins/intel_gpu/src/graph/border.cpp b/src/plugins/intel_gpu/src/graph/border.cpp index 09bf9f7d71f591..3e23b326415652 100644 --- a/src/plugins/intel_gpu/src/graph/border.cpp +++ b/src/plugins/intel_gpu/src/graph/border.cpp @@ -15,23 +15,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(border) -layout border_inst::calc_output_layout(border_node const& node, kernel_impl_params const& impl_param) { - assert(static_cast(impl_param.desc->output_data_types[0]) == false && - "Output data type forcing is not supported for border_node!"); - auto input_layout = impl_param.get_input_layout(); - auto input_format = input_layout.format; - auto desc = impl_param.typed_desc(); - - auto dims_format = format::adjust_to_rank(format::bfyx, input_layout.get_rank()); - auto new_dims = input_layout.get_dims(); - - for (size_t i = 0; i < new_dims.size(); ++i) { - new_dims[i] += (i < desc->pads_begin.size()) ? desc->pads_begin[i] : 0; - new_dims[i] += (i < desc->pads_end.size()) ? desc->pads_end[i] : 0; - } - return layout{ input_layout.data_type, input_format, tensor(dims_format, new_dims) }; -} - template std::vector border_inst::calc_output_layouts(border_node const& /*node*/, const kernel_impl_params& impl_param) { auto desc = impl_param.typed_desc(); diff --git a/src/plugins/intel_gpu/src/graph/broadcast.cpp b/src/plugins/intel_gpu/src/graph/broadcast.cpp index 741a3ad8a30661..3f32f05dc17ad7 100644 --- a/src/plugins/intel_gpu/src/graph/broadcast.cpp +++ b/src/plugins/intel_gpu/src/graph/broadcast.cpp @@ -5,7 +5,6 @@ #include "broadcast_inst.h" #include "broadcast_shape_inference.hpp" -#include "intel_gpu/runtime/error_handler.hpp" #include "json_object.h" #include "primitive_type_base.h" #include @@ -15,33 +14,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(broadcast) -layout broadcast_inst::calc_output_layout(broadcast_node const& node, kernel_impl_params const& impl_param) { - assert(static_cast(impl_param.desc->output_data_types[0]) == false && - "Output data type forcing is not supported for broadcast_node!"); - auto input_layout = impl_param.get_input_layout(); - auto desc = impl_param.typed_desc(); - - auto output_type = input_layout.data_type; - if (impl_param.has_fused_primitives()) { - output_type = impl_param.get_output_element_type(); - } - - if (!desc->target_shape.empty()) { - std::vector dims_converted(desc->target_shape.size()); - std::transform(desc->target_shape.begin(), desc->target_shape.end(), dims_converted.begin(), [](size_t value) { - return static_cast(value); - }); - for (size_t i = dims_converted.size(); i < 4; i++) - dims_converted.push_back(1); // extend shape to 4d - - return { output_type, - input_layout.format, - tensor(format::get_default_format(dims_converted.size()), dims_converted) }; - } else { - return { output_type, input_layout.format, desc->broadcast_sizes }; - } -} - template std::vector broadcast_inst::calc_output_layouts(broadcast_node const& /*node*/, const kernel_impl_params& impl_param) { auto desc = impl_param.typed_desc(); @@ -86,6 +58,9 @@ std::vector broadcast_inst::calc_output_layouts(broadcast_node const& /* output_shapes = ov::op::v3::shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data)); } else if (impl_param.input_layouts.size() == 1) { // predefined pattern shape + if (target_shape.empty()) { + target_shape.push_back(0); // add some value to vec to have not null ptr in tensor + } auto target_shape_tensor = make_tensor({pattern_shape, data_types::i64, format::bfyx}, static_cast(target_shape.data())); const_data.emplace(1, target_shape_tensor); output_shapes = ov::op::v3::shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data)); @@ -108,21 +83,13 @@ template std::vector broadcast_inst::calc_output_layoutsbroadcast_sizes; - const auto& broadcast_axes = desc->broadcast_axes; auto& input = node.input(); std::stringstream primitive_description; std::stringstream ss_broadcast_axes; - - for (size_t i = 0; i < broadcast_axes.size(); ++i) { - ss_broadcast_axes << broadcast_axes.at(i); - i != (broadcast_axes.size() - 1) ? ss_broadcast_axes << ", " : ss_broadcast_axes << ""; - } - + ss_broadcast_axes << desc->axes_mapping; json_composite broadcast_info; broadcast_info.add("input id", input.id()); - broadcast_info.add("broadcast_sizes", broadcast_sizes.to_string()); broadcast_info.add("broadcast axes", ss_broadcast_axes.str()); node_info->add("broadcast info", broadcast_info); @@ -150,63 +117,5 @@ void broadcast_inst::update_output_memory() { _mem_allocated = false; } -broadcast_inst::typed_primitive_inst(network& network, broadcast_node const& node) : parent(network, node) { - auto input_layout = node.get_input_layout(); - if (input_layout.is_dynamic()) - return; - const auto& output_sizes = argument->broadcast_sizes; - - std::vector input_dims = input_layout.get_dims(); - size_t max_axes_num = input_layout.get_rank(); - - std::vector reordered_input_dims(max_axes_num, 0); - std::set existing; - - const auto& broadcast_axes = node.get_primitive()->broadcast_axes; - size_t broadcast_axes_size = broadcast_axes.size(); - size_t index = 0; - size_t input_index = broadcast_axes_size; - - OPENVINO_ASSERT(broadcast_axes_size >= 0 && broadcast_axes_size <= max_axes_num, - "Incorrect parameters configuration: broadcast_axes size should be less or equal ", std::to_string(max_axes_num), "."); - for (size_t i = 0; i < broadcast_axes_size; ++i) { - if (broadcast_axes.at(i) >= max_axes_num) { - CLDNN_ERROR_MESSAGE( - node.id(), - "Incorrect parameters configuration: broadcast_axes index should be within broadcast_sizes range."); - } - if (existing.find(broadcast_axes.at(i)) != existing.end()) { - CLDNN_ERROR_MESSAGE( - node.id(), - "Incorrect parameters configuration: Duplicate axes numbers was found in broadcast_axes."); - } - existing.insert(broadcast_axes.at(i)); - } - for (size_t i = 0; i < input_index; ++i) { - CLDNN_ERROR_NOT_EQUAL(node.id(), - "Input size on dimension number " + std::to_string(i), - input_dims.at(i), - "", - 1, - "Must be equal 1."); - } - // bfyx, bfzyx format - for (size_t i = 0; i < max_axes_num; ++i) { - if (std::find(broadcast_axes.begin(), broadcast_axes.end(), i) != broadcast_axes.end()) { - reordered_input_dims.at(i) = input_dims.at(index); - ++index; - } else { - reordered_input_dims.at(i) = input_dims.at(input_index); - ++input_index; - } - } - tensor input_sizes_to_compare = tensor(format::get_default_format(reordered_input_dims.size()), reordered_input_dims); - - CLDNN_ERROR_TENSOR_SIZES_NOT_DIVIDABLE(node.id(), - "Broadcast sizes", - output_sizes, - "input sizes", - input_sizes_to_compare, - "Invalid broadcast size: not dividable by input size"); -} +broadcast_inst::typed_primitive_inst(network& network, broadcast_node const& node) : parent(network, node) { } } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/bucketize.cpp b/src/plugins/intel_gpu/src/graph/bucketize.cpp index 7a0b673d693f46..d0dc9d53681009 100644 --- a/src/plugins/intel_gpu/src/graph/bucketize.cpp +++ b/src/plugins/intel_gpu/src/graph/bucketize.cpp @@ -13,12 +13,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(bucketize) -layout bucketize_inst::calc_output_layout(const bucketize_node& node, kernel_impl_params const& impl_param) { - auto input_layout = impl_param.get_input_layout(); - auto primitive = impl_param.desc; - return {*primitive->output_data_types[0], input_layout.format, input_layout.get_tensor()}; -} - std::string bucketize_inst::to_string(const bucketize_node& node) { auto primitive = node.get_primitive(); json_composite bucketize_info; diff --git a/src/plugins/intel_gpu/src/graph/concatenation.cpp b/src/plugins/intel_gpu/src/graph/concatenation.cpp index b493bb217b1c32..6487eb5ef9dca3 100644 --- a/src/plugins/intel_gpu/src/graph/concatenation.cpp +++ b/src/plugins/intel_gpu/src/graph/concatenation.cpp @@ -15,35 +15,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(concatenation) -layout concatenation_inst::calc_output_layout(concatenation_node const& node, kernel_impl_params const& impl_param) { - auto desc = impl_param.typed_desc(); - - auto input_layout = impl_param.get_input_layout(); - auto output_format = input_layout.format; - auto result_sizes = input_layout.get_dims(); - - auto output_dt = desc->output_data_types[0].value_or(input_layout.data_type); - if (impl_param.has_fused_primitives()) { - output_dt = impl_param.get_output_element_type(); - } - - auto axis_index = desc->axis; - - // calculate sum of features from all inputs - result_sizes[axis_index] = 0; - for (size_t i = 0; i < desc->input.size(); ++i) { - auto input_sizes = impl_param.get_input_layout(i).get_dims(); - if (impl_param.get_input_layout(i).format == format::b_fs_yx_fsv16) - output_format = format::b_fs_yx_fsv16; - - result_sizes[axis_index] += input_sizes[axis_index]; - } - - auto def_fmt = format::get_default_format(input_layout.get_rank()); - - return layout {output_dt, output_format, tensor(def_fmt, result_sizes)}; -} - template std::vector concatenation_inst::calc_output_layouts(const concatenation_node& /* node */, const kernel_impl_params& impl_param) { auto desc = impl_param.typed_desc(); diff --git a/src/plugins/intel_gpu/src/graph/condition.cpp b/src/plugins/intel_gpu/src/graph/condition.cpp index 18717b9406cc30..faa985ef3c011b 100644 --- a/src/plugins/intel_gpu/src/graph/condition.cpp +++ b/src/plugins/intel_gpu/src/graph/condition.cpp @@ -45,34 +45,6 @@ static std::vector get_output_layouts(std::map&& o return out_layouts; } -/* - Calc_output_layout method is called only when output layout is invalidated. - It means, that it is called when: - 1) It has never been called. - 2) Dependency has changed output layout. - In this both cases, we need to recalc branch_true and branch_false. - !* We can be sure, that this method was called AT LEAST once during graph compilation.*! -*/ -layout condition_inst::calc_output_layout(condition_node const& /* node */, kernel_impl_params const& impl_param) { - OPENVINO_ASSERT(static_cast(impl_param.desc->output_data_types[0]) == false, "Output data type forcing is not supported for condition_node!"); - OPENVINO_ASSERT(impl_param.get_input_layout(0).count() == 1, "layout of compare_data of condition should be {1,1,1,1}"); - - OPENVINO_ASSERT(impl_param.inner_progs.size() == 2, "If(Condition) contains incorrect number of inner programs ", impl_param.inner_progs.size()); - OPENVINO_ASSERT(impl_param.io_output_maps.size() == 2, "If(Condition) contains incorrect number of io output maps ", impl_param.io_output_maps.size()); - - auto layouts_true = get_output_layouts(get_out_layout_map(impl_param.inner_progs[idx_branch_true]), impl_param.io_output_maps[idx_branch_true]); - auto layouts_false = get_output_layouts(get_out_layout_map(impl_param.inner_progs[idx_branch_false]), impl_param.io_output_maps[idx_branch_false]); - - CLDNN_ERROR_LAYOUT_MISMATCH(impl_param.desc->id, - "Branch true output layout", - layouts_true[0], - "branch false output layout", - layouts_false[0], - "Layout of the branches should be the same."); - - return layouts_true[0]; -} - template static bool convert_data(memory::ptr mem, stream& stream) { mem_lock lock_data{mem, stream}; @@ -234,16 +206,10 @@ void condition_inst::update_output_layout() { _impl_params->memory_deps = memory_deps; auto new_layouts = _node->type()->calc_output_layouts(*_node, *_impl_params); - if (new_layouts.empty()) { - auto new_layout = _node->type()->calc_output_layout(*_node, *_impl_params); - new_layout.data_padding = padding::max(_node->get_primitive()->get_output_padding(0), new_layout.data_padding); - _impl_params->output_layouts[0] = new_layout; - } else { - for (size_t i = 0; i != new_layouts.size(); ++i) { - auto new_layout = new_layouts[i]; - new_layout.data_padding = padding::max(_node->get_primitive()->get_output_padding(i), new_layout.data_padding); - _impl_params->output_layouts[i] = new_layout; - } + for (size_t i = 0; i != new_layouts.size(); ++i) { + auto new_layout = new_layouts[i]; + new_layout.data_padding = padding::max(_node->get_primitive()->get_output_padding(i), new_layout.data_padding); + _impl_params->output_layouts[i] = new_layout; } } diff --git a/src/plugins/intel_gpu/src/graph/convert_color.cpp b/src/plugins/intel_gpu/src/graph/convert_color.cpp index ebe9211068e6d4..4c39833867c196 100644 --- a/src/plugins/intel_gpu/src/graph/convert_color.cpp +++ b/src/plugins/intel_gpu/src/graph/convert_color.cpp @@ -39,32 +39,6 @@ std::vector convert_color_inst::calc_output_layouts(convert_color_node c } template std::vector convert_color_inst::calc_output_layouts(convert_color_node const& node, const kernel_impl_params& impl_param); -layout convert_color_inst::calc_output_layout(convert_color_node const& /* node */, kernel_impl_params const& impl_param) { - auto desc = impl_param.typed_desc(); - - auto src_fmt = desc->input_color_format; - auto dst_fmt = desc->output_color_format; - auto dst_is_rgb_or_bgr = dst_fmt == convert_color::color_format::BGR || - dst_fmt == convert_color::color_format::RGB; - auto inputs_count = desc->input_size(); - bool single_plane_input = inputs_count == 1; - const size_t h_dim = 1; - const size_t c_dim = 3; - if ((src_fmt == convert_color::color_format::NV12 || src_fmt == convert_color::color_format::I420) && dst_is_rgb_or_bgr) { - auto out_layout = impl_param.get_input_layout(0); - out_layout.format = format::bfyx; - auto out_shape = out_layout.get_partial_shape(); - out_shape[c_dim] = 3; - if (single_plane_input) { - out_shape[h_dim] = out_shape[h_dim] * 2 / 3; - } - out_layout.set_partial_shape(out_shape); - - return out_layout; - } - OPENVINO_THROW("[GPU] Unsupported color format combinations"); -} - std::string convert_color_inst::to_string(convert_color_node const& node) { auto desc = node.get_primitive(); auto node_info = node.desc_to_json(); diff --git a/src/plugins/intel_gpu/src/graph/convolution.cpp b/src/plugins/intel_gpu/src/graph/convolution.cpp index 04acd4b8305a34..36ccbac43bd1db 100644 --- a/src/plugins/intel_gpu/src/graph/convolution.cpp +++ b/src/plugins/intel_gpu/src/graph/convolution.cpp @@ -146,10 +146,6 @@ std::vector calc_output_layout_impl(convolution_node const& node, kernel namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(convolution) -layout convolution_inst::calc_output_layout(convolution_node const& node, kernel_impl_params const& impl_param) { - return calc_output_layout_impl(node, impl_param, true)[0]; -} - template std::vector convolution_inst::calc_output_layouts(convolution_node const& node, kernel_impl_params const& impl_param) { return calc_output_layout_impl(node, impl_param, false); diff --git a/src/plugins/intel_gpu/src/graph/crop.cpp b/src/plugins/intel_gpu/src/graph/crop.cpp index 0fd733be570f88..dbb8508e808b44 100644 --- a/src/plugins/intel_gpu/src/graph/crop.cpp +++ b/src/plugins/intel_gpu/src/graph/crop.cpp @@ -16,29 +16,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(crop) -layout crop_inst::calc_output_layout(crop_node const& node, kernel_impl_params const& impl_param) { - assert(static_cast(impl_param.desc->output_data_types[0]) == false && - "Output data type forcing is not supported for crop_node!"); - auto desc = impl_param.typed_desc(); - const auto& ref_in_sizes = desc->reference_input; - const auto in_layout = impl_param.get_input_layout(); - const auto& in_sizes = in_layout.get_tensor(); - const auto& offsets = desc->offsets; - - // Check for borders variant of crop. - if (ref_in_sizes.batch[0] < 0 || ref_in_sizes.feature[0] < 0 || ref_in_sizes.spatial[0] < 0 || - ref_in_sizes.spatial[1] < 0 || ref_in_sizes.spatial[2] < 0) { - // Ignore not supported dimensions. - const auto rb_sizes = ref_in_sizes.negate().sub({0, 0, 0, 0, 0}); - const auto lt_sizes = offsets.sub({0, 0, 0, 0, 0}); - - const auto out_sizes = in_sizes - (rb_sizes + lt_sizes); - - return layout({in_layout.data_type, in_layout.format, out_sizes}); - } - return layout({in_layout.data_type, in_layout.format, ref_in_sizes}); -} - template std::vector crop_inst::calc_output_layouts(const crop_node& /*node*/, const kernel_impl_params& impl_param) { OPENVINO_ASSERT(static_cast(impl_param.desc->output_data_types[0]) == false, @@ -269,7 +246,7 @@ void crop_inst::update_output_memory() { if (_node != nullptr) build_deps(); - if (node->get_program().is_new_shape_infer() && input_memory_ptr() == nullptr) + if (input_memory_ptr() == nullptr) return; if (_outputs[0] && _network.get_engine().is_the_same_buffer(output_memory(), input_memory())) diff --git a/src/plugins/intel_gpu/src/graph/ctc_greedy_decoder.cpp b/src/plugins/intel_gpu/src/graph/ctc_greedy_decoder.cpp index af7a41b8b57fec..c63afb2cc6f2a2 100644 --- a/src/plugins/intel_gpu/src/graph/ctc_greedy_decoder.cpp +++ b/src/plugins/intel_gpu/src/graph/ctc_greedy_decoder.cpp @@ -13,14 +13,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(ctc_greedy_decoder) -layout ctc_greedy_decoder_inst::calc_output_layout(ctc_greedy_decoder_node const& node, kernel_impl_params const& impl_param) { - auto input_node_layout = impl_param.get_input_layout(); - auto prim = impl_param.typed_desc(); - auto output_type = prim->output_data_types[0].value_or(input_node_layout.data_type); - - return layout(output_type, input_node_layout.format, prim->output_tensor); -} - template std::vector ctc_greedy_decoder_inst::calc_output_layouts(ctc_greedy_decoder_node const& /*node*/, const kernel_impl_params& impl_param) { std::vector layouts; diff --git a/src/plugins/intel_gpu/src/graph/ctc_loss.cpp b/src/plugins/intel_gpu/src/graph/ctc_loss.cpp index ec9e62ae545e1c..746c3efabe565c 100644 --- a/src/plugins/intel_gpu/src/graph/ctc_loss.cpp +++ b/src/plugins/intel_gpu/src/graph/ctc_loss.cpp @@ -20,13 +20,6 @@ std::vector ctc_loss_inst::calc_output_layouts(ctc_loss_node const& /*no template std::vector ctc_loss_inst::calc_output_layouts(ctc_loss_node const& node, const kernel_impl_params& impl_param); - -layout ctc_loss_inst::calc_output_layout(const ctc_loss_node& node, const kernel_impl_params& impl_param) { - auto input_layout = impl_param.get_input_layout(); - std::vector out_tensor = {input_layout.get_tensor().sizes().front(), 1, 1, 1}; - return {input_layout.data_type, input_layout.format, tensor(input_layout.format, out_tensor)}; -} - std::string ctc_loss_inst::to_string(const ctc_loss_node& node) { auto primitive = node.get_primitive(); json_composite ctc_loss_info; diff --git a/src/plugins/intel_gpu/src/graph/cum_sum.cpp b/src/plugins/intel_gpu/src/graph/cum_sum.cpp index 6227b0cce4b561..e0a7fdc332c70f 100644 --- a/src/plugins/intel_gpu/src/graph/cum_sum.cpp +++ b/src/plugins/intel_gpu/src/graph/cum_sum.cpp @@ -11,10 +11,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(cum_sum) -layout cum_sum_inst::calc_output_layout(cum_sum_node const& node, kernel_impl_params const& impl_param) { - return impl_param.get_input_layout(); -} - std::string cum_sum_inst::to_string(cum_sum_node const& node) { auto desc = node.get_primitive(); auto node_info = node.desc_to_json(); diff --git a/src/plugins/intel_gpu/src/graph/deconvolution.cpp b/src/plugins/intel_gpu/src/graph/deconvolution.cpp index dc6c2af30523d2..7085bea9b2fa1d 100644 --- a/src/plugins/intel_gpu/src/graph/deconvolution.cpp +++ b/src/plugins/intel_gpu/src/graph/deconvolution.cpp @@ -16,89 +16,6 @@ using namespace ov::intel_gpu; namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(deconvolution) -layout deconvolution_inst::calc_output_layout(deconvolution_node const& node, kernel_impl_params const& impl_param) { - assert(static_cast(impl_param.desc->output_data_types[0]) == false && - "Output data type forcing is not supported for deconvolution_node!"); - auto desc = impl_param.typed_desc(); - - auto input_layout = impl_param.get_input_layout(); - auto weights_layout = *impl_param.weights_layout; - weights_layout = weights_layout.convert_to_weights_layout(desc->grouped_weights_shape); - - auto data_type = input_layout.data_type; - if ((input_layout.data_type == data_types::i8 || input_layout.data_type == data_types::u8) && !impl_param.has_fused_primitives()) { - data_type = data_types::f32; - } - - if (impl_param.has_fused_primitives()) { - data_type = impl_param.get_output_element_type(); - } - - auto pad = desc->pad; - auto strd = desc->stride; - - int32_t number_of_features = weights_layout.group() * weights_layout.ofm(); - - format out_fmt = input_layout.format; - if (node.get_preferred_impl_type() == impl_types::onednn && node.get_preferred_output_fmt() != format::any) { - out_fmt = node.get_preferred_output_fmt(); - } - - if (desc->with_output_size) { - CLDNN_ERROR_LESS_OR_EQUAL_THAN(desc->id, - "User-defined output spatial X", - desc->output_size.spatial[0], - "value 0", - 0, - "User-defined size of output layout must be positive (>= 1)"); - CLDNN_ERROR_LESS_OR_EQUAL_THAN(desc->id, - "User-defined output spatial Y", - desc->output_size.spatial[1], - "value 0", - 0, - "User-defined size of output layout must be positive (>= 1)"); - CLDNN_ERROR_LESS_OR_EQUAL_THAN(desc->id, - "User-defined output spatial Z", - desc->output_size.spatial[2], - "value 0", - 0, - "User-defined size of output layout must be positive (>= 1)"); - - tensor output_size(input_layout.batch(), - number_of_features, - desc->output_size.spatial[0], - desc->output_size.spatial[1], - desc->output_size.spatial[2]); - return {data_type, out_fmt, output_size}; - } - - int32_t off_factor = -2; - size_t spatial_dims = input_layout.get_spatial_rank(); - CLDNN_ERROR_GREATER_THAN(desc->id, - "number of spatial dimensions", - spatial_dims, - "expected number of dimensions", - 3, - "As for now, deconvolutions with more than 3 dimensions are not supported"); - - int32_t x = static_cast( - off_factor * pad[pad.size() - 1] + (input_layout.spatial(0) - 1) * strd[strd.size() - 1] + weights_layout.spatial(0)); - int32_t y = 1; - if (spatial_dims > 1) { - y = static_cast( - off_factor * pad[pad.size() - 2] + (input_layout.spatial(1) - 1) * strd[strd.size() - 2] + weights_layout.spatial(1)); - } - int32_t z = 1; - if (spatial_dims > 2) { - z = static_cast( - off_factor * pad[pad.size() - 3] + (input_layout.spatial(2) - 1) * strd[strd.size() - 3] + weights_layout.spatial(2)); - } - - tensor output_size(input_layout.batch(), - number_of_features, x, y, z); - return {data_type, out_fmt, output_size}; -} - template std::vector deconvolution_inst::calc_output_layouts(deconvolution_node const& node, const kernel_impl_params& impl_param) { auto desc = impl_param.typed_desc(); diff --git a/src/plugins/intel_gpu/src/graph/depth_to_space.cpp b/src/plugins/intel_gpu/src/graph/depth_to_space.cpp index 37f57c28215432..83f99fa8bfb875 100644 --- a/src/plugins/intel_gpu/src/graph/depth_to_space.cpp +++ b/src/plugins/intel_gpu/src/graph/depth_to_space.cpp @@ -13,41 +13,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(depth_to_space) -layout depth_to_space_inst::calc_output_layout(depth_to_space_node const& node, kernel_impl_params const& impl_param) { - auto desc = impl_param.typed_desc(); - - auto input_layout = impl_param.get_input_layout(); - auto input_format = input_layout.format; - - const size_t block_size = desc->block_size; - - if (input_layout.feature() % (block_size * block_size) != 0) - CLDNN_ERROR_MESSAGE( - desc->id, - "The depth of the input tensor must be divisible by squared block size. Actual block size is " + - std::to_string(block_size)); - - auto out_size = input_layout.get_tensor(); - if (format::spatial_num(input_layout.format) == 3) { - const size_t feature = input_layout.feature() / block_size / block_size / block_size; - const size_t z = input_layout.spatial(2) * block_size; - const size_t y = input_layout.spatial(1) * block_size; - const size_t x = input_layout.spatial(0) * block_size; - out_size = tensor(TensorValue(input_layout.batch()), TensorValue(feature), TensorValue(x), TensorValue(y), TensorValue(z)); - } else { - const size_t feature = input_layout.feature() / block_size / block_size; - const size_t y = input_layout.spatial(1) * block_size; - const size_t x = input_layout.spatial(0) * block_size; - out_size = tensor(TensorValue(input_layout.batch()), TensorValue(feature), TensorValue(x), TensorValue(y)); - } - - if (impl_param.has_fused_primitives()) { - input_layout.data_type = impl_param.get_output_element_type(); - } - - return layout{input_layout.data_type, input_format, out_size}; -} - template std::vector depth_to_space_inst::calc_output_layouts(depth_to_space_node const& node, kernel_impl_params const& impl_param) { auto desc = impl_param.typed_desc(); diff --git a/src/plugins/intel_gpu/src/graph/detection_output.cpp b/src/plugins/intel_gpu/src/graph/detection_output.cpp index 54af95978e0f31..85406b6c5bfc82 100644 --- a/src/plugins/intel_gpu/src/graph/detection_output.cpp +++ b/src/plugins/intel_gpu/src/graph/detection_output.cpp @@ -13,49 +13,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(detection_output) -layout detection_output_inst::calc_output_layout(detection_output_node const& node, kernel_impl_params const& impl_param) { - assert(static_cast(impl_param.desc->output_data_types[0]) == false && - "Output data type forcing is not supported for " - "detection_output_node!"); - auto desc = impl_param.typed_desc(); - CLDNN_ERROR_NOT_EQUAL(desc->id, - "Detection output layer input number", - impl_param.input_layouts.size(), - "expected number of inputs", - static_cast(3), - ""); - - auto input_layout = impl_param.get_input_layout(); - - // Batch size and feature size are 1. - // Number of bounding boxes to be kept is set to keep_top_k*batch size. - // If number of detections is lower than top_k, will write dummy results at the end with image_id=-1. - // Each row is a 7 dimension vector, which stores: - // [image_id, label, confidence, xmin, ymin, xmax, ymax] - int output_size = static_cast(input_layout.get_linear_size()) / PRIOR_BOX_SIZE; - int num_classes = desc->num_classes; - - if (desc->share_location) { - num_classes = (desc->background_label_id == 0) ? desc->num_classes - 1 - : desc->num_classes; - output_size *= num_classes; - } - - if (desc->top_k != -1) { - int top_k = desc->top_k * num_classes * input_layout.batch(); - if (top_k < output_size) { - output_size = top_k; - } - } - - output_size *= DETECTION_OUTPUT_ROW_SIZE; - // Add space for number of output results per image - needed in the next detection output step - output_size += ((input_layout.batch() + 15) / 16) * 16; - - return {input_layout.data_type, cldnn::format::bfyx, - cldnn::tensor(1, 1, DETECTION_OUTPUT_ROW_SIZE, desc->keep_top_k * input_layout.batch())}; -} - template std::vector detection_output_inst::calc_output_layouts(detection_output_node const& node, kernel_impl_params const& impl_param) { auto desc = impl_param.typed_desc(); diff --git a/src/plugins/intel_gpu/src/graph/dft.cpp b/src/plugins/intel_gpu/src/graph/dft.cpp index f044ce53f4e165..d0f4eb32c03302 100644 --- a/src/plugins/intel_gpu/src/graph/dft.cpp +++ b/src/plugins/intel_gpu/src/graph/dft.cpp @@ -13,32 +13,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(dft) -layout dft_inst::calc_output_layout(dft_node const& node, kernel_impl_params const& impl_param) { - const auto primitive = impl_param.typed_desc(); - const auto input_layout = impl_param.get_input_layout(); - - std::vector dims_converted(primitive->output_shape.size()); - std::transform(primitive->output_shape.begin(), - primitive->output_shape.end(), - dims_converted.begin(), - [](size_t value) { - return static_cast(value); - }); - - // Extend shape to 4d by pushing ones at the end (needed to support less than 4d cases) - for (auto i = dims_converted.size(); i < 4; ++i) { - auto it = dims_converted.end(); - // For IRDFT push ones at the end, for other DTFs push ones before the last dim - if (primitive->direction != dft_direction::inverse || primitive->mode != dft_mode::real) { - it = std::prev(it); - } - dims_converted.insert(it, 1); - } - - const auto output_format = format::adjust_to_rank(input_layout.format, dims_converted.size()); - return {input_layout.data_type, output_format, tensor(output_format, dims_converted)}; -} - template std::vector dft_inst::calc_output_layouts(dft_node const& /*node*/, kernel_impl_params const& impl_param) { std::vector layouts; diff --git a/src/plugins/intel_gpu/src/graph/eltwise.cpp b/src/plugins/intel_gpu/src/graph/eltwise.cpp index a370e8ba260f8b..bde2d1f376d366 100644 --- a/src/plugins/intel_gpu/src/graph/eltwise.cpp +++ b/src/plugins/intel_gpu/src/graph/eltwise.cpp @@ -29,92 +29,6 @@ const std::set eltwise_mode::is_inf, eltwise_mode::is_nan }; -layout eltwise_inst::calc_output_layout(eltwise_node const& node, kernel_impl_params const& impl_param) { - size_t primary_input_idx = 0; - if (node.input(primary_input_idx).is_constant()) { - for (size_t i = 1; i < node.get_dependencies().size(); i++) { - if (!node.input(i).is_constant()) { - primary_input_idx = i; - break; - } - } - } - auto input_node_layout = impl_param.get_non_padded_input_layout(primary_input_idx); - auto desc = impl_param.typed_desc(); - auto output_type = desc->output_data_types[0].value_or(input_node_layout.data_type); - - auto size = input_node_layout.get_tensor(); - auto format = input_node_layout.format; - for (size_t i = 0; i < desc->input_size(); i++) { - if (i == primary_input_idx) - continue; - - auto l = impl_param.get_non_padded_input_layout(i); - size = tensor::max(size, l.get_tensor()); - if (l.format == format::b_fs_zyx_fsv16) // use optimized 5D - format = format::b_fs_zyx_fsv16; - else if (l.format == format::bs_fs_zyx_bsv16_fsv16) - format = format::bs_fs_zyx_bsv16_fsv16; - } - auto output_layout = layout(output_type, format, size); - - auto mode = desc->mode; - // list of operations supported for integer types - if (input_node_layout.data_type == data_types::i8 || input_node_layout.data_type == data_types::u8 || - input_node_layout.data_type == data_types::i32 || input_node_layout.data_type == data_types::i64) { - std::vector eltwise_int_modes = {eltwise_mode::sum, - eltwise_mode::sub, - eltwise_mode::prod, - eltwise_mode::div, - eltwise_mode::min, - eltwise_mode::max, - eltwise_mode::mod, - eltwise_mode::eq, - eltwise_mode::ne, - eltwise_mode::lt, - eltwise_mode::le, - eltwise_mode::gt, - eltwise_mode::ge, - eltwise_mode::squared_diff, - eltwise_mode::floor_mod, - eltwise_mode::logic_and, - eltwise_mode::logic_or, - eltwise_mode::logic_xor, - eltwise_mode::right_shift, - eltwise_mode::left_shift, - eltwise_mode::bitwise_and, - eltwise_mode::bitwise_or, - eltwise_mode::bitwise_xor}; - if (std::find(eltwise_int_modes.begin(), eltwise_int_modes.end(), mode) == eltwise_int_modes.end()) - CLDNN_ERROR_MESSAGE(desc->id, "Requested eltwise mode is not supported for integer types."); - } - - // Logic and comparison operations should return i8 for any inputs - if (eltwise::eltwise_bool_modes.find(mode) != eltwise::eltwise_bool_modes.end()) { - output_layout.data_type = data_types::i8; - } - - if (desc->output_data_types[0]) { - output_layout.data_type = *desc->output_data_types[0]; - } - - if (node.has_fused_primitives()) { - output_layout.data_type = impl_param.get_output_element_type(); - } - - if (!desc->stride.empty()) { - auto new_size = input_node_layout.get_tensor(); - // we can safely use only first stride, since we're using first input, and input / stride should give exact same - // value for every input - new_size.spatial[0] = (input_node_layout.spatial(0) - 1) / desc->stride[0].spatial[0] + 1; - new_size.spatial[1] = (input_node_layout.spatial(1) - 1) / desc->stride[0].spatial[1] + 1; - new_size.spatial[2] = (input_node_layout.spatial(2) - 1) / desc->stride[0].spatial[2] + 1; - input_node_layout.set_tensor(new_size); - return input_node_layout; - } - return output_layout; -} - template std::vector eltwise_inst::calc_output_layouts(eltwise_node const& /*node*/, kernel_impl_params const& impl_param) { auto desc = impl_param.typed_desc(); @@ -393,18 +307,13 @@ eltwise_inst::typed_primitive_inst(network& network, eltwise_node const& node) : ""); } } else { - bool use_new_shape_infer = network.get_config().get_property(ov::intel_gpu::allow_new_shape_infer); auto input0_pshape = node.get_input_pshape(0); for (size_t i = 1; i < inputs_count; ++i) { auto input_pshape = node.get_input_pshape(i); if (input0_pshape.size() > input_pshape.size()) { - if (use_new_shape_infer) { - input_pshape.insert(input_pshape.begin(), input0_pshape.size() - input_pshape.size(), 1); - } else { - input_pshape.insert(input_pshape.end(), input0_pshape.size() - input_pshape.size(), 1); - } + input_pshape.insert(input_pshape.begin(), input0_pshape.size() - input_pshape.size(), 1); } auto base_pshape = input0_pshape; diff --git a/src/plugins/intel_gpu/src/graph/embedding_bag.cpp b/src/plugins/intel_gpu/src/graph/embedding_bag.cpp index 1e16b108ef72f3..fe436070ae5866 100644 --- a/src/plugins/intel_gpu/src/graph/embedding_bag.cpp +++ b/src/plugins/intel_gpu/src/graph/embedding_bag.cpp @@ -61,17 +61,6 @@ std::vector embedding_bag_inst::calc_output_layouts(embedding_bag_node c template std::vector embedding_bag_inst::calc_output_layouts(embedding_bag_node const& node, const kernel_impl_params& impl_param); -layout embedding_bag_inst::calc_output_layout(embedding_bag_node const& node, kernel_impl_params const& impl_param) { - auto desc = impl_param.typed_desc(); - - auto input_layout = impl_param.get_input_layout(); - auto output_format = input_layout.format; - - auto output_shape = desc->output_shape; - - return layout(input_layout.data_type, output_format, output_shape); -} - std::string embedding_bag_inst::to_string(embedding_bag_node const& node) { auto desc = node.get_primitive(); auto node_info = node.desc_to_json(); diff --git a/src/plugins/intel_gpu/src/graph/experimental_detectron_detection_output.cpp b/src/plugins/intel_gpu/src/graph/experimental_detectron_detection_output.cpp index 4d6a090e77bdc8..eacd8940873417 100644 --- a/src/plugins/intel_gpu/src/graph/experimental_detectron_detection_output.cpp +++ b/src/plugins/intel_gpu/src/graph/experimental_detectron_detection_output.cpp @@ -33,14 +33,6 @@ template std::vector experimental_detectron_detection_output_inst::calc_output_layouts( experimental_detectron_detection_output_node const& node, const kernel_impl_params& impl_param); -layout experimental_detectron_detection_output_inst::calc_output_layout( - const experimental_detectron_detection_output_node& node, kernel_impl_params const& impl_param) { - const layout data_layout = impl_param.get_input_layout(); - auto desc = impl_param.typed_desc(); - - return layout(data_layout.data_type, data_layout.format, {static_cast(desc->max_detections_per_image), 4, 1, 1}); -} - std::string experimental_detectron_detection_output_inst::to_string( const experimental_detectron_detection_output_node& node) { auto desc = node.get_primitive(); diff --git a/src/plugins/intel_gpu/src/graph/experimental_detectron_generate_proposal_single_image.cpp b/src/plugins/intel_gpu/src/graph/experimental_detectron_generate_proposal_single_image.cpp index b538e001427be7..99520ccfb253a1 100644 --- a/src/plugins/intel_gpu/src/graph/experimental_detectron_generate_proposal_single_image.cpp +++ b/src/plugins/intel_gpu/src/graph/experimental_detectron_generate_proposal_single_image.cpp @@ -31,14 +31,6 @@ template std::vector experimental_detectron_generate_proposals_single_image_inst::calc_output_layouts( experimental_detectron_generate_proposals_single_image_node const& node, const kernel_impl_params& impl_param); -layout experimental_detectron_generate_proposals_single_image_inst::calc_output_layout( - const experimental_detectron_generate_proposals_single_image_node& node, kernel_impl_params const& impl_param) { - const layout data_layout = impl_param.get_input_layout(); - auto desc = impl_param.typed_desc(); - - return layout(data_layout.data_type, data_layout.format, {static_cast(desc->post_nms_count), 4, 1, 1}); -} - std::string experimental_detectron_generate_proposals_single_image_inst::to_string( const experimental_detectron_generate_proposals_single_image_node& node) { auto desc = node.get_primitive(); diff --git a/src/plugins/intel_gpu/src/graph/experimental_detectron_prior_grid_generator.cpp b/src/plugins/intel_gpu/src/graph/experimental_detectron_prior_grid_generator.cpp index 2e0e5c0a1e2fec..3a24d8cb47cd0f 100644 --- a/src/plugins/intel_gpu/src/graph/experimental_detectron_prior_grid_generator.cpp +++ b/src/plugins/intel_gpu/src/graph/experimental_detectron_prior_grid_generator.cpp @@ -31,24 +31,6 @@ template std::vector experimental_detectron_prior_grid_generator_inst::calc_output_layouts( experimental_detectron_prior_grid_generator_node const& node, const kernel_impl_params& impl_param); -layout experimental_detectron_prior_grid_generator_inst::calc_output_layout( - const experimental_detectron_prior_grid_generator_node& node, kernel_impl_params const& impl_param) { - const layout data_layout = impl_param.get_input_layout(); - auto desc = impl_param.typed_desc(); - if (desc->flatten) { - return layout(data_layout.data_type, - format::bfyx, - {static_cast(desc->featmap_width * desc->featmap_height * data_layout.batch()), 4, 1, 1}); - } else { - return layout(data_layout.data_type, - format::bfyx, - {static_cast(desc->featmap_height), - static_cast(desc->featmap_width), - 4, - static_cast(data_layout.batch())}); - } -} - std::string experimental_detectron_prior_grid_generator_inst::to_string( experimental_detectron_prior_grid_generator_node const& node) { auto node_info = node.desc_to_json(); diff --git a/src/plugins/intel_gpu/src/graph/experimental_detectron_roi_feature_extractor.cpp b/src/plugins/intel_gpu/src/graph/experimental_detectron_roi_feature_extractor.cpp index 1c09fcb7cc7d4b..881b281457320b 100644 --- a/src/plugins/intel_gpu/src/graph/experimental_detectron_roi_feature_extractor.cpp +++ b/src/plugins/intel_gpu/src/graph/experimental_detectron_roi_feature_extractor.cpp @@ -14,22 +14,15 @@ size_t experimental_detectron_roi_feature_extractor_inst::inputs_memory_count() return parent::inputs_memory_count() - 1; } -memory::ptr experimental_detectron_roi_feature_extractor_inst::second_output_memory() const { - if (desc()->num_outputs == 1) { - return input_memory_ptr(parent::inputs_memory_count() - 1); - } else { - return output_memory_ptr(1); - } -} - memory::ptr experimental_detectron_roi_feature_extractor_inst::rois_memory() const { return input_memory_ptr(0); } void experimental_detectron_roi_feature_extractor_inst::copy_rois_input_to_second_output() const { - second_output_memory()->copy_from(get_network().get_stream(), *rois_memory()); + output_memory_ptr(1)->copy_from(get_network().get_stream(), *rois_memory()); } + template std::vector experimental_detectron_roi_feature_extractor_inst::calc_output_layouts( experimental_detectron_roi_feature_extractor_node const& /*node*/, const kernel_impl_params& impl_param) { @@ -49,19 +42,6 @@ template std::vector experimental_detectron_roi_feature_extractor_inst::calc_output_layouts( experimental_detectron_roi_feature_extractor_node const& node, const kernel_impl_params& impl_param); -layout experimental_detectron_roi_feature_extractor_inst::calc_output_layout( - experimental_detectron_roi_feature_extractor_node const& node, kernel_impl_params const& impl_param) { - assert(static_cast(impl_param.desc->output_data_types[0]) == false && - "Output data type forcing is not supported for roi_pooling_node!"); - layout rois_layout = impl_param.get_input_layout(0); - layout data_layout = impl_param.get_input_layout(1); - int num_rois = rois_layout.batch(); - int num_channels = data_layout.feature(); - auto desc = impl_param.typed_desc(); - - return layout(data_layout.data_type, format::bfyx, {num_rois, num_channels, desc->output_dim, desc->output_dim}); -} - std::string experimental_detectron_roi_feature_extractor_inst::to_string(experimental_detectron_roi_feature_extractor_node const& node) { auto desc = node.get_primitive(); diff --git a/src/plugins/intel_gpu/src/graph/experimental_detectron_topk_rois.cpp b/src/plugins/intel_gpu/src/graph/experimental_detectron_topk_rois.cpp index 4626cbd8f27f3a..7d6885fc7f5ac5 100644 --- a/src/plugins/intel_gpu/src/graph/experimental_detectron_topk_rois.cpp +++ b/src/plugins/intel_gpu/src/graph/experimental_detectron_topk_rois.cpp @@ -27,17 +27,6 @@ template std::vector experimental_detectron_topk_rois_inst::calc_output_layouts( experimental_detectron_topk_rois_node const& node, const kernel_impl_params& impl_param); -layout experimental_detectron_topk_rois_inst::calc_output_layout( - experimental_detectron_topk_rois_node const &node, kernel_impl_params const& impl_param) { - auto input_layout = impl_param.get_input_layout(); - auto desc = impl_param.typed_desc(); - - int32_t roi_num = std::min(input_layout.get_tensor().sizes()[0], static_cast(desc->max_rois)); - - return {input_layout.data_type, input_layout.format, {roi_num, - input_layout.get_tensor().sizes()[1], 1, 1 }}; -} - std::string experimental_detectron_topk_rois_inst::to_string(experimental_detectron_topk_rois_node const &node) { auto node_info = node.desc_to_json(); json_composite experimental_detectron_topk_rois_info; diff --git a/src/plugins/intel_gpu/src/graph/extract_image_patches.cpp b/src/plugins/intel_gpu/src/graph/extract_image_patches.cpp index aff15321da2e1d..b155c7917522e8 100644 --- a/src/plugins/intel_gpu/src/graph/extract_image_patches.cpp +++ b/src/plugins/intel_gpu/src/graph/extract_image_patches.cpp @@ -29,16 +29,6 @@ std::vector extract_image_patches_inst::calc_output_layouts(extract_imag template std::vector extract_image_patches_inst::calc_output_layouts(extract_image_patches_node const& node, const kernel_impl_params& impl_param); -layout extract_image_patches_inst::calc_output_layout(extract_image_patches_node const& node, kernel_impl_params const& impl_param) { - auto desc = impl_param.typed_desc(); - - auto input_layout = impl_param.get_input_layout(); - auto input_format = input_layout.format; - - auto output_shape = desc->output_shape; - return layout(input_layout.data_type, input_format, output_shape); -} - std::string extract_image_patches_inst::to_string(extract_image_patches_node const& node) { auto desc = node.get_primitive(); auto node_info = node.desc_to_json(); diff --git a/src/plugins/intel_gpu/src/graph/eye.cpp b/src/plugins/intel_gpu/src/graph/eye.cpp index 3715fbf619c1eb..fbbff319609eb2 100644 --- a/src/plugins/intel_gpu/src/graph/eye.cpp +++ b/src/plugins/intel_gpu/src/graph/eye.cpp @@ -2,8 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // -#include #include "openvino/op/eye.hpp" +#include #include "eye_shape_inference.hpp" #include @@ -42,11 +42,6 @@ std::vector eye_inst::calc_output_layouts(eye_node const& /*node*/, cons template std::vector eye_inst::calc_output_layouts(eye_node const& node, const kernel_impl_params& impl_param); -layout eye_inst::calc_output_layout(eye_node const& node, const kernel_impl_params&) { - auto primitive = node.get_primitive(); - return {*(primitive->output_data_types[0]), node.get_input_layout().format, primitive->output_shape}; -} - std::string eye_inst::to_string(eye_node const& node) { auto node_info = node.desc_to_json(); json_composite eye_info; diff --git a/src/plugins/intel_gpu/src/graph/fully_connected.cpp b/src/plugins/intel_gpu/src/graph/fully_connected.cpp index d0820a119d5468..d1e6955391f913 100644 --- a/src/plugins/intel_gpu/src/graph/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/graph/fully_connected.cpp @@ -13,128 +13,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(fully_connected) -namespace { -bool is_batch_after_spatial(const std::string order) { - bool spatial_found = false; - for (auto c : order) { - switch (c) { - case 'b': - case 'n': - return spatial_found; - - case 'x': - case 'y': - case 'z': - case 'w': - case 's': - spatial_found = true; - break; - - default: - break; - } - } - return false; -} - -format::type get_preferred_format(fully_connected_node const& node, const kernel_impl_params& impl_param) { - if (node.get_preferred_impl_type() == impl_types::onednn && node.get_preferred_output_fmt() != format::any) { - return node.get_preferred_output_fmt(); - } - - auto input_layout = impl_param.get_input_layout(); - - // for 3d output we have to chose bfyx format - if (impl_param.typed_desc()->input_size == 3) - return format::bfyx; - - if (data_type_traits::is_floating_point(input_layout.data_type) && - (is_batch_after_spatial(input_layout.format.order()) || - input_layout.format == format::bs_f_bsv16 || - input_layout.format == format::bs_fs_fsv8_bsv8)) - return format::yxfb; - - bool no_spatial_padding = true; - // C++ 11 range loop shouldn't be used here because of incorrect iterator functionality in mutable_array_ref<> - for (size_t i = 0; i < input_layout.get_spatial_rank(); ++i) { - no_spatial_padding &= (input_layout.data_padding._lower_size[2 + i] == 0); - } - for (size_t i = 0; i < input_layout.get_spatial_rank(); ++i) { - no_spatial_padding &= (input_layout.data_padding._upper_size[2 + i] == 0); - } - - if (input_layout.data_type == data_types::f32 && - input_layout.format == format::bfyx && - no_spatial_padding && - input_layout.batch() != 8) - return format::bfyx; - - auto input_pitches = input_layout.get_pitches(); - if (input_layout.data_type == data_types::f16 && - input_layout.format == format::bfyx && - no_spatial_padding && - input_pitches[0] % 2 == 0 && - input_layout.batch() != 16) - return format::bfyx; - - // this condition tests whether our input is batch>1 in bfyx format, if yes there will be - // extra reorder between input and this fc from bfyx to yxfb format (so - // "is_batch_after_spatial" should return true) - if (data_type_traits::is_floating_point(input_layout.data_type) && - input_layout.format == format::bfyx && - input_layout.batch() > 1) - return format::yxfb; - - return format::bfyx; -} - -} // namespace - -layout fully_connected_inst::calc_output_layout(fully_connected_node const& node, kernel_impl_params const& impl_param) { - auto desc = impl_param.typed_desc(); - - auto input_layout = impl_param.get_input_layout(); - auto input_pshape = input_layout.get_partial_shape(); - auto weights_layout = *impl_param.weights_layout; - auto weights_pshape = weights_layout.get_partial_shape(); - auto output_type = desc->output_data_types[0].value_or(input_layout.data_type); - if (data_type_traits::is_i8_u8(input_layout.data_type) && desc->output_data_types[0]) - output_type = *desc->output_data_types[0]; - - if (impl_param.has_fused_primitives()) { - output_type = impl_param.get_output_element_type(); - } - - auto reshape_to_2d = [](const ov::PartialShape& shape, int64_t feature) { - auto staticShape = shape.to_shape(); - size_t total = std::accumulate(staticShape.begin(), staticShape.end(), static_cast(1), std::multiplies()); - std::vector reshapeSize = { static_cast(total) / feature, feature }; - return reshapeSize; - }; - - int64_t feature = input_pshape[std::min(desc->input_size, static_cast(4)) - 1].get_length(); - if (desc->input_size == 3) { - feature = std::max({input_layout.spatial(0), input_layout.spatial(1), input_layout.spatial(2)}); - } - - if (desc->input_size > 4) { - input_layout.set_partial_shape(reshape_to_2d(input_pshape, feature)); - } - if (weights_pshape.size() != 2) { - weights_layout.set_partial_shape(reshape_to_2d(weights_pshape, feature)); - } - - auto output_size = tensor(input_layout.batch(), weights_layout.batch(), 1, 1); - if (desc->input_size == 3) { - output_size = tensor(input_layout.batch(), input_layout.feature(), 1, weights_layout.batch()); - } else if (desc->input_size == 4) { - output_size = tensor(input_layout.batch(), input_layout.feature(), weights_layout.batch(), input_layout.spatial(1)); - } - format output_format = get_preferred_format(node, impl_param); - - return layout(output_type, output_format, output_size); -} - template std::vector fully_connected_inst::calc_output_layouts(fully_connected_node const& node, const kernel_impl_params& impl_param) { auto desc = impl_param.typed_desc(); @@ -158,10 +36,7 @@ std::vector fully_connected_inst::calc_output_layouts(fully_connected_no std::vector output_shapes = ov::op::v0::shape_infer(&op, input_shapes); - bool is_static = input_layout.is_static() && weights_layout.is_static(); - bool allow_new_shape_infer = impl_param.get_program().is_new_shape_infer(); - format::type output_format = is_static && !allow_new_shape_infer ? get_preferred_format(node, impl_param) : - input_layout.format.value; + format::type output_format = input_layout.format.value; if (node.get_preferred_output_fmt() != format::any) output_format = node.get_preferred_output_fmt(); diff --git a/src/plugins/intel_gpu/src/graph/gather.cpp b/src/plugins/intel_gpu/src/graph/gather.cpp index b4fdc8b2f81013..d631cdf50ae47d 100644 --- a/src/plugins/intel_gpu/src/graph/gather.cpp +++ b/src/plugins/intel_gpu/src/graph/gather.cpp @@ -13,61 +13,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(gather) -layout gather_inst::calc_output_layout(gather_node const& node, kernel_impl_params const& impl_param) { - auto desc = impl_param.typed_desc(); - - auto input_layout = impl_param.get_input_layout(); - std::vector dims_converted; - for (auto dim : desc->output_shape) { - dims_converted.push_back(static_cast(dim)); - } - // extend shape to 4d - for (size_t i = dims_converted.size(); i < 4; i++) - dims_converted.push_back(1); - - format output_format = input_layout.format; - if (dims_converted.size() == 5) { - switch (input_layout.format) { - case format::bfyx: - output_format = format::get_default_format(dims_converted.size()); - break; - case format::b_fs_yx_fsv16: - output_format = format::b_fs_zyx_fsv16; - break; - case format::b_fs_yx_fsv32: - output_format = format::b_fs_zyx_fsv32; - break; - case format::bs_fs_yx_bsv16_fsv16: - output_format = format::bs_fs_zyx_bsv16_fsv16; - break; - default: - break; - } - } else if (dims_converted.size() == 6) { - switch (input_layout.format) { - case format::bfyx: - case format::bfzyx: - case format::b_fs_zyx_fsv16: - case format::b_fs_zyx_fsv32: - output_format = format::get_default_format(dims_converted.size()); - break; - default: - break; - } - } - auto output_type = input_layout.data_type; - if (impl_param.typed_desc()->compressed_weights) { - output_type = impl_param.typed_desc()->decompressed_type; - } - if (impl_param.has_fused_primitives()) { - output_type = impl_param.get_output_element_type(); - } - - return layout{output_type, - output_format, - tensor(format::get_default_format(dims_converted.size()), dims_converted)}; -} - template std::vector gather_inst::calc_output_layouts(gather_node const& /*node*/, const kernel_impl_params& impl_param) { auto desc = impl_param.typed_desc(); diff --git a/src/plugins/intel_gpu/src/graph/gather_elements.cpp b/src/plugins/intel_gpu/src/graph/gather_elements.cpp index e4c3cb20e21a1c..cb08e9c522881c 100644 --- a/src/plugins/intel_gpu/src/graph/gather_elements.cpp +++ b/src/plugins/intel_gpu/src/graph/gather_elements.cpp @@ -12,23 +12,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(gather_elements) -layout gather_elements_inst::calc_output_layout(gather_elements_node const& node, kernel_impl_params const& impl_param) { - auto op = impl_param.typed_desc(); - - auto input_layout_origin = impl_param.get_input_layout(0); - auto indices_layout_origin = impl_param.get_input_layout(1); - - auto input_layout = input_layout_origin.get_tensor().sizes(input_layout_origin.format); - auto indices_layout = indices_layout_origin.get_tensor().sizes(indices_layout_origin.format); - - auto output_type = (impl_param.has_fused_primitives()) ? impl_param.get_output_element_type() : - input_layout_origin.data_type; - auto output_shape = op->output_shape; - auto output_format = op->output_format; - // calculate initial output shape - return layout(output_type, output_format, output_shape); -} - template std::vector gather_elements_inst::calc_output_layouts(gather_elements_node const& /*node*/, const kernel_impl_params& impl_param) { auto desc = impl_param.typed_desc(); @@ -65,7 +48,6 @@ std::string gather_elements_inst::to_string(gather_elements_node const& node) { json_composite gather_elements_info; gather_elements_info.add("input id", input.id()); - gather_elements_info.add("output format", calc_output_layout(node, *node.get_kernel_impl_params()).format); gather_elements_info.add("axis", desc->axis); node_info->add("gather_elements info", gather_elements_info); diff --git a/src/plugins/intel_gpu/src/graph/gather_nd.cpp b/src/plugins/intel_gpu/src/graph/gather_nd.cpp index 676d76164094d8..909dba9437b8db 100644 --- a/src/plugins/intel_gpu/src/graph/gather_nd.cpp +++ b/src/plugins/intel_gpu/src/graph/gather_nd.cpp @@ -12,66 +12,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(gather_nd) -layout gather_nd_inst::calc_output_layout(gather_nd_node const& node, kernel_impl_params const& impl_param) { - auto op = impl_param.typed_desc(); - - auto input_layout_origin = impl_param.get_input_layout(0); - auto indices_layout_origin = impl_param.get_input_layout(1); - - auto input_layout = input_layout_origin.get_tensor().sizes(input_layout_origin.format); - auto indices_layout = indices_layout_origin.get_tensor().sizes(indices_layout_origin.format); - - const auto input_rank = static_cast(op->input_rank); - const auto indices_rank = op->indices_rank; - const auto batch_dims = op->batch_dims; - - // calculate initial output shape - std::vector output_sizes; - - for (uint8_t x = 0; x < indices_rank - 1; x++) { - output_sizes.push_back(indices_layout[x]); - } - - const size_t indices_last_dim = indices_layout[indices_rank - 1]; - for (size_t x = static_cast(batch_dims + indices_last_dim); x < input_rank; x++) { - output_sizes.push_back(input_layout[x]); - } - - // create final output shape by batch_dims - std::vector final_output_sizes; - - if (op->batch_merged_output) { - // calculate batch_size by batch_dims - int batch_size = 1; - for (uint8_t x = 0; x < batch_dims; x++) { - batch_size *= output_sizes[x]; - } - - if (batch_dims > 0) { - final_output_sizes.push_back(batch_size); - } - - for (size_t x = static_cast(batch_dims); x < output_sizes.size(); x++) { - final_output_sizes.push_back(output_sizes[x]); - } - } else { - for (size_t x = 0; x < output_sizes.size(); x++) { - final_output_sizes.push_back(output_sizes[x]); - } - } - - auto output_format = format::get_default_format(final_output_sizes.size()); - auto output_sizes_tensor = tensor(tensor(final_output_sizes).sizes(output_format)); - auto padding = op->output_paddings[0]; - - if (impl_param.has_fused_primitives()) { - input_layout_origin.data_type = impl_param.get_output_element_type(); - } - - return layout(input_layout_origin.data_type, output_format, output_sizes_tensor, padding); -} - - template std::vector gather_nd_inst::calc_output_layouts(gather_nd_node const& /*node*/, const kernel_impl_params& impl_param) { auto desc = impl_param.typed_desc(); diff --git a/src/plugins/intel_gpu/src/graph/gather_tree.cpp b/src/plugins/intel_gpu/src/graph/gather_tree.cpp index 26b55b65490925..ec44c72f74a4b7 100644 --- a/src/plugins/intel_gpu/src/graph/gather_tree.cpp +++ b/src/plugins/intel_gpu/src/graph/gather_tree.cpp @@ -14,13 +14,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(gather_tree) -layout gather_tree_inst::calc_output_layout(gather_tree_node const& node, kernel_impl_params const& impl_param) { - assert(static_cast(impl_param.desc->output_data_types[0]) == false && - "Output data type forcing is not supported for gather_tree_node!"); - auto input_layout = impl_param.get_input_layout(); - return input_layout; -} - template std::vector gather_tree_inst::calc_output_layouts(gather_tree_node const& /*node*/, const kernel_impl_params& impl_param) { auto desc = impl_param.typed_desc(); diff --git a/src/plugins/intel_gpu/src/graph/gemm.cpp b/src/plugins/intel_gpu/src/graph/gemm.cpp index a8b196bd45885f..c971481d654d2f 100644 --- a/src/plugins/intel_gpu/src/graph/gemm.cpp +++ b/src/plugins/intel_gpu/src/graph/gemm.cpp @@ -13,96 +13,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(gemm) -layout gemm_inst::calc_output_layout(gemm_node const& node, kernel_impl_params const& impl_param) { - auto prim = impl_param.typed_desc(); - - auto input0_layout = impl_param.get_input_layout(0); - auto input1_layout = impl_param.get_input_layout(1); - - auto input0_shape = input0_layout.get_shape(); - auto input1_shape = input1_layout.get_shape(); - - auto input0_transpose_order = prim->input0_transpose_order; - auto input1_transpose_order = prim->input1_transpose_order; - - bool reordered = prim->input_rank > 4 || prim->weight_rank > 4; - size_t output_rank = std::max(prim->input_rank, prim->weight_rank); - size_t input_rank = reordered ? output_rank : prim->input_rank; - size_t weight_rank = reordered ? output_rank : prim->weight_rank; - - auto update_input_shape = [&output_rank](const ov::Shape& input_shape, size_t rank, std::vector input_order, bool first_input) { - auto input_shape_update = ov::Shape(); - auto _input_shape_update = ov::Shape(input_shape.begin(), input_shape.begin() + std::min(rank, input_shape.size())); - if (_input_shape_update.size() == input_order.size() && input_order.size() > 1) { - for (auto idx : input_order) { - input_shape_update.push_back(_input_shape_update[idx]); - } - } else { - input_shape_update = _input_shape_update; - } - if (input_shape_update.size() == 1) { - first_input ? input_shape_update.insert(input_shape_update.begin(), 1) - : input_shape_update.insert(input_shape_update.end(), 1); - output_rank = std::max(output_rank, rank + 1); - } - input_shape_update.insert(input_shape_update.begin(), output_rank - input_shape_update.size(), 1); - return input_shape_update; - }; - - auto transpose_shape = [](const ov::Shape& shape, const std::vector& order) { - auto shape_transposed = ov::Shape(shape); - auto rank_diff = shape.size() - order.size(); - for (size_t i = 0; i < order.size(); i++) { - size_t idx = static_cast(order[i]); - shape_transposed[i + rank_diff] = shape[idx + rank_diff]; - } - - return shape_transposed; - }; - - auto input0_shape_update = update_input_shape(input0_shape, input_rank, input0_transpose_order, true); - auto input1_shape_update = update_input_shape(input1_shape, weight_rank, input1_transpose_order, false); - - ov::Shape bias_shape(output_rank); - if (prim->input_size() == 3) { - bias_shape = impl_param.get_input_layout(2).get_shape(); - bias_shape = update_input_shape(bias_shape, weight_rank, input1_transpose_order, false); - } - - auto output_shape = input0_shape_update; - for (size_t i = 0; i < output_rank; ++i) { - output_shape[i] = std::max(std::max(input0_shape_update[i], input1_shape_update[i]), bias_shape[i]); - } - - size_t M = *(input0_shape_update.end() - 2); - size_t N = input1_shape_update.back(); - - output_shape[output_rank - 2] = M; - output_shape[output_rank - 1] = N; - - size_t ones_to_add = 4 - std::min(output_shape.size(), static_cast(4)); - output_shape.insert(output_shape.begin(), ones_to_add, 1); - - if (prim->output_transpose_order.size() > 0) - output_shape = transpose_shape(output_shape, prim->output_transpose_order); - - auto output_type = input0_layout.data_type; - if ((output_type == data_types::u8 || output_type == data_types::i8) && prim->output_data_types[0]) - output_type = *prim->output_data_types[0]; - - if (impl_param.has_fused_primitives()) { - output_type = impl_param.get_output_element_type(); - } - - auto output_format = input0_layout.format; - - if (node.get_preferred_impl_type() == impl_types::onednn && node.get_preferred_output_fmt() != format::any) { - output_format = node.get_preferred_output_fmt(); - } - - return layout(output_shape, output_type, output_format, prim->output_paddings[0]); -} - template std::vector gemm_inst::calc_output_layouts(gemm_node const& node, const kernel_impl_params& impl_param) { auto prim = impl_param.typed_desc(); @@ -229,7 +139,7 @@ layout gemm_inst::transform_output_layout(const std::shared_ptr prim (i == 1) ? transposed_input1_pshape : input_layouts[i].get_partial_shape(); for (size_t j = 0; j != input_pshape.size(); ++j) { - ov::Dimension::merge(output_pshape[j], output_pshape[j], input_pshape[j]); + ov::Dimension::broadcast_merge(output_pshape[j], output_pshape[j], input_pshape[j]); } } diff --git a/src/plugins/intel_gpu/src/graph/generate_proposals.cpp b/src/plugins/intel_gpu/src/graph/generate_proposals.cpp index 3ecb0e51b5761a..9d4bcc949f1a79 100644 --- a/src/plugins/intel_gpu/src/graph/generate_proposals.cpp +++ b/src/plugins/intel_gpu/src/graph/generate_proposals.cpp @@ -2,8 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "generate_proposals_inst.h" #include "openvino/op/generate_proposals.hpp" +#include "generate_proposals_inst.h" #include "generate_proposals_shape_inference.hpp" #include "primitive_type_base.h" #include "json_object.h" @@ -41,13 +41,6 @@ std::vector generate_proposals_inst::calc_output_layouts(generate_propos template std::vector generate_proposals_inst::calc_output_layouts(generate_proposals_node const& node, const kernel_impl_params& impl_param); -layout generate_proposals_inst::calc_output_layout(const generate_proposals_node& node, kernel_impl_params const& impl_param) { - const layout data_layout = impl_param.get_input_layout(); - const auto num_batches = data_layout.batch(); - const auto desc = impl_param.typed_desc(); - return layout(data_layout.data_type, data_layout.format, {static_cast(num_batches * desc->attrs.post_nms_count), 4, 1, 1}); -} - std::string generate_proposals_inst::to_string(const generate_proposals_node& node) { auto desc = node.get_primitive(); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp index 9dabf5f51ecc4b..438d076c4d0a42 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp @@ -81,24 +81,6 @@ void add_required_reorders::run(program& p) { } } - if (usr->is_type()) { - for (size_t i = 0; i < usr->get_dependencies().size(); i++) { - auto& dep = usr->get_dependency(i); - if (!dep.is_in_data_flow() || dep.is_constant()) - continue; - auto dep_layout = dep.get_output_layout(); - auto out_layout = usr->get_output_layout(); - bool required_reorder = (format::dimension(out_layout.format) != format::dimension(dep_layout.format)) || - (usr->is_in_shape_of_subgraph() && (out_layout.data_type != dep_layout.data_type)); - if (required_reorder) { - auto new_reorder = std::make_shared(dep.id() + "_reorder_" + usr->id(), dep.id(), out_layout.format, out_layout.data_type); - auto& new_reorder_node = p.get_or_create(new_reorder); - p.add_intermediate(new_reorder_node, *usr, dep); - new_reorder_node.recalc_output_layouts(false); - } - } - } - if (optimize_data) { auto fused_ops = usr->get_fused_primitives(); auto out_layout = usr->get_output_layout(); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/handle_reshape.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/handle_reshape.cpp index ca8b781f8d9e48..7d75092e5aaae8 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/handle_reshape.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/handle_reshape.cpp @@ -72,8 +72,7 @@ void handle_reshape::run(program& p) { // In case of new shape infer we should not shrink reshapes chain if first reshape changes input rank, e.g. // [a, b] -> reshape1 -> [a1, b1, c1] -> reshape2 -> [a2, b2, 0] and any of the reshapes has special_zero=true // Configuration above will fail if we remove reshape1 node as attempt to handle special zero will fail due to small rank of input - if (p.is_new_shape_infer() && - out_node->get_output_pshape().size() != node.get_input_pshape().size() && + if (out_node->get_output_pshape().size() != node.get_input_pshape().size() && (out_reshape.get_primitive()->special_zero || node.get_primitive()->special_zero)) return; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_shape_of_subgraphs.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_shape_of_subgraphs.cpp index cf0b733b6ef178..c92377bd1c18fb 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_shape_of_subgraphs.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_shape_of_subgraphs.cpp @@ -120,9 +120,7 @@ void mark_shape_of_subgraphs::mark_node(program_node& node) { } void mark_shape_of_subgraphs::run(program& p) { - if (p.is_new_shape_infer()) { - for (auto& node : p.get_processing_order()) { - look_for_shape_of_subgraph(*node); - } + for (auto& node : p.get_processing_order()) { + look_for_shape_of_subgraph(*node); } } diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_padding.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_padding.cpp index 9a97885e2601f9..0daffb74fdf0b5 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_padding.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_padding.cpp @@ -128,27 +128,36 @@ void prepare_padding::run(program& p) { auto& prim_node = node->as(); const auto& prim = prim_node.get_primitive(); - if (!prim->with_output_size) - continue; + auto padding_begin = prim->pads_begin; + auto padding_end = prim->pads_end; - padding needed_padding; - // WA for this format. sliding window needs to be fixed --perf degradation for IncepctionV1 type models - tensor size(1); - for (size_t i = 0; i < prim->size.size(); i++) { - size.spatial[i] = static_cast(prim->size[prim->size.size() - i - 1]); - } + tensor::value_type pb_z = std::max(padding_begin.size() >= 3 ? padding_begin[padding_begin.size() - 3] : 0, 0); + tensor::value_type pb_y = std::max(padding_begin.size() >= 2 ? padding_begin[padding_begin.size() - 2] : 0, 0); + tensor::value_type pb_x = std::max(padding_begin.size() >= 1 ? padding_begin[padding_begin.size() - 1] : 0, 0); + + tensor::value_type pe_z = std::max(padding_end.size() >= 3 ? padding_end[padding_end.size() - 3] : 0, 0); + tensor::value_type pe_y = std::max(padding_end.size() >= 2 ? padding_end[padding_end.size() - 2] : 0, 0); + tensor::value_type pe_x = std::max(padding_end.size() >= 1 ? padding_end[padding_end.size() - 1] : 0, 0); + + tensor pad_l = tensor(0); + tensor pad_u = tensor(0); + pad_l.spatial[0] = pb_x; + pad_l.spatial[1] = pb_y; + pad_l.spatial[2] = pb_z; + + pad_u.spatial[0] = pe_x; + pad_u.spatial[1] = pe_y; + pad_u.spatial[2] = pe_z; + + auto in_layout = prim_node.get_input_layout(); - if (node->get_output_layout().format == format::b_fs_yx_fsv16) - needed_padding = calc_sliding_window_needed_input_padding(prim_node.get_input_layout(), - prim->output_size, - size, - ov::CoordinateDiff(prim->pads_begin.begin(), prim->pads_begin.end()), - prim->stride, - ov::Strides(prim->size.size(), 1), - false, - 1); - else - needed_padding = prim_node.get_input_layout().data_padding; + const auto& actual_lpad = in_layout.data_padding.lower_size(); + const auto& actual_upad = in_layout.data_padding.upper_size(); + + auto needed_lpad = tensor::max(pad_l, actual_lpad); + auto needed_upad = tensor::max(pad_u, actual_upad); + + padding needed_padding(needed_lpad.sizes(), needed_upad.sizes()); add_required_padding(prim_node, needed_padding); } @@ -223,7 +232,7 @@ cldnn::padding prepare_padding::get_needed_padding_for_convolution(convolution_n auto& conv_input_node = node.get_dependency(0); // convolution have only one input primitive - auto prev_prim_output_layout = conv_input_node.get_output_layout(); + auto output_layout = conv_input_node.get_output_layout(); // Calculating input padding needed for convolution auto& filter_node = node.as().weights(); @@ -264,19 +273,41 @@ cldnn::padding prepare_padding::get_needed_padding_for_convolution(convolution_n padding_end_y = std::max(pad_y, 0); padding_end_z = std::max(pad_z, 0); } else { - auto input_limit_x = -pad_x + (conv_layout.spatial(0) - 1) * stride_x + - (filter_layout.spatial(0) - 1) * dilation_x + 1; - auto input_limit_y = -pad_y + (conv_layout.spatial(1) - 1) * stride_y + - (filter_layout.spatial(1) - 1) * dilation_y + 1; - auto input_limit_z = -pad_z + (conv_layout.spatial(2) - 1) * stride_z + - (filter_layout.spatial(2) - 1) * dilation_z + 1; + const auto& ker_size = filter_layout.get_partial_shape(); + const auto& in_size = conv_layout.get_partial_shape(); + const auto& out_size = output_layout.get_partial_shape(); + auto spatials_size = ker_size.size() - (conv->grouped_weights_shape ? 3 : 2); + + uint32_t kz = spatials_size >= 3 ? static_cast(ker_size[ker_size.size() - 3].get_length()) : 1; + uint32_t ky = spatials_size >= 2 ? static_cast(ker_size[ker_size.size() - 2].get_length()) : 1; + uint32_t kx = spatials_size >= 1 ? static_cast(ker_size[ker_size.size() - 1].get_length()) : 1; + + uint32_t in_z = spatials_size >= 3 ? static_cast(in_size[in_size.size() - 3].get_length()) : 1; + uint32_t in_y = spatials_size >= 2 ? static_cast(in_size[in_size.size() - 2].get_length()) : 1; + uint32_t in_x = spatials_size >= 1 ? static_cast(in_size[in_size.size() - 1].get_length()) : 1; + + uint32_t out_z = spatials_size >= 3 ? static_cast(out_size[out_size.size() - 3].get_length()) : 1; + uint32_t out_y = spatials_size >= 2 ? static_cast(out_size[out_size.size() - 2].get_length()) : 1; + uint32_t out_x = spatials_size >= 1 ? static_cast(out_size[out_size.size() - 1].get_length()) : 1; + + auto input_limit_x = -pad_x + (in_x - 1) * stride_x + + (kx - 1) * dilation_x + 1; + auto input_limit_y = -pad_y + (in_y - 1) * stride_y + + (ky - 1) * dilation_y + 1; + auto input_limit_z = -pad_z + (in_z - 1) * stride_z + + (kz - 1) * dilation_z + 1; padding_begin_x = std::max(pad_x, 0); padding_begin_y = std::max(pad_y, 0); padding_begin_z = std::max(pad_z, 0); - padding_end_x = std::max(input_limit_x - prev_prim_output_layout.spatial(0), 0); - padding_end_y = std::max(input_limit_y - prev_prim_output_layout.spatial(1), 0); - padding_end_z = std::max(input_limit_z - prev_prim_output_layout.spatial(2), 0); + padding_end_x = std::max(input_limit_x - out_x, 0); + padding_end_y = std::max(input_limit_y - out_y, 0); + padding_end_z = std::max(input_limit_z - out_z, 0); + + if (in_size.size() == 3) { + std::swap(padding_begin_x, padding_begin_y); + std::swap(padding_end_x, padding_end_y); + } } // Adjust right padding, so entire buffer size in X dimension is properly aligned. @@ -292,7 +323,7 @@ cldnn::padding prepare_padding::get_needed_padding_for_convolution(convolution_n needed_padding = padding({0, 0, padding_begin_y, padding_begin_x}, {0, 0, padding_end_y, padding_end_x}, 0); else if (padding_begin.size() >= 1) needed_padding = padding({0, 0, padding_begin_x}, {0, 0, padding_end_x}, 0); - needed_padding = padding::max(prev_prim_output_layout.data_padding, needed_padding); + needed_padding = padding::max(output_layout.data_padding, needed_padding); return needed_padding; } diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp index f63f1bf4efbe21..69d914084aa91e 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp @@ -134,7 +134,7 @@ void prepare_primitive_fusing_through::run(program& p) { if (node->is_type()) { auto out_shape = new_prev->get_output_layout().get_partial_shape(); // new_prev's layout became node's new layout after fusing auto in_shape = node->get_dependency(1).get_output_layout().get_partial_shape(); - if (!broadcastable(in_shape, out_shape, true, true)) + if (!broadcastable(in_shape, out_shape, true)) continue; } diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp index e4725ace72441b..9ef9fc1412e86a 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp @@ -676,8 +676,7 @@ void remove_redundant_reorders::run(program& p) { // In case of new shape infer we should not shrink reshapes chain if first reshape changes input rank, e.g. // [a, b] -> reshape1 -> [a1, b1, c1] -> reshape2 -> [a2, b2, 0] and any of the reshapes has special_zero=true // Configuration above will fail if we remove reshape1 node as attempt to handle special zero will fail due to small rank of input - if (p.is_new_shape_infer() && - reshape_node.get_output_pshape().size() != dep_node.get_input_pshape().size() && + if (reshape_node.get_output_pshape().size() != dep_node.get_input_pshape().size() && (reshape_node.get_primitive()->special_zero || reshape_input_node.get_primitive()->special_zero)) continue; diff --git a/src/plugins/intel_gpu/src/graph/grid_sample.cpp b/src/plugins/intel_gpu/src/graph/grid_sample.cpp index b800aa8d470773..d027cd4750a379 100644 --- a/src/plugins/intel_gpu/src/graph/grid_sample.cpp +++ b/src/plugins/intel_gpu/src/graph/grid_sample.cpp @@ -12,20 +12,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(grid_sample) -layout grid_sample_inst::calc_output_layout(const grid_sample_node& node, const kernel_impl_params& impl_param) { - const auto data_layout = impl_param.get_input_layout(); - const auto data_sizes = data_layout.get_dims(); - const auto& N = data_sizes[0]; - const auto& C = data_sizes[1]; - - const auto grid_layout = impl_param.get_input_layout(1); - const auto grid_sizes = grid_layout.get_dims(); - const auto& H = grid_sizes[1]; - const auto& W = grid_sizes[2]; - - return {data_layout.data_type, data_layout.format, tensor(data_layout.format, {N, C, H, W})}; -} - template std::vector grid_sample_inst::calc_output_layouts(grid_sample_node const& /*node*/, const kernel_impl_params& impl_param) { auto prim = impl_param.typed_desc(); diff --git a/src/plugins/intel_gpu/src/graph/grn.cpp b/src/plugins/intel_gpu/src/graph/grn.cpp index 8ee2ff6720a640..110549e33daff0 100644 --- a/src/plugins/intel_gpu/src/graph/grn.cpp +++ b/src/plugins/intel_gpu/src/graph/grn.cpp @@ -10,13 +10,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(grn) -layout grn_inst::calc_output_layout(grn_node const& node, kernel_impl_params const& impl_param) { - auto input_node_layout = impl_param.get_non_padded_input_layout(); - auto output_type = impl_param.desc->output_data_types[0].value_or(input_node_layout.data_type); - - return layout(output_type, input_node_layout.format, input_node_layout.get_tensor()); -} - std::string grn_inst::to_string(grn_node const& node) { auto node_info = node.desc_to_json(); auto desc = node.get_primitive(); diff --git a/src/plugins/intel_gpu/src/graph/group_normalization.cpp b/src/plugins/intel_gpu/src/graph/group_normalization.cpp index d9d359f339bcac..eab2230718dc54 100644 --- a/src/plugins/intel_gpu/src/graph/group_normalization.cpp +++ b/src/plugins/intel_gpu/src/graph/group_normalization.cpp @@ -9,18 +9,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(group_normalization) -layout group_normalization_inst::calc_output_layout(group_normalization_node const& node, kernel_impl_params const& impl_param) { - assert(static_cast(impl_param.desc->output_data_types[0]) == false && - "Output data type forcing is not supported for group_normalization_node!"); - auto input_node_layout = impl_param.get_non_padded_input_layout(); - auto output_type = impl_param.desc->output_data_types[0].value_or(input_node_layout.data_type); - - if (impl_param.has_fused_primitives()) - output_type = impl_param.get_output_element_type(); - - return layout(output_type, input_node_layout.format, input_node_layout.get_tensor()); -} - std::string group_normalization_inst::to_string(group_normalization_node const& node) { auto desc = node.get_primitive(); auto node_info = node.desc_to_json(); diff --git a/src/plugins/intel_gpu/src/graph/impls/common/loop.cpp b/src/plugins/intel_gpu/src/graph/impls/common/loop.cpp index 3523b420d221ae..e9383a8a61be0b 100644 --- a/src/plugins/intel_gpu/src/graph/impls/common/loop.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/common/loop.cpp @@ -4,7 +4,6 @@ #include "loop_inst.h" #include "impls/registry/implementation_map.hpp" #include "register.hpp" -#include "mutable_data_inst.h" #include "input_layout_inst.h" #include #include diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/non_max_suppression.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/non_max_suppression.cpp index 4783159d501404..7b8c89829254ea 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/non_max_suppression.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/non_max_suppression.cpp @@ -363,18 +363,6 @@ void run(non_max_suppression_inst& instance) { size_t output_size = instance.get_impl_params()->output_layouts[0].batch(); - // Legacy APIs using mutable inputs for multiple outputs - if (instance.has_third_output()) { - store_third_output(stream, instance.third_output_mem(), result); - } - - if (instance.has_second_output()) { - store_second_output(stream, instance.second_output_mem(), result, output_size); - store_first_output(stream, instance.output_memory_ptr(), result, output_size); - return; - } - - // New API for mutiple outputs support if (instance.outputs_memory_count() == 3) store_third_output(stream, instance.output_memory_ptr(2), result); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/adaptive_pooling.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/adaptive_pooling.cpp index 9f8f3ecfa5b7e2..8faddbc53c80d1 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/adaptive_pooling.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/adaptive_pooling.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "impls/ocl/kernel_selector_helper.h" #include "primitive_base.hpp" #include "adaptive_pooling_inst.h" @@ -22,19 +23,6 @@ struct adaptive_pooling_impl : public typed_primitive_impl_ocl return make_unique(*this); } -protected: - kernel_arguments_data get_arguments(const typed_primitive_inst& instance) const override { - kernel_arguments_data args = parent::get_arguments(instance); - auto desc = instance.get_typed_desc(); - - // Legacy multi-output - if (desc->num_outputs == 1 && desc->mode == adaptive_pooling_mode::max) { - args.outputs.push_back(instance.dep_memory_ptr(2)); - } - - return args; - } - public: static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) { const auto& primitive = impl_param.typed_desc(); @@ -46,12 +34,7 @@ struct adaptive_pooling_impl : public typed_primitive_impl_ocl params.mode = kernel_selector::PoolType::MAX; params.poolIndexElementType = to_data_type(primitive->index_element_type); params.outputs_num = 2; - if (primitive->num_outputs == 2) { - params.outputs.push_back(convert_data_tensor(impl_param.get_output_layout(1))); - } else { - // Legacy multi-output - params.outputs.push_back(convert_data_tensor(impl_param.get_input_layout(2))); - } + params.outputs.push_back(convert_data_tensor(impl_param.get_output_layout(1))); } return params; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/arg_max_min.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/arg_max_min.cpp index a1a869f131b3f4..acef46133fd317 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/arg_max_min.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/arg_max_min.cpp @@ -54,17 +54,6 @@ struct arg_max_min_impl : typed_primitive_impl_ocl { } } -protected: - kernel_arguments_data get_arguments(const typed_primitive_inst& instance) const override { - kernel_arguments_data args = parent::get_arguments(instance); - - // Legacy multi-output - if (instance.get_typed_desc()->has_second_output()) { - args.outputs.push_back(instance.dep_memory_ptr(instance.dependencies().size() - 1)); - } - - return args; - } public: static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) { @@ -75,7 +64,7 @@ struct arg_max_min_impl : typed_primitive_impl_ocl { const auto& sort_type = primitive->sort; const auto& values_first = primitive->values_first; const auto& stable = primitive->stable; - const auto& outputs_num = primitive->input_size() == 3 ? 2 : static_cast(primitive->output_size()); + const auto& outputs_num = static_cast(primitive->output_size()); auto argm_params = get_default_params(impl_param, is_shape_agnostic); @@ -103,13 +92,8 @@ struct arg_max_min_impl : typed_primitive_impl_ocl { else argm_params.argMaxMinSortType = kernel_selector::argm_sort::INDEX; - if (outputs_num == 2) { // for backward compatibility - if (primitive->input_size() != 3) { - argm_params.outputs.push_back(convert_data_tensor(impl_param.get_output_layout(1))); - } else { - // Legacy multi-output - argm_params.outputs.push_back(convert_data_tensor(impl_param.get_input_layout(2))); - } + if (outputs_num == 2) { + argm_params.outputs.push_back(convert_data_tensor(impl_param.get_output_layout(1))); } argm_params.values_first = values_first; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/batch_to_space.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/batch_to_space.cpp index 87e73704b7e7cd..88ba6cbbed563e 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/batch_to_space.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/batch_to_space.cpp @@ -25,15 +25,16 @@ struct batch_to_space_impl : typed_primitive_impl_ocl { const auto& primitive = impl_param.typed_desc(); auto params = get_default_params(impl_param); + auto out_rank = impl_param.output_layouts[0].get_rank(); if (primitive->shape_constant) { params.block_type = kernel_selector::base_params::ArgType::Constant; - params.block_shape = convert_dim_vector(primitive->block_shape); + params.block_shape = convert_vec_to_dim_tensor(primitive->block_shape, out_rank, 1); params.begin_type = kernel_selector::base_params::ArgType::Constant; - params.crops_begin = convert_dim_vector(primitive->crops_begin); + params.crops_begin = convert_vec_to_dim_tensor(primitive->crops_begin, out_rank, 0); params.end_type = kernel_selector::base_params::ArgType::Constant; - params.crops_end = convert_dim_vector(primitive->crops_end); + params.crops_end = convert_vec_to_dim_tensor(primitive->crops_end, out_rank, 0); } else { params.block_input_index = 1; params.block_type = kernel_selector::base_params::ArgType::Input; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/broadcast.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/broadcast.cpp index a1cb339ce7cae0..ace9cce7e96b99 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/broadcast.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/broadcast.cpp @@ -39,19 +39,9 @@ struct broadcast_impl : typed_primitive_impl_ocl { const auto format = impl_param.get_output_layout().format; size_t max_axes_num = format.dimension(); - const auto& broadcast_axes = primitive->broadcast_axes; - uint16_t index = (uint16_t)0; - uint16_t input_index = (uint16_t)broadcast_axes.size(); - // bfyx, bfzyx format for (size_t i = 0; i < max_axes_num; ++i) { - if (std::find(broadcast_axes.begin(), broadcast_axes.end(), i) != broadcast_axes.end()) { - params.input_order.push_back(index); - ++index; - } else { - params.input_order.push_back(input_index); - ++input_index; - } + params.input_order.push_back(i); } return params; @@ -67,57 +57,24 @@ struct broadcast_impl : typed_primitive_impl_ocl { auto input_pshape = i_layout.get_partial_shape(); auto output_pshape = o_layout.get_partial_shape(); - auto output_rank = output_pshape.size(); + auto new_in_shape = output_pshape; if (primitive->axes_mapping.empty()) { - bool use_new_shape_infer = impl_params.prog->is_new_shape_infer(); - if (!broadcastable(input_pshape, output_pshape, use_new_shape_infer)) { - input_pshape = extend_shape_to_rank_from_begin(input_pshape, output_pshape.size()); + if (!broadcastable(input_pshape, output_pshape)) { + new_in_shape = extend_shape_to_rank_from_begin(input_pshape, output_pshape.size()); } else { - input_pshape = extend_shape_to_rank_from_end(input_pshape, output_pshape.size()); + new_in_shape = extend_shape_to_rank_from_end(input_pshape, output_pshape.size()); } } else { - if (i_layout.is_static() && o_layout.is_static()) { - // If axis_mapping is specified, then ones are inserted according to it. - ov::Shape tmp_shape; - int prev_axis = -1; - int next_axis = -1; - size_t currentRank = 0; - int axe_idx = 0; - for (auto& axis : primitive->axes_mapping) { - prev_axis = next_axis; - next_axis = static_cast(axis); - - int ones_count = std::max(next_axis - prev_axis - 1, 0); - tmp_shape.insert(tmp_shape.begin() + currentRank, ones_count, 1ul); - tmp_shape.push_back(input_pshape[axe_idx].get_length()); // Consider the Broadcast kernel 'broadcast' input to output shape - - currentRank += ones_count + 1; - axe_idx += 1; - } - input_pshape = extend_shape_to_rank_from_end(tmp_shape, output_rank); - } else { - // dynamic input - // insert 1 to extend dimensions by axes_mapping - ov::Shape tmp_shape; - size_t idx = 0; - for (auto& axis : primitive->axes_mapping) { - if (idx == axis) { - tmp_shape.insert(tmp_shape.begin() + idx, 1, -1); - idx += 1; - } else { - tmp_shape.insert(tmp_shape.begin() + idx, axis - idx, 1); - idx = axis; - tmp_shape.insert(tmp_shape.begin() + idx, 1, -1); - idx += 1; - } + for (size_t i = 0; i < new_in_shape.size(); i++) { + if (primitive->axes_mapping.find(i) == primitive->axes_mapping.end()) { + new_in_shape[i] = 1; } - input_pshape = extend_shape_to_rank_from_end(tmp_shape, output_rank); } } - updated_impl_params.input_layouts[0].set_partial_shape(extend_shape_to_rank_from_end(input_pshape)); - updated_impl_params.input_layouts[0].format = format::adjust_to_rank(i_layout.format, input_pshape.size()); + updated_impl_params.input_layouts[0].set_partial_shape(extend_shape_to_rank_from_end(new_in_shape)); + updated_impl_params.input_layouts[0].format = format::adjust_to_rank(i_layout.format, new_in_shape.size()); updated_impl_params.output_layouts[0].set_partial_shape(extend_shape_to_rank_from_end(output_pshape)); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/ctc_greedy_decoder.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/ctc_greedy_decoder.cpp index 377c863b534f1e..d9946ec0c0a37a 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/ctc_greedy_decoder.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/ctc_greedy_decoder.cpp @@ -19,17 +19,6 @@ struct ctc_greedy_decoder_impl : typed_primitive_impl_ocl { DECLARE_OBJECT_TYPE_SERIALIZATION(cldnn::ocl::ctc_greedy_decoder_impl) -protected: - kernel_arguments_data get_arguments(const ctc_greedy_decoder_inst& instance) const override { - kernel_arguments_data args = parent::get_arguments(instance); - // Legacy multi-output - if (instance.desc()->num_outputs == 1) { - args.outputs.push_back(instance.dep_memory_ptr(instance.desc()->input_size() - 1)); - } - - return args; - } - public: std::unique_ptr clone() const override { return make_unique(*this); @@ -39,7 +28,6 @@ struct ctc_greedy_decoder_impl : typed_primitive_impl_ocl { const auto& primitive = impl_param.typed_desc(); auto params = get_default_params(impl_param); - auto has_second_output = !primitive->second_output.empty(); params.inputs.push_back(convert_data_tensor(impl_param.input_layouts[1])); params.merge_repeated = primitive->ctc_merge_repeated; if (primitive->blank_index == UINT32_MAX) { @@ -48,17 +36,14 @@ struct ctc_greedy_decoder_impl : typed_primitive_impl_ocl { params.blank_index = primitive->blank_index; } + if (primitive->blank_index == UINT32_MAX) { + params.blank_index = impl_param.get_input_layout(0).get_partial_shape()[2].get_length() - 1; + } else { + params.blank_index = primitive->blank_index; + } if (primitive->num_outputs == 2) { params.outputs_num = 2; params.outputs.push_back(convert_data_tensor(impl_param.get_output_layout(1))); - - } else { - // Legacy multi-output - params.outputs_num = has_second_output ? 2 : 1; - - if (params.outputs_num == 2) { - params.outputs.push_back(convert_data_tensor(impl_param.get_input_layout(1))); - } } return params; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/dft.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/dft.cpp index 071c5e466a2d8f..517488443330e9 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/dft.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/dft.cpp @@ -27,8 +27,7 @@ struct dft_impl : typed_primitive_impl_ocl { auto params = get_default_params(impl_param); auto& memory_deps = impl_param.memory_deps; - bool allow_new_shape_infer = impl_param.get_program().is_new_shape_infer(); - if (allow_new_shape_infer && primitive->axes.empty() && primitive->signal_size.empty()) { + if (primitive->axes.empty() && primitive->signal_size.empty()) { if (memory_deps.count(1)) { auto axes_mem = memory_deps.at(1); cldnn::mem_lock axes_lock(axes_mem, impl_param.get_stream()); @@ -83,10 +82,10 @@ struct dft_impl : typed_primitive_impl_ocl { const auto output_layout = impl_param.get_output_layout(); // No need to extend layout for input that has less than 4 dimensions if (input_layout.get_rank() != output_layout.get_rank()) { - auto new_dims = input_layout.get_dims(); + auto new_dims = input_layout.get_partial_shape(); new_dims.push_back(1); const auto new_fmt = format::adjust_to_rank(input_layout.format, new_dims.size()); - params.inputs[0] = convert_data_tensor({input_layout.data_type, new_fmt, tensor(new_fmt, new_dims)}); + params.inputs[0] = convert_data_tensor({new_dims, input_layout.data_type, new_fmt}); } } @@ -96,15 +95,43 @@ struct dft_impl : typed_primitive_impl_ocl { const auto output_layout = impl_param.get_output_layout(); // No need to extend layout for output that has less than 4 dimensions if (input_layout.get_rank() != output_layout.get_rank()) { - auto new_dims = output_layout.get_dims(); + auto new_dims = output_layout.get_partial_shape(); new_dims.push_back(1); const auto new_fmt = format::adjust_to_rank(output_layout.format, new_dims.size()); - params.outputs[0] = convert_data_tensor({output_layout.data_type, new_fmt, tensor(new_fmt, new_dims)}); + params.outputs[0] = convert_data_tensor({new_dims, output_layout.data_type, new_fmt}); } } return params; } + + static kernel_impl_params static_canonicalize_shapes(const kernel_impl_params& impl_params) { + auto updated_impl_params = canonicalize_fused_shapes(impl_params); + auto primitive = impl_params.typed_desc(); + + for (auto& input_layout : updated_impl_params.input_layouts) { + input_layout.set_partial_shape(extend_shape_to_rank_from_end(input_layout.get_partial_shape())); + } + + auto& output_layout = updated_impl_params.output_layouts[0]; + auto output_shape = output_layout.get_partial_shape(); + // Extend shape to 4d by pushing ones at the end (needed to support less than 4d cases) + for (auto i = output_shape.size(); i < 4; ++i) { + auto it = output_shape.end(); + // For IRDFT push ones at the end, for other DTFs push ones before the last dim + if (primitive->direction != dft_direction::inverse || primitive->mode != dft_mode::real) { + it = std::prev(it); + } + output_shape.insert(it, 1); + } + output_layout.set_partial_shape(output_shape); + + return updated_impl_params; + } + + kernel_impl_params canonicalize_shapes(const kernel_impl_params& impl_params) const override { + return dft_impl::static_canonicalize_shapes(impl_params); + } }; namespace detail { diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/eltwise.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/eltwise.cpp index c3772d922e3c26..5b113785058f87 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/eltwise.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/eltwise.cpp @@ -129,7 +129,6 @@ struct eltwise_impl : typed_primitive_impl_ocl { static kernel_impl_params static_canonicalize_shapes(const kernel_impl_params& impl_params) { auto updated_impl_params = canonicalize_fused_shapes(impl_params); - bool use_new_shape_infer = impl_params.prog->is_new_shape_infer(); auto& output_layout = updated_impl_params.output_layouts[0]; auto out_pshape = output_layout.get_partial_shape(); @@ -137,7 +136,7 @@ struct eltwise_impl : typed_primitive_impl_ocl { for (auto& input_layout : updated_impl_params.input_layouts) { auto input_pshape = input_layout.get_partial_shape(); - if (!broadcastable(input_pshape, out_pshape, use_new_shape_infer)) { + if (!broadcastable(input_pshape, out_pshape)) { input_pshape = extend_shape_to_rank_from_begin(input_pshape, out_pshape.size()); } input_layout.set_partial_shape(extend_shape_to_rank_from_end(input_pshape)); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/embedding_bag.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/embedding_bag.cpp index 43ed52e50b81b3..3bc53327b5fbfb 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/embedding_bag.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/embedding_bag.cpp @@ -10,6 +10,34 @@ namespace cldnn { namespace ocl { +namespace { +std::vector get_kernel_arg_indices(size_t num_inputs, embedding_bag::embedding_bag_type type) { + std::vector input_idx = {0, 1}; // common input indices + switch (type) { + case embedding_bag::packed_sum: { + if (num_inputs == 3) { + input_idx.push_back(2); // optional per_sample_weights + } + break; + } + case embedding_bag::offsets_sum: { + input_idx.push_back(2); // offsets + if (num_inputs == 5) { + input_idx.push_back(4); // optional per_sample_weights + } + break; + } + case embedding_bag::segments_sum: + input_idx.push_back(2); // segment_ids + if (num_inputs == 6) { + input_idx.push_back(5); // optional per_sample_weights + } + break; + } + return input_idx; +} + +} // namespace struct embedding_bag_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; @@ -22,12 +50,22 @@ struct embedding_bag_impl : typed_primitive_impl_ocl { return make_unique(*this); } + kernel_arguments_data get_arguments(const typed_primitive_inst& instance) const override { + kernel_arguments_data args = parent::get_arguments(instance); + args.inputs.clear(); + auto primitive = instance.get_typed_desc(); + + auto input_idx = get_kernel_arg_indices(primitive->input_size(), primitive->type); + for (size_t i = 0; i < input_idx.size(); i++) { + args.inputs.push_back(instance.input_memory_ptr(input_idx[i])); + } + + return args; + } + static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) { const auto& primitive = impl_param.typed_desc(); auto params = get_default_params(impl_param); - - auto inputs_count = impl_param.input_layouts.size(); - switch (primitive->type) { case embedding_bag::packed_sum: params.type = kernel_selector::EmbeddingBagType::PACKED_SUM; @@ -41,8 +79,10 @@ struct embedding_bag_impl : typed_primitive_impl_ocl { default: OPENVINO_ASSERT(false, "[GPU] Unknown embedding_bag type in primitive ", primitive->id); } - for (size_t i = 1; i < inputs_count; i++) { - params.inputs.push_back(convert_data_tensor(impl_param.input_layouts[i])); + params.inputs.clear(); + auto input_idx = get_kernel_arg_indices(primitive->input_size(), primitive->type); + for (size_t i = 0; i < input_idx.size(); i++) { + params.inputs.push_back(convert_data_tensor(impl_param.input_layouts[input_idx[i]])); } params.default_index = primitive->default_index; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_detection_output.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_detection_output.cpp index df65ba8beea5c6..384643aa4f2f6b 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_detection_output.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_detection_output.cpp @@ -23,17 +23,6 @@ struct experimental_detectron_detection_output_impl return make_unique(*this); } -protected: - kernel_arguments_data get_arguments(const typed_primitive_inst& instance) const override { - kernel_arguments_data args = parent::get_arguments(instance); - if (instance.desc()->num_outputs == 1) { - // Legacy multi-output - args.outputs.push_back(instance.output_classes_memory()); - args.outputs.push_back(instance.output_scores_memory()); - } - - return args; - } public: static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) { @@ -49,26 +38,14 @@ struct experimental_detectron_detection_output_impl params.class_agnostic_box_regression = primitive->class_agnostic_box_regression; params.deltas_weights = primitive->deltas_weights; - if (impl_param.prog->is_new_shape_infer()) { - const size_t num_inputs = primitive->input_size(); - for (size_t i = 1; i < num_inputs; i++) { - params.inputs.push_back(convert_data_tensor(impl_param.get_input_layout(i))); - } - - params.outputs.push_back(convert_data_tensor(impl_param.output_layouts[1])); - params.outputs.push_back(convert_data_tensor(impl_param.output_layouts[2])); - } else { - const size_t num_deps = primitive->input_size(); - OPENVINO_ASSERT(num_deps == 6, "Unexpected deps num: ", num_deps); - const size_t num_inputs = num_deps - 2; - for (size_t i = 1; i < num_inputs; i++) { - params.inputs.push_back(convert_data_tensor(impl_param.get_input_layout(i))); - } - for (size_t i = num_inputs; i < num_deps; i++) { - params.outputs.push_back(convert_data_tensor(impl_param.get_input_layout(i))); - } + const size_t num_inputs = primitive->input_size(); + for (size_t i = 1; i < num_inputs; i++) { + params.inputs.push_back(convert_data_tensor(impl_param.get_input_layout(i))); } + params.outputs.push_back(convert_data_tensor(impl_param.output_layouts[1])); + params.outputs.push_back(convert_data_tensor(impl_param.output_layouts[2])); + return params; } }; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_generate_proposals_single_image.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_generate_proposals_single_image.cpp index 6242b987369126..33d53322e7c4ac 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_generate_proposals_single_image.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_generate_proposals_single_image.cpp @@ -23,25 +23,6 @@ struct experimental_detectron_generate_proposals_single_image_impl return make_unique(*this); } -protected: - kernel_arguments_data get_arguments(const typed_primitive_inst& instance) const override { - kernel_arguments_data args; - if (instance.desc()->num_outputs == 1) { - const auto num_inputs = instance.inputs_memory_count(); - for (size_t i = 0; i < num_inputs; ++i) { - args.inputs.push_back(instance.input_memory_ptr(i)); - } - - args.outputs.push_back(instance.output_memory_ptr()); - //TODO: Future improvement: To add second output parameter only when it's needed - args.outputs.push_back(instance.output_roi_scores_memory()); - } else { - args = parent::get_arguments(instance); - } - - return args; - } - public: static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) { const auto& primitive = impl_param.typed_desc(); @@ -52,25 +33,13 @@ struct experimental_detectron_generate_proposals_single_image_impl params.pre_nms_count = primitive->pre_nms_count; params.post_nms_count = primitive->post_nms_count; - if (impl_param.prog->is_new_shape_infer()) { - const size_t num_inputs = primitive->input_size(); - for (size_t i = 1; i < num_inputs; i++) { - params.inputs.push_back(convert_data_tensor(impl_param.get_input_layout(i))); - } - - params.outputs.push_back(convert_data_tensor(impl_param.output_layouts[1])); - } else { - const size_t num_deps = primitive->input_size(); - OPENVINO_ASSERT(num_deps == 5, "Unexpected deps num: ", num_deps); - const size_t num_inputs = num_deps - 1; - for (size_t i = 1; i < num_inputs; i++) { - params.inputs.push_back(convert_data_tensor(impl_param.get_input_layout(i))); - } - for (size_t i = num_inputs; i < num_deps; i++) { - params.outputs.push_back(convert_data_tensor(impl_param.get_input_layout(i))); - } + const size_t num_inputs = primitive->input_size(); + for (size_t i = 1; i < num_inputs; i++) { + params.inputs.push_back(convert_data_tensor(impl_param.get_input_layout(i))); } + params.outputs.push_back(convert_data_tensor(impl_param.output_layouts[1])); + return params; } }; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_roi_feature_extractor.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_roi_feature_extractor.cpp index ae008547fe30b6..a15800c9a32008 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_roi_feature_extractor.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_roi_feature_extractor.cpp @@ -22,6 +22,7 @@ struct experimental_detectron_roi_feature_extractor_impl : public typed_primitiv return make_unique(*this); } +protected: event::ptr execute_impl(const std::vector& events, experimental_detectron_roi_feature_extractor_inst& instance) override { instance.copy_rois_input_to_second_output(); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp index d3acb9dd6a9b55..98cb73249973a0 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp @@ -80,7 +80,7 @@ struct fully_connected_impl : typed_primitive_impl_ocl { static kernel_impl_params update_impl_params(const kernel_impl_params& impl_param) { const auto& primitive = impl_param.typed_desc(); - auto get_fc_input_layouts = [primitive](const std::vector& input_layouts, bool allow_new_shape_infer) { + auto get_fc_input_layouts = [primitive](const std::vector& input_layouts) { auto reshape_to_2d = [](const ov::PartialShape& shape, const ov::Dimension& feature, size_t rank) { if (shape.is_static()) { auto static_shape = shape.to_shape(); @@ -98,10 +98,7 @@ struct fully_connected_impl : typed_primitive_impl_ocl { auto input0_pshape = input0_layout.get_partial_shape(); auto input1_pshape = input1_layout.get_partial_shape(); - ov::Dimension feature = input0_pshape[std::min(primitive->input_size, static_cast(4)) - 1ul]; - if (allow_new_shape_infer) { - feature = input0_pshape[primitive->input_size - 1ul]; - } + ov::Dimension feature = input0_pshape[primitive->input_size - 1ul]; // TO DO, to remove WA if (primitive->input_size > 3) { @@ -147,10 +144,9 @@ struct fully_connected_impl : typed_primitive_impl_ocl { return updated_out_layout; }; - bool allow_new_shape_infer = impl_param.get_program().is_new_shape_infer(); auto updated_impl_param = impl_param; - const auto input_layouts = get_fc_input_layouts(impl_param.input_layouts, allow_new_shape_infer); + const auto input_layouts = get_fc_input_layouts(impl_param.input_layouts); for (size_t i = 0; i < input_layouts.size(); ++i) { updated_impl_param.input_layouts[i] = input_layouts[i]; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/generate_proposals.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/generate_proposals.cpp index 244c77151a812d..4bd2c314c128e9 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/generate_proposals.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/generate_proposals.cpp @@ -23,17 +23,6 @@ struct generate_proposals_impl return make_unique(*this); } -protected: - kernel_arguments_data get_arguments(const typed_primitive_inst& instance) const override { - auto args = parent::get_arguments(instance); - if (instance.desc()->num_outputs == 1) { - // Legacy multi-output - args.outputs.push_back(instance.output_rois_scores_memory()); - args.outputs.push_back(instance.output_rois_nums_memory()); - } - return args; - } - public: static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) { const auto& primitive = impl_param.typed_desc(); @@ -45,29 +34,16 @@ struct generate_proposals_impl params.post_nms_count = primitive->attrs.post_nms_count; params.normalized = primitive->attrs.normalized; params.nms_eta = primitive->attrs.nms_eta; + params.roi_num_type = to_data_type(primitive->output_data_types[2].value()); - if (impl_param.prog-> is_new_shape_infer()) { - params.roi_num_type = to_data_type(primitive->output_data_types[2].value()); - const size_t num_inputs = primitive->input_size(); - for (size_t i = 1; i < num_inputs; i++) { - params.inputs.push_back(convert_data_tensor(impl_param.get_input_layout(i))); - } - - params.outputs.push_back(convert_data_tensor(impl_param.output_layouts[1])); - params.outputs.push_back(convert_data_tensor(impl_param.output_layouts[2])); - } else { - params.roi_num_type = primitive->roi_num_type == cldnn::data_types::i32 ? kernel_selector::Datatype::INT32 : kernel_selector::Datatype::INT64; - const size_t num_deps = primitive->input_size(); - OPENVINO_ASSERT(num_deps == 6, "Unexpected deps num: ", num_deps); - const size_t num_inputs = num_deps - 2; - for (size_t i = 1; i < num_inputs; i++) { - params.inputs.push_back(convert_data_tensor(impl_param.get_input_layout(i))); - } - for (size_t i = num_inputs; i < num_deps; i++) { - params.outputs.push_back(convert_data_tensor(impl_param.get_input_layout(i))); - } + const size_t num_inputs = primitive->input_size(); + for (size_t i = 1; i < num_inputs; i++) { + params.inputs.push_back(convert_data_tensor(impl_param.get_input_layout(i))); } + params.outputs.push_back(convert_data_tensor(impl_param.output_layouts[1])); + params.outputs.push_back(convert_data_tensor(impl_param.output_layouts[2])); + return params; } }; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.h b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.h index 3ddb5bf8793c29..4229cd57128ebb 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.h +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.h @@ -117,6 +117,17 @@ kernel_selector::dim_tensor convert_dim_vector(const tensor& t) { static_cast(sizes[5])}; } + +inline kernel_selector::DimTensor convert_vec_to_dim_tensor(const std::vector& p, size_t out_rank, int32_t default_value) { + auto sizes = p; + auto format = cldnn::format::get_default_format(out_rank); + for (size_t s = sizes.size(); s < format.dimension(); s++) { + sizes.push_back(default_value); + } + tensor block_shape(format, sizes, default_value); + return convert_dim_vector(block_shape); +} + std::shared_ptr convert_fuse_params(std::shared_ptr p); void convert_fused_ops_to_legacy_activations(const kernel_impl_params& param_info, std::vector& activations); bool use_legacy_fused_ops(const kernel_impl_params& param_info); @@ -233,8 +244,7 @@ inline ov::PartialShape extend_shape_to_rank_from_begin(const ov::PartialShape& return extended_pshape; } -inline bool broadcastable(const ov::PartialShape& first_pshape, const ov::PartialShape& second_pshape, bool use_new_shape_infer, - bool first_to_second_only = false) { +inline bool broadcastable(const ov::PartialShape& first_pshape, const ov::PartialShape& second_pshape, bool first_to_second_only = false) { if (first_pshape.is_dynamic() || second_pshape.is_dynamic()) { return false; } @@ -243,7 +253,7 @@ inline bool broadcastable(const ov::PartialShape& first_pshape, const ov::Partia return false; } } else { - if (first_pshape.size() != second_pshape.size() && use_new_shape_infer) { + if (first_pshape.size() != second_pshape.size()) { return false; } } @@ -259,7 +269,6 @@ inline bool broadcastable(const ov::PartialShape& first_pshape, const ov::Partia inline kernel_impl_params canonicalize_fused_shapes(const kernel_impl_params& impl_params) { auto updated_impl_params = impl_params; - bool use_new_shape_infer = impl_params.prog->is_new_shape_infer(); for (auto& fd : updated_impl_params.fused_desc) { if (fd.is_type() && fd.total_num_deps == 2 && fd.has_outer_dep()) { @@ -269,7 +278,7 @@ inline kernel_impl_params canonicalize_fused_shapes(const kernel_impl_params& im auto& dep_layout = updated_impl_params.input_layouts[fd.outer_dep_start_idx]; const auto& dep_shape = dep_layout.get_partial_shape(); - if (!broadcastable(dep_shape, out_pshape, use_new_shape_infer)) { + if (!broadcastable(dep_shape, out_pshape)) { dep_layout.set_partial_shape(extend_shape_to_rank_from_begin(dep_shape, out_pshape.size())); } } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/matrix_nms.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/matrix_nms.cpp index 4e6a32ace1d8ef..3cc16f2fde4911 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/matrix_nms.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/matrix_nms.cpp @@ -47,33 +47,14 @@ struct matrix_nms_impl : typed_primitive_impl_ocl { return make_unique(*this); } -protected: - kernel_arguments_data get_arguments(const matrix_nms_inst& instance) const override { - kernel_arguments_data args = parent::get_arguments(instance); - // Legacy multi-output - if (instance.desc()->num_outputs == 1) { - args.outputs.push_back(instance.input_selected_boxes_mem()); - args.outputs.push_back(instance.input_valid_outputs_mem()); - } - - return args; - } - public: static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) { const auto& primitive = impl_param.typed_desc(); auto params = get_default_params(impl_param); params.inputs.push_back(convert_data_tensor(impl_param.input_layouts[1])); - - if (primitive->num_outputs == 3) { - params.outputs.push_back(convert_data_tensor(impl_param.output_layouts[1])); - params.outputs.push_back(convert_data_tensor(impl_param.output_layouts[2])); - } else { - // Legacy multi-output - params.outputs.push_back(convert_data_tensor(impl_param.get_input_layout(2))); - params.outputs.push_back(convert_data_tensor(impl_param.get_input_layout(3))); - } + params.outputs.push_back(convert_data_tensor(impl_param.output_layouts[1])); + params.outputs.push_back(convert_data_tensor(impl_param.output_layouts[2])); params.sort_type = from(primitive->attribs.sort_result_type); params.sort_result_across_batch = primitive->attribs.sort_result_across_batch; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/multiclass_nms.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/multiclass_nms.cpp index c07bf9dac81daa..3907f6019a58fa 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/multiclass_nms.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/multiclass_nms.cpp @@ -43,17 +43,6 @@ struct multiclass_nms_impl : public typed_primitive_impl_ocl { return make_unique(*this); } -protected: - kernel_arguments_data get_arguments(const typed_primitive_inst& instance) const override { - kernel_arguments_data args = parent::get_arguments(instance); - // Legacy multi-output - if (instance.desc()->num_outputs == 1) { - args.outputs.push_back(instance.output_indices_memory()); - args.outputs.push_back(instance.output_num_memory()); - } - return args; - } - public: static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) { const auto& primitive = impl_param.typed_desc(); @@ -70,24 +59,14 @@ struct multiclass_nms_impl : public typed_primitive_impl_ocl { params.background_class = attrs.background_class; params.normalized = attrs.normalized; params.nms_eta = attrs.nms_eta; - params.has_roisnum = primitive->has_roisnum; - - size_t inputs_num = primitive->has_roisnum ? 3 : 2; params.inputs.push_back(convert_data_tensor(impl_param.input_layouts[1])); - if (inputs_num == 3) { + if (primitive->input_size() == 3) { params.inputs.push_back(convert_data_tensor(impl_param.input_layouts[2])); params.has_roisnum = true; } - - if (primitive->num_outputs == 3) { - params.outputs.push_back(convert_data_tensor(impl_param.output_layouts[1])); - params.outputs.push_back(convert_data_tensor(impl_param.output_layouts[2])); - } else { - // Legacy multi-output - params.outputs.push_back(convert_data_tensor(impl_param.input_layouts[inputs_num + 0])); - params.outputs.push_back(convert_data_tensor(impl_param.input_layouts[inputs_num + 1])); - } + params.outputs.push_back(convert_data_tensor(impl_param.output_layouts[1])); + params.outputs.push_back(convert_data_tensor(impl_param.output_layouts[2])); return params; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/non_max_suppression.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/non_max_suppression.cpp index 65bfa94173bf11..61fdad39a67841 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/non_max_suppression.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/non_max_suppression.cpp @@ -46,15 +46,9 @@ struct non_max_suppression_impl : typed_primitive_impl_ocl args.inputs.push_back(instance.soft_nms_sigma_mem()); } - // New API for mutiple outputs support for (size_t i = 0; i < instance.outputs_memory_count(); i++) { args.outputs.push_back(instance.output_memory_ptr(i)); } - // // Legacy multi-output - if (instance.has_second_output()) - args.outputs.push_back(instance.second_output_mem()); - if (instance.has_third_output()) - args.outputs.push_back(instance.third_output_mem()); return args; } @@ -112,29 +106,8 @@ static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, b } } - auto get_additional_output_node_idx = [&] (bool is_third) { - size_t offset = 2; - offset += arg.has_num_select_per_class(); - offset += arg.has_iou_threshold(); - offset += arg.has_score_threshold(); - offset += arg.has_soft_nms_sigma(); - if (is_third) - offset += arg.has_second_output(); - return offset; - }; - - // Legacy multi-output - if (arg.has_second_output()) { - params.outputs.push_back(convert_data_tensor(impl_param.input_layouts[get_additional_output_node_idx(false)])); - } - - if (arg.has_third_output()) { - params.outputs.push_back(convert_data_tensor(impl_param.input_layouts[get_additional_output_node_idx(true)])); - } - - if (arg.use_multiple_outputs()) { - params.outputs.push_back(convert_data_tensor(impl_param.output_layouts[1])); - params.outputs.push_back(convert_data_tensor(impl_param.output_layouts[2])); + for (size_t i = 1; i < primitive->num_outputs; i++) { + params.outputs.push_back(convert_data_tensor(impl_param.output_layouts[i])); } params.sort_result_descending = primitive->sort_result_descending; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/pooling.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/pooling.cpp index d9496db3377915..63a12cdbd6f0d0 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/pooling.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/pooling.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "impls/ocl/kernel_selector_helper.h" #include "openvino/core/validation_util.hpp" #include "pooling/pooling_kernel_base.h" #include "pooling/pooling_kernel_selector.h" @@ -54,17 +55,6 @@ struct pooling_impl : typed_primitive_impl_ocl { return make_unique(*this); } -protected: - kernel_arguments_data get_arguments(const typed_primitive_inst& instance) const override { - kernel_arguments_data args = parent::get_arguments(instance); - // Legacy multi-output - if (instance.get_typed_desc()->maxPoolOpset8Features) { - args.inputs = { instance.dep_memory_ptr(0) }; - args.outputs.push_back(instance.dep_memory_ptr(1)); - } - return args; - } - public: static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) { const auto& primitive = impl_param.typed_desc(); @@ -72,19 +62,12 @@ struct pooling_impl : typed_primitive_impl_ocl { params.maxPoolOpset8Features = primitive->maxPoolOpset8Features; if (params.maxPoolOpset8Features) { - switch (primitive->index_element_type) { - case cldnn::data_types::i32: { - params.poolIndexElementType = kernel_selector::Datatype::INT32; - break; - } - case cldnn::data_types::i64: { - params.poolIndexElementType = kernel_selector::Datatype::INT64; - break; - } - default: - throw std::runtime_error{"Not supported index element type"}; - } + params.poolIndexElementType = to_data_type(primitive->index_element_type); params.poolAxis = primitive->axis; + + if (primitive->num_outputs == 2) { + params.outputs.push_back(convert_data_tensor(impl_param.get_output_layout(1))); + } } const auto& input_layout = impl_param.get_input_layout(); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/reverse.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/reverse.cpp index aee736b6871299..e925f4103e3956 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/reverse.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/reverse.cpp @@ -28,8 +28,8 @@ struct reverse_impl : typed_primitive_impl_ocl { auto params = get_default_params(impl_param); params.inputs.push_back(convert_data_tensor(impl_param.get_input_layout(1))); - params.reverseMode = primitive->mode == reverse_mode::index ? kernel_selector::reverse_mode::index - : kernel_selector::reverse_mode::mask; + params.reverseMode = primitive->mode == ov::op::v1::Reverse::Mode::INDEX ? kernel_selector::reverse_mode::index + : kernel_selector::reverse_mode::mask; return params; } }; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/roll.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/roll.cpp index 85cff366ac18bf..d5fb1dbb6cc569 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/roll.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/roll.cpp @@ -26,44 +26,38 @@ struct roll_impl : typed_primitive_impl_ocl { static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) { const auto& primitive = impl_param.typed_desc(); auto params = get_default_params(impl_param); - - if ((primitive->raw_shift.empty()) && (primitive->raw_axes.empty())) { - // Primitive created with static shape input - params.shift = convert_dim_vector(primitive->shift); - } else { - // Primitive created with dynamic shape input - const auto input_layout = impl_param.get_input_layout(0); - const auto& input_shape = input_layout.get_shape(); - const auto rank = static_cast(input_layout.get_rank()); - const auto format = cldnn::format::get_default_format(rank); - const auto default_rank = format.dimension(); - auto axes_raw = primitive->raw_axes; - auto shift_raw = primitive->raw_shift; - - // Normalize axes and sum shift - std::vector shift(default_rank); - for (size_t a = 0; a < axes_raw.size(); ++a) { - auto& axis = axes_raw[a]; - if (axis < 0) { - axis += rank; - } - if (axis < 0 || axis >= rank) { - OPENVINO_THROW(" Incorrect axis value: ", axis); - } - shift[axis] += shift_raw[a]; + // Primitive created with dynamic shape input + const auto input_layout = impl_param.get_input_layout(0); + const auto& input_shape = input_layout.get_shape(); + const auto rank = static_cast(input_layout.get_rank()); + const auto format = cldnn::format::get_default_format(rank); + const auto default_rank = format.dimension(); + auto axes_raw = primitive->raw_axes; + auto shift_raw = primitive->raw_shift; + + // Normalize axes and sum shift + std::vector shift(default_rank); + for (size_t a = 0; a < axes_raw.size(); ++a) { + auto& axis = axes_raw[a]; + if (axis < 0) { + axis += rank; + } + if (axis < 0 || axis >= rank) { + OPENVINO_THROW(" Incorrect axis value: ", axis); } + shift[axis] += shift_raw[a]; + } - // Normalize shift - for (int s = 0; s < rank; ++s) { - auto& sh = shift[s]; - const auto dim = static_cast(input_shape[s]); - sh %= dim; - if (sh < 0) { - sh += dim; - } + // Normalize shift + for (int s = 0; s < rank; ++s) { + auto& sh = shift[s]; + const auto dim = static_cast(input_shape[s]); + sh %= dim; + if (sh < 0) { + sh += dim; } - params.shift = convert_dim_vector({format, shift}); } + params.shift = convert_vec_to_dim_tensor(shift, input_layout.get_rank(), 0); return params; } }; @@ -90,13 +84,7 @@ attach_roll_impl::attach_roll_impl() { format::bfwzyx }; - std::set> keys; - for (const auto& t : types) { - for (const auto& f : formats) { - keys.emplace(t, f); - } - } - implementation_map::add(impl_types::ocl, typed_primitive_impl_ocl::create, keys); + implementation_map::add(impl_types::ocl, typed_primitive_impl_ocl::create, types, formats); } } // namespace detail diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/space_to_batch.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/space_to_batch.cpp index 75568cc8984bd3..a83e2514cbb106 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/space_to_batch.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/space_to_batch.cpp @@ -10,6 +10,7 @@ namespace cldnn { namespace ocl { + struct space_to_batch_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; using parent::parent; @@ -25,16 +26,17 @@ struct space_to_batch_impl : typed_primitive_impl_ocl { static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) { const auto& primitive = impl_param.typed_desc(); auto params = get_default_params(impl_param); + auto out_rank = impl_param.output_layouts[0].get_rank(); if (primitive->shape_constant) { params.block_type = kernel_selector::base_params::ArgType::Constant; - params.block_shape = convert_dim_vector(primitive->block_shape); + params.block_shape = convert_vec_to_dim_tensor(primitive->block_shape, out_rank, 1); params.begin_type = kernel_selector::base_params::ArgType::Constant; - params.pads_begin = convert_dim_vector(primitive->pads_begin); + params.pads_begin = convert_vec_to_dim_tensor(primitive->pads_begin, out_rank, 0); params.end_type = kernel_selector::base_params::ArgType::Constant; - params.pads_end = convert_dim_vector(primitive->pads_end); + params.pads_end = convert_vec_to_dim_tensor(primitive->pads_end, out_rank, 0); } else { params.block_input_index = 1; params.block_type = kernel_selector::base_params::ArgType::Input; diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h index 57fd4afbe933d6..8d9b88e5a0c652 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h @@ -353,10 +353,6 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { void build_primitive(const ExecutionConfig& config) { auto cache_outpath = get_cache_directory(config); - if (!config.get_property(ov::intel_gpu::allow_new_shape_infer)) { - cache_outpath = ""; - } - if (cache_outpath.empty()) { _prim = PrimType(_pd); } else { diff --git a/src/plugins/intel_gpu/src/graph/include/activation_inst.h b/src/plugins/intel_gpu/src/graph/include/activation_inst.h index 83c89ea6fe023d..2440b18968638e 100644 --- a/src/plugins/intel_gpu/src/graph/include/activation_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/activation_inst.h @@ -54,7 +54,6 @@ class typed_primitive_inst : public typed_primitive_inst_base(impl_param); } - static layout calc_output_layout(activation_node const& node, kernel_impl_params const& impl_param); static std::string to_string(activation_node const& node); typed_primitive_inst(network& network, activation_node const& node); diff --git a/src/plugins/intel_gpu/src/graph/include/adaptive_pooling_inst.h b/src/plugins/intel_gpu/src/graph/include/adaptive_pooling_inst.h index 59ac50aa567ab0..2fc9deecd29f14 100644 --- a/src/plugins/intel_gpu/src/graph/include/adaptive_pooling_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/adaptive_pooling_inst.h @@ -29,7 +29,6 @@ class typed_primitive_inst : public typed_primitive_inst_base< public: template static std::vector calc_output_layouts(adaptive_pooling_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(adaptive_pooling_node const& node, kernel_impl_params const& impl_param); static std::string to_string(adaptive_pooling_node const& node); typed_primitive_inst(network& network, adaptive_pooling_node const& node); diff --git a/src/plugins/intel_gpu/src/graph/include/arg_max_min_inst.h b/src/plugins/intel_gpu/src/graph/include/arg_max_min_inst.h index e7a19946e96021..90f45f8fb94e91 100644 --- a/src/plugins/intel_gpu/src/graph/include/arg_max_min_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/arg_max_min_inst.h @@ -32,7 +32,6 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(arg_max_min_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(arg_max_min_node const& node, kernel_impl_params const& impl_param); static std::string to_string(arg_max_min_node const& node); public: diff --git a/src/plugins/intel_gpu/src/graph/include/assign_inst.h b/src/plugins/intel_gpu/src/graph/include/assign_inst.h index 0c92345e173e0d..80a3ea173e645d 100644 --- a/src/plugins/intel_gpu/src/graph/include/assign_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/assign_inst.h @@ -34,8 +34,6 @@ class typed_primitive_inst : public typed_primitive_inst_base, p return forward_input0_shape(impl_param); } - static layout calc_output_layout(const assign_node& node, kernel_impl_params const& impl_param); - static std::string to_string(const assign_node& node); typed_primitive_inst(network& network, const assign_node& desc); diff --git a/src/plugins/intel_gpu/src/graph/include/batch_to_space_inst.h b/src/plugins/intel_gpu/src/graph/include/batch_to_space_inst.h index 8f9668f370f02e..8b7fc918fc8e50 100644 --- a/src/plugins/intel_gpu/src/graph/include/batch_to_space_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/batch_to_space_inst.h @@ -30,7 +30,6 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(batch_to_space_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(batch_to_space_node const& node, kernel_impl_params const& impl_param); static std::string to_string(batch_to_space_node const& node); bool need_reset_output_memory() const override { diff --git a/src/plugins/intel_gpu/src/graph/include/border_inst.h b/src/plugins/intel_gpu/src/graph/include/border_inst.h index 67a08ea0e97d58..3fec5a6072957c 100644 --- a/src/plugins/intel_gpu/src/graph/include/border_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/border_inst.h @@ -35,7 +35,6 @@ class typed_primitive_inst : public typed_primitive_inst_base { public: template static std::vector calc_output_layouts(border_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(border_node const& node, kernel_impl_params const& impl_param); static std::string to_string(border_node const& node); typed_primitive_inst(network& network, border_node const& node); }; diff --git a/src/plugins/intel_gpu/src/graph/include/broadcast_inst.h b/src/plugins/intel_gpu/src/graph/include/broadcast_inst.h index 9b9b34574498b9..de0e5bad235f77 100644 --- a/src/plugins/intel_gpu/src/graph/include/broadcast_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/broadcast_inst.h @@ -36,7 +36,6 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(broadcast_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(broadcast_node const& node, kernel_impl_params const& impl_param); static std::string to_string(broadcast_node const& node); typed_primitive_inst(network& network, broadcast_node const& node); void update_output_memory() override; diff --git a/src/plugins/intel_gpu/src/graph/include/bucketize_inst.hpp b/src/plugins/intel_gpu/src/graph/include/bucketize_inst.hpp index 884011037d51bf..b6245135ab9919 100644 --- a/src/plugins/intel_gpu/src/graph/include/bucketize_inst.hpp +++ b/src/plugins/intel_gpu/src/graph/include/bucketize_inst.hpp @@ -21,7 +21,6 @@ class typed_primitive_inst : public typed_primitive_inst_base(impl_param); } - static layout calc_output_layout(const bucketize_node& node, kernel_impl_params const& impl_param); static std::string to_string(const bucketize_node& node); }; diff --git a/src/plugins/intel_gpu/src/graph/include/concatenation_inst.h b/src/plugins/intel_gpu/src/graph/include/concatenation_inst.h index 30f100fd04c80f..7c90cc13331321 100644 --- a/src/plugins/intel_gpu/src/graph/include/concatenation_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/concatenation_inst.h @@ -36,7 +36,6 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(const concatenation_node& /* node */, const kernel_impl_params& impl_param); - static layout calc_output_layout(concatenation_node const& node, kernel_impl_params const& impl_param); static std::string to_string(concatenation_node const& node); typed_primitive_inst(network& network, concatenation_node const& node); diff --git a/src/plugins/intel_gpu/src/graph/include/condition_inst.h b/src/plugins/intel_gpu/src/graph/include/condition_inst.h index d15ed5ff864862..1bd95bb4c5ba1f 100644 --- a/src/plugins/intel_gpu/src/graph/include/condition_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/condition_inst.h @@ -67,7 +67,6 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(condition_node const& /*node*/, kernel_impl_params const& impl_param); - static layout calc_output_layout(condition_node const& /* node */, kernel_impl_params const& impl_param); static std::string to_string(condition_node const& node); static bool get_pred_from_memory(memory::ptr mem, stream& stream); typed_primitive_inst(network& network, condition_node const& node); diff --git a/src/plugins/intel_gpu/src/graph/include/convert_color_inst.h b/src/plugins/intel_gpu/src/graph/include/convert_color_inst.h index dcdd113416e0f1..d1b8ed80831271 100644 --- a/src/plugins/intel_gpu/src/graph/include/convert_color_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/convert_color_inst.h @@ -26,8 +26,7 @@ class typed_primitive_inst : public typed_primitive_inst_base - static std::vector calc_output_layouts(convert_color_node const& /* node */, const kernel_impl_params& impl_param); - static layout calc_output_layout(convert_color_node const& node, kernel_impl_params const& impl_param); + static std::vector calc_output_layouts(convert_color_node const& node, const kernel_impl_params& impl_param); static std::string to_string(convert_color_node const& node); typed_primitive_inst(network& network, convert_color_node const& desc); }; diff --git a/src/plugins/intel_gpu/src/graph/include/convolution_inst.h b/src/plugins/intel_gpu/src/graph/include/convolution_inst.h index c66bd35fb1daed..459f909d829a09 100644 --- a/src/plugins/intel_gpu/src/graph/include/convolution_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/convolution_inst.h @@ -125,7 +125,6 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(convolution_node const& node, kernel_impl_params const& impl_param); - static layout calc_output_layout(convolution_node const& node, kernel_impl_params const& impl_param); static std::string to_string(convolution_node const& node); bool need_reset_input_memory(size_t idx = 0) const override { diff --git a/src/plugins/intel_gpu/src/graph/include/crop_inst.h b/src/plugins/intel_gpu/src/graph/include/crop_inst.h index 30b0839caaaefc..c732467eb7f6f2 100644 --- a/src/plugins/intel_gpu/src/graph/include/crop_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/crop_inst.h @@ -51,7 +51,6 @@ class typed_primitive_inst : public typed_primitive_inst_base { public: template static std::vector calc_output_layouts(const crop_node& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(crop_node const& node, kernel_impl_params const& impl_param); static std::string to_string(crop_node const& node); typed_primitive_inst(network& network, crop_node const& node); void update_output_memory() override; diff --git a/src/plugins/intel_gpu/src/graph/include/ctc_greedy_decoder_inst.h b/src/plugins/intel_gpu/src/graph/include/ctc_greedy_decoder_inst.h index 24c4780928d24a..d0e61338ef7369 100644 --- a/src/plugins/intel_gpu/src/graph/include/ctc_greedy_decoder_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/ctc_greedy_decoder_inst.h @@ -19,9 +19,6 @@ struct typed_program_node : public typed_program_node_basesecond_output.empty(); } - program_node& second_output() const { return get_dependency(2); } }; using ctc_greedy_decoder_node = typed_program_node; @@ -34,7 +31,6 @@ class typed_primitive_inst : public typed_primitive_inst_bas public: template static std::vector calc_output_layouts(ctc_greedy_decoder_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(ctc_greedy_decoder_node const& node, kernel_impl_params const& impl_param); static std::string to_string(ctc_greedy_decoder_node const& node); public: diff --git a/src/plugins/intel_gpu/src/graph/include/ctc_loss_inst.hpp b/src/plugins/intel_gpu/src/graph/include/ctc_loss_inst.hpp index 4a750a64678689..24d086c25ed1f8 100644 --- a/src/plugins/intel_gpu/src/graph/include/ctc_loss_inst.hpp +++ b/src/plugins/intel_gpu/src/graph/include/ctc_loss_inst.hpp @@ -27,7 +27,7 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(ctc_loss_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(const ctc_loss_node& node, kernel_impl_params const& impl_param); + static std::string to_string(const ctc_loss_node& node); }; diff --git a/src/plugins/intel_gpu/src/graph/include/cum_sum_inst.h b/src/plugins/intel_gpu/src/graph/include/cum_sum_inst.h index 0599a640bb2b4b..0e5690737fb6b5 100644 --- a/src/plugins/intel_gpu/src/graph/include/cum_sum_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/cum_sum_inst.h @@ -32,7 +32,6 @@ class typed_primitive_inst : public typed_primitive_inst_base return forward_input0_shape(impl_param); } - static layout calc_output_layout(cum_sum_node const& node, kernel_impl_params const& impl_param); static std::string to_string(cum_sum_node const& node); typed_primitive_inst(network& network, cum_sum_node const& desc); }; diff --git a/src/plugins/intel_gpu/src/graph/include/custom_gpu_primitive_inst.h b/src/plugins/intel_gpu/src/graph/include/custom_gpu_primitive_inst.h index a414067d74065a..79d94206b1c18e 100644 --- a/src/plugins/intel_gpu/src/graph/include/custom_gpu_primitive_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/custom_gpu_primitive_inst.h @@ -32,20 +32,6 @@ class typed_primitive_inst : public typed_primitive_inst_b return { output_layout }; } - static layout calc_output_layout(custom_gpu_primitive_node const& node, kernel_impl_params const& impl_param) { - assert(static_cast(impl_param.desc->output_data_types[0]) == false && - "Output data type forcing is not supported for " - "custom_gpu_primitive_node!"); - layout output_layout = impl_param.typed_desc()->output_layout; - - // if the output layout format was set to any, it means the layer output format will be the same as the first - // input - if (output_layout.format == format::any) { - output_layout.format = impl_param.get_input_layout().format; - } - return output_layout; - } - static std::string to_string(custom_gpu_primitive_node const& node); public: diff --git a/src/plugins/intel_gpu/src/graph/include/data_inst.h b/src/plugins/intel_gpu/src/graph/include/data_inst.h index 362c73d9e1b0a9..be37656e0121f6 100644 --- a/src/plugins/intel_gpu/src/graph/include/data_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/data_inst.h @@ -38,10 +38,6 @@ class typed_primitive_inst : public typed_primitive_inst_base { return { node.get_attached_memory().get_layout() }; } - static layout calc_output_layout(data_node const& node, kernel_impl_params const& impl_param) { - return node.get_attached_memory().get_layout(); - } - static std::string to_string(data_node const& node); typed_primitive_inst(network& network, data_node const& node); diff --git a/src/plugins/intel_gpu/src/graph/include/deconvolution_inst.h b/src/plugins/intel_gpu/src/graph/include/deconvolution_inst.h index 1b050b371b098b..6517163da468b0 100644 --- a/src/plugins/intel_gpu/src/graph/include/deconvolution_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/deconvolution_inst.h @@ -57,7 +57,6 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(deconvolution_node const& node, const kernel_impl_params& impl_param); - static layout calc_output_layout(deconvolution_node const& node, kernel_impl_params const& impl_param); static std::string to_string(deconvolution_node const& node); bool need_reset_input_memory(size_t idx = 0) const override { diff --git a/src/plugins/intel_gpu/src/graph/include/depth_to_space_inst.h b/src/plugins/intel_gpu/src/graph/include/depth_to_space_inst.h index 1e39e07d9a70b0..ec8451ffb45a78 100644 --- a/src/plugins/intel_gpu/src/graph/include/depth_to_space_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/depth_to_space_inst.h @@ -34,7 +34,6 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(depth_to_space_node const& node, kernel_impl_params const& impl_param); - static layout calc_output_layout(depth_to_space_node const& node, kernel_impl_params const& impl_param); static std::string to_string(depth_to_space_node const& node); typed_primitive_inst(network& network, depth_to_space_node const& desc); diff --git a/src/plugins/intel_gpu/src/graph/include/detection_output_inst.h b/src/plugins/intel_gpu/src/graph/include/detection_output_inst.h index 80438a8044f38e..32dc58ed7b81dc 100644 --- a/src/plugins/intel_gpu/src/graph/include/detection_output_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/detection_output_inst.h @@ -38,7 +38,6 @@ class typed_primitive_inst : public typed_primitive_inst_base< public: template static std::vector calc_output_layouts(detection_output_node const& node, kernel_impl_params const& impl_param); - static layout calc_output_layout(detection_output_node const& node, kernel_impl_params const& impl_param); static std::string to_string(detection_output_node const& node); typed_primitive_inst(network& network, detection_output_node const& node); diff --git a/src/plugins/intel_gpu/src/graph/include/dft_inst.h b/src/plugins/intel_gpu/src/graph/include/dft_inst.h index f6bb1d54850bfa..24e457db556352 100644 --- a/src/plugins/intel_gpu/src/graph/include/dft_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/dft_inst.h @@ -35,7 +35,6 @@ class typed_primitive_inst : public typed_primitive_inst_base { public: template static std::vector calc_output_layouts(dft_node const& /*node*/, kernel_impl_params const& impl_param); - static layout calc_output_layout(dft_node const& node, kernel_impl_params const& impl_param); static std::string to_string(dft_node const& node); typed_primitive_inst(network& network, dft_node const& node); diff --git a/src/plugins/intel_gpu/src/graph/include/eltwise_inst.h b/src/plugins/intel_gpu/src/graph/include/eltwise_inst.h index d75a18a4f40504..0a9bd1e36c7527 100644 --- a/src/plugins/intel_gpu/src/graph/include/eltwise_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/eltwise_inst.h @@ -48,7 +48,6 @@ class typed_primitive_inst : public typed_primitive_inst_base public: template static std::vector calc_output_layouts(eltwise_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(eltwise_node const& node, kernel_impl_params const& impl_param); static std::string to_string(eltwise_node const& node); typed_primitive_inst(network& network, eltwise_node const& node); }; diff --git a/src/plugins/intel_gpu/src/graph/include/embedding_bag_inst.h b/src/plugins/intel_gpu/src/graph/include/embedding_bag_inst.h index a093b571ecaa51..c3018a0ee07caa 100644 --- a/src/plugins/intel_gpu/src/graph/include/embedding_bag_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/embedding_bag_inst.h @@ -29,7 +29,6 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(embedding_bag_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(embedding_bag_node const& node, kernel_impl_params const& impl_param); static std::string to_string(embedding_bag_node const& node); typed_primitive_inst(network& network, embedding_bag_node const& desc); }; diff --git a/src/plugins/intel_gpu/src/graph/include/experimental_detectron_detection_output_inst.hpp b/src/plugins/intel_gpu/src/graph/include/experimental_detectron_detection_output_inst.hpp index 89b42dacd8f3f8..1b1bfd34a435bd 100644 --- a/src/plugins/intel_gpu/src/graph/include/experimental_detectron_detection_output_inst.hpp +++ b/src/plugins/intel_gpu/src/graph/include/experimental_detectron_detection_output_inst.hpp @@ -28,13 +28,6 @@ struct typed_program_node program_node& image_size_info() const { return get_dependency(3); } - - program_node& output_classes_node() const { - return get_dependency(4); - } - program_node& output_scores_node() const { - return get_dependency(5); - } }; using experimental_detectron_detection_output_node = typed_program_node; @@ -48,18 +41,10 @@ class typed_primitive_inst public: template static std::vector calc_output_layouts(experimental_detectron_detection_output_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(const experimental_detectron_detection_output_node& node, kernel_impl_params const& impl_param); static std::string to_string(const experimental_detectron_detection_output_node& node); typed_primitive_inst(network& network, const experimental_detectron_detection_output_node& node) : parent(network, node) {} - - memory::ptr output_classes_memory() const { - return dep_memory_ptr(4); - } - memory::ptr output_scores_memory() const { - return dep_memory_ptr(5); - } }; using experimental_detectron_detection_output_inst = typed_primitive_inst; diff --git a/src/plugins/intel_gpu/src/graph/include/experimental_detectron_generate_proposals_single_image_inst.hpp b/src/plugins/intel_gpu/src/graph/include/experimental_detectron_generate_proposals_single_image_inst.hpp index a863368d23b3da..5ce4ebf122b466 100644 --- a/src/plugins/intel_gpu/src/graph/include/experimental_detectron_generate_proposals_single_image_inst.hpp +++ b/src/plugins/intel_gpu/src/graph/include/experimental_detectron_generate_proposals_single_image_inst.hpp @@ -35,7 +35,6 @@ class typed_primitive_inst static std::vector calc_output_layouts(experimental_detectron_generate_proposals_single_image_node const&, const kernel_impl_params& impl_param); - static layout calc_output_layout(const experimental_detectron_generate_proposals_single_image_node& node, kernel_impl_params const& impl_param); static std::string to_string(const experimental_detectron_generate_proposals_single_image_node& node); typed_primitive_inst(network& network, const experimental_detectron_generate_proposals_single_image_node& node) diff --git a/src/plugins/intel_gpu/src/graph/include/experimental_detectron_prior_grid_generator_inst.h b/src/plugins/intel_gpu/src/graph/include/experimental_detectron_prior_grid_generator_inst.h index 38aaae23fb333f..39e5b107aa1623 100644 --- a/src/plugins/intel_gpu/src/graph/include/experimental_detectron_prior_grid_generator_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/experimental_detectron_prior_grid_generator_inst.h @@ -29,7 +29,6 @@ class typed_primitive_inst template static std::vector calc_output_layouts(experimental_detectron_prior_grid_generator_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(experimental_detectron_prior_grid_generator_node const& node, kernel_impl_params const& impl_param); static std::string to_string(experimental_detectron_prior_grid_generator_node const& node); }; diff --git a/src/plugins/intel_gpu/src/graph/include/experimental_detectron_roi_feature_extractor_inst.hpp b/src/plugins/intel_gpu/src/graph/include/experimental_detectron_roi_feature_extractor_inst.hpp index 6970d286f0e5dc..375ff3f2a94085 100644 --- a/src/plugins/intel_gpu/src/graph/include/experimental_detectron_roi_feature_extractor_inst.hpp +++ b/src/plugins/intel_gpu/src/graph/include/experimental_detectron_roi_feature_extractor_inst.hpp @@ -21,11 +21,9 @@ struct typed_primitive_inst : publ template static std::vector calc_output_layouts(experimental_detectron_roi_feature_extractor_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(experimental_detectron_roi_feature_extractor_node const& node, kernel_impl_params const& impl_param); static std::string to_string(experimental_detectron_roi_feature_extractor_node const& node); private: - memory::ptr second_output_memory() const; memory::ptr rois_memory() const; }; diff --git a/src/plugins/intel_gpu/src/graph/include/experimental_detectron_topk_rois_inst.h b/src/plugins/intel_gpu/src/graph/include/experimental_detectron_topk_rois_inst.h index fb223171a26f94..f657682703c043 100644 --- a/src/plugins/intel_gpu/src/graph/include/experimental_detectron_topk_rois_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/experimental_detectron_topk_rois_inst.h @@ -19,7 +19,6 @@ class typed_primitive_inst : public typed_prim public: template static std::vector calc_output_layouts(experimental_detectron_topk_rois_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(experimental_detectron_topk_rois_node const &node, kernel_impl_params const& impl_param); static std::string to_string(experimental_detectron_topk_rois_node const &node); diff --git a/src/plugins/intel_gpu/src/graph/include/extract_image_patches_inst.h b/src/plugins/intel_gpu/src/graph/include/extract_image_patches_inst.h index 4ccf35b0d398ed..fde398e66c5c7a 100644 --- a/src/plugins/intel_gpu/src/graph/include/extract_image_patches_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/extract_image_patches_inst.h @@ -18,7 +18,6 @@ class typed_primitive_inst : public typed_primitive_inst_ public: template static std::vector calc_output_layouts(extract_image_patches_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(extract_image_patches_node const& node, kernel_impl_params const& impl_param); static std::string to_string(extract_image_patches_node const& node); typed_primitive_inst(network& network, extract_image_patches_node const& desc); diff --git a/src/plugins/intel_gpu/src/graph/include/eye_inst.h b/src/plugins/intel_gpu/src/graph/include/eye_inst.h index b909829bf534da..907806054b14d8 100644 --- a/src/plugins/intel_gpu/src/graph/include/eye_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/eye_inst.h @@ -22,6 +22,7 @@ struct typed_program_node : typed_program_node_base { }; using eye_node = typed_program_node; + template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; @@ -30,7 +31,6 @@ class typed_primitive_inst : public typed_primitive_inst_base { public: template static std::vector calc_output_layouts(eye_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(eye_node const& node, const kernel_impl_params& impl_param); static std::string to_string(eye_node const& node); typed_primitive_inst(network& network, eye_node const& desc); diff --git a/src/plugins/intel_gpu/src/graph/include/fully_connected_inst.h b/src/plugins/intel_gpu/src/graph/include/fully_connected_inst.h index ac7bd91f84d662..cc92e40a148c99 100644 --- a/src/plugins/intel_gpu/src/graph/include/fully_connected_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/fully_connected_inst.h @@ -46,7 +46,6 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(fully_connected_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(fully_connected_node const& node, kernel_impl_params const& impl_param); static kernel_impl_params get_fake_aligned_params(kernel_impl_params const& orig_impl_param); static std::string to_string(fully_connected_node const& node); diff --git a/src/plugins/intel_gpu/src/graph/include/gather_elements_inst.h b/src/plugins/intel_gpu/src/graph/include/gather_elements_inst.h index b73ca3bc3f28f9..2601ab76bbf955 100644 --- a/src/plugins/intel_gpu/src/graph/include/gather_elements_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/gather_elements_inst.h @@ -29,7 +29,6 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(gather_elements_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(gather_elements_node const& node, kernel_impl_params const& impl_param); static std::string to_string(gather_elements_node const& node); public: diff --git a/src/plugins/intel_gpu/src/graph/include/gather_inst.h b/src/plugins/intel_gpu/src/graph/include/gather_inst.h index d6d390cd0eb291..1734643397913b 100644 --- a/src/plugins/intel_gpu/src/graph/include/gather_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/gather_inst.h @@ -30,7 +30,6 @@ class typed_primitive_inst : public typed_primitive_inst_base { public: template static std::vector calc_output_layouts(gather_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(gather_node const& node, kernel_impl_params const& impl_param); static std::string to_string(gather_node const& node); typed_primitive_inst(network& network, gather_node const& desc); diff --git a/src/plugins/intel_gpu/src/graph/include/gather_nd_inst.h b/src/plugins/intel_gpu/src/graph/include/gather_nd_inst.h index c8821bf78f9649..c8511da3672069 100644 --- a/src/plugins/intel_gpu/src/graph/include/gather_nd_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/gather_nd_inst.h @@ -19,7 +19,6 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(gather_nd_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(gather_nd_node const& node, kernel_impl_params const& impl_param); static std::string to_string(gather_nd_node const& node); public: diff --git a/src/plugins/intel_gpu/src/graph/include/gather_tree_inst.h b/src/plugins/intel_gpu/src/graph/include/gather_tree_inst.h index 1a0ac6f423eae1..cb27195c1c1fdf 100644 --- a/src/plugins/intel_gpu/src/graph/include/gather_tree_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/gather_tree_inst.h @@ -32,7 +32,6 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(gather_tree_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(gather_tree_node const& node, kernel_impl_params const& impl_param); static std::string to_string(gather_tree_node const& node); typed_primitive_inst(network& network, gather_tree_node const& node); }; diff --git a/src/plugins/intel_gpu/src/graph/include/gemm_inst.h b/src/plugins/intel_gpu/src/graph/include/gemm_inst.h index b6bb55ceb55497..5d2214bb19b0af 100644 --- a/src/plugins/intel_gpu/src/graph/include/gemm_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/gemm_inst.h @@ -30,7 +30,6 @@ class typed_primitive_inst : public typed_primitive_inst_base { public: template static std::vector calc_output_layouts(gemm_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(gemm_node const& node, kernel_impl_params const& impl_param); static std::string to_string(gemm_node const& node); static std::vector transform_input_layouts(const std::shared_ptr primitive, diff --git a/src/plugins/intel_gpu/src/graph/include/generate_proposals_inst.h b/src/plugins/intel_gpu/src/graph/include/generate_proposals_inst.h index bddeb909083fd8..f80e0c4ce40216 100644 --- a/src/plugins/intel_gpu/src/graph/include/generate_proposals_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/generate_proposals_inst.h @@ -36,7 +36,6 @@ class typed_primitive_inst public: template static std::vector calc_output_layouts(generate_proposals_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(const generate_proposals_node& node, kernel_impl_params const& impl_param); static std::string to_string(const generate_proposals_node& node); typed_primitive_inst(network& network, const generate_proposals_node& node) diff --git a/src/plugins/intel_gpu/src/graph/include/grid_sample_inst.hpp b/src/plugins/intel_gpu/src/graph/include/grid_sample_inst.hpp index e6603d5922574a..a4b07669a80ccc 100644 --- a/src/plugins/intel_gpu/src/graph/include/grid_sample_inst.hpp +++ b/src/plugins/intel_gpu/src/graph/include/grid_sample_inst.hpp @@ -29,7 +29,6 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(grid_sample_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(const grid_sample_node& node, const kernel_impl_params& impl_param); static std::string to_string(const grid_sample_node& node); typed_primitive_inst(network& network, grid_sample_node const& node); diff --git a/src/plugins/intel_gpu/src/graph/include/grn_inst.h b/src/plugins/intel_gpu/src/graph/include/grn_inst.h index 376dba93f480bb..875d4370da4a3e 100644 --- a/src/plugins/intel_gpu/src/graph/include/grn_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/grn_inst.h @@ -22,7 +22,6 @@ class typed_primitive_inst : public typed_primitive_inst_base { static std::vector calc_output_layouts(grn_node const& /*node*/, const kernel_impl_params& impl_param) { return forward_input0_shape(impl_param); } - static layout calc_output_layout(grn_node const& node, kernel_impl_params const& impl_param); static std::string to_string(grn_node const& node); typed_primitive_inst(network& network, grn_node const& node); diff --git a/src/plugins/intel_gpu/src/graph/include/group_normalization_inst.h b/src/plugins/intel_gpu/src/graph/include/group_normalization_inst.h index 27fe382146999b..d1838e14013910 100644 --- a/src/plugins/intel_gpu/src/graph/include/group_normalization_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/group_normalization_inst.h @@ -28,7 +28,6 @@ class typed_primitive_inst : public typed_primitive_inst_ba return forward_input0_shape(impl_param); } - static layout calc_output_layout(group_normalization_node const& node, kernel_impl_params const& impl_param); static std::string to_string(group_normalization_node const& node); typed_primitive_inst(network& network, group_normalization_node const& desc); diff --git a/src/plugins/intel_gpu/src/graph/include/input_layout_inst.h b/src/plugins/intel_gpu/src/graph/include/input_layout_inst.h index ee59d43bf116aa..3ca7fc23f6e0bf 100644 --- a/src/plugins/intel_gpu/src/graph/include/input_layout_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/input_layout_inst.h @@ -32,9 +32,6 @@ class typed_primitive_inst : public typed_primitive_inst_base()->layout }; } - static layout calc_output_layout(input_layout_node const& node, kernel_impl_params const& impl_param) { - return impl_param.typed_desc()->layout; - } static std::string to_string(input_layout_node const& node); void update_shape() override; diff --git a/src/plugins/intel_gpu/src/graph/include/kv_cache_inst.h b/src/plugins/intel_gpu/src/graph/include/kv_cache_inst.h index f3aa4de5ec34e1..8d77e88ca2eb14 100644 --- a/src/plugins/intel_gpu/src/graph/include/kv_cache_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/kv_cache_inst.h @@ -48,7 +48,6 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(kv_cache_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(const kv_cache_node& node, kernel_impl_params const& impl_param); static std::string to_string(const kv_cache_node& node); diff --git a/src/plugins/intel_gpu/src/graph/include/loop_inst.h b/src/plugins/intel_gpu/src/graph/include/loop_inst.h index a9ec2262342bec..efc69418947ad3 100644 --- a/src/plugins/intel_gpu/src/graph/include/loop_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/loop_inst.h @@ -322,7 +322,6 @@ class typed_primitive_inst : public typed_primitive_inst_base { template static std::vector calc_output_layouts(loop_node const& /*node*/, kernel_impl_params const& impl_param); - static layout calc_output_layout(const loop_node& /*node*/, kernel_impl_params const& impl_param); bool preproc_memories_done = false; std::vector backedge_memory_mappings; std::vector concatenated_input_mem_mappings; diff --git a/src/plugins/intel_gpu/src/graph/include/lrn_inst.h b/src/plugins/intel_gpu/src/graph/include/lrn_inst.h index 7b3caf2b9ac4ae..7d51df14aa2bc4 100644 --- a/src/plugins/intel_gpu/src/graph/include/lrn_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/lrn_inst.h @@ -22,7 +22,6 @@ class typed_primitive_inst : public typed_primitive_inst_base { static std::vector calc_output_layouts(lrn_node const& /*node*/, const kernel_impl_params& impl_param) { return forward_input0_shape(impl_param); } - static layout calc_output_layout(lrn_node const& node, kernel_impl_params const& impl_param); static std::string to_string(lrn_node const& node); typed_primitive_inst(network& network, lrn_node const& node); diff --git a/src/plugins/intel_gpu/src/graph/include/lstm_elt_inst.h b/src/plugins/intel_gpu/src/graph/include/lstm_elt_inst.h index 1524598c6f3987..f17272ca99a765 100644 --- a/src/plugins/intel_gpu/src/graph/include/lstm_elt_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/lstm_elt_inst.h @@ -40,7 +40,6 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(lstm_elt_node const& node, kernel_impl_params const& impl_param); - static layout calc_output_layout(lstm_elt_node const& node, kernel_impl_params const& impl_param); static std::string to_string(lstm_elt_node const& node); public: diff --git a/src/plugins/intel_gpu/src/graph/include/matrix_nms_inst.h b/src/plugins/intel_gpu/src/graph/include/matrix_nms_inst.h index 591de52c5061ed..8319b793baa213 100644 --- a/src/plugins/intel_gpu/src/graph/include/matrix_nms_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/matrix_nms_inst.h @@ -23,8 +23,6 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(matrix_nms_node const& /*node*/, const kernel_impl_params& impl_param); - - static layout calc_output_layout(const matrix_nms_node& node, const kernel_impl_params& impl_param); static std::string to_string(const matrix_nms_node& node); memory::ptr input_boxes_mem() const { @@ -33,12 +31,6 @@ class typed_primitive_inst : public typed_primitive_inst_base; diff --git a/src/plugins/intel_gpu/src/graph/include/multiclass_nms_inst.h b/src/plugins/intel_gpu/src/graph/include/multiclass_nms_inst.h index 4bdff2792f40b1..0a6d48cc30e8ca 100644 --- a/src/plugins/intel_gpu/src/graph/include/multiclass_nms_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/multiclass_nms_inst.h @@ -1,4 +1,4 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2022-2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -28,26 +28,13 @@ struct typed_program_node : public typed_program_node_basehas_roisnum; + return get_primitive()->input_size() == 3; } const program_node& roisnum() const { - if (!get_primitive()->has_roisnum) - throw std::runtime_error("there is no roisnum input"); + OPENVINO_ASSERT(has_roisnum(), "[GPU] rois_num not found"); return get_dependency(2); } - - const program_node& output_selected_indices() const { - return get_dependency(input_count()); - } - const program_node& output_selected_num() const { - return get_dependency(input_count() + 1); - } - -private: - int input_count() const { - return 2 + (get_primitive()->has_roisnum ? 1 : 0); - } }; using multiclass_nms_node = typed_program_node; @@ -60,17 +47,9 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(multiclass_nms_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(const multiclass_nms_node& node, const kernel_impl_params& impl_param); static std::string to_string(const multiclass_nms_node& node); typed_primitive_inst(network& network, const multiclass_nms_node& node) : parent(network, node) {} - - memory::ptr output_indices_memory() const { - return dep_memory_ptr(dependencies().size() - 2); - } - memory::ptr output_num_memory() const { - return dep_memory_ptr(dependencies().size() - 1); - } }; using multiclass_nms_inst = typed_primitive_inst; diff --git a/src/plugins/intel_gpu/src/graph/include/multinomial_inst.h b/src/plugins/intel_gpu/src/graph/include/multinomial_inst.h index 102e300b74b4ef..4e66118b6a36e2 100644 --- a/src/plugins/intel_gpu/src/graph/include/multinomial_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/multinomial_inst.h @@ -17,7 +17,6 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(multinomial_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(multinomial_node const& node, kernel_impl_params const& impl_param); static std::string to_string(multinomial_node const& node); typed_primitive_inst(network& network, multinomial_node const& desc); diff --git a/src/plugins/intel_gpu/src/graph/include/mutable_data_inst.h b/src/plugins/intel_gpu/src/graph/include/mutable_data_inst.h index 0140c60f12a0d6..89e683f3687538 100644 --- a/src/plugins/intel_gpu/src/graph/include/mutable_data_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/mutable_data_inst.h @@ -41,10 +41,6 @@ class typed_primitive_inst : public typed_primitive_inst_base : public typed_primitive_inst_base { static std::vector calc_output_layouts(mvn_node const& /*node*/, const kernel_impl_params& impl_param) { return forward_input0_shape(impl_param); } - static layout calc_output_layout(mvn_node const& node, kernel_impl_params const& impl_param); static std::string to_string(mvn_node const& node); typed_primitive_inst(network& network, mvn_node const& node); diff --git a/src/plugins/intel_gpu/src/graph/include/non_max_suppression_inst.h b/src/plugins/intel_gpu/src/graph/include/non_max_suppression_inst.h index 4e73633f3a13a8..1a61b463d9ca34 100644 --- a/src/plugins/intel_gpu/src/graph/include/non_max_suppression_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/non_max_suppression_inst.h @@ -30,19 +30,19 @@ struct typed_program_node : public typed_program_node_base< return get_dependency(1); } - bool has_num_select_per_class() const { return !get_primitive()->num_select_per_class.empty(); } + bool has_num_select_per_class() const { return get_primitive()->input_size() >= 3; } program_node& num_select_per_class_node() const { return get_dependency(2); } - bool has_iou_threshold() const { return !get_primitive()->iou_threshold.empty(); } + bool has_iou_threshold() const { return get_primitive()->input_size() >= 4; } program_node& iou_threshold_node() const { size_t offset = 2; offset += has_num_select_per_class(); return get_dependency(offset); } - bool has_score_threshold() const { return !get_primitive()->score_threshold.empty(); } + bool has_score_threshold() const { return get_primitive()->input_size() >= 5; } program_node& score_threshold_node() const { size_t offset = 2; offset += has_num_select_per_class(); @@ -50,7 +50,7 @@ struct typed_program_node : public typed_program_node_base< return get_dependency(offset); } - bool has_soft_nms_sigma() const { return !get_primitive()->soft_nms_sigma.empty(); } + bool has_soft_nms_sigma() const { return get_primitive()->input_size() >= 6; } program_node& soft_nms_sigma_node() const { size_t offset = 2; offset += has_num_select_per_class(); @@ -59,28 +59,6 @@ struct typed_program_node : public typed_program_node_base< return get_dependency(offset); } - bool has_second_output() const { return !get_primitive()->second_output.empty(); } - program_node& second_output_node() const { - size_t offset = 2; - offset += has_num_select_per_class(); - offset += has_iou_threshold(); - offset += has_score_threshold(); - offset += has_soft_nms_sigma(); - return get_dependency(offset); - } - - bool has_third_output() const { return !get_primitive()->third_output.empty(); } - program_node& third_output_node() const { - size_t offset = 2; - offset += has_num_select_per_class(); - offset += has_iou_threshold(); - offset += has_score_threshold(); - offset += has_soft_nms_sigma(); - offset += has_second_output(); - return get_dependency(offset); - } - bool use_multiple_outputs() const { return get_primitive()->output_size() == 3; } - std::vector get_shape_infer_dependencies() const override { return {2}; } }; @@ -119,7 +97,6 @@ class typed_primitive_inst : public typed_primitive_inst_ba template static std::vector calc_output_layouts(non_max_suppression_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(non_max_suppression_node const& node, kernel_impl_params const& impl_param); static std::string to_string(non_max_suppression_node const& node); memory::ptr input_boxes_mem() const { @@ -130,7 +107,7 @@ class typed_primitive_inst : public typed_primitive_inst_ba return dep_memory_ptr(1); } - bool has_num_select_per_class() const { return !get_typed_desc()->num_select_per_class.empty(); } + bool has_num_select_per_class() const { return static_cast(_node)->has_num_select_per_class(); } memory::ptr num_select_per_class_mem() const { return dep_memory_ptr(2); } @@ -138,7 +115,7 @@ class typed_primitive_inst : public typed_primitive_inst_ba return dependencies().at(2).first; } - bool has_iou_threshold() const { return !get_typed_desc()->iou_threshold.empty(); } + bool has_iou_threshold() const { return static_cast(_node)->has_iou_threshold(); } memory::ptr iou_threshold_mem() const { return dep_memory_ptr(get_iou_threshold_offset()); } @@ -146,7 +123,7 @@ class typed_primitive_inst : public typed_primitive_inst_ba return dependencies().at(get_iou_threshold_offset()).first; } - bool has_score_threshold() const { return !get_typed_desc()->score_threshold.empty(); } + bool has_score_threshold() const { return static_cast(_node)->has_score_threshold(); } memory::ptr score_threshold_mem() const { return dep_memory_ptr(get_score_threshold_offset()); } @@ -154,34 +131,13 @@ class typed_primitive_inst : public typed_primitive_inst_ba return dependencies().at(get_score_threshold_offset()).first; } - bool has_soft_nms_sigma() const { return !get_typed_desc()->soft_nms_sigma.empty(); } + bool has_soft_nms_sigma() const { return static_cast(_node)->has_soft_nms_sigma(); } memory::ptr soft_nms_sigma_mem() const { return dep_memory_ptr(get_soft_nms_sigma_offset()); } const primitive_inst* soft_nms_sigma_inst() const { return dependencies().at(get_soft_nms_sigma_offset()).first; } - - bool has_second_output() const { return !get_typed_desc()->second_output.empty(); } - memory::ptr second_output_mem() const { - size_t offset = 2; - offset += has_num_select_per_class(); - offset += has_iou_threshold(); - offset += has_score_threshold(); - offset += has_soft_nms_sigma(); - return dep_memory_ptr(offset); - } - - bool has_third_output() const { return !get_typed_desc()->third_output.empty(); } - memory::ptr third_output_mem() const { - size_t offset = 2; - offset += has_num_select_per_class(); - offset += has_iou_threshold(); - offset += has_score_threshold(); - offset += has_soft_nms_sigma(); - offset += has_second_output(); - return dep_memory_ptr(offset); - } }; using non_max_suppression_inst = typed_primitive_inst; diff --git a/src/plugins/intel_gpu/src/graph/include/non_zero_inst.h b/src/plugins/intel_gpu/src/graph/include/non_zero_inst.h index cbaea742335502..3518c598e1e4c7 100644 --- a/src/plugins/intel_gpu/src/graph/include/non_zero_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/non_zero_inst.h @@ -36,7 +36,6 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(count_nonzero_node const& /*node*/, kernel_impl_params const& impl_param); - static layout calc_output_layout(count_nonzero_node const& node, kernel_impl_params const& impl_param); static std::string to_string(count_nonzero_node const& node); typed_primitive_inst(network& network, count_nonzero_node const& node); @@ -71,7 +70,6 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(gather_nonzero_node const& /*node*/, kernel_impl_params const& impl_param); - static layout calc_output_layout(gather_nonzero_node const& node, kernel_impl_params const& impl_param); static std::string to_string(gather_nonzero_node const& node); typed_primitive_inst(network& network, gather_nonzero_node const& node); diff --git a/src/plugins/intel_gpu/src/graph/include/normalize_inst.h b/src/plugins/intel_gpu/src/graph/include/normalize_inst.h index 5a86ce13e0e5d8..880bb94808e45d 100644 --- a/src/plugins/intel_gpu/src/graph/include/normalize_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/normalize_inst.h @@ -34,7 +34,6 @@ class typed_primitive_inst : public typed_primitive_inst_base calc_output_layouts(normalize_node const& /*node*/, const kernel_impl_params& impl_param) { return forward_input0_shape(impl_param); } - static layout calc_output_layout(normalize_node const& node, kernel_impl_params const& impl_param); static std::string to_string(normalize_node const& node); typed_primitive_inst(network& network, normalize_node const& node); diff --git a/src/plugins/intel_gpu/src/graph/include/one_hot_inst.h b/src/plugins/intel_gpu/src/graph/include/one_hot_inst.h index 782751e261e51a..ad6cb5ff9db5ba 100644 --- a/src/plugins/intel_gpu/src/graph/include/one_hot_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/one_hot_inst.h @@ -35,7 +35,6 @@ class typed_primitive_inst : public typed_primitive_inst_base public: template static std::vector calc_output_layouts(const one_hot_node& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(one_hot_node const& node, kernel_impl_params const& impl_param); static std::string to_string(one_hot_node const& node); typed_primitive_inst(network& network, one_hot_node const& node); }; diff --git a/src/plugins/intel_gpu/src/graph/include/permute_inst.h b/src/plugins/intel_gpu/src/graph/include/permute_inst.h index e00a49ec8bfa15..2b48b309b9a3cd 100644 --- a/src/plugins/intel_gpu/src/graph/include/permute_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/permute_inst.h @@ -62,7 +62,6 @@ class typed_primitive_inst : public typed_primitive_inst_base public: template static std::vector calc_output_layouts(permute_node const& node, kernel_impl_params const& impl_param); - static layout calc_output_layout(permute_node const& node, kernel_impl_params const& impl_param); static std::string to_string(permute_node const& node); public: diff --git a/src/plugins/intel_gpu/src/graph/include/pooling_inst.h b/src/plugins/intel_gpu/src/graph/include/pooling_inst.h index ab258ea47f24c3..ca804c14958379 100644 --- a/src/plugins/intel_gpu/src/graph/include/pooling_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/pooling_inst.h @@ -33,7 +33,6 @@ class typed_primitive_inst : public typed_primitive_inst_base public: template static std::vector calc_output_layouts(pooling_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(pooling_node const& node, kernel_impl_params const& impl_param); static std::string to_string(pooling_node const& node); public: diff --git a/src/plugins/intel_gpu/src/graph/include/primitive_type.h b/src/plugins/intel_gpu/src/graph/include/primitive_type.h index 2c52079584db8d..58ce07c483d37c 100644 --- a/src/plugins/intel_gpu/src/graph/include/primitive_type.h +++ b/src/plugins/intel_gpu/src/graph/include/primitive_type.h @@ -43,7 +43,6 @@ struct primitive_type { virtual bool does_dynamic_implementation_exist(const program_node& node) const = 0; virtual bool does_dynamic_implementation_exist(const program_node& node, const kernel_impl_params& params) const = 0; - virtual layout calc_output_layout(const program_node& node, const kernel_impl_params& params) const = 0; virtual std::vector calc_output_layouts(const program_node& node, const kernel_impl_params& impl_param) const = 0; virtual kernel_impl_params get_fake_aligned_params(kernel_impl_params const& orig_impl_param) const = 0; virtual std::string to_string(const program_node& node) const = 0; diff --git a/src/plugins/intel_gpu/src/graph/include/primitive_type_base.h b/src/plugins/intel_gpu/src/graph/include/primitive_type_base.h index 96df67322d08c9..0f3f82d744de79 100644 --- a/src/plugins/intel_gpu/src/graph/include/primitive_type_base.h +++ b/src/plugins/intel_gpu/src/graph/include/primitive_type_base.h @@ -99,17 +99,6 @@ struct primitive_type_base : primitive_type { return implementation_map::check(impl_param, node.get_preferred_impl_type(), shape_types::dynamic_shape); } - cldnn::layout calc_output_layout(const cldnn::program_node& node, const kernel_impl_params& impl_param) const override { - OPENVINO_ASSERT(node.type() == this, "[GPU] primitive_type_base::calc_output_layout: primitive type mismatch"); - for (auto& t : impl_param.input_layouts) { - GPU_DEBUG_TRACE_DETAIL << impl_param.desc->id << " input tensor: " << t.to_short_string() << std::endl; - } - auto res = typed_primitive_inst::calc_output_layout(node, impl_param); - - GPU_DEBUG_TRACE_DETAIL << impl_param.desc->id << " output tensor: " << res.to_short_string() << std::endl; - return res; - } - std::vector calc_output_layouts(const cldnn::program_node& node, const kernel_impl_params& impl_param) const override { OPENVINO_ASSERT(node.type() == this, "primitive_type_base::calc_output_layouts: primitive type mismatch"); diff --git a/src/plugins/intel_gpu/src/graph/include/prior_box_inst.h b/src/plugins/intel_gpu/src/graph/include/prior_box_inst.h index 9a789733a5e3c9..cdf5b705a1b4d0 100644 --- a/src/plugins/intel_gpu/src/graph/include/prior_box_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/prior_box_inst.h @@ -38,7 +38,6 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(prior_box_node const& /*node*/, kernel_impl_params const& impl_param); - static layout calc_output_layout(prior_box_node const& node, kernel_impl_params const& impl_param); static std::string to_string(prior_box_node const& node); public: diff --git a/src/plugins/intel_gpu/src/graph/include/program_node.h b/src/plugins/intel_gpu/src/graph/include/program_node.h index 029755c4733fe4..aef9c194e5aeb4 100644 --- a/src/plugins/intel_gpu/src/graph/include/program_node.h +++ b/src/plugins/intel_gpu/src/graph/include/program_node.h @@ -79,8 +79,6 @@ struct program_node { } bool is_shape_infer_dep(void) const { - if (!myprog.is_new_shape_infer()) - return false; for (auto u : users) { for (auto dep_idx : u->get_shape_infer_dependencies()) { if (u->get_dependencies().size() <= dep_idx) { @@ -235,8 +233,6 @@ struct program_node { set_output_padding(padding::max(padd, output_layouts[idx].data_padding)); } - // only calculated output layout (for external usage), does not modify/use cached output layout nor invalidate users - layout calc_output_layout() const; std::vector calc_output_layouts() const; // uses cached output layout if valid, if not calls 'calc_output_layout' and stores its result + invalidate all diff --git a/src/plugins/intel_gpu/src/graph/include/proposal_inst.h b/src/plugins/intel_gpu/src/graph/include/proposal_inst.h index d4f3ef545b272f..9388d6f1f9b4c3 100644 --- a/src/plugins/intel_gpu/src/graph/include/proposal_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/proposal_inst.h @@ -67,7 +67,6 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(proposal_node const& node, kernel_impl_params const& impl_param); - static layout calc_output_layout(proposal_node const& node, kernel_impl_params const& impl_param); static std::string to_string(proposal_node const& node); typed_primitive_inst(network& network, proposal_node const& desc); diff --git a/src/plugins/intel_gpu/src/graph/include/quantize_inst.h b/src/plugins/intel_gpu/src/graph/include/quantize_inst.h index 7bffd87e5af598..3d3dc5fc6beb1e 100644 --- a/src/plugins/intel_gpu/src/graph/include/quantize_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/quantize_inst.h @@ -203,7 +203,6 @@ class typed_primitive_inst : public typed_primitive_inst_base calc_output_layouts(quantize_node const& node, kernel_impl_params const& impl_param) { return forward_input0_shape(impl_param); } - static layout calc_output_layout(quantize_node const& node, kernel_impl_params const& impl_param); static std::string to_string(quantize_node const& node); typed_primitive_inst(network& network, quantize_node const& node); diff --git a/src/plugins/intel_gpu/src/graph/include/random_uniform_inst.h b/src/plugins/intel_gpu/src/graph/include/random_uniform_inst.h index e9c647dcf0d5af..d579cb2e9e9aa8 100644 --- a/src/plugins/intel_gpu/src/graph/include/random_uniform_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/random_uniform_inst.h @@ -30,7 +30,6 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(random_uniform_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(random_uniform_node const &node, kernel_impl_params const& impl_param); static std::string to_string(random_uniform_node const &node); diff --git a/src/plugins/intel_gpu/src/graph/include/range_inst.h b/src/plugins/intel_gpu/src/graph/include/range_inst.h index 79ef03e0057dd6..57a56f823f99d1 100644 --- a/src/plugins/intel_gpu/src/graph/include/range_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/range_inst.h @@ -32,7 +32,6 @@ class typed_primitive_inst : public typed_primitive_inst_base { public: template static std::vector calc_output_layouts(range_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(range_node const& node, kernel_impl_params const& impl_param); static std::string to_string(range_node const& node); typed_primitive_inst(network& network, range_node const& desc); diff --git a/src/plugins/intel_gpu/src/graph/include/read_value_inst.h b/src/plugins/intel_gpu/src/graph/include/read_value_inst.h index 74f9ffff581b87..2e6051be85cd36 100644 --- a/src/plugins/intel_gpu/src/graph/include/read_value_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/read_value_inst.h @@ -38,8 +38,6 @@ class typed_primitive_inst : public typed_primitive_inst_base : public typed_primitive_inst_base { public: template static std::vector calc_output_layouts(reduce_node const& node, const kernel_impl_params& impl_param); - static layout calc_output_layout(reduce_node const& node, kernel_impl_params const& impl_param); static std::string to_string(reduce_node const& node); bool need_reset_input_memory(size_t idx = 0) const override { diff --git a/src/plugins/intel_gpu/src/graph/include/region_yolo_inst.h b/src/plugins/intel_gpu/src/graph/include/region_yolo_inst.h index 2c24172949ea64..fa3e7921232369 100644 --- a/src/plugins/intel_gpu/src/graph/include/region_yolo_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/region_yolo_inst.h @@ -30,7 +30,6 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(region_yolo_node const& node, kernel_impl_params const& impl_param); - static layout calc_output_layout(region_yolo_node const& node, kernel_impl_params const& impl_param); static std::string to_string(region_yolo_node const& node); public: diff --git a/src/plugins/intel_gpu/src/graph/include/reorder_inst.h b/src/plugins/intel_gpu/src/graph/include/reorder_inst.h index 8e91957c5192dd..d68e5e16f2de8e 100644 --- a/src/plugins/intel_gpu/src/graph/include/reorder_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/reorder_inst.h @@ -82,7 +82,6 @@ class typed_primitive_inst : public typed_primitive_inst_base public: template static std::vector calc_output_layouts(reorder_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(reorder_node const& node, kernel_impl_params const& impl_param); static std::string to_string(reorder_node const& node); public: diff --git a/src/plugins/intel_gpu/src/graph/include/reorg_yolo_inst.h b/src/plugins/intel_gpu/src/graph/include/reorg_yolo_inst.h index 3aa4e571fa96da..9250343de29d06 100644 --- a/src/plugins/intel_gpu/src/graph/include/reorg_yolo_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/reorg_yolo_inst.h @@ -30,7 +30,6 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(reorg_yolo_node const& node, kernel_impl_params const& impl_param); - static layout calc_output_layout(reorg_yolo_node const& node, kernel_impl_params const& impl_param); static std::string to_string(reorg_yolo_node const& node); public: diff --git a/src/plugins/intel_gpu/src/graph/include/resample_inst.h b/src/plugins/intel_gpu/src/graph/include/resample_inst.h index a7257f8c6907b9..373ffe7ceaa03a 100644 --- a/src/plugins/intel_gpu/src/graph/include/resample_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/resample_inst.h @@ -35,7 +35,6 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(resample_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(resample_node const& node, kernel_impl_params const& impl_param); static std::string to_string(resample_node const& node); public: diff --git a/src/plugins/intel_gpu/src/graph/include/reshape_inst.h b/src/plugins/intel_gpu/src/graph/include/reshape_inst.h index 1bbfd94256a50c..2bd2b19531db5f 100644 --- a/src/plugins/intel_gpu/src/graph/include/reshape_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/reshape_inst.h @@ -191,7 +191,6 @@ class typed_primitive_inst : public typed_primitive_inst_base public: template static std::vector calc_output_layouts(reshape_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(reshape_node const& node, kernel_impl_params const& impl_param); static std::string to_string(reshape_node const& node); typed_primitive_inst(network& network, reshape_node const& node); diff --git a/src/plugins/intel_gpu/src/graph/include/reverse_inst.h b/src/plugins/intel_gpu/src/graph/include/reverse_inst.h index fbd870ab196a3a..006866ccbfb43b 100644 --- a/src/plugins/intel_gpu/src/graph/include/reverse_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/reverse_inst.h @@ -22,7 +22,6 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(reverse_node const& /*node*/, const kernel_impl_params& impl_param) { return forward_input0_shape(impl_param); } - static layout calc_output_layout(reverse_node const& node, kernel_impl_params const& impl_param); static std::string to_string(reverse_node const& node); typed_primitive_inst(network& network, reverse_node const& desc); diff --git a/src/plugins/intel_gpu/src/graph/include/reverse_sequence_inst.h b/src/plugins/intel_gpu/src/graph/include/reverse_sequence_inst.h index c5f8762a4451b8..f247d964c87caa 100644 --- a/src/plugins/intel_gpu/src/graph/include/reverse_sequence_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/reverse_sequence_inst.h @@ -22,7 +22,6 @@ class typed_primitive_inst : public typed_primitive_inst_base< static std::vector calc_output_layouts(reverse_sequence_node const& /*node*/, const kernel_impl_params& impl_param) { return forward_input0_shape(impl_param); } - static layout calc_output_layout(reverse_sequence_node const& node, kernel_impl_params const& impl_param); static std::string to_string(reverse_sequence_node const& node); typed_primitive_inst(network& network, reverse_sequence_node const& desc); diff --git a/src/plugins/intel_gpu/src/graph/include/rms_inst.h b/src/plugins/intel_gpu/src/graph/include/rms_inst.h index a7800249f40421..9f6afceb434daf 100644 --- a/src/plugins/intel_gpu/src/graph/include/rms_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/rms_inst.h @@ -33,7 +33,6 @@ class typed_primitive_inst : public typed_primitive_inst_base { static std::vector calc_output_layouts(rms_node const& /*node*/, const kernel_impl_params& impl_params) { return forward_input0_shape(impl_params); } - static layout calc_output_layout(rms_node const& node, kernel_impl_params const& impl_params); static std::string to_string(rms_node const& node); typed_primitive_inst(network& network, rms_node const& node); diff --git a/src/plugins/intel_gpu/src/graph/include/roi_align_inst.h b/src/plugins/intel_gpu/src/graph/include/roi_align_inst.h index 371ac1b6ab1522..d0a0cc1a0220f4 100644 --- a/src/plugins/intel_gpu/src/graph/include/roi_align_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/roi_align_inst.h @@ -29,7 +29,6 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(roi_align_node const& node, kernel_impl_params const& impl_param); - static layout calc_output_layout(roi_align_node const& node, kernel_impl_params const& impl_param); static std::string to_string(roi_align_node const& node); public: diff --git a/src/plugins/intel_gpu/src/graph/include/roi_pooling_inst.h b/src/plugins/intel_gpu/src/graph/include/roi_pooling_inst.h index 8c4c2622488ced..1d1b7e0410cb09 100644 --- a/src/plugins/intel_gpu/src/graph/include/roi_pooling_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/roi_pooling_inst.h @@ -29,7 +29,6 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(roi_pooling_node const& node, kernel_impl_params const& impl_param); - static layout calc_output_layout(roi_pooling_node const& node, kernel_impl_params const& impl_param); static std::string to_string(roi_pooling_node const& node); public: diff --git a/src/plugins/intel_gpu/src/graph/include/roll_inst.hpp b/src/plugins/intel_gpu/src/graph/include/roll_inst.hpp index 0b98d68651256d..47ecc250b46cc0 100644 --- a/src/plugins/intel_gpu/src/graph/include/roll_inst.hpp +++ b/src/plugins/intel_gpu/src/graph/include/roll_inst.hpp @@ -30,7 +30,6 @@ class typed_primitive_inst : public typed_primitive_inst_base { static std::vector calc_output_layouts(roll_node const& /*node*/, const kernel_impl_params& impl_param) { return forward_input0_shape(impl_param); } - static layout calc_output_layout(const roll_node& node, kernel_impl_params const& impl_param); static std::string to_string(const roll_node& node); }; diff --git a/src/plugins/intel_gpu/src/graph/include/scatter_elements_update_inst.h b/src/plugins/intel_gpu/src/graph/include/scatter_elements_update_inst.h index 0e06d900bddcbc..49d54f7d388ea5 100644 --- a/src/plugins/intel_gpu/src/graph/include/scatter_elements_update_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/scatter_elements_update_inst.h @@ -22,7 +22,6 @@ class typed_primitive_inst : public typed_primitive_ins return forward_input0_shape(impl_param); } - static layout calc_output_layout(scatter_elements_update_node const& node, kernel_impl_params const& impl_param); static std::string to_string(scatter_elements_update_node const& node); typed_primitive_inst(network& network, scatter_elements_update_node const& desc); diff --git a/src/plugins/intel_gpu/src/graph/include/scatter_nd_update_inst.h b/src/plugins/intel_gpu/src/graph/include/scatter_nd_update_inst.h index 4718bf367d3b55..3c828aa994390e 100644 --- a/src/plugins/intel_gpu/src/graph/include/scatter_nd_update_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/scatter_nd_update_inst.h @@ -31,7 +31,6 @@ class typed_primitive_inst : public typed_primitive_inst_base public: template static std::vector calc_output_layouts(scatter_nd_update_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(scatter_nd_update_node const& node, kernel_impl_params const& impl_param); static std::string to_string(scatter_nd_update_node const& node); public: diff --git a/src/plugins/intel_gpu/src/graph/include/scatter_update_inst.h b/src/plugins/intel_gpu/src/graph/include/scatter_update_inst.h index 008c99ef070119..640ef528a87369 100644 --- a/src/plugins/intel_gpu/src/graph/include/scatter_update_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/scatter_update_inst.h @@ -33,7 +33,6 @@ class typed_primitive_inst : public typed_primitive_inst_base calc_output_layouts(scatter_update_node const& /*node*/, const kernel_impl_params& impl_param) { return forward_input0_shape(impl_param); } - static layout calc_output_layout(scatter_update_node const& node, kernel_impl_params const& impl_param); static std::string to_string(scatter_update_node const& node); typed_primitive_inst(network& network, scatter_update_node const& desc); diff --git a/src/plugins/intel_gpu/src/graph/include/select_inst.h b/src/plugins/intel_gpu/src/graph/include/select_inst.h index 5c1305cde1e4af..f0795de5192306 100644 --- a/src/plugins/intel_gpu/src/graph/include/select_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/select_inst.h @@ -30,7 +30,6 @@ class typed_primitive_inst { public: template static std::vector calc_output_layouts(const select_node& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(select_node const& node, kernel_impl_params const& impl_param); static std::string to_string(select_node const& node); typed_primitive_inst(network& network, select_node const& node); }; diff --git a/src/plugins/intel_gpu/src/graph/include/shape_of_inst.h b/src/plugins/intel_gpu/src/graph/include/shape_of_inst.h index 7711ee84cf9a96..3c7edcc584673a 100644 --- a/src/plugins/intel_gpu/src/graph/include/shape_of_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/shape_of_inst.h @@ -35,7 +35,6 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(shape_of_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(shape_of_node const& node, kernel_impl_params const& impl_param); static std::string to_string(shape_of_node const& node); typed_primitive_inst(network& network, shape_of_node const& node); diff --git a/src/plugins/intel_gpu/src/graph/include/shuffle_channels_inst.h b/src/plugins/intel_gpu/src/graph/include/shuffle_channels_inst.h index 6ca6aed36ec85e..e02331f379bb2b 100644 --- a/src/plugins/intel_gpu/src/graph/include/shuffle_channels_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/shuffle_channels_inst.h @@ -23,7 +23,6 @@ class typed_primitive_inst : public typed_primitive_inst_base< return forward_input0_shape(impl_param); } - static layout calc_output_layout(shuffle_channels_node const& node, kernel_impl_params const& impl_param); static std::string to_string(shuffle_channels_node const& node); typed_primitive_inst(network& network, shuffle_channels_node const& desc); diff --git a/src/plugins/intel_gpu/src/graph/include/slice_inst.h b/src/plugins/intel_gpu/src/graph/include/slice_inst.h index 8889cca07eb850..1297b6e7028fe9 100644 --- a/src/plugins/intel_gpu/src/graph/include/slice_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/slice_inst.h @@ -56,8 +56,7 @@ class typed_primitive_inst : public typed_primitive_inst_base { public: template - static std::vector calc_output_layouts(const slice_node& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(slice_node const& node, kernel_impl_params const& impl_param); + static std::vector calc_output_layouts(const slice_node& /*node*/, const kernel_impl_params& impl_param); static std::string to_string(slice_node const& node); typed_primitive_inst(network& network, slice_node const& desc); diff --git a/src/plugins/intel_gpu/src/graph/include/softmax_inst.h b/src/plugins/intel_gpu/src/graph/include/softmax_inst.h index d030dbfca58d72..e0a17fe9732307 100644 --- a/src/plugins/intel_gpu/src/graph/include/softmax_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/softmax_inst.h @@ -31,7 +31,6 @@ class typed_primitive_inst : public typed_primitive_inst_base return forward_input0_shape(impl_param); } - static layout calc_output_layout(softmax_node const& node, kernel_impl_params const& impl_param); static std::string to_string(softmax_node const& node); typed_primitive_inst(network& network, softmax_node const& desc); diff --git a/src/plugins/intel_gpu/src/graph/include/space_to_batch_inst.h b/src/plugins/intel_gpu/src/graph/include/space_to_batch_inst.h index 43c4ba49309032..6e12213dafb3b7 100644 --- a/src/plugins/intel_gpu/src/graph/include/space_to_batch_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/space_to_batch_inst.h @@ -32,7 +32,6 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(space_to_batch_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(space_to_batch_node const& node, kernel_impl_params const& impl_param); static std::string to_string(space_to_batch_node const& node); bool need_reset_output_memory() const override { diff --git a/src/plugins/intel_gpu/src/graph/include/space_to_depth_inst.h b/src/plugins/intel_gpu/src/graph/include/space_to_depth_inst.h index dbd94dc446ee5b..963d6093e191a3 100644 --- a/src/plugins/intel_gpu/src/graph/include/space_to_depth_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/space_to_depth_inst.h @@ -33,7 +33,6 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(space_to_depth_node const& node, kernel_impl_params const& impl_param); - static layout calc_output_layout(space_to_depth_node const& node, kernel_impl_params const& impl_param); static std::string to_string(space_to_depth_node const& node); public: diff --git a/src/plugins/intel_gpu/src/graph/include/strided_slice_inst.h b/src/plugins/intel_gpu/src/graph/include/strided_slice_inst.h index 6f7ca39bd5efda..6858032e52d3b1 100644 --- a/src/plugins/intel_gpu/src/graph/include/strided_slice_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/strided_slice_inst.h @@ -36,7 +36,6 @@ class typed_primitive_inst : public typed_primitive_inst_base static std::vector calc_output_layouts(strided_slice_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(strided_slice_node const& node, kernel_impl_params const& impl_param); static std::string to_string(strided_slice_node const& node); typed_primitive_inst(network& network, strided_slice_node const& desc); diff --git a/src/plugins/intel_gpu/src/graph/include/swiglu_inst.h b/src/plugins/intel_gpu/src/graph/include/swiglu_inst.h index 6a5ce08dc54bd2..2413cfa72af86a 100644 --- a/src/plugins/intel_gpu/src/graph/include/swiglu_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/swiglu_inst.h @@ -31,7 +31,6 @@ class typed_primitive_inst : public typed_primitive_inst_base { public: template static std::vector calc_output_layouts(swiglu_node const& /*node*/, const kernel_impl_params& impl_params); - static layout calc_output_layout(swiglu_node const& node, kernel_impl_params const& impl_params); static std::string to_string(swiglu_node const& node); typed_primitive_inst(network& network, swiglu_node const& node); diff --git a/src/plugins/intel_gpu/src/graph/include/tile_inst.h b/src/plugins/intel_gpu/src/graph/include/tile_inst.h index db9a1b87fedd9a..c456802070d5a4 100644 --- a/src/plugins/intel_gpu/src/graph/include/tile_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/tile_inst.h @@ -32,7 +32,6 @@ class typed_primitive_inst : public typed_primitive_inst_base { public: template static std::vector calc_output_layouts(tile_node const& /*node*/, const kernel_impl_params& impl_param); - static layout calc_output_layout(tile_node const& node, kernel_impl_params const& impl_param); static std::string to_string(tile_node const& node); diff --git a/src/plugins/intel_gpu/src/graph/include/unique_inst.hpp b/src/plugins/intel_gpu/src/graph/include/unique_inst.hpp index 6b1faecd5d39fe..21394110563398 100644 --- a/src/plugins/intel_gpu/src/graph/include/unique_inst.hpp +++ b/src/plugins/intel_gpu/src/graph/include/unique_inst.hpp @@ -27,7 +27,6 @@ class typed_primitive_inst : public typed_primitive_inst_base; using parent::parent; - static layout calc_output_layout(const unique_count_node& node, const kernel_impl_params& impl_param); template static std::vector calc_output_layouts(const unique_count_node& node, const kernel_impl_params& impl_param); static std::string to_string(const unique_count_node& node); @@ -57,7 +56,6 @@ class typed_primitive_inst : public typed_primitive_inst_base; using parent::parent; - static layout calc_output_layout(const unique_gather_node& node, const kernel_impl_params& impl_param); template static std::vector calc_output_layouts(const unique_gather_node& node, const kernel_impl_params& impl_param); diff --git a/src/plugins/intel_gpu/src/graph/kv_cache.cpp b/src/plugins/intel_gpu/src/graph/kv_cache.cpp index 95cdd587cdf175..fa158039291670 100644 --- a/src/plugins/intel_gpu/src/graph/kv_cache.cpp +++ b/src/plugins/intel_gpu/src/graph/kv_cache.cpp @@ -20,10 +20,6 @@ kv_cache_inst::typed_primitive_inst(network& network, const kv_cache_node& node) kv_cache_id = network.get_kv_cache_ids().size(); } -layout kv_cache_inst::calc_output_layout(const kv_cache_node& node, kernel_impl_params const& impl_param) { - return impl_param.input_layouts[0]; -} - template std::vector kv_cache_inst::calc_output_layouts(kv_cache_node const& /*node*/, kernel_impl_params const& impl_param) { auto desc = impl_param.typed_desc(); diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index 6439ae570cd8c5..a4de595d5a5af1 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -426,7 +426,6 @@ bool layout_optimizer::can_fuse_reorder(program_node& prev, program_node& next, } bool layout_optimizer::can_fuse_reorder_to_prev(program_node& prev, reorder_node& node, format fmt_prev, format fmt_next) { - bool allow_new_shape_infer = node.get_program().is_new_shape_infer(); // Because mvn and concatenation kernel can work cross-layout, if reorder only performs type conversion, // fusing reorder to the previous node can be done even if it is a dynamic shape case if ((prev.is_type() || prev.is_type() || prev.is_type() || prev.is_type() || @@ -497,8 +496,7 @@ bool layout_optimizer::can_fuse_reorder_to_prev(program_node& prev, reorder_node if (fmt_prev.dimension() > 6 || fmt_next.dimension() > 6) return false; - // Skip reorder fusing to permute when allow_new_shape_infer is True and input and output rank is different - if (allow_new_shape_infer && (fmt_prev.dimension() != fmt_next.dimension())) + if (fmt_prev.dimension() != fmt_next.dimension()) return false; return true; @@ -727,7 +725,7 @@ bool layout_optimizer::should_select_b_fs_yx_fsv16_layout(convolution_node const auto fully_support_conv_num = _optimized_conv_count.at({format::b_fs_yx_fsv16, false}); auto partially_support_conv_num = _optimized_conv_count.at({format::b_fs_yx_fsv16, true}); - auto output_layout = node.calc_output_layout(); + auto output_layout = node.calc_output_layouts()[0]; auto current_conv_supports_layout = convolution_b_fs_yx_fsv16_opt(input_layout, output_layout, weights_layout, prim); auto is_prev_conv_node_supports_layout = node.get_dependency(0).is_type() && @@ -1006,7 +1004,7 @@ bool layout_optimizer::users_for_convolution_byxf_opt(program_node const& node, // convolution that is capable to use byxf and is performant is also valid for byxf opt } else if (user->type() == cldnn::convolution::type_id()) { if (convolution_byxf_opt(node.get_output_layout(), - user->calc_output_layout(), + user->calc_output_layouts()[0], user->get_input_layout(1), user->as())) { if (!users_for_convolution_byxf_opt(*user, depth - 1)) @@ -1751,28 +1749,10 @@ format layout_optimizer::get_preferred_format(program_node& node) { auto output_layout = node.get_output_layout(); bool use_onednn_impls = _optimization_attributes.use_onednn_impls; - bool allow_new_shape_infer = node.get_program().is_new_shape_infer(); - - if (allow_new_shape_infer) { - // Let reorder_input pass to check input format instead of output_format in forward investigation, vice versa - auto out_lay_rank = node.get_output_layout(false).get_rank(); - auto has_reshape_user = [&](const program_node& node) -> bool { - for (auto& user_node : node.get_users()) { - if (user_node->is_type()) - return true; - } - return false; - }; - - // Return default format for output layout rank when user node is reshape - // to add reorder in front of reshape in reorder_input stage instead of handle_reshpae stage. - // It is only applied for the dynamic shape with static input shape - if (!node.is_dynamic() && has_reshape_user(node)) - return format::get_default_format(out_lay_rank); - - if (node.is_type()) - return format::get_default_format(node.get_input_layout(0).get_rank()); + if (node.is_type()) + return format::get_default_format(node.get_input_layout(0).get_rank()); + if (node.is_dynamic()) { auto dep_size = node.get_dependencies().size(); for (size_t i = 0; i < dep_size; i++) { auto in_lay_rank = node.get_input_layout(i).get_rank(); @@ -1780,31 +1760,17 @@ format layout_optimizer::get_preferred_format(program_node& node) { if (std::find(shape_infer_deps.begin(), shape_infer_deps.end(), i) != shape_infer_deps.end()) { auto fmt = format::get_default_format(in_lay_rank, false, false); node.set_preferred_input_fmt(i, fmt); - } else if (in_lay_rank != out_lay_rank) { - auto fmt = get_preferred_format(node.get_dependency(i)); - // Check if selected format can be adjusted to the required input rank - // If no, use default fotmat instead - try { - // 7-dimention and 8-dimention only support plain format - if (in_lay_rank >= 7 || out_lay_rank >= 7) { - fmt = format::get_default_format(in_lay_rank); - } else { - format::adjust_to_rank(fmt, in_lay_rank); - } - } catch (ov::Exception&) { - fmt = format::get_default_format(in_lay_rank); - } - node.set_preferred_input_fmt(i, fmt); } } + } - // shape_infer_dep should be plain format because the memory is being read by ngraph shape infer as is - if (node.is_shape_infer_dep()) { - expected = format::get_default_format(output_layout.get_rank(), false, false); - node.set_preferred_output_fmt(0, expected); - return expected; - } + // shape_infer_dep should be plain format because the memory is being read by ngraph shape infer as is + if (node.is_shape_infer_dep()) { + expected = format::get_default_format(output_layout.get_rank(), false, false); + node.set_preferred_output_fmt(0, expected); + return expected; } + if (!_forcing_map.empty() && _forcing_map.count(node.id()) != 0) { expected = _forcing_map.at(node.id()).first; } else if (node.is_type()) { @@ -1866,19 +1832,10 @@ format layout_optimizer::get_preferred_format(program_node& node) { expected = node.get_preferred_output_fmt(); } if (node.is_type()) { - if (allow_new_shape_infer) { - // Plain input format is enforced because no available shape agnostic kernel supporting blocked format. - // The condition will be relaxed once more shape agnostic kernels for other formats are enabled (e.g., fsv->bfyx FC optimized kernel(i8))) - expected = format::get_default_format(node.get_input_layout(0).get_rank()); - node.set_preferred_input_fmt(0, expected); - } else { - auto& fc_node = node.as(); - auto input_layout = fc_node.get_input_layout(); - if (input_layout.format.dimension() > 4) { - expected = format::bfyx; - node.set_preferred_input_fmt(0, format::bfyx); - } - } + // Plain input format is enforced because no available shape agnostic kernel supporting blocked format. + // The condition will be relaxed once more shape agnostic kernels for other formats are enabled (e.g., fsv->bfyx FC optimized kernel(i8))) + expected = format::get_default_format(node.get_input_layout(0).get_rank()); + node.set_preferred_input_fmt(0, expected); } } else if (node.is_type()) { // Gather needs the original input/output rank because @@ -1893,7 +1850,7 @@ format layout_optimizer::get_preferred_format(program_node& node) { } } - if (allow_new_shape_infer && node.get_preferred_input_fmt() != format::any) { + if (node.get_preferred_input_fmt() != format::any) { if (node.get_preferred_output_fmt() != format::any) expected = node.get_preferred_output_fmt(); node.set_preferred_output_fmt(0, expected); @@ -2088,7 +2045,7 @@ void layout_optimizer::set_optimization_attribute(optimization_attributes_type a bool layout_optimizer::is_format_optimized(const convolution_node& node, const format& format, bool use_weak_restrictions) { auto input_layout = node.get_input_layout(); auto weights_layout = node.weights().get_output_layout(); - auto output_layout = node.calc_output_layout(); + auto output_layout = node.calc_output_layouts()[0]; auto prim = node.get_primitive(); if (input_layout.is_dynamic() || output_layout.is_dynamic()) diff --git a/src/plugins/intel_gpu/src/graph/loop.cpp b/src/plugins/intel_gpu/src/graph/loop.cpp index 51a7f5f0040a8f..ef07279a4afbf9 100644 --- a/src/plugins/intel_gpu/src/graph/loop.cpp +++ b/src/plugins/intel_gpu/src/graph/loop.cpp @@ -42,35 +42,6 @@ std::map loop_node::get_memory_deps() const { return memory_deps; } -layout loop_inst::calc_output_layout(loop_node const& /*node*/, kernel_impl_params const& impl_param) { - auto prim = impl_param.typed_desc(); - - // finds internal output - const auto& output_primitive_maps = prim->output_primitive_maps; - const auto& output_mapping = output_primitive_maps.front(); - - const auto& body_program = impl_param.inner_progs.front(); - const auto& body_outputs = body_program->get_outputs(); - - const primitive_id& output_internal_id = output_mapping.internal_id.pid; - auto target = std::find_if(body_outputs.begin(), body_outputs.end(), [&](const cldnn::program_node * output) { - return output->id() == output_internal_id; - }); - OPENVINO_ASSERT(target != body_outputs.end(), impl_param.desc->id, "output not found"); - - // set body output layout - layout loop_output_layout = (*target)->get_output_layout(); - const int64_t axis_to_iterate_through = output_mapping.axis; - if (axis_to_iterate_through != -1) { - const size_t ndim = loop_output_layout.get_rank(); - auto shape = loop_output_layout.get_dims(); - shape[axis_to_iterate_through] = static_cast(prim->max_num_iterations); - loop_output_layout.set_tensor(tensor(format::get_default_format(ndim), shape)); - } - - return loop_output_layout; -} - template static std::vector get_output_layouts(kernel_impl_params const& impl_param, std::vector body_outputs, const int64_t num_iterations = -1) { auto prim = impl_param.typed_desc(); @@ -782,19 +753,13 @@ void loop_inst::update_output_layout() { _impl_params->memory_deps = memory_deps; auto new_layouts = _node->type()->calc_output_layouts(*_node, *_impl_params); - if (new_layouts.empty()) { - auto new_layout = _node->type()->calc_output_layout(*_node, *_impl_params); - new_layout.data_padding = padding::max(_node->get_primitive()->get_output_padding(0), new_layout.data_padding); - _impl_params->output_layouts[0] = new_layout; - } else { - if (_impl_params->output_layouts.size() < new_layouts.size()) { - _impl_params->output_layouts.resize(new_layouts.size()); - } - for (size_t i = 0; i < new_layouts.size(); ++i) { - auto new_layout = new_layouts[i]; - new_layout.data_padding = padding::max(_node->get_primitive()->get_output_padding(i), new_layout.data_padding); - _impl_params->output_layouts[i] = new_layout; - } + if (_impl_params->output_layouts.size() < new_layouts.size()) { + _impl_params->output_layouts.resize(new_layouts.size()); + } + for (size_t i = 0; i < new_layouts.size(); ++i) { + auto new_layout = new_layouts[i]; + new_layout.data_padding = padding::max(_node->get_primitive()->get_output_padding(i), new_layout.data_padding); + _impl_params->output_layouts[i] = new_layout; } } diff --git a/src/plugins/intel_gpu/src/graph/lrn.cpp b/src/plugins/intel_gpu/src/graph/lrn.cpp index 3ece107b2d9de6..7e4b7e4dc06330 100644 --- a/src/plugins/intel_gpu/src/graph/lrn.cpp +++ b/src/plugins/intel_gpu/src/graph/lrn.cpp @@ -11,22 +11,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(lrn) -layout lrn_inst::calc_output_layout(lrn_node const& node, kernel_impl_params const& impl_param) { - assert(static_cast(impl_param.desc->output_data_types[0]) == false && - "Output data type forcing is not supported for lrn_node!"); - auto input_layout = impl_param.get_input_layout(); - auto output_type = input_layout.data_type; - - if (impl_param.has_fused_primitives()) { - output_type = impl_param.get_output_element_type(); - } - - auto result = impl_param.get_non_padded_input_layout(); - result.data_type = output_type; - - return result; -} - std::string lrn_inst::to_string(lrn_node const& node) { auto node_info = node.desc_to_json(); auto desc = node.get_primitive(); diff --git a/src/plugins/intel_gpu/src/graph/lstm_elt.cpp b/src/plugins/intel_gpu/src/graph/lstm_elt.cpp index 098e89aa45003e..aa8e01f9acf64e 100644 --- a/src/plugins/intel_gpu/src/graph/lstm_elt.cpp +++ b/src/plugins/intel_gpu/src/graph/lstm_elt.cpp @@ -10,23 +10,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(lstm_elt) -layout lstm_elt_inst::calc_output_layout(lstm_elt_node const& node, kernel_impl_params const& impl_param) { - assert(static_cast(impl_param.desc->output_data_types[0]) == false && - "Output data type forcing is not supported for lstm_elt_node!"); - auto input_layout = impl_param.get_input_layout(); - - // tempGEMM{bfyx} = [b: batch, f: direction, x: 1, y: 4 * hidden_size ] input - // cell{bfyx} = [b: batch, f: direction, x: 1, y: hidden_size ] optional - // output{bfyx} = [b: batch, f: 2, x: direction, y: hidden_size ] output - // The output of the lstm_elt node is the concatenation of the intermediate [hidden, cell] tensors. - // A crop/split node is needed to extract each individual tensors - auto result = - layout(input_layout.data_type, - input_layout.format, - tensor(input_layout.batch(), 2, input_layout.spatial(0) / 4, input_layout.feature())); - return result; -} - template std::vector lstm_elt_inst::calc_output_layouts(lstm_elt_node const& node, kernel_impl_params const& impl_param) { std::vector output_layouts; diff --git a/src/plugins/intel_gpu/src/graph/matrix_nms.cpp b/src/plugins/intel_gpu/src/graph/matrix_nms.cpp index cd7462633c89a8..c03a1c277f0dcb 100644 --- a/src/plugins/intel_gpu/src/graph/matrix_nms.cpp +++ b/src/plugins/intel_gpu/src/graph/matrix_nms.cpp @@ -1,6 +1,7 @@ // Copyright (C) 2018-2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 +#include "intel_gpu/primitives/matrix_nms.hpp" #include #include @@ -61,33 +62,6 @@ std::vector matrix_nms_inst::calc_output_layouts(matrix_nms_node const& template std::vector matrix_nms_inst::calc_output_layouts(matrix_nms_node const& node, const kernel_impl_params& impl_param); -layout matrix_nms_inst::calc_output_layout(const matrix_nms_node& node, const kernel_impl_params& impl_param) { - const auto primitive = impl_param.typed_desc(); - const auto boxes_layout = impl_param.get_input_layout(0); - const auto scores_layout = impl_param.get_input_layout(1); - - const auto batches_num = boxes_layout.batch(); - auto classes_num = scores_layout.feature(); - const auto boxes_num = boxes_layout.feature(); - - if (primitive->attribs.background_class >= 0 && primitive->attribs.background_class < classes_num) - classes_num = std::max(1, classes_num - 1); - - int max_output_boxes_per_class{boxes_num}; - if (primitive->attribs.nms_top_k >= 0) - max_output_boxes_per_class = std::min(max_output_boxes_per_class, primitive->attribs.nms_top_k); - - auto max_output_boxes_per_batch = max_output_boxes_per_class * classes_num; - if (primitive->attribs.keep_top_k >= 0) - max_output_boxes_per_batch = std::min(max_output_boxes_per_batch, primitive->attribs.keep_top_k); - - auto output_num = max_output_boxes_per_batch * batches_num; - - // BOX_DATA: class_id, box_score, xmin, ymin, xmax, ymax - constexpr size_t BOX_DATA{6}; - return layout(boxes_layout.data_type, boxes_layout.format, {output_num, BOX_DATA, 1, 1}); -} - std::string matrix_nms_inst::to_string(const matrix_nms_node& node) { json_composite matrix_nms_info; matrix_nms_info.add("boxes id", node.input().id()); diff --git a/src/plugins/intel_gpu/src/graph/multiclass_nms.cpp b/src/plugins/intel_gpu/src/graph/multiclass_nms.cpp index 736f64d1b2591c..c0414a9b8ef5d2 100644 --- a/src/plugins/intel_gpu/src/graph/multiclass_nms.cpp +++ b/src/plugins/intel_gpu/src/graph/multiclass_nms.cpp @@ -2,6 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "openvino/op/multiclass_nms.hpp" +#include "multiclass_nms_shape_inference.hpp" #include #include "json_object.h" @@ -41,39 +43,6 @@ std::vector multiclass_nms_inst::calc_output_layouts(multiclass_nms_node template std::vector multiclass_nms_inst::calc_output_layouts(multiclass_nms_node const& node, const kernel_impl_params& impl_param); - -layout multiclass_nms_inst::calc_output_layout(const multiclass_nms_node& node, const kernel_impl_params& impl_param) { - const auto input_layout = impl_param.get_input_layout(); - const auto attrs = impl_param.typed_desc()->attrs; - - const auto num_batches = - node.has_roisnum() ? node.roisnum().get_output_layout().batch() : node.scores().get_output_layout().batch(); - auto num_classes = - node.has_roisnum() ? node.boxes().get_output_layout().batch() : node.scores().get_output_layout().feature(); - const auto num_boxes = node.boxes().get_output_layout().feature(); - - // see shape_infer() call in MulticlassNmsIEInternal::validate_and_infer_types() - ignore_bg_class == true - if (attrs.background_class >= 0 && attrs.background_class < num_classes) { - num_classes = std::max(1, num_classes - 1); - } - - int max_output_boxes_per_class = 0; - if (attrs.nms_top_k >= 0) { - max_output_boxes_per_class = std::min(num_boxes, attrs.nms_top_k); - } else { - max_output_boxes_per_class = num_boxes; - } - - auto max_output_boxes_per_batch = max_output_boxes_per_class * num_classes; - if (attrs.keep_top_k >= 0) { - max_output_boxes_per_batch = std::min(max_output_boxes_per_batch, attrs.keep_top_k); - } - - const auto dim = max_output_boxes_per_batch * num_batches; - constexpr auto output_size = 6; // 4 coordinates + 1 class + 1 score - return layout{input_layout.data_type, input_layout.format, {dim, output_size, 1, 1}}; -} - std::string multiclass_nms_inst::to_string(const multiclass_nms_node& node) { const auto attrs = node.get_primitive()->attrs; std::stringstream primitive_description; diff --git a/src/plugins/intel_gpu/src/graph/multinomial.cpp b/src/plugins/intel_gpu/src/graph/multinomial.cpp index fb33bc77844e35..eb4724c7478da5 100644 --- a/src/plugins/intel_gpu/src/graph/multinomial.cpp +++ b/src/plugins/intel_gpu/src/graph/multinomial.cpp @@ -23,23 +23,6 @@ std::vector multinomial_inst::calc_output_layouts(multinomial_node const template std::vector multinomial_inst::calc_output_layouts(multinomial_node const& node, const kernel_impl_params& impl_param); -layout multinomial_inst::calc_output_layout(multinomial_node const& node, kernel_impl_params const& impl_param) { - auto primitive = impl_param.typed_desc(); - auto input_layout = impl_param.get_input_layout(0); - if (input_layout.get_shape().size() == 1) { - return {primitive->output_data_type, input_layout.format, - tensor{std::vector{ - static_cast(primitive->num_samples) - }}}; - } else { - return {primitive->output_data_type, input_layout.format, - tensor{std::vector{ - input_layout.batch(), - static_cast(primitive->num_samples) - }}}; - } -} - multinomial_inst::typed_primitive_inst(network& network, multinomial_node const& node) : parent{network, node} {} diff --git a/src/plugins/intel_gpu/src/graph/mvn.cpp b/src/plugins/intel_gpu/src/graph/mvn.cpp index 3f7dfb8663b37c..ce92b617630264 100644 --- a/src/plugins/intel_gpu/src/graph/mvn.cpp +++ b/src/plugins/intel_gpu/src/graph/mvn.cpp @@ -10,19 +10,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(mvn) -layout mvn_inst::calc_output_layout(mvn_node const& node, kernel_impl_params const& impl_param) { - auto input_node_layout = impl_param.get_non_padded_input_layout(); - auto output_type = impl_param.desc->output_data_types[0].value_or(input_node_layout.data_type); - - if (impl_param.has_fused_primitives()) { - output_type = impl_param.get_output_element_type(); - } else if (input_node_layout.data_type == data_types::u8 || input_node_layout.data_type == data_types::i8) { - output_type = data_types::f32; - } - - return layout(output_type, input_node_layout.format, input_node_layout.get_tensor()); -} - std::string mvn_inst::to_string(mvn_node const& node) { auto node_info = node.desc_to_json(); auto desc = node.get_primitive(); diff --git a/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp b/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp index 00fdc5dc6db31e..d77c1a0450ba79 100644 --- a/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp +++ b/src/plugins/intel_gpu/src/graph/non_max_suppression.cpp @@ -1,7 +1,9 @@ // Copyright (C) 2018-2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/runtime/tensor_accessor.hpp" #include "non_max_suppression_inst.h" +#include "openvino/op/nms_rotated.hpp" #include "primitive_type_base.h" #include "json_object.h" #include @@ -17,15 +19,6 @@ namespace cldnn { // ----------------------------------------------- GPU_DEFINE_PRIMITIVE_TYPE_ID(non_max_suppression) -layout non_max_suppression_inst::calc_output_layout(non_max_suppression_node const& node, kernel_impl_params const& impl_param) { - auto desc = impl_param.typed_desc(); - - auto output_type = desc->output_data_types[0].value_or(data_types::i32); - - auto output_size = tensor(batch(desc->selected_indices_num), feature(3)); - return layout(output_type, impl_param.get_input_layout().format, output_size); -} - template std::vector non_max_suppression_inst::calc_output_layouts(non_max_suppression_node const& /*node*/, const kernel_impl_params& impl_param) { std::vector layouts; @@ -148,7 +141,7 @@ void non_max_suppression_gather_inst::update_output_memory() { return; for (size_t i = 0; i < inputs_memory_count(); i++) { - if (node->get_program().is_new_shape_infer() && input_memory_ptr(i) == nullptr) + if (input_memory_ptr(i) == nullptr) return; if (output_memory_ptr(i) != nullptr && _network.get_engine().is_the_same_buffer(output_memory(i), input_memory(i))) diff --git a/src/plugins/intel_gpu/src/graph/non_zero.cpp b/src/plugins/intel_gpu/src/graph/non_zero.cpp index 9bf1fd4ff32b73..429eaf68acba96 100644 --- a/src/plugins/intel_gpu/src/graph/non_zero.cpp +++ b/src/plugins/intel_gpu/src/graph/non_zero.cpp @@ -16,12 +16,6 @@ namespace cldnn { // ----------------------------------------------- GPU_DEFINE_PRIMITIVE_TYPE_ID(count_nonzero) -layout count_nonzero_inst::calc_output_layout(count_nonzero_node const& node, kernel_impl_params const& impl_param) { - assert(static_cast(node.get_primitive()->output_data_types[0]) == false && - "Output data type forcing is not supported for count_nonzero_node!"); - return layout{cldnn::data_types::i32, cldnn::format::bfyx, tensor{1, 1, 1, 1}}; -} - template std::vector count_nonzero_inst::calc_output_layouts(count_nonzero_node const& /*node*/, kernel_impl_params const& impl_param) { assert(static_cast(impl_param.desc->output_data_types[0]) == false && @@ -53,18 +47,6 @@ count_nonzero_inst::typed_primitive_inst(network& network, count_nonzero_node co // ----------------------------------------------- GPU_DEFINE_PRIMITIVE_TYPE_ID(gather_nonzero) -layout gather_nonzero_inst::calc_output_layout(gather_nonzero_node const& node, kernel_impl_params const& impl_param) { - assert(static_cast(node.get_primitive()->output_data_types[0]) == false && - "Output data type forcing is not supported for gather_nonzero_node!"); - auto rank = impl_param.get_input_layout(0).get_partial_shape().rank().get_length(); - if (impl_param.memory_deps.count(1)) { - auto out_size = read_vector(impl_param.memory_deps.at(1), impl_param.get_stream()); - return layout{{rank, out_size[0], 1, 1}, cldnn::data_types::i32, cldnn::format::bfyx}; - } else { - return layout{ov::PartialShape({ov::Dimension(rank), ov::Dimension::dynamic(), 1, 1}), cldnn::data_types::i32, cldnn::format::bfyx}; - } -} - template std::vector gather_nonzero_inst::calc_output_layouts(gather_nonzero_node const& /*node*/, kernel_impl_params const& impl_param) { auto desc = impl_param.typed_desc(); diff --git a/src/plugins/intel_gpu/src/graph/normalize.cpp b/src/plugins/intel_gpu/src/graph/normalize.cpp index 02d2524f23dba3..7c20972562b277 100644 --- a/src/plugins/intel_gpu/src/graph/normalize.cpp +++ b/src/plugins/intel_gpu/src/graph/normalize.cpp @@ -11,21 +11,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(normalize) -layout normalize_inst::calc_output_layout(normalize_node const& node, kernel_impl_params const& impl_param) { - assert(static_cast(impl_param.desc->output_data_types[0]) == false && - "Output data type forcing is not supported for normalize_node!"); - auto input_node_layout = impl_param.get_non_padded_input_layout(); - auto output_type = input_node_layout.data_type; - - if (impl_param.has_fused_primitives()) { - output_type = impl_param.get_output_element_type(); - } else if (input_node_layout.data_type == data_types::u8 || input_node_layout.data_type == data_types::i8) { - output_type = data_types::f32; - } - - return layout(output_type, input_node_layout.format, input_node_layout.get_tensor()); -} - std::string normalize_inst::to_string(normalize_node const& node) { auto node_info = node.desc_to_json(); auto desc = node.get_primitive(); diff --git a/src/plugins/intel_gpu/src/graph/one_hot.cpp b/src/plugins/intel_gpu/src/graph/one_hot.cpp index ede34e8e0935b0..fb3a17b675e7f7 100644 --- a/src/plugins/intel_gpu/src/graph/one_hot.cpp +++ b/src/plugins/intel_gpu/src/graph/one_hot.cpp @@ -4,7 +4,6 @@ #include "one_hot_inst.h" -#include "intel_gpu/runtime/error_handler.hpp" #include "json_object.h" #include "primitive_type_base.h" #include @@ -15,35 +14,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(one_hot) -static bool is_output_bfzyx(const layout& input, int32_t axis) { - if (input.format == format::bfzyx) - return true; - if (axis == 4) - return true; - auto in_dims = input.get_tensor().sizes(format::bfyx); - if (in_dims[3] != 1) - return true; - return false; -} - -layout one_hot_inst::calc_output_layout(one_hot_node const& node, kernel_impl_params const& impl_param) { - auto input_layout = impl_param.get_input_layout(); - auto desc = impl_param.typed_desc(); - - auto dt = desc->output_data_types[0].value_or(input_layout.data_type); - auto format = input_layout.format; - - if (desc->one_hot_axis > 4) { - CLDNN_ERROR_MESSAGE(desc->id, - "Incorrect parameters configuration: one_hot_axis should be less or equal to 4."); - } - - if (is_output_bfzyx(input_layout, desc->one_hot_axis)) - format = format::bfzyx; - - return {dt, format, desc->shape}; -} - template std::vector one_hot_inst::calc_output_layouts(const one_hot_node& /*node*/, const kernel_impl_params& impl_param) { auto desc = impl_param.typed_desc(); @@ -81,7 +51,6 @@ template std::vector one_hot_inst::calc_output_layouts std::string one_hot_inst::to_string(one_hot_node const& node) { auto desc = node.get_primitive(); auto node_info = node.desc_to_json(); - const auto& shape = desc->shape; const auto& one_hot_axis = desc->one_hot_axis; auto& input = node.input(); @@ -89,7 +58,6 @@ std::string one_hot_inst::to_string(one_hot_node const& node) { json_composite one_hot_info; one_hot_info.add("input id", input.id()); - one_hot_info.add("output shape", shape.to_string()); one_hot_info.add("one-hot axis", one_hot_axis); node_info->add("one_hot info", one_hot_info); @@ -98,36 +66,5 @@ std::string one_hot_inst::to_string(one_hot_node const& node) { return primitive_description.str(); } -one_hot_inst::typed_primitive_inst(network& network, one_hot_node const& node) : parent(network, node) { - auto input_layout = node.get_input_layout(); - - if (input_layout.is_dynamic()) - return; - - const auto& input_sizes = input_layout.get_tensor(); - const auto& output_sizes = argument->shape; - - std::vector input_dims = {input_sizes.batch[0], - input_sizes.feature[0], - input_sizes.spatial[1], - input_sizes.spatial[0]}; - std::vector output_dims = {output_sizes.batch[0], - output_sizes.feature[0], - output_sizes.spatial[1], - output_sizes.spatial[0]}; - - if (is_output_bfzyx(input_layout, node.get_primitive()->one_hot_axis)) { - output_dims.insert(output_dims.begin() + 2, output_sizes.spatial[2]); - } - - const auto& one_hot_axis = node.get_primitive()->one_hot_axis; - - for (int64_t i = 0, j = 0; j < static_cast(output_dims.size()) - 1; ++i, ++j) { - if (j == one_hot_axis) - ++j; - if (input_dims[i] != output_dims[j]) { - CLDNN_ERROR_MESSAGE(node.id(), "Incorrect parameters configuration: shape does not fit input size."); - } - } -} +one_hot_inst::typed_primitive_inst(network& network, one_hot_node const& node) : parent(network, node) { } } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/permute.cpp b/src/plugins/intel_gpu/src/graph/permute.cpp index d73e6052fd5ecb..8b46fb2878a1dd 100644 --- a/src/plugins/intel_gpu/src/graph/permute.cpp +++ b/src/plugins/intel_gpu/src/graph/permute.cpp @@ -13,39 +13,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(permute) -layout permute_inst::calc_output_layout(permute_node const& node, kernel_impl_params const& impl_param) { - auto desc = impl_param.typed_desc(); - auto input_layout = impl_param.get_input_layout(); - auto permute_order = desc->permute_order; - std::vector output_shape; - - auto input_shape = input_layout.get_dims(); - - for (size_t x = 0; x < permute_order.size(); x++) { - output_shape.push_back(input_shape[permute_order[x]]); - } - - for (size_t i = output_shape.size(); i < 4; i++) { - output_shape.push_back(1); - } - - auto output_size = tensor(format::get_default_format(input_layout.get_rank()), output_shape); - auto op = desc->output_paddings[0]; - - auto output_type = desc->output_data_types[0].value_or(input_layout.data_type); - if (impl_param.has_fused_primitives()) { - output_type = impl_param.get_output_element_type(); - } - - // Adjust output format for optimizing out of transpose related to acdb format. - auto out_fmt = input_layout.format; - if (node.get_preferred_output_fmt() != format::any) { - out_fmt = node.get_preferred_output_fmt(); - } - - return layout(output_type, out_fmt, output_size, op); -} - template std::vector permute_inst::calc_output_layouts(permute_node const& node, kernel_impl_params const& impl_param) { auto desc = impl_param.typed_desc(); diff --git a/src/plugins/intel_gpu/src/graph/pooling.cpp b/src/plugins/intel_gpu/src/graph/pooling.cpp index 671a5a412f1067..bbf19835c3edf8 100644 --- a/src/plugins/intel_gpu/src/graph/pooling.cpp +++ b/src/plugins/intel_gpu/src/graph/pooling.cpp @@ -17,133 +17,6 @@ using namespace ov::intel_gpu; namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(pooling) -layout pooling_inst::calc_output_layout(parent::typed_node const& node, kernel_impl_params const& impl_param) { - auto desc = impl_param.typed_desc(); - - auto input_layout = impl_param.get_input_layout(); - - auto pad = desc->pads_begin; - auto stride = desc->stride; - auto window_size = desc->size; - - // auto output_type = node.get_primitive()->output_data_type ? *node.get_primitive()->output_data_type : input_layout.data_type; - // FIXME: dirty hack. Replace it with optional output data type (above) once OV returns correct precision on edges - auto output_type = input_layout.data_type; - - if (output_type == data_types::u8 || output_type == data_types::i8) { - if (desc->mode == pooling_mode::average_no_padding || desc->mode == pooling_mode::average) { - output_type = data_types::f32; - } - } - - if (impl_param.has_fused_primitives()) { - output_type = impl_param.get_output_element_type(); - - // pooling doesn't support i32 data type - // FIXME: Someday delete this, when pooling supports i32 output. - if (desc->mode == pooling_mode::max && output_type == data_types::i32) { - output_type = data_types::f32; - } - } - - auto stride_z = stride.size() >= 3 ? stride[stride.size() - 3] : 1; - auto stride_y = stride.size() >= 2 ? stride[stride.size() - 2] : 1; - auto stride_x = stride.size() >= 1 ? stride[stride.size() - 1] : 1; - - auto kernel_z = window_size.size() >= 3 ? window_size[window_size.size() - 3] : 1; - auto kernel_y = window_size.size() >= 2 ? window_size[window_size.size() - 2] : 1; - auto kernel_x = window_size.size() >= 1 ? window_size[window_size.size() - 1] : 1; - - // TODO: Consider moving general parameter verification to arguments constructor. - CLDNN_ERROR_LESS_OR_EQUAL_THAN(desc->id, - "stride spatial X", - stride_x, - "", - 0, - "Stride spatial X must be positive (>= 1)"); - CLDNN_ERROR_LESS_OR_EQUAL_THAN(desc->id, - "stride spatial Y", - stride_y, - "", - 0, - "Stride spatial Y must be positive (>= 1)"); - CLDNN_ERROR_LESS_OR_EQUAL_THAN(desc->id, - "window size spatial X", - kernel_x, - "", - 0, - "Size X (of pooling window) must be positive (>= 1)"); - CLDNN_ERROR_LESS_OR_EQUAL_THAN(desc->id, - "window size spatial Y", - kernel_y, - "", - 0, - "Size Y (of pooling window) must be positive (>= 1)"); - if (input_layout.format.spatial_num() == 3) { - // 3D - CLDNN_ERROR_LESS_OR_EQUAL_THAN(desc->id, - "stride spatial Z", - stride_z, - "", - 0, - "Stride spatial Z must be positive (>= 1)"); - CLDNN_ERROR_LESS_OR_EQUAL_THAN(desc->id, - "window size spatial Z", - kernel_z, - "", - 0, - "Size Z (of pooling window) must be positive (>= 1)"); - } - - if (desc->with_output_size) { - CLDNN_ERROR_LESS_OR_EQUAL_THAN(desc->id, - "User-defined size of output X", - desc->output_size.spatial[0], - "", - 0, - "User-defined size of output layout (spatial X) must be positive (>= 1)"); - CLDNN_ERROR_LESS_OR_EQUAL_THAN(desc->id, - "User-defined size of output Y", - desc->output_size.spatial[1], - "", - 0, - "User-defined size of output layout (spatial Y) must be positive (>= 1)"); - CLDNN_ERROR_LESS_OR_EQUAL_THAN(desc->id, - "User-defined size of output Z", - desc->output_size.spatial[2], - "", - 0, - "User-defined size of output layout (spatial Z) must be positive (>= 1)"); - - tensor output_size(input_layout.batch(), - input_layout.feature(), - desc->output_size.spatial[0], - desc->output_size.spatial[1], - desc->output_size.spatial[2]); - return {output_type, input_layout.format, output_size}; - } - - // TODO: Check compatibility of output size calculation (with caffe). - tensor size(1); - for (size_t i = 0; i < window_size.size(); i++) { - size.spatial[i] = static_cast(window_size[window_size.size() - i - 1]); - } - auto output_range = calc_sliding_window_output_range(input_layout.get_tensor(), - size, - ov::CoordinateDiff(pad.begin(), pad.end()), - stride, - ov::Strides(window_size.size(), 1), - true, - 1); - - tensor output_size(input_layout.batch(), - input_layout.feature(), - output_range.spatial[0], - output_range.spatial[1], - output_range.spatial[2]); - return {output_type, input_layout.format, output_size}; -} - template std::vector pooling_inst::calc_output_layouts(pooling_node const& /*node*/, const kernel_impl_params& impl_param) { auto desc = impl_param.typed_desc(); @@ -184,34 +57,6 @@ std::vector pooling_inst::calc_output_layouts(pooling_node const& /*node return out_layouts; } - if (desc->with_output_size) { - CLDNN_ERROR_LESS_OR_EQUAL_THAN(desc->id, - "User-defined size of output X", - desc->output_size.spatial[0], - "", - 0, - "User-defined size of output layout (spatial X) must be positive (>= 1)"); - CLDNN_ERROR_LESS_OR_EQUAL_THAN(desc->id, - "User-defined size of output Y", - desc->output_size.spatial[1], - "", - 0, - "User-defined size of output layout (spatial Y) must be positive (>= 1)"); - CLDNN_ERROR_LESS_OR_EQUAL_THAN(desc->id, - "User-defined size of output Z", - desc->output_size.spatial[2], - "", - 0, - "User-defined size of output layout (spatial Z) must be positive (>= 1)"); - - tensor output_size(input_layout.batch(), - input_layout.feature(), - desc->output_size.spatial[0], - desc->output_size.spatial[1], - desc->output_size.spatial[2]); - return {{output_dtype, input_layout.format, output_size}}; - } - auto kernel_size = desc->size; auto stride = desc->stride; auto dilation = desc->dilation.empty() ? ov::Strides(stride.size(), 1) @@ -268,12 +113,6 @@ std::string pooling_inst::to_string(pooling_node const& node) { pooling_info.add("mode", mode); pooling_info.add("stride", cldnn::to_string(strd)); pooling_info.add("kernel size", cldnn::to_string(kernel_size)); - if (desc->with_output_size) { - json_composite ud_out_size_info; - ud_out_size_info.add("size", desc->output_size.to_string()); - pooling_info.add("with_user_defined_output_size", ud_out_size_info); - } - node_info->add("pooling info", pooling_info); node_info->dump(primitive_description); diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index ad1541177b7dd6..2a19fecfce8687 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -2344,7 +2344,6 @@ cldnn::network::ptr primitive_inst::get_unfused_subgraph() { } ExecutionConfig subgraph_config{ ov::intel_gpu::allow_static_input_reorder(true), - ov::intel_gpu::allow_new_shape_infer(true), ov::enable_profiling(get_network().get_config().get_property(ov::enable_profiling)) }; auto prog = program::build_program(get_network().get_engine(), diff --git a/src/plugins/intel_gpu/src/graph/prior_box.cpp b/src/plugins/intel_gpu/src/graph/prior_box.cpp index 44634aea0d2b25..d142c2d70d2870 100644 --- a/src/plugins/intel_gpu/src/graph/prior_box.cpp +++ b/src/plugins/intel_gpu/src/graph/prior_box.cpp @@ -225,59 +225,6 @@ std::string vector_to_string(const std::vector& vec) { return result.str(); } -std::vector normalized_aspect_ratio(const std::vector& aspect_ratio, bool flip) { - std::set unique_ratios; - for (auto ratio : aspect_ratio) { - unique_ratios.insert(std::round(ratio * 1e6) / 1e6); - if (flip) - unique_ratios.insert(std::round(1 / ratio * 1e6) / 1e6); - } - unique_ratios.insert(1); - return std::vector(unique_ratios.begin(), unique_ratios.end()); -} - -int64_t number_of_priors(const std::vector& aspect_ratio, - const std::vector& min_size, - const std::vector& max_size, - const std::vector& fixed_size, - const std::vector& fixed_ratio, - const std::vector& densities, - bool scale_all_sizes, - bool flip) { - // Starting with 0 number of prior and then various conditions on attributes will contribute - // real number of prior boxes as PriorBox is a fat thing with several modes of - // operation that will be checked in order in the next statements. - int64_t num_priors = 0; - - // Total number of boxes around each point; depends on whether flipped boxes are included - // plus one box 1x1. - int64_t total_aspect_ratios = normalized_aspect_ratio(aspect_ratio, flip).size(); - - if (scale_all_sizes) { - num_priors = total_aspect_ratios * min_size.size() + max_size.size(); - } else { - num_priors = total_aspect_ratios + min_size.size() - 1; - } - - if (!fixed_size.empty()) { - num_priors = total_aspect_ratios * fixed_size.size(); - } - - for (auto density : densities) { - auto rounded_density = static_cast(density); - auto density_2d = (rounded_density * rounded_density - 1); - if (!fixed_ratio.empty()) { - num_priors += fixed_ratio.size() * density_2d; - } else { - num_priors += total_aspect_ratios * density_2d; - } - } - return num_priors; -} - -tensor get_output_shape(int32_t height, int32_t width, int32_t number_of_priors) { - return tensor{std::vector{2, 4 * height * width * number_of_priors}}; -} } // namespace void prior_box_node::calc_result() { @@ -412,25 +359,6 @@ void prior_box_node::calc_result() { *typed_desc()); } -layout prior_box_inst::calc_output_layout(prior_box_node const& node, kernel_impl_params const& impl_param) { - const auto primitive = impl_param.typed_desc(); - auto number = number_of_priors(primitive->aspect_ratios, - primitive->min_sizes, - primitive->max_sizes, - primitive->fixed_size, - primitive->fixed_ratio, - primitive->density, - primitive->scale_all_sizes, - primitive->flip); - if (primitive->is_clustered()) { - number = primitive->widths.size(); - } - const auto output_type = primitive->output_data_types[0].value_or(data_types::f32); - const auto output_shape = get_output_shape(primitive->output_size.spatial[1], primitive->output_size.spatial[0], number); - - return {output_type, impl_param.get_input_layout().format, output_shape}; -} - template std::vector prior_box_inst::calc_output_layouts(prior_box_node const& /*node*/, kernel_impl_params const& impl_param) { const auto primitive = impl_param.typed_desc(); diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index 55b87fea9fe298..d8db65987dfc07 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/primitives/non_max_suppression.hpp" #include "openvino/runtime/system_conf.hpp" #include "openvino/runtime/threading/cpu_streams_info.hpp" @@ -209,7 +210,6 @@ program::program(engine& engine, const ExecutionConfig& config) processing_order() { init_primitives(); _config.apply_user_properties(_engine.get_device_info()); - new_shape_infer = _config.get_property(ov::intel_gpu::allow_new_shape_infer); _layout_optimizer = cldnn::make_unique(); } @@ -221,7 +221,6 @@ void program::init_program() { set_options(); pm = std::unique_ptr(new pass_manager(*this)); - new_shape_infer = _config.get_property(ov::intel_gpu::allow_new_shape_infer); if (_task_executor == nullptr) _task_executor = program::make_task_executor(_config); @@ -355,34 +354,6 @@ bool program::analyze_output_size_handling_need() { true, 1); - if (specified_output_range != calc_output_range) - handling_needed = true; - } else if (node->is_type()) { - auto& prim_node = node->as(); - const auto& prim = prim_node.get_primitive(); - - if (!prim->with_output_size) - continue; - - tensor specified_output_range( - {0, 0, prim->output_size.spatial[0], prim->output_size.spatial[1], prim->output_size.spatial[2]}, - 1); - - tensor size(1); - for (size_t i = 0; i < prim->size.size(); i++) { - size.spatial[i] = static_cast(prim->size[prim->size.size() - i - 1]); - } - // TODO: Check compatibility of output size calculation (with caffe). - auto primInputSize = prim_node.get_input_layout().get_tensor(); - auto calc_output_range = calc_sliding_window_output_range( - primInputSize, - size, - ov::CoordinateDiff(prim->pads_begin.begin(), prim->pads_begin.end()), - prim->stride, - ov::Strides(prim->stride.size(), 1), - true, - 1); - if (specified_output_range != calc_output_range) handling_needed = true; } @@ -656,7 +627,11 @@ void program::post_optimize_graph(bool is_internal) { // mark if the node is constant assuming that all dependencies are marked properly void program::mark_if_constant(program_node& node) { - if (node.get_dependencies().empty() || node.is_type() || node.is_type() || node.is_type()) { + if (node.get_dependencies().empty() || + node.is_type() || + node.is_type() || + node.is_type() || + node.is_type() /* WA: constant folding works incorrectly for NMS */) { return; } node.constant = true; diff --git a/src/plugins/intel_gpu/src/graph/program_node.cpp b/src/plugins/intel_gpu/src/graph/program_node.cpp index 3c21800c66d938..76422d4749ac8d 100644 --- a/src/plugins/intel_gpu/src/graph/program_node.cpp +++ b/src/plugins/intel_gpu/src/graph/program_node.cpp @@ -367,31 +367,10 @@ bool program_node::is_detached(bool whole_branch) { return true; } -layout program_node::calc_output_layout() const { - bool allow_new_shape_infer = get_program().is_new_shape_infer(); - if (allow_new_shape_infer) { - auto out_layouts = type()->calc_output_layouts(*this, *get_kernel_impl_params()); - if (!out_layouts.empty()) { - GPU_DEBUG_TRACE_DETAIL << id() << ": calc_output_layout(new):" << out_layouts[0] << std::endl; - return out_layouts[0]; - } - } - - auto res = type()->calc_output_layout(*this, *get_kernel_impl_params()); - GPU_DEBUG_TRACE_DETAIL << id() << ": calc_output_layout:" << res << std::endl; - - return res; -} - std::vector program_node::calc_output_layouts() const { - bool allow_new_shape_infer = get_program().is_new_shape_infer(); - if (allow_new_shape_infer) { - auto out_layouts = type()->calc_output_layouts(*this, *get_kernel_impl_params()); - if (!out_layouts.empty()) - return out_layouts; - } - - return {type()->calc_output_layout(*this, *get_kernel_impl_params())}; + auto out_layouts = type()->calc_output_layouts(*this, *get_kernel_impl_params()); + OPENVINO_ASSERT(!out_layouts.empty()); + return out_layouts; } const layout& program_node::get_output_layout(bool invalidate_users_if_changed, size_t idx) { @@ -460,8 +439,7 @@ bool program_node::set_output_layouts(std::vector& new_layouts, bool inv } bool program_node::recalc_output_layout(bool invalidate_users_if_changed) { - auto new_layout = calc_output_layout(); - return set_output_layout(new_layout, invalidate_users_if_changed); + return recalc_output_layouts(); } bool program_node::recalc_output_layouts(bool invalidate_users_if_changed) { @@ -1487,15 +1465,10 @@ void program_node::create_onednn_primitive_attributes( auto& desc = cldnn_post_ops[idx]; if (desc.is_type()) { auto fused_desc = desc.typed_desc(); - bool allow_new_shape_infer = get_program().is_new_shape_infer(); if (fused_desc->activation_function == cldnn::activation_func::relu_negative_slope && !fused_desc->additional_params_input.empty()) { auto dep_idx = cldnn_post_ops[idx].outer_dep_start_idx; - int oc_dim = 1; - if (allow_new_shape_infer) - oc_dim = static_cast(desc.output_layout.get_partial_shape()[1].get_max_length()); - else - oc_dim = static_cast(desc.output_layout.get_tensor().feature.size()); + int oc_dim = static_cast(desc.output_layout.get_partial_shape()[1].get_max_length()); post_ops.append_prelu(1 << std::max(0, oc_dim)); update_onednn_post_op_list(onednn_post_op_type::binary_relu, dep_idx); } else if (fused_desc->activation_function == cldnn::activation_func::hard_sigmoid) { diff --git a/src/plugins/intel_gpu/src/graph/proposal.cpp b/src/plugins/intel_gpu/src/graph/proposal.cpp index 9439ce18b1ed09..a3461850bbd41a 100644 --- a/src/plugins/intel_gpu/src/graph/proposal.cpp +++ b/src/plugins/intel_gpu/src/graph/proposal.cpp @@ -23,17 +23,6 @@ static void generate_anchors(unsigned base_size, GPU_DEFINE_PRIMITIVE_TYPE_ID(proposal) -layout proposal_inst::calc_output_layout(proposal_node const& node, kernel_impl_params const& impl_param) { - assert(static_cast(impl_param.desc->output_data_types[0]) == false && - "Output data type forcing is not supported for proposal_node!"); - auto desc = impl_param.typed_desc(); - layout input_layout = impl_param.get_input_layout(cls_scores_index); - - return layout(input_layout.data_type, - format::bfyx, - {input_layout.batch() * desc->post_nms_topn, CLDNN_ROI_VECTOR_SIZE, 1, 1}); -} - template std::vector proposal_inst::calc_output_layouts(proposal_node const& node, kernel_impl_params const& impl_param) { std::vector layouts; diff --git a/src/plugins/intel_gpu/src/graph/quantize.cpp b/src/plugins/intel_gpu/src/graph/quantize.cpp index 02f1f8213e50ba..1728af5f524274 100644 --- a/src/plugins/intel_gpu/src/graph/quantize.cpp +++ b/src/plugins/intel_gpu/src/graph/quantize.cpp @@ -12,18 +12,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(quantize) -layout quantize_inst::calc_output_layout(quantize_node const& node, kernel_impl_params const& impl_param) { - auto desc = impl_param.typed_desc(); - - auto input_layout = impl_param.get_input_layout(); - auto output_format = input_layout.format; - auto out_dt = input_layout.data_type; - if (desc->output_data_types[0]) - out_dt = *desc->output_data_types[0]; - - return layout{out_dt, output_format, input_layout.get_tensor()}; -} - std::string quantize_inst::to_string(quantize_node const& node) { auto desc = node.get_primitive(); auto node_info = node.desc_to_json(); diff --git a/src/plugins/intel_gpu/src/graph/random_uniform.cpp b/src/plugins/intel_gpu/src/graph/random_uniform.cpp index 437b84a9b6cfdc..3e92e70f6f83da 100644 --- a/src/plugins/intel_gpu/src/graph/random_uniform.cpp +++ b/src/plugins/intel_gpu/src/graph/random_uniform.cpp @@ -15,13 +15,6 @@ random_uniform_inst::typed_primitive_inst(network& network, random_uniform_node : parent(network, node) { } -layout random_uniform_inst::calc_output_layout(random_uniform_node const &node, kernel_impl_params const& impl_param) { - auto primitive = impl_param.typed_desc(); - auto format = format::get_default_format(primitive->output_shape.size()); - - return {primitive->output_shape, *primitive->output_data_types[0], format}; -} - template std::vector random_uniform_inst::calc_output_layouts(random_uniform_node const& /*node*/, kernel_impl_params const& impl_param) { auto desc = impl_param.typed_desc(); diff --git a/src/plugins/intel_gpu/src/graph/range.cpp b/src/plugins/intel_gpu/src/graph/range.cpp index 46455c980efae2..c6d7117bda7f2c 100644 --- a/src/plugins/intel_gpu/src/graph/range.cpp +++ b/src/plugins/intel_gpu/src/graph/range.cpp @@ -20,10 +20,6 @@ std::string lexical_cast(const json_base& j, int offset = 1) { GPU_DEFINE_PRIMITIVE_TYPE_ID(range) -layout range_inst::calc_output_layout(range_node const& node, kernel_impl_params const& impl_param) { - return impl_param.typed_desc()->output_layout; -} - template std::vector range_inst::calc_output_layouts(range_node const& /*node*/, kernel_impl_params const& impl_param) { auto desc = impl_param.typed_desc(); diff --git a/src/plugins/intel_gpu/src/graph/read_value.cpp b/src/plugins/intel_gpu/src/graph/read_value.cpp index bf6e730e8a808b..c071b8661f6091 100644 --- a/src/plugins/intel_gpu/src/graph/read_value.cpp +++ b/src/plugins/intel_gpu/src/graph/read_value.cpp @@ -15,10 +15,6 @@ read_value_inst::typed_primitive_inst(network& network, const read_value_node& n memory_state::variable{node.get_primitive()->variable_id, node.get_primitive()->user_specified_type} { } -layout read_value_inst::calc_output_layout(const read_value_node& node, kernel_impl_params const& impl_param) { - return impl_param.typed_desc()->output_layout; -} - std::string read_value_inst::to_string(const read_value_node& node) { auto node_info = node.desc_to_json(); diff --git a/src/plugins/intel_gpu/src/graph/reduce.cpp b/src/plugins/intel_gpu/src/graph/reduce.cpp index 8621ba3705fd5c..a388391f0110a7 100644 --- a/src/plugins/intel_gpu/src/graph/reduce.cpp +++ b/src/plugins/intel_gpu/src/graph/reduce.cpp @@ -14,84 +14,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(reduce) -static std::vector convert_axes(std::vector axes, size_t rank) { - std::vector converted_axes; - for (auto axis : axes) { - if (axis == 0 || axis == 1) { - converted_axes.push_back(axis); - continue; - } - - if (axis < 0) - axis = axis + rank; - - converted_axes.push_back(static_cast(rank + 1 - axis)); - } - - return converted_axes; -} - -layout reduce_inst::calc_output_layout(reduce_node const& node, kernel_impl_params const& impl_param) { - auto desc = impl_param.typed_desc(); - - auto input_layout = impl_param.get_input_layout(); - auto input_format = input_layout.format; - auto format_dim = input_format.dimension(); - auto output_type = input_layout.data_type; - auto mode = desc->mode; - auto reduce_axes = convert_axes(desc->axes, input_layout.get_rank()); - auto in_dims = input_layout.get_tensor().sizes(); - - for (size_t a = 0; a < reduce_axes.size(); a++) { - in_dims[reduce_axes[a]] = 1; - } - - std::vector updated_dims; - if (!desc->keep_dims) { - // Get unreduced from b-f and x-w range - for (size_t b_f_index = 0; b_f_index < 2; b_f_index++) { - bool index_to_remove = std::find(reduce_axes.begin(), reduce_axes.end(), b_f_index) != reduce_axes.end(); - if (!index_to_remove) - updated_dims.push_back(in_dims[b_f_index]); - } - for (size_t x_w_index = format_dim - 1; x_w_index >= 2; x_w_index--) { - bool index_to_remove = std::find(reduce_axes.begin(), reduce_axes.end(), x_w_index) != reduce_axes.end(); - if (!index_to_remove) - updated_dims.push_back(in_dims[x_w_index]); - } - - if (input_format.dimension() == 4 && reduce_axes.size() == 1) - updated_dims.push_back(1); - if (updated_dims.size() > 2) - std::reverse(updated_dims.begin() + 2, updated_dims.end()); - - // Fill updated dims to format_dim size - while (updated_dims.size() < format_dim) - updated_dims.push_back(1); - - in_dims = std::move(updated_dims); - } - - std::vector reduce_bool_modes = {reduce_mode::logical_and, reduce_mode::logical_or}; - if (std::find(reduce_bool_modes.begin(), reduce_bool_modes.end(), mode) != reduce_bool_modes.end()) - output_type = data_types::i8; - else if (output_type == data_types::i8 || output_type == data_types::u8) - output_type = data_types::f32; - - if (desc->output_data_types[0]) - output_type = *desc->output_data_types[0]; - - if (impl_param.has_fused_primitives()) - output_type = impl_param.get_output_element_type(); - - if (format_dim == 6) - return layout{output_type, input_format, tensor(batch(in_dims[0]), feature(in_dims[1]), spatial(in_dims[2], in_dims[3], in_dims[4], in_dims[5]))}; - else if (format_dim == 5) - return layout{output_type, input_format, tensor(batch(in_dims[0]), feature(in_dims[1]), spatial(in_dims[2], in_dims[3], in_dims[4]))}; - else - return layout{output_type, input_format, tensor(batch(in_dims[0]), feature(in_dims[1]), spatial(in_dims[2], in_dims[3]))}; -} - template std::vector reduce_inst::calc_output_layouts(reduce_node const& /*node*/, kernel_impl_params const& impl_param) { auto desc = impl_param.typed_desc(); diff --git a/src/plugins/intel_gpu/src/graph/region_yolo.cpp b/src/plugins/intel_gpu/src/graph/region_yolo.cpp index 49ab172d633820..bda2f45b276142 100644 --- a/src/plugins/intel_gpu/src/graph/region_yolo.cpp +++ b/src/plugins/intel_gpu/src/graph/region_yolo.cpp @@ -12,30 +12,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(region_yolo) -layout region_yolo_inst::calc_output_layout(region_yolo_node const& node, kernel_impl_params const& impl_param) { - assert(static_cast(impl_param.desc->output_data_types[0]) == false && - "Output data type forcing is not supported for " - "region_yolo_node!"); - auto input_layout = impl_param.get_input_layout(); - auto desc = impl_param.typed_desc(); - - if (desc->do_softmax) { - return cldnn::layout( - input_layout.data_type, - input_layout.format, - tensor(input_layout.batch(), - input_layout.feature() * input_layout.spatial(0) * input_layout.spatial(1), - 1, - 1)); - } else { - tensor::value_type features = (desc->classes + desc->coords + 1) * desc->mask_size; - return cldnn::layout( - input_layout.data_type, - input_layout.format, - tensor(input_layout.batch(), features, input_layout.spatial(0), input_layout.spatial(1))); - } -} - template std::vector region_yolo_inst::calc_output_layouts(region_yolo_node const& node, kernel_impl_params const& impl_param) { auto desc = impl_param.typed_desc(); diff --git a/src/plugins/intel_gpu/src/graph/reorder.cpp b/src/plugins/intel_gpu/src/graph/reorder.cpp index e322baeee95e52..d704301384ad08 100644 --- a/src/plugins/intel_gpu/src/graph/reorder.cpp +++ b/src/plugins/intel_gpu/src/graph/reorder.cpp @@ -16,163 +16,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(reorder) -layout reorder_inst::calc_output_layout(reorder_node const& node, kernel_impl_params const& impl_param) { - auto input_layout = impl_param.get_input_layout(); - auto ifmt = input_layout.format; - - auto desc = impl_param.typed_desc(); - auto odt = desc->output_data_types[0].value_or(input_layout.data_type); - auto ofmt = desc->output_format; - auto op = desc->output_paddings[0]; - - if (ofmt == format::any) { - ofmt = ifmt; - } - - if (ifmt.is_nv12() && !desc->has_surface_input()) { - const size_t h_dim = 1; - const size_t c_dim = 3; - - auto out_shape = input_layout.get_partial_shape(); - out_shape[c_dim] = 3; - if (desc->input_size() == 1) - out_shape[h_dim] = out_shape[h_dim] * 2 / 3; - - if (ofmt != ifmt) - return layout(out_shape, odt, ofmt, op); - - CLDNN_ERROR_MESSAGE(desc->id, "No image_nv12 to image_nv12 reorder is supported"); - } else if (ofmt.is_winograd() && ifmt.is_winograd()) { - if (ofmt == ifmt) - return layout(odt, ofmt, input_layout.get_tensor(), op); - - CLDNN_ERROR_MESSAGE(desc->id, "Reordering between winograd weights and data formats is unsupported"); - } else if (ifmt == format::image_2d_rgba) { - return layout(data_types::f16, format::bfyx, input_layout.get_tensor(), op); - } - - // transformation of data from standard to winograd - if (ofmt == format::winograd_2x3_s1_data) { - // some constants which are defined by F(2,3) with stride 1 -- todo: think about generic way to calculate them - // for any F(r,m) with stride s - // NOTE: FOR THE FOLLOWING CONSTANTS 'OUTPUT' MEANS OUTPUT OF WINOGRAD CONV (in standard domain) AND 'INPUT' - // MEANS INPUT FOR WINOGRAD CONV (in winograd domain), THEREFORE 'INPUT' ACTUALLY REFERS TO THE OUTPUT OF THIS - // CONVERSION (which is later fed as input for winograd conv) - constexpr tensor::value_type output_tile_width = 2; // by definition of F(2,3) - constexpr tensor::value_type filter_width = 3; // by definition of F(2,3) - constexpr tensor::value_type filter_stride = - 1; // by definition of format::winograd_2x3_s1_data (our assumption) - - constexpr tensor::value_type input_tile_width = - filter_width + - (output_tile_width - 1) * filter_stride; // input tile should be large enought to hold data for - // computations of output tile (for given filter size and stride) - - // how many tiles do we need to produce - // each input tile produces one output tile so we can find no. of input tiles by calculating no. of output tiles - // (which is equal to width of an output divided by output tile width) - tensor::value_type conv_output_width = - input_layout.spatial(0) - filter_width + 1; - tensor::value_type input_tiles_count_x = conv_output_width / output_tile_width; - tensor::value_type output_width = input_tiles_count_x * input_tile_width; - tensor::value_type output_height = input_layout.spatial(1); - - tensor::value_type padd_x = 0; - tensor::value_type padd_y = (8 - ((output_height - 2) % 8)) % 8; - if (conv_output_width % output_tile_width != 0) { // leftovers - output_width += 3; // one tile is 4 elements from which only 3 first are used to generate first output - // value - padd_x = 1; - } - - auto data_size = tensor{input_layout.batch(), input_layout.feature(), output_width, output_height}; - tensor upper_padd = tensor{0, 0, padd_x, padd_y}; - return layout(odt, ofmt, data_size, padding{{0, 0, 0, 0}, upper_padd.sizes()}); - } - - // transformation of weights from standard to winograd - if (ofmt == format::winograd_2x3_s1_weights || ofmt == format::winograd_2x3_s1_fused_weights) { - CLDNN_ERROR_NOT_EQUAL(desc->id, - "input_layout.spatial(0)", - input_layout.spatial(0), - "expected value", - 3, - "input for conversion to winograd_2x3_s1 weights format should have spatial size 3x3"); - CLDNN_ERROR_NOT_EQUAL(desc->id, - "input_layout.spatial(1)", - input_layout.spatial(1), - "expected value", - 3, - "input for conversion to winograd_2x3_s1 weights format should have spatial size 3x3"); - - return layout(odt, ofmt, tensor{input_layout.batch(), input_layout.feature(), 4, 3}); - } else if (ofmt == format::winograd_6x3_s1_fused_weights) { - CLDNN_ERROR_NOT_EQUAL(desc->id, - "input_layout.spatial(0)", - input_layout.spatial(0), - "expected value", - 3, - "input for conversion to winograd_2x3_s1 weights format should have spatial size 3x3"); - CLDNN_ERROR_NOT_EQUAL(desc->id, - "input_layout.spatial(1)", - input_layout.spatial(1), - "expected value", - 3, - "input for conversion to winograd_2x3_s1 weights format should have spatial size 3x3"); - - return layout(odt, ofmt, tensor{input_layout.batch(), input_layout.feature(), 8, 3}); - } - - // transformation of data from winograd to standard - if (ifmt == format::winograd_2x3_s1_data) { - constexpr tensor::value_type output_tile_width = 2; // by definition of F(2,3) - constexpr tensor::value_type filter_width = 3; // by definition of F(2,3) - constexpr tensor::value_type filter_stride = - 1; // by definition of format::winograd_2x3_s1_data (our assumption) - - constexpr tensor::value_type input_tile_width = - filter_width + - (output_tile_width - 1) * filter_stride; // input tile should be large enought to hold data for - // computations of output tile (for given filter size and stride) - - auto output_width = input_layout.spatial(0) / input_tile_width * output_tile_width; - if (input_layout.spatial(0) % input_tile_width != 0) // leftovers - ++output_width; // output tile is 2 by default, so we can have only 1 value as leftover - - return layout(odt, - ofmt, - tensor{input_layout.batch(), - input_layout.feature(), - output_width, - input_layout.spatial(1)}); - } - - // transformation of weights from winograd to standard - if (ifmt == format::winograd_2x3_s1_weights || ifmt == format::winograd_2x3_s1_fused_weights || - ifmt == format::winograd_6x3_s1_fused_weights) { - CLDNN_ERROR_MESSAGE(desc->id, - "Conversion of weights from winograd to standard domain is currently unsupported"); - } - - if (desc->weights_reorder_params) { - return desc->weights_reorder_params->get_output_layout(); - } - - if ((ofmt == format::bs_fs_fsv8_bsv8 || ofmt == format::os_i_osv8__ai8 || ofmt == format::os_i_osv16__ai8 || ofmt == format::os_i_osv16 || - ofmt == format::bfzyx || ifmt == format::bfzyx || ofmt == format::b_fs_zyx_fsv16 || ifmt == format::b_fs_zyx_fsv16 || - ofmt == format::bs_fs_zyx_bsv16_fsv16 || ifmt == format::bs_fs_zyx_bsv16_fsv16 || - ofmt == format::bs_fs_zyx_bsv16_fsv32 || ifmt == format::bs_fs_zyx_bsv16_fsv32 || - ofmt == format::b_fs_zyx_fsv32 || ifmt == format::b_fs_zyx_fsv32 || - ofmt == format::bs_fs_yx_bsv16_fsv16 || ifmt == format::bs_fs_yx_bsv16_fsv16) && input_layout.is_static()) { - return layout(odt, ofmt, input_layout.get_tensor().transform(ofmt, 1), op); - } else if (ofmt != ifmt && (ofmt == format::bfwzyx || ifmt == format::bfwzyx)) { - // TODO Shouldn't transform be called every time ifmt != ofmt? - return layout(odt, ofmt, input_layout.get_tensor().transform(ofmt, 1), op); - } else { - return layout(odt, ofmt, input_layout.get_tensor(), op); - } -} - template std::vector reorder_inst::calc_output_layouts(reorder_node const& /*node*/, const kernel_impl_params& impl_param) { auto desc = impl_param.typed_desc(); diff --git a/src/plugins/intel_gpu/src/graph/reorg_yolo.cpp b/src/plugins/intel_gpu/src/graph/reorg_yolo.cpp index 11cf8a8a833445..e5f9aec11985c3 100644 --- a/src/plugins/intel_gpu/src/graph/reorg_yolo.cpp +++ b/src/plugins/intel_gpu/src/graph/reorg_yolo.cpp @@ -12,23 +12,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(reorg_yolo) -layout reorg_yolo_inst::calc_output_layout(reorg_yolo_node const& node, kernel_impl_params const& impl_param) { - assert(static_cast(impl_param.desc->output_data_types[0]) == false && - "Output data type forcing is not supported for " - "reorg_yolo_node!"); - auto input_layout = impl_param.get_input_layout(); - auto desc = impl_param.typed_desc(); - auto stride = desc->stride; - - cldnn::layout layoutTemp = cldnn::layout(input_layout.data_type, - input_layout.format, - tensor(input_layout.batch(), - input_layout.feature() * stride * stride, - input_layout.spatial(0) / stride, - input_layout.spatial(1) / stride)); - return layoutTemp; -} - template std::vector reorg_yolo_inst::calc_output_layouts(reorg_yolo_node const& node, kernel_impl_params const& impl_param) { auto desc = impl_param.typed_desc(); diff --git a/src/plugins/intel_gpu/src/graph/resample.cpp b/src/plugins/intel_gpu/src/graph/resample.cpp index 29d2fec685bae9..637331ad8736ec 100644 --- a/src/plugins/intel_gpu/src/graph/resample.cpp +++ b/src/plugins/intel_gpu/src/graph/resample.cpp @@ -12,24 +12,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(resample) -layout resample_inst::calc_output_layout(resample_node const& node, kernel_impl_params const& impl_param) { - auto desc = impl_param.typed_desc(); - auto input_layout = impl_param.get_input_layout(); - - auto output_type = input_layout.data_type; - if ((input_layout.data_type == data_types::i8 || input_layout.data_type == data_types::u8) - && desc->operation_type != resample::InterpolateOp::InterpolateMode::NEAREST - && desc->operation_type != resample::InterpolateOp::InterpolateMode::LINEAR_ONNX) { - output_type = data_types::f32; - } - if (impl_param.has_fused_primitives()) { - output_type = impl_param.get_output_element_type(); - } - - return desc->sizes.empty() ? layout({output_type, input_layout.format, desc->output_size}) : - layout({desc->sizes, output_type, input_layout.format}); -} - namespace v4 { template static std::vector calc_output_layouts(resample_node const& /*node*/, const kernel_impl_params& impl_param) { diff --git a/src/plugins/intel_gpu/src/graph/reshape.cpp b/src/plugins/intel_gpu/src/graph/reshape.cpp index 0830dc3e5b9226..ec31ce0121899a 100644 --- a/src/plugins/intel_gpu/src/graph/reshape.cpp +++ b/src/plugins/intel_gpu/src/graph/reshape.cpp @@ -106,44 +106,6 @@ padding propagate_padding(const layout& in_layout, const ov::PartialShape& out_s return padding(update_pad_lower, update_pad_upper, ret_update_pad_mask); } -layout reshape_inst::calc_output_layout(reshape_node const& node, kernel_impl_params const& impl_param) { - assert(static_cast(impl_param.desc->output_data_types[0]) == false && - "Output data type forcing is not supported for reshape_node!"); - auto input_layout = impl_param.get_non_padded_input_layout(); - auto desc = impl_param.typed_desc(); - if (desc->output_shape.count() == 0) { - if (desc->output_partial_shape.size() != 0) { - format out_fmt = format::adjust_to_rank(input_layout.format, desc->output_partial_shape.rank().get_length()); - return layout{desc->output_partial_shape, input_layout.data_type, out_fmt}; - } else { - OPENVINO_ASSERT("[GPU] Output shape is not provided"); - } - } - - auto sizes = desc->output_shape.sizes(); - auto input_sizes = input_layout.get_tensor().sizes(); - size_t need_recalc = 0; - uint32_t shape_count = 1; - - for (size_t i = 0; i < sizes.size(); i++) { - if (sizes[i] == -1) { - if (need_recalc) { - CLDNN_ERROR_MESSAGE(desc->id, "Only one dimension of the new shape can be -1"); - } - need_recalc = i; - continue; - } - if (sizes[i] == 0) { - sizes[i] = input_sizes[i]; - } - shape_count *= sizes[i]; - } - if (need_recalc) - sizes[need_recalc] = static_cast(input_layout.count()) / shape_count; - - return layout{input_layout.data_type, input_layout.format, tensor(sizes)}; -} - template std::vector reshape_inst::calc_output_layouts(reshape_node const& node, const kernel_impl_params& impl_param) { assert(static_cast(impl_param.typed_desc()->output_data_types[0]) == false && @@ -312,7 +274,7 @@ void reshape_inst::update_output_memory() { return; build_deps(); // reshape need deps - if (node->get_program().is_new_shape_infer() && input_memory_ptr() == nullptr) + if (input_memory_ptr() == nullptr) return; OPENVINO_ASSERT(input_memory_ptr() != nullptr, "[GPU] Failed to reuse input in ", id(), " primitive: input memory was not allocated"); _outputs = {_network.get_engine().reinterpret_buffer(input_memory(), _impl_params->get_output_layout())}; diff --git a/src/plugins/intel_gpu/src/graph/reverse.cpp b/src/plugins/intel_gpu/src/graph/reverse.cpp index 11388ecadb4f11..1706b7f3299ac4 100644 --- a/src/plugins/intel_gpu/src/graph/reverse.cpp +++ b/src/plugins/intel_gpu/src/graph/reverse.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "openvino/op/reverse.hpp" #include #include "json_object.h" @@ -11,10 +12,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(reverse) -layout reverse_inst::calc_output_layout(reverse_node const& node, kernel_impl_params const& impl_param) { - return impl_param.get_input_layout(); -} - std::string reverse_inst::to_string(reverse_node const& node) { const auto prim = node.get_primitive(); @@ -23,7 +20,7 @@ std::string reverse_inst::to_string(reverse_node const& node) { json_composite info; info.add("input id", node.input(0).id()); info.add("axes id", node.input(1).id()); - const auto mode = prim->mode == reverse_mode::index ? "index" : "mask"; + const auto mode = prim->mode == ov::op::v1::Reverse::Mode::INDEX ? "index" : "mask"; info.add("mode", mode); auto node_info = node.desc_to_json(); diff --git a/src/plugins/intel_gpu/src/graph/reverse_sequence.cpp b/src/plugins/intel_gpu/src/graph/reverse_sequence.cpp index 0fe2c4df37172f..b0173c265035af 100644 --- a/src/plugins/intel_gpu/src/graph/reverse_sequence.cpp +++ b/src/plugins/intel_gpu/src/graph/reverse_sequence.cpp @@ -11,13 +11,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(reverse_sequence) -layout reverse_sequence_inst::calc_output_layout(reverse_sequence_node const& node, kernel_impl_params const& impl_param) { - auto input_layout = impl_param.get_input_layout(); - auto input_format = input_layout.format; - - return layout{input_layout.data_type, input_format, input_layout.get_tensor()}; -} - std::string reverse_sequence_inst::to_string(reverse_sequence_node const& node) { auto desc = node.get_primitive(); auto node_info = node.desc_to_json(); diff --git a/src/plugins/intel_gpu/src/graph/rms.cpp b/src/plugins/intel_gpu/src/graph/rms.cpp index 5002417df57394..c2e1eed46e58e2 100644 --- a/src/plugins/intel_gpu/src/graph/rms.cpp +++ b/src/plugins/intel_gpu/src/graph/rms.cpp @@ -11,15 +11,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(rms); -layout rms_inst::calc_output_layout(rms_node const& node, kernel_impl_params const& impl_param) { - auto desc = impl_param.typed_desc(); - auto input_layout = impl_param.get_input_layout(); - auto output_type = desc->output_data_types[0].value_or(input_layout.data_type); - auto output_format = input_layout.format; - - return layout(output_type, output_format, input_layout.get_tensor()); -} - std::string rms_inst::to_string(rms_node const& node) { auto desc = node.get_primitive(); auto node_info = node.desc_to_json(); diff --git a/src/plugins/intel_gpu/src/graph/roi_align.cpp b/src/plugins/intel_gpu/src/graph/roi_align.cpp index 22e5dd4bf98f6c..8649f5ce194aea 100644 --- a/src/plugins/intel_gpu/src/graph/roi_align.cpp +++ b/src/plugins/intel_gpu/src/graph/roi_align.cpp @@ -16,17 +16,6 @@ GPU_DEFINE_PRIMITIVE_TYPE_ID(roi_align) roi_align_inst::typed_primitive_inst(network& network, roi_align_node const& node) : parent(network, node) {} -layout roi_align_inst::calc_output_layout(roi_align_node const& node, kernel_impl_params const& impl_param) { - auto primitive = impl_param.typed_desc(); - auto input_layout = impl_param.get_input_layout(0); - auto rois_layout = impl_param.get_input_layout(1); - auto num_rois = rois_layout.batch(); - auto num_channels = input_layout.feature(); - return layout({num_rois, num_channels, primitive->pooled_h, primitive->pooled_w}, - input_layout.data_type, - input_layout.format); -} - template std::vector roi_align_inst::calc_output_layouts(roi_align_node const& node, kernel_impl_params const& impl_param) { auto primitive = impl_param.typed_desc(); diff --git a/src/plugins/intel_gpu/src/graph/roi_pooling.cpp b/src/plugins/intel_gpu/src/graph/roi_pooling.cpp index d2af4d4d4d8674..a07ad6353c1eba 100644 --- a/src/plugins/intel_gpu/src/graph/roi_pooling.cpp +++ b/src/plugins/intel_gpu/src/graph/roi_pooling.cpp @@ -2,6 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "openvino/core/partial_shape.hpp" +#include "openvino/op/psroi_pooling.hpp" #include "roi_pooling_inst.h" #include "roi_pooling_shape_inference.hpp" #include "psroi_pooling_shape_inference.hpp" @@ -13,20 +15,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(roi_pooling) -layout roi_pooling_inst::calc_output_layout(roi_pooling_node const& node, kernel_impl_params const& impl_param) { - assert(static_cast(impl_param.desc->output_data_types[0]) == false && - "Output data type forcing is not supported for roi_pooling_node!"); - auto desc = impl_param.typed_desc(); - layout data_layout = impl_param.get_input_layout(0); - layout rois_layout = impl_param.get_input_layout(1); - int num_rois = rois_layout.batch(); - int out_fm = desc->position_sensitive ? desc->output_dim : data_layout.feature(); - - return layout(data_layout.data_type, - data_layout.format, - {num_rois, out_fm, desc->pooled_width, desc->pooled_height}); -} - template std::vector roi_pooling_inst::calc_output_layouts(roi_pooling_node const& node, kernel_impl_params const& impl_param) { auto desc = impl_param.typed_desc(); diff --git a/src/plugins/intel_gpu/src/graph/roll.cpp b/src/plugins/intel_gpu/src/graph/roll.cpp index e42c3302c5c080..1a3ad61f286cb5 100644 --- a/src/plugins/intel_gpu/src/graph/roll.cpp +++ b/src/plugins/intel_gpu/src/graph/roll.cpp @@ -11,15 +11,10 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(roll) -layout roll_inst::calc_output_layout(const roll_node& node, kernel_impl_params const& impl_param) { - return impl_param.get_input_layout(); -} - std::string roll_inst::to_string(const roll_node& node) { auto node_info = node.desc_to_json(); json_composite roll_info; roll_info.add("input id", node.input().id()); - roll_info.add("shift", node.get_primitive()->shift); node_info->add("roll info", roll_info); std::ostringstream primitive_description; node_info->dump(primitive_description); diff --git a/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp b/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp index df9236a15474ef..069b745ee2b137 100644 --- a/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp +++ b/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp @@ -14,28 +14,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(scatter_elements_update) -layout scatter_elements_update_inst::calc_output_layout(scatter_elements_update_node const& node, kernel_impl_params const& impl_param) { - auto desc = impl_param.typed_desc(); - - const int32_t axis = desc->axis; - const size_t input_number_of_dims = impl_param.get_input_layout().get_partial_shape().size(); - - auto input_layout = impl_param.get_input_layout(); - - auto output_shape = input_layout.get_partial_shape(); - auto input_format = input_layout.format; - auto output_type = input_layout.data_type; - - if (impl_param.has_fused_primitives()) { - output_type = impl_param.get_output_element_type(); - } - - if (static_cast(axis) < 0 || static_cast(axis) >= input_number_of_dims) - CLDNN_ERROR_MESSAGE(desc->id, "Incorrect axis value for ScatterElementsUpdate: Axis must be positive and less than the input tensor dimension."); - - return layout{output_shape, output_type, input_format}; -} - std::string scatter_elements_update_inst::to_string(scatter_elements_update_node const& node) { auto desc = node.get_primitive(); auto node_info = node.desc_to_json(); diff --git a/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp b/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp index 8f66889c25b884..d545ef15db1f8b 100644 --- a/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp +++ b/src/plugins/intel_gpu/src/graph/scatter_nd_update.cpp @@ -13,20 +13,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(scatter_nd_update) -layout scatter_nd_update_inst::calc_output_layout(scatter_nd_update_node const& node, kernel_impl_params const& impl_param) { - auto input_layout = impl_param.get_input_layout(); - - auto output_shape = input_layout.get_tensor(); - auto input_format = input_layout.format; - auto output_type = input_layout.data_type; - - if (impl_param.has_fused_primitives()) { - output_type = impl_param.get_output_element_type(); - } - - return layout{output_type, input_format, output_shape}; -} - template std::vector scatter_nd_update_inst::calc_output_layouts(scatter_nd_update_node const& /*node*/, const kernel_impl_params& impl_param) { const auto& input0_layout = impl_param.get_input_layout(0); diff --git a/src/plugins/intel_gpu/src/graph/scatter_update.cpp b/src/plugins/intel_gpu/src/graph/scatter_update.cpp index 59995341d832b0..1b4a35ca1e4802 100644 --- a/src/plugins/intel_gpu/src/graph/scatter_update.cpp +++ b/src/plugins/intel_gpu/src/graph/scatter_update.cpp @@ -11,22 +11,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(scatter_update) -layout scatter_update_inst::calc_output_layout(scatter_update_node const& node, kernel_impl_params const& impl_param) { - auto desc = impl_param.typed_desc(); - - auto input_layout = impl_param.get_input_layout(); - - auto output_shape = input_layout.get_tensor(); - auto input_format = input_layout.format; - auto output_type = input_layout.data_type; - - if (impl_param.has_fused_primitives()) { - output_type = impl_param.get_output_element_type(); - } - - return layout{output_type, input_format, output_shape}; -} - std::string scatter_update_inst::to_string(scatter_update_node const& node) { auto desc = node.get_primitive(); auto node_info = node.desc_to_json(); diff --git a/src/plugins/intel_gpu/src/graph/select.cpp b/src/plugins/intel_gpu/src/graph/select.cpp index 9e0902e1f2ad4e..17af323dbe5d30 100644 --- a/src/plugins/intel_gpu/src/graph/select.cpp +++ b/src/plugins/intel_gpu/src/graph/select.cpp @@ -12,25 +12,6 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(select) -layout select_inst::calc_output_layout(select_node const& node, kernel_impl_params const& impl_param) { - assert(static_cast(impl_param.desc->output_data_types[0]) == false && - "Output data type forcing is not supported for select_node!"); - - auto in_layout = impl_param.get_non_padded_input_layout(1); - auto output_size = in_layout.get_tensor(); - - if (impl_param.typed_desc