From f19282fe8106bf0f90c229d957020330dee560e8 Mon Sep 17 00:00:00 2001 From: River Li Date: Tue, 6 Aug 2024 23:45:40 +0800 Subject: [PATCH] [dGPU] avoid strided_slice to be executed in cpu (#25601) ### Details: - Executing a StridedSlice primitive with a big input tensor on the CPU leads to a huge performance drop. - *...* ### Tickets: - CVS-147088 --------- Co-authored-by: Pavel Durandin --- .../src/graph/graph_optimizer/mark_shape_of_subgraphs.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_shape_of_subgraphs.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_shape_of_subgraphs.cpp index aec9e8b5f497e6..3599e68301da29 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_shape_of_subgraphs.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_shape_of_subgraphs.cpp @@ -7,6 +7,7 @@ #include "reshape_inst.h" #include "eltwise_inst.h" #include "select_inst.h" +#include "strided_slice_inst.h" #include "gather_inst.h" #include "pass_manager.h" @@ -78,6 +79,13 @@ bool mark_shape_of_subgraphs::can_mark_node(const program_node& node) { return false; } + // Exclude strided_slice primitive if its input is a big const tensor; otherwise the CPU reference implementation + // will lead to a huge performance drop. + if (node.is_type() && node.get_dependency(0).is_constant() && + node.get_dependency(0).get_output_layout().count() > 1024 * 1024) { + return false; + } + auto available_impls = node.type()->get_available_impls(node); auto cpu_impl_found = available_impls.find(impl_types::cpu) != available_impls.end();