Skip to content

Commit

Permalink
[GPU] Avoid optimizing out crop for faster gemm kernel selection. (openvinotoolkit#26556)
Browse files Browse the repository at this point in the history

### Details:
- Disable crop optimization when inner axis padding leads to GEMM ref
kernel selection due to lack of support in optimized kernels.

### Tickets:
 - *150556*
  • Loading branch information
jade-cho authored Sep 26, 2024
1 parent c864266 commit c20059a
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,17 @@ bool crop_in_place_optimization::match(const program_node& node,
// TODO: Need to allow optimization for gemm user
// Dynamic-shape crops feeding a convolution or gemm user are never optimized out
// (in-place buffer fusing for those users is not yet supported in dynamic flow).
if (node.is_dynamic() && (user->is_type<convolution>() || user->is_type<gemm>()))
return false;
// For static shape, gemm ref kernel is selected if there is padding on the feature, x, or y axes.
// In such cases, do not optimize out this crop to use the opt kernel.
// TODO: Modify gemm_tiled_opt kernel to support padding even in static shape.
// Only the gemm's two matrix inputs (dependency index 0 or 1) are affected;
// a non-zero feature/x/y offset on the crop would become input padding on the gemm.
if ((!node.is_dynamic() || is_runtime) && user->is_type<gemm>() &&
(user->get_dependency_index(node) == 0 || user->get_dependency_index(node) == 1)) {
if (crop_params.input_offsets[0].feature[0] != 0 ||
crop_params.input_offsets[0].spatial[0] != 0 ||
crop_params.input_offsets[0].spatial[1] != 0) {
return false;
}
}
// Reshape users get special handling below.
if (user->is_type<reshape>()) {
// runtime buffer fusing is only handled when there is only one reshape user
if (node.is_dynamic() && node.get_users().size() != 1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1464,3 +1464,33 @@ TEST(prepare_buffer_fusing, in_place_onednn_concat_static) {
}
}
#endif // ENABLE_ONEDNN_FOR_GPU

// Verifies that a crop with a non-zero offset on an inner (x) axis feeding a
// gemm input is NOT optimized out by prepare_buffer_fusing: fusing it would
// introduce input padding, which would force selection of the slow gemm ref
// kernel instead of gemm_tiled_opt.
TEST(prepare_buffer_fusing, inner_axis_data_offset_with_gemm_user) {
    auto& engine = get_test_engine();

    auto in_layout = layout{ ov::PartialShape{1, 6, 16, 16}, data_types::f16, format::bfyx };
    auto crop_layout = layout{ ov::PartialShape{1, 6, 8, 16}, data_types::f16, format::bfyx };

    // crop1 starts at the origin; crop2 starts at x=8, i.e. a non-zero offset
    // on an inner axis — the case the optimization must reject.
    auto offsets1 = tensor{0, 0, 0, 0};
    auto offsets2 = tensor{0, 0, 8, 0};

    topology topology;
    topology.add(input_layout("input", in_layout));
    topology.add(crop("crop1", input_info("input"), crop_layout.get_tensor(), offsets1));
    topology.add(permute("permute", input_info("crop1"), {0, 1, 3, 2}));
    topology.add(crop("crop2", input_info("input"), crop_layout.get_tensor(), offsets2));
    topology.add(gemm("gemm", {input_info("permute"), input_info("crop2")}, data_types::f16, false, false));

    // Only program build (graph optimization) is needed — no input data is set
    // and the network is never executed, so no input memory is allocated here.
    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::optimize_data(true));
    auto prog = program::build_program(engine, topology, config, false, false);
    ASSERT_NE(prog, nullptr);

    // crop2 has an inner-axis offset, so buffer fusing must leave it in place.
    auto& crop_node = prog->get_node("crop2").as<crop>();
    ASSERT_FALSE(crop_node.can_be_optimized());
}

0 comments on commit c20059a

Please sign in to comment.