Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Compile pass for hipBLASLt #3594

Merged
merged 15 commits into from
Nov 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/targets/gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ add_library(migraphx_gpu
compile_gen.cpp
compile_hip.cpp
compile_hip_code_object.cpp
compile_hipblaslt.cpp
compile_miopen.cpp
compile_pointwise.cpp
compiler.cpp
Expand Down
78 changes: 78 additions & 0 deletions src/targets/gpu/compile_hipblaslt.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/

#if MIGRAPHX_USE_HIPBLASLT
#include <migraphx/gpu/compile_hipblaslt.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/module.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

// Compile the wrapped operation for the given instruction and report the
// workspace size (in bytes) its compiled value carries; defaults to 0 when
// no "workspace" entry is present.
static size_t compile(migraphx::context& ctx, operation& op, instruction_ref ins)
{
    const auto compiled = op.compile(ctx, ins->get_shape(), to_shapes(ins->inputs()));
    return compiled.get<std::size_t>("workspace", 0);
}

// Finalize every gpu::hipblaslt_op in the module: compile the wrapped op to
// learn its actual workspace requirement, then replace the instruction with
// the wrapped op plus a correctly sized workspace allocation.
void compile_hipblaslt::apply(module& m) const
{
    assert(ctx);
    for(auto ins : iterator_for(m))
    {
        if(ins->name() != "gpu::hipblaslt_op")
            continue;
        auto op     = any_cast<hipblaslt_op>(ins->get_operator()).op;
        auto inputs = ins->inputs();

        // Start with the default (maximum) workspace size so the op can be
        // compiled with a valid workspace argument in place.
        std::size_t ws = hipblaslt_workspace_size;

        auto alloc = m.insert_instruction(
            ins, make_op("allocate", {{"shape", to_value(shape{shape::uint8_type, {ws}})}}));
        // The workspace goes just before the output argument (the last input).
        inputs.insert(std::prev(inputs.end()), alloc);
        m.replace_instruction(ins, op, inputs);

        // Calculate the actual workspace size needed by the compiled kernel
        ws = compile(*ctx, op, ins);
        auto alloc_after = m.insert_instruction(
            ins, make_op("allocate", {{"shape", to_value(shape{shape::uint8_type, {ws}})}}));

        // Replace the default-sized workspace allocation with the actual
        // workspace size needed.
        auto it = std::find(inputs.begin(), inputs.end(), alloc);
        if(it != inputs.end())
        {
            *it = alloc_after; // Replace `alloc` with `alloc_after`
        }
        m.replace_instruction(ins, op, inputs);
    }
}

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_USE_HIPBLASLT
70 changes: 68 additions & 2 deletions src/targets/gpu/hip_gemm_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,8 @@ struct hip_gemm_impl
const std::vector<argument>& args,
int32_t solution_idx)
{
auto* algo = &solution.get_result(ctx, *this, solution_idx)[0].algo;
auto* algo = &solution.get_result(ctx, *this, solution_idx)[0].algo;
size_t workspace_size = ((is_3inputs ? args[3] : args[2]).get_shape()).bytes();
return pack(ctx.get_stream().get_hipblaslt(),
hipblaslt_desc,
get_alpha(), // alpha
Expand All @@ -414,7 +415,7 @@ struct hip_gemm_impl
is_3inputs ? mat_d : mat_c, // Ddesc
algo, // algo
is_3inputs ? args[3].data() : args[2].data(), // workspace
algo->max_workspace_bytes, // workspaceSizeInBytes
workspace_size, // workspaceSizeInBytes
ctx.get_stream().get() // stream
);
}
Expand Down Expand Up @@ -478,6 +479,53 @@ struct hip_gemm_impl
return solution_idx;
}

/**
* Get workspace size for the solution index: Gets algo from the solution index,
* and calls matmulIsAlgoSupported() to get the workspace size.
*/

size_t get_workspace_size(context& ctx,
                          const std::vector<shape>& input_shapes,
                          int32_t solution_idx) const
{
    // Fall back to the default workspace size unless the solution reports a
    // usable (non-zero) size below.
    size_t workspace_size = hipblaslt_workspace_size;
    std::vector<argument> input_args;
    std::transform(input_shapes.begin(),
                   input_shapes.end(),
                   std::back_inserter(input_args),
                   [](const shape& x) { return to_gpu(generate_argument(x)); });

    // Recover the algo handle corresponding to this solution index.
    std::vector<int32_t> algo_index = {solution_idx};
    std::vector<hipblasLtMatmulHeuristicResult_t> heuristic_result;

    hipblaslt_invoke([&]() {
        return hipblaslt_ext::getAlgosFromIndex(
            ctx.get_stream().get_hipblaslt(), algo_index, heuristic_result);
    });
    assert(heuristic_result.size() == 1);

    auto algo                 = heuristic_result[0].algo;
    size_t ret_workspace_size = 0;
    auto supporting_args =
        create_hipblaslt_supporting_args_common(ctx, input_args, algo, ret_workspace_size);

    auto status =
        hipblaslt_invoke(&hipblaslt_ext::matmulIsAlgoSupported, supporting_args, false);

    // If algo is supported, update the workspace size to the actual size needed.
    // Otherwise, use the default workspace size.
    if(status == HIPBLAS_STATUS_SUCCESS)
    {
        // TODO: Remove this check once issues with '0' workspace size are resolved.
        // Temporarily, we use the approach where, if the returned workspace size
        // is '0', we use the default workspace size.
        // Otherwise, we use the returned workspace size.
        if(ret_workspace_size != 0)
            workspace_size = ret_workspace_size;
    }
    return workspace_size;
}

/**
* Find best hipBLASLt solution: Get list of solutions and try them all, returning the index
* of the fastest one.
Expand Down Expand Up @@ -531,6 +579,13 @@ struct hip_gemm_impl

// Initialize to default solution index
int32_t best_sol = 0;
// If no valid/supported solution is returned, use hipblasLtMatmulAlgoGetHeuristic
// to get an algo and use solution index from that algo.
if(solution_indices.empty())
{
auto algo = solution.get_result(ctx, *this, 0)[0].algo;
solution_indices.push_back(hipblaslt_ext::getIndexFromAlgo(algo));
}
for(auto sol : solution_indices)
{
// Warmup: the first call to an op. may not be representative since there is
Expand Down Expand Up @@ -664,6 +719,17 @@ int32_t hip_gemm_default_solution(context& ctx,
return 0;
}

// Compute the hipBLASLt workspace size (in bytes) required by the gemm
// described by the given output/input shapes for the given solution index.
size_t hip_gemm_workspace_size(context& ctx,
                               const shape& output_shape,
                               const std::vector<shape>& input_shapes,
                               float alpha,
                               float beta,
                               int32_t solution_idx)
{
    hip_gemm_impl gemm{output_shape, input_shapes, alpha, beta};
    return gemm.get_workspace_size(ctx, input_shapes, solution_idx);
}

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
Expand Down
77 changes: 77 additions & 0 deletions src/targets/gpu/include/migraphx/gpu/compile_hipblaslt.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_GPU_COMPILE_HIPBLASLT_HPP
#define MIGRAPHX_GUARD_GPU_COMPILE_HIPBLASLT_HPP

#include <migraphx/config.hpp>
#include <migraphx/instruction_ref.hpp>
#include <migraphx/op/identity.hpp>
#include <migraphx/register_op.hpp>
#include <string>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

struct module;
struct context;
struct operation;

namespace gpu {

// Wrapper operation that carries a hipBLASLt-backed op (e.g. gpu::hip_gemm)
// through lowering until the compile_hipblaslt pass materializes its
// workspace allocation.
struct hipblaslt_op
{
// The wrapped operation; defaults to identity until set during lowering.
operation op = op::identity{};

// Reflection hook for serialization: exposes the wrapped op under "op".
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.op, "op"));
}

std::string name() const { return "gpu::hipblaslt_op"; }

// The wrapped op expects one more argument than this wrapper receives
// (the workspace buffer is inserted later by compile_hipblaslt), so the
// last input is duplicated as a stand-in before delegating.
// NOTE(review): assumes the wrapped op's compute_shape only checks the
// argument count/shape of the extra slot -- confirm against hip_gemm.
shape compute_shape(std::vector<shape> inputs) const
{
inputs.push_back(inputs.back());
return op.compute_shape(inputs);
}

// The output aliases the last input (the preallocated output buffer).
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
MIGRAPHX_REGISTER_OP(hipblaslt_op);

// Compiler pass that finalizes gpu::hipblaslt_op instructions: compiles the
// wrapped op and inserts a correctly sized workspace allocation.
struct compile_hipblaslt
{
// Non-owning pointer to the GPU compile context; must be set before apply().
context* ctx = nullptr;
std::string name() const { return "gpu::compile_hipblaslt"; }
void apply(module& m) const;
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_GPU_COMPILE_HIPBLASLT_HPP
9 changes: 9 additions & 0 deletions src/targets/gpu/include/migraphx/gpu/hip_gemm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,15 @@ struct hip_gemm
hip_gemm_finalize(ctx, output_shape, input_shapes, alpha, beta, solution_idx);
}
}

// Finalize (tune) the gemm for the given shapes, then report the workspace
// size required by the chosen solution as a compile-time value.
value
compile(migraphx::context& ctx, const shape& output, const std::vector<shape>& input_shapes)
{
    auto& gctx = any_cast<migraphx::gpu::context>(ctx);
    finalize(gctx, output, input_shapes);
    const size_t ws =
        hip_gemm_workspace_size(gctx, output, input_shapes, alpha, beta, solution_idx);
    return {{"workspace", ws}};
}
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
Expand Down
7 changes: 7 additions & 0 deletions src/targets/gpu/include/migraphx/gpu/hip_gemm_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,13 @@ int32_t hip_gemm_default_solution(context& ctx,
const shape& output_shape,
const std::vector<shape>& input_shapes);

// Returns the hipBLASLt workspace size (in bytes) required by the gemm
// described by the given output/input shapes for the given solution index.
size_t hip_gemm_workspace_size(context& ctx,
                               const shape& output_shape,
                               const std::vector<shape>& input_shapes,
                               float alpha,
                               float beta,
                               int32_t solution_idx);

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
Expand Down
21 changes: 12 additions & 9 deletions src/targets/gpu/lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -251,14 +251,6 @@ struct miopen_apply
apply_map.emplace(name, [=](instruction_ref ins) {
std::vector<instruction_ref> refs = ins->inputs();
assert(refs.size() == 2);
#if MIGRAPHX_USE_HIPBLASLT
if(enabled(MIGRAPHX_ENABLE_HIPBLASLT_GEMM{}))
{
shape workspace_shape{shape::uint8_type, {hipblaslt_workspace_size}};
auto workspace = insert_allocation(ins, workspace_shape);
refs.push_back(workspace);
}
#endif
auto output = insert_allocation(ins, ins->get_shape());
refs.push_back(output);
#if MIGRAPHX_USE_HIPBLASLT
Expand All @@ -269,7 +261,18 @@ struct miopen_apply
ins, rocblas_gemm<Op>{Op{}, 1, 0, compute_fp32}, refs);
#if MIGRAPHX_USE_HIPBLASLT
}
return mod->replace_instruction(ins, hip_gemm<Op>{Op{}, 1, 0}, refs);
std::string op_name = "gpu::hip_gemm";
if(contains(name, "quant_"))
{
op_name = "gpu::hip_quant_gemm";
}
operation gemm_op = make_op(op_name);
return mod->replace_instruction(
ins,
make_op("gpu::hipblaslt_op", {{"op", to_value(gemm_op)}}),
ins->inputs().at(0),
ins->inputs().at(1),
output);
#endif
});
}
Expand Down
5 changes: 5 additions & 0 deletions src/targets/gpu/target.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
#include <migraphx/split_reduce.hpp>
#include <migraphx/split_single_dyn_dim.hpp>
#include <migraphx/gpu/allocation_model.hpp>
#include <migraphx/gpu/compile_hipblaslt.hpp>
#include <migraphx/gpu/compile_miopen.hpp>
#include <migraphx/gpu/compile_ops.hpp>
#include <migraphx/gpu/concat_gpu_opt.hpp>
Expand Down Expand Up @@ -224,6 +225,10 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
dead_code_elimination{},
fuse_ops{&ctx, options.fast_math},
dead_code_elimination{},
#if MIGRAPHX_USE_HIPBLASLT
compile_hipblaslt{&gctx},
dead_code_elimination{},
#endif
replace_allocate{gpu_allocation_model{}, options.offload_copy},
dead_code_elimination{},
adjust_allocation{gpu_allocation_model{}},
Expand Down
Loading
Loading