From ca5cb76a6205bba847031128730ad084054b4537 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Sun, 10 Jul 2022 09:39:07 +0200 Subject: [PATCH] Fix the launch heuristic. (#40) --- src/gpuarrays.jl | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/gpuarrays.jl b/src/gpuarrays.jl index 0796de3ee..be9023132 100644 --- a/src/gpuarrays.jl +++ b/src/gpuarrays.jl @@ -14,11 +14,8 @@ struct mtlKernelContext <: AbstractKernelContext end kernel = @metal launch=false f(mtlKernelContext(), args...) # The pipeline state automatically computes occupancy stats - threads_needed = cld(elements, elements_per_thread) - - # Limit the threadgroup size - threads = min(threads_needed, kernel.pipeline_state.maxTotalThreadsPerThreadgroup) - blocks = cld(threads_needed, threads) + threads = min(elements, kernel.pipeline_state.maxTotalThreadsPerThreadgroup) + blocks = cld(elements, threads) return (; threads, blocks) end