diff --git a/demos/computeflocking/data/shaders/flock.comp b/demos/computeflocking/data/shaders/flock.comp index 0e9d5b7003..e2f55725c3 100644 --- a/demos/computeflocking/data/shaders/flock.comp +++ b/demos/computeflocking/data/shaders/flock.comp @@ -27,9 +27,13 @@ // Workgroup size as specialization constant as per: // https://www.khronos.org/registry/OpenGL/specs/gl/GLSLangSpec.4.60.html#specialization-constant-qualifier -// When a workgroup size specialization constant is detected, NAP automatically overwrites -// it with the maximum group size of the device on pipeline creation. +// NAP overwrites the workgroup size specialization constant, when detected and not 0, with the maximum group +// size supported by the device on pipeline creation. layout(local_size_x_id = 0) in; + +// When declaring the workgroup size specialization constant to be higher than 0, NAP will not +// override the workgroup size when creating the compute pipeline. Uncommenting this bit and commenting out +// the above will let the shader define the workgroup size. 
//layout(local_size_x = 512) in; struct Boid diff --git a/modules/naprender/data/shaders/constant.vert b/modules/naprender/data/shaders/constant.vert index 8716163a59..1d983d0021 100644 --- a/modules/naprender/data/shaders/constant.vert +++ b/modules/naprender/data/shaders/constant.vert @@ -12,9 +12,6 @@ uniform nap } mvp; in vec3 in_Position; -in vec3 in_UV0; - -out vec3 passUVs; void main(void) { diff --git a/modules/naprender/src/renderservice.cpp b/modules/naprender/src/renderservice.cpp index 55ed5b31a4..212b535365 100644 --- a/modules/naprender/src/renderservice.cpp +++ b/modules/naprender/src/renderservice.cpp @@ -1052,9 +1052,16 @@ namespace nap entry.constantID = static_cast(const_ids[i]); entry.offset = static_cast(spec_entries.size() * sizeof(uint)); entry.size = sizeof(uint); - spec_entries.emplace_back(std::move(entry)); - spec_data.emplace_back(computeShader.getWorkGroupSize()[i]); + uint32 work_group_size = computeShader.getWorkGroupSize()[i]; +#ifdef __APPLE__ + // Clamp work group size for Apple to 512, based on maxTotalThreadsPerThreadgroup, + // which doesn't necessarily match physical device limits, especially on older devices. 
+ // See: https://developer.apple.com/documentation/metal/compute_passes/calculating_threadgroup_and_grid_sizes + // And: https://github.com/KhronosGroup/SPIRV-Cross/issues/837 + work_group_size = math::min(work_group_size, 512); +#endif // __APPLE__ + spec_data.emplace_back(work_group_size); } } diff --git a/modules/naprender/src/shader.cpp b/modules/naprender/src/shader.cpp index 4839f9797d..238026c304 100644 --- a/modules/naprender/src/shader.cpp +++ b/modules/naprender/src/shader.cpp @@ -866,10 +866,6 @@ namespace nap if (!parseShaderVariables(comp_shader_compiler, VK_SHADER_STAGE_COMPUTE_BIT, mUBODeclarations, mSSBODeclarations, mSamplerDeclarations, errorState)) return false; - // Query useful compute info - std::array max_workgroup_size; - std::memcpy(max_workgroup_size.data(), &mRenderService->getPhysicalDeviceProperties().limits.maxComputeWorkGroupSize[0], sizeof(max_workgroup_size)); - // Cache workgroup size specialization constants std::array spec_constants; comp_shader_compiler.get_work_group_size_specialization_constants(spec_constants[0], spec_constants[1], spec_constants[2]); @@ -884,7 +880,7 @@ namespace nap { // Overwrite workgroup size with queried maximum supported workgroup size mWorkGroupSizeConstantIds[i] = spec_constants[i].constant_id; - mWorkGroupSize[i] = max_workgroup_size[i]; + mWorkGroupSize[i] = mRenderService->getPhysicalDeviceProperties().limits.maxComputeWorkGroupSize[i]; } else {