From 4bcd615bb1253459f02ab89f2bd4bb37e29bcc9d Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sun, 11 Jun 2023 12:35:19 +0200 Subject: [PATCH 01/71] Get rid of the extra field in timestamps. Kinda useless and also works around a weird GCC warning. --- vulkan/device.cpp | 17 +++++++++-------- vulkan/device.hpp | 6 +++--- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/vulkan/device.cpp b/vulkan/device.cpp index 97d93c97..9679be62 100644 --- a/vulkan/device.cpp +++ b/vulkan/device.cpp @@ -1400,7 +1400,7 @@ void Device::submit_empty_inner(QueueIndices physical_type, InternalFence *fence auto start_ts = write_calibrated_timestamp_nolock(); auto result = submit_batches(composer, queue, cleared_fence); auto end_ts = write_calibrated_timestamp_nolock(); - register_time_interval_nolock("CPU", std::move(start_ts), std::move(end_ts), "submit", ""); + register_time_interval_nolock("CPU", std::move(start_ts), std::move(end_ts), "submit"); if (result != VK_SUCCESS) LOGE("vkQueueSubmit2KHR failed (code: %d).\n", int(result)); @@ -1884,7 +1884,7 @@ void Device::submit_queue(QueueIndices physical_type, InternalFence *fence, auto start_ts = write_calibrated_timestamp_nolock(); auto result = submit_batches(composer, queue, cleared_fence, profiling_iteration); auto end_ts = write_calibrated_timestamp_nolock(); - register_time_interval_nolock("CPU", std::move(start_ts), std::move(end_ts), "submit", ""); + register_time_interval_nolock("CPU", std::move(start_ts), std::move(end_ts), "submit"); if (result != VK_SUCCESS) LOGE("vkQueueSubmit2KHR failed (code: %d).\n", int(result)); @@ -2572,7 +2572,7 @@ void Device::next_frame_context() if (frame_context_begin_ts) { auto frame_context_end_ts = write_calibrated_timestamp_nolock(); - register_time_interval_nolock("CPU", std::move(frame_context_begin_ts), std::move(frame_context_end_ts), "command submissions", ""); + register_time_interval_nolock("CPU", std::move(frame_context_begin_ts), 
std::move(frame_context_end_ts), "command submissions"); frame_context_begin_ts = {}; } @@ -2754,14 +2754,15 @@ void Device::recalibrate_timestamps() resample_calibrated_timestamps(); } -void Device::register_time_interval(std::string tid, QueryPoolHandle start_ts, QueryPoolHandle end_ts, std::string tag, std::string extra) +void Device::register_time_interval(std::string tid, QueryPoolHandle start_ts, QueryPoolHandle end_ts, + const std::string &tag) { LOCK(); - register_time_interval_nolock(std::move(tid), std::move(start_ts), std::move(end_ts), std::move(tag), std::move(extra)); + register_time_interval_nolock(std::move(tid), std::move(start_ts), std::move(end_ts), tag); } void Device::register_time_interval_nolock(std::string tid, QueryPoolHandle start_ts, QueryPoolHandle end_ts, - std::string tag, std::string extra) + const std::string &tag) { if (start_ts && end_ts) { @@ -2770,7 +2771,7 @@ void Device::register_time_interval_nolock(std::string tid, QueryPoolHandle star if (start_ts->is_signalled() && end_ts->is_signalled()) VK_ASSERT(end_ts->get_timestamp_ticks() >= start_ts->get_timestamp_ticks()); #endif - frame().timestamp_intervals.push_back({ std::move(tid), std::move(start_ts), std::move(end_ts), timestamp_tag, std::move(extra) }); + frame().timestamp_intervals.push_back({ std::move(tid), std::move(start_ts), std::move(end_ts), timestamp_tag }); } } @@ -2917,7 +2918,7 @@ void Device::PerFrame::begin() allocations.clear(); if (!in_destructor) - device.register_time_interval_nolock("CPU", std::move(wait_fence_ts), device.write_calibrated_timestamp_nolock(), "fence + recycle", ""); + device.register_time_interval_nolock("CPU", std::move(wait_fence_ts), device.write_calibrated_timestamp_nolock(), "fence + recycle"); int64_t min_timestamp_us = std::numeric_limits::max(); int64_t max_timestamp_us = 0; diff --git a/vulkan/device.hpp b/vulkan/device.hpp index 6dd7430b..b6eae3df 100644 --- a/vulkan/device.hpp +++ b/vulkan/device.hpp @@ -286,7 +286,7 @@ class 
Device void submit_discard(CommandBufferHandle &cmd); QueueIndices get_physical_queue_type(CommandBuffer::Type queue_type) const; void register_time_interval(std::string tid, QueryPoolHandle start_ts, QueryPoolHandle end_ts, - std::string tag, std::string extra = {}); + const std::string &tag); // Request shaders and programs. These objects are owned by the Device. Shader *request_shader(const uint32_t *code, size_t size, const ResourceLayout *layout = nullptr); @@ -551,7 +551,8 @@ class Device QueryPoolHandle write_timestamp_nolock(VkCommandBuffer cmd, VkPipelineStageFlags2 stage); QueryPoolHandle write_calibrated_timestamp_nolock(); - void register_time_interval_nolock(std::string tid, QueryPoolHandle start_ts, QueryPoolHandle end_ts, std::string tag, std::string extra); + void register_time_interval_nolock(std::string tid, QueryPoolHandle start_ts, QueryPoolHandle end_ts, + const std::string &tag); // Make sure this is deleted last. HandlePool handle_pool; @@ -645,7 +646,6 @@ class Device QueryPoolHandle start_ts; QueryPoolHandle end_ts; TimestampInterval *timestamp_tag; - std::string extra; }; std::vector timestamp_intervals; From 3c7c90fd39f80aade8ea81f73eb020f7b9ca7f23 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sun, 11 Jun 2023 12:35:53 +0200 Subject: [PATCH 02/71] Smol refactors in asset manager. 
--- filesystem/asset_manager.cpp | 4 ++-- filesystem/asset_manager.hpp | 4 ++++ vulkan/managers/resource_manager.hpp | 4 ++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/filesystem/asset_manager.cpp b/filesystem/asset_manager.cpp index c49d8a52..8b033a73 100644 --- a/filesystem/asset_manager.cpp +++ b/filesystem/asset_manager.cpp @@ -100,7 +100,7 @@ void AssetManager::set_asset_instantiator_interface(AssetInstantiatorInterface * { signal->wait_until_at_least(timestamp); for (uint32_t id = 0; id < id_count; id++) - iface->release_image_resource({ id }); + iface->release_image_resource(ImageAssetID{id}); } for (auto *a : asset_bank) @@ -116,7 +116,7 @@ void AssetManager::set_asset_instantiator_interface(AssetInstantiatorInterface * { iface->set_id_bounds(id_count); for (uint32_t i = 0; i < id_count; i++) - iface->set_image_class({ i }, asset_bank[i]->image_class); + iface->set_image_class(ImageAssetID{i}, asset_bank[i]->image_class); } } diff --git a/filesystem/asset_manager.hpp b/filesystem/asset_manager.hpp index 75b54107..8ab09703 100644 --- a/filesystem/asset_manager.hpp +++ b/filesystem/asset_manager.hpp @@ -35,7 +35,11 @@ namespace Granite struct ImageAssetID { uint32_t id = uint32_t(-1); + ImageAssetID() = default; + explicit ImageAssetID(uint32_t id_) : id{id_} {} explicit inline operator bool() const { return id != uint32_t(-1); } + inline bool operator==(const ImageAssetID &other) const { return id == other.id; } + inline bool operator!=(const ImageAssetID &other) const { return !(*this == other); } }; class AssetManager; diff --git a/vulkan/managers/resource_manager.hpp b/vulkan/managers/resource_manager.hpp index 5d1a7dbf..3c129491 100644 --- a/vulkan/managers/resource_manager.hpp +++ b/vulkan/managers/resource_manager.hpp @@ -31,11 +31,11 @@ namespace Vulkan { class MemoryMappedTexture; -class ResourceManager : private Granite::AssetInstantiatorInterface +class ResourceManager final : private Granite::AssetInstantiatorInterface { 
public: explicit ResourceManager(Device *device); - ~ResourceManager(); + ~ResourceManager() override; void init(); inline const Vulkan::ImageView *get_image_view(Granite::ImageAssetID id) const From d52781a247d39d2272a683401bd22a3cb300c98e Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sun, 11 Jun 2023 12:46:18 +0200 Subject: [PATCH 03/71] Add a MeshAssetID. --- filesystem/asset_manager.hpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/filesystem/asset_manager.hpp b/filesystem/asset_manager.hpp index 8ab09703..1ecb0ced 100644 --- a/filesystem/asset_manager.hpp +++ b/filesystem/asset_manager.hpp @@ -42,6 +42,16 @@ struct ImageAssetID inline bool operator!=(const ImageAssetID &other) const { return !(*this == other); } }; +struct MeshAssetID +{ + uint32_t id = uint32_t(-1); + MeshAssetID() = default; + explicit MeshAssetID(uint32_t id_) : id{id_} {} + explicit inline operator bool() const { return id != uint32_t(-1); } + inline bool operator==(const MeshAssetID &other) const { return id == other.id; } + inline bool operator!=(const MeshAssetID &other) const { return !(*this == other); } +}; + class AssetManager; // If we have to fall back due to no image being present, From c67746dae96225ab78b2694e4978fe2a7293485f Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sun, 11 Jun 2023 12:47:59 +0200 Subject: [PATCH 04/71] Update meshoptimizer submodule. --- third_party/meshoptimizer | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/meshoptimizer b/third_party/meshoptimizer index 5baa38ef..eb385d69 160000 --- a/third_party/meshoptimizer +++ b/third_party/meshoptimizer @@ -1 +1 @@ -Subproject commit 5baa38ef5cd288c6a4d1b3a69f8a168943d593cd +Subproject commit eb385d6987d12f33a4e0284cf2ba6660c9272602 From d3903c7a57509796ad4e82a90723cfbc1f3a19bd Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 14 Jun 2023 13:27:39 +0200 Subject: [PATCH 05/71] Meshopt experimentation. 
--- tests/CMakeLists.txt | 4 ++ tests/meshopt_sandbox.cpp | 137 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 141 insertions(+) create mode 100644 tests/meshopt_sandbox.cpp diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 6592962b..13f15910 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -161,6 +161,10 @@ add_granite_offline_tool(linkage-test linkage_test.cpp) add_granite_offline_tool(external-objects external_objects.cpp) add_granite_offline_tool(performance-query performance_query.cpp) add_granite_offline_tool(asset-manager-test asset_manager_test.cpp) + +add_granite_offline_tool(meshopt-sandbox meshopt_sandbox.cpp) +target_link_libraries(meshopt-sandbox PRIVATE meshoptimizer) + add_granite_application(dgc-test dgc_test.cpp) if (NOT ANDROID) target_compile_definitions(dgc-test PRIVATE ASSET_DIRECTORY=\"${CMAKE_CURRENT_SOURCE_DIR}/assets\") diff --git a/tests/meshopt_sandbox.cpp b/tests/meshopt_sandbox.cpp new file mode 100644 index 00000000..4948c906 --- /dev/null +++ b/tests/meshopt_sandbox.cpp @@ -0,0 +1,137 @@ +#include "meshoptimizer.h" +#include "logging.hpp" +#include +#include +#include "math.hpp" +#include "muglm/muglm_impl.hpp" +#include +using namespace Granite; + +int main() +{ + constexpr uint32_t count = 8; + constexpr uint32_t stride = 8; + constexpr uint32_t block_elements_limit = 256u; + constexpr uint32_t max_stride = 256u; + size_t bound = meshopt_encodeVertexBufferBound(count, stride); + std::vector buffer(bound); + + const uint32_t vertices[] = { + 800, 805, 810, 750, + 710, 720, 700, 701, + 800, 890, 800, 800, + 800, 800, 800, 800 + }; + size_t encoded_size = meshopt_encodeVertexBuffer(buffer.data(), bound, vertices, count, stride); + buffer.resize(encoded_size); + + uint32_t max_block_elements = min(block_elements_limit, (8192 / stride) & ~15u); + + std::vector output_buffer(((count + 15) & ~15) * stride); + + assert(buffer[0] == 0xa0); + uint32_t buffer_index = 1; + + for (size_t i = 0, n = 
buffer.size(); i < n; i++) + LOGI("Output byte %02zu: 0x%02x\n", i, buffer[i]); + + uint8_t decode_buffer[max_stride]; + uint32_t tail_size = (stride + 31) & ~31; + memcpy(decode_buffer, buffer.data() + buffer.size() - stride, stride); + buffer.resize(buffer.size() - tail_size); + + for (uint32_t element = 0; element < count; element += max_block_elements) + { + uint32_t remaining_elements = min(max_block_elements, count - element); + uint32_t group_count = (remaining_elements + 15u) / 16u; + + for (uint32_t data_block = 0; data_block < stride; data_block++) + { + uint8_t *out_ptr = output_buffer.data() + element * stride + data_block; + uint8_t decode_value = decode_buffer[data_block]; + uint32_t out_vertex_index = 0; + + uint8_t header_bits[16]; + for (uint32_t group_index = 0; group_index < group_count; group_index++) + header_bits[group_index] = + (buffer[buffer_index + group_index / 4] >> (2 * (group_index & 3u))) & 3u; + buffer_index += (group_count + 3) / 4; + + for (uint32_t group_index = 0; group_index < group_count; group_index++) + { + switch (header_bits[group_index]) + { + case 0: + for (uint32_t i = 0; i < 16; i++, out_vertex_index++) + out_ptr[stride * out_vertex_index] = decode_value; + break; + + case 1: + { + uint32_t sentinel_count = 0; + // 2-bit sentinel decode. + for (uint32_t i = 0; i < 16; i++, out_vertex_index++) + { + uint32_t bits = (buffer[buffer_index + i / 4] >> (2 * ((i ^ 3) & 3))) & 3; + uint8_t delta; + + if (bits == 3) + bits = buffer[buffer_index + 4 + sentinel_count++]; + + delta = (bits & 1) ? uint8_t(~(bits >> 1)) : uint8_t(bits >> 1); + + uint8_t updated = decode_value + delta; + out_ptr[out_vertex_index * stride] = decode_value = updated; + } + buffer_index += 4 + sentinel_count; + break; + } + + case 2: + { + uint32_t sentinel_count = 0; + // 4-bit sentinel decode. 
+ for (uint32_t i = 0; i < 16; i++, out_vertex_index++) + { + uint32_t bits = (buffer[buffer_index + i / 2] >> (4 * ((i ^ 1) & 1))) & 0xf; + uint8_t delta; + + if (bits == 15) + delta = buffer[buffer_index + 8 + sentinel_count++]; + else + delta = (bits & 1) ? uint8_t(~(bits >> 1)) : uint8_t(bits >> 1); + + uint8_t updated = decode_value + delta; + out_ptr[out_vertex_index * stride] = decode_value = updated; + } + buffer_index += 8 + sentinel_count; + break; + } + + default: + for (uint32_t i = 0; i < 16; i++, out_vertex_index++) + { + uint8_t delta = buffer[buffer_index + i]; + uint8_t updated = decode_value + delta; + out_ptr[out_vertex_index * stride] = decode_value = updated; + } + buffer_index += 16; + break; + } + } + } + } + + for (uint32_t i = 0; i < count * stride / 4; i++) + LOGI("Output value %u: %u\n", i, reinterpret_cast(output_buffer.data())[i]); + + int8_t output[4]; + float float_inputs[4] = { 0.5f, 0.5f, 0.5f }; + vec3 normalized = normalize(vec3(float_inputs[0], float_inputs[1], float_inputs[2])); + float float_output[4]; + meshopt_encodeFilterOct(output, 1, 4, 8, normalized.data); + meshopt_decodeFilterOct(output, 1, 4); + + LOGI("Input (%.3f, %.3f, %.3f)\n", normalized[0], normalized[1], normalized[2]); + LOGI("Value (%.3f, %.3f, %.3f)\n", output[0] / 127.0f, output[1] / 127.0f, output[2] / 127.0f); +} \ No newline at end of file From 2b3e4048db7f42773533a5b72a8a349197c86b2f Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sat, 17 Jun 2023 18:41:35 +0200 Subject: [PATCH 06/71] Experiment with custom GPU-friendly mesh compression. 
--- tests/meshopt_sandbox.cpp | 334 +++++++++++++++++++++++++------------- 1 file changed, 218 insertions(+), 116 deletions(-) diff --git a/tests/meshopt_sandbox.cpp b/tests/meshopt_sandbox.cpp index 4948c906..a9c50fbc 100644 --- a/tests/meshopt_sandbox.cpp +++ b/tests/meshopt_sandbox.cpp @@ -1,137 +1,239 @@ -#include "meshoptimizer.h" #include "logging.hpp" #include #include #include "math.hpp" #include "muglm/muglm_impl.hpp" +#include +#include "bitops.hpp" #include +#include using namespace Granite; -int main() +static constexpr unsigned MaxStreams = 16; +static constexpr unsigned MaxU32Streams = 16; +static constexpr unsigned MaxElements = 256; +static constexpr unsigned MaxPrimitives = MaxElements; +static constexpr unsigned MaxVertices = MaxElements; + +struct MeshletStream +{ + uint32_t offset_from_base_u32; + uint16_t predictor[4 * 2]; + uint16_t bitplane_meta[MaxElements / 32]; +}; + +struct MeshletMetadata +{ + uint32_t base_vertex_offset; + uint8_t num_primitives_minus_1; + uint8_t num_attributes_minus_1; + uint16_t reserved; + MeshletStream u32_streams[MaxU32Streams]; +}; + +enum class StreamType : uint8_t +{ + Primitive, // R8G8B8X8_UINT + PositionF16, // R16G16B16X16_FLOAT +}; + +struct StreamMeta +{ + StreamType type; + uint8_t stream_index_component; +}; + +struct MeshMetadata +{ + uint32_t stream_count; + uint32_t data_stream_offset_u32; + uint32_t data_stream_size_u32; + + // Stream meta is used to configure the decode shader. 
+ StreamMeta stream_meta[MaxStreams]; + + std::vector meshlets; +}; + +struct PrimitiveAnalysisResult +{ + uint32_t num_primitives; + uint32_t num_vertices; +}; + +static PrimitiveAnalysisResult analyze_primitive_count(std::unordered_map &vertex_remap, + const uint32_t *index_buffer, uint32_t max_num_primitives) { - constexpr uint32_t count = 8; - constexpr uint32_t stride = 8; - constexpr uint32_t block_elements_limit = 256u; - constexpr uint32_t max_stride = 256u; - size_t bound = meshopt_encodeVertexBufferBound(count, stride); - std::vector buffer(bound); - - const uint32_t vertices[] = { - 800, 805, 810, 750, - 710, 720, 700, 701, - 800, 890, 800, 800, - 800, 800, 800, 800 - }; - size_t encoded_size = meshopt_encodeVertexBuffer(buffer.data(), bound, vertices, count, stride); - buffer.resize(encoded_size); - - uint32_t max_block_elements = min(block_elements_limit, (8192 / stride) & ~15u); - - std::vector output_buffer(((count + 15) & ~15) * stride); - - assert(buffer[0] == 0xa0); - uint32_t buffer_index = 1; - - for (size_t i = 0, n = buffer.size(); i < n; i++) - LOGI("Output byte %02zu: 0x%02x\n", i, buffer[i]); - - uint8_t decode_buffer[max_stride]; - uint32_t tail_size = (stride + 31) & ~31; - memcpy(decode_buffer, buffer.data() + buffer.size() - stride, stride); - buffer.resize(buffer.size() - tail_size); - - for (uint32_t element = 0; element < count; element += max_block_elements) + PrimitiveAnalysisResult result = {}; + uint32_t vertex_count = 0; + + // We can reference a maximum of 256 vertices. 
+ vertex_remap.clear(); + + for (uint32_t i = 0; i < max_num_primitives; i++) { - uint32_t remaining_elements = min(max_block_elements, count - element); - uint32_t group_count = (remaining_elements + 15u) / 16u; + uint32_t index0 = index_buffer[3 * i + 0]; + uint32_t index1 = index_buffer[3 * i + 1]; + uint32_t index2 = index_buffer[3 * i + 2]; + + vertex_count = uint32_t(vertex_remap.size()); - for (uint32_t data_block = 0; data_block < stride; data_block++) + vertex_remap.insert({ index0, uint32_t(vertex_remap.size()) }); + vertex_remap.insert({ index1, uint32_t(vertex_remap.size()) }); + vertex_remap.insert({ index2, uint32_t(vertex_remap.size()) }); + + // If this primitive causes us to go out of bounds, reset. + if (vertex_remap.size() > MaxVertices) { - uint8_t *out_ptr = output_buffer.data() + element * stride + data_block; - uint8_t decode_value = decode_buffer[data_block]; - uint32_t out_vertex_index = 0; + max_num_primitives = i; + break; + } + + vertex_count = uint32_t(vertex_remap.size()); + } - uint8_t header_bits[16]; - for (uint32_t group_index = 0; group_index < group_count; group_index++) - header_bits[group_index] = - (buffer[buffer_index + group_index / 4] >> (2 * (group_index & 3u))) & 3u; - buffer_index += (group_count + 3) / 4; + result.num_primitives = max_num_primitives; + result.num_vertices = vertex_count; + return result; +} - for (uint32_t group_index = 0; group_index < group_count; group_index++) +// Analyze bits required to encode a signed delta. +static uvec4 compute_required_bits(u8vec4 delta) +{ + uvec4 result; + for (unsigned i = 0; i < 4; i++) + { + uint32_t v = delta[i]; + if (v >= 0x80u) + v ^= 0xffu; + result[i] = v == 0 ? 
0 : (32 - leading_zeroes(v)); + } + return result; +} + +static uint32_t extract_bit_plane(const uint8_t *bytes, unsigned bit_index) +{ + uint32_t u32 = 0; + for (unsigned i = 0; i < 32; i++) + u32 |= ((bytes[4 * i] >> bit_index) & 1u) << i; + return u32; +} + +static void encode_stream(std::vector &out_payload_buffer, + MeshletStream &stream, u8vec4 (&stream_buffer)[MaxElements]) +{ + stream.offset_from_base_u32 = uint32_t(out_payload_buffer.size()); + // Simple linear predictor, base equal elements[0], gradient = 0. + stream.predictor[0] = uint16_t(stream_buffer[0].x) << 8; + stream.predictor[1] = uint16_t(stream_buffer[0].y) << 8; + stream.predictor[2] = uint16_t(stream_buffer[0].z) << 8; + stream.predictor[3] = uint16_t(stream_buffer[0].w) << 8; + stream.predictor[4] = 0; + stream.predictor[5] = 0; + stream.predictor[6] = 0; + stream.predictor[7] = 0; + + // Delta-encode + u8vec4 current_value = stream_buffer[0]; + for (unsigned i = 0; i < MaxElements; i++) + { + u8vec4 next_value = stream_buffer[i]; + stream_buffer[i] = next_value - current_value; + current_value = next_value; + } + + // Encode 32 elements at once. + for (unsigned chunk_index = 0; chunk_index < MaxElements / 32; chunk_index++) + { + uvec4 required_bits = {}; + for (unsigned i = 0; i < 32; i++) + required_bits = max(required_bits, compute_required_bits(stream_buffer[chunk_index * 32 + i])); + + // Encode bit counts. 
+ stream.bitplane_meta[chunk_index] = uint16_t((required_bits.x << 0) | (required_bits.y << 4) | + (required_bits.z << 8) | (required_bits.w << 12)); + + for (unsigned i = 0; i < required_bits.x; i++) + out_payload_buffer.push_back(extract_bit_plane(&stream_buffer[chunk_index * 32][0], i)); + for (unsigned i = 0; i < required_bits.y; i++) + out_payload_buffer.push_back(extract_bit_plane(&stream_buffer[chunk_index * 32][1], i)); + for (unsigned i = 0; i < required_bits.z; i++) + out_payload_buffer.push_back(extract_bit_plane(&stream_buffer[chunk_index * 32][2], i)); + for (unsigned i = 0; i < required_bits.w; i++) + out_payload_buffer.push_back(extract_bit_plane(&stream_buffer[chunk_index * 32][3], i)); + } +} + +static void encode_mesh(std::vector &out_payload_buffer, MeshMetadata &mesh, + const uint32_t *index_buffer, uint32_t primitive_count, + const uint32_t *attributes, + unsigned num_u32_streams) +{ + u8vec4 stream_buffer[MaxElements] = {}; + mesh.stream_count = num_u32_streams + 1; + mesh.data_stream_offset_u32 = 0; // Can be adjusted in isolation later to pack multiple payload streams into one buffer. + uint32_t base_vertex_offset = 0; + + std::unordered_map vbo_remap; + + for (uint32_t primitive_index = 0; primitive_index < primitive_count; ) + { + uint32_t primitives_to_process = min(primitive_count - primitive_index, MaxPrimitives); + auto analysis_result = analyze_primitive_count(vbo_remap, index_buffer + 3 * primitive_index, primitives_to_process); + primitives_to_process = analysis_result.num_primitives; + + MeshletMetadata meshlet = {}; + + meshlet.base_vertex_offset = base_vertex_offset; + meshlet.num_primitives_minus_1 = analysis_result.num_primitives - 1; + meshlet.num_attributes_minus_1 = analysis_result.num_vertices - 1; + meshlet.reserved = 0; + + // Encode index buffer. 
+ for (uint32_t i = 0; i < analysis_result.num_primitives; i++) + { + uint8_t i0 = vbo_remap[index_buffer[3 * i + 0]]; + uint8_t i1 = vbo_remap[index_buffer[3 * i + 1]]; + uint8_t i2 = vbo_remap[index_buffer[3 * i + 2]]; + stream_buffer[i] = u8vec4(i0, i1, i2, 0); + } + + for (uint32_t i = analysis_result.num_primitives; i < MaxElements; i++) + stream_buffer[i] = stream_buffer[analysis_result.num_primitives - 1]; + + encode_stream(out_payload_buffer, meshlet.u32_streams[0], stream_buffer); + + uint64_t vbo_remapping[MaxVertices]; + unsigned vbo_index = 0; + for (auto &v : vbo_remap) + vbo_remapping[vbo_index++] = (uint64_t(v.second) << 32) | v.first; + std::sort(vbo_remapping, vbo_remapping + analysis_result.num_vertices); + + for (uint32_t stream_index = 0; stream_index < num_u32_streams; stream_index++) + { + for (uint32_t i = 0; i < analysis_result.num_vertices; i++) { - switch (header_bits[group_index]) - { - case 0: - for (uint32_t i = 0; i < 16; i++, out_vertex_index++) - out_ptr[stride * out_vertex_index] = decode_value; - break; - - case 1: - { - uint32_t sentinel_count = 0; - // 2-bit sentinel decode. - for (uint32_t i = 0; i < 16; i++, out_vertex_index++) - { - uint32_t bits = (buffer[buffer_index + i / 4] >> (2 * ((i ^ 3) & 3))) & 3; - uint8_t delta; - - if (bits == 3) - bits = buffer[buffer_index + 4 + sentinel_count++]; - - delta = (bits & 1) ? uint8_t(~(bits >> 1)) : uint8_t(bits >> 1); - - uint8_t updated = decode_value + delta; - out_ptr[out_vertex_index * stride] = decode_value = updated; - } - buffer_index += 4 + sentinel_count; - break; - } - - case 2: - { - uint32_t sentinel_count = 0; - // 4-bit sentinel decode. - for (uint32_t i = 0; i < 16; i++, out_vertex_index++) - { - uint32_t bits = (buffer[buffer_index + i / 2] >> (4 * ((i ^ 1) & 1))) & 0xf; - uint8_t delta; - - if (bits == 15) - delta = buffer[buffer_index + 8 + sentinel_count++]; - else - delta = (bits & 1) ? 
uint8_t(~(bits >> 1)) : uint8_t(bits >> 1); - - uint8_t updated = decode_value + delta; - out_ptr[out_vertex_index * stride] = decode_value = updated; - } - buffer_index += 8 + sentinel_count; - break; - } - - default: - for (uint32_t i = 0; i < 16; i++, out_vertex_index++) - { - uint8_t delta = buffer[buffer_index + i]; - uint8_t updated = decode_value + delta; - out_ptr[out_vertex_index * stride] = decode_value = updated; - } - buffer_index += 16; - break; - } + auto vertex_index = uint32_t(vbo_remapping[i]); + uint32_t payload = attributes[stream_index + num_u32_streams * vertex_index]; + stream_buffer[i] = u8vec4(uint8_t(payload >> 0), uint8_t(payload >> 8), + uint8_t(payload >> 16), uint8_t(payload >> 24)); } + + for (uint32_t i = analysis_result.num_vertices; i < MaxElements; i++) + stream_buffer[i] = stream_buffer[analysis_result.num_vertices - 1]; + + encode_stream(out_payload_buffer, meshlet.u32_streams[stream_index + 1], stream_buffer); } - } - for (uint32_t i = 0; i < count * stride / 4; i++) - LOGI("Output value %u: %u\n", i, reinterpret_cast(output_buffer.data())[i]); + mesh.meshlets.push_back(meshlet); + + primitive_index += primitives_to_process; + base_vertex_offset += analysis_result.num_vertices; + } - int8_t output[4]; - float float_inputs[4] = { 0.5f, 0.5f, 0.5f }; - vec3 normalized = normalize(vec3(float_inputs[0], float_inputs[1], float_inputs[2])); - float float_output[4]; - meshopt_encodeFilterOct(output, 1, 4, 8, normalized.data); - meshopt_decodeFilterOct(output, 1, 4); + mesh.data_stream_size_u32 = uint32_t(out_payload_buffer.size()); +} - LOGI("Input (%.3f, %.3f, %.3f)\n", normalized[0], normalized[1], normalized[2]); - LOGI("Value (%.3f, %.3f, %.3f)\n", output[0] / 127.0f, output[1] / 127.0f, output[2] / 127.0f); +int main() +{ } \ No newline at end of file From 548c3ae8a56c8b2e6c6ce92d581971110c3abe75 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sat, 17 Jun 2023 20:29:00 +0200 Subject: [PATCH 07/71] Add basic predictor. 
--- tests/meshopt_sandbox.cpp | 86 +++++++++++++++++++++++++++++++++------ 1 file changed, 74 insertions(+), 12 deletions(-) diff --git a/tests/meshopt_sandbox.cpp b/tests/meshopt_sandbox.cpp index a9c50fbc..08744ad5 100644 --- a/tests/meshopt_sandbox.cpp +++ b/tests/meshopt_sandbox.cpp @@ -119,8 +119,43 @@ static uint32_t extract_bit_plane(const uint8_t *bytes, unsigned bit_index) return u32; } +static void find_linear_predictor(uint16_t (&predictor)[8], + const u8vec4 (&stream_buffer)[MaxElements], + unsigned num_elements) +{ + // Sign-extend since the deltas are considered to be signed ints. + ivec4 unrolled_data[MaxElements]; + for (unsigned i = 0; i < num_elements; i++) + unrolled_data[i] = ivec4(i8vec4(stream_buffer[i])); + + // Simple linear regression. + // Pilfered from: https://www.codesansar.com/numerical-methods/linear-regression-method-using-c-programming.htm + ivec4 x{0}, x2{0}, y{0}, xy{0}; + for (unsigned i = 0; i < num_elements; i++) + { + x += int(i); + x2 += int(i * i); + y += unrolled_data[i]; + xy += int(i) * unrolled_data[i]; + } + + int n = int(num_elements); + ivec4 b_denom = (n * x2 - x * x); + b_denom = select(b_denom, ivec4(1), equal(ivec4(0), b_denom)); + + // Encode in u8.8 fixed point. + ivec4 b = (ivec4(256) * (n * xy - x * y)) / b_denom; + ivec4 a = ((ivec4(256) * y - b * x)) / n; + + for (unsigned i = 0; i < 4; i++) + predictor[i] = uint16_t(a[i]); + for (unsigned i = 0; i < 4; i++) + predictor[4 + i] = uint16_t(b[i]); +} + static void encode_stream(std::vector &out_payload_buffer, - MeshletStream &stream, u8vec4 (&stream_buffer)[MaxElements]) + MeshletStream &stream, u8vec4 (&stream_buffer)[MaxElements], + unsigned num_elements) { stream.offset_from_base_u32 = uint32_t(out_payload_buffer.size()); // Simple linear predictor, base equal elements[0], gradient = 0. @@ -133,15 +168,29 @@ static void encode_stream(std::vector &out_payload_buffer, stream.predictor[6] = 0; stream.predictor[7] = 0; + // Find optimal predictor. 
+ find_linear_predictor(stream.predictor, stream_buffer, num_elements); + + // u8.8 fixed point. + auto base_predictor = u16vec4(stream.predictor[0], stream.predictor[1], stream.predictor[2], stream.predictor[3]); + auto linear_predictor = u16vec4(stream.predictor[4], stream.predictor[5], stream.predictor[6], stream.predictor[7]); + // Delta-encode - u8vec4 current_value = stream_buffer[0]; - for (unsigned i = 0; i < MaxElements; i++) + u8vec4 current_value{0}; + for (unsigned i = 0; i < num_elements; i++) { + // Only predict-in bounds elements, since we want all out of bounds elements to be encoded to 0 delta + // without having them affect the predictor. + stream_buffer[i] -= u8vec4((base_predictor + linear_predictor * uint16_t(i)) >> uint16_t(8)); + u8vec4 next_value = stream_buffer[i]; stream_buffer[i] = next_value - current_value; current_value = next_value; } + for (unsigned i = num_elements; i < MaxElements; i++) + stream_buffer[i] = u8vec4(0); + // Encode 32 elements at once. for (unsigned chunk_index = 0; chunk_index < MaxElements / 32; chunk_index++) { @@ -169,9 +218,10 @@ static void encode_mesh(std::vector &out_payload_buffer, MeshMetadata const uint32_t *attributes, unsigned num_u32_streams) { - u8vec4 stream_buffer[MaxElements] = {}; + mesh = {}; mesh.stream_count = num_u32_streams + 1; mesh.data_stream_offset_u32 = 0; // Can be adjusted in isolation later to pack multiple payload streams into one buffer. 
+ mesh.meshlets.reserve((primitive_count + MaxPrimitives - 1) / MaxPrimitives); uint32_t base_vertex_offset = 0; std::unordered_map vbo_remap; @@ -183,6 +233,7 @@ static void encode_mesh(std::vector &out_payload_buffer, MeshMetadata primitives_to_process = analysis_result.num_primitives; MeshletMetadata meshlet = {}; + u8vec4 stream_buffer[MaxElements]; meshlet.base_vertex_offset = base_vertex_offset; meshlet.num_primitives_minus_1 = analysis_result.num_primitives - 1; @@ -198,10 +249,7 @@ static void encode_mesh(std::vector &out_payload_buffer, MeshMetadata stream_buffer[i] = u8vec4(i0, i1, i2, 0); } - for (uint32_t i = analysis_result.num_primitives; i < MaxElements; i++) - stream_buffer[i] = stream_buffer[analysis_result.num_primitives - 1]; - - encode_stream(out_payload_buffer, meshlet.u32_streams[0], stream_buffer); + encode_stream(out_payload_buffer, meshlet.u32_streams[0], stream_buffer, analysis_result.num_primitives); uint64_t vbo_remapping[MaxVertices]; unsigned vbo_index = 0; @@ -219,10 +267,8 @@ static void encode_mesh(std::vector &out_payload_buffer, MeshMetadata uint8_t(payload >> 16), uint8_t(payload >> 24)); } - for (uint32_t i = analysis_result.num_vertices; i < MaxElements; i++) - stream_buffer[i] = stream_buffer[analysis_result.num_vertices - 1]; - - encode_stream(out_payload_buffer, meshlet.u32_streams[stream_index + 1], stream_buffer); + encode_stream(out_payload_buffer, meshlet.u32_streams[stream_index + 1], stream_buffer, + analysis_result.num_vertices); } mesh.meshlets.push_back(meshlet); @@ -236,4 +282,20 @@ static void encode_mesh(std::vector &out_payload_buffer, MeshMetadata int main() { + std::vector out_payload_buffer; + + const uint32_t index_buffer[] = { + 0, 2, 4, + 6, 4, 2, + }; + + const uint32_t u32_stream[] = { + 0, 1, 2, 3, 4, 5, 6, 7, + }; + + MeshMetadata mesh; + encode_mesh(out_payload_buffer, mesh, index_buffer, sizeof(index_buffer) / (3 * sizeof(index_buffer[0])), + u32_stream, 1); + + return 0; } \ No newline at end of 
file From 412f6c54d7dec1295acd85c9206ed43551319e20 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sat, 17 Jun 2023 20:44:51 +0200 Subject: [PATCH 08/71] Noodle some more ... --- tests/meshopt_sandbox.cpp | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/tests/meshopt_sandbox.cpp b/tests/meshopt_sandbox.cpp index 08744ad5..187ca195 100644 --- a/tests/meshopt_sandbox.cpp +++ b/tests/meshopt_sandbox.cpp @@ -18,7 +18,7 @@ static constexpr unsigned MaxVertices = MaxElements; struct MeshletStream { uint32_t offset_from_base_u32; - uint16_t predictor[4 * 2]; + uint16_t predictor[4 * 2 + 2]; uint16_t bitplane_meta[MaxElements / 32]; }; @@ -119,7 +119,7 @@ static uint32_t extract_bit_plane(const uint8_t *bytes, unsigned bit_index) return u32; } -static void find_linear_predictor(uint16_t (&predictor)[8], +static void find_linear_predictor(uint16_t *predictor, const u8vec4 (&stream_buffer)[MaxElements], unsigned num_elements) { @@ -158,15 +158,19 @@ static void encode_stream(std::vector &out_payload_buffer, unsigned num_elements) { stream.offset_from_base_u32 = uint32_t(out_payload_buffer.size()); + // Simple linear predictor, base equal elements[0], gradient = 0. 
- stream.predictor[0] = uint16_t(stream_buffer[0].x) << 8; - stream.predictor[1] = uint16_t(stream_buffer[0].y) << 8; - stream.predictor[2] = uint16_t(stream_buffer[0].z) << 8; - stream.predictor[3] = uint16_t(stream_buffer[0].w) << 8; - stream.predictor[4] = 0; - stream.predictor[5] = 0; - stream.predictor[6] = 0; - stream.predictor[7] = 0; + stream.predictor[8] = uint16_t((stream_buffer[0].x << 8) | stream_buffer[0].y); + stream.predictor[9] = uint16_t((stream_buffer[0].z << 8) | stream_buffer[0].w); + + // Delta-encode + u8vec4 current_value = stream_buffer[0]; + for (unsigned i = 0; i < num_elements; i++) + { + u8vec4 next_value = stream_buffer[i]; + stream_buffer[i] = next_value - current_value; + current_value = next_value; + } // Find optimal predictor. find_linear_predictor(stream.predictor, stream_buffer, num_elements); @@ -175,17 +179,11 @@ static void encode_stream(std::vector &out_payload_buffer, auto base_predictor = u16vec4(stream.predictor[0], stream.predictor[1], stream.predictor[2], stream.predictor[3]); auto linear_predictor = u16vec4(stream.predictor[4], stream.predictor[5], stream.predictor[6], stream.predictor[7]); - // Delta-encode - u8vec4 current_value{0}; for (unsigned i = 0; i < num_elements; i++) { - // Only predict-in bounds elements, since we want all out of bounds elements to be encoded to 0 delta + // Only predict in-bounds elements, since we want all out of bounds elements to be encoded to 0 delta // without having them affect the predictor. stream_buffer[i] -= u8vec4((base_predictor + linear_predictor * uint16_t(i)) >> uint16_t(8)); - - u8vec4 next_value = stream_buffer[i]; - stream_buffer[i] = next_value - current_value; - current_value = next_value; } for (unsigned i = num_elements; i < MaxElements; i++) From ce825f21dd19666944a60244ff2ea19a382287df Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sat, 24 Jun 2023 14:45:06 +0200 Subject: [PATCH 09/71] Mesh decode starting to come together. 
--- tests/meshopt_sandbox.cpp | 190 +++++++++++++++++++++++++++++++++++--- 1 file changed, 175 insertions(+), 15 deletions(-) diff --git a/tests/meshopt_sandbox.cpp b/tests/meshopt_sandbox.cpp index 187ca195..74f57166 100644 --- a/tests/meshopt_sandbox.cpp +++ b/tests/meshopt_sandbox.cpp @@ -98,19 +98,38 @@ static PrimitiveAnalysisResult analyze_primitive_count(std::unordered_map= 0x80u) - v ^= 0xffu; result[i] = v == 0 ? 0 : (32 - leading_zeroes(v)); } return result; } +static uvec4 compute_required_bits_signed(u8vec4 delta) +{ + uvec4 result; + for (unsigned i = 0; i < 4; i++) + { + uint32_t v = delta[i]; + + if (v == 0) + { + result[i] = 0; + } + else + { + if (v >= 0x80u) + v ^= 0xffu; + result[i] = v == 0 ? 1 : (33 - leading_zeroes(v)); + } + } + return result; +} + static uint32_t extract_bit_plane(const uint8_t *bytes, unsigned bit_index) { uint32_t u32 = 0; @@ -159,12 +178,14 @@ static void encode_stream(std::vector &out_payload_buffer, { stream.offset_from_base_u32 = uint32_t(out_payload_buffer.size()); - // Simple linear predictor, base equal elements[0], gradient = 0. - stream.predictor[8] = uint16_t((stream_buffer[0].x << 8) | stream_buffer[0].y); - stream.predictor[9] = uint16_t((stream_buffer[0].z << 8) | stream_buffer[0].w); - // Delta-encode - u8vec4 current_value = stream_buffer[0]; + u8vec4 current_value; + if (num_elements > 1) + current_value = u8vec4(2) * stream_buffer[0] - stream_buffer[1]; + else + current_value = stream_buffer[0]; + u8vec4 bias_value = current_value; + for (unsigned i = 0; i < num_elements; i++) { u8vec4 next_value = stream_buffer[i]; @@ -172,7 +193,7 @@ static void encode_stream(std::vector &out_payload_buffer, current_value = next_value; } - // Find optimal predictor. + // Find optimal linear predictor. find_linear_predictor(stream.predictor, stream_buffer, num_elements); // u8.8 fixed point. 
@@ -189,12 +210,42 @@ static void encode_stream(std::vector &out_payload_buffer, for (unsigned i = num_elements; i < MaxElements; i++) stream_buffer[i] = u8vec4(0); + // Try to adjust the range such that it can fit in fewer bits. + // We can use the constant term in the linear predictor to nudge values in place. + i8vec4 lo(127); + i8vec4 hi(-128); + + for (unsigned i = 0; i < num_elements; i++) + { + lo = min(lo, i8vec4(stream_buffer[i])); + hi = max(hi, i8vec4(stream_buffer[i])); + } + + uvec4 full_bits = compute_required_bits_unsigned(u8vec4(hi - lo)); + u8vec4 target_lo_value = u8vec4(-((uvec4(1) << full_bits) >> 1u)); + u8vec4 bias = target_lo_value - u8vec4(lo); + + for (unsigned i = 0; i < num_elements; i++) + stream_buffer[i] += bias; + + for (unsigned i = 0; i < 4; i++) + stream.predictor[i] -= uint16_t(bias[i]) << 8; + + // Based on the linear predictor, it's possible that the encoded value in stream_buffer[0] becomes non-zero again. + // This is undesirable, since we can use the initial value to force a delta of 0 here, saving precious bits. + bias_value += stream_buffer[0]; + stream_buffer[0] = u8vec4(0); + + // Simple linear predictor, base equal elements[0], gradient = 0. + stream.predictor[8] = uint16_t((bias_value.y << 8) | bias_value.x); + stream.predictor[9] = uint16_t((bias_value.w << 8) | bias_value.z); + // Encode 32 elements at once. for (unsigned chunk_index = 0; chunk_index < MaxElements / 32; chunk_index++) { uvec4 required_bits = {}; for (unsigned i = 0; i < 32; i++) - required_bits = max(required_bits, compute_required_bits(stream_buffer[chunk_index * 32 + i])); + required_bits = max(required_bits, compute_required_bits_signed(stream_buffer[chunk_index * 32 + i])); // Encode bit counts. 
stream.bitplane_meta[chunk_index] = uint16_t((required_bits.x << 0) | (required_bits.y << 4) | @@ -278,22 +329,131 @@ static void encode_mesh(std::vector &out_payload_buffer, MeshMetadata mesh.data_stream_size_u32 = uint32_t(out_payload_buffer.size()); } +static void decode_mesh(std::vector &out_index_buffer, std::vector &out_u32_stream, + const std::vector &payload, const MeshMetadata &mesh) +{ + assert(mesh.stream_count > 1); + assert(mesh.stream_meta[0].type == StreamType::Primitive); + assert(mesh.stream_meta[0].stream_index_component == 0); + + const unsigned u32_stride = mesh.stream_count - 1; + unsigned index_count = 0; + unsigned attr_count = 0; + + for (auto &meshlet : mesh.meshlets) + { + index_count += (meshlet.num_primitives_minus_1 + 1) * 3; + attr_count += meshlet.num_attributes_minus_1 + 1; + } + + out_index_buffer.clear(); + out_u32_stream.clear(); + out_index_buffer.reserve(index_count); + out_u32_stream.resize(attr_count * (mesh.stream_count - 1)); + + for (auto &meshlet : mesh.meshlets) + { + for (unsigned stream_index = 0; stream_index < mesh.stream_count; stream_index++) + { + auto &stream = meshlet.u32_streams[stream_index]; + const uint32_t *pdata = payload.data() + mesh.data_stream_offset_u32 + stream.offset_from_base_u32; + + u8vec4 deltas[MaxElements] = {}; + const u16vec4 base_predictor = u16vec4( + stream.predictor[0], stream.predictor[1], + stream.predictor[2], stream.predictor[3]); + const u16vec4 linear_predictor = u16vec4( + stream.predictor[4], stream.predictor[5], + stream.predictor[6], stream.predictor[7]); + const u8vec4 initial_value = + u8vec4(u16vec2(stream.predictor[8], stream.predictor[9]).xxyy() >> u16vec4(0, 8, 0, 8)); + + for (unsigned chunk = 0; chunk < (MaxElements / 32); chunk++) + { + auto bits_per_u8 = (uvec4(stream.bitplane_meta[chunk]) >> uvec4(0, 4, 8, 12)) & 0xfu; + uvec4 bitplanes[8] = {}; + + for (unsigned comp = 0; comp < 4; comp++) + { + for (unsigned bit = 0; bit < bits_per_u8[comp]; bit++) + 
bitplanes[bit][comp] = *pdata++; + // Sign-extend. + + unsigned bit_count = bits_per_u8[comp]; + if (bit_count) + for (unsigned bit = bit_count; bit < 8; bit++) + bitplanes[bit][comp] = bitplanes[bit_count - 1][comp]; + } + + for (unsigned i = 0; i < 32; i++) + { + for (uint32_t bit = 0; bit < 8; bit++) + deltas[i] |= u8vec4(((bitplanes[bit] >> i) & 1u) << bit); + } + } + + // Apply predictors. + deltas[0] += initial_value; + for (unsigned i = 0; i < MaxElements; i++) + deltas[i] += u8vec4((base_predictor + linear_predictor * u16vec4(i)) >> u16vec4(8)); + + // Resolve deltas. + for (unsigned i = 1; i < MaxElements; i++) + deltas[i] += deltas[i - 1]; + + if (stream_index == 0) + { + // Index decode. + unsigned num_primitives = meshlet.num_primitives_minus_1 + 1; + for (unsigned i = 0; i < num_primitives; i++) + for (unsigned j = 0; j < 3; j++) + out_index_buffer.push_back(deltas[i][j]); + } + else + { + // Attributes. + unsigned num_attributes = meshlet.num_attributes_minus_1 + 1; + auto *out_attr = out_u32_stream.data() + meshlet.base_vertex_offset * u32_stride + (stream_index - 1); + for (unsigned i = 0; i < num_attributes; i++, out_attr += u32_stride) + memcpy(out_attr, deltas[i].data, sizeof(*out_attr)); + } + } + } +} + int main() { std::vector out_payload_buffer; const uint32_t index_buffer[] = { - 0, 2, 4, - 6, 4, 2, + 0, 0, 0, + 1, 1, 1, + 2, 2, 2, + 3, 3, 3, + 4, 4, 4, + 5, 5, 5, + 6, 6, 6, + 7, 7, 7, + 8, 8, 8, + 9, 9, 9, + 10, 10, 10, + 11, 11, 11, }; const uint32_t u32_stream[] = { - 0, 1, 2, 3, 4, 5, 6, 7, + 189, 24, 26, 96, + 500, 800, 400, 300, + 891, 1242, 8654, 14324, }; MeshMetadata mesh; - encode_mesh(out_payload_buffer, mesh, index_buffer, sizeof(index_buffer) / (3 * sizeof(index_buffer[0])), - u32_stream, 1); + encode_mesh(out_payload_buffer, mesh, index_buffer, + sizeof(index_buffer) / (3 * sizeof(index_buffer[0])), + u32_stream, 1); + + std::vector out_index_buffer; + std::vector out_u32_stream; + decode_mesh(out_index_buffer, out_u32_stream, 
out_payload_buffer, mesh); return 0; } \ No newline at end of file From e7dab4afd3ebe672ff488126bb071e6cfa319211 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sat, 24 Jun 2023 15:29:56 +0200 Subject: [PATCH 10/71] Fix more bugs. --- tests/meshopt_sandbox.cpp | 43 ++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/tests/meshopt_sandbox.cpp b/tests/meshopt_sandbox.cpp index 74f57166..3a8e7b30 100644 --- a/tests/meshopt_sandbox.cpp +++ b/tests/meshopt_sandbox.cpp @@ -292,18 +292,22 @@ static void encode_mesh(std::vector &out_payload_buffer, MeshMetadata // Encode index buffer. for (uint32_t i = 0; i < analysis_result.num_primitives; i++) { - uint8_t i0 = vbo_remap[index_buffer[3 * i + 0]]; - uint8_t i1 = vbo_remap[index_buffer[3 * i + 1]]; - uint8_t i2 = vbo_remap[index_buffer[3 * i + 2]]; + uint8_t i0 = vbo_remap[index_buffer[3 * (primitive_index + i) + 0]]; + uint8_t i1 = vbo_remap[index_buffer[3 * (primitive_index + i) + 1]]; + uint8_t i2 = vbo_remap[index_buffer[3 * (primitive_index + i) + 2]]; stream_buffer[i] = u8vec4(i0, i1, i2, 0); } encode_stream(out_payload_buffer, meshlet.u32_streams[0], stream_buffer, analysis_result.num_primitives); - uint64_t vbo_remapping[MaxVertices]; + // Handle spill region just in case. 
+ uint64_t vbo_remapping[MaxVertices + 3]; unsigned vbo_index = 0; for (auto &v : vbo_remap) + { + assert(vbo_index < MaxVertices + 3); vbo_remapping[vbo_index++] = (uint64_t(v.second) << 32) | v.first; + } std::sort(vbo_remapping, vbo_remapping + analysis_result.num_vertices); for (uint32_t stream_index = 0; stream_index < num_u32_streams; stream_index++) @@ -388,7 +392,7 @@ static void decode_mesh(std::vector &out_index_buffer, std::vector> i) & 1u) << bit); + deltas[chunk * 32 + i] |= u8vec4(((bitplanes[bit] >> i) & 1u) << bit); } } @@ -407,7 +411,7 @@ static void decode_mesh(std::vector &out_index_buffer, std::vector out_payload_buffer; - const uint32_t index_buffer[] = { - 0, 0, 0, - 1, 1, 1, - 2, 2, 2, - 3, 3, 3, - 4, 4, 4, - 5, 5, 5, - 6, 6, 6, - 7, 7, 7, - 8, 8, 8, - 9, 9, 9, - 10, 10, 10, - 11, 11, 11, - }; - - const uint32_t u32_stream[] = { - 189, 24, 26, 96, - 500, 800, 400, 300, - 891, 1242, 8654, 14324, - }; + uint32_t index_buffer[32 * 3]; + uint32_t u32_stream[32 * 3]; + for (unsigned i = 0; i < 32 * 3; i++) + { + index_buffer[i] = i; + u32_stream[i] = 3 * i; + } MeshMetadata mesh; encode_mesh(out_payload_buffer, mesh, index_buffer, From 9f167f954078ecfbec6fb69d6bb31acc24660150 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sat, 24 Jun 2023 17:32:37 +0200 Subject: [PATCH 11/71] More testing. 
--- tests/meshopt_sandbox.cpp | 154 +++++++++++++++++++++++++++++++++----- 1 file changed, 137 insertions(+), 17 deletions(-) diff --git a/tests/meshopt_sandbox.cpp b/tests/meshopt_sandbox.cpp index 3a8e7b30..90483f71 100644 --- a/tests/meshopt_sandbox.cpp +++ b/tests/meshopt_sandbox.cpp @@ -5,6 +5,9 @@ #include "muglm/muglm_impl.hpp" #include #include "bitops.hpp" +#include "gltf.hpp" +#include "meshoptimizer.h" +#include "global_managers_init.hpp" #include #include using namespace Granite; @@ -295,6 +298,7 @@ static void encode_mesh(std::vector &out_payload_buffer, MeshMetadata uint8_t i0 = vbo_remap[index_buffer[3 * (primitive_index + i) + 0]]; uint8_t i1 = vbo_remap[index_buffer[3 * (primitive_index + i) + 1]]; uint8_t i2 = vbo_remap[index_buffer[3 * (primitive_index + i) + 2]]; + //LOGI("Prim %u = { %u, %u, %u }\n", i, i0, i1, i2); stream_buffer[i] = u8vec4(i0, i1, i2, 0); } @@ -308,7 +312,7 @@ static void encode_mesh(std::vector &out_payload_buffer, MeshMetadata assert(vbo_index < MaxVertices + 3); vbo_remapping[vbo_index++] = (uint64_t(v.second) << 32) | v.first; } - std::sort(vbo_remapping, vbo_remapping + analysis_result.num_vertices); + std::sort(vbo_remapping, vbo_remapping + vbo_index); for (uint32_t stream_index = 0; stream_index < num_u32_streams; stream_index++) { @@ -316,8 +320,7 @@ static void encode_mesh(std::vector &out_payload_buffer, MeshMetadata { auto vertex_index = uint32_t(vbo_remapping[i]); uint32_t payload = attributes[stream_index + num_u32_streams * vertex_index]; - stream_buffer[i] = u8vec4(uint8_t(payload >> 0), uint8_t(payload >> 8), - uint8_t(payload >> 16), uint8_t(payload >> 24)); + memcpy(stream_buffer[i].data, &payload, sizeof(payload)); } encode_stream(out_payload_buffer, meshlet.u32_streams[stream_index + 1], stream_buffer, @@ -381,6 +384,7 @@ static void decode_mesh(std::vector &out_index_buffer, std::vector &out_index_buffer, std::vector &decoded_index_buffer, + const std::vector &decoded_u32_stream, + const std::vector 
&reference_index_buffer, + const std::vector &reference_u32_stream, unsigned u32_stride) { - std::vector out_payload_buffer; + std::vector decoded_output; + std::vector reference_output; + + if (decoded_index_buffer.size() != reference_index_buffer.size()) + return false; + + size_t count = decoded_index_buffer.size(); + + decoded_output.reserve(count * u32_stride); + reference_output.reserve(count * u32_stride); + for (size_t i = 0; i < count; i++) + { + uint32_t decoded_index = decoded_index_buffer[i]; + decoded_output.insert(decoded_output.end(), + decoded_u32_stream.data() + decoded_index * u32_stride, + decoded_u32_stream.data() + (decoded_index + 1) * u32_stride); + + uint32_t reference_index = reference_index_buffer[i]; + reference_output.insert(reference_output.end(), + reference_u32_stream.data() + reference_index * u32_stride, + reference_u32_stream.data() + (reference_index + 1) * u32_stride); + } - uint32_t index_buffer[32 * 3]; - uint32_t u32_stream[32 * 3]; - for (unsigned i = 0; i < 32 * 3; i++) + for (size_t i = 0; i < count; i++) { - index_buffer[i] = i; - u32_stream[i] = 3 * i; + for (unsigned j = 0; j < u32_stride; j++) + { + uint32_t decoded_value = decoded_output[i * u32_stride + j]; + uint32_t reference_value = reference_output[i * u32_stride + j]; + if (decoded_value != reference_value) + { + LOGI("Error in index %zu (prim %zu), word %u, expected %x, got %x.\n", + i, i / 3, j, reference_value, decoded_value); + return false; + } + } } - MeshMetadata mesh; - encode_mesh(out_payload_buffer, mesh, index_buffer, - sizeof(index_buffer) / (3 * sizeof(index_buffer[0])), - u32_stream, 1); + return true; +} - std::vector out_index_buffer; - std::vector out_u32_stream; - decode_mesh(out_index_buffer, out_u32_stream, out_payload_buffer, mesh); +int main(int argc, char *argv[]) +{ + if (argc != 2) + return EXIT_FAILURE; + + Global::init(Global::MANAGER_FEATURE_FILESYSTEM_BIT); + + GLTF::Parser parser(argv[1]); + + for (auto &mesh_ : parser.get_meshes()) 
+ { + auto mesh = mesh_; + unsigned u32_stride = (mesh.position_stride + mesh.attribute_stride) / sizeof(uint32_t); + + if (mesh.indices.empty() || mesh.primitive_restart || mesh.topology != VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST) + { + LOGI("Unexpected mesh.\n"); + continue; + } + + std::vector index_buffer; + std::vector attr_buffer; + size_t vertex_count = mesh.positions.size() / mesh.position_stride; + attr_buffer.resize(u32_stride * vertex_count); + index_buffer.resize(mesh.count); + + if (mesh.index_type == VK_INDEX_TYPE_UINT32) + { + memcpy(index_buffer.data(), mesh.indices.data(), mesh.count * sizeof(uint32_t)); + } + else if (mesh.index_type == VK_INDEX_TYPE_UINT16) + { + auto *indices = reinterpret_cast(mesh.indices.data()); + for (unsigned i = 0; i < mesh.count; i++) + index_buffer[i] = indices[i]; + } + else if (mesh.index_type == VK_INDEX_TYPE_UINT8_EXT) + { + auto *indices = reinterpret_cast(mesh.indices.data()); + for (unsigned i = 0; i < mesh.count; i++) + index_buffer[i] = indices[i]; + } + else + continue; + + LOGI("=== Testing mesh ===\n"); + + for (size_t i = 0; i < vertex_count; i++) + { + memcpy(attr_buffer.data() + u32_stride * i, mesh.positions.data() + i * mesh.position_stride, mesh.position_stride); + memcpy(attr_buffer.data() + u32_stride * i + mesh.position_stride / sizeof(uint32_t), + mesh.attributes.data() + i * mesh.attribute_stride, mesh.attribute_stride); + } + + LOGI("Mesh payload size = %zu bytes.\n", (index_buffer.size() + attr_buffer.size()) * sizeof(uint32_t)); + + std::vector optimized_index_buffer(index_buffer.size()); + meshopt_optimizeVertexCache(optimized_index_buffer.data(), index_buffer.data(), mesh.count, vertex_count); + + std::vector out_payload_buffer; + MeshMetadata encoded_mesh; + encode_mesh(out_payload_buffer, encoded_mesh, + optimized_index_buffer.data(), optimized_index_buffer.size() / 3, + attr_buffer.data(), u32_stride); + + unsigned prim_offset = 0; + unsigned meshlet_index = 0; + for (auto &meshlet : 
encoded_mesh.meshlets) + { + LOGI("Meshlet #%u (%u prims, %u attrs), offset %u.\n", + meshlet_index, meshlet.num_primitives_minus_1 + 1, meshlet.num_attributes_minus_1 + 1, prim_offset); + prim_offset += meshlet.num_primitives_minus_1 + 1; + meshlet_index++; + } + + LOGI("Encoded payload size = %zu bytes.\n", out_payload_buffer.size() * sizeof(uint32_t)); + LOGI("u32 stride = %u\n", u32_stride); + + std::vector decoded_index_buffer; + std::vector decoded_u32_stream; + decode_mesh(decoded_index_buffer, decoded_u32_stream, out_payload_buffer, encoded_mesh); + + if (!validate_mesh_decode(decoded_index_buffer, decoded_u32_stream, optimized_index_buffer, attr_buffer, u32_stride)) + { + LOGE("Failed to validate mesh.\n"); + return EXIT_FAILURE; + } + + LOGI("=====================\n"); + } return 0; } \ No newline at end of file From afc28833d8a4e59b812caf3b6dc7ad13df2ba509 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sun, 25 Jun 2023 20:55:43 +0200 Subject: [PATCH 12/71] Enable VK_KHR_shader_subgroup_extended_types. 
--- vulkan/context.cpp | 8 ++++++++ vulkan/context.hpp | 1 + 2 files changed, 9 insertions(+) diff --git a/vulkan/context.cpp b/vulkan/context.cpp index c45f7c5a..4993adc1 100644 --- a/vulkan/context.cpp +++ b/vulkan/context.cpp @@ -1294,6 +1294,7 @@ bool Context::create_device(VkPhysicalDevice gpu_, VkSurfaceKHR surface, ext.pipeline_creation_cache_control_features = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES_EXT }; ext.pageable_device_local_memory_features = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PAGEABLE_DEVICE_LOCAL_MEMORY_FEATURES_EXT }; ext.mesh_shader_features = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_EXT }; + ext.shader_subgroup_extended_types_features = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES }; ext.compute_shader_derivative_features = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV }; ext.device_generated_commands_features = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEVICE_GENERATED_COMMANDS_FEATURES_NV }; @@ -1476,6 +1477,13 @@ bool Context::create_device(VkPhysicalDevice gpu_, VkSurfaceKHR surface, ext.supports_spirv_1_4 = true; } + if (has_extension(VK_KHR_SHADER_SUBGROUP_EXTENDED_TYPES_EXTENSION_NAME)) + { + enabled_extensions.push_back(VK_KHR_SHADER_SUBGROUP_EXTENDED_TYPES_EXTENSION_NAME); + *ppNext = &ext.shader_subgroup_extended_types_features; + ppNext = &ext.shader_subgroup_extended_types_features.pNext; + } + if ((flags & CONTEXT_CREATION_ENABLE_ADVANCED_WSI_BIT) != 0 && requires_swapchain) { bool broken_present_wait = ext.driver_properties.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY && diff --git a/vulkan/context.hpp b/vulkan/context.hpp index 2b3d2cba..680571fe 100644 --- a/vulkan/context.hpp +++ b/vulkan/context.hpp @@ -103,6 +103,7 @@ struct DeviceFeatures VkPhysicalDeviceFloatControlsPropertiesKHR float_control_properties = {}; VkPhysicalDeviceBufferDeviceAddressFeaturesKHR buffer_device_address_features = {}; 
VkPhysicalDeviceIDProperties id_properties = {}; + VkPhysicalDeviceShaderSubgroupExtendedTypesFeaturesKHR shader_subgroup_extended_types_features = {}; // EXT VkPhysicalDeviceExternalMemoryHostPropertiesEXT host_memory_properties = {}; From 1f271ffffc0d51c450b36c3fe80e3772f60b9923 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sun, 25 Jun 2023 20:55:58 +0200 Subject: [PATCH 13/71] Add initial meshlet decode compute shader. --- assets/shaders/decode/meshlet_decode.comp | 170 ++++++++++++++++++++++ 1 file changed, 170 insertions(+) create mode 100644 assets/shaders/decode/meshlet_decode.comp diff --git a/assets/shaders/decode/meshlet_decode.comp b/assets/shaders/decode/meshlet_decode.comp new file mode 100644 index 00000000..4fbda5dc --- /dev/null +++ b/assets/shaders/decode/meshlet_decode.comp @@ -0,0 +1,170 @@ +#version 450 +#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require +#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require +#extension GL_KHR_shader_subgroup_arithmetic : require +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_EXT_shader_subgroup_extended_types_int8 : require +#extension GL_EXT_scalar_block_layout : require + +#define MAX_ELEMENTS 256 +#define NUM_CHUNKS 8 + +layout(local_size_x = 32, local_size_y = NUM_CHUNKS) in; + +layout(constant_id = 0) const uint NUM_U32_STREAMS = 1; + +struct MeshletStream +{ + uint offset_from_base; + u16vec4 predictor_a; + u16vec4 predictor_b; + u8vec4 initial_value; + uint16_t bitplane_meta[8]; +}; + +struct MeshletMeta +{ + uint base_vertex_offset; + uint8_t num_primitives_minus_1; + uint8_t num_attributes_minus_1; + uint16_t reserved; +}; + +layout(set = 0, binding = 0, std430) readonly buffer MeshletMetas +{ + MeshletMeta data[]; +} meshlet_metas; + +layout(set = 0, binding = 1, std430) readonly buffer MeshletStreams +{ + MeshletStream data[]; +} meshlet_streams; + +layout(set = 0, binding = 2, std430) writeonly buffer OutputAttributes +{ + uint data[]; +} 
output_payload; + +layout(set = 0, binding = 3, scalar) writeonly buffer OutputIndices +{ + uvec3 data[]; +} output_indices; + +layout(set = 0, binding = 4, std430) readonly buffer Payload +{ + uint data[]; +} payload; + +layout(set = 0, binding = 5, std430) readonly buffer OutputOffsets +{ + uvec2 data[]; +} output_offset_strides; + +shared ivec4 shared_chunk_bit_counts[NUM_U32_STREAMS][NUM_CHUNKS]; +shared uint shared_chunk_offset[NUM_U32_STREAMS][NUM_CHUNKS]; +shared u8vec4 chunk_values[NUM_CHUNKS]; + +// Hardcodes wave32 atm. Need fallback. + +void main() +{ + uint meshlet_index = gl_WorkGroupID.x; + int subgroup_lane = int(gl_SubgroupInvocationID); + + for (uint stream_index = gl_SubgroupID; stream_index < NUM_U32_STREAMS; stream_index += gl_NumSubgroups) + { + // Start by decoding the offset for bitplanes for all u32 streams. + if (subgroup_lane < int(gl_WorkGroupSize.y)) + { + uint bitplane_value = uint(meshlet_streams.data[stream_index + NUM_U32_STREAMS * meshlet_index].bitplane_meta[subgroup_lane]); + ivec4 bit_counts = (ivec4(bitplane_value) >> ivec4(0, 4, 8, 12)) & 0xf; + uint total_bits = bit_counts.x + bit_counts.y + bit_counts.z + bit_counts.w; + uint offset = meshlet_streams.data[stream_index + NUM_U32_STREAMS * meshlet_index].offset_from_base; + shared_chunk_offset[stream_index][subgroup_lane] = subgroupExclusiveAdd(total_bits) + offset; + shared_chunk_bit_counts[stream_index][subgroup_lane] = bit_counts; + } + } + + barrier(); + + MeshletMeta meta = meshlet_metas.data[meshlet_index]; + + uint unrolled_stream_index = NUM_U32_STREAMS * meshlet_index; + + for (uint i = 0; i < NUM_U32_STREAMS; i++, unrolled_stream_index++) + { + uint offset_from_base = meshlet_streams.data[unrolled_stream_index].offset_from_base; + u16vec4 predictor_a = meshlet_streams.data[unrolled_stream_index].predictor_a; + u16vec4 predictor_b = meshlet_streams.data[unrolled_stream_index].predictor_b; + u8vec4 initial_value = 
meshlet_streams.data[unrolled_stream_index].initial_value; + + uint chunk_id = gl_LocalInvocationID.y; + uint bitplane_offsets = shared_chunk_offset[i][chunk_id]; + ivec4 bit_counts = shared_chunk_bit_counts[i][chunk_id]; + + uvec4 decoded = ivec4(0); + + for (int i = 0; i < bit_counts.x; i++) + { + uint value = payload.data[bitplane_offsets++]; + decoded.x |= bitfieldExtract(value, subgroup_lane, 1) << i; + } + decoded.x = bitfieldExtract(int(decoded.x), 0, bit_counts.x); + + for (int i = 0; i < bit_counts.y; i++) + { + uint value = payload.data[bitplane_offsets++]; + decoded.y |= bitfieldExtract(value, subgroup_lane, 1) << i; + } + decoded.y = bitfieldExtract(int(decoded.y), 0, bit_counts.y); + + for (int i = 0; i < bit_counts.z; i++) + { + uint value = payload.data[bitplane_offsets++]; + decoded.z |= bitfieldExtract(value, subgroup_lane, 1) << i; + } + decoded.z = bitfieldExtract(int(decoded.z), 0, bit_counts.z); + + for (int i = 0; i < bit_counts.w; i++) + { + uint value = payload.data[bitplane_offsets++]; + decoded.w |= bitfieldExtract(value, subgroup_lane, 1) << i; + } + decoded.w = bitfieldExtract(int(decoded.w), 0, bit_counts.w); + + // Resolve deltas in packed 4x8 math. + u8vec4 packed_decoded = u8vec4(decoded); + if (gl_LocalInvocationID.x == 0u) + packed_decoded += initial_value; + packed_decoded += u8vec4((predictor_a + predictor_b * uint16_t(subgroup_lane)) >> 8us); + packed_decoded = subgroupInclusiveAdd(packed_decoded); + + if (i > 0) + barrier(); // Resolve WAR hazard from last iteration. + if (subgroup_lane == int(gl_WorkGroupSize.x) - 1) + chunk_values[chunk_id] = packed_decoded; + barrier(); + if (gl_SubgroupID == 0u && subgroup_lane < int(gl_WorkGroupSize.y)) + chunk_values[subgroup_lane] = subgroupExclusiveAdd(chunk_values[subgroup_lane]); + barrier(); + packed_decoded += chunk_values[chunk_id]; + + uint linear_index = chunk_id * gl_WorkGroupSize.x + subgroup_lane; + if (i == 0) + { + // Write index buffer. 
+ uvec3 indices = uvec3(packed_decoded.xyz); + indices += meta.base_vertex_offset; + uint output_offset = output_offset_strides.data[unrolled_stream_index].x; + if (linear_index <= uint(meta.num_primitives_minus_1)) + output_indices.data[output_offset + linear_index] = indices; + } + else + { + // TODO: decode filters? Should probably be deferred to vertex / mesh shader. + uvec2 output_offset_stride = output_offset_strides.data[unrolled_stream_index]; + if (linear_index <= uint(meta.num_attributes_minus_1)) + output_payload.data[output_offset_stride.x + linear_index * output_offset_stride.y] = pack32(packed_decoded); + } + } +} From 13234473c7ff4d52ea94b036664060c9384848e1 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sat, 1 Jul 2023 14:42:06 +0200 Subject: [PATCH 14/71] Hook up GPU testing. --- assets/shaders/decode/meshlet_decode.comp | 11 +- tests/meshopt_sandbox.cpp | 120 ++++++++++++++++++++-- 2 files changed, 120 insertions(+), 11 deletions(-) diff --git a/assets/shaders/decode/meshlet_decode.comp b/assets/shaders/decode/meshlet_decode.comp index 4fbda5dc..78931632 100644 --- a/assets/shaders/decode/meshlet_decode.comp +++ b/assets/shaders/decode/meshlet_decode.comp @@ -19,7 +19,7 @@ struct MeshletStream u16vec4 predictor_a; u16vec4 predictor_b; u8vec4 initial_value; - uint16_t bitplane_meta[8]; + uint16_t bitplane_meta[NUM_CHUNKS]; }; struct MeshletMeta @@ -60,7 +60,7 @@ layout(set = 0, binding = 5, std430) readonly buffer OutputOffsets uvec2 data[]; } output_offset_strides; -shared ivec4 shared_chunk_bit_counts[NUM_U32_STREAMS][NUM_CHUNKS]; +shared i16vec4 shared_chunk_bit_counts[NUM_U32_STREAMS][NUM_CHUNKS]; shared uint shared_chunk_offset[NUM_U32_STREAMS][NUM_CHUNKS]; shared u8vec4 chunk_values[NUM_CHUNKS]; @@ -77,8 +77,9 @@ void main() if (subgroup_lane < int(gl_WorkGroupSize.y)) { uint bitplane_value = uint(meshlet_streams.data[stream_index + NUM_U32_STREAMS * meshlet_index].bitplane_meta[subgroup_lane]); - ivec4 bit_counts = 
(ivec4(bitplane_value) >> ivec4(0, 4, 8, 12)) & 0xf; - uint total_bits = bit_counts.x + bit_counts.y + bit_counts.z + bit_counts.w; + i16vec4 bit_counts = (i16vec4(bitplane_value) >> i16vec4(0, 4, 8, 12)) & 0xfus; + i16vec2 bit_counts2 = bit_counts.xy + bit_counts.zw; + uint total_bits = bit_counts2.x + bit_counts2.y; uint offset = meshlet_streams.data[stream_index + NUM_U32_STREAMS * meshlet_index].offset_from_base; shared_chunk_offset[stream_index][subgroup_lane] = subgroupExclusiveAdd(total_bits) + offset; shared_chunk_bit_counts[stream_index][subgroup_lane] = bit_counts; @@ -100,7 +101,7 @@ void main() uint chunk_id = gl_LocalInvocationID.y; uint bitplane_offsets = shared_chunk_offset[i][chunk_id]; - ivec4 bit_counts = shared_chunk_bit_counts[i][chunk_id]; + ivec4 bit_counts = ivec4(shared_chunk_bit_counts[i][chunk_id]); uvec4 decoded = ivec4(0); diff --git a/tests/meshopt_sandbox.cpp b/tests/meshopt_sandbox.cpp index 90483f71..76458b05 100644 --- a/tests/meshopt_sandbox.cpp +++ b/tests/meshopt_sandbox.cpp @@ -2,6 +2,8 @@ #include #include #include "math.hpp" +#include "device.hpp" +#include "context.hpp" #include "muglm/muglm_impl.hpp" #include #include "bitops.hpp" @@ -25,12 +27,16 @@ struct MeshletStream uint16_t bitplane_meta[MaxElements / 32]; }; -struct MeshletMetadata +struct MeshletMetadataGPU { uint32_t base_vertex_offset; uint8_t num_primitives_minus_1; uint8_t num_attributes_minus_1; uint16_t reserved; +}; + +struct MeshletMetadata : MeshletMetadataGPU +{ MeshletStream u32_streams[MaxU32Streams]; }; @@ -336,14 +342,13 @@ static void encode_mesh(std::vector &out_payload_buffer, MeshMetadata mesh.data_stream_size_u32 = uint32_t(out_payload_buffer.size()); } -static void decode_mesh(std::vector &out_index_buffer, std::vector &out_u32_stream, - const std::vector &payload, const MeshMetadata &mesh) +static void decode_mesh_setup_buffers( + std::vector &out_index_buffer, std::vector &out_u32_stream, const MeshMetadata &mesh) { assert(mesh.stream_count > 
1); assert(mesh.stream_meta[0].type == StreamType::Primitive); assert(mesh.stream_meta[0].stream_index_component == 0); - const unsigned u32_stride = mesh.stream_count - 1; unsigned index_count = 0; unsigned attr_count = 0; @@ -357,6 +362,13 @@ static void decode_mesh(std::vector &out_index_buffer, std::vector &out_index_buffer, std::vector &out_u32_stream, + const std::vector &payload, const MeshMetadata &mesh) +{ + decode_mesh_setup_buffers(out_index_buffer, out_u32_stream, mesh); + const unsigned u32_stride = mesh.stream_count - 1; for (auto &meshlet : mesh.meshlets) { @@ -429,6 +441,84 @@ static void decode_mesh(std::vector &out_index_buffer, std::vector &out_index_buffer, std::vector &out_u32_stream, + const std::vector &payload, const MeshMetadata &mesh) +{ + decode_mesh_setup_buffers(out_index_buffer, out_u32_stream, mesh); + const uint32_t u32_stride = mesh.stream_count - 1; + + Vulkan::BufferCreateInfo buf_info = {}; + buf_info.domain = Vulkan::BufferDomain::LinkedDeviceHost; + buf_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + + std::vector meshlet_metas; + meshlet_metas.reserve(mesh.meshlets.size()); + for (auto &meshlet : mesh.meshlets) + meshlet_metas.push_back(meshlet); + buf_info.size = mesh.meshlets.size() * sizeof(MeshletMetadataGPU); + auto meshlet_meta_buffer = dev.create_buffer(buf_info, meshlet_metas.data()); + + std::vector meshlet_streams; + meshlet_streams.reserve(mesh.meshlets.size() * mesh.stream_count); + for (auto &meshlet : mesh.meshlets) + for (unsigned i = 0; i < mesh.stream_count; i++) + meshlet_streams.push_back(meshlet.u32_streams[i]); + buf_info.size = mesh.meshlets.size() * sizeof(MeshletMetadataGPU); + auto meshlet_stream_buffer = dev.create_buffer(buf_info, meshlet_streams.data()); + + buf_info.size = payload.size() * sizeof(uint32_t); + auto payload_buffer = dev.create_buffer(buf_info, payload.data()); + + buf_info.size = out_index_buffer.size() * sizeof(uint32_t); + buf_info.domain = Vulkan::BufferDomain::CachedHost; + 
auto decoded_index_buffer = dev.create_buffer(buf_info, out_index_buffer.data()); + + buf_info.size = out_u32_stream.size() * sizeof(uint32_t); + buf_info.domain = Vulkan::BufferDomain::CachedHost; + auto decoded_u32_buffer = dev.create_buffer(buf_info, out_u32_stream.data()); + + std::vector output_offset_strides; + output_offset_strides.reserve(mesh.meshlets.size() * mesh.stream_count); + + uint32_t index_count = 0; + for (auto &meshlet : mesh.meshlets) + { + output_offset_strides.emplace_back(index_count, 0); + index_count += meshlet.num_primitives_minus_1 + 1; + for (uint32_t i = 1; i < mesh.stream_count; i++) + output_offset_strides.emplace_back(meshlet.base_vertex_offset * u32_stride + (i - 1), u32_stride); + } + + buf_info.domain = Vulkan::BufferDomain::LinkedDeviceHost; + buf_info.size = output_offset_strides.size() * sizeof(uvec2); + auto output_offset_strides_buffer = dev.create_buffer(buf_info, output_offset_strides.data()); + + auto cmd = dev.request_command_buffer(); + cmd->set_program("assets://shaders/decode/meshlet_decode.comp"); + cmd->set_subgroup_size_log2(true, 5, 5); + cmd->set_storage_buffer(0, 0, *meshlet_meta_buffer); + cmd->set_storage_buffer(0, 1, *meshlet_stream_buffer); + cmd->set_storage_buffer(0, 2, *payload_buffer); + cmd->set_storage_buffer(0, 3, *decoded_index_buffer); + cmd->set_storage_buffer(0, 4, *decoded_u32_buffer); + cmd->set_storage_buffer(0, 5, *output_offset_strides_buffer); + cmd->dispatch(uint32_t(mesh.meshlets.size()), 1, 1); + cmd->barrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, + VK_PIPELINE_STAGE_HOST_BIT, VK_ACCESS_HOST_READ_BIT); + dev.submit(cmd); + dev.wait_idle(); + + memcpy(out_index_buffer.data(), + dev.map_host_buffer(*decoded_index_buffer, Vulkan::MEMORY_ACCESS_READ_BIT), + out_index_buffer.size() * sizeof(uint32_t)); + + memcpy(out_u32_stream.data(), + dev.map_host_buffer(*decoded_u32_buffer, Vulkan::MEMORY_ACCESS_READ_BIT), + out_u32_stream.size() * sizeof(uint32_t)); +} 
+ static bool validate_mesh_decode(const std::vector &decoded_index_buffer, const std::vector &decoded_u32_stream, const std::vector &reference_index_buffer, @@ -484,9 +574,20 @@ int main(int argc, char *argv[]) GLTF::Parser parser(argv[1]); - for (auto &mesh_ : parser.get_meshes()) + Vulkan::Context ctx; + Vulkan::Device dev; + if (!Vulkan::Context::init_loader(nullptr)) + return EXIT_FAILURE; + + Vulkan::Context::SystemHandles handles; + handles.filesystem = GRANITE_FILESYSTEM(); + ctx.set_system_handles(handles); + if (!ctx.init_instance_and_device(nullptr, 0, nullptr, 0)) + return EXIT_FAILURE; + dev.set_context(ctx); + + for (auto &mesh : parser.get_meshes()) { - auto mesh = mesh_; unsigned u32_stride = (mesh.position_stride + mesh.attribute_stride) / sizeof(uint32_t); if (mesh.indices.empty() || mesh.primitive_restart || mesh.topology != VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST) @@ -563,6 +664,13 @@ int main(int argc, char *argv[]) return EXIT_FAILURE; } + decode_mesh_gpu(dev, decoded_index_buffer, decoded_u32_stream, out_payload_buffer, encoded_mesh); + if (!validate_mesh_decode(decoded_index_buffer, decoded_u32_stream, optimized_index_buffer, attr_buffer, u32_stride)) + { + LOGE("Failed to validate GPU decoded mesh.\n"); + return EXIT_FAILURE; + } + LOGI("=====================\n"); } From 35758a8be6b7864428f41c5c997207c83baafd53 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sat, 1 Jul 2023 14:54:14 +0200 Subject: [PATCH 15/71] Shader runs, but incorrect. 
--- assets/shaders/decode/meshlet_decode.comp | 6 +++--- tests/CMakeLists.txt | 3 +++ tests/meshopt_sandbox.cpp | 10 ++++++---- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/assets/shaders/decode/meshlet_decode.comp b/assets/shaders/decode/meshlet_decode.comp index 78931632..f6ab7807 100644 --- a/assets/shaders/decode/meshlet_decode.comp +++ b/assets/shaders/decode/meshlet_decode.comp @@ -60,7 +60,7 @@ layout(set = 0, binding = 5, std430) readonly buffer OutputOffsets uvec2 data[]; } output_offset_strides; -shared i16vec4 shared_chunk_bit_counts[NUM_U32_STREAMS][NUM_CHUNKS]; +shared u16vec4 shared_chunk_bit_counts[NUM_U32_STREAMS][NUM_CHUNKS]; shared uint shared_chunk_offset[NUM_U32_STREAMS][NUM_CHUNKS]; shared u8vec4 chunk_values[NUM_CHUNKS]; @@ -77,8 +77,8 @@ void main() if (subgroup_lane < int(gl_WorkGroupSize.y)) { uint bitplane_value = uint(meshlet_streams.data[stream_index + NUM_U32_STREAMS * meshlet_index].bitplane_meta[subgroup_lane]); - i16vec4 bit_counts = (i16vec4(bitplane_value) >> i16vec4(0, 4, 8, 12)) & 0xfus; - i16vec2 bit_counts2 = bit_counts.xy + bit_counts.zw; + u16vec4 bit_counts = (u16vec4(bitplane_value) >> u16vec4(0, 4, 8, 12)) & 0xfus; + u16vec2 bit_counts2 = bit_counts.xy + bit_counts.zw; uint total_bits = bit_counts2.x + bit_counts2.y; uint offset = meshlet_streams.data[stream_index + NUM_U32_STREAMS * meshlet_index].offset_from_base; shared_chunk_offset[stream_index][subgroup_lane] = subgroupExclusiveAdd(total_bits) + offset; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 13f15910..468e4e5a 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -163,6 +163,9 @@ add_granite_offline_tool(performance-query performance_query.cpp) add_granite_offline_tool(asset-manager-test asset_manager_test.cpp) add_granite_offline_tool(meshopt-sandbox meshopt_sandbox.cpp) +if (NOT ANDROID) + target_compile_definitions(meshopt-sandbox PRIVATE ASSET_DIRECTORY=\"${CMAKE_CURRENT_SOURCE_DIR}/assets\") +endif() 
target_link_libraries(meshopt-sandbox PRIVATE meshoptimizer) add_granite_application(dgc-test dgc_test.cpp) diff --git a/tests/meshopt_sandbox.cpp b/tests/meshopt_sandbox.cpp index 76458b05..a0c286b6 100644 --- a/tests/meshopt_sandbox.cpp +++ b/tests/meshopt_sandbox.cpp @@ -360,7 +360,7 @@ static void decode_mesh_setup_buffers( out_index_buffer.clear(); out_u32_stream.clear(); - out_index_buffer.reserve(index_count); + out_index_buffer.resize(index_count); out_u32_stream.resize(attr_count * (mesh.stream_count - 1)); } @@ -368,6 +368,7 @@ static void decode_mesh(std::vector &out_index_buffer, std::vector &payload, const MeshMetadata &mesh) { decode_mesh_setup_buffers(out_index_buffer, out_u32_stream, mesh); + out_index_buffer.clear(); const unsigned u32_stride = mesh.stream_count - 1; for (auto &meshlet : mesh.meshlets) @@ -473,11 +474,11 @@ static void decode_mesh_gpu( buf_info.size = out_index_buffer.size() * sizeof(uint32_t); buf_info.domain = Vulkan::BufferDomain::CachedHost; - auto decoded_index_buffer = dev.create_buffer(buf_info, out_index_buffer.data()); + auto decoded_index_buffer = dev.create_buffer(buf_info); buf_info.size = out_u32_stream.size() * sizeof(uint32_t); buf_info.domain = Vulkan::BufferDomain::CachedHost; - auto decoded_u32_buffer = dev.create_buffer(buf_info, out_u32_stream.data()); + auto decoded_u32_buffer = dev.create_buffer(buf_info); std::vector output_offset_strides; output_offset_strides.reserve(mesh.meshlets.size() * mesh.stream_count); @@ -496,7 +497,7 @@ static void decode_mesh_gpu( auto output_offset_strides_buffer = dev.create_buffer(buf_info, output_offset_strides.data()); auto cmd = dev.request_command_buffer(); - cmd->set_program("assets://shaders/decode/meshlet_decode.comp"); + cmd->set_program("builtin://shaders/decode/meshlet_decode.comp"); cmd->set_subgroup_size_log2(true, 5, 5); cmd->set_storage_buffer(0, 0, *meshlet_meta_buffer); cmd->set_storage_buffer(0, 1, *meshlet_stream_buffer); @@ -571,6 +572,7 @@ int main(int argc, 
char *argv[]) return EXIT_FAILURE; Global::init(Global::MANAGER_FEATURE_FILESYSTEM_BIT); + Filesystem::setup_default_filesystem(GRANITE_FILESYSTEM(), ASSET_DIRECTORY); GLTF::Parser parser(argv[1]); From 5058c056700bb0116c8ccf1405c7dab5b613e7c9 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sat, 1 Jul 2023 16:04:24 +0200 Subject: [PATCH 16/71] Decode shader seems to work. --- assets/shaders/decode/meshlet_decode.comp | 15 +++-- tests/meshopt_sandbox.cpp | 78 +++++++++++++++++++++-- 2 files changed, 79 insertions(+), 14 deletions(-) diff --git a/assets/shaders/decode/meshlet_decode.comp b/assets/shaders/decode/meshlet_decode.comp index f6ab7807..b8391fe5 100644 --- a/assets/shaders/decode/meshlet_decode.comp +++ b/assets/shaders/decode/meshlet_decode.comp @@ -15,10 +15,10 @@ layout(constant_id = 0) const uint NUM_U32_STREAMS = 1; struct MeshletStream { - uint offset_from_base; u16vec4 predictor_a; u16vec4 predictor_b; u8vec4 initial_value; + uint offset_from_base; uint16_t bitplane_meta[NUM_CHUNKS]; }; @@ -135,22 +135,23 @@ void main() // Resolve deltas in packed 4x8 math. u8vec4 packed_decoded = u8vec4(decoded); - if (gl_LocalInvocationID.x == 0u) + uint linear_index = chunk_id * gl_WorkGroupSize.x + subgroup_lane; + if (linear_index == 0) packed_decoded += initial_value; - packed_decoded += u8vec4((predictor_a + predictor_b * uint16_t(subgroup_lane)) >> 8us); + packed_decoded += u8vec4((predictor_a + predictor_b * uint16_t(linear_index)) >> 8us); packed_decoded = subgroupInclusiveAdd(packed_decoded); if (i > 0) barrier(); // Resolve WAR hazard from last iteration. 
- if (subgroup_lane == int(gl_WorkGroupSize.x) - 1) + if (subgroup_lane == int(gl_SubgroupSize) - 1) chunk_values[chunk_id] = packed_decoded; barrier(); if (gl_SubgroupID == 0u && subgroup_lane < int(gl_WorkGroupSize.y)) - chunk_values[subgroup_lane] = subgroupExclusiveAdd(chunk_values[subgroup_lane]); + chunk_values[subgroup_lane] = subgroupInclusiveAdd(chunk_values[subgroup_lane]); barrier(); - packed_decoded += chunk_values[chunk_id]; + if (chunk_id != 0) + packed_decoded += chunk_values[chunk_id - 1]; - uint linear_index = chunk_id * gl_WorkGroupSize.x + subgroup_lane; if (i == 0) { // Write index buffer. diff --git a/tests/meshopt_sandbox.cpp b/tests/meshopt_sandbox.cpp index a0c286b6..6470fce6 100644 --- a/tests/meshopt_sandbox.cpp +++ b/tests/meshopt_sandbox.cpp @@ -22,8 +22,8 @@ static constexpr unsigned MaxVertices = MaxElements; struct MeshletStream { - uint32_t offset_from_base_u32; uint16_t predictor[4 * 2 + 2]; + uint32_t offset_from_base_u32; uint16_t bitplane_meta[MaxElements / 32]; }; @@ -466,11 +466,13 @@ static void decode_mesh_gpu( for (auto &meshlet : mesh.meshlets) for (unsigned i = 0; i < mesh.stream_count; i++) meshlet_streams.push_back(meshlet.u32_streams[i]); - buf_info.size = mesh.meshlets.size() * sizeof(MeshletMetadataGPU); + buf_info.size = meshlet_streams.size() * sizeof(MeshletStream); auto meshlet_stream_buffer = dev.create_buffer(buf_info, meshlet_streams.data()); buf_info.size = payload.size() * sizeof(uint32_t); - auto payload_buffer = dev.create_buffer(buf_info, payload.data()); + if (buf_info.size == 0) + buf_info.size = 4; + auto payload_buffer = dev.create_buffer(buf_info, payload.empty() ? 
nullptr : payload.data()); buf_info.size = out_index_buffer.size() * sizeof(uint32_t); buf_info.domain = Vulkan::BufferDomain::CachedHost; @@ -496,20 +498,27 @@ static void decode_mesh_gpu( buf_info.size = output_offset_strides.size() * sizeof(uvec2); auto output_offset_strides_buffer = dev.create_buffer(buf_info, output_offset_strides.data()); + Vulkan::Device::init_renderdoc_capture(); + dev.begin_renderdoc_capture(); + auto cmd = dev.request_command_buffer(); cmd->set_program("builtin://shaders/decode/meshlet_decode.comp"); + cmd->enable_subgroup_size_control(true); cmd->set_subgroup_size_log2(true, 5, 5); cmd->set_storage_buffer(0, 0, *meshlet_meta_buffer); cmd->set_storage_buffer(0, 1, *meshlet_stream_buffer); - cmd->set_storage_buffer(0, 2, *payload_buffer); + cmd->set_storage_buffer(0, 2, *decoded_u32_buffer); cmd->set_storage_buffer(0, 3, *decoded_index_buffer); - cmd->set_storage_buffer(0, 4, *decoded_u32_buffer); + cmd->set_storage_buffer(0, 4, *payload_buffer); cmd->set_storage_buffer(0, 5, *output_offset_strides_buffer); + cmd->set_specialization_constant_mask(1); + cmd->set_specialization_constant(0, mesh.stream_count); cmd->dispatch(uint32_t(mesh.meshlets.size()), 1, 1); cmd->barrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, VK_PIPELINE_STAGE_HOST_BIT, VK_ACCESS_HOST_READ_BIT); dev.submit(cmd); dev.wait_idle(); + dev.end_renderdoc_capture(); memcpy(out_index_buffer.data(), dev.map_host_buffer(*decoded_index_buffer, Vulkan::MEMORY_ACCESS_READ_BIT), @@ -588,6 +597,58 @@ int main(int argc, char *argv[]) return EXIT_FAILURE; dev.set_context(ctx); +#if 0 + LOGI("=== Test ====\n"); + { + std::vector out_payload_buffer; + const std::vector index_buffer = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11, + }; + const std::vector attr_buffer = { + 9, 11, 4, 4, 2, 9, + 9, 7, 4, 29, 2, 9, + 9, 7, 4, 29, 2, 9, + 9, 7, 4, 29, 2, 9, + }; + MeshMetadata encoded_mesh; + const uint32_t u32_stride = 2; + + encode_mesh(out_payload_buffer, encoded_mesh, + index_buffer.data(), index_buffer.size() / 3, + attr_buffer.data(), u32_stride); + + LOGI("Encoded payload size = %zu bytes.\n", out_payload_buffer.size() * sizeof(uint32_t)); + LOGI("u32 stride = %u\n", u32_stride); + + std::vector decoded_index_buffer; + std::vector decoded_u32_stream; + std::vector gpu_decoded_index_buffer; + std::vector gpu_decoded_u32_stream; + decode_mesh(decoded_index_buffer, decoded_u32_stream, out_payload_buffer, encoded_mesh); + + if (!validate_mesh_decode(decoded_index_buffer, decoded_u32_stream, index_buffer, attr_buffer, u32_stride)) + { + LOGE("Failed to validate mesh.\n"); + return EXIT_FAILURE; + } + + decode_mesh_gpu(dev, gpu_decoded_index_buffer, gpu_decoded_u32_stream, out_payload_buffer, encoded_mesh); + if (!validate_mesh_decode(gpu_decoded_index_buffer, gpu_decoded_u32_stream, decoded_index_buffer, decoded_u32_stream, u32_stride)) + { + LOGE("Failed to validate GPU decoded mesh.\n"); + return EXIT_FAILURE; + } + } + LOGI("===============\n"); +#endif + +#if 1 for (auto &mesh : parser.get_meshes()) { unsigned u32_stride = (mesh.position_stride + mesh.attribute_stride) / sizeof(uint32_t); @@ -658,6 +719,8 @@ int main(int argc, char *argv[]) std::vector decoded_index_buffer; std::vector decoded_u32_stream; + std::vector gpu_decoded_index_buffer; + std::vector gpu_decoded_u32_stream; decode_mesh(decoded_index_buffer, decoded_u32_stream, out_payload_buffer, encoded_mesh); if (!validate_mesh_decode(decoded_index_buffer, decoded_u32_stream, optimized_index_buffer, attr_buffer, u32_stride)) @@ -666,8 +729,8 @@ int main(int argc, char *argv[]) return EXIT_FAILURE; } - decode_mesh_gpu(dev, decoded_index_buffer, decoded_u32_stream, out_payload_buffer, encoded_mesh); - if 
(!validate_mesh_decode(decoded_index_buffer, decoded_u32_stream, optimized_index_buffer, attr_buffer, u32_stride)) + decode_mesh_gpu(dev, gpu_decoded_index_buffer, gpu_decoded_u32_stream, out_payload_buffer, encoded_mesh); + if (!validate_mesh_decode(gpu_decoded_index_buffer, gpu_decoded_u32_stream, decoded_index_buffer, decoded_u32_stream, u32_stride)) { LOGE("Failed to validate GPU decoded mesh.\n"); return EXIT_FAILURE; @@ -675,6 +738,7 @@ int main(int argc, char *argv[]) LOGI("=====================\n"); } +#endif return 0; } \ No newline at end of file From 017fe401e504e43b0092a1a8ee04355693d3e061 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sat, 1 Jul 2023 16:26:43 +0200 Subject: [PATCH 17/71] Workaround missing SGPR promotion. --- assets/shaders/decode/meshlet_decode.comp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/shaders/decode/meshlet_decode.comp b/assets/shaders/decode/meshlet_decode.comp index b8391fe5..46508176 100644 --- a/assets/shaders/decode/meshlet_decode.comp +++ b/assets/shaders/decode/meshlet_decode.comp @@ -99,7 +99,7 @@ void main() u16vec4 predictor_b = meshlet_streams.data[unrolled_stream_index].predictor_b; u8vec4 initial_value = meshlet_streams.data[unrolled_stream_index].initial_value; - uint chunk_id = gl_LocalInvocationID.y; + uint chunk_id = gl_SubgroupID; uint bitplane_offsets = shared_chunk_offset[i][chunk_id]; ivec4 bit_counts = ivec4(shared_chunk_bit_counts[i][chunk_id]); From 053fce6b13d843c5062a48fc28a4dcb41da67956 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sat, 1 Jul 2023 17:16:23 +0200 Subject: [PATCH 18/71] Add a variant that does not rely on packed waveops. 
--- assets/shaders/decode/meshlet_decode.comp | 68 ++++++++++++++++++++++- 1 file changed, 66 insertions(+), 2 deletions(-) diff --git a/assets/shaders/decode/meshlet_decode.comp b/assets/shaders/decode/meshlet_decode.comp index 46508176..a53c8bb5 100644 --- a/assets/shaders/decode/meshlet_decode.comp +++ b/assets/shaders/decode/meshlet_decode.comp @@ -1,9 +1,14 @@ #version 450 + +#define PACKED_WAVEOPS 0 + #extension GL_EXT_shader_explicit_arithmetic_types_int8 : require #extension GL_EXT_shader_explicit_arithmetic_types_int16 : require #extension GL_KHR_shader_subgroup_arithmetic : require #extension GL_KHR_shader_subgroup_basic : require +#if PACKED_WAVEOPS #extension GL_EXT_shader_subgroup_extended_types_int8 : require +#endif #extension GL_EXT_scalar_block_layout : require #define MAX_ELEMENTS 256 @@ -60,12 +65,21 @@ layout(set = 0, binding = 5, std430) readonly buffer OutputOffsets uvec2 data[]; } output_offset_strides; -shared u16vec4 shared_chunk_bit_counts[NUM_U32_STREAMS][NUM_CHUNKS]; +shared u8vec4 shared_chunk_bit_counts[NUM_U32_STREAMS][NUM_CHUNKS]; shared uint shared_chunk_offset[NUM_U32_STREAMS][NUM_CHUNKS]; +#if PACKED_WAVEOPS shared u8vec4 chunk_values[NUM_CHUNKS]; +#else +shared uvec2 chunk_values[NUM_CHUNKS]; +#endif // Hardcodes wave32 atm. Need fallback. 
+uvec2 pack_u16vec2_to_uvec2(u16vec4 v) +{ + return uvec2(pack32(v.xy), pack32(v.zw)); +} + void main() { uint meshlet_index = gl_WorkGroupID.x; @@ -82,7 +96,7 @@ void main() uint total_bits = bit_counts2.x + bit_counts2.y; uint offset = meshlet_streams.data[stream_index + NUM_U32_STREAMS * meshlet_index].offset_from_base; shared_chunk_offset[stream_index][subgroup_lane] = subgroupExclusiveAdd(total_bits) + offset; - shared_chunk_bit_counts[stream_index][subgroup_lane] = bit_counts; + shared_chunk_bit_counts[stream_index][subgroup_lane] = u8vec4(bit_counts); } } @@ -97,7 +111,12 @@ void main() uint offset_from_base = meshlet_streams.data[unrolled_stream_index].offset_from_base; u16vec4 predictor_a = meshlet_streams.data[unrolled_stream_index].predictor_a; u16vec4 predictor_b = meshlet_streams.data[unrolled_stream_index].predictor_b; +#if PACKED_WAVEOPS u8vec4 initial_value = meshlet_streams.data[unrolled_stream_index].initial_value; +#else + u8vec4 initial_value_ = meshlet_streams.data[unrolled_stream_index].initial_value; + uvec2 initial_value = pack_u16vec2_to_uvec2(u16vec4(initial_value_)); +#endif uint chunk_id = gl_SubgroupID; uint bitplane_offsets = shared_chunk_offset[i][chunk_id]; @@ -133,6 +152,7 @@ void main() } decoded.w = bitfieldExtract(int(decoded.w), 0, bit_counts.w); +#if PACKED_WAVEOPS // Resolve deltas in packed 4x8 math. u8vec4 packed_decoded = u8vec4(decoded); uint linear_index = chunk_id * gl_WorkGroupSize.x + subgroup_lane; @@ -168,5 +188,49 @@ void main() if (linear_index <= uint(meta.num_attributes_minus_1)) output_payload.data[output_offset_stride.x + linear_index * output_offset_stride.y] = pack32(packed_decoded); } +#else + // Resolve deltas in packed 4x8 math. 
+ uvec2 packed_decoded = pack_u16vec2_to_uvec2(u16vec4(decoded)) & 0xff00ffu; + uint linear_index = chunk_id * gl_WorkGroupSize.x + subgroup_lane; + if (linear_index == 0) + packed_decoded += initial_value; + packed_decoded += pack_u16vec2_to_uvec2((predictor_a + predictor_b * uint16_t(linear_index)) >> 8us); + packed_decoded = subgroupInclusiveAdd(packed_decoded); + + if (i > 0) + barrier(); // Resolve WAR hazard from last iteration. + if (subgroup_lane == int(gl_SubgroupSize) - 1) + chunk_values[chunk_id] = packed_decoded & 0xff00ffu; + barrier(); + if (gl_SubgroupID == 0u && subgroup_lane < int(gl_WorkGroupSize.y)) + chunk_values[subgroup_lane] = subgroupInclusiveAdd(chunk_values[subgroup_lane]); + barrier(); + if (chunk_id != 0) + packed_decoded += chunk_values[chunk_id - 1]; + + if (i == 0) + { + // Write index buffer. + uvec3 indices = uvec3( + bitfieldExtract(packed_decoded.x, 0, 8), + bitfieldExtract(packed_decoded.x, 16, 8), + bitfieldExtract(packed_decoded.y, 0, 8)); + indices += meta.base_vertex_offset; + uint output_offset = output_offset_strides.data[unrolled_stream_index].x; + if (linear_index <= uint(meta.num_primitives_minus_1)) + output_indices.data[output_offset + linear_index] = indices; + } + else + { + // TODO: decode filters? Should probably be deferred to vertex / mesh shader. + uvec2 output_offset_stride = output_offset_strides.data[unrolled_stream_index]; + if (linear_index <= uint(meta.num_attributes_minus_1)) + output_payload.data[output_offset_stride.x + linear_index * output_offset_stride.y] = + bitfieldExtract(packed_decoded.x, 0, 8) | + (bitfieldExtract(packed_decoded.x, 16, 8) << 8) | + (bitfieldExtract(packed_decoded.y, 0, 8) << 16) | + (bitfieldExtract(packed_decoded.y, 16, 8) << 24); + } +#endif } } From 9f90d811f0a5d1508ad14f0a0d11cc74cd2676aa Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sun, 2 Jul 2023 13:00:34 +0200 Subject: [PATCH 19/71] Pipeline the u32 loads. 
Also helps descriptor load on AMD since we prove the descriptor is used and can be loaded once. --- assets/shaders/decode/meshlet_decode.comp | 18 +++++++------ tests/meshopt_sandbox.cpp | 32 ++++++++++++++++++----- 2 files changed, 35 insertions(+), 15 deletions(-) diff --git a/assets/shaders/decode/meshlet_decode.comp b/assets/shaders/decode/meshlet_decode.comp index a53c8bb5..b6e5bf6e 100644 --- a/assets/shaders/decode/meshlet_decode.comp +++ b/assets/shaders/decode/meshlet_decode.comp @@ -2,13 +2,11 @@ #define PACKED_WAVEOPS 0 -#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require #extension GL_EXT_shader_explicit_arithmetic_types_int16 : require +#extension GL_EXT_shader_subgroup_extended_types_int8 : require #extension GL_KHR_shader_subgroup_arithmetic : require #extension GL_KHR_shader_subgroup_basic : require -#if PACKED_WAVEOPS -#extension GL_EXT_shader_subgroup_extended_types_int8 : require -#endif +#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require #extension GL_EXT_scalar_block_layout : require #define MAX_ELEMENTS 256 @@ -124,31 +122,35 @@ void main() uvec4 decoded = ivec4(0); + // Overlap load with consumption. + // Helps RDNA2 quite a lot here! 
+ uint value = payload.data[bitplane_offsets]; + for (int i = 0; i < bit_counts.x; i++) { - uint value = payload.data[bitplane_offsets++]; decoded.x |= bitfieldExtract(value, subgroup_lane, 1) << i; + value = payload.data[++bitplane_offsets]; } decoded.x = bitfieldExtract(int(decoded.x), 0, bit_counts.x); for (int i = 0; i < bit_counts.y; i++) { - uint value = payload.data[bitplane_offsets++]; decoded.y |= bitfieldExtract(value, subgroup_lane, 1) << i; + value = payload.data[++bitplane_offsets]; } decoded.y = bitfieldExtract(int(decoded.y), 0, bit_counts.y); for (int i = 0; i < bit_counts.z; i++) { - uint value = payload.data[bitplane_offsets++]; decoded.z |= bitfieldExtract(value, subgroup_lane, 1) << i; + value = payload.data[++bitplane_offsets]; } decoded.z = bitfieldExtract(int(decoded.z), 0, bit_counts.z); for (int i = 0; i < bit_counts.w; i++) { - uint value = payload.data[bitplane_offsets++]; decoded.w |= bitfieldExtract(value, subgroup_lane, 1) << i; + value = payload.data[++bitplane_offsets]; } decoded.w = bitfieldExtract(int(decoded.w), 0, bit_counts.w); diff --git a/tests/meshopt_sandbox.cpp b/tests/meshopt_sandbox.cpp index 6470fce6..02e9c7fe 100644 --- a/tests/meshopt_sandbox.cpp +++ b/tests/meshopt_sandbox.cpp @@ -475,12 +475,19 @@ static void decode_mesh_gpu( auto payload_buffer = dev.create_buffer(buf_info, payload.empty() ? 
nullptr : payload.data()); buf_info.size = out_index_buffer.size() * sizeof(uint32_t); - buf_info.domain = Vulkan::BufferDomain::CachedHost; + buf_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | + VK_BUFFER_USAGE_TRANSFER_DST_BIT; + buf_info.domain = Vulkan::BufferDomain::Device; auto decoded_index_buffer = dev.create_buffer(buf_info); + buf_info.domain = Vulkan::BufferDomain::CachedHost; + auto readback_decoded_index_buffer = dev.create_buffer(buf_info); buf_info.size = out_u32_stream.size() * sizeof(uint32_t); - buf_info.domain = Vulkan::BufferDomain::CachedHost; + buf_info.domain = Vulkan::BufferDomain::Device; auto decoded_u32_buffer = dev.create_buffer(buf_info); + buf_info.domain = Vulkan::BufferDomain::CachedHost; + auto readback_decoded_u32_buffer = dev.create_buffer(buf_info); std::vector output_offset_strides; output_offset_strides.reserve(mesh.meshlets.size() * mesh.stream_count); @@ -498,8 +505,9 @@ static void decode_mesh_gpu( buf_info.size = output_offset_strides.size() * sizeof(uvec2); auto output_offset_strides_buffer = dev.create_buffer(buf_info, output_offset_strides.data()); - Vulkan::Device::init_renderdoc_capture(); - dev.begin_renderdoc_capture(); + bool has_renderdoc = Vulkan::Device::init_renderdoc_capture(); + if (has_renderdoc) + dev.begin_renderdoc_capture(); auto cmd = dev.request_command_buffer(); cmd->set_program("builtin://shaders/decode/meshlet_decode.comp"); @@ -514,18 +522,27 @@ static void decode_mesh_gpu( cmd->set_specialization_constant_mask(1); cmd->set_specialization_constant(0, mesh.stream_count); cmd->dispatch(uint32_t(mesh.meshlets.size()), 1, 1); + cmd->barrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, + VK_PIPELINE_STAGE_2_COPY_BIT, VK_ACCESS_TRANSFER_READ_BIT); + + cmd->copy_buffer(*readback_decoded_index_buffer, *decoded_index_buffer); + cmd->copy_buffer(*readback_decoded_u32_buffer, *decoded_u32_buffer); + 
cmd->barrier(VK_PIPELINE_STAGE_2_COPY_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_HOST_BIT, VK_ACCESS_HOST_READ_BIT); dev.submit(cmd); + dev.wait_idle(); - dev.end_renderdoc_capture(); + + if (has_renderdoc) + dev.end_renderdoc_capture(); memcpy(out_index_buffer.data(), - dev.map_host_buffer(*decoded_index_buffer, Vulkan::MEMORY_ACCESS_READ_BIT), + dev.map_host_buffer(*readback_decoded_index_buffer, Vulkan::MEMORY_ACCESS_READ_BIT), out_index_buffer.size() * sizeof(uint32_t)); memcpy(out_u32_stream.data(), - dev.map_host_buffer(*decoded_u32_buffer, Vulkan::MEMORY_ACCESS_READ_BIT), + dev.map_host_buffer(*readback_decoded_u32_buffer, Vulkan::MEMORY_ACCESS_READ_BIT), out_u32_stream.size() * sizeof(uint32_t)); } @@ -596,6 +613,7 @@ int main(int argc, char *argv[]) if (!ctx.init_instance_and_device(nullptr, 0, nullptr, 0)) return EXIT_FAILURE; dev.set_context(ctx); + dev.init_frame_contexts(4); #if 0 LOGI("=== Test ====\n"); From 09776206bbbaf4748dcdd6cd28874265dc9f2906 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sun, 2 Jul 2023 15:17:58 +0200 Subject: [PATCH 20/71] Noodle around with meshopt cluster builder. 
--- tests/meshopt_sandbox.cpp | 118 +++++++++++++++++++++++++++++++++++++- 1 file changed, 117 insertions(+), 1 deletion(-) diff --git a/tests/meshopt_sandbox.cpp b/tests/meshopt_sandbox.cpp index 02e9c7fe..f68c7610 100644 --- a/tests/meshopt_sandbox.cpp +++ b/tests/meshopt_sandbox.cpp @@ -592,6 +592,105 @@ static bool validate_mesh_decode(const std::vector &decoded_index_buff return true; } +struct Meshlet +{ + uint32_t offset; + uint32_t count; +}; + +static bool convert_meshlets(std::vector &out_meshlets, std::vector &bounds, + std::vector &out_index_buffer, const SceneFormats::Mesh &mesh) +{ + if (mesh.indices.empty() || mesh.primitive_restart || mesh.topology != VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST) + return false; + + size_t vertex_count = mesh.positions.size() / mesh.position_stride; + std::vector position_buffer(vertex_count); + std::vector index_buffer(mesh.count); + + if (mesh.index_type == VK_INDEX_TYPE_UINT32) + { + auto *indices = reinterpret_cast(mesh.indices.data()); + for (unsigned i = 0; i < mesh.count; i++) + index_buffer[i] = indices[i]; + } + else if (mesh.index_type == VK_INDEX_TYPE_UINT16) + { + auto *indices = reinterpret_cast(mesh.indices.data()); + for (unsigned i = 0; i < mesh.count; i++) + index_buffer[i] = indices[i]; + } + else if (mesh.index_type == VK_INDEX_TYPE_UINT8_EXT) + { + auto *indices = reinterpret_cast(mesh.indices.data()); + for (unsigned i = 0; i < mesh.count; i++) + index_buffer[i] = indices[i]; + } + else + return false; + + switch (mesh.attribute_layout[Util::ecast(MeshAttribute::Position)].format) + { + case VK_FORMAT_R32G32B32A32_SFLOAT: + case VK_FORMAT_R32G32B32_SFLOAT: + for (unsigned i = 0; i < vertex_count; i++) + memcpy(position_buffer[i].data, mesh.positions.data() + mesh.position_stride * i, sizeof(float) * 3); + break; + + default: + return false; + } + + constexpr unsigned max_vertices = 255; + constexpr unsigned max_primitives = 256; + std::vector optimized_index_buffer(index_buffer.size()); + 
meshopt_optimizeVertexCache(optimized_index_buffer.data(), index_buffer.data(), mesh.count, vertex_count); + index_buffer = std::move(optimized_index_buffer); + size_t num_meshlets = meshopt_buildMeshletsBound(mesh.count, max_vertices, max_primitives); + + std::vector out_vertex_redirection_buffer(num_meshlets * max_vertices); + std::vector local_index_buffer(num_meshlets * max_primitives * 3); + std::vector meshlets(num_meshlets); + + num_meshlets = meshopt_buildMeshlets(meshlets.data(), + out_vertex_redirection_buffer.data(), local_index_buffer.data(), + index_buffer.data(), mesh.count, + position_buffer[0].data, vertex_count, sizeof(vec3), + max_vertices, max_primitives, 1.0f); + + meshlets.resize(num_meshlets); + + out_meshlets.clear(); + out_meshlets.reserve(num_meshlets); + for (auto &meshlet : meshlets) + { + Meshlet m = {}; + m.offset = uint32_t(out_index_buffer.size()); + m.count = meshlet.triangle_count; + out_meshlets.push_back(m); + + auto *local_indices = index_buffer.data() + meshlet.triangle_offset; + for (unsigned i = 0; i < meshlet.triangle_count; i++) + { + out_index_buffer.emplace_back( + out_vertex_redirection_buffer[local_indices[3 * i + 0] + meshlet.vertex_offset], + out_vertex_redirection_buffer[local_indices[3 * i + 1] + meshlet.vertex_offset], + out_vertex_redirection_buffer[local_indices[3 * i + 2] + meshlet.vertex_offset]); + } + } + + bounds.clear(); + bounds.reserve(num_meshlets); + for (auto &meshlet : out_meshlets) + { + auto bound = meshopt_computeClusterBounds(out_index_buffer[0].data, meshlet.count * 3, + position_buffer[0].data, vertex_count, sizeof(vec3)); + bounds.push_back(bound); + } + + return true; +} + int main(int argc, char *argv[]) { if (argc != 2) @@ -615,6 +714,23 @@ int main(int argc, char *argv[]) dev.set_context(ctx); dev.init_frame_contexts(4); +#if 1 + { + std::vector index_buffer; + std::vector meshlets; + std::vector bounds; + + for (auto &mesh : parser.get_meshes()) + { + if (mesh.count < 60000) + continue; + 
if (!convert_meshlets(meshlets, bounds, index_buffer, mesh)) + return EXIT_FAILURE; + break; + } + } +#endif + #if 0 LOGI("=== Test ====\n"); { @@ -666,7 +782,7 @@ int main(int argc, char *argv[]) LOGI("===============\n"); #endif -#if 1 +#if 0 for (auto &mesh : parser.get_meshes()) { unsigned u32_stride = (mesh.position_stride + mesh.attribute_stride) / sizeof(uint32_t); From 662804cb0f1953d4e45836baeaa453adafb2ba7a Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sun, 2 Jul 2023 15:30:54 +0200 Subject: [PATCH 21/71] Add meshlet decode. --- tests/meshopt_sandbox.cpp | 45 +++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/tests/meshopt_sandbox.cpp b/tests/meshopt_sandbox.cpp index f68c7610..978543e3 100644 --- a/tests/meshopt_sandbox.cpp +++ b/tests/meshopt_sandbox.cpp @@ -70,6 +70,12 @@ struct PrimitiveAnalysisResult uint32_t num_vertices; }; +struct Meshlet +{ + uint32_t offset; + uint32_t count; +}; + static PrimitiveAnalysisResult analyze_primitive_count(std::unordered_map &vertex_remap, const uint32_t *index_buffer, uint32_t max_num_primitives) { @@ -272,6 +278,7 @@ static void encode_stream(std::vector &out_payload_buffer, } static void encode_mesh(std::vector &out_payload_buffer, MeshMetadata &mesh, + const Meshlet *meshlets, size_t num_meshlets, const uint32_t *index_buffer, uint32_t primitive_count, const uint32_t *attributes, unsigned num_u32_streams) @@ -283,11 +290,23 @@ static void encode_mesh(std::vector &out_payload_buffer, MeshMetadata uint32_t base_vertex_offset = 0; std::unordered_map vbo_remap; + uint32_t primitive_index = 0; + unsigned meshlet_index = 0; + bool done = false; - for (uint32_t primitive_index = 0; primitive_index < primitive_count; ) + while (!done) { - uint32_t primitives_to_process = min(primitive_count - primitive_index, MaxPrimitives); - auto analysis_result = analyze_primitive_count(vbo_remap, index_buffer + 3 * primitive_index, primitives_to_process); + uint32_t 
primitives_to_process = min(primitive_count - primitive_index, + num_meshlets ? meshlets[meshlet_index].count : MaxPrimitives); + + PrimitiveAnalysisResult analysis_result = {}; + if (num_meshlets) + primitive_index = meshlets[meshlet_index].offset; + + analysis_result = analyze_primitive_count( + vbo_remap, index_buffer + 3 * primitive_index, + primitives_to_process); + primitives_to_process = analysis_result.num_primitives; MeshletMetadata meshlet = {}; @@ -334,9 +353,19 @@ static void encode_mesh(std::vector &out_payload_buffer, MeshMetadata } mesh.meshlets.push_back(meshlet); - - primitive_index += primitives_to_process; base_vertex_offset += analysis_result.num_vertices; + + if (num_meshlets) + { + primitive_index += primitives_to_process; + meshlet_index++; + done = meshlet_index >= num_meshlets; + } + else + { + primitive_index += primitives_to_process; + done = primitive_index >= primitive_count; + } } mesh.data_stream_size_u32 = uint32_t(out_payload_buffer.size()); @@ -592,12 +621,6 @@ static bool validate_mesh_decode(const std::vector &decoded_index_buff return true; } -struct Meshlet -{ - uint32_t offset; - uint32_t count; -}; - static bool convert_meshlets(std::vector &out_meshlets, std::vector &bounds, std::vector &out_index_buffer, const SceneFormats::Mesh &mesh) { From f0c89b07494a6b682d47e16e126447d0c998c5aa Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Fri, 28 Jul 2023 14:48:10 +0200 Subject: [PATCH 22/71] Add a utility function to canonicalize mesh index buffers. 
--- renderer/formats/scene_formats.cpp | 74 ++++++++++++++++++++++++++++++ renderer/formats/scene_formats.hpp | 3 ++ 2 files changed, 77 insertions(+) diff --git a/renderer/formats/scene_formats.cpp b/renderer/formats/scene_formats.cpp index 7dbda1a8..b56edb84 100644 --- a/renderer/formats/scene_formats.cpp +++ b/renderer/formats/scene_formats.cpp @@ -178,6 +178,80 @@ static std::vector remap_indices(const std::vector &indices, return remapped; } +bool mesh_canonicalize_indices(Mesh &mesh) +{ + if (mesh.topology != VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST && + mesh.topology != VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP) + { + LOGE("Topology must be trilist or tristrip.\n"); + return false; + } + + std::vector unrolled_indices; + unrolled_indices.reserve(mesh.count); + + if (mesh.indices.empty()) + { + for (unsigned i = 0; i < mesh.count; i++) + unrolled_indices.push_back(i); + mesh.index_type = VK_INDEX_TYPE_UINT32; + } + else if (mesh.index_type == VK_INDEX_TYPE_UINT32) + { + auto *indices = reinterpret_cast(mesh.indices.data()); + for (unsigned i = 0; i < mesh.count; i++) + unrolled_indices.push_back(indices[i]); + } + else if (mesh.index_type == VK_INDEX_TYPE_UINT16) + { + auto *indices = reinterpret_cast(mesh.indices.data()); + for (unsigned i = 0; i < mesh.count; i++) + unrolled_indices.push_back(mesh.primitive_restart && indices[i] == UINT16_MAX ? UINT32_MAX : indices[i]); + } + else if (mesh.index_type == VK_INDEX_TYPE_UINT8_EXT) + { + auto *indices = reinterpret_cast(mesh.indices.data()); + for (unsigned i = 0; i < mesh.count; i++) + unrolled_indices.push_back(mesh.primitive_restart && indices[i] == UINT8_MAX ? 
UINT32_MAX : indices[i]); + } + + if (mesh.topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP) + { + std::vector unstripped_indices; + unstripped_indices.reserve(mesh.count * 3); + unsigned primitive_count_since_restart = 0; + + for (unsigned i = 2; i < mesh.count; i++) + { + bool emit_primitive = true; + if (mesh.primitive_restart && + unrolled_indices[i - 2] == UINT32_MAX && + unrolled_indices[i - 1] == UINT32_MAX && + unrolled_indices[i - 0] == UINT32_MAX) + { + emit_primitive = false; + primitive_count_since_restart = 0; + } + + if (emit_primitive) + { + unstripped_indices.push_back(unrolled_indices[i - 2]); + unstripped_indices.push_back(unrolled_indices[i - (1 ^ (primitive_count_since_restart & 1))]); + unstripped_indices.push_back(unrolled_indices[i - (primitive_count_since_restart & 1)]); + primitive_count_since_restart++; + } + } + + unrolled_indices = std::move(unstripped_indices); + mesh.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + } + + mesh.count = uint32_t(unrolled_indices.size()); + mesh.indices.resize(unrolled_indices.size() * sizeof(uint32_t)); + memcpy(mesh.indices.data(), unrolled_indices.data(), mesh.indices.size()); + return true; +} + static bool mesh_unroll_vertices(Mesh &mesh) { if (mesh.topology != VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST) diff --git a/renderer/formats/scene_formats.hpp b/renderer/formats/scene_formats.hpp index 18661805..ac5c041b 100644 --- a/renderer/formats/scene_formats.hpp +++ b/renderer/formats/scene_formats.hpp @@ -248,6 +248,9 @@ struct SceneInformation const SceneNodes *scene_nodes = nullptr; }; +// Ensures that a Mesh has a TRIANGLE_LIST + uint32_t indices for easy consumption later. 
+bool mesh_canonicalize_indices(Mesh &mesh); + bool mesh_recompute_normals(Mesh &mesh); bool mesh_recompute_tangents(Mesh &mesh); bool mesh_renormalize_normals(Mesh &mesh); From 3bcfd217b2759633aadba48127a6dfedfda37b25 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Fri, 28 Jul 2023 14:49:04 +0200 Subject: [PATCH 23/71] Reconsider how streams are laid out. --- tests/meshopt_sandbox.cpp | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/tests/meshopt_sandbox.cpp b/tests/meshopt_sandbox.cpp index 978543e3..b7781d8c 100644 --- a/tests/meshopt_sandbox.cpp +++ b/tests/meshopt_sandbox.cpp @@ -14,7 +14,6 @@ #include using namespace Granite; -static constexpr unsigned MaxStreams = 16; static constexpr unsigned MaxU32Streams = 16; static constexpr unsigned MaxElements = 256; static constexpr unsigned MaxPrimitives = MaxElements; @@ -42,14 +41,21 @@ struct MeshletMetadata : MeshletMetadataGPU enum class StreamType : uint8_t { - Primitive, // R8G8B8X8_UINT - PositionF16, // R16G16B16X16_FLOAT + Primitive = 0, // R8G8B8X8_UINT + PositionE16, // RGB16_SSCALED * 2^(A16_SINT) + NormalOct8, // Octahedron encoding in RG8. + TangentOct8, // Octahedron encoding in RG8, sign bit in B8 (if not zero, +1, otherwise -1). + UV, // R16G16_SNORM * B16_SSCALED + BoneIndices, // RGBA8_UINT + BoneWeights, // RGB8_UNORM (sums to 1, A is implied). }; -struct StreamMeta +enum class MeshStyle : uint32_t { - StreamType type; - uint8_t stream_index_component; + Wireframe = 0, // Primitive + Position + Untextured, // Wireframe + NormalOct8 + Textured, // Untextured + TangentOct8 + UV + Skinned // Textured + Bone* }; struct MeshMetadata @@ -57,9 +63,7 @@ struct MeshMetadata uint32_t stream_count; uint32_t data_stream_offset_u32; uint32_t data_stream_size_u32; - - // Stream meta is used to configure the decode shader. 
- StreamMeta stream_meta[MaxStreams]; + MeshStyle mesh_style; std::vector meshlets; }; @@ -375,8 +379,6 @@ static void decode_mesh_setup_buffers( std::vector &out_index_buffer, std::vector &out_u32_stream, const MeshMetadata &mesh) { assert(mesh.stream_count > 1); - assert(mesh.stream_meta[0].type == StreamType::Primitive); - assert(mesh.stream_meta[0].stream_index_component == 0); unsigned index_count = 0; unsigned attr_count = 0; From 9206eddd06f1075faca3eadac6bac16625514599 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Fri, 28 Jul 2023 15:34:37 +0200 Subject: [PATCH 24/71] Add helper to extract Scaled SINT encoded positions. --- renderer/formats/scene_formats.cpp | 44 ++++++++++++++++++++++++++++++ renderer/formats/scene_formats.hpp | 2 ++ 2 files changed, 46 insertions(+) diff --git a/renderer/formats/scene_formats.cpp b/renderer/formats/scene_formats.cpp index b56edb84..0421b0b2 100644 --- a/renderer/formats/scene_formats.cpp +++ b/renderer/formats/scene_formats.cpp @@ -252,6 +252,50 @@ bool mesh_canonicalize_indices(Mesh &mesh) return true; } +static i16vec4 encode_vec3_to_snorm_exp(vec3 v) +{ + vec3 vabs = abs(v); + float max_scale = max(max(vabs.x, vabs.y), vabs.z); + int max_scale_log2 = int(floor(log2(max_scale))); + int scale_log2 = 14 - max_scale_log2; + + // Maximum component should have range of [1, 2) since we use floor of log2, so scale with 2^14 instead of 15. 
+ v.x = ldexpf(v.x, scale_log2); + v.y = ldexpf(v.y, scale_log2); + v.z = ldexpf(v.z, scale_log2); + v = clamp(round(v), vec3(-0x8000), vec3(0x7fff)); + + return i16vec4(i16vec3(v), int16_t(-scale_log2)); +} + +std::vector mesh_extract_position_snorm_exp(const Mesh &mesh) +{ + std::vector encoded_positions; + std::vector positions; + + size_t num_positions = mesh.positions.size() / mesh.position_stride; + positions.resize(num_positions); + auto &layout = mesh.attribute_layout[ecast(MeshAttribute::Position)]; + auto fmt = layout.format; + + if (fmt == VK_FORMAT_R32G32B32A32_SFLOAT || fmt == VK_FORMAT_R32G32B32_SFLOAT) + { + for (size_t i = 0; i < num_positions; i++) + memcpy(positions[i].data, mesh.positions.data() + i * mesh.position_stride + layout.offset, sizeof(float) * 3); + } + else + { + LOGE("Unexpected format %u.\n", fmt); + return {}; + } + + encoded_positions.reserve(positions.size()); + for (auto &pos : positions) + encoded_positions.push_back(encode_vec3_to_snorm_exp(pos)); + + return encoded_positions; +} + static bool mesh_unroll_vertices(Mesh &mesh) { if (mesh.topology != VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST) diff --git a/renderer/formats/scene_formats.hpp b/renderer/formats/scene_formats.hpp index ac5c041b..f5518f03 100644 --- a/renderer/formats/scene_formats.hpp +++ b/renderer/formats/scene_formats.hpp @@ -251,6 +251,8 @@ struct SceneInformation // Ensures that a Mesh has a TRIANGLE_LIST + uint32_t indices for easy consumption later. bool mesh_canonicalize_indices(Mesh &mesh); +std::vector mesh_extract_position_snorm_exp(const Mesh &mesh); + bool mesh_recompute_normals(Mesh &mesh); bool mesh_recompute_tangents(Mesh &mesh); bool mesh_renormalize_normals(Mesh &mesh); From ba2ce6a336579d87fb1a999a915b8a05ecb8c208 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sat, 29 Jul 2023 11:36:41 +0200 Subject: [PATCH 25/71] Add more encoding functions. 
--- renderer/formats/scene_formats.cpp | 114 +++++++++++++++++++++++++++++ renderer/formats/scene_formats.hpp | 4 +- tests/meshopt_sandbox.cpp | 11 +++ 3 files changed, 128 insertions(+), 1 deletion(-) diff --git a/renderer/formats/scene_formats.cpp b/renderer/formats/scene_formats.cpp index 0421b0b2..d7f5f4d3 100644 --- a/renderer/formats/scene_formats.cpp +++ b/renderer/formats/scene_formats.cpp @@ -268,6 +268,25 @@ static i16vec4 encode_vec3_to_snorm_exp(vec3 v) return i16vec4(i16vec3(v), int16_t(-scale_log2)); } +static i16vec3 encode_vec2_to_snorm_exp(vec2 v) +{ + vec2 vabs = abs(v); + float max_scale = max(vabs.x, vabs.y); + int max_scale_log2 = int(floor(log2(max_scale))); + int scale_log2 = 14 - max_scale_log2; + + // UVs are unorm scaled, don't need more accuracy than this. + // If all UVs are in range of [0, 1] space, we should get a constant exponent which aids compression. + scale_log2 = min(scale_log2, 15); + + // Maximum component should have range of [1, 2) since we use floor of log2, so scale with 2^14 instead of 15. 
+ v.x = ldexpf(v.x, scale_log2); + v.y = ldexpf(v.y, scale_log2); + v = clamp(round(v), vec2(-0x8000), vec2(0x7fff)); + + return i16vec3(i16vec2(v), int16_t(-scale_log2)); +} + std::vector mesh_extract_position_snorm_exp(const Mesh &mesh) { std::vector encoded_positions; @@ -283,6 +302,8 @@ std::vector mesh_extract_position_snorm_exp(const Mesh &mesh) for (size_t i = 0; i < num_positions; i++) memcpy(positions[i].data, mesh.positions.data() + i * mesh.position_stride + layout.offset, sizeof(float) * 3); } + else if (fmt == VK_FORMAT_UNDEFINED) + return {}; else { LOGE("Unexpected format %u.\n", fmt); @@ -296,6 +317,99 @@ std::vector mesh_extract_position_snorm_exp(const Mesh &mesh) return encoded_positions; } +std::vector mesh_extract_normal_tangent_oct8(const Mesh &mesh, MeshAttribute attr) +{ + std::vector encoded_attributes; + std::vector normals; + + auto &layout = mesh.attribute_layout[ecast(attr)]; + auto fmt = layout.format; + + size_t num_attrs = mesh.attributes.size() / mesh.attribute_stride; + normals.resize(num_attrs); + + if (fmt == VK_FORMAT_R32G32B32_SFLOAT) + { + for (size_t i = 0; i < num_attrs; i++) + { + memcpy(normals[i].data, + mesh.attributes.data() + i * mesh.attribute_stride + layout.offset, + sizeof(float) * 3); + normals[i].w = 0.0f; + } + } + else if (fmt == VK_FORMAT_R32G32B32A32_SFLOAT) + { + for (size_t i = 0; i < num_attrs; i++) + { + memcpy(normals[i].data, + mesh.attributes.data() + i * mesh.attribute_stride + layout.offset, + sizeof(float) * 4); + } + } + else if (fmt == VK_FORMAT_UNDEFINED) + return {}; + else + { + LOGE("Unexpected format %u.\n", fmt); + return {}; + } + + encoded_attributes.reserve(normals.size()); + meshopt_encodeFilterOct(encoded_attributes.data(), encoded_attributes.size(), + sizeof(i8vec4), 8, normals[0].data); + for (auto &n : encoded_attributes) + n.w = n.w <= 0 ? -1 : 0; + + return encoded_attributes; +} + +static i16vec4 encode_uv_to_snorm_scale(vec2 uv) +{ + // UVs tend to be in [0, 1] range. 
Readjust to use more of the available range. + uv = 2.0f * uv - 1.0f; + return i16vec4(encode_vec2_to_snorm_exp(uv), 0); +} + +std::vector mesh_extract_uv_snorm_scale(const Mesh &mesh) +{ + std::vector encoded_uvs; + std::vector uvs; + + size_t num_uvs = mesh.attributes.size() / mesh.attribute_stride; + uvs.resize(num_uvs); + auto &layout = mesh.attribute_layout[ecast(MeshAttribute::UV)]; + auto fmt = layout.format; + + if (fmt == VK_FORMAT_R32G32_SFLOAT) + { + for (size_t i = 0; i < num_uvs; i++) + memcpy(uvs[i].data, mesh.attributes.data() + i * mesh.attribute_stride + layout.offset, sizeof(float) * 2); + } + else if (fmt == VK_FORMAT_R16G16_UNORM) + { + for (size_t i = 0; i < num_uvs; i++) + { + u16vec2 u16; + memcpy(u16.data, mesh.attributes.data() + i * mesh.attribute_stride + layout.offset, sizeof(uint16_t) * 2); + uvs[i] = vec2(u16) * float(1.0f / 0xffff); + } + } + else if (fmt == VK_FORMAT_UNDEFINED) + return {}; + else + { + LOGE("Unexpected format %u.\n", fmt); + return {}; + } + + encoded_uvs.reserve(uvs.size()); + for (auto &uv : uvs) + encoded_uvs.push_back(encode_uv_to_snorm_scale(uv)); + + return encoded_uvs; +} + static bool mesh_unroll_vertices(Mesh &mesh) { if (mesh.topology != VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST) diff --git a/renderer/formats/scene_formats.hpp b/renderer/formats/scene_formats.hpp index f5518f03..ed55d56a 100644 --- a/renderer/formats/scene_formats.hpp +++ b/renderer/formats/scene_formats.hpp @@ -248,10 +248,12 @@ struct SceneInformation const SceneNodes *scene_nodes = nullptr; }; -// Ensures that a Mesh has a TRIANGLE_LIST + uint32_t indices for easy consumption later. +// Ensures that a Mesh has a TRIANGLE_LIST + uint32_t indices for easy consumption later (by meshlet encoding). 
bool mesh_canonicalize_indices(Mesh &mesh); std::vector mesh_extract_position_snorm_exp(const Mesh &mesh); +std::vector mesh_extract_normal_tangent_oct8(const Mesh &mesh, MeshAttribute attr); +std::vector mesh_extract_uv_snorm_scale(const Mesh &mesh); bool mesh_recompute_normals(Mesh &mesh); bool mesh_recompute_tangents(Mesh &mesh); diff --git a/tests/meshopt_sandbox.cpp b/tests/meshopt_sandbox.cpp index b7781d8c..71f74393 100644 --- a/tests/meshopt_sandbox.cpp +++ b/tests/meshopt_sandbox.cpp @@ -740,6 +740,17 @@ int main(int argc, char *argv[]) dev.init_frame_contexts(4); #if 1 + { + auto mesh = parser.get_meshes().front(); + SceneFormats::mesh_canonicalize_indices(mesh); + auto positions = SceneFormats::mesh_extract_position_snorm_exp(mesh); + auto normals = SceneFormats::mesh_extract_normal_tangent_oct8(mesh, MeshAttribute::Normal); + auto tangent = SceneFormats::mesh_extract_normal_tangent_oct8(mesh, MeshAttribute::Tangent); + auto uv = SceneFormats::mesh_extract_uv_snorm_scale(mesh); + } +#endif + +#if 0 { std::vector index_buffer; std::vector meshlets; From 7e75fe39150c094e5c2bcfb1ad0862349da61a41 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sat, 29 Jul 2023 12:14:39 +0200 Subject: [PATCH 26/71] Start adding mesh encoder to SceneFormats. 
--- renderer/formats/scene_formats.cpp | 90 ++++++++++++++++++++++++++++++ renderer/formats/scene_formats.hpp | 73 ++++++++++++++++++++++++ 2 files changed, 163 insertions(+) diff --git a/renderer/formats/scene_formats.cpp b/renderer/formats/scene_formats.cpp index d7f5f4d3..da13d4ca 100644 --- a/renderer/formats/scene_formats.cpp +++ b/renderer/formats/scene_formats.cpp @@ -410,6 +410,96 @@ std::vector mesh_extract_uv_snorm_scale(const Mesh &mesh) return encoded_uvs; } +namespace Meshlet +{ +static vec3 decode_snorm_exp(i16vec4 p) +{ + vec3 result; + result.x = ldexpf(float(p.x), p.w); + result.y = ldexpf(float(p.y), p.w); + result.z = ldexpf(float(p.z), p.w); + return result; +} + +Encoded encode_mesh(const Mesh &mesh_) +{ + auto mesh = mesh_; + mesh_canonicalize_indices(mesh); + + auto positions = SceneFormats::mesh_extract_position_snorm_exp(mesh); + auto normals = SceneFormats::mesh_extract_normal_tangent_oct8(mesh, MeshAttribute::Normal); + auto tangent = SceneFormats::mesh_extract_normal_tangent_oct8(mesh, MeshAttribute::Tangent); + auto uv = SceneFormats::mesh_extract_uv_snorm_scale(mesh); + + // Use quantized position to guide the clustering. 
+ std::vector position_buffer; + position_buffer.reserve(positions.size()); + for (auto &p : positions) + position_buffer.push_back(decode_snorm_exp(p)); + + constexpr unsigned max_vertices = 255; + constexpr unsigned max_primitives = 256; + std::vector optimized_index_buffer(mesh.count); + meshopt_optimizeVertexCache( + optimized_index_buffer.data(), reinterpret_cast(mesh.indices.data()), + mesh.count, positions.size()); + size_t num_meshlets = meshopt_buildMeshletsBound(mesh.count, max_vertices, max_primitives); + + std::vector out_vertex_redirection_buffer(num_meshlets * max_vertices); + std::vector local_index_buffer(num_meshlets * max_primitives * 3); + std::vector meshlets(num_meshlets); + + num_meshlets = meshopt_buildMeshlets(meshlets.data(), + out_vertex_redirection_buffer.data(), local_index_buffer.data(), + optimized_index_buffer.data(), mesh.count, + position_buffer[0].data, positions.size(), sizeof(vec3), + max_vertices, max_primitives, 0.75f); + + meshlets.resize(num_meshlets); + + struct Meshlet + { + uint32_t offset; + uint32_t count; + }; + std::vector out_meshlets; + std::vector out_index_buffer; + + out_meshlets.clear(); + out_meshlets.reserve(num_meshlets); + for (auto &meshlet : meshlets) + { + Meshlet m = {}; + m.offset = uint32_t(out_index_buffer.size()); + m.count = meshlet.triangle_count; + out_meshlets.push_back(m); + + auto *local_indices = optimized_index_buffer.data() + meshlet.triangle_offset; + for (unsigned i = 0; i < meshlet.triangle_count; i++) + { + out_index_buffer.emplace_back( + out_vertex_redirection_buffer[local_indices[3 * i + 0] + meshlet.vertex_offset], + out_vertex_redirection_buffer[local_indices[3 * i + 1] + meshlet.vertex_offset], + out_vertex_redirection_buffer[local_indices[3 * i + 2] + meshlet.vertex_offset]); + } + } + + std::vector bounds; + bounds.clear(); + bounds.reserve(num_meshlets); + for (auto &meshlet : out_meshlets) + { + auto bound = meshopt_computeClusterBounds( + out_index_buffer[0].data, meshlet.count 
* 3, + position_buffer[0].data, positions.size(), sizeof(vec3)); + bounds.push_back(bound); + } + + Encoded encoded; + return encoded; +} +} + static bool mesh_unroll_vertices(Mesh &mesh) { if (mesh.topology != VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST) diff --git a/renderer/formats/scene_formats.hpp b/renderer/formats/scene_formats.hpp index ed55d56a..76288e42 100644 --- a/renderer/formats/scene_formats.hpp +++ b/renderer/formats/scene_formats.hpp @@ -255,6 +255,79 @@ std::vector mesh_extract_position_snorm_exp(const Mesh &mesh); std::vector mesh_extract_normal_tangent_oct8(const Mesh &mesh, MeshAttribute attr); std::vector mesh_extract_uv_snorm_scale(const Mesh &mesh); +namespace Meshlet +{ +static constexpr unsigned MaxU32Streams = 16; +static constexpr unsigned MaxElements = 256; +static constexpr unsigned MaxPrimitives = MaxElements; +static constexpr unsigned MaxVertices = MaxElements; + +struct Stream +{ + uint16_t predictor[4 * 2 + 2]; + uint32_t offset_from_base_u32; + uint16_t bitplane_meta[MaxElements / 32]; +}; + +struct MetadataGPU +{ + uint32_t base_vertex_offset; + uint8_t num_primitives_minus_1; + uint8_t num_attributes_minus_1; + uint16_t reserved; +}; + +struct Bound +{ + vec3 center; + float radius; + i8vec4 cone_axis_cutoff; +}; + +struct Metadata : MetadataGPU +{ + Bound bound; + Stream u32_streams[MaxU32Streams]; +}; + +enum class StreamType : uint8_t +{ + Primitive = 0, // R8G8B8X8_UINT + PositionE16, // RGB16_SSCALED * 2^(A16_SINT) + NormalOct8, // Octahedron encoding in RG8. + TangentOct8, // Octahedron encoding in RG8, sign bit in B8 (if not zero, +1, otherwise -1). + UV, // R16G16_SNORM * B16_SSCALED + BoneIndices, // RGBA8_UINT + BoneWeights, // RGB8_UNORM (sums to 1, A is implied). 
+}; + +enum class MeshStyle : uint32_t +{ + Wireframe = 0, // Primitive + Position + Untextured, // Wireframe + NormalOct8 + Textured, // Untextured + TangentOct8 + UV + Skinned // Textured + Bone* +}; + +struct CombinedMesh +{ + uint32_t stream_count; + uint32_t data_stream_offset_u32; + uint32_t data_stream_size_u32; + MeshStyle mesh_style; + + std::vector meshlets; +}; + +struct Encoded +{ + std::vector payload; + CombinedMesh mesh; +}; + +Encoded encode_mesh(const Mesh &mesh); +} + bool mesh_recompute_normals(Mesh &mesh); bool mesh_recompute_tangents(Mesh &mesh); bool mesh_renormalize_normals(Mesh &mesh); From e71e863505e9f741c68321949006306f3f46f259 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sat, 29 Jul 2023 17:30:22 +0200 Subject: [PATCH 27/71] Start moving over mesh encoding to main codebase. --- renderer/formats/scene_formats.cpp | 349 ++++++++++++++++++++++++++++- 1 file changed, 342 insertions(+), 7 deletions(-) diff --git a/renderer/formats/scene_formats.cpp b/renderer/formats/scene_formats.cpp index da13d4ca..611bfe0b 100644 --- a/renderer/formats/scene_formats.cpp +++ b/renderer/formats/scene_formats.cpp @@ -412,6 +412,18 @@ std::vector mesh_extract_uv_snorm_scale(const Mesh &mesh) namespace Meshlet { +struct Meshlet +{ + uint32_t offset; + uint32_t count; +}; + +struct PrimitiveAnalysisResult +{ + uint32_t num_primitives; + uint32_t num_vertices; +}; + static vec3 decode_snorm_exp(i16vec4 p) { vec3 result; @@ -421,6 +433,302 @@ static vec3 decode_snorm_exp(i16vec4 p) return result; } +static PrimitiveAnalysisResult analyze_primitive_count(std::unordered_map &vertex_remap, + const uint32_t *index_buffer, uint32_t max_num_primitives) +{ + PrimitiveAnalysisResult result = {}; + uint32_t vertex_count = 0; + + // We can reference a maximum of 256 vertices. 
+ vertex_remap.clear(); + + for (uint32_t i = 0; i < max_num_primitives; i++) + { + uint32_t index0 = index_buffer[3 * i + 0]; + uint32_t index1 = index_buffer[3 * i + 1]; + uint32_t index2 = index_buffer[3 * i + 2]; + + vertex_count = uint32_t(vertex_remap.size()); + + vertex_remap.insert({ index0, uint32_t(vertex_remap.size()) }); + vertex_remap.insert({ index1, uint32_t(vertex_remap.size()) }); + vertex_remap.insert({ index2, uint32_t(vertex_remap.size()) }); + + // If this primitive causes us to go out of bounds, reset. + if (vertex_remap.size() > MaxVertices) + { + max_num_primitives = i; + break; + } + + vertex_count = uint32_t(vertex_remap.size()); + } + + result.num_primitives = max_num_primitives; + result.num_vertices = vertex_count; + return result; +} + +// Analyze bits required to encode a signed delta. +static uvec4 compute_required_bits_unsigned(u8vec4 delta) +{ + uvec4 result; + for (unsigned i = 0; i < 4; i++) + { + uint32_t v = delta[i]; + result[i] = v == 0 ? 0 : (32 - leading_zeroes(v)); + } + return result; +} + +static uvec4 compute_required_bits_signed(u8vec4 delta) +{ + uvec4 result; + for (unsigned i = 0; i < 4; i++) + { + uint32_t v = delta[i]; + + if (v == 0) + { + result[i] = 0; + } + else + { + if (v >= 0x80u) + v ^= 0xffu; + result[i] = v == 0 ? 1 : (33 - leading_zeroes(v)); + } + } + return result; +} + +static uint32_t extract_bit_plane(const uint8_t *bytes, unsigned bit_index) +{ + uint32_t u32 = 0; + for (unsigned i = 0; i < 32; i++) + u32 |= ((bytes[4 * i] >> bit_index) & 1u) << i; + return u32; +} + +static void find_linear_predictor(uint16_t *predictor, + const u8vec4 (&stream_buffer)[MaxElements], + unsigned num_elements) +{ + // Sign-extend since the deltas are considered to be signed ints. + ivec4 unrolled_data[MaxElements]; + for (unsigned i = 0; i < num_elements; i++) + unrolled_data[i] = ivec4(i8vec4(stream_buffer[i])); + + // Simple linear regression. 
+ // Pilfered from: https://www.codesansar.com/numerical-methods/linear-regression-method-using-c-programming.htm + ivec4 x{0}, x2{0}, y{0}, xy{0}; + for (unsigned i = 0; i < num_elements; i++) + { + x += int(i); + x2 += int(i * i); + y += unrolled_data[i]; + xy += int(i) * unrolled_data[i]; + } + + int n = int(num_elements); + ivec4 b_denom = (n * x2 - x * x); + b_denom = select(b_denom, ivec4(1), equal(ivec4(0), b_denom)); + + // Encode in u8.8 fixed point. + ivec4 b = (ivec4(256) * (n * xy - x * y)) / b_denom; + ivec4 a = ((ivec4(256) * y - b * x)) / n; + + for (unsigned i = 0; i < 4; i++) + predictor[i] = uint16_t(a[i]); + for (unsigned i = 0; i < 4; i++) + predictor[4 + i] = uint16_t(b[i]); +} + +static void encode_stream(std::vector &out_payload_buffer, + Stream &stream, u8vec4 (&stream_buffer)[MaxElements], + unsigned num_elements) +{ + stream.offset_from_base_u32 = uint32_t(out_payload_buffer.size()); + + // Delta-encode + u8vec4 current_value; + if (num_elements > 1) + current_value = u8vec4(2) * stream_buffer[0] - stream_buffer[1]; + else + current_value = stream_buffer[0]; + u8vec4 bias_value = current_value; + + for (unsigned i = 0; i < num_elements; i++) + { + u8vec4 next_value = stream_buffer[i]; + stream_buffer[i] = next_value - current_value; + current_value = next_value; + } + + // Find optimal linear predictor. + find_linear_predictor(stream.predictor, stream_buffer, num_elements); + + // u8.8 fixed point. + auto base_predictor = u16vec4(stream.predictor[0], stream.predictor[1], stream.predictor[2], stream.predictor[3]); + auto linear_predictor = u16vec4(stream.predictor[4], stream.predictor[5], stream.predictor[6], stream.predictor[7]); + + for (unsigned i = 0; i < num_elements; i++) + { + // Only predict in-bounds elements, since we want all out of bounds elements to be encoded to 0 delta + // without having them affect the predictor. 
+ stream_buffer[i] -= u8vec4((base_predictor + linear_predictor * uint16_t(i)) >> uint16_t(8)); + } + + for (unsigned i = num_elements; i < MaxElements; i++) + stream_buffer[i] = u8vec4(0); + + // Try to adjust the range such that it can fit in fewer bits. + // We can use the constant term in the linear predictor to nudge values in place. + i8vec4 lo(127); + i8vec4 hi(-128); + + for (unsigned i = 0; i < num_elements; i++) + { + lo = min(lo, i8vec4(stream_buffer[i])); + hi = max(hi, i8vec4(stream_buffer[i])); + } + + uvec4 full_bits = compute_required_bits_unsigned(u8vec4(hi - lo)); + u8vec4 target_lo_value = u8vec4(-((uvec4(1) << full_bits) >> 1u)); + u8vec4 bias = target_lo_value - u8vec4(lo); + + for (unsigned i = 0; i < num_elements; i++) + stream_buffer[i] += bias; + + for (unsigned i = 0; i < 4; i++) + stream.predictor[i] -= uint16_t(bias[i]) << 8; + + // Based on the linear predictor, it's possible that the encoded value in stream_buffer[0] becomes non-zero again. + // This is undesirable, since we can use the initial value to force a delta of 0 here, saving precious bits. + bias_value += stream_buffer[0]; + stream_buffer[0] = u8vec4(0); + + // Simple linear predictor, base equal elements[0], gradient = 0. + stream.predictor[8] = uint16_t((bias_value.y << 8) | bias_value.x); + stream.predictor[9] = uint16_t((bias_value.w << 8) | bias_value.z); + + // Encode 32 elements at once. + for (unsigned chunk_index = 0; chunk_index < MaxElements / 32; chunk_index++) + { + uvec4 required_bits = {}; + for (unsigned i = 0; i < 32; i++) + required_bits = max(required_bits, compute_required_bits_signed(stream_buffer[chunk_index * 32 + i])); + + // Encode bit counts. 
+ stream.bitplane_meta[chunk_index] = uint16_t((required_bits.x << 0) | (required_bits.y << 4) | + (required_bits.z << 8) | (required_bits.w << 12)); + + for (unsigned i = 0; i < required_bits.x; i++) + out_payload_buffer.push_back(extract_bit_plane(&stream_buffer[chunk_index * 32][0], i)); + for (unsigned i = 0; i < required_bits.y; i++) + out_payload_buffer.push_back(extract_bit_plane(&stream_buffer[chunk_index * 32][1], i)); + for (unsigned i = 0; i < required_bits.z; i++) + out_payload_buffer.push_back(extract_bit_plane(&stream_buffer[chunk_index * 32][2], i)); + for (unsigned i = 0; i < required_bits.w; i++) + out_payload_buffer.push_back(extract_bit_plane(&stream_buffer[chunk_index * 32][3], i)); + } +} + +static void encode_mesh(Encoded &encoded, + const Meshlet *meshlets, size_t num_meshlets, + const uint32_t *index_buffer, uint32_t primitive_count, + const uint32_t *attributes, + unsigned num_u32_streams) +{ + encoded = {}; + auto &mesh = encoded.mesh; + mesh.stream_count = num_u32_streams + 1; + mesh.data_stream_offset_u32 = 0; // Can be adjusted in isolation later to pack multiple payload streams into one buffer. + mesh.meshlets.reserve((primitive_count + MaxPrimitives - 1) / MaxPrimitives); + uint32_t base_vertex_offset = 0; + + std::unordered_map vbo_remap; + uint32_t primitive_index = 0; + unsigned meshlet_index = 0; + bool done = false; + + while (!done) + { + uint32_t primitives_to_process = min(primitive_count - primitive_index, + num_meshlets ? 
meshlets[meshlet_index].count : MaxPrimitives); + + PrimitiveAnalysisResult analysis_result = {}; + if (num_meshlets) + primitive_index = meshlets[meshlet_index].offset; + + analysis_result = analyze_primitive_count( + vbo_remap, index_buffer + 3 * primitive_index, + primitives_to_process); + + primitives_to_process = analysis_result.num_primitives; + + Metadata meshlet = {}; + u8vec4 stream_buffer[MaxElements]; + + meshlet.base_vertex_offset = base_vertex_offset; + meshlet.num_primitives_minus_1 = analysis_result.num_primitives - 1; + meshlet.num_attributes_minus_1 = analysis_result.num_vertices - 1; + meshlet.reserved = 0; + + // Encode index buffer. + for (uint32_t i = 0; i < analysis_result.num_primitives; i++) + { + uint8_t i0 = vbo_remap[index_buffer[3 * (primitive_index + i) + 0]]; + uint8_t i1 = vbo_remap[index_buffer[3 * (primitive_index + i) + 1]]; + uint8_t i2 = vbo_remap[index_buffer[3 * (primitive_index + i) + 2]]; + //LOGI("Prim %u = { %u, %u, %u }\n", i, i0, i1, i2); + stream_buffer[i] = u8vec4(i0, i1, i2, 0); + } + + encode_stream(encoded.payload, meshlet.u32_streams[0], stream_buffer, analysis_result.num_primitives); + + // Handle spill region just in case. 
+ uint64_t vbo_remapping[MaxVertices + 3]; + unsigned vbo_index = 0; + for (auto &v : vbo_remap) + { + assert(vbo_index < MaxVertices + 3); + vbo_remapping[vbo_index++] = (uint64_t(v.second) << 32) | v.first; + } + std::sort(vbo_remapping, vbo_remapping + vbo_index); + + for (uint32_t stream_index = 0; stream_index < num_u32_streams; stream_index++) + { + for (uint32_t i = 0; i < analysis_result.num_vertices; i++) + { + auto vertex_index = uint32_t(vbo_remapping[i]); + uint32_t payload = attributes[stream_index + num_u32_streams * vertex_index]; + memcpy(stream_buffer[i].data, &payload, sizeof(payload)); + } + + encode_stream(encoded.payload, meshlet.u32_streams[stream_index + 1], stream_buffer, + analysis_result.num_vertices); + } + + mesh.meshlets.push_back(meshlet); + base_vertex_offset += analysis_result.num_vertices; + + if (num_meshlets) + { + primitive_index += primitives_to_process; + meshlet_index++; + done = meshlet_index >= num_meshlets; + } + else + { + primitive_index += primitives_to_process; + done = primitive_index >= primitive_count; + } + } + + mesh.data_stream_size_u32 = uint32_t(encoded.payload.size()); +} + Encoded encode_mesh(const Mesh &mesh_) { auto mesh = mesh_; @@ -431,6 +739,23 @@ Encoded encode_mesh(const Mesh &mesh_) auto tangent = SceneFormats::mesh_extract_normal_tangent_oct8(mesh, MeshAttribute::Tangent); auto uv = SceneFormats::mesh_extract_uv_snorm_scale(mesh); + unsigned num_u32_streams = (sizeof(positions.front()) + sizeof(normals.front()) + + sizeof(tangent.front()) + sizeof(uv.front())) / sizeof(uint32_t); + + std::vector attributes(num_u32_streams * positions.size()); + uint32_t *ptr = attributes.data(); + for (size_t i = 0, n = positions.size(); i < n; i++) + { + memcpy(ptr, positions[i].data, sizeof(positions.front())); + ptr += sizeof(positions.front()) / sizeof(uint32_t); + memcpy(ptr, normals[i].data, sizeof(normals.front())); + ptr += sizeof(normals.front()) / sizeof(uint32_t); + memcpy(ptr, tangent[i].data, 
sizeof(tangent.front())); + ptr += sizeof(tangent.front()) / sizeof(uint32_t); + memcpy(ptr, uv[i].data, sizeof(uv.front())); + ptr += sizeof(uv.front()) / sizeof(uint32_t); + } + // Use quantized position to guide the clustering. std::vector position_buffer; position_buffer.reserve(positions.size()); @@ -457,15 +782,9 @@ Encoded encode_mesh(const Mesh &mesh_) meshlets.resize(num_meshlets); - struct Meshlet - { - uint32_t offset; - uint32_t count; - }; std::vector out_meshlets; std::vector out_index_buffer; - out_meshlets.clear(); out_meshlets.reserve(num_meshlets); for (auto &meshlet : meshlets) { @@ -490,12 +809,28 @@ Encoded encode_mesh(const Mesh &mesh_) for (auto &meshlet : out_meshlets) { auto bound = meshopt_computeClusterBounds( - out_index_buffer[0].data, meshlet.count * 3, + out_index_buffer[meshlet.offset].data, meshlet.count * 3, position_buffer[0].data, positions.size(), sizeof(vec3)); bounds.push_back(bound); } Encoded encoded; + encode_mesh(encoded, out_meshlets.data(), out_meshlets.size(), + out_index_buffer[0].data, out_index_buffer.size() / 3, + attributes.data(), num_u32_streams); + encoded.mesh.mesh_style = MeshStyle::Textured; + + assert(bounds.size() == encoded.mesh.meshlets.size()); + const auto *pbounds = bounds.data(); + for (auto &meshlet : encoded.mesh.meshlets) + { + meshlet.bound.center = vec3( + pbounds->center[0], pbounds->center[1], pbounds->center[2]); + meshlet.bound.radius = pbounds->radius; + meshlet.bound.cone_axis_cutoff = i8vec4( + pbounds->cone_axis_s8[0], pbounds->cone_axis_s8[1], + pbounds->cone_axis_s8[2], pbounds->cone_cutoff_s8); + } return encoded; } } From 776a19893709c68d7ca7b188afa5bb4b902ebca7 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sun, 30 Jul 2023 11:54:52 +0200 Subject: [PATCH 28/71] Move over more code. 
--- renderer/formats/scene_formats.cpp | 704 ++------------------------- renderer/formats/scene_formats.hpp | 80 ---- scene-export/CMakeLists.txt | 1 + scene-export/meshlet.cpp | 744 +++++++++++++++++++++++++++++ scene-export/meshlet.hpp | 35 ++ tests/CMakeLists.txt | 2 +- tests/meshopt_sandbox.cpp | 478 +----------------- 7 files changed, 830 insertions(+), 1214 deletions(-) create mode 100644 scene-export/meshlet.cpp create mode 100644 scene-export/meshlet.hpp diff --git a/renderer/formats/scene_formats.cpp b/renderer/formats/scene_formats.cpp index 611bfe0b..4b8ba1ea 100644 --- a/renderer/formats/scene_formats.cpp +++ b/renderer/formats/scene_formats.cpp @@ -51,7 +51,7 @@ struct IndexRemapping // Find duplicate indices. static IndexRemapping build_index_remap_list(const Mesh &mesh) { - unsigned attribute_count = unsigned(mesh.positions.size() / mesh.position_stride); + auto attribute_count = unsigned(mesh.positions.size() / mesh.position_stride); std::unordered_map attribute_remapper; IndexRemapping remapped; remapped.index_remap.reserve(attribute_count); @@ -66,13 +66,41 @@ static IndexRemapping build_index_remap_list(const Mesh &mesh) auto hash = h.get(); auto itr = attribute_remapper.find(hash); + bool is_unique; + if (itr != end(attribute_remapper)) { - remapped.index_remap.push_back(itr->second); + bool match = true; + if (memcmp(mesh.positions.data() + i * mesh.position_stride, + mesh.positions.data() + itr->second * mesh.position_stride, + mesh.position_stride) != 0) + { + match = false; + } + + if (match && !mesh.attributes.empty() && + memcmp(mesh.attributes.data() + i * mesh.attribute_stride, + mesh.attributes.data() + itr->second * mesh.attribute_stride, + mesh.attribute_stride) != 0) + { + match = false; + } + + if (match) + remapped.index_remap.push_back(itr->second); + else + LOGW("Hash collision in vertex dedup.\n"); + + is_unique = !match; } else { attribute_remapper[hash] = unique_count; + is_unique = true; + } + + if (is_unique) + { 
remapped.index_remap.push_back(unique_count); remapped.unique_attrib_to_source_index.push_back(i); unique_count++; @@ -84,25 +112,30 @@ static IndexRemapping build_index_remap_list(const Mesh &mesh) static std::vector build_canonical_index_buffer(const Mesh &mesh, const std::vector &index_remap) { + assert(mesh.topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST); + std::vector index_buffer; + index_buffer.reserve(mesh.count); if (mesh.indices.empty()) { - index_buffer.reserve(mesh.count); for (unsigned i = 0; i < mesh.count; i++) index_buffer.push_back(index_remap[i]); } else if (mesh.index_type == VK_INDEX_TYPE_UINT32) { - index_buffer.reserve(mesh.count); for (unsigned i = 0; i < mesh.count; i++) index_buffer.push_back(index_remap[reinterpret_cast(mesh.indices.data())[i]]); } else if (mesh.index_type == VK_INDEX_TYPE_UINT16) { - index_buffer.reserve(mesh.count); for (unsigned i = 0; i < mesh.count; i++) index_buffer.push_back(index_remap[reinterpret_cast(mesh.indices.data())[i]]); } + else if (mesh.index_type == VK_INDEX_TYPE_UINT8_EXT) + { + for (unsigned i = 0; i < mesh.count; i++) + index_buffer.push_back(index_remap[reinterpret_cast(mesh.indices.data())[i]]); + } return index_buffer; } @@ -178,663 +211,6 @@ static std::vector remap_indices(const std::vector &indices, return remapped; } -bool mesh_canonicalize_indices(Mesh &mesh) -{ - if (mesh.topology != VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST && - mesh.topology != VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP) - { - LOGE("Topology must be trilist or tristrip.\n"); - return false; - } - - std::vector unrolled_indices; - unrolled_indices.reserve(mesh.count); - - if (mesh.indices.empty()) - { - for (unsigned i = 0; i < mesh.count; i++) - unrolled_indices.push_back(i); - mesh.index_type = VK_INDEX_TYPE_UINT32; - } - else if (mesh.index_type == VK_INDEX_TYPE_UINT32) - { - auto *indices = reinterpret_cast(mesh.indices.data()); - for (unsigned i = 0; i < mesh.count; i++) - unrolled_indices.push_back(indices[i]); - } - else if 
(mesh.index_type == VK_INDEX_TYPE_UINT16) - { - auto *indices = reinterpret_cast(mesh.indices.data()); - for (unsigned i = 0; i < mesh.count; i++) - unrolled_indices.push_back(mesh.primitive_restart && indices[i] == UINT16_MAX ? UINT32_MAX : indices[i]); - } - else if (mesh.index_type == VK_INDEX_TYPE_UINT8_EXT) - { - auto *indices = reinterpret_cast(mesh.indices.data()); - for (unsigned i = 0; i < mesh.count; i++) - unrolled_indices.push_back(mesh.primitive_restart && indices[i] == UINT8_MAX ? UINT32_MAX : indices[i]); - } - - if (mesh.topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP) - { - std::vector unstripped_indices; - unstripped_indices.reserve(mesh.count * 3); - unsigned primitive_count_since_restart = 0; - - for (unsigned i = 2; i < mesh.count; i++) - { - bool emit_primitive = true; - if (mesh.primitive_restart && - unrolled_indices[i - 2] == UINT32_MAX && - unrolled_indices[i - 1] == UINT32_MAX && - unrolled_indices[i - 0] == UINT32_MAX) - { - emit_primitive = false; - primitive_count_since_restart = 0; - } - - if (emit_primitive) - { - unstripped_indices.push_back(unrolled_indices[i - 2]); - unstripped_indices.push_back(unrolled_indices[i - (1 ^ (primitive_count_since_restart & 1))]); - unstripped_indices.push_back(unrolled_indices[i - (primitive_count_since_restart & 1)]); - primitive_count_since_restart++; - } - } - - unrolled_indices = std::move(unstripped_indices); - mesh.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; - } - - mesh.count = uint32_t(unrolled_indices.size()); - mesh.indices.resize(unrolled_indices.size() * sizeof(uint32_t)); - memcpy(mesh.indices.data(), unrolled_indices.data(), mesh.indices.size()); - return true; -} - -static i16vec4 encode_vec3_to_snorm_exp(vec3 v) -{ - vec3 vabs = abs(v); - float max_scale = max(max(vabs.x, vabs.y), vabs.z); - int max_scale_log2 = int(floor(log2(max_scale))); - int scale_log2 = 14 - max_scale_log2; - - // Maximum component should have range of [1, 2) since we use floor of log2, so scale with 2^14 
instead of 15. - v.x = ldexpf(v.x, scale_log2); - v.y = ldexpf(v.y, scale_log2); - v.z = ldexpf(v.z, scale_log2); - v = clamp(round(v), vec3(-0x8000), vec3(0x7fff)); - - return i16vec4(i16vec3(v), int16_t(-scale_log2)); -} - -static i16vec3 encode_vec2_to_snorm_exp(vec2 v) -{ - vec2 vabs = abs(v); - float max_scale = max(vabs.x, vabs.y); - int max_scale_log2 = int(floor(log2(max_scale))); - int scale_log2 = 14 - max_scale_log2; - - // UVs are unorm scaled, don't need more accuracy than this. - // If all UVs are in range of [0, 1] space, we should get a constant exponent which aids compression. - scale_log2 = min(scale_log2, 15); - - // Maximum component should have range of [1, 2) since we use floor of log2, so scale with 2^14 instead of 15. - v.x = ldexpf(v.x, scale_log2); - v.y = ldexpf(v.y, scale_log2); - v = clamp(round(v), vec2(-0x8000), vec2(0x7fff)); - - return i16vec3(i16vec2(v), int16_t(-scale_log2)); -} - -std::vector mesh_extract_position_snorm_exp(const Mesh &mesh) -{ - std::vector encoded_positions; - std::vector positions; - - size_t num_positions = mesh.positions.size() / mesh.position_stride; - positions.resize(num_positions); - auto &layout = mesh.attribute_layout[ecast(MeshAttribute::Position)]; - auto fmt = layout.format; - - if (fmt == VK_FORMAT_R32G32B32A32_SFLOAT || fmt == VK_FORMAT_R32G32B32_SFLOAT) - { - for (size_t i = 0; i < num_positions; i++) - memcpy(positions[i].data, mesh.positions.data() + i * mesh.position_stride + layout.offset, sizeof(float) * 3); - } - else if (fmt == VK_FORMAT_UNDEFINED) - return {}; - else - { - LOGE("Unexpected format %u.\n", fmt); - return {}; - } - - encoded_positions.reserve(positions.size()); - for (auto &pos : positions) - encoded_positions.push_back(encode_vec3_to_snorm_exp(pos)); - - return encoded_positions; -} - -std::vector mesh_extract_normal_tangent_oct8(const Mesh &mesh, MeshAttribute attr) -{ - std::vector encoded_attributes; - std::vector normals; - - auto &layout = 
mesh.attribute_layout[ecast(attr)]; - auto fmt = layout.format; - - size_t num_attrs = mesh.attributes.size() / mesh.attribute_stride; - normals.resize(num_attrs); - - if (fmt == VK_FORMAT_R32G32B32_SFLOAT) - { - for (size_t i = 0; i < num_attrs; i++) - { - memcpy(normals[i].data, - mesh.attributes.data() + i * mesh.attribute_stride + layout.offset, - sizeof(float) * 3); - normals[i].w = 0.0f; - } - } - else if (fmt == VK_FORMAT_R32G32B32A32_SFLOAT) - { - for (size_t i = 0; i < num_attrs; i++) - { - memcpy(normals[i].data, - mesh.attributes.data() + i * mesh.attribute_stride + layout.offset, - sizeof(float) * 4); - } - } - else if (fmt == VK_FORMAT_UNDEFINED) - return {}; - else - { - LOGE("Unexpected format %u.\n", fmt); - return {}; - } - - encoded_attributes.reserve(normals.size()); - meshopt_encodeFilterOct(encoded_attributes.data(), encoded_attributes.size(), - sizeof(i8vec4), 8, normals[0].data); - for (auto &n : encoded_attributes) - n.w = n.w <= 0 ? -1 : 0; - - return encoded_attributes; -} - -static i16vec4 encode_uv_to_snorm_scale(vec2 uv) -{ - // UVs tend to be in [0, 1] range. Readjust to use more of the available range. 
- uv = 2.0f * uv - 1.0f; - return i16vec4(encode_vec2_to_snorm_exp(uv), 0); -} - -std::vector mesh_extract_uv_snorm_scale(const Mesh &mesh) -{ - std::vector encoded_uvs; - std::vector uvs; - - size_t num_uvs = mesh.attributes.size() / mesh.attribute_stride; - uvs.resize(num_uvs); - auto &layout = mesh.attribute_layout[ecast(MeshAttribute::UV)]; - auto fmt = layout.format; - - if (fmt == VK_FORMAT_R32G32_SFLOAT) - { - for (size_t i = 0; i < num_uvs; i++) - memcpy(uvs[i].data, mesh.attributes.data() + i * mesh.attribute_stride + layout.offset, sizeof(float) * 2); - } - else if (fmt == VK_FORMAT_R16G16_UNORM) - { - for (size_t i = 0; i < num_uvs; i++) - { - u16vec2 u16; - memcpy(u16.data, mesh.attributes.data() + i * mesh.attribute_stride + layout.offset, sizeof(uint16_t) * 2); - uvs[i] = vec2(u16) * float(1.0f / 0xffff); - } - } - else if (fmt == VK_FORMAT_UNDEFINED) - return {}; - else - { - LOGE("Unexpected format %u.\n", fmt); - return {}; - } - - encoded_uvs.reserve(uvs.size()); - for (auto &uv : uvs) - encoded_uvs.push_back(encode_uv_to_snorm_scale(uv)); - - return encoded_uvs; -} - -namespace Meshlet -{ -struct Meshlet -{ - uint32_t offset; - uint32_t count; -}; - -struct PrimitiveAnalysisResult -{ - uint32_t num_primitives; - uint32_t num_vertices; -}; - -static vec3 decode_snorm_exp(i16vec4 p) -{ - vec3 result; - result.x = ldexpf(float(p.x), p.w); - result.y = ldexpf(float(p.y), p.w); - result.z = ldexpf(float(p.z), p.w); - return result; -} - -static PrimitiveAnalysisResult analyze_primitive_count(std::unordered_map &vertex_remap, - const uint32_t *index_buffer, uint32_t max_num_primitives) -{ - PrimitiveAnalysisResult result = {}; - uint32_t vertex_count = 0; - - // We can reference a maximum of 256 vertices. 
- vertex_remap.clear(); - - for (uint32_t i = 0; i < max_num_primitives; i++) - { - uint32_t index0 = index_buffer[3 * i + 0]; - uint32_t index1 = index_buffer[3 * i + 1]; - uint32_t index2 = index_buffer[3 * i + 2]; - - vertex_count = uint32_t(vertex_remap.size()); - - vertex_remap.insert({ index0, uint32_t(vertex_remap.size()) }); - vertex_remap.insert({ index1, uint32_t(vertex_remap.size()) }); - vertex_remap.insert({ index2, uint32_t(vertex_remap.size()) }); - - // If this primitive causes us to go out of bounds, reset. - if (vertex_remap.size() > MaxVertices) - { - max_num_primitives = i; - break; - } - - vertex_count = uint32_t(vertex_remap.size()); - } - - result.num_primitives = max_num_primitives; - result.num_vertices = vertex_count; - return result; -} - -// Analyze bits required to encode a signed delta. -static uvec4 compute_required_bits_unsigned(u8vec4 delta) -{ - uvec4 result; - for (unsigned i = 0; i < 4; i++) - { - uint32_t v = delta[i]; - result[i] = v == 0 ? 0 : (32 - leading_zeroes(v)); - } - return result; -} - -static uvec4 compute_required_bits_signed(u8vec4 delta) -{ - uvec4 result; - for (unsigned i = 0; i < 4; i++) - { - uint32_t v = delta[i]; - - if (v == 0) - { - result[i] = 0; - } - else - { - if (v >= 0x80u) - v ^= 0xffu; - result[i] = v == 0 ? 1 : (33 - leading_zeroes(v)); - } - } - return result; -} - -static uint32_t extract_bit_plane(const uint8_t *bytes, unsigned bit_index) -{ - uint32_t u32 = 0; - for (unsigned i = 0; i < 32; i++) - u32 |= ((bytes[4 * i] >> bit_index) & 1u) << i; - return u32; -} - -static void find_linear_predictor(uint16_t *predictor, - const u8vec4 (&stream_buffer)[MaxElements], - unsigned num_elements) -{ - // Sign-extend since the deltas are considered to be signed ints. - ivec4 unrolled_data[MaxElements]; - for (unsigned i = 0; i < num_elements; i++) - unrolled_data[i] = ivec4(i8vec4(stream_buffer[i])); - - // Simple linear regression. 
- // Pilfered from: https://www.codesansar.com/numerical-methods/linear-regression-method-using-c-programming.htm - ivec4 x{0}, x2{0}, y{0}, xy{0}; - for (unsigned i = 0; i < num_elements; i++) - { - x += int(i); - x2 += int(i * i); - y += unrolled_data[i]; - xy += int(i) * unrolled_data[i]; - } - - int n = int(num_elements); - ivec4 b_denom = (n * x2 - x * x); - b_denom = select(b_denom, ivec4(1), equal(ivec4(0), b_denom)); - - // Encode in u8.8 fixed point. - ivec4 b = (ivec4(256) * (n * xy - x * y)) / b_denom; - ivec4 a = ((ivec4(256) * y - b * x)) / n; - - for (unsigned i = 0; i < 4; i++) - predictor[i] = uint16_t(a[i]); - for (unsigned i = 0; i < 4; i++) - predictor[4 + i] = uint16_t(b[i]); -} - -static void encode_stream(std::vector &out_payload_buffer, - Stream &stream, u8vec4 (&stream_buffer)[MaxElements], - unsigned num_elements) -{ - stream.offset_from_base_u32 = uint32_t(out_payload_buffer.size()); - - // Delta-encode - u8vec4 current_value; - if (num_elements > 1) - current_value = u8vec4(2) * stream_buffer[0] - stream_buffer[1]; - else - current_value = stream_buffer[0]; - u8vec4 bias_value = current_value; - - for (unsigned i = 0; i < num_elements; i++) - { - u8vec4 next_value = stream_buffer[i]; - stream_buffer[i] = next_value - current_value; - current_value = next_value; - } - - // Find optimal linear predictor. - find_linear_predictor(stream.predictor, stream_buffer, num_elements); - - // u8.8 fixed point. - auto base_predictor = u16vec4(stream.predictor[0], stream.predictor[1], stream.predictor[2], stream.predictor[3]); - auto linear_predictor = u16vec4(stream.predictor[4], stream.predictor[5], stream.predictor[6], stream.predictor[7]); - - for (unsigned i = 0; i < num_elements; i++) - { - // Only predict in-bounds elements, since we want all out of bounds elements to be encoded to 0 delta - // without having them affect the predictor. 
- stream_buffer[i] -= u8vec4((base_predictor + linear_predictor * uint16_t(i)) >> uint16_t(8)); - } - - for (unsigned i = num_elements; i < MaxElements; i++) - stream_buffer[i] = u8vec4(0); - - // Try to adjust the range such that it can fit in fewer bits. - // We can use the constant term in the linear predictor to nudge values in place. - i8vec4 lo(127); - i8vec4 hi(-128); - - for (unsigned i = 0; i < num_elements; i++) - { - lo = min(lo, i8vec4(stream_buffer[i])); - hi = max(hi, i8vec4(stream_buffer[i])); - } - - uvec4 full_bits = compute_required_bits_unsigned(u8vec4(hi - lo)); - u8vec4 target_lo_value = u8vec4(-((uvec4(1) << full_bits) >> 1u)); - u8vec4 bias = target_lo_value - u8vec4(lo); - - for (unsigned i = 0; i < num_elements; i++) - stream_buffer[i] += bias; - - for (unsigned i = 0; i < 4; i++) - stream.predictor[i] -= uint16_t(bias[i]) << 8; - - // Based on the linear predictor, it's possible that the encoded value in stream_buffer[0] becomes non-zero again. - // This is undesirable, since we can use the initial value to force a delta of 0 here, saving precious bits. - bias_value += stream_buffer[0]; - stream_buffer[0] = u8vec4(0); - - // Simple linear predictor, base equal elements[0], gradient = 0. - stream.predictor[8] = uint16_t((bias_value.y << 8) | bias_value.x); - stream.predictor[9] = uint16_t((bias_value.w << 8) | bias_value.z); - - // Encode 32 elements at once. - for (unsigned chunk_index = 0; chunk_index < MaxElements / 32; chunk_index++) - { - uvec4 required_bits = {}; - for (unsigned i = 0; i < 32; i++) - required_bits = max(required_bits, compute_required_bits_signed(stream_buffer[chunk_index * 32 + i])); - - // Encode bit counts. 
- stream.bitplane_meta[chunk_index] = uint16_t((required_bits.x << 0) | (required_bits.y << 4) | - (required_bits.z << 8) | (required_bits.w << 12)); - - for (unsigned i = 0; i < required_bits.x; i++) - out_payload_buffer.push_back(extract_bit_plane(&stream_buffer[chunk_index * 32][0], i)); - for (unsigned i = 0; i < required_bits.y; i++) - out_payload_buffer.push_back(extract_bit_plane(&stream_buffer[chunk_index * 32][1], i)); - for (unsigned i = 0; i < required_bits.z; i++) - out_payload_buffer.push_back(extract_bit_plane(&stream_buffer[chunk_index * 32][2], i)); - for (unsigned i = 0; i < required_bits.w; i++) - out_payload_buffer.push_back(extract_bit_plane(&stream_buffer[chunk_index * 32][3], i)); - } -} - -static void encode_mesh(Encoded &encoded, - const Meshlet *meshlets, size_t num_meshlets, - const uint32_t *index_buffer, uint32_t primitive_count, - const uint32_t *attributes, - unsigned num_u32_streams) -{ - encoded = {}; - auto &mesh = encoded.mesh; - mesh.stream_count = num_u32_streams + 1; - mesh.data_stream_offset_u32 = 0; // Can be adjusted in isolation later to pack multiple payload streams into one buffer. - mesh.meshlets.reserve((primitive_count + MaxPrimitives - 1) / MaxPrimitives); - uint32_t base_vertex_offset = 0; - - std::unordered_map vbo_remap; - uint32_t primitive_index = 0; - unsigned meshlet_index = 0; - bool done = false; - - while (!done) - { - uint32_t primitives_to_process = min(primitive_count - primitive_index, - num_meshlets ? 
meshlets[meshlet_index].count : MaxPrimitives); - - PrimitiveAnalysisResult analysis_result = {}; - if (num_meshlets) - primitive_index = meshlets[meshlet_index].offset; - - analysis_result = analyze_primitive_count( - vbo_remap, index_buffer + 3 * primitive_index, - primitives_to_process); - - primitives_to_process = analysis_result.num_primitives; - - Metadata meshlet = {}; - u8vec4 stream_buffer[MaxElements]; - - meshlet.base_vertex_offset = base_vertex_offset; - meshlet.num_primitives_minus_1 = analysis_result.num_primitives - 1; - meshlet.num_attributes_minus_1 = analysis_result.num_vertices - 1; - meshlet.reserved = 0; - - // Encode index buffer. - for (uint32_t i = 0; i < analysis_result.num_primitives; i++) - { - uint8_t i0 = vbo_remap[index_buffer[3 * (primitive_index + i) + 0]]; - uint8_t i1 = vbo_remap[index_buffer[3 * (primitive_index + i) + 1]]; - uint8_t i2 = vbo_remap[index_buffer[3 * (primitive_index + i) + 2]]; - //LOGI("Prim %u = { %u, %u, %u }\n", i, i0, i1, i2); - stream_buffer[i] = u8vec4(i0, i1, i2, 0); - } - - encode_stream(encoded.payload, meshlet.u32_streams[0], stream_buffer, analysis_result.num_primitives); - - // Handle spill region just in case. 
- uint64_t vbo_remapping[MaxVertices + 3]; - unsigned vbo_index = 0; - for (auto &v : vbo_remap) - { - assert(vbo_index < MaxVertices + 3); - vbo_remapping[vbo_index++] = (uint64_t(v.second) << 32) | v.first; - } - std::sort(vbo_remapping, vbo_remapping + vbo_index); - - for (uint32_t stream_index = 0; stream_index < num_u32_streams; stream_index++) - { - for (uint32_t i = 0; i < analysis_result.num_vertices; i++) - { - auto vertex_index = uint32_t(vbo_remapping[i]); - uint32_t payload = attributes[stream_index + num_u32_streams * vertex_index]; - memcpy(stream_buffer[i].data, &payload, sizeof(payload)); - } - - encode_stream(encoded.payload, meshlet.u32_streams[stream_index + 1], stream_buffer, - analysis_result.num_vertices); - } - - mesh.meshlets.push_back(meshlet); - base_vertex_offset += analysis_result.num_vertices; - - if (num_meshlets) - { - primitive_index += primitives_to_process; - meshlet_index++; - done = meshlet_index >= num_meshlets; - } - else - { - primitive_index += primitives_to_process; - done = primitive_index >= primitive_count; - } - } - - mesh.data_stream_size_u32 = uint32_t(encoded.payload.size()); -} - -Encoded encode_mesh(const Mesh &mesh_) -{ - auto mesh = mesh_; - mesh_canonicalize_indices(mesh); - - auto positions = SceneFormats::mesh_extract_position_snorm_exp(mesh); - auto normals = SceneFormats::mesh_extract_normal_tangent_oct8(mesh, MeshAttribute::Normal); - auto tangent = SceneFormats::mesh_extract_normal_tangent_oct8(mesh, MeshAttribute::Tangent); - auto uv = SceneFormats::mesh_extract_uv_snorm_scale(mesh); - - unsigned num_u32_streams = (sizeof(positions.front()) + sizeof(normals.front()) + - sizeof(tangent.front()) + sizeof(uv.front())) / sizeof(uint32_t); - - std::vector attributes(num_u32_streams * positions.size()); - uint32_t *ptr = attributes.data(); - for (size_t i = 0, n = positions.size(); i < n; i++) - { - memcpy(ptr, positions[i].data, sizeof(positions.front())); - ptr += sizeof(positions.front()) / sizeof(uint32_t); 
- memcpy(ptr, normals[i].data, sizeof(normals.front())); - ptr += sizeof(normals.front()) / sizeof(uint32_t); - memcpy(ptr, tangent[i].data, sizeof(tangent.front())); - ptr += sizeof(tangent.front()) / sizeof(uint32_t); - memcpy(ptr, uv[i].data, sizeof(uv.front())); - ptr += sizeof(uv.front()) / sizeof(uint32_t); - } - - // Use quantized position to guide the clustering. - std::vector position_buffer; - position_buffer.reserve(positions.size()); - for (auto &p : positions) - position_buffer.push_back(decode_snorm_exp(p)); - - constexpr unsigned max_vertices = 255; - constexpr unsigned max_primitives = 256; - std::vector optimized_index_buffer(mesh.count); - meshopt_optimizeVertexCache( - optimized_index_buffer.data(), reinterpret_cast(mesh.indices.data()), - mesh.count, positions.size()); - size_t num_meshlets = meshopt_buildMeshletsBound(mesh.count, max_vertices, max_primitives); - - std::vector out_vertex_redirection_buffer(num_meshlets * max_vertices); - std::vector local_index_buffer(num_meshlets * max_primitives * 3); - std::vector meshlets(num_meshlets); - - num_meshlets = meshopt_buildMeshlets(meshlets.data(), - out_vertex_redirection_buffer.data(), local_index_buffer.data(), - optimized_index_buffer.data(), mesh.count, - position_buffer[0].data, positions.size(), sizeof(vec3), - max_vertices, max_primitives, 0.75f); - - meshlets.resize(num_meshlets); - - std::vector out_meshlets; - std::vector out_index_buffer; - - out_meshlets.reserve(num_meshlets); - for (auto &meshlet : meshlets) - { - Meshlet m = {}; - m.offset = uint32_t(out_index_buffer.size()); - m.count = meshlet.triangle_count; - out_meshlets.push_back(m); - - auto *local_indices = optimized_index_buffer.data() + meshlet.triangle_offset; - for (unsigned i = 0; i < meshlet.triangle_count; i++) - { - out_index_buffer.emplace_back( - out_vertex_redirection_buffer[local_indices[3 * i + 0] + meshlet.vertex_offset], - out_vertex_redirection_buffer[local_indices[3 * i + 1] + meshlet.vertex_offset], - 
out_vertex_redirection_buffer[local_indices[3 * i + 2] + meshlet.vertex_offset]); - } - } - - std::vector bounds; - bounds.clear(); - bounds.reserve(num_meshlets); - for (auto &meshlet : out_meshlets) - { - auto bound = meshopt_computeClusterBounds( - out_index_buffer[meshlet.offset].data, meshlet.count * 3, - position_buffer[0].data, positions.size(), sizeof(vec3)); - bounds.push_back(bound); - } - - Encoded encoded; - encode_mesh(encoded, out_meshlets.data(), out_meshlets.size(), - out_index_buffer[0].data, out_index_buffer.size() / 3, - attributes.data(), num_u32_streams); - encoded.mesh.mesh_style = MeshStyle::Textured; - - assert(bounds.size() == encoded.mesh.meshlets.size()); - const auto *pbounds = bounds.data(); - for (auto &meshlet : encoded.mesh.meshlets) - { - meshlet.bound.center = vec3( - pbounds->center[0], pbounds->center[1], pbounds->center[2]); - meshlet.bound.radius = pbounds->radius; - meshlet.bound.cone_axis_cutoff = i8vec4( - pbounds->cone_axis_s8[0], pbounds->cone_axis_s8[1], - pbounds->cone_axis_s8[2], pbounds->cone_cutoff_s8); - } - return encoded; -} -} - static bool mesh_unroll_vertices(Mesh &mesh) { if (mesh.topology != VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST) @@ -890,9 +266,7 @@ void mesh_deduplicate_vertices(Mesh &mesh) mesh.index_type = VK_INDEX_TYPE_UINT32; mesh.indices.resize(index_buffer.size() * sizeof(uint32_t)); - size_t count = index_buffer.size(); - for (size_t i = 0; i < count; i++) - reinterpret_cast(mesh.indices.data())[i] = index_buffer[i]; + memcpy(mesh.indices.data(), index_buffer.data(), index_buffer.size() * sizeof(uint32_t)); mesh.count = unsigned(index_buffer.size()); } diff --git a/renderer/formats/scene_formats.hpp b/renderer/formats/scene_formats.hpp index 76288e42..18661805 100644 --- a/renderer/formats/scene_formats.hpp +++ b/renderer/formats/scene_formats.hpp @@ -248,86 +248,6 @@ struct SceneInformation const SceneNodes *scene_nodes = nullptr; }; -// Ensures that a Mesh has a TRIANGLE_LIST + uint32_t indices for 
easy consumption later (by meshlet encoding). -bool mesh_canonicalize_indices(Mesh &mesh); - -std::vector mesh_extract_position_snorm_exp(const Mesh &mesh); -std::vector mesh_extract_normal_tangent_oct8(const Mesh &mesh, MeshAttribute attr); -std::vector mesh_extract_uv_snorm_scale(const Mesh &mesh); - -namespace Meshlet -{ -static constexpr unsigned MaxU32Streams = 16; -static constexpr unsigned MaxElements = 256; -static constexpr unsigned MaxPrimitives = MaxElements; -static constexpr unsigned MaxVertices = MaxElements; - -struct Stream -{ - uint16_t predictor[4 * 2 + 2]; - uint32_t offset_from_base_u32; - uint16_t bitplane_meta[MaxElements / 32]; -}; - -struct MetadataGPU -{ - uint32_t base_vertex_offset; - uint8_t num_primitives_minus_1; - uint8_t num_attributes_minus_1; - uint16_t reserved; -}; - -struct Bound -{ - vec3 center; - float radius; - i8vec4 cone_axis_cutoff; -}; - -struct Metadata : MetadataGPU -{ - Bound bound; - Stream u32_streams[MaxU32Streams]; -}; - -enum class StreamType : uint8_t -{ - Primitive = 0, // R8G8B8X8_UINT - PositionE16, // RGB16_SSCALED * 2^(A16_SINT) - NormalOct8, // Octahedron encoding in RG8. - TangentOct8, // Octahedron encoding in RG8, sign bit in B8 (if not zero, +1, otherwise -1). - UV, // R16G16_SNORM * B16_SSCALED - BoneIndices, // RGBA8_UINT - BoneWeights, // RGB8_UNORM (sums to 1, A is implied). 
-}; - -enum class MeshStyle : uint32_t -{ - Wireframe = 0, // Primitive + Position - Untextured, // Wireframe + NormalOct8 - Textured, // Untextured + TangentOct8 + UV - Skinned // Textured + Bone* -}; - -struct CombinedMesh -{ - uint32_t stream_count; - uint32_t data_stream_offset_u32; - uint32_t data_stream_size_u32; - MeshStyle mesh_style; - - std::vector meshlets; -}; - -struct Encoded -{ - std::vector payload; - CombinedMesh mesh; -}; - -Encoded encode_mesh(const Mesh &mesh); -} - bool mesh_recompute_normals(Mesh &mesh); bool mesh_recompute_tangents(Mesh &mesh); bool mesh_renormalize_normals(Mesh &mesh); diff --git a/scene-export/CMakeLists.txt b/scene-export/CMakeLists.txt index b217e6f3..406ba829 100644 --- a/scene-export/CMakeLists.txt +++ b/scene-export/CMakeLists.txt @@ -6,6 +6,7 @@ add_granite_internal_lib(granite-scene-export gltf_export.cpp gltf_export.hpp rgtc_compressor.cpp rgtc_compressor.hpp tmx_parser.cpp tmx_parser.hpp + meshlet.cpp meshlet.hpp texture_utils.cpp texture_utils.hpp) target_include_directories(granite-scene-export PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/scene-export/meshlet.cpp b/scene-export/meshlet.cpp new file mode 100644 index 00000000..5ec12e13 --- /dev/null +++ b/scene-export/meshlet.cpp @@ -0,0 +1,744 @@ +/* Copyright (c) 2017-2023 Hans-Kristian Arntzen + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "meshlet.hpp" +#include "meshoptimizer.h" +#include "enum_cast.hpp" +#include "math.hpp" + +namespace Granite +{ +namespace Meshlet +{ +static constexpr unsigned MaxU32Streams = 16; +static constexpr unsigned MaxElements = 256; +static constexpr unsigned MaxPrimitives = MaxElements; +static constexpr unsigned MaxVertices = MaxElements; + +struct Stream +{ + uint16_t predictor[4 * 2 + 2]; + uint32_t offset_from_base_u32; + uint16_t bitplane_meta[MaxElements / 32]; +}; + +struct MetadataGPU +{ + uint32_t base_vertex_offset; + uint8_t num_primitives_minus_1; + uint8_t num_attributes_minus_1; + uint16_t reserved; +}; + +struct Bound +{ + vec3 center; + float radius; + i8vec4 cone_axis_cutoff; +}; + +struct Metadata : MetadataGPU +{ + Bound bound; + Stream u32_streams[MaxU32Streams]; +}; + +enum class StreamType : uint8_t +{ + Primitive = 0, // R8G8B8X8_UINT + PositionE16, // RGB16_SSCALED * 2^(A16_SINT) + NormalOct8, // Octahedron encoding in RG8. + TangentOct8, // Octahedron encoding in RG8, sign bit in B8 (if not zero, +1, otherwise -1). + UV, // R16G16_SNORM * B16_SSCALED + BoneIndices, // RGBA8_UINT + BoneWeights, // RGB8_UNORM (sums to 1, A is implied). 
+}; + +enum class MeshStyle : uint32_t +{ + Wireframe = 0, // Primitive + Position + Untextured, // Wireframe + NormalOct8 + Textured, // Untextured + TangentOct8 + UV + Skinned // Textured + Bone* +}; + +struct CombinedMesh +{ + uint32_t stream_count; + uint32_t data_stream_offset_u32; + uint32_t data_stream_size_u32; + MeshStyle mesh_style; + + std::vector meshlets; +}; + +struct Encoded +{ + std::vector payload; + CombinedMesh mesh; +}; + +struct Meshlet +{ + uint32_t offset; + uint32_t count; +}; + +struct PrimitiveAnalysisResult +{ + uint32_t num_primitives; + uint32_t num_vertices; +}; + +static bool mesh_canonicalize_indices(SceneFormats::Mesh &mesh) +{ + if (mesh.topology != VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST && + mesh.topology != VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP) + { + LOGE("Topology must be trilist or tristrip.\n"); + return false; + } + + std::vector unrolled_indices; + unrolled_indices.reserve(mesh.count); + + if (mesh.indices.empty()) + { + for (unsigned i = 0; i < mesh.count; i++) + unrolled_indices.push_back(i); + mesh.index_type = VK_INDEX_TYPE_UINT32; + } + else if (mesh.index_type == VK_INDEX_TYPE_UINT32) + { + auto *indices = reinterpret_cast(mesh.indices.data()); + for (unsigned i = 0; i < mesh.count; i++) + unrolled_indices.push_back(indices[i]); + } + else if (mesh.index_type == VK_INDEX_TYPE_UINT16) + { + auto *indices = reinterpret_cast(mesh.indices.data()); + for (unsigned i = 0; i < mesh.count; i++) + unrolled_indices.push_back(mesh.primitive_restart && indices[i] == UINT16_MAX ? UINT32_MAX : indices[i]); + } + else if (mesh.index_type == VK_INDEX_TYPE_UINT8_EXT) + { + auto *indices = reinterpret_cast(mesh.indices.data()); + for (unsigned i = 0; i < mesh.count; i++) + unrolled_indices.push_back(mesh.primitive_restart && indices[i] == UINT8_MAX ? 
UINT32_MAX : indices[i]); + } + + if (mesh.topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP) + { + std::vector unstripped_indices; + unstripped_indices.reserve(mesh.count * 3); + unsigned primitive_count_since_restart = 0; + + for (unsigned i = 2; i < mesh.count; i++) + { + bool emit_primitive = true; + if (mesh.primitive_restart && + unrolled_indices[i - 2] == UINT32_MAX && + unrolled_indices[i - 1] == UINT32_MAX && + unrolled_indices[i - 0] == UINT32_MAX) + { + emit_primitive = false; + primitive_count_since_restart = 0; + } + + if (emit_primitive) + { + unstripped_indices.push_back(unrolled_indices[i - 2]); + unstripped_indices.push_back(unrolled_indices[i - (1 ^ (primitive_count_since_restart & 1))]); + unstripped_indices.push_back(unrolled_indices[i - (primitive_count_since_restart & 1)]); + primitive_count_since_restart++; + } + } + + unrolled_indices = std::move(unstripped_indices); + mesh.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + } + + mesh.count = uint32_t(unrolled_indices.size()); + mesh.indices.resize(unrolled_indices.size() * sizeof(uint32_t)); + memcpy(mesh.indices.data(), unrolled_indices.data(), mesh.indices.size()); + return true; +} + +static i16vec4 encode_vec3_to_snorm_exp(vec3 v) +{ + vec3 vabs = abs(v); + float max_scale = max(max(vabs.x, vabs.y), vabs.z); + int max_scale_log2 = int(floor(log2(max_scale))); + int scale_log2 = 14 - max_scale_log2; + + // Maximum component should have range of [1, 2) since we use floor of log2, so scale with 2^14 instead of 15. + v.x = ldexpf(v.x, scale_log2); + v.y = ldexpf(v.y, scale_log2); + v.z = ldexpf(v.z, scale_log2); + v = clamp(round(v), vec3(-0x8000), vec3(0x7fff)); + + return i16vec4(i16vec3(v), int16_t(-scale_log2)); +} + +static i16vec3 encode_vec2_to_snorm_exp(vec2 v) +{ + vec2 vabs = abs(v); + float max_scale = max(vabs.x, vabs.y); + int max_scale_log2 = int(floor(log2(max_scale))); + int scale_log2 = 14 - max_scale_log2; + + // UVs are unorm scaled, don't need more accuracy than this. 
+ // If all UVs are in range of [0, 1] space, we should get a constant exponent which aids compression. + scale_log2 = min(scale_log2, 15); + + // Maximum component should have range of [1, 2) since we use floor of log2, so scale with 2^14 instead of 15. + v.x = ldexpf(v.x, scale_log2); + v.y = ldexpf(v.y, scale_log2); + v = clamp(round(v), vec2(-0x8000), vec2(0x7fff)); + + return i16vec3(i16vec2(v), int16_t(-scale_log2)); +} + +static std::vector mesh_extract_position_snorm_exp(const SceneFormats::Mesh &mesh) +{ + std::vector encoded_positions; + std::vector positions; + + size_t num_positions = mesh.positions.size() / mesh.position_stride; + positions.resize(num_positions); + auto &layout = mesh.attribute_layout[Util::ecast(MeshAttribute::Position)]; + auto fmt = layout.format; + + if (fmt == VK_FORMAT_R32G32B32A32_SFLOAT || fmt == VK_FORMAT_R32G32B32_SFLOAT) + { + for (size_t i = 0; i < num_positions; i++) + memcpy(positions[i].data, mesh.positions.data() + i * mesh.position_stride + layout.offset, sizeof(float) * 3); + } + else if (fmt == VK_FORMAT_UNDEFINED) + return {}; + else + { + LOGE("Unexpected format %u.\n", fmt); + return {}; + } + + encoded_positions.reserve(positions.size()); + for (auto &pos : positions) + encoded_positions.push_back(encode_vec3_to_snorm_exp(pos)); + + return encoded_positions; +} + +static std::vector mesh_extract_normal_tangent_oct8(const SceneFormats::Mesh &mesh, MeshAttribute attr) +{ + std::vector encoded_attributes; + std::vector normals; + + auto &layout = mesh.attribute_layout[Util::ecast(attr)]; + auto fmt = layout.format; + + size_t num_attrs = mesh.attributes.size() / mesh.attribute_stride; + normals.resize(num_attrs); + + if (fmt == VK_FORMAT_R32G32B32_SFLOAT) + { + for (size_t i = 0; i < num_attrs; i++) + { + memcpy(normals[i].data, + mesh.attributes.data() + i * mesh.attribute_stride + layout.offset, + sizeof(float) * 3); + normals[i].w = 0.0f; + } + } + else if (fmt == VK_FORMAT_R32G32B32A32_SFLOAT) + { + for (size_t 
i = 0; i < num_attrs; i++) + { + memcpy(normals[i].data, + mesh.attributes.data() + i * mesh.attribute_stride + layout.offset, + sizeof(float) * 4); + } + } + else if (fmt == VK_FORMAT_UNDEFINED) + return {}; + else + { + LOGE("Unexpected format %u.\n", fmt); + return {}; + } + + encoded_attributes.resize(normals.size()); + meshopt_encodeFilterOct(encoded_attributes.data(), encoded_attributes.size(), + sizeof(i8vec4), 8, normals[0].data); + for (auto &n : encoded_attributes) + n.w = n.w <= 0 ? -1 : 0; + + return encoded_attributes; +} + +static i16vec4 encode_uv_to_snorm_scale(vec2 uv) +{ + // UVs tend to be in [0, 1] range. Readjust to use more of the available range. + uv = 2.0f * uv - 1.0f; + return i16vec4(encode_vec2_to_snorm_exp(uv), 0); +} + +static std::vector mesh_extract_uv_snorm_scale(const SceneFormats::Mesh &mesh) +{ + std::vector encoded_uvs; + std::vector uvs; + + size_t num_uvs = mesh.attributes.size() / mesh.attribute_stride; + uvs.resize(num_uvs); + auto &layout = mesh.attribute_layout[Util::ecast(MeshAttribute::UV)]; + auto fmt = layout.format; + + if (fmt == VK_FORMAT_R32G32_SFLOAT) + { + for (size_t i = 0; i < num_uvs; i++) + memcpy(uvs[i].data, mesh.attributes.data() + i * mesh.attribute_stride + layout.offset, sizeof(float) * 2); + } + else if (fmt == VK_FORMAT_R16G16_UNORM) + { + for (size_t i = 0; i < num_uvs; i++) + { + u16vec2 u16; + memcpy(u16.data, mesh.attributes.data() + i * mesh.attribute_stride + layout.offset, sizeof(uint16_t) * 2); + uvs[i] = vec2(u16) * float(1.0f / 0xffff); + } + } + else if (fmt == VK_FORMAT_UNDEFINED) + return {}; + else + { + LOGE("Unexpected format %u.\n", fmt); + return {}; + } + + encoded_uvs.reserve(uvs.size()); + for (auto &uv : uvs) + encoded_uvs.push_back(encode_uv_to_snorm_scale(uv)); + + return encoded_uvs; +} + +static vec3 decode_snorm_exp(i16vec4 p) +{ + vec3 result; + result.x = ldexpf(float(p.x), p.w); + result.y = ldexpf(float(p.y), p.w); + result.z = ldexpf(float(p.z), p.w); + return result; 
+} + +static PrimitiveAnalysisResult analyze_primitive_count(std::unordered_map &vertex_remap, + const uint32_t *index_buffer, uint32_t max_num_primitives) +{ + PrimitiveAnalysisResult result = {}; + uint32_t vertex_count = 0; + + // We can reference a maximum of 256 vertices. + vertex_remap.clear(); + + for (uint32_t i = 0; i < max_num_primitives; i++) + { + uint32_t index0 = index_buffer[3 * i + 0]; + uint32_t index1 = index_buffer[3 * i + 1]; + uint32_t index2 = index_buffer[3 * i + 2]; + + vertex_count = uint32_t(vertex_remap.size()); + + vertex_remap.insert({index0, uint32_t(vertex_remap.size())}); + vertex_remap.insert({index1, uint32_t(vertex_remap.size())}); + vertex_remap.insert({index2, uint32_t(vertex_remap.size())}); + + // If this primitive causes us to go out of bounds, reset. + if (vertex_remap.size() > MaxVertices) + { + max_num_primitives = i; + break; + } + + vertex_count = uint32_t(vertex_remap.size()); + } + + result.num_primitives = max_num_primitives; + result.num_vertices = vertex_count; + return result; +} + +// Analyze bits required to encode a signed delta. +static uvec4 compute_required_bits_unsigned(u8vec4 delta) +{ + uvec4 result; + for (unsigned i = 0; i < 4; i++) + { + uint32_t v = delta[i]; + result[i] = v == 0 ? 0 : (32 - leading_zeroes(v)); + } + return result; +} + +static uvec4 compute_required_bits_signed(u8vec4 delta) +{ + uvec4 result; + for (unsigned i = 0; i < 4; i++) + { + uint32_t v = delta[i]; + + if (v == 0) + { + result[i] = 0; + } else + { + if (v >= 0x80u) + v ^= 0xffu; + result[i] = v == 0 ? 
1 : (33 - leading_zeroes(v)); + } + } + return result; +} + +static uint32_t extract_bit_plane(const uint8_t *bytes, unsigned bit_index) +{ + uint32_t u32 = 0; + for (unsigned i = 0; i < 32; i++) + u32 |= ((bytes[4 * i] >> bit_index) & 1u) << i; + return u32; +} + +static void find_linear_predictor(uint16_t *predictor, + const u8vec4 (&stream_buffer)[MaxElements], + unsigned num_elements) +{ + // Sign-extend since the deltas are considered to be signed ints. + ivec4 unrolled_data[MaxElements]; + for (unsigned i = 0; i < num_elements; i++) + unrolled_data[i] = ivec4(i8vec4(stream_buffer[i])); + + // Simple linear regression. + // Pilfered from: https://www.codesansar.com/numerical-methods/linear-regression-method-using-c-programming.htm + ivec4 x{0}, x2{0}, y{0}, xy{0}; + for (unsigned i = 0; i < num_elements; i++) + { + x += int(i); + x2 += int(i * i); + y += unrolled_data[i]; + xy += int(i) * unrolled_data[i]; + } + + int n = int(num_elements); + ivec4 b_denom = (n * x2 - x * x); + b_denom = select(b_denom, ivec4(1), equal(ivec4(0), b_denom)); + + // Encode in u8.8 fixed point. + ivec4 b = (ivec4(256) * (n * xy - x * y)) / b_denom; + ivec4 a = ((ivec4(256) * y - b * x)) / n; + + for (unsigned i = 0; i < 4; i++) + predictor[i] = uint16_t(a[i]); + for (unsigned i = 0; i < 4; i++) + predictor[4 + i] = uint16_t(b[i]); +} + +static void encode_stream(std::vector &out_payload_buffer, + Stream &stream, u8vec4 (&stream_buffer)[MaxElements], + unsigned num_elements) +{ + stream.offset_from_base_u32 = uint32_t(out_payload_buffer.size()); + + // Delta-encode + u8vec4 current_value; + if (num_elements > 1) + current_value = u8vec4(2) * stream_buffer[0] - stream_buffer[1]; + else + current_value = stream_buffer[0]; + u8vec4 bias_value = current_value; + + for (unsigned i = 0; i < num_elements; i++) + { + u8vec4 next_value = stream_buffer[i]; + stream_buffer[i] = next_value - current_value; + current_value = next_value; + } + + // Find optimal linear predictor. 
+ find_linear_predictor(stream.predictor, stream_buffer, num_elements); + + // u8.8 fixed point. + auto base_predictor = u16vec4(stream.predictor[0], stream.predictor[1], stream.predictor[2], stream.predictor[3]); + auto linear_predictor = u16vec4(stream.predictor[4], stream.predictor[5], stream.predictor[6], stream.predictor[7]); + + for (unsigned i = 0; i < num_elements; i++) + { + // Only predict in-bounds elements, since we want all out of bounds elements to be encoded to 0 delta + // without having them affect the predictor. + stream_buffer[i] -= u8vec4((base_predictor + linear_predictor * uint16_t(i)) >> uint16_t(8)); + } + + for (unsigned i = num_elements; i < MaxElements; i++) + stream_buffer[i] = u8vec4(0); + + // Try to adjust the range such that it can fit in fewer bits. + // We can use the constant term in the linear predictor to nudge values in place. + i8vec4 lo(127); + i8vec4 hi(-128); + + for (unsigned i = 0; i < num_elements; i++) + { + lo = min(lo, i8vec4(stream_buffer[i])); + hi = max(hi, i8vec4(stream_buffer[i])); + } + + uvec4 full_bits = compute_required_bits_unsigned(u8vec4(hi - lo)); + u8vec4 target_lo_value = u8vec4(-((uvec4(1) << full_bits) >> 1u)); + u8vec4 bias = target_lo_value - u8vec4(lo); + + for (unsigned i = 0; i < num_elements; i++) + stream_buffer[i] += bias; + + for (unsigned i = 0; i < 4; i++) + stream.predictor[i] -= uint16_t(bias[i]) << 8; + + // Based on the linear predictor, it's possible that the encoded value in stream_buffer[0] becomes non-zero again. + // This is undesirable, since we can use the initial value to force a delta of 0 here, saving precious bits. + bias_value += stream_buffer[0]; + stream_buffer[0] = u8vec4(0); + + // Simple linear predictor, base equal elements[0], gradient = 0. + stream.predictor[8] = uint16_t((bias_value.y << 8) | bias_value.x); + stream.predictor[9] = uint16_t((bias_value.w << 8) | bias_value.z); + + // Encode 32 elements at once. 
+ for (unsigned chunk_index = 0; chunk_index < MaxElements / 32; chunk_index++) + { + uvec4 required_bits = {}; + for (unsigned i = 0; i < 32; i++) + required_bits = max(required_bits, compute_required_bits_signed(stream_buffer[chunk_index * 32 + i])); + + // Encode bit counts. + stream.bitplane_meta[chunk_index] = uint16_t((required_bits.x << 0) | (required_bits.y << 4) | + (required_bits.z << 8) | (required_bits.w << 12)); + + for (unsigned i = 0; i < required_bits.x; i++) + out_payload_buffer.push_back(extract_bit_plane(&stream_buffer[chunk_index * 32][0], i)); + for (unsigned i = 0; i < required_bits.y; i++) + out_payload_buffer.push_back(extract_bit_plane(&stream_buffer[chunk_index * 32][1], i)); + for (unsigned i = 0; i < required_bits.z; i++) + out_payload_buffer.push_back(extract_bit_plane(&stream_buffer[chunk_index * 32][2], i)); + for (unsigned i = 0; i < required_bits.w; i++) + out_payload_buffer.push_back(extract_bit_plane(&stream_buffer[chunk_index * 32][3], i)); + } +} + +static void encode_mesh(Encoded &encoded, + const Meshlet *meshlets, size_t num_meshlets, + const uint32_t *index_buffer, uint32_t primitive_count, + const uint32_t *attributes, + unsigned num_u32_streams) +{ + encoded = {}; + auto &mesh = encoded.mesh; + mesh.stream_count = num_u32_streams + 1; + mesh.data_stream_offset_u32 = 0; // Can be adjusted in isolation later to pack multiple payload streams into one buffer. 
+ mesh.meshlets.reserve(num_meshlets); + uint32_t base_vertex_offset = 0; + + std::unordered_map vbo_remap; + uint32_t primitive_index = 0; + + for (uint32_t meshlet_index = 0; meshlet_index < num_meshlets; meshlet_index++) + { + uint32_t primitives_to_process = min(primitive_count - primitive_index, meshlets[meshlet_index].count); + assert(primitives_to_process); + assert(primitive_count > primitive_index); + + PrimitiveAnalysisResult analysis_result = {}; + primitive_index = meshlets[meshlet_index].offset; + + analysis_result = analyze_primitive_count( + vbo_remap, index_buffer + 3 * primitive_index, + primitives_to_process); + + assert(analysis_result.num_primitives); + assert(analysis_result.num_vertices); + + primitives_to_process = analysis_result.num_primitives; + + Metadata meshlet = {}; + u8vec4 stream_buffer[MaxElements]; + + meshlet.base_vertex_offset = base_vertex_offset; + meshlet.num_primitives_minus_1 = analysis_result.num_primitives - 1; + meshlet.num_attributes_minus_1 = analysis_result.num_vertices - 1; + meshlet.reserved = 0; + + // Encode index buffer. + for (uint32_t i = 0; i < analysis_result.num_primitives; i++) + { + uint8_t i0 = vbo_remap[index_buffer[3 * (primitive_index + i) + 0]]; + uint8_t i1 = vbo_remap[index_buffer[3 * (primitive_index + i) + 1]]; + uint8_t i2 = vbo_remap[index_buffer[3 * (primitive_index + i) + 2]]; + //LOGI("Prim %u = { %u, %u, %u }\n", i, i0, i1, i2); + stream_buffer[i] = u8vec4(i0, i1, i2, 0); + } + + encode_stream(encoded.payload, meshlet.u32_streams[0], stream_buffer, analysis_result.num_primitives); + + // Handle spill region just in case. 
+ uint64_t vbo_remapping[MaxVertices + 3]; + unsigned vbo_index = 0; + for (auto &v: vbo_remap) + { + assert(vbo_index < MaxVertices + 3); + vbo_remapping[vbo_index++] = (uint64_t(v.second) << 32) | v.first; + } + std::sort(vbo_remapping, vbo_remapping + vbo_index); + + for (uint32_t stream_index = 0; stream_index < num_u32_streams; stream_index++) + { + for (uint32_t i = 0; i < analysis_result.num_vertices; i++) + { + auto vertex_index = uint32_t(vbo_remapping[i]); + uint32_t payload = attributes[stream_index + num_u32_streams * vertex_index]; + memcpy(stream_buffer[i].data, &payload, sizeof(payload)); + } + + encode_stream(encoded.payload, meshlet.u32_streams[stream_index + 1], stream_buffer, + analysis_result.num_vertices); + } + + mesh.meshlets.push_back(meshlet); + base_vertex_offset += analysis_result.num_vertices; + primitive_index += primitives_to_process; + } + + mesh.data_stream_size_u32 = uint32_t(encoded.payload.size()); +} + +bool export_mesh_to_meshlet(const std::string &path, SceneFormats::Mesh mesh) +{ + if (!mesh_canonicalize_indices(mesh)) + return false; + mesh_deduplicate_vertices(mesh); + + auto positions = mesh_extract_position_snorm_exp(mesh); + auto normals = mesh_extract_normal_tangent_oct8(mesh, MeshAttribute::Normal); + auto tangent = mesh_extract_normal_tangent_oct8(mesh, MeshAttribute::Tangent); + auto uv = mesh_extract_uv_snorm_scale(mesh); + + unsigned num_u32_streams = (sizeof(positions.front()) + sizeof(normals.front()) + + sizeof(tangent.front()) + sizeof(uv.front())) / sizeof(uint32_t); + + std::vector attributes(num_u32_streams * positions.size()); + uint32_t *ptr = attributes.data(); + for (size_t i = 0, n = positions.size(); i < n; i++) + { + memcpy(ptr, positions[i].data, sizeof(positions.front())); + ptr += sizeof(positions.front()) / sizeof(uint32_t); + memcpy(ptr, normals[i].data, sizeof(normals.front())); + ptr += sizeof(normals.front()) / sizeof(uint32_t); + memcpy(ptr, tangent[i].data, sizeof(tangent.front())); + ptr += 
sizeof(tangent.front()) / sizeof(uint32_t); + memcpy(ptr, uv[i].data, sizeof(uv.front())); + ptr += sizeof(uv.front()) / sizeof(uint32_t); + } + + // Use quantized position to guide the clustering. + std::vector position_buffer; + position_buffer.reserve(positions.size()); + for (auto &p: positions) + position_buffer.push_back(decode_snorm_exp(p)); + + constexpr unsigned max_vertices = 255; + constexpr unsigned max_primitives = 256; + std::vector optimized_index_buffer(mesh.count); + meshopt_optimizeVertexCache( + optimized_index_buffer.data(), reinterpret_cast(mesh.indices.data()), + mesh.count, positions.size()); + size_t num_meshlets = meshopt_buildMeshletsBound(mesh.count, max_vertices, max_primitives); + + std::vector out_vertex_redirection_buffer(num_meshlets * max_vertices); + std::vector local_index_buffer(num_meshlets * max_primitives * 3); + std::vector meshlets(num_meshlets); + + num_meshlets = meshopt_buildMeshlets(meshlets.data(), + out_vertex_redirection_buffer.data(), local_index_buffer.data(), + optimized_index_buffer.data(), mesh.count, + position_buffer[0].data, positions.size(), sizeof(vec3), + max_vertices, max_primitives, 0.75f); + + meshlets.resize(num_meshlets); + + std::vector out_meshlets; + std::vector out_index_buffer; + + out_meshlets.reserve(num_meshlets); + for (auto &meshlet: meshlets) + { + Meshlet m = {}; + m.offset = uint32_t(out_index_buffer.size()); + m.count = meshlet.triangle_count; + out_meshlets.push_back(m); + + auto *local_indices = local_index_buffer.data() + meshlet.triangle_offset; + for (unsigned i = 0; i < meshlet.triangle_count; i++) + { + out_index_buffer.emplace_back( + out_vertex_redirection_buffer[local_indices[3 * i + 0] + meshlet.vertex_offset], + out_vertex_redirection_buffer[local_indices[3 * i + 1] + meshlet.vertex_offset], + out_vertex_redirection_buffer[local_indices[3 * i + 2] + meshlet.vertex_offset]); + } + } + + std::vector bounds; + bounds.clear(); + bounds.reserve(num_meshlets); + for (auto &meshlet: 
out_meshlets) + { + auto bound = meshopt_computeClusterBounds( + out_index_buffer[meshlet.offset].data, meshlet.count * 3, + position_buffer[0].data, positions.size(), sizeof(vec3)); + bounds.push_back(bound); + } + + Encoded encoded; + encode_mesh(encoded, out_meshlets.data(), out_meshlets.size(), + out_index_buffer[0].data, out_index_buffer.size(), + attributes.data(), num_u32_streams); + encoded.mesh.mesh_style = MeshStyle::Textured; + + assert(bounds.size() == encoded.mesh.meshlets.size()); + const auto *pbounds = bounds.data(); + for (auto &meshlet: encoded.mesh.meshlets) + { + meshlet.bound.center = vec3( + pbounds->center[0], pbounds->center[1], pbounds->center[2]); + meshlet.bound.radius = pbounds->radius; + meshlet.bound.cone_axis_cutoff = i8vec4( + pbounds->cone_axis_s8[0], pbounds->cone_axis_s8[1], + pbounds->cone_axis_s8[2], pbounds->cone_cutoff_s8); + } + return true; +} +} +} diff --git a/scene-export/meshlet.hpp b/scene-export/meshlet.hpp new file mode 100644 index 00000000..ee91d056 --- /dev/null +++ b/scene-export/meshlet.hpp @@ -0,0 +1,35 @@ +/* Copyright (c) 2017-2023 Hans-Kristian Arntzen + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#pragma once + +#include +#include +#include "scene_formats.hpp" + +namespace Granite +{ +namespace Meshlet +{ +bool export_mesh_to_meshlet(const std::string &path, SceneFormats::Mesh mesh); +} +} diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 468e4e5a..06bc7e90 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -166,7 +166,7 @@ add_granite_offline_tool(meshopt-sandbox meshopt_sandbox.cpp) if (NOT ANDROID) target_compile_definitions(meshopt-sandbox PRIVATE ASSET_DIRECTORY=\"${CMAKE_CURRENT_SOURCE_DIR}/assets\") endif() -target_link_libraries(meshopt-sandbox PRIVATE meshoptimizer) +target_link_libraries(meshopt-sandbox PRIVATE granite-scene-export) add_granite_application(dgc-test dgc_test.cpp) if (NOT ANDROID) diff --git a/tests/meshopt_sandbox.cpp b/tests/meshopt_sandbox.cpp index 71f74393..ef1e9acf 100644 --- a/tests/meshopt_sandbox.cpp +++ b/tests/meshopt_sandbox.cpp @@ -8,375 +8,15 @@ #include #include "bitops.hpp" #include "gltf.hpp" -#include "meshoptimizer.h" #include "global_managers_init.hpp" +#include "meshlet.hpp" #include #include using namespace Granite; - -static constexpr unsigned MaxU32Streams = 16; -static constexpr unsigned MaxElements = 256; -static constexpr unsigned MaxPrimitives = MaxElements; -static constexpr unsigned MaxVertices = MaxElements; - -struct MeshletStream -{ - uint16_t predictor[4 * 2 + 2]; - uint32_t offset_from_base_u32; - uint16_t bitplane_meta[MaxElements / 32]; -}; - -struct MeshletMetadataGPU -{ - uint32_t base_vertex_offset; - uint8_t num_primitives_minus_1; - uint8_t num_attributes_minus_1; - uint16_t reserved; -}; - -struct MeshletMetadata : MeshletMetadataGPU -{ - MeshletStream u32_streams[MaxU32Streams]; -}; - 
-enum class StreamType : uint8_t -{ - Primitive = 0, // R8G8B8X8_UINT - PositionE16, // RGB16_SSCALED * 2^(A16_SINT) - NormalOct8, // Octahedron encoding in RG8. - TangentOct8, // Octahedron encoding in RG8, sign bit in B8 (if not zero, +1, otherwise -1). - UV, // R16G16_SNORM * B16_SSCALED - BoneIndices, // RGBA8_UINT - BoneWeights, // RGB8_UNORM (sums to 1, A is implied). -}; - -enum class MeshStyle : uint32_t -{ - Wireframe = 0, // Primitive + Position - Untextured, // Wireframe + NormalOct8 - Textured, // Untextured + TangentOct8 + UV - Skinned // Textured + Bone* -}; - -struct MeshMetadata -{ - uint32_t stream_count; - uint32_t data_stream_offset_u32; - uint32_t data_stream_size_u32; - MeshStyle mesh_style; - - std::vector meshlets; -}; - -struct PrimitiveAnalysisResult -{ - uint32_t num_primitives; - uint32_t num_vertices; -}; - -struct Meshlet -{ - uint32_t offset; - uint32_t count; -}; - -static PrimitiveAnalysisResult analyze_primitive_count(std::unordered_map &vertex_remap, - const uint32_t *index_buffer, uint32_t max_num_primitives) -{ - PrimitiveAnalysisResult result = {}; - uint32_t vertex_count = 0; - - // We can reference a maximum of 256 vertices. - vertex_remap.clear(); - - for (uint32_t i = 0; i < max_num_primitives; i++) - { - uint32_t index0 = index_buffer[3 * i + 0]; - uint32_t index1 = index_buffer[3 * i + 1]; - uint32_t index2 = index_buffer[3 * i + 2]; - - vertex_count = uint32_t(vertex_remap.size()); - - vertex_remap.insert({ index0, uint32_t(vertex_remap.size()) }); - vertex_remap.insert({ index1, uint32_t(vertex_remap.size()) }); - vertex_remap.insert({ index2, uint32_t(vertex_remap.size()) }); - - // If this primitive causes us to go out of bounds, reset. 
- if (vertex_remap.size() > MaxVertices) - { - max_num_primitives = i; - break; - } - - vertex_count = uint32_t(vertex_remap.size()); - } - - result.num_primitives = max_num_primitives; - result.num_vertices = vertex_count; - return result; -} - -// Analyze bits required to encode a signed delta. -static uvec4 compute_required_bits_unsigned(u8vec4 delta) -{ - uvec4 result; - for (unsigned i = 0; i < 4; i++) - { - uint32_t v = delta[i]; - result[i] = v == 0 ? 0 : (32 - leading_zeroes(v)); - } - return result; -} - -static uvec4 compute_required_bits_signed(u8vec4 delta) -{ - uvec4 result; - for (unsigned i = 0; i < 4; i++) - { - uint32_t v = delta[i]; - - if (v == 0) - { - result[i] = 0; - } - else - { - if (v >= 0x80u) - v ^= 0xffu; - result[i] = v == 0 ? 1 : (33 - leading_zeroes(v)); - } - } - return result; -} - -static uint32_t extract_bit_plane(const uint8_t *bytes, unsigned bit_index) -{ - uint32_t u32 = 0; - for (unsigned i = 0; i < 32; i++) - u32 |= ((bytes[4 * i] >> bit_index) & 1u) << i; - return u32; -} - -static void find_linear_predictor(uint16_t *predictor, - const u8vec4 (&stream_buffer)[MaxElements], - unsigned num_elements) -{ - // Sign-extend since the deltas are considered to be signed ints. - ivec4 unrolled_data[MaxElements]; - for (unsigned i = 0; i < num_elements; i++) - unrolled_data[i] = ivec4(i8vec4(stream_buffer[i])); - - // Simple linear regression. - // Pilfered from: https://www.codesansar.com/numerical-methods/linear-regression-method-using-c-programming.htm - ivec4 x{0}, x2{0}, y{0}, xy{0}; - for (unsigned i = 0; i < num_elements; i++) - { - x += int(i); - x2 += int(i * i); - y += unrolled_data[i]; - xy += int(i) * unrolled_data[i]; - } - - int n = int(num_elements); - ivec4 b_denom = (n * x2 - x * x); - b_denom = select(b_denom, ivec4(1), equal(ivec4(0), b_denom)); - - // Encode in u8.8 fixed point. 
- ivec4 b = (ivec4(256) * (n * xy - x * y)) / b_denom; - ivec4 a = ((ivec4(256) * y - b * x)) / n; - - for (unsigned i = 0; i < 4; i++) - predictor[i] = uint16_t(a[i]); - for (unsigned i = 0; i < 4; i++) - predictor[4 + i] = uint16_t(b[i]); -} - -static void encode_stream(std::vector &out_payload_buffer, - MeshletStream &stream, u8vec4 (&stream_buffer)[MaxElements], - unsigned num_elements) -{ - stream.offset_from_base_u32 = uint32_t(out_payload_buffer.size()); - - // Delta-encode - u8vec4 current_value; - if (num_elements > 1) - current_value = u8vec4(2) * stream_buffer[0] - stream_buffer[1]; - else - current_value = stream_buffer[0]; - u8vec4 bias_value = current_value; - - for (unsigned i = 0; i < num_elements; i++) - { - u8vec4 next_value = stream_buffer[i]; - stream_buffer[i] = next_value - current_value; - current_value = next_value; - } - - // Find optimal linear predictor. - find_linear_predictor(stream.predictor, stream_buffer, num_elements); - - // u8.8 fixed point. - auto base_predictor = u16vec4(stream.predictor[0], stream.predictor[1], stream.predictor[2], stream.predictor[3]); - auto linear_predictor = u16vec4(stream.predictor[4], stream.predictor[5], stream.predictor[6], stream.predictor[7]); - - for (unsigned i = 0; i < num_elements; i++) - { - // Only predict in-bounds elements, since we want all out of bounds elements to be encoded to 0 delta - // without having them affect the predictor. - stream_buffer[i] -= u8vec4((base_predictor + linear_predictor * uint16_t(i)) >> uint16_t(8)); - } - - for (unsigned i = num_elements; i < MaxElements; i++) - stream_buffer[i] = u8vec4(0); - - // Try to adjust the range such that it can fit in fewer bits. - // We can use the constant term in the linear predictor to nudge values in place. 
- i8vec4 lo(127); - i8vec4 hi(-128); - - for (unsigned i = 0; i < num_elements; i++) - { - lo = min(lo, i8vec4(stream_buffer[i])); - hi = max(hi, i8vec4(stream_buffer[i])); - } - - uvec4 full_bits = compute_required_bits_unsigned(u8vec4(hi - lo)); - u8vec4 target_lo_value = u8vec4(-((uvec4(1) << full_bits) >> 1u)); - u8vec4 bias = target_lo_value - u8vec4(lo); - - for (unsigned i = 0; i < num_elements; i++) - stream_buffer[i] += bias; - - for (unsigned i = 0; i < 4; i++) - stream.predictor[i] -= uint16_t(bias[i]) << 8; - - // Based on the linear predictor, it's possible that the encoded value in stream_buffer[0] becomes non-zero again. - // This is undesirable, since we can use the initial value to force a delta of 0 here, saving precious bits. - bias_value += stream_buffer[0]; - stream_buffer[0] = u8vec4(0); - - // Simple linear predictor, base equal elements[0], gradient = 0. - stream.predictor[8] = uint16_t((bias_value.y << 8) | bias_value.x); - stream.predictor[9] = uint16_t((bias_value.w << 8) | bias_value.z); - - // Encode 32 elements at once. - for (unsigned chunk_index = 0; chunk_index < MaxElements / 32; chunk_index++) - { - uvec4 required_bits = {}; - for (unsigned i = 0; i < 32; i++) - required_bits = max(required_bits, compute_required_bits_signed(stream_buffer[chunk_index * 32 + i])); - - // Encode bit counts. 
- stream.bitplane_meta[chunk_index] = uint16_t((required_bits.x << 0) | (required_bits.y << 4) | - (required_bits.z << 8) | (required_bits.w << 12)); - - for (unsigned i = 0; i < required_bits.x; i++) - out_payload_buffer.push_back(extract_bit_plane(&stream_buffer[chunk_index * 32][0], i)); - for (unsigned i = 0; i < required_bits.y; i++) - out_payload_buffer.push_back(extract_bit_plane(&stream_buffer[chunk_index * 32][1], i)); - for (unsigned i = 0; i < required_bits.z; i++) - out_payload_buffer.push_back(extract_bit_plane(&stream_buffer[chunk_index * 32][2], i)); - for (unsigned i = 0; i < required_bits.w; i++) - out_payload_buffer.push_back(extract_bit_plane(&stream_buffer[chunk_index * 32][3], i)); - } -} - -static void encode_mesh(std::vector &out_payload_buffer, MeshMetadata &mesh, - const Meshlet *meshlets, size_t num_meshlets, - const uint32_t *index_buffer, uint32_t primitive_count, - const uint32_t *attributes, - unsigned num_u32_streams) -{ - mesh = {}; - mesh.stream_count = num_u32_streams + 1; - mesh.data_stream_offset_u32 = 0; // Can be adjusted in isolation later to pack multiple payload streams into one buffer. - mesh.meshlets.reserve((primitive_count + MaxPrimitives - 1) / MaxPrimitives); - uint32_t base_vertex_offset = 0; - - std::unordered_map vbo_remap; - uint32_t primitive_index = 0; - unsigned meshlet_index = 0; - bool done = false; - - while (!done) - { - uint32_t primitives_to_process = min(primitive_count - primitive_index, - num_meshlets ? 
meshlets[meshlet_index].count : MaxPrimitives); - - PrimitiveAnalysisResult analysis_result = {}; - if (num_meshlets) - primitive_index = meshlets[meshlet_index].offset; - - analysis_result = analyze_primitive_count( - vbo_remap, index_buffer + 3 * primitive_index, - primitives_to_process); - - primitives_to_process = analysis_result.num_primitives; - - MeshletMetadata meshlet = {}; - u8vec4 stream_buffer[MaxElements]; - - meshlet.base_vertex_offset = base_vertex_offset; - meshlet.num_primitives_minus_1 = analysis_result.num_primitives - 1; - meshlet.num_attributes_minus_1 = analysis_result.num_vertices - 1; - meshlet.reserved = 0; - - // Encode index buffer. - for (uint32_t i = 0; i < analysis_result.num_primitives; i++) - { - uint8_t i0 = vbo_remap[index_buffer[3 * (primitive_index + i) + 0]]; - uint8_t i1 = vbo_remap[index_buffer[3 * (primitive_index + i) + 1]]; - uint8_t i2 = vbo_remap[index_buffer[3 * (primitive_index + i) + 2]]; - //LOGI("Prim %u = { %u, %u, %u }\n", i, i0, i1, i2); - stream_buffer[i] = u8vec4(i0, i1, i2, 0); - } - - encode_stream(out_payload_buffer, meshlet.u32_streams[0], stream_buffer, analysis_result.num_primitives); - - // Handle spill region just in case. 
- uint64_t vbo_remapping[MaxVertices + 3]; - unsigned vbo_index = 0; - for (auto &v : vbo_remap) - { - assert(vbo_index < MaxVertices + 3); - vbo_remapping[vbo_index++] = (uint64_t(v.second) << 32) | v.first; - } - std::sort(vbo_remapping, vbo_remapping + vbo_index); - - for (uint32_t stream_index = 0; stream_index < num_u32_streams; stream_index++) - { - for (uint32_t i = 0; i < analysis_result.num_vertices; i++) - { - auto vertex_index = uint32_t(vbo_remapping[i]); - uint32_t payload = attributes[stream_index + num_u32_streams * vertex_index]; - memcpy(stream_buffer[i].data, &payload, sizeof(payload)); - } - - encode_stream(out_payload_buffer, meshlet.u32_streams[stream_index + 1], stream_buffer, - analysis_result.num_vertices); - } - - mesh.meshlets.push_back(meshlet); - base_vertex_offset += analysis_result.num_vertices; - - if (num_meshlets) - { - primitive_index += primitives_to_process; - meshlet_index++; - done = meshlet_index >= num_meshlets; - } - else - { - primitive_index += primitives_to_process; - done = primitive_index >= primitive_count; - } - } - - mesh.data_stream_size_u32 = uint32_t(out_payload_buffer.size()); -} +using namespace Granite::Meshlet; static void decode_mesh_setup_buffers( - std::vector &out_index_buffer, std::vector &out_u32_stream, const MeshMetadata &mesh) + std::vector &out_index_buffer, std::vector &out_u32_stream, const CombinedMesh &mesh) { assert(mesh.stream_count > 1); @@ -396,7 +36,7 @@ static void decode_mesh_setup_buffers( } static void decode_mesh(std::vector &out_index_buffer, std::vector &out_u32_stream, - const std::vector &payload, const MeshMetadata &mesh) + const std::vector &payload, const CombinedMesh &mesh) { decode_mesh_setup_buffers(out_index_buffer, out_u32_stream, mesh); out_index_buffer.clear(); @@ -476,7 +116,7 @@ static void decode_mesh(std::vector &out_index_buffer, std::vector &out_index_buffer, std::vector &out_u32_stream, - const std::vector &payload, const MeshMetadata &mesh) + const std::vector 
&payload, const CombinedMesh &mesh) { decode_mesh_setup_buffers(out_index_buffer, out_u32_stream, mesh); const uint32_t u32_stride = mesh.stream_count - 1; @@ -485,19 +125,19 @@ static void decode_mesh_gpu( buf_info.domain = Vulkan::BufferDomain::LinkedDeviceHost; buf_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; - std::vector meshlet_metas; + std::vector meshlet_metas; meshlet_metas.reserve(mesh.meshlets.size()); for (auto &meshlet : mesh.meshlets) meshlet_metas.push_back(meshlet); - buf_info.size = mesh.meshlets.size() * sizeof(MeshletMetadataGPU); + buf_info.size = mesh.meshlets.size() * sizeof(MetadataGPU); auto meshlet_meta_buffer = dev.create_buffer(buf_info, meshlet_metas.data()); - std::vector meshlet_streams; + std::vector meshlet_streams; meshlet_streams.reserve(mesh.meshlets.size() * mesh.stream_count); for (auto &meshlet : mesh.meshlets) for (unsigned i = 0; i < mesh.stream_count; i++) meshlet_streams.push_back(meshlet.u32_streams[i]); - buf_info.size = meshlet_streams.size() * sizeof(MeshletStream); + buf_info.size = meshlet_streams.size() * sizeof(Stream); auto meshlet_stream_buffer = dev.create_buffer(buf_info, meshlet_streams.data()); buf_info.size = payload.size() * sizeof(uint32_t); @@ -623,99 +263,6 @@ static bool validate_mesh_decode(const std::vector &decoded_index_buff return true; } -static bool convert_meshlets(std::vector &out_meshlets, std::vector &bounds, - std::vector &out_index_buffer, const SceneFormats::Mesh &mesh) -{ - if (mesh.indices.empty() || mesh.primitive_restart || mesh.topology != VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST) - return false; - - size_t vertex_count = mesh.positions.size() / mesh.position_stride; - std::vector position_buffer(vertex_count); - std::vector index_buffer(mesh.count); - - if (mesh.index_type == VK_INDEX_TYPE_UINT32) - { - auto *indices = reinterpret_cast(mesh.indices.data()); - for (unsigned i = 0; i < mesh.count; i++) - index_buffer[i] = indices[i]; - } - else if (mesh.index_type == 
VK_INDEX_TYPE_UINT16) - { - auto *indices = reinterpret_cast(mesh.indices.data()); - for (unsigned i = 0; i < mesh.count; i++) - index_buffer[i] = indices[i]; - } - else if (mesh.index_type == VK_INDEX_TYPE_UINT8_EXT) - { - auto *indices = reinterpret_cast(mesh.indices.data()); - for (unsigned i = 0; i < mesh.count; i++) - index_buffer[i] = indices[i]; - } - else - return false; - - switch (mesh.attribute_layout[Util::ecast(MeshAttribute::Position)].format) - { - case VK_FORMAT_R32G32B32A32_SFLOAT: - case VK_FORMAT_R32G32B32_SFLOAT: - for (unsigned i = 0; i < vertex_count; i++) - memcpy(position_buffer[i].data, mesh.positions.data() + mesh.position_stride * i, sizeof(float) * 3); - break; - - default: - return false; - } - - constexpr unsigned max_vertices = 255; - constexpr unsigned max_primitives = 256; - std::vector optimized_index_buffer(index_buffer.size()); - meshopt_optimizeVertexCache(optimized_index_buffer.data(), index_buffer.data(), mesh.count, vertex_count); - index_buffer = std::move(optimized_index_buffer); - size_t num_meshlets = meshopt_buildMeshletsBound(mesh.count, max_vertices, max_primitives); - - std::vector out_vertex_redirection_buffer(num_meshlets * max_vertices); - std::vector local_index_buffer(num_meshlets * max_primitives * 3); - std::vector meshlets(num_meshlets); - - num_meshlets = meshopt_buildMeshlets(meshlets.data(), - out_vertex_redirection_buffer.data(), local_index_buffer.data(), - index_buffer.data(), mesh.count, - position_buffer[0].data, vertex_count, sizeof(vec3), - max_vertices, max_primitives, 1.0f); - - meshlets.resize(num_meshlets); - - out_meshlets.clear(); - out_meshlets.reserve(num_meshlets); - for (auto &meshlet : meshlets) - { - Meshlet m = {}; - m.offset = uint32_t(out_index_buffer.size()); - m.count = meshlet.triangle_count; - out_meshlets.push_back(m); - - auto *local_indices = index_buffer.data() + meshlet.triangle_offset; - for (unsigned i = 0; i < meshlet.triangle_count; i++) - { - 
out_index_buffer.emplace_back( - out_vertex_redirection_buffer[local_indices[3 * i + 0] + meshlet.vertex_offset], - out_vertex_redirection_buffer[local_indices[3 * i + 1] + meshlet.vertex_offset], - out_vertex_redirection_buffer[local_indices[3 * i + 2] + meshlet.vertex_offset]); - } - } - - bounds.clear(); - bounds.reserve(num_meshlets); - for (auto &meshlet : out_meshlets) - { - auto bound = meshopt_computeClusterBounds(out_index_buffer[0].data, meshlet.count * 3, - position_buffer[0].data, vertex_count, sizeof(vec3)); - bounds.push_back(bound); - } - - return true; -} - int main(int argc, char *argv[]) { if (argc != 2) @@ -741,12 +288,7 @@ int main(int argc, char *argv[]) #if 1 { - auto mesh = parser.get_meshes().front(); - SceneFormats::mesh_canonicalize_indices(mesh); - auto positions = SceneFormats::mesh_extract_position_snorm_exp(mesh); - auto normals = SceneFormats::mesh_extract_normal_tangent_oct8(mesh, MeshAttribute::Normal); - auto tangent = SceneFormats::mesh_extract_normal_tangent_oct8(mesh, MeshAttribute::Tangent); - auto uv = SceneFormats::mesh_extract_uv_snorm_scale(mesh); + Meshlet::export_mesh_to_meshlet("/tmp/export.mesh", parser.get_meshes().front()); } #endif From 4e511314187a7a786be523672fe3b7d93cbf320a Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sun, 30 Jul 2023 12:33:51 +0200 Subject: [PATCH 29/71] Export meshlet to file. 
--- scene-export/meshlet.cpp | 82 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 77 insertions(+), 5 deletions(-) diff --git a/scene-export/meshlet.cpp b/scene-export/meshlet.cpp index 5ec12e13..072aa291 100644 --- a/scene-export/meshlet.cpp +++ b/scene-export/meshlet.cpp @@ -24,6 +24,7 @@ #include "meshoptimizer.h" #include "enum_cast.hpp" #include "math.hpp" +#include "filesystem.hpp" namespace Granite { @@ -84,8 +85,6 @@ enum class MeshStyle : uint32_t struct CombinedMesh { uint32_t stream_count; - uint32_t data_stream_offset_u32; - uint32_t data_stream_size_u32; MeshStyle mesh_style; std::vector meshlets; @@ -559,7 +558,6 @@ static void encode_mesh(Encoded &encoded, encoded = {}; auto &mesh = encoded.mesh; mesh.stream_count = num_u32_streams + 1; - mesh.data_stream_offset_u32 = 0; // Can be adjusted in isolation later to pack multiple payload streams into one buffer. mesh.meshlets.reserve(num_meshlets); uint32_t base_vertex_offset = 0; @@ -631,8 +629,81 @@ static void encode_mesh(Encoded &encoded, base_vertex_offset += analysis_result.num_vertices; primitive_index += primitives_to_process; } +} + +static bool export_encoded_mesh(const std::string &path, const Encoded &encoded) +{ + size_t required_size = 0; + static const char magic[8] = { 'M', 'E', 'S', 'H', 'L', 'E', 'T', '1' }; + + struct MeshletHeader + { + MeshStyle style; + uint32_t u32_stream_count; + uint32_t meshlet_count; + uint32_t payload_size_words; + } header = {}; + + header.style = encoded.mesh.mesh_style; + header.u32_stream_count = encoded.mesh.stream_count; + header.meshlet_count = uint32_t(encoded.mesh.meshlets.size()); + header.payload_size_words = uint32_t(encoded.payload.size()); + + required_size += sizeof(magic); + required_size += sizeof(MeshletHeader); + + // Per-meshlet metadata. + required_size += encoded.mesh.meshlets.size() * sizeof(MetadataGPU); + + // Bounds. + required_size += encoded.mesh.meshlets.size() * sizeof(Bound); + + // Stream metadata. 
+ required_size += encoded.mesh.stream_count * encoded.mesh.meshlets.size() * sizeof(Stream); + + // Payload. + required_size += encoded.payload.size() * sizeof(uint32_t); + + auto file = GRANITE_FILESYSTEM()->open(path, FileMode::WriteOnly); + if (!file) + return false; + + auto mapping = file->map_write(required_size); + if (!mapping) + return false; + + auto *ptr = mapping->mutable_data(); - mesh.data_stream_size_u32 = uint32_t(encoded.payload.size()); + memcpy(ptr, magic, sizeof(magic)); + ptr += sizeof(magic); + memcpy(ptr, &header, sizeof(header)); + ptr += sizeof(header); + + for (uint32_t i = 0; i < header.meshlet_count; i++) + { + auto &gpu = static_cast(encoded.mesh.meshlets[i]); + memcpy(ptr, &gpu, sizeof(gpu)); + ptr += sizeof(gpu); + } + + for (uint32_t i = 0; i < header.meshlet_count; i++) + { + auto &bound = encoded.mesh.meshlets[i].bound; + memcpy(ptr, &bound, sizeof(bound)); + ptr += sizeof(bound); + } + + for (uint32_t i = 0; i < header.meshlet_count; i++) + { + for (uint32_t j = 0; j < header.u32_stream_count; j++) + { + memcpy(ptr, &encoded.mesh.meshlets[i].u32_streams[j], sizeof(Stream)); + ptr += sizeof(Stream); + } + } + + memcpy(ptr, encoded.payload.data(), encoded.payload.size() * sizeof(uint32_t)); + return true; } bool export_mesh_to_meshlet(const std::string &path, SceneFormats::Mesh mesh) @@ -738,7 +809,8 @@ bool export_mesh_to_meshlet(const std::string &path, SceneFormats::Mesh mesh) pbounds->cone_axis_s8[0], pbounds->cone_axis_s8[1], pbounds->cone_axis_s8[2], pbounds->cone_cutoff_s8); } - return true; + + return export_encoded_mesh(path, encoded); } } } From e4807e2c1fd7b8a77f9e89e1f10fdb40e5ad0668 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sun, 30 Jul 2023 13:57:15 +0200 Subject: [PATCH 30/71] Add decoder. 
--- renderer/CMakeLists.txt | 1 + renderer/formats/meshlet.cpp | 87 ++++++ renderer/formats/meshlet.hpp | 106 ++++++++ scene-export/CMakeLists.txt | 2 +- .../{meshlet.cpp => meshlet_export.cpp} | 76 +----- .../{meshlet.hpp => meshlet_export.hpp} | 0 tests/meshopt_sandbox.cpp | 257 +++--------------- 7 files changed, 251 insertions(+), 278 deletions(-) create mode 100644 renderer/formats/meshlet.cpp create mode 100644 renderer/formats/meshlet.hpp rename scene-export/{meshlet.cpp => meshlet_export.cpp} (93%) rename scene-export/{meshlet.hpp => meshlet_export.hpp} (100%) diff --git a/renderer/CMakeLists.txt b/renderer/CMakeLists.txt index ca573aa0..ca6db955 100644 --- a/renderer/CMakeLists.txt +++ b/renderer/CMakeLists.txt @@ -38,6 +38,7 @@ add_granite_internal_lib(granite-renderer lights/volumetric_diffuse.hpp lights/volumetric_diffuse.cpp lights/decal_volume.hpp lights/decal_volume.cpp formats/scene_formats.hpp formats/scene_formats.cpp + formats/meshlet.hpp formats/meshlet.cpp formats/gltf.hpp formats/gltf.cpp scene_loader.cpp scene_loader.hpp ocean.hpp ocean.cpp diff --git a/renderer/formats/meshlet.cpp b/renderer/formats/meshlet.cpp new file mode 100644 index 00000000..adcc22a4 --- /dev/null +++ b/renderer/formats/meshlet.cpp @@ -0,0 +1,87 @@ +/* Copyright (c) 2017-2023 Hans-Kristian Arntzen + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "meshlet.hpp" + +namespace Granite +{ +namespace SceneFormats +{ +namespace Meshlet +{ +MeshView create_mesh_view(const FileMapping &mapping) +{ + MeshView view = {}; + + if (mapping.get_size() < sizeof(magic) + sizeof(FormatHeader)) + { + LOGE("MESHLET1 file too small.\n"); + return view; + } + + auto *ptr = mapping.data(); + auto *end_ptr = ptr + mapping.get_size(); + + if (memcmp(ptr, magic, sizeof(magic)) != 0) + { + LOGE("Invalid MESHLET1 magic.\n"); + return {}; + } + + ptr += sizeof(magic); + + view.format_header = reinterpret_cast(ptr); + ptr += sizeof(*view.format_header); + + if (end_ptr - ptr < ptrdiff_t(view.format_header->meshlet_count * sizeof(Header))) + return {}; + view.headers = reinterpret_cast(ptr); + ptr += view.format_header->meshlet_count * sizeof(Header); + + if (end_ptr - ptr < ptrdiff_t(view.format_header->meshlet_count * sizeof(Bound))) + return {}; + view.bounds = reinterpret_cast(ptr); + ptr += view.format_header->meshlet_count * sizeof(Bound); + + if (end_ptr - ptr < ptrdiff_t(view.format_header->meshlet_count * view.format_header->u32_stream_count * sizeof(Stream))) + return {}; + view.streams = reinterpret_cast(ptr); + ptr += view.format_header->meshlet_count * view.format_header->u32_stream_count * sizeof(Stream); + + if (!view.format_header->payload_size_words) + return {}; + + if (end_ptr - ptr < ptrdiff_t(view.format_header->payload_size_words * sizeof(uint32_t))) + return {}; + view.payload = reinterpret_cast(ptr); + + for 
(uint32_t i = 0, n = view.format_header->meshlet_count; i < n; i++) + { + view.total_primitives += view.headers[i].num_primitives_minus_1 + 1; + view.total_vertices += view.headers[i].num_attributes_minus_1 + 1; + } + + return view; +} +} +} +} diff --git a/renderer/formats/meshlet.hpp b/renderer/formats/meshlet.hpp new file mode 100644 index 00000000..752fbb83 --- /dev/null +++ b/renderer/formats/meshlet.hpp @@ -0,0 +1,106 @@ +/* Copyright (c) 2017-2023 Hans-Kristian Arntzen + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#pragma once + +#include +#include "filesystem.hpp" +#include "math.hpp" + +namespace Granite +{ +namespace SceneFormats +{ +// MESHLET1 format. 
+namespace Meshlet +{ +static constexpr unsigned MaxU32Streams = 16; +static constexpr unsigned MaxElements = 256; +static constexpr unsigned MaxPrimitives = MaxElements; +static constexpr unsigned MaxVertices = MaxElements; + +struct Stream +{ + uint16_t predictor[4 * 2 + 2]; + uint32_t offset_from_base_u32; + uint16_t bitplane_meta[MaxElements / 32]; +}; + +struct Header +{ + uint32_t base_vertex_offset; + uint8_t num_primitives_minus_1; + uint8_t num_attributes_minus_1; + uint16_t reserved; +}; + +struct Bound +{ + vec3 center; + float radius; + i8vec4 cone_axis_cutoff; +}; + +enum class StreamType +{ + Primitive = 0, // R8G8B8X8_UINT + PositionE16, // RGB16_SSCALED * 2^(A16_SINT) + NormalOct8, // Octahedron encoding in RG8. + TangentOct8, // Octahedron encoding in RG8, sign bit in B8 (if not zero, +1, otherwise -1). + UV, // R16G16_SNORM * B16_SSCALED + BoneIndices, // RGBA8_UINT + BoneWeights, // RGB8_UNORM (sums to 1, A is implied). +}; + +enum class MeshStyle : uint32_t +{ + Wireframe = 0, // Primitive + Position + Untextured, // Wireframe + NormalOct8 + Textured, // Untextured + TangentOct8 + UV + Skinned // Textured + Bone* +}; + +struct FormatHeader +{ + MeshStyle style; + uint32_t u32_stream_count; + uint32_t meshlet_count; + uint32_t payload_size_words; +}; + +struct MeshView +{ + const FormatHeader *format_header; + const Header *headers; + const Bound *bounds; + const Stream *streams; + const uint32_t *payload; + uint32_t total_primitives; + uint32_t total_vertices; +}; + +static const char magic[8] = { 'M', 'E', 'S', 'H', 'L', 'E', 'T', '1' }; + +MeshView create_mesh_view(const FileMapping &mapping); +} +} +} diff --git a/scene-export/CMakeLists.txt b/scene-export/CMakeLists.txt index 406ba829..fa741831 100644 --- a/scene-export/CMakeLists.txt +++ b/scene-export/CMakeLists.txt @@ -6,7 +6,7 @@ add_granite_internal_lib(granite-scene-export gltf_export.cpp gltf_export.hpp rgtc_compressor.cpp rgtc_compressor.hpp tmx_parser.cpp tmx_parser.hpp - 
meshlet.cpp meshlet.hpp + meshlet_export.cpp meshlet_export.hpp texture_utils.cpp texture_utils.hpp) target_include_directories(granite-scene-export PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/scene-export/meshlet.cpp b/scene-export/meshlet_export.cpp similarity index 93% rename from scene-export/meshlet.cpp rename to scene-export/meshlet_export.cpp index 072aa291..99422f09 100644 --- a/scene-export/meshlet.cpp +++ b/scene-export/meshlet_export.cpp @@ -20,68 +20,25 @@ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "meshlet.hpp" +#include "meshlet_export.hpp" #include "meshoptimizer.h" #include "enum_cast.hpp" #include "math.hpp" #include "filesystem.hpp" +#include "meshlet.hpp" namespace Granite { namespace Meshlet { -static constexpr unsigned MaxU32Streams = 16; -static constexpr unsigned MaxElements = 256; -static constexpr unsigned MaxPrimitives = MaxElements; -static constexpr unsigned MaxVertices = MaxElements; - -struct Stream -{ - uint16_t predictor[4 * 2 + 2]; - uint32_t offset_from_base_u32; - uint16_t bitplane_meta[MaxElements / 32]; -}; - -struct MetadataGPU -{ - uint32_t base_vertex_offset; - uint8_t num_primitives_minus_1; - uint8_t num_attributes_minus_1; - uint16_t reserved; -}; - -struct Bound -{ - vec3 center; - float radius; - i8vec4 cone_axis_cutoff; -}; +using namespace ::Granite::SceneFormats::Meshlet; -struct Metadata : MetadataGPU +struct Metadata : Header { Bound bound; Stream u32_streams[MaxU32Streams]; }; -enum class StreamType : uint8_t -{ - Primitive = 0, // R8G8B8X8_UINT - PositionE16, // RGB16_SSCALED * 2^(A16_SINT) - NormalOct8, // Octahedron encoding in RG8. - TangentOct8, // Octahedron encoding in RG8, sign bit in B8 (if not zero, +1, otherwise -1). - UV, // R16G16_SNORM * B16_SSCALED - BoneIndices, // RGBA8_UINT - BoneWeights, // RGB8_UNORM (sums to 1, A is implied). 
-}; - -enum class MeshStyle : uint32_t -{ - Wireframe = 0, // Primitive + Position - Untextured, // Wireframe + NormalOct8 - Textured, // Untextured + TangentOct8 + UV - Skinned // Textured + Bone* -}; - struct CombinedMesh { uint32_t stream_count; @@ -570,10 +527,9 @@ static void encode_mesh(Encoded &encoded, assert(primitives_to_process); assert(primitive_count > primitive_index); - PrimitiveAnalysisResult analysis_result = {}; primitive_index = meshlets[meshlet_index].offset; - analysis_result = analyze_primitive_count( + auto analysis_result = analyze_primitive_count( vbo_remap, index_buffer + 3 * primitive_index, primitives_to_process); @@ -596,7 +552,6 @@ static void encode_mesh(Encoded &encoded, uint8_t i0 = vbo_remap[index_buffer[3 * (primitive_index + i) + 0]]; uint8_t i1 = vbo_remap[index_buffer[3 * (primitive_index + i) + 1]]; uint8_t i2 = vbo_remap[index_buffer[3 * (primitive_index + i) + 2]]; - //LOGI("Prim %u = { %u, %u, %u }\n", i, i0, i1, i2); stream_buffer[i] = u8vec4(i0, i1, i2, 0); } @@ -634,15 +589,8 @@ static void encode_mesh(Encoded &encoded, static bool export_encoded_mesh(const std::string &path, const Encoded &encoded) { size_t required_size = 0; - static const char magic[8] = { 'M', 'E', 'S', 'H', 'L', 'E', 'T', '1' }; - struct MeshletHeader - { - MeshStyle style; - uint32_t u32_stream_count; - uint32_t meshlet_count; - uint32_t payload_size_words; - } header = {}; + FormatHeader header = {}; header.style = encoded.mesh.mesh_style; header.u32_stream_count = encoded.mesh.stream_count; @@ -650,10 +598,10 @@ static bool export_encoded_mesh(const std::string &path, const Encoded &encoded) header.payload_size_words = uint32_t(encoded.payload.size()); required_size += sizeof(magic); - required_size += sizeof(MeshletHeader); + required_size += sizeof(FormatHeader); // Per-meshlet metadata. - required_size += encoded.mesh.meshlets.size() * sizeof(MetadataGPU); + required_size += encoded.mesh.meshlets.size() * sizeof(Header); // Bounds. 
required_size += encoded.mesh.meshlets.size() * sizeof(Bound); @@ -662,7 +610,8 @@ static bool export_encoded_mesh(const std::string &path, const Encoded &encoded) required_size += encoded.mesh.stream_count * encoded.mesh.meshlets.size() * sizeof(Stream); // Payload. - required_size += encoded.payload.size() * sizeof(uint32_t); + // Need a padding word to speed up decoder. + required_size += (encoded.payload.size() + 1) * sizeof(uint32_t); auto file = GRANITE_FILESYSTEM()->open(path, FileMode::WriteOnly); if (!file) @@ -681,7 +630,7 @@ static bool export_encoded_mesh(const std::string &path, const Encoded &encoded) for (uint32_t i = 0; i < header.meshlet_count; i++) { - auto &gpu = static_cast(encoded.mesh.meshlets[i]); + auto &gpu = static_cast(encoded.mesh.meshlets[i]); memcpy(ptr, &gpu, sizeof(gpu)); ptr += sizeof(gpu); } @@ -703,6 +652,8 @@ static bool export_encoded_mesh(const std::string &path, const Encoded &encoded) } memcpy(ptr, encoded.payload.data(), encoded.payload.size() * sizeof(uint32_t)); + ptr += encoded.payload.size() * sizeof(uint32_t); + memset(ptr, 0, sizeof(uint32_t)); return true; } @@ -710,7 +661,6 @@ bool export_mesh_to_meshlet(const std::string &path, SceneFormats::Mesh mesh) { if (!mesh_canonicalize_indices(mesh)) return false; - mesh_deduplicate_vertices(mesh); auto positions = mesh_extract_position_snorm_exp(mesh); auto normals = mesh_extract_normal_tangent_oct8(mesh, MeshAttribute::Normal); diff --git a/scene-export/meshlet.hpp b/scene-export/meshlet_export.hpp similarity index 100% rename from scene-export/meshlet.hpp rename to scene-export/meshlet_export.hpp diff --git a/tests/meshopt_sandbox.cpp b/tests/meshopt_sandbox.cpp index ef1e9acf..70680d74 100644 --- a/tests/meshopt_sandbox.cpp +++ b/tests/meshopt_sandbox.cpp @@ -9,47 +9,40 @@ #include "bitops.hpp" #include "gltf.hpp" #include "global_managers_init.hpp" +#include "meshlet_export.hpp" #include "meshlet.hpp" #include #include using namespace Granite; -using namespace 
Granite::Meshlet; static void decode_mesh_setup_buffers( - std::vector &out_index_buffer, std::vector &out_u32_stream, const CombinedMesh &mesh) + std::vector &out_index_buffer, std::vector &out_u32_stream, + const SceneFormats::Meshlet::MeshView &mesh) { - assert(mesh.stream_count > 1); - - unsigned index_count = 0; - unsigned attr_count = 0; - - for (auto &meshlet : mesh.meshlets) - { - index_count += (meshlet.num_primitives_minus_1 + 1) * 3; - attr_count += meshlet.num_attributes_minus_1 + 1; - } + assert(mesh.format_header->u32_stream_count > 1); out_index_buffer.clear(); out_u32_stream.clear(); - out_index_buffer.resize(index_count); - out_u32_stream.resize(attr_count * (mesh.stream_count - 1)); + out_index_buffer.resize(mesh.total_primitives * 3); + out_u32_stream.resize(mesh.total_vertices * (mesh.format_header->u32_stream_count - 1)); } static void decode_mesh(std::vector &out_index_buffer, std::vector &out_u32_stream, - const std::vector &payload, const CombinedMesh &mesh) + const SceneFormats::Meshlet::MeshView &mesh) { decode_mesh_setup_buffers(out_index_buffer, out_u32_stream, mesh); out_index_buffer.clear(); - const unsigned u32_stride = mesh.stream_count - 1; + const unsigned u32_stride = mesh.format_header->u32_stream_count - 1; - for (auto &meshlet : mesh.meshlets) + for (uint32_t meshlet_index = 0; meshlet_index < mesh.format_header->meshlet_count; meshlet_index++) { - for (unsigned stream_index = 0; stream_index < mesh.stream_count; stream_index++) + auto &meshlet = mesh.headers[meshlet_index]; + for (unsigned stream_index = 0; stream_index < mesh.format_header->u32_stream_count; stream_index++) { - auto &stream = meshlet.u32_streams[stream_index]; - const uint32_t *pdata = payload.data() + mesh.data_stream_offset_u32 + stream.offset_from_base_u32; + auto &stream = mesh.streams[meshlet_index * mesh.format_header->u32_stream_count + stream_index]; + const uint32_t *pdata = mesh.payload + stream.offset_from_base_u32; - u8vec4 deltas[MaxElements] = 
{}; + u8vec4 deltas[SceneFormats::Meshlet::MaxElements] = {}; const u16vec4 base_predictor = u16vec4( stream.predictor[0], stream.predictor[1], stream.predictor[2], stream.predictor[3]); @@ -59,7 +52,7 @@ static void decode_mesh(std::vector &out_index_buffer, std::vector> u16vec4(0, 8, 0, 8)); - for (unsigned chunk = 0; chunk < (MaxElements / 32); chunk++) + for (unsigned chunk = 0; chunk < (SceneFormats::Meshlet::MaxElements / 32); chunk++) { auto bits_per_u8 = (uvec4(stream.bitplane_meta[chunk]) >> uvec4(0, 4, 8, 12)) & 0xfu; uvec4 bitplanes[8] = {}; @@ -86,11 +79,11 @@ static void decode_mesh(std::vector &out_index_buffer, std::vector> u16vec4(8)); // Resolve deltas. - for (unsigned i = 1; i < MaxElements; i++) + for (unsigned i = 1; i < SceneFormats::Meshlet::MaxElements; i++) deltas[i] += deltas[i - 1]; if (stream_index == 0) @@ -116,34 +109,23 @@ static void decode_mesh(std::vector &out_index_buffer, std::vector &out_index_buffer, std::vector &out_u32_stream, - const std::vector &payload, const CombinedMesh &mesh) + const SceneFormats::Meshlet::MeshView &mesh) { decode_mesh_setup_buffers(out_index_buffer, out_u32_stream, mesh); - const uint32_t u32_stride = mesh.stream_count - 1; + const uint32_t u32_stride = mesh.format_header->u32_stream_count - 1; Vulkan::BufferCreateInfo buf_info = {}; buf_info.domain = Vulkan::BufferDomain::LinkedDeviceHost; buf_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; - std::vector meshlet_metas; - meshlet_metas.reserve(mesh.meshlets.size()); - for (auto &meshlet : mesh.meshlets) - meshlet_metas.push_back(meshlet); - buf_info.size = mesh.meshlets.size() * sizeof(MetadataGPU); - auto meshlet_meta_buffer = dev.create_buffer(buf_info, meshlet_metas.data()); - - std::vector meshlet_streams; - meshlet_streams.reserve(mesh.meshlets.size() * mesh.stream_count); - for (auto &meshlet : mesh.meshlets) - for (unsigned i = 0; i < mesh.stream_count; i++) - meshlet_streams.push_back(meshlet.u32_streams[i]); - buf_info.size = 
meshlet_streams.size() * sizeof(Stream); - auto meshlet_stream_buffer = dev.create_buffer(buf_info, meshlet_streams.data()); - - buf_info.size = payload.size() * sizeof(uint32_t); - if (buf_info.size == 0) - buf_info.size = 4; - auto payload_buffer = dev.create_buffer(buf_info, payload.empty() ? nullptr : payload.data()); + buf_info.size = mesh.format_header->meshlet_count * sizeof(*mesh.headers); + auto meshlet_meta_buffer = dev.create_buffer(buf_info, mesh.headers); + + buf_info.size = mesh.format_header->meshlet_count * mesh.format_header->u32_stream_count * sizeof(*mesh.streams); + auto meshlet_stream_buffer = dev.create_buffer(buf_info, mesh.streams); + + buf_info.size = mesh.format_header->payload_size_words * sizeof(uint32_t); + auto payload_buffer = dev.create_buffer(buf_info, mesh.payload); buf_info.size = out_index_buffer.size() * sizeof(uint32_t); buf_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | @@ -161,15 +143,15 @@ static void decode_mesh_gpu( auto readback_decoded_u32_buffer = dev.create_buffer(buf_info); std::vector output_offset_strides; - output_offset_strides.reserve(mesh.meshlets.size() * mesh.stream_count); + output_offset_strides.reserve(mesh.format_header->meshlet_count * mesh.format_header->u32_stream_count); uint32_t index_count = 0; - for (auto &meshlet : mesh.meshlets) + for (uint32_t i = 0; i < mesh.format_header->meshlet_count; i++) { output_offset_strides.emplace_back(index_count, 0); - index_count += meshlet.num_primitives_minus_1 + 1; - for (uint32_t i = 1; i < mesh.stream_count; i++) - output_offset_strides.emplace_back(meshlet.base_vertex_offset * u32_stride + (i - 1), u32_stride); + index_count += mesh.headers[i].num_primitives_minus_1 + 1; + for (uint32_t j = 1; j < mesh.format_header->u32_stream_count; j++) + output_offset_strides.emplace_back(mesh.headers[i].base_vertex_offset * u32_stride + (j - 1), u32_stride); } buf_info.domain = Vulkan::BufferDomain::LinkedDeviceHost; @@ -191,8 +173,8 @@ static void decode_mesh_gpu( 
cmd->set_storage_buffer(0, 4, *payload_buffer); cmd->set_storage_buffer(0, 5, *output_offset_strides_buffer); cmd->set_specialization_constant_mask(1); - cmd->set_specialization_constant(0, mesh.stream_count); - cmd->dispatch(uint32_t(mesh.meshlets.size()), 1, 1); + cmd->set_specialization_constant(0, mesh.format_header->u32_stream_count); + cmd->dispatch(mesh.format_header->meshlet_count, 1, 1); cmd->barrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, VK_PIPELINE_STAGE_2_COPY_BIT, VK_ACCESS_TRANSFER_READ_BIT); @@ -286,171 +268,18 @@ int main(int argc, char *argv[]) dev.set_context(ctx); dev.init_frame_contexts(4); -#if 1 - { - Meshlet::export_mesh_to_meshlet("/tmp/export.mesh", parser.get_meshes().front()); - } -#endif - -#if 0 - { - std::vector index_buffer; - std::vector meshlets; - std::vector bounds; - - for (auto &mesh : parser.get_meshes()) - { - if (mesh.count < 60000) - continue; - if (!convert_meshlets(meshlets, bounds, index_buffer, mesh)) - return EXIT_FAILURE; - break; - } - } -#endif - -#if 0 - LOGI("=== Test ====\n"); - { - std::vector out_payload_buffer; - const std::vector index_buffer = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, - }; - const std::vector attr_buffer = { - 9, 11, 4, 4, 2, 9, - 9, 7, 4, 29, 2, 9, - 9, 7, 4, 29, 2, 9, - 9, 7, 4, 29, 2, 9, - }; - MeshMetadata encoded_mesh; - const uint32_t u32_stride = 2; - - encode_mesh(out_payload_buffer, encoded_mesh, - index_buffer.data(), index_buffer.size() / 3, - attr_buffer.data(), u32_stride); - - LOGI("Encoded payload size = %zu bytes.\n", out_payload_buffer.size() * sizeof(uint32_t)); - LOGI("u32 stride = %u\n", u32_stride); - - std::vector decoded_index_buffer; - std::vector decoded_u32_stream; - std::vector 
gpu_decoded_index_buffer; - std::vector gpu_decoded_u32_stream; - decode_mesh(decoded_index_buffer, decoded_u32_stream, out_payload_buffer, encoded_mesh); - - if (!validate_mesh_decode(decoded_index_buffer, decoded_u32_stream, index_buffer, attr_buffer, u32_stride)) - { - LOGE("Failed to validate mesh.\n"); - return EXIT_FAILURE; - } - - decode_mesh_gpu(dev, gpu_decoded_index_buffer, gpu_decoded_u32_stream, out_payload_buffer, encoded_mesh); - if (!validate_mesh_decode(gpu_decoded_index_buffer, gpu_decoded_u32_stream, decoded_index_buffer, decoded_u32_stream, u32_stride)) - { - LOGE("Failed to validate GPU decoded mesh.\n"); - return EXIT_FAILURE; - } - } - LOGI("===============\n"); -#endif - -#if 0 - for (auto &mesh : parser.get_meshes()) - { - unsigned u32_stride = (mesh.position_stride + mesh.attribute_stride) / sizeof(uint32_t); - - if (mesh.indices.empty() || mesh.primitive_restart || mesh.topology != VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST) - { - LOGI("Unexpected mesh.\n"); - continue; - } - - std::vector index_buffer; - std::vector attr_buffer; - size_t vertex_count = mesh.positions.size() / mesh.position_stride; - attr_buffer.resize(u32_stride * vertex_count); - index_buffer.resize(mesh.count); - - if (mesh.index_type == VK_INDEX_TYPE_UINT32) - { - memcpy(index_buffer.data(), mesh.indices.data(), mesh.count * sizeof(uint32_t)); - } - else if (mesh.index_type == VK_INDEX_TYPE_UINT16) - { - auto *indices = reinterpret_cast(mesh.indices.data()); - for (unsigned i = 0; i < mesh.count; i++) - index_buffer[i] = indices[i]; - } - else if (mesh.index_type == VK_INDEX_TYPE_UINT8_EXT) - { - auto *indices = reinterpret_cast(mesh.indices.data()); - for (unsigned i = 0; i < mesh.count; i++) - index_buffer[i] = indices[i]; - } - else - continue; - - LOGI("=== Testing mesh ===\n"); - - for (size_t i = 0; i < vertex_count; i++) - { - memcpy(attr_buffer.data() + u32_stride * i, mesh.positions.data() + i * mesh.position_stride, mesh.position_stride); - memcpy(attr_buffer.data() 
+ u32_stride * i + mesh.position_stride / sizeof(uint32_t), - mesh.attributes.data() + i * mesh.attribute_stride, mesh.attribute_stride); - } - - LOGI("Mesh payload size = %zu bytes.\n", (index_buffer.size() + attr_buffer.size()) * sizeof(uint32_t)); - - std::vector optimized_index_buffer(index_buffer.size()); - meshopt_optimizeVertexCache(optimized_index_buffer.data(), index_buffer.data(), mesh.count, vertex_count); - - std::vector out_payload_buffer; - MeshMetadata encoded_mesh; - encode_mesh(out_payload_buffer, encoded_mesh, - optimized_index_buffer.data(), optimized_index_buffer.size() / 3, - attr_buffer.data(), u32_stride); - - unsigned prim_offset = 0; - unsigned meshlet_index = 0; - for (auto &meshlet : encoded_mesh.meshlets) - { - LOGI("Meshlet #%u (%u prims, %u attrs), offset %u.\n", - meshlet_index, meshlet.num_primitives_minus_1 + 1, meshlet.num_attributes_minus_1 + 1, prim_offset); - prim_offset += meshlet.num_primitives_minus_1 + 1; - meshlet_index++; - } - - LOGI("Encoded payload size = %zu bytes.\n", out_payload_buffer.size() * sizeof(uint32_t)); - LOGI("u32 stride = %u\n", u32_stride); - - std::vector decoded_index_buffer; - std::vector decoded_u32_stream; - std::vector gpu_decoded_index_buffer; - std::vector gpu_decoded_u32_stream; - decode_mesh(decoded_index_buffer, decoded_u32_stream, out_payload_buffer, encoded_mesh); + if (!Meshlet::export_mesh_to_meshlet("/tmp/export.mesh", parser.get_meshes().front())) + return EXIT_FAILURE; - if (!validate_mesh_decode(decoded_index_buffer, decoded_u32_stream, optimized_index_buffer, attr_buffer, u32_stride)) - { - LOGE("Failed to validate mesh.\n"); - return EXIT_FAILURE; - } + auto file = GRANITE_FILESYSTEM()->open("/tmp/export.mesh", FileMode::ReadOnly); + if (!file) + return EXIT_FAILURE; - decode_mesh_gpu(dev, gpu_decoded_index_buffer, gpu_decoded_u32_stream, out_payload_buffer, encoded_mesh); - if (!validate_mesh_decode(gpu_decoded_index_buffer, gpu_decoded_u32_stream, decoded_index_buffer, 
decoded_u32_stream, u32_stride)) - { - LOGE("Failed to validate GPU decoded mesh.\n"); - return EXIT_FAILURE; - } + auto mapped = file->map(); + if (!mapped) + return EXIT_FAILURE; - LOGI("=====================\n"); - } -#endif + auto mesh = SceneFormats::Meshlet::create_mesh_view(*mapped); return 0; } \ No newline at end of file From 3094dd0318536fea5e9753d97a9213a4f0731acd Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sun, 30 Jul 2023 14:17:33 +0200 Subject: [PATCH 31/71] Try decoding from file. --- tests/meshopt_sandbox.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/meshopt_sandbox.cpp b/tests/meshopt_sandbox.cpp index 70680d74..9e66ff03 100644 --- a/tests/meshopt_sandbox.cpp +++ b/tests/meshopt_sandbox.cpp @@ -281,5 +281,20 @@ int main(int argc, char *argv[]) auto mesh = SceneFormats::Meshlet::create_mesh_view(*mapped); + std::vector reference_index_buffer; + std::vector reference_attributes; + std::vector gpu_index_buffer; + std::vector gpu_attributes; + + decode_mesh(reference_index_buffer, reference_attributes, mesh); + decode_mesh_gpu(dev, gpu_index_buffer, gpu_attributes, mesh); + + if (!validate_mesh_decode(gpu_index_buffer, gpu_attributes, + reference_index_buffer, reference_attributes, + mesh.format_header->u32_stream_count - 1)) + { + return EXIT_FAILURE; + } + return 0; } \ No newline at end of file From d6d9ffe5f6eac6ea564fb651dc4f9a17a89293cb Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 31 Jul 2023 12:08:00 +0200 Subject: [PATCH 32/71] Add basic meshlet test app. 
--- renderer/formats/meshlet.cpp | 70 ++++++++++ renderer/formats/meshlet.hpp | 12 ++ scene-export/meshlet_export.cpp | 85 +++++++++--- scene-export/meshlet_export.hpp | 3 +- tests/CMakeLists.txt | 6 + tests/assets/shaders/meshlet_debug.frag | 12 ++ tests/assets/shaders/meshlet_debug.vert | 56 ++++++++ tests/meshlet_viewer.cpp | 170 ++++++++++++++++++++++++ tests/meshopt_sandbox.cpp | 7 +- vulkan/managers/shader_manager.cpp | 12 +- 10 files changed, 408 insertions(+), 25 deletions(-) create mode 100644 tests/assets/shaders/meshlet_debug.frag create mode 100644 tests/assets/shaders/meshlet_debug.vert create mode 100644 tests/meshlet_viewer.cpp diff --git a/renderer/formats/meshlet.cpp b/renderer/formats/meshlet.cpp index adcc22a4..a91cbef7 100644 --- a/renderer/formats/meshlet.cpp +++ b/renderer/formats/meshlet.cpp @@ -21,6 +21,9 @@ */ #include "meshlet.hpp" +#include "command_buffer.hpp" +#include "buffer.hpp" +#include "device.hpp" namespace Granite { @@ -82,6 +85,73 @@ MeshView create_mesh_view(const FileMapping &mapping) return view; } + +bool decode_mesh(Vulkan::CommandBuffer &cmd, + const Vulkan::Buffer &ibo, uint64_t ibo_offset, + const Vulkan::Buffer &vbo, uint64_t vbo_offset, + const Vulkan::Buffer &payload, uint64_t payload_offset, + const MeshView &view) +{ + // TODO: Implement LDS fallback. 
+ if (!cmd.get_device().supports_subgroup_size_log2(true, 5, 5)) + { + LOGE("Device does not support Wave32.\n"); + return false; + } + + const uint32_t u32_stride = view.format_header->u32_stream_count - 1; + + Vulkan::BufferCreateInfo buf_info = {}; + buf_info.domain = Vulkan::BufferDomain::LinkedDeviceHost; + buf_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + + buf_info.size = view.format_header->meshlet_count * sizeof(*view.headers); + auto meshlet_meta_buffer = cmd.get_device().create_buffer(buf_info, view.headers); + + buf_info.size = view.format_header->meshlet_count * view.format_header->u32_stream_count * sizeof(*view.streams); + auto meshlet_stream_buffer = cmd.get_device().create_buffer(buf_info, view.streams); + + std::vector output_offset_strides; + output_offset_strides.reserve(view.format_header->meshlet_count * view.format_header->u32_stream_count); + + uint32_t index_count = 0; + for (uint32_t i = 0; i < view.format_header->meshlet_count; i++) + { + output_offset_strides.emplace_back(index_count, 0); + index_count += view.headers[i].num_primitives_minus_1 + 1; + for (uint32_t j = 1; j < view.format_header->u32_stream_count; j++) + output_offset_strides.emplace_back(view.headers[i].base_vertex_offset * u32_stride + (j - 1), u32_stride); + } + + buf_info.domain = Vulkan::BufferDomain::LinkedDeviceHost; + buf_info.size = output_offset_strides.size() * sizeof(uvec2); + auto output_offset_strides_buffer = cmd.get_device().create_buffer(buf_info, output_offset_strides.data()); + + cmd.set_program("builtin://shaders/decode/meshlet_decode.comp"); + cmd.enable_subgroup_size_control(true); + cmd.set_subgroup_size_log2(true, 5, 5); + + cmd.set_storage_buffer(0, 0, *meshlet_meta_buffer); + cmd.set_storage_buffer(0, 1, *meshlet_stream_buffer); + cmd.set_storage_buffer(0, 2, vbo, vbo_offset, view.total_vertices * u32_stride * sizeof(uint32_t)); + cmd.set_storage_buffer(0, 3, ibo, ibo_offset, view.total_primitives * 3 * sizeof(uint32_t)); + 
cmd.set_storage_buffer(0, 4, payload, payload_offset, view.format_header->payload_size_words * sizeof(uint32_t)); + cmd.set_storage_buffer(0, 5, *output_offset_strides_buffer); + cmd.set_specialization_constant_mask(1); + cmd.set_specialization_constant(0, view.format_header->u32_stream_count); + + // TODO: Split dispatches for big chungus meshes. + // (Starts to become a problem around 8-16 million primitives per dispatch). + if (view.format_header->meshlet_count > cmd.get_device().get_gpu_properties().limits.maxComputeWorkGroupCount[0]) + { + LOGW("Exceeding workgroup limit (%u > %u).\n", view.format_header->meshlet_count, + cmd.get_device().get_gpu_properties().limits.maxComputeWorkGroupCount[0]); + } + + cmd.dispatch(view.format_header->meshlet_count, 1, 1); + cmd.set_specialization_constant_mask(0); + return true; +} } } } diff --git a/renderer/formats/meshlet.hpp b/renderer/formats/meshlet.hpp index 752fbb83..a6dc9212 100644 --- a/renderer/formats/meshlet.hpp +++ b/renderer/formats/meshlet.hpp @@ -26,6 +26,12 @@ #include "filesystem.hpp" #include "math.hpp" +namespace Vulkan +{ +class CommandBuffer; +class Buffer; +} + namespace Granite { namespace SceneFormats @@ -101,6 +107,12 @@ struct MeshView static const char magic[8] = { 'M', 'E', 'S', 'H', 'L', 'E', 'T', '1' }; MeshView create_mesh_view(const FileMapping &mapping); + +bool decode_mesh(Vulkan::CommandBuffer &cmd, + const Vulkan::Buffer &ibo, uint64_t ibo_offset, + const Vulkan::Buffer &vbo, uint64_t vbo_offset, + const Vulkan::Buffer &payload, uint64_t payload_offset, + const MeshView &view); } } } diff --git a/scene-export/meshlet_export.cpp b/scene-export/meshlet_export.cpp index 99422f09..5911ae10 100644 --- a/scene-export/meshlet_export.cpp +++ b/scene-export/meshlet_export.cpp @@ -657,18 +657,54 @@ static bool export_encoded_mesh(const std::string &path, const Encoded &encoded) return true; } -bool export_mesh_to_meshlet(const std::string &path, SceneFormats::Mesh mesh) +bool 
export_mesh_to_meshlet(const std::string &path, SceneFormats::Mesh mesh, SceneFormats::Meshlet::MeshStyle style) { if (!mesh_canonicalize_indices(mesh)) return false; - auto positions = mesh_extract_position_snorm_exp(mesh); - auto normals = mesh_extract_normal_tangent_oct8(mesh, MeshAttribute::Normal); - auto tangent = mesh_extract_normal_tangent_oct8(mesh, MeshAttribute::Tangent); - auto uv = mesh_extract_uv_snorm_scale(mesh); + std::vector positions, uv; + std::vector normals, tangent; - unsigned num_u32_streams = (sizeof(positions.front()) + sizeof(normals.front()) + - sizeof(tangent.front()) + sizeof(uv.front())) / sizeof(uint32_t); + unsigned num_u32_streams = 0; + + switch (style) + { + case SceneFormats::Meshlet::MeshStyle::Skinned: + LOGE("Unimplemented.\n"); + return false; + case SceneFormats::Meshlet::MeshStyle::Textured: + uv = mesh_extract_uv_snorm_scale(mesh); + num_u32_streams += 2; + if (uv.empty()) + { + LOGE("No UVs.\n"); + return false; + } + // Fallthrough + case SceneFormats::Meshlet::MeshStyle::Untextured: + normals = mesh_extract_normal_tangent_oct8(mesh, MeshAttribute::Normal); + tangent = mesh_extract_normal_tangent_oct8(mesh, MeshAttribute::Tangent); + if (normals.empty() || tangent.empty()) + { + LOGE("No normal or tangent.\n"); + return false; + } + num_u32_streams += 2; + // Fallthrough + case SceneFormats::Meshlet::MeshStyle::Wireframe: + positions = mesh_extract_position_snorm_exp(mesh); + if (positions.empty()) + { + LOGE("No positions.\n"); + return false; + } + num_u32_streams += 2; + break; + + default: + LOGE("Unknown mesh style.\n"); + return false; + } std::vector attributes(num_u32_streams * positions.size()); uint32_t *ptr = attributes.data(); @@ -676,23 +712,36 @@ bool export_mesh_to_meshlet(const std::string &path, SceneFormats::Mesh mesh) { memcpy(ptr, positions[i].data, sizeof(positions.front())); ptr += sizeof(positions.front()) / sizeof(uint32_t); - memcpy(ptr, normals[i].data, sizeof(normals.front())); - ptr += 
sizeof(normals.front()) / sizeof(uint32_t); - memcpy(ptr, tangent[i].data, sizeof(tangent.front())); - ptr += sizeof(tangent.front()) / sizeof(uint32_t); - memcpy(ptr, uv[i].data, sizeof(uv.front())); - ptr += sizeof(uv.front()) / sizeof(uint32_t); + + if (!normals.empty()) + { + memcpy(ptr, normals[i].data, sizeof(normals.front())); + ptr += sizeof(normals.front()) / sizeof(uint32_t); + } + + if (!tangent.empty()) + { + memcpy(ptr, tangent[i].data, sizeof(tangent.front())); + ptr += sizeof(tangent.front()) / sizeof(uint32_t); + } + + if (!uv.empty()) + { + memcpy(ptr, uv[i].data, sizeof(uv.front())); + ptr += sizeof(uv.front()) / sizeof(uint32_t); + } } // Use quantized position to guide the clustering. - std::vector position_buffer; + std::vector position_buffer; position_buffer.reserve(positions.size()); for (auto &p: positions) position_buffer.push_back(decode_snorm_exp(p)); + // Special meshoptimizer limit. constexpr unsigned max_vertices = 255; constexpr unsigned max_primitives = 256; - std::vector optimized_index_buffer(mesh.count); + std::vector optimized_index_buffer(mesh.count); meshopt_optimizeVertexCache( optimized_index_buffer.data(), reinterpret_cast(mesh.indices.data()), mesh.count, positions.size()); @@ -710,8 +759,8 @@ bool export_mesh_to_meshlet(const std::string &path, SceneFormats::Mesh mesh) meshlets.resize(num_meshlets); - std::vector out_meshlets; - std::vector out_index_buffer; + std::vector out_meshlets; + std::vector out_index_buffer; out_meshlets.reserve(num_meshlets); for (auto &meshlet: meshlets) @@ -731,7 +780,7 @@ bool export_mesh_to_meshlet(const std::string &path, SceneFormats::Mesh mesh) } } - std::vector bounds; + std::vector bounds; bounds.clear(); bounds.reserve(num_meshlets); for (auto &meshlet: out_meshlets) diff --git a/scene-export/meshlet_export.hpp b/scene-export/meshlet_export.hpp index ee91d056..6d6607c0 100644 --- a/scene-export/meshlet_export.hpp +++ b/scene-export/meshlet_export.hpp @@ -25,11 +25,12 @@ #include 
#include #include "scene_formats.hpp" +#include "meshlet.hpp" namespace Granite { namespace Meshlet { -bool export_mesh_to_meshlet(const std::string &path, SceneFormats::Mesh mesh); +bool export_mesh_to_meshlet(const std::string &path, SceneFormats::Mesh mesh, SceneFormats::Meshlet::MeshStyle style); } } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 06bc7e90..86fa40b4 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -168,6 +168,12 @@ if (NOT ANDROID) endif() target_link_libraries(meshopt-sandbox PRIVATE granite-scene-export) +add_granite_application(meshlet-viewer meshlet_viewer.cpp) +if (NOT ANDROID) + target_compile_definitions(meshlet-viewer PRIVATE ASSET_DIRECTORY=\"${CMAKE_CURRENT_SOURCE_DIR}/assets\") +endif() +target_link_libraries(meshlet-viewer PRIVATE granite-scene-export) + add_granite_application(dgc-test dgc_test.cpp) if (NOT ANDROID) target_compile_definitions(dgc-test PRIVATE ASSET_DIRECTORY=\"${CMAKE_CURRENT_SOURCE_DIR}/assets\") diff --git a/tests/assets/shaders/meshlet_debug.frag b/tests/assets/shaders/meshlet_debug.frag new file mode 100644 index 00000000..2f2f0248 --- /dev/null +++ b/tests/assets/shaders/meshlet_debug.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(location = 0) in mediump vec3 vNormal; +layout(location = 1) in mediump vec4 vTangent; +layout(location = 2) in vec2 vUV; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(vNormal.xyz * 0.5 + 0.5, 1.0); +} diff --git a/tests/assets/shaders/meshlet_debug.vert b/tests/assets/shaders/meshlet_debug.vert new file mode 100644 index 00000000..1d7349fb --- /dev/null +++ b/tests/assets/shaders/meshlet_debug.vert @@ -0,0 +1,56 @@ +#version 450 + +layout(location = 0) in uvec4 ATTR0; +layout(location = 1) in uvec2 ATTR1; +layout(location = 0) out mediump vec3 vNormal; +layout(location = 1) out mediump vec4 vTangent; +layout(location = 2) out vec2 vUV; + +layout(set = 0, binding = 0) uniform UBO +{ + mat4 VP; +}; + +vec3 
attribute_decode_snorm_exp_position(uvec2 payload) +{ + ivec3 sint_value = ivec3( + bitfieldExtract(int(payload.x), 0, 16), + bitfieldExtract(int(payload.x), 16, 16), + bitfieldExtract(int(payload.y), 0, 16)); + int exp = bitfieldExtract(int(payload.y), 16, 16); + return vec3( + ldexp(float(sint_value.x), exp), + ldexp(float(sint_value.y), exp), + ldexp(float(sint_value.z), exp)); +} + +vec2 attribute_decode_snorm_exp_uv(uvec2 payload) +{ + ivec2 sint_value = ivec2( + bitfieldExtract(int(payload.x), 0, 16), + bitfieldExtract(int(payload.x), 16, 16)); + int exp = bitfieldExtract(int(payload.y), 0, 16); + return vec2( + ldexp(float(sint_value.x), exp), + ldexp(float(sint_value.y), exp)) + 0.5; +} + +// Adapted from: https://knarkowicz.wordpress.com/2014/04/16/octahedron-normal-vector-encoding/ +// https://twitter.com/Stubbesaurus/status/9379947905532272640 +mediump vec4 attribute_decode_oct8_normal_tangent(uint payload) +{ + mediump vec4 f = unpackSnorm4x8(payload); + mediump vec3 n = vec3(f.x, f.y, 1.0 - abs(f.x) - abs(f.y)); + mediump float t = max(-n.z, 0.0); + n.xy += mix(vec2(t), vec2(-t), greaterThanEqual(n.xy, vec2(0.0))); + return vec4(normalize(n), f.w != 0.0 ? 
-1.0 : 1.0); +} + +void main() +{ + vec3 pos = attribute_decode_snorm_exp_position(ATTR0.xy); + vNormal = attribute_decode_oct8_normal_tangent(ATTR0.z).xyz; + vTangent = attribute_decode_oct8_normal_tangent(ATTR0.w); + vUV = attribute_decode_snorm_exp_uv(ATTR1); + gl_Position = VP * vec4(pos, 1.0); +} diff --git a/tests/meshlet_viewer.cpp b/tests/meshlet_viewer.cpp new file mode 100644 index 00000000..caa98e3a --- /dev/null +++ b/tests/meshlet_viewer.cpp @@ -0,0 +1,170 @@ +/* Copyright (c) 2017-2023 Hans-Kristian Arntzen + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "application.hpp" +#include "command_buffer.hpp" +#include "device.hpp" +#include "os_filesystem.hpp" +#include "muglm/muglm_impl.hpp" +#include "meshlet.hpp" +#include "aabb.hpp" +#include "event.hpp" +#include "camera.hpp" +#include "event_manager.hpp" +#include +#include +#include + +using namespace Granite; +using namespace Vulkan; + +struct MeshletViewerApplication : Granite::Application, Granite::EventHandler +{ + MeshletViewerApplication(const char *path) + { + get_wsi().set_backbuffer_srgb(false); + + auto file = GRANITE_FILESYSTEM()->open(path, FileMode::ReadOnly); + if (!file) + throw std::runtime_error("Failed to open file."); + + mapping = file->map(); + if (!mapping) + throw std::runtime_error("Failed to map file."); + + EVENT_MANAGER_REGISTER_LATCH(MeshletViewerApplication, on_device_create, on_device_destroy, DeviceCreatedEvent); + } + + FileMappingHandle mapping; + Vulkan::BufferHandle ibo; + Vulkan::BufferHandle vbo; + Vulkan::BufferHandle payload; + AABB aabb; + FPSCamera camera; + + void on_device_create(const DeviceCreatedEvent &e) + { + auto view = SceneFormats::Meshlet::create_mesh_view(*mapping); + if (!view.format_header) + throw std::runtime_error("Failed to load meshlet."); + + Vulkan::BufferCreateInfo info = {}; + info.size = view.total_primitives * sizeof(uvec3); + info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT; + info.domain = Vulkan::BufferDomain::Device; + ibo = e.get_device().create_buffer(info); + + info.size = view.total_vertices * (view.format_header->u32_stream_count - 1) * sizeof(uint32_t); + info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; + info.domain = Vulkan::BufferDomain::Device; + vbo = e.get_device().create_buffer(info); + + info.size = view.format_header->payload_size_words * sizeof(uint32_t); + info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + info.domain = Vulkan::BufferDomain::LinkedDeviceHost; + payload = 
e.get_device().create_buffer(info, view.payload); + + auto cmd = e.get_device().request_command_buffer(); + if (!SceneFormats::Meshlet::decode_mesh(*cmd, *ibo, 0, *vbo, 0, *payload, 0, view)) + { + e.get_device().submit_discard(cmd); + throw std::runtime_error("Failed to decode mesh.\n"); + } + + cmd->barrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT); + e.get_device().submit(cmd); + + aabb = { vec3(FLT_MAX), vec3(-FLT_MAX) }; // Inverted seed box; FLT_MIN is the smallest positive float, so -FLT_MAX is the correct initial maximum. + for (uint32_t i = 0; i < view.format_header->meshlet_count; i++) + { + auto cluster_aabb = AABB{ + view.bounds[i].center - view.bounds[i].radius, + view.bounds[i].center + view.bounds[i].radius, + }; + aabb.expand(cluster_aabb); + } + + camera.set_depth_range(0.1f, 200.0f); + camera.set_fovy(0.4f * pi()); + camera.look_at(aabb.get_center() + vec3(0.1f, 0.2f, 1.1f) * aabb.get_radius(), + aabb.get_center(), vec3(0.0f, 1.0f, 0.0f)); + } + + void on_device_destroy(const DeviceCreatedEvent &) + { + ibo.reset(); + vbo.reset(); + payload.reset(); + } + + void render_frame(double, double) override + { + auto &wsi = get_wsi(); + auto &device = wsi.get_device(); + auto cmd = device.request_command_buffer(); + + cmd->begin_render_pass(device.get_swapchain_render_pass(SwapchainRenderPass::Depth)); + camera.set_aspect(cmd->get_viewport().width / cmd->get_viewport().height); + + cmd->set_program("assets://shaders/meshlet_debug.vert", + "assets://shaders/meshlet_debug.frag"); + cmd->set_opaque_state(); + + auto vp = camera.get_projection() * camera.get_view(); + *cmd->allocate_typed_constant_data(0, 0, 1) = vp; + + cmd->set_index_buffer(*ibo, 0, VK_INDEX_TYPE_UINT32); + cmd->set_vertex_binding(0, *vbo, 0, 6 * sizeof(uint32_t)); + cmd->set_vertex_attrib(0, 0, VK_FORMAT_R32G32B32A32_UINT, 0); + cmd->set_vertex_attrib(1, 0, VK_FORMAT_R32G32_UINT, 4 * sizeof(uint32_t)); + + cmd->draw_indexed(ibo->get_create_info().size /
sizeof(uint32_t), 1); + cmd->end_render_pass(); + device.submit(cmd); + } +}; + +namespace Granite +{ +Application *application_create(int argc, char **argv) +{ + GRANITE_APPLICATION_SETUP_FILESYSTEM(); + + if (argc != 2) + { + LOGE("Usage: meshlet-viewer path.msh1\n"); + return nullptr; + } + + try + { + auto *app = new MeshletViewerApplication(argv[1]); + return app; + } + catch (const std::exception &e) + { + LOGE("application_create() threw exception: %s\n", e.what()); + return nullptr; + } +} +} diff --git a/tests/meshopt_sandbox.cpp b/tests/meshopt_sandbox.cpp index 9e66ff03..1cd51928 100644 --- a/tests/meshopt_sandbox.cpp +++ b/tests/meshopt_sandbox.cpp @@ -268,10 +268,13 @@ int main(int argc, char *argv[]) dev.set_context(ctx); dev.init_frame_contexts(4); - if (!Meshlet::export_mesh_to_meshlet("/tmp/export.mesh", parser.get_meshes().front())) + if (!Meshlet::export_mesh_to_meshlet("/tmp/export.msh1", + parser.get_meshes().front(), SceneFormats::Meshlet::MeshStyle::Textured)) + { return EXIT_FAILURE; + } - auto file = GRANITE_FILESYSTEM()->open("/tmp/export.mesh", FileMode::ReadOnly); + auto file = GRANITE_FILESYSTEM()->open("/tmp/export.msh1", FileMode::ReadOnly); if (!file) return EXIT_FAILURE; diff --git a/vulkan/managers/shader_manager.cpp b/vulkan/managers/shader_manager.cpp index fa66c077..c69dab5f 100644 --- a/vulkan/managers/shader_manager.cpp +++ b/vulkan/managers/shader_manager.cpp @@ -355,19 +355,21 @@ Vulkan::Program *ShaderProgramVariant::get_program_graphics() auto *frag = stages[Util::ecast(Vulkan::ShaderStage::Fragment)]; #ifdef GRANITE_SHIPPING - if (mesh) + if (mesh && frag) { ret = device->request_program(task ? 
task->resolve(*device) : nullptr, mesh->resolve(*device), frag->resolve(*device), sampler_bank.get()); } - else + else if (vert && frag) { ret = device->request_program(vert->resolve(*device), frag->resolve(*device), sampler_bank.get()); } + else + return nullptr; #else auto &vert_instance = shader_instance[Util::ecast(Vulkan::ShaderStage::Vertex)]; auto &frag_instance = shader_instance[Util::ecast(Vulkan::ShaderStage::Fragment)]; @@ -383,7 +385,7 @@ Vulkan::Program *ShaderProgramVariant::get_program_graphics() // we can safely read program directly. // comp->instance will only ever be incremented in the main thread on an inotify, so this is fine. // If comp->instance changes in the interim, we are at least guaranteed to read a sensible value for program. - if (mesh) + if (mesh && frag) { if ((!task || (loaded_task_instance == task->instance)) && loaded_mesh_instance == mesh->instance && @@ -392,11 +394,13 @@ Vulkan::Program *ShaderProgramVariant::get_program_graphics() return program.load(std::memory_order_relaxed); } } - else + else if (vert && frag) { if (loaded_vert_instance == vert->instance && loaded_frag_instance == frag->instance) return program.load(std::memory_order_relaxed); } + else + return nullptr; instance_lock.lock_write(); From 2af40aaa590d6b9b286bda49ec4737cf18b1c0ff Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 31 Jul 2023 13:02:35 +0200 Subject: [PATCH 33/71] Refactor out decode code to helpers. 
--- assets/shaders/decode/meshlet_decode.comp | 214 ++---------------- .../shaders/inc/meshlet_payload_constants.h | 7 + assets/shaders/inc/meshlet_payload_decode.h | 182 +++++++++++++++ 3 files changed, 205 insertions(+), 198 deletions(-) create mode 100644 assets/shaders/inc/meshlet_payload_constants.h create mode 100644 assets/shaders/inc/meshlet_payload_decode.h diff --git a/assets/shaders/decode/meshlet_decode.comp b/assets/shaders/decode/meshlet_decode.comp index b6e5bf6e..870c3dbf 100644 --- a/assets/shaders/decode/meshlet_decode.comp +++ b/assets/shaders/decode/meshlet_decode.comp @@ -1,47 +1,15 @@ #version 450 -#define PACKED_WAVEOPS 0 - -#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require -#extension GL_EXT_shader_subgroup_extended_types_int8 : require -#extension GL_KHR_shader_subgroup_arithmetic : require -#extension GL_KHR_shader_subgroup_basic : require -#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require #extension GL_EXT_scalar_block_layout : require - -#define MAX_ELEMENTS 256 -#define NUM_CHUNKS 8 - -layout(local_size_x = 32, local_size_y = NUM_CHUNKS) in; - +#include "../inc/meshlet_payload_constants.h" +layout(local_size_x = 32, local_size_y = MESHLET_PAYLOAD_NUM_CHUNKS) in; layout(constant_id = 0) const uint NUM_U32_STREAMS = 1; - -struct MeshletStream -{ - u16vec4 predictor_a; - u16vec4 predictor_b; - u8vec4 initial_value; - uint offset_from_base; - uint16_t bitplane_meta[NUM_CHUNKS]; -}; - -struct MeshletMeta -{ - uint base_vertex_offset; - uint8_t num_primitives_minus_1; - uint8_t num_attributes_minus_1; - uint16_t reserved; -}; - -layout(set = 0, binding = 0, std430) readonly buffer MeshletMetas -{ - MeshletMeta data[]; -} meshlet_metas; - -layout(set = 0, binding = 1, std430) readonly buffer MeshletStreams -{ - MeshletStream data[]; -} meshlet_streams; +#define MESHLET_PAYLOAD_NUM_U32_STREAMS NUM_U32_STREAMS +#define MESHLET_PAYLOAD_DESCRIPTOR_SET 0 +#define MESHLET_PAYLOAD_META_BINDING 0 +#define 
MESHLET_PAYLOAD_STREAM_BINDING 1 +#define MESHLET_PAYLOAD_PAYLOAD_BINDING 4 +#include "../inc/meshlet_payload_decode.h" layout(set = 0, binding = 2, std430) writeonly buffer OutputAttributes { @@ -53,186 +21,36 @@ layout(set = 0, binding = 3, scalar) writeonly buffer OutputIndices uvec3 data[]; } output_indices; -layout(set = 0, binding = 4, std430) readonly buffer Payload -{ - uint data[]; -} payload; - layout(set = 0, binding = 5, std430) readonly buffer OutputOffsets { uvec2 data[]; } output_offset_strides; -shared u8vec4 shared_chunk_bit_counts[NUM_U32_STREAMS][NUM_CHUNKS]; -shared uint shared_chunk_offset[NUM_U32_STREAMS][NUM_CHUNKS]; -#if PACKED_WAVEOPS -shared u8vec4 chunk_values[NUM_CHUNKS]; -#else -shared uvec2 chunk_values[NUM_CHUNKS]; -#endif - -// Hardcodes wave32 atm. Need fallback. - -uvec2 pack_u16vec2_to_uvec2(u16vec4 v) -{ - return uvec2(pack32(v.xy), pack32(v.zw)); -} - void main() { uint meshlet_index = gl_WorkGroupID.x; - int subgroup_lane = int(gl_SubgroupInvocationID); - - for (uint stream_index = gl_SubgroupID; stream_index < NUM_U32_STREAMS; stream_index += gl_NumSubgroups) - { - // Start by decoding the offset for bitplanes for all u32 streams. 
- if (subgroup_lane < int(gl_WorkGroupSize.y)) - { - uint bitplane_value = uint(meshlet_streams.data[stream_index + NUM_U32_STREAMS * meshlet_index].bitplane_meta[subgroup_lane]); - u16vec4 bit_counts = (u16vec4(bitplane_value) >> u16vec4(0, 4, 8, 12)) & 0xfus; - u16vec2 bit_counts2 = bit_counts.xy + bit_counts.zw; - uint total_bits = bit_counts2.x + bit_counts2.y; - uint offset = meshlet_streams.data[stream_index + NUM_U32_STREAMS * meshlet_index].offset_from_base; - shared_chunk_offset[stream_index][subgroup_lane] = subgroupExclusiveAdd(total_bits) + offset; - shared_chunk_bit_counts[stream_index][subgroup_lane] = u8vec4(bit_counts); - } - } - - barrier(); - + meshlet_init_workgroup(meshlet_index); MeshletMeta meta = meshlet_metas.data[meshlet_index]; - uint unrolled_stream_index = NUM_U32_STREAMS * meshlet_index; - - for (uint i = 0; i < NUM_U32_STREAMS; i++, unrolled_stream_index++) + for (uint i = 0; i < NUM_U32_STREAMS; i++) { - uint offset_from_base = meshlet_streams.data[unrolled_stream_index].offset_from_base; - u16vec4 predictor_a = meshlet_streams.data[unrolled_stream_index].predictor_a; - u16vec4 predictor_b = meshlet_streams.data[unrolled_stream_index].predictor_b; -#if PACKED_WAVEOPS - u8vec4 initial_value = meshlet_streams.data[unrolled_stream_index].initial_value; -#else - u8vec4 initial_value_ = meshlet_streams.data[unrolled_stream_index].initial_value; - uvec2 initial_value = pack_u16vec2_to_uvec2(u16vec4(initial_value_)); -#endif - - uint chunk_id = gl_SubgroupID; - uint bitplane_offsets = shared_chunk_offset[i][chunk_id]; - ivec4 bit_counts = ivec4(shared_chunk_bit_counts[i][chunk_id]); - - uvec4 decoded = ivec4(0); - - // Overlap load with consumption. - // Helps RDNA2 quite a lot here! 
- uint value = payload.data[bitplane_offsets]; - - for (int i = 0; i < bit_counts.x; i++) - { - decoded.x |= bitfieldExtract(value, subgroup_lane, 1) << i; - value = payload.data[++bitplane_offsets]; - } - decoded.x = bitfieldExtract(int(decoded.x), 0, bit_counts.x); - - for (int i = 0; i < bit_counts.y; i++) - { - decoded.y |= bitfieldExtract(value, subgroup_lane, 1) << i; - value = payload.data[++bitplane_offsets]; - } - decoded.y = bitfieldExtract(int(decoded.y), 0, bit_counts.y); - - for (int i = 0; i < bit_counts.z; i++) - { - decoded.z |= bitfieldExtract(value, subgroup_lane, 1) << i; - value = payload.data[++bitplane_offsets]; - } - decoded.z = bitfieldExtract(int(decoded.z), 0, bit_counts.z); - - for (int i = 0; i < bit_counts.w; i++) - { - decoded.w |= bitfieldExtract(value, subgroup_lane, 1) << i; - value = payload.data[++bitplane_offsets]; - } - decoded.w = bitfieldExtract(int(decoded.w), 0, bit_counts.w); - -#if PACKED_WAVEOPS - // Resolve deltas in packed 4x8 math. - u8vec4 packed_decoded = u8vec4(decoded); - uint linear_index = chunk_id * gl_WorkGroupSize.x + subgroup_lane; - if (linear_index == 0) - packed_decoded += initial_value; - packed_decoded += u8vec4((predictor_a + predictor_b * uint16_t(linear_index)) >> 8us); - packed_decoded = subgroupInclusiveAdd(packed_decoded); - - if (i > 0) - barrier(); // Resolve WAR hazard from last iteration. - if (subgroup_lane == int(gl_SubgroupSize) - 1) - chunk_values[chunk_id] = packed_decoded; - barrier(); - if (gl_SubgroupID == 0u && subgroup_lane < int(gl_WorkGroupSize.y)) - chunk_values[subgroup_lane] = subgroupInclusiveAdd(chunk_values[subgroup_lane]); - barrier(); - if (chunk_id != 0) - packed_decoded += chunk_values[chunk_id - 1]; + uint packed_decoded = meshlet_decode_stream(meshlet_index, i); + uint linear_index = meshlet_get_linear_index(); if (i == 0) { // Write index buffer. 
- uvec3 indices = uvec3(packed_decoded.xyz); + uvec3 indices = uvec4(unpack8(packed_decoded)).xyz; indices += meta.base_vertex_offset; - uint output_offset = output_offset_strides.data[unrolled_stream_index].x; + uint output_offset = output_offset_strides.data[meshlet_index * NUM_U32_STREAMS + i].x; if (linear_index <= uint(meta.num_primitives_minus_1)) output_indices.data[output_offset + linear_index] = indices; } else { - // TODO: decode filters? Should probably be deferred to vertex / mesh shader. - uvec2 output_offset_stride = output_offset_strides.data[unrolled_stream_index]; - if (linear_index <= uint(meta.num_attributes_minus_1)) - output_payload.data[output_offset_stride.x + linear_index * output_offset_stride.y] = pack32(packed_decoded); - } -#else - // Resolve deltas in packed 4x8 math. - uvec2 packed_decoded = pack_u16vec2_to_uvec2(u16vec4(decoded)) & 0xff00ffu; - uint linear_index = chunk_id * gl_WorkGroupSize.x + subgroup_lane; - if (linear_index == 0) - packed_decoded += initial_value; - packed_decoded += pack_u16vec2_to_uvec2((predictor_a + predictor_b * uint16_t(linear_index)) >> 8us); - packed_decoded = subgroupInclusiveAdd(packed_decoded); - - if (i > 0) - barrier(); // Resolve WAR hazard from last iteration. - if (subgroup_lane == int(gl_SubgroupSize) - 1) - chunk_values[chunk_id] = packed_decoded & 0xff00ffu; - barrier(); - if (gl_SubgroupID == 0u && subgroup_lane < int(gl_WorkGroupSize.y)) - chunk_values[subgroup_lane] = subgroupInclusiveAdd(chunk_values[subgroup_lane]); - barrier(); - if (chunk_id != 0) - packed_decoded += chunk_values[chunk_id - 1]; - - if (i == 0) - { - // Write index buffer. 
- uvec3 indices = uvec3( - bitfieldExtract(packed_decoded.x, 0, 8), - bitfieldExtract(packed_decoded.x, 16, 8), - bitfieldExtract(packed_decoded.y, 0, 8)); - indices += meta.base_vertex_offset; - uint output_offset = output_offset_strides.data[unrolled_stream_index].x; - if (linear_index <= uint(meta.num_primitives_minus_1)) - output_indices.data[output_offset + linear_index] = indices; - } - else - { - // TODO: decode filters? Should probably be deferred to vertex / mesh shader. - uvec2 output_offset_stride = output_offset_strides.data[unrolled_stream_index]; + uvec2 output_offset_stride = output_offset_strides.data[meshlet_index * NUM_U32_STREAMS + i]; if (linear_index <= uint(meta.num_attributes_minus_1)) - output_payload.data[output_offset_stride.x + linear_index * output_offset_stride.y] = - bitfieldExtract(packed_decoded.x, 0, 8) | - (bitfieldExtract(packed_decoded.x, 16, 8) << 8) | - (bitfieldExtract(packed_decoded.y, 0, 8) << 16) | - (bitfieldExtract(packed_decoded.y, 16, 8) << 24); + output_payload.data[output_offset_stride.x + linear_index * output_offset_stride.y] = packed_decoded; } -#endif } } diff --git a/assets/shaders/inc/meshlet_payload_constants.h b/assets/shaders/inc/meshlet_payload_constants.h new file mode 100644 index 00000000..2cb1bbe0 --- /dev/null +++ b/assets/shaders/inc/meshlet_payload_constants.h @@ -0,0 +1,7 @@ +#ifndef MESHLET_PAYLOAD_CONSTANTS_H_ +#define MESHLET_PAYLOAD_CONSTANTS_H_ + +#define MESHLET_PAYLOAD_MAX_ELEMENTS 256 +#define MESHLET_PAYLOAD_NUM_CHUNKS 8 + +#endif \ No newline at end of file diff --git a/assets/shaders/inc/meshlet_payload_decode.h b/assets/shaders/inc/meshlet_payload_decode.h new file mode 100644 index 00000000..777a9bfa --- /dev/null +++ b/assets/shaders/inc/meshlet_payload_decode.h @@ -0,0 +1,182 @@ +#ifndef MESHLET_PAYLOAD_DECODE_H_ +#define MESHLET_PAYLOAD_DECODE_H_ + +#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require +#extension GL_KHR_shader_subgroup_arithmetic : require +#extension 
GL_KHR_shader_subgroup_basic : require +#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require +#extension GL_EXT_scalar_block_layout : require + +#include "meshlet_payload_constants.h" + +#ifndef MESHLET_PAYLOAD_NUM_U32_STREAMS +#error "Must define MESHLET_PAYLOAD_NUM_U32_STREAMS before including meshlet_payload_decode.h" +#endif + +#ifndef MESHLET_PAYLOAD_DESCRIPTOR_SET +#error "Must define MESHLET_PAYLOAD_DESCRIPTOR_SET" +#endif + +#ifndef MESHLET_PAYLOAD_META_BINDING +#error "Must define MESHLET_PAYLOAD_META_BINDING" +#endif + +#ifndef MESHLET_PAYLOAD_STREAM_BINDING +#error "Must define MESHLET_PAYLOAD_STREAM_BINDING" +#endif + +#ifndef MESHLET_PAYLOAD_PAYLOAD_BINDING +#error "Must define MESHLET_PAYLOAD_PAYLOAD_BINDING" +#endif + +struct MeshletStream +{ + u16vec4 predictor_a; + u16vec4 predictor_b; + u8vec4 initial_value; + uint offset_from_base; + uint16_t bitplane_meta[MESHLET_PAYLOAD_NUM_CHUNKS]; +}; + +struct MeshletMeta +{ + uint base_vertex_offset; + uint8_t num_primitives_minus_1; + uint8_t num_attributes_minus_1; + uint16_t reserved; +}; + +layout(set = MESHLET_PAYLOAD_DESCRIPTOR_SET, binding = MESHLET_PAYLOAD_META_BINDING, std430) readonly buffer MeshletMetas +{ + MeshletMeta data[]; +} meshlet_metas; + +layout(set = MESHLET_PAYLOAD_DESCRIPTOR_SET, binding = MESHLET_PAYLOAD_STREAM_BINDING, std430) readonly buffer MeshletStreams +{ + MeshletStream data[]; +} meshlet_streams; + +layout(set = MESHLET_PAYLOAD_DESCRIPTOR_SET, binding = MESHLET_PAYLOAD_PAYLOAD_BINDING, std430) readonly buffer Payload +{ + uint data[]; +} payload; + +shared u8vec4 shared_chunk_bit_counts[MESHLET_PAYLOAD_NUM_U32_STREAMS][MESHLET_PAYLOAD_NUM_CHUNKS]; +shared uint shared_chunk_offset[MESHLET_PAYLOAD_NUM_U32_STREAMS][MESHLET_PAYLOAD_NUM_CHUNKS]; +#if MESHLET_PAYLOAD_PACKED_WAVEOPS +shared u8vec4 chunk_values[MESHLET_PAYLOAD_NUM_CHUNKS]; +#else +shared uvec2 chunk_values[MESHLET_PAYLOAD_NUM_CHUNKS]; +#endif + +// Hardcodes wave32 atm. Need fallback. 
+ +uvec2 pack_u16vec4_to_uvec2(u16vec4 v) +{ + return uvec2(pack32(v.xy), pack32(v.zw)); +} + +uint repack_uint(uvec2 v) +{ + u16vec4 v16 = u16vec4(unpack16(v.x), unpack16(v.y)); + return pack32(u8vec4(v16)); +} + +void meshlet_init_workgroup(uint meshlet_index) +{ + int subgroup_lane = int(gl_SubgroupInvocationID); + + for (uint stream_index = gl_SubgroupID; stream_index < MESHLET_PAYLOAD_NUM_U32_STREAMS; stream_index += gl_NumSubgroups) + { + // Start by decoding the offset for bitplanes for all u32 streams. + if (subgroup_lane < int(gl_WorkGroupSize.y)) + { + uint bitplane_value = uint(meshlet_streams.data[stream_index + MESHLET_PAYLOAD_NUM_U32_STREAMS * meshlet_index].bitplane_meta[subgroup_lane]); + u16vec4 bit_counts = (u16vec4(bitplane_value) >> u16vec4(0, 4, 8, 12)) & 0xfus; + u16vec2 bit_counts2 = bit_counts.xy + bit_counts.zw; + uint total_bits = bit_counts2.x + bit_counts2.y; + uint offset = meshlet_streams.data[stream_index + NUM_U32_STREAMS * meshlet_index].offset_from_base; + shared_chunk_offset[stream_index][subgroup_lane] = subgroupExclusiveAdd(total_bits) + offset; + shared_chunk_bit_counts[stream_index][subgroup_lane] = u8vec4(bit_counts); + } + } + + barrier(); +} + +uint meshlet_get_linear_index() +{ + return gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID; +} + +uint meshlet_decode_stream(uint meshlet_index, uint stream_index) +{ + uint unrolled_stream_index = MESHLET_PAYLOAD_NUM_U32_STREAMS * meshlet_index + stream_index; + uint offset_from_base = meshlet_streams.data[unrolled_stream_index].offset_from_base; + u16vec4 predictor_a = meshlet_streams.data[unrolled_stream_index].predictor_a; + u16vec4 predictor_b = meshlet_streams.data[unrolled_stream_index].predictor_b; + u8vec4 initial_value_ = meshlet_streams.data[unrolled_stream_index].initial_value; + uvec2 initial_value = pack_u16vec4_to_uvec2(u16vec4(initial_value_)); + + uint chunk_id = gl_SubgroupID; + int subgroup_lane = int(gl_SubgroupInvocationID); + uint bitplane_offsets = 
shared_chunk_offset[stream_index][chunk_id]; + ivec4 bit_counts = ivec4(shared_chunk_bit_counts[stream_index][chunk_id]); + + uvec4 decoded = ivec4(0); + + // Overlap load with consumption. + // Helps RDNA2 quite a lot here! + uint value = payload.data[bitplane_offsets]; + + for (int i = 0; i < bit_counts.x; i++) + { + decoded.x |= bitfieldExtract(value, subgroup_lane, 1) << i; + value = payload.data[++bitplane_offsets]; + } + decoded.x = bitfieldExtract(int(decoded.x), 0, bit_counts.x); + + for (int i = 0; i < bit_counts.y; i++) + { + decoded.y |= bitfieldExtract(value, subgroup_lane, 1) << i; + value = payload.data[++bitplane_offsets]; + } + decoded.y = bitfieldExtract(int(decoded.y), 0, bit_counts.y); + + for (int i = 0; i < bit_counts.z; i++) + { + decoded.z |= bitfieldExtract(value, subgroup_lane, 1) << i; + value = payload.data[++bitplane_offsets]; + } + decoded.z = bitfieldExtract(int(decoded.z), 0, bit_counts.z); + + for (int i = 0; i < bit_counts.w; i++) + { + decoded.w |= bitfieldExtract(value, subgroup_lane, 1) << i; + value = payload.data[++bitplane_offsets]; + } + decoded.w = bitfieldExtract(int(decoded.w), 0, bit_counts.w); + + // Resolve deltas in packed 4x8 math. + uvec2 packed_decoded = pack_u16vec4_to_uvec2(u16vec4(decoded)) & 0xff00ffu; + uint linear_index = meshlet_get_linear_index(); + if (linear_index == 0) + packed_decoded += initial_value; + packed_decoded += pack_u16vec4_to_uvec2((predictor_a + predictor_b * uint16_t(linear_index)) >> 8us); + packed_decoded = subgroupInclusiveAdd(packed_decoded); + + if (stream_index > 0) + barrier(); // Resolve WAR hazard from last iteration. 
+ if (subgroup_lane == int(gl_SubgroupSize) - 1) + chunk_values[chunk_id] = packed_decoded & 0xff00ffu; + barrier(); + if (gl_SubgroupID == 0u && subgroup_lane < int(gl_WorkGroupSize.y)) + chunk_values[subgroup_lane] = subgroupInclusiveAdd(chunk_values[subgroup_lane]); + barrier(); + if (chunk_id != 0) + packed_decoded += chunk_values[chunk_id - 1]; + + return repack_uint(packed_decoded); +} + +#endif \ No newline at end of file From 0e361c814a45868e5db081909cdafde333b449d8 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 31 Jul 2023 13:18:00 +0200 Subject: [PATCH 34/71] Use common decoder in the sandbox. --- tests/meshopt_sandbox.cpp | 58 +++------------------------------------ 1 file changed, 4 insertions(+), 54 deletions(-) diff --git a/tests/meshopt_sandbox.cpp b/tests/meshopt_sandbox.cpp index 1cd51928..aca7f96b 100644 --- a/tests/meshopt_sandbox.cpp +++ b/tests/meshopt_sandbox.cpp @@ -1,18 +1,14 @@ #include "logging.hpp" -#include #include #include "math.hpp" #include "device.hpp" #include "context.hpp" #include "muglm/muglm_impl.hpp" -#include -#include "bitops.hpp" #include "gltf.hpp" #include "global_managers_init.hpp" #include "meshlet_export.hpp" #include "meshlet.hpp" #include -#include using namespace Granite; static void decode_mesh_setup_buffers( @@ -112,79 +108,33 @@ static void decode_mesh_gpu( const SceneFormats::Meshlet::MeshView &mesh) { decode_mesh_setup_buffers(out_index_buffer, out_u32_stream, mesh); - const uint32_t u32_stride = mesh.format_header->u32_stream_count - 1; Vulkan::BufferCreateInfo buf_info = {}; buf_info.domain = Vulkan::BufferDomain::LinkedDeviceHost; buf_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; - - buf_info.size = mesh.format_header->meshlet_count * sizeof(*mesh.headers); - auto meshlet_meta_buffer = dev.create_buffer(buf_info, mesh.headers); - - buf_info.size = mesh.format_header->meshlet_count * mesh.format_header->u32_stream_count * sizeof(*mesh.streams); - auto meshlet_stream_buffer = 
dev.create_buffer(buf_info, mesh.streams); - buf_info.size = mesh.format_header->payload_size_words * sizeof(uint32_t); auto payload_buffer = dev.create_buffer(buf_info, mesh.payload); buf_info.size = out_index_buffer.size() * sizeof(uint32_t); - buf_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | - VK_BUFFER_USAGE_TRANSFER_SRC_BIT | - VK_BUFFER_USAGE_TRANSFER_DST_BIT; - buf_info.domain = Vulkan::BufferDomain::Device; - auto decoded_index_buffer = dev.create_buffer(buf_info); + buf_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; buf_info.domain = Vulkan::BufferDomain::CachedHost; auto readback_decoded_index_buffer = dev.create_buffer(buf_info); buf_info.size = out_u32_stream.size() * sizeof(uint32_t); - buf_info.domain = Vulkan::BufferDomain::Device; - auto decoded_u32_buffer = dev.create_buffer(buf_info); buf_info.domain = Vulkan::BufferDomain::CachedHost; auto readback_decoded_u32_buffer = dev.create_buffer(buf_info); - std::vector output_offset_strides; - output_offset_strides.reserve(mesh.format_header->meshlet_count * mesh.format_header->u32_stream_count); - - uint32_t index_count = 0; - for (uint32_t i = 0; i < mesh.format_header->meshlet_count; i++) - { - output_offset_strides.emplace_back(index_count, 0); - index_count += mesh.headers[i].num_primitives_minus_1 + 1; - for (uint32_t j = 1; j < mesh.format_header->u32_stream_count; j++) - output_offset_strides.emplace_back(mesh.headers[i].base_vertex_offset * u32_stride + (j - 1), u32_stride); - } - - buf_info.domain = Vulkan::BufferDomain::LinkedDeviceHost; - buf_info.size = output_offset_strides.size() * sizeof(uvec2); - auto output_offset_strides_buffer = dev.create_buffer(buf_info, output_offset_strides.data()); - bool has_renderdoc = Vulkan::Device::init_renderdoc_capture(); if (has_renderdoc) dev.begin_renderdoc_capture(); auto cmd = dev.request_command_buffer(); - cmd->set_program("builtin://shaders/decode/meshlet_decode.comp"); - cmd->enable_subgroup_size_control(true); - 
cmd->set_subgroup_size_log2(true, 5, 5); - cmd->set_storage_buffer(0, 0, *meshlet_meta_buffer); - cmd->set_storage_buffer(0, 1, *meshlet_stream_buffer); - cmd->set_storage_buffer(0, 2, *decoded_u32_buffer); - cmd->set_storage_buffer(0, 3, *decoded_index_buffer); - cmd->set_storage_buffer(0, 4, *payload_buffer); - cmd->set_storage_buffer(0, 5, *output_offset_strides_buffer); - cmd->set_specialization_constant_mask(1); - cmd->set_specialization_constant(0, mesh.format_header->u32_stream_count); - cmd->dispatch(mesh.format_header->meshlet_count, 1, 1); - + SceneFormats::Meshlet::decode_mesh(*cmd, *readback_decoded_index_buffer, 0, + *readback_decoded_u32_buffer, 0, + *payload_buffer, 0, mesh); cmd->barrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, - VK_PIPELINE_STAGE_2_COPY_BIT, VK_ACCESS_TRANSFER_READ_BIT); - - cmd->copy_buffer(*readback_decoded_index_buffer, *decoded_index_buffer); - cmd->copy_buffer(*readback_decoded_u32_buffer, *decoded_u32_buffer); - cmd->barrier(VK_PIPELINE_STAGE_2_COPY_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_HOST_BIT, VK_ACCESS_HOST_READ_BIT); dev.submit(cmd); - dev.wait_idle(); if (has_renderdoc) From f36b5c9eb56631ded77bae170517496cf38c308d Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 31 Jul 2023 15:57:54 +0200 Subject: [PATCH 35/71] Start considering NV style mesh shaders. 
--- assets/shaders/decode/meshlet_decode.comp | 35 ++-- assets/shaders/inc/meshlet_attribute_decode.h | 39 +++++ assets/shaders/inc/meshlet_payload_decode.h | 150 +++++++++++++----- renderer/formats/meshlet.cpp | 3 +- tests/assets/shaders/meshlet_debug.mesh | 70 ++++++++ tests/assets/shaders/meshlet_debug.mesh.frag | 24 +++ tests/assets/shaders/meshlet_debug.vert | 37 +---- tests/meshlet_viewer.cpp | 36 +++-- 8 files changed, 287 insertions(+), 107 deletions(-) create mode 100644 assets/shaders/inc/meshlet_attribute_decode.h create mode 100644 tests/assets/shaders/meshlet_debug.mesh create mode 100644 tests/assets/shaders/meshlet_debug.mesh.frag diff --git a/assets/shaders/decode/meshlet_decode.comp b/assets/shaders/decode/meshlet_decode.comp index 870c3dbf..3758c86f 100644 --- a/assets/shaders/decode/meshlet_decode.comp +++ b/assets/shaders/decode/meshlet_decode.comp @@ -2,7 +2,7 @@ #extension GL_EXT_scalar_block_layout : require #include "../inc/meshlet_payload_constants.h" -layout(local_size_x = 32, local_size_y = MESHLET_PAYLOAD_NUM_CHUNKS) in; +layout(local_size_x = 32, local_size_y_id = 1) in; layout(constant_id = 0) const uint NUM_U32_STREAMS = 1; #define MESHLET_PAYLOAD_NUM_U32_STREAMS NUM_U32_STREAMS #define MESHLET_PAYLOAD_DESCRIPTOR_SET 0 @@ -31,26 +31,23 @@ void main() uint meshlet_index = gl_WorkGroupID.x; meshlet_init_workgroup(meshlet_index); MeshletMeta meta = meshlet_metas.data[meshlet_index]; + uint linear_index = meshlet_get_linear_index(); - for (uint i = 0; i < NUM_U32_STREAMS; i++) { - uint packed_decoded = meshlet_decode_stream(meshlet_index, i); - uint linear_index = meshlet_get_linear_index(); + uint packed_indices = meshlet_decode_stream_32_wg256(meshlet_index, 0); + // Write index buffer. 
+ uvec3 indices = uvec4(unpack8(packed_indices)).xyz; + indices += meta.base_vertex_offset; + uint output_offset = output_offset_strides.data[meshlet_index * NUM_U32_STREAMS].x; + if (linear_index <= uint(meta.num_primitives_minus_1)) + output_indices.data[output_offset + linear_index] = indices; + } - if (i == 0) - { - // Write index buffer. - uvec3 indices = uvec4(unpack8(packed_decoded)).xyz; - indices += meta.base_vertex_offset; - uint output_offset = output_offset_strides.data[meshlet_index * NUM_U32_STREAMS + i].x; - if (linear_index <= uint(meta.num_primitives_minus_1)) - output_indices.data[output_offset + linear_index] = indices; - } - else - { - uvec2 output_offset_stride = output_offset_strides.data[meshlet_index * NUM_U32_STREAMS + i]; - if (linear_index <= uint(meta.num_attributes_minus_1)) - output_payload.data[output_offset_stride.x + linear_index * output_offset_stride.y] = packed_decoded; - } + for (uint i = 1; i < NUM_U32_STREAMS; i++) + { + uint packed_decoded = meshlet_decode_stream_32_wg256(meshlet_index, i); + uvec2 output_offset_stride0 = output_offset_strides.data[meshlet_index * NUM_U32_STREAMS + i]; + if (linear_index <= uint(meta.num_attributes_minus_1)) + output_payload.data[output_offset_stride0.x + linear_index * output_offset_stride0.y] = packed_decoded; } } diff --git a/assets/shaders/inc/meshlet_attribute_decode.h b/assets/shaders/inc/meshlet_attribute_decode.h new file mode 100644 index 00000000..51a05bc0 --- /dev/null +++ b/assets/shaders/inc/meshlet_attribute_decode.h @@ -0,0 +1,39 @@ +#ifndef MESHLET_ATTRIBUTE_DECODE_H_ +#define MESHLET_ATTRIBUTE_DECODE_H_ + +vec3 attribute_decode_snorm_exp_position(uvec2 payload) +{ + ivec3 sint_value = ivec3( + bitfieldExtract(int(payload.x), 0, 16), + bitfieldExtract(int(payload.x), 16, 16), + bitfieldExtract(int(payload.y), 0, 16)); + int exp = bitfieldExtract(int(payload.y), 16, 16); + return vec3( + ldexp(float(sint_value.x), exp), + ldexp(float(sint_value.y), exp), + 
ldexp(float(sint_value.z), exp)); +} + +vec2 attribute_decode_snorm_exp_uv(uvec2 payload) +{ + ivec2 sint_value = ivec2( + bitfieldExtract(int(payload.x), 0, 16), + bitfieldExtract(int(payload.x), 16, 16)); + int exp = bitfieldExtract(int(payload.y), 0, 16); + return vec2( + ldexp(float(sint_value.x), exp), + ldexp(float(sint_value.y), exp)) + 0.5; +} + +// Adapted from: https://knarkowicz.wordpress.com/2014/04/16/octahedron-normal-vector-encoding/ +// https://twitter.com/Stubbesaurus/status/9379947905532272640 +mediump vec4 attribute_decode_oct8_normal_tangent(uint payload) +{ + mediump vec4 f = unpackSnorm4x8(payload); + mediump vec3 n = vec3(f.x, f.y, 1.0 - abs(f.x) - abs(f.y)); + mediump float t = max(-n.z, 0.0); + n.xy += mix(vec2(t), vec2(-t), greaterThanEqual(n.xy, vec2(0.0))); + return vec4(normalize(n), f.w != 0.0 ? -1.0 : 1.0); +} + +#endif \ No newline at end of file diff --git a/assets/shaders/inc/meshlet_payload_decode.h b/assets/shaders/inc/meshlet_payload_decode.h index 777a9bfa..c002023d 100644 --- a/assets/shaders/inc/meshlet_payload_decode.h +++ b/assets/shaders/inc/meshlet_payload_decode.h @@ -63,11 +63,8 @@ layout(set = MESHLET_PAYLOAD_DESCRIPTOR_SET, binding = MESHLET_PAYLOAD_PAYLOAD_B shared u8vec4 shared_chunk_bit_counts[MESHLET_PAYLOAD_NUM_U32_STREAMS][MESHLET_PAYLOAD_NUM_CHUNKS]; shared uint shared_chunk_offset[MESHLET_PAYLOAD_NUM_U32_STREAMS][MESHLET_PAYLOAD_NUM_CHUNKS]; -#if MESHLET_PAYLOAD_PACKED_WAVEOPS -shared u8vec4 chunk_values[MESHLET_PAYLOAD_NUM_CHUNKS]; -#else -shared uvec2 chunk_values[MESHLET_PAYLOAD_NUM_CHUNKS]; -#endif +shared uvec2 chunk_values0[MESHLET_PAYLOAD_NUM_CHUNKS]; +shared uvec2 chunk_values1[MESHLET_PAYLOAD_NUM_CHUNKS]; // Hardcodes wave32 atm. Need fallback. 
@@ -82,6 +79,14 @@ uint repack_uint(uvec2 v) return pack32(u8vec4(v16)); } +void meshlet_barrier() +{ + if (gl_WorkGroupSize.y == 1) + subgroupBarrier(); + else + barrier(); +} + void meshlet_init_workgroup(uint meshlet_index) { int subgroup_lane = int(gl_SubgroupInvocationID); @@ -101,24 +106,34 @@ void meshlet_init_workgroup(uint meshlet_index) } } - barrier(); + meshlet_barrier(); } uint meshlet_get_linear_index() { - return gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID; + // Rely on SubgroupInvocationID == LocalInvocationID.x here. + return gl_WorkGroupSize.x * gl_LocalInvocationID.y + gl_SubgroupInvocationID; } -uint meshlet_decode_stream(uint meshlet_index, uint stream_index) +#define MESHLET_FETCH_BITPLANES(decoded_value, counts, payload_value, offset) \ + for (int i = 0; i < counts; i++) \ + { \ + decoded_value |= bitfieldExtract(payload_value, subgroup_lane, 1) << i; \ + payload_value = payload.data[++offset]; \ + } \ + decoded_value = bitfieldExtract(int(decoded_value), 0, counts) + +// Add some specialized variants. 
+ +uint meshlet_decode_stream_32_wg256(uint meshlet_index, uint stream_index) { uint unrolled_stream_index = MESHLET_PAYLOAD_NUM_U32_STREAMS * meshlet_index + stream_index; - uint offset_from_base = meshlet_streams.data[unrolled_stream_index].offset_from_base; u16vec4 predictor_a = meshlet_streams.data[unrolled_stream_index].predictor_a; u16vec4 predictor_b = meshlet_streams.data[unrolled_stream_index].predictor_b; u8vec4 initial_value_ = meshlet_streams.data[unrolled_stream_index].initial_value; uvec2 initial_value = pack_u16vec4_to_uvec2(u16vec4(initial_value_)); - uint chunk_id = gl_SubgroupID; + uint chunk_id = gl_LocalInvocationID.y; int subgroup_lane = int(gl_SubgroupInvocationID); uint bitplane_offsets = shared_chunk_offset[stream_index][chunk_id]; ivec4 bit_counts = ivec4(shared_chunk_bit_counts[stream_index][chunk_id]); @@ -128,34 +143,10 @@ uint meshlet_decode_stream(uint meshlet_index, uint stream_index) // Overlap load with consumption. // Helps RDNA2 quite a lot here! uint value = payload.data[bitplane_offsets]; - - for (int i = 0; i < bit_counts.x; i++) - { - decoded.x |= bitfieldExtract(value, subgroup_lane, 1) << i; - value = payload.data[++bitplane_offsets]; - } - decoded.x = bitfieldExtract(int(decoded.x), 0, bit_counts.x); - - for (int i = 0; i < bit_counts.y; i++) - { - decoded.y |= bitfieldExtract(value, subgroup_lane, 1) << i; - value = payload.data[++bitplane_offsets]; - } - decoded.y = bitfieldExtract(int(decoded.y), 0, bit_counts.y); - - for (int i = 0; i < bit_counts.z; i++) - { - decoded.z |= bitfieldExtract(value, subgroup_lane, 1) << i; - value = payload.data[++bitplane_offsets]; - } - decoded.z = bitfieldExtract(int(decoded.z), 0, bit_counts.z); - - for (int i = 0; i < bit_counts.w; i++) - { - decoded.w |= bitfieldExtract(value, subgroup_lane, 1) << i; - value = payload.data[++bitplane_offsets]; - } - decoded.w = bitfieldExtract(int(decoded.w), 0, bit_counts.w); + MESHLET_FETCH_BITPLANES(decoded.x, bit_counts.x, value, 
bitplane_offsets); + MESHLET_FETCH_BITPLANES(decoded.y, bit_counts.y, value, bitplane_offsets); + MESHLET_FETCH_BITPLANES(decoded.z, bit_counts.z, value, bitplane_offsets); + MESHLET_FETCH_BITPLANES(decoded.w, bit_counts.w, value, bitplane_offsets); // Resolve deltas in packed 4x8 math. uvec2 packed_decoded = pack_u16vec4_to_uvec2(u16vec4(decoded)) & 0xff00ffu; @@ -165,18 +156,91 @@ uint meshlet_decode_stream(uint meshlet_index, uint stream_index) packed_decoded += pack_u16vec4_to_uvec2((predictor_a + predictor_b * uint16_t(linear_index)) >> 8us); packed_decoded = subgroupInclusiveAdd(packed_decoded); - if (stream_index > 0) - barrier(); // Resolve WAR hazard from last iteration. + barrier(); // Resolve WAR hazard from last iteration. if (subgroup_lane == int(gl_SubgroupSize) - 1) - chunk_values[chunk_id] = packed_decoded & 0xff00ffu; + chunk_values0[chunk_id] = packed_decoded & 0xff00ffu; barrier(); if (gl_SubgroupID == 0u && subgroup_lane < int(gl_WorkGroupSize.y)) - chunk_values[subgroup_lane] = subgroupInclusiveAdd(chunk_values[subgroup_lane]); + chunk_values0[subgroup_lane] = subgroupInclusiveAdd(chunk_values0[subgroup_lane]); barrier(); if (chunk_id != 0) - packed_decoded += chunk_values[chunk_id - 1]; + packed_decoded += chunk_values0[chunk_id - 1]; return repack_uint(packed_decoded); } +uvec2 meshlet_decode_stream_64_wg256(uint meshlet_index, uint stream_index) +{ + // Dual-pump the computation. VGPR use is quite low either way, so this is fine. 
+ uint unrolled_stream_index = MESHLET_PAYLOAD_NUM_U32_STREAMS * meshlet_index + stream_index; + u8vec4 initial_value_; + + uint chunk_id = gl_LocalInvocationID.y; + int subgroup_lane = int(gl_SubgroupInvocationID); + + u16vec4 predictor_a0 = meshlet_streams.data[unrolled_stream_index].predictor_a; + u16vec4 predictor_b0 = meshlet_streams.data[unrolled_stream_index].predictor_b; + initial_value_ = meshlet_streams.data[unrolled_stream_index].initial_value; + uvec2 initial_value0 = pack_u16vec4_to_uvec2(u16vec4(initial_value_)); + uint bitplane_offsets0 = shared_chunk_offset[stream_index][chunk_id]; + ivec4 bit_counts0 = ivec4(shared_chunk_bit_counts[stream_index][chunk_id]); + uvec4 decoded0 = ivec4(0); + + u16vec4 predictor_a1 = meshlet_streams.data[unrolled_stream_index + 1].predictor_a; + u16vec4 predictor_b1 = meshlet_streams.data[unrolled_stream_index + 1].predictor_b; + initial_value_ = meshlet_streams.data[unrolled_stream_index + 1].initial_value; + uvec2 initial_value1 = pack_u16vec4_to_uvec2(u16vec4(initial_value_)); + uint bitplane_offsets1 = shared_chunk_offset[stream_index + 1][chunk_id]; + ivec4 bit_counts1 = ivec4(shared_chunk_bit_counts[stream_index + 1][chunk_id]); + uvec4 decoded1 = ivec4(0); + + // Overlap load with consumption. + // Helps RDNA2 quite a lot here! 
+ uint value0 = payload.data[bitplane_offsets0]; + uint value1 = payload.data[bitplane_offsets1]; + MESHLET_FETCH_BITPLANES(decoded0.x, bit_counts0.x, value0, bitplane_offsets0); + MESHLET_FETCH_BITPLANES(decoded0.y, bit_counts0.y, value0, bitplane_offsets0); + MESHLET_FETCH_BITPLANES(decoded0.z, bit_counts0.z, value0, bitplane_offsets0); + MESHLET_FETCH_BITPLANES(decoded0.w, bit_counts0.w, value0, bitplane_offsets0); + MESHLET_FETCH_BITPLANES(decoded1.x, bit_counts1.x, value1, bitplane_offsets1); + MESHLET_FETCH_BITPLANES(decoded1.y, bit_counts1.y, value1, bitplane_offsets1); + MESHLET_FETCH_BITPLANES(decoded1.z, bit_counts1.z, value1, bitplane_offsets1); + MESHLET_FETCH_BITPLANES(decoded1.w, bit_counts1.w, value1, bitplane_offsets1); + + // Resolve deltas in packed 4x8 math. + uvec2 packed_decoded0 = pack_u16vec4_to_uvec2(u16vec4(decoded0)) & 0xff00ffu; + uvec2 packed_decoded1 = pack_u16vec4_to_uvec2(u16vec4(decoded1)) & 0xff00ffu; + uint linear_index = meshlet_get_linear_index(); + if (linear_index == 0) + { + packed_decoded0 += initial_value0; + packed_decoded1 += initial_value1; + } + + packed_decoded0 += pack_u16vec4_to_uvec2((predictor_a0 + predictor_b0 * uint16_t(linear_index)) >> 8us); + packed_decoded0 = subgroupInclusiveAdd(packed_decoded0); + packed_decoded1 += pack_u16vec4_to_uvec2((predictor_a1 + predictor_b1 * uint16_t(linear_index)) >> 8us); + packed_decoded1 = subgroupInclusiveAdd(packed_decoded1); + + barrier(); // Resolve WAR hazard from last iteration. 
+ if (subgroup_lane == int(gl_SubgroupSize) - 1) + { + chunk_values0[chunk_id] = packed_decoded0 & 0xff00ffu; + chunk_values1[chunk_id] = packed_decoded1 & 0xff00ffu; + } + barrier(); + if (gl_SubgroupID == 0u && subgroup_lane < int(gl_WorkGroupSize.y)) + chunk_values0[subgroup_lane] = subgroupInclusiveAdd(chunk_values0[subgroup_lane]); + else if (gl_SubgroupID == 1u && subgroup_lane < int(gl_WorkGroupSize.y)) + chunk_values1[subgroup_lane] = subgroupInclusiveAdd(chunk_values1[subgroup_lane]); + barrier(); + if (chunk_id != 0) + { + packed_decoded0 += chunk_values0[chunk_id - 1]; + packed_decoded1 += chunk_values1[chunk_id - 1]; + } + + return uvec2(repack_uint(packed_decoded0), repack_uint(packed_decoded1)); +} + #endif \ No newline at end of file diff --git a/renderer/formats/meshlet.cpp b/renderer/formats/meshlet.cpp index a91cbef7..71e22f2d 100644 --- a/renderer/formats/meshlet.cpp +++ b/renderer/formats/meshlet.cpp @@ -137,8 +137,9 @@ bool decode_mesh(Vulkan::CommandBuffer &cmd, cmd.set_storage_buffer(0, 3, ibo, ibo_offset, view.total_primitives * 3 * sizeof(uint32_t)); cmd.set_storage_buffer(0, 4, payload, payload_offset, view.format_header->payload_size_words * sizeof(uint32_t)); cmd.set_storage_buffer(0, 5, *output_offset_strides_buffer); - cmd.set_specialization_constant_mask(1); + cmd.set_specialization_constant_mask(3); cmd.set_specialization_constant(0, view.format_header->u32_stream_count); + cmd.set_specialization_constant(1, Meshlet::MaxElements / 32); // TODO: Split dispatches for big chungus meshes. // (Starts to become a problem around 8-16 million primitives per dispatch). 
diff --git a/tests/assets/shaders/meshlet_debug.mesh b/tests/assets/shaders/meshlet_debug.mesh new file mode 100644 index 00000000..40513942 --- /dev/null +++ b/tests/assets/shaders/meshlet_debug.mesh @@ -0,0 +1,70 @@ +#version 450 +#extension GL_EXT_mesh_shader : require + +layout(max_primitives = 256, max_vertices = 255, triangles) out; + +#include "meshlet_payload_constants.h" +layout(local_size_x = 32, local_size_y = MESHLET_PAYLOAD_NUM_CHUNKS) in; +layout(constant_id = 0) const uint NUM_U32_STREAMS = 1; +#define MESHLET_PAYLOAD_NUM_U32_STREAMS NUM_U32_STREAMS +#define MESHLET_PAYLOAD_DESCRIPTOR_SET 0 +#define MESHLET_PAYLOAD_META_BINDING 0 +#define MESHLET_PAYLOAD_STREAM_BINDING 1 +#define MESHLET_PAYLOAD_PAYLOAD_BINDING 2 +#include "meshlet_payload_decode.h" +#include "meshlet_attribute_decode.h" + +layout(location = 0) perprimitiveEXT out uint vMeshletIndex[]; +layout(location = 1) out mediump vec3 vNormal[]; +layout(location = 2) out mediump vec4 vTangent[]; +layout(location = 3) out vec2 vUV[]; + +layout(set = 1, binding = 0) uniform UBO +{ + mat4 VP; +}; + +void main() +{ + uint meshlet_index = gl_WorkGroupID.x; + meshlet_init_workgroup(meshlet_index); + MeshletMeta meta = meshlet_metas.data[meshlet_index]; + + meshlet_init_workgroup(meshlet_index); + uint linear_index = meshlet_get_linear_index(); + uint packed_indices = meshlet_decode_stream(meshlet_index, 0); + + SetMeshOutputsEXT(meta.num_attributes_minus_1 + 1, meta.num_primitives_minus_1 + 1); + + // Mildly questionable reliance on LocalInvocationIndex mapping well to SubgroupInvocationID here. + // There is basically no way this will not work in practice however ... + // We have full subgroups and workgroup size X == SubgroupSize. + // Using LocalInvocationIndex specifically is important for AMD perf. 
+ + if (linear_index <= meta.num_primitives_minus_1) + { + gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uvec4(unpack8(packed_indices)).xyz; + vMeshletIndex[gl_LocalInvocationIndex] = meshlet_index; + } + + uint pos0 = meshlet_decode_stream(meshlet_index, 1); + uint pos1 = meshlet_decode_stream(meshlet_index, 2); + if (linear_index <= meta.num_attributes_minus_1) + { + vec3 pos = attribute_decode_snorm_exp_position(uvec2(pos0, pos1)); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = VP * vec4(pos, 1.0); + } + + uint n = meshlet_decode_stream(meshlet_index, 3); + if (linear_index <= meta.num_attributes_minus_1) + vNormal[gl_LocalInvocationIndex] = attribute_decode_oct8_normal_tangent(n).xyz; + + uint t = meshlet_decode_stream(meshlet_index, 4); + if (linear_index <= meta.num_attributes_minus_1) + vTangent[gl_LocalInvocationIndex] = attribute_decode_oct8_normal_tangent(t); + + uint uv0 = meshlet_decode_stream(meshlet_index, 5); + uint uv1 = meshlet_decode_stream(meshlet_index, 6); + if (linear_index <= meta.num_attributes_minus_1) + vUV[gl_LocalInvocationIndex] = attribute_decode_snorm_exp_uv(uvec2(uv0, uv1)); +} \ No newline at end of file diff --git a/tests/assets/shaders/meshlet_debug.mesh.frag b/tests/assets/shaders/meshlet_debug.mesh.frag new file mode 100644 index 00000000..0a29a26f --- /dev/null +++ b/tests/assets/shaders/meshlet_debug.mesh.frag @@ -0,0 +1,24 @@ +#version 450 +#extension GL_EXT_mesh_shader : require + +layout(location = 0) perprimitiveEXT in flat uint vMeshletIndex; +layout(location = 1) in mediump vec3 vNormal; +layout(location = 2) in mediump vec4 vTangent; +layout(location = 3) in vec2 vUV; + +layout(location = 0) out vec4 FragColor; + +vec3 decode_mesh_color() +{ + uint index = vMeshletIndex * 1991u; + index ^= (index >> 5u); + uint r = bitfieldExtract(index, 0, 2); + uint g = bitfieldExtract(index, 2, 2); + uint b = bitfieldExtract(index, 4, 2); + return vec3(r, g, b) / 3.0; +} + +void main() +{ + FragColor = 
vec4(decode_mesh_color() * (vNormal.xyz * 0.5 + 0.5), 1.0); +} \ No newline at end of file diff --git a/tests/assets/shaders/meshlet_debug.vert b/tests/assets/shaders/meshlet_debug.vert index 1d7349fb..de688f31 100644 --- a/tests/assets/shaders/meshlet_debug.vert +++ b/tests/assets/shaders/meshlet_debug.vert @@ -6,46 +6,13 @@ layout(location = 0) out mediump vec3 vNormal; layout(location = 1) out mediump vec4 vTangent; layout(location = 2) out vec2 vUV; +#include "meshlet_attribute_decode.h" + layout(set = 0, binding = 0) uniform UBO { mat4 VP; }; -vec3 attribute_decode_snorm_exp_position(uvec2 payload) -{ - ivec3 sint_value = ivec3( - bitfieldExtract(int(payload.x), 0, 16), - bitfieldExtract(int(payload.x), 16, 16), - bitfieldExtract(int(payload.y), 0, 16)); - int exp = bitfieldExtract(int(payload.y), 16, 16); - return vec3( - ldexp(float(sint_value.x), exp), - ldexp(float(sint_value.y), exp), - ldexp(float(sint_value.z), exp)); -} - -vec2 attribute_decode_snorm_exp_uv(uvec2 payload) -{ - ivec2 sint_value = ivec2( - bitfieldExtract(int(payload.x), 0, 16), - bitfieldExtract(int(payload.x), 16, 16)); - int exp = bitfieldExtract(int(payload.y), 0, 16); - return vec2( - ldexp(float(sint_value.x), exp), - ldexp(float(sint_value.y), exp)) + 0.5; -} - -// Adapted from: https://knarkowicz.wordpress.com/2014/04/16/octahedron-normal-vector-encoding/ -// https://twitter.com/Stubbesaurus/status/9379947905532272640 -mediump vec4 attribute_decode_oct8_normal_tangent(uint payload) -{ - mediump vec4 f = unpackSnorm4x8(payload); - mediump vec3 n = vec3(f.x, f.y, 1.0 - abs(f.x) - abs(f.y)); - mediump float t = max(-n.z, 0.0); - n.xy += mix(vec2(t), vec2(-t), greaterThanEqual(n.xy, vec2(0.0))); - return vec4(normalize(n), f.w != 0.0 ? 
-1.0 : 1.0); -} - void main() { vec3 pos = attribute_decode_snorm_exp_position(ATTR0.xy); diff --git a/tests/meshlet_viewer.cpp b/tests/meshlet_viewer.cpp index caa98e3a..3fc4b467 100644 --- a/tests/meshlet_viewer.cpp +++ b/tests/meshlet_viewer.cpp @@ -58,11 +58,15 @@ struct MeshletViewerApplication : Granite::Application, Granite::EventHandler Vulkan::BufferHandle ibo; Vulkan::BufferHandle vbo; Vulkan::BufferHandle payload; + Vulkan::BufferHandle meshlet_meta_buffer; + Vulkan::BufferHandle meshlet_stream_buffer; AABB aabb; FPSCamera camera; void on_device_create(const DeviceCreatedEvent &e) { + e.get_device().get_shader_manager().add_include_directory("builtin://shaders/inc"); + auto view = SceneFormats::Meshlet::create_mesh_view(*mapping); if (!view.format_header) throw std::runtime_error("Failed to load meshlet."); @@ -106,8 +110,18 @@ struct MeshletViewerApplication : Granite::Application, Granite::EventHandler camera.set_depth_range(0.1f, 200.0f); camera.set_fovy(0.4f * pi()); - camera.look_at(aabb.get_center() + vec3(0.1f, 0.2f, 1.1f) * aabb.get_radius(), + camera.look_at(aabb.get_center() + vec3(0.1f, 0.2f, 2.1f) * aabb.get_radius(), aabb.get_center(), vec3(0.0f, 1.0f, 0.0f)); + + Vulkan::BufferCreateInfo buf_info = {}; + buf_info.domain = Vulkan::BufferDomain::LinkedDeviceHost; + buf_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + + buf_info.size = view.format_header->meshlet_count * sizeof(*view.headers); + meshlet_meta_buffer = e.get_device().create_buffer(buf_info, view.headers); + + buf_info.size = view.format_header->meshlet_count * view.format_header->u32_stream_count * sizeof(*view.streams); + meshlet_stream_buffer = e.get_device().create_buffer(buf_info, view.streams); } void on_device_destroy(const DeviceCreatedEvent &) @@ -115,6 +129,8 @@ struct MeshletViewerApplication : Granite::Application, Granite::EventHandler ibo.reset(); vbo.reset(); payload.reset(); + meshlet_meta_buffer.reset(); + meshlet_stream_buffer.reset(); } void 
render_frame(double, double) override @@ -126,19 +142,21 @@ struct MeshletViewerApplication : Granite::Application, Granite::EventHandler cmd->begin_render_pass(device.get_swapchain_render_pass(SwapchainRenderPass::Depth)); camera.set_aspect(cmd->get_viewport().width / cmd->get_viewport().height); - cmd->set_program("assets://shaders/meshlet_debug.vert", - "assets://shaders/meshlet_debug.frag"); + cmd->set_program("", "assets://shaders/meshlet_debug.mesh", + "assets://shaders/meshlet_debug.mesh.frag"); cmd->set_opaque_state(); auto vp = camera.get_projection() * camera.get_view(); - *cmd->allocate_typed_constant_data(0, 0, 1) = vp; + *cmd->allocate_typed_constant_data(1, 0, 1) = vp; + + cmd->set_storage_buffer(0, 0, *meshlet_meta_buffer); + cmd->set_storage_buffer(0, 1, *meshlet_stream_buffer); + cmd->set_storage_buffer(0, 2, *payload); - cmd->set_index_buffer(*ibo, 0, VK_INDEX_TYPE_UINT32); - cmd->set_vertex_binding(0, *vbo, 0, 6 * sizeof(uint32_t)); - cmd->set_vertex_attrib(0, 0, VK_FORMAT_R32G32B32A32_UINT, 0); - cmd->set_vertex_attrib(1, 0, VK_FORMAT_R32G32_UINT, 4 * sizeof(uint32_t)); + cmd->enable_subgroup_size_control(true, VK_SHADER_STAGE_MESH_BIT_EXT); + cmd->set_subgroup_size_log2(true, 5, 5, VK_SHADER_STAGE_MESH_BIT_EXT); + cmd->draw_mesh_tasks(meshlet_meta_buffer->get_create_info().size / sizeof(SceneFormats::Meshlet::Header), 1, 1); - cmd->draw_indexed(ibo->get_create_info().size / sizeof(uint32_t), 1); cmd->end_render_pass(); device.submit(cmd); } From acdea38eb778cdae135e12a01e92eddcde27a19f Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Tue, 1 Aug 2023 13:35:02 +0200 Subject: [PATCH 36/71] Refactor out some common decode logic. 
--- assets/shaders/inc/meshlet_payload_decode.h | 162 +++++++++----------- renderer/formats/meshlet.cpp | 2 +- 2 files changed, 74 insertions(+), 90 deletions(-) diff --git a/assets/shaders/inc/meshlet_payload_decode.h b/assets/shaders/inc/meshlet_payload_decode.h index c002023d..3422b948 100644 --- a/assets/shaders/inc/meshlet_payload_decode.h +++ b/assets/shaders/inc/meshlet_payload_decode.h @@ -13,6 +13,10 @@ #error "Must define MESHLET_PAYLOAD_NUM_U32_STREAMS before including meshlet_payload_decode.h" #endif +#ifndef MESHLET_PAYLOAD_LARGE_WORKGROUP +#error "Must define MESHLET_PAYLOAD_LARGE_WORKGROUP" +#endif + #ifndef MESHLET_PAYLOAD_DESCRIPTOR_SET #error "Must define MESHLET_PAYLOAD_DESCRIPTOR_SET" #endif @@ -63,8 +67,10 @@ layout(set = MESHLET_PAYLOAD_DESCRIPTOR_SET, binding = MESHLET_PAYLOAD_PAYLOAD_B shared u8vec4 shared_chunk_bit_counts[MESHLET_PAYLOAD_NUM_U32_STREAMS][MESHLET_PAYLOAD_NUM_CHUNKS]; shared uint shared_chunk_offset[MESHLET_PAYLOAD_NUM_U32_STREAMS][MESHLET_PAYLOAD_NUM_CHUNKS]; +#if MESHLET_PAYLOAD_LARGE_WORKGROUP shared uvec2 chunk_values0[MESHLET_PAYLOAD_NUM_CHUNKS]; shared uvec2 chunk_values1[MESHLET_PAYLOAD_NUM_CHUNKS]; +#endif // Hardcodes wave32 atm. Need fallback. @@ -81,10 +87,11 @@ uint repack_uint(uvec2 v) void meshlet_barrier() { - if (gl_WorkGroupSize.y == 1) - subgroupBarrier(); - else - barrier(); +#if MESHLET_PAYLOAD_LARGE_WORKGROUP + barrier(); +#else + subgroupBarrier(); +#endif } void meshlet_init_workgroup(uint meshlet_index) @@ -94,13 +101,13 @@ void meshlet_init_workgroup(uint meshlet_index) for (uint stream_index = gl_SubgroupID; stream_index < MESHLET_PAYLOAD_NUM_U32_STREAMS; stream_index += gl_NumSubgroups) { // Start by decoding the offset for bitplanes for all u32 streams. 
- if (subgroup_lane < int(gl_WorkGroupSize.y)) + if (subgroup_lane < MESHLET_PAYLOAD_NUM_CHUNKS) { uint bitplane_value = uint(meshlet_streams.data[stream_index + MESHLET_PAYLOAD_NUM_U32_STREAMS * meshlet_index].bitplane_meta[subgroup_lane]); u16vec4 bit_counts = (u16vec4(bitplane_value) >> u16vec4(0, 4, 8, 12)) & 0xfus; u16vec2 bit_counts2 = bit_counts.xy + bit_counts.zw; uint total_bits = bit_counts2.x + bit_counts2.y; - uint offset = meshlet_streams.data[stream_index + NUM_U32_STREAMS * meshlet_index].offset_from_base; + uint offset = meshlet_streams.data[stream_index + MESHLET_PAYLOAD_NUM_U32_STREAMS * meshlet_index].offset_from_base; shared_chunk_offset[stream_index][subgroup_lane] = subgroupExclusiveAdd(total_bits) + offset; shared_chunk_bit_counts[stream_index][subgroup_lane] = u8vec4(bit_counts); } @@ -111,128 +118,93 @@ void meshlet_init_workgroup(uint meshlet_index) uint meshlet_get_linear_index() { +#if MESHLET_PAYLOAD_LARGE_WORKGROUP // Rely on SubgroupInvocationID == LocalInvocationID.x here. return gl_WorkGroupSize.x * gl_LocalInvocationID.y + gl_SubgroupInvocationID; +#else + return gl_SubgroupInvocationID; +#endif } +// Overlap load with consumption. +// Helps RDNA2 quite a lot here! #define MESHLET_FETCH_BITPLANES(decoded_value, counts, payload_value, offset) \ for (int i = 0; i < counts; i++) \ { \ - decoded_value |= bitfieldExtract(payload_value, subgroup_lane, 1) << i; \ + decoded_value |= bitfieldExtract(payload_value, int(gl_SubgroupInvocationID), 1) << i; \ payload_value = payload.data[++offset]; \ } \ decoded_value = bitfieldExtract(int(decoded_value), 0, counts) // Add some specialized variants. 
+#define MESHLET_PAYLOAD_DECL_STREAM(unrolled_stream_index, iter) \ + u16vec4 predictor_a##iter = meshlet_streams.data[unrolled_stream_index].predictor_a; \ + u16vec4 predictor_b##iter = meshlet_streams.data[unrolled_stream_index].predictor_b; \ + u8vec4 initial_value_##iter = meshlet_streams.data[unrolled_stream_index].initial_value; \ + uvec2 initial_value##iter = pack_u16vec4_to_uvec2(u16vec4(initial_value_##iter)); \ + uvec4 decoded##iter = ivec4(0) + +#define MESHLET_PAYLOAD_PROCESS_CHUNK(stream_index, chunk_id, iter) \ + uint bitplane_offsets##iter = shared_chunk_offset[stream_index][chunk_id]; \ + ivec4 bit_counts##iter = ivec4(shared_chunk_bit_counts[stream_index][chunk_id]); \ + uint value##iter = payload.data[bitplane_offsets##iter]; \ + MESHLET_FETCH_BITPLANES(decoded##iter.x, bit_counts##iter.x, value##iter, bitplane_offsets##iter); \ + MESHLET_FETCH_BITPLANES(decoded##iter.y, bit_counts##iter.y, value##iter, bitplane_offsets##iter); \ + MESHLET_FETCH_BITPLANES(decoded##iter.z, bit_counts##iter.z, value##iter, bitplane_offsets##iter); \ + MESHLET_FETCH_BITPLANES(decoded##iter.w, bit_counts##iter.w, value##iter, bitplane_offsets##iter); \ + uvec2 packed_decoded##iter = pack_u16vec4_to_uvec2(u16vec4(decoded##iter)) & 0xff00ffu; \ + if (linear_index == 0) \ + packed_decoded##iter += initial_value##iter; \ + packed_decoded##iter += pack_u16vec4_to_uvec2((predictor_a##iter + predictor_b##iter * uint16_t(linear_index)) >> 8us); \ + packed_decoded##iter = subgroupInclusiveAdd(packed_decoded##iter) + +#if MESHLET_PAYLOAD_LARGE_WORKGROUP uint meshlet_decode_stream_32_wg256(uint meshlet_index, uint stream_index) { uint unrolled_stream_index = MESHLET_PAYLOAD_NUM_U32_STREAMS * meshlet_index + stream_index; - u16vec4 predictor_a = meshlet_streams.data[unrolled_stream_index].predictor_a; - u16vec4 predictor_b = meshlet_streams.data[unrolled_stream_index].predictor_b; - u8vec4 initial_value_ = meshlet_streams.data[unrolled_stream_index].initial_value; - uvec2 
initial_value = pack_u16vec4_to_uvec2(u16vec4(initial_value_)); - + uint linear_index = meshlet_get_linear_index(); uint chunk_id = gl_LocalInvocationID.y; - int subgroup_lane = int(gl_SubgroupInvocationID); - uint bitplane_offsets = shared_chunk_offset[stream_index][chunk_id]; - ivec4 bit_counts = ivec4(shared_chunk_bit_counts[stream_index][chunk_id]); - - uvec4 decoded = ivec4(0); - // Overlap load with consumption. - // Helps RDNA2 quite a lot here! - uint value = payload.data[bitplane_offsets]; - MESHLET_FETCH_BITPLANES(decoded.x, bit_counts.x, value, bitplane_offsets); - MESHLET_FETCH_BITPLANES(decoded.y, bit_counts.y, value, bitplane_offsets); - MESHLET_FETCH_BITPLANES(decoded.z, bit_counts.z, value, bitplane_offsets); - MESHLET_FETCH_BITPLANES(decoded.w, bit_counts.w, value, bitplane_offsets); - - // Resolve deltas in packed 4x8 math. - uvec2 packed_decoded = pack_u16vec4_to_uvec2(u16vec4(decoded)) & 0xff00ffu; - uint linear_index = meshlet_get_linear_index(); - if (linear_index == 0) - packed_decoded += initial_value; - packed_decoded += pack_u16vec4_to_uvec2((predictor_a + predictor_b * uint16_t(linear_index)) >> 8us); - packed_decoded = subgroupInclusiveAdd(packed_decoded); + MESHLET_PAYLOAD_DECL_STREAM(unrolled_stream_index, 0); + MESHLET_PAYLOAD_PROCESS_CHUNK(stream_index, chunk_id, 0); barrier(); // Resolve WAR hazard from last iteration. 
- if (subgroup_lane == int(gl_SubgroupSize) - 1) - chunk_values0[chunk_id] = packed_decoded & 0xff00ffu; + if (gl_SubgroupInvocationID == MESHLET_PAYLOAD_MAX_ELEMENTS / MESHLET_PAYLOAD_NUM_CHUNKS - 1) + chunk_values0[chunk_id] = packed_decoded0 & 0xff00ffu; barrier(); - if (gl_SubgroupID == 0u && subgroup_lane < int(gl_WorkGroupSize.y)) - chunk_values0[subgroup_lane] = subgroupInclusiveAdd(chunk_values0[subgroup_lane]); + if (gl_SubgroupID == 0u && gl_SubgroupInvocationID < gl_WorkGroupSize.y) + chunk_values0[gl_SubgroupInvocationID] = subgroupInclusiveAdd(chunk_values0[gl_SubgroupInvocationID]); barrier(); if (chunk_id != 0) - packed_decoded += chunk_values0[chunk_id - 1]; + packed_decoded0 += chunk_values0[chunk_id - 1]; - return repack_uint(packed_decoded); + return repack_uint(packed_decoded0); } uvec2 meshlet_decode_stream_64_wg256(uint meshlet_index, uint stream_index) { // Dual-pump the computation. VGPR use is quite low either way, so this is fine. uint unrolled_stream_index = MESHLET_PAYLOAD_NUM_U32_STREAMS * meshlet_index + stream_index; - u8vec4 initial_value_; - - uint chunk_id = gl_LocalInvocationID.y; - int subgroup_lane = int(gl_SubgroupInvocationID); - - u16vec4 predictor_a0 = meshlet_streams.data[unrolled_stream_index].predictor_a; - u16vec4 predictor_b0 = meshlet_streams.data[unrolled_stream_index].predictor_b; - initial_value_ = meshlet_streams.data[unrolled_stream_index].initial_value; - uvec2 initial_value0 = pack_u16vec4_to_uvec2(u16vec4(initial_value_)); - uint bitplane_offsets0 = shared_chunk_offset[stream_index][chunk_id]; - ivec4 bit_counts0 = ivec4(shared_chunk_bit_counts[stream_index][chunk_id]); - uvec4 decoded0 = ivec4(0); - - u16vec4 predictor_a1 = meshlet_streams.data[unrolled_stream_index + 1].predictor_a; - u16vec4 predictor_b1 = meshlet_streams.data[unrolled_stream_index + 1].predictor_b; - initial_value_ = meshlet_streams.data[unrolled_stream_index + 1].initial_value; - uvec2 initial_value1 = 
pack_u16vec4_to_uvec2(u16vec4(initial_value_)); - uint bitplane_offsets1 = shared_chunk_offset[stream_index + 1][chunk_id]; - ivec4 bit_counts1 = ivec4(shared_chunk_bit_counts[stream_index + 1][chunk_id]); - uvec4 decoded1 = ivec4(0); - - // Overlap load with consumption. - // Helps RDNA2 quite a lot here! - uint value0 = payload.data[bitplane_offsets0]; - uint value1 = payload.data[bitplane_offsets1]; - MESHLET_FETCH_BITPLANES(decoded0.x, bit_counts0.x, value0, bitplane_offsets0); - MESHLET_FETCH_BITPLANES(decoded0.y, bit_counts0.y, value0, bitplane_offsets0); - MESHLET_FETCH_BITPLANES(decoded0.z, bit_counts0.z, value0, bitplane_offsets0); - MESHLET_FETCH_BITPLANES(decoded0.w, bit_counts0.w, value0, bitplane_offsets0); - MESHLET_FETCH_BITPLANES(decoded1.x, bit_counts1.x, value1, bitplane_offsets1); - MESHLET_FETCH_BITPLANES(decoded1.y, bit_counts1.y, value1, bitplane_offsets1); - MESHLET_FETCH_BITPLANES(decoded1.z, bit_counts1.z, value1, bitplane_offsets1); - MESHLET_FETCH_BITPLANES(decoded1.w, bit_counts1.w, value1, bitplane_offsets1); - - // Resolve deltas in packed 4x8 math. 
- uvec2 packed_decoded0 = pack_u16vec4_to_uvec2(u16vec4(decoded0)) & 0xff00ffu; - uvec2 packed_decoded1 = pack_u16vec4_to_uvec2(u16vec4(decoded1)) & 0xff00ffu; uint linear_index = meshlet_get_linear_index(); - if (linear_index == 0) - { - packed_decoded0 += initial_value0; - packed_decoded1 += initial_value1; - } + uint chunk_id = gl_LocalInvocationID.y; - packed_decoded0 += pack_u16vec4_to_uvec2((predictor_a0 + predictor_b0 * uint16_t(linear_index)) >> 8us); - packed_decoded0 = subgroupInclusiveAdd(packed_decoded0); - packed_decoded1 += pack_u16vec4_to_uvec2((predictor_a1 + predictor_b1 * uint16_t(linear_index)) >> 8us); - packed_decoded1 = subgroupInclusiveAdd(packed_decoded1); + MESHLET_PAYLOAD_DECL_STREAM(unrolled_stream_index, 0); + MESHLET_PAYLOAD_DECL_STREAM(unrolled_stream_index + 1, 1); + MESHLET_PAYLOAD_PROCESS_CHUNK(stream_index, chunk_id, 0); + MESHLET_PAYLOAD_PROCESS_CHUNK(stream_index + 1, chunk_id, 1); barrier(); // Resolve WAR hazard from last iteration. - if (subgroup_lane == int(gl_SubgroupSize) - 1) + if (gl_SubgroupInvocationID == gl_SubgroupSize - 1) { chunk_values0[chunk_id] = packed_decoded0 & 0xff00ffu; chunk_values1[chunk_id] = packed_decoded1 & 0xff00ffu; } barrier(); - if (gl_SubgroupID == 0u && subgroup_lane < int(gl_WorkGroupSize.y)) - chunk_values0[subgroup_lane] = subgroupInclusiveAdd(chunk_values0[subgroup_lane]); - else if (gl_SubgroupID == 1u && subgroup_lane < int(gl_WorkGroupSize.y)) - chunk_values1[subgroup_lane] = subgroupInclusiveAdd(chunk_values1[subgroup_lane]); + if (gl_SubgroupID == 0u && gl_SubgroupInvocationID < gl_WorkGroupSize.y) + chunk_values0[gl_SubgroupInvocationID] = subgroupInclusiveAdd(chunk_values0[gl_SubgroupInvocationID]); + else if (gl_SubgroupID == 1u && gl_SubgroupInvocationID < gl_WorkGroupSize.y) + chunk_values1[gl_SubgroupInvocationID] = subgroupInclusiveAdd(chunk_values1[gl_SubgroupInvocationID]); barrier(); if (chunk_id != 0) { @@ -243,4 +215,16 @@ uvec2 meshlet_decode_stream_64_wg256(uint 
meshlet_index, uint stream_index) return uvec2(repack_uint(packed_decoded0), repack_uint(packed_decoded1)); } +#define MESHLET_DECODE_STREAM_32(meshlet_index, stream_index, report_cb) { \ + uint value = meshlet_decode_stream_32_wg256(meshlet_index, stream_index); \ + report_cb(gl_LocalInvocationIndex, value); } + +#define MESHLET_DECODE_STREAM_64(meshlet_index, stream_index, report_cb) { \ + uvec2 value = meshlet_decode_stream_64_wg256(meshlet_index, stream_index); \ + report_cb(gl_LocalInvocationIndex, value); } + +#else + +#endif + #endif \ No newline at end of file diff --git a/renderer/formats/meshlet.cpp b/renderer/formats/meshlet.cpp index 71e22f2d..c5587e3e 100644 --- a/renderer/formats/meshlet.cpp +++ b/renderer/formats/meshlet.cpp @@ -127,7 +127,7 @@ bool decode_mesh(Vulkan::CommandBuffer &cmd, buf_info.size = output_offset_strides.size() * sizeof(uvec2); auto output_offset_strides_buffer = cmd.get_device().create_buffer(buf_info, output_offset_strides.data()); - cmd.set_program("builtin://shaders/decode/meshlet_decode.comp"); + cmd.set_program("builtin://shaders/decode/meshlet_decode.comp", {{"MESHLET_PAYLOAD_LARGE_WORKGROUP", 1}}); cmd.enable_subgroup_size_control(true); cmd.set_subgroup_size_log2(true, 5, 5); From f08d38f2e77d863b91b4ddb1219d360ddcb48c6a Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Tue, 1 Aug 2023 13:39:43 +0200 Subject: [PATCH 37/71] Decode with macro callback. Surprised this even worked! 
--- assets/shaders/decode/meshlet_decode.comp | 27 ++++++++++++++--------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/assets/shaders/decode/meshlet_decode.comp b/assets/shaders/decode/meshlet_decode.comp index 3758c86f..36d26e27 100644 --- a/assets/shaders/decode/meshlet_decode.comp +++ b/assets/shaders/decode/meshlet_decode.comp @@ -33,21 +33,26 @@ void main() MeshletMeta meta = meshlet_metas.data[meshlet_index]; uint linear_index = meshlet_get_linear_index(); +#define INDEX(linear_index, packed_indices) { \ + uvec3 indices = uvec4(unpack8(packed_indices)).xyz; \ + indices += meta.base_vertex_offset; \ + uint output_offset = output_offset_strides.data[meshlet_index * NUM_U32_STREAMS].x; \ + if (linear_index <= uint(meta.num_primitives_minus_1)) \ + output_indices.data[output_offset + linear_index] = indices; \ +} + +#define ATTR(linear_index, packed_decoded) { \ + uvec2 output_offset_stride0 = output_offset_strides.data[meshlet_index * NUM_U32_STREAMS + i]; \ + if (linear_index <= uint(meta.num_attributes_minus_1)) \ + output_payload.data[output_offset_stride0.x + linear_index * output_offset_stride0.y] = packed_decoded; \ +} + { - uint packed_indices = meshlet_decode_stream_32_wg256(meshlet_index, 0); - // Write index buffer. 
- uvec3 indices = uvec4(unpack8(packed_indices)).xyz; - indices += meta.base_vertex_offset; - uint output_offset = output_offset_strides.data[meshlet_index * NUM_U32_STREAMS].x; - if (linear_index <= uint(meta.num_primitives_minus_1)) - output_indices.data[output_offset + linear_index] = indices; + MESHLET_DECODE_STREAM_32(meshlet_index, 0, INDEX); } for (uint i = 1; i < NUM_U32_STREAMS; i++) { - uint packed_decoded = meshlet_decode_stream_32_wg256(meshlet_index, i); - uvec2 output_offset_stride0 = output_offset_strides.data[meshlet_index * NUM_U32_STREAMS + i]; - if (linear_index <= uint(meta.num_attributes_minus_1)) - output_payload.data[output_offset_stride0.x + linear_index * output_offset_stride0.y] = packed_decoded; + MESHLET_DECODE_STREAM_32(meshlet_index, i, ATTR); } } From a6d6fc4fa601471ecf6808c69b4621ff94920d57 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Tue, 1 Aug 2023 13:59:23 +0200 Subject: [PATCH 38/71] Get small WG path working. --- assets/shaders/decode/meshlet_decode.comp | 10 ++++++++-- assets/shaders/inc/meshlet_payload_decode.h | 21 +++++++++++++++++++-- renderer/formats/meshlet.cpp | 5 ++--- 3 files changed, 29 insertions(+), 7 deletions(-) diff --git a/assets/shaders/decode/meshlet_decode.comp b/assets/shaders/decode/meshlet_decode.comp index 36d26e27..5772f3a5 100644 --- a/assets/shaders/decode/meshlet_decode.comp +++ b/assets/shaders/decode/meshlet_decode.comp @@ -2,7 +2,14 @@ #extension GL_EXT_scalar_block_layout : require #include "../inc/meshlet_payload_constants.h" -layout(local_size_x = 32, local_size_y_id = 1) in; + +#if MESHLET_PAYLOAD_LARGE_WORKGROUP +#define MESHLET_PAYLOAD_WG_Y MESHLET_PAYLOAD_NUM_CHUNKS +#else +#define MESHLET_PAYLOAD_WG_Y 1 +#endif +layout(local_size_x = 32, local_size_y = MESHLET_PAYLOAD_WG_Y) in; + layout(constant_id = 0) const uint NUM_U32_STREAMS = 1; #define MESHLET_PAYLOAD_NUM_U32_STREAMS NUM_U32_STREAMS #define MESHLET_PAYLOAD_DESCRIPTOR_SET 0 @@ -31,7 +38,6 @@ void main() uint 
meshlet_index = gl_WorkGroupID.x; meshlet_init_workgroup(meshlet_index); MeshletMeta meta = meshlet_metas.data[meshlet_index]; - uint linear_index = meshlet_get_linear_index(); #define INDEX(linear_index, packed_indices) { \ uvec3 indices = uvec4(unpack8(packed_indices)).xyz; \ diff --git a/assets/shaders/inc/meshlet_payload_decode.h b/assets/shaders/inc/meshlet_payload_decode.h index 3422b948..4b3ef773 100644 --- a/assets/shaders/inc/meshlet_payload_decode.h +++ b/assets/shaders/inc/meshlet_payload_decode.h @@ -3,6 +3,7 @@ #extension GL_EXT_shader_explicit_arithmetic_types_int16 : require #extension GL_KHR_shader_subgroup_arithmetic : require +#extension GL_KHR_shader_subgroup_ballot : require #extension GL_KHR_shader_subgroup_basic : require #extension GL_EXT_shader_explicit_arithmetic_types_int8 : require #extension GL_EXT_scalar_block_layout : require @@ -142,10 +143,10 @@ uint meshlet_get_linear_index() u16vec4 predictor_a##iter = meshlet_streams.data[unrolled_stream_index].predictor_a; \ u16vec4 predictor_b##iter = meshlet_streams.data[unrolled_stream_index].predictor_b; \ u8vec4 initial_value_##iter = meshlet_streams.data[unrolled_stream_index].initial_value; \ - uvec2 initial_value##iter = pack_u16vec4_to_uvec2(u16vec4(initial_value_##iter)); \ - uvec4 decoded##iter = ivec4(0) + uvec2 initial_value##iter = pack_u16vec4_to_uvec2(u16vec4(initial_value_##iter)) #define MESHLET_PAYLOAD_PROCESS_CHUNK(stream_index, chunk_id, iter) \ + uvec4 decoded##iter = ivec4(0); \ uint bitplane_offsets##iter = shared_chunk_offset[stream_index][chunk_id]; \ ivec4 bit_counts##iter = ivec4(shared_chunk_bit_counts[stream_index][chunk_id]); \ uint value##iter = payload.data[bitplane_offsets##iter]; \ @@ -225,6 +226,22 @@ uvec2 meshlet_decode_stream_64_wg256(uint meshlet_index, uint stream_index) #else +// Have to iterate and report once per chunk. Avoids having to spend a lot of LDS memory. 
+#define MESHLET_DECODE_STREAM_32(meshlet_index, stream_index, report_cb) { \ + uint unrolled_stream_index = MESHLET_PAYLOAD_NUM_U32_STREAMS * meshlet_index + stream_index; \ + uint linear_index = meshlet_get_linear_index(); \ + uvec2 prev_value0 = uvec2(0); \ + MESHLET_PAYLOAD_DECL_STREAM(unrolled_stream_index, 0); \ + for (uint chunk_id = 0; chunk_id < MESHLET_PAYLOAD_NUM_CHUNKS; chunk_id++) \ + { \ + MESHLET_PAYLOAD_PROCESS_CHUNK(stream_index, chunk_id, 0); \ + packed_decoded0 += prev_value0; \ + prev_value0 = subgroupBroadcast(packed_decoded0, 31) & 0xff00ffu; \ + report_cb(linear_index, repack_uint(packed_decoded0)); \ + linear_index += gl_SubgroupSize; \ + } \ +} + #endif #endif \ No newline at end of file diff --git a/renderer/formats/meshlet.cpp b/renderer/formats/meshlet.cpp index c5587e3e..4ce57f78 100644 --- a/renderer/formats/meshlet.cpp +++ b/renderer/formats/meshlet.cpp @@ -127,7 +127,7 @@ bool decode_mesh(Vulkan::CommandBuffer &cmd, buf_info.size = output_offset_strides.size() * sizeof(uvec2); auto output_offset_strides_buffer = cmd.get_device().create_buffer(buf_info, output_offset_strides.data()); - cmd.set_program("builtin://shaders/decode/meshlet_decode.comp", {{"MESHLET_PAYLOAD_LARGE_WORKGROUP", 1}}); + cmd.set_program("builtin://shaders/decode/meshlet_decode.comp", {{"MESHLET_PAYLOAD_LARGE_WORKGROUP", 0}}); cmd.enable_subgroup_size_control(true); cmd.set_subgroup_size_log2(true, 5, 5); @@ -137,9 +137,8 @@ bool decode_mesh(Vulkan::CommandBuffer &cmd, cmd.set_storage_buffer(0, 3, ibo, ibo_offset, view.total_primitives * 3 * sizeof(uint32_t)); cmd.set_storage_buffer(0, 4, payload, payload_offset, view.format_header->payload_size_words * sizeof(uint32_t)); cmd.set_storage_buffer(0, 5, *output_offset_strides_buffer); - cmd.set_specialization_constant_mask(3); + cmd.set_specialization_constant_mask(1); cmd.set_specialization_constant(0, view.format_header->u32_stream_count); - cmd.set_specialization_constant(1, Meshlet::MaxElements / 32); // TODO: 
Split dispatches for big chungus meshes. // (Starts to become a problem around 8-16 million primitives per dispatch). From 91548a93659036fd074f6cdb3d9c65ad847f5cc5 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Tue, 1 Aug 2023 14:36:22 +0200 Subject: [PATCH 39/71] Get mesh shader compiling. --- assets/shaders/decode/meshlet_decode.comp | 2 +- .../shaders/inc/meshlet_payload_constants.h | 1 + assets/shaders/inc/meshlet_payload_decode.h | 23 ++++++ tests/assets/shaders/meshlet_debug.mesh | 70 +++++++++++-------- tests/meshlet_viewer.cpp | 15 +++- 5 files changed, 77 insertions(+), 34 deletions(-) diff --git a/assets/shaders/decode/meshlet_decode.comp b/assets/shaders/decode/meshlet_decode.comp index 5772f3a5..ba88938e 100644 --- a/assets/shaders/decode/meshlet_decode.comp +++ b/assets/shaders/decode/meshlet_decode.comp @@ -10,7 +10,7 @@ #endif layout(local_size_x = 32, local_size_y = MESHLET_PAYLOAD_WG_Y) in; -layout(constant_id = 0) const uint NUM_U32_STREAMS = 1; +layout(constant_id = 0) const uint NUM_U32_STREAMS = MESHLET_PAYLOAD_MAX_STREAMS; #define MESHLET_PAYLOAD_NUM_U32_STREAMS NUM_U32_STREAMS #define MESHLET_PAYLOAD_DESCRIPTOR_SET 0 #define MESHLET_PAYLOAD_META_BINDING 0 diff --git a/assets/shaders/inc/meshlet_payload_constants.h b/assets/shaders/inc/meshlet_payload_constants.h index 2cb1bbe0..2a91ff53 100644 --- a/assets/shaders/inc/meshlet_payload_constants.h +++ b/assets/shaders/inc/meshlet_payload_constants.h @@ -3,5 +3,6 @@ #define MESHLET_PAYLOAD_MAX_ELEMENTS 256 #define MESHLET_PAYLOAD_NUM_CHUNKS 8 +#define MESHLET_PAYLOAD_MAX_STREAMS 16 #endif \ No newline at end of file diff --git a/assets/shaders/inc/meshlet_payload_decode.h b/assets/shaders/inc/meshlet_payload_decode.h index 4b3ef773..61f363d6 100644 --- a/assets/shaders/inc/meshlet_payload_decode.h +++ b/assets/shaders/inc/meshlet_payload_decode.h @@ -216,6 +216,8 @@ uvec2 meshlet_decode_stream_64_wg256(uint meshlet_index, uint stream_index) return uvec2(repack_uint(packed_decoded0), 
repack_uint(packed_decoded1)); } +// For large workgroups, we imply AMD, where LocalInvocationIndex indexing is preferred. +// We assume that SubgroupInvocationID == LocalInvocationID.x here since it's the only reasonable way it would work. #define MESHLET_DECODE_STREAM_32(meshlet_index, stream_index, report_cb) { \ uint value = meshlet_decode_stream_32_wg256(meshlet_index, stream_index); \ report_cb(gl_LocalInvocationIndex, value); } @@ -242,6 +244,27 @@ uvec2 meshlet_decode_stream_64_wg256(uint meshlet_index, uint stream_index) } \ } +// Have to iterate and report once per chunk. Avoids having to spend a lot of LDS memory. +#define MESHLET_DECODE_STREAM_64(meshlet_index, stream_index, report_cb) { \ + uint unrolled_stream_index = MESHLET_PAYLOAD_NUM_U32_STREAMS * meshlet_index + stream_index; \ + uint linear_index = meshlet_get_linear_index(); \ + uvec2 prev_value0 = uvec2(0); \ + uvec2 prev_value1 = uvec2(0); \ + MESHLET_PAYLOAD_DECL_STREAM(unrolled_stream_index, 0); \ + MESHLET_PAYLOAD_DECL_STREAM(unrolled_stream_index + 1, 1); \ + for (uint chunk_id = 0; chunk_id < MESHLET_PAYLOAD_NUM_CHUNKS; chunk_id++) \ + { \ + MESHLET_PAYLOAD_PROCESS_CHUNK(stream_index, chunk_id, 0); \ + MESHLET_PAYLOAD_PROCESS_CHUNK(stream_index + 1, chunk_id, 1); \ + packed_decoded0 += prev_value0; \ + packed_decoded1 += prev_value1; \ + prev_value0 = subgroupBroadcast(packed_decoded0, 31) & 0xff00ffu; \ + prev_value1 = subgroupBroadcast(packed_decoded1, 31) & 0xff00ffu; \ + report_cb(linear_index, uvec2(repack_uint(packed_decoded0), repack_uint(packed_decoded1))); \ + linear_index += gl_SubgroupSize; \ + } \ +} + #endif #endif \ No newline at end of file diff --git a/tests/assets/shaders/meshlet_debug.mesh b/tests/assets/shaders/meshlet_debug.mesh index 40513942..8170bd48 100644 --- a/tests/assets/shaders/meshlet_debug.mesh +++ b/tests/assets/shaders/meshlet_debug.mesh @@ -4,8 +4,15 @@ layout(max_primitives = 256, max_vertices = 255, triangles) out; #include "meshlet_payload_constants.h"
-layout(local_size_x = 32, local_size_y = MESHLET_PAYLOAD_NUM_CHUNKS) in; -layout(constant_id = 0) const uint NUM_U32_STREAMS = 1; + +#if MESHLET_PAYLOAD_LARGE_WORKGROUP +#define MESHLET_PAYLOAD_WG_Y MESHLET_PAYLOAD_NUM_CHUNKS +#else +#define MESHLET_PAYLOAD_WG_Y 1 +#endif +layout(local_size_x = 32, local_size_y = MESHLET_PAYLOAD_WG_Y) in; + +layout(constant_id = 0) const uint NUM_U32_STREAMS = MESHLET_PAYLOAD_MAX_STREAMS; #define MESHLET_PAYLOAD_NUM_U32_STREAMS NUM_U32_STREAMS #define MESHLET_PAYLOAD_DESCRIPTOR_SET 0 #define MESHLET_PAYLOAD_META_BINDING 0 @@ -30,41 +37,42 @@ void main() meshlet_init_workgroup(meshlet_index); MeshletMeta meta = meshlet_metas.data[meshlet_index]; - meshlet_init_workgroup(meshlet_index); - uint linear_index = meshlet_get_linear_index(); - uint packed_indices = meshlet_decode_stream(meshlet_index, 0); - SetMeshOutputsEXT(meta.num_attributes_minus_1 + 1, meta.num_primitives_minus_1 + 1); - // Mildly questionable reliance on LocalInvocationIndex mapping well to SubgroupInvocationID here. - // There is basically no way this will not work in practice however ... - // We have full subgroups and workgroup size X == SubgroupSize. - // Using LocalInvocationIndex specifically is important for AMD perf. 
- - if (linear_index <= meta.num_primitives_minus_1) - { - gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uvec4(unpack8(packed_indices)).xyz; - vMeshletIndex[gl_LocalInvocationIndex] = meshlet_index; +#define INDEX(index, value) \ + if (index <= meta.num_primitives_minus_1) \ + { \ + gl_PrimitiveTriangleIndicesEXT[index] = uvec4(unpack8(value)).xyz; \ + vMeshletIndex[index] = meshlet_index; \ } + MESHLET_DECODE_STREAM_32(meshlet_index, 0, INDEX); - uint pos0 = meshlet_decode_stream(meshlet_index, 1); - uint pos1 = meshlet_decode_stream(meshlet_index, 2); - if (linear_index <= meta.num_attributes_minus_1) - { - vec3 pos = attribute_decode_snorm_exp_position(uvec2(pos0, pos1)); - gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = VP * vec4(pos, 1.0); +#define POSITION(index, value) \ + if (index <= meta.num_attributes_minus_1) \ + { \ + vec3 pos = attribute_decode_snorm_exp_position(value); \ + gl_MeshVerticesEXT[index].gl_Position = VP * vec4(pos, 1.0); \ } + MESHLET_DECODE_STREAM_64(meshlet_index, 1, POSITION); - uint n = meshlet_decode_stream(meshlet_index, 3); - if (linear_index <= meta.num_attributes_minus_1) - vNormal[gl_LocalInvocationIndex] = attribute_decode_oct8_normal_tangent(n).xyz; +#define NORMAL(index, value) \ + if (index <= meta.num_attributes_minus_1) \ + { \ + vNormal[index] = attribute_decode_oct8_normal_tangent(value).xyz; \ + } + MESHLET_DECODE_STREAM_32(meshlet_index, 3, NORMAL); - uint t = meshlet_decode_stream(meshlet_index, 4); - if (linear_index <= meta.num_attributes_minus_1) - vTangent[gl_LocalInvocationIndex] = attribute_decode_oct8_normal_tangent(t); +#define TANGENT(index, value) \ + if (index <= meta.num_attributes_minus_1) \ + { \ + vTangent[index] = attribute_decode_oct8_normal_tangent(value); \ + } + MESHLET_DECODE_STREAM_32(meshlet_index, 4, TANGENT); - uint uv0 = meshlet_decode_stream(meshlet_index, 5); - uint uv1 = meshlet_decode_stream(meshlet_index, 6); - if (linear_index <= meta.num_attributes_minus_1) - 
vUV[gl_LocalInvocationIndex] = attribute_decode_snorm_exp_uv(uvec2(uv0, uv1)); +#define UV(index, value) \ + if (index <= meta.num_attributes_minus_1) \ + { \ + vUV[index] = attribute_decode_snorm_exp_uv(value); \ + } + MESHLET_DECODE_STREAM_64(meshlet_index, 5, UV); } \ No newline at end of file diff --git a/tests/meshlet_viewer.cpp b/tests/meshlet_viewer.cpp index 3fc4b467..e46f544c 100644 --- a/tests/meshlet_viewer.cpp +++ b/tests/meshlet_viewer.cpp @@ -62,6 +62,7 @@ struct MeshletViewerApplication : Granite::Application, Granite::EventHandler Vulkan::BufferHandle meshlet_stream_buffer; AABB aabb; FPSCamera camera; + SceneFormats::Meshlet::FormatHeader header; void on_device_create(const DeviceCreatedEvent &e) { @@ -71,6 +72,8 @@ struct MeshletViewerApplication : Granite::Application, Granite::EventHandler if (!view.format_header) throw std::runtime_error("Failed to load meshlet."); + header = *view.format_header; + Vulkan::BufferCreateInfo info = {}; info.size = view.total_primitives * sizeof(uvec3); info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT; @@ -142,8 +145,14 @@ struct MeshletViewerApplication : Granite::Application, Granite::EventHandler cmd->begin_render_pass(device.get_swapchain_render_pass(SwapchainRenderPass::Depth)); camera.set_aspect(cmd->get_viewport().width / cmd->get_viewport().height); + bool large_workgroup = + device.get_device_features().mesh_shader_properties.maxPreferredMeshWorkGroupInvocations > 32 && + device.get_device_features().mesh_shader_properties.maxMeshWorkGroupInvocations >= 256; + cmd->set_program("", "assets://shaders/meshlet_debug.mesh", - "assets://shaders/meshlet_debug.mesh.frag"); + "assets://shaders/meshlet_debug.mesh.frag", + {{"MESHLET_PAYLOAD_LARGE_WORKGROUP", int(large_workgroup)}}); + cmd->set_opaque_state(); auto vp = camera.get_projection() * camera.get_view(); @@ -155,7 +164,9 @@ struct MeshletViewerApplication : Granite::Application, Granite::EventHandler 
cmd->enable_subgroup_size_control(true, VK_SHADER_STAGE_MESH_BIT_EXT); cmd->set_subgroup_size_log2(true, 5, 5, VK_SHADER_STAGE_MESH_BIT_EXT); - cmd->draw_mesh_tasks(meshlet_meta_buffer->get_create_info().size / sizeof(SceneFormats::Meshlet::Header), 1, 1); + cmd->set_specialization_constant_mask(1); + cmd->set_specialization_constant(0, header.u32_stream_count); + cmd->draw_mesh_tasks(header.meshlet_count, 1, 1); cmd->end_render_pass(); device.submit(cmd); From fd953c86ceedede838a0658e747e8b09d92be48a Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Tue, 1 Aug 2023 14:55:20 +0200 Subject: [PATCH 40/71] Update Fossilize. --- third_party/fossilize | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/fossilize b/third_party/fossilize index 3ae3d7ce..134f04d2 160000 --- a/third_party/fossilize +++ b/third_party/fossilize @@ -1 +1 @@ -Subproject commit 3ae3d7ce16fdf8afd240cba8811bdf5a1071e001 +Subproject commit 134f04d2c4ce1fc80cb3db508afcc1abe0b86c01 From fe74b27927e1300e487175aa690bc2d2dfd5dd89 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 2 Aug 2023 10:07:11 +0200 Subject: [PATCH 41/71] Rewrite WG32 mesh shader implementation. Works around compiler segfault on NV too. 
--- assets/shaders/decode/meshlet_decode.comp | 2 + assets/shaders/inc/meshlet_payload_decode.h | 63 +++++++++++++------- renderer/formats/meshlet.cpp | 2 +- tests/assets/shaders/meshlet_debug.mesh | 1 + tests/assets/shaders/meshlet_debug.mesh.frag | 2 +- 5 files changed, 48 insertions(+), 22 deletions(-) diff --git a/assets/shaders/decode/meshlet_decode.comp b/assets/shaders/decode/meshlet_decode.comp index ba88938e..f2d0f0a8 100644 --- a/assets/shaders/decode/meshlet_decode.comp +++ b/assets/shaders/decode/meshlet_decode.comp @@ -3,6 +3,8 @@ #extension GL_EXT_scalar_block_layout : require #include "../inc/meshlet_payload_constants.h" +#define MESHLET_PAYLOAD_LARGE_WORKGROUP 1 + #if MESHLET_PAYLOAD_LARGE_WORKGROUP #define MESHLET_PAYLOAD_WG_Y MESHLET_PAYLOAD_NUM_CHUNKS #else diff --git a/assets/shaders/inc/meshlet_payload_decode.h b/assets/shaders/inc/meshlet_payload_decode.h index 61f363d6..5c8d45e0 100644 --- a/assets/shaders/inc/meshlet_payload_decode.h +++ b/assets/shaders/inc/meshlet_payload_decode.h @@ -4,9 +4,11 @@ #extension GL_EXT_shader_explicit_arithmetic_types_int16 : require #extension GL_KHR_shader_subgroup_arithmetic : require #extension GL_KHR_shader_subgroup_ballot : require +#extension GL_KHR_shader_subgroup_shuffle : require #extension GL_KHR_shader_subgroup_basic : require #extension GL_EXT_shader_explicit_arithmetic_types_int8 : require #extension GL_EXT_scalar_block_layout : require +#extension GL_EXT_shader_subgroup_extended_types_int8 : require #include "meshlet_payload_constants.h" @@ -66,9 +68,9 @@ layout(set = MESHLET_PAYLOAD_DESCRIPTOR_SET, binding = MESHLET_PAYLOAD_PAYLOAD_B uint data[]; } payload; +#if MESHLET_PAYLOAD_LARGE_WORKGROUP shared u8vec4 shared_chunk_bit_counts[MESHLET_PAYLOAD_NUM_U32_STREAMS][MESHLET_PAYLOAD_NUM_CHUNKS]; shared uint shared_chunk_offset[MESHLET_PAYLOAD_NUM_U32_STREAMS][MESHLET_PAYLOAD_NUM_CHUNKS]; -#if MESHLET_PAYLOAD_LARGE_WORKGROUP shared uvec2 chunk_values0[MESHLET_PAYLOAD_NUM_CHUNKS]; shared uvec2 
chunk_values1[MESHLET_PAYLOAD_NUM_CHUNKS]; #endif @@ -86,35 +88,38 @@ uint repack_uint(uvec2 v) return pack32(u8vec4(v16)); } -void meshlet_barrier() +void meshlet_compute_stream_offsets(uint meshlet_index, uint stream_index, + out uint out_stream_chunk_offset, out u8vec4 out_bit_counts) { -#if MESHLET_PAYLOAD_LARGE_WORKGROUP - barrier(); -#else - subgroupBarrier(); -#endif + if (gl_SubgroupInvocationID < MESHLET_PAYLOAD_NUM_CHUNKS) + { + uint bitplane_value = uint(meshlet_streams.data[stream_index + MESHLET_PAYLOAD_NUM_U32_STREAMS * meshlet_index].bitplane_meta[gl_SubgroupInvocationID]); + u16vec4 bit_counts = (u16vec4(bitplane_value) >> u16vec4(0, 4, 8, 12)) & 0xfus; + u16vec2 bit_counts2 = bit_counts.xy + bit_counts.zw; + uint total_bits = bit_counts2.x + bit_counts2.y; + uint offset = meshlet_streams.data[stream_index + MESHLET_PAYLOAD_NUM_U32_STREAMS * meshlet_index].offset_from_base; + out_stream_chunk_offset = subgroupExclusiveAdd(total_bits) + offset; + out_bit_counts = u8vec4(bit_counts); + } } void meshlet_init_workgroup(uint meshlet_index) { - int subgroup_lane = int(gl_SubgroupInvocationID); +#if MESHLET_PAYLOAD_LARGE_WORKGROUP for (uint stream_index = gl_SubgroupID; stream_index < MESHLET_PAYLOAD_NUM_U32_STREAMS; stream_index += gl_NumSubgroups) { - // Start by decoding the offset for bitplanes for all u32 streams. 
- if (subgroup_lane < MESHLET_PAYLOAD_NUM_CHUNKS) + if (gl_SubgroupInvocationID < MESHLET_PAYLOAD_NUM_CHUNKS) { - uint bitplane_value = uint(meshlet_streams.data[stream_index + MESHLET_PAYLOAD_NUM_U32_STREAMS * meshlet_index].bitplane_meta[subgroup_lane]); - u16vec4 bit_counts = (u16vec4(bitplane_value) >> u16vec4(0, 4, 8, 12)) & 0xfus; - u16vec2 bit_counts2 = bit_counts.xy + bit_counts.zw; - uint total_bits = bit_counts2.x + bit_counts2.y; - uint offset = meshlet_streams.data[stream_index + MESHLET_PAYLOAD_NUM_U32_STREAMS * meshlet_index].offset_from_base; - shared_chunk_offset[stream_index][subgroup_lane] = subgroupExclusiveAdd(total_bits) + offset; - shared_chunk_bit_counts[stream_index][subgroup_lane] = u8vec4(bit_counts); + // Start by decoding the offset for bitplanes for all u32 streams. + meshlet_compute_stream_offsets(meshlet_index, stream_index, + shared_chunk_offset[stream_index][gl_SubgroupInvocationID], + shared_chunk_bit_counts[stream_index][gl_SubgroupInvocationID]); } } - meshlet_barrier(); + barrier(); +#endif } uint meshlet_get_linear_index() @@ -145,10 +150,19 @@ uint meshlet_get_linear_index() u8vec4 initial_value_##iter = meshlet_streams.data[unrolled_stream_index].initial_value; \ uvec2 initial_value##iter = pack_u16vec4_to_uvec2(u16vec4(initial_value_##iter)) +#if MESHLET_PAYLOAD_LARGE_WORKGROUP +#define MESHLET_PAYLOAD_DECL_CHUNK_OFFSETS(stream_index, chunk_id, iter) \ + uint bitplane_offsets##iter = shared_chunk_offset[stream_index][chunk_id]; \ + ivec4 bit_counts##iter = ivec4(shared_chunk_bit_counts[stream_index][chunk_id]) +#else +#define MESHLET_PAYLOAD_DECL_CHUNK_OFFSETS(stream_index, chunk_id, iter) \ + uint bitplane_offsets##iter = subgroupShuffle(shared_chunk_offset##iter, chunk_id); \ + ivec4 bit_counts##iter = ivec4(subgroupShuffle(shared_chunk_bit_counts##iter, chunk_id)) +#endif + #define MESHLET_PAYLOAD_PROCESS_CHUNK(stream_index, chunk_id, iter) \ uvec4 decoded##iter = ivec4(0); \ - uint bitplane_offsets##iter = 
shared_chunk_offset[stream_index][chunk_id]; \ - ivec4 bit_counts##iter = ivec4(shared_chunk_bit_counts[stream_index][chunk_id]); \ + MESHLET_PAYLOAD_DECL_CHUNK_OFFSETS(stream_index, chunk_id, iter); \ uint value##iter = payload.data[bitplane_offsets##iter]; \ MESHLET_FETCH_BITPLANES(decoded##iter.x, bit_counts##iter.x, value##iter, bitplane_offsets##iter); \ MESHLET_FETCH_BITPLANES(decoded##iter.y, bit_counts##iter.y, value##iter, bitplane_offsets##iter); \ @@ -233,6 +247,9 @@ uvec2 meshlet_decode_stream_64_wg256(uint meshlet_index, uint stream_index) uint unrolled_stream_index = MESHLET_PAYLOAD_NUM_U32_STREAMS * meshlet_index + stream_index; \ uint linear_index = meshlet_get_linear_index(); \ uvec2 prev_value0 = uvec2(0); \ + uint shared_chunk_offset0; \ + u8vec4 shared_chunk_bit_counts0; \ + meshlet_compute_stream_offsets(meshlet_index, stream_index, shared_chunk_offset0, shared_chunk_bit_counts0); \ MESHLET_PAYLOAD_DECL_STREAM(unrolled_stream_index, 0); \ for (uint chunk_id = 0; chunk_id < MESHLET_PAYLOAD_NUM_CHUNKS; chunk_id++) \ { \ @@ -250,6 +267,12 @@ uvec2 meshlet_decode_stream_64_wg256(uint meshlet_index, uint stream_index) uint linear_index = meshlet_get_linear_index(); \ uvec2 prev_value0 = uvec2(0); \ uvec2 prev_value1 = uvec2(0); \ + uint shared_chunk_offset0; \ + u8vec4 shared_chunk_bit_counts0; \ + meshlet_compute_stream_offsets(meshlet_index, stream_index, shared_chunk_offset0, shared_chunk_bit_counts0); \ + uint shared_chunk_offset1; \ + u8vec4 shared_chunk_bit_counts1; \ + meshlet_compute_stream_offsets(meshlet_index, stream_index + 1, shared_chunk_offset1, shared_chunk_bit_counts1); \ MESHLET_PAYLOAD_DECL_STREAM(unrolled_stream_index, 0); \ MESHLET_PAYLOAD_DECL_STREAM(unrolled_stream_index + 1, 1); \ for (uint chunk_id = 0; chunk_id < MESHLET_PAYLOAD_NUM_CHUNKS; chunk_id++) \ diff --git a/renderer/formats/meshlet.cpp b/renderer/formats/meshlet.cpp index 4ce57f78..a91cbef7 100644 --- a/renderer/formats/meshlet.cpp +++ 
b/renderer/formats/meshlet.cpp @@ -127,7 +127,7 @@ bool decode_mesh(Vulkan::CommandBuffer &cmd, buf_info.size = output_offset_strides.size() * sizeof(uvec2); auto output_offset_strides_buffer = cmd.get_device().create_buffer(buf_info, output_offset_strides.data()); - cmd.set_program("builtin://shaders/decode/meshlet_decode.comp", {{"MESHLET_PAYLOAD_LARGE_WORKGROUP", 0}}); + cmd.set_program("builtin://shaders/decode/meshlet_decode.comp"); cmd.enable_subgroup_size_control(true); cmd.set_subgroup_size_log2(true, 5, 5); diff --git a/tests/assets/shaders/meshlet_debug.mesh b/tests/assets/shaders/meshlet_debug.mesh index 8170bd48..a61979d8 100644 --- a/tests/assets/shaders/meshlet_debug.mesh +++ b/tests/assets/shaders/meshlet_debug.mesh @@ -14,6 +14,7 @@ layout(local_size_x = 32, local_size_y = MESHLET_PAYLOAD_WG_Y) in; layout(constant_id = 0) const uint NUM_U32_STREAMS = MESHLET_PAYLOAD_MAX_STREAMS; #define MESHLET_PAYLOAD_NUM_U32_STREAMS NUM_U32_STREAMS + #define MESHLET_PAYLOAD_DESCRIPTOR_SET 0 #define MESHLET_PAYLOAD_META_BINDING 0 #define MESHLET_PAYLOAD_STREAM_BINDING 1 diff --git a/tests/assets/shaders/meshlet_debug.mesh.frag b/tests/assets/shaders/meshlet_debug.mesh.frag index 0a29a26f..594eef7f 100644 --- a/tests/assets/shaders/meshlet_debug.mesh.frag +++ b/tests/assets/shaders/meshlet_debug.mesh.frag @@ -15,7 +15,7 @@ vec3 decode_mesh_color() uint r = bitfieldExtract(index, 0, 2); uint g = bitfieldExtract(index, 2, 2); uint b = bitfieldExtract(index, 4, 2); - return vec3(r, g, b) / 3.0; + return (vec3(r, g, b) + 1.0 / 3.0) / 4.0; } void main() From c7e076b6808c70139ad91100198b5a96cda65f37 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 2 Aug 2023 11:46:24 +0200 Subject: [PATCH 42/71] Optimize mesh before exporting it. 
--- renderer/formats/scene_formats.cpp | 233 ++++++++++++------- renderer/formats/scene_formats.hpp | 9 +- scene-export/gltf_export.cpp | 12 +- scene-export/meshlet_export.cpp | 78 +------ tests/assets/shaders/meshlet_debug.mesh.frag | 3 +- tests/meshopt_sandbox.cpp | 27 ++- 6 files changed, 194 insertions(+), 168 deletions(-) diff --git a/renderer/formats/scene_formats.cpp b/renderer/formats/scene_formats.cpp index 4b8ba1ea..6802e5c6 100644 --- a/renderer/formats/scene_formats.cpp +++ b/renderer/formats/scene_formats.cpp @@ -44,15 +44,20 @@ static vec3 compute_normal(const vec3 &a, const vec3 &b, const vec3 &c) struct IndexRemapping { - std::vector index_remap; - std::vector unique_attrib_to_source_index; + std::vector index_remap; + std::vector unique_attrib_to_source_index; }; // Find duplicate indices. -static IndexRemapping build_index_remap_list(const Mesh &mesh) +static IndexRemapping build_attribute_remap_indices(const Mesh &mesh) { auto attribute_count = unsigned(mesh.positions.size() / mesh.position_stride); - std::unordered_map attribute_remapper; + struct RemappedAttribute + { + unsigned unique_index; + unsigned source_index; + }; + std::unordered_map attribute_remapper; IndexRemapping remapped; remapped.index_remap.reserve(attribute_count); @@ -72,7 +77,7 @@ static IndexRemapping build_index_remap_list(const Mesh &mesh) { bool match = true; if (memcmp(mesh.positions.data() + i * mesh.position_stride, - mesh.positions.data() + itr->second * mesh.position_stride, + mesh.positions.data() + itr->second.source_index * mesh.position_stride, mesh.position_stride) != 0) { match = false; @@ -80,14 +85,14 @@ static IndexRemapping build_index_remap_list(const Mesh &mesh) if (match && !mesh.attributes.empty() && memcmp(mesh.attributes.data() + i * mesh.attribute_stride, - mesh.attributes.data() + itr->second * mesh.attribute_stride, + mesh.attributes.data() + itr->second.source_index * mesh.attribute_stride, mesh.attribute_stride) != 0) { match = false; } if 
(match) - remapped.index_remap.push_back(itr->second); + remapped.index_remap.push_back(itr->second.unique_index); else LOGW("Hash collision in vertex dedup.\n"); @@ -95,7 +100,7 @@ static IndexRemapping build_index_remap_list(const Mesh &mesh) } else { - attribute_remapper[hash] = unique_count; + attribute_remapper[hash] = { unique_count, i }; is_unique = true; } @@ -110,33 +115,15 @@ static IndexRemapping build_index_remap_list(const Mesh &mesh) return remapped; } -static std::vector build_canonical_index_buffer(const Mesh &mesh, const std::vector &index_remap) +static std::vector build_remapped_index_buffer(const Mesh &mesh, const std::vector &index_remap) { - assert(mesh.topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST); + assert(mesh.topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST && mesh.index_type == VK_INDEX_TYPE_UINT32); std::vector index_buffer; index_buffer.reserve(mesh.count); - if (mesh.indices.empty()) - { - for (unsigned i = 0; i < mesh.count; i++) - index_buffer.push_back(index_remap[i]); - } - else if (mesh.index_type == VK_INDEX_TYPE_UINT32) - { - for (unsigned i = 0; i < mesh.count; i++) - index_buffer.push_back(index_remap[reinterpret_cast(mesh.indices.data())[i]]); - } - else if (mesh.index_type == VK_INDEX_TYPE_UINT16) - { - for (unsigned i = 0; i < mesh.count; i++) - index_buffer.push_back(index_remap[reinterpret_cast(mesh.indices.data())[i]]); - } - else if (mesh.index_type == VK_INDEX_TYPE_UINT8_EXT) - { - for (unsigned i = 0; i < mesh.count; i++) - index_buffer.push_back(index_remap[reinterpret_cast(mesh.indices.data())[i]]); - } - + const auto *indices = reinterpret_cast(mesh.indices.data()); + for (unsigned i = 0; i < mesh.count; i++) + index_buffer.push_back(index_remap[indices[i]]); return index_buffer; } @@ -223,7 +210,7 @@ static bool mesh_unroll_vertices(Mesh &mesh) if (mesh.index_type == VK_INDEX_TYPE_UINT32) { - const uint32_t *ibo = reinterpret_cast(mesh.indices.data()); + const auto *ibo = reinterpret_cast(mesh.indices.data()); 
for (unsigned i = 0; i < mesh.count; i++) { uint32_t index = ibo[i]; @@ -237,7 +224,21 @@ static bool mesh_unroll_vertices(Mesh &mesh) } else if (mesh.index_type == VK_INDEX_TYPE_UINT16) { - const uint16_t *ibo = reinterpret_cast(mesh.indices.data()); + const auto *ibo = reinterpret_cast(mesh.indices.data()); + for (unsigned i = 0; i < mesh.count; i++) + { + uint16_t index = ibo[i]; + memcpy(positions.data() + i * mesh.position_stride, + mesh.positions.data() + index * mesh.position_stride, + mesh.position_stride); + memcpy(attributes.data() + i * mesh.attribute_stride, + mesh.attributes.data() + index * mesh.attribute_stride, + mesh.attribute_stride); + } + } + else if (mesh.index_type == VK_INDEX_TYPE_UINT8_EXT) + { + const auto *ibo = mesh.indices.data(); for (unsigned i = 0; i < mesh.count; i++) { uint16_t index = ibo[i]; @@ -256,54 +257,122 @@ static bool mesh_unroll_vertices(Mesh &mesh) return true; } +bool mesh_canonicalize_indices(SceneFormats::Mesh &mesh) +{ + if (mesh.topology != VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST && + mesh.topology != VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP) + { + LOGE("Topology must be trilist or tristrip.\n"); + return false; + } + + std::vector unrolled_indices; + unrolled_indices.reserve(mesh.count); + + if (mesh.indices.empty()) + { + for (unsigned i = 0; i < mesh.count; i++) + unrolled_indices.push_back(i); + mesh.index_type = VK_INDEX_TYPE_UINT32; + } + else if (mesh.index_type == VK_INDEX_TYPE_UINT32) + { + auto *indices = reinterpret_cast(mesh.indices.data()); + for (unsigned i = 0; i < mesh.count; i++) + unrolled_indices.push_back(indices[i]); + } + else if (mesh.index_type == VK_INDEX_TYPE_UINT16) + { + auto *indices = reinterpret_cast(mesh.indices.data()); + for (unsigned i = 0; i < mesh.count; i++) + unrolled_indices.push_back(mesh.primitive_restart && indices[i] == UINT16_MAX ? 
UINT32_MAX : indices[i]); + } + else if (mesh.index_type == VK_INDEX_TYPE_UINT8_EXT) + { + auto *indices = reinterpret_cast(mesh.indices.data()); + for (unsigned i = 0; i < mesh.count; i++) + unrolled_indices.push_back(mesh.primitive_restart && indices[i] == UINT8_MAX ? UINT32_MAX : indices[i]); + } + + if (mesh.topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP) + { + std::vector unstripped_indices; + unstripped_indices.reserve(mesh.count * 3); + unsigned primitive_count_since_restart = 0; + + for (unsigned i = 2; i < mesh.count; i++) + { + bool emit_primitive = true; + if (mesh.primitive_restart && + unrolled_indices[i - 2] == UINT32_MAX && + unrolled_indices[i - 1] == UINT32_MAX && + unrolled_indices[i - 0] == UINT32_MAX) + { + emit_primitive = false; + primitive_count_since_restart = 0; + } + + if (emit_primitive) + { + unstripped_indices.push_back(unrolled_indices[i - 2]); + unstripped_indices.push_back(unrolled_indices[i - (1 ^ (primitive_count_since_restart & 1))]); + unstripped_indices.push_back(unrolled_indices[i - (primitive_count_since_restart & 1)]); + primitive_count_since_restart++; + } + } + + unrolled_indices = std::move(unstripped_indices); + mesh.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + } + + mesh.index_type = VK_INDEX_TYPE_UINT32; + mesh.count = uint32_t(unrolled_indices.size()); + mesh.indices.resize(unrolled_indices.size() * sizeof(uint32_t)); + memcpy(mesh.indices.data(), unrolled_indices.data(), mesh.indices.size()); + return true; +} + void mesh_deduplicate_vertices(Mesh &mesh) { - auto index_remap = build_index_remap_list(mesh); - auto index_buffer = build_canonical_index_buffer(mesh, index_remap.index_remap); + mesh_canonicalize_indices(mesh); + auto index_remap = build_attribute_remap_indices(mesh); + auto index_buffer = build_remapped_index_buffer(mesh, index_remap.index_remap); rebuild_new_attributes_remap_src(mesh.positions, mesh.position_stride, mesh.attributes, mesh.attribute_stride, mesh.positions, mesh.attributes, 
index_remap.unique_attrib_to_source_index); - mesh.index_type = VK_INDEX_TYPE_UINT32; mesh.indices.resize(index_buffer.size() * sizeof(uint32_t)); memcpy(mesh.indices.data(), index_buffer.data(), index_buffer.size() * sizeof(uint32_t)); mesh.count = unsigned(index_buffer.size()); } -Mesh mesh_optimize_index_buffer(const Mesh &mesh, bool stripify) +bool mesh_optimize_index_buffer(Mesh &mesh, const IndexBufferOptimizeOptions &options) { - if (mesh.topology != VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST) - return mesh; - - Mesh optimized; - optimized.position_stride = mesh.position_stride; - optimized.attribute_stride = mesh.attribute_stride; + if (!mesh_canonicalize_indices(mesh) || mesh.topology != VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST) + return false; // Remove redundant indices and rewrite index and attribute buffers. - auto index_remap = build_index_remap_list(mesh); - auto index_buffer = build_canonical_index_buffer(mesh, index_remap.index_remap); - rebuild_new_attributes_remap_src(optimized.positions, optimized.position_stride, - optimized.attributes, optimized.attribute_stride, + auto index_remap = build_attribute_remap_indices(mesh); + auto index_buffer = build_remapped_index_buffer(mesh, index_remap.index_remap); + rebuild_new_attributes_remap_src(mesh.positions, mesh.position_stride, + mesh.attributes, mesh.attribute_stride, mesh.positions, mesh.attributes, index_remap.unique_attrib_to_source_index); - size_t vertex_count = optimized.positions.size() / optimized.position_stride; + size_t vertex_count = mesh.positions.size() / mesh.position_stride; // Optimize for vertex cache. meshopt_optimizeVertexCache(index_buffer.data(), index_buffer.data(), index_buffer.size(), vertex_count); // Remap vertex fetch to get contiguous indices as much as possible. 
- std::vector remap_table(optimized.positions.size() / optimized.position_stride); + std::vector remap_table(mesh.positions.size() / mesh.position_stride); meshopt_optimizeVertexFetchRemap(remap_table.data(), index_buffer.data(), index_buffer.size(), vertex_count); index_buffer = remap_indices(index_buffer, remap_table); - rebuild_new_attributes_remap_dst(optimized.positions, optimized.position_stride, - optimized.attributes, optimized.attribute_stride, - optimized.positions, optimized.attributes, remap_table); - - optimized.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; - optimized.primitive_restart = false; + rebuild_new_attributes_remap_dst(mesh.positions, mesh.position_stride, + mesh.attributes, mesh.attribute_stride, + mesh.positions, mesh.attributes, remap_table); - if (stripify) + if (options.stripify) { // Try to stripify the mesh. If we end up with fewer indices, use that. std::vector stripped_index_buffer((index_buffer.size() / 3) * 4); @@ -314,45 +383,41 @@ Mesh mesh_optimize_index_buffer(const Mesh &mesh, bool stripify) stripped_index_buffer.resize(stripped_index_count); if (stripped_index_count < index_buffer.size()) { - optimized.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; + mesh.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; index_buffer = std::move(stripped_index_buffer); - optimized.primitive_restart = true; + mesh.primitive_restart = true; } } - uint32_t max_index = 0; - for (auto &i : index_buffer) - if (i != ~0u) - max_index = muglm::max(max_index, i); - - if (max_index <= 0xffff) // 16-bit indices are enough. + bool emit_u32 = true; + if (options.narrow_index_buffer) { - optimized.index_type = VK_INDEX_TYPE_UINT16; - optimized.indices.resize(index_buffer.size() * sizeof(uint16_t)); - size_t count = index_buffer.size(); - for (size_t i = 0; i < count; i++) + uint32_t max_index = 0; + for (auto &i: index_buffer) + if (i != ~0u) + max_index = muglm::max(max_index, i); + + if (max_index <= 0xffff) // 16-bit indices are enough. 
{ - reinterpret_cast(optimized.indices.data())[i] = - index_buffer[i] == ~0u ? uint16_t(0xffffu) : uint16_t(index_buffer[i]); + mesh.index_type = VK_INDEX_TYPE_UINT16; + mesh.indices.resize(index_buffer.size() * sizeof(uint16_t)); + size_t count = index_buffer.size(); + emit_u32 = false; + + auto *out_indices = reinterpret_cast(mesh.indices.data()); + for (size_t i = 0; i < count; i++) + out_indices[i] = index_buffer[i] == ~0u ? uint16_t(0xffffu) : uint16_t(index_buffer[i]); } } - else + + if (emit_u32) { - optimized.index_type = VK_INDEX_TYPE_UINT32; - optimized.indices.resize(index_buffer.size() * sizeof(uint32_t)); - size_t count = index_buffer.size(); - for (size_t i = 0; i < count; i++) - reinterpret_cast(optimized.indices.data())[i] = index_buffer[i]; + mesh.indices.resize(index_buffer.size() * sizeof(uint32_t)); + memcpy(mesh.indices.data(), index_buffer.data(), index_buffer.size() * sizeof(uint32_t)); } - optimized.count = unsigned(index_buffer.size()); - - memcpy(optimized.attribute_layout, mesh.attribute_layout, sizeof(mesh.attribute_layout)); - optimized.material_index = mesh.material_index; - optimized.has_material = mesh.has_material; - optimized.static_aabb = mesh.static_aabb; - - return optimized; + mesh.count = unsigned(index_buffer.size()); + return true; } bool mesh_recompute_tangents(Mesh &mesh) diff --git a/renderer/formats/scene_formats.hpp b/renderer/formats/scene_formats.hpp index 18661805..ab273580 100644 --- a/renderer/formats/scene_formats.hpp +++ b/renderer/formats/scene_formats.hpp @@ -256,7 +256,14 @@ bool mesh_flip_tangents_w(Mesh &mesh); bool extract_collision_mesh(CollisionMesh &collision_mesh, const Mesh &mesh); void mesh_deduplicate_vertices(Mesh &mesh); -Mesh mesh_optimize_index_buffer(const Mesh &mesh, bool stripify); +bool mesh_canonicalize_indices(Mesh &mesh); + +struct IndexBufferOptimizeOptions +{ + bool narrow_index_buffer; + bool stripify; +}; +bool mesh_optimize_index_buffer(Mesh &mesh, const IndexBufferOptimizeOptions 
&options); std::unordered_set build_used_nodes_in_scene(const SceneNodes &scene, const std::vector &nodes); } } diff --git a/scene-export/gltf_export.cpp b/scene-export/gltf_export.cpp index a2c0084a..9aa9e1cb 100644 --- a/scene-export/gltf_export.cpp +++ b/scene-export/gltf_export.cpp @@ -892,7 +892,17 @@ void RemapState::emit_mesh(unsigned remapped_index) { Mesh new_mesh; if (options->optimize_meshes) - new_mesh = mesh_optimize_index_buffer(*mesh.info[remapped_index], options->stripify_meshes); + { + new_mesh = *mesh.info[remapped_index]; + IndexBufferOptimizeOptions opts = {}; + opts.narrow_index_buffer = true; + opts.stripify = options->stripify_meshes; + if (!mesh_optimize_index_buffer(new_mesh, opts)) + { + LOGE("Failed to optimize index buffer.\n"); + return; + } + } auto &output_mesh = options->optimize_meshes ? new_mesh : *mesh.info[remapped_index]; mesh_cache.resize(std::max(mesh_cache.size(), remapped_index + 1)); diff --git a/scene-export/meshlet_export.cpp b/scene-export/meshlet_export.cpp index 5911ae10..78d8b630 100644 --- a/scene-export/meshlet_export.cpp +++ b/scene-export/meshlet_export.cpp @@ -65,80 +65,6 @@ struct PrimitiveAnalysisResult uint32_t num_vertices; }; -static bool mesh_canonicalize_indices(SceneFormats::Mesh &mesh) -{ - if (mesh.topology != VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST && - mesh.topology != VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP) - { - LOGE("Topology must be trilist or tristrip.\n"); - return false; - } - - std::vector unrolled_indices; - unrolled_indices.reserve(mesh.count); - - if (mesh.indices.empty()) - { - for (unsigned i = 0; i < mesh.count; i++) - unrolled_indices.push_back(i); - mesh.index_type = VK_INDEX_TYPE_UINT32; - } - else if (mesh.index_type == VK_INDEX_TYPE_UINT32) - { - auto *indices = reinterpret_cast(mesh.indices.data()); - for (unsigned i = 0; i < mesh.count; i++) - unrolled_indices.push_back(indices[i]); - } - else if (mesh.index_type == VK_INDEX_TYPE_UINT16) - { - auto *indices = 
reinterpret_cast(mesh.indices.data()); - for (unsigned i = 0; i < mesh.count; i++) - unrolled_indices.push_back(mesh.primitive_restart && indices[i] == UINT16_MAX ? UINT32_MAX : indices[i]); - } - else if (mesh.index_type == VK_INDEX_TYPE_UINT8_EXT) - { - auto *indices = reinterpret_cast(mesh.indices.data()); - for (unsigned i = 0; i < mesh.count; i++) - unrolled_indices.push_back(mesh.primitive_restart && indices[i] == UINT8_MAX ? UINT32_MAX : indices[i]); - } - - if (mesh.topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP) - { - std::vector unstripped_indices; - unstripped_indices.reserve(mesh.count * 3); - unsigned primitive_count_since_restart = 0; - - for (unsigned i = 2; i < mesh.count; i++) - { - bool emit_primitive = true; - if (mesh.primitive_restart && - unrolled_indices[i - 2] == UINT32_MAX && - unrolled_indices[i - 1] == UINT32_MAX && - unrolled_indices[i - 0] == UINT32_MAX) - { - emit_primitive = false; - primitive_count_since_restart = 0; - } - - if (emit_primitive) - { - unstripped_indices.push_back(unrolled_indices[i - 2]); - unstripped_indices.push_back(unrolled_indices[i - (1 ^ (primitive_count_since_restart & 1))]); - unstripped_indices.push_back(unrolled_indices[i - (primitive_count_since_restart & 1)]); - primitive_count_since_restart++; - } - } - - unrolled_indices = std::move(unstripped_indices); - mesh.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; - } - - mesh.count = uint32_t(unrolled_indices.size()); - mesh.indices.resize(unrolled_indices.size() * sizeof(uint32_t)); - memcpy(mesh.indices.data(), unrolled_indices.data(), mesh.indices.size()); - return true; -} - static i16vec4 encode_vec3_to_snorm_exp(vec3 v) { vec3 vabs = abs(v); @@ -659,7 +585,7 @@ static bool export_encoded_mesh(const std::string &path, const Encoded &encoded) bool export_mesh_to_meshlet(const std::string &path, SceneFormats::Mesh mesh, SceneFormats::Meshlet::MeshStyle style) { - if (!mesh_canonicalize_indices(mesh)) + if (!mesh_optimize_index_buffer(mesh, {})) return 
false; std::vector positions, uv; @@ -706,7 +632,7 @@ bool export_mesh_to_meshlet(const std::string &path, SceneFormats::Mesh mesh, Sc return false; } - std::vector attributes(num_u32_streams * positions.size()); + std::vector attributes(num_u32_streams * positions.size()); uint32_t *ptr = attributes.data(); for (size_t i = 0, n = positions.size(); i < n; i++) { diff --git a/tests/assets/shaders/meshlet_debug.mesh.frag b/tests/assets/shaders/meshlet_debug.mesh.frag index 594eef7f..9eb97dab 100644 --- a/tests/assets/shaders/meshlet_debug.mesh.frag +++ b/tests/assets/shaders/meshlet_debug.mesh.frag @@ -15,7 +15,8 @@ vec3 decode_mesh_color() uint r = bitfieldExtract(index, 0, 2); uint g = bitfieldExtract(index, 2, 2); uint b = bitfieldExtract(index, 4, 2); - return (vec3(r, g, b) + 1.0 / 3.0) / 4.0; + //return (vec3(r, g, b) + 1.0 / 3.0) / 4.0; + return vec3(1.0); } void main() diff --git a/tests/meshopt_sandbox.cpp b/tests/meshopt_sandbox.cpp index aca7f96b..f41b6490 100644 --- a/tests/meshopt_sandbox.cpp +++ b/tests/meshopt_sandbox.cpp @@ -218,8 +218,10 @@ int main(int argc, char *argv[]) dev.set_context(ctx); dev.init_frame_contexts(4); + auto mesh = parser.get_meshes().front(); + if (!Meshlet::export_mesh_to_meshlet("/tmp/export.msh1", - parser.get_meshes().front(), SceneFormats::Meshlet::MeshStyle::Textured)) + mesh, SceneFormats::Meshlet::MeshStyle::Textured)) { return EXIT_FAILURE; } @@ -232,22 +234,37 @@ int main(int argc, char *argv[]) if (!mapped) return EXIT_FAILURE; - auto mesh = SceneFormats::Meshlet::create_mesh_view(*mapped); + auto view = SceneFormats::Meshlet::create_mesh_view(*mapped); std::vector reference_index_buffer; std::vector reference_attributes; std::vector gpu_index_buffer; std::vector gpu_attributes; - decode_mesh(reference_index_buffer, reference_attributes, mesh); - decode_mesh_gpu(dev, gpu_index_buffer, gpu_attributes, mesh); + decode_mesh(reference_index_buffer, reference_attributes, view); + decode_mesh_gpu(dev, gpu_index_buffer, 
gpu_attributes, view); if (!validate_mesh_decode(gpu_index_buffer, gpu_attributes, reference_index_buffer, reference_attributes, - mesh.format_header->u32_stream_count - 1)) + view.format_header->u32_stream_count - 1)) { return EXIT_FAILURE; } + { + LOGI("Total primitives: %u\n", view.total_primitives); + LOGI("Total vertices: %u\n", view.total_vertices); + LOGI("Payload size: %llu bytes.\n", static_cast(view.format_header->payload_size_words * sizeof(uint32_t))); + + unsigned long long uncompressed_mesh_size = + view.total_primitives * sizeof(uint32_t) * 3 + + view.total_vertices * (view.format_header->u32_stream_count - 1) * sizeof(uint32_t); + unsigned long long uncompressed_payload_size = + view.total_primitives * sizeof(uint32_t) + + view.total_vertices * (view.format_header->u32_stream_count - 1) * sizeof(uint32_t); + LOGI("Uncompressed mesh size: %llu bytes.\n", uncompressed_mesh_size); + LOGI("Uncompressed payload size: %llu bytes.\n", uncompressed_payload_size); + } + return 0; } \ No newline at end of file From ddc47ffd953905fb0fda3a2f358b0dff1d7e824e Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 2 Aug 2023 12:00:17 +0200 Subject: [PATCH 43/71] Enable 8-bit index extension. 
--- vulkan/context.cpp | 8 ++++++++ vulkan/context.hpp | 1 + 2 files changed, 9 insertions(+) diff --git a/vulkan/context.cpp b/vulkan/context.cpp index 4993adc1..b4771320 100644 --- a/vulkan/context.cpp +++ b/vulkan/context.cpp @@ -1295,6 +1295,7 @@ bool Context::create_device(VkPhysicalDevice gpu_, VkSurfaceKHR surface, ext.pageable_device_local_memory_features = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PAGEABLE_DEVICE_LOCAL_MEMORY_FEATURES_EXT }; ext.mesh_shader_features = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_EXT }; ext.shader_subgroup_extended_types_features = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES }; + ext.index_type_uint8_features = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT }; ext.compute_shader_derivative_features = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV }; ext.device_generated_commands_features = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEVICE_GENERATED_COMMANDS_FEATURES_NV }; @@ -1484,6 +1485,13 @@ bool Context::create_device(VkPhysicalDevice gpu_, VkSurfaceKHR surface, ppNext = &ext.shader_subgroup_extended_types_features.pNext; } + if (has_extension(VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME)) + { + enabled_extensions.push_back(VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME); + *ppNext = &ext.index_type_uint8_features; + ppNext = &ext.index_type_uint8_features.pNext; + } + if ((flags & CONTEXT_CREATION_ENABLE_ADVANCED_WSI_BIT) != 0 && requires_swapchain) { bool broken_present_wait = ext.driver_properties.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY && diff --git a/vulkan/context.hpp b/vulkan/context.hpp index 680571fe..a2ab4f7c 100644 --- a/vulkan/context.hpp +++ b/vulkan/context.hpp @@ -104,6 +104,7 @@ struct DeviceFeatures VkPhysicalDeviceBufferDeviceAddressFeaturesKHR buffer_device_address_features = {}; VkPhysicalDeviceIDProperties id_properties = {}; VkPhysicalDeviceShaderSubgroupExtendedTypesFeaturesKHR shader_subgroup_extended_types_features = {}; + 
VkPhysicalDeviceIndexTypeUint8FeaturesEXT index_type_uint8_features = {}; // EXT VkPhysicalDeviceExternalMemoryHostPropertiesEXT host_memory_properties = {}; From 2d1ae8f4fa35551c86b22ec141486dc038213007 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 2 Aug 2023 12:15:50 +0200 Subject: [PATCH 44/71] Don't reoptimize vertex cache. --- scene-export/meshlet_export.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/scene-export/meshlet_export.cpp b/scene-export/meshlet_export.cpp index 78d8b630..26f4c244 100644 --- a/scene-export/meshlet_export.cpp +++ b/scene-export/meshlet_export.cpp @@ -667,19 +667,15 @@ bool export_mesh_to_meshlet(const std::string &path, SceneFormats::Mesh mesh, Sc // Special meshoptimizer limit. constexpr unsigned max_vertices = 255; constexpr unsigned max_primitives = 256; - std::vector optimized_index_buffer(mesh.count); - meshopt_optimizeVertexCache( - optimized_index_buffer.data(), reinterpret_cast(mesh.indices.data()), - mesh.count, positions.size()); size_t num_meshlets = meshopt_buildMeshletsBound(mesh.count, max_vertices, max_primitives); std::vector out_vertex_redirection_buffer(num_meshlets * max_vertices); std::vector local_index_buffer(num_meshlets * max_primitives * 3); - std::vector meshlets(num_meshlets); + std::vector meshlets(num_meshlets); num_meshlets = meshopt_buildMeshlets(meshlets.data(), out_vertex_redirection_buffer.data(), local_index_buffer.data(), - optimized_index_buffer.data(), mesh.count, + reinterpret_cast(mesh.indices.data()), mesh.count, position_buffer[0].data, positions.size(), sizeof(vec3), max_vertices, max_primitives, 0.75f); From 9fec7a3eeb2b066d8769fedf4d7cdcb4f894d569 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 2 Aug 2023 12:25:18 +0200 Subject: [PATCH 45/71] Exporting raw payload for testing. 
--- scene-export/meshlet_export.cpp | 2 +- tests/meshopt_sandbox.cpp | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/scene-export/meshlet_export.cpp b/scene-export/meshlet_export.cpp index 26f4c244..99a071ce 100644 --- a/scene-export/meshlet_export.cpp +++ b/scene-export/meshlet_export.cpp @@ -717,7 +717,7 @@ bool export_mesh_to_meshlet(const std::string &path, SceneFormats::Mesh mesh, Sc encode_mesh(encoded, out_meshlets.data(), out_meshlets.size(), out_index_buffer[0].data, out_index_buffer.size(), attributes.data(), num_u32_streams); - encoded.mesh.mesh_style = MeshStyle::Textured; + encoded.mesh.mesh_style = style; assert(bounds.size() == encoded.mesh.meshlets.size()); const auto *pbounds = bounds.data(); diff --git a/tests/meshopt_sandbox.cpp b/tests/meshopt_sandbox.cpp index f41b6490..77ce80c6 100644 --- a/tests/meshopt_sandbox.cpp +++ b/tests/meshopt_sandbox.cpp @@ -266,5 +266,13 @@ int main(int argc, char *argv[]) LOGI("Uncompressed payload size: %llu bytes.\n", uncompressed_payload_size); } + { + file = GRANITE_FILESYSTEM()->open("/tmp/export.bin", FileMode::WriteOnly); + mapped = file->map_write((reference_index_buffer.size() + reference_attributes.size()) * sizeof(uint32_t)); + auto *ptr = mapped->mutable_data(); + memcpy(ptr, reference_index_buffer.data(), reference_index_buffer.size() * sizeof(uint32_t)); + memcpy(ptr + reference_index_buffer.size(), reference_attributes.data(), reference_attributes.size() * sizeof(uint32_t)); + } + return 0; } \ No newline at end of file From 03522ff3bbcc283641e80496a042590bd1a090d0 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Thu, 3 Aug 2023 10:04:43 +0200 Subject: [PATCH 46/71] Fix MSVC build. 
--- scene-export/meshlet_export.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scene-export/meshlet_export.cpp b/scene-export/meshlet_export.cpp index 99a071ce..09b27ee6 100644 --- a/scene-export/meshlet_export.cpp +++ b/scene-export/meshlet_export.cpp @@ -69,7 +69,7 @@ static i16vec4 encode_vec3_to_snorm_exp(vec3 v) { vec3 vabs = abs(v); float max_scale = max(max(vabs.x, vabs.y), vabs.z); - int max_scale_log2 = int(floor(log2(max_scale))); + int max_scale_log2 = int(muglm::floor(log2(max_scale))); int scale_log2 = 14 - max_scale_log2; // Maximum component should have range of [1, 2) since we use floor of log2, so scale with 2^14 instead of 15. @@ -85,7 +85,7 @@ static i16vec3 encode_vec2_to_snorm_exp(vec2 v) { vec2 vabs = abs(v); float max_scale = max(vabs.x, vabs.y); - int max_scale_log2 = int(floor(log2(max_scale))); + int max_scale_log2 = int(muglm::floor(log2(max_scale))); int scale_log2 = 14 - max_scale_log2; // UVs are unorm scaled, don't need more accuracy than this. From 4ff66fd662b6ef8617e9f0e4ff3b12c90c82ecc8 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Thu, 3 Aug 2023 11:27:26 +0200 Subject: [PATCH 47/71] Fix headless build with audio disabled. 
--- application/platforms/application_headless.cpp | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/application/platforms/application_headless.cpp b/application/platforms/application_headless.cpp index 3a00b0ac..a8cff0e9 100644 --- a/application/platforms/application_headless.cpp +++ b/application/platforms/application_headless.cpp @@ -235,19 +235,10 @@ struct WSIPlatformHeadless : Granite::GraniteWSIPlatform enc_opts.frame_timebase.den = int(frame_rate); #ifdef HAVE_GRANITE_AUDIO -#if 1 enc_opts.realtime = true; record_stream.reset(Audio::create_default_audio_record_backend("headless", 44100.0f, 2)); if (record_stream) encoder.set_audio_record_stream(record_stream.get()); -#else - auto *mixer = new Audio::Mixer; - auto *audio_dumper = new Audio::DumpBackend( - mixer, 48000.0f, 2, - unsigned(std::ceil(48000.0f / frame_rate))); - Global::install_audio_system(audio_dumper, mixer); - encoder.set_audio_source(audio_dumper); -#endif #endif if (!encoder.init(&app->get_wsi().get_device(), video_encode_path.c_str(), enc_opts)) @@ -266,7 +257,9 @@ struct WSIPlatformHeadless : Granite::GraniteWSIPlatform ycbcr_pipelines.push_back(encoder.create_ycbcr_pipeline(rgb_to_yuv, chroma_downsample)); } +#ifdef HAVE_GRANITE_AUDIO record_stream->start(); +#endif } #endif From bd4ac44bb66c41a9bf52db785459b159c97306dc Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Thu, 3 Aug 2023 11:27:48 +0200 Subject: [PATCH 48/71] Fix FFmpeg build on MSVC. 
--- video/CMakeLists.txt | 42 +++++++++++++++++++++++++++++++------- video/ffmpeg_hw_device.cpp | 2 ++ 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/video/CMakeLists.txt b/video/CMakeLists.txt index f21945ba..54437032 100644 --- a/video/CMakeLists.txt +++ b/video/CMakeLists.txt @@ -1,7 +1,3 @@ -include(FindPkgConfig) -pkg_check_modules(LIBAV REQUIRED IMPORTED_TARGET - libavdevice libavformat libavcodec libavutil) - add_granite_internal_lib(granite-video ffmpeg_encode.cpp ffmpeg_encode.hpp ffmpeg_hw_device.cpp ffmpeg_hw_device.hpp) @@ -9,9 +5,41 @@ if (GRANITE_VULKAN_SYSTEM_HANDLES) target_sources(granite-video PRIVATE ffmpeg_decode.cpp ffmpeg_decode.hpp) endif() +# FFmpeg macro uses designated initializer. +target_compile_features(granite-video PRIVATE cxx_std_20) + +option(GRANITE_FFMPEG_INSTALL_PREFIX "Override FFmpeg install prefix." "") +if (GRANITE_FFMPEG_INSTALL_PREFIX) + # For MSVC. It does not play well with Msys2 pkg-configs. + message("FFmpeg: install prefix ${GRANITE_FFMPEG_INSTALL_PREFIX}.") + target_include_directories(granite-video PRIVATE ${GRANITE_FFMPEG_INSTALL_PREFIX}/include) + find_library(AVDEVICE avdevice NAMES libavdevice + PATHS ${GRANITE_FFMPEG_INSTALL_PREFIX}/lib ${GRANITE_FFMPEG_INSTALL_PREFIX}/bin + NO_DEFAULT_PATH) + find_library(AVFORMAT avformat NAMES libavformat + PATHS ${GRANITE_FFMPEG_INSTALL_PREFIX}/lib ${GRANITE_FFMPEG_INSTALL_PREFIX}/bin + NO_DEFAULT_PATH) + find_library(AVCODEC avcodec NAMES libavcodec + PATHS ${GRANITE_FFMPEG_INSTALL_PREFIX}/lib ${GRANITE_FFMPEG_INSTALL_PREFIX}/bin + NO_DEFAULT_PATH) + find_library(AVUTIL avutil NAMES libavutil + PATHS ${GRANITE_FFMPEG_INSTALL_PREFIX}/lib ${GRANITE_FFMPEG_INSTALL_PREFIX}/bin + NO_DEFAULT_PATH) + message("FFmpeg: libavdevice (${AVDEVICE}).") + message("FFmpeg: libavformat (${AVFORMAT}).") + message("FFmpeg: libavcodec (${AVCODEC}).") + message("FFmpeg: libavutil (${AVUTIL}).") + target_link_libraries(granite-video PRIVATE ${AVDEVICE} ${AVFORMAT} ${AVCODEC} 
${AVUTIL}) +else() + include(FindPkgConfig) + pkg_check_modules(LIBAV REQUIRED IMPORTED_TARGET + libavdevice libavformat libavcodec libavutil) + target_link_libraries(granite-video PRIVATE PkgConfig::LIBAV) +endif() + target_link_libraries(granite-video - PUBLIC granite-vulkan - PRIVATE PkgConfig::LIBAV granite-threading granite-math) + PUBLIC granite-vulkan + PRIVATE granite-threading granite-math) if (GRANITE_AUDIO) target_link_libraries(granite-video PRIVATE granite-audio) endif() @@ -22,4 +50,4 @@ if (GRANITE_FFMPEG_VULKAN_ENCODE) target_compile_definitions(granite-video PRIVATE HAVE_FFMPEG_VULKAN_ENCODE) endif() target_include_directories(granite-video PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -target_compile_definitions(granite-video PUBLIC HAVE_GRANITE_FFMPEG) \ No newline at end of file +target_compile_definitions(granite-video PUBLIC HAVE_GRANITE_FFMPEG) diff --git a/video/ffmpeg_hw_device.cpp b/video/ffmpeg_hw_device.cpp index d7678517..a7175350 100644 --- a/video/ffmpeg_hw_device.cpp +++ b/video/ffmpeg_hw_device.cpp @@ -21,6 +21,8 @@ */ #define __STDC_LIMIT_MACROS 1 +#define __STDC_CONSTANT_MACROS 1 + #include "ffmpeg_hw_device.hpp" #include "logging.hpp" #include "device.hpp" From 560836509d8fc85a07727e7a575c39157eb40a85 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Thu, 3 Aug 2023 11:37:17 +0200 Subject: [PATCH 49/71] Export meshlet to local folder. 
--- tests/meshopt_sandbox.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/meshopt_sandbox.cpp b/tests/meshopt_sandbox.cpp index 77ce80c6..72c58394 100644 --- a/tests/meshopt_sandbox.cpp +++ b/tests/meshopt_sandbox.cpp @@ -220,13 +220,13 @@ int main(int argc, char *argv[]) auto mesh = parser.get_meshes().front(); - if (!Meshlet::export_mesh_to_meshlet("/tmp/export.msh1", + if (!Meshlet::export_mesh_to_meshlet("export.msh1", mesh, SceneFormats::Meshlet::MeshStyle::Textured)) { return EXIT_FAILURE; } - auto file = GRANITE_FILESYSTEM()->open("/tmp/export.msh1", FileMode::ReadOnly); + auto file = GRANITE_FILESYSTEM()->open("export.msh1", FileMode::ReadOnly); if (!file) return EXIT_FAILURE; @@ -267,7 +267,7 @@ int main(int argc, char *argv[]) } { - file = GRANITE_FILESYSTEM()->open("/tmp/export.bin", FileMode::WriteOnly); + file = GRANITE_FILESYSTEM()->open("export.bin", FileMode::WriteOnly); mapped = file->map_write((reference_index_buffer.size() + reference_attributes.size()) * sizeof(uint32_t)); auto *ptr = mapped->mutable_data(); memcpy(ptr, reference_index_buffer.data(), reference_index_buffer.size() * sizeof(uint32_t)); From b6476463b0152ddbd5a3c70f2e3594f460ae8dcb Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Thu, 3 Aug 2023 12:32:09 +0200 Subject: [PATCH 50/71] More build shenanigans. 
--- application/application.cpp | 8 ++-- application/global/global_managers.cpp | 18 ++++----- application/global/global_managers.hpp | 6 +-- application/global/global_managers_init.cpp | 4 +- .../global/global_managers_interface.hpp | 4 +- .../platforms/application_headless.cpp | 2 +- application/scene_viewer_application.cpp | 2 +- filesystem/asset_manager.cpp | 37 ++++++++++--------- filesystem/asset_manager.hpp | 18 ++++----- renderer/common_renderer_data.cpp | 4 +- renderer/common_renderer_data.hpp | 2 +- renderer/ground.cpp | 17 ++++++--- renderer/lights/decal_volume.cpp | 5 +-- renderer/material.hpp | 2 +- renderer/mesh_util.cpp | 6 +-- renderer/post/smaa.cpp | 8 ++-- ui/image_widget.cpp | 2 +- vulkan/context.hpp | 4 +- vulkan/managers/resource_manager.cpp | 6 +-- vulkan/managers/resource_manager.hpp | 8 ++-- 20 files changed, 84 insertions(+), 79 deletions(-) diff --git a/application/application.cpp b/application/application.cpp index 265a8213..6c3ce65b 100644 --- a/application/application.cpp +++ b/application/application.cpp @@ -35,7 +35,7 @@ namespace Granite { Application::Application() { - GRANITE_COMMON_RENDERER_DATA()->initialize_static_assets(GRANITE_ASSET_MANAGER(), GRANITE_FILESYSTEM()); + GRANITE_COMMON_RENDERER_DATA()->initialize_static_assets(GRANITE_ASSET_MANAGER_IMAGES(), GRANITE_FILESYSTEM()); } Application::~Application() @@ -75,7 +75,7 @@ bool Application::init_wsi(Vulkan::ContextHandle context) Context::SystemHandles system_handles; system_handles.filesystem = GRANITE_FILESYSTEM(); system_handles.thread_group = GRANITE_THREAD_GROUP(); - system_handles.asset_manager = GRANITE_ASSET_MANAGER(); + system_handles.asset_manager_images = GRANITE_ASSET_MANAGER_IMAGES(); system_handles.timeline_trace_file = system_handles.thread_group->get_timeline_trace_file(); if (!application_wsi.init_context_from_platform( @@ -152,7 +152,7 @@ void Application::check_initialization_progress() 
device.query_initialization_progress(Device::InitializationStage::ShaderModules) >= 100) { // Now is a good time to kick shader manager since it might require compute shaders for decode. - GRANITE_ASSET_MANAGER()->iterate(GRANITE_THREAD_GROUP()); + GRANITE_ASSET_MANAGER_IMAGES()->iterate(GRANITE_THREAD_GROUP()); GRANITE_SCOPED_TIMELINE_EVENT("dispatch-ready-modules"); GRANITE_EVENT_MANAGER()->enqueue_latched(&device, &device.get_shader_manager()); @@ -278,6 +278,6 @@ void Application::post_frame() { // Texture manager might require shaders to be ready before we can submit work. if (ready_modules) - GRANITE_ASSET_MANAGER()->iterate(GRANITE_THREAD_GROUP()); + GRANITE_ASSET_MANAGER_IMAGES()->iterate(GRANITE_THREAD_GROUP()); } } diff --git a/application/global/global_managers.cpp b/application/global/global_managers.cpp index d7848914..be7ba621 100644 --- a/application/global/global_managers.cpp +++ b/application/global/global_managers.cpp @@ -36,7 +36,7 @@ struct GlobalManagers Factory *factory; FilesystemInterface *filesystem; - AssetManagerInterface *asset_manager; + AssetManagerImagesInterface *asset_manager_images; EventManagerInterface *event_manager; ThreadGroupInterface *thread_group; UI::UIManagerInterface *ui_manager; @@ -90,11 +90,11 @@ FilesystemInterface *filesystem() return global_managers.filesystem; } -AssetManagerInterface *asset_manager() +AssetManagerImagesInterface *asset_manager_images() { - if (!global_managers.asset_manager) + if (!global_managers.asset_manager_images) LOGE("Asset manager was not initialized.\n"); - return global_managers.asset_manager; + return global_managers.asset_manager_images; } EventManagerInterface *event_manager() @@ -162,8 +162,8 @@ void init(Factory &factory, ManagerFeatureFlags flags, unsigned max_threads) if (flags & MANAGER_FEATURE_ASSET_MANAGER_BIT) { - if (!global_managers.asset_manager) - global_managers.asset_manager = factory.create_asset_manager(); + if (!global_managers.asset_manager_images) + 
global_managers.asset_manager_images = factory.create_asset_manager_images(); } bool kick_threads = false; @@ -243,7 +243,7 @@ void deinit() delete global_managers.common_renderer_data; delete global_managers.ui_manager; delete global_managers.thread_group; - delete global_managers.asset_manager; + delete global_managers.asset_manager_images; delete global_managers.filesystem; delete global_managers.event_manager; delete global_managers.logging; @@ -253,7 +253,7 @@ void deinit() global_managers.physics = nullptr; global_managers.common_renderer_data = nullptr; global_managers.filesystem = nullptr; - global_managers.asset_manager = nullptr; + global_managers.asset_manager_images = nullptr; global_managers.event_manager = nullptr; global_managers.thread_group = nullptr; global_managers.ui_manager = nullptr; @@ -290,7 +290,7 @@ void stop_audio_system() } FilesystemInterface *Factory::create_filesystem() { return nullptr; } -AssetManagerInterface *Factory::create_asset_manager() { return nullptr; } +AssetManagerImagesInterface *Factory::create_asset_manager_images() { return nullptr; } EventManagerInterface *Factory::create_event_manager() { return nullptr; } ThreadGroupInterface *Factory::create_thread_group() { return nullptr; } CommonRendererDataInterface *Factory::create_common_renderer_data() { return nullptr; } diff --git a/application/global/global_managers.hpp b/application/global/global_managers.hpp index 848004f4..ddecd113 100644 --- a/application/global/global_managers.hpp +++ b/application/global/global_managers.hpp @@ -61,7 +61,7 @@ class Factory virtual ~Factory() = default; virtual FilesystemInterface *create_filesystem(); - virtual AssetManagerInterface *create_asset_manager(); + virtual AssetManagerImagesInterface *create_asset_manager_images(); virtual EventManagerInterface *create_event_manager(); virtual ThreadGroupInterface *create_thread_group(); virtual CommonRendererDataInterface *create_common_renderer_data(); @@ -97,7 +97,7 @@ void 
install_audio_system(Audio::BackendInterface *backend, Audio::MixerInterfac Util::MessageQueueInterface *message_queue(); FilesystemInterface *filesystem(); -AssetManagerInterface *asset_manager(); +AssetManagerImagesInterface *asset_manager_images(); EventManagerInterface *event_manager(); ThreadGroupInterface *thread_group(); UI::UIManagerInterface *ui_manager(); @@ -110,7 +110,7 @@ PhysicsSystemInterface *physics(); #define GRANITE_MESSAGE_QUEUE() static_cast<::Util::MessageQueue *>(::Granite::Global::message_queue()) #define GRANITE_FILESYSTEM() static_cast<::Granite::Filesystem *>(::Granite::Global::filesystem()) -#define GRANITE_ASSET_MANAGER() static_cast<::Granite::AssetManager *>(::Granite::Global::asset_manager()) +#define GRANITE_ASSET_MANAGER_IMAGES() static_cast<::Granite::AssetManagerImages *>(::Granite::Global::asset_manager_images()) #define GRANITE_EVENT_MANAGER() static_cast<::Granite::EventManager *>(::Granite::Global::event_manager()) #define GRANITE_THREAD_GROUP() static_cast<::Granite::ThreadGroup *>(::Granite::Global::thread_group()) #define GRANITE_UI_MANAGER() static_cast<::Granite::UI::UIManager *>(::Granite::Global::ui_manager()) diff --git a/application/global/global_managers_init.cpp b/application/global/global_managers_init.cpp index c4bf855c..6dac1d1c 100644 --- a/application/global/global_managers_init.cpp +++ b/application/global/global_managers_init.cpp @@ -47,9 +47,9 @@ struct FactoryImplementation : Factory return new Filesystem; } - AssetManagerInterface *create_asset_manager() override + AssetManagerImagesInterface *create_asset_manager_images() override { - return new AssetManager; + return new AssetManagerImages; } EventManagerInterface *create_event_manager() override diff --git a/application/global/global_managers_interface.hpp b/application/global/global_managers_interface.hpp index b8910f97..a493d3b4 100644 --- a/application/global/global_managers_interface.hpp +++ b/application/global/global_managers_interface.hpp @@ 
-44,10 +44,10 @@ class FilesystemInterface virtual bool load_text_file(const std::string &path, std::string &str) = 0; }; -class AssetManagerInterface +class AssetManagerImagesInterface { public: - virtual ~AssetManagerInterface() = default; + virtual ~AssetManagerImagesInterface() = default; }; class ThreadGroupInterface diff --git a/application/platforms/application_headless.cpp b/application/platforms/application_headless.cpp index a8cff0e9..24023811 100644 --- a/application/platforms/application_headless.cpp +++ b/application/platforms/application_headless.cpp @@ -173,7 +173,7 @@ struct WSIPlatformHeadless : Granite::GraniteWSIPlatform system_handles.filesystem = GRANITE_FILESYSTEM(); system_handles.thread_group = GRANITE_THREAD_GROUP(); system_handles.timeline_trace_file = system_handles.thread_group->get_timeline_trace_file(); - system_handles.asset_manager = GRANITE_ASSET_MANAGER(); + system_handles.asset_manager_images = GRANITE_ASSET_MANAGER_IMAGES(); context->set_system_handles(system_handles); context->set_num_thread_indices(GRANITE_THREAD_GROUP()->get_num_threads() + 1); diff --git a/application/scene_viewer_application.cpp b/application/scene_viewer_application.cpp index 72765b4e..cdd1681f 100644 --- a/application/scene_viewer_application.cpp +++ b/application/scene_viewer_application.cpp @@ -1419,7 +1419,7 @@ void SceneViewerApplication::render_ui(CommandBuffer &cmd) snprintf(pos_text, sizeof(pos_text), "Pos: %.3f, %.3f, %.3f", cam_pos.x, cam_pos.y, cam_pos.z); snprintf(rot_text, sizeof(rot_text), "Rot: %.3f, %.3f, %.3f, %.3f", cam_ori.x, cam_ori.y, cam_ori.z, cam_ori.w); snprintf(tex_text, sizeof(tex_text), "Texture: %u MiB", - unsigned(GRANITE_ASSET_MANAGER()->get_current_total_consumed() / (1024 * 1024))); + unsigned(GRANITE_ASSET_MANAGER_IMAGES()->get_current_total_consumed() / (1024 * 1024))); vec3 offset(5.0f, 5.0f, 0.0f); vec2 size(cmd.get_viewport().width - 10.0f, cmd.get_viewport().height - 10.0f); diff --git a/filesystem/asset_manager.cpp 
b/filesystem/asset_manager.cpp index 8b033a73..b9e6ec8c 100644 --- a/filesystem/asset_manager.cpp +++ b/filesystem/asset_manager.cpp @@ -27,21 +27,21 @@ namespace Granite { -AssetManager::AssetManager() +AssetManagerImages::AssetManagerImages() { signal = std::make_unique(); for (uint64_t i = 0; i < timestamp; i++) signal->signal_increment(); } -AssetManager::~AssetManager() +AssetManagerImages::~AssetManagerImages() { signal->wait_until_at_least(timestamp); for (auto *a : asset_bank) pool.free(a); } -ImageAssetID AssetManager::register_image_resource_nolock(FileHandle file, ImageClass image_class, int prio) +ImageAssetID AssetManagerImages::register_image_resource_nolock(FileHandle file, ImageClass image_class, int prio) { auto *info = pool.allocate(); info->handle = std::move(file); @@ -59,17 +59,18 @@ ImageAssetID AssetManager::register_image_resource_nolock(FileHandle file, Image return ret; } -void AssetInstantiatorInterface::set_image_class(ImageAssetID, ImageClass) +void AssetInstantiatorImagesInterface::set_image_class(ImageAssetID, ImageClass) { } -ImageAssetID AssetManager::register_image_resource(FileHandle file, ImageClass image_class, int prio) +ImageAssetID AssetManagerImages::register_image_resource(FileHandle file, ImageClass image_class, int prio) { std::lock_guard holder{asset_bank_lock}; return register_image_resource_nolock(std::move(file), image_class, prio); } -ImageAssetID AssetManager::register_image_resource(Filesystem &fs, const std::string &path, ImageClass image_class, int prio) +ImageAssetID AssetManagerImages::register_image_resource(Filesystem &fs, const std::string &path, + ImageClass image_class, int prio) { std::lock_guard holder{asset_bank_lock}; @@ -88,13 +89,13 @@ ImageAssetID AssetManager::register_image_resource(Filesystem &fs, const std::st return id; } -void AssetManager::update_cost(ImageAssetID id, uint64_t cost) +void AssetManagerImages::update_cost(ImageAssetID id, uint64_t cost) { std::lock_guard 
holder{cost_update_lock}; thread_cost_updates.push_back({ id, cost }); } -void AssetManager::set_asset_instantiator_interface(AssetInstantiatorInterface *iface_) +void AssetManagerImages::set_asset_instantiator_interface(AssetInstantiatorImagesInterface *iface_) { if (iface) { @@ -120,22 +121,22 @@ void AssetManager::set_asset_instantiator_interface(AssetInstantiatorInterface * } } -void AssetManager::mark_used_resource(ImageAssetID id) +void AssetManagerImages::mark_used_resource(ImageAssetID id) { lru_append.push(id); } -void AssetManager::set_image_budget(uint64_t cost) +void AssetManagerImages::set_image_budget(uint64_t cost) { image_budget = cost; } -void AssetManager::set_image_budget_per_iteration(uint64_t cost) +void AssetManagerImages::set_image_budget_per_iteration(uint64_t cost) { image_budget_per_iteration = cost; } -bool AssetManager::set_image_residency_priority(ImageAssetID id, int prio) +bool AssetManagerImages::set_image_residency_priority(ImageAssetID id, int prio) { std::lock_guard holder{asset_bank_lock}; if (id.id >= asset_bank.size()) @@ -144,7 +145,7 @@ bool AssetManager::set_image_residency_priority(ImageAssetID id, int prio) return true; } -void AssetManager::adjust_update(const CostUpdate &update) +void AssetManagerImages::adjust_update(const CostUpdate &update) { if (update.id.id < asset_bank.size()) { @@ -159,12 +160,12 @@ void AssetManager::adjust_update(const CostUpdate &update) } } -uint64_t AssetManager::get_current_total_consumed() const +uint64_t AssetManagerImages::get_current_total_consumed() const { return total_consumed; } -void AssetManager::update_costs_locked_assets() +void AssetManagerImages::update_costs_locked_assets() { { std::lock_guard holder_cost{cost_update_lock}; @@ -176,7 +177,7 @@ void AssetManager::update_costs_locked_assets() cost_updates.clear(); } -void AssetManager::update_lru_locked_assets() +void AssetManagerImages::update_lru_locked_assets() { lru_append.for_each_ranged([this](const ImageAssetID *id, 
size_t count) { for (size_t i = 0; i < count; i++) @@ -186,7 +187,7 @@ void AssetManager::update_lru_locked_assets() lru_append.clear(); } -bool AssetManager::iterate_blocking(ThreadGroup &group, ImageAssetID id) +bool AssetManagerImages::iterate_blocking(ThreadGroup &group, ImageAssetID id) { if (!iface) return false; @@ -219,7 +220,7 @@ bool AssetManager::iterate_blocking(ThreadGroup &group, ImageAssetID id) return true; } -void AssetManager::iterate(ThreadGroup *group) +void AssetManagerImages::iterate(ThreadGroup *group) { if (!iface) return; diff --git a/filesystem/asset_manager.hpp b/filesystem/asset_manager.hpp index 1ecb0ced..9f96734a 100644 --- a/filesystem/asset_manager.hpp +++ b/filesystem/asset_manager.hpp @@ -52,7 +52,7 @@ struct MeshAssetID inline bool operator!=(const MeshAssetID &other) const { return !(*this == other); } }; -class AssetManager; +class AssetManagerImages; // If we have to fall back due to no image being present, // lets asset instantiator know what to substitute. @@ -75,17 +75,17 @@ class ThreadGroup; struct TaskGroup; struct TaskSignal; -class AssetInstantiatorInterface +class AssetInstantiatorImagesInterface { public: - virtual ~AssetInstantiatorInterface() = default; + virtual ~AssetInstantiatorImagesInterface() = default; // This estimate should be an upper bound. virtual uint64_t estimate_cost_image_resource(ImageAssetID id, File &mapping) = 0; // When instantiation completes, manager.update_cost() must be called with the real cost. // The real cost may only be known after async parsing of the file. - virtual void instantiate_image_resource(AssetManager &manager, TaskGroup *group, ImageAssetID id, File &mapping) = 0; + virtual void instantiate_image_resource(AssetManagerImages &manager, TaskGroup *group, ImageAssetID id, File &mapping) = 0; // Will only be called after an upload completes through manager.update_cost(). 
virtual void release_image_resource(ImageAssetID id) = 0; @@ -96,16 +96,16 @@ class AssetInstantiatorInterface virtual void latch_handles() = 0; }; -class AssetManager final : public AssetManagerInterface +class AssetManagerImages final : public AssetManagerImagesInterface { public: // Persistent prio means the resource is treated as an internal LUT that must always be resident, no matter what. constexpr static int persistent_prio() { return 0x7fffffff; } - AssetManager(); - ~AssetManager() override; + AssetManagerImages(); + ~AssetManagerImages() override; - void set_asset_instantiator_interface(AssetInstantiatorInterface *iface); + void set_asset_instantiator_interface(AssetInstantiatorImagesInterface *iface); void set_image_budget(uint64_t cost); void set_image_budget_per_iteration(uint64_t cost); @@ -152,7 +152,7 @@ class AssetManager final : public AssetManagerInterface Util::AtomicAppendBuffer lru_append; Util::IntrusiveHashMapHolder file_to_assets; - AssetInstantiatorInterface *iface = nullptr; + AssetInstantiatorImagesInterface *iface = nullptr; uint32_t id_count = 0; uint64_t total_consumed = 0; uint64_t image_budget = 0; diff --git a/renderer/common_renderer_data.cpp b/renderer/common_renderer_data.cpp index dc9fda34..da151ddf 100644 --- a/renderer/common_renderer_data.cpp +++ b/renderer/common_renderer_data.cpp @@ -109,10 +109,10 @@ void LightMesh::on_device_destroyed(const Vulkan::DeviceCreatedEvent &) point_ibo.reset(); } -void CommonRendererData::initialize_static_assets(AssetManager *iface, Filesystem *fs) +void CommonRendererData::initialize_static_assets(AssetManagerImages *iface, Filesystem *fs) { LOGI("Initializing static assets.\n"); brdf_tables = iface->register_image_resource(*fs, "builtin://textures/ibl_brdf_lut.gtx", ImageClass::Zeroable, - AssetManager::persistent_prio()); + AssetManagerImages::persistent_prio()); } } diff --git a/renderer/common_renderer_data.hpp b/renderer/common_renderer_data.hpp index 4e147860..b5ce23b2 100644 --- 
a/renderer/common_renderer_data.hpp +++ b/renderer/common_renderer_data.hpp @@ -58,6 +58,6 @@ class CommonRendererData final : public CommonRendererDataInterface public: LightMesh light_mesh; ImageAssetID brdf_tables; - void initialize_static_assets(AssetManager *iface, Filesystem *file_iface); + void initialize_static_assets(AssetManagerImages *iface, Filesystem *file_iface); }; } \ No newline at end of file diff --git a/renderer/ground.cpp b/renderer/ground.cpp index c25def4a..92a6ea67 100644 --- a/renderer/ground.cpp +++ b/renderer/ground.cpp @@ -182,12 +182,17 @@ Ground::Ground(unsigned size_, const TerrainInfo &info_) num_patches_z = size / info.base_patch_size; patch_lods.resize(num_patches_x * num_patches_z); - heights = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), info.heightmap, ImageClass::Zeroable); - normals = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), info.normalmap, ImageClass::Normal); - occlusion = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), info.occlusionmap, ImageClass::Zeroable); - normals_fine = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), info.normalmap_fine, ImageClass::Normal); - base_color = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), info.base_color, ImageClass::Color); - type_map = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), info.splatmap, ImageClass::Zeroable); + heights = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource(*GRANITE_FILESYSTEM(), info.heightmap, ImageClass::Zeroable); + normals = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource(*GRANITE_FILESYSTEM(), info.normalmap, + ImageClass::Normal); + occlusion = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource(*GRANITE_FILESYSTEM(), info.occlusionmap, + ImageClass::Zeroable); + normals_fine = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource(*GRANITE_FILESYSTEM(), info.normalmap_fine, + 
ImageClass::Normal); + base_color = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource(*GRANITE_FILESYSTEM(), info.base_color, + ImageClass::Color); + type_map = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource(*GRANITE_FILESYSTEM(), info.splatmap, + ImageClass::Zeroable); EVENT_MANAGER_REGISTER_LATCH(Ground, on_device_created, on_device_destroyed, DeviceCreatedEvent); } diff --git a/renderer/lights/decal_volume.cpp b/renderer/lights/decal_volume.cpp index 3e5e9cb6..284b4c7c 100644 --- a/renderer/lights/decal_volume.cpp +++ b/renderer/lights/decal_volume.cpp @@ -29,9 +29,8 @@ namespace Granite { VolumetricDecal::VolumetricDecal() { - tex = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), - "builtin://textures/decal.png", - ImageClass::Color); + tex = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource( + *GRANITE_FILESYSTEM(), "builtin://textures/decal.png", ImageClass::Color); } const Vulkan::ImageView *VolumetricDecal::get_decal_view(Vulkan::Device &device) const diff --git a/renderer/material.hpp b/renderer/material.hpp index b7c4ce1e..14863ade 100644 --- a/renderer/material.hpp +++ b/renderer/material.hpp @@ -97,7 +97,7 @@ struct Material { if (!info.paths[i].empty()) { - textures[i] = GRANITE_ASSET_MANAGER()->register_image_resource( + textures[i] = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource( *GRANITE_FILESYSTEM(), info.paths[i], image_classes[i]); } } diff --git a/renderer/mesh_util.cpp b/renderer/mesh_util.cpp index 69e0b682..295b528b 100644 --- a/renderer/mesh_util.cpp +++ b/renderer/mesh_util.cpp @@ -890,7 +890,7 @@ SkyCylinder::SkyCylinder(const std::string &bg_path) { if (!bg_path.empty()) { - texture = GRANITE_ASSET_MANAGER()->register_image_resource( + texture = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource( *GRANITE_FILESYSTEM(), bg_path, ImageClass::Color); } @@ -1056,7 +1056,7 @@ Skybox::Skybox(const std::string &bg_path) { if (!bg_path.empty()) { - texture = 
GRANITE_ASSET_MANAGER()->register_image_resource( + texture = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource( *GRANITE_FILESYSTEM(), bg_path, ImageClass::Color); } } @@ -1195,7 +1195,7 @@ static void texture_plane_render(CommandBuffer &cmd, const RenderQueueData *info TexturePlane::TexturePlane(const std::string &normal_path) { - normalmap = GRANITE_ASSET_MANAGER()->register_image_resource( + normalmap = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource( *GRANITE_FILESYSTEM(), normal_path, ImageClass::Normal); } diff --git a/renderer/post/smaa.cpp b/renderer/post/smaa.cpp index 8952fcb7..c278d9cc 100644 --- a/renderer/post/smaa.cpp +++ b/renderer/post/smaa.cpp @@ -145,10 +145,10 @@ void setup_smaa_postprocess(RenderGraph &graph, TemporalJitter &jitter, return true; }); - auto area = GRANITE_ASSET_MANAGER()->register_image_resource( - *GRANITE_FILESYSTEM(), "builtin://textures/smaa/area.gtx", ImageClass::Zeroable, AssetManager::persistent_prio()); - auto search = GRANITE_ASSET_MANAGER()->register_image_resource( - *GRANITE_FILESYSTEM(), "builtin://textures/smaa/search.gtx", ImageClass::Zeroable, AssetManager::persistent_prio()); + auto area = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource( + *GRANITE_FILESYSTEM(), "builtin://textures/smaa/area.gtx", ImageClass::Zeroable, AssetManagerImages::persistent_prio()); + auto search = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource( + *GRANITE_FILESYSTEM(), "builtin://textures/smaa/search.gtx", ImageClass::Zeroable, AssetManagerImages::persistent_prio()); smaa_weight.set_build_render_pass([&, area, search, edge = masked_edge, q = smaa_quality](Vulkan::CommandBuffer &cmd) { auto &input_image = graph.get_physical_texture_resource(weight_input_res); diff --git a/ui/image_widget.cpp b/ui/image_widget.cpp index 687bae07..861a8a41 100644 --- a/ui/image_widget.cpp +++ b/ui/image_widget.cpp @@ -33,7 +33,7 @@ namespace UI { Image::Image(const std::string &path, vec2 target) { - texture = 
GRANITE_ASSET_MANAGER()->register_image_resource( + texture = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource( *GRANITE_FILESYSTEM(), path, ImageClass::Color); diff --git a/vulkan/context.hpp b/vulkan/context.hpp index a2ab4f7c..eb2101c6 100644 --- a/vulkan/context.hpp +++ b/vulkan/context.hpp @@ -41,7 +41,7 @@ namespace Granite { class Filesystem; class ThreadGroup; -class AssetManager; +class AssetManagerImages; } namespace Vulkan @@ -326,7 +326,7 @@ class Context Util::TimelineTraceFile *timeline_trace_file = nullptr; Granite::Filesystem *filesystem = nullptr; Granite::ThreadGroup *thread_group = nullptr; - Granite::AssetManager *asset_manager = nullptr; + Granite::AssetManagerImages *asset_manager_images = nullptr; }; void set_system_handles(const SystemHandles &handles_) diff --git a/vulkan/managers/resource_manager.cpp b/vulkan/managers/resource_manager.cpp index 073d66ee..841dc486 100644 --- a/vulkan/managers/resource_manager.cpp +++ b/vulkan/managers/resource_manager.cpp @@ -73,7 +73,7 @@ uint64_t ResourceManager::estimate_cost_image_resource(Granite::ImageAssetID, Gr void ResourceManager::init() { - manager = device->get_system_handles().asset_manager; + manager = device->get_system_handles().asset_manager_images; // Need to initialize these before setting the interface. 
{ @@ -242,7 +242,7 @@ const Vulkan::ImageView *ResourceManager::get_image_view_blocking(Granite::Image return &textures[id.id].image->get_view(); } -void ResourceManager::instantiate_image_resource(Granite::AssetManager &manager_, Granite::TaskGroup *task, +void ResourceManager::instantiate_image_resource(Granite::AssetManagerImages &manager_, Granite::TaskGroup *task, Granite::ImageAssetID id, Granite::File &file) { if (task) @@ -257,7 +257,7 @@ void ResourceManager::instantiate_image_resource(Granite::AssetManager &manager_ } } -void ResourceManager::instantiate_image_resource(Granite::AssetManager &manager_, +void ResourceManager::instantiate_image_resource(Granite::AssetManagerImages &manager_, Granite::ImageAssetID id, Granite::File &file) { diff --git a/vulkan/managers/resource_manager.hpp b/vulkan/managers/resource_manager.hpp index 3c129491..857d2cbd 100644 --- a/vulkan/managers/resource_manager.hpp +++ b/vulkan/managers/resource_manager.hpp @@ -31,7 +31,7 @@ namespace Vulkan { class MemoryMappedTexture; -class ResourceManager final : private Granite::AssetInstantiatorInterface +class ResourceManager final : private Granite::AssetInstantiatorImagesInterface { public: explicit ResourceManager(Device *device); @@ -50,11 +50,11 @@ class ResourceManager final : private Granite::AssetInstantiatorInterface private: Device *device; - Granite::AssetManager *manager = nullptr; + Granite::AssetManagerImages *manager = nullptr; void latch_handles() override; uint64_t estimate_cost_image_resource(Granite::ImageAssetID id, Granite::File &file) override; - void instantiate_image_resource(Granite::AssetManager &manager, Granite::TaskGroup *task, + void instantiate_image_resource(Granite::AssetManagerImages &manager, Granite::TaskGroup *task, Granite::ImageAssetID id, Granite::File &file) override; void release_image_resource(Granite::ImageAssetID id) override; void set_id_bounds(uint32_t bound) override; @@ -83,6 +83,6 @@ class ResourceManager final : private 
Granite::AssetInstantiatorInterface ImageHandle create_other(const Granite::FileMapping &mapping, Granite::ImageClass image_class, Granite::ImageAssetID id); const ImageHandle &get_fallback_image(Granite::ImageClass image_class); - void instantiate_image_resource(Granite::AssetManager &manager, Granite::ImageAssetID id, Granite::File &file); + void instantiate_image_resource(Granite::AssetManagerImages &manager, Granite::ImageAssetID id, Granite::File &file); }; } From 6aeda2da82c5841cedf5e8011dd399c0caec4584 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Thu, 3 Aug 2023 13:31:59 +0200 Subject: [PATCH 51/71] More fixups. --- tests/asset_manager_test.cpp | 6 +++--- tests/bandlimited_pixel_test.cpp | 2 +- tests/ui_sandbox.cpp | 8 ++++---- tools/aa_bench.cpp | 4 ++-- tools/convert_cube_to_environment.cpp | 2 +- tools/convert_equirect_to_environment.cpp | 2 +- tools/texture_viewer.cpp | 4 ++-- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/asset_manager_test.cpp b/tests/asset_manager_test.cpp index 7f1bb3db..086c9233 100644 --- a/tests/asset_manager_test.cpp +++ b/tests/asset_manager_test.cpp @@ -4,14 +4,14 @@ using namespace Granite; -struct ActivationInterface final : AssetInstantiatorInterface +struct ActivationInterface final : AssetInstantiatorImagesInterface { uint64_t estimate_cost_image_resource(ImageAssetID, File &mapping) override { return mapping.get_size(); } - void instantiate_image_resource(AssetManager &manager, TaskGroup *, ImageAssetID id, File &mapping) override + void instantiate_image_resource(AssetManagerImages &manager, TaskGroup *, ImageAssetID id, File &mapping) override { LOGI("Instantiating ID: %u\n", id.id); manager.update_cost(id, mapping.get_size()); @@ -38,7 +38,7 @@ struct ActivationInterface final : AssetInstantiatorInterface int main() { Filesystem fs; - AssetManager manager; + AssetManagerImages manager; ActivationInterface iface; fs.register_protocol("tmp", std::make_unique()); diff --git 
a/tests/bandlimited_pixel_test.cpp b/tests/bandlimited_pixel_test.cpp index 4e9d34f1..bfd5e92a 100644 --- a/tests/bandlimited_pixel_test.cpp +++ b/tests/bandlimited_pixel_test.cpp @@ -117,7 +117,7 @@ struct BandlimitedPixelTestApplication : Application, EventHandler { "BANDLIMITED_PIXEL_USE_TRANSCENDENTAL", 1 }, }); - auto texture = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), "assets://textures/sprite.png", ImageClass::Color); + auto texture = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource(*GRANITE_FILESYSTEM(), "assets://textures/sprite.png", ImageClass::Color); auto *view = cmd->get_device().get_resource_manager().get_image_view_blocking(texture); cmd->set_texture(2, 0, *view, mode == 0 ? StockSampler::NearestWrap : StockSampler::TrilinearWrap); diff --git a/tests/ui_sandbox.cpp b/tests/ui_sandbox.cpp index c2d7b444..39f346b2 100644 --- a/tests/ui_sandbox.cpp +++ b/tests/ui_sandbox.cpp @@ -50,7 +50,7 @@ UIApplication::UIApplication() window->show_title_bar(false); window->set_floating(false); window->set_background_color(vec4(0.0f, 1.0f, 0.0f, 1.0f)); - window->set_background_image(GRANITE_ASSET_MANAGER()->register_image_resource( + window->set_background_image(GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource( *GRANITE_FILESYSTEM(), "builtin://textures/checkerboard.png", ImageClass::Color)); auto button = make_handle(); @@ -90,7 +90,7 @@ UIApplication::UIApplication() slider->show_value(false); slider->set_margin(5.0f); slider->show_tooltip(true); - slider->set_background_image(GRANITE_ASSET_MANAGER()->register_image_resource( + slider->set_background_image(GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource( *GRANITE_FILESYSTEM(), "builtin://textures/checkerboard.png", ImageClass::Color)); slider->set_background_color(vec4(1.0f)); } @@ -111,7 +111,7 @@ UIApplication::UIApplication() sli.show_value(false); sli.set_margin(5.0f); sli.show_tooltip(true); - 
sli.set_background_image(GRANITE_ASSET_MANAGER()->register_image_resource( + sli.set_background_image(GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource( *GRANITE_FILESYSTEM(), "builtin://textures/checkerboard.png", ImageClass::Color)); sli.set_background_color(vec4(1.0f)); } @@ -126,7 +126,7 @@ UIApplication::UIApplication() btn.set_text("Mjuu"); btn.set_toggled_font_color(vec4(0.0f, 1.0f, 0.0f, 1.0f)); btn.set_untoggled_font_color(vec4(1.0f, 0.0f, 0.0f, 1.0f)); - btn.set_background_image(GRANITE_ASSET_MANAGER()->register_image_resource( + btn.set_background_image(GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource( *GRANITE_FILESYSTEM(), "builtin://textures/checkerboard.png", ImageClass::Color)); btn.set_background_color(vec4(1.0f)); } diff --git a/tools/aa_bench.cpp b/tools/aa_bench.cpp index 21b73cc3..bb45c1fa 100644 --- a/tools/aa_bench.cpp +++ b/tools/aa_bench.cpp @@ -39,8 +39,8 @@ AABenchApplication::AABenchApplication(const std::string &input0, const std::str : input_path0(input0), input_path1(input1), scale(scale_) { type = string_to_post_antialiasing_type(method); - images[0] = input_path0.empty() ? ImageAssetID{} : GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), input_path0, ImageClass::Color); - images[1] = input_path1.empty() ? ImageAssetID{} : GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), input_path1, ImageClass::Color); + images[0] = input_path0.empty() ? ImageAssetID{} : GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource(*GRANITE_FILESYSTEM(), input_path0, ImageClass::Color); + images[1] = input_path1.empty() ? 
ImageAssetID{} : GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource(*GRANITE_FILESYSTEM(), input_path1, ImageClass::Color); EVENT_MANAGER_REGISTER_LATCH(AABenchApplication, on_swapchain_changed, on_swapchain_destroyed, SwapchainParameterEvent); EVENT_MANAGER_REGISTER_LATCH(AABenchApplication, on_device_created, on_device_destroyed, DeviceCreatedEvent); } diff --git a/tools/convert_cube_to_environment.cpp b/tools/convert_cube_to_environment.cpp index d1659cc8..cba2705c 100644 --- a/tools/convert_cube_to_environment.cpp +++ b/tools/convert_cube_to_environment.cpp @@ -81,7 +81,7 @@ int main(int argc, char *argv[]) device.set_context(context); device.init_external_swapchain({ ImageHandle(nullptr) }); - auto cube = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), args.cube, ImageClass::Color); + auto cube = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource(*GRANITE_FILESYSTEM(), args.cube, ImageClass::Color); auto *view = device.get_resource_manager().get_image_view_blocking(cube); auto specular = convert_cube_to_ibl_specular(device, *view); auto diffuse = convert_cube_to_ibl_diffuse(device, *view); diff --git a/tools/convert_equirect_to_environment.cpp b/tools/convert_equirect_to_environment.cpp index 7b6252d3..c2031c22 100644 --- a/tools/convert_equirect_to_environment.cpp +++ b/tools/convert_equirect_to_environment.cpp @@ -86,7 +86,7 @@ int main(int argc, char *argv[]) device.init_external_swapchain({ ImageHandle(nullptr) }); auto &textures = device.get_resource_manager(); - auto equirect = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), args.equirect, ImageClass::Color); + auto equirect = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource(*GRANITE_FILESYSTEM(), args.equirect, ImageClass::Color); auto *view = textures.get_image_view_blocking(equirect); auto cube = convert_equirect_to_cube(device, *view, args.cube_scale); diff --git a/tools/texture_viewer.cpp b/tools/texture_viewer.cpp index 
6cd40d83..4e1f0484 100644 --- a/tools/texture_viewer.cpp +++ b/tools/texture_viewer.cpp @@ -37,8 +37,8 @@ struct TextureViewerApplication : Granite::Application, Granite::EventHandler TextureViewerApplication(std::string path_) : path(std::move(path_)) { - texture = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), - path, ImageClass::Color); + texture = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource(*GRANITE_FILESYSTEM(), + path, ImageClass::Color); EVENT_MANAGER_REGISTER(TextureViewerApplication, on_key_pressed, KeyboardEvent); } From 31aa48c35bc7c29610cd1a7bb4044b75791d3a91 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Thu, 3 Aug 2023 15:25:57 +0200 Subject: [PATCH 52/71] More asset manager noodling. --- .../global/global_managers_interface.hpp | 6 ++ filesystem/asset_manager.cpp | 83 +++++++++++++++++-- filesystem/asset_manager.hpp | 67 ++++++++++++++- tests/asset_manager_test.cpp | 4 +- vulkan/managers/resource_manager.cpp | 4 +- vulkan/managers/resource_manager.hpp | 4 +- 6 files changed, 154 insertions(+), 14 deletions(-) diff --git a/application/global/global_managers_interface.hpp b/application/global/global_managers_interface.hpp index a493d3b4..c2b0fefc 100644 --- a/application/global/global_managers_interface.hpp +++ b/application/global/global_managers_interface.hpp @@ -50,6 +50,12 @@ class AssetManagerImagesInterface virtual ~AssetManagerImagesInterface() = default; }; +class AssetManagerMeshesInterface +{ +public: + virtual ~AssetManagerMeshesInterface() = default; +}; + class ThreadGroupInterface { public: diff --git a/filesystem/asset_manager.cpp b/filesystem/asset_manager.cpp index b9e6ec8c..063122b0 100644 --- a/filesystem/asset_manager.cpp +++ b/filesystem/asset_manager.cpp @@ -53,7 +53,7 @@ ImageAssetID AssetManagerImages::register_image_resource_nolock(FileHandle file, sorted_assets.reserve(asset_bank.size()); if (iface) { - iface->set_id_bounds(id_count); + iface->set_image_id_bounds(id_count); 
iface->set_image_class(info->id, image_class); } return ret; @@ -115,7 +115,7 @@ void AssetManagerImages::set_asset_instantiator_interface(AssetInstantiatorImage iface = iface_; if (iface) { - iface->set_id_bounds(id_count); + iface->set_image_id_bounds(id_count); for (uint32_t i = 0; i < id_count; i++) iface->set_image_class(ImageAssetID{i}, asset_bank[i]->image_class); } @@ -232,7 +232,7 @@ void AssetManagerImages::iterate(ThreadGroup *group) uint64_t current_count = signal->get_count(); if (current_count + 3 < timestamp) { - iface->latch_handles(); + iface->latch_image_handles(); LOGI("Asset manager skipping iteration due to too much pending work.\n"); return; } @@ -241,7 +241,7 @@ void AssetManagerImages::iterate(ThreadGroup *group) if (group) { task = group->create_task(); - task->set_desc("asset-manager-instantiate"); + task->set_desc("asset-manager-instantiate-image"); task->set_fence_counter_signal(signal.get()); task->set_task_class(TaskClass::Background); } @@ -365,7 +365,80 @@ void AssetManagerImages::iterate(ThreadGroup *group) static_cast(activated_cost_this_iteration / 1024)); } - iface->latch_handles(); + iface->latch_image_handles(); timestamp++; } + +AssetManagerMeshes::AssetManagerMeshes() +{ + signal = std::make_unique(); + for (uint64_t i = 0; i < timestamp; i++) + signal->signal_increment(); +} + +AssetManagerMeshes::~AssetManagerMeshes() +{ + signal->wait_until_at_least(timestamp); + for (auto *a : asset_bank) + pool.free(a); +} + +MeshAssetID AssetManagerMeshes::register_mesh_resource_nolock(FileHandle file) +{ + auto *info = pool.allocate(); + info->handle = std::move(file); + info->id.id = id_count++; + MeshAssetID ret = info->id; + asset_bank.push_back(info); + if (iface) + iface->set_mesh_id_bounds(id_count); + return ret; +} + +MeshAssetID AssetManagerMeshes::register_mesh_resource(FileHandle file) +{ + std::lock_guard holder{asset_bank_lock}; + return register_mesh_resource_nolock(std::move(file)); +} + +MeshAssetID 
AssetManagerMeshes::register_mesh_resource(Filesystem &fs, const std::string &path) +{ + std::lock_guard holder{asset_bank_lock}; + + Util::Hasher h; + h.string(path); + if (auto *asset = file_to_assets.find(h.get())) + return asset->id; + + auto file = fs.open(path); + if (!file) + return {}; + + auto id = register_mesh_resource_nolock(std::move(file)); + asset_bank[id.id]->set_hash(h.get()); + file_to_assets.insert_replace(asset_bank[id.id]); + return id; +} + +void AssetManagerMeshes::set_asset_instantiator_interface(AssetInstantiatorMeshesInterface *iface_) +{ + if (iface) + { + signal->wait_until_at_least(timestamp); + for (uint32_t id = 0; id < id_count; id++) + iface->release_mesh_resource(MeshAssetID{id}); + } + + for (auto *a : asset_bank) + { + a->consumed = 0; + a->pending_consumed = 0; + a->last_used = 0; + } + total_consumed = 0; + + iface = iface_; + if (iface) + iface->set_mesh_id_bounds(id_count); +} } diff --git a/filesystem/asset_manager.hpp b/filesystem/asset_manager.hpp index 9f96734a..be4e195e 100644 --- a/filesystem/asset_manager.hpp +++ b/filesystem/asset_manager.hpp @@ -52,7 +52,18 @@ struct MeshAssetID inline bool operator!=(const MeshAssetID &other) const { return !(*this == other); } }; +struct MaterialAssetID +{ + uint32_t id = uint32_t(-1); + MaterialAssetID() = default; + explicit MaterialAssetID(uint32_t id_) : id{ id_ } {} + explicit inline operator bool() const { return id != uint32_t(-1); } + inline bool operator==(const MaterialAssetID &other) const { return id == other.id; } + inline bool operator!=(const MaterialAssetID &other) const { return !(*this == other); } +}; + class AssetManagerImages; +class AssetManagerMeshes; // If we have to fall back due to no image being present, // lets asset instantiator know what to substitute. @@ -89,11 +100,21 @@ class AssetInstantiatorImagesInterface // Will only be called after an upload completes through manager.update_cost(). 
virtual void release_image_resource(ImageAssetID id) = 0; - virtual void set_id_bounds(uint32_t bound) = 0; + virtual void set_image_id_bounds(uint32_t bound) = 0; virtual void set_image_class(ImageAssetID id, ImageClass image_class); - // Called in AssetManager::iterate(). - virtual void latch_handles() = 0; + // Called in AssetManagerImages::iterate(). + virtual void latch_image_handles() = 0; +}; + +class AssetInstantiatorMeshesInterface +{ +public: + virtual ~AssetInstantiatorMeshesInterface() = default; + virtual void instantiate_mesh_resource(AssetManagerMeshes &manager, TaskGroup *group, MeshAssetID id, File &mapping) = 0; + virtual void set_mesh_id_bounds(uint32_t bound) = 0; + virtual void release_mesh_resource(MeshAssetID id) = 0; + virtual void latch_mesh_handles() = 0; }; class AssetManagerImages final : public AssetManagerImagesInterface @@ -176,4 +197,44 @@ class AssetManagerImages final : public AssetManagerImagesInterface void update_costs_locked_assets(); void update_lru_locked_assets(); }; + +class AssetManagerMeshes final : public AssetManagerMeshesInterface +{ +public: + AssetManagerMeshes(); + ~AssetManagerMeshes() override; + + // FileHandle is intended to be used with FileSlice or similar here so that we don't need + // a ton of open files at once. 
+ MeshAssetID register_mesh_resource(FileHandle file); + MeshAssetID register_mesh_resource(Filesystem &fs, const std::string &path); + + void iterate(ThreadGroup *group); + + void set_asset_instantiator_interface(AssetInstantiatorMeshesInterface *iface); + +private: + struct AssetInfo : Util::IntrusiveHashMapEnabled + { + uint64_t pending_consumed = 0; + uint64_t consumed = 0; + uint64_t last_used = 0; + FileHandle handle; + MeshAssetID id = {}; + int prio = 0; + }; + uint32_t id_count = 0; + std::vector sorted_assets; + std::mutex asset_bank_lock; + std::vector asset_bank; + Util::ObjectPool pool; + Util::IntrusiveHashMapHolder file_to_assets; + AssetInstantiatorMeshesInterface *iface = nullptr; + uint64_t timestamp = 1; + uint64_t total_consumed = 0; + uint32_t blocking_signals = 0; + std::unique_ptr signal; + + MeshAssetID register_mesh_resource_nolock(FileHandle file); +}; } diff --git a/tests/asset_manager_test.cpp b/tests/asset_manager_test.cpp index 086c9233..a079c696 100644 --- a/tests/asset_manager_test.cpp +++ b/tests/asset_manager_test.cpp @@ -22,13 +22,13 @@ struct ActivationInterface final : AssetInstantiatorImagesInterface LOGI("Releasing ID: %u\n", id.id); } - void set_id_bounds(uint32_t bound_) override + void set_image_id_bounds(uint32_t bound_) override { bound = bound_; LOGI("ID bound: %u\n", bound); } - void latch_handles() override + void latch_image_handles() override { } diff --git a/vulkan/managers/resource_manager.cpp b/vulkan/managers/resource_manager.cpp index 841dc486..23d36065 100644 --- a/vulkan/managers/resource_manager.cpp +++ b/vulkan/managers/resource_manager.cpp @@ -43,7 +43,7 @@ ResourceManager::~ResourceManager() manager->set_asset_instantiator_interface(nullptr); } -void ResourceManager::set_id_bounds(uint32_t bound) +void ResourceManager::set_image_id_bounds(uint32_t bound) { textures.resize(bound); views.resize(bound); @@ -304,7 +304,7 @@ const ImageHandle &ResourceManager::get_fallback_image(Granite::ImageClass image } } 
-void ResourceManager::latch_handles() +void ResourceManager::latch_image_handles() { std::lock_guard holder{lock}; for (auto &update : updates) diff --git a/vulkan/managers/resource_manager.hpp b/vulkan/managers/resource_manager.hpp index 857d2cbd..8ec17efe 100644 --- a/vulkan/managers/resource_manager.hpp +++ b/vulkan/managers/resource_manager.hpp @@ -52,12 +52,12 @@ class ResourceManager final : private Granite::AssetInstantiatorImagesInterface Device *device; Granite::AssetManagerImages *manager = nullptr; - void latch_handles() override; + void latch_image_handles() override; uint64_t estimate_cost_image_resource(Granite::ImageAssetID id, Granite::File &file) override; void instantiate_image_resource(Granite::AssetManagerImages &manager, Granite::TaskGroup *task, Granite::ImageAssetID id, Granite::File &file) override; void release_image_resource(Granite::ImageAssetID id) override; - void set_id_bounds(uint32_t bound) override; + void set_image_id_bounds(uint32_t bound) override; void set_image_class(Granite::ImageAssetID id, Granite::ImageClass image_class) override; struct Texture From 9825620317283b45d21144d251d50d6566b8c9dd Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Fri, 4 Aug 2023 09:51:18 +0200 Subject: [PATCH 53/71] Revert "More asset manager noodling." This reverts commit 31aa48c35bc7c29610cd1a7bb4044b75791d3a91. 
--- .../global/global_managers_interface.hpp | 6 -- filesystem/asset_manager.cpp | 83 ++----------------- filesystem/asset_manager.hpp | 67 +-------------- tests/asset_manager_test.cpp | 4 +- vulkan/managers/resource_manager.cpp | 4 +- vulkan/managers/resource_manager.hpp | 4 +- 6 files changed, 14 insertions(+), 154 deletions(-) diff --git a/application/global/global_managers_interface.hpp b/application/global/global_managers_interface.hpp index c2b0fefc..a493d3b4 100644 --- a/application/global/global_managers_interface.hpp +++ b/application/global/global_managers_interface.hpp @@ -50,12 +50,6 @@ class AssetManagerImagesInterface virtual ~AssetManagerImagesInterface() = default; }; -class AssetManagerMeshesInterface -{ -public: - virtual ~AssetManagerMeshesInterface() = default; -}; - class ThreadGroupInterface { public: diff --git a/filesystem/asset_manager.cpp b/filesystem/asset_manager.cpp index 063122b0..b9e6ec8c 100644 --- a/filesystem/asset_manager.cpp +++ b/filesystem/asset_manager.cpp @@ -53,7 +53,7 @@ ImageAssetID AssetManagerImages::register_image_resource_nolock(FileHandle file, sorted_assets.reserve(asset_bank.size()); if (iface) { - iface->set_image_id_bounds(id_count); + iface->set_id_bounds(id_count); iface->set_image_class(info->id, image_class); } return ret; @@ -115,7 +115,7 @@ void AssetManagerImages::set_asset_instantiator_interface(AssetInstantiatorImage iface = iface_; if (iface) { - iface->set_image_id_bounds(id_count); + iface->set_id_bounds(id_count); for (uint32_t i = 0; i < id_count; i++) iface->set_image_class(ImageAssetID{i}, asset_bank[i]->image_class); } @@ -232,7 +232,7 @@ void AssetManagerImages::iterate(ThreadGroup *group) uint64_t current_count = signal->get_count(); if (current_count + 3 < timestamp) { - iface->latch_image_handles(); + iface->latch_handles(); LOGI("Asset manager skipping iteration due to too much pending work.\n"); return; } @@ -241,7 +241,7 @@ void AssetManagerImages::iterate(ThreadGroup *group) if (group) { 
task = group->create_task(); - task->set_desc("asset-manager-instantiate-image"); + task->set_desc("asset-manager-instantiate"); task->set_fence_counter_signal(signal.get()); task->set_task_class(TaskClass::Background); } @@ -365,80 +365,7 @@ void AssetManagerImages::iterate(ThreadGroup *group) static_cast(activated_cost_this_iteration / 1024)); } - iface->latch_image_handles(); + iface->latch_handles(); timestamp++; } - -AssetManagerMeshes::AssetManagerMeshes() -{ - signal = std::make_unique(); - for (uint64_t i = 0; i < timestamp; i++) - signal->signal_increment(); -} - -AssetManagerMeshes::~AssetManagerMeshes() -{ - signal->wait_until_at_least(timestamp); - for (auto *a : asset_bank) - pool.free(a); -} - -MeshAssetID AssetManagerMeshes::register_mesh_resource_nolock(FileHandle file) -{ - auto *info = pool.allocate(); - info->handle = std::move(file); - info->id.id = id_count++; - MeshAssetID ret = info->id; - asset_bank.push_back(info); - if (iface) - iface->set_mesh_id_bounds(id_count); - return ret; -} - -MeshAssetID AssetManagerMeshes::register_mesh_resource(FileHandle file) -{ - std::lock_guard holder{asset_bank_lock}; - return register_mesh_resource_nolock(std::move(file)); -} - -MeshAssetID AssetManagerMeshes::register_mesh_resource(Filesystem &fs, const std::string &path) -{ - std::lock_guard holder{asset_bank_lock}; - - Util::Hasher h; - h.string(path); - if (auto *asset = file_to_assets.find(h.get())) - return asset->id; - - auto file = fs.open(path); - if (!file) - return {}; - - auto id = register_mesh_resource_nolock(std::move(file)); - asset_bank[id.id]->set_hash(h.get()); - file_to_assets.insert_replace(asset_bank[id.id]); - return id; -} - -void AssetManagerMeshes::set_asset_instantiator_interface(AssetInstantiatorMeshesInterface *iface_) -{ - if (iface) - { - signal->wait_until_at_least(timestamp); - for (uint32_t id = 0; id < id_count; id++) - iface->release_mesh_resource(MeshAssetID{id}); - } - - for (auto *a : asset_bank) - { - a->consumed = 
0; - a->pending_consumed = 0; - a->last_used = 0; - } - total_consumed = 0; - - iface = iface_; - if (iface) - iface->set_mesh_id_bounds(id_count); -} } diff --git a/filesystem/asset_manager.hpp b/filesystem/asset_manager.hpp index be4e195e..9f96734a 100644 --- a/filesystem/asset_manager.hpp +++ b/filesystem/asset_manager.hpp @@ -52,18 +52,7 @@ struct MeshAssetID inline bool operator!=(const MeshAssetID &other) const { return !(*this == other); } }; -struct MaterialAssetID -{ - uint32_t id = uint32_t(-1); - MaterialAssetID() = default; - explicit MaterialAssetID(uint32_t id_) : id{ id_ } {} - explicit inline operator bool() const { return id != uint32_t(-1); } - inline bool operator==(const MaterialAssetID &other) const { return id == other.id; } - inline bool operator!=(const MaterialAssetID &other) const { return !(*this == other); } -}; - class AssetManagerImages; -class AssetManagerMeshes; // If we have to fall back due to no image being present, // lets asset instantiator know what to substitute. @@ -100,21 +89,11 @@ class AssetInstantiatorImagesInterface // Will only be called after an upload completes through manager.update_cost(). virtual void release_image_resource(ImageAssetID id) = 0; - virtual void set_image_id_bounds(uint32_t bound) = 0; + virtual void set_id_bounds(uint32_t bound) = 0; virtual void set_image_class(ImageAssetID id, ImageClass image_class); - // Called in AssetManagerImages::iterate(). - virtual void latch_image_handles() = 0; -}; - -class AssetInstantiatorMeshesInterface -{ -public: - virtual ~AssetInstantiatorMeshesInterface() = default; - virtual void instantiate_mesh_resource(AssetManagerMeshes &manager, TaskGroup *group, MeshAssetID id, File &mapping) = 0; - virtual void set_mesh_id_bounds(uint32_t bound) = 0; - virtual void release_mesh_resource(MeshAssetID id) = 0; - virtual void latch_mesh_handles() = 0; + // Called in AssetManager::iterate(). 
+ virtual void latch_handles() = 0; }; class AssetManagerImages final : public AssetManagerImagesInterface @@ -197,44 +176,4 @@ class AssetManagerImages final : public AssetManagerImagesInterface void update_costs_locked_assets(); void update_lru_locked_assets(); }; - -class AssetManagerMeshes final : public AssetManagerMeshesInterface -{ -public: - AssetManagerMeshes(); - ~AssetManagerMeshes() override; - - // FileHandle is intended to be used with FileSlice or similar here so that we don't need - // a ton of open files at once. - MeshAssetID register_mesh_resource(FileHandle file); - MeshAssetID register_mesh_resource(Filesystem &fs, const std::string &path); - - void iterate(ThreadGroup *group); - - void set_asset_instantiator_interface(AssetInstantiatorMeshesInterface *iface); - -private: - struct AssetInfo : Util::IntrusiveHashMapEnabled - { - uint64_t pending_consumed = 0; - uint64_t consumed = 0; - uint64_t last_used = 0; - FileHandle handle; - MeshAssetID id = {}; - int prio = 0; - }; - uint32_t id_count = 0; - std::vector sorted_assets; - std::mutex asset_bank_lock; - std::vector asset_bank; - Util::ObjectPool pool; - Util::IntrusiveHashMapHolder file_to_assets; - AssetInstantiatorMeshesInterface *iface = nullptr; - uint64_t timestamp = 1; - uint64_t total_consumed = 0; - uint32_t blocking_signals = 0; - std::unique_ptr signal; - - MeshAssetID register_mesh_resource_nolock(FileHandle file); -}; } diff --git a/tests/asset_manager_test.cpp b/tests/asset_manager_test.cpp index a079c696..086c9233 100644 --- a/tests/asset_manager_test.cpp +++ b/tests/asset_manager_test.cpp @@ -22,13 +22,13 @@ struct ActivationInterface final : AssetInstantiatorImagesInterface LOGI("Releasing ID: %u\n", id.id); } - void set_image_id_bounds(uint32_t bound_) override + void set_id_bounds(uint32_t bound_) override { bound = bound_; LOGI("ID bound: %u\n", bound); } - void latch_image_handles() override + void latch_handles() override { } diff --git 
a/vulkan/managers/resource_manager.cpp b/vulkan/managers/resource_manager.cpp index 23d36065..841dc486 100644 --- a/vulkan/managers/resource_manager.cpp +++ b/vulkan/managers/resource_manager.cpp @@ -43,7 +43,7 @@ ResourceManager::~ResourceManager() manager->set_asset_instantiator_interface(nullptr); } -void ResourceManager::set_image_id_bounds(uint32_t bound) +void ResourceManager::set_id_bounds(uint32_t bound) { textures.resize(bound); views.resize(bound); @@ -304,7 +304,7 @@ const ImageHandle &ResourceManager::get_fallback_image(Granite::ImageClass image } } -void ResourceManager::latch_image_handles() +void ResourceManager::latch_handles() { std::lock_guard holder{lock}; for (auto &update : updates) diff --git a/vulkan/managers/resource_manager.hpp b/vulkan/managers/resource_manager.hpp index 8ec17efe..857d2cbd 100644 --- a/vulkan/managers/resource_manager.hpp +++ b/vulkan/managers/resource_manager.hpp @@ -52,12 +52,12 @@ class ResourceManager final : private Granite::AssetInstantiatorImagesInterface Device *device; Granite::AssetManagerImages *manager = nullptr; - void latch_image_handles() override; + void latch_handles() override; uint64_t estimate_cost_image_resource(Granite::ImageAssetID id, Granite::File &file) override; void instantiate_image_resource(Granite::AssetManagerImages &manager, Granite::TaskGroup *task, Granite::ImageAssetID id, Granite::File &file) override; void release_image_resource(Granite::ImageAssetID id) override; - void set_image_id_bounds(uint32_t bound) override; + void set_id_bounds(uint32_t bound) override; void set_image_class(Granite::ImageAssetID id, Granite::ImageClass image_class) override; struct Texture From c5c476c9ef72355c91c5396cff8e5ecf0dbb7d83 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Fri, 4 Aug 2023 09:51:21 +0200 Subject: [PATCH 54/71] Revert "More fixups." This reverts commit 6aeda2da82c5841cedf5e8011dd399c0caec4584. 
--- tests/asset_manager_test.cpp | 6 +++--- tests/bandlimited_pixel_test.cpp | 2 +- tests/ui_sandbox.cpp | 8 ++++---- tools/aa_bench.cpp | 4 ++-- tools/convert_cube_to_environment.cpp | 2 +- tools/convert_equirect_to_environment.cpp | 2 +- tools/texture_viewer.cpp | 4 ++-- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/asset_manager_test.cpp b/tests/asset_manager_test.cpp index 086c9233..7f1bb3db 100644 --- a/tests/asset_manager_test.cpp +++ b/tests/asset_manager_test.cpp @@ -4,14 +4,14 @@ using namespace Granite; -struct ActivationInterface final : AssetInstantiatorImagesInterface +struct ActivationInterface final : AssetInstantiatorInterface { uint64_t estimate_cost_image_resource(ImageAssetID, File &mapping) override { return mapping.get_size(); } - void instantiate_image_resource(AssetManagerImages &manager, TaskGroup *, ImageAssetID id, File &mapping) override + void instantiate_image_resource(AssetManager &manager, TaskGroup *, ImageAssetID id, File &mapping) override { LOGI("Instantiating ID: %u\n", id.id); manager.update_cost(id, mapping.get_size()); @@ -38,7 +38,7 @@ struct ActivationInterface final : AssetInstantiatorImagesInterface int main() { Filesystem fs; - AssetManagerImages manager; + AssetManager manager; ActivationInterface iface; fs.register_protocol("tmp", std::make_unique()); diff --git a/tests/bandlimited_pixel_test.cpp b/tests/bandlimited_pixel_test.cpp index bfd5e92a..4e9d34f1 100644 --- a/tests/bandlimited_pixel_test.cpp +++ b/tests/bandlimited_pixel_test.cpp @@ -117,7 +117,7 @@ struct BandlimitedPixelTestApplication : Application, EventHandler { "BANDLIMITED_PIXEL_USE_TRANSCENDENTAL", 1 }, }); - auto texture = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource(*GRANITE_FILESYSTEM(), "assets://textures/sprite.png", ImageClass::Color); + auto texture = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), "assets://textures/sprite.png", ImageClass::Color); auto *view = 
cmd->get_device().get_resource_manager().get_image_view_blocking(texture); cmd->set_texture(2, 0, *view, mode == 0 ? StockSampler::NearestWrap : StockSampler::TrilinearWrap); diff --git a/tests/ui_sandbox.cpp b/tests/ui_sandbox.cpp index 39f346b2..c2d7b444 100644 --- a/tests/ui_sandbox.cpp +++ b/tests/ui_sandbox.cpp @@ -50,7 +50,7 @@ UIApplication::UIApplication() window->show_title_bar(false); window->set_floating(false); window->set_background_color(vec4(0.0f, 1.0f, 0.0f, 1.0f)); - window->set_background_image(GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource( + window->set_background_image(GRANITE_ASSET_MANAGER()->register_image_resource( *GRANITE_FILESYSTEM(), "builtin://textures/checkerboard.png", ImageClass::Color)); auto button = make_handle(); @@ -90,7 +90,7 @@ UIApplication::UIApplication() slider->show_value(false); slider->set_margin(5.0f); slider->show_tooltip(true); - slider->set_background_image(GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource( + slider->set_background_image(GRANITE_ASSET_MANAGER()->register_image_resource( *GRANITE_FILESYSTEM(), "builtin://textures/checkerboard.png", ImageClass::Color)); slider->set_background_color(vec4(1.0f)); } @@ -111,7 +111,7 @@ UIApplication::UIApplication() sli.show_value(false); sli.set_margin(5.0f); sli.show_tooltip(true); - sli.set_background_image(GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource( + sli.set_background_image(GRANITE_ASSET_MANAGER()->register_image_resource( *GRANITE_FILESYSTEM(), "builtin://textures/checkerboard.png", ImageClass::Color)); sli.set_background_color(vec4(1.0f)); } @@ -126,7 +126,7 @@ UIApplication::UIApplication() btn.set_text("Mjuu"); btn.set_toggled_font_color(vec4(0.0f, 1.0f, 0.0f, 1.0f)); btn.set_untoggled_font_color(vec4(1.0f, 0.0f, 0.0f, 1.0f)); - btn.set_background_image(GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource( + btn.set_background_image(GRANITE_ASSET_MANAGER()->register_image_resource( *GRANITE_FILESYSTEM(), 
"builtin://textures/checkerboard.png", ImageClass::Color)); btn.set_background_color(vec4(1.0f)); } diff --git a/tools/aa_bench.cpp b/tools/aa_bench.cpp index bb45c1fa..21b73cc3 100644 --- a/tools/aa_bench.cpp +++ b/tools/aa_bench.cpp @@ -39,8 +39,8 @@ AABenchApplication::AABenchApplication(const std::string &input0, const std::str : input_path0(input0), input_path1(input1), scale(scale_) { type = string_to_post_antialiasing_type(method); - images[0] = input_path0.empty() ? ImageAssetID{} : GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource(*GRANITE_FILESYSTEM(), input_path0, ImageClass::Color); - images[1] = input_path1.empty() ? ImageAssetID{} : GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource(*GRANITE_FILESYSTEM(), input_path1, ImageClass::Color); + images[0] = input_path0.empty() ? ImageAssetID{} : GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), input_path0, ImageClass::Color); + images[1] = input_path1.empty() ? ImageAssetID{} : GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), input_path1, ImageClass::Color); EVENT_MANAGER_REGISTER_LATCH(AABenchApplication, on_swapchain_changed, on_swapchain_destroyed, SwapchainParameterEvent); EVENT_MANAGER_REGISTER_LATCH(AABenchApplication, on_device_created, on_device_destroyed, DeviceCreatedEvent); } diff --git a/tools/convert_cube_to_environment.cpp b/tools/convert_cube_to_environment.cpp index cba2705c..d1659cc8 100644 --- a/tools/convert_cube_to_environment.cpp +++ b/tools/convert_cube_to_environment.cpp @@ -81,7 +81,7 @@ int main(int argc, char *argv[]) device.set_context(context); device.init_external_swapchain({ ImageHandle(nullptr) }); - auto cube = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource(*GRANITE_FILESYSTEM(), args.cube, ImageClass::Color); + auto cube = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), args.cube, ImageClass::Color); auto *view = device.get_resource_manager().get_image_view_blocking(cube); auto specular 
= convert_cube_to_ibl_specular(device, *view); auto diffuse = convert_cube_to_ibl_diffuse(device, *view); diff --git a/tools/convert_equirect_to_environment.cpp b/tools/convert_equirect_to_environment.cpp index c2031c22..7b6252d3 100644 --- a/tools/convert_equirect_to_environment.cpp +++ b/tools/convert_equirect_to_environment.cpp @@ -86,7 +86,7 @@ int main(int argc, char *argv[]) device.init_external_swapchain({ ImageHandle(nullptr) }); auto &textures = device.get_resource_manager(); - auto equirect = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource(*GRANITE_FILESYSTEM(), args.equirect, ImageClass::Color); + auto equirect = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), args.equirect, ImageClass::Color); auto *view = textures.get_image_view_blocking(equirect); auto cube = convert_equirect_to_cube(device, *view, args.cube_scale); diff --git a/tools/texture_viewer.cpp b/tools/texture_viewer.cpp index 4e1f0484..6cd40d83 100644 --- a/tools/texture_viewer.cpp +++ b/tools/texture_viewer.cpp @@ -37,8 +37,8 @@ struct TextureViewerApplication : Granite::Application, Granite::EventHandler TextureViewerApplication(std::string path_) : path(std::move(path_)) { - texture = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource(*GRANITE_FILESYSTEM(), - path, ImageClass::Color); + texture = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), + path, ImageClass::Color); EVENT_MANAGER_REGISTER(TextureViewerApplication, on_key_pressed, KeyboardEvent); } From 4139f2e32ccf1a9ebbca1c810d4cb7d3996f0cd7 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Fri, 4 Aug 2023 09:51:24 +0200 Subject: [PATCH 55/71] Revert "More build shenanigans." This reverts commit b6476463b0152ddbd5a3c70f2e3594f460ae8dcb. 
--- application/application.cpp | 8 ++-- application/global/global_managers.cpp | 18 ++++----- application/global/global_managers.hpp | 6 +-- application/global/global_managers_init.cpp | 4 +- .../global/global_managers_interface.hpp | 4 +- .../platforms/application_headless.cpp | 2 +- application/scene_viewer_application.cpp | 2 +- filesystem/asset_manager.cpp | 37 +++++++++---------- filesystem/asset_manager.hpp | 18 ++++----- renderer/common_renderer_data.cpp | 4 +- renderer/common_renderer_data.hpp | 2 +- renderer/ground.cpp | 17 +++------ renderer/lights/decal_volume.cpp | 5 ++- renderer/material.hpp | 2 +- renderer/mesh_util.cpp | 6 +-- renderer/post/smaa.cpp | 8 ++-- ui/image_widget.cpp | 2 +- vulkan/context.hpp | 4 +- vulkan/managers/resource_manager.cpp | 6 +-- vulkan/managers/resource_manager.hpp | 8 ++-- 20 files changed, 79 insertions(+), 84 deletions(-) diff --git a/application/application.cpp b/application/application.cpp index 6c3ce65b..265a8213 100644 --- a/application/application.cpp +++ b/application/application.cpp @@ -35,7 +35,7 @@ namespace Granite { Application::Application() { - GRANITE_COMMON_RENDERER_DATA()->initialize_static_assets(GRANITE_ASSET_MANAGER_IMAGES(), GRANITE_FILESYSTEM()); + GRANITE_COMMON_RENDERER_DATA()->initialize_static_assets(GRANITE_ASSET_MANAGER(), GRANITE_FILESYSTEM()); } Application::~Application() @@ -75,7 +75,7 @@ bool Application::init_wsi(Vulkan::ContextHandle context) Context::SystemHandles system_handles; system_handles.filesystem = GRANITE_FILESYSTEM(); system_handles.thread_group = GRANITE_THREAD_GROUP(); - system_handles.asset_manager_images = GRANITE_ASSET_MANAGER_IMAGES(); + system_handles.asset_manager = GRANITE_ASSET_MANAGER(); system_handles.timeline_trace_file = system_handles.thread_group->get_timeline_trace_file(); if (!application_wsi.init_context_from_platform( @@ -152,7 +152,7 @@ void Application::check_initialization_progress() 
device.query_initialization_progress(Device::InitializationStage::ShaderModules) >= 100) { // Now is a good time to kick shader manager since it might require compute shaders for decode. - GRANITE_ASSET_MANAGER_IMAGES()->iterate(GRANITE_THREAD_GROUP()); + GRANITE_ASSET_MANAGER()->iterate(GRANITE_THREAD_GROUP()); GRANITE_SCOPED_TIMELINE_EVENT("dispatch-ready-modules"); GRANITE_EVENT_MANAGER()->enqueue_latched(&device, &device.get_shader_manager()); @@ -278,6 +278,6 @@ void Application::post_frame() { // Texture manager might require shaders to be ready before we can submit work. if (ready_modules) - GRANITE_ASSET_MANAGER_IMAGES()->iterate(GRANITE_THREAD_GROUP()); + GRANITE_ASSET_MANAGER()->iterate(GRANITE_THREAD_GROUP()); } } diff --git a/application/global/global_managers.cpp b/application/global/global_managers.cpp index be7ba621..d7848914 100644 --- a/application/global/global_managers.cpp +++ b/application/global/global_managers.cpp @@ -36,7 +36,7 @@ struct GlobalManagers Factory *factory; FilesystemInterface *filesystem; - AssetManagerImagesInterface *asset_manager_images; + AssetManagerInterface *asset_manager; EventManagerInterface *event_manager; ThreadGroupInterface *thread_group; UI::UIManagerInterface *ui_manager; @@ -90,11 +90,11 @@ FilesystemInterface *filesystem() return global_managers.filesystem; } -AssetManagerImagesInterface *asset_manager_images() +AssetManagerInterface *asset_manager() { - if (!global_managers.asset_manager_images) + if (!global_managers.asset_manager) LOGE("Asset manager was not initialized.\n"); - return global_managers.asset_manager_images; + return global_managers.asset_manager; } EventManagerInterface *event_manager() @@ -162,8 +162,8 @@ void init(Factory &factory, ManagerFeatureFlags flags, unsigned max_threads) if (flags & MANAGER_FEATURE_ASSET_MANAGER_BIT) { - if (!global_managers.asset_manager_images) - global_managers.asset_manager_images = factory.create_asset_manager_images(); + if (!global_managers.asset_manager) + 
global_managers.asset_manager = factory.create_asset_manager(); } bool kick_threads = false; @@ -243,7 +243,7 @@ void deinit() delete global_managers.common_renderer_data; delete global_managers.ui_manager; delete global_managers.thread_group; - delete global_managers.asset_manager_images; + delete global_managers.asset_manager; delete global_managers.filesystem; delete global_managers.event_manager; delete global_managers.logging; @@ -253,7 +253,7 @@ void deinit() global_managers.physics = nullptr; global_managers.common_renderer_data = nullptr; global_managers.filesystem = nullptr; - global_managers.asset_manager_images = nullptr; + global_managers.asset_manager = nullptr; global_managers.event_manager = nullptr; global_managers.thread_group = nullptr; global_managers.ui_manager = nullptr; @@ -290,7 +290,7 @@ void stop_audio_system() } FilesystemInterface *Factory::create_filesystem() { return nullptr; } -AssetManagerImagesInterface *Factory::create_asset_manager_images() { return nullptr; } +AssetManagerInterface *Factory::create_asset_manager() { return nullptr; } EventManagerInterface *Factory::create_event_manager() { return nullptr; } ThreadGroupInterface *Factory::create_thread_group() { return nullptr; } CommonRendererDataInterface *Factory::create_common_renderer_data() { return nullptr; } diff --git a/application/global/global_managers.hpp b/application/global/global_managers.hpp index ddecd113..848004f4 100644 --- a/application/global/global_managers.hpp +++ b/application/global/global_managers.hpp @@ -61,7 +61,7 @@ class Factory virtual ~Factory() = default; virtual FilesystemInterface *create_filesystem(); - virtual AssetManagerImagesInterface *create_asset_manager_images(); + virtual AssetManagerInterface *create_asset_manager(); virtual EventManagerInterface *create_event_manager(); virtual ThreadGroupInterface *create_thread_group(); virtual CommonRendererDataInterface *create_common_renderer_data(); @@ -97,7 +97,7 @@ void 
install_audio_system(Audio::BackendInterface *backend, Audio::MixerInterfac Util::MessageQueueInterface *message_queue(); FilesystemInterface *filesystem(); -AssetManagerImagesInterface *asset_manager_images(); +AssetManagerInterface *asset_manager(); EventManagerInterface *event_manager(); ThreadGroupInterface *thread_group(); UI::UIManagerInterface *ui_manager(); @@ -110,7 +110,7 @@ PhysicsSystemInterface *physics(); #define GRANITE_MESSAGE_QUEUE() static_cast<::Util::MessageQueue *>(::Granite::Global::message_queue()) #define GRANITE_FILESYSTEM() static_cast<::Granite::Filesystem *>(::Granite::Global::filesystem()) -#define GRANITE_ASSET_MANAGER_IMAGES() static_cast<::Granite::AssetManagerImages *>(::Granite::Global::asset_manager_images()) +#define GRANITE_ASSET_MANAGER() static_cast<::Granite::AssetManager *>(::Granite::Global::asset_manager()) #define GRANITE_EVENT_MANAGER() static_cast<::Granite::EventManager *>(::Granite::Global::event_manager()) #define GRANITE_THREAD_GROUP() static_cast<::Granite::ThreadGroup *>(::Granite::Global::thread_group()) #define GRANITE_UI_MANAGER() static_cast<::Granite::UI::UIManager *>(::Granite::Global::ui_manager()) diff --git a/application/global/global_managers_init.cpp b/application/global/global_managers_init.cpp index 6dac1d1c..c4bf855c 100644 --- a/application/global/global_managers_init.cpp +++ b/application/global/global_managers_init.cpp @@ -47,9 +47,9 @@ struct FactoryImplementation : Factory return new Filesystem; } - AssetManagerImagesInterface *create_asset_manager_images() override + AssetManagerInterface *create_asset_manager() override { - return new AssetManagerImages; + return new AssetManager; } EventManagerInterface *create_event_manager() override diff --git a/application/global/global_managers_interface.hpp b/application/global/global_managers_interface.hpp index a493d3b4..b8910f97 100644 --- a/application/global/global_managers_interface.hpp +++ b/application/global/global_managers_interface.hpp @@ 
-44,10 +44,10 @@ class FilesystemInterface virtual bool load_text_file(const std::string &path, std::string &str) = 0; }; -class AssetManagerImagesInterface +class AssetManagerInterface { public: - virtual ~AssetManagerImagesInterface() = default; + virtual ~AssetManagerInterface() = default; }; class ThreadGroupInterface diff --git a/application/platforms/application_headless.cpp b/application/platforms/application_headless.cpp index 24023811..a8cff0e9 100644 --- a/application/platforms/application_headless.cpp +++ b/application/platforms/application_headless.cpp @@ -173,7 +173,7 @@ struct WSIPlatformHeadless : Granite::GraniteWSIPlatform system_handles.filesystem = GRANITE_FILESYSTEM(); system_handles.thread_group = GRANITE_THREAD_GROUP(); system_handles.timeline_trace_file = system_handles.thread_group->get_timeline_trace_file(); - system_handles.asset_manager_images = GRANITE_ASSET_MANAGER_IMAGES(); + system_handles.asset_manager = GRANITE_ASSET_MANAGER(); context->set_system_handles(system_handles); context->set_num_thread_indices(GRANITE_THREAD_GROUP()->get_num_threads() + 1); diff --git a/application/scene_viewer_application.cpp b/application/scene_viewer_application.cpp index cdd1681f..72765b4e 100644 --- a/application/scene_viewer_application.cpp +++ b/application/scene_viewer_application.cpp @@ -1419,7 +1419,7 @@ void SceneViewerApplication::render_ui(CommandBuffer &cmd) snprintf(pos_text, sizeof(pos_text), "Pos: %.3f, %.3f, %.3f", cam_pos.x, cam_pos.y, cam_pos.z); snprintf(rot_text, sizeof(rot_text), "Rot: %.3f, %.3f, %.3f, %.3f", cam_ori.x, cam_ori.y, cam_ori.z, cam_ori.w); snprintf(tex_text, sizeof(tex_text), "Texture: %u MiB", - unsigned(GRANITE_ASSET_MANAGER_IMAGES()->get_current_total_consumed() / (1024 * 1024))); + unsigned(GRANITE_ASSET_MANAGER()->get_current_total_consumed() / (1024 * 1024))); vec3 offset(5.0f, 5.0f, 0.0f); vec2 size(cmd.get_viewport().width - 10.0f, cmd.get_viewport().height - 10.0f); diff --git a/filesystem/asset_manager.cpp 
b/filesystem/asset_manager.cpp index b9e6ec8c..8b033a73 100644 --- a/filesystem/asset_manager.cpp +++ b/filesystem/asset_manager.cpp @@ -27,21 +27,21 @@ namespace Granite { -AssetManagerImages::AssetManagerImages() +AssetManager::AssetManager() { signal = std::make_unique(); for (uint64_t i = 0; i < timestamp; i++) signal->signal_increment(); } -AssetManagerImages::~AssetManagerImages() +AssetManager::~AssetManager() { signal->wait_until_at_least(timestamp); for (auto *a : asset_bank) pool.free(a); } -ImageAssetID AssetManagerImages::register_image_resource_nolock(FileHandle file, ImageClass image_class, int prio) +ImageAssetID AssetManager::register_image_resource_nolock(FileHandle file, ImageClass image_class, int prio) { auto *info = pool.allocate(); info->handle = std::move(file); @@ -59,18 +59,17 @@ ImageAssetID AssetManagerImages::register_image_resource_nolock(FileHandle file, return ret; } -void AssetInstantiatorImagesInterface::set_image_class(ImageAssetID, ImageClass) +void AssetInstantiatorInterface::set_image_class(ImageAssetID, ImageClass) { } -ImageAssetID AssetManagerImages::register_image_resource(FileHandle file, ImageClass image_class, int prio) +ImageAssetID AssetManager::register_image_resource(FileHandle file, ImageClass image_class, int prio) { std::lock_guard holder{asset_bank_lock}; return register_image_resource_nolock(std::move(file), image_class, prio); } -ImageAssetID AssetManagerImages::register_image_resource(Filesystem &fs, const std::string &path, - ImageClass image_class, int prio) +ImageAssetID AssetManager::register_image_resource(Filesystem &fs, const std::string &path, ImageClass image_class, int prio) { std::lock_guard holder{asset_bank_lock}; @@ -89,13 +88,13 @@ ImageAssetID AssetManagerImages::register_image_resource(Filesystem &fs, const s return id; } -void AssetManagerImages::update_cost(ImageAssetID id, uint64_t cost) +void AssetManager::update_cost(ImageAssetID id, uint64_t cost) { std::lock_guard 
holder{cost_update_lock}; thread_cost_updates.push_back({ id, cost }); } -void AssetManagerImages::set_asset_instantiator_interface(AssetInstantiatorImagesInterface *iface_) +void AssetManager::set_asset_instantiator_interface(AssetInstantiatorInterface *iface_) { if (iface) { @@ -121,22 +120,22 @@ void AssetManagerImages::set_asset_instantiator_interface(AssetInstantiatorImage } } -void AssetManagerImages::mark_used_resource(ImageAssetID id) +void AssetManager::mark_used_resource(ImageAssetID id) { lru_append.push(id); } -void AssetManagerImages::set_image_budget(uint64_t cost) +void AssetManager::set_image_budget(uint64_t cost) { image_budget = cost; } -void AssetManagerImages::set_image_budget_per_iteration(uint64_t cost) +void AssetManager::set_image_budget_per_iteration(uint64_t cost) { image_budget_per_iteration = cost; } -bool AssetManagerImages::set_image_residency_priority(ImageAssetID id, int prio) +bool AssetManager::set_image_residency_priority(ImageAssetID id, int prio) { std::lock_guard holder{asset_bank_lock}; if (id.id >= asset_bank.size()) @@ -145,7 +144,7 @@ bool AssetManagerImages::set_image_residency_priority(ImageAssetID id, int prio) return true; } -void AssetManagerImages::adjust_update(const CostUpdate &update) +void AssetManager::adjust_update(const CostUpdate &update) { if (update.id.id < asset_bank.size()) { @@ -160,12 +159,12 @@ void AssetManagerImages::adjust_update(const CostUpdate &update) } } -uint64_t AssetManagerImages::get_current_total_consumed() const +uint64_t AssetManager::get_current_total_consumed() const { return total_consumed; } -void AssetManagerImages::update_costs_locked_assets() +void AssetManager::update_costs_locked_assets() { { std::lock_guard holder_cost{cost_update_lock}; @@ -177,7 +176,7 @@ void AssetManagerImages::update_costs_locked_assets() cost_updates.clear(); } -void AssetManagerImages::update_lru_locked_assets() +void AssetManager::update_lru_locked_assets() { lru_append.for_each_ranged([this](const 
ImageAssetID *id, size_t count) { for (size_t i = 0; i < count; i++) @@ -187,7 +186,7 @@ void AssetManagerImages::update_lru_locked_assets() lru_append.clear(); } -bool AssetManagerImages::iterate_blocking(ThreadGroup &group, ImageAssetID id) +bool AssetManager::iterate_blocking(ThreadGroup &group, ImageAssetID id) { if (!iface) return false; @@ -220,7 +219,7 @@ bool AssetManagerImages::iterate_blocking(ThreadGroup &group, ImageAssetID id) return true; } -void AssetManagerImages::iterate(ThreadGroup *group) +void AssetManager::iterate(ThreadGroup *group) { if (!iface) return; diff --git a/filesystem/asset_manager.hpp b/filesystem/asset_manager.hpp index 9f96734a..1ecb0ced 100644 --- a/filesystem/asset_manager.hpp +++ b/filesystem/asset_manager.hpp @@ -52,7 +52,7 @@ struct MeshAssetID inline bool operator!=(const MeshAssetID &other) const { return !(*this == other); } }; -class AssetManagerImages; +class AssetManager; // If we have to fall back due to no image being present, // lets asset instantiator know what to substitute. @@ -75,17 +75,17 @@ class ThreadGroup; struct TaskGroup; struct TaskSignal; -class AssetInstantiatorImagesInterface +class AssetInstantiatorInterface { public: - virtual ~AssetInstantiatorImagesInterface() = default; + virtual ~AssetInstantiatorInterface() = default; // This estimate should be an upper bound. virtual uint64_t estimate_cost_image_resource(ImageAssetID id, File &mapping) = 0; // When instantiation completes, manager.update_cost() must be called with the real cost. // The real cost may only be known after async parsing of the file. - virtual void instantiate_image_resource(AssetManagerImages &manager, TaskGroup *group, ImageAssetID id, File &mapping) = 0; + virtual void instantiate_image_resource(AssetManager &manager, TaskGroup *group, ImageAssetID id, File &mapping) = 0; // Will only be called after an upload completes through manager.update_cost(). 
virtual void release_image_resource(ImageAssetID id) = 0; @@ -96,16 +96,16 @@ class AssetInstantiatorImagesInterface virtual void latch_handles() = 0; }; -class AssetManagerImages final : public AssetManagerImagesInterface +class AssetManager final : public AssetManagerInterface { public: // Persistent prio means the resource is treated as an internal LUT that must always be resident, no matter what. constexpr static int persistent_prio() { return 0x7fffffff; } - AssetManagerImages(); - ~AssetManagerImages() override; + AssetManager(); + ~AssetManager() override; - void set_asset_instantiator_interface(AssetInstantiatorImagesInterface *iface); + void set_asset_instantiator_interface(AssetInstantiatorInterface *iface); void set_image_budget(uint64_t cost); void set_image_budget_per_iteration(uint64_t cost); @@ -152,7 +152,7 @@ class AssetManagerImages final : public AssetManagerImagesInterface Util::AtomicAppendBuffer lru_append; Util::IntrusiveHashMapHolder file_to_assets; - AssetInstantiatorImagesInterface *iface = nullptr; + AssetInstantiatorInterface *iface = nullptr; uint32_t id_count = 0; uint64_t total_consumed = 0; uint64_t image_budget = 0; diff --git a/renderer/common_renderer_data.cpp b/renderer/common_renderer_data.cpp index da151ddf..dc9fda34 100644 --- a/renderer/common_renderer_data.cpp +++ b/renderer/common_renderer_data.cpp @@ -109,10 +109,10 @@ void LightMesh::on_device_destroyed(const Vulkan::DeviceCreatedEvent &) point_ibo.reset(); } -void CommonRendererData::initialize_static_assets(AssetManagerImages *iface, Filesystem *fs) +void CommonRendererData::initialize_static_assets(AssetManager *iface, Filesystem *fs) { LOGI("Initializing static assets.\n"); brdf_tables = iface->register_image_resource(*fs, "builtin://textures/ibl_brdf_lut.gtx", ImageClass::Zeroable, - AssetManagerImages::persistent_prio()); + AssetManager::persistent_prio()); } } diff --git a/renderer/common_renderer_data.hpp b/renderer/common_renderer_data.hpp index 
b5ce23b2..4e147860 100644 --- a/renderer/common_renderer_data.hpp +++ b/renderer/common_renderer_data.hpp @@ -58,6 +58,6 @@ class CommonRendererData final : public CommonRendererDataInterface public: LightMesh light_mesh; ImageAssetID brdf_tables; - void initialize_static_assets(AssetManagerImages *iface, Filesystem *file_iface); + void initialize_static_assets(AssetManager *iface, Filesystem *file_iface); }; } \ No newline at end of file diff --git a/renderer/ground.cpp b/renderer/ground.cpp index 92a6ea67..c25def4a 100644 --- a/renderer/ground.cpp +++ b/renderer/ground.cpp @@ -182,17 +182,12 @@ Ground::Ground(unsigned size_, const TerrainInfo &info_) num_patches_z = size / info.base_patch_size; patch_lods.resize(num_patches_x * num_patches_z); - heights = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource(*GRANITE_FILESYSTEM(), info.heightmap, ImageClass::Zeroable); - normals = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource(*GRANITE_FILESYSTEM(), info.normalmap, - ImageClass::Normal); - occlusion = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource(*GRANITE_FILESYSTEM(), info.occlusionmap, - ImageClass::Zeroable); - normals_fine = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource(*GRANITE_FILESYSTEM(), info.normalmap_fine, - ImageClass::Normal); - base_color = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource(*GRANITE_FILESYSTEM(), info.base_color, - ImageClass::Color); - type_map = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource(*GRANITE_FILESYSTEM(), info.splatmap, - ImageClass::Zeroable); + heights = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), info.heightmap, ImageClass::Zeroable); + normals = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), info.normalmap, ImageClass::Normal); + occlusion = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), info.occlusionmap, ImageClass::Zeroable); + normals_fine = 
GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), info.normalmap_fine, ImageClass::Normal); + base_color = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), info.base_color, ImageClass::Color); + type_map = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), info.splatmap, ImageClass::Zeroable); EVENT_MANAGER_REGISTER_LATCH(Ground, on_device_created, on_device_destroyed, DeviceCreatedEvent); } diff --git a/renderer/lights/decal_volume.cpp b/renderer/lights/decal_volume.cpp index 284b4c7c..3e5e9cb6 100644 --- a/renderer/lights/decal_volume.cpp +++ b/renderer/lights/decal_volume.cpp @@ -29,8 +29,9 @@ namespace Granite { VolumetricDecal::VolumetricDecal() { - tex = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource( - *GRANITE_FILESYSTEM(), "builtin://textures/decal.png", ImageClass::Color); + tex = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), + "builtin://textures/decal.png", + ImageClass::Color); } const Vulkan::ImageView *VolumetricDecal::get_decal_view(Vulkan::Device &device) const diff --git a/renderer/material.hpp b/renderer/material.hpp index 14863ade..b7c4ce1e 100644 --- a/renderer/material.hpp +++ b/renderer/material.hpp @@ -97,7 +97,7 @@ struct Material { if (!info.paths[i].empty()) { - textures[i] = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource( + textures[i] = GRANITE_ASSET_MANAGER()->register_image_resource( *GRANITE_FILESYSTEM(), info.paths[i], image_classes[i]); } } diff --git a/renderer/mesh_util.cpp b/renderer/mesh_util.cpp index 295b528b..69e0b682 100644 --- a/renderer/mesh_util.cpp +++ b/renderer/mesh_util.cpp @@ -890,7 +890,7 @@ SkyCylinder::SkyCylinder(const std::string &bg_path) { if (!bg_path.empty()) { - texture = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource( + texture = GRANITE_ASSET_MANAGER()->register_image_resource( *GRANITE_FILESYSTEM(), bg_path, ImageClass::Color); } @@ -1056,7 +1056,7 @@ Skybox::Skybox(const std::string 
&bg_path) { if (!bg_path.empty()) { - texture = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource( + texture = GRANITE_ASSET_MANAGER()->register_image_resource( *GRANITE_FILESYSTEM(), bg_path, ImageClass::Color); } } @@ -1195,7 +1195,7 @@ static void texture_plane_render(CommandBuffer &cmd, const RenderQueueData *info TexturePlane::TexturePlane(const std::string &normal_path) { - normalmap = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource( + normalmap = GRANITE_ASSET_MANAGER()->register_image_resource( *GRANITE_FILESYSTEM(), normal_path, ImageClass::Normal); } diff --git a/renderer/post/smaa.cpp b/renderer/post/smaa.cpp index c278d9cc..8952fcb7 100644 --- a/renderer/post/smaa.cpp +++ b/renderer/post/smaa.cpp @@ -145,10 +145,10 @@ void setup_smaa_postprocess(RenderGraph &graph, TemporalJitter &jitter, return true; }); - auto area = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource( - *GRANITE_FILESYSTEM(), "builtin://textures/smaa/area.gtx", ImageClass::Zeroable, AssetManagerImages::persistent_prio()); - auto search = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource( - *GRANITE_FILESYSTEM(), "builtin://textures/smaa/search.gtx", ImageClass::Zeroable, AssetManagerImages::persistent_prio()); + auto area = GRANITE_ASSET_MANAGER()->register_image_resource( + *GRANITE_FILESYSTEM(), "builtin://textures/smaa/area.gtx", ImageClass::Zeroable, AssetManager::persistent_prio()); + auto search = GRANITE_ASSET_MANAGER()->register_image_resource( + *GRANITE_FILESYSTEM(), "builtin://textures/smaa/search.gtx", ImageClass::Zeroable, AssetManager::persistent_prio()); smaa_weight.set_build_render_pass([&, area, search, edge = masked_edge, q = smaa_quality](Vulkan::CommandBuffer &cmd) { auto &input_image = graph.get_physical_texture_resource(weight_input_res); diff --git a/ui/image_widget.cpp b/ui/image_widget.cpp index 861a8a41..687bae07 100644 --- a/ui/image_widget.cpp +++ b/ui/image_widget.cpp @@ -33,7 +33,7 @@ namespace UI { Image::Image(const std::string 
&path, vec2 target) { - texture = GRANITE_ASSET_MANAGER_IMAGES()->register_image_resource( + texture = GRANITE_ASSET_MANAGER()->register_image_resource( *GRANITE_FILESYSTEM(), path, ImageClass::Color); diff --git a/vulkan/context.hpp b/vulkan/context.hpp index eb2101c6..a2ab4f7c 100644 --- a/vulkan/context.hpp +++ b/vulkan/context.hpp @@ -41,7 +41,7 @@ namespace Granite { class Filesystem; class ThreadGroup; -class AssetManagerImages; +class AssetManager; } namespace Vulkan @@ -326,7 +326,7 @@ class Context Util::TimelineTraceFile *timeline_trace_file = nullptr; Granite::Filesystem *filesystem = nullptr; Granite::ThreadGroup *thread_group = nullptr; - Granite::AssetManagerImages *asset_manager_images = nullptr; + Granite::AssetManager *asset_manager = nullptr; }; void set_system_handles(const SystemHandles &handles_) diff --git a/vulkan/managers/resource_manager.cpp b/vulkan/managers/resource_manager.cpp index 841dc486..073d66ee 100644 --- a/vulkan/managers/resource_manager.cpp +++ b/vulkan/managers/resource_manager.cpp @@ -73,7 +73,7 @@ uint64_t ResourceManager::estimate_cost_image_resource(Granite::ImageAssetID, Gr void ResourceManager::init() { - manager = device->get_system_handles().asset_manager_images; + manager = device->get_system_handles().asset_manager; // Need to initialize these before setting the interface. 
{ @@ -242,7 +242,7 @@ const Vulkan::ImageView *ResourceManager::get_image_view_blocking(Granite::Image return &textures[id.id].image->get_view(); } -void ResourceManager::instantiate_image_resource(Granite::AssetManagerImages &manager_, Granite::TaskGroup *task, +void ResourceManager::instantiate_image_resource(Granite::AssetManager &manager_, Granite::TaskGroup *task, Granite::ImageAssetID id, Granite::File &file) { if (task) @@ -257,7 +257,7 @@ void ResourceManager::instantiate_image_resource(Granite::AssetManagerImages &ma } } -void ResourceManager::instantiate_image_resource(Granite::AssetManagerImages &manager_, +void ResourceManager::instantiate_image_resource(Granite::AssetManager &manager_, Granite::ImageAssetID id, Granite::File &file) { diff --git a/vulkan/managers/resource_manager.hpp b/vulkan/managers/resource_manager.hpp index 857d2cbd..3c129491 100644 --- a/vulkan/managers/resource_manager.hpp +++ b/vulkan/managers/resource_manager.hpp @@ -31,7 +31,7 @@ namespace Vulkan { class MemoryMappedTexture; -class ResourceManager final : private Granite::AssetInstantiatorImagesInterface +class ResourceManager final : private Granite::AssetInstantiatorInterface { public: explicit ResourceManager(Device *device); @@ -50,11 +50,11 @@ class ResourceManager final : private Granite::AssetInstantiatorImagesInterface private: Device *device; - Granite::AssetManagerImages *manager = nullptr; + Granite::AssetManager *manager = nullptr; void latch_handles() override; uint64_t estimate_cost_image_resource(Granite::ImageAssetID id, Granite::File &file) override; - void instantiate_image_resource(Granite::AssetManagerImages &manager, Granite::TaskGroup *task, + void instantiate_image_resource(Granite::AssetManager &manager, Granite::TaskGroup *task, Granite::ImageAssetID id, Granite::File &file) override; void release_image_resource(Granite::ImageAssetID id) override; void set_id_bounds(uint32_t bound) override; @@ -83,6 +83,6 @@ class ResourceManager final : private 
Granite::AssetInstantiatorImagesInterface ImageHandle create_other(const Granite::FileMapping &mapping, Granite::ImageClass image_class, Granite::ImageAssetID id); const ImageHandle &get_fallback_image(Granite::ImageClass image_class); - void instantiate_image_resource(Granite::AssetManagerImages &manager, Granite::ImageAssetID id, Granite::File &file); + void instantiate_image_resource(Granite::AssetManager &manager, Granite::ImageAssetID id, Granite::File &file); }; } From 0e2cbfcfabc3dcb0d3fa4956cfd2c249995af698 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Fri, 4 Aug 2023 10:12:57 +0200 Subject: [PATCH 56/71] Move toward making the AssetManager a bit more general. --- filesystem/asset_manager.cpp | 64 +++++++++--------- filesystem/asset_manager.hpp | 73 ++++++++++----------- renderer/common_renderer_data.cpp | 4 +- renderer/common_renderer_data.hpp | 2 +- renderer/ground.cpp | 14 ++-- renderer/ground.hpp | 2 +- renderer/lights/decal_volume.cpp | 6 +- renderer/lights/decal_volume.hpp | 2 +- renderer/material.hpp | 16 ++--- renderer/mesh_util.cpp | 14 ++-- renderer/mesh_util.hpp | 8 +-- renderer/post/smaa.cpp | 10 +-- renderer/sprite.hpp | 4 +- tests/asset_manager_test.cpp | 34 +++++----- tests/bandlimited_pixel_test.cpp | 3 +- tests/ui_sandbox.cpp | 16 ++--- tools/aa_bench.cpp | 10 ++- tools/convert_cube_to_environment.cpp | 2 +- tools/convert_equirect_to_environment.cpp | 3 +- tools/texture_viewer.cpp | 6 +- ui/image_widget.cpp | 4 +- ui/image_widget.hpp | 2 +- ui/widget.hpp | 4 +- vulkan/managers/resource_manager.cpp | 80 +++++++++++------------ vulkan/managers/resource_manager.hpp | 30 ++++----- 25 files changed, 208 insertions(+), 205 deletions(-) diff --git a/filesystem/asset_manager.cpp b/filesystem/asset_manager.cpp index 8b033a73..c591e45c 100644 --- a/filesystem/asset_manager.cpp +++ b/filesystem/asset_manager.cpp @@ -41,35 +41,35 @@ AssetManager::~AssetManager() pool.free(a); } -ImageAssetID 
AssetManager::register_image_resource_nolock(FileHandle file, ImageClass image_class, int prio) +AssetID AssetManager::register_asset_nolock(FileHandle file, AssetClass asset_class, int prio) { auto *info = pool.allocate(); info->handle = std::move(file); info->id.id = id_count++; info->prio = prio; - info->image_class = image_class; - ImageAssetID ret = info->id; + info->asset_class = asset_class; + AssetID ret = info->id; asset_bank.push_back(info); sorted_assets.reserve(asset_bank.size()); if (iface) { iface->set_id_bounds(id_count); - iface->set_image_class(info->id, image_class); + iface->set_asset_class(info->id, asset_class); } return ret; } -void AssetInstantiatorInterface::set_image_class(ImageAssetID, ImageClass) +void AssetInstantiatorInterface::set_asset_class(AssetID, AssetClass) { } -ImageAssetID AssetManager::register_image_resource(FileHandle file, ImageClass image_class, int prio) +AssetID AssetManager::register_asset(FileHandle file, AssetClass asset_class, int prio) { std::lock_guard holder{asset_bank_lock}; - return register_image_resource_nolock(std::move(file), image_class, prio); + return register_asset_nolock(std::move(file), asset_class, prio); } -ImageAssetID AssetManager::register_image_resource(Filesystem &fs, const std::string &path, ImageClass image_class, int prio) +AssetID AssetManager::register_asset(Filesystem &fs, const std::string &path, AssetClass asset_class, int prio) { std::lock_guard holder{asset_bank_lock}; @@ -82,13 +82,13 @@ ImageAssetID AssetManager::register_image_resource(Filesystem &fs, const std::st if (!file) return {}; - auto id = register_image_resource_nolock(std::move(file), image_class, prio); + auto id = register_asset_nolock(std::move(file), asset_class, prio); asset_bank[id.id]->set_hash(h.get()); file_to_assets.insert_replace(asset_bank[id.id]); return id; } -void AssetManager::update_cost(ImageAssetID id, uint64_t cost) +void AssetManager::update_cost(AssetID id, uint64_t cost) { std::lock_guard 
holder{cost_update_lock}; thread_cost_updates.push_back({ id, cost }); @@ -100,7 +100,7 @@ void AssetManager::set_asset_instantiator_interface(AssetInstantiatorInterface * { signal->wait_until_at_least(timestamp); for (uint32_t id = 0; id < id_count; id++) - iface->release_image_resource(ImageAssetID{id}); + iface->release_asset(AssetID{id}); } for (auto *a : asset_bank) @@ -116,26 +116,26 @@ void AssetManager::set_asset_instantiator_interface(AssetInstantiatorInterface * { iface->set_id_bounds(id_count); for (uint32_t i = 0; i < id_count; i++) - iface->set_image_class(ImageAssetID{i}, asset_bank[i]->image_class); + iface->set_asset_class(AssetID{i}, asset_bank[i]->asset_class); } } -void AssetManager::mark_used_resource(ImageAssetID id) +void AssetManager::mark_used_asset(AssetID id) { lru_append.push(id); } -void AssetManager::set_image_budget(uint64_t cost) +void AssetManager::set_asset_budget(uint64_t cost) { - image_budget = cost; + transfer_budget = cost; } -void AssetManager::set_image_budget_per_iteration(uint64_t cost) +void AssetManager::set_asset_budget_per_iteration(uint64_t cost) { - image_budget_per_iteration = cost; + transfer_budget_per_iteration = cost; } -bool AssetManager::set_image_residency_priority(ImageAssetID id, int prio) +bool AssetManager::set_asset_residency_priority(AssetID id, int prio) { std::lock_guard holder{asset_bank_lock}; if (id.id >= asset_bank.size()) @@ -178,7 +178,7 @@ void AssetManager::update_costs_locked_assets() void AssetManager::update_lru_locked_assets() { - lru_append.for_each_ranged([this](const ImageAssetID *id, size_t count) { + lru_append.for_each_ranged([this](const AssetID *id, size_t count) { for (size_t i = 0; i < count; i++) if (id[i].id < asset_bank.size()) asset_bank[id[i].id]->last_used = timestamp; @@ -186,7 +186,7 @@ void AssetManager::update_lru_locked_assets() lru_append.clear(); } -bool AssetManager::iterate_blocking(ThreadGroup &group, ImageAssetID id) +bool 
AssetManager::iterate_blocking(ThreadGroup &group, AssetID id) { if (!iface) return false; @@ -202,12 +202,12 @@ bool AssetManager::iterate_blocking(ThreadGroup &group, ImageAssetID id) if (candidate->consumed != 0 || candidate->pending_consumed != 0) return true; - uint64_t estimate = iface->estimate_cost_image_resource(candidate->id, *candidate->handle); + uint64_t estimate = iface->estimate_cost_asset(candidate->id, *candidate->handle); auto task = group.create_task(); task->set_task_class(TaskClass::Background); task->set_fence_counter_signal(signal.get()); task->set_desc("asset-manager-instantiate-single"); - iface->instantiate_image_resource(*this, task.get(), candidate->id, *candidate->handle); + iface->instantiate_asset(*this, task.get(), candidate->id, *candidate->handle); candidate->pending_consumed = estimate; candidate->last_used = timestamp; total_consumed += estimate; @@ -281,8 +281,8 @@ void AssetManager::iterate(ThreadGroup *group) // Activate in order from highest priority to lowest. 
bool can_activate = true; while (can_activate && - total_consumed < image_budget && - activated_cost_this_iteration < image_budget_per_iteration && + total_consumed < transfer_budget && + activated_cost_this_iteration < transfer_budget_per_iteration && activate_index != release_index) { auto *candidate = sorted_assets[activate_index]; @@ -296,26 +296,26 @@ void AssetManager::iterate(ThreadGroup *group) continue; } - uint64_t estimate = iface->estimate_cost_image_resource(candidate->id, *candidate->handle); + uint64_t estimate = iface->estimate_cost_asset(candidate->id, *candidate->handle); - can_activate = (total_consumed + estimate <= image_budget) || (candidate->prio >= persistent_prio()); + can_activate = (total_consumed + estimate <= transfer_budget) || (candidate->prio >= persistent_prio()); while (!can_activate && activate_index + 1 != release_index) { auto *release_candidate = sorted_assets[--release_index]; if (release_candidate->consumed) { LOGI("Releasing ID %u due to page-in pressure.\n", release_candidate->id.id); - iface->release_image_resource(release_candidate->id); + iface->release_asset(release_candidate->id); total_consumed -= release_candidate->consumed; release_candidate->consumed = 0; } - can_activate = total_consumed + estimate <= image_budget; + can_activate = total_consumed + estimate <= transfer_budget; } if (can_activate) { // We're trivially in budget. - iface->instantiate_image_resource(*this, task.get(), candidate->id, *candidate->handle); + iface->instantiate_asset(*this, task.get(), candidate->id, *candidate->handle); activation_count++; candidate->pending_consumed = estimate; @@ -328,7 +328,7 @@ void AssetManager::iterate(ThreadGroup *group) } // If we're 75% of budget, start garbage collecting non-resident resources ahead of time. 
- const uint64_t low_image_budget = (image_budget * 3) / 4; + const uint64_t low_image_budget = (transfer_budget * 3) / 4; const auto should_release = [&]() -> bool { if (release_index == activate_index) @@ -336,7 +336,7 @@ void AssetManager::iterate(ThreadGroup *group) if (sorted_assets[release_index - 1]->prio == persistent_prio()) return false; - if (total_consumed > image_budget) + if (total_consumed > transfer_budget) return true; else if (total_consumed > low_image_budget && sorted_assets[release_index - 1]->prio == 0) return true; @@ -351,7 +351,7 @@ void AssetManager::iterate(ThreadGroup *group) if (candidate->consumed) { LOGI("Releasing 0-prio ID %u due to page-in pressure.\n", candidate->id.id); - iface->release_image_resource(candidate->id); + iface->release_asset(candidate->id); total_consumed -= candidate->consumed; candidate->consumed = 0; candidate->last_used = 0; diff --git a/filesystem/asset_manager.hpp b/filesystem/asset_manager.hpp index 1ecb0ced..ebbc8ea0 100644 --- a/filesystem/asset_manager.hpp +++ b/filesystem/asset_manager.hpp @@ -32,43 +32,34 @@ namespace Granite { -struct ImageAssetID +struct AssetID { uint32_t id = uint32_t(-1); - ImageAssetID() = default; - explicit ImageAssetID(uint32_t id_) : id{id_} {} + AssetID() = default; + explicit AssetID(uint32_t id_) : id{id_} {} explicit inline operator bool() const { return id != uint32_t(-1); } - inline bool operator==(const ImageAssetID &other) const { return id == other.id; } - inline bool operator!=(const ImageAssetID &other) const { return !(*this == other); } -}; - -struct MeshAssetID -{ - uint32_t id = uint32_t(-1); - MeshAssetID() = default; - explicit MeshAssetID(uint32_t id_) : id{id_} {} - explicit inline operator bool() const { return id != uint32_t(-1); } - inline bool operator==(const MeshAssetID &other) const { return id == other.id; } - inline bool operator!=(const MeshAssetID &other) const { return !(*this == other); } + inline bool operator==(const AssetID &other) const { 
return id == other.id; } + inline bool operator!=(const AssetID &other) const { return !(*this == other); } }; class AssetManager; // If we have to fall back due to no image being present, // lets asset instantiator know what to substitute. -enum class ImageClass +enum class AssetClass { // Substitute with 0. - Zeroable, + ImageZeroable, // Substitute with missing color. - Color, + ImageColor, // Substitute with RG8_UNORM 0.5 - Normal, + ImageNormal, // Substitute with M = 0, R = 1. - MetallicRoughness, + ImageMetallicRoughness, // Substitute with mid-gray (0.5, 0.5, 0.5, 1.0) UNORM8. // Somewhat compatible with everything. - Generic + ImageGeneric, + Mesh }; class ThreadGroup; @@ -81,16 +72,16 @@ class AssetInstantiatorInterface virtual ~AssetInstantiatorInterface() = default; // This estimate should be an upper bound. - virtual uint64_t estimate_cost_image_resource(ImageAssetID id, File &mapping) = 0; + virtual uint64_t estimate_cost_asset(AssetID id, File &mapping) = 0; // When instantiation completes, manager.update_cost() must be called with the real cost. // The real cost may only be known after async parsing of the file. - virtual void instantiate_image_resource(AssetManager &manager, TaskGroup *group, ImageAssetID id, File &mapping) = 0; + virtual void instantiate_asset(AssetManager &manager, TaskGroup *group, AssetID id, File &mapping) = 0; // Will only be called after an upload completes through manager.update_cost(). - virtual void release_image_resource(ImageAssetID id) = 0; + virtual void release_asset(AssetID id) = 0; virtual void set_id_bounds(uint32_t bound) = 0; - virtual void set_image_class(ImageAssetID id, ImageClass image_class); + virtual void set_asset_class(AssetID id, AssetClass asset_class); // Called in AssetManager::iterate(). 
virtual void latch_handles() = 0; @@ -106,24 +97,26 @@ class AssetManager final : public AssetManagerInterface ~AssetManager() override; void set_asset_instantiator_interface(AssetInstantiatorInterface *iface); - void set_image_budget(uint64_t cost); - void set_image_budget_per_iteration(uint64_t cost); + + // We might want to consider different budgets per asset class. + void set_asset_budget(uint64_t cost); + void set_asset_budget_per_iteration(uint64_t cost); // FileHandle is intended to be used with FileSlice or similar here so that we don't need // a ton of open files at once. - ImageAssetID register_image_resource(FileHandle file, ImageClass image_class, int prio = 1); - ImageAssetID register_image_resource(Filesystem &fs, const std::string &path, ImageClass image_class, int prio = 1); + AssetID register_asset(FileHandle file, AssetClass asset_class, int prio = 1); + AssetID register_asset(Filesystem &fs, const std::string &path, AssetClass asset_class, int prio = 1); // Prio 0: Not resident, resource may not exist. - bool set_image_residency_priority(ImageAssetID id, int prio); + bool set_asset_residency_priority(AssetID id, int prio); // Intended to be called in Application::post_frame(). Not thread safe. // This function updates internal state. void iterate(ThreadGroup *group); - bool iterate_blocking(ThreadGroup &group, ImageAssetID id); + bool iterate_blocking(ThreadGroup &group, AssetID id); // Always thread safe, used by AssetInstantiatorInterfaces to update cost estimates. - void update_cost(ImageAssetID id, uint64_t cost); + void update_cost(AssetID id, uint64_t cost); // May be called concurrently, except when calling iterate(). uint64_t get_current_total_consumed() const; @@ -131,7 +124,7 @@ class AssetManager final : public AssetManagerInterface // May be called concurrently, except when calling iterate(). // Intended to be called by asset instantiator interface or similar. // When a resource is actually accessed, this is called. 
- void mark_used_resource(ImageAssetID id); + void mark_used_asset(AssetID id); private: struct AssetInfo : Util::IntrusiveHashMapEnabled @@ -140,8 +133,8 @@ class AssetManager final : public AssetManagerInterface uint64_t consumed = 0; uint64_t last_used = 0; FileHandle handle; - ImageAssetID id = {}; - ImageClass image_class = ImageClass::Zeroable; + AssetID id = {}; + AssetClass asset_class = AssetClass::ImageZeroable; int prio = 0; }; @@ -149,20 +142,20 @@ class AssetManager final : public AssetManagerInterface std::mutex asset_bank_lock; std::vector asset_bank; Util::ObjectPool pool; - Util::AtomicAppendBuffer lru_append; + Util::AtomicAppendBuffer lru_append; Util::IntrusiveHashMapHolder file_to_assets; AssetInstantiatorInterface *iface = nullptr; uint32_t id_count = 0; uint64_t total_consumed = 0; - uint64_t image_budget = 0; - uint64_t image_budget_per_iteration = 0; + uint64_t transfer_budget = 0; + uint64_t transfer_budget_per_iteration = 0; uint64_t timestamp = 1; uint32_t blocking_signals = 0; struct CostUpdate { - ImageAssetID id; + AssetID id; uint64_t cost = 0; }; std::mutex cost_update_lock; @@ -171,7 +164,7 @@ class AssetManager final : public AssetManagerInterface void adjust_update(const CostUpdate &update); std::unique_ptr signal; - ImageAssetID register_image_resource_nolock(FileHandle file, ImageClass image_class, int prio); + AssetID register_asset_nolock(FileHandle file, AssetClass asset_class, int prio); void update_costs_locked_assets(); void update_lru_locked_assets(); diff --git a/renderer/common_renderer_data.cpp b/renderer/common_renderer_data.cpp index dc9fda34..b1a178d1 100644 --- a/renderer/common_renderer_data.cpp +++ b/renderer/common_renderer_data.cpp @@ -112,7 +112,7 @@ void LightMesh::on_device_destroyed(const Vulkan::DeviceCreatedEvent &) void CommonRendererData::initialize_static_assets(AssetManager *iface, Filesystem *fs) { LOGI("Initializing static assets.\n"); - brdf_tables = iface->register_image_resource(*fs, 
"builtin://textures/ibl_brdf_lut.gtx", ImageClass::Zeroable, - AssetManager::persistent_prio()); + brdf_tables = iface->register_asset(*fs, "builtin://textures/ibl_brdf_lut.gtx", AssetClass::ImageZeroable, + AssetManager::persistent_prio()); } } diff --git a/renderer/common_renderer_data.hpp b/renderer/common_renderer_data.hpp index 4e147860..0465a926 100644 --- a/renderer/common_renderer_data.hpp +++ b/renderer/common_renderer_data.hpp @@ -57,7 +57,7 @@ class CommonRendererData final : public CommonRendererDataInterface { public: LightMesh light_mesh; - ImageAssetID brdf_tables; + AssetID brdf_tables; void initialize_static_assets(AssetManager *iface, Filesystem *file_iface); }; } \ No newline at end of file diff --git a/renderer/ground.cpp b/renderer/ground.cpp index c25def4a..57072e87 100644 --- a/renderer/ground.cpp +++ b/renderer/ground.cpp @@ -182,12 +182,14 @@ Ground::Ground(unsigned size_, const TerrainInfo &info_) num_patches_z = size / info.base_patch_size; patch_lods.resize(num_patches_x * num_patches_z); - heights = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), info.heightmap, ImageClass::Zeroable); - normals = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), info.normalmap, ImageClass::Normal); - occlusion = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), info.occlusionmap, ImageClass::Zeroable); - normals_fine = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), info.normalmap_fine, ImageClass::Normal); - base_color = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), info.base_color, ImageClass::Color); - type_map = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), info.splatmap, ImageClass::Zeroable); + heights = GRANITE_ASSET_MANAGER()->register_asset(*GRANITE_FILESYSTEM(), info.heightmap, AssetClass::ImageZeroable); + normals = GRANITE_ASSET_MANAGER()->register_asset(*GRANITE_FILESYSTEM(), info.normalmap, 
AssetClass::ImageNormal); + occlusion = GRANITE_ASSET_MANAGER()->register_asset(*GRANITE_FILESYSTEM(), info.occlusionmap, + AssetClass::ImageZeroable); + normals_fine = GRANITE_ASSET_MANAGER()->register_asset(*GRANITE_FILESYSTEM(), info.normalmap_fine, + AssetClass::ImageNormal); + base_color = GRANITE_ASSET_MANAGER()->register_asset(*GRANITE_FILESYSTEM(), info.base_color, AssetClass::ImageColor); + type_map = GRANITE_ASSET_MANAGER()->register_asset(*GRANITE_FILESYSTEM(), info.splatmap, AssetClass::ImageZeroable); EVENT_MANAGER_REGISTER_LATCH(Ground, on_device_created, on_device_destroyed, DeviceCreatedEvent); } diff --git a/renderer/ground.hpp b/renderer/ground.hpp index 27333870..3377f711 100644 --- a/renderer/ground.hpp +++ b/renderer/ground.hpp @@ -149,7 +149,7 @@ class Ground : public Util::IntrusivePtrEnabled, public PerFrameRefresha void refresh(const RenderContext &context, TaskComposer &composer) override; - ImageAssetID heights, normals, occlusion, normals_fine, base_color, type_map; + AssetID heights, normals, occlusion, normals_fine, base_color, type_map; Vulkan::ImageHandle lod_map; void on_device_created(const Vulkan::DeviceCreatedEvent &e); void on_device_destroyed(const Vulkan::DeviceCreatedEvent &e); diff --git a/renderer/lights/decal_volume.cpp b/renderer/lights/decal_volume.cpp index 3e5e9cb6..30095675 100644 --- a/renderer/lights/decal_volume.cpp +++ b/renderer/lights/decal_volume.cpp @@ -29,9 +29,9 @@ namespace Granite { VolumetricDecal::VolumetricDecal() { - tex = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), - "builtin://textures/decal.png", - ImageClass::Color); + tex = GRANITE_ASSET_MANAGER()->register_asset(*GRANITE_FILESYSTEM(), + "builtin://textures/decal.png", + AssetClass::ImageColor); } const Vulkan::ImageView *VolumetricDecal::get_decal_view(Vulkan::Device &device) const diff --git a/renderer/lights/decal_volume.hpp b/renderer/lights/decal_volume.hpp index 62f86d55..062370cf 100644 --- 
a/renderer/lights/decal_volume.hpp +++ b/renderer/lights/decal_volume.hpp @@ -38,6 +38,6 @@ class VolumetricDecal static const AABB &get_static_aabb(); private: - ImageAssetID tex; + AssetID tex; }; } diff --git a/renderer/material.hpp b/renderer/material.hpp index b7c4ce1e..c051a83d 100644 --- a/renderer/material.hpp +++ b/renderer/material.hpp @@ -85,19 +85,19 @@ struct Material { info = std::move(info_); - static const ImageClass image_classes[] = { - ImageClass::Color, - ImageClass::Normal, - ImageClass::MetallicRoughness, - ImageClass::Color, - ImageClass::Color, + static const AssetClass image_classes[] = { + AssetClass::ImageColor, + AssetClass::ImageNormal, + AssetClass::ImageMetallicRoughness, + AssetClass::ImageColor, + AssetClass::ImageColor, }; for (unsigned i = 0; i < Util::ecast(TextureKind::Count); i++) { if (!info.paths[i].empty()) { - textures[i] = GRANITE_ASSET_MANAGER()->register_image_resource( + textures[i] = GRANITE_ASSET_MANAGER()->register_asset( *GRANITE_FILESYSTEM(), info.paths[i], image_classes[i]); } } @@ -116,7 +116,7 @@ struct Material return info; } - ImageAssetID textures[Util::ecast(TextureKind::Count)]; + AssetID textures[Util::ecast(TextureKind::Count)]; bool needs_emissive = false; uint32_t shader_variant = 0; diff --git a/renderer/mesh_util.cpp b/renderer/mesh_util.cpp index 69e0b682..f89f2981 100644 --- a/renderer/mesh_util.cpp +++ b/renderer/mesh_util.cpp @@ -890,8 +890,8 @@ SkyCylinder::SkyCylinder(const std::string &bg_path) { if (!bg_path.empty()) { - texture = GRANITE_ASSET_MANAGER()->register_image_resource( - *GRANITE_FILESYSTEM(), bg_path, ImageClass::Color); + texture = GRANITE_ASSET_MANAGER()->register_asset( + *GRANITE_FILESYSTEM(), bg_path, AssetClass::ImageColor); } EVENT_MANAGER_REGISTER_LATCH(SkyCylinder, on_device_created, on_device_destroyed, DeviceCreatedEvent); @@ -1056,12 +1056,12 @@ Skybox::Skybox(const std::string &bg_path) { if (!bg_path.empty()) { - texture = 
GRANITE_ASSET_MANAGER()->register_image_resource( - *GRANITE_FILESYSTEM(), bg_path, ImageClass::Color); + texture = GRANITE_ASSET_MANAGER()->register_asset( + *GRANITE_FILESYSTEM(), bg_path, AssetClass::ImageColor); } } -void Skybox::set_image(ImageAssetID skybox) +void Skybox::set_image(AssetID skybox) { texture = skybox; } @@ -1195,8 +1195,8 @@ static void texture_plane_render(CommandBuffer &cmd, const RenderQueueData *info TexturePlane::TexturePlane(const std::string &normal_path) { - normalmap = GRANITE_ASSET_MANAGER()->register_image_resource( - *GRANITE_FILESYSTEM(), normal_path, ImageClass::Normal); + normalmap = GRANITE_ASSET_MANAGER()->register_asset( + *GRANITE_FILESYSTEM(), normal_path, AssetClass::ImageNormal); } void TexturePlane::setup_render_pass_resources(RenderGraph &graph) diff --git a/renderer/mesh_util.hpp b/renderer/mesh_util.hpp index 3394f788..1b1b4fc1 100644 --- a/renderer/mesh_util.hpp +++ b/renderer/mesh_util.hpp @@ -195,7 +195,7 @@ class Skybox : public AbstractRenderable, public EventHandler { public: Skybox(const std::string &bg_path = ""); - void set_image(ImageAssetID skybox); + void set_image(AssetID skybox); void get_render_info(const RenderContext &context, const RenderInfoComponent *transform, RenderQueue &queue) const override; @@ -207,7 +207,7 @@ class Skybox : public AbstractRenderable, public EventHandler private: vec3 color = vec3(1.0f); - ImageAssetID texture; + AssetID texture; }; class SkyCylinder : public AbstractRenderable, public EventHandler @@ -231,7 +231,7 @@ class SkyCylinder : public AbstractRenderable, public EventHandler private: vec3 color = vec3(1.0f); float scale = 1.0f; - ImageAssetID texture; + AssetID texture; void on_device_created(const Vulkan::DeviceCreatedEvent &event); void on_device_destroyed(const Vulkan::DeviceCreatedEvent &event); @@ -284,7 +284,7 @@ class TexturePlane : public AbstractRenderable, public RenderPassCreator private: const Vulkan::ImageView *reflection = nullptr; const 
Vulkan::ImageView *refraction = nullptr; - ImageAssetID normalmap; + AssetID normalmap; RenderQueue internal_queue; vec3 position; diff --git a/renderer/post/smaa.cpp b/renderer/post/smaa.cpp index 8952fcb7..1912bc0f 100644 --- a/renderer/post/smaa.cpp +++ b/renderer/post/smaa.cpp @@ -145,10 +145,12 @@ void setup_smaa_postprocess(RenderGraph &graph, TemporalJitter &jitter, return true; }); - auto area = GRANITE_ASSET_MANAGER()->register_image_resource( - *GRANITE_FILESYSTEM(), "builtin://textures/smaa/area.gtx", ImageClass::Zeroable, AssetManager::persistent_prio()); - auto search = GRANITE_ASSET_MANAGER()->register_image_resource( - *GRANITE_FILESYSTEM(), "builtin://textures/smaa/search.gtx", ImageClass::Zeroable, AssetManager::persistent_prio()); + auto area = GRANITE_ASSET_MANAGER()->register_asset( + *GRANITE_FILESYSTEM(), "builtin://textures/smaa/area.gtx", AssetClass::ImageZeroable, + AssetManager::persistent_prio()); + auto search = GRANITE_ASSET_MANAGER()->register_asset( + *GRANITE_FILESYSTEM(), "builtin://textures/smaa/search.gtx", AssetClass::ImageZeroable, + AssetManager::persistent_prio()); smaa_weight.set_build_render_pass([&, area, search, edge = masked_edge, q = smaa_quality](Vulkan::CommandBuffer &cmd) { auto &input_image = graph.get_physical_texture_resource(weight_input_res); diff --git a/renderer/sprite.hpp b/renderer/sprite.hpp index bb2446ff..7b078e5b 100644 --- a/renderer/sprite.hpp +++ b/renderer/sprite.hpp @@ -61,8 +61,8 @@ struct SpriteRenderInfo struct Sprite : AbstractRenderable { DrawPipeline pipeline = DrawPipeline::Opaque; - ImageAssetID texture; - ImageAssetID texture_alt; + AssetID texture; + AssetID texture_alt; Vulkan::StockSampler sampler = Vulkan::StockSampler::LinearWrap; enum ShaderVariantFlagBits diff --git a/tests/asset_manager_test.cpp b/tests/asset_manager_test.cpp index 7f1bb3db..0a70e4ce 100644 --- a/tests/asset_manager_test.cpp +++ b/tests/asset_manager_test.cpp @@ -6,18 +6,18 @@ using namespace Granite; struct 
ActivationInterface final : AssetInstantiatorInterface { - uint64_t estimate_cost_image_resource(ImageAssetID, File &mapping) override + uint64_t estimate_cost_asset(AssetID, File &mapping) override { return mapping.get_size(); } - void instantiate_image_resource(AssetManager &manager, TaskGroup *, ImageAssetID id, File &mapping) override + void instantiate_asset(AssetManager &manager, TaskGroup *, AssetID id, File &mapping) override { LOGI("Instantiating ID: %u\n", id.id); manager.update_cost(id, mapping.get_size()); } - void release_image_resource(ImageAssetID id) override + void release_asset(AssetID id) override { LOGI("Releasing ID: %u\n", id.id); } @@ -54,29 +54,29 @@ int main() auto d = fs.open("tmp://d"); auto e = fs.open("tmp://e"); - auto id_a = manager.register_image_resource(std::move(a), ImageClass::Zeroable); - auto id_b = manager.register_image_resource(std::move(b), ImageClass::Zeroable); - auto id_c = manager.register_image_resource(std::move(c), ImageClass::Zeroable); - auto id_d = manager.register_image_resource(std::move(d), ImageClass::Zeroable); + auto id_a = manager.register_asset(std::move(a), AssetClass::ImageZeroable); + auto id_b = manager.register_asset(std::move(b), AssetClass::ImageZeroable); + auto id_c = manager.register_asset(std::move(c), AssetClass::ImageZeroable); + auto id_d = manager.register_asset(std::move(d), AssetClass::ImageZeroable); manager.set_asset_instantiator_interface(&iface); - auto id_e = manager.register_image_resource(std::move(e), ImageClass::Zeroable); + auto id_e = manager.register_asset(std::move(e), AssetClass::ImageZeroable); - manager.set_image_budget(25); - manager.set_image_budget_per_iteration(5); + manager.set_asset_budget(25); + manager.set_asset_budget_per_iteration(5); - manager.set_image_residency_priority(id_a, 1); - manager.set_image_residency_priority(id_b, 1); - manager.set_image_residency_priority(id_c, 1); - manager.set_image_residency_priority(id_d, 1); - 
manager.set_image_residency_priority(id_e, 2); + manager.set_asset_residency_priority(id_a, 1); + manager.set_asset_residency_priority(id_b, 1); + manager.set_asset_residency_priority(id_c, 1); + manager.set_asset_residency_priority(id_d, 1); + manager.set_asset_residency_priority(id_e, 2); manager.iterate(nullptr); LOGI("Cost: %u\n", unsigned(manager.get_current_total_consumed())); manager.iterate(nullptr); LOGI("Cost: %u\n", unsigned(manager.get_current_total_consumed())); - manager.set_image_residency_priority(id_e, 0); + manager.set_asset_residency_priority(id_e, 0); manager.iterate(nullptr); LOGI("Cost: %u\n", unsigned(manager.get_current_total_consumed())); - manager.set_image_budget(10); + manager.set_asset_budget(10); manager.iterate(nullptr); LOGI("Cost: %u\n", unsigned(manager.get_current_total_consumed())); } \ No newline at end of file diff --git a/tests/bandlimited_pixel_test.cpp b/tests/bandlimited_pixel_test.cpp index 4e9d34f1..83fa0f36 100644 --- a/tests/bandlimited_pixel_test.cpp +++ b/tests/bandlimited_pixel_test.cpp @@ -117,7 +117,8 @@ struct BandlimitedPixelTestApplication : Application, EventHandler { "BANDLIMITED_PIXEL_USE_TRANSCENDENTAL", 1 }, }); - auto texture = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), "assets://textures/sprite.png", ImageClass::Color); + auto texture = GRANITE_ASSET_MANAGER()->register_asset(*GRANITE_FILESYSTEM(), "assets://textures/sprite.png", + AssetClass::ImageColor); auto *view = cmd->get_device().get_resource_manager().get_image_view_blocking(texture); cmd->set_texture(2, 0, *view, mode == 0 ? 
StockSampler::NearestWrap : StockSampler::TrilinearWrap); diff --git a/tests/ui_sandbox.cpp b/tests/ui_sandbox.cpp index c2d7b444..faddfb44 100644 --- a/tests/ui_sandbox.cpp +++ b/tests/ui_sandbox.cpp @@ -50,8 +50,8 @@ UIApplication::UIApplication() window->show_title_bar(false); window->set_floating(false); window->set_background_color(vec4(0.0f, 1.0f, 0.0f, 1.0f)); - window->set_background_image(GRANITE_ASSET_MANAGER()->register_image_resource( - *GRANITE_FILESYSTEM(), "builtin://textures/checkerboard.png", ImageClass::Color)); + window->set_background_image(GRANITE_ASSET_MANAGER()->register_asset( + *GRANITE_FILESYSTEM(), "builtin://textures/checkerboard.png", AssetClass::ImageColor)); auto button = make_handle(); window->add_child(button); @@ -90,8 +90,8 @@ UIApplication::UIApplication() slider->show_value(false); slider->set_margin(5.0f); slider->show_tooltip(true); - slider->set_background_image(GRANITE_ASSET_MANAGER()->register_image_resource( - *GRANITE_FILESYSTEM(), "builtin://textures/checkerboard.png", ImageClass::Color)); + slider->set_background_image(GRANITE_ASSET_MANAGER()->register_asset( + *GRANITE_FILESYSTEM(), "builtin://textures/checkerboard.png", AssetClass::ImageColor)); slider->set_background_color(vec4(1.0f)); } @@ -111,8 +111,8 @@ UIApplication::UIApplication() sli.show_value(false); sli.set_margin(5.0f); sli.show_tooltip(true); - sli.set_background_image(GRANITE_ASSET_MANAGER()->register_image_resource( - *GRANITE_FILESYSTEM(), "builtin://textures/checkerboard.png", ImageClass::Color)); + sli.set_background_image(GRANITE_ASSET_MANAGER()->register_asset( + *GRANITE_FILESYSTEM(), "builtin://textures/checkerboard.png", AssetClass::ImageColor)); sli.set_background_color(vec4(1.0f)); } @@ -126,8 +126,8 @@ UIApplication::UIApplication() btn.set_text("Mjuu"); btn.set_toggled_font_color(vec4(0.0f, 1.0f, 0.0f, 1.0f)); btn.set_untoggled_font_color(vec4(1.0f, 0.0f, 0.0f, 1.0f)); - 
btn.set_background_image(GRANITE_ASSET_MANAGER()->register_image_resource( - *GRANITE_FILESYSTEM(), "builtin://textures/checkerboard.png", ImageClass::Color)); + btn.set_background_image(GRANITE_ASSET_MANAGER()->register_asset( + *GRANITE_FILESYSTEM(), "builtin://textures/checkerboard.png", AssetClass::ImageColor)); btn.set_background_color(vec4(1.0f)); } } diff --git a/tools/aa_bench.cpp b/tools/aa_bench.cpp index 21b73cc3..77df0433 100644 --- a/tools/aa_bench.cpp +++ b/tools/aa_bench.cpp @@ -27,7 +27,7 @@ class AABenchApplication : public Application, public EventHandler void on_swapchain_changed(const SwapchainParameterEvent &e); void on_swapchain_destroyed(const SwapchainParameterEvent &e); - ImageAssetID images[2] = {}; + AssetID images[2] = {}; RenderGraph graph; TemporalJitter jitter; RenderContext render_context; @@ -39,8 +39,12 @@ AABenchApplication::AABenchApplication(const std::string &input0, const std::str : input_path0(input0), input_path1(input1), scale(scale_) { type = string_to_post_antialiasing_type(method); - images[0] = input_path0.empty() ? ImageAssetID{} : GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), input_path0, ImageClass::Color); - images[1] = input_path1.empty() ? ImageAssetID{} : GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), input_path1, ImageClass::Color); + images[0] = input_path0.empty() ? AssetID{} : GRANITE_ASSET_MANAGER()->register_asset(*GRANITE_FILESYSTEM(), + input_path0, + AssetClass::ImageColor); + images[1] = input_path1.empty() ? 
AssetID{} : GRANITE_ASSET_MANAGER()->register_asset(*GRANITE_FILESYSTEM(), + input_path1, + AssetClass::ImageColor); EVENT_MANAGER_REGISTER_LATCH(AABenchApplication, on_swapchain_changed, on_swapchain_destroyed, SwapchainParameterEvent); EVENT_MANAGER_REGISTER_LATCH(AABenchApplication, on_device_created, on_device_destroyed, DeviceCreatedEvent); } diff --git a/tools/convert_cube_to_environment.cpp b/tools/convert_cube_to_environment.cpp index d1659cc8..857ae3ac 100644 --- a/tools/convert_cube_to_environment.cpp +++ b/tools/convert_cube_to_environment.cpp @@ -81,7 +81,7 @@ int main(int argc, char *argv[]) device.set_context(context); device.init_external_swapchain({ ImageHandle(nullptr) }); - auto cube = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), args.cube, ImageClass::Color); + auto cube = GRANITE_ASSET_MANAGER()->register_asset(*GRANITE_FILESYSTEM(), args.cube, AssetClass::ImageColor); auto *view = device.get_resource_manager().get_image_view_blocking(cube); auto specular = convert_cube_to_ibl_specular(device, *view); auto diffuse = convert_cube_to_ibl_diffuse(device, *view); diff --git a/tools/convert_equirect_to_environment.cpp b/tools/convert_equirect_to_environment.cpp index 7b6252d3..48856159 100644 --- a/tools/convert_equirect_to_environment.cpp +++ b/tools/convert_equirect_to_environment.cpp @@ -86,7 +86,8 @@ int main(int argc, char *argv[]) device.init_external_swapchain({ ImageHandle(nullptr) }); auto &textures = device.get_resource_manager(); - auto equirect = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), args.equirect, ImageClass::Color); + auto equirect = GRANITE_ASSET_MANAGER()->register_asset(*GRANITE_FILESYSTEM(), args.equirect, + AssetClass::ImageColor); auto *view = textures.get_image_view_blocking(equirect); auto cube = convert_equirect_to_cube(device, *view, args.cube_scale); diff --git a/tools/texture_viewer.cpp b/tools/texture_viewer.cpp index 6cd40d83..61704918 100644 --- 
a/tools/texture_viewer.cpp +++ b/tools/texture_viewer.cpp @@ -37,8 +37,8 @@ struct TextureViewerApplication : Granite::Application, Granite::EventHandler TextureViewerApplication(std::string path_) : path(std::move(path_)) { - texture = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), - path, ImageClass::Color); + texture = GRANITE_ASSET_MANAGER()->register_asset(*GRANITE_FILESYSTEM(), + path, AssetClass::ImageColor); EVENT_MANAGER_REGISTER(TextureViewerApplication, on_key_pressed, KeyboardEvent); } @@ -138,7 +138,7 @@ struct TextureViewerApplication : Granite::Application, Granite::EventHandler unsigned layer = 0; unsigned level = 0; - ImageAssetID texture; + AssetID texture; std::string path; VkComponentMapping swiz = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A }; }; diff --git a/ui/image_widget.cpp b/ui/image_widget.cpp index 687bae07..67ca4a75 100644 --- a/ui/image_widget.cpp +++ b/ui/image_widget.cpp @@ -33,9 +33,9 @@ namespace UI { Image::Image(const std::string &path, vec2 target) { - texture = GRANITE_ASSET_MANAGER()->register_image_resource( + texture = GRANITE_ASSET_MANAGER()->register_asset( *GRANITE_FILESYSTEM(), path, - ImageClass::Color); + AssetClass::ImageColor); geometry.minimum = target; geometry.target = target; diff --git a/ui/image_widget.hpp b/ui/image_widget.hpp index 39575f31..70f2ecd9 100644 --- a/ui/image_widget.hpp +++ b/ui/image_widget.hpp @@ -45,7 +45,7 @@ class Image : public Widget private: float render(FlatRenderer &renderer, float layout, vec2 offset, vec2 size) override; void reconfigure_to_canvas(vec2 offset, vec2 size) override; - ImageAssetID texture; + AssetID texture; Vulkan::StockSampler sampler = Vulkan::StockSampler::LinearClamp; vec2 sprite_offset; diff --git a/ui/widget.hpp b/ui/widget.hpp index e13a7b3e..5ede6134 100644 --- a/ui/widget.hpp +++ b/ui/widget.hpp @@ -129,7 +129,7 @@ class Widget : public Util::IntrusivePtrEnabled needs_redraw = true; 
} - void set_background_image(ImageAssetID texture) + void set_background_image(AssetID texture) { bg_image = texture; needs_redraw = true; @@ -181,7 +181,7 @@ class Widget : public Util::IntrusivePtrEnabled vec2 floating_position = vec2(0.0f); vec4 bg_color = vec4(1.0f, 1.0f, 1.0f, 0.0f); - ImageAssetID bg_image; + AssetID bg_image; bool needs_redraw = true; bool floating = false; diff --git a/vulkan/managers/resource_manager.cpp b/vulkan/managers/resource_manager.cpp index 073d66ee..8b2fc788 100644 --- a/vulkan/managers/resource_manager.cpp +++ b/vulkan/managers/resource_manager.cpp @@ -45,27 +45,27 @@ ResourceManager::~ResourceManager() void ResourceManager::set_id_bounds(uint32_t bound) { - textures.resize(bound); + assets.resize(bound); views.resize(bound); } -void ResourceManager::set_image_class(Granite::ImageAssetID id, Granite::ImageClass image_class) +void ResourceManager::set_asset_class(Granite::AssetID id, Granite::AssetClass asset_class) { if (id) { - textures[id.id].image_class = image_class; + assets[id.id].asset_class = asset_class; if (!views[id.id]) - views[id.id] = &get_fallback_image(image_class)->get_view(); + views[id.id] = &get_fallback_image(asset_class)->get_view(); } } -void ResourceManager::release_image_resource(Granite::ImageAssetID id) +void ResourceManager::release_asset(Granite::AssetID id) { if (id) - textures[id.id].image.reset(); + assets[id.id].image.reset(); } -uint64_t ResourceManager::estimate_cost_image_resource(Granite::ImageAssetID, Granite::File &file) +uint64_t ResourceManager::estimate_cost_asset(Granite::AssetID, Granite::File &file) { // TODO: When we get compressed BC/ASTC, this will have to change. return file.get_size(); @@ -116,14 +116,14 @@ void ResourceManager::init() } LOGI("Using texture budget of %u MiB.\n", unsigned(size / (1024 * 1024))); - manager->set_image_budget(size); + manager->set_asset_budget(size); // This is somewhat arbitrary. 
- manager->set_image_budget_per_iteration(2 * 1000 * 1000); + manager->set_asset_budget_per_iteration(2 * 1000 * 1000); } } -ImageHandle ResourceManager::create_gtx(const MemoryMappedTexture &mapped_file, Granite::ImageAssetID id) +ImageHandle ResourceManager::create_gtx(const MemoryMappedTexture &mapped_file, Granite::AssetID id) { if (mapped_file.empty()) return {}; @@ -189,13 +189,13 @@ ImageHandle ResourceManager::create_gtx(const MemoryMappedTexture &mapped_file, if (image) { - auto name = Util::join("ImageAssetID-", id.id); + auto name = Util::join("AssetID-", id.id); device->set_name(*image, name.c_str()); } return image; } -ImageHandle ResourceManager::create_gtx(Granite::FileMappingHandle mapping, Granite::ImageAssetID id) +ImageHandle ResourceManager::create_gtx(Granite::FileMappingHandle mapping, Granite::AssetID id) { MemoryMappedTexture mapped_file; if (!mapped_file.map_read(std::move(mapping))) @@ -207,27 +207,27 @@ ImageHandle ResourceManager::create_gtx(Granite::FileMappingHandle mapping, Gran return create_gtx(mapped_file, id); } -ImageHandle ResourceManager::create_other(const Granite::FileMapping &mapping, Granite::ImageClass image_class, - Granite::ImageAssetID id) +ImageHandle ResourceManager::create_other(const Granite::FileMapping &mapping, Granite::AssetClass asset_class, + Granite::AssetID id) { auto tex = load_texture_from_memory(mapping.data(), - mapping.get_size(), image_class == Granite::ImageClass::Color ? + mapping.get_size(), asset_class == Granite::AssetClass::ImageColor ? 
ColorSpace::sRGB : ColorSpace::Linear); return create_gtx(tex, id); } -const Vulkan::ImageView *ResourceManager::get_image_view_blocking(Granite::ImageAssetID id) +const Vulkan::ImageView *ResourceManager::get_image_view_blocking(Granite::AssetID id) { std::unique_lock holder{lock}; - if (id.id >= textures.size()) + if (id.id >= assets.size()) { LOGE("ID %u is out of bounds.\n", id.id); return nullptr; } - if (textures[id.id].image) - return &textures[id.id].image->get_view(); + if (assets[id.id].image) + return &assets[id.id].image->get_view(); if (!manager->iterate_blocking(*device->get_system_handles().thread_group, id)) { @@ -236,30 +236,30 @@ const Vulkan::ImageView *ResourceManager::get_image_view_blocking(Granite::Image } cond.wait(holder, [this, id]() -> bool { - return bool(textures[id.id].image); + return bool(assets[id.id].image); }); - return &textures[id.id].image->get_view(); + return &assets[id.id].image->get_view(); } -void ResourceManager::instantiate_image_resource(Granite::AssetManager &manager_, Granite::TaskGroup *task, - Granite::ImageAssetID id, Granite::File &file) +void ResourceManager::instantiate_asset(Granite::AssetManager &manager_, Granite::TaskGroup *task, + Granite::AssetID id, Granite::File &file) { if (task) { task->enqueue_task([this, &manager_, &file, id]() { - instantiate_image_resource(manager_, id, file); + instantiate_asset(manager_, id, file); }); } else { - instantiate_image_resource(manager_, id, file); + instantiate_asset(manager_, id, file); } } -void ResourceManager::instantiate_image_resource(Granite::AssetManager &manager_, - Granite::ImageAssetID id, - Granite::File &file) +void ResourceManager::instantiate_asset(Granite::AssetManager &manager_, + Granite::AssetID id, + Granite::File &file) { ImageHandle image; if (file.get_size()) @@ -270,7 +270,7 @@ void ResourceManager::instantiate_image_resource(Granite::AssetManager &manager_ if (MemoryMappedTexture::is_header(mapping->data(), mapping->get_size())) image = 
create_gtx(std::move(mapping), id); else - image = create_other(*mapping, textures[id.id].image_class, id); + image = create_other(*mapping, assets[id.id].asset_class, id); } else LOGE("Failed to map file.\n"); @@ -280,26 +280,26 @@ void ResourceManager::instantiate_image_resource(Granite::AssetManager &manager_ // Have to signal something. if (!image) - image = get_fallback_image(textures[id.id].image_class); + image = get_fallback_image(assets[id.id].asset_class); std::lock_guard holder{lock}; updates.push_back(id); - textures[id.id].image = std::move(image); + assets[id.id].image = std::move(image); cond.notify_all(); } -const ImageHandle &ResourceManager::get_fallback_image(Granite::ImageClass image_class) +const ImageHandle &ResourceManager::get_fallback_image(Granite::AssetClass asset_class) { - switch (image_class) + switch (asset_class) { default: - case Granite::ImageClass::Zeroable: + case Granite::AssetClass::ImageZeroable: return fallback_zero; - case Granite::ImageClass::Color: + case Granite::AssetClass::ImageColor: return fallback_color; - case Granite::ImageClass::Normal: + case Granite::AssetClass::ImageNormal: return fallback_normal; - case Granite::ImageClass::MetallicRoughness: + case Granite::AssetClass::ImageMetallicRoughness: return fallback_pbr; } } @@ -314,13 +314,13 @@ void ResourceManager::latch_handles() const ImageView *view; - if (textures[update.id].image) + if (assets[update.id].image) { - view = &textures[update.id].image->get_view(); + view = &assets[update.id].image->get_view(); } else { - auto &img = get_fallback_image(textures[update.id].image_class); + auto &img = get_fallback_image(assets[update.id].asset_class); view = &img->get_view(); } diff --git a/vulkan/managers/resource_manager.hpp b/vulkan/managers/resource_manager.hpp index 3c129491..ebb2e2dd 100644 --- a/vulkan/managers/resource_manager.hpp +++ b/vulkan/managers/resource_manager.hpp @@ -38,7 +38,7 @@ class ResourceManager final : private 
Granite::AssetInstantiatorInterface ~ResourceManager() override; void init(); - inline const Vulkan::ImageView *get_image_view(Granite::ImageAssetID id) const + inline const Vulkan::ImageView *get_image_view(Granite::AssetID id) const { if (id.id < views.size()) return views[id.id]; @@ -46,43 +46,43 @@ class ResourceManager final : private Granite::AssetInstantiatorInterface return nullptr; } - const Vulkan::ImageView *get_image_view_blocking(Granite::ImageAssetID id); + const Vulkan::ImageView *get_image_view_blocking(Granite::AssetID id); private: Device *device; Granite::AssetManager *manager = nullptr; void latch_handles() override; - uint64_t estimate_cost_image_resource(Granite::ImageAssetID id, Granite::File &file) override; - void instantiate_image_resource(Granite::AssetManager &manager, Granite::TaskGroup *task, - Granite::ImageAssetID id, Granite::File &file) override; - void release_image_resource(Granite::ImageAssetID id) override; + uint64_t estimate_cost_asset(Granite::AssetID id, Granite::File &file) override; + void instantiate_asset(Granite::AssetManager &manager, Granite::TaskGroup *task, + Granite::AssetID id, Granite::File &file) override; + void release_asset(Granite::AssetID id) override; void set_id_bounds(uint32_t bound) override; - void set_image_class(Granite::ImageAssetID id, Granite::ImageClass image_class) override; + void set_asset_class(Granite::AssetID id, Granite::AssetClass asset_class) override; struct Texture { ImageHandle image; - Granite::ImageClass image_class = Granite::ImageClass::Zeroable; + Granite::AssetClass asset_class = Granite::AssetClass::ImageZeroable; }; std::mutex lock; std::condition_variable cond; - std::vector textures; + std::vector assets; std::vector views; - std::vector updates; + std::vector updates; ImageHandle fallback_color; ImageHandle fallback_normal; ImageHandle fallback_zero; ImageHandle fallback_pbr; - ImageHandle create_gtx(Granite::FileMappingHandle mapping, Granite::ImageAssetID id); - 
ImageHandle create_gtx(const MemoryMappedTexture &mapping, Granite::ImageAssetID id); - ImageHandle create_other(const Granite::FileMapping &mapping, Granite::ImageClass image_class, Granite::ImageAssetID id); - const ImageHandle &get_fallback_image(Granite::ImageClass image_class); + ImageHandle create_gtx(Granite::FileMappingHandle mapping, Granite::AssetID id); + ImageHandle create_gtx(const MemoryMappedTexture &mapping, Granite::AssetID id); + ImageHandle create_other(const Granite::FileMapping &mapping, Granite::AssetClass asset_class, Granite::AssetID id); + const ImageHandle &get_fallback_image(Granite::AssetClass asset_class); - void instantiate_image_resource(Granite::AssetManager &manager, Granite::ImageAssetID id, Granite::File &file); + void instantiate_asset(Granite::AssetManager &manager, Granite::AssetID id, Granite::File &file); }; } From 2ffdacf1929a84078a01a2ef52954055def5cd1a Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Fri, 4 Aug 2023 11:51:56 +0200 Subject: [PATCH 57/71] Start hooking up global IBO/VBO allocator. 
--- util/arena_allocator.hpp | 20 ++-- vulkan/managers/resource_manager.cpp | 133 ++++++++++++++++++++++++++- vulkan/managers/resource_manager.hpp | 62 +++++++++++++ vulkan/memory_allocator.cpp | 5 +- vulkan/memory_allocator.hpp | 3 +- 5 files changed, 207 insertions(+), 16 deletions(-) diff --git a/util/arena_allocator.hpp b/util/arena_allocator.hpp index 7a923240..6c810ced 100644 --- a/util/arena_allocator.hpp +++ b/util/arena_allocator.hpp @@ -98,6 +98,13 @@ struct AllocationArena uint32_t heap_availability_mask = 0; }; +struct SuballocationResult +{ + uint32_t offset; + uint32_t size; + uint32_t mask; +}; + template class ArenaAllocator { @@ -149,7 +156,7 @@ class ArenaAllocator assert(index >= (num_blocks - 1)); auto &heap = *itr; - static_cast(this)->prepare_allocation(alloc, heap, suballocate(num_blocks, heap)); + static_cast(this)->prepare_allocation(alloc, itr, suballocate(num_blocks, heap)); unsigned new_index = heap.heap.get_longest_run() - 1; @@ -168,7 +175,6 @@ class ArenaAllocator heap_arena.heap_availability_mask &= ~(1u << index); } - alloc->heap = itr; return true; } @@ -186,9 +192,8 @@ class ArenaAllocator } // This cannot fail. 
- static_cast(this)->prepare_allocation(alloc, heap, suballocate(num_blocks, heap)); + static_cast(this)->prepare_allocation(alloc, node, suballocate(num_blocks, heap)); - alloc->heap = node; if (heap.heap.full()) { heap_arena.full_heaps.insert_front(node); @@ -254,13 +259,6 @@ class ArenaAllocator uint32_t sub_block_size = 1; uint32_t sub_block_size_log2 = 0; - struct SuballocationResult - { - uint32_t offset; - uint32_t size; - uint32_t mask; - }; - private: inline SuballocationResult suballocate(uint32_t num_blocks, MiniHeap &heap) { diff --git a/vulkan/managers/resource_manager.cpp b/vulkan/managers/resource_manager.cpp index 8b2fc788..80d4d3aa 100644 --- a/vulkan/managers/resource_manager.cpp +++ b/vulkan/managers/resource_manager.cpp @@ -32,7 +32,7 @@ namespace Vulkan { ResourceManager::ResourceManager(Device *device_) - : device(device_) + : device(device_), index_buffer_allocator(*device_) { } @@ -133,7 +133,7 @@ ImageHandle ResourceManager::create_gtx(const MemoryMappedTexture &mapped_file, VkComponentMapping swizzle = {}; mapped_file.remap_swizzle(swizzle); - Vulkan::ImageHandle image; + ImageHandle image; if (!device->image_format_is_supported(layout.get_format(), VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) && format_compression_type(layout.get_format()) != FormatCompressionType::Uncompressed) { @@ -216,7 +216,7 @@ ImageHandle ResourceManager::create_other(const Granite::FileMapping &mapping, G return create_gtx(tex, id); } -const Vulkan::ImageView *ResourceManager::get_image_view_blocking(Granite::AssetID id) +const ImageView *ResourceManager::get_image_view_blocking(Granite::AssetID id) { std::unique_lock holder{lock}; @@ -328,4 +328,131 @@ void ResourceManager::latch_handles() } updates.clear(); } + +MeshBufferAllocator::MeshBufferAllocator(Device &device) + : global_allocator(device) +{ + for (int i = 0; i < SliceAllocatorCount - 1; i++) + allocators[i].parent = &allocators[i + 1]; + allocators[SliceAllocatorCount - 1].global_allocator = &global_allocator; 
+ + // Basic unit of a meshlet is 256 prims / attributes. + // Maximum element count = 32M prims. + allocators[0].sub_block_size = 256; + for (int i = 1; i < SliceAllocatorCount; i++) + allocators[i].sub_block_size = allocators[i - 1].sub_block_size * (Util::LegionAllocator::NumSubBlocks / 2); +} + +void MeshBufferAllocator::set_element_size(uint32_t element_size) +{ + global_allocator.set_element_size(element_size); +} + +namespace Internal +{ +uint32_t MeshGlobalAllocator::allocate(uint32_t count) +{ + BufferCreateInfo info = {}; + info.size = VkDeviceSize(count) * element_size; + info.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + info.domain = BufferDomain::Device; + auto buf = device.create_buffer(info); + + for (uint32_t i = 0, n = global_buffers.size(); i < n; i++) + { + if (!global_buffers[i]) + { + global_buffers[i] = std::move(buf); + return i; + } + } + + // For now, have one global buffer for VBO / IBO. 
+ if (!global_buffers.empty()) + return UINT32_MAX; + + uint32_t ret = global_buffers.size(); + global_buffers.push_back(std::move(buf)); + return ret; +} + +void MeshGlobalAllocator::set_element_size(uint32_t element_size_) +{ + element_size = element_size_; +} + +void MeshGlobalAllocator::free(uint32_t index) +{ + VK_ASSERT(index < global_buffers.size()); + global_buffers[index].reset(); +} + +MeshGlobalAllocator::MeshGlobalAllocator(Device &device_) + : device(device_) +{} + +bool SliceAllocator::allocate_backing_heap(AllocatedSlice *allocation) +{ + uint32_t count = sub_block_size * Util::LegionAllocator::NumSubBlocks; + + if (parent) + { + return parent->allocate(count, allocation); + } + else if (global_allocator) + { + uint32_t index = global_allocator->allocate(count); + if (index == UINT32_MAX) + return false; + + *allocation = {}; + allocation->count = count; + allocation->buffer_index = index; + return true; + } + else + { + return false; + } +} + +void SliceAllocator::free_backing_heap(AllocatedSlice *allocation) +{ + if (parent) + parent->free(allocation->heap, allocation->mask); + else if (global_allocator) + global_allocator->free(allocation->buffer_index); +} + +void SliceAllocator::prepare_allocation(AllocatedSlice *allocation, Util::IntrusiveList::Iterator heap, + const Util::SuballocationResult &suballoc) +{ + allocation->buffer_index = heap->allocation.buffer_index; + allocation->offset = heap->allocation.offset + suballoc.offset; + allocation->count = suballoc.size; + allocation->mask = suballoc.mask; + allocation->heap = heap; + allocation->alloc = this; +} +} + +bool MeshBufferAllocator::allocate(uint32_t count, Internal::AllocatedSlice *slice) +{ + for (auto &alloc : allocators) + if (count <= alloc.get_max_allocation_size()) + return alloc.allocate(count, slice); + + LOGE("Allocation of %u elements is too large for MeshBufferAllocator.\n", count); + return false; +} + +void MeshBufferAllocator::free(const Internal::AllocatedSlice &slice) +{ 
+ if (slice.alloc) + slice.alloc->free(slice.heap, slice.mask); + else + global_allocator.free(slice.buffer_index); +} } diff --git a/vulkan/managers/resource_manager.hpp b/vulkan/managers/resource_manager.hpp index ebb2e2dd..ea7cf2da 100644 --- a/vulkan/managers/resource_manager.hpp +++ b/vulkan/managers/resource_manager.hpp @@ -23,7 +23,10 @@ #pragma once #include "image.hpp" +#include "buffer.hpp" #include "asset_manager.hpp" +#include "arena_allocator.hpp" +#include "small_vector.hpp" #include #include @@ -31,6 +34,63 @@ namespace Vulkan { class MemoryMappedTexture; +namespace Internal +{ +struct SliceAllocator; +struct AllocatedSlice +{ + uint32_t buffer_index = 0; + uint32_t offset = 0; + uint32_t count = 0; + uint32_t mask = 0; + + SliceAllocator *alloc = nullptr; + Util::IntrusiveList>::Iterator heap = {}; +}; + +class MeshGlobalAllocator +{ +public: + explicit MeshGlobalAllocator(Device &device); + void set_element_size(uint32_t element_size); + uint32_t allocate(uint32_t count); + void free(uint32_t index); + +private: + Device &device; + uint32_t element_size = 0; + Util::SmallVector global_buffers; +}; + +struct SliceAllocator : Util::ArenaAllocator +{ + SliceAllocator *parent = nullptr; + MeshGlobalAllocator *global_allocator = nullptr; + uint32_t sub_block_size = 0; + + // Implements curious recurring template pattern calls. 
+ bool allocate_backing_heap(AllocatedSlice *allocation); + void free_backing_heap(AllocatedSlice *allocation); + void prepare_allocation(AllocatedSlice *allocation, Util::IntrusiveList::Iterator heap, + const Util::SuballocationResult &suballoc); +}; +} + +class MeshBufferAllocator +{ +public: + explicit MeshBufferAllocator(Device &device); + bool allocate(uint32_t count, Internal::AllocatedSlice *slice); + void free(const Internal::AllocatedSlice &slice); + void set_element_size(uint32_t element_size); + +private: + Util::ObjectPool> object_pool; + Internal::MeshGlobalAllocator global_allocator; + enum { SliceAllocatorCount = 4 }; + Internal::SliceAllocator allocators[SliceAllocatorCount]; +}; + class ResourceManager final : private Granite::AssetInstantiatorInterface { public: @@ -84,5 +144,7 @@ class ResourceManager final : private Granite::AssetInstantiatorInterface const ImageHandle &get_fallback_image(Granite::AssetClass asset_class); void instantiate_asset(Granite::AssetManager &manager, Granite::AssetID id, Granite::File &file); + + MeshBufferAllocator index_buffer_allocator; }; } diff --git a/vulkan/memory_allocator.cpp b/vulkan/memory_allocator.cpp index f938a4bf..bc156863 100644 --- a/vulkan/memory_allocator.cpp +++ b/vulkan/memory_allocator.cpp @@ -123,8 +123,11 @@ void DeviceAllocation::free_global(DeviceAllocator &allocator, uint32_t size_, u } } -void ClassAllocator::prepare_allocation(DeviceAllocation *alloc, MiniHeap &heap, const SuballocationResult &suballoc) +void ClassAllocator::prepare_allocation(DeviceAllocation *alloc, Util::IntrusiveList::Iterator heap_itr, + const Util::SuballocationResult &suballoc) { + auto &heap = *heap_itr; + alloc->heap = heap_itr; alloc->base = heap.allocation.base; alloc->offset = suballoc.offset + heap.allocation.offset; alloc->mask = suballoc.mask; diff --git a/vulkan/memory_allocator.hpp b/vulkan/memory_allocator.hpp index b5525617..38b1f094 100644 --- a/vulkan/memory_allocator.hpp +++ 
b/vulkan/memory_allocator.hpp @@ -196,7 +196,8 @@ class ClassAllocator : public Util::ArenaAllocator::Iterator heap_itr, + const Util::SuballocationResult &suballoc); }; class Allocator From 1d1bd82029ee2ca232c6c49417cf4afb73a81765 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Fri, 4 Aug 2023 12:16:36 +0200 Subject: [PATCH 58/71] Hook up more of the mesh asset ID. --- vulkan/managers/resource_manager.cpp | 34 ++++++++++++++++++++++++++-- vulkan/managers/resource_manager.hpp | 15 ++++++++---- 2 files changed, 43 insertions(+), 6 deletions(-) diff --git a/vulkan/managers/resource_manager.cpp b/vulkan/managers/resource_manager.cpp index 80d4d3aa..11c95859 100644 --- a/vulkan/managers/resource_manager.cpp +++ b/vulkan/managers/resource_manager.cpp @@ -32,8 +32,15 @@ namespace Vulkan { ResourceManager::ResourceManager(Device *device_) - : device(device_), index_buffer_allocator(*device_) + : device(device_) + , index_buffer_allocator(*device_) + , position_buffer_allocator(*device_) + , attribute_buffer_allocator(*device_) { + // Simplified style. 
+ index_buffer_allocator.set_element_size(sizeof(uint32_t) * 3); + position_buffer_allocator.set_element_size(sizeof(float) * 3); + attribute_buffer_allocator.set_element_size(sizeof(float) * 2 + sizeof(uint32_t) * 2); } ResourceManager::~ResourceManager() @@ -329,6 +336,21 @@ void ResourceManager::latch_handles() updates.clear(); } +const Buffer *ResourceManager::get_index_buffer() const +{ + return index_buffer_allocator.get_buffer(0); +} + +const Buffer *ResourceManager::get_position_buffer() const +{ + return position_buffer_allocator.get_buffer(0); +} + +const Buffer *ResourceManager::get_attribute_buffer() const +{ + return attribute_buffer_allocator.get_buffer(0); +} + MeshBufferAllocator::MeshBufferAllocator(Device &device) : global_allocator(device) { @@ -348,6 +370,14 @@ void MeshBufferAllocator::set_element_size(uint32_t element_size) global_allocator.set_element_size(element_size); } +const Buffer *MeshBufferAllocator::get_buffer(unsigned index) const +{ + if (index < global_allocator.global_buffers.size()) + return global_allocator.global_buffers[index].get(); + else + return nullptr; +} + namespace Internal { uint32_t MeshGlobalAllocator::allocate(uint32_t count) @@ -418,7 +448,7 @@ bool SliceAllocator::allocate_backing_heap(AllocatedSlice *allocation) } } -void SliceAllocator::free_backing_heap(AllocatedSlice *allocation) +void SliceAllocator::free_backing_heap(AllocatedSlice *allocation) const { if (parent) parent->free(allocation->heap, allocation->mask); diff --git a/vulkan/managers/resource_manager.hpp b/vulkan/managers/resource_manager.hpp index ea7cf2da..591b85cf 100644 --- a/vulkan/managers/resource_manager.hpp +++ b/vulkan/managers/resource_manager.hpp @@ -48,15 +48,13 @@ struct AllocatedSlice Util::IntrusiveList>::Iterator heap = {}; }; -class MeshGlobalAllocator +struct MeshGlobalAllocator { -public: explicit MeshGlobalAllocator(Device &device); void set_element_size(uint32_t element_size); uint32_t allocate(uint32_t count); void 
free(uint32_t index); -private: Device &device; uint32_t element_size = 0; Util::SmallVector global_buffers; @@ -70,7 +68,7 @@ struct SliceAllocator : Util::ArenaAllocator // Implements curious recurring template pattern calls. bool allocate_backing_heap(AllocatedSlice *allocation); - void free_backing_heap(AllocatedSlice *allocation); + void free_backing_heap(AllocatedSlice *allocation) const; void prepare_allocation(AllocatedSlice *allocation, Util::IntrusiveList::Iterator heap, const Util::SuballocationResult &suballoc); }; @@ -84,6 +82,8 @@ class MeshBufferAllocator void free(const Internal::AllocatedSlice &slice); void set_element_size(uint32_t element_size); + const Buffer *get_buffer(unsigned index) const; + private: Util::ObjectPool> object_pool; Internal::MeshGlobalAllocator global_allocator; @@ -108,6 +108,11 @@ class ResourceManager final : private Granite::AssetInstantiatorInterface const Vulkan::ImageView *get_image_view_blocking(Granite::AssetID id); + VkDrawIndexedIndirectCommand get_mesh_indexed_draw(Granite::AssetID id) const; + const Buffer *get_index_buffer() const; + const Buffer *get_position_buffer() const; + const Buffer *get_attribute_buffer() const; + private: Device *device; Granite::AssetManager *manager = nullptr; @@ -146,5 +151,7 @@ class ResourceManager final : private Granite::AssetInstantiatorInterface void instantiate_asset(Granite::AssetManager &manager, Granite::AssetID id, Granite::File &file); MeshBufferAllocator index_buffer_allocator; + MeshBufferAllocator position_buffer_allocator; + MeshBufferAllocator attribute_buffer_allocator; }; } From e84ef45ca573b68951411bd5aef4d8e2792ca286 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Fri, 4 Aug 2023 14:20:36 +0200 Subject: [PATCH 59/71] Preallocate asset banks. 
--- filesystem/asset_manager.cpp | 27 +++++++++++++++------------ filesystem/asset_manager.hpp | 7 +++++-- vulkan/managers/resource_manager.cpp | 26 +++++++++++++++++++++++--- vulkan/managers/resource_manager.hpp | 9 +++++++-- 4 files changed, 50 insertions(+), 19 deletions(-) diff --git a/filesystem/asset_manager.cpp b/filesystem/asset_manager.cpp index c591e45c..6e3b1f1b 100644 --- a/filesystem/asset_manager.cpp +++ b/filesystem/asset_manager.cpp @@ -29,6 +29,8 @@ namespace Granite { AssetManager::AssetManager() { + asset_bank.reserve(AssetID::MaxIDs); + sorted_assets.reserve(AssetID::MaxIDs); signal = std::make_unique(); for (uint64_t i = 0; i < timestamp; i++) signal->signal_increment(); @@ -36,21 +38,21 @@ AssetManager::AssetManager() AssetManager::~AssetManager() { + set_asset_instantiator_interface(nullptr); signal->wait_until_at_least(timestamp); - for (auto *a : asset_bank) - pool.free(a); + for (uint32_t i = 0; i < id_count; i++) + pool.free(asset_bank[i]); } AssetID AssetManager::register_asset_nolock(FileHandle file, AssetClass asset_class, int prio) { auto *info = pool.allocate(); info->handle = std::move(file); - info->id.id = id_count++; + info->id.id = id_count; info->prio = prio; info->asset_class = asset_class; AssetID ret = info->id; - asset_bank.push_back(info); - sorted_assets.reserve(asset_bank.size()); + asset_bank[id_count++] = info; if (iface) { iface->set_id_bounds(id_count); @@ -103,8 +105,9 @@ void AssetManager::set_asset_instantiator_interface(AssetInstantiatorInterface * iface->release_asset(AssetID{id}); } - for (auto *a : asset_bank) + for (uint32_t i = 0; i < id_count; i++) { + auto *a = asset_bank[i]; a->consumed = 0; a->pending_consumed = 0; a->last_used = 0; @@ -138,7 +141,7 @@ void AssetManager::set_asset_budget_per_iteration(uint64_t cost) bool AssetManager::set_asset_residency_priority(AssetID id, int prio) { std::lock_guard holder{asset_bank_lock}; - if (id.id >= asset_bank.size()) + if (id.id >= id_count) return false; 
asset_bank[id.id]->prio = prio; return true; @@ -146,7 +149,7 @@ bool AssetManager::set_asset_residency_priority(AssetID id, int prio) void AssetManager::adjust_update(const CostUpdate &update) { - if (update.id.id < asset_bank.size()) + if (update.id.id < id_count) { auto *a = asset_bank[update.id.id]; total_consumed += update.cost - (a->consumed + a->pending_consumed); @@ -180,7 +183,7 @@ void AssetManager::update_lru_locked_assets() { lru_append.for_each_ranged([this](const AssetID *id, size_t count) { for (size_t i = 0; i < count; i++) - if (id[i].id < asset_bank.size()) + if (id[i].id < id_count) asset_bank[id[i].id]->last_used = timestamp; }); lru_append.clear(); @@ -251,8 +254,8 @@ void AssetManager::iterate(ThreadGroup *group) update_costs_locked_assets(); update_lru_locked_assets(); - sorted_assets = asset_bank; - std::sort(sorted_assets.begin(), sorted_assets.end(), [](const AssetInfo *a, const AssetInfo *b) -> bool { + memcpy(sorted_assets.data(), asset_bank.data(), id_count * sizeof(sorted_assets[0])); + std::sort(sorted_assets.data(), sorted_assets.data() + id_count, [](const AssetInfo *a, const AssetInfo *b) -> bool { // High prios come first since they will be activated. // Then we sort by LRU. // High consumption should be moved last, so they are candidates to be paged out if we're over budget. 
@@ -272,7 +275,7 @@ void AssetManager::iterate(ThreadGroup *group) return a->id.id < b->id.id; }); - size_t release_index = sorted_assets.size(); + size_t release_index = id_count; uint64_t activated_cost_this_iteration = 0; unsigned activation_count = 0; size_t activate_index = 0; diff --git a/filesystem/asset_manager.hpp b/filesystem/asset_manager.hpp index ebbc8ea0..c672995e 100644 --- a/filesystem/asset_manager.hpp +++ b/filesystem/asset_manager.hpp @@ -26,6 +26,7 @@ #include "filesystem.hpp" #include "object_pool.hpp" #include "intrusive_hash_map.hpp" +#include "dynamic_array.hpp" #include #include #include @@ -35,6 +36,7 @@ namespace Granite struct AssetID { uint32_t id = uint32_t(-1); + enum { MaxIDs = 1u << 20 }; AssetID() = default; explicit AssetID(uint32_t id_) : id{id_} {} explicit inline operator bool() const { return id != uint32_t(-1); } @@ -80,6 +82,7 @@ class AssetInstantiatorInterface // Will only be called after an upload completes through manager.update_cost(). virtual void release_asset(AssetID id) = 0; + virtual void set_id_bounds(uint32_t bound) = 0; virtual void set_asset_class(AssetID id, AssetClass asset_class); @@ -138,9 +141,9 @@ class AssetManager final : public AssetManagerInterface int prio = 0; }; - std::vector sorted_assets; + Util::DynamicArray sorted_assets; + Util::DynamicArray asset_bank; std::mutex asset_bank_lock; - std::vector asset_bank; Util::ObjectPool pool; Util::AtomicAppendBuffer lru_append; Util::IntrusiveHashMapHolder file_to_assets; diff --git a/vulkan/managers/resource_manager.cpp b/vulkan/managers/resource_manager.cpp index 11c95859..175c44cc 100644 --- a/vulkan/managers/resource_manager.cpp +++ b/vulkan/managers/resource_manager.cpp @@ -41,6 +41,7 @@ ResourceManager::ResourceManager(Device *device_) index_buffer_allocator.set_element_size(sizeof(uint32_t) * 3); position_buffer_allocator.set_element_size(sizeof(float) * 3); attribute_buffer_allocator.set_element_size(sizeof(float) * 2 + sizeof(uint32_t) * 2); + 
assets.reserve(Granite::AssetID::MaxIDs); } ResourceManager::~ResourceManager() @@ -52,6 +53,8 @@ ResourceManager::~ResourceManager() void ResourceManager::set_id_bounds(uint32_t bound) { + // We must avoid reallocation here to avoid a ton of extra silly locking. + VK_ASSERT(bound <= Granite::AssetID::MaxIDs); assets.resize(bound); views.resize(bound); } @@ -69,7 +72,22 @@ void ResourceManager::set_asset_class(Granite::AssetID id, Granite::AssetClass a void ResourceManager::release_asset(Granite::AssetID id) { if (id) - assets[id.id].image.reset(); + { + auto &a = assets[id.id]; + if (a.asset_class == Granite::AssetClass::Mesh) + { + if (a.mesh.index.count) + { + std::lock_guard holder{mesh_allocator_lock}; + index_buffer_allocator.free(a.mesh.index); + position_buffer_allocator.free(a.mesh.pos); + attribute_buffer_allocator.free(a.mesh.attr); + a.mesh = {}; + } + } + else + a.image.reset(); + } } uint64_t ResourceManager::estimate_cost_asset(Granite::AssetID, Granite::File &file) @@ -268,6 +286,8 @@ void ResourceManager::instantiate_asset(Granite::AssetManager &manager_, Granite::AssetID id, Granite::File &file) { + auto &asset = assets[id.id]; + ImageHandle image; if (file.get_size()) { @@ -277,7 +297,7 @@ void ResourceManager::instantiate_asset(Granite::AssetManager &manager_, if (MemoryMappedTexture::is_header(mapping->data(), mapping->get_size())) image = create_gtx(std::move(mapping), id); else - image = create_other(*mapping, assets[id.id].asset_class, id); + image = create_other(*mapping, asset.asset_class, id); } else LOGE("Failed to map file.\n"); @@ -287,7 +307,7 @@ void ResourceManager::instantiate_asset(Granite::AssetManager &manager_, // Have to signal something. 
if (!image) - image = get_fallback_image(assets[id.id].asset_class); + image = get_fallback_image(asset.asset_class); std::lock_guard holder{lock}; updates.push_back(id); diff --git a/vulkan/managers/resource_manager.hpp b/vulkan/managers/resource_manager.hpp index 591b85cf..204363e4 100644 --- a/vulkan/managers/resource_manager.hpp +++ b/vulkan/managers/resource_manager.hpp @@ -125,16 +125,20 @@ class ResourceManager final : private Granite::AssetInstantiatorInterface void set_id_bounds(uint32_t bound) override; void set_asset_class(Granite::AssetID id, Granite::AssetClass asset_class) override; - struct Texture + struct Asset { ImageHandle image; + struct + { + Internal::AllocatedSlice index, pos, attr; + } mesh; Granite::AssetClass asset_class = Granite::AssetClass::ImageZeroable; }; std::mutex lock; std::condition_variable cond; - std::vector assets; + std::vector assets; std::vector views; std::vector updates; @@ -150,6 +154,7 @@ class ResourceManager final : private Granite::AssetInstantiatorInterface void instantiate_asset(Granite::AssetManager &manager, Granite::AssetID id, Granite::File &file); + std::mutex mesh_allocator_lock; MeshBufferAllocator index_buffer_allocator; MeshBufferAllocator position_buffer_allocator; MeshBufferAllocator attribute_buffer_allocator; From c3d60ca676d4fab7573ee983924668991e48b5ce Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Fri, 4 Aug 2023 14:35:15 +0200 Subject: [PATCH 60/71] Flesh out more of the mesh allocator idea. 
--- vulkan/managers/resource_manager.cpp | 47 ++++++++++++++++++---------- vulkan/managers/resource_manager.hpp | 2 ++ 2 files changed, 32 insertions(+), 17 deletions(-) diff --git a/vulkan/managers/resource_manager.cpp b/vulkan/managers/resource_manager.cpp index 175c44cc..fdbab84a 100644 --- a/vulkan/managers/resource_manager.cpp +++ b/vulkan/managers/resource_manager.cpp @@ -73,20 +73,21 @@ void ResourceManager::release_asset(Granite::AssetID id) { if (id) { - auto &a = assets[id.id]; - if (a.asset_class == Granite::AssetClass::Mesh) + std::unique_lock holder{lock}; + auto &asset = assets[id.id]; + if (asset.asset_class == Granite::AssetClass::Mesh) { - if (a.mesh.index.count) + if (asset.mesh.index.count) { - std::lock_guard holder{mesh_allocator_lock}; - index_buffer_allocator.free(a.mesh.index); - position_buffer_allocator.free(a.mesh.pos); - attribute_buffer_allocator.free(a.mesh.attr); - a.mesh = {}; + std::lock_guard holder_alloc{mesh_allocator_lock}; + index_buffer_allocator.free(asset.mesh.index); + position_buffer_allocator.free(asset.mesh.pos); + attribute_buffer_allocator.free(asset.mesh.attr); + asset.mesh = {}; } } else - a.image.reset(); + asset.image.reset(); } } @@ -251,8 +252,10 @@ const ImageView *ResourceManager::get_image_view_blocking(Granite::AssetID id) return nullptr; } - if (assets[id.id].image) - return &assets[id.id].image->get_view(); + auto &asset = assets[id.id]; + + if (asset.image) + return &asset.image->get_view(); if (!manager->iterate_blocking(*device->get_system_handles().thread_group, id)) { @@ -260,11 +263,11 @@ const ImageView *ResourceManager::get_image_view_blocking(Granite::AssetID id) return nullptr; } - cond.wait(holder, [this, id]() -> bool { - return bool(assets[id.id].image); + cond.wait(holder, [&asset]() -> bool { + return bool(asset.image); }); - return &assets[id.id].image->get_view(); + return &asset.image->get_view(); } void ResourceManager::instantiate_asset(Granite::AssetManager &manager_, Granite::TaskGroup 
*task, @@ -287,6 +290,17 @@ void ResourceManager::instantiate_asset(Granite::AssetManager &manager_, Granite::File &file) { auto &asset = assets[id.id]; + if (asset.asset_class == Granite::AssetClass::Mesh) + instantiate_asset_mesh(manager_, id, file); + else + instantiate_asset_image(manager_, id, file); +} + +void ResourceManager::instantiate_asset_image(Granite::AssetManager &manager_, + Granite::AssetID id, + Granite::File &file) +{ + auto &asset = assets[id.id]; ImageHandle image; if (file.get_size()) @@ -303,15 +317,14 @@ void ResourceManager::instantiate_asset(Granite::AssetManager &manager_, LOGE("Failed to map file.\n"); } - manager_.update_cost(id, image ? image->get_allocation().get_size() : 0); - // Have to signal something. if (!image) image = get_fallback_image(asset.asset_class); std::lock_guard holder{lock}; updates.push_back(id); - assets[id.id].image = std::move(image); + asset.image = std::move(image); + manager_.update_cost(id, asset.image ? asset.image->get_allocation().get_size() : 0); cond.notify_all(); } diff --git a/vulkan/managers/resource_manager.hpp b/vulkan/managers/resource_manager.hpp index 204363e4..049e826e 100644 --- a/vulkan/managers/resource_manager.hpp +++ b/vulkan/managers/resource_manager.hpp @@ -153,6 +153,8 @@ class ResourceManager final : private Granite::AssetInstantiatorInterface const ImageHandle &get_fallback_image(Granite::AssetClass asset_class); void instantiate_asset(Granite::AssetManager &manager, Granite::AssetID id, Granite::File &file); + void instantiate_asset_image(Granite::AssetManager &manager, Granite::AssetID id, Granite::File &file); + void instantiate_asset_mesh(Granite::AssetManager &manager, Granite::AssetID id, Granite::File &file); std::mutex mesh_allocator_lock; MeshBufferAllocator index_buffer_allocator; From 079ca367eca8899cd03a11a26f1eb3594e906d57 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sun, 13 Aug 2023 13:43:27 +0200 Subject: [PATCH 61/71] Continue hooking up the decode paths. 
--- filesystem/asset_manager.hpp | 2 +- renderer/CMakeLists.txt | 1 - scene-export/meshlet_export.cpp | 20 +- scene-export/meshlet_export.hpp | 2 +- tests/meshlet_viewer.cpp | 7 +- tests/meshopt_sandbox.cpp | 25 +-- vulkan/CMakeLists.txt | 4 +- vulkan/managers/resource_manager.cpp | 178 +++++++++++++++--- vulkan/managers/resource_manager.hpp | 13 +- {renderer/formats => vulkan/mesh}/meshlet.cpp | 31 ++- {renderer/formats => vulkan/mesh}/meshlet.hpp | 18 +- 11 files changed, 211 insertions(+), 90 deletions(-) rename {renderer/formats => vulkan/mesh}/meshlet.cpp (87%) rename {renderer/formats => vulkan/mesh}/meshlet.hpp (94%) diff --git a/filesystem/asset_manager.hpp b/filesystem/asset_manager.hpp index c672995e..6c613d67 100644 --- a/filesystem/asset_manager.hpp +++ b/filesystem/asset_manager.hpp @@ -36,7 +36,7 @@ namespace Granite struct AssetID { uint32_t id = uint32_t(-1); - enum { MaxIDs = 1u << 20 }; + enum { MaxIDs = 1u << 18 }; AssetID() = default; explicit AssetID(uint32_t id_) : id{id_} {} explicit inline operator bool() const { return id != uint32_t(-1); } diff --git a/renderer/CMakeLists.txt b/renderer/CMakeLists.txt index ca6db955..ca573aa0 100644 --- a/renderer/CMakeLists.txt +++ b/renderer/CMakeLists.txt @@ -38,7 +38,6 @@ add_granite_internal_lib(granite-renderer lights/volumetric_diffuse.hpp lights/volumetric_diffuse.cpp lights/decal_volume.hpp lights/decal_volume.cpp formats/scene_formats.hpp formats/scene_formats.cpp - formats/meshlet.hpp formats/meshlet.cpp formats/gltf.hpp formats/gltf.cpp scene_loader.cpp scene_loader.hpp ocean.hpp ocean.cpp diff --git a/scene-export/meshlet_export.cpp b/scene-export/meshlet_export.cpp index 09b27ee6..4f39eacc 100644 --- a/scene-export/meshlet_export.cpp +++ b/scene-export/meshlet_export.cpp @@ -31,7 +31,7 @@ namespace Granite { namespace Meshlet { -using namespace ::Granite::SceneFormats::Meshlet; +using namespace Vulkan::Meshlet; struct Metadata : Header { @@ -583,7 +583,7 @@ static bool 
export_encoded_mesh(const std::string &path, const Encoded &encoded) return true; } -bool export_mesh_to_meshlet(const std::string &path, SceneFormats::Mesh mesh, SceneFormats::Meshlet::MeshStyle style) +bool export_mesh_to_meshlet(const std::string &path, SceneFormats::Mesh mesh, MeshStyle style) { if (!mesh_optimize_index_buffer(mesh, {})) return false; @@ -595,10 +595,10 @@ bool export_mesh_to_meshlet(const std::string &path, SceneFormats::Mesh mesh, Sc switch (style) { - case SceneFormats::Meshlet::MeshStyle::Skinned: + case MeshStyle::Skinned: LOGE("Unimplemented.\n"); return false; - case SceneFormats::Meshlet::MeshStyle::Textured: + case MeshStyle::Textured: uv = mesh_extract_uv_snorm_scale(mesh); num_u32_streams += 2; if (uv.empty()) @@ -607,7 +607,7 @@ bool export_mesh_to_meshlet(const std::string &path, SceneFormats::Mesh mesh, Sc return false; } // Fallthrough - case SceneFormats::Meshlet::MeshStyle::Untextured: + case MeshStyle::Untextured: normals = mesh_extract_normal_tangent_oct8(mesh, MeshAttribute::Normal); tangent = mesh_extract_normal_tangent_oct8(mesh, MeshAttribute::Tangent); if (normals.empty() || tangent.empty()) @@ -617,7 +617,7 @@ bool export_mesh_to_meshlet(const std::string &path, SceneFormats::Mesh mesh, Sc } num_u32_streams += 2; // Fallthrough - case SceneFormats::Meshlet::MeshStyle::Wireframe: + case MeshStyle::Wireframe: positions = mesh_extract_position_snorm_exp(mesh); if (positions.empty()) { @@ -723,12 +723,10 @@ bool export_mesh_to_meshlet(const std::string &path, SceneFormats::Mesh mesh, Sc const auto *pbounds = bounds.data(); for (auto &meshlet: encoded.mesh.meshlets) { - meshlet.bound.center = vec3( - pbounds->center[0], pbounds->center[1], pbounds->center[2]); + memcpy(meshlet.bound.center, pbounds->center, sizeof(float) * 3); meshlet.bound.radius = pbounds->radius; - meshlet.bound.cone_axis_cutoff = i8vec4( - pbounds->cone_axis_s8[0], pbounds->cone_axis_s8[1], - pbounds->cone_axis_s8[2], pbounds->cone_cutoff_s8); + 
memcpy(meshlet.bound.cone_axis_cutoff, pbounds->cone_axis_s8, sizeof(pbounds->cone_axis_s8)); + meshlet.bound.cone_axis_cutoff[3] = pbounds->cone_cutoff_s8; } return export_encoded_mesh(path, encoded); diff --git a/scene-export/meshlet_export.hpp b/scene-export/meshlet_export.hpp index 6d6607c0..7527e80c 100644 --- a/scene-export/meshlet_export.hpp +++ b/scene-export/meshlet_export.hpp @@ -31,6 +31,6 @@ namespace Granite { namespace Meshlet { -bool export_mesh_to_meshlet(const std::string &path, SceneFormats::Mesh mesh, SceneFormats::Meshlet::MeshStyle style); +bool export_mesh_to_meshlet(const std::string &path, SceneFormats::Mesh mesh, Vulkan::Meshlet::MeshStyle style); } } diff --git a/tests/meshlet_viewer.cpp b/tests/meshlet_viewer.cpp index e46f544c..b2f1161c 100644 --- a/tests/meshlet_viewer.cpp +++ b/tests/meshlet_viewer.cpp @@ -36,6 +36,7 @@ using namespace Granite; using namespace Vulkan; +using namespace Vulkan::Meshlet; struct MeshletViewerApplication : Granite::Application, Granite::EventHandler { @@ -62,13 +63,13 @@ struct MeshletViewerApplication : Granite::Application, Granite::EventHandler Vulkan::BufferHandle meshlet_stream_buffer; AABB aabb; FPSCamera camera; - SceneFormats::Meshlet::FormatHeader header; + FormatHeader header; void on_device_create(const DeviceCreatedEvent &e) { e.get_device().get_shader_manager().add_include_directory("builtin://shaders/inc"); - auto view = SceneFormats::Meshlet::create_mesh_view(*mapping); + auto view = create_mesh_view(*mapping); if (!view.format_header) throw std::runtime_error("Failed to load meshlet."); @@ -91,7 +92,7 @@ struct MeshletViewerApplication : Granite::Application, Granite::EventHandler payload = e.get_device().create_buffer(info, view.payload); auto cmd = e.get_device().request_command_buffer(); - if (!SceneFormats::Meshlet::decode_mesh(*cmd, *ibo, 0, *vbo, 0, *payload, 0, view)) + if (!decode_mesh(*cmd, *ibo, 0, *vbo, 0, *payload, 0, view)) { e.get_device().submit_discard(cmd); throw 
std::runtime_error("Failed to decode mesh.\n"); diff --git a/tests/meshopt_sandbox.cpp b/tests/meshopt_sandbox.cpp index 72c58394..10252525 100644 --- a/tests/meshopt_sandbox.cpp +++ b/tests/meshopt_sandbox.cpp @@ -10,10 +10,11 @@ #include "meshlet.hpp" #include using namespace Granite; +using namespace Vulkan::Meshlet; static void decode_mesh_setup_buffers( std::vector &out_index_buffer, std::vector &out_u32_stream, - const SceneFormats::Meshlet::MeshView &mesh) + const MeshView &mesh) { assert(mesh.format_header->u32_stream_count > 1); @@ -24,7 +25,7 @@ static void decode_mesh_setup_buffers( } static void decode_mesh(std::vector &out_index_buffer, std::vector &out_u32_stream, - const SceneFormats::Meshlet::MeshView &mesh) + const MeshView &mesh) { decode_mesh_setup_buffers(out_index_buffer, out_u32_stream, mesh); out_index_buffer.clear(); @@ -38,7 +39,7 @@ static void decode_mesh(std::vector &out_index_buffer, std::vectoru32_stream_count + stream_index]; const uint32_t *pdata = mesh.payload + stream.offset_from_base_u32; - u8vec4 deltas[SceneFormats::Meshlet::MaxElements] = {}; + u8vec4 deltas[MaxElements] = {}; const u16vec4 base_predictor = u16vec4( stream.predictor[0], stream.predictor[1], stream.predictor[2], stream.predictor[3]); @@ -48,7 +49,7 @@ static void decode_mesh(std::vector &out_index_buffer, std::vector> u16vec4(0, 8, 0, 8)); - for (unsigned chunk = 0; chunk < (SceneFormats::Meshlet::MaxElements / 32); chunk++) + for (unsigned chunk = 0; chunk < (MaxElements / 32); chunk++) { auto bits_per_u8 = (uvec4(stream.bitplane_meta[chunk]) >> uvec4(0, 4, 8, 12)) & 0xfu; uvec4 bitplanes[8] = {}; @@ -75,11 +76,11 @@ static void decode_mesh(std::vector &out_index_buffer, std::vector> u16vec4(8)); // Resolve deltas. 
- for (unsigned i = 1; i < SceneFormats::Meshlet::MaxElements; i++) + for (unsigned i = 1; i < MaxElements; i++) deltas[i] += deltas[i - 1]; if (stream_index == 0) @@ -105,7 +106,7 @@ static void decode_mesh(std::vector &out_index_buffer, std::vector &out_index_buffer, std::vector &out_u32_stream, - const SceneFormats::Meshlet::MeshView &mesh) + const MeshView &mesh) { decode_mesh_setup_buffers(out_index_buffer, out_u32_stream, mesh); @@ -129,9 +130,9 @@ static void decode_mesh_gpu( dev.begin_renderdoc_capture(); auto cmd = dev.request_command_buffer(); - SceneFormats::Meshlet::decode_mesh(*cmd, *readback_decoded_index_buffer, 0, - *readback_decoded_u32_buffer, 0, - *payload_buffer, 0, mesh); + decode_mesh(*cmd, *readback_decoded_index_buffer, 0, + *readback_decoded_u32_buffer, 0, + *payload_buffer, 0, mesh); cmd->barrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, VK_PIPELINE_STAGE_HOST_BIT, VK_ACCESS_HOST_READ_BIT); dev.submit(cmd); @@ -221,7 +222,7 @@ int main(int argc, char *argv[]) auto mesh = parser.get_meshes().front(); if (!Meshlet::export_mesh_to_meshlet("export.msh1", - mesh, SceneFormats::Meshlet::MeshStyle::Textured)) + mesh, MeshStyle::Textured)) { return EXIT_FAILURE; } @@ -234,7 +235,7 @@ int main(int argc, char *argv[]) if (!mapped) return EXIT_FAILURE; - auto view = SceneFormats::Meshlet::create_mesh_view(*mapped); + auto view = create_mesh_view(*mapped); std::vector reference_index_buffer; std::vector reference_attributes; diff --git a/vulkan/CMakeLists.txt b/vulkan/CMakeLists.txt index d25606bd..328221ca 100644 --- a/vulkan/CMakeLists.txt +++ b/vulkan/CMakeLists.txt @@ -55,6 +55,7 @@ if (GRANITE_VULKAN_SYSTEM_HANDLES) target_sources(granite-vulkan PRIVATE texture/memory_mapped_texture.cpp texture/memory_mapped_texture.hpp + mesh/meshlet.hpp mesh/meshlet.cpp texture/texture_files.cpp texture/texture_files.hpp texture/texture_decoder.cpp texture/texture_decoder.hpp) @@ -64,7 +65,8 @@ if 
(GRANITE_VULKAN_SYSTEM_HANDLES) target_include_directories(granite-vulkan PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/managers - ${CMAKE_CURRENT_SOURCE_DIR}/texture) + ${CMAKE_CURRENT_SOURCE_DIR}/texture + ${CMAKE_CURRENT_SOURCE_DIR}/mesh) if (GRANITE_VULKAN_SHADER_MANAGER_RUNTIME_COMPILER) target_compile_definitions(granite-vulkan PUBLIC GRANITE_VULKAN_SHADER_MANAGER_RUNTIME_COMPILER=1) diff --git a/vulkan/managers/resource_manager.cpp b/vulkan/managers/resource_manager.cpp index fdbab84a..fe5dda9d 100644 --- a/vulkan/managers/resource_manager.cpp +++ b/vulkan/managers/resource_manager.cpp @@ -28,19 +28,18 @@ #include "texture_decoder.hpp" #include "string_helpers.hpp" #include "thread_group.hpp" +#include "meshlet.hpp" namespace Vulkan { ResourceManager::ResourceManager(Device *device_) : device(device_) , index_buffer_allocator(*device_) - , position_buffer_allocator(*device_) , attribute_buffer_allocator(*device_) { // Simplified style. index_buffer_allocator.set_element_size(sizeof(uint32_t) * 3); - position_buffer_allocator.set_element_size(sizeof(float) * 3); - attribute_buffer_allocator.set_element_size(sizeof(float) * 2 + sizeof(uint32_t) * 2); + attribute_buffer_allocator.set_element_size(sizeof(float) * 3 + sizeof(float) * 2 + sizeof(uint32_t) * 2); assets.reserve(Granite::AssetID::MaxIDs); } @@ -56,7 +55,6 @@ void ResourceManager::set_id_bounds(uint32_t bound) // We must avoid reallocation here to avoid a ton of extra silly locking. 
VK_ASSERT(bound <= Granite::AssetID::MaxIDs); assets.resize(bound); - views.resize(bound); } void ResourceManager::set_asset_class(Granite::AssetID id, Granite::AssetClass asset_class) @@ -64,8 +62,14 @@ void ResourceManager::set_asset_class(Granite::AssetID id, Granite::AssetClass a if (id) { assets[id.id].asset_class = asset_class; - if (!views[id.id]) - views[id.id] = &get_fallback_image(asset_class)->get_view(); + if (asset_class != Granite::AssetClass::Mesh) + { + std::unique_lock holder{lock}; + views.resize(assets.size()); + + if (!views[id.id]) + views[id.id] = &get_fallback_image(asset_class)->get_view(); + } } } @@ -75,13 +79,13 @@ void ResourceManager::release_asset(Granite::AssetID id) { std::unique_lock holder{lock}; auto &asset = assets[id.id]; + asset.latchable = false; if (asset.asset_class == Granite::AssetClass::Mesh) { if (asset.mesh.index.count) { std::lock_guard holder_alloc{mesh_allocator_lock}; index_buffer_allocator.free(asset.mesh.index); - position_buffer_allocator.free(asset.mesh.pos); attribute_buffer_allocator.free(asset.mesh.attr); asset.mesh = {}; } @@ -91,10 +95,18 @@ void ResourceManager::release_asset(Granite::AssetID id) } } -uint64_t ResourceManager::estimate_cost_asset(Granite::AssetID, Granite::File &file) +uint64_t ResourceManager::estimate_cost_asset(Granite::AssetID id, Granite::File &file) { - // TODO: When we get compressed BC/ASTC, this will have to change. - return file.get_size(); + if (assets[id.id].asset_class == Granite::AssetClass::Mesh) + { + // Compression factor of 2x is reasonable to assume. + return file.get_size() * 2; + } + else + { + // TODO: When we get compressed BC/ASTC, this will have to change. + return file.get_size(); + } } void ResourceManager::init() @@ -129,7 +141,7 @@ void ResourceManager::init() HeapBudget budget[VK_MAX_MEMORY_HEAPS] = {}; device->get_memory_budget(budget); - // Try to set aside 50% of budgetable VRAM for the texture manager. Seems reasonable. 
+ // Try to set aside 50% of budgetable VRAM for the resource manager. Seems reasonable. VkDeviceSize size = 0; for (uint32_t i = 0; i < device->get_memory_properties().memoryHeapCount; i++) if ((device->get_memory_properties().memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) != 0) @@ -264,7 +276,7 @@ const ImageView *ResourceManager::get_image_view_blocking(Granite::AssetID id) } cond.wait(holder, [&asset]() -> bool { - return bool(asset.image); + return bool(asset.latchable); }); return &asset.image->get_view(); @@ -296,6 +308,89 @@ void ResourceManager::instantiate_asset(Granite::AssetManager &manager_, instantiate_asset_image(manager_, id, file); } +bool ResourceManager::allocate_asset_mesh(Granite::AssetID id, const Meshlet::MeshView &view) +{ + if (!view.format_header) + return false; + + Internal::AllocatedSlice index_slice, attribute_slice; + { + std::lock_guard holder{mesh_allocator_lock}; + if (!index_buffer_allocator.allocate(view.total_primitives, &index_slice)) + return false; + + if (!attribute_buffer_allocator.allocate(view.total_vertices, &attribute_slice)) + { + index_buffer_allocator.free(index_slice); + return false; + } + } + + auto &asset = assets[id.id]; + asset.mesh.index = index_slice; + asset.mesh.attr = attribute_slice; + return true; +} + +void ResourceManager::instantiate_asset_mesh(Granite::AssetManager &manager_, + Granite::AssetID id, + Granite::File &file) +{ + Granite::FileMappingHandle mapping; + if (file.get_size()) + mapping = file.map(); + + Meshlet::MeshView view = {}; + if (mapping) + view = Meshlet::create_mesh_view(*mapping); + bool ret = allocate_asset_mesh(id, view); + + // Decode the meshlet. Later, we'll have to do a lot of device specific stuff here to select optimal + // processing: + // - Native meshlets + // - Encoded attribute + // - Decoded attributes + // - Optimize for multi-draw-indirect or not? (8-bit indices). 
+ + auto &asset = assets[id.id]; + + if (ret) + { + auto cmd = device->request_command_buffer(CommandBuffer::Type::AsyncCompute); + + BufferCreateInfo buf = {}; + buf.domain = BufferDomain::Host; + buf.size = view.format_header->payload_size_words * sizeof(uint32_t); + buf.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + auto payload = device->create_buffer(buf, view.payload); + + Meshlet::decode_mesh(*cmd, *index_buffer_allocator.get_buffer(0), + asset.mesh.index.offset * index_buffer_allocator.get_element_size(), + *attribute_buffer_allocator.get_buffer(0), + asset.mesh.attr.offset * attribute_buffer_allocator.get_element_size(), + *payload, 0, view); + + Semaphore sem[2]; + device->submit(cmd, nullptr, 2, sem); + device->add_wait_semaphore(CommandBuffer::Type::Generic, std::move(sem[0]), + VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT | + VK_PIPELINE_STAGE_2_INDEX_INPUT_BIT, false); + device->add_wait_semaphore(CommandBuffer::Type::AsyncGraphics, std::move(sem[1]), + VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT | + VK_PIPELINE_STAGE_2_INDEX_INPUT_BIT, false); + } + + uint64_t cost = 0; + cost += view.total_primitives * index_buffer_allocator.get_element_size(); + cost += view.total_vertices * attribute_buffer_allocator.get_element_size(); + + std::lock_guard holder{lock}; + updates.push_back(id); + manager_.update_cost(id, ret ? cost : 0); + asset.latchable = true; + cond.notify_all(); +} + void ResourceManager::instantiate_asset_image(Granite::AssetManager &manager_, Granite::AssetID id, Granite::File &file) @@ -324,6 +419,7 @@ void ResourceManager::instantiate_asset_image(Granite::AssetManager &manager_, std::lock_guard holder{lock}; updates.push_back(id); asset.image = std::move(image); + asset.latchable = true; manager_.update_cost(id, asset.image ? 
asset.image->get_allocation().get_size() : 0); cond.notify_all(); } @@ -347,24 +443,41 @@ const ImageHandle &ResourceManager::get_fallback_image(Granite::AssetClass asset void ResourceManager::latch_handles() { std::lock_guard holder{lock}; + + views.resize(assets.size()); + draws.resize(assets.size()); + for (auto &update : updates) { if (update.id >= views.size()) continue; + auto &asset = assets[update.id]; - const ImageView *view; - - if (assets[update.id].image) + if (asset.asset_class == Granite::AssetClass::Mesh) { - view = &assets[update.id].image->get_view(); + auto &d = draws[update.id]; + d.firstIndex = asset.mesh.index.offset * 3; + d.indexCount = asset.mesh.index.count * 3; + d.firstInstance = 0; + d.instanceCount = 1; + d.vertexOffset = int32_t(asset.mesh.attr.offset); } else { - auto &img = get_fallback_image(assets[update.id].asset_class); - view = &img->get_view(); - } + const ImageView *view; + + if (asset.image) + { + view = &asset.image->get_view(); + } + else + { + auto &img = get_fallback_image(asset.asset_class); + view = &img->get_view(); + } - views[update.id] = view; + views[update.id] = view; + } } updates.clear(); } @@ -374,14 +487,19 @@ const Buffer *ResourceManager::get_index_buffer() const return index_buffer_allocator.get_buffer(0); } -const Buffer *ResourceManager::get_position_buffer() const +const Buffer *ResourceManager::get_attribute_buffer() const { - return position_buffer_allocator.get_buffer(0); + return attribute_buffer_allocator.get_buffer(0); } -const Buffer *ResourceManager::get_attribute_buffer() const +static const VkDrawIndexedIndirectCommand empty_draw = {}; + +const VkDrawIndexedIndirectCommand &ResourceManager::get_mesh_indexed_draw(Granite::AssetID id) const { - return attribute_buffer_allocator.get_buffer(0); + if (id.id < draws.size()) + return draws[id.id]; + else + return empty_draw; } MeshBufferAllocator::MeshBufferAllocator(Device &device) @@ -400,7 +518,12 @@ MeshBufferAllocator::MeshBufferAllocator(Device 
&device) void MeshBufferAllocator::set_element_size(uint32_t element_size) { - global_allocator.set_element_size(element_size); + global_allocator.element_size = element_size; +} + +uint32_t MeshBufferAllocator::get_element_size() const +{ + return global_allocator.element_size; } const Buffer *MeshBufferAllocator::get_buffer(unsigned index) const @@ -441,11 +564,6 @@ uint32_t MeshGlobalAllocator::allocate(uint32_t count) return ret; } -void MeshGlobalAllocator::set_element_size(uint32_t element_size_) -{ - element_size = element_size_; -} - void MeshGlobalAllocator::free(uint32_t index) { VK_ASSERT(index < global_buffers.size()); diff --git a/vulkan/managers/resource_manager.hpp b/vulkan/managers/resource_manager.hpp index 049e826e..4eeab6d5 100644 --- a/vulkan/managers/resource_manager.hpp +++ b/vulkan/managers/resource_manager.hpp @@ -25,6 +25,7 @@ #include "image.hpp" #include "buffer.hpp" #include "asset_manager.hpp" +#include "meshlet.hpp" #include "arena_allocator.hpp" #include "small_vector.hpp" #include @@ -51,7 +52,6 @@ struct AllocatedSlice struct MeshGlobalAllocator { explicit MeshGlobalAllocator(Device &device); - void set_element_size(uint32_t element_size); uint32_t allocate(uint32_t count); void free(uint32_t index); @@ -81,6 +81,7 @@ class MeshBufferAllocator bool allocate(uint32_t count, Internal::AllocatedSlice *slice); void free(const Internal::AllocatedSlice &slice); void set_element_size(uint32_t element_size); + uint32_t get_element_size() const; const Buffer *get_buffer(unsigned index) const; @@ -108,9 +109,8 @@ class ResourceManager final : private Granite::AssetInstantiatorInterface const Vulkan::ImageView *get_image_view_blocking(Granite::AssetID id); - VkDrawIndexedIndirectCommand get_mesh_indexed_draw(Granite::AssetID id) const; + const VkDrawIndexedIndirectCommand &get_mesh_indexed_draw(Granite::AssetID id) const; const Buffer *get_index_buffer() const; - const Buffer *get_position_buffer() const; const Buffer *get_attribute_buffer() 
const; private: @@ -130,9 +130,10 @@ class ResourceManager final : private Granite::AssetInstantiatorInterface ImageHandle image; struct { - Internal::AllocatedSlice index, pos, attr; + Internal::AllocatedSlice index, attr; } mesh; Granite::AssetClass asset_class = Granite::AssetClass::ImageZeroable; + bool latchable = false; }; std::mutex lock; @@ -140,6 +141,7 @@ class ResourceManager final : private Granite::AssetInstantiatorInterface std::vector assets; std::vector views; + std::vector draws; std::vector updates; ImageHandle fallback_color; @@ -158,7 +160,8 @@ class ResourceManager final : private Granite::AssetInstantiatorInterface std::mutex mesh_allocator_lock; MeshBufferAllocator index_buffer_allocator; - MeshBufferAllocator position_buffer_allocator; MeshBufferAllocator attribute_buffer_allocator; + + bool allocate_asset_mesh(Granite::AssetID id, const Meshlet::MeshView &view); }; } diff --git a/renderer/formats/meshlet.cpp b/vulkan/mesh/meshlet.cpp similarity index 87% rename from renderer/formats/meshlet.cpp rename to vulkan/mesh/meshlet.cpp index a91cbef7..4146339f 100644 --- a/renderer/formats/meshlet.cpp +++ b/vulkan/mesh/meshlet.cpp @@ -24,14 +24,13 @@ #include "command_buffer.hpp" #include "buffer.hpp" #include "device.hpp" +#include "filesystem.hpp" -namespace Granite -{ -namespace SceneFormats +namespace Vulkan { namespace Meshlet { -MeshView create_mesh_view(const FileMapping &mapping) +MeshView create_mesh_view(const Granite::FileMapping &mapping) { MeshView view = {}; @@ -86,10 +85,10 @@ MeshView create_mesh_view(const FileMapping &mapping) return view; } -bool decode_mesh(Vulkan::CommandBuffer &cmd, - const Vulkan::Buffer &ibo, uint64_t ibo_offset, - const Vulkan::Buffer &vbo, uint64_t vbo_offset, - const Vulkan::Buffer &payload, uint64_t payload_offset, +bool decode_mesh(CommandBuffer &cmd, + const Buffer &ibo, uint64_t ibo_offset, + const Buffer &vbo, uint64_t vbo_offset, + const Buffer &payload, uint64_t payload_offset, const MeshView 
&view) { // TODO: Implement LDS fallback. @@ -101,8 +100,8 @@ bool decode_mesh(Vulkan::CommandBuffer &cmd, const uint32_t u32_stride = view.format_header->u32_stream_count - 1; - Vulkan::BufferCreateInfo buf_info = {}; - buf_info.domain = Vulkan::BufferDomain::LinkedDeviceHost; + BufferCreateInfo buf_info = {}; + buf_info.domain = BufferDomain::LinkedDeviceHost; buf_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; buf_info.size = view.format_header->meshlet_count * sizeof(*view.headers); @@ -111,20 +110,21 @@ bool decode_mesh(Vulkan::CommandBuffer &cmd, buf_info.size = view.format_header->meshlet_count * view.format_header->u32_stream_count * sizeof(*view.streams); auto meshlet_stream_buffer = cmd.get_device().create_buffer(buf_info, view.streams); - std::vector output_offset_strides; + struct OffsetStride { uint32_t offset, stride; }; + std::vector output_offset_strides; output_offset_strides.reserve(view.format_header->meshlet_count * view.format_header->u32_stream_count); uint32_t index_count = 0; for (uint32_t i = 0; i < view.format_header->meshlet_count; i++) { - output_offset_strides.emplace_back(index_count, 0); + output_offset_strides.push_back({ index_count, 0 }); index_count += view.headers[i].num_primitives_minus_1 + 1; for (uint32_t j = 1; j < view.format_header->u32_stream_count; j++) - output_offset_strides.emplace_back(view.headers[i].base_vertex_offset * u32_stride + (j - 1), u32_stride); + output_offset_strides.push_back({ view.headers[i].base_vertex_offset * u32_stride + (j - 1), u32_stride }); } - buf_info.domain = Vulkan::BufferDomain::LinkedDeviceHost; - buf_info.size = output_offset_strides.size() * sizeof(uvec2); + buf_info.domain = BufferDomain::LinkedDeviceHost; + buf_info.size = output_offset_strides.size() * sizeof(OffsetStride); auto output_offset_strides_buffer = cmd.get_device().create_buffer(buf_info, output_offset_strides.data()); cmd.set_program("builtin://shaders/decode/meshlet_decode.comp"); @@ -154,4 +154,3 @@ bool 
decode_mesh(Vulkan::CommandBuffer &cmd, } } } -} diff --git a/renderer/formats/meshlet.hpp b/vulkan/mesh/meshlet.hpp similarity index 94% rename from renderer/formats/meshlet.hpp rename to vulkan/mesh/meshlet.hpp index a6dc9212..8c3a49cd 100644 --- a/renderer/formats/meshlet.hpp +++ b/vulkan/mesh/meshlet.hpp @@ -23,8 +23,11 @@ #pragma once #include -#include "filesystem.hpp" -#include "math.hpp" + +namespace Granite +{ +class FileMapping; +} namespace Vulkan { @@ -32,9 +35,7 @@ class CommandBuffer; class Buffer; } -namespace Granite -{ -namespace SceneFormats +namespace Vulkan { // MESHLET1 format. namespace Meshlet @@ -61,9 +62,9 @@ struct Header struct Bound { - vec3 center; + float center[3]; float radius; - i8vec4 cone_axis_cutoff; + int8_t cone_axis_cutoff[4]; }; enum class StreamType @@ -106,7 +107,7 @@ struct MeshView static const char magic[8] = { 'M', 'E', 'S', 'H', 'L', 'E', 'T', '1' }; -MeshView create_mesh_view(const FileMapping &mapping); +MeshView create_mesh_view(const Granite::FileMapping &mapping); bool decode_mesh(Vulkan::CommandBuffer &cmd, const Vulkan::Buffer &ibo, uint64_t ibo_offset, @@ -115,4 +116,3 @@ bool decode_mesh(Vulkan::CommandBuffer &cmd, const MeshView &view); } } -} From 580252028f6496dd52c5a08a5c54df624cf3e880 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sun, 13 Aug 2023 14:37:49 +0200 Subject: [PATCH 62/71] Get basic viewer working with VBO/IBO path. 
--- tests/assets/shaders/meshlet_debug.vert | 2 +- tests/meshlet_viewer.cpp | 108 ++++++------------------ util/arena_allocator.hpp | 7 +- vulkan/managers/resource_manager.cpp | 22 ++--- vulkan/managers/resource_manager.hpp | 21 ++++- 5 files changed, 59 insertions(+), 101 deletions(-) diff --git a/tests/assets/shaders/meshlet_debug.vert b/tests/assets/shaders/meshlet_debug.vert index de688f31..582bd202 100644 --- a/tests/assets/shaders/meshlet_debug.vert +++ b/tests/assets/shaders/meshlet_debug.vert @@ -8,7 +8,7 @@ layout(location = 2) out vec2 vUV; #include "meshlet_attribute_decode.h" -layout(set = 0, binding = 0) uniform UBO +layout(set = 1, binding = 0) uniform UBO { mat4 VP; }; diff --git a/tests/meshlet_viewer.cpp b/tests/meshlet_viewer.cpp index b2f1161c..ec022453 100644 --- a/tests/meshlet_viewer.cpp +++ b/tests/meshlet_viewer.cpp @@ -43,98 +43,21 @@ struct MeshletViewerApplication : Granite::Application, Granite::EventHandler MeshletViewerApplication(const char *path) { get_wsi().set_backbuffer_srgb(false); - - auto file = GRANITE_FILESYSTEM()->open(path, FileMode::ReadOnly); - if (!file) - throw std::runtime_error("Failed to open file."); - - mapping = file->map(); - if (!mapping) - throw std::runtime_error("Failed to map file."); - + mesh_id = GRANITE_ASSET_MANAGER()->register_asset(*GRANITE_FILESYSTEM(), path, Granite::AssetClass::Mesh); EVENT_MANAGER_REGISTER_LATCH(MeshletViewerApplication, on_device_create, on_device_destroy, DeviceCreatedEvent); } - FileMappingHandle mapping; - Vulkan::BufferHandle ibo; - Vulkan::BufferHandle vbo; - Vulkan::BufferHandle payload; - Vulkan::BufferHandle meshlet_meta_buffer; - Vulkan::BufferHandle meshlet_stream_buffer; AABB aabb; FPSCamera camera; - FormatHeader header; + Granite::AssetID mesh_id; void on_device_create(const DeviceCreatedEvent &e) { e.get_device().get_shader_manager().add_include_directory("builtin://shaders/inc"); - - auto view = create_mesh_view(*mapping); - if (!view.format_header) - throw 
std::runtime_error("Failed to load meshlet."); - - header = *view.format_header; - - Vulkan::BufferCreateInfo info = {}; - info.size = view.total_primitives * sizeof(uvec3); - info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT; - info.domain = Vulkan::BufferDomain::Device; - ibo = e.get_device().create_buffer(info); - - info.size = view.total_vertices * (view.format_header->u32_stream_count - 1) * sizeof(uint32_t); - info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; - info.domain = Vulkan::BufferDomain::Device; - vbo = e.get_device().create_buffer(info); - - info.size = view.format_header->payload_size_words * sizeof(uint32_t); - info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; - info.domain = Vulkan::BufferDomain::LinkedDeviceHost; - payload = e.get_device().create_buffer(info, view.payload); - - auto cmd = e.get_device().request_command_buffer(); - if (!decode_mesh(*cmd, *ibo, 0, *vbo, 0, *payload, 0, view)) - { - e.get_device().submit_discard(cmd); - throw std::runtime_error("Failed to decode mesh.\n"); - } - - cmd->barrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT); - e.get_device().submit(cmd); - - aabb = { vec3(FLT_MAX), vec3(FLT_MIN) }; - for (uint32_t i = 0; i < view.format_header->meshlet_count; i++) - { - auto cluster_aabb = AABB{ - view.bounds[i].center - view.bounds[i].radius, - view.bounds[i].center + view.bounds[i].radius, - }; - aabb.expand(cluster_aabb); - } - - camera.set_depth_range(0.1f, 200.0f); - camera.set_fovy(0.4f * pi()); - camera.look_at(aabb.get_center() + vec3(0.1f, 0.2f, 2.1f) * aabb.get_radius(), - aabb.get_center(), vec3(0.0f, 1.0f, 0.0f)); - - Vulkan::BufferCreateInfo buf_info = {}; - buf_info.domain = Vulkan::BufferDomain::LinkedDeviceHost; - buf_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; - - buf_info.size = 
view.format_header->meshlet_count * sizeof(*view.headers); - meshlet_meta_buffer = e.get_device().create_buffer(buf_info, view.headers); - - buf_info.size = view.format_header->meshlet_count * view.format_header->u32_stream_count * sizeof(*view.streams); - meshlet_stream_buffer = e.get_device().create_buffer(buf_info, view.streams); } void on_device_destroy(const DeviceCreatedEvent &) { - ibo.reset(); - vbo.reset(); - payload.reset(); - meshlet_meta_buffer.reset(); - meshlet_stream_buffer.reset(); } void render_frame(double, double) override @@ -146,6 +69,12 @@ struct MeshletViewerApplication : Granite::Application, Granite::EventHandler cmd->begin_render_pass(device.get_swapchain_render_pass(SwapchainRenderPass::Depth)); camera.set_aspect(cmd->get_viewport().width / cmd->get_viewport().height); + cmd->set_opaque_state(); + + auto vp = camera.get_projection() * camera.get_view(); + *cmd->allocate_typed_constant_data(1, 0, 1) = vp; + +#if 0 bool large_workgroup = device.get_device_features().mesh_shader_properties.maxPreferredMeshWorkGroupInvocations > 32 && device.get_device_features().mesh_shader_properties.maxMeshWorkGroupInvocations >= 256; @@ -154,11 +83,6 @@ struct MeshletViewerApplication : Granite::Application, Granite::EventHandler "assets://shaders/meshlet_debug.mesh.frag", {{"MESHLET_PAYLOAD_LARGE_WORKGROUP", int(large_workgroup)}}); - cmd->set_opaque_state(); - - auto vp = camera.get_projection() * camera.get_view(); - *cmd->allocate_typed_constant_data(1, 0, 1) = vp; - cmd->set_storage_buffer(0, 0, *meshlet_meta_buffer); cmd->set_storage_buffer(0, 1, *meshlet_stream_buffer); cmd->set_storage_buffer(0, 2, *payload); @@ -168,6 +92,22 @@ struct MeshletViewerApplication : Granite::Application, Granite::EventHandler cmd->set_specialization_constant_mask(1); cmd->set_specialization_constant(0, header.u32_stream_count); cmd->draw_mesh_tasks(header.meshlet_count, 1, 1); +#else + auto *ibo = device.get_resource_manager().get_index_buffer(); + auto *vbo = 
device.get_resource_manager().get_attribute_buffer(); + if (ibo && vbo) + { + cmd->set_program("assets://shaders/meshlet_debug.vert", "assets://shaders/meshlet_debug.frag"); + cmd->set_index_buffer(*ibo, 0, VK_INDEX_TYPE_UINT32); + cmd->set_vertex_binding(0, *vbo, 0, 6 * sizeof(uint32_t)); + cmd->set_vertex_attrib(0, 0, VK_FORMAT_R32G32B32A32_UINT, 0); + cmd->set_vertex_attrib(1, 0, VK_FORMAT_R32G32_UINT, 4 * sizeof(uint32_t)); + + auto draw = device.get_resource_manager().get_mesh_indexed_draw(mesh_id); + cmd->draw_indexed(draw.indexCount, draw.instanceCount, draw.firstIndex, draw.vertexOffset, + draw.firstInstance); + } +#endif cmd->end_render_pass(); device.submit(cmd); diff --git a/util/arena_allocator.hpp b/util/arena_allocator.hpp index 6c810ced..68c2378e 100644 --- a/util/arena_allocator.hpp +++ b/util/arena_allocator.hpp @@ -138,11 +138,16 @@ class ArenaAllocator return sub_block_size * Util::LegionAllocator::NumSubBlocks; } - inline uint32_t get_block_alignment() const + inline uint32_t get_sub_block_size() const { return sub_block_size; } + inline uint32_t get_block_alignment() const + { + return get_sub_block_size(); + } + inline bool allocate(uint32_t size, BackingAllocation *alloc) { unsigned num_blocks = (size + sub_block_size - 1) >> sub_block_size_log2; diff --git a/vulkan/managers/resource_manager.cpp b/vulkan/managers/resource_manager.cpp index fe5dda9d..5637006a 100644 --- a/vulkan/managers/resource_manager.cpp +++ b/vulkan/managers/resource_manager.cpp @@ -492,16 +492,6 @@ const Buffer *ResourceManager::get_attribute_buffer() const return attribute_buffer_allocator.get_buffer(0); } -static const VkDrawIndexedIndirectCommand empty_draw = {}; - -const VkDrawIndexedIndirectCommand &ResourceManager::get_mesh_indexed_draw(Granite::AssetID id) const -{ - if (id.id < draws.size()) - return draws[id.id]; - else - return empty_draw; -} - MeshBufferAllocator::MeshBufferAllocator(Device &device) : global_allocator(device) { @@ -511,9 +501,12 @@ 
MeshBufferAllocator::MeshBufferAllocator(Device &device) // Basic unit of a meshlet is 256 prims / attributes. // Maximum element count = 32M prims. - allocators[0].sub_block_size = 256; + allocators[0].set_sub_block_size(256); for (int i = 1; i < SliceAllocatorCount; i++) - allocators[i].sub_block_size = allocators[i - 1].sub_block_size * (Util::LegionAllocator::NumSubBlocks / 2); + allocators[i].set_sub_block_size(allocators[i - 1].get_sub_block_size() * (Util::LegionAllocator::NumSubBlocks / 2)); + + for (auto &alloc : allocators) + alloc.set_object_pool(&object_pool); } void MeshBufferAllocator::set_element_size(uint32_t element_size) @@ -622,8 +615,11 @@ void SliceAllocator::prepare_allocation(AllocatedSlice *allocation, Util::Intrus bool MeshBufferAllocator::allocate(uint32_t count, Internal::AllocatedSlice *slice) { for (auto &alloc : allocators) - if (count <= alloc.get_max_allocation_size()) + { + uint32_t max_alloc_size = alloc.get_max_allocation_size(); + if (count <= max_alloc_size) return alloc.allocate(count, slice); + } LOGE("Allocation of %u elements is too large for MeshBufferAllocator.\n", count); return false; diff --git a/vulkan/managers/resource_manager.hpp b/vulkan/managers/resource_manager.hpp index 4eeab6d5..f4b40147 100644 --- a/vulkan/managers/resource_manager.hpp +++ b/vulkan/managers/resource_manager.hpp @@ -64,7 +64,6 @@ struct SliceAllocator : Util::ArenaAllocator { SliceAllocator *parent = nullptr; MeshGlobalAllocator *global_allocator = nullptr; - uint32_t sub_block_size = 0; // Implements curious recurring template pattern calls. 
bool allocate_backing_heap(AllocatedSlice *allocation); @@ -99,6 +98,12 @@ class ResourceManager final : private Granite::AssetInstantiatorInterface ~ResourceManager() override; void init(); + enum class MeshEncoding + { + Meshlet, + EncodedVBOAndIBO, + }; + inline const Vulkan::ImageView *get_image_view(Granite::AssetID id) const { if (id.id < views.size()) @@ -109,7 +114,19 @@ class ResourceManager final : private Granite::AssetInstantiatorInterface const Vulkan::ImageView *get_image_view_blocking(Granite::AssetID id); - const VkDrawIndexedIndirectCommand &get_mesh_indexed_draw(Granite::AssetID id) const; + inline VkDrawIndexedIndirectCommand get_mesh_indexed_draw(Granite::AssetID id) const + { + if (id.id < draws.size()) + return draws[id.id]; + else + return {}; + } + + inline MeshEncoding get_mesh_encoding() const + { + return MeshEncoding::EncodedVBOAndIBO; + } + const Buffer *get_index_buffer() const; const Buffer *get_attribute_buffer() const; From 5c8c9d94c3aa415b1a9c2c5144a9e3655e00d0d5 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sun, 13 Aug 2023 17:13:28 +0200 Subject: [PATCH 63/71] Add decode to proper types. 
--- assets/shaders/decode/meshlet_decode.comp | 141 ++++++++++++++++++--- tests/assets/shaders/meshlet_debug.vert | 18 +-- tests/meshlet_viewer.cpp | 14 ++- vulkan/managers/resource_manager.cpp | 121 +++++++++++++----- vulkan/managers/resource_manager.hpp | 14 ++- vulkan/mesh/meshlet.cpp | 142 ++++++++++++++++++---- vulkan/mesh/meshlet.hpp | 25 +++- 7 files changed, 376 insertions(+), 99 deletions(-) diff --git a/assets/shaders/decode/meshlet_decode.comp b/assets/shaders/decode/meshlet_decode.comp index f2d0f0a8..1da0fcfe 100644 --- a/assets/shaders/decode/meshlet_decode.comp +++ b/assets/shaders/decode/meshlet_decode.comp @@ -13,28 +13,75 @@ layout(local_size_x = 32, local_size_y = MESHLET_PAYLOAD_WG_Y) in; layout(constant_id = 0) const uint NUM_U32_STREAMS = MESHLET_PAYLOAD_MAX_STREAMS; +layout(constant_id = 1) const uint NUM_OUTPUT_U32_STREAMS = 1; +layout(constant_id = 2) const bool RAW_PAYLOAD = false; #define MESHLET_PAYLOAD_NUM_U32_STREAMS NUM_U32_STREAMS #define MESHLET_PAYLOAD_DESCRIPTOR_SET 0 #define MESHLET_PAYLOAD_META_BINDING 0 #define MESHLET_PAYLOAD_STREAM_BINDING 1 -#define MESHLET_PAYLOAD_PAYLOAD_BINDING 4 +#define MESHLET_PAYLOAD_PAYLOAD_BINDING 2 #include "../inc/meshlet_payload_decode.h" +#include "../inc/meshlet_attribute_decode.h" -layout(set = 0, binding = 2, std430) writeonly buffer OutputAttributes -{ - uint data[]; -} output_payload; +const int MESH_STYLE = int(NUM_OUTPUT_U32_STREAMS); +const int MESH_STYLE_WIREFRAME = 0; +const int MESH_STYLE_UNTEXTURED = 1; +const int MESH_STYLE_TEXTURED = 2; +const int MESH_STYLE_SKINNED = 3; layout(set = 0, binding = 3, scalar) writeonly buffer OutputIndices { uvec3 data[]; -} output_indices; +} output_indices32; + +layout(set = 0, binding = 4, std430) writeonly buffer OutputStream0 +{ + uint data[]; +} output_stream_raw; + +layout(set = 0, binding = 4, scalar) writeonly buffer OutputStreamPos +{ + vec3 data[]; +} output_stream_pos; + +struct UntexturedAttr +{ + uint normal; +}; + +layout(set = 0, 
binding = 5, std430) writeonly buffer OutputStreamUntextured +{ + UntexturedAttr data[]; +} output_stream_untextured_attr; -layout(set = 0, binding = 5, std430) readonly buffer OutputOffsets +struct TexturedAttr +{ + uint normal; + uint tangent; + vec2 uv; +}; + +layout(set = 0, binding = 5, std430) writeonly buffer OutputStreamTextured +{ + TexturedAttr data[]; +} output_stream_textured_attr; + +layout(set = 0, binding = 6, std430) writeonly buffer OutputStreamSkin +{ + uvec2 data[]; +} output_stream_skin; + +layout(set = 0, binding = 7, std430) readonly buffer OutputOffsets { uvec2 data[]; } output_offset_strides; +uint pack_a2bgr10(vec4 v) +{ + ivec4 quantized = ivec4(round(clamp(v, vec4(-1.0), vec4(1.0)) * vec4(511.0, 511.0, 511.0, 1.0))) & ivec4(1023, 1023, 1023, 3); + return (quantized.a << 30) | (quantized.b << 20) | (quantized.g << 10) | (quantized.r << 0); +} + void main() { uint meshlet_index = gl_WorkGroupID.x; @@ -44,23 +91,85 @@ void main() #define INDEX(linear_index, packed_indices) { \ uvec3 indices = uvec4(unpack8(packed_indices)).xyz; \ indices += meta.base_vertex_offset; \ - uint output_offset = output_offset_strides.data[meshlet_index * NUM_U32_STREAMS].x; \ + uint output_offset; \ + if (RAW_PAYLOAD) \ + output_offset = output_offset_strides.data[meshlet_index * NUM_OUTPUT_U32_STREAMS].x; \ + else \ + output_offset = output_offset_strides.data[meshlet_index].x; \ if (linear_index <= uint(meta.num_primitives_minus_1)) \ - output_indices.data[output_offset + linear_index] = indices; \ + output_indices32.data[output_offset + linear_index] = indices; \ } + { + MESHLET_DECODE_STREAM_32(meshlet_index, 0, INDEX); + } + + if (RAW_PAYLOAD) + { #define ATTR(linear_index, packed_decoded) { \ - uvec2 output_offset_stride0 = output_offset_strides.data[meshlet_index * NUM_U32_STREAMS + i]; \ + uvec2 output_offset_stride0 = output_offset_strides.data[meshlet_index * NUM_OUTPUT_U32_STREAMS + i]; \ if (linear_index <= uint(meta.num_attributes_minus_1)) \ - 
output_payload.data[output_offset_stride0.x + linear_index * output_offset_stride0.y] = packed_decoded; \ + output_stream_raw.data[output_offset_stride0.x + linear_index * output_offset_stride0.y] = packed_decoded; \ } - { - MESHLET_DECODE_STREAM_32(meshlet_index, 0, INDEX); + for (uint i = 1; i < NUM_OUTPUT_U32_STREAMS; i++) + { + MESHLET_DECODE_STREAM_32(meshlet_index, i, ATTR); + } } - - for (uint i = 1; i < NUM_U32_STREAMS; i++) + else { - MESHLET_DECODE_STREAM_32(meshlet_index, i, ATTR); + uint output_offset = output_offset_strides.data[meshlet_index].y; + +#define POS(linear_index, packed_decoded) { \ + if (linear_index <= uint(meta.num_attributes_minus_1)) \ + output_stream_pos.data[output_offset + linear_index] = attribute_decode_snorm_exp_position(packed_decoded); \ +} + +#define NORMAL(linear_index, packed_decoded) { \ + if (linear_index <= uint(meta.num_attributes_minus_1)) { \ + if (MESH_STYLE >= MESH_STYLE_TEXTURED) \ + output_stream_textured_attr.data[output_offset + linear_index].normal = pack_a2bgr10(attribute_decode_oct8_normal_tangent(packed_decoded)); \ + else \ + output_stream_untextured_attr.data[output_offset + linear_index].normal = pack_a2bgr10(attribute_decode_oct8_normal_tangent(packed_decoded)); \ + } \ +} + +#define TANGENT(linear_index, packed_decoded) { \ + if (linear_index <= uint(meta.num_attributes_minus_1)) { \ + output_stream_textured_attr.data[output_offset + linear_index].tangent = pack_a2bgr10(attribute_decode_oct8_normal_tangent(packed_decoded)); \ + } \ +} + +#define UV(linear_index, packed_decoded) { \ + if (linear_index <= uint(meta.num_attributes_minus_1)) { \ + output_stream_textured_attr.data[output_offset + linear_index].uv = attribute_decode_snorm_exp_uv(packed_decoded); \ + } \ +} + +#define SKIN(linear_index, packed_decoded) { \ + if (linear_index <= uint(meta.num_attributes_minus_1)) { \ + output_stream_skin.data[output_offset + linear_index] = packed_decoded; \ + } \ +} + { + MESHLET_DECODE_STREAM_64(meshlet_index, 
1, POS); + } + + if (MESH_STYLE >= MESH_STYLE_UNTEXTURED) + { + MESHLET_DECODE_STREAM_32(meshlet_index, 3, NORMAL); + } + + if (MESH_STYLE >= MESH_STYLE_TEXTURED) + { + MESHLET_DECODE_STREAM_32(meshlet_index, 4, TANGENT); + MESHLET_DECODE_STREAM_64(meshlet_index, 5, UV); + } + + if (MESH_STYLE >= MESH_STYLE_SKINNED) + { + MESHLET_DECODE_STREAM_64(meshlet_index, 7, SKIN); + } } } diff --git a/tests/assets/shaders/meshlet_debug.vert b/tests/assets/shaders/meshlet_debug.vert index 582bd202..e0708815 100644 --- a/tests/assets/shaders/meshlet_debug.vert +++ b/tests/assets/shaders/meshlet_debug.vert @@ -1,13 +1,14 @@ #version 450 -layout(location = 0) in uvec4 ATTR0; -layout(location = 1) in uvec2 ATTR1; +layout(location = 0) in vec3 POS; +layout(location = 1) in mediump vec3 N; +layout(location = 2) in mediump vec4 T; +layout(location = 3) in vec2 UV; + layout(location = 0) out mediump vec3 vNormal; layout(location = 1) out mediump vec4 vTangent; layout(location = 2) out vec2 vUV; -#include "meshlet_attribute_decode.h" - layout(set = 1, binding = 0) uniform UBO { mat4 VP; @@ -15,9 +16,8 @@ layout(set = 1, binding = 0) uniform UBO void main() { - vec3 pos = attribute_decode_snorm_exp_position(ATTR0.xy); - vNormal = attribute_decode_oct8_normal_tangent(ATTR0.z).xyz; - vTangent = attribute_decode_oct8_normal_tangent(ATTR0.w); - vUV = attribute_decode_snorm_exp_uv(ATTR1); - gl_Position = VP * vec4(pos, 1.0); + vNormal = N; + vTangent = T; + vUV = UV; + gl_Position = VP * vec4(POS, 1.0); } diff --git a/tests/meshlet_viewer.cpp b/tests/meshlet_viewer.cpp index ec022453..ab2ead29 100644 --- a/tests/meshlet_viewer.cpp +++ b/tests/meshlet_viewer.cpp @@ -94,14 +94,18 @@ struct MeshletViewerApplication : Granite::Application, Granite::EventHandler cmd->draw_mesh_tasks(header.meshlet_count, 1, 1); #else auto *ibo = device.get_resource_manager().get_index_buffer(); - auto *vbo = device.get_resource_manager().get_attribute_buffer(); - if (ibo && vbo) + auto *pos = 
device.get_resource_manager().get_position_buffer(); + auto *attr = device.get_resource_manager().get_attribute_buffer(); + if (ibo && pos && attr) { cmd->set_program("assets://shaders/meshlet_debug.vert", "assets://shaders/meshlet_debug.frag"); cmd->set_index_buffer(*ibo, 0, VK_INDEX_TYPE_UINT32); - cmd->set_vertex_binding(0, *vbo, 0, 6 * sizeof(uint32_t)); - cmd->set_vertex_attrib(0, 0, VK_FORMAT_R32G32B32A32_UINT, 0); - cmd->set_vertex_attrib(1, 0, VK_FORMAT_R32G32_UINT, 4 * sizeof(uint32_t)); + cmd->set_vertex_binding(0, *pos, 0, 12); + cmd->set_vertex_binding(1, *attr, 0, 16); + cmd->set_vertex_attrib(0, 0, VK_FORMAT_R32G32B32_SFLOAT, 0); + cmd->set_vertex_attrib(1, 1, VK_FORMAT_A2B10G10R10_SNORM_PACK32, 0); + cmd->set_vertex_attrib(2, 1, VK_FORMAT_A2B10G10R10_SNORM_PACK32, 4); + cmd->set_vertex_attrib(3, 1, VK_FORMAT_R32G32_SFLOAT, 8); auto draw = device.get_resource_manager().get_mesh_indexed_draw(mesh_id); cmd->draw_indexed(draw.indexCount, draw.instanceCount, draw.firstIndex, draw.vertexOffset, diff --git a/vulkan/managers/resource_manager.cpp b/vulkan/managers/resource_manager.cpp index 5637006a..a22c0263 100644 --- a/vulkan/managers/resource_manager.cpp +++ b/vulkan/managers/resource_manager.cpp @@ -38,8 +38,11 @@ ResourceManager::ResourceManager(Device *device_) , attribute_buffer_allocator(*device_) { // Simplified style. 
- index_buffer_allocator.set_element_size(sizeof(uint32_t) * 3); - attribute_buffer_allocator.set_element_size(sizeof(float) * 3 + sizeof(float) * 2 + sizeof(uint32_t) * 2); + index_buffer_allocator.set_element_size(0, sizeof(uint32_t) * 3); + attribute_buffer_allocator.set_soa_count(3); + attribute_buffer_allocator.set_element_size(0, sizeof(float) * 3); + attribute_buffer_allocator.set_element_size(1, sizeof(float) * 2 + sizeof(uint32_t) * 2); + attribute_buffer_allocator.set_element_size(2, sizeof(uint32_t) * 2); assets.reserve(Granite::AssetID::MaxIDs); } @@ -364,11 +367,24 @@ void ResourceManager::instantiate_asset_mesh(Granite::AssetManager &manager_, buf.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; auto payload = device->create_buffer(buf, view.payload); - Meshlet::decode_mesh(*cmd, *index_buffer_allocator.get_buffer(0), - asset.mesh.index.offset * index_buffer_allocator.get_element_size(), - *attribute_buffer_allocator.get_buffer(0), - asset.mesh.attr.offset * attribute_buffer_allocator.get_element_size(), - *payload, 0, view); + Meshlet::DecodeInfo info = {}; + info.target_style = Meshlet::MeshStyle::Textured; + info.ibo = { + index_buffer_allocator.get_buffer(0, 0), + asset.mesh.index.offset * index_buffer_allocator.get_element_size(0), + }; + + for (unsigned i = 0; i < 3; i++) + { + info.streams[i] = { + attribute_buffer_allocator.get_buffer(0, i), + asset.mesh.index.offset * attribute_buffer_allocator.get_element_size(i), + }; + } + + info.payload = { payload.get(), 0 }; + + Meshlet::decode_mesh(*cmd, info, view); Semaphore sem[2]; device->submit(cmd, nullptr, 2, sem); @@ -381,8 +397,13 @@ void ResourceManager::instantiate_asset_mesh(Granite::AssetManager &manager_, } uint64_t cost = 0; - cost += view.total_primitives * index_buffer_allocator.get_element_size(); - cost += view.total_vertices * attribute_buffer_allocator.get_element_size(); + if (ret) + { + cost += view.total_primitives * index_buffer_allocator.get_element_size(0); + cost += 
view.total_vertices * attribute_buffer_allocator.get_element_size(0); + cost += view.total_vertices * attribute_buffer_allocator.get_element_size(1); + cost += view.total_vertices * attribute_buffer_allocator.get_element_size(2); + } std::lock_guard holder{lock}; updates.push_back(id); @@ -484,12 +505,22 @@ void ResourceManager::latch_handles() const Buffer *ResourceManager::get_index_buffer() const { - return index_buffer_allocator.get_buffer(0); + return index_buffer_allocator.get_buffer(0, 0); +} + +const Buffer *ResourceManager::get_position_buffer() const +{ + return attribute_buffer_allocator.get_buffer(0, 0); } const Buffer *ResourceManager::get_attribute_buffer() const { - return attribute_buffer_allocator.get_buffer(0); + return attribute_buffer_allocator.get_buffer(0, 1); +} + +const Buffer *ResourceManager::get_skinning_buffer() const +{ + return attribute_buffer_allocator.get_buffer(0, 2); } MeshBufferAllocator::MeshBufferAllocator(Device &device) @@ -509,18 +540,29 @@ MeshBufferAllocator::MeshBufferAllocator(Device &device) alloc.set_object_pool(&object_pool); } -void MeshBufferAllocator::set_element_size(uint32_t element_size) +void MeshBufferAllocator::set_soa_count(unsigned soa_count) +{ + VK_ASSERT(soa_count <= Internal::MeshGlobalAllocator::MaxSoACount); + global_allocator.soa_count = soa_count; +} + +void MeshBufferAllocator::set_element_size(unsigned soa_index, uint32_t element_size) { - global_allocator.element_size = element_size; + VK_ASSERT(soa_index < global_allocator.soa_count); + global_allocator.element_size[soa_index] = element_size; } -uint32_t MeshBufferAllocator::get_element_size() const +uint32_t MeshBufferAllocator::get_element_size(unsigned soa_index) const { - return global_allocator.element_size; + VK_ASSERT(soa_index < global_allocator.soa_count); + return global_allocator.element_size[soa_index]; } -const Buffer *MeshBufferAllocator::get_buffer(unsigned index) const +const Buffer *MeshBufferAllocator::get_buffer(unsigned 
index, unsigned soa_index) const { + VK_ASSERT(soa_index < global_allocator.soa_count); + index = index * global_allocator.soa_count + soa_index; + if (index < global_allocator.global_buffers.size()) return global_allocator.global_buffers[index].get(); else @@ -532,35 +574,48 @@ namespace Internal uint32_t MeshGlobalAllocator::allocate(uint32_t count) { BufferCreateInfo info = {}; - info.size = VkDeviceSize(count) * element_size; - info.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | - VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT; - info.domain = BufferDomain::Device; - auto buf = device.create_buffer(info); - - for (uint32_t i = 0, n = global_buffers.size(); i < n; i++) + + uint32_t target_index = UINT32_MAX; + uint32_t search_index = 0; + + for (uint32_t i = 0, n = global_buffers.size(); i < n; i += soa_count, search_index++) { if (!global_buffers[i]) { - global_buffers[i] = std::move(buf); - return i; + target_index = search_index; + break; } } - // For now, have one global buffer for VBO / IBO. 
- if (!global_buffers.empty()) - return UINT32_MAX; + if (target_index == UINT32_MAX) + { + if (!global_buffers.empty()) + return UINT32_MAX; + + target_index = search_index; + for (uint32_t i = 0; i < soa_count; i++) + global_buffers.emplace_back(); + } + + for (uint32_t soa_index = 0; soa_index < soa_count; soa_index++) + { + info.size = VkDeviceSize(count) * element_size[soa_index]; + info.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + info.domain = BufferDomain::Device; + global_buffers[target_index * soa_count + soa_index] = device.create_buffer(info); + } - uint32_t ret = global_buffers.size(); - global_buffers.push_back(std::move(buf)); - return ret; + return target_index; } void MeshGlobalAllocator::free(uint32_t index) { + index *= soa_count; VK_ASSERT(index < global_buffers.size()); - global_buffers[index].reset(); + for (uint32_t i = 0; i < soa_count; i++) + global_buffers[index + i].reset(); } MeshGlobalAllocator::MeshGlobalAllocator(Device &device_) diff --git a/vulkan/managers/resource_manager.hpp b/vulkan/managers/resource_manager.hpp index f4b40147..3c47586f 100644 --- a/vulkan/managers/resource_manager.hpp +++ b/vulkan/managers/resource_manager.hpp @@ -55,8 +55,11 @@ struct MeshGlobalAllocator uint32_t allocate(uint32_t count); void free(uint32_t index); + enum { MaxSoACount = 3 }; // Position, attribute, skinning. 
+ Device &device; - uint32_t element_size = 0; + uint32_t element_size[MaxSoACount] = {}; + uint32_t soa_count = 1; Util::SmallVector global_buffers; }; @@ -79,10 +82,11 @@ class MeshBufferAllocator explicit MeshBufferAllocator(Device &device); bool allocate(uint32_t count, Internal::AllocatedSlice *slice); void free(const Internal::AllocatedSlice &slice); - void set_element_size(uint32_t element_size); - uint32_t get_element_size() const; + void set_soa_count(unsigned soa_count); + void set_element_size(unsigned soa_index, uint32_t element_size); + uint32_t get_element_size(unsigned soa_index) const; - const Buffer *get_buffer(unsigned index) const; + const Buffer *get_buffer(unsigned index, unsigned soa_index) const; private: Util::ObjectPool> object_pool; @@ -128,7 +132,9 @@ class ResourceManager final : private Granite::AssetInstantiatorInterface } const Buffer *get_index_buffer() const; + const Buffer *get_position_buffer() const; const Buffer *get_attribute_buffer() const; + const Buffer *get_skinning_buffer() const; private: Device *device; diff --git a/vulkan/mesh/meshlet.cpp b/vulkan/mesh/meshlet.cpp index 4146339f..792a6a2a 100644 --- a/vulkan/mesh/meshlet.cpp +++ b/vulkan/mesh/meshlet.cpp @@ -85,11 +85,7 @@ MeshView create_mesh_view(const Granite::FileMapping &mapping) return view; } -bool decode_mesh(CommandBuffer &cmd, - const Buffer &ibo, uint64_t ibo_offset, - const Buffer &vbo, uint64_t vbo_offset, - const Buffer &payload, uint64_t payload_offset, - const MeshView &view) +bool decode_mesh(CommandBuffer &cmd, const DecodeInfo &info, const MeshView &view) { // TODO: Implement LDS fallback. 
if (!cmd.get_device().supports_subgroup_size_log2(true, 5, 5)) @@ -98,7 +94,17 @@ bool decode_mesh(CommandBuffer &cmd, return false; } - const uint32_t u32_stride = view.format_header->u32_stream_count - 1; + if (!info.streams[0].buffer) + { + LOGE("Decode stream 0 must be set.\n"); + return false; + } + + if (!info.ibo.buffer) + { + LOGE("Output IBO must be set.\n"); + return false; + } BufferCreateInfo buf_info = {}; buf_info.domain = BufferDomain::LinkedDeviceHost; @@ -110,22 +116,10 @@ bool decode_mesh(CommandBuffer &cmd, buf_info.size = view.format_header->meshlet_count * view.format_header->u32_stream_count * sizeof(*view.streams); auto meshlet_stream_buffer = cmd.get_device().create_buffer(buf_info, view.streams); - struct OffsetStride { uint32_t offset, stride; }; - std::vector output_offset_strides; - output_offset_strides.reserve(view.format_header->meshlet_count * view.format_header->u32_stream_count); - - uint32_t index_count = 0; - for (uint32_t i = 0; i < view.format_header->meshlet_count; i++) - { - output_offset_strides.push_back({ index_count, 0 }); - index_count += view.headers[i].num_primitives_minus_1 + 1; - for (uint32_t j = 1; j < view.format_header->u32_stream_count; j++) - output_offset_strides.push_back({ view.headers[i].base_vertex_offset * u32_stride + (j - 1), u32_stride }); - } - - buf_info.domain = BufferDomain::LinkedDeviceHost; - buf_info.size = output_offset_strides.size() * sizeof(OffsetStride); - auto output_offset_strides_buffer = cmd.get_device().create_buffer(buf_info, output_offset_strides.data()); + // For Raw mode -> offset/stride + // For typed mode -> index offset / vertex offset + struct DecodeOffset { uint32_t arg0, arg1; }; + std::vector decode_offsets; cmd.set_program("builtin://shaders/decode/meshlet_decode.comp"); cmd.enable_subgroup_size_control(true); @@ -133,12 +127,106 @@ bool decode_mesh(CommandBuffer &cmd, cmd.set_storage_buffer(0, 0, *meshlet_meta_buffer); cmd.set_storage_buffer(0, 1, *meshlet_stream_buffer); 
- cmd.set_storage_buffer(0, 2, vbo, vbo_offset, view.total_vertices * u32_stride * sizeof(uint32_t)); - cmd.set_storage_buffer(0, 3, ibo, ibo_offset, view.total_primitives * 3 * sizeof(uint32_t)); - cmd.set_storage_buffer(0, 4, payload, payload_offset, view.format_header->payload_size_words * sizeof(uint32_t)); - cmd.set_storage_buffer(0, 5, *output_offset_strides_buffer); - cmd.set_specialization_constant_mask(1); + cmd.set_storage_buffer(0, 2, *info.payload.buffer, + info.payload.offset, + view.format_header->payload_size_words * sizeof(uint32_t)); + cmd.set_storage_buffer(0, 3, *info.ibo.buffer, info.ibo.offset, view.total_primitives * sizeof(uint32_t) * 3); + + cmd.set_specialization_constant_mask(0x7); cmd.set_specialization_constant(0, view.format_header->u32_stream_count); + cmd.set_specialization_constant(2, (info.flags & DECODE_MODE_RAW_PAYLOAD) != 0); + + if ((info.flags & DECODE_MODE_RAW_PAYLOAD) != 0) + { + uint32_t output_u32_streams; + switch (info.target_style) + { + case MeshStyle::Wireframe: + output_u32_streams = 2; + break; + + case MeshStyle::Untextured: + output_u32_streams = 3; + break; + + case MeshStyle::Textured: + output_u32_streams = 6; + break; + + case MeshStyle::Skinned: + output_u32_streams = 8; + break; + + default: + return false; + } + + if (output_u32_streams + 1 > view.format_header->u32_stream_count) + { + LOGE("Trying to decode more streams than exist in payload.\n"); + return false; + } + + for (unsigned i = 0; i < 3; i++) + { + cmd.set_storage_buffer(0, 4 + i, *info.streams[0].buffer, info.streams[0].offset, + view.total_vertices * output_u32_streams * sizeof(uint32_t)); + } + + decode_offsets.reserve(view.format_header->meshlet_count * (output_u32_streams + 1)); + uint32_t index_count = 0; + + for (uint32_t i = 0; i < view.format_header->meshlet_count; i++) + { + decode_offsets.push_back({ index_count, 0 }); + index_count += view.headers[i].num_primitives_minus_1 + 1; + for (uint32_t j = 0; j < output_u32_streams; j++) + 
decode_offsets.push_back({ view.headers[i].base_vertex_offset * output_u32_streams + j, output_u32_streams }); + } + + cmd.set_specialization_constant(1, output_u32_streams + 1); + } + else + { + for (unsigned i = 0; i < 3; i++) + cmd.set_storage_buffer(0, 4 + i, *info.streams[0].buffer); + + switch (info.target_style) + { + case MeshStyle::Skinned: + cmd.set_storage_buffer(0, 6, *info.streams[2].buffer, info.streams[2].offset, + view.total_vertices * sizeof(uint32_t) * 2); + // Fallthrough + case MeshStyle::Untextured: + case MeshStyle::Textured: + cmd.set_storage_buffer(0, 5, *info.streams[1].buffer, info.streams[1].offset, + view.total_vertices * sizeof(uint32_t) * + (info.target_style == MeshStyle::Textured ? 4 : 1)); + // Fallthrough + case MeshStyle::Wireframe: + cmd.set_storage_buffer(0, 4, *info.streams[0].buffer, info.streams[0].offset, + view.total_vertices * sizeof(float) * 3); + break; + + default: + return false; + } + + decode_offsets.reserve(view.format_header->meshlet_count); + uint32_t index_count = 0; + for (uint32_t i = 0; i < view.format_header->meshlet_count; i++) + { + decode_offsets.push_back({ index_count, view.headers[i].base_vertex_offset }); + index_count += view.headers[i].num_primitives_minus_1 + 1; + } + cmd.set_specialization_constant(1, uint32_t(info.target_style)); + } + + buf_info.domain = BufferDomain::LinkedDeviceHost; + buf_info.size = decode_offsets.size() * sizeof(DecodeOffset); + auto output_offset_strides_buffer = cmd.get_device().create_buffer(buf_info, decode_offsets.data()); + + cmd.set_storage_buffer(0, 7, *output_offset_strides_buffer); // TODO: Split dispatches for big chungus meshes. // (Starts to become a problem around 8-16 million primitives per dispatch). 
diff --git a/vulkan/mesh/meshlet.hpp b/vulkan/mesh/meshlet.hpp index 8c3a49cd..315a055a 100644 --- a/vulkan/mesh/meshlet.hpp +++ b/vulkan/mesh/meshlet.hpp @@ -109,10 +109,25 @@ static const char magic[8] = { 'M', 'E', 'S', 'H', 'L', 'E', 'T', '1' }; MeshView create_mesh_view(const Granite::FileMapping &mapping); -bool decode_mesh(Vulkan::CommandBuffer &cmd, - const Vulkan::Buffer &ibo, uint64_t ibo_offset, - const Vulkan::Buffer &vbo, uint64_t vbo_offset, - const Vulkan::Buffer &payload, uint64_t payload_offset, - const MeshView &view); +struct DecodeBuffer +{ + const Vulkan::Buffer *buffer; + uint64_t offset; +}; + +enum DecodeModeFlagBits : uint32_t +{ + DECODE_MODE_RAW_PAYLOAD = 1 << 0, +}; +using DecodeModeFlags = uint32_t; + +struct DecodeInfo +{ + DecodeBuffer ibo, streams[3], payload; + DecodeModeFlags flags; + MeshStyle target_style; +}; + +bool decode_mesh(Vulkan::CommandBuffer &cmd, const DecodeInfo &decode_info, const MeshView &view); } } From b4308eda80c8375486a226d10a72157a9d7040bd Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sun, 13 Aug 2023 17:20:14 +0200 Subject: [PATCH 64/71] Fix build. 
--- tests/meshopt_sandbox.cpp | 12 +++++++++--- vulkan/mesh/meshlet.cpp | 1 + 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/tests/meshopt_sandbox.cpp b/tests/meshopt_sandbox.cpp index 10252525..57152f4f 100644 --- a/tests/meshopt_sandbox.cpp +++ b/tests/meshopt_sandbox.cpp @@ -130,9 +130,15 @@ static void decode_mesh_gpu( dev.begin_renderdoc_capture(); auto cmd = dev.request_command_buffer(); - decode_mesh(*cmd, *readback_decoded_index_buffer, 0, - *readback_decoded_u32_buffer, 0, - *payload_buffer, 0, mesh); + + DecodeInfo info = {}; + info.ibo.buffer = readback_decoded_index_buffer.get(); + info.streams[0].buffer = readback_decoded_u32_buffer.get(); + info.target_style = mesh.format_header->style; + info.payload.buffer = payload_buffer.get(); + info.flags = DECODE_MODE_RAW_PAYLOAD; + + decode_mesh(*cmd, info, mesh); cmd->barrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, VK_PIPELINE_STAGE_HOST_BIT, VK_ACCESS_HOST_READ_BIT); dev.submit(cmd); diff --git a/vulkan/mesh/meshlet.cpp b/vulkan/mesh/meshlet.cpp index 792a6a2a..7cb9edbd 100644 --- a/vulkan/mesh/meshlet.cpp +++ b/vulkan/mesh/meshlet.cpp @@ -238,6 +238,7 @@ bool decode_mesh(CommandBuffer &cmd, const DecodeInfo &info, const MeshView &vie cmd.dispatch(view.format_header->meshlet_count, 1, 1); cmd.set_specialization_constant_mask(0); + cmd.enable_subgroup_size_control(false); return true; } } From 131afb2605a371021969d7422a1c63a347dc7fae Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sat, 7 Oct 2023 16:39:46 +0200 Subject: [PATCH 65/71] Fix asset freeing. 
--- vulkan/managers/resource_manager.cpp | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/vulkan/managers/resource_manager.cpp b/vulkan/managers/resource_manager.cpp index a22c0263..3677ce79 100644 --- a/vulkan/managers/resource_manager.cpp +++ b/vulkan/managers/resource_manager.cpp @@ -81,20 +81,10 @@ void ResourceManager::release_asset(Granite::AssetID id) if (id) { std::unique_lock holder{lock}; + VK_ASSERT(id.id < assets.size()); auto &asset = assets[id.id]; asset.latchable = false; - if (asset.asset_class == Granite::AssetClass::Mesh) - { - if (asset.mesh.index.count) - { - std::lock_guard holder_alloc{mesh_allocator_lock}; - index_buffer_allocator.free(asset.mesh.index); - attribute_buffer_allocator.free(asset.mesh.attr); - asset.mesh = {}; - } - } - else - asset.image.reset(); + updates.push_back(id); } } @@ -476,6 +466,16 @@ void ResourceManager::latch_handles() if (asset.asset_class == Granite::AssetClass::Mesh) { + if (!asset.latchable) + { + { + std::lock_guard holder_alloc{mesh_allocator_lock}; + index_buffer_allocator.free(asset.mesh.index); + attribute_buffer_allocator.free(asset.mesh.attr); + } + asset.mesh = {}; + } + auto &d = draws[update.id]; d.firstIndex = asset.mesh.index.offset * 3; d.indexCount = asset.mesh.index.count * 3; @@ -486,6 +486,8 @@ void ResourceManager::latch_handles() else { const ImageView *view; + if (!asset.latchable) + asset.image.reset(); if (asset.image) { From a07560d7917772103447249359963014e80f144a Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sat, 7 Oct 2023 17:12:31 +0200 Subject: [PATCH 66/71] Add indirect decode buffer as well. 
--- vulkan/managers/resource_manager.cpp | 12 +++++++++++- vulkan/managers/resource_manager.hpp | 4 +++- vulkan/mesh/meshlet.hpp | 2 +- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/vulkan/managers/resource_manager.cpp b/vulkan/managers/resource_manager.cpp index 3677ce79..0569a6f3 100644 --- a/vulkan/managers/resource_manager.cpp +++ b/vulkan/managers/resource_manager.cpp @@ -36,6 +36,7 @@ ResourceManager::ResourceManager(Device *device_) : device(device_) , index_buffer_allocator(*device_) , attribute_buffer_allocator(*device_) + , indirect_buffer_allocator(*device_) { // Simplified style. index_buffer_allocator.set_element_size(0, sizeof(uint32_t) * 3); @@ -43,6 +44,7 @@ ResourceManager::ResourceManager(Device *device_) attribute_buffer_allocator.set_element_size(0, sizeof(float) * 3); attribute_buffer_allocator.set_element_size(1, sizeof(float) * 2 + sizeof(uint32_t) * 2); attribute_buffer_allocator.set_element_size(2, sizeof(uint32_t) * 2); + indirect_buffer_allocator.set_element_size(0, sizeof(VkDrawIndexedIndirectCommand)); assets.reserve(Granite::AssetID::MaxIDs); } @@ -393,6 +395,7 @@ void ResourceManager::instantiate_asset_mesh(Granite::AssetManager &manager_, cost += view.total_vertices * attribute_buffer_allocator.get_element_size(0); cost += view.total_vertices * attribute_buffer_allocator.get_element_size(1); cost += view.total_vertices * attribute_buffer_allocator.get_element_size(2); + cost += view.format_header->meshlet_count * indirect_buffer_allocator.get_element_size(0); } std::lock_guard holder{lock}; @@ -472,6 +475,7 @@ void ResourceManager::latch_handles() std::lock_guard holder_alloc{mesh_allocator_lock}; index_buffer_allocator.free(asset.mesh.index); attribute_buffer_allocator.free(asset.mesh.attr); + indirect_buffer_allocator.free(asset.mesh.indirect); } asset.mesh = {}; } @@ -525,6 +529,11 @@ const Buffer *ResourceManager::get_skinning_buffer() const return attribute_buffer_allocator.get_buffer(0, 2); } +const Buffer 
*ResourceManager::get_indirect_buffer() const +{ + return indirect_buffer_allocator.get_buffer(0, 0); +} + MeshBufferAllocator::MeshBufferAllocator(Device &device) : global_allocator(device) { @@ -604,7 +613,8 @@ uint32_t MeshGlobalAllocator::allocate(uint32_t count) info.size = VkDeviceSize(count) * element_size[soa_index]; info.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | - VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | + VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT; info.domain = BufferDomain::Device; global_buffers[target_index * soa_count + soa_index] = device.create_buffer(info); } diff --git a/vulkan/managers/resource_manager.hpp b/vulkan/managers/resource_manager.hpp index 3c47586f..92593d43 100644 --- a/vulkan/managers/resource_manager.hpp +++ b/vulkan/managers/resource_manager.hpp @@ -135,6 +135,7 @@ class ResourceManager final : private Granite::AssetInstantiatorInterface const Buffer *get_position_buffer() const; const Buffer *get_attribute_buffer() const; const Buffer *get_skinning_buffer() const; + const Buffer *get_indirect_buffer() const; private: Device *device; @@ -153,7 +154,7 @@ class ResourceManager final : private Granite::AssetInstantiatorInterface ImageHandle image; struct { - Internal::AllocatedSlice index, attr; + Internal::AllocatedSlice index, attr, indirect; } mesh; Granite::AssetClass asset_class = Granite::AssetClass::ImageZeroable; bool latchable = false; @@ -184,6 +185,7 @@ class ResourceManager final : private Granite::AssetInstantiatorInterface std::mutex mesh_allocator_lock; MeshBufferAllocator index_buffer_allocator; MeshBufferAllocator attribute_buffer_allocator; + MeshBufferAllocator indirect_buffer_allocator; bool allocate_asset_mesh(Granite::AssetID id, const Meshlet::MeshView &view); }; diff --git a/vulkan/mesh/meshlet.hpp b/vulkan/mesh/meshlet.hpp index 
315a055a..6b985b97 100644 --- a/vulkan/mesh/meshlet.hpp +++ b/vulkan/mesh/meshlet.hpp @@ -123,7 +123,7 @@ using DecodeModeFlags = uint32_t; struct DecodeInfo { - DecodeBuffer ibo, streams[3], payload; + DecodeBuffer ibo, streams[3], indirect, payload; DecodeModeFlags flags; MeshStyle target_style; }; From 1b5030ae32ce3571996eac7efdb1b0b5ad4c74d1 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sat, 2 Dec 2023 15:13:21 +0100 Subject: [PATCH 67/71] Decode to indirect buffers. --- assets/shaders/decode/meshlet_decode.comp | 54 ++++++++++++++++++++--- tests/meshlet_viewer.cpp | 14 ++++-- tests/meshopt_sandbox.cpp | 6 +-- vulkan/context.cpp | 2 + vulkan/managers/resource_manager.cpp | 52 ++++++++++++---------- vulkan/managers/resource_manager.hpp | 13 ++++-- vulkan/mesh/meshlet.cpp | 34 +++++++------- vulkan/mesh/meshlet.hpp | 15 ++++--- 8 files changed, 125 insertions(+), 65 deletions(-) diff --git a/assets/shaders/decode/meshlet_decode.comp b/assets/shaders/decode/meshlet_decode.comp index 1da0fcfe..288f448f 100644 --- a/assets/shaders/decode/meshlet_decode.comp +++ b/assets/shaders/decode/meshlet_decode.comp @@ -34,6 +34,11 @@ layout(set = 0, binding = 3, scalar) writeonly buffer OutputIndices uvec3 data[]; } output_indices32; +layout(set = 0, binding = 3, scalar) writeonly buffer OutputIndices8 +{ + u8vec3 data[]; +} output_indices8; + layout(set = 0, binding = 4, std430) writeonly buffer OutputStream0 { uint data[]; @@ -76,6 +81,27 @@ layout(set = 0, binding = 7, std430) readonly buffer OutputOffsets uvec2 data[]; } output_offset_strides; +struct IndirectIndexedDraw +{ + uint indexCount; + uint instanceCount; + uint firstIndex; + uint vertexOffset; + uint firstInstance; +}; + +layout(set = 0, binding = 8, std430) writeonly buffer IndirectCommands +{ + IndirectIndexedDraw draws[]; +} indirect_commands; + +layout(push_constant, std430) uniform Registers +{ + uint primitive_offset; + uint vertex_offset; + uint meshlet_offset; +} registers; + uint 
pack_a2bgr10(vec4 v) { ivec4 quantized = ivec4(round(clamp(v, vec4(-1.0), vec4(1.0)) * vec4(511.0, 511.0, 511.0, 1.0))) & ivec4(1023, 1023, 1023, 3); @@ -88,16 +114,32 @@ void main() meshlet_init_workgroup(meshlet_index); MeshletMeta meta = meshlet_metas.data[meshlet_index]; + if (!RAW_PAYLOAD) + { + IndirectIndexedDraw draw; + draw.indexCount = 3 * (meta.num_primitives_minus_1 + 1); + draw.instanceCount = 1; + draw.vertexOffset = meta.base_vertex_offset + registers.vertex_offset; + draw.firstIndex = 3 * (output_offset_strides.data[meshlet_index].x + registers.primitive_offset); + draw.firstInstance = 0; + indirect_commands.draws[meshlet_index + registers.meshlet_offset] = draw; + } + #define INDEX(linear_index, packed_indices) { \ - uvec3 indices = uvec4(unpack8(packed_indices)).xyz; \ - indices += meta.base_vertex_offset; \ uint output_offset; \ - if (RAW_PAYLOAD) \ + if (RAW_PAYLOAD) { \ + uvec3 indices = uvec4(unpack8(packed_indices)).xyz; \ + indices += meta.base_vertex_offset + registers.vertex_offset; \ output_offset = output_offset_strides.data[meshlet_index * NUM_OUTPUT_U32_STREAMS].x; \ - else \ + output_offset += registers.primitive_offset; \ + if (linear_index <= uint(meta.num_primitives_minus_1)) \ + output_indices32.data[output_offset + linear_index] = indices; \ + } else { \ output_offset = output_offset_strides.data[meshlet_index].x; \ - if (linear_index <= uint(meta.num_primitives_minus_1)) \ - output_indices32.data[output_offset + linear_index] = indices; \ + output_offset += registers.primitive_offset; \ + if (linear_index <= uint(meta.num_primitives_minus_1)) \ + output_indices8.data[output_offset + linear_index] = unpack8(packed_indices).xyz; \ + } \ } { diff --git a/tests/meshlet_viewer.cpp b/tests/meshlet_viewer.cpp index ab2ead29..ee32c0e3 100644 --- a/tests/meshlet_viewer.cpp +++ b/tests/meshlet_viewer.cpp @@ -96,10 +96,12 @@ struct MeshletViewerApplication : Granite::Application, Granite::EventHandler auto *ibo = 
device.get_resource_manager().get_index_buffer(); auto *pos = device.get_resource_manager().get_position_buffer(); auto *attr = device.get_resource_manager().get_attribute_buffer(); + auto *indirect = device.get_resource_manager().get_indirect_buffer(); + if (ibo && pos && attr) { cmd->set_program("assets://shaders/meshlet_debug.vert", "assets://shaders/meshlet_debug.frag"); - cmd->set_index_buffer(*ibo, 0, VK_INDEX_TYPE_UINT32); + cmd->set_index_buffer(*ibo, 0, VK_INDEX_TYPE_UINT8_EXT); cmd->set_vertex_binding(0, *pos, 0, 12); cmd->set_vertex_binding(1, *attr, 0, 16); cmd->set_vertex_attrib(0, 0, VK_FORMAT_R32G32B32_SFLOAT, 0); @@ -107,9 +109,13 @@ struct MeshletViewerApplication : Granite::Application, Granite::EventHandler cmd->set_vertex_attrib(2, 1, VK_FORMAT_A2B10G10R10_SNORM_PACK32, 4); cmd->set_vertex_attrib(3, 1, VK_FORMAT_R32G32_SFLOAT, 8); - auto draw = device.get_resource_manager().get_mesh_indexed_draw(mesh_id); - cmd->draw_indexed(draw.indexCount, draw.instanceCount, draw.firstIndex, draw.vertexOffset, - draw.firstInstance); + auto draw = device.get_resource_manager().get_mesh_indexed_indirect_draw(mesh_id); + if (draw.count) + { + cmd->draw_indexed_indirect(*indirect, + draw.offset * sizeof(VkDrawIndexedIndirectCommand), + draw.count, sizeof(VkDrawIndexedIndirectCommand)); + } } #endif diff --git a/tests/meshopt_sandbox.cpp b/tests/meshopt_sandbox.cpp index 57152f4f..84180e1e 100644 --- a/tests/meshopt_sandbox.cpp +++ b/tests/meshopt_sandbox.cpp @@ -132,10 +132,10 @@ static void decode_mesh_gpu( auto cmd = dev.request_command_buffer(); DecodeInfo info = {}; - info.ibo.buffer = readback_decoded_index_buffer.get(); - info.streams[0].buffer = readback_decoded_u32_buffer.get(); + info.ibo = readback_decoded_index_buffer.get(); + info.streams[0] = readback_decoded_u32_buffer.get(); info.target_style = mesh.format_header->style; - info.payload.buffer = payload_buffer.get(); + info.payload = payload_buffer.get(); info.flags = DECODE_MODE_RAW_PAYLOAD; 
decode_mesh(*cmd, info, mesh); diff --git a/vulkan/context.cpp b/vulkan/context.cpp index a5690eaf..ccb0170c 100644 --- a/vulkan/context.cpp +++ b/vulkan/context.cpp @@ -1602,6 +1602,8 @@ bool Context::create_device(VkPhysicalDevice gpu_, VkSurfaceKHR surface, enabled_features.shaderStorageImageWriteWithoutFormat = VK_TRUE; if (pdf2.features.shaderStorageImageReadWithoutFormat) enabled_features.shaderStorageImageReadWithoutFormat = VK_TRUE; + if (pdf2.features.multiDrawIndirect) + enabled_features.multiDrawIndirect = VK_TRUE; if (pdf2.features.shaderSampledImageArrayDynamicIndexing) enabled_features.shaderSampledImageArrayDynamicIndexing = VK_TRUE; diff --git a/vulkan/managers/resource_manager.cpp b/vulkan/managers/resource_manager.cpp index 0569a6f3..a9c1826f 100644 --- a/vulkan/managers/resource_manager.cpp +++ b/vulkan/managers/resource_manager.cpp @@ -34,12 +34,12 @@ namespace Vulkan { ResourceManager::ResourceManager(Device *device_) : device(device_) - , index_buffer_allocator(*device_) - , attribute_buffer_allocator(*device_) - , indirect_buffer_allocator(*device_) + , index_buffer_allocator(*device_, 256) + , attribute_buffer_allocator(*device_, 256) + , indirect_buffer_allocator(*device_, 1) { // Simplified style. - index_buffer_allocator.set_element_size(0, sizeof(uint32_t) * 3); + index_buffer_allocator.set_element_size(0, 3); // 8-bit indices. attribute_buffer_allocator.set_soa_count(3); attribute_buffer_allocator.set_element_size(0, sizeof(float) * 3); attribute_buffer_allocator.set_element_size(1, sizeof(float) * 2 + sizeof(uint32_t) * 2); @@ -53,6 +53,9 @@ ResourceManager::~ResourceManager() // Also works as a teardown mechanism to make sure there are no async threads in flight. if (manager) manager->set_asset_instantiator_interface(nullptr); + + // Ensure resource releases go through. 
+ latch_handles(); } void ResourceManager::set_id_bounds(uint32_t bound) @@ -308,7 +311,7 @@ bool ResourceManager::allocate_asset_mesh(Granite::AssetID id, const Meshlet::Me if (!view.format_header) return false; - Internal::AllocatedSlice index_slice, attribute_slice; + Internal::AllocatedSlice index_slice, attribute_slice, indirect_slice; { std::lock_guard holder{mesh_allocator_lock}; if (!index_buffer_allocator.allocate(view.total_primitives, &index_slice)) @@ -319,11 +322,20 @@ bool ResourceManager::allocate_asset_mesh(Granite::AssetID id, const Meshlet::Me index_buffer_allocator.free(index_slice); return false; } + + if (!indirect_buffer_allocator.allocate(view.format_header->meshlet_count, &indirect_slice)) + { + attribute_buffer_allocator.free(attribute_slice); + index_buffer_allocator.free(index_slice); + return false; + } } auto &asset = assets[id.id]; asset.mesh.index = index_slice; asset.mesh.attr = attribute_slice; + asset.mesh.indirect = indirect_slice; + asset.mesh.draw = { indirect_slice.offset, view.format_header->meshlet_count }; return true; } @@ -361,20 +373,17 @@ void ResourceManager::instantiate_asset_mesh(Granite::AssetManager &manager_, Meshlet::DecodeInfo info = {}; info.target_style = Meshlet::MeshStyle::Textured; - info.ibo = { - index_buffer_allocator.get_buffer(0, 0), - asset.mesh.index.offset * index_buffer_allocator.get_element_size(0), - }; + info.ibo = index_buffer_allocator.get_buffer(0, 0); for (unsigned i = 0; i < 3; i++) - { - info.streams[i] = { - attribute_buffer_allocator.get_buffer(0, i), - asset.mesh.index.offset * attribute_buffer_allocator.get_element_size(i), - }; - } + info.streams[i] = attribute_buffer_allocator.get_buffer(0, i); + + info.payload = payload.get(); + info.indirect = indirect_buffer_allocator.get_buffer(0, 0); - info.payload = { payload.get(), 0 }; + info.push.meshlet_offset = asset.mesh.indirect.offset; + info.push.primitive_offset = asset.mesh.index.offset; + info.push.vertex_offset = 
asset.mesh.attr.offset; Meshlet::decode_mesh(*cmd, info, view); @@ -480,12 +489,7 @@ void ResourceManager::latch_handles() asset.mesh = {}; } - auto &d = draws[update.id]; - d.firstIndex = asset.mesh.index.offset * 3; - d.indexCount = asset.mesh.index.count * 3; - d.firstInstance = 0; - d.instanceCount = 1; - d.vertexOffset = int32_t(asset.mesh.attr.offset); + draws[update.id] = asset.mesh.draw; } else { @@ -534,7 +538,7 @@ const Buffer *ResourceManager::get_indirect_buffer() const return indirect_buffer_allocator.get_buffer(0, 0); } -MeshBufferAllocator::MeshBufferAllocator(Device &device) +MeshBufferAllocator::MeshBufferAllocator(Device &device, uint32_t sub_block_size) : global_allocator(device) { for (int i = 0; i < SliceAllocatorCount - 1; i++) @@ -543,7 +547,7 @@ MeshBufferAllocator::MeshBufferAllocator(Device &device) // Basic unit of a meshlet is 256 prims / attributes. // Maximum element count = 32M prims. - allocators[0].set_sub_block_size(256); + allocators[0].set_sub_block_size(sub_block_size); for (int i = 1; i < SliceAllocatorCount; i++) allocators[i].set_sub_block_size(allocators[i - 1].get_sub_block_size() * (Util::LegionAllocator::NumSubBlocks / 2)); diff --git a/vulkan/managers/resource_manager.hpp b/vulkan/managers/resource_manager.hpp index 92593d43..90a2dc9f 100644 --- a/vulkan/managers/resource_manager.hpp +++ b/vulkan/managers/resource_manager.hpp @@ -79,7 +79,7 @@ struct SliceAllocator : Util::ArenaAllocator class MeshBufferAllocator { public: - explicit MeshBufferAllocator(Device &device); + MeshBufferAllocator(Device &device, uint32_t sub_block_size); bool allocate(uint32_t count, Internal::AllocatedSlice *slice); void free(const Internal::AllocatedSlice &slice); void set_soa_count(unsigned soa_count); @@ -118,7 +118,13 @@ class ResourceManager final : private Granite::AssetInstantiatorInterface const Vulkan::ImageView *get_image_view_blocking(Granite::AssetID id); - inline VkDrawIndexedIndirectCommand 
get_mesh_indexed_draw(Granite::AssetID id) const + struct MultiIndirectDraw + { + uint32_t offset = 0; + uint32_t count = 0; + }; + + inline MultiIndirectDraw get_mesh_indexed_indirect_draw(Granite::AssetID id) const { if (id.id < draws.size()) return draws[id.id]; @@ -155,6 +161,7 @@ class ResourceManager final : private Granite::AssetInstantiatorInterface struct { Internal::AllocatedSlice index, attr, indirect; + MultiIndirectDraw draw; } mesh; Granite::AssetClass asset_class = Granite::AssetClass::ImageZeroable; bool latchable = false; @@ -165,7 +172,7 @@ class ResourceManager final : private Granite::AssetInstantiatorInterface std::vector assets; std::vector views; - std::vector draws; + std::vector draws; std::vector updates; ImageHandle fallback_color; diff --git a/vulkan/mesh/meshlet.cpp b/vulkan/mesh/meshlet.cpp index 7cb9edbd..381635c8 100644 --- a/vulkan/mesh/meshlet.cpp +++ b/vulkan/mesh/meshlet.cpp @@ -94,18 +94,20 @@ bool decode_mesh(CommandBuffer &cmd, const DecodeInfo &info, const MeshView &vie return false; } - if (!info.streams[0].buffer) + if (!info.streams[0]) { LOGE("Decode stream 0 must be set.\n"); return false; } - if (!info.ibo.buffer) + if (!info.ibo) { LOGE("Output IBO must be set.\n"); return false; } + cmd.push_constants(&info.push, 0, sizeof(info.push)); + BufferCreateInfo buf_info = {}; buf_info.domain = BufferDomain::LinkedDeviceHost; buf_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; @@ -127,10 +129,8 @@ bool decode_mesh(CommandBuffer &cmd, const DecodeInfo &info, const MeshView &vie cmd.set_storage_buffer(0, 0, *meshlet_meta_buffer); cmd.set_storage_buffer(0, 1, *meshlet_stream_buffer); - cmd.set_storage_buffer(0, 2, *info.payload.buffer, - info.payload.offset, - view.format_header->payload_size_words * sizeof(uint32_t)); - cmd.set_storage_buffer(0, 3, *info.ibo.buffer, info.ibo.offset, view.total_primitives * sizeof(uint32_t) * 3); + cmd.set_storage_buffer(0, 2, *info.payload); + cmd.set_storage_buffer(0, 3, *info.ibo); 
cmd.set_specialization_constant_mask(0x7); cmd.set_specialization_constant(0, view.format_header->u32_stream_count); @@ -168,10 +168,7 @@ bool decode_mesh(CommandBuffer &cmd, const DecodeInfo &info, const MeshView &vie } for (unsigned i = 0; i < 3; i++) - { - cmd.set_storage_buffer(0, 4 + i, *info.streams[0].buffer, info.streams[0].offset, - view.total_vertices * output_u32_streams * sizeof(uint32_t)); - } + cmd.set_storage_buffer(0, 4 + i, *info.streams[0]); decode_offsets.reserve(view.format_header->meshlet_count * (output_u32_streams + 1)); uint32_t index_count = 0; @@ -185,27 +182,26 @@ bool decode_mesh(CommandBuffer &cmd, const DecodeInfo &info, const MeshView &vie } cmd.set_specialization_constant(1, output_u32_streams + 1); + + // Dummy bind for indirect_buffer. + cmd.set_storage_buffer(0, 8, *info.streams[0]); } else { for (unsigned i = 0; i < 3; i++) - cmd.set_storage_buffer(0, 4 + i, *info.streams[0].buffer); + cmd.set_storage_buffer(0, 4 + i, *info.streams[0]); switch (info.target_style) { case MeshStyle::Skinned: - cmd.set_storage_buffer(0, 6, *info.streams[2].buffer, info.streams[2].offset, - view.total_vertices * sizeof(uint32_t) * 2); + cmd.set_storage_buffer(0, 6, *info.streams[2]); // Fallthrough case MeshStyle::Untextured: case MeshStyle::Textured: - cmd.set_storage_buffer(0, 5, *info.streams[1].buffer, info.streams[1].offset, - view.total_vertices * sizeof(uint32_t) * - (info.target_style == MeshStyle::Textured ? 
4 : 1)); + cmd.set_storage_buffer(0, 5, *info.streams[1]); // Fallthrough case MeshStyle::Wireframe: - cmd.set_storage_buffer(0, 4, *info.streams[0].buffer, info.streams[0].offset, - view.total_vertices * sizeof(float) * 3); + cmd.set_storage_buffer(0, 4, *info.streams[0]); break; default: @@ -220,6 +216,8 @@ bool decode_mesh(CommandBuffer &cmd, const DecodeInfo &info, const MeshView &vie index_count += view.headers[i].num_primitives_minus_1 + 1; } cmd.set_specialization_constant(1, uint32_t(info.target_style)); + + cmd.set_storage_buffer(0, 8, *info.indirect); } buf_info.domain = BufferDomain::LinkedDeviceHost; diff --git a/vulkan/mesh/meshlet.hpp b/vulkan/mesh/meshlet.hpp index 6b985b97..c695eb0b 100644 --- a/vulkan/mesh/meshlet.hpp +++ b/vulkan/mesh/meshlet.hpp @@ -109,12 +109,6 @@ static const char magic[8] = { 'M', 'E', 'S', 'H', 'L', 'E', 'T', '1' }; MeshView create_mesh_view(const Granite::FileMapping &mapping); -struct DecodeBuffer -{ - const Vulkan::Buffer *buffer; - uint64_t offset; -}; - enum DecodeModeFlagBits : uint32_t { DECODE_MODE_RAW_PAYLOAD = 1 << 0, @@ -123,9 +117,16 @@ using DecodeModeFlags = uint32_t; struct DecodeInfo { - DecodeBuffer ibo, streams[3], indirect, payload; + const Vulkan::Buffer *ibo, *streams[3], *indirect, *payload; DecodeModeFlags flags; MeshStyle target_style; + + struct + { + uint32_t primitive_offset; + uint32_t vertex_offset; + uint32_t meshlet_offset; + } push; }; bool decode_mesh(Vulkan::CommandBuffer &cmd, const DecodeInfo &decode_info, const MeshView &view); From 44e220e32521ba5570aa510b8f89cdcc75731007 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sun, 3 Dec 2023 12:22:50 +0100 Subject: [PATCH 68/71] Explicitly disable Wayland when RenderDoc is loaded. 
--- application/platforms/CMakeLists.txt | 3 +++ application/platforms/application_sdl3.cpp | 15 +++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/application/platforms/CMakeLists.txt b/application/platforms/CMakeLists.txt index 28619c62..0ca56f3c 100644 --- a/application/platforms/CMakeLists.txt +++ b/application/platforms/CMakeLists.txt @@ -31,6 +31,9 @@ elseif (${GRANITE_PLATFORM} MATCHES "SDL") #find_package(SDL3 REQUIRED CONFIG REQUIRED COMPONENTS SDL3-shared) #target_link_libraries(granite-platform PRIVATE SDL3::SDL3-shared) target_link_libraries(granite-platform PRIVATE SDL3-static granite-input-sdl) + if (NOT WIN32) + target_link_libraries(granite-platform PRIVATE dl) + endif() else() message(FATAL "GRANITE_PLATFORM is not set.") endif() diff --git a/application/platforms/application_sdl3.cpp b/application/platforms/application_sdl3.cpp index de926e9f..4e0e6a3a 100644 --- a/application/platforms/application_sdl3.cpp +++ b/application/platforms/application_sdl3.cpp @@ -40,6 +40,10 @@ #include #endif +#ifdef __linux__ +#include +#endif + namespace Granite { static Key sdl_key_to_granite_key(SDL_Keycode key) @@ -104,6 +108,17 @@ struct WSIPlatformSDL : GraniteWSIPlatform if (options.override_height) height = options.override_height; +#ifdef __linux__ + // RenderDoc doesn't support Wayland, and SDL3 uses Wayland by default. + // Opt in to X11 to avoid having to manually remember to pass down SDL_VIDEO_DRIVER=x11. + void *renderdoc_module = dlopen("librenderdoc.so", RTLD_NOW | RTLD_NOLOAD); + if (renderdoc_module) + { + LOGI("RenderDoc is loaded, disabling Wayland.\n"); + setenv("SDL_VIDEO_DRIVER", "x11", 0); + } +#endif + if (SDL_Init(SDL_INIT_EVENTS | SDL_INIT_GAMEPAD | SDL_INIT_VIDEO) < 0) { LOGE("Failed to init SDL.\n"); From 096ce89e1c11400ad78f88d9ce63395b9079cc79 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sun, 3 Dec 2023 13:17:52 +0100 Subject: [PATCH 69/71] Upload raw mesh payloads. Refactor the shader to support this. 
--- assets/shaders/decode/meshlet_decode.comp | 18 +- assets/shaders/inc/meshlet_payload_decode.h | 52 +++-- tests/assets/shaders/meshlet_debug.mesh | 26 +-- tests/meshlet_viewer.cpp | 83 ++++---- vulkan/managers/resource_manager.cpp | 213 +++++++++++++++----- vulkan/managers/resource_manager.hpp | 23 ++- vulkan/mesh/meshlet.hpp | 8 + 7 files changed, 292 insertions(+), 131 deletions(-) diff --git a/assets/shaders/decode/meshlet_decode.comp b/assets/shaders/decode/meshlet_decode.comp index 288f448f..ddd2665d 100644 --- a/assets/shaders/decode/meshlet_decode.comp +++ b/assets/shaders/decode/meshlet_decode.comp @@ -111,8 +111,8 @@ uint pack_a2bgr10(vec4 v) void main() { uint meshlet_index = gl_WorkGroupID.x; - meshlet_init_workgroup(meshlet_index); - MeshletMeta meta = meshlet_metas.data[meshlet_index]; + meshlet_init_workgroup(meshlet_index * NUM_U32_STREAMS); + MeshletMetaRaw meta = meshlet_metas_raw.data[meshlet_index]; if (!RAW_PAYLOAD) { @@ -143,7 +143,7 @@ void main() } { - MESHLET_DECODE_STREAM_32(meshlet_index, 0, INDEX); + MESHLET_DECODE_STREAM_32(meshlet_index * NUM_U32_STREAMS, 0, INDEX); } if (RAW_PAYLOAD) @@ -156,7 +156,7 @@ void main() for (uint i = 1; i < NUM_OUTPUT_U32_STREAMS; i++) { - MESHLET_DECODE_STREAM_32(meshlet_index, i, ATTR); + MESHLET_DECODE_STREAM_32(meshlet_index * NUM_U32_STREAMS, i, ATTR); } } else @@ -195,23 +195,23 @@ void main() } \ } { - MESHLET_DECODE_STREAM_64(meshlet_index, 1, POS); + MESHLET_DECODE_STREAM_64(meshlet_index * NUM_U32_STREAMS, 1, POS); } if (MESH_STYLE >= MESH_STYLE_UNTEXTURED) { - MESHLET_DECODE_STREAM_32(meshlet_index, 3, NORMAL); + MESHLET_DECODE_STREAM_32(meshlet_index * NUM_U32_STREAMS, 3, NORMAL); } if (MESH_STYLE >= MESH_STYLE_TEXTURED) { - MESHLET_DECODE_STREAM_32(meshlet_index, 4, TANGENT); - MESHLET_DECODE_STREAM_64(meshlet_index, 5, UV); + MESHLET_DECODE_STREAM_32(meshlet_index * NUM_U32_STREAMS, 4, TANGENT); + MESHLET_DECODE_STREAM_64(meshlet_index * NUM_U32_STREAMS, 5, UV); } if (MESH_STYLE >= 
MESH_STYLE_SKINNED) { - MESHLET_DECODE_STREAM_64(meshlet_index, 7, SKIN); + MESHLET_DECODE_STREAM_64(meshlet_index * NUM_U32_STREAMS, 7, SKIN); } } } diff --git a/assets/shaders/inc/meshlet_payload_decode.h b/assets/shaders/inc/meshlet_payload_decode.h index 5c8d45e0..0673e3a3 100644 --- a/assets/shaders/inc/meshlet_payload_decode.h +++ b/assets/shaders/inc/meshlet_payload_decode.h @@ -45,7 +45,7 @@ struct MeshletStream uint16_t bitplane_meta[MESHLET_PAYLOAD_NUM_CHUNKS]; }; -struct MeshletMeta +struct MeshletMetaRaw { uint base_vertex_offset; uint8_t num_primitives_minus_1; @@ -53,10 +53,22 @@ struct MeshletMeta uint16_t reserved; }; -layout(set = MESHLET_PAYLOAD_DESCRIPTOR_SET, binding = MESHLET_PAYLOAD_META_BINDING, std430) readonly buffer MeshletMetas +struct MeshletMetaRuntime { - MeshletMeta data[]; -} meshlet_metas; + uint stream_offset; + uint16_t num_primitives; + uint16_t num_attributes; +}; + +layout(set = MESHLET_PAYLOAD_DESCRIPTOR_SET, binding = MESHLET_PAYLOAD_META_BINDING, std430) readonly buffer MeshletMetasRaw +{ + MeshletMetaRaw data[]; +} meshlet_metas_raw; + +layout(set = MESHLET_PAYLOAD_DESCRIPTOR_SET, binding = MESHLET_PAYLOAD_META_BINDING, std430) readonly buffer MeshletMetasRuntime +{ + MeshletMetaRuntime data[]; +} meshlet_metas_runtime; layout(set = MESHLET_PAYLOAD_DESCRIPTOR_SET, binding = MESHLET_PAYLOAD_STREAM_BINDING, std430) readonly buffer MeshletStreams { @@ -88,22 +100,22 @@ uint repack_uint(uvec2 v) return pack32(u8vec4(v16)); } -void meshlet_compute_stream_offsets(uint meshlet_index, uint stream_index, +void meshlet_compute_stream_offsets(uint stream_index, out uint out_stream_chunk_offset, out u8vec4 out_bit_counts) { if (gl_SubgroupInvocationID < MESHLET_PAYLOAD_NUM_CHUNKS) { - uint bitplane_value = uint(meshlet_streams.data[stream_index + MESHLET_PAYLOAD_NUM_U32_STREAMS * meshlet_index].bitplane_meta[gl_SubgroupInvocationID]); + uint bitplane_value = 
uint(meshlet_streams.data[stream_index].bitplane_meta[gl_SubgroupInvocationID]); u16vec4 bit_counts = (u16vec4(bitplane_value) >> u16vec4(0, 4, 8, 12)) & 0xfus; u16vec2 bit_counts2 = bit_counts.xy + bit_counts.zw; uint total_bits = bit_counts2.x + bit_counts2.y; - uint offset = meshlet_streams.data[stream_index + MESHLET_PAYLOAD_NUM_U32_STREAMS * meshlet_index].offset_from_base; + uint offset = meshlet_streams.data[stream_index].offset_from_base; out_stream_chunk_offset = subgroupExclusiveAdd(total_bits) + offset; out_bit_counts = u8vec4(bit_counts); } } -void meshlet_init_workgroup(uint meshlet_index) +void meshlet_init_workgroup(uint base_stream_index) { #if MESHLET_PAYLOAD_LARGE_WORKGROUP @@ -112,7 +124,7 @@ void meshlet_init_workgroup(uint meshlet_index) if (gl_SubgroupInvocationID < MESHLET_PAYLOAD_NUM_CHUNKS) { // Start by decoding the offset for bitplanes for all u32 streams. - meshlet_compute_stream_offsets(meshlet_index, stream_index, + meshlet_compute_stream_offsets(base_stream_index + stream_index, shared_chunk_offset[stream_index][gl_SubgroupInvocationID], shared_chunk_bit_counts[stream_index][gl_SubgroupInvocationID]); } @@ -175,9 +187,9 @@ uint meshlet_get_linear_index() packed_decoded##iter = subgroupInclusiveAdd(packed_decoded##iter) #if MESHLET_PAYLOAD_LARGE_WORKGROUP -uint meshlet_decode_stream_32_wg256(uint meshlet_index, uint stream_index) +uint meshlet_decode_stream_32_wg256(uint base_stream_index, uint stream_index) { - uint unrolled_stream_index = MESHLET_PAYLOAD_NUM_U32_STREAMS * meshlet_index + stream_index; + uint unrolled_stream_index = base_stream_index + stream_index; uint linear_index = meshlet_get_linear_index(); uint chunk_id = gl_LocalInvocationID.y; @@ -197,10 +209,10 @@ uint meshlet_decode_stream_32_wg256(uint meshlet_index, uint stream_index) return repack_uint(packed_decoded0); } -uvec2 meshlet_decode_stream_64_wg256(uint meshlet_index, uint stream_index) +uvec2 meshlet_decode_stream_64_wg256(uint base_stream_index, uint 
stream_index) { // Dual-pump the computation. VGPR use is quite low either way, so this is fine. - uint unrolled_stream_index = MESHLET_PAYLOAD_NUM_U32_STREAMS * meshlet_index + stream_index; + uint unrolled_stream_index = base_stream_index + stream_index; uint linear_index = meshlet_get_linear_index(); uint chunk_id = gl_LocalInvocationID.y; @@ -243,13 +255,13 @@ uvec2 meshlet_decode_stream_64_wg256(uint meshlet_index, uint stream_index) #else // Have to iterate and report once per chunk. Avoids having to spend a lot of LDS memory. -#define MESHLET_DECODE_STREAM_32(meshlet_index, stream_index, report_cb) { \ - uint unrolled_stream_index = MESHLET_PAYLOAD_NUM_U32_STREAMS * meshlet_index + stream_index; \ +#define MESHLET_DECODE_STREAM_32(base_stream_index, stream_index, report_cb) { \ + uint unrolled_stream_index = base_stream_index + stream_index; \ uint linear_index = meshlet_get_linear_index(); \ uvec2 prev_value0 = uvec2(0); \ uint shared_chunk_offset0; \ u8vec4 shared_chunk_bit_counts0; \ - meshlet_compute_stream_offsets(meshlet_index, stream_index, shared_chunk_offset0, shared_chunk_bit_counts0); \ + meshlet_compute_stream_offsets(unrolled_stream_index, shared_chunk_offset0, shared_chunk_bit_counts0); \ MESHLET_PAYLOAD_DECL_STREAM(unrolled_stream_index, 0); \ for (uint chunk_id = 0; chunk_id < MESHLET_PAYLOAD_NUM_CHUNKS; chunk_id++) \ { \ @@ -262,17 +274,17 @@ uvec2 meshlet_decode_stream_64_wg256(uint meshlet_index, uint stream_index) } // Have to iterate and report once per chunk. Avoids having to spend a lot of LDS memory. 
-#define MESHLET_DECODE_STREAM_64(meshlet_index, stream_index, report_cb) { \ - uint unrolled_stream_index = MESHLET_PAYLOAD_NUM_U32_STREAMS * meshlet_index + stream_index; \ +#define MESHLET_DECODE_STREAM_64(base_stream_index, stream_index, report_cb) { \ + uint unrolled_stream_index = base_stream_index + stream_index; \ uint linear_index = meshlet_get_linear_index(); \ uvec2 prev_value0 = uvec2(0); \ uvec2 prev_value1 = uvec2(0); \ uint shared_chunk_offset0; \ u8vec4 shared_chunk_bit_counts0; \ - meshlet_compute_stream_offsets(meshlet_index, stream_index, shared_chunk_offset0, shared_chunk_bit_counts0); \ + meshlet_compute_stream_offsets(unrolled_stream_index, shared_chunk_offset0, shared_chunk_bit_counts0); \ uint shared_chunk_offset1; \ u8vec4 shared_chunk_bit_counts1; \ - meshlet_compute_stream_offsets(meshlet_index, stream_index + 1, shared_chunk_offset1, shared_chunk_bit_counts1); \ + meshlet_compute_stream_offsets(unrolled_stream_index + 1, shared_chunk_offset1, shared_chunk_bit_counts1); \ MESHLET_PAYLOAD_DECL_STREAM(unrolled_stream_index, 0); \ MESHLET_PAYLOAD_DECL_STREAM(unrolled_stream_index + 1, 1); \ for (uint chunk_id = 0; chunk_id < MESHLET_PAYLOAD_NUM_CHUNKS; chunk_id++) \ diff --git a/tests/assets/shaders/meshlet_debug.mesh b/tests/assets/shaders/meshlet_debug.mesh index a61979d8..5098e49a 100644 --- a/tests/assets/shaders/meshlet_debug.mesh +++ b/tests/assets/shaders/meshlet_debug.mesh @@ -35,45 +35,45 @@ layout(set = 1, binding = 0) uniform UBO void main() { uint meshlet_index = gl_WorkGroupID.x; - meshlet_init_workgroup(meshlet_index); - MeshletMeta meta = meshlet_metas.data[meshlet_index]; + MeshletMetaRuntime meta = meshlet_metas_runtime.data[meshlet_index]; + meshlet_init_workgroup(meta.stream_offset); - SetMeshOutputsEXT(meta.num_attributes_minus_1 + 1, meta.num_primitives_minus_1 + 1); + SetMeshOutputsEXT(meta.num_attributes, meta.num_primitives); #define INDEX(index, value) \ - if (index <= meta.num_primitives_minus_1) \ + if (index < 
meta.num_primitives) \ { \ gl_PrimitiveTriangleIndicesEXT[index] = uvec4(unpack8(value)).xyz; \ vMeshletIndex[index] = meshlet_index; \ } - MESHLET_DECODE_STREAM_32(meshlet_index, 0, INDEX); + MESHLET_DECODE_STREAM_32(meta.stream_offset, 0, INDEX); #define POSITION(index, value) \ - if (index <= meta.num_attributes_minus_1) \ + if (index < meta.num_attributes) \ { \ vec3 pos = attribute_decode_snorm_exp_position(value); \ gl_MeshVerticesEXT[index].gl_Position = VP * vec4(pos, 1.0); \ } - MESHLET_DECODE_STREAM_64(meshlet_index, 1, POSITION); + MESHLET_DECODE_STREAM_64(meta.stream_offset, 1, POSITION); #define NORMAL(index, value) \ - if (index <= meta.num_attributes_minus_1) \ + if (index < meta.num_attributes) \ { \ vNormal[index] = attribute_decode_oct8_normal_tangent(value).xyz; \ } - MESHLET_DECODE_STREAM_32(meshlet_index, 3, NORMAL); + MESHLET_DECODE_STREAM_32(meta.stream_offset, 3, NORMAL); #define TANGENT(index, value) \ - if (index <= meta.num_attributes_minus_1) \ + if (index < meta.num_attributes) \ { \ vTangent[index] = attribute_decode_oct8_normal_tangent(value); \ } - MESHLET_DECODE_STREAM_32(meshlet_index, 4, TANGENT); + MESHLET_DECODE_STREAM_32(meta.stream_offset, 4, TANGENT); #define UV(index, value) \ - if (index <= meta.num_attributes_minus_1) \ + if (index < meta.num_attributes) \ { \ vUV[index] = attribute_decode_snorm_exp_uv(value); \ } - MESHLET_DECODE_STREAM_64(meshlet_index, 5, UV); + MESHLET_DECODE_STREAM_64(meta.stream_offset, 5, UV); } \ No newline at end of file diff --git a/tests/meshlet_viewer.cpp b/tests/meshlet_viewer.cpp index ee32c0e3..2e19ef3f 100644 --- a/tests/meshlet_viewer.cpp +++ b/tests/meshlet_viewer.cpp @@ -38,6 +38,23 @@ using namespace Granite; using namespace Vulkan; using namespace Vulkan::Meshlet; +static uint32_t style_to_u32_streams(MeshStyle style) +{ + switch (style) + { + case MeshStyle::Wireframe: + return 3; + case MeshStyle::Untextured: + return 4; + case MeshStyle::Textured: + return 7; + case 
MeshStyle::Skinned: + return 9; + default: + return 0; + } +} + struct MeshletViewerApplication : Granite::Application, Granite::EventHandler { MeshletViewerApplication(const char *path) @@ -73,33 +90,37 @@ struct MeshletViewerApplication : Granite::Application, Granite::EventHandler auto vp = camera.get_projection() * camera.get_view(); *cmd->allocate_typed_constant_data(1, 0, 1) = vp; + auto draw = device.get_resource_manager().get_mesh_draw_range(mesh_id); -#if 0 - bool large_workgroup = - device.get_device_features().mesh_shader_properties.maxPreferredMeshWorkGroupInvocations > 32 && - device.get_device_features().mesh_shader_properties.maxMeshWorkGroupInvocations >= 256; - - cmd->set_program("", "assets://shaders/meshlet_debug.mesh", - "assets://shaders/meshlet_debug.mesh.frag", - {{"MESHLET_PAYLOAD_LARGE_WORKGROUP", int(large_workgroup)}}); - - cmd->set_storage_buffer(0, 0, *meshlet_meta_buffer); - cmd->set_storage_buffer(0, 1, *meshlet_stream_buffer); - cmd->set_storage_buffer(0, 2, *payload); - - cmd->enable_subgroup_size_control(true, VK_SHADER_STAGE_MESH_BIT_EXT); - cmd->set_subgroup_size_log2(true, 5, 5, VK_SHADER_STAGE_MESH_BIT_EXT); - cmd->set_specialization_constant_mask(1); - cmd->set_specialization_constant(0, header.u32_stream_count); - cmd->draw_mesh_tasks(header.meshlet_count, 1, 1); -#else - auto *ibo = device.get_resource_manager().get_index_buffer(); - auto *pos = device.get_resource_manager().get_position_buffer(); - auto *attr = device.get_resource_manager().get_attribute_buffer(); - auto *indirect = device.get_resource_manager().get_indirect_buffer(); - - if (ibo && pos && attr) + if (draw.count && device.get_resource_manager().get_mesh_encoding() == Vulkan::ResourceManager::MeshEncoding::Meshlet) { + bool large_workgroup = + device.get_device_features().mesh_shader_properties.maxPreferredMeshWorkGroupInvocations > 32 && + device.get_device_features().mesh_shader_properties.maxMeshWorkGroupInvocations >= 256; + + cmd->set_program("", 
"assets://shaders/meshlet_debug.mesh", + "assets://shaders/meshlet_debug.mesh.frag", + {{"MESHLET_PAYLOAD_LARGE_WORKGROUP", int(large_workgroup)}}); + + cmd->set_storage_buffer(0, 0, *device.get_resource_manager().get_meshlet_header_buffer()); + cmd->set_storage_buffer(0, 1, *device.get_resource_manager().get_meshlet_stream_header_buffer()); + cmd->set_storage_buffer(0, 2, *device.get_resource_manager().get_meshlet_payload_buffer()); + + cmd->enable_subgroup_size_control(true, VK_SHADER_STAGE_MESH_BIT_EXT); + cmd->set_subgroup_size_log2(true, 5, 5, VK_SHADER_STAGE_MESH_BIT_EXT); + cmd->set_specialization_constant_mask(1); + cmd->set_specialization_constant(0, style_to_u32_streams(draw.style)); + + cmd->push_constants(&draw.offset, 0, sizeof(draw.offset)); + cmd->draw_mesh_tasks(draw.count, 1, 1); + } + else if (draw.count) + { + auto *ibo = device.get_resource_manager().get_index_buffer(); + auto *pos = device.get_resource_manager().get_position_buffer(); + auto *attr = device.get_resource_manager().get_attribute_buffer(); + auto *indirect = device.get_resource_manager().get_indirect_buffer(); + cmd->set_program("assets://shaders/meshlet_debug.vert", "assets://shaders/meshlet_debug.frag"); cmd->set_index_buffer(*ibo, 0, VK_INDEX_TYPE_UINT8_EXT); cmd->set_vertex_binding(0, *pos, 0, 12); @@ -108,16 +129,10 @@ struct MeshletViewerApplication : Granite::Application, Granite::EventHandler cmd->set_vertex_attrib(1, 1, VK_FORMAT_A2B10G10R10_SNORM_PACK32, 0); cmd->set_vertex_attrib(2, 1, VK_FORMAT_A2B10G10R10_SNORM_PACK32, 4); cmd->set_vertex_attrib(3, 1, VK_FORMAT_R32G32_SFLOAT, 8); - - auto draw = device.get_resource_manager().get_mesh_indexed_indirect_draw(mesh_id); - if (draw.count) - { - cmd->draw_indexed_indirect(*indirect, - draw.offset * sizeof(VkDrawIndexedIndirectCommand), - draw.count, sizeof(VkDrawIndexedIndirectCommand)); - } + cmd->draw_indexed_indirect(*indirect, + draw.offset * sizeof(VkDrawIndexedIndirectCommand), + draw.count, 
sizeof(VkDrawIndexedIndirectCommand)); } -#endif cmd->end_render_pass(); device.submit(cmd); diff --git a/vulkan/managers/resource_manager.cpp b/vulkan/managers/resource_manager.cpp index a9c1826f..7dcd8f83 100644 --- a/vulkan/managers/resource_manager.cpp +++ b/vulkan/managers/resource_manager.cpp @@ -37,6 +37,9 @@ ResourceManager::ResourceManager(Device *device_) , index_buffer_allocator(*device_, 256) , attribute_buffer_allocator(*device_, 256) , indirect_buffer_allocator(*device_, 1) + , mesh_header_allocator(*device_, 1) + , mesh_stream_allocator(*device_, 8) + , mesh_payload_allocator(*device_, 128) { // Simplified style. index_buffer_allocator.set_element_size(0, 3); // 8-bit indices. @@ -45,7 +48,14 @@ ResourceManager::ResourceManager(Device *device_) attribute_buffer_allocator.set_element_size(1, sizeof(float) * 2 + sizeof(uint32_t) * 2); attribute_buffer_allocator.set_element_size(2, sizeof(uint32_t) * 2); indirect_buffer_allocator.set_element_size(0, sizeof(VkDrawIndexedIndirectCommand)); + + mesh_header_allocator.set_element_size(0, sizeof(Meshlet::RuntimeHeader)); + mesh_stream_allocator.set_element_size(0, sizeof(Meshlet::Stream)); + mesh_payload_allocator.set_element_size(0, sizeof(uint32_t)); + assets.reserve(Granite::AssetID::MaxIDs); + + mesh_encoding = MeshEncoding::Meshlet; } ResourceManager::~ResourceManager() @@ -98,7 +108,10 @@ uint64_t ResourceManager::estimate_cost_asset(Granite::AssetID id, Granite::File if (assets[id.id].asset_class == Granite::AssetClass::Mesh) { // Compression factor of 2x is reasonable to assume. 
- return file.get_size() * 2; + if (mesh_encoding == MeshEncoding::VBOAndIBOMDI) + return file.get_size() * 2; + else + return file.get_size(); } else { @@ -311,31 +324,54 @@ bool ResourceManager::allocate_asset_mesh(Granite::AssetID id, const Meshlet::Me if (!view.format_header) return false; - Internal::AllocatedSlice index_slice, attribute_slice, indirect_slice; + std::lock_guard holder{mesh_allocator_lock}; + auto &asset = assets[id.id]; + + if (mesh_encoding == MeshEncoding::VBOAndIBOMDI) { - std::lock_guard holder{mesh_allocator_lock}; - if (!index_buffer_allocator.allocate(view.total_primitives, &index_slice)) + if (!index_buffer_allocator.allocate(view.total_primitives, &asset.mesh.index_or_payload)) return false; - if (!attribute_buffer_allocator.allocate(view.total_vertices, &attribute_slice)) + if (!attribute_buffer_allocator.allocate(view.total_vertices, &asset.mesh.attr_or_stream)) { - index_buffer_allocator.free(index_slice); + index_buffer_allocator.free(asset.mesh.index_or_payload); + asset.mesh.index_or_payload = {}; return false; } - if (!indirect_buffer_allocator.allocate(view.format_header->meshlet_count, &indirect_slice)) + if (!indirect_buffer_allocator.allocate(view.format_header->meshlet_count, &asset.mesh.indirect_or_header)) { - attribute_buffer_allocator.free(attribute_slice); - index_buffer_allocator.free(index_slice); + index_buffer_allocator.free(asset.mesh.index_or_payload); + attribute_buffer_allocator.free(asset.mesh.attr_or_stream); + asset.mesh.index_or_payload = {}; + asset.mesh.attr_or_stream = {}; return false; } } + else + { + if (!mesh_header_allocator.allocate(view.format_header->meshlet_count, &asset.mesh.indirect_or_header)) + return false; - auto &asset = assets[id.id]; - asset.mesh.index = index_slice; - asset.mesh.attr = attribute_slice; - asset.mesh.indirect = indirect_slice; - asset.mesh.draw = { indirect_slice.offset, view.format_header->meshlet_count }; + if 
(!mesh_stream_allocator.allocate(view.format_header->meshlet_count * view.format_header->u32_stream_count, + &asset.mesh.attr_or_stream)) + { + mesh_header_allocator.free(asset.mesh.indirect_or_header); + asset.mesh.indirect_or_header = {}; + return false; + } + + if (!mesh_payload_allocator.allocate(view.format_header->payload_size_words, &asset.mesh.index_or_payload)) + { + mesh_header_allocator.free(asset.mesh.indirect_or_header); + mesh_stream_allocator.free(asset.mesh.attr_or_stream); + asset.mesh.indirect_or_header = {}; + asset.mesh.attr_or_stream = {}; + return false; + } + } + + asset.mesh.draw = { asset.mesh.indirect_or_header.offset, view.format_header->meshlet_count }; return true; } @@ -363,48 +399,105 @@ void ResourceManager::instantiate_asset_mesh(Granite::AssetManager &manager_, if (ret) { - auto cmd = device->request_command_buffer(CommandBuffer::Type::AsyncCompute); - - BufferCreateInfo buf = {}; - buf.domain = BufferDomain::Host; - buf.size = view.format_header->payload_size_words * sizeof(uint32_t); - buf.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; - auto payload = device->create_buffer(buf, view.payload); + if (mesh_encoding == MeshEncoding::Meshlet) + { + auto cmd = device->request_command_buffer(CommandBuffer::Type::AsyncTransfer); - Meshlet::DecodeInfo info = {}; - info.target_style = Meshlet::MeshStyle::Textured; - info.ibo = index_buffer_allocator.get_buffer(0, 0); + void *payload_data = cmd->update_buffer(*mesh_payload_allocator.get_buffer(0, 0), + asset.mesh.index_or_payload.offset * sizeof(uint32_t), + view.format_header->payload_size_words * sizeof(uint32_t)); + memcpy(payload_data, view.payload, view.format_header->payload_size_words * sizeof(uint32_t)); - for (unsigned i = 0; i < 3; i++) - info.streams[i] = attribute_buffer_allocator.get_buffer(0, i); + auto *headers = static_cast( + cmd->update_buffer(*mesh_header_allocator.get_buffer(0, 0), + asset.mesh.indirect_or_header.offset * sizeof(Meshlet::RuntimeHeader), + 
view.format_header->meshlet_count * sizeof(Meshlet::RuntimeHeader))); - info.payload = payload.get(); - info.indirect = indirect_buffer_allocator.get_buffer(0, 0); + for (uint32_t i = 0, n = view.format_header->meshlet_count; i < n; i++) + { + headers[i].stream_offset = asset.mesh.attr_or_stream.offset + i * view.format_header->u32_stream_count; + headers[i].num_attributes = view.headers[i].num_attributes_minus_1 + 1; + headers[i].num_primitives = view.headers[i].num_primitives_minus_1 + 1; + } - info.push.meshlet_offset = asset.mesh.indirect.offset; - info.push.primitive_offset = asset.mesh.index.offset; - info.push.vertex_offset = asset.mesh.attr.offset; + auto *streams = static_cast( + cmd->update_buffer(*mesh_stream_allocator.get_buffer(0, 0), + asset.mesh.attr_or_stream.offset * sizeof(Meshlet::Stream), + view.format_header->meshlet_count * view.format_header->u32_stream_count * + sizeof(Meshlet::Stream))); - Meshlet::decode_mesh(*cmd, info, view); + for (uint32_t i = 0, n = view.format_header->meshlet_count * view.format_header->u32_stream_count; i < n; i++) + { + auto in_stream = view.streams[i]; + in_stream.offset_from_base_u32 += asset.mesh.index_or_payload.offset; + streams[i] = in_stream; + } - Semaphore sem[2]; - device->submit(cmd, nullptr, 2, sem); - device->add_wait_semaphore(CommandBuffer::Type::Generic, std::move(sem[0]), - VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT | - VK_PIPELINE_STAGE_2_INDEX_INPUT_BIT, false); - device->add_wait_semaphore(CommandBuffer::Type::AsyncGraphics, std::move(sem[1]), - VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT | - VK_PIPELINE_STAGE_2_INDEX_INPUT_BIT, false); + Semaphore sem[2]; + device->submit(cmd, nullptr, 2, sem); + device->add_wait_semaphore(CommandBuffer::Type::Generic, std::move(sem[0]), + VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_EXT | + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, false); + device->add_wait_semaphore(CommandBuffer::Type::AsyncGraphics, std::move(sem[1]), + VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_EXT 
| + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, false); + } + else + { + auto cmd = device->request_command_buffer(CommandBuffer::Type::AsyncCompute); + + BufferCreateInfo buf = {}; + buf.domain = BufferDomain::Host; + buf.size = view.format_header->payload_size_words * sizeof(uint32_t); + buf.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + auto payload = device->create_buffer(buf, view.payload); + + Meshlet::DecodeInfo info = {}; + info.target_style = Meshlet::MeshStyle::Textured; + info.ibo = index_buffer_allocator.get_buffer(0, 0); + + for (unsigned i = 0; i < 3; i++) + info.streams[i] = attribute_buffer_allocator.get_buffer(0, i); + + info.payload = payload.get(); + info.indirect = indirect_buffer_allocator.get_buffer(0, 0); + + info.push.meshlet_offset = asset.mesh.indirect_or_header.offset; + info.push.primitive_offset = asset.mesh.index_or_payload.offset; + info.push.vertex_offset = asset.mesh.attr_or_stream.offset; + + Meshlet::decode_mesh(*cmd, info, view); + + Semaphore sem[2]; + device->submit(cmd, nullptr, 2, sem); + device->add_wait_semaphore(CommandBuffer::Type::Generic, std::move(sem[0]), + VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT | + VK_PIPELINE_STAGE_2_INDEX_INPUT_BIT, false); + device->add_wait_semaphore(CommandBuffer::Type::AsyncGraphics, std::move(sem[1]), + VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT | + VK_PIPELINE_STAGE_2_INDEX_INPUT_BIT, false); + } } uint64_t cost = 0; if (ret) { - cost += view.total_primitives * index_buffer_allocator.get_element_size(0); - cost += view.total_vertices * attribute_buffer_allocator.get_element_size(0); - cost += view.total_vertices * attribute_buffer_allocator.get_element_size(1); - cost += view.total_vertices * attribute_buffer_allocator.get_element_size(2); - cost += view.format_header->meshlet_count * indirect_buffer_allocator.get_element_size(0); + if (mesh_encoding == MeshEncoding::Meshlet) + { + cost += view.format_header->payload_size_words * mesh_payload_allocator.get_element_size(0); + cost += 
view.format_header->meshlet_count * mesh_header_allocator.get_element_size(0); + cost += view.format_header->meshlet_count * view.format_header->u32_stream_count * mesh_stream_allocator.get_element_size(0); + } + else + { + cost += view.total_primitives * index_buffer_allocator.get_element_size(0); + cost += view.total_vertices * attribute_buffer_allocator.get_element_size(0); + cost += view.total_vertices * attribute_buffer_allocator.get_element_size(1); + cost += view.total_vertices * attribute_buffer_allocator.get_element_size(2); + cost += view.format_header->meshlet_count * indirect_buffer_allocator.get_element_size(0); + } + + asset.mesh.draw.style = view.format_header->style; } std::lock_guard holder{lock}; @@ -482,9 +575,18 @@ void ResourceManager::latch_handles() { { std::lock_guard holder_alloc{mesh_allocator_lock}; - index_buffer_allocator.free(asset.mesh.index); - attribute_buffer_allocator.free(asset.mesh.attr); - indirect_buffer_allocator.free(asset.mesh.indirect); + if (mesh_encoding == MeshEncoding::Meshlet) + { + mesh_payload_allocator.free(asset.mesh.index_or_payload); + mesh_stream_allocator.free(asset.mesh.attr_or_stream); + mesh_header_allocator.free(asset.mesh.indirect_or_header); + } + else + { + index_buffer_allocator.free(asset.mesh.index_or_payload); + attribute_buffer_allocator.free(asset.mesh.attr_or_stream); + indirect_buffer_allocator.free(asset.mesh.indirect_or_header); + } } asset.mesh = {}; } @@ -538,6 +640,21 @@ const Buffer *ResourceManager::get_indirect_buffer() const return indirect_buffer_allocator.get_buffer(0, 0); } +const Buffer *ResourceManager::get_meshlet_payload_buffer() const +{ + return mesh_payload_allocator.get_buffer(0, 0); +} + +const Buffer *ResourceManager::get_meshlet_header_buffer() const +{ + return mesh_header_allocator.get_buffer(0, 0); +} + +const Buffer *ResourceManager::get_meshlet_stream_header_buffer() const +{ + return mesh_stream_allocator.get_buffer(0, 0); +} + 
MeshBufferAllocator::MeshBufferAllocator(Device &device, uint32_t sub_block_size) : global_allocator(device) { diff --git a/vulkan/managers/resource_manager.hpp b/vulkan/managers/resource_manager.hpp index 90a2dc9f..a700cf3d 100644 --- a/vulkan/managers/resource_manager.hpp +++ b/vulkan/managers/resource_manager.hpp @@ -105,7 +105,7 @@ class ResourceManager final : private Granite::AssetInstantiatorInterface enum class MeshEncoding { Meshlet, - EncodedVBOAndIBO, + VBOAndIBOMDI, }; inline const Vulkan::ImageView *get_image_view(Granite::AssetID id) const @@ -118,13 +118,14 @@ class ResourceManager final : private Granite::AssetInstantiatorInterface const Vulkan::ImageView *get_image_view_blocking(Granite::AssetID id); - struct MultiIndirectDraw + struct DrawRange { uint32_t offset = 0; uint32_t count = 0; + Meshlet::MeshStyle style = Meshlet::MeshStyle::Wireframe; }; - inline MultiIndirectDraw get_mesh_indexed_indirect_draw(Granite::AssetID id) const + inline DrawRange get_mesh_draw_range(Granite::AssetID id) const { if (id.id < draws.size()) return draws[id.id]; @@ -134,7 +135,7 @@ class ResourceManager final : private Granite::AssetInstantiatorInterface inline MeshEncoding get_mesh_encoding() const { - return MeshEncoding::EncodedVBOAndIBO; + return mesh_encoding; } const Buffer *get_index_buffer() const; @@ -143,6 +144,10 @@ class ResourceManager final : private Granite::AssetInstantiatorInterface const Buffer *get_skinning_buffer() const; const Buffer *get_indirect_buffer() const; + const Buffer *get_meshlet_payload_buffer() const; + const Buffer *get_meshlet_header_buffer() const; + const Buffer *get_meshlet_stream_header_buffer() const; + private: Device *device; Granite::AssetManager *manager = nullptr; @@ -160,8 +165,8 @@ class ResourceManager final : private Granite::AssetInstantiatorInterface ImageHandle image; struct { - Internal::AllocatedSlice index, attr, indirect; - MultiIndirectDraw draw; + Internal::AllocatedSlice index_or_payload, attr_or_stream, 
indirect_or_header; + DrawRange draw; } mesh; Granite::AssetClass asset_class = Granite::AssetClass::ImageZeroable; bool latchable = false; @@ -172,7 +177,7 @@ class ResourceManager final : private Granite::AssetInstantiatorInterface std::vector assets; std::vector views; - std::vector draws; + std::vector draws; std::vector updates; ImageHandle fallback_color; @@ -193,6 +198,10 @@ class ResourceManager final : private Granite::AssetInstantiatorInterface MeshBufferAllocator index_buffer_allocator; MeshBufferAllocator attribute_buffer_allocator; MeshBufferAllocator indirect_buffer_allocator; + MeshBufferAllocator mesh_header_allocator; + MeshBufferAllocator mesh_stream_allocator; + MeshBufferAllocator mesh_payload_allocator; + MeshEncoding mesh_encoding = MeshEncoding::VBOAndIBOMDI; bool allocate_asset_mesh(Granite::AssetID id, const Meshlet::MeshView &view); }; diff --git a/vulkan/mesh/meshlet.hpp b/vulkan/mesh/meshlet.hpp index c695eb0b..8a9e7c13 100644 --- a/vulkan/mesh/meshlet.hpp +++ b/vulkan/mesh/meshlet.hpp @@ -60,6 +60,14 @@ struct Header uint16_t reserved; }; +// For GPU use +struct RuntimeHeader +{ + uint32_t stream_offset; + uint16_t num_primitives; + uint16_t num_attributes; +}; + struct Bound { float center[3]; From 50634bba896ff46e34345519ecf70be275887d0d Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Sun, 3 Dec 2023 13:23:14 +0100 Subject: [PATCH 70/71] Opt-in to mesh shader path later. 
--- assets/shaders/decode/meshlet_decode.comp | 2 ++ vulkan/managers/resource_manager.cpp | 10 ++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/assets/shaders/decode/meshlet_decode.comp b/assets/shaders/decode/meshlet_decode.comp index ddd2665d..8056519a 100644 --- a/assets/shaders/decode/meshlet_decode.comp +++ b/assets/shaders/decode/meshlet_decode.comp @@ -150,6 +150,7 @@ void main() { #define ATTR(linear_index, packed_decoded) { \ uvec2 output_offset_stride0 = output_offset_strides.data[meshlet_index * NUM_OUTPUT_U32_STREAMS + i]; \ + output_offset_stride0.x += registers.vertex_offset; \ if (linear_index <= uint(meta.num_attributes_minus_1)) \ output_stream_raw.data[output_offset_stride0.x + linear_index * output_offset_stride0.y] = packed_decoded; \ } @@ -162,6 +163,7 @@ void main() else { uint output_offset = output_offset_strides.data[meshlet_index].y; + output_offset += registers.vertex_offset; #define POS(linear_index, packed_decoded) { \ if (linear_index <= uint(meta.num_attributes_minus_1)) \ diff --git a/vulkan/managers/resource_manager.cpp b/vulkan/managers/resource_manager.cpp index 7dcd8f83..febf6eb0 100644 --- a/vulkan/managers/resource_manager.cpp +++ b/vulkan/managers/resource_manager.cpp @@ -54,8 +54,6 @@ ResourceManager::ResourceManager(Device *device_) mesh_payload_allocator.set_element_size(0, sizeof(uint32_t)); assets.reserve(Granite::AssetID::MaxIDs); - - mesh_encoding = MeshEncoding::Meshlet; } ResourceManager::~ResourceManager() @@ -170,6 +168,14 @@ void ResourceManager::init() // This is somewhat arbitrary. 
manager->set_asset_budget_per_iteration(2 * 1000 * 1000); } + + if (device->get_device_features().mesh_shader_features.taskShader && + device->get_device_features().mesh_shader_features.meshShader && + device->supports_subgroup_size_log2(true, 5, 5, VK_SHADER_STAGE_MESH_BIT_EXT)) + { + mesh_encoding = MeshEncoding::Meshlet; + LOGI("Opting in to meshlet path.\n"); + } } ImageHandle ResourceManager::create_gtx(const MemoryMappedTexture &mapped_file, Granite::AssetID id) From e975a24f1c5f44da95b9431cc33493279455ba43 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Fri, 8 Dec 2023 12:14:58 +0100 Subject: [PATCH 71/71] Revert "Fix FFmpeg build on MSVC." This reverts commit bd4ac44bb66c41a9bf52db785459b159c97306dc. --- video/CMakeLists.txt | 42 +++++++----------------------------------- 1 file changed, 7 insertions(+), 35 deletions(-) diff --git a/video/CMakeLists.txt b/video/CMakeLists.txt index 7d5357d5..4b760b74 100644 --- a/video/CMakeLists.txt +++ b/video/CMakeLists.txt @@ -1,44 +1,16 @@ +include(FindPkgConfig) +pkg_check_modules(LIBAV REQUIRED IMPORTED_TARGET + libavdevice libavformat libavcodec libavutil) + add_granite_internal_lib(granite-video ffmpeg_encode.cpp ffmpeg_encode.hpp slangmosh_encode_iface.hpp ffmpeg_decode.cpp ffmpeg_decode.hpp slangmosh_decode_iface.hpp ffmpeg_hw_device.cpp ffmpeg_hw_device.hpp) -# FFmpeg macro uses designated initializer. -target_compile_features(granite-video PRIVATE cxx_std_20) - -option(GRANITE_FFMPEG_INSTALL_PREFIX "Override FFmpeg install prefix." "") -if (GRANITE_FFMPEG_INSTALL_PREFIX) - # For MSVC. It does not play well with Msys2 pkg-configs. 
- message("FFmpeg: install prefix ${GRANITE_FFMPEG_INSTALL_PREFIX}.") - target_include_directories(granite-video PRIVATE ${GRANITE_FFMPEG_INSTALL_PREFIX}/include) - find_library(AVDEVICE avdevice NAMES libavdevice - PATHS ${GRANITE_FFMPEG_INSTALL_PREFIX}/lib ${GRANITE_FFMPEG_INSTALL_PREFIX}/bin - NO_DEFAULT_PATH) - find_library(AVFORMAT avformat NAMES libavformat - PATHS ${GRANITE_FFMPEG_INSTALL_PREFIX}/lib ${GRANITE_FFMPEG_INSTALL_PREFIX}/bin - NO_DEFAULT_PATH) - find_library(AVCODEC avcodec NAMES libavcodec - PATHS ${GRANITE_FFMPEG_INSTALL_PREFIX}/lib ${GRANITE_FFMPEG_INSTALL_PREFIX}/bin - NO_DEFAULT_PATH) - find_library(AVUTIL avutil NAMES libavutil - PATHS ${GRANITE_FFMPEG_INSTALL_PREFIX}/lib ${GRANITE_FFMPEG_INSTALL_PREFIX}/bin - NO_DEFAULT_PATH) - message("FFmpeg: libavdevice (${AVDEVICE}).") - message("FFmpeg: libavformat (${AVFORMAT}).") - message("FFmpeg: libavcodec (${AVCODEC}).") - message("FFmpeg: libavutil (${AVUTIL}).") - target_link_libraries(granite-video PRIVATE ${AVDEVICE} ${AVFORMAT} ${AVCODEC} ${AVUTIL}) -else() - include(FindPkgConfig) - pkg_check_modules(LIBAV REQUIRED IMPORTED_TARGET - libavdevice libavformat libavcodec libavutil) - target_link_libraries(granite-video PRIVATE PkgConfig::LIBAV) -endif() - target_link_libraries(granite-video - PUBLIC granite-vulkan - PRIVATE granite-threading granite-math) + PUBLIC granite-vulkan + PRIVATE PkgConfig::LIBAV granite-threading granite-math) if (GRANITE_AUDIO) target_link_libraries(granite-video PRIVATE granite-audio) endif() @@ -49,4 +21,4 @@ if (GRANITE_FFMPEG_VULKAN_ENCODE) target_compile_definitions(granite-video PRIVATE HAVE_FFMPEG_VULKAN_ENCODE) endif() target_include_directories(granite-video PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -target_compile_definitions(granite-video PUBLIC HAVE_GRANITE_FFMPEG) +target_compile_definitions(granite-video PUBLIC HAVE_GRANITE_FFMPEG) \ No newline at end of file