diff --git a/application/platforms/CMakeLists.txt b/application/platforms/CMakeLists.txt
index 28619c62b..0ca56f3cc 100644
--- a/application/platforms/CMakeLists.txt
+++ b/application/platforms/CMakeLists.txt
@@ -31,6 +31,9 @@ elseif (${GRANITE_PLATFORM} MATCHES "SDL")
     #find_package(SDL3 REQUIRED CONFIG REQUIRED COMPONENTS SDL3-shared)
     #target_link_libraries(granite-platform PRIVATE SDL3::SDL3-shared)
     target_link_libraries(granite-platform PRIVATE SDL3-static granite-input-sdl)
+    if (NOT WIN32)
+        target_link_libraries(granite-platform PRIVATE dl)
+    endif()
 else()
     message(FATAL "GRANITE_PLATFORM is not set.")
 endif()
diff --git a/application/platforms/application_headless.cpp b/application/platforms/application_headless.cpp
index d62e9b2e8..1b8083906 100644
--- a/application/platforms/application_headless.cpp
+++ b/application/platforms/application_headless.cpp
@@ -248,19 +248,10 @@ struct WSIPlatformHeadless : Granite::GraniteWSIPlatform
 	enc_opts.frame_timebase.den = int(frame_rate);
 
 #ifdef HAVE_GRANITE_AUDIO
-#if 1
 	enc_opts.realtime = true;
 	record_stream.reset(Audio::create_default_audio_record_backend("headless", 44100.0f, 2));
 	if (record_stream)
 		encoder.set_audio_record_stream(record_stream.get());
-#else
-	auto *mixer = new Audio::Mixer;
-	auto *audio_dumper = new Audio::DumpBackend(
-			mixer, 48000.0f, 2,
-			unsigned(std::ceil(48000.0f / frame_rate)));
-	Global::install_audio_system(audio_dumper, mixer);
-	encoder.set_audio_source(audio_dumper);
-#endif
 #endif
 
 	if (!encoder.init(&app->get_wsi().get_device(), video_encode_path.c_str(), enc_opts))
@@ -284,7 +275,9 @@ struct WSIPlatformHeadless : Granite::GraniteWSIPlatform
 	}
 #endif
 
+#ifdef HAVE_GRANITE_AUDIO
 	record_stream->start();
+#endif
 }
 #endif
diff --git a/application/platforms/application_sdl3.cpp b/application/platforms/application_sdl3.cpp
index de926e9f0..4e0e6a3a4 100644
--- a/application/platforms/application_sdl3.cpp
+++ b/application/platforms/application_sdl3.cpp
@@ -40,6 +40,10 @@
 #include
 #endif
 
+#ifdef __linux__
+#include <dlfcn.h>
+#endif
+
 namespace Granite
 {
 static Key sdl_key_to_granite_key(SDL_Keycode key)
 {
@@ -104,6 +108,17 @@ struct WSIPlatformSDL : GraniteWSIPlatform
 		if (options.override_height)
 			height = options.override_height;
 
+#ifdef __linux__
+		// RenderDoc doesn't support Wayland, and SDL3 uses Wayland by default.
+		// Opt in to X11 to avoid having to manually remember to pass down SDL_VIDEO_DRIVER=x11.
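+		// dlopen with RTLD_NOLOAD never loads the library; it only returns a
+		// handle if librenderdoc.so is already resident in the process, which
+		// is the case when running under RenderDoc.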
+ void *renderdoc_module = dlopen("librenderdoc.so", RTLD_NOW | RTLD_NOLOAD); + if (renderdoc_module) + { + LOGI("RenderDoc is loaded, disabling Wayland.\n"); + setenv("SDL_VIDEO_DRIVER", "x11", 0); + } +#endif + if (SDL_Init(SDL_INIT_EVENTS | SDL_INIT_GAMEPAD | SDL_INIT_VIDEO) < 0) { LOGE("Failed to init SDL.\n"); diff --git a/assets/shaders/decode/meshlet_decode.comp b/assets/shaders/decode/meshlet_decode.comp new file mode 100644 index 000000000..8056519ac --- /dev/null +++ b/assets/shaders/decode/meshlet_decode.comp @@ -0,0 +1,219 @@ +#version 450 + +#extension GL_EXT_scalar_block_layout : require +#include "../inc/meshlet_payload_constants.h" + +#define MESHLET_PAYLOAD_LARGE_WORKGROUP 1 + +#if MESHLET_PAYLOAD_LARGE_WORKGROUP +#define MESHLET_PAYLOAD_WG_Y MESHLET_PAYLOAD_NUM_CHUNKS +#else +#define MESHLET_PAYLOAD_WG_Y 1 +#endif +layout(local_size_x = 32, local_size_y = MESHLET_PAYLOAD_WG_Y) in; + +layout(constant_id = 0) const uint NUM_U32_STREAMS = MESHLET_PAYLOAD_MAX_STREAMS; +layout(constant_id = 1) const uint NUM_OUTPUT_U32_STREAMS = 1; +layout(constant_id = 2) const bool RAW_PAYLOAD = false; +#define MESHLET_PAYLOAD_NUM_U32_STREAMS NUM_U32_STREAMS +#define MESHLET_PAYLOAD_DESCRIPTOR_SET 0 +#define MESHLET_PAYLOAD_META_BINDING 0 +#define MESHLET_PAYLOAD_STREAM_BINDING 1 +#define MESHLET_PAYLOAD_PAYLOAD_BINDING 2 +#include "../inc/meshlet_payload_decode.h" +#include "../inc/meshlet_attribute_decode.h" + +const int MESH_STYLE = int(NUM_OUTPUT_U32_STREAMS); +const int MESH_STYLE_WIREFRAME = 0; +const int MESH_STYLE_UNTEXTURED = 1; +const int MESH_STYLE_TEXTURED = 2; +const int MESH_STYLE_SKINNED = 3; + +layout(set = 0, binding = 3, scalar) writeonly buffer OutputIndices +{ + uvec3 data[]; +} output_indices32; + +layout(set = 0, binding = 3, scalar) writeonly buffer OutputIndices8 +{ + u8vec3 data[]; +} output_indices8; + +layout(set = 0, binding = 4, std430) writeonly buffer OutputStream0 +{ + uint data[]; +} output_stream_raw; + +layout(set = 0, binding = 4, scalar) writeonly buffer OutputStreamPos +{ + vec3 data[]; +} output_stream_pos; + +struct UntexturedAttr +{ + uint normal; +}; + +layout(set = 0, binding = 5, std430) writeonly buffer OutputStreamUntextured +{ + UntexturedAttr data[]; +} output_stream_untextured_attr; + +struct TexturedAttr +{ + uint normal; + uint tangent; + vec2 uv; +}; + +layout(set = 0, binding = 5, std430) writeonly buffer OutputStreamTextured +{ + TexturedAttr data[]; +} output_stream_textured_attr; + +layout(set = 0, binding = 6, std430) writeonly buffer OutputStreamSkin +{ + uvec2 data[]; +} output_stream_skin; + +layout(set = 0, binding = 7, std430) readonly buffer OutputOffsets +{ + uvec2 data[]; +} output_offset_strides; + +struct IndirectIndexedDraw +{ + uint indexCount; + uint instanceCount; + uint firstIndex; + uint vertexOffset; + uint firstInstance; +}; + +layout(set = 0, binding = 8, std430) writeonly buffer IndirectCommands +{ + IndirectIndexedDraw draws[]; +} indirect_commands; + +layout(push_constant, std430) uniform Registers +{ + uint primitive_offset; + uint vertex_offset; + uint meshlet_offset; +} registers; + +uint pack_a2bgr10(vec4 v) +{ + ivec4 quantized = ivec4(round(clamp(v, vec4(-1.0), vec4(1.0)) * vec4(511.0, 511.0, 511.0, 1.0))) & ivec4(1023, 1023, 1023, 3); + return (quantized.a << 30) | (quantized.b << 20) | (quantized.g << 10) | (quantized.r << 0); +} + +void main() +{ + uint meshlet_index = gl_WorkGroupID.x; + meshlet_init_workgroup(meshlet_index * NUM_U32_STREAMS); + MeshletMetaRaw meta = 
meshlet_metas_raw.data[meshlet_index]; + + if (!RAW_PAYLOAD) + { + IndirectIndexedDraw draw; + draw.indexCount = 3 * (meta.num_primitives_minus_1 + 1); + draw.instanceCount = 1; + draw.vertexOffset = meta.base_vertex_offset + registers.vertex_offset; + draw.firstIndex = 3 * (output_offset_strides.data[meshlet_index].x + registers.primitive_offset); + draw.firstInstance = 0; + indirect_commands.draws[meshlet_index + registers.meshlet_offset] = draw; + } + +#define INDEX(linear_index, packed_indices) { \ + uint output_offset; \ + if (RAW_PAYLOAD) { \ + uvec3 indices = uvec4(unpack8(packed_indices)).xyz; \ + indices += meta.base_vertex_offset + registers.vertex_offset; \ + output_offset = output_offset_strides.data[meshlet_index * NUM_OUTPUT_U32_STREAMS].x; \ + output_offset += registers.primitive_offset; \ + if (linear_index <= uint(meta.num_primitives_minus_1)) \ + output_indices32.data[output_offset + linear_index] = indices; \ + } else { \ + output_offset = output_offset_strides.data[meshlet_index].x; \ + output_offset += registers.primitive_offset; \ + if (linear_index <= uint(meta.num_primitives_minus_1)) \ + output_indices8.data[output_offset + linear_index] = unpack8(packed_indices).xyz; \ + } \ +} + + { + MESHLET_DECODE_STREAM_32(meshlet_index * NUM_U32_STREAMS, 0, INDEX); + } + + if (RAW_PAYLOAD) + { +#define ATTR(linear_index, packed_decoded) { \ + uvec2 output_offset_stride0 = output_offset_strides.data[meshlet_index * NUM_OUTPUT_U32_STREAMS + i]; \ + output_offset_stride0.x += registers.vertex_offset; \ + if (linear_index <= uint(meta.num_attributes_minus_1)) \ + output_stream_raw.data[output_offset_stride0.x + linear_index * output_offset_stride0.y] = packed_decoded; \ +} + + for (uint i = 1; i < NUM_OUTPUT_U32_STREAMS; i++) + { + MESHLET_DECODE_STREAM_32(meshlet_index * NUM_U32_STREAMS, i, ATTR); + } + } + else + { + uint output_offset = output_offset_strides.data[meshlet_index].y; + output_offset += registers.vertex_offset; + +#define POS(linear_index, packed_decoded) { \ + if (linear_index <= uint(meta.num_attributes_minus_1)) \ + output_stream_pos.data[output_offset + linear_index] = attribute_decode_snorm_exp_position(packed_decoded); \ +} + +#define NORMAL(linear_index, packed_decoded) { \ + if (linear_index <= uint(meta.num_attributes_minus_1)) { \ + if (MESH_STYLE >= MESH_STYLE_TEXTURED) \ + output_stream_textured_attr.data[output_offset + linear_index].normal = pack_a2bgr10(attribute_decode_oct8_normal_tangent(packed_decoded)); \ + else \ + output_stream_untextured_attr.data[output_offset + linear_index].normal = pack_a2bgr10(attribute_decode_oct8_normal_tangent(packed_decoded)); \ + } \ +} + +#define TANGENT(linear_index, packed_decoded) { \ + if (linear_index <= uint(meta.num_attributes_minus_1)) { \ + output_stream_textured_attr.data[output_offset + linear_index].tangent = pack_a2bgr10(attribute_decode_oct8_normal_tangent(packed_decoded)); \ + } \ +} + +#define UV(linear_index, packed_decoded) { \ + if (linear_index <= uint(meta.num_attributes_minus_1)) { \ + output_stream_textured_attr.data[output_offset + linear_index].uv = attribute_decode_snorm_exp_uv(packed_decoded); \ + } \ +} + +#define SKIN(linear_index, packed_decoded) { \ + if (linear_index <= uint(meta.num_attributes_minus_1)) { \ + output_stream_skin.data[output_offset + linear_index] = packed_decoded; \ + } \ +} + { + MESHLET_DECODE_STREAM_64(meshlet_index * NUM_U32_STREAMS, 1, POS); + } + + if (MESH_STYLE >= MESH_STYLE_UNTEXTURED) + { + MESHLET_DECODE_STREAM_32(meshlet_index * NUM_U32_STREAMS, 3, 
NORMAL); + } + + if (MESH_STYLE >= MESH_STYLE_TEXTURED) + { + MESHLET_DECODE_STREAM_32(meshlet_index * NUM_U32_STREAMS, 4, TANGENT); + MESHLET_DECODE_STREAM_64(meshlet_index * NUM_U32_STREAMS, 5, UV); + } + + if (MESH_STYLE >= MESH_STYLE_SKINNED) + { + MESHLET_DECODE_STREAM_64(meshlet_index * NUM_U32_STREAMS, 7, SKIN); + } + } +} diff --git a/assets/shaders/inc/meshlet_attribute_decode.h b/assets/shaders/inc/meshlet_attribute_decode.h new file mode 100644 index 000000000..51a05bc05 --- /dev/null +++ b/assets/shaders/inc/meshlet_attribute_decode.h @@ -0,0 +1,39 @@ +#ifndef MESHLET_ATTRIBUTE_DECODE_H_ +#define MESHLET_ATTRIBUTE_DECODE_H_ + +vec3 attribute_decode_snorm_exp_position(uvec2 payload) +{ + ivec3 sint_value = ivec3( + bitfieldExtract(int(payload.x), 0, 16), + bitfieldExtract(int(payload.x), 16, 16), + bitfieldExtract(int(payload.y), 0, 16)); + int exp = bitfieldExtract(int(payload.y), 16, 16); + return vec3( + ldexp(float(sint_value.x), exp), + ldexp(float(sint_value.y), exp), + ldexp(float(sint_value.z), exp)); +} + +vec2 attribute_decode_snorm_exp_uv(uvec2 payload) +{ + ivec2 sint_value = ivec2( + bitfieldExtract(int(payload.x), 0, 16), + bitfieldExtract(int(payload.x), 16, 16)); + int exp = bitfieldExtract(int(payload.y), 0, 16); + return vec2( + ldexp(float(sint_value.x), exp), + ldexp(float(sint_value.y), exp)) + 0.5; +} + +// Adapted from: https://knarkowicz.wordpress.com/2014/04/16/octahedron-normal-vector-encoding/ +// https://twitter.com/Stubbesaurus/status/9379947905532272640 +mediump vec4 attribute_decode_oct8_normal_tangent(uint payload) +{ + mediump vec4 f = unpackSnorm4x8(payload); + mediump vec3 n = vec3(f.x, f.y, 1.0 - abs(f.x) - abs(f.y)); + mediump float t = max(-n.z, 0.0); + n.xy += mix(vec2(t), vec2(-t), greaterThanEqual(n.xy, vec2(0.0))); + return vec4(normalize(n), f.w != 0.0 ? 
-1.0 : 1.0); +} + +#endif \ No newline at end of file diff --git a/assets/shaders/inc/meshlet_payload_constants.h b/assets/shaders/inc/meshlet_payload_constants.h new file mode 100644 index 000000000..2a91ff531 --- /dev/null +++ b/assets/shaders/inc/meshlet_payload_constants.h @@ -0,0 +1,8 @@ +#ifndef MESHLET_PAYLOAD_CONSTANTS_H_ +#define MESHLET_PAYLOAD_CONSTANTS_H_ + +#define MESHLET_PAYLOAD_MAX_ELEMENTS 256 +#define MESHLET_PAYLOAD_NUM_CHUNKS 8 +#define MESHLET_PAYLOAD_MAX_STREAMS 16 + +#endif \ No newline at end of file diff --git a/assets/shaders/inc/meshlet_payload_decode.h b/assets/shaders/inc/meshlet_payload_decode.h new file mode 100644 index 000000000..0673e3a32 --- /dev/null +++ b/assets/shaders/inc/meshlet_payload_decode.h @@ -0,0 +1,305 @@ +#ifndef MESHLET_PAYLOAD_DECODE_H_ +#define MESHLET_PAYLOAD_DECODE_H_ + +#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require +#extension GL_KHR_shader_subgroup_arithmetic : require +#extension GL_KHR_shader_subgroup_ballot : require +#extension GL_KHR_shader_subgroup_shuffle : require +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require +#extension GL_EXT_scalar_block_layout : require +#extension GL_EXT_shader_subgroup_extended_types_int8 : require + +#include "meshlet_payload_constants.h" + +#ifndef MESHLET_PAYLOAD_NUM_U32_STREAMS +#error "Must define MESHLET_PAYLOAD_NUM_U32_STREAMS before including meshlet_payload_decode.h" +#endif + +#ifndef MESHLET_PAYLOAD_LARGE_WORKGROUP +#error "Must define MESHLET_PAYLOAD_LARGE_WORKGROUP" +#endif + +#ifndef MESHLET_PAYLOAD_DESCRIPTOR_SET +#error "Must define MESHLET_PAYLOAD_DESCRIPTOR_SET" +#endif + +#ifndef MESHLET_PAYLOAD_META_BINDING +#error "Must define MESHLET_PAYLOAD_META_BINDING" +#endif + +#ifndef MESHLET_PAYLOAD_STREAM_BINDING +#error "Must define MESHLET_PAYLOAD_STREAM_BINDING" +#endif + +#ifndef MESHLET_PAYLOAD_PAYLOAD_BINDING +#error "Must define MESHLET_PAYLOAD_PAYLOAD_BINDING" +#endif + +struct MeshletStream +{ + u16vec4 predictor_a; + u16vec4 predictor_b; + u8vec4 initial_value; + uint offset_from_base; + uint16_t bitplane_meta[MESHLET_PAYLOAD_NUM_CHUNKS]; +}; + +struct MeshletMetaRaw +{ + uint base_vertex_offset; + uint8_t num_primitives_minus_1; + uint8_t num_attributes_minus_1; + uint16_t reserved; +}; + +struct MeshletMetaRuntime +{ + uint stream_offset; + uint16_t num_primitives; + uint16_t num_attributes; +}; + +layout(set = MESHLET_PAYLOAD_DESCRIPTOR_SET, binding = MESHLET_PAYLOAD_META_BINDING, std430) readonly buffer MeshletMetasRaw +{ + MeshletMetaRaw data[]; +} meshlet_metas_raw; + +layout(set = MESHLET_PAYLOAD_DESCRIPTOR_SET, binding = MESHLET_PAYLOAD_META_BINDING, std430) readonly buffer MeshletMetasRuntime +{ + MeshletMetaRuntime data[]; +} meshlet_metas_runtime; + +layout(set = MESHLET_PAYLOAD_DESCRIPTOR_SET, binding = MESHLET_PAYLOAD_STREAM_BINDING, std430) readonly buffer MeshletStreams +{ + MeshletStream data[]; +} meshlet_streams; + +layout(set = MESHLET_PAYLOAD_DESCRIPTOR_SET, binding = MESHLET_PAYLOAD_PAYLOAD_BINDING, std430) readonly buffer Payload +{ + uint data[]; +} payload; + +#if MESHLET_PAYLOAD_LARGE_WORKGROUP +shared u8vec4 shared_chunk_bit_counts[MESHLET_PAYLOAD_NUM_U32_STREAMS][MESHLET_PAYLOAD_NUM_CHUNKS]; +shared uint shared_chunk_offset[MESHLET_PAYLOAD_NUM_U32_STREAMS][MESHLET_PAYLOAD_NUM_CHUNKS]; +shared uvec2 chunk_values0[MESHLET_PAYLOAD_NUM_CHUNKS]; +shared uvec2 chunk_values1[MESHLET_PAYLOAD_NUM_CHUNKS]; +#endif + +// Hardcodes wave32 atm. Need fallback. 
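+// How a u32 stream is encoded, as consumed by the macros below:
+// 256 values are split into 8 chunks of 32. Each value is treated as four
+// byte lanes; per chunk, bitplane_meta packs a 4-bit bit count per lane, and
+// the payload stores that many 32-bit bitplanes (one bit per element).
+// The decoded value is a delta: a fixed-point 8.8 linear predictor
+// ((predictor_a + predictor_b * index) >> 8) is added, initial_value seeds
+// element 0, and a subgroup inclusive add integrates the sequence.
+// Reference decode of one lane of one chunk (illustrative sketch only):
+//   uint v = 0;
+//   for (int i = 0; i < bit_count; i++)
+//       v |= bitfieldExtract(payload[offset + i], lane, 1) << i;
+//   int delta = bitfieldExtract(int(v), 0, bit_count); // sign-extend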
+ +uvec2 pack_u16vec4_to_uvec2(u16vec4 v) +{ + return uvec2(pack32(v.xy), pack32(v.zw)); +} + +uint repack_uint(uvec2 v) +{ + u16vec4 v16 = u16vec4(unpack16(v.x), unpack16(v.y)); + return pack32(u8vec4(v16)); +} + +void meshlet_compute_stream_offsets(uint stream_index, + out uint out_stream_chunk_offset, out u8vec4 out_bit_counts) +{ + if (gl_SubgroupInvocationID < MESHLET_PAYLOAD_NUM_CHUNKS) + { + uint bitplane_value = uint(meshlet_streams.data[stream_index].bitplane_meta[gl_SubgroupInvocationID]); + u16vec4 bit_counts = (u16vec4(bitplane_value) >> u16vec4(0, 4, 8, 12)) & 0xfus; + u16vec2 bit_counts2 = bit_counts.xy + bit_counts.zw; + uint total_bits = bit_counts2.x + bit_counts2.y; + uint offset = meshlet_streams.data[stream_index].offset_from_base; + out_stream_chunk_offset = subgroupExclusiveAdd(total_bits) + offset; + out_bit_counts = u8vec4(bit_counts); + } +} + +void meshlet_init_workgroup(uint base_stream_index) +{ +#if MESHLET_PAYLOAD_LARGE_WORKGROUP + + for (uint stream_index = gl_SubgroupID; stream_index < MESHLET_PAYLOAD_NUM_U32_STREAMS; stream_index += gl_NumSubgroups) + { + if (gl_SubgroupInvocationID < MESHLET_PAYLOAD_NUM_CHUNKS) + { + // Start by decoding the offset for bitplanes for all u32 streams. + meshlet_compute_stream_offsets(base_stream_index + stream_index, + shared_chunk_offset[stream_index][gl_SubgroupInvocationID], + shared_chunk_bit_counts[stream_index][gl_SubgroupInvocationID]); + } + } + + barrier(); +#endif +} + +uint meshlet_get_linear_index() +{ +#if MESHLET_PAYLOAD_LARGE_WORKGROUP + // Rely on SubgroupInvocationID == LocalInvocationID.x here. + return gl_WorkGroupSize.x * gl_LocalInvocationID.y + gl_SubgroupInvocationID; +#else + return gl_SubgroupInvocationID; +#endif +} + +// Overlap load with consumption. +// Helps RDNA2 quite a lot here! +#define MESHLET_FETCH_BITPLANES(decoded_value, counts, payload_value, offset) \ + for (int i = 0; i < counts; i++) \ + { \ + decoded_value |= bitfieldExtract(payload_value, int(gl_SubgroupInvocationID), 1) << i; \ + payload_value = payload.data[++offset]; \ + } \ + decoded_value = bitfieldExtract(int(decoded_value), 0, counts) + +// Add some specialized variants. 
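+// The ##iter suffix lets one macro expansion declare an independent set of
+// temporaries per stream, so the 64-bit path can "dual-pump" two streams
+// through the same code without needing indexable temporaries.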
+ +#define MESHLET_PAYLOAD_DECL_STREAM(unrolled_stream_index, iter) \ + u16vec4 predictor_a##iter = meshlet_streams.data[unrolled_stream_index].predictor_a; \ + u16vec4 predictor_b##iter = meshlet_streams.data[unrolled_stream_index].predictor_b; \ + u8vec4 initial_value_##iter = meshlet_streams.data[unrolled_stream_index].initial_value; \ + uvec2 initial_value##iter = pack_u16vec4_to_uvec2(u16vec4(initial_value_##iter)) + +#if MESHLET_PAYLOAD_LARGE_WORKGROUP +#define MESHLET_PAYLOAD_DECL_CHUNK_OFFSETS(stream_index, chunk_id, iter) \ + uint bitplane_offsets##iter = shared_chunk_offset[stream_index][chunk_id]; \ + ivec4 bit_counts##iter = ivec4(shared_chunk_bit_counts[stream_index][chunk_id]) +#else +#define MESHLET_PAYLOAD_DECL_CHUNK_OFFSETS(stream_index, chunk_id, iter) \ + uint bitplane_offsets##iter = subgroupShuffle(shared_chunk_offset##iter, chunk_id); \ + ivec4 bit_counts##iter = ivec4(subgroupShuffle(shared_chunk_bit_counts##iter, chunk_id)) +#endif + +#define MESHLET_PAYLOAD_PROCESS_CHUNK(stream_index, chunk_id, iter) \ + uvec4 decoded##iter = ivec4(0); \ + MESHLET_PAYLOAD_DECL_CHUNK_OFFSETS(stream_index, chunk_id, iter); \ + uint value##iter = payload.data[bitplane_offsets##iter]; \ + MESHLET_FETCH_BITPLANES(decoded##iter.x, bit_counts##iter.x, value##iter, bitplane_offsets##iter); \ + MESHLET_FETCH_BITPLANES(decoded##iter.y, bit_counts##iter.y, value##iter, bitplane_offsets##iter); \ + MESHLET_FETCH_BITPLANES(decoded##iter.z, bit_counts##iter.z, value##iter, bitplane_offsets##iter); \ + MESHLET_FETCH_BITPLANES(decoded##iter.w, bit_counts##iter.w, value##iter, bitplane_offsets##iter); \ + uvec2 packed_decoded##iter = pack_u16vec4_to_uvec2(u16vec4(decoded##iter)) & 0xff00ffu; \ + if (linear_index == 0) \ + packed_decoded##iter += initial_value##iter; \ + packed_decoded##iter += pack_u16vec4_to_uvec2((predictor_a##iter + predictor_b##iter * uint16_t(linear_index)) >> 8us); \ + packed_decoded##iter = subgroupInclusiveAdd(packed_decoded##iter) + +#if MESHLET_PAYLOAD_LARGE_WORKGROUP +uint meshlet_decode_stream_32_wg256(uint base_stream_index, uint stream_index) +{ + uint unrolled_stream_index = base_stream_index + stream_index; + uint linear_index = meshlet_get_linear_index(); + uint chunk_id = gl_LocalInvocationID.y; + + MESHLET_PAYLOAD_DECL_STREAM(unrolled_stream_index, 0); + MESHLET_PAYLOAD_PROCESS_CHUNK(stream_index, chunk_id, 0); + + barrier(); // Resolve WAR hazard from last iteration. + if (gl_SubgroupInvocationID == MESHLET_PAYLOAD_MAX_ELEMENTS / MESHLET_PAYLOAD_NUM_CHUNKS - 1) + chunk_values0[chunk_id] = packed_decoded0 & 0xff00ffu; + barrier(); + if (gl_SubgroupID == 0u && gl_SubgroupInvocationID < gl_WorkGroupSize.y) + chunk_values0[gl_SubgroupInvocationID] = subgroupInclusiveAdd(chunk_values0[gl_SubgroupInvocationID]); + barrier(); + if (chunk_id != 0) + packed_decoded0 += chunk_values0[chunk_id - 1]; + + return repack_uint(packed_decoded0); +} + +uvec2 meshlet_decode_stream_64_wg256(uint base_stream_index, uint stream_index) +{ + // Dual-pump the computation. VGPR use is quite low either way, so this is fine. + uint unrolled_stream_index = base_stream_index + stream_index; + uint linear_index = meshlet_get_linear_index(); + uint chunk_id = gl_LocalInvocationID.y; + + MESHLET_PAYLOAD_DECL_STREAM(unrolled_stream_index, 0); + MESHLET_PAYLOAD_DECL_STREAM(unrolled_stream_index + 1, 1); + MESHLET_PAYLOAD_PROCESS_CHUNK(stream_index, chunk_id, 0); + MESHLET_PAYLOAD_PROCESS_CHUNK(stream_index + 1, chunk_id, 1); + + barrier(); // Resolve WAR hazard from last iteration. 
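+	// Two-level scan to stitch chunks together: the last lane of each chunk
+	// publishes its inclusive total to LDS, one subgroup prefix-sums the
+	// chunk totals, and every chunk but the first adds the previous carry.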
+	if (gl_SubgroupInvocationID == gl_SubgroupSize - 1)
+	{
+		chunk_values0[chunk_id] = packed_decoded0 & 0xff00ffu;
+		chunk_values1[chunk_id] = packed_decoded1 & 0xff00ffu;
+	}
+	barrier();
+	if (gl_SubgroupID == 0u && gl_SubgroupInvocationID < gl_WorkGroupSize.y)
+		chunk_values0[gl_SubgroupInvocationID] = subgroupInclusiveAdd(chunk_values0[gl_SubgroupInvocationID]);
+	else if (gl_SubgroupID == 1u && gl_SubgroupInvocationID < gl_WorkGroupSize.y)
+		chunk_values1[gl_SubgroupInvocationID] = subgroupInclusiveAdd(chunk_values1[gl_SubgroupInvocationID]);
+	barrier();
+	if (chunk_id != 0)
+	{
+		packed_decoded0 += chunk_values0[chunk_id - 1];
+		packed_decoded1 += chunk_values1[chunk_id - 1];
+	}
+
+	return uvec2(repack_uint(packed_decoded0), repack_uint(packed_decoded1));
+}
+
+// For large workgroups, we assume AMD, where LocalInvocationIndex indexing is preferred.
+// We assume that SubgroupInvocationID == LocalInvocationID.x here since it's the only reasonable way this could work.
+#define MESHLET_DECODE_STREAM_32(meshlet_index, stream_index, report_cb) { \
+	uint value = meshlet_decode_stream_32_wg256(meshlet_index, stream_index); \
+	report_cb(gl_LocalInvocationIndex, value); }
+
+#define MESHLET_DECODE_STREAM_64(meshlet_index, stream_index, report_cb) { \
+	uvec2 value = meshlet_decode_stream_64_wg256(meshlet_index, stream_index); \
+	report_cb(gl_LocalInvocationIndex, value); }
+
+#else
+
+// Have to iterate and report once per chunk. Avoids having to spend a lot of LDS memory.
+#define MESHLET_DECODE_STREAM_32(base_stream_index, stream_index, report_cb) { \
+	uint unrolled_stream_index = base_stream_index + stream_index; \
+	uint linear_index = meshlet_get_linear_index(); \
+	uvec2 prev_value0 = uvec2(0); \
+	uint shared_chunk_offset0; \
+	u8vec4 shared_chunk_bit_counts0; \
+	meshlet_compute_stream_offsets(unrolled_stream_index, shared_chunk_offset0, shared_chunk_bit_counts0); \
+	MESHLET_PAYLOAD_DECL_STREAM(unrolled_stream_index, 0); \
+	for (uint chunk_id = 0; chunk_id < MESHLET_PAYLOAD_NUM_CHUNKS; chunk_id++) \
+	{ \
+		MESHLET_PAYLOAD_PROCESS_CHUNK(stream_index, chunk_id, 0); \
+		packed_decoded0 += prev_value0; \
+		prev_value0 = subgroupBroadcast(packed_decoded0, 31) & 0xff00ffu; \
+		report_cb(linear_index, repack_uint(packed_decoded0)); \
+		linear_index += gl_SubgroupSize; \
+	} \
+}
+
+// Have to iterate and report once per chunk. Avoids having to spend a lot of LDS memory.
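+// Here the inter-chunk carry stays in registers instead: lane 31's inclusive
+// sum is broadcast and added into the next chunk's values (prev_value0/1).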
+#define MESHLET_DECODE_STREAM_64(base_stream_index, stream_index, report_cb) { \
+	uint unrolled_stream_index = base_stream_index + stream_index; \
+	uint linear_index = meshlet_get_linear_index(); \
+	uvec2 prev_value0 = uvec2(0); \
+	uvec2 prev_value1 = uvec2(0); \
+	uint shared_chunk_offset0; \
+	u8vec4 shared_chunk_bit_counts0; \
+	meshlet_compute_stream_offsets(unrolled_stream_index, shared_chunk_offset0, shared_chunk_bit_counts0); \
+	uint shared_chunk_offset1; \
+	u8vec4 shared_chunk_bit_counts1; \
+	meshlet_compute_stream_offsets(unrolled_stream_index + 1, shared_chunk_offset1, shared_chunk_bit_counts1); \
+	MESHLET_PAYLOAD_DECL_STREAM(unrolled_stream_index, 0); \
+	MESHLET_PAYLOAD_DECL_STREAM(unrolled_stream_index + 1, 1); \
+	for (uint chunk_id = 0; chunk_id < MESHLET_PAYLOAD_NUM_CHUNKS; chunk_id++) \
+	{ \
+		MESHLET_PAYLOAD_PROCESS_CHUNK(stream_index, chunk_id, 0); \
+		MESHLET_PAYLOAD_PROCESS_CHUNK(stream_index + 1, chunk_id, 1); \
+		packed_decoded0 += prev_value0; \
+		packed_decoded1 += prev_value1; \
+		prev_value0 = subgroupBroadcast(packed_decoded0, 31) & 0xff00ffu; \
+		prev_value1 = subgroupBroadcast(packed_decoded1, 31) & 0xff00ffu; \
+		report_cb(linear_index, uvec2(repack_uint(packed_decoded0), repack_uint(packed_decoded1))); \
+		linear_index += gl_SubgroupSize; \
+	} \
+}
+
+#endif
+
+#endif
\ No newline at end of file
diff --git a/filesystem/asset_manager.cpp b/filesystem/asset_manager.cpp
index c49d8a52e..6e3b1f1b8 100644
--- a/filesystem/asset_manager.cpp
+++ b/filesystem/asset_manager.cpp
@@ -29,6 +29,8 @@ namespace Granite
 {
 AssetManager::AssetManager()
 {
+	asset_bank.reserve(AssetID::MaxIDs);
+	sorted_assets.reserve(AssetID::MaxIDs);
 	signal = std::make_unique<TaskSignal>();
 	for (uint64_t i = 0; i < timestamp; i++)
 		signal->signal_increment();
@@ -36,40 +38,40 @@ AssetManager::AssetManager()
 
 AssetManager::~AssetManager()
 {
+	set_asset_instantiator_interface(nullptr);
 	signal->wait_until_at_least(timestamp);
-	for (auto *a : asset_bank)
-		pool.free(a);
+	for (uint32_t i = 0; i < id_count; i++)
+		pool.free(asset_bank[i]);
 }
 
-ImageAssetID AssetManager::register_image_resource_nolock(FileHandle file, ImageClass image_class, int prio)
+AssetID AssetManager::register_asset_nolock(FileHandle file, AssetClass asset_class, int prio)
 {
 	auto *info = pool.allocate();
 	info->handle = std::move(file);
-	info->id.id = id_count++;
+	info->id.id = id_count;
 	info->prio = prio;
-	info->image_class = image_class;
-	ImageAssetID ret = info->id;
-	asset_bank.push_back(info);
-	sorted_assets.reserve(asset_bank.size());
+	info->asset_class = asset_class;
+	AssetID ret = info->id;
+	asset_bank[id_count++] = info;
 
 	if (iface)
 	{
 		iface->set_id_bounds(id_count);
-		iface->set_image_class(info->id, image_class);
+		iface->set_asset_class(info->id, asset_class);
 	}
 
 	return ret;
 }
 
-void AssetInstantiatorInterface::set_image_class(ImageAssetID, ImageClass)
+void AssetInstantiatorInterface::set_asset_class(AssetID, AssetClass)
 {
 }
 
-ImageAssetID AssetManager::register_image_resource(FileHandle file, ImageClass image_class, int prio)
+AssetID AssetManager::register_asset(FileHandle file, AssetClass asset_class, int prio)
 {
 	std::lock_guard holder{asset_bank_lock};
-	return register_image_resource_nolock(std::move(file), image_class, prio);
+	return register_asset_nolock(std::move(file), asset_class, prio);
 }
 
-ImageAssetID AssetManager::register_image_resource(Filesystem &fs, const std::string &path, ImageClass image_class, int prio)
+AssetID AssetManager::register_asset(Filesystem &fs, const std::string &path, AssetClass
asset_class, int prio) { std::lock_guard holder{asset_bank_lock}; @@ -82,13 +84,13 @@ ImageAssetID AssetManager::register_image_resource(Filesystem &fs, const std::st if (!file) return {}; - auto id = register_image_resource_nolock(std::move(file), image_class, prio); + auto id = register_asset_nolock(std::move(file), asset_class, prio); asset_bank[id.id]->set_hash(h.get()); file_to_assets.insert_replace(asset_bank[id.id]); return id; } -void AssetManager::update_cost(ImageAssetID id, uint64_t cost) +void AssetManager::update_cost(AssetID id, uint64_t cost) { std::lock_guard holder{cost_update_lock}; thread_cost_updates.push_back({ id, cost }); @@ -100,11 +102,12 @@ void AssetManager::set_asset_instantiator_interface(AssetInstantiatorInterface * { signal->wait_until_at_least(timestamp); for (uint32_t id = 0; id < id_count; id++) - iface->release_image_resource({ id }); + iface->release_asset(AssetID{id}); } - for (auto *a : asset_bank) + for (uint32_t i = 0; i < id_count; i++) { + auto *a = asset_bank[i]; a->consumed = 0; a->pending_consumed = 0; a->last_used = 0; @@ -116,29 +119,29 @@ void AssetManager::set_asset_instantiator_interface(AssetInstantiatorInterface * { iface->set_id_bounds(id_count); for (uint32_t i = 0; i < id_count; i++) - iface->set_image_class({ i }, asset_bank[i]->image_class); + iface->set_asset_class(AssetID{i}, asset_bank[i]->asset_class); } } -void AssetManager::mark_used_resource(ImageAssetID id) +void AssetManager::mark_used_asset(AssetID id) { lru_append.push(id); } -void AssetManager::set_image_budget(uint64_t cost) +void AssetManager::set_asset_budget(uint64_t cost) { - image_budget = cost; + transfer_budget = cost; } -void AssetManager::set_image_budget_per_iteration(uint64_t cost) +void AssetManager::set_asset_budget_per_iteration(uint64_t cost) { - image_budget_per_iteration = cost; + transfer_budget_per_iteration = cost; } -bool AssetManager::set_image_residency_priority(ImageAssetID id, int prio) +bool AssetManager::set_asset_residency_priority(AssetID id, int prio) { std::lock_guard holder{asset_bank_lock}; - if (id.id >= asset_bank.size()) + if (id.id >= id_count) return false; asset_bank[id.id]->prio = prio; return true; @@ -146,7 +149,7 @@ bool AssetManager::set_image_residency_priority(ImageAssetID id, int prio) void AssetManager::adjust_update(const CostUpdate &update) { - if (update.id.id < asset_bank.size()) + if (update.id.id < id_count) { auto *a = asset_bank[update.id.id]; total_consumed += update.cost - (a->consumed + a->pending_consumed); @@ -178,15 +181,15 @@ void AssetManager::update_costs_locked_assets() void AssetManager::update_lru_locked_assets() { - lru_append.for_each_ranged([this](const ImageAssetID *id, size_t count) { + lru_append.for_each_ranged([this](const AssetID *id, size_t count) { for (size_t i = 0; i < count; i++) - if (id[i].id < asset_bank.size()) + if (id[i].id < id_count) asset_bank[id[i].id]->last_used = timestamp; }); lru_append.clear(); } -bool AssetManager::iterate_blocking(ThreadGroup &group, ImageAssetID id) +bool AssetManager::iterate_blocking(ThreadGroup &group, AssetID id) { if (!iface) return false; @@ -202,12 +205,12 @@ bool AssetManager::iterate_blocking(ThreadGroup &group, ImageAssetID id) if (candidate->consumed != 0 || candidate->pending_consumed != 0) return true; - uint64_t estimate = iface->estimate_cost_image_resource(candidate->id, *candidate->handle); + uint64_t estimate = iface->estimate_cost_asset(candidate->id, *candidate->handle); auto task = group.create_task(); 
task->set_task_class(TaskClass::Background); task->set_fence_counter_signal(signal.get()); task->set_desc("asset-manager-instantiate-single"); - iface->instantiate_image_resource(*this, task.get(), candidate->id, *candidate->handle); + iface->instantiate_asset(*this, task.get(), candidate->id, *candidate->handle); candidate->pending_consumed = estimate; candidate->last_used = timestamp; total_consumed += estimate; @@ -251,8 +254,8 @@ void AssetManager::iterate(ThreadGroup *group) update_costs_locked_assets(); update_lru_locked_assets(); - sorted_assets = asset_bank; - std::sort(sorted_assets.begin(), sorted_assets.end(), [](const AssetInfo *a, const AssetInfo *b) -> bool { + memcpy(sorted_assets.data(), asset_bank.data(), id_count * sizeof(sorted_assets[0])); + std::sort(sorted_assets.data(), sorted_assets.data() + id_count, [](const AssetInfo *a, const AssetInfo *b) -> bool { // High prios come first since they will be activated. // Then we sort by LRU. // High consumption should be moved last, so they are candidates to be paged out if we're over budget. @@ -272,7 +275,7 @@ void AssetManager::iterate(ThreadGroup *group) return a->id.id < b->id.id; }); - size_t release_index = sorted_assets.size(); + size_t release_index = id_count; uint64_t activated_cost_this_iteration = 0; unsigned activation_count = 0; size_t activate_index = 0; @@ -281,8 +284,8 @@ void AssetManager::iterate(ThreadGroup *group) // Activate in order from highest priority to lowest. bool can_activate = true; while (can_activate && - total_consumed < image_budget && - activated_cost_this_iteration < image_budget_per_iteration && + total_consumed < transfer_budget && + activated_cost_this_iteration < transfer_budget_per_iteration && activate_index != release_index) { auto *candidate = sorted_assets[activate_index]; @@ -296,26 +299,26 @@ void AssetManager::iterate(ThreadGroup *group) continue; } - uint64_t estimate = iface->estimate_cost_image_resource(candidate->id, *candidate->handle); + uint64_t estimate = iface->estimate_cost_asset(candidate->id, *candidate->handle); - can_activate = (total_consumed + estimate <= image_budget) || (candidate->prio >= persistent_prio()); + can_activate = (total_consumed + estimate <= transfer_budget) || (candidate->prio >= persistent_prio()); while (!can_activate && activate_index + 1 != release_index) { auto *release_candidate = sorted_assets[--release_index]; if (release_candidate->consumed) { LOGI("Releasing ID %u due to page-in pressure.\n", release_candidate->id.id); - iface->release_image_resource(release_candidate->id); + iface->release_asset(release_candidate->id); total_consumed -= release_candidate->consumed; release_candidate->consumed = 0; } - can_activate = total_consumed + estimate <= image_budget; + can_activate = total_consumed + estimate <= transfer_budget; } if (can_activate) { // We're trivially in budget. - iface->instantiate_image_resource(*this, task.get(), candidate->id, *candidate->handle); + iface->instantiate_asset(*this, task.get(), candidate->id, *candidate->handle); activation_count++; candidate->pending_consumed = estimate; @@ -328,7 +331,7 @@ void AssetManager::iterate(ThreadGroup *group) } // If we're 75% of budget, start garbage collecting non-resident resources ahead of time. 
- const uint64_t low_image_budget = (image_budget * 3) / 4; + const uint64_t low_image_budget = (transfer_budget * 3) / 4; const auto should_release = [&]() -> bool { if (release_index == activate_index) @@ -336,7 +339,7 @@ void AssetManager::iterate(ThreadGroup *group) if (sorted_assets[release_index - 1]->prio == persistent_prio()) return false; - if (total_consumed > image_budget) + if (total_consumed > transfer_budget) return true; else if (total_consumed > low_image_budget && sorted_assets[release_index - 1]->prio == 0) return true; @@ -351,7 +354,7 @@ void AssetManager::iterate(ThreadGroup *group) if (candidate->consumed) { LOGI("Releasing 0-prio ID %u due to page-in pressure.\n", candidate->id.id); - iface->release_image_resource(candidate->id); + iface->release_asset(candidate->id); total_consumed -= candidate->consumed; candidate->consumed = 0; candidate->last_used = 0; diff --git a/filesystem/asset_manager.hpp b/filesystem/asset_manager.hpp index 75b541072..6c613d67a 100644 --- a/filesystem/asset_manager.hpp +++ b/filesystem/asset_manager.hpp @@ -26,35 +26,42 @@ #include "filesystem.hpp" #include "object_pool.hpp" #include "intrusive_hash_map.hpp" +#include "dynamic_array.hpp" #include #include #include namespace Granite { -struct ImageAssetID +struct AssetID { uint32_t id = uint32_t(-1); + enum { MaxIDs = 1u << 18 }; + AssetID() = default; + explicit AssetID(uint32_t id_) : id{id_} {} explicit inline operator bool() const { return id != uint32_t(-1); } + inline bool operator==(const AssetID &other) const { return id == other.id; } + inline bool operator!=(const AssetID &other) const { return !(*this == other); } }; class AssetManager; // If we have to fall back due to no image being present, // lets asset instantiator know what to substitute. -enum class ImageClass +enum class AssetClass { // Substitute with 0. - Zeroable, + ImageZeroable, // Substitute with missing color. - Color, + ImageColor, // Substitute with RG8_UNORM 0.5 - Normal, + ImageNormal, // Substitute with M = 0, R = 1. - MetallicRoughness, + ImageMetallicRoughness, // Substitute with mid-gray (0.5, 0.5, 0.5, 1.0) UNORM8. // Somewhat compatible with everything. - Generic + ImageGeneric, + Mesh }; class ThreadGroup; @@ -67,16 +74,17 @@ class AssetInstantiatorInterface virtual ~AssetInstantiatorInterface() = default; // This estimate should be an upper bound. - virtual uint64_t estimate_cost_image_resource(ImageAssetID id, File &mapping) = 0; + virtual uint64_t estimate_cost_asset(AssetID id, File &mapping) = 0; // When instantiation completes, manager.update_cost() must be called with the real cost. // The real cost may only be known after async parsing of the file. - virtual void instantiate_image_resource(AssetManager &manager, TaskGroup *group, ImageAssetID id, File &mapping) = 0; + virtual void instantiate_asset(AssetManager &manager, TaskGroup *group, AssetID id, File &mapping) = 0; // Will only be called after an upload completes through manager.update_cost(). - virtual void release_image_resource(ImageAssetID id) = 0; + virtual void release_asset(AssetID id) = 0; + virtual void set_id_bounds(uint32_t bound) = 0; - virtual void set_image_class(ImageAssetID id, ImageClass image_class); + virtual void set_asset_class(AssetID id, AssetClass asset_class); // Called in AssetManager::iterate(). 
virtual void latch_handles() = 0; @@ -92,24 +100,26 @@ class AssetManager final : public AssetManagerInterface ~AssetManager() override; void set_asset_instantiator_interface(AssetInstantiatorInterface *iface); - void set_image_budget(uint64_t cost); - void set_image_budget_per_iteration(uint64_t cost); + + // We might want to consider different budgets per asset class. + void set_asset_budget(uint64_t cost); + void set_asset_budget_per_iteration(uint64_t cost); // FileHandle is intended to be used with FileSlice or similar here so that we don't need // a ton of open files at once. - ImageAssetID register_image_resource(FileHandle file, ImageClass image_class, int prio = 1); - ImageAssetID register_image_resource(Filesystem &fs, const std::string &path, ImageClass image_class, int prio = 1); + AssetID register_asset(FileHandle file, AssetClass asset_class, int prio = 1); + AssetID register_asset(Filesystem &fs, const std::string &path, AssetClass asset_class, int prio = 1); // Prio 0: Not resident, resource may not exist. - bool set_image_residency_priority(ImageAssetID id, int prio); + bool set_asset_residency_priority(AssetID id, int prio); // Intended to be called in Application::post_frame(). Not thread safe. // This function updates internal state. void iterate(ThreadGroup *group); - bool iterate_blocking(ThreadGroup &group, ImageAssetID id); + bool iterate_blocking(ThreadGroup &group, AssetID id); // Always thread safe, used by AssetInstantiatorInterfaces to update cost estimates. - void update_cost(ImageAssetID id, uint64_t cost); + void update_cost(AssetID id, uint64_t cost); // May be called concurrently, except when calling iterate(). uint64_t get_current_total_consumed() const; @@ -117,7 +127,7 @@ class AssetManager final : public AssetManagerInterface // May be called concurrently, except when calling iterate(). // Intended to be called by asset instantiator interface or similar. // When a resource is actually accessed, this is called. 
-	void mark_used_resource(ImageAssetID id);
+	void mark_used_asset(AssetID id);
 
 private:
 	struct AssetInfo : Util::IntrusiveHashMapEnabled<AssetInfo>
@@ -126,29 +136,29 @@ class AssetManager final : public AssetManagerInterface
 	{
 		uint64_t consumed = 0;
 		uint64_t last_used = 0;
 		FileHandle handle;
-		ImageAssetID id = {};
-		ImageClass image_class = ImageClass::Zeroable;
+		AssetID id = {};
+		AssetClass asset_class = AssetClass::ImageZeroable;
 		int prio = 0;
 	};
 
-	std::vector<AssetInfo *> sorted_assets;
+	Util::DynamicArray<AssetInfo *> sorted_assets;
+	Util::DynamicArray<AssetInfo *> asset_bank;
 	std::mutex asset_bank_lock;
-	std::vector<AssetInfo *> asset_bank;
 	Util::ObjectPool<AssetInfo> pool;
-	Util::AtomicAppendBuffer<ImageAssetID> lru_append;
+	Util::AtomicAppendBuffer<AssetID> lru_append;
 	Util::IntrusiveHashMapHolder<AssetInfo> file_to_assets;
 	AssetInstantiatorInterface *iface = nullptr;
 	uint32_t id_count = 0;
 	uint64_t total_consumed = 0;
-	uint64_t image_budget = 0;
-	uint64_t image_budget_per_iteration = 0;
+	uint64_t transfer_budget = 0;
+	uint64_t transfer_budget_per_iteration = 0;
 	uint64_t timestamp = 1;
 	uint32_t blocking_signals = 0;
 
 	struct CostUpdate
 	{
-		ImageAssetID id;
+		AssetID id;
 		uint64_t cost = 0;
 	};
 	std::mutex cost_update_lock;
@@ -157,7 +167,7 @@ class AssetManager final : public AssetManagerInterface
 	void adjust_update(const CostUpdate &update);
 	std::unique_ptr<TaskSignal> signal;
 
-	ImageAssetID register_image_resource_nolock(FileHandle file, ImageClass image_class, int prio);
+	AssetID register_asset_nolock(FileHandle file, AssetClass asset_class, int prio);
 	void update_costs_locked_assets();
 	void update_lru_locked_assets();
diff --git a/renderer/common_renderer_data.cpp b/renderer/common_renderer_data.cpp
index dc9fda342..b1a178d1a 100644
--- a/renderer/common_renderer_data.cpp
+++ b/renderer/common_renderer_data.cpp
@@ -112,7 +112,7 @@ void LightMesh::on_device_destroyed(const Vulkan::DeviceCreatedEvent &)
 void CommonRendererData::initialize_static_assets(AssetManager *iface, Filesystem *fs)
 {
 	LOGI("Initializing static assets.\n");
-	brdf_tables = iface->register_image_resource(*fs, "builtin://textures/ibl_brdf_lut.gtx", ImageClass::Zeroable,
-	                                             AssetManager::persistent_prio());
+	brdf_tables = iface->register_asset(*fs, "builtin://textures/ibl_brdf_lut.gtx", AssetClass::ImageZeroable,
+	                                    AssetManager::persistent_prio());
 }
 }
diff --git a/renderer/common_renderer_data.hpp b/renderer/common_renderer_data.hpp
index 4e147860c..0465a926c 100644
--- a/renderer/common_renderer_data.hpp
+++ b/renderer/common_renderer_data.hpp
@@ -57,7 +57,7 @@ class CommonRendererData final : public CommonRendererDataInterface
 {
 public:
 	LightMesh light_mesh;
-	ImageAssetID brdf_tables;
+	AssetID brdf_tables;
 	void initialize_static_assets(AssetManager *iface, Filesystem *file_iface);
 };
 }
\ No newline at end of file
diff --git a/renderer/formats/scene_formats.cpp b/renderer/formats/scene_formats.cpp
index 7dbda1a8b..6802e5c69 100644
--- a/renderer/formats/scene_formats.cpp
+++ b/renderer/formats/scene_formats.cpp
@@ -44,15 +44,20 @@ static vec3 compute_normal(const vec3 &a, const vec3 &b, const vec3 &c)
 
 struct IndexRemapping
 {
-	std::vector<unsigned> index_remap;
-	std::vector<unsigned> unique_attrib_to_source_index;
+	std::vector<uint32_t> index_remap;
+	std::vector<uint32_t> unique_attrib_to_source_index;
 };
 
 // Find duplicate indices.
-static IndexRemapping build_index_remap_list(const Mesh &mesh)
+static IndexRemapping build_attribute_remap_indices(const Mesh &mesh)
 {
-	unsigned attribute_count = unsigned(mesh.positions.size() / mesh.position_stride);
-	std::unordered_map<Util::Hash, unsigned> attribute_remapper;
+	auto attribute_count = unsigned(mesh.positions.size() / mesh.position_stride);
+	struct RemappedAttribute
+	{
+		unsigned unique_index;
+		unsigned source_index;
+	};
+	std::unordered_map<Util::Hash, RemappedAttribute> attribute_remapper;
 	IndexRemapping remapped;
 	remapped.index_remap.reserve(attribute_count);
 
@@ -66,13 +71,41 @@
 		auto hash = h.get();
 
 		auto itr = attribute_remapper.find(hash);
+		bool is_unique;
+
 		if (itr != end(attribute_remapper))
 		{
-			remapped.index_remap.push_back(itr->second);
+			bool match = true;
+			if (memcmp(mesh.positions.data() + i * mesh.position_stride,
+			           mesh.positions.data() + itr->second.source_index * mesh.position_stride,
+			           mesh.position_stride) != 0)
+			{
+				match = false;
+			}
+
+			if (match && !mesh.attributes.empty() &&
+			    memcmp(mesh.attributes.data() + i * mesh.attribute_stride,
+			           mesh.attributes.data() + itr->second.source_index * mesh.attribute_stride,
+			           mesh.attribute_stride) != 0)
+			{
+				match = false;
+			}
+
+			if (match)
+				remapped.index_remap.push_back(itr->second.unique_index);
+			else
+				LOGW("Hash collision in vertex dedup.\n");
+
+			is_unique = !match;
 		}
 		else
 		{
-			attribute_remapper[hash] = unique_count;
+			attribute_remapper[hash] = { unique_count, i };
+			is_unique = true;
+		}
+
+		if (is_unique)
+		{
 			remapped.index_remap.push_back(unique_count);
 			remapped.unique_attrib_to_source_index.push_back(i);
 			unique_count++;
@@ -82,28 +115,15 @@
 	return remapped;
 }
 
-static std::vector<uint32_t> build_canonical_index_buffer(const Mesh &mesh, const std::vector<unsigned> &index_remap)
+static std::vector<uint32_t> build_remapped_index_buffer(const Mesh &mesh, const std::vector<uint32_t> &index_remap)
 {
-	std::vector<uint32_t> index_buffer;
-	if (mesh.indices.empty())
-	{
-		index_buffer.reserve(mesh.count);
-		for (unsigned i = 0; i < mesh.count; i++)
-			index_buffer.push_back(index_remap[i]);
-	}
-	else if (mesh.index_type == VK_INDEX_TYPE_UINT32)
-	{
-		index_buffer.reserve(mesh.count);
-		for (unsigned i = 0; i < mesh.count; i++)
-			index_buffer.push_back(index_remap[reinterpret_cast<const uint32_t *>(mesh.indices.data())[i]]);
-	}
-	else if (mesh.index_type == VK_INDEX_TYPE_UINT16)
-	{
-		index_buffer.reserve(mesh.count);
-		for (unsigned i = 0; i < mesh.count; i++)
-			index_buffer.push_back(index_remap[reinterpret_cast<const uint16_t *>(mesh.indices.data())[i]]);
-	}
+	assert(mesh.topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST && mesh.index_type == VK_INDEX_TYPE_UINT32);
+	std::vector<uint32_t> index_buffer;
+	index_buffer.reserve(mesh.count);
+	const auto *indices = reinterpret_cast<const uint32_t *>(mesh.indices.data());
+	for (unsigned i = 0; i < mesh.count; i++)
+		index_buffer.push_back(index_remap[indices[i]]);
 	return index_buffer;
 }
 
@@ -190,7 +210,7 @@ static bool mesh_unroll_vertices(Mesh &mesh)
 
 	if (mesh.index_type == VK_INDEX_TYPE_UINT32)
 	{
-		const uint32_t *ibo = reinterpret_cast<const uint32_t *>(mesh.indices.data());
+		const auto *ibo = reinterpret_cast<const uint32_t *>(mesh.indices.data());
 		for (unsigned i = 0; i < mesh.count; i++)
 		{
 			uint32_t index = ibo[i];
@@ -204,7 +224,21 @@
 	}
 	else if (mesh.index_type == VK_INDEX_TYPE_UINT16)
 	{
-		const uint16_t *ibo = reinterpret_cast<const uint16_t *>(mesh.indices.data());
+		const auto *ibo = reinterpret_cast<const uint16_t *>(mesh.indices.data());
+		for (unsigned i = 0; i < mesh.count; i++)
+		{
+			uint16_t index = ibo[i];
+			memcpy(positions.data() + i * mesh.position_stride,
+			       mesh.positions.data() + index * mesh.position_stride,
+			       mesh.position_stride);
+			memcpy(attributes.data() + i * mesh.attribute_stride,
+			       mesh.attributes.data() + index * mesh.attribute_stride,
+			       mesh.attribute_stride);
+		}
+	}
+	else if (mesh.index_type == VK_INDEX_TYPE_UINT8_EXT)
+	{
+		const auto *ibo = mesh.indices.data();
 		for (unsigned i = 0; i < mesh.count; i++)
 		{
 			uint16_t index = ibo[i];
@@ -223,56 +257,122 @@ static bool mesh_unroll_vertices(Mesh &mesh)
 	return true;
 }
 
+bool mesh_canonicalize_indices(SceneFormats::Mesh &mesh)
+{
+	if (mesh.topology != VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST &&
+	    mesh.topology != VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP)
+	{
+		LOGE("Topology must be trilist or tristrip.\n");
+		return false;
+	}
+
+	std::vector<uint32_t> unrolled_indices;
+	unrolled_indices.reserve(mesh.count);
+
+	if (mesh.indices.empty())
+	{
+		for (unsigned i = 0; i < mesh.count; i++)
+			unrolled_indices.push_back(i);
+		mesh.index_type = VK_INDEX_TYPE_UINT32;
+	}
+	else if (mesh.index_type == VK_INDEX_TYPE_UINT32)
+	{
+		auto *indices = reinterpret_cast<const uint32_t *>(mesh.indices.data());
+		for (unsigned i = 0; i < mesh.count; i++)
+			unrolled_indices.push_back(indices[i]);
+	}
+	else if (mesh.index_type == VK_INDEX_TYPE_UINT16)
+	{
+		auto *indices = reinterpret_cast<const uint16_t *>(mesh.indices.data());
+		for (unsigned i = 0; i < mesh.count; i++)
+			unrolled_indices.push_back(mesh.primitive_restart && indices[i] == UINT16_MAX ? UINT32_MAX : indices[i]);
+	}
+	else if (mesh.index_type == VK_INDEX_TYPE_UINT8_EXT)
+	{
+		auto *indices = reinterpret_cast<const uint8_t *>(mesh.indices.data());
+		for (unsigned i = 0; i < mesh.count; i++)
+			unrolled_indices.push_back(mesh.primitive_restart && indices[i] == UINT8_MAX ? UINT32_MAX : indices[i]);
+	}
+
+	if (mesh.topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP)
+	{
+		std::vector<uint32_t> unstripped_indices;
+		unstripped_indices.reserve(mesh.count * 3);
+		unsigned primitive_count_since_restart = 0;
+
+		for (unsigned i = 2; i < mesh.count; i++)
+		{
+			bool emit_primitive = true;
+			if (mesh.primitive_restart &&
+			    (unrolled_indices[i - 2] == UINT32_MAX ||
+			     unrolled_indices[i - 1] == UINT32_MAX ||
+			     unrolled_indices[i - 0] == UINT32_MAX))
+			{
+				emit_primitive = false;
+				primitive_count_since_restart = 0;
+			}
+
+			if (emit_primitive)
+			{
+				unstripped_indices.push_back(unrolled_indices[i - 2]);
+				unstripped_indices.push_back(unrolled_indices[i - (1 ^ (primitive_count_since_restart & 1))]);
+				unstripped_indices.push_back(unrolled_indices[i - (primitive_count_since_restart & 1)]);
+				primitive_count_since_restart++;
+			}
+		}
+
+		unrolled_indices = std::move(unstripped_indices);
+		mesh.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
+	}
+
+	mesh.index_type = VK_INDEX_TYPE_UINT32;
+	mesh.count = uint32_t(unrolled_indices.size());
+	mesh.indices.resize(unrolled_indices.size() * sizeof(uint32_t));
+	memcpy(mesh.indices.data(), unrolled_indices.data(), mesh.indices.size());
+	return true;
+}
+
 void mesh_deduplicate_vertices(Mesh &mesh)
 {
-	auto index_remap = build_index_remap_list(mesh);
-	auto index_buffer = build_canonical_index_buffer(mesh, index_remap.index_remap);
+	mesh_canonicalize_indices(mesh);
+	auto index_remap = build_attribute_remap_indices(mesh);
+	auto index_buffer = build_remapped_index_buffer(mesh, index_remap.index_remap);
 	rebuild_new_attributes_remap_src(mesh.positions, mesh.position_stride,
 	                                 mesh.attributes, mesh.attribute_stride,
 	                                 mesh.positions, mesh.attributes, index_remap.unique_attrib_to_source_index);
 
-	mesh.index_type = VK_INDEX_TYPE_UINT32;
 	mesh.indices.resize(index_buffer.size() * sizeof(uint32_t));
-	size_t count = index_buffer.size();
-	for (size_t i = 0; i < count; i++)
-		reinterpret_cast<uint32_t *>(mesh.indices.data())[i] = index_buffer[i];
+	memcpy(mesh.indices.data(), index_buffer.data(), index_buffer.size() * sizeof(uint32_t));
 	mesh.count = unsigned(index_buffer.size());
 }
 
-Mesh mesh_optimize_index_buffer(const Mesh &mesh, bool stripify)
+bool mesh_optimize_index_buffer(Mesh &mesh, const IndexBufferOptimizeOptions &options)
 {
-	if (mesh.topology != VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST)
-		return mesh;
-
-	Mesh optimized;
-	optimized.position_stride = mesh.position_stride;
-	optimized.attribute_stride = mesh.attribute_stride;
+	if (!mesh_canonicalize_indices(mesh) || mesh.topology != VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST)
+		return false;
 
 	// Remove redundant indices and rewrite index and attribute buffers.
-	auto index_remap = build_index_remap_list(mesh);
-	auto index_buffer = build_canonical_index_buffer(mesh, index_remap.index_remap);
-	rebuild_new_attributes_remap_src(optimized.positions, optimized.position_stride,
-	                                 optimized.attributes, optimized.attribute_stride,
+	auto index_remap = build_attribute_remap_indices(mesh);
+	auto index_buffer = build_remapped_index_buffer(mesh, index_remap.index_remap);
+	rebuild_new_attributes_remap_src(mesh.positions, mesh.position_stride,
+	                                 mesh.attributes, mesh.attribute_stride,
 	                                 mesh.positions, mesh.attributes, index_remap.unique_attrib_to_source_index);
 
-	size_t vertex_count = optimized.positions.size() / optimized.position_stride;
+	size_t vertex_count = mesh.positions.size() / mesh.position_stride;
 
 	// Optimize for vertex cache.
 	meshopt_optimizeVertexCache(index_buffer.data(), index_buffer.data(), index_buffer.size(), vertex_count);
 
 	// Remap vertex fetch to get contiguous indices as much as possible.
-	std::vector<uint32_t> remap_table(optimized.positions.size() / optimized.position_stride);
+	std::vector<uint32_t> remap_table(mesh.positions.size() / mesh.position_stride);
 	meshopt_optimizeVertexFetchRemap(remap_table.data(), index_buffer.data(), index_buffer.size(), vertex_count);
 	index_buffer = remap_indices(index_buffer, remap_table);
-	rebuild_new_attributes_remap_dst(optimized.positions, optimized.position_stride,
-	                                 optimized.attributes, optimized.attribute_stride,
-	                                 optimized.positions, optimized.attributes, remap_table);
-
-	optimized.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
-	optimized.primitive_restart = false;
+	rebuild_new_attributes_remap_dst(mesh.positions, mesh.position_stride,
+	                                 mesh.attributes, mesh.attribute_stride,
+	                                 mesh.positions, mesh.attributes, remap_table);
 
-	if (stripify)
+	if (options.stripify)
 	{
 		// Try to stripify the mesh. If we end up with fewer indices, use that.
 		std::vector<uint32_t> stripped_index_buffer((index_buffer.size() / 3) * 4);
@@ -283,45 +383,41 @@ Mesh mesh_optimize_index_buffer(const Mesh &mesh, bool stripify)
 		stripped_index_buffer.resize(stripped_index_count);
 		if (stripped_index_count < index_buffer.size())
 		{
-			optimized.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
+			mesh.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
 			index_buffer = std::move(stripped_index_buffer);
-			optimized.primitive_restart = true;
+			mesh.primitive_restart = true;
 		}
 	}
 
-	uint32_t max_index = 0;
-	for (auto &i : index_buffer)
-		if (i != ~0u)
-			max_index = muglm::max(max_index, i);
-
-	if (max_index <= 0xffff) // 16-bit indices are enough.
+	bool emit_u32 = true;
+	if (options.narrow_index_buffer)
 	{
-		optimized.index_type = VK_INDEX_TYPE_UINT16;
-		optimized.indices.resize(index_buffer.size() * sizeof(uint16_t));
-		size_t count = index_buffer.size();
-		for (size_t i = 0; i < count; i++)
+		uint32_t max_index = 0;
+		for (auto &i: index_buffer)
+			if (i != ~0u)
+				max_index = muglm::max(max_index, i);
+
+		if (max_index <= 0xffff) // 16-bit indices are enough.
 		{
-			reinterpret_cast<uint16_t *>(optimized.indices.data())[i] =
-					index_buffer[i] == ~0u ? uint16_t(0xffffu) : uint16_t(index_buffer[i]);
+			mesh.index_type = VK_INDEX_TYPE_UINT16;
+			mesh.indices.resize(index_buffer.size() * sizeof(uint16_t));
+			size_t count = index_buffer.size();
+			emit_u32 = false;
+
+			auto *out_indices = reinterpret_cast<uint16_t *>(mesh.indices.data());
+			for (size_t i = 0; i < count; i++)
+				out_indices[i] = index_buffer[i] == ~0u ? uint16_t(0xffffu) : uint16_t(index_buffer[i]);
 		}
 	}
-	else
+
+	if (emit_u32)
 	{
-		optimized.index_type = VK_INDEX_TYPE_UINT32;
-		optimized.indices.resize(index_buffer.size() * sizeof(uint32_t));
-		size_t count = index_buffer.size();
-		for (size_t i = 0; i < count; i++)
-			reinterpret_cast<uint32_t *>(optimized.indices.data())[i] = index_buffer[i];
+		mesh.indices.resize(index_buffer.size() * sizeof(uint32_t));
+		memcpy(mesh.indices.data(), index_buffer.data(), index_buffer.size() * sizeof(uint32_t));
 	}
 
-	optimized.count = unsigned(index_buffer.size());
-
-	memcpy(optimized.attribute_layout, mesh.attribute_layout, sizeof(mesh.attribute_layout));
-	optimized.material_index = mesh.material_index;
-	optimized.has_material = mesh.has_material;
-	optimized.static_aabb = mesh.static_aabb;
-
-	return optimized;
+	mesh.count = unsigned(index_buffer.size());
+	return true;
 }
 
 bool mesh_recompute_tangents(Mesh &mesh)
diff --git a/renderer/formats/scene_formats.hpp b/renderer/formats/scene_formats.hpp
index 18661805e..ab2735803 100644
--- a/renderer/formats/scene_formats.hpp
+++ b/renderer/formats/scene_formats.hpp
@@ -256,7 +256,14 @@
 bool mesh_flip_tangents_w(Mesh &mesh);
 bool extract_collision_mesh(CollisionMesh &collision_mesh, const Mesh &mesh);
 
 void mesh_deduplicate_vertices(Mesh &mesh);
-Mesh mesh_optimize_index_buffer(const Mesh &mesh, bool stripify);
+bool mesh_canonicalize_indices(Mesh &mesh);
+
+struct IndexBufferOptimizeOptions
+{
+	bool narrow_index_buffer;
+	bool stripify;
+};
+bool mesh_optimize_index_buffer(Mesh &mesh, const IndexBufferOptimizeOptions &options);
 std::unordered_set build_used_nodes_in_scene(const SceneNodes &scene, const std::vector &nodes);
 }
 }
diff --git a/renderer/ground.cpp b/renderer/ground.cpp
index c25def4af..57072e87a 100644
--- a/renderer/ground.cpp
+++ b/renderer/ground.cpp
@@ -182,12 +182,14 @@ Ground::Ground(unsigned size_, const TerrainInfo &info_)
 	num_patches_z = size / info.base_patch_size;
 	patch_lods.resize(num_patches_x * num_patches_z);
 
-	heights = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), info.heightmap, ImageClass::Zeroable);
-	normals = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), info.normalmap, ImageClass::Normal);
-	occlusion = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), info.occlusionmap, ImageClass::Zeroable);
-	normals_fine = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), info.normalmap_fine, ImageClass::Normal);
-	base_color = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), info.base_color, ImageClass::Color);
-	type_map = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), info.splatmap, ImageClass::Zeroable);
+	heights = GRANITE_ASSET_MANAGER()->register_asset(*GRANITE_FILESYSTEM(), info.heightmap, AssetClass::ImageZeroable);
+	normals = GRANITE_ASSET_MANAGER()->register_asset(*GRANITE_FILESYSTEM(), info.normalmap, AssetClass::ImageNormal);
+	occlusion = GRANITE_ASSET_MANAGER()->register_asset(*GRANITE_FILESYSTEM(), info.occlusionmap,
+	                                                    AssetClass::ImageZeroable);
+	normals_fine = GRANITE_ASSET_MANAGER()->register_asset(*GRANITE_FILESYSTEM(), info.normalmap_fine,
+	                                                       AssetClass::ImageNormal);
+	base_color = GRANITE_ASSET_MANAGER()->register_asset(*GRANITE_FILESYSTEM(), info.base_color, AssetClass::ImageColor);
+	type_map = GRANITE_ASSET_MANAGER()->register_asset(*GRANITE_FILESYSTEM(), info.splatmap, AssetClass::ImageZeroable);
 
 	EVENT_MANAGER_REGISTER_LATCH(Ground, on_device_created, on_device_destroyed, DeviceCreatedEvent);
 }
diff --git a/renderer/ground.hpp b/renderer/ground.hpp
index 273338708..3377f711d 100644
--- a/renderer/ground.hpp
+++ b/renderer/ground.hpp
@@ -149,7 +149,7 @@ class Ground : public Util::IntrusivePtrEnabled<Ground>, public PerFrameRefresha
 	void refresh(const RenderContext &context, TaskComposer &composer) override;
 
-	ImageAssetID heights, normals, occlusion, normals_fine, base_color, type_map;
+	AssetID heights, normals, occlusion, normals_fine, base_color, type_map;
 	Vulkan::ImageHandle lod_map;
 	void on_device_created(const Vulkan::DeviceCreatedEvent &e);
 	void on_device_destroyed(const Vulkan::DeviceCreatedEvent &e);
diff --git a/renderer/lights/decal_volume.cpp b/renderer/lights/decal_volume.cpp
index 3e5e9cb66..30095675b 100644
--- a/renderer/lights/decal_volume.cpp
+++ b/renderer/lights/decal_volume.cpp
@@ -29,9 +29,9 @@ namespace Granite
 {
 VolumetricDecal::VolumetricDecal()
 {
-	tex = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(),
-	                                                       "builtin://textures/decal.png",
-	                                                       ImageClass::Color);
+	tex = GRANITE_ASSET_MANAGER()->register_asset(*GRANITE_FILESYSTEM(),
+	                                              "builtin://textures/decal.png",
+	                                              AssetClass::ImageColor);
 }
 
 const Vulkan::ImageView *VolumetricDecal::get_decal_view(Vulkan::Device &device) const
diff --git a/renderer/lights/decal_volume.hpp b/renderer/lights/decal_volume.hpp
index 62f86d556..062370cf9 100644
--- a/renderer/lights/decal_volume.hpp
+++ b/renderer/lights/decal_volume.hpp
@@ -38,6 +38,6 @@ class VolumetricDecal
 	static const AABB &get_static_aabb();
 
 private:
-	ImageAssetID tex;
+	AssetID tex;
 };
 }
diff --git a/renderer/material.hpp b/renderer/material.hpp
index b7c4ce1ed..c051a83d8 100644
--- a/renderer/material.hpp
+++ b/renderer/material.hpp
@@ -85,19 +85,19 @@ struct Material
 	{
 		info = std::move(info_);
 
-		static const ImageClass image_classes[] = {
-			ImageClass::Color,
-			ImageClass::Normal,
-			ImageClass::MetallicRoughness,
-			ImageClass::Color,
-			ImageClass::Color,
+		static const AssetClass image_classes[] = {
+			AssetClass::ImageColor,
+			AssetClass::ImageNormal,
+			AssetClass::ImageMetallicRoughness,
+			AssetClass::ImageColor,
+			AssetClass::ImageColor,
 		};
 
 		for (unsigned i = 0; i < Util::ecast(TextureKind::Count); i++)
 		{
 			if (!info.paths[i].empty())
 			{
-				textures[i] = GRANITE_ASSET_MANAGER()->register_image_resource(
+				textures[i] = GRANITE_ASSET_MANAGER()->register_asset(
 						*GRANITE_FILESYSTEM(), info.paths[i], image_classes[i]);
 			}
 		}
@@ -116,7 +116,7 @@ struct Material
 		return info;
 	}
 
-	ImageAssetID textures[Util::ecast(TextureKind::Count)];
+	AssetID textures[Util::ecast(TextureKind::Count)];
 	bool needs_emissive = false;
 	uint32_t
shader_variant = 0; diff --git a/renderer/mesh_util.cpp b/renderer/mesh_util.cpp index 69e0b6829..f89f29816 100644 --- a/renderer/mesh_util.cpp +++ b/renderer/mesh_util.cpp @@ -890,8 +890,8 @@ SkyCylinder::SkyCylinder(const std::string &bg_path) { if (!bg_path.empty()) { - texture = GRANITE_ASSET_MANAGER()->register_image_resource( - *GRANITE_FILESYSTEM(), bg_path, ImageClass::Color); + texture = GRANITE_ASSET_MANAGER()->register_asset( + *GRANITE_FILESYSTEM(), bg_path, AssetClass::ImageColor); } EVENT_MANAGER_REGISTER_LATCH(SkyCylinder, on_device_created, on_device_destroyed, DeviceCreatedEvent); @@ -1056,12 +1056,12 @@ Skybox::Skybox(const std::string &bg_path) { if (!bg_path.empty()) { - texture = GRANITE_ASSET_MANAGER()->register_image_resource( - *GRANITE_FILESYSTEM(), bg_path, ImageClass::Color); + texture = GRANITE_ASSET_MANAGER()->register_asset( + *GRANITE_FILESYSTEM(), bg_path, AssetClass::ImageColor); } } -void Skybox::set_image(ImageAssetID skybox) +void Skybox::set_image(AssetID skybox) { texture = skybox; } @@ -1195,8 +1195,8 @@ static void texture_plane_render(CommandBuffer &cmd, const RenderQueueData *info TexturePlane::TexturePlane(const std::string &normal_path) { - normalmap = GRANITE_ASSET_MANAGER()->register_image_resource( - *GRANITE_FILESYSTEM(), normal_path, ImageClass::Normal); + normalmap = GRANITE_ASSET_MANAGER()->register_asset( + *GRANITE_FILESYSTEM(), normal_path, AssetClass::ImageNormal); } void TexturePlane::setup_render_pass_resources(RenderGraph &graph) diff --git a/renderer/mesh_util.hpp b/renderer/mesh_util.hpp index 3394f7884..1b1b4fc13 100644 --- a/renderer/mesh_util.hpp +++ b/renderer/mesh_util.hpp @@ -195,7 +195,7 @@ class Skybox : public AbstractRenderable, public EventHandler { public: Skybox(const std::string &bg_path = ""); - void set_image(ImageAssetID skybox); + void set_image(AssetID skybox); void get_render_info(const RenderContext &context, const RenderInfoComponent *transform, RenderQueue &queue) const override; @@ -207,7 +207,7 @@ class Skybox : public AbstractRenderable, public EventHandler private: vec3 color = vec3(1.0f); - ImageAssetID texture; + AssetID texture; }; class SkyCylinder : public AbstractRenderable, public EventHandler @@ -231,7 +231,7 @@ class SkyCylinder : public AbstractRenderable, public EventHandler private: vec3 color = vec3(1.0f); float scale = 1.0f; - ImageAssetID texture; + AssetID texture; void on_device_created(const Vulkan::DeviceCreatedEvent &event); void on_device_destroyed(const Vulkan::DeviceCreatedEvent &event); @@ -284,7 +284,7 @@ class TexturePlane : public AbstractRenderable, public RenderPassCreator private: const Vulkan::ImageView *reflection = nullptr; const Vulkan::ImageView *refraction = nullptr; - ImageAssetID normalmap; + AssetID normalmap; RenderQueue internal_queue; vec3 position; diff --git a/renderer/post/smaa.cpp b/renderer/post/smaa.cpp index 8952fcb7e..1912bc0fc 100644 --- a/renderer/post/smaa.cpp +++ b/renderer/post/smaa.cpp @@ -145,10 +145,12 @@ void setup_smaa_postprocess(RenderGraph &graph, TemporalJitter &jitter, return true; }); - auto area = GRANITE_ASSET_MANAGER()->register_image_resource( - *GRANITE_FILESYSTEM(), "builtin://textures/smaa/area.gtx", ImageClass::Zeroable, AssetManager::persistent_prio()); - auto search = GRANITE_ASSET_MANAGER()->register_image_resource( - *GRANITE_FILESYSTEM(), "builtin://textures/smaa/search.gtx", ImageClass::Zeroable, AssetManager::persistent_prio()); + auto area = GRANITE_ASSET_MANAGER()->register_asset( + *GRANITE_FILESYSTEM(), 
"builtin://textures/smaa/area.gtx", AssetClass::ImageZeroable, + AssetManager::persistent_prio()); + auto search = GRANITE_ASSET_MANAGER()->register_asset( + *GRANITE_FILESYSTEM(), "builtin://textures/smaa/search.gtx", AssetClass::ImageZeroable, + AssetManager::persistent_prio()); smaa_weight.set_build_render_pass([&, area, search, edge = masked_edge, q = smaa_quality](Vulkan::CommandBuffer &cmd) { auto &input_image = graph.get_physical_texture_resource(weight_input_res); diff --git a/renderer/sprite.hpp b/renderer/sprite.hpp index bb2446ffa..7b078e5b4 100644 --- a/renderer/sprite.hpp +++ b/renderer/sprite.hpp @@ -61,8 +61,8 @@ struct SpriteRenderInfo struct Sprite : AbstractRenderable { DrawPipeline pipeline = DrawPipeline::Opaque; - ImageAssetID texture; - ImageAssetID texture_alt; + AssetID texture; + AssetID texture_alt; Vulkan::StockSampler sampler = Vulkan::StockSampler::LinearWrap; enum ShaderVariantFlagBits diff --git a/scene-export/CMakeLists.txt b/scene-export/CMakeLists.txt index b217e6f3d..fa741831a 100644 --- a/scene-export/CMakeLists.txt +++ b/scene-export/CMakeLists.txt @@ -6,6 +6,7 @@ add_granite_internal_lib(granite-scene-export gltf_export.cpp gltf_export.hpp rgtc_compressor.cpp rgtc_compressor.hpp tmx_parser.cpp tmx_parser.hpp + meshlet_export.cpp meshlet_export.hpp texture_utils.cpp texture_utils.hpp) target_include_directories(granite-scene-export PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/scene-export/gltf_export.cpp b/scene-export/gltf_export.cpp index a2c0084a3..9aa9e1cbb 100644 --- a/scene-export/gltf_export.cpp +++ b/scene-export/gltf_export.cpp @@ -892,7 +892,17 @@ void RemapState::emit_mesh(unsigned remapped_index) { Mesh new_mesh; if (options->optimize_meshes) - new_mesh = mesh_optimize_index_buffer(*mesh.info[remapped_index], options->stripify_meshes); + { + new_mesh = *mesh.info[remapped_index]; + IndexBufferOptimizeOptions opts = {}; + opts.narrow_index_buffer = true; + opts.stripify = options->stripify_meshes; + if (!mesh_optimize_index_buffer(new_mesh, opts)) + { + LOGE("Failed to optimize index buffer.\n"); + return; + } + } auto &output_mesh = options->optimize_meshes ? new_mesh : *mesh.info[remapped_index]; mesh_cache.resize(std::max(mesh_cache.size(), remapped_index + 1)); diff --git a/scene-export/meshlet_export.cpp b/scene-export/meshlet_export.cpp new file mode 100644 index 000000000..4f39eacc2 --- /dev/null +++ b/scene-export/meshlet_export.cpp @@ -0,0 +1,735 @@ +/* Copyright (c) 2017-2023 Hans-Kristian Arntzen + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "meshlet_export.hpp"
+#include "meshoptimizer.h"
+#include "enum_cast.hpp"
+#include "math.hpp"
+#include "filesystem.hpp"
+#include "meshlet.hpp"
+#include <algorithm>
+#include <unordered_map>
+#include <cassert>
+#include <cstring>
+
+namespace Granite
+{
+namespace Meshlet
+{
+using namespace Vulkan::Meshlet;
+
+struct Metadata : Header
+{
+	Bound bound;
+	Stream u32_streams[MaxU32Streams];
+};
+
+struct CombinedMesh
+{
+	uint32_t stream_count;
+	MeshStyle mesh_style;
+
+	std::vector<Metadata> meshlets;
+};
+
+struct Encoded
+{
+	std::vector<uint32_t> payload;
+	CombinedMesh mesh;
+};
+
+struct Meshlet
+{
+	uint32_t offset;
+	uint32_t count;
+};
+
+struct PrimitiveAnalysisResult
+{
+	uint32_t num_primitives;
+	uint32_t num_vertices;
+};
+
+static i16vec4 encode_vec3_to_snorm_exp(vec3 v)
+{
+	vec3 vabs = abs(v);
+	float max_scale = max(max(vabs.x, vabs.y), vabs.z);
+	int max_scale_log2 = int(muglm::floor(log2(max_scale)));
+	int scale_log2 = 14 - max_scale_log2;
+
+	// Maximum component should have range of [1, 2) since we use floor of log2, so scale with 2^14 instead of 15.
+	v.x = ldexpf(v.x, scale_log2);
+	v.y = ldexpf(v.y, scale_log2);
+	v.z = ldexpf(v.z, scale_log2);
+	v = clamp(round(v), vec3(-0x8000), vec3(0x7fff));
+
+	return i16vec4(i16vec3(v), int16_t(-scale_log2));
+}
+
+static i16vec3 encode_vec2_to_snorm_exp(vec2 v)
+{
+	vec2 vabs = abs(v);
+	float max_scale = max(vabs.x, vabs.y);
+	int max_scale_log2 = int(muglm::floor(log2(max_scale)));
+	int scale_log2 = 14 - max_scale_log2;
+
+	// UVs are unorm scaled, don't need more accuracy than this.
+	// If all UVs are in range of [0, 1] space, we should get a constant exponent which aids compression.
+	scale_log2 = min(scale_log2, 15);
+
+	// Maximum component should have range of [1, 2) since we use floor of log2, so scale with 2^14 instead of 15.
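+	// The negated scale is stored alongside the mantissas, so a decoder only needs
+	// one ldexp(float(mantissa), exponent) per component (see decode_snorm_exp() below).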
+	v.x = ldexpf(v.x, scale_log2);
+	v.y = ldexpf(v.y, scale_log2);
+	v = clamp(round(v), vec2(-0x8000), vec2(0x7fff));
+
+	return i16vec3(i16vec2(v), int16_t(-scale_log2));
+}
+
+static std::vector<i16vec4> mesh_extract_position_snorm_exp(const SceneFormats::Mesh &mesh)
+{
+	std::vector<i16vec4> encoded_positions;
+	std::vector<vec3> positions;
+
+	size_t num_positions = mesh.positions.size() / mesh.position_stride;
+	positions.resize(num_positions);
+	auto &layout = mesh.attribute_layout[Util::ecast(MeshAttribute::Position)];
+	auto fmt = layout.format;
+
+	if (fmt == VK_FORMAT_R32G32B32A32_SFLOAT || fmt == VK_FORMAT_R32G32B32_SFLOAT)
+	{
+		for (size_t i = 0; i < num_positions; i++)
+			memcpy(positions[i].data, mesh.positions.data() + i * mesh.position_stride + layout.offset, sizeof(float) * 3);
+	}
+	else if (fmt == VK_FORMAT_UNDEFINED)
+		return {};
+	else
+	{
+		LOGE("Unexpected format %u.\n", fmt);
+		return {};
+	}
+
+	encoded_positions.reserve(positions.size());
+	for (auto &pos : positions)
+		encoded_positions.push_back(encode_vec3_to_snorm_exp(pos));
+
+	return encoded_positions;
+}
+
+static std::vector<i8vec4> mesh_extract_normal_tangent_oct8(const SceneFormats::Mesh &mesh, MeshAttribute attr)
+{
+	std::vector<i8vec4> encoded_attributes;
+	std::vector<vec4> normals;
+
+	auto &layout = mesh.attribute_layout[Util::ecast(attr)];
+	auto fmt = layout.format;
+
+	size_t num_attrs = mesh.attributes.size() / mesh.attribute_stride;
+	normals.resize(num_attrs);
+
+	if (fmt == VK_FORMAT_R32G32B32_SFLOAT)
+	{
+		for (size_t i = 0; i < num_attrs; i++)
+		{
+			memcpy(normals[i].data,
+			       mesh.attributes.data() + i * mesh.attribute_stride + layout.offset,
+			       sizeof(float) * 3);
+			normals[i].w = 0.0f;
+		}
+	}
+	else if (fmt == VK_FORMAT_R32G32B32A32_SFLOAT)
+	{
+		for (size_t i = 0; i < num_attrs; i++)
+		{
+			memcpy(normals[i].data,
+			       mesh.attributes.data() + i * mesh.attribute_stride + layout.offset,
+			       sizeof(float) * 4);
+		}
+	}
+	else if (fmt == VK_FORMAT_UNDEFINED)
+		return {};
+	else
+	{
+		LOGE("Unexpected format %u.\n", fmt);
+		return {};
+	}
+
+	encoded_attributes.resize(normals.size());
+	meshopt_encodeFilterOct(encoded_attributes.data(), encoded_attributes.size(),
+	                        sizeof(i8vec4), 8, normals[0].data);
+	for (auto &n : encoded_attributes)
+		n.w = n.w <= 0 ? -1 : 0;
+
+	return encoded_attributes;
+}
+
+static i16vec4 encode_uv_to_snorm_scale(vec2 uv)
+{
+	// UVs tend to be in [0, 1] range. Readjust to use more of the available range.
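+	// Remap [0, 1] to [-1, 1] so the full signed 16-bit range takes part in the encoding.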
+	uv = 2.0f * uv - 1.0f;
+	return i16vec4(encode_vec2_to_snorm_exp(uv), 0);
+}
+
+static std::vector<i16vec4> mesh_extract_uv_snorm_scale(const SceneFormats::Mesh &mesh)
+{
+	std::vector<i16vec4> encoded_uvs;
+	std::vector<vec2> uvs;
+
+	size_t num_uvs = mesh.attributes.size() / mesh.attribute_stride;
+	uvs.resize(num_uvs);
+	auto &layout = mesh.attribute_layout[Util::ecast(MeshAttribute::UV)];
+	auto fmt = layout.format;
+
+	if (fmt == VK_FORMAT_R32G32_SFLOAT)
+	{
+		for (size_t i = 0; i < num_uvs; i++)
+			memcpy(uvs[i].data, mesh.attributes.data() + i * mesh.attribute_stride + layout.offset, sizeof(float) * 2);
+	}
+	else if (fmt == VK_FORMAT_R16G16_UNORM)
+	{
+		for (size_t i = 0; i < num_uvs; i++)
+		{
+			u16vec2 u16;
+			memcpy(u16.data, mesh.attributes.data() + i * mesh.attribute_stride + layout.offset, sizeof(uint16_t) * 2);
+			uvs[i] = vec2(u16) * float(1.0f / 0xffff);
+		}
+	}
+	else if (fmt == VK_FORMAT_UNDEFINED)
+		return {};
+	else
+	{
+		LOGE("Unexpected format %u.\n", fmt);
+		return {};
+	}
+
+	encoded_uvs.reserve(uvs.size());
+	for (auto &uv : uvs)
+		encoded_uvs.push_back(encode_uv_to_snorm_scale(uv));
+
+	return encoded_uvs;
+}
+
+static vec3 decode_snorm_exp(i16vec4 p)
+{
+	vec3 result;
+	result.x = ldexpf(float(p.x), p.w);
+	result.y = ldexpf(float(p.y), p.w);
+	result.z = ldexpf(float(p.z), p.w);
+	return result;
+}
+
+static PrimitiveAnalysisResult analyze_primitive_count(std::unordered_map<uint32_t, uint32_t> &vertex_remap,
+                                                       const uint32_t *index_buffer, uint32_t max_num_primitives)
+{
+	PrimitiveAnalysisResult result = {};
+	uint32_t vertex_count = 0;
+
+	// We can reference a maximum of 256 vertices.
+	vertex_remap.clear();
+
+	for (uint32_t i = 0; i < max_num_primitives; i++)
+	{
+		uint32_t index0 = index_buffer[3 * i + 0];
+		uint32_t index1 = index_buffer[3 * i + 1];
+		uint32_t index2 = index_buffer[3 * i + 2];
+
+		vertex_count = uint32_t(vertex_remap.size());
+
+		vertex_remap.insert({index0, uint32_t(vertex_remap.size())});
+		vertex_remap.insert({index1, uint32_t(vertex_remap.size())});
+		vertex_remap.insert({index2, uint32_t(vertex_remap.size())});
+
+		// If this primitive causes us to go out of bounds, reset.
+		if (vertex_remap.size() > MaxVertices)
+		{
+			max_num_primitives = i;
+			break;
+		}
+
+		vertex_count = uint32_t(vertex_remap.size());
+	}
+
+	result.num_primitives = max_num_primitives;
+	result.num_vertices = vertex_count;
+	return result;
+}
+
+// Analyze bits required to encode a signed delta.
+static uvec4 compute_required_bits_unsigned(u8vec4 delta)
+{
+	uvec4 result;
+	for (unsigned i = 0; i < 4; i++)
+	{
+		uint32_t v = delta[i];
+		result[i] = v == 0 ? 0 : (32 - leading_zeroes(v));
+	}
+	return result;
+}
+
+static uvec4 compute_required_bits_signed(u8vec4 delta)
+{
+	uvec4 result;
+	for (unsigned i = 0; i < 4; i++)
+	{
+		uint32_t v = delta[i];
+
+		if (v == 0)
+		{
+			result[i] = 0;
+		}
+		else
+		{
+			if (v >= 0x80u)
+				v ^= 0xffu;
+			result[i] = v == 0 ? 1 : (33 - leading_zeroes(v));
+		}
+	}
+	return result;
+}
+
+static uint32_t extract_bit_plane(const uint8_t *bytes, unsigned bit_index)
+{
+	uint32_t u32 = 0;
+	for (unsigned i = 0; i < 32; i++)
+		u32 |= ((bytes[4 * i] >> bit_index) & 1u) << i;
+	return u32;
+}
+
+static void find_linear_predictor(uint16_t *predictor,
+                                  const u8vec4 (&stream_buffer)[MaxElements],
+                                  unsigned num_elements)
+{
+	// Sign-extend since the deltas are considered to be signed ints.
+	ivec4 unrolled_data[MaxElements];
+	for (unsigned i = 0; i < num_elements; i++)
+		unrolled_data[i] = ivec4(i8vec4(stream_buffer[i]));
+
+	// Simple linear regression.
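+	// Per component: b = (n * sum(xy) - sum(x) * sum(y)) / (n * sum(x^2) - sum(x)^2),
+	// a = (sum(y) - b * sum(x)) / n, then quantized to u8.8 fixed point below.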
+	// Pilfered from: https://www.codesansar.com/numerical-methods/linear-regression-method-using-c-programming.htm
+	ivec4 x{0}, x2{0}, y{0}, xy{0};
+	for (unsigned i = 0; i < num_elements; i++)
+	{
+		x += int(i);
+		x2 += int(i * i);
+		y += unrolled_data[i];
+		xy += int(i) * unrolled_data[i];
+	}
+
+	int n = int(num_elements);
+	ivec4 b_denom = (n * x2 - x * x);
+	b_denom = select(b_denom, ivec4(1), equal(ivec4(0), b_denom));
+
+	// Encode in u8.8 fixed point.
+	ivec4 b = (ivec4(256) * (n * xy - x * y)) / b_denom;
+	ivec4 a = ((ivec4(256) * y - b * x)) / n;
+
+	for (unsigned i = 0; i < 4; i++)
+		predictor[i] = uint16_t(a[i]);
+	for (unsigned i = 0; i < 4; i++)
+		predictor[4 + i] = uint16_t(b[i]);
+}
+
+static void encode_stream(std::vector<uint32_t> &out_payload_buffer,
+                          Stream &stream, u8vec4 (&stream_buffer)[MaxElements],
+                          unsigned num_elements)
+{
+	stream.offset_from_base_u32 = uint32_t(out_payload_buffer.size());
+
+	// Delta-encode
+	u8vec4 current_value;
+	if (num_elements > 1)
+		current_value = u8vec4(2) * stream_buffer[0] - stream_buffer[1];
+	else
+		current_value = stream_buffer[0];
+	u8vec4 bias_value = current_value;
+
+	for (unsigned i = 0; i < num_elements; i++)
+	{
+		u8vec4 next_value = stream_buffer[i];
+		stream_buffer[i] = next_value - current_value;
+		current_value = next_value;
+	}
+
+	// Find optimal linear predictor.
+	find_linear_predictor(stream.predictor, stream_buffer, num_elements);
+
+	// u8.8 fixed point.
+	auto base_predictor = u16vec4(stream.predictor[0], stream.predictor[1], stream.predictor[2], stream.predictor[3]);
+	auto linear_predictor = u16vec4(stream.predictor[4], stream.predictor[5], stream.predictor[6], stream.predictor[7]);
+
+	for (unsigned i = 0; i < num_elements; i++)
+	{
+		// Only predict in-bounds elements, since we want all out of bounds elements to be encoded to 0 delta
+		// without having them affect the predictor.
+		stream_buffer[i] -= u8vec4((base_predictor + linear_predictor * uint16_t(i)) >> uint16_t(8));
+	}
+
+	for (unsigned i = num_elements; i < MaxElements; i++)
+		stream_buffer[i] = u8vec4(0);
+
+	// Try to adjust the range such that it can fit in fewer bits.
+	// We can use the constant term in the linear predictor to nudge values in place.
+	i8vec4 lo(127);
+	i8vec4 hi(-128);
+
+	for (unsigned i = 0; i < num_elements; i++)
+	{
+		lo = min(lo, i8vec4(stream_buffer[i]));
+		hi = max(hi, i8vec4(stream_buffer[i]));
+	}
+
+	uvec4 full_bits = compute_required_bits_unsigned(u8vec4(hi - lo));
+	u8vec4 target_lo_value = u8vec4(-((uvec4(1) << full_bits) >> 1u));
+	u8vec4 bias = target_lo_value - u8vec4(lo);
+
+	for (unsigned i = 0; i < num_elements; i++)
+		stream_buffer[i] += bias;
+
+	for (unsigned i = 0; i < 4; i++)
+		stream.predictor[i] -= uint16_t(bias[i]) << 8;
+
+	// Based on the linear predictor, it's possible that the encoded value in stream_buffer[0] becomes non-zero again.
+	// This is undesirable, since we can use the initial value to force a delta of 0 here, saving precious bits.
+	bias_value += stream_buffer[0];
+	stream_buffer[0] = u8vec4(0);
+
+	// Simple linear predictor, base equal elements[0], gradient = 0.
+	stream.predictor[8] = uint16_t((bias_value.y << 8) | bias_value.x);
+	stream.predictor[9] = uint16_t((bias_value.w << 8) | bias_value.z);
+
+	// Encode 32 elements at once.
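+	// Each chunk of 32 deltas is transposed into bit-planes: plane k packs bit k of one
+	// component from all 32 elements into a single u32. Only the planes below the
+	// required bit count are emitted, which is where the compression comes from.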
+	for (unsigned chunk_index = 0; chunk_index < MaxElements / 32; chunk_index++)
+	{
+		uvec4 required_bits = {};
+		for (unsigned i = 0; i < 32; i++)
+			required_bits = max(required_bits, compute_required_bits_signed(stream_buffer[chunk_index * 32 + i]));
+
+		// Encode bit counts.
+		stream.bitplane_meta[chunk_index] = uint16_t((required_bits.x << 0) | (required_bits.y << 4) |
+		                                             (required_bits.z << 8) | (required_bits.w << 12));
+
+		for (unsigned i = 0; i < required_bits.x; i++)
+			out_payload_buffer.push_back(extract_bit_plane(&stream_buffer[chunk_index * 32][0], i));
+		for (unsigned i = 0; i < required_bits.y; i++)
+			out_payload_buffer.push_back(extract_bit_plane(&stream_buffer[chunk_index * 32][1], i));
+		for (unsigned i = 0; i < required_bits.z; i++)
+			out_payload_buffer.push_back(extract_bit_plane(&stream_buffer[chunk_index * 32][2], i));
+		for (unsigned i = 0; i < required_bits.w; i++)
+			out_payload_buffer.push_back(extract_bit_plane(&stream_buffer[chunk_index * 32][3], i));
+	}
+}
+
+static void encode_mesh(Encoded &encoded,
+                        const Meshlet *meshlets, size_t num_meshlets,
+                        const uint32_t *index_buffer, uint32_t primitive_count,
+                        const uint32_t *attributes,
+                        unsigned num_u32_streams)
+{
+	encoded = {};
+	auto &mesh = encoded.mesh;
+	mesh.stream_count = num_u32_streams + 1;
+	mesh.meshlets.reserve(num_meshlets);
+	uint32_t base_vertex_offset = 0;
+
+	std::unordered_map<uint32_t, uint32_t> vbo_remap;
+	uint32_t primitive_index = 0;
+
+	for (uint32_t meshlet_index = 0; meshlet_index < num_meshlets; meshlet_index++)
+	{
+		uint32_t primitives_to_process = min(primitive_count - primitive_index, meshlets[meshlet_index].count);
+		assert(primitives_to_process);
+		assert(primitive_count > primitive_index);
+
+		primitive_index = meshlets[meshlet_index].offset;
+
+		auto analysis_result = analyze_primitive_count(
+				vbo_remap, index_buffer + 3 * primitive_index,
+				primitives_to_process);
+
+		assert(analysis_result.num_primitives);
+		assert(analysis_result.num_vertices);
+
+		primitives_to_process = analysis_result.num_primitives;
+
+		Metadata meshlet = {};
+		u8vec4 stream_buffer[MaxElements];
+
+		meshlet.base_vertex_offset = base_vertex_offset;
+		meshlet.num_primitives_minus_1 = analysis_result.num_primitives - 1;
+		meshlet.num_attributes_minus_1 = analysis_result.num_vertices - 1;
+		meshlet.reserved = 0;
+
+		// Encode index buffer.
+		for (uint32_t i = 0; i < analysis_result.num_primitives; i++)
+		{
+			uint8_t i0 = vbo_remap[index_buffer[3 * (primitive_index + i) + 0]];
+			uint8_t i1 = vbo_remap[index_buffer[3 * (primitive_index + i) + 1]];
+			uint8_t i2 = vbo_remap[index_buffer[3 * (primitive_index + i) + 2]];
+			stream_buffer[i] = u8vec4(i0, i1, i2, 0);
+		}
+
+		encode_stream(encoded.payload, meshlet.u32_streams[0], stream_buffer, analysis_result.num_primitives);
+
+		// Handle spill region just in case.
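+		// vbo_remap iterates in hash order; rebuild the meshlet-local -> global vertex
+		// table in local-index order by packing the local index into the high 32 bits
+		// and sorting.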
+		uint64_t vbo_remapping[MaxVertices + 3];
+		unsigned vbo_index = 0;
+		for (auto &v : vbo_remap)
+		{
+			assert(vbo_index < MaxVertices + 3);
+			vbo_remapping[vbo_index++] = (uint64_t(v.second) << 32) | v.first;
+		}
+		std::sort(vbo_remapping, vbo_remapping + vbo_index);
+
+		for (uint32_t stream_index = 0; stream_index < num_u32_streams; stream_index++)
+		{
+			for (uint32_t i = 0; i < analysis_result.num_vertices; i++)
+			{
+				auto vertex_index = uint32_t(vbo_remapping[i]);
+				uint32_t payload = attributes[stream_index + num_u32_streams * vertex_index];
+				memcpy(stream_buffer[i].data, &payload, sizeof(payload));
+			}
+
+			encode_stream(encoded.payload, meshlet.u32_streams[stream_index + 1], stream_buffer,
+			              analysis_result.num_vertices);
+		}
+
+		mesh.meshlets.push_back(meshlet);
+		base_vertex_offset += analysis_result.num_vertices;
+		primitive_index += primitives_to_process;
+	}
+}
+
+static bool export_encoded_mesh(const std::string &path, const Encoded &encoded)
+{
+	size_t required_size = 0;
+
+	FormatHeader header = {};
+
+	header.style = encoded.mesh.mesh_style;
+	header.u32_stream_count = encoded.mesh.stream_count;
+	header.meshlet_count = uint32_t(encoded.mesh.meshlets.size());
+	header.payload_size_words = uint32_t(encoded.payload.size());
+
+	required_size += sizeof(magic);
+	required_size += sizeof(FormatHeader);
+
+	// Per-meshlet metadata.
+	required_size += encoded.mesh.meshlets.size() * sizeof(Header);
+
+	// Bounds.
+	required_size += encoded.mesh.meshlets.size() * sizeof(Bound);
+
+	// Stream metadata.
+	required_size += encoded.mesh.stream_count * encoded.mesh.meshlets.size() * sizeof(Stream);
+
+	// Payload.
+	// Need a padding word to speed up decoder.
+	required_size += (encoded.payload.size() + 1) * sizeof(uint32_t);
+
+	auto file = GRANITE_FILESYSTEM()->open(path, FileMode::WriteOnly);
+	if (!file)
+		return false;
+
+	auto mapping = file->map_write(required_size);
+	if (!mapping)
+		return false;
+
+	auto *ptr = mapping->mutable_data<unsigned char>();
+
+	memcpy(ptr, magic, sizeof(magic));
+	ptr += sizeof(magic);
+	memcpy(ptr, &header, sizeof(header));
+	ptr += sizeof(header);
+
+	for (uint32_t i = 0; i < header.meshlet_count; i++)
+	{
+		auto &gpu = static_cast<const Header &>(encoded.mesh.meshlets[i]);
+		memcpy(ptr, &gpu, sizeof(gpu));
+		ptr += sizeof(gpu);
+	}
+
+	for (uint32_t i = 0; i < header.meshlet_count; i++)
+	{
+		auto &bound = encoded.mesh.meshlets[i].bound;
+		memcpy(ptr, &bound, sizeof(bound));
+		ptr += sizeof(bound);
+	}
+
+	for (uint32_t i = 0; i < header.meshlet_count; i++)
+	{
+		for (uint32_t j = 0; j < header.u32_stream_count; j++)
+		{
+			memcpy(ptr, &encoded.mesh.meshlets[i].u32_streams[j], sizeof(Stream));
+			ptr += sizeof(Stream);
+		}
+	}
+
+	memcpy(ptr, encoded.payload.data(), encoded.payload.size() * sizeof(uint32_t));
+	ptr += encoded.payload.size() * sizeof(uint32_t);
+	memset(ptr, 0, sizeof(uint32_t));
+	return true;
+}
+
+bool export_mesh_to_meshlet(const std::string &path, SceneFormats::Mesh mesh, MeshStyle style)
+{
+	if (!mesh_optimize_index_buffer(mesh, {}))
+		return false;
+
+	std::vector<i16vec4> positions, uv;
+	std::vector<i8vec4> normals, tangent;
+
+	unsigned num_u32_streams = 0;
+
+	switch (style)
+	{
+	case MeshStyle::Skinned:
+		LOGE("Unimplemented.\n");
+		return false;
+	case MeshStyle::Textured:
+		uv = mesh_extract_uv_snorm_scale(mesh);
+		num_u32_streams += 2;
+		if (uv.empty())
+		{
+			LOGE("No UVs.\n");
+			return false;
+		}
+		// Fallthrough
+	case MeshStyle::Untextured:
+		normals = mesh_extract_normal_tangent_oct8(mesh, MeshAttribute::Normal);
+		tangent = mesh_extract_normal_tangent_oct8(mesh, MeshAttribute::Tangent);
+		if (normals.empty() || tangent.empty())
+		{
+			LOGE("No normal or tangent.\n");
+			return false;
+		}
+		num_u32_streams += 2;
+		// Fallthrough
+	case MeshStyle::Wireframe:
+		positions = mesh_extract_position_snorm_exp(mesh);
+		if (positions.empty())
+		{
+			LOGE("No positions.\n");
+			return false;
+		}
+		num_u32_streams += 2;
+		break;
+
+	default:
+		LOGE("Unknown mesh style.\n");
+		return false;
+	}
+
+	std::vector<uint32_t> attributes(num_u32_streams * positions.size());
+	uint32_t *ptr = attributes.data();
+	for (size_t i = 0, n = positions.size(); i < n; i++)
+	{
+		memcpy(ptr, positions[i].data, sizeof(positions.front()));
+		ptr += sizeof(positions.front()) / sizeof(uint32_t);
+
+		if (!normals.empty())
+		{
+			memcpy(ptr, normals[i].data, sizeof(normals.front()));
+			ptr += sizeof(normals.front()) / sizeof(uint32_t);
+		}
+
+		if (!tangent.empty())
+		{
+			memcpy(ptr, tangent[i].data, sizeof(tangent.front()));
+			ptr += sizeof(tangent.front()) / sizeof(uint32_t);
+		}
+
+		if (!uv.empty())
+		{
+			memcpy(ptr, uv[i].data, sizeof(uv.front()));
+			ptr += sizeof(uv.front()) / sizeof(uint32_t);
+		}
+	}
+
+	// Use quantized position to guide the clustering.
+	std::vector<vec3> position_buffer;
+	position_buffer.reserve(positions.size());
+	for (auto &p : positions)
+		position_buffer.push_back(decode_snorm_exp(p));
+
+	// Special meshoptimizer limit.
+	constexpr unsigned max_vertices = 255;
+	constexpr unsigned max_primitives = 256;
+	size_t num_meshlets = meshopt_buildMeshletsBound(mesh.count, max_vertices, max_primitives);
+
+	std::vector<uint32_t> out_vertex_redirection_buffer(num_meshlets * max_vertices);
+	std::vector<unsigned char> local_index_buffer(num_meshlets * max_primitives * 3);
+	std::vector<meshopt_Meshlet> meshlets(num_meshlets);
+
+	num_meshlets = meshopt_buildMeshlets(meshlets.data(),
+	                                     out_vertex_redirection_buffer.data(), local_index_buffer.data(),
+	                                     reinterpret_cast<const uint32_t *>(mesh.indices.data()), mesh.count,
+	                                     position_buffer[0].data, positions.size(), sizeof(vec3),
+	                                     max_vertices, max_primitives, 0.75f);
+
+	meshlets.resize(num_meshlets);
+
+	std::vector<Meshlet> out_meshlets;
+	std::vector<uvec3> out_index_buffer;
+
+	out_meshlets.reserve(num_meshlets);
+	for (auto &meshlet : meshlets)
+	{
+		Meshlet m = {};
+		m.offset = uint32_t(out_index_buffer.size());
+		m.count = meshlet.triangle_count;
+		out_meshlets.push_back(m);
+
+		auto *local_indices = local_index_buffer.data() + meshlet.triangle_offset;
+		for (unsigned i = 0; i < meshlet.triangle_count; i++)
+		{
+			out_index_buffer.emplace_back(
+					out_vertex_redirection_buffer[local_indices[3 * i + 0] + meshlet.vertex_offset],
+					out_vertex_redirection_buffer[local_indices[3 * i + 1] + meshlet.vertex_offset],
+					out_vertex_redirection_buffer[local_indices[3 * i + 2] + meshlet.vertex_offset]);
+		}
+	}
+
+	std::vector<meshopt_Bounds> bounds;
+	bounds.clear();
+	bounds.reserve(num_meshlets);
+	for (auto &meshlet : out_meshlets)
+	{
+		auto bound = meshopt_computeClusterBounds(
+				out_index_buffer[meshlet.offset].data, meshlet.count * 3,
+				position_buffer[0].data, positions.size(), sizeof(vec3));
+		bounds.push_back(bound);
+	}
+
+	Encoded encoded;
+	encode_mesh(encoded, out_meshlets.data(), out_meshlets.size(),
+	            out_index_buffer[0].data, out_index_buffer.size(),
+	            attributes.data(), num_u32_streams);
+	encoded.mesh.mesh_style = style;
+
+	assert(bounds.size() == encoded.mesh.meshlets.size());
+	const auto *pbounds = bounds.data();
+	for (auto &meshlet : encoded.mesh.meshlets)
+	{
+		memcpy(meshlet.bound.center, pbounds->center, sizeof(float) * 3);
+		meshlet.bound.radius = pbounds->radius;
+		memcpy(meshlet.bound.cone_axis_cutoff, pbounds->cone_axis_s8, sizeof(pbounds->cone_axis_s8));
+		meshlet.bound.cone_axis_cutoff[3] = pbounds->cone_cutoff_s8;
+		pbounds++;
+	}
+
+	return export_encoded_mesh(path, encoded);
+}
+}
+}
diff --git a/scene-export/meshlet_export.hpp b/scene-export/meshlet_export.hpp
new file mode 100644
index 000000000..7527e80c4
--- /dev/null
+++ b/scene-export/meshlet_export.hpp
@@ -0,0 +1,36 @@
+/* Copyright (c) 2017-2023 Hans-Kristian Arntzen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#pragma once
+
+#include <string>
+#include "scene_formats.hpp"
+#include "meshlet.hpp"
+
+namespace Granite
+{
+namespace Meshlet
+{
+bool export_mesh_to_meshlet(const std::string &path, SceneFormats::Mesh mesh, Vulkan::Meshlet::MeshStyle style);
+}
+}
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index aceb84851..6c123829e 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -161,8 +161,22 @@ add_granite_offline_tool(linkage-test linkage_test.cpp)
 add_granite_offline_tool(external-objects external_objects.cpp)
 add_granite_offline_tool(performance-query performance_query.cpp)
 add_granite_offline_tool(asset-manager-test asset_manager_test.cpp)
+
+add_granite_offline_tool(meshopt-sandbox meshopt_sandbox.cpp)
+if (NOT ANDROID)
+    target_compile_definitions(meshopt-sandbox PRIVATE ASSET_DIRECTORY=\"${CMAKE_CURRENT_SOURCE_DIR}/assets\")
+endif()
+target_link_libraries(meshopt-sandbox PRIVATE granite-scene-export)
+
+add_granite_application(meshlet-viewer meshlet_viewer.cpp)
+if (NOT ANDROID)
+    target_compile_definitions(meshlet-viewer PRIVATE ASSET_DIRECTORY=\"${CMAKE_CURRENT_SOURCE_DIR}/assets\")
+endif()
+target_link_libraries(meshlet-viewer PRIVATE granite-scene-export)
+
 add_granite_application(dgc-test-graphics dgc_test_graphics.cpp)
 add_granite_application(dgc-test-compute dgc_test_compute.cpp)
+
 if (NOT ANDROID)
     target_compile_definitions(dgc-test-graphics PRIVATE ASSET_DIRECTORY=\"${CMAKE_CURRENT_SOURCE_DIR}/assets\")
     target_compile_definitions(dgc-test-compute PRIVATE ASSET_DIRECTORY=\"${CMAKE_CURRENT_SOURCE_DIR}/assets\")
diff --git a/tests/asset_manager_test.cpp b/tests/asset_manager_test.cpp
index 7f1bb3dbf..0a70e4cef 100644
--- a/tests/asset_manager_test.cpp
+++ b/tests/asset_manager_test.cpp
@@ -6,18 +6,18 @@ using namespace Granite;
 
 struct ActivationInterface final : AssetInstantiatorInterface
 {
-	uint64_t estimate_cost_image_resource(ImageAssetID, File &mapping) override
+	uint64_t estimate_cost_asset(AssetID, File
&mapping) override { return mapping.get_size(); } - void instantiate_image_resource(AssetManager &manager, TaskGroup *, ImageAssetID id, File &mapping) override + void instantiate_asset(AssetManager &manager, TaskGroup *, AssetID id, File &mapping) override { LOGI("Instantiating ID: %u\n", id.id); manager.update_cost(id, mapping.get_size()); } - void release_image_resource(ImageAssetID id) override + void release_asset(AssetID id) override { LOGI("Releasing ID: %u\n", id.id); } @@ -54,29 +54,29 @@ int main() auto d = fs.open("tmp://d"); auto e = fs.open("tmp://e"); - auto id_a = manager.register_image_resource(std::move(a), ImageClass::Zeroable); - auto id_b = manager.register_image_resource(std::move(b), ImageClass::Zeroable); - auto id_c = manager.register_image_resource(std::move(c), ImageClass::Zeroable); - auto id_d = manager.register_image_resource(std::move(d), ImageClass::Zeroable); + auto id_a = manager.register_asset(std::move(a), AssetClass::ImageZeroable); + auto id_b = manager.register_asset(std::move(b), AssetClass::ImageZeroable); + auto id_c = manager.register_asset(std::move(c), AssetClass::ImageZeroable); + auto id_d = manager.register_asset(std::move(d), AssetClass::ImageZeroable); manager.set_asset_instantiator_interface(&iface); - auto id_e = manager.register_image_resource(std::move(e), ImageClass::Zeroable); + auto id_e = manager.register_asset(std::move(e), AssetClass::ImageZeroable); - manager.set_image_budget(25); - manager.set_image_budget_per_iteration(5); + manager.set_asset_budget(25); + manager.set_asset_budget_per_iteration(5); - manager.set_image_residency_priority(id_a, 1); - manager.set_image_residency_priority(id_b, 1); - manager.set_image_residency_priority(id_c, 1); - manager.set_image_residency_priority(id_d, 1); - manager.set_image_residency_priority(id_e, 2); + manager.set_asset_residency_priority(id_a, 1); + manager.set_asset_residency_priority(id_b, 1); + manager.set_asset_residency_priority(id_c, 1); + manager.set_asset_residency_priority(id_d, 1); + manager.set_asset_residency_priority(id_e, 2); manager.iterate(nullptr); LOGI("Cost: %u\n", unsigned(manager.get_current_total_consumed())); manager.iterate(nullptr); LOGI("Cost: %u\n", unsigned(manager.get_current_total_consumed())); - manager.set_image_residency_priority(id_e, 0); + manager.set_asset_residency_priority(id_e, 0); manager.iterate(nullptr); LOGI("Cost: %u\n", unsigned(manager.get_current_total_consumed())); - manager.set_image_budget(10); + manager.set_asset_budget(10); manager.iterate(nullptr); LOGI("Cost: %u\n", unsigned(manager.get_current_total_consumed())); } \ No newline at end of file diff --git a/tests/assets/shaders/meshlet_debug.frag b/tests/assets/shaders/meshlet_debug.frag new file mode 100644 index 000000000..2f2f02484 --- /dev/null +++ b/tests/assets/shaders/meshlet_debug.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(location = 0) in mediump vec3 vNormal; +layout(location = 1) in mediump vec4 vTangent; +layout(location = 2) in vec2 vUV; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(vNormal.xyz * 0.5 + 0.5, 1.0); +} diff --git a/tests/assets/shaders/meshlet_debug.mesh b/tests/assets/shaders/meshlet_debug.mesh new file mode 100644 index 000000000..5098e49aa --- /dev/null +++ b/tests/assets/shaders/meshlet_debug.mesh @@ -0,0 +1,79 @@ +#version 450 +#extension GL_EXT_mesh_shader : require + +layout(max_primitives = 256, max_vertices = 255, triangles) out; + +#include "meshlet_payload_constants.h" + +#if MESHLET_PAYLOAD_LARGE_WORKGROUP 
+#define MESHLET_PAYLOAD_WG_Y MESHLET_PAYLOAD_NUM_CHUNKS +#else +#define MESHLET_PAYLOAD_WG_Y 1 +#endif +layout(local_size_x = 32, local_size_y = MESHLET_PAYLOAD_WG_Y) in; + +layout(constant_id = 0) const uint NUM_U32_STREAMS = MESHLET_PAYLOAD_MAX_STREAMS; +#define MESHLET_PAYLOAD_NUM_U32_STREAMS NUM_U32_STREAMS + +#define MESHLET_PAYLOAD_DESCRIPTOR_SET 0 +#define MESHLET_PAYLOAD_META_BINDING 0 +#define MESHLET_PAYLOAD_STREAM_BINDING 1 +#define MESHLET_PAYLOAD_PAYLOAD_BINDING 2 +#include "meshlet_payload_decode.h" +#include "meshlet_attribute_decode.h" + +layout(location = 0) perprimitiveEXT out uint vMeshletIndex[]; +layout(location = 1) out mediump vec3 vNormal[]; +layout(location = 2) out mediump vec4 vTangent[]; +layout(location = 3) out vec2 vUV[]; + +layout(set = 1, binding = 0) uniform UBO +{ + mat4 VP; +}; + +void main() +{ + uint meshlet_index = gl_WorkGroupID.x; + MeshletMetaRuntime meta = meshlet_metas_runtime.data[meshlet_index]; + meshlet_init_workgroup(meta.stream_offset); + + SetMeshOutputsEXT(meta.num_attributes, meta.num_primitives); + +#define INDEX(index, value) \ + if (index < meta.num_primitives) \ + { \ + gl_PrimitiveTriangleIndicesEXT[index] = uvec4(unpack8(value)).xyz; \ + vMeshletIndex[index] = meshlet_index; \ + } + MESHLET_DECODE_STREAM_32(meta.stream_offset, 0, INDEX); + +#define POSITION(index, value) \ + if (index < meta.num_attributes) \ + { \ + vec3 pos = attribute_decode_snorm_exp_position(value); \ + gl_MeshVerticesEXT[index].gl_Position = VP * vec4(pos, 1.0); \ + } + MESHLET_DECODE_STREAM_64(meta.stream_offset, 1, POSITION); + +#define NORMAL(index, value) \ + if (index < meta.num_attributes) \ + { \ + vNormal[index] = attribute_decode_oct8_normal_tangent(value).xyz; \ + } + MESHLET_DECODE_STREAM_32(meta.stream_offset, 3, NORMAL); + +#define TANGENT(index, value) \ + if (index < meta.num_attributes) \ + { \ + vTangent[index] = attribute_decode_oct8_normal_tangent(value); \ + } + MESHLET_DECODE_STREAM_32(meta.stream_offset, 4, TANGENT); + +#define UV(index, value) \ + if (index < meta.num_attributes) \ + { \ + vUV[index] = attribute_decode_snorm_exp_uv(value); \ + } + MESHLET_DECODE_STREAM_64(meta.stream_offset, 5, UV); +} \ No newline at end of file diff --git a/tests/assets/shaders/meshlet_debug.mesh.frag b/tests/assets/shaders/meshlet_debug.mesh.frag new file mode 100644 index 000000000..9eb97dabb --- /dev/null +++ b/tests/assets/shaders/meshlet_debug.mesh.frag @@ -0,0 +1,25 @@ +#version 450 +#extension GL_EXT_mesh_shader : require + +layout(location = 0) perprimitiveEXT in flat uint vMeshletIndex; +layout(location = 1) in mediump vec3 vNormal; +layout(location = 2) in mediump vec4 vTangent; +layout(location = 3) in vec2 vUV; + +layout(location = 0) out vec4 FragColor; + +vec3 decode_mesh_color() +{ + uint index = vMeshletIndex * 1991u; + index ^= (index >> 5u); + uint r = bitfieldExtract(index, 0, 2); + uint g = bitfieldExtract(index, 2, 2); + uint b = bitfieldExtract(index, 4, 2); + //return (vec3(r, g, b) + 1.0 / 3.0) / 4.0; + return vec3(1.0); +} + +void main() +{ + FragColor = vec4(decode_mesh_color() * (vNormal.xyz * 0.5 + 0.5), 1.0); +} \ No newline at end of file diff --git a/tests/assets/shaders/meshlet_debug.vert b/tests/assets/shaders/meshlet_debug.vert new file mode 100644 index 000000000..e07088150 --- /dev/null +++ b/tests/assets/shaders/meshlet_debug.vert @@ -0,0 +1,23 @@ +#version 450 + +layout(location = 0) in vec3 POS; +layout(location = 1) in mediump vec3 N; +layout(location = 2) in mediump vec4 T; +layout(location = 3) in vec2 UV; + 
+layout(location = 0) out mediump vec3 vNormal; +layout(location = 1) out mediump vec4 vTangent; +layout(location = 2) out vec2 vUV; + +layout(set = 1, binding = 0) uniform UBO +{ + mat4 VP; +}; + +void main() +{ + vNormal = N; + vTangent = T; + vUV = UV; + gl_Position = VP * vec4(POS, 1.0); +} diff --git a/tests/bandlimited_pixel_test.cpp b/tests/bandlimited_pixel_test.cpp index 4e9d34f18..83fa0f36d 100644 --- a/tests/bandlimited_pixel_test.cpp +++ b/tests/bandlimited_pixel_test.cpp @@ -117,7 +117,8 @@ struct BandlimitedPixelTestApplication : Application, EventHandler { "BANDLIMITED_PIXEL_USE_TRANSCENDENTAL", 1 }, }); - auto texture = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), "assets://textures/sprite.png", ImageClass::Color); + auto texture = GRANITE_ASSET_MANAGER()->register_asset(*GRANITE_FILESYSTEM(), "assets://textures/sprite.png", + AssetClass::ImageColor); auto *view = cmd->get_device().get_resource_manager().get_image_view_blocking(texture); cmd->set_texture(2, 0, *view, mode == 0 ? StockSampler::NearestWrap : StockSampler::TrilinearWrap); diff --git a/tests/meshlet_viewer.cpp b/tests/meshlet_viewer.cpp new file mode 100644 index 000000000..2e19ef3f5 --- /dev/null +++ b/tests/meshlet_viewer.cpp @@ -0,0 +1,165 @@ +/* Copyright (c) 2017-2023 Hans-Kristian Arntzen + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+#include "application.hpp"
+#include "command_buffer.hpp"
+#include "device.hpp"
+#include "os_filesystem.hpp"
+#include "muglm/muglm_impl.hpp"
+#include "meshlet.hpp"
+#include "aabb.hpp"
+#include "event.hpp"
+#include "camera.hpp"
+#include "event_manager.hpp"
+#include <stdexcept>
+
+using namespace Granite;
+using namespace Vulkan;
+using namespace Vulkan::Meshlet;
+
+static uint32_t style_to_u32_streams(MeshStyle style)
+{
+	switch (style)
+	{
+	case MeshStyle::Wireframe:
+		return 3;
+	case MeshStyle::Untextured:
+		return 4;
+	case MeshStyle::Textured:
+		return 7;
+	case MeshStyle::Skinned:
+		return 9;
+	default:
+		return 0;
+	}
+}
+
+struct MeshletViewerApplication : Granite::Application, Granite::EventHandler
+{
+	MeshletViewerApplication(const char *path)
+	{
+		get_wsi().set_backbuffer_srgb(false);
+		mesh_id = GRANITE_ASSET_MANAGER()->register_asset(*GRANITE_FILESYSTEM(), path, Granite::AssetClass::Mesh);
+		EVENT_MANAGER_REGISTER_LATCH(MeshletViewerApplication, on_device_create, on_device_destroy, DeviceCreatedEvent);
+	}
+
+	AABB aabb;
+	FPSCamera camera;
+	Granite::AssetID mesh_id;
+
+	void on_device_create(const DeviceCreatedEvent &e)
+	{
+		e.get_device().get_shader_manager().add_include_directory("builtin://shaders/inc");
+	}
+
+	void on_device_destroy(const DeviceCreatedEvent &)
+	{
+	}
+
+	void render_frame(double, double) override
+	{
+		auto &wsi = get_wsi();
+		auto &device = wsi.get_device();
+		auto cmd = device.request_command_buffer();
+
+		cmd->begin_render_pass(device.get_swapchain_render_pass(SwapchainRenderPass::Depth));
+		camera.set_aspect(cmd->get_viewport().width / cmd->get_viewport().height);
+
+		cmd->set_opaque_state();
+
+		auto vp = camera.get_projection() * camera.get_view();
+		*cmd->allocate_typed_constant_data<mat4>(1, 0, 1) = vp;
+		auto draw = device.get_resource_manager().get_mesh_draw_range(mesh_id);
+
+		if (draw.count && device.get_resource_manager().get_mesh_encoding() == Vulkan::ResourceManager::MeshEncoding::Meshlet)
+		{
+			bool large_workgroup =
+					device.get_device_features().mesh_shader_properties.maxPreferredMeshWorkGroupInvocations > 32 &&
+					device.get_device_features().mesh_shader_properties.maxMeshWorkGroupInvocations >= 256;
+
+			cmd->set_program("", "assets://shaders/meshlet_debug.mesh",
+			                 "assets://shaders/meshlet_debug.mesh.frag",
+			                 {{"MESHLET_PAYLOAD_LARGE_WORKGROUP", int(large_workgroup)}});
+
+			cmd->set_storage_buffer(0, 0, *device.get_resource_manager().get_meshlet_header_buffer());
+			cmd->set_storage_buffer(0, 1, *device.get_resource_manager().get_meshlet_stream_header_buffer());
+			cmd->set_storage_buffer(0, 2, *device.get_resource_manager().get_meshlet_payload_buffer());
+
+			cmd->enable_subgroup_size_control(true, VK_SHADER_STAGE_MESH_BIT_EXT);
+			cmd->set_subgroup_size_log2(true, 5, 5, VK_SHADER_STAGE_MESH_BIT_EXT);
+			cmd->set_specialization_constant_mask(1);
+			cmd->set_specialization_constant(0, style_to_u32_streams(draw.style));
+
+			cmd->push_constants(&draw.offset, 0, sizeof(draw.offset));
+			cmd->draw_mesh_tasks(draw.count, 1, 1);
+		}
+		else if (draw.count)
+		{
+			auto *ibo = device.get_resource_manager().get_index_buffer();
+			auto *pos = device.get_resource_manager().get_position_buffer();
+			auto *attr = device.get_resource_manager().get_attribute_buffer();
+			auto *indirect = device.get_resource_manager().get_indirect_buffer();
+
+			cmd->set_program("assets://shaders/meshlet_debug.vert", "assets://shaders/meshlet_debug.frag");
+			cmd->set_index_buffer(*ibo, 0, VK_INDEX_TYPE_UINT8_EXT);
+			cmd->set_vertex_binding(0, *pos, 0, 12);
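+			// Binding 0 is tightly packed vec3 positions (12-byte stride); binding 1
+			// interleaves normal, tangent and UV into a 16-byte stride, matching the
+			// vertex attributes declared below.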
+			cmd->set_vertex_binding(1, *attr, 0, 16);
+			cmd->set_vertex_attrib(0, 0, VK_FORMAT_R32G32B32_SFLOAT, 0);
+			cmd->set_vertex_attrib(1, 1, VK_FORMAT_A2B10G10R10_SNORM_PACK32, 0);
+			cmd->set_vertex_attrib(2, 1, VK_FORMAT_A2B10G10R10_SNORM_PACK32, 4);
+			cmd->set_vertex_attrib(3, 1, VK_FORMAT_R32G32_SFLOAT, 8);
+			cmd->draw_indexed_indirect(*indirect,
+			                           draw.offset * sizeof(VkDrawIndexedIndirectCommand),
+			                           draw.count, sizeof(VkDrawIndexedIndirectCommand));
+		}
+
+		cmd->end_render_pass();
+		device.submit(cmd);
+	}
+};
+
+namespace Granite
+{
+Application *application_create(int argc, char **argv)
+{
+	GRANITE_APPLICATION_SETUP_FILESYSTEM();
+
+	if (argc != 2)
+	{
+		LOGE("Usage: meshlet-viewer path.msh1\n");
+		return nullptr;
+	}
+
+	try
+	{
+		auto *app = new MeshletViewerApplication(argv[1]);
+		return app;
+	}
+	catch (const std::exception &e)
+	{
+		LOGE("application_create() threw exception: %s\n", e.what());
+		return nullptr;
+	}
+}
+}
diff --git a/tests/meshopt_sandbox.cpp b/tests/meshopt_sandbox.cpp
new file mode 100644
index 000000000..84180e1e6
--- /dev/null
+++ b/tests/meshopt_sandbox.cpp
@@ -0,0 +1,285 @@
+#include "logging.hpp"
+#include <vector>
+#include "math.hpp"
+#include "device.hpp"
+#include "context.hpp"
+#include "muglm/muglm_impl.hpp"
+#include "gltf.hpp"
+#include "global_managers_init.hpp"
+#include "meshlet_export.hpp"
+#include "meshlet.hpp"
+#include <cassert>
+#include <cstring>
+using namespace Granite;
+using namespace Vulkan::Meshlet;
+
+static void decode_mesh_setup_buffers(
+		std::vector<uint32_t> &out_index_buffer, std::vector<uint32_t> &out_u32_stream,
+		const MeshView &mesh)
+{
+	assert(mesh.format_header->u32_stream_count > 1);
+
+	out_index_buffer.clear();
+	out_u32_stream.clear();
+	out_index_buffer.resize(mesh.total_primitives * 3);
+	out_u32_stream.resize(mesh.total_vertices * (mesh.format_header->u32_stream_count - 1));
+}
+
+static void decode_mesh(std::vector<uint32_t> &out_index_buffer, std::vector<uint32_t> &out_u32_stream,
+                        const MeshView &mesh)
+{
+	decode_mesh_setup_buffers(out_index_buffer, out_u32_stream, mesh);
+	out_index_buffer.clear();
+	const unsigned u32_stride = mesh.format_header->u32_stream_count - 1;
+
+	for (uint32_t meshlet_index = 0; meshlet_index < mesh.format_header->meshlet_count; meshlet_index++)
+	{
+		auto &meshlet = mesh.headers[meshlet_index];
+		for (unsigned stream_index = 0; stream_index < mesh.format_header->u32_stream_count; stream_index++)
+		{
+			auto &stream = mesh.streams[meshlet_index * mesh.format_header->u32_stream_count + stream_index];
+			const uint32_t *pdata = mesh.payload + stream.offset_from_base_u32;
+
+			u8vec4 deltas[MaxElements] = {};
+			const u16vec4 base_predictor = u16vec4(
+					stream.predictor[0], stream.predictor[1],
+					stream.predictor[2], stream.predictor[3]);
+			const u16vec4 linear_predictor = u16vec4(
+					stream.predictor[4], stream.predictor[5],
+					stream.predictor[6], stream.predictor[7]);
+			const u8vec4 initial_value =
+					u8vec4(u16vec2(stream.predictor[8], stream.predictor[9]).xxyy() >> u16vec4(0, 8, 0, 8));
+
+			for (unsigned chunk = 0; chunk < (MaxElements / 32); chunk++)
+			{
+				auto bits_per_u8 = (uvec4(stream.bitplane_meta[chunk]) >> uvec4(0, 4, 8, 12)) & 0xfu;
+				uvec4 bitplanes[8] = {};
+
+				for (unsigned comp = 0; comp < 4; comp++)
+				{
+					for (unsigned bit = 0; bit < bits_per_u8[comp]; bit++)
+						bitplanes[bit][comp] = *pdata++;
+
+					// Sign-extend.
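+					// The top encoded plane carries the sign; replicate it into the
+					// remaining planes so the u8 delta reads back as two's complement.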
+					unsigned bit_count = bits_per_u8[comp];
+					if (bit_count)
+						for (unsigned bit = bit_count; bit < 8; bit++)
+							bitplanes[bit][comp] = bitplanes[bit_count - 1][comp];
+				}
+
+				for (unsigned i = 0; i < 32; i++)
+				{
+					for (uint32_t bit = 0; bit < 8; bit++)
+						deltas[chunk * 32 + i] |= u8vec4(((bitplanes[bit] >> i) & 1u) << bit);
+				}
+			}
+
+			// Apply predictors.
+			deltas[0] += initial_value;
+			for (unsigned i = 0; i < MaxElements; i++)
+				deltas[i] += u8vec4((base_predictor + linear_predictor * u16vec4(i)) >> u16vec4(8));
+
+			// Resolve deltas.
+			for (unsigned i = 1; i < MaxElements; i++)
+				deltas[i] += deltas[i - 1];
+
+			if (stream_index == 0)
+			{
+				// Index decode.
+				unsigned num_primitives = meshlet.num_primitives_minus_1 + 1;
+				for (unsigned i = 0; i < num_primitives; i++)
+					for (unsigned j = 0; j < 3; j++)
+						out_index_buffer.push_back(deltas[i][j] + meshlet.base_vertex_offset);
+			}
+			else
+			{
+				// Attributes.
+				unsigned num_attributes = meshlet.num_attributes_minus_1 + 1;
+				auto *out_attr = out_u32_stream.data() + meshlet.base_vertex_offset * u32_stride + (stream_index - 1);
+				for (unsigned i = 0; i < num_attributes; i++, out_attr += u32_stride)
+					memcpy(out_attr, deltas[i].data, sizeof(*out_attr));
+			}
+		}
+	}
+}
+
+static void decode_mesh_gpu(
+		Vulkan::Device &dev,
+		std::vector<uint32_t> &out_index_buffer, std::vector<uint32_t> &out_u32_stream,
+		const MeshView &mesh)
+{
+	decode_mesh_setup_buffers(out_index_buffer, out_u32_stream, mesh);
+
+	Vulkan::BufferCreateInfo buf_info = {};
+	buf_info.domain = Vulkan::BufferDomain::LinkedDeviceHost;
+	buf_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
+	buf_info.size = mesh.format_header->payload_size_words * sizeof(uint32_t);
+	auto payload_buffer = dev.create_buffer(buf_info, mesh.payload);
+
+	buf_info.size = out_index_buffer.size() * sizeof(uint32_t);
+	buf_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
+	buf_info.domain = Vulkan::BufferDomain::CachedHost;
+	auto readback_decoded_index_buffer = dev.create_buffer(buf_info);
+
+	buf_info.size = out_u32_stream.size() * sizeof(uint32_t);
+	buf_info.domain = Vulkan::BufferDomain::CachedHost;
+	auto readback_decoded_u32_buffer = dev.create_buffer(buf_info);
+
+	bool has_renderdoc = Vulkan::Device::init_renderdoc_capture();
+	if (has_renderdoc)
+		dev.begin_renderdoc_capture();
+
+	auto cmd = dev.request_command_buffer();
+
+	DecodeInfo info = {};
+	info.ibo = readback_decoded_index_buffer.get();
+	info.streams[0] = readback_decoded_u32_buffer.get();
+	info.target_style = mesh.format_header->style;
+	info.payload = payload_buffer.get();
+	info.flags = DECODE_MODE_RAW_PAYLOAD;
+
+	decode_mesh(*cmd, info, mesh);
+	cmd->barrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
+	             VK_PIPELINE_STAGE_HOST_BIT, VK_ACCESS_HOST_READ_BIT);
+	dev.submit(cmd);
+	dev.wait_idle();
+
+	if (has_renderdoc)
+		dev.end_renderdoc_capture();
+
+	memcpy(out_index_buffer.data(),
+	       dev.map_host_buffer(*readback_decoded_index_buffer, Vulkan::MEMORY_ACCESS_READ_BIT),
+	       out_index_buffer.size() * sizeof(uint32_t));
+
+	memcpy(out_u32_stream.data(),
+	       dev.map_host_buffer(*readback_decoded_u32_buffer, Vulkan::MEMORY_ACCESS_READ_BIT),
+	       out_u32_stream.size() * sizeof(uint32_t));
+}
+
+static bool validate_mesh_decode(const std::vector<uint32_t> &decoded_index_buffer,
+                                 const std::vector<uint32_t> &decoded_u32_stream,
+                                 const std::vector<uint32_t> &reference_index_buffer,
+                                 const std::vector<uint32_t> &reference_u32_stream, unsigned u32_stride)
+{
+	std::vector<uint32_t> decoded_output;
+	std::vector<uint32_t> reference_output;
+
+	if (decoded_index_buffer.size() != reference_index_buffer.size())
+		return false;
+
+	size_t count = decoded_index_buffer.size();
+
+	decoded_output.reserve(count * u32_stride);
+	reference_output.reserve(count * u32_stride);
+	for (size_t i = 0; i < count; i++)
+	{
+		uint32_t decoded_index = decoded_index_buffer[i];
+		decoded_output.insert(decoded_output.end(),
+		                      decoded_u32_stream.data() + decoded_index * u32_stride,
+		                      decoded_u32_stream.data() + (decoded_index + 1) * u32_stride);
+
+		uint32_t reference_index = reference_index_buffer[i];
+		reference_output.insert(reference_output.end(),
+		                        reference_u32_stream.data() + reference_index * u32_stride,
+		                        reference_u32_stream.data() + (reference_index + 1) * u32_stride);
+	}
+
+	for (size_t i = 0; i < count; i++)
+	{
+		for (unsigned j = 0; j < u32_stride; j++)
+		{
+			uint32_t decoded_value = decoded_output[i * u32_stride + j];
+			uint32_t reference_value = reference_output[i * u32_stride + j];
+			if (decoded_value != reference_value)
+			{
+				LOGI("Error in index %zu (prim %zu), word %u, expected %x, got %x.\n",
+				     i, i / 3, j, reference_value, decoded_value);
+				return false;
+			}
+		}
+	}
+
+	return true;
+}
+
+int main(int argc, char *argv[])
+{
+	if (argc != 2)
+		return EXIT_FAILURE;
+
+	Global::init(Global::MANAGER_FEATURE_FILESYSTEM_BIT);
+	Filesystem::setup_default_filesystem(GRANITE_FILESYSTEM(), ASSET_DIRECTORY);
+
+	GLTF::Parser parser(argv[1]);
+
+	Vulkan::Context ctx;
+	Vulkan::Device dev;
+	if (!Vulkan::Context::init_loader(nullptr))
+		return EXIT_FAILURE;
+
+	Vulkan::Context::SystemHandles handles;
+	handles.filesystem = GRANITE_FILESYSTEM();
+	ctx.set_system_handles(handles);
+	if (!ctx.init_instance_and_device(nullptr, 0, nullptr, 0))
+		return EXIT_FAILURE;
+	dev.set_context(ctx);
+	dev.init_frame_contexts(4);
+
+	auto mesh = parser.get_meshes().front();
+
+	if (!Meshlet::export_mesh_to_meshlet("export.msh1",
+	                                     mesh, MeshStyle::Textured))
+	{
+		return EXIT_FAILURE;
+	}
+
+	auto file = GRANITE_FILESYSTEM()->open("export.msh1", FileMode::ReadOnly);
+	if (!file)
+		return EXIT_FAILURE;
+
+	auto mapped = file->map();
+	if (!mapped)
+		return EXIT_FAILURE;
+
+	auto view = create_mesh_view(*mapped);
+
+	std::vector<uint32_t> reference_index_buffer;
+	std::vector<uint32_t> reference_attributes;
+	std::vector<uint32_t> gpu_index_buffer;
+	std::vector<uint32_t> gpu_attributes;
+
+	decode_mesh(reference_index_buffer, reference_attributes, view);
+	decode_mesh_gpu(dev, gpu_index_buffer, gpu_attributes, view);
+
+	if (!validate_mesh_decode(gpu_index_buffer, gpu_attributes,
+	                          reference_index_buffer, reference_attributes,
+	                          view.format_header->u32_stream_count - 1))
+	{
+		return EXIT_FAILURE;
+	}
+
+	{
+		LOGI("Total primitives: %u\n", view.total_primitives);
+		LOGI("Total vertices: %u\n", view.total_vertices);
+		LOGI("Payload size: %llu bytes.\n", static_cast<unsigned long long>(view.format_header->payload_size_words * sizeof(uint32_t)));
+
+		unsigned long long uncompressed_mesh_size =
+				view.total_primitives * sizeof(uint32_t) * 3 +
+				view.total_vertices * (view.format_header->u32_stream_count - 1) * sizeof(uint32_t);
+		unsigned long long uncompressed_payload_size =
+				view.total_primitives * sizeof(uint32_t) +
+				view.total_vertices * (view.format_header->u32_stream_count - 1) * sizeof(uint32_t);
+		LOGI("Uncompressed mesh size: %llu bytes.\n", uncompressed_mesh_size);
+		LOGI("Uncompressed payload size: %llu bytes.\n", uncompressed_payload_size);
+	}
+
+	{
+		file = GRANITE_FILESYSTEM()->open("export.bin", FileMode::WriteOnly);
+		mapped = file->map_write((reference_index_buffer.size() + reference_attributes.size()) * sizeof(uint32_t));
+		auto *ptr = mapped->mutable_data<uint32_t>();
+		memcpy(ptr, reference_index_buffer.data(), reference_index_buffer.size() * sizeof(uint32_t));
+		memcpy(ptr + reference_index_buffer.size(), reference_attributes.data(), reference_attributes.size() * sizeof(uint32_t));
+	}
+
+	return 0;
+}
\ No newline at end of file
diff --git a/tests/ui_sandbox.cpp b/tests/ui_sandbox.cpp
index c2d7b4440..faddfb44a 100644
--- a/tests/ui_sandbox.cpp
+++ b/tests/ui_sandbox.cpp
@@ -50,8 +50,8 @@ UIApplication::UIApplication()
 	window->show_title_bar(false);
 	window->set_floating(false);
 	window->set_background_color(vec4(0.0f, 1.0f, 0.0f, 1.0f));
-	window->set_background_image(GRANITE_ASSET_MANAGER()->register_image_resource(
-			*GRANITE_FILESYSTEM(), "builtin://textures/checkerboard.png", ImageClass::Color));
+	window->set_background_image(GRANITE_ASSET_MANAGER()->register_asset(
+			*GRANITE_FILESYSTEM(), "builtin://textures/checkerboard.png", AssetClass::ImageColor));
 
 	auto button = make_handle();
 	window->add_child(button);
@@ -90,8 +90,8 @@ UIApplication::UIApplication()
 		slider->show_value(false);
 		slider->set_margin(5.0f);
 		slider->show_tooltip(true);
-		slider->set_background_image(GRANITE_ASSET_MANAGER()->register_image_resource(
-				*GRANITE_FILESYSTEM(), "builtin://textures/checkerboard.png", ImageClass::Color));
+		slider->set_background_image(GRANITE_ASSET_MANAGER()->register_asset(
+				*GRANITE_FILESYSTEM(), "builtin://textures/checkerboard.png", AssetClass::ImageColor));
 		slider->set_background_color(vec4(1.0f));
 	}
 
@@ -111,8 +111,8 @@ UIApplication::UIApplication()
 		sli.show_value(false);
 		sli.set_margin(5.0f);
 		sli.show_tooltip(true);
-		sli.set_background_image(GRANITE_ASSET_MANAGER()->register_image_resource(
-				*GRANITE_FILESYSTEM(), "builtin://textures/checkerboard.png", ImageClass::Color));
+		sli.set_background_image(GRANITE_ASSET_MANAGER()->register_asset(
+				*GRANITE_FILESYSTEM(), "builtin://textures/checkerboard.png", AssetClass::ImageColor));
 		sli.set_background_color(vec4(1.0f));
 	}
 
@@ -126,8 +126,8 @@ UIApplication::UIApplication()
 		btn.set_text("Mjuu");
 		btn.set_toggled_font_color(vec4(0.0f, 1.0f, 0.0f, 1.0f));
 		btn.set_untoggled_font_color(vec4(1.0f, 0.0f, 0.0f, 1.0f));
-		btn.set_background_image(GRANITE_ASSET_MANAGER()->register_image_resource(
-				*GRANITE_FILESYSTEM(), "builtin://textures/checkerboard.png", ImageClass::Color));
+		btn.set_background_image(GRANITE_ASSET_MANAGER()->register_asset(
+				*GRANITE_FILESYSTEM(), "builtin://textures/checkerboard.png", AssetClass::ImageColor));
 		btn.set_background_color(vec4(1.0f));
 	}
 }
diff --git a/third_party/meshoptimizer b/third_party/meshoptimizer
index 5baa38ef5..eb385d698 160000
--- a/third_party/meshoptimizer
+++ b/third_party/meshoptimizer
@@ -1 +1 @@
-Subproject commit 5baa38ef5cd288c6a4d1b3a69f8a168943d593cd
+Subproject commit eb385d6987d12f33a4e0284cf2ba6660c9272602
diff --git a/tools/aa_bench.cpp b/tools/aa_bench.cpp
index 21b73cc33..77df0433e 100644
--- a/tools/aa_bench.cpp
+++ b/tools/aa_bench.cpp
@@ -27,7 +27,7 @@ class AABenchApplication : public Application, public EventHandler
 	void on_swapchain_changed(const SwapchainParameterEvent &e);
 	void on_swapchain_destroyed(const SwapchainParameterEvent &e);
 
-	ImageAssetID images[2] = {};
+	AssetID images[2] = {};
 	RenderGraph graph;
 	TemporalJitter jitter;
 	RenderContext render_context;
@@ -39,8 +39,12 @@ AABenchApplication::AABenchApplication(const std::string &input0, const std::str
 	: input_path0(input0), input_path1(input1), scale(scale_)
 {
 	type = string_to_post_antialiasing_type(method);
-	images[0] = input_path0.empty() ?
ImageAssetID{} : GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), input_path0, ImageClass::Color); - images[1] = input_path1.empty() ? ImageAssetID{} : GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), input_path1, ImageClass::Color); + images[0] = input_path0.empty() ? AssetID{} : GRANITE_ASSET_MANAGER()->register_asset(*GRANITE_FILESYSTEM(), + input_path0, + AssetClass::ImageColor); + images[1] = input_path1.empty() ? AssetID{} : GRANITE_ASSET_MANAGER()->register_asset(*GRANITE_FILESYSTEM(), + input_path1, + AssetClass::ImageColor); EVENT_MANAGER_REGISTER_LATCH(AABenchApplication, on_swapchain_changed, on_swapchain_destroyed, SwapchainParameterEvent); EVENT_MANAGER_REGISTER_LATCH(AABenchApplication, on_device_created, on_device_destroyed, DeviceCreatedEvent); } diff --git a/tools/convert_cube_to_environment.cpp b/tools/convert_cube_to_environment.cpp index d1659cc8f..857ae3ac5 100644 --- a/tools/convert_cube_to_environment.cpp +++ b/tools/convert_cube_to_environment.cpp @@ -81,7 +81,7 @@ int main(int argc, char *argv[]) device.set_context(context); device.init_external_swapchain({ ImageHandle(nullptr) }); - auto cube = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), args.cube, ImageClass::Color); + auto cube = GRANITE_ASSET_MANAGER()->register_asset(*GRANITE_FILESYSTEM(), args.cube, AssetClass::ImageColor); auto *view = device.get_resource_manager().get_image_view_blocking(cube); auto specular = convert_cube_to_ibl_specular(device, *view); auto diffuse = convert_cube_to_ibl_diffuse(device, *view); diff --git a/tools/convert_equirect_to_environment.cpp b/tools/convert_equirect_to_environment.cpp index 7b6252d37..488561597 100644 --- a/tools/convert_equirect_to_environment.cpp +++ b/tools/convert_equirect_to_environment.cpp @@ -86,7 +86,8 @@ int main(int argc, char *argv[]) device.init_external_swapchain({ ImageHandle(nullptr) }); auto &textures = device.get_resource_manager(); - auto equirect = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), args.equirect, ImageClass::Color); + auto equirect = GRANITE_ASSET_MANAGER()->register_asset(*GRANITE_FILESYSTEM(), args.equirect, + AssetClass::ImageColor); auto *view = textures.get_image_view_blocking(equirect); auto cube = convert_equirect_to_cube(device, *view, args.cube_scale); diff --git a/tools/texture_viewer.cpp b/tools/texture_viewer.cpp index 6cd40d83e..617049184 100644 --- a/tools/texture_viewer.cpp +++ b/tools/texture_viewer.cpp @@ -37,8 +37,8 @@ struct TextureViewerApplication : Granite::Application, Granite::EventHandler TextureViewerApplication(std::string path_) : path(std::move(path_)) { - texture = GRANITE_ASSET_MANAGER()->register_image_resource(*GRANITE_FILESYSTEM(), - path, ImageClass::Color); + texture = GRANITE_ASSET_MANAGER()->register_asset(*GRANITE_FILESYSTEM(), + path, AssetClass::ImageColor); EVENT_MANAGER_REGISTER(TextureViewerApplication, on_key_pressed, KeyboardEvent); } @@ -138,7 +138,7 @@ struct TextureViewerApplication : Granite::Application, Granite::EventHandler unsigned layer = 0; unsigned level = 0; - ImageAssetID texture; + AssetID texture; std::string path; VkComponentMapping swiz = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A }; }; diff --git a/ui/image_widget.cpp b/ui/image_widget.cpp index 687bae071..67ca4a751 100644 --- a/ui/image_widget.cpp +++ b/ui/image_widget.cpp @@ -33,9 +33,9 @@ namespace UI { Image::Image(const std::string &path, vec2 target) { - texture = 
GRANITE_ASSET_MANAGER()->register_image_resource( + texture = GRANITE_ASSET_MANAGER()->register_asset( *GRANITE_FILESYSTEM(), path, - ImageClass::Color); + AssetClass::ImageColor); geometry.minimum = target; geometry.target = target; diff --git a/ui/image_widget.hpp b/ui/image_widget.hpp index 39575f316..70f2ecd95 100644 --- a/ui/image_widget.hpp +++ b/ui/image_widget.hpp @@ -45,7 +45,7 @@ class Image : public Widget private: float render(FlatRenderer &renderer, float layout, vec2 offset, vec2 size) override; void reconfigure_to_canvas(vec2 offset, vec2 size) override; - ImageAssetID texture; + AssetID texture; Vulkan::StockSampler sampler = Vulkan::StockSampler::LinearClamp; vec2 sprite_offset; diff --git a/ui/widget.hpp b/ui/widget.hpp index e13a7b3e9..5ede61344 100644 --- a/ui/widget.hpp +++ b/ui/widget.hpp @@ -129,7 +129,7 @@ class Widget : public Util::IntrusivePtrEnabled needs_redraw = true; } - void set_background_image(ImageAssetID texture) + void set_background_image(AssetID texture) { bg_image = texture; needs_redraw = true; @@ -181,7 +181,7 @@ class Widget : public Util::IntrusivePtrEnabled vec2 floating_position = vec2(0.0f); vec4 bg_color = vec4(1.0f, 1.0f, 1.0f, 0.0f); - ImageAssetID bg_image; + AssetID bg_image; bool needs_redraw = true; bool floating = false; diff --git a/util/arena_allocator.hpp b/util/arena_allocator.hpp index 7a923240d..68c2378e0 100644 --- a/util/arena_allocator.hpp +++ b/util/arena_allocator.hpp @@ -98,6 +98,13 @@ struct AllocationArena uint32_t heap_availability_mask = 0; }; +struct SuballocationResult +{ + uint32_t offset; + uint32_t size; + uint32_t mask; +}; + template class ArenaAllocator { @@ -131,11 +138,16 @@ class ArenaAllocator return sub_block_size * Util::LegionAllocator::NumSubBlocks; } - inline uint32_t get_block_alignment() const + inline uint32_t get_sub_block_size() const { return sub_block_size; } + inline uint32_t get_block_alignment() const + { + return get_sub_block_size(); + } + inline bool allocate(uint32_t size, BackingAllocation *alloc) { unsigned num_blocks = (size + sub_block_size - 1) >> sub_block_size_log2; @@ -149,7 +161,7 @@ class ArenaAllocator assert(index >= (num_blocks - 1)); auto &heap = *itr; - static_cast(this)->prepare_allocation(alloc, heap, suballocate(num_blocks, heap)); + static_cast(this)->prepare_allocation(alloc, itr, suballocate(num_blocks, heap)); unsigned new_index = heap.heap.get_longest_run() - 1; @@ -168,7 +180,6 @@ class ArenaAllocator heap_arena.heap_availability_mask &= ~(1u << index); } - alloc->heap = itr; return true; } @@ -186,9 +197,8 @@ class ArenaAllocator } // This cannot fail. 
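// A minimal sketch of the sub-block accounting used by allocate() above, under
// the assumption that Util::LegionAllocator::NumSubBlocks == 32 (consistent with
// the uint32_t occupancy masks in this file). An allocation of `size` units
// consumes ceil(size / sub_block_size) contiguous sub-blocks of one mini-heap:

#include <cassert>
#include <cstdint>

static uint32_t blocks_needed(uint32_t size, uint32_t sub_block_size_log2)
{
	uint32_t sub_block_size = 1u << sub_block_size_log2;
	return (size + sub_block_size - 1) >> sub_block_size_log2;
}

int main()
{
	// E.g. sub_block_size = 256 and a request for 1000 units -> 4 sub-blocks.
	assert(blocks_needed(1000, 8) == 4);
	return 0;
}

// A freshly grabbed heap has a free run covering all of its sub-blocks, which is
// why the suballocation below "cannot fail":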
- static_cast(this)->prepare_allocation(alloc, heap, suballocate(num_blocks, heap)); + static_cast(this)->prepare_allocation(alloc, node, suballocate(num_blocks, heap)); - alloc->heap = node; if (heap.heap.full()) { heap_arena.full_heaps.insert_front(node); @@ -254,13 +264,6 @@ class ArenaAllocator uint32_t sub_block_size = 1; uint32_t sub_block_size_log2 = 0; - struct SuballocationResult - { - uint32_t offset; - uint32_t size; - uint32_t mask; - }; - private: inline SuballocationResult suballocate(uint32_t num_blocks, MiniHeap &heap) { diff --git a/video/ffmpeg_hw_device.cpp b/video/ffmpeg_hw_device.cpp index d952cfb8f..86131f2de 100644 --- a/video/ffmpeg_hw_device.cpp +++ b/video/ffmpeg_hw_device.cpp @@ -21,6 +21,8 @@ */ #define __STDC_LIMIT_MACROS 1 +#define __STDC_CONSTANT_MACROS 1 + #include "ffmpeg_hw_device.hpp" #include "logging.hpp" #include "device.hpp" diff --git a/vulkan/CMakeLists.txt b/vulkan/CMakeLists.txt index d25606bdb..328221cac 100644 --- a/vulkan/CMakeLists.txt +++ b/vulkan/CMakeLists.txt @@ -55,6 +55,7 @@ if (GRANITE_VULKAN_SYSTEM_HANDLES) target_sources(granite-vulkan PRIVATE texture/memory_mapped_texture.cpp texture/memory_mapped_texture.hpp + mesh/meshlet.hpp mesh/meshlet.cpp texture/texture_files.cpp texture/texture_files.hpp texture/texture_decoder.cpp texture/texture_decoder.hpp) @@ -64,7 +65,8 @@ if (GRANITE_VULKAN_SYSTEM_HANDLES) target_include_directories(granite-vulkan PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/managers - ${CMAKE_CURRENT_SOURCE_DIR}/texture) + ${CMAKE_CURRENT_SOURCE_DIR}/texture + ${CMAKE_CURRENT_SOURCE_DIR}/mesh) if (GRANITE_VULKAN_SHADER_MANAGER_RUNTIME_COMPILER) target_compile_definitions(granite-vulkan PUBLIC GRANITE_VULKAN_SHADER_MANAGER_RUNTIME_COMPILER=1) diff --git a/vulkan/context.cpp b/vulkan/context.cpp index 272a8a38c..ccb0170c1 100644 --- a/vulkan/context.cpp +++ b/vulkan/context.cpp @@ -1304,6 +1304,7 @@ bool Context::create_device(VkPhysicalDevice gpu_, VkSurfaceKHR surface, ext.pageable_device_local_memory_features = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PAGEABLE_DEVICE_LOCAL_MEMORY_FEATURES_EXT }; ext.mesh_shader_features = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_EXT }; ext.shader_subgroup_extended_types_features = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES }; + ext.index_type_uint8_features = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT }; ext.compute_shader_derivative_features = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV }; ext.device_generated_commands_features = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEVICE_GENERATED_COMMANDS_FEATURES_NV }; @@ -1501,6 +1502,13 @@ bool Context::create_device(VkPhysicalDevice gpu_, VkSurfaceKHR surface, ppNext = &ext.shader_subgroup_extended_types_features.pNext; } + if (has_extension(VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME)) + { + enabled_extensions.push_back(VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME); + *ppNext = &ext.index_type_uint8_features; + ppNext = &ext.index_type_uint8_features.pNext; + } + if ((flags & CONTEXT_CREATION_ENABLE_ADVANCED_WSI_BIT) != 0 && requires_swapchain) { bool broken_present_wait = ext.driver_properties.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY && @@ -1594,6 +1602,8 @@ bool Context::create_device(VkPhysicalDevice gpu_, VkSurfaceKHR surface, enabled_features.shaderStorageImageWriteWithoutFormat = VK_TRUE; if (pdf2.features.shaderStorageImageReadWithoutFormat) enabled_features.shaderStorageImageReadWithoutFormat = VK_TRUE; + if (pdf2.features.multiDrawIndirect) + 
enabled_features.multiDrawIndirect = VK_TRUE; if (pdf2.features.shaderSampledImageArrayDynamicIndexing) enabled_features.shaderSampledImageArrayDynamicIndexing = VK_TRUE; diff --git a/vulkan/context.hpp b/vulkan/context.hpp index df4e2b932..ab9db29ad 100644 --- a/vulkan/context.hpp +++ b/vulkan/context.hpp @@ -104,6 +104,7 @@ struct DeviceFeatures VkPhysicalDeviceBufferDeviceAddressFeaturesKHR buffer_device_address_features = {}; VkPhysicalDeviceIDProperties id_properties = {}; VkPhysicalDeviceShaderSubgroupExtendedTypesFeaturesKHR shader_subgroup_extended_types_features = {}; + VkPhysicalDeviceIndexTypeUint8FeaturesEXT index_type_uint8_features = {}; // EXT VkPhysicalDeviceExternalMemoryHostPropertiesEXT host_memory_properties = {}; diff --git a/vulkan/managers/resource_manager.cpp b/vulkan/managers/resource_manager.cpp index 073d66ee8..febf6eb0e 100644 --- a/vulkan/managers/resource_manager.cpp +++ b/vulkan/managers/resource_manager.cpp @@ -28,12 +28,32 @@ #include "texture_decoder.hpp" #include "string_helpers.hpp" #include "thread_group.hpp" +#include "meshlet.hpp" namespace Vulkan { ResourceManager::ResourceManager(Device *device_) : device(device_) + , index_buffer_allocator(*device_, 256) + , attribute_buffer_allocator(*device_, 256) + , indirect_buffer_allocator(*device_, 1) + , mesh_header_allocator(*device_, 1) + , mesh_stream_allocator(*device_, 8) + , mesh_payload_allocator(*device_, 128) { + // Simplified style. + index_buffer_allocator.set_element_size(0, 3); // 8-bit indices. + attribute_buffer_allocator.set_soa_count(3); + attribute_buffer_allocator.set_element_size(0, sizeof(float) * 3); + attribute_buffer_allocator.set_element_size(1, sizeof(float) * 2 + sizeof(uint32_t) * 2); + attribute_buffer_allocator.set_element_size(2, sizeof(uint32_t) * 2); + indirect_buffer_allocator.set_element_size(0, sizeof(VkDrawIndexedIndirectCommand)); + + mesh_header_allocator.set_element_size(0, sizeof(Meshlet::RuntimeHeader)); + mesh_stream_allocator.set_element_size(0, sizeof(Meshlet::Stream)); + mesh_payload_allocator.set_element_size(0, sizeof(uint32_t)); + + assets.reserve(Granite::AssetID::MaxIDs); } ResourceManager::~ResourceManager() @@ -41,34 +61,61 @@ ResourceManager::~ResourceManager() // Also works as a teardown mechanism to make sure there are no async threads in flight. if (manager) manager->set_asset_instantiator_interface(nullptr); + + // Ensure resource releases go through. + latch_handles(); } void ResourceManager::set_id_bounds(uint32_t bound) { - textures.resize(bound); - views.resize(bound); + // We must avoid reallocation here to avoid a ton of extra silly locking. 
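// Why the reserve in the constructor matters: resizing within reserved capacity
// never reallocates, so `auto &asset = assets[id.id]` references taken on loader
// threads stay valid while set_id_bounds() grows the array. A minimal
// illustration of that std::vector guarantee (illustrative names, not part of
// the patch):

#include <cassert>
#include <vector>

int main()
{
	std::vector<int> assets;
	assets.reserve(1024); // analogous to assets.reserve(Granite::AssetID::MaxIDs)
	assets.resize(16);
	int *stable = &assets[7];
	assets.resize(1024);          // within reserved capacity: no reallocation...
	assert(stable == &assets[7]); // ...so outstanding references remain valid.
	return 0;
}

// The assertion below enforces the precondition that makes this safe: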
+ VK_ASSERT(bound <= Granite::AssetID::MaxIDs); + assets.resize(bound); } -void ResourceManager::set_image_class(Granite::ImageAssetID id, Granite::ImageClass image_class) +void ResourceManager::set_asset_class(Granite::AssetID id, Granite::AssetClass asset_class) { if (id) { - textures[id.id].image_class = image_class; - if (!views[id.id]) - views[id.id] = &get_fallback_image(image_class)->get_view(); + assets[id.id].asset_class = asset_class; + if (asset_class != Granite::AssetClass::Mesh) + { + std::unique_lock holder{lock}; + views.resize(assets.size()); + + if (!views[id.id]) + views[id.id] = &get_fallback_image(asset_class)->get_view(); + } } } -void ResourceManager::release_image_resource(Granite::ImageAssetID id) +void ResourceManager::release_asset(Granite::AssetID id) { if (id) - textures[id.id].image.reset(); + { + std::unique_lock holder{lock}; + VK_ASSERT(id.id < assets.size()); + auto &asset = assets[id.id]; + asset.latchable = false; + updates.push_back(id); + } } -uint64_t ResourceManager::estimate_cost_image_resource(Granite::ImageAssetID, Granite::File &file) +uint64_t ResourceManager::estimate_cost_asset(Granite::AssetID id, Granite::File &file) { - // TODO: When we get compressed BC/ASTC, this will have to change. - return file.get_size(); + if (assets[id.id].asset_class == Granite::AssetClass::Mesh) + { + // Compression factor of 2x is reasonable to assume. + if (mesh_encoding == MeshEncoding::VBOAndIBOMDI) + return file.get_size() * 2; + else + return file.get_size(); + } + else + { + // TODO: When we get compressed BC/ASTC, this will have to change. + return file.get_size(); + } } void ResourceManager::init() @@ -103,7 +150,7 @@ void ResourceManager::init() HeapBudget budget[VK_MAX_MEMORY_HEAPS] = {}; device->get_memory_budget(budget); - // Try to set aside 50% of budgetable VRAM for the texture manager. Seems reasonable. + // Try to set aside 50% of budgetable VRAM for the resource manager. Seems reasonable. VkDeviceSize size = 0; for (uint32_t i = 0; i < device->get_memory_properties().memoryHeapCount; i++) if ((device->get_memory_properties().memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) != 0) @@ -116,14 +163,22 @@ void ResourceManager::init() } LOGI("Using texture budget of %u MiB.\n", unsigned(size / (1024 * 1024))); - manager->set_image_budget(size); + manager->set_asset_budget(size); // This is somewhat arbitrary. 
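// Rough numbers for the policy above, assuming a single 8 GiB device-local heap:

#include <cstdint>
#include <cstdio>

int main()
{
	uint64_t heap_size = 8ull * 1024 * 1024 * 1024; // budgetable VRAM
	uint64_t budget = heap_size / 2;                // 50% set aside for assets
	printf("Asset budget: %llu MiB\n",
	       static_cast<unsigned long long>(budget / (1024 * 1024)));
	return 0;
}

// The per-iteration budget below is a separate knob: it bounds how many bytes of
// new assets are activated per update, so uploads trickle in rather than spike.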
- manager->set_image_budget_per_iteration(2 * 1000 * 1000); + manager->set_asset_budget_per_iteration(2 * 1000 * 1000); + } + + if (device->get_device_features().mesh_shader_features.taskShader && + device->get_device_features().mesh_shader_features.meshShader && + device->supports_subgroup_size_log2(true, 5, 5, VK_SHADER_STAGE_MESH_BIT_EXT)) + { + mesh_encoding = MeshEncoding::Meshlet; + LOGI("Opting in to meshlet path.\n"); } } -ImageHandle ResourceManager::create_gtx(const MemoryMappedTexture &mapped_file, Granite::ImageAssetID id) +ImageHandle ResourceManager::create_gtx(const MemoryMappedTexture &mapped_file, Granite::AssetID id) { if (mapped_file.empty()) return {}; @@ -133,7 +188,7 @@ ImageHandle ResourceManager::create_gtx(const MemoryMappedTexture &mapped_file, VkComponentMapping swizzle = {}; mapped_file.remap_swizzle(swizzle); - Vulkan::ImageHandle image; + ImageHandle image; if (!device->image_format_is_supported(layout.get_format(), VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) && format_compression_type(layout.get_format()) != FormatCompressionType::Uncompressed) { @@ -189,13 +244,13 @@ ImageHandle ResourceManager::create_gtx(const MemoryMappedTexture &mapped_file, if (image) { - auto name = Util::join("ImageAssetID-", id.id); + auto name = Util::join("AssetID-", id.id); device->set_name(*image, name.c_str()); } return image; } -ImageHandle ResourceManager::create_gtx(Granite::FileMappingHandle mapping, Granite::ImageAssetID id) +ImageHandle ResourceManager::create_gtx(Granite::FileMappingHandle mapping, Granite::AssetID id) { MemoryMappedTexture mapped_file; if (!mapped_file.map_read(std::move(mapping))) @@ -207,27 +262,29 @@ ImageHandle ResourceManager::create_gtx(Granite::FileMappingHandle mapping, Gran return create_gtx(mapped_file, id); } -ImageHandle ResourceManager::create_other(const Granite::FileMapping &mapping, Granite::ImageClass image_class, - Granite::ImageAssetID id) +ImageHandle ResourceManager::create_other(const Granite::FileMapping &mapping, Granite::AssetClass asset_class, + Granite::AssetID id) { auto tex = load_texture_from_memory(mapping.data(), - mapping.get_size(), image_class == Granite::ImageClass::Color ? + mapping.get_size(), asset_class == Granite::AssetClass::ImageColor ? 
ColorSpace::sRGB : ColorSpace::Linear); return create_gtx(tex, id); } -const Vulkan::ImageView *ResourceManager::get_image_view_blocking(Granite::ImageAssetID id) +const ImageView *ResourceManager::get_image_view_blocking(Granite::AssetID id) { std::unique_lock holder{lock}; - if (id.id >= textures.size()) + if (id.id >= assets.size()) { LOGE("ID %u is out of bounds.\n", id.id); return nullptr; } - if (textures[id.id].image) - return &textures[id.id].image->get_view(); + auto &asset = assets[id.id]; + + if (asset.image) + return &asset.image->get_view(); if (!manager->iterate_blocking(*device->get_system_handles().thread_group, id)) { @@ -235,32 +292,233 @@ const Vulkan::ImageView *ResourceManager::get_image_view_blocking(Granite::Image return nullptr; } - cond.wait(holder, [this, id]() -> bool { - return bool(textures[id.id].image); + cond.wait(holder, [&asset]() -> bool { + return bool(asset.latchable); }); - return &textures[id.id].image->get_view(); + return &asset.image->get_view(); } -void ResourceManager::instantiate_image_resource(Granite::AssetManager &manager_, Granite::TaskGroup *task, - Granite::ImageAssetID id, Granite::File &file) +void ResourceManager::instantiate_asset(Granite::AssetManager &manager_, Granite::TaskGroup *task, + Granite::AssetID id, Granite::File &file) { if (task) { task->enqueue_task([this, &manager_, &file, id]() { - instantiate_image_resource(manager_, id, file); + instantiate_asset(manager_, id, file); }); } else { - instantiate_image_resource(manager_, id, file); + instantiate_asset(manager_, id, file); + } +} + +void ResourceManager::instantiate_asset(Granite::AssetManager &manager_, + Granite::AssetID id, + Granite::File &file) +{ + auto &asset = assets[id.id]; + if (asset.asset_class == Granite::AssetClass::Mesh) + instantiate_asset_mesh(manager_, id, file); + else + instantiate_asset_image(manager_, id, file); +} + +bool ResourceManager::allocate_asset_mesh(Granite::AssetID id, const Meshlet::MeshView &view) +{ + if (!view.format_header) + return false; + + std::lock_guard holder{mesh_allocator_lock}; + auto &asset = assets[id.id]; + + if (mesh_encoding == MeshEncoding::VBOAndIBOMDI) + { + if (!index_buffer_allocator.allocate(view.total_primitives, &asset.mesh.index_or_payload)) + return false; + + if (!attribute_buffer_allocator.allocate(view.total_vertices, &asset.mesh.attr_or_stream)) + { + index_buffer_allocator.free(asset.mesh.index_or_payload); + asset.mesh.index_or_payload = {}; + return false; + } + + if (!indirect_buffer_allocator.allocate(view.format_header->meshlet_count, &asset.mesh.indirect_or_header)) + { + index_buffer_allocator.free(asset.mesh.index_or_payload); + attribute_buffer_allocator.free(asset.mesh.attr_or_stream); + asset.mesh.index_or_payload = {}; + asset.mesh.attr_or_stream = {}; + return false; + } + } + else + { + if (!mesh_header_allocator.allocate(view.format_header->meshlet_count, &asset.mesh.indirect_or_header)) + return false; + + if (!mesh_stream_allocator.allocate(view.format_header->meshlet_count * view.format_header->u32_stream_count, + &asset.mesh.attr_or_stream)) + { + mesh_header_allocator.free(asset.mesh.indirect_or_header); + asset.mesh.indirect_or_header = {}; + return false; + } + + if (!mesh_payload_allocator.allocate(view.format_header->payload_size_words, &asset.mesh.index_or_payload)) + { + mesh_header_allocator.free(asset.mesh.indirect_or_header); + mesh_stream_allocator.free(asset.mesh.attr_or_stream); + asset.mesh.indirect_or_header = {}; + asset.mesh.attr_or_stream = {}; + return false; + } } + 
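// The failure paths above unwind partial allocations by hand. The same pattern
// expressed as a small scope guard, purely illustrative (no such helper exists
// in this patch):

#include <cstdio>
#include <functional>
#include <vector>

struct Unwind
{
	std::vector<std::function<void()>> undo;
	bool committed = false;
	~Unwind()
	{
		if (!committed)
			for (auto it = undo.rbegin(); it != undo.rend(); ++it)
				(*it)();
	}
};

int main()
{
	Unwind guard;
	guard.undo.push_back([] { puts("free index allocation"); });
	// If a later allocation fails before guard.committed is set, the destructor
	// frees earlier allocations in reverse order, as the code above does by hand.
	return 0;
}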
+	asset.mesh.draw = { asset.mesh.indirect_or_header.offset, view.format_header->meshlet_count };
+	return true;
+}

-void ResourceManager::instantiate_image_resource(Granite::AssetManager &manager_,
-                                                 Granite::ImageAssetID id,
-                                                 Granite::File &file)
+void ResourceManager::instantiate_asset_mesh(Granite::AssetManager &manager_,
+                                             Granite::AssetID id,
+                                             Granite::File &file)
 {
+	Granite::FileMappingHandle mapping;
+	if (file.get_size())
+		mapping = file.map();
+
+	Meshlet::MeshView view = {};
+	if (mapping)
+		view = Meshlet::create_mesh_view(*mapping);
+	bool ret = allocate_asset_mesh(id, view);
+
+	// Decode the meshlet. Later, we'll have to do a lot of device specific stuff here to select optimal
+	// processing:
+	// - Native meshlets
+	// - Encoded attributes
+	// - Decoded attributes
+	// - Optimize for multi-draw-indirect or not? (8-bit indices).
+
+	auto &asset = assets[id.id];
+
+	if (ret)
+	{
+		if (mesh_encoding == MeshEncoding::Meshlet)
+		{
+			auto cmd = device->request_command_buffer(CommandBuffer::Type::AsyncTransfer);
+
+			void *payload_data = cmd->update_buffer(*mesh_payload_allocator.get_buffer(0, 0),
+			                                        asset.mesh.index_or_payload.offset * sizeof(uint32_t),
+			                                        view.format_header->payload_size_words * sizeof(uint32_t));
+			memcpy(payload_data, view.payload, view.format_header->payload_size_words * sizeof(uint32_t));
+
+			auto *headers = static_cast<Meshlet::RuntimeHeader *>(
+					cmd->update_buffer(*mesh_header_allocator.get_buffer(0, 0),
+					                   asset.mesh.indirect_or_header.offset * sizeof(Meshlet::RuntimeHeader),
+					                   view.format_header->meshlet_count * sizeof(Meshlet::RuntimeHeader)));
+
+			for (uint32_t i = 0, n = view.format_header->meshlet_count; i < n; i++)
+			{
+				headers[i].stream_offset = asset.mesh.attr_or_stream.offset + i * view.format_header->u32_stream_count;
+				headers[i].num_attributes = view.headers[i].num_attributes_minus_1 + 1;
+				headers[i].num_primitives = view.headers[i].num_primitives_minus_1 + 1;
+			}
+
+			auto *streams = static_cast<Meshlet::Stream *>(
+					cmd->update_buffer(*mesh_stream_allocator.get_buffer(0, 0),
+					                   asset.mesh.attr_or_stream.offset * sizeof(Meshlet::Stream),
+					                   view.format_header->meshlet_count * view.format_header->u32_stream_count *
+					                   sizeof(Meshlet::Stream)));
+
+			for (uint32_t i = 0, n = view.format_header->meshlet_count * view.format_header->u32_stream_count; i < n; i++)
+			{
+				auto in_stream = view.streams[i];
+				in_stream.offset_from_base_u32 += asset.mesh.index_or_payload.offset;
+				streams[i] = in_stream;
+			}
+
+			Semaphore sem[2];
+			device->submit(cmd, nullptr, 2, sem);
+			device->add_wait_semaphore(CommandBuffer::Type::Generic, std::move(sem[0]),
+			                           VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_EXT |
+			                           VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, false);
+			device->add_wait_semaphore(CommandBuffer::Type::AsyncGraphics, std::move(sem[1]),
+			                           VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_EXT |
+			                           VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, false);
+		}
+		else
+		{
+			auto cmd = device->request_command_buffer(CommandBuffer::Type::AsyncCompute);
+
+			BufferCreateInfo buf = {};
+			buf.domain = BufferDomain::Host;
+			buf.size = view.format_header->payload_size_words * sizeof(uint32_t);
+			buf.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
+			auto payload = device->create_buffer(buf, view.payload);
+
+			Meshlet::DecodeInfo info = {};
+			info.target_style = Meshlet::MeshStyle::Textured;
+			info.ibo = index_buffer_allocator.get_buffer(0, 0);
+
+			for (unsigned i = 0; i < 3; i++)
+				info.streams[i] = attribute_buffer_allocator.get_buffer(0, i);
+
+			info.payload = payload.get();
+			info.indirect = indirect_buffer_allocator.get_buffer(0, 0);
+
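// All meshes share the large suballocated buffers, so the decode dispatch cannot
// write at offset zero. The three push-constant offsets below rebase every write
// into this mesh's slices: meshlet_offset for the indirect draws / runtime
// headers, primitive_offset for the index buffer, and vertex_offset for the
// attribute streams, matching the Registers push-constant block declared in
// meshlet_decode.comp.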
info.push.meshlet_offset = asset.mesh.indirect_or_header.offset; + info.push.primitive_offset = asset.mesh.index_or_payload.offset; + info.push.vertex_offset = asset.mesh.attr_or_stream.offset; + + Meshlet::decode_mesh(*cmd, info, view); + + Semaphore sem[2]; + device->submit(cmd, nullptr, 2, sem); + device->add_wait_semaphore(CommandBuffer::Type::Generic, std::move(sem[0]), + VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT | + VK_PIPELINE_STAGE_2_INDEX_INPUT_BIT, false); + device->add_wait_semaphore(CommandBuffer::Type::AsyncGraphics, std::move(sem[1]), + VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT | + VK_PIPELINE_STAGE_2_INDEX_INPUT_BIT, false); + } + } + + uint64_t cost = 0; + if (ret) + { + if (mesh_encoding == MeshEncoding::Meshlet) + { + cost += view.format_header->payload_size_words * mesh_payload_allocator.get_element_size(0); + cost += view.format_header->meshlet_count * mesh_header_allocator.get_element_size(0); + cost += view.format_header->meshlet_count * view.format_header->u32_stream_count * mesh_stream_allocator.get_element_size(0); + } + else + { + cost += view.total_primitives * index_buffer_allocator.get_element_size(0); + cost += view.total_vertices * attribute_buffer_allocator.get_element_size(0); + cost += view.total_vertices * attribute_buffer_allocator.get_element_size(1); + cost += view.total_vertices * attribute_buffer_allocator.get_element_size(2); + cost += view.format_header->meshlet_count * indirect_buffer_allocator.get_element_size(0); + } + + asset.mesh.draw.style = view.format_header->style; + } + + std::lock_guard holder{lock}; + updates.push_back(id); + manager_.update_cost(id, ret ? cost : 0); + asset.latchable = true; + cond.notify_all(); +} + +void ResourceManager::instantiate_asset_image(Granite::AssetManager &manager_, + Granite::AssetID id, + Granite::File &file) +{ + auto &asset = assets[id.id]; + ImageHandle image; if (file.get_size()) { @@ -270,36 +528,36 @@ void ResourceManager::instantiate_image_resource(Granite::AssetManager &manager_ if (MemoryMappedTexture::is_header(mapping->data(), mapping->get_size())) image = create_gtx(std::move(mapping), id); else - image = create_other(*mapping, textures[id.id].image_class, id); + image = create_other(*mapping, asset.asset_class, id); } else LOGE("Failed to map file.\n"); } - manager_.update_cost(id, image ? image->get_allocation().get_size() : 0); - // Have to signal something. if (!image) - image = get_fallback_image(textures[id.id].image_class); + image = get_fallback_image(asset.asset_class); std::lock_guard holder{lock}; updates.push_back(id); - textures[id.id].image = std::move(image); + asset.image = std::move(image); + asset.latchable = true; + manager_.update_cost(id, asset.image ? 
asset.image->get_allocation().get_size() : 0); cond.notify_all(); } -const ImageHandle &ResourceManager::get_fallback_image(Granite::ImageClass image_class) +const ImageHandle &ResourceManager::get_fallback_image(Granite::AssetClass asset_class) { - switch (image_class) + switch (asset_class) { default: - case Granite::ImageClass::Zeroable: + case Granite::AssetClass::ImageZeroable: return fallback_zero; - case Granite::ImageClass::Color: + case Granite::AssetClass::ImageColor: return fallback_color; - case Granite::ImageClass::Normal: + case Granite::AssetClass::ImageNormal: return fallback_normal; - case Granite::ImageClass::MetallicRoughness: + case Granite::AssetClass::ImageMetallicRoughness: return fallback_pbr; } } @@ -307,25 +565,265 @@ const ImageHandle &ResourceManager::get_fallback_image(Granite::ImageClass image void ResourceManager::latch_handles() { std::lock_guard holder{lock}; + + views.resize(assets.size()); + draws.resize(assets.size()); + for (auto &update : updates) { if (update.id >= views.size()) continue; + auto &asset = assets[update.id]; - const ImageView *view; - - if (textures[update.id].image) + if (asset.asset_class == Granite::AssetClass::Mesh) { - view = &textures[update.id].image->get_view(); + if (!asset.latchable) + { + { + std::lock_guard holder_alloc{mesh_allocator_lock}; + if (mesh_encoding == MeshEncoding::Meshlet) + { + mesh_payload_allocator.free(asset.mesh.index_or_payload); + mesh_stream_allocator.free(asset.mesh.attr_or_stream); + mesh_header_allocator.free(asset.mesh.indirect_or_header); + } + else + { + index_buffer_allocator.free(asset.mesh.index_or_payload); + attribute_buffer_allocator.free(asset.mesh.attr_or_stream); + indirect_buffer_allocator.free(asset.mesh.indirect_or_header); + } + } + asset.mesh = {}; + } + + draws[update.id] = asset.mesh.draw; } else { - auto &img = get_fallback_image(textures[update.id].image_class); - view = &img->get_view(); - } + const ImageView *view; + if (!asset.latchable) + asset.image.reset(); + + if (asset.image) + { + view = &asset.image->get_view(); + } + else + { + auto &img = get_fallback_image(asset.asset_class); + view = &img->get_view(); + } - views[update.id] = view; + views[update.id] = view; + } } updates.clear(); } + +const Buffer *ResourceManager::get_index_buffer() const +{ + return index_buffer_allocator.get_buffer(0, 0); +} + +const Buffer *ResourceManager::get_position_buffer() const +{ + return attribute_buffer_allocator.get_buffer(0, 0); +} + +const Buffer *ResourceManager::get_attribute_buffer() const +{ + return attribute_buffer_allocator.get_buffer(0, 1); +} + +const Buffer *ResourceManager::get_skinning_buffer() const +{ + return attribute_buffer_allocator.get_buffer(0, 2); +} + +const Buffer *ResourceManager::get_indirect_buffer() const +{ + return indirect_buffer_allocator.get_buffer(0, 0); +} + +const Buffer *ResourceManager::get_meshlet_payload_buffer() const +{ + return mesh_payload_allocator.get_buffer(0, 0); +} + +const Buffer *ResourceManager::get_meshlet_header_buffer() const +{ + return mesh_header_allocator.get_buffer(0, 0); +} + +const Buffer *ResourceManager::get_meshlet_stream_header_buffer() const +{ + return mesh_stream_allocator.get_buffer(0, 0); +} + +MeshBufferAllocator::MeshBufferAllocator(Device &device, uint32_t sub_block_size) + : global_allocator(device) +{ + for (int i = 0; i < SliceAllocatorCount - 1; i++) + allocators[i].parent = &allocators[i + 1]; + allocators[SliceAllocatorCount - 1].global_allocator = &global_allocator; + + // Basic unit of a meshlet is 256 
prims / attributes. + // Maximum element count = 32M prims. + allocators[0].set_sub_block_size(sub_block_size); + for (int i = 1; i < SliceAllocatorCount; i++) + allocators[i].set_sub_block_size(allocators[i - 1].get_sub_block_size() * (Util::LegionAllocator::NumSubBlocks / 2)); + + for (auto &alloc : allocators) + alloc.set_object_pool(&object_pool); +} + +void MeshBufferAllocator::set_soa_count(unsigned soa_count) +{ + VK_ASSERT(soa_count <= Internal::MeshGlobalAllocator::MaxSoACount); + global_allocator.soa_count = soa_count; +} + +void MeshBufferAllocator::set_element_size(unsigned soa_index, uint32_t element_size) +{ + VK_ASSERT(soa_index < global_allocator.soa_count); + global_allocator.element_size[soa_index] = element_size; +} + +uint32_t MeshBufferAllocator::get_element_size(unsigned soa_index) const +{ + VK_ASSERT(soa_index < global_allocator.soa_count); + return global_allocator.element_size[soa_index]; +} + +const Buffer *MeshBufferAllocator::get_buffer(unsigned index, unsigned soa_index) const +{ + VK_ASSERT(soa_index < global_allocator.soa_count); + index = index * global_allocator.soa_count + soa_index; + + if (index < global_allocator.global_buffers.size()) + return global_allocator.global_buffers[index].get(); + else + return nullptr; +} + +namespace Internal +{ +uint32_t MeshGlobalAllocator::allocate(uint32_t count) +{ + BufferCreateInfo info = {}; + + uint32_t target_index = UINT32_MAX; + uint32_t search_index = 0; + + for (uint32_t i = 0, n = global_buffers.size(); i < n; i += soa_count, search_index++) + { + if (!global_buffers[i]) + { + target_index = search_index; + break; + } + } + + if (target_index == UINT32_MAX) + { + if (!global_buffers.empty()) + return UINT32_MAX; + + target_index = search_index; + for (uint32_t i = 0; i < soa_count; i++) + global_buffers.emplace_back(); + } + + for (uint32_t soa_index = 0; soa_index < soa_count; soa_index++) + { + info.size = VkDeviceSize(count) * element_size[soa_index]; + info.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | + VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT; + info.domain = BufferDomain::Device; + global_buffers[target_index * soa_count + soa_index] = device.create_buffer(info); + } + + return target_index; +} + +void MeshGlobalAllocator::free(uint32_t index) +{ + index *= soa_count; + VK_ASSERT(index < global_buffers.size()); + for (uint32_t i = 0; i < soa_count; i++) + global_buffers[index + i].reset(); +} + +MeshGlobalAllocator::MeshGlobalAllocator(Device &device_) + : device(device_) +{} + +bool SliceAllocator::allocate_backing_heap(AllocatedSlice *allocation) +{ + uint32_t count = sub_block_size * Util::LegionAllocator::NumSubBlocks; + + if (parent) + { + return parent->allocate(count, allocation); + } + else if (global_allocator) + { + uint32_t index = global_allocator->allocate(count); + if (index == UINT32_MAX) + return false; + + *allocation = {}; + allocation->count = count; + allocation->buffer_index = index; + return true; + } + else + { + return false; + } +} + +void SliceAllocator::free_backing_heap(AllocatedSlice *allocation) const +{ + if (parent) + parent->free(allocation->heap, allocation->mask); + else if (global_allocator) + global_allocator->free(allocation->buffer_index); +} + +void SliceAllocator::prepare_allocation(AllocatedSlice *allocation, Util::IntrusiveList::Iterator heap, + const Util::SuballocationResult &suballoc) +{ + allocation->buffer_index = 
heap->allocation.buffer_index; + allocation->offset = heap->allocation.offset + suballoc.offset; + allocation->count = suballoc.size; + allocation->mask = suballoc.mask; + allocation->heap = heap; + allocation->alloc = this; +} +} + +bool MeshBufferAllocator::allocate(uint32_t count, Internal::AllocatedSlice *slice) +{ + for (auto &alloc : allocators) + { + uint32_t max_alloc_size = alloc.get_max_allocation_size(); + if (count <= max_alloc_size) + return alloc.allocate(count, slice); + } + + LOGE("Allocation of %u elements is too large for MeshBufferAllocator.\n", count); + return false; +} + +void MeshBufferAllocator::free(const Internal::AllocatedSlice &slice) +{ + if (slice.alloc) + slice.alloc->free(slice.heap, slice.mask); + else + global_allocator.free(slice.buffer_index); +} } diff --git a/vulkan/managers/resource_manager.hpp b/vulkan/managers/resource_manager.hpp index 5d1a7dbf7..a700cf3de 100644 --- a/vulkan/managers/resource_manager.hpp +++ b/vulkan/managers/resource_manager.hpp @@ -23,7 +23,11 @@ #pragma once #include "image.hpp" +#include "buffer.hpp" #include "asset_manager.hpp" +#include "meshlet.hpp" +#include "arena_allocator.hpp" +#include "small_vector.hpp" #include #include @@ -31,14 +35,80 @@ namespace Vulkan { class MemoryMappedTexture; -class ResourceManager : private Granite::AssetInstantiatorInterface +namespace Internal +{ +struct SliceAllocator; +struct AllocatedSlice +{ + uint32_t buffer_index = 0; + uint32_t offset = 0; + uint32_t count = 0; + uint32_t mask = 0; + + SliceAllocator *alloc = nullptr; + Util::IntrusiveList>::Iterator heap = {}; +}; + +struct MeshGlobalAllocator +{ + explicit MeshGlobalAllocator(Device &device); + uint32_t allocate(uint32_t count); + void free(uint32_t index); + + enum { MaxSoACount = 3 }; // Position, attribute, skinning. + + Device &device; + uint32_t element_size[MaxSoACount] = {}; + uint32_t soa_count = 1; + Util::SmallVector global_buffers; +}; + +struct SliceAllocator : Util::ArenaAllocator +{ + SliceAllocator *parent = nullptr; + MeshGlobalAllocator *global_allocator = nullptr; + + // Implements curious recurring template pattern calls. 
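// "Curiously recurring template pattern": Util::ArenaAllocator is parameterized
// on its derived type and invokes these hooks through a static_cast, so the
// slice/global allocator split costs no virtual dispatch. The shape, reduced to
// a sketch (illustrative names, not the real class):

#include <cstdio>

template <typename Backing>
struct ArenaBase
{
	bool allocate()
	{
		// Statically dispatches into the derived class; no vtable involved.
		return static_cast<Backing *>(this)->allocate_backing_heap();
	}
};

struct Slice : ArenaBase<Slice>
{
	bool allocate_backing_heap()
	{
		puts("grabbed a backing heap");
		return true;
	}
};

int main()
{
	Slice slice;
	return slice.allocate() ? 0 : 1;
}

// The three declarations below are exactly the hooks the base class expects: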
+ bool allocate_backing_heap(AllocatedSlice *allocation); + void free_backing_heap(AllocatedSlice *allocation) const; + void prepare_allocation(AllocatedSlice *allocation, Util::IntrusiveList::Iterator heap, + const Util::SuballocationResult &suballoc); +}; +} + +class MeshBufferAllocator +{ +public: + MeshBufferAllocator(Device &device, uint32_t sub_block_size); + bool allocate(uint32_t count, Internal::AllocatedSlice *slice); + void free(const Internal::AllocatedSlice &slice); + void set_soa_count(unsigned soa_count); + void set_element_size(unsigned soa_index, uint32_t element_size); + uint32_t get_element_size(unsigned soa_index) const; + + const Buffer *get_buffer(unsigned index, unsigned soa_index) const; + +private: + Util::ObjectPool> object_pool; + Internal::MeshGlobalAllocator global_allocator; + enum { SliceAllocatorCount = 4 }; + Internal::SliceAllocator allocators[SliceAllocatorCount]; +}; + +class ResourceManager final : private Granite::AssetInstantiatorInterface { public: explicit ResourceManager(Device *device); - ~ResourceManager(); + ~ResourceManager() override; void init(); - inline const Vulkan::ImageView *get_image_view(Granite::ImageAssetID id) const + enum class MeshEncoding + { + Meshlet, + VBOAndIBOMDI, + }; + + inline const Vulkan::ImageView *get_image_view(Granite::AssetID id) const { if (id.id < views.size()) return views[id.id]; @@ -46,43 +116,93 @@ class ResourceManager : private Granite::AssetInstantiatorInterface return nullptr; } - const Vulkan::ImageView *get_image_view_blocking(Granite::ImageAssetID id); + const Vulkan::ImageView *get_image_view_blocking(Granite::AssetID id); + + struct DrawRange + { + uint32_t offset = 0; + uint32_t count = 0; + Meshlet::MeshStyle style = Meshlet::MeshStyle::Wireframe; + }; + + inline DrawRange get_mesh_draw_range(Granite::AssetID id) const + { + if (id.id < draws.size()) + return draws[id.id]; + else + return {}; + } + + inline MeshEncoding get_mesh_encoding() const + { + return mesh_encoding; + } + + const Buffer *get_index_buffer() const; + const Buffer *get_position_buffer() const; + const Buffer *get_attribute_buffer() const; + const Buffer *get_skinning_buffer() const; + const Buffer *get_indirect_buffer() const; + + const Buffer *get_meshlet_payload_buffer() const; + const Buffer *get_meshlet_header_buffer() const; + const Buffer *get_meshlet_stream_header_buffer() const; private: Device *device; Granite::AssetManager *manager = nullptr; void latch_handles() override; - uint64_t estimate_cost_image_resource(Granite::ImageAssetID id, Granite::File &file) override; - void instantiate_image_resource(Granite::AssetManager &manager, Granite::TaskGroup *task, - Granite::ImageAssetID id, Granite::File &file) override; - void release_image_resource(Granite::ImageAssetID id) override; + uint64_t estimate_cost_asset(Granite::AssetID id, Granite::File &file) override; + void instantiate_asset(Granite::AssetManager &manager, Granite::TaskGroup *task, + Granite::AssetID id, Granite::File &file) override; + void release_asset(Granite::AssetID id) override; void set_id_bounds(uint32_t bound) override; - void set_image_class(Granite::ImageAssetID id, Granite::ImageClass image_class) override; + void set_asset_class(Granite::AssetID id, Granite::AssetClass asset_class) override; - struct Texture + struct Asset { ImageHandle image; - Granite::ImageClass image_class = Granite::ImageClass::Zeroable; + struct + { + Internal::AllocatedSlice index_or_payload, attr_or_stream, indirect_or_header; + DrawRange draw; + } mesh; + 
Granite::AssetClass asset_class = Granite::AssetClass::ImageZeroable; + bool latchable = false; }; std::mutex lock; std::condition_variable cond; - std::vector textures; + std::vector assets; std::vector views; - std::vector updates; + std::vector draws; + std::vector updates; ImageHandle fallback_color; ImageHandle fallback_normal; ImageHandle fallback_zero; ImageHandle fallback_pbr; - ImageHandle create_gtx(Granite::FileMappingHandle mapping, Granite::ImageAssetID id); - ImageHandle create_gtx(const MemoryMappedTexture &mapping, Granite::ImageAssetID id); - ImageHandle create_other(const Granite::FileMapping &mapping, Granite::ImageClass image_class, Granite::ImageAssetID id); - const ImageHandle &get_fallback_image(Granite::ImageClass image_class); + ImageHandle create_gtx(Granite::FileMappingHandle mapping, Granite::AssetID id); + ImageHandle create_gtx(const MemoryMappedTexture &mapping, Granite::AssetID id); + ImageHandle create_other(const Granite::FileMapping &mapping, Granite::AssetClass asset_class, Granite::AssetID id); + const ImageHandle &get_fallback_image(Granite::AssetClass asset_class); + + void instantiate_asset(Granite::AssetManager &manager, Granite::AssetID id, Granite::File &file); + void instantiate_asset_image(Granite::AssetManager &manager, Granite::AssetID id, Granite::File &file); + void instantiate_asset_mesh(Granite::AssetManager &manager, Granite::AssetID id, Granite::File &file); + + std::mutex mesh_allocator_lock; + MeshBufferAllocator index_buffer_allocator; + MeshBufferAllocator attribute_buffer_allocator; + MeshBufferAllocator indirect_buffer_allocator; + MeshBufferAllocator mesh_header_allocator; + MeshBufferAllocator mesh_stream_allocator; + MeshBufferAllocator mesh_payload_allocator; + MeshEncoding mesh_encoding = MeshEncoding::VBOAndIBOMDI; - void instantiate_image_resource(Granite::AssetManager &manager, Granite::ImageAssetID id, Granite::File &file); + bool allocate_asset_mesh(Granite::AssetID id, const Meshlet::MeshView &view); }; } diff --git a/vulkan/managers/shader_manager.cpp b/vulkan/managers/shader_manager.cpp index 395686f24..809569da3 100644 --- a/vulkan/managers/shader_manager.cpp +++ b/vulkan/managers/shader_manager.cpp @@ -385,19 +385,21 @@ Vulkan::Program *ShaderProgramVariant::get_program_graphics() auto *frag = stages[Util::ecast(Vulkan::ShaderStage::Fragment)]; #ifdef GRANITE_SHIPPING - if (mesh) + if (mesh && frag) { ret = device->request_program(task ? task->resolve(*device) : nullptr, mesh->resolve(*device), frag->resolve(*device), sampler_bank.get()); } - else + else if (vert && frag) { ret = device->request_program(vert->resolve(*device), frag->resolve(*device), sampler_bank.get()); } + else + return nullptr; #else auto &vert_instance = shader_instance[Util::ecast(Vulkan::ShaderStage::Vertex)]; auto &frag_instance = shader_instance[Util::ecast(Vulkan::ShaderStage::Fragment)]; @@ -413,7 +415,7 @@ Vulkan::Program *ShaderProgramVariant::get_program_graphics() // we can safely read program directly. // comp->instance will only ever be incremented in the main thread on an inotify, so this is fine. // If comp->instance changes in the interim, we are at least guaranteed to read a sensible value for program. 
- if (mesh) + if (mesh && frag) { if ((!task || (loaded_task_instance == task->instance)) && loaded_mesh_instance == mesh->instance && @@ -422,11 +424,13 @@ Vulkan::Program *ShaderProgramVariant::get_program_graphics() return program.load(std::memory_order_relaxed); } } - else + else if (vert && frag) { if (loaded_vert_instance == vert->instance && loaded_frag_instance == frag->instance) return program.load(std::memory_order_relaxed); } + else + return nullptr; instance_lock.lock_write(); diff --git a/vulkan/memory_allocator.cpp b/vulkan/memory_allocator.cpp index f938a4bf5..bc1568639 100644 --- a/vulkan/memory_allocator.cpp +++ b/vulkan/memory_allocator.cpp @@ -123,8 +123,11 @@ void DeviceAllocation::free_global(DeviceAllocator &allocator, uint32_t size_, u } } -void ClassAllocator::prepare_allocation(DeviceAllocation *alloc, MiniHeap &heap, const SuballocationResult &suballoc) +void ClassAllocator::prepare_allocation(DeviceAllocation *alloc, Util::IntrusiveList::Iterator heap_itr, + const Util::SuballocationResult &suballoc) { + auto &heap = *heap_itr; + alloc->heap = heap_itr; alloc->base = heap.allocation.base; alloc->offset = suballoc.offset + heap.allocation.offset; alloc->mask = suballoc.mask; diff --git a/vulkan/memory_allocator.hpp b/vulkan/memory_allocator.hpp index b5525617f..38b1f094a 100644 --- a/vulkan/memory_allocator.hpp +++ b/vulkan/memory_allocator.hpp @@ -196,7 +196,8 @@ class ClassAllocator : public Util::ArenaAllocator::Iterator heap_itr, + const Util::SuballocationResult &suballoc); }; class Allocator diff --git a/vulkan/mesh/meshlet.cpp b/vulkan/mesh/meshlet.cpp new file mode 100644 index 000000000..381635c8b --- /dev/null +++ b/vulkan/mesh/meshlet.cpp @@ -0,0 +1,243 @@ +/* Copyright (c) 2017-2023 Hans-Kristian Arntzen + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+#include "meshlet.hpp"
+#include "command_buffer.hpp"
+#include "buffer.hpp"
+#include "device.hpp"
+#include "filesystem.hpp"
+
+namespace Vulkan
+{
+namespace Meshlet
+{
+MeshView create_mesh_view(const Granite::FileMapping &mapping)
+{
+	MeshView view = {};
+
+	if (mapping.get_size() < sizeof(magic) + sizeof(FormatHeader))
+	{
+		LOGE("MESHLET1 file too small.\n");
+		return view;
+	}
+
+	auto *ptr = mapping.data();
+	auto *end_ptr = ptr + mapping.get_size();
+
+	if (memcmp(ptr, magic, sizeof(magic)) != 0)
+	{
+		LOGE("Invalid MESHLET1 magic.\n");
+		return {};
+	}
+
+	ptr += sizeof(magic);
+
+	view.format_header = reinterpret_cast<const FormatHeader *>(ptr);
+	ptr += sizeof(*view.format_header);
+
+	if (end_ptr - ptr < ptrdiff_t(view.format_header->meshlet_count * sizeof(Header)))
+		return {};
+	view.headers = reinterpret_cast<const Header *>(ptr);
+	ptr += view.format_header->meshlet_count * sizeof(Header);
+
+	if (end_ptr - ptr < ptrdiff_t(view.format_header->meshlet_count * sizeof(Bound)))
+		return {};
+	view.bounds = reinterpret_cast<const Bound *>(ptr);
+	ptr += view.format_header->meshlet_count * sizeof(Bound);
+
+	if (end_ptr - ptr < ptrdiff_t(view.format_header->meshlet_count * view.format_header->u32_stream_count * sizeof(Stream)))
+		return {};
+	view.streams = reinterpret_cast<const Stream *>(ptr);
+	ptr += view.format_header->meshlet_count * view.format_header->u32_stream_count * sizeof(Stream);
+
+	if (!view.format_header->payload_size_words)
+		return {};
+
+	if (end_ptr - ptr < ptrdiff_t(view.format_header->payload_size_words * sizeof(uint32_t)))
+		return {};
+	view.payload = reinterpret_cast<const uint32_t *>(ptr);
+
+	for (uint32_t i = 0, n = view.format_header->meshlet_count; i < n; i++)
+	{
+		view.total_primitives += view.headers[i].num_primitives_minus_1 + 1;
+		view.total_vertices += view.headers[i].num_attributes_minus_1 + 1;
+	}
+
+	return view;
+}
+
+bool decode_mesh(CommandBuffer &cmd, const DecodeInfo &info, const MeshView &view)
+{
+	// TODO: Implement LDS fallback.
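// The decode shader is specialized for a 32-wide subgroup: each meshlet chunk is
// decoded cooperatively by one 32-lane subgroup (local_size_x = 32 in
// meshlet_decode.comp), so the Wave32 requirement below is structural. The LDS
// fallback in the TODO above would presumably stage the bitplane data through
// shared memory instead of subgroup operations, allowing other subgroup sizes.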
+	if (!cmd.get_device().supports_subgroup_size_log2(true, 5, 5))
+	{
+		LOGE("Device does not support Wave32.\n");
+		return false;
+	}
+
+	if (!info.streams[0])
+	{
+		LOGE("Decode stream 0 must be set.\n");
+		return false;
+	}
+
+	if (!info.ibo)
+	{
+		LOGE("Output IBO must be set.\n");
+		return false;
+	}
+
+	cmd.push_constants(&info.push, 0, sizeof(info.push));
+
+	BufferCreateInfo buf_info = {};
+	buf_info.domain = BufferDomain::LinkedDeviceHost;
+	buf_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
+
+	buf_info.size = view.format_header->meshlet_count * sizeof(*view.headers);
+	auto meshlet_meta_buffer = cmd.get_device().create_buffer(buf_info, view.headers);
+
+	buf_info.size = view.format_header->meshlet_count * view.format_header->u32_stream_count * sizeof(*view.streams);
+	auto meshlet_stream_buffer = cmd.get_device().create_buffer(buf_info, view.streams);
+
+	// For Raw mode -> offset/stride
+	// For typed mode -> index offset / vertex offset
+	struct DecodeOffset { uint32_t arg0, arg1; };
+	std::vector<DecodeOffset> decode_offsets;
+
+	cmd.set_program("builtin://shaders/decode/meshlet_decode.comp");
+	cmd.enable_subgroup_size_control(true);
+	cmd.set_subgroup_size_log2(true, 5, 5);
+
+	cmd.set_storage_buffer(0, 0, *meshlet_meta_buffer);
+	cmd.set_storage_buffer(0, 1, *meshlet_stream_buffer);
+	cmd.set_storage_buffer(0, 2, *info.payload);
+	cmd.set_storage_buffer(0, 3, *info.ibo);
+
+	cmd.set_specialization_constant_mask(0x7);
+	cmd.set_specialization_constant(0, view.format_header->u32_stream_count);
+	cmd.set_specialization_constant(2, (info.flags & DECODE_MODE_RAW_PAYLOAD) != 0);
+
+	if ((info.flags & DECODE_MODE_RAW_PAYLOAD) != 0)
+	{
+		uint32_t output_u32_streams;
+		switch (info.target_style)
+		{
+		case MeshStyle::Wireframe:
+			output_u32_streams = 2;
+			break;
+
+		case MeshStyle::Untextured:
+			output_u32_streams = 3;
+			break;
+
+		case MeshStyle::Textured:
+			output_u32_streams = 6;
+			break;
+
+		case MeshStyle::Skinned:
+			output_u32_streams = 8;
+			break;
+
+		default:
+			return false;
+		}
+
+		if (output_u32_streams + 1 > view.format_header->u32_stream_count)
+		{
+			LOGE("Trying to decode more streams than exist in payload.\n");
+			return false;
+		}
+
+		for (unsigned i = 0; i < 3; i++)
+			cmd.set_storage_buffer(0, 4 + i, *info.streams[0]);
+
+		decode_offsets.reserve(view.format_header->meshlet_count * (output_u32_streams + 1));
+		uint32_t index_count = 0;
+
+		for (uint32_t i = 0; i < view.format_header->meshlet_count; i++)
+		{
+			decode_offsets.push_back({ index_count, 0 });
+			index_count += view.headers[i].num_primitives_minus_1 + 1;
+			for (uint32_t j = 0; j < output_u32_streams; j++)
+				decode_offsets.push_back({ view.headers[i].base_vertex_offset * output_u32_streams + j, output_u32_streams });
+		}
+
+		cmd.set_specialization_constant(1, output_u32_streams + 1);
+
+		// Dummy bind for indirect_buffer.
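// The placeholder bind below exists because binding 8 (the indirect command
// buffer) is statically declared by the shader regardless of the specialization
// constants, and Vulkan expects a valid descriptor for every statically declared
// storage buffer binding; the raw-payload path never actually writes indirect
// commands, so any live buffer satisfies validation.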
+ cmd.set_storage_buffer(0, 8, *info.streams[0]); + } + else + { + for (unsigned i = 0; i < 3; i++) + cmd.set_storage_buffer(0, 4 + i, *info.streams[0]); + + switch (info.target_style) + { + case MeshStyle::Skinned: + cmd.set_storage_buffer(0, 6, *info.streams[2]); + // Fallthrough + case MeshStyle::Untextured: + case MeshStyle::Textured: + cmd.set_storage_buffer(0, 5, *info.streams[1]); + // Fallthrough + case MeshStyle::Wireframe: + cmd.set_storage_buffer(0, 4, *info.streams[0]); + break; + + default: + return false; + } + + decode_offsets.reserve(view.format_header->meshlet_count); + uint32_t index_count = 0; + for (uint32_t i = 0; i < view.format_header->meshlet_count; i++) + { + decode_offsets.push_back({ index_count, view.headers[i].base_vertex_offset }); + index_count += view.headers[i].num_primitives_minus_1 + 1; + } + cmd.set_specialization_constant(1, uint32_t(info.target_style)); + + cmd.set_storage_buffer(0, 8, *info.indirect); + } + + buf_info.domain = BufferDomain::LinkedDeviceHost; + buf_info.size = decode_offsets.size() * sizeof(DecodeOffset); + auto output_offset_strides_buffer = cmd.get_device().create_buffer(buf_info, decode_offsets.data()); + + cmd.set_storage_buffer(0, 7, *output_offset_strides_buffer); + + // TODO: Split dispatches for big chungus meshes. + // (Starts to become a problem around 8-16 million primitives per dispatch). + if (view.format_header->meshlet_count > cmd.get_device().get_gpu_properties().limits.maxComputeWorkGroupCount[0]) + { + LOGW("Exceeding workgroup limit (%u > %u).\n", view.format_header->meshlet_count, + cmd.get_device().get_gpu_properties().limits.maxComputeWorkGroupCount[0]); + } + + cmd.dispatch(view.format_header->meshlet_count, 1, 1); + cmd.set_specialization_constant_mask(0); + cmd.enable_subgroup_size_control(false); + return true; +} +} +} diff --git a/vulkan/mesh/meshlet.hpp b/vulkan/mesh/meshlet.hpp new file mode 100644 index 000000000..8a9e7c133 --- /dev/null +++ b/vulkan/mesh/meshlet.hpp @@ -0,0 +1,142 @@ +/* Copyright (c) 2017-2023 Hans-Kristian Arntzen + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#pragma once + +#include + +namespace Granite +{ +class FileMapping; +} + +namespace Vulkan +{ +class CommandBuffer; +class Buffer; +} + +namespace Vulkan +{ +// MESHLET1 format. 
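// On-disk layout parsed by create_mesh_view() in meshlet.cpp above, in order:
//
//   char     magic[8]   = "MESHLET1"
//   FormatHeader
//   Header   headers[meshlet_count]
//   Bound    bounds[meshlet_count]
//   Stream   streams[meshlet_count * u32_stream_count]
//   uint32_t payload[payload_size_words]
//
// Every section size derives from FormatHeader, so the whole file validates with
// pure pointer arithmetic against the mapping size.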
+namespace Meshlet +{ +static constexpr unsigned MaxU32Streams = 16; +static constexpr unsigned MaxElements = 256; +static constexpr unsigned MaxPrimitives = MaxElements; +static constexpr unsigned MaxVertices = MaxElements; + +struct Stream +{ + uint16_t predictor[4 * 2 + 2]; + uint32_t offset_from_base_u32; + uint16_t bitplane_meta[MaxElements / 32]; +}; + +struct Header +{ + uint32_t base_vertex_offset; + uint8_t num_primitives_minus_1; + uint8_t num_attributes_minus_1; + uint16_t reserved; +}; + +// For GPU use +struct RuntimeHeader +{ + uint32_t stream_offset; + uint16_t num_primitives; + uint16_t num_attributes; +}; + +struct Bound +{ + float center[3]; + float radius; + int8_t cone_axis_cutoff[4]; +}; + +enum class StreamType +{ + Primitive = 0, // R8G8B8X8_UINT + PositionE16, // RGB16_SSCALED * 2^(A16_SINT) + NormalOct8, // Octahedron encoding in RG8. + TangentOct8, // Octahedron encoding in RG8, sign bit in B8 (if not zero, +1, otherwise -1). + UV, // R16G16_SNORM * B16_SSCALED + BoneIndices, // RGBA8_UINT + BoneWeights, // RGB8_UNORM (sums to 1, A is implied). +}; + +enum class MeshStyle : uint32_t +{ + Wireframe = 0, // Primitive + Position + Untextured, // Wireframe + NormalOct8 + Textured, // Untextured + TangentOct8 + UV + Skinned // Textured + Bone* +}; + +struct FormatHeader +{ + MeshStyle style; + uint32_t u32_stream_count; + uint32_t meshlet_count; + uint32_t payload_size_words; +}; + +struct MeshView +{ + const FormatHeader *format_header; + const Header *headers; + const Bound *bounds; + const Stream *streams; + const uint32_t *payload; + uint32_t total_primitives; + uint32_t total_vertices; +}; + +static const char magic[8] = { 'M', 'E', 'S', 'H', 'L', 'E', 'T', '1' }; + +MeshView create_mesh_view(const Granite::FileMapping &mapping); + +enum DecodeModeFlagBits : uint32_t +{ + DECODE_MODE_RAW_PAYLOAD = 1 << 0, +}; +using DecodeModeFlags = uint32_t; + +struct DecodeInfo +{ + const Vulkan::Buffer *ibo, *streams[3], *indirect, *payload; + DecodeModeFlags flags; + MeshStyle target_style; + + struct + { + uint32_t primitive_offset; + uint32_t vertex_offset; + uint32_t meshlet_offset; + } push; +}; + +bool decode_mesh(Vulkan::CommandBuffer &cmd, const DecodeInfo &decode_info, const MeshView &view); +} +}
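// Putting the new pieces together: a minimal usage sketch based on the test and
// ResourceManager::instantiate_asset_mesh() in this patch. The caller is assumed
// to prefill the output buffers in DecodeInfo (ibo, streams[], indirect); only
// the payload upload and the dispatch are shown, and Wave32 support is assumed.

#include "meshlet.hpp"
#include "device.hpp"
#include "filesystem.hpp"

bool decode_msh1(Vulkan::Device &device, Granite::FileMapping &mapped,
                 Vulkan::Meshlet::DecodeInfo info)
{
	using namespace Vulkan;

	// Validates magic and section sizes; format_header stays null on failure.
	auto view = Meshlet::create_mesh_view(mapped);
	if (!view.format_header)
		return false;

	// Upload the compressed payload into a host-visible SSBO, as the
	// resource manager's non-meshlet path does.
	BufferCreateInfo buf = {};
	buf.domain = BufferDomain::Host;
	buf.size = view.format_header->payload_size_words * sizeof(uint32_t);
	buf.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
	auto payload = device.create_buffer(buf, view.payload);

	info.payload = payload.get();
	info.target_style = Meshlet::MeshStyle::Textured;

	auto cmd = device.request_command_buffer(CommandBuffer::Type::AsyncCompute);
	bool ok = Meshlet::decode_mesh(*cmd, info, view);
	device.submit(cmd);
	return ok;
}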