Skip to content

Commit

Permalink
Merge pull request #121 from Themaister/meshopt-experiments
Browse files Browse the repository at this point in the history
Start moving towards meshlet based rendering
  • Loading branch information
Themaister authored Dec 8, 2023
2 parents f8fda67 + e975a24 commit 597464c
Show file tree
Hide file tree
Showing 56 changed files with 3,458 additions and 336 deletions.
3 changes: 3 additions & 0 deletions application/platforms/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ elseif (${GRANITE_PLATFORM} MATCHES "SDL")
#find_package(SDL3 REQUIRED CONFIG REQUIRED COMPONENTS SDL3-shared)
#target_link_libraries(granite-platform PRIVATE SDL3::SDL3-shared)
target_link_libraries(granite-platform PRIVATE SDL3-static granite-input-sdl)
if (NOT WIN32)
	# application_sdl3.cpp calls dlopen(). Which library provides it is platform-dependent
	# (libdl on Linux, libc itself on some other Unix systems), so let CMake name it via
	# CMAKE_DL_LIBS instead of hard-coding "dl".
	target_link_libraries(granite-platform PRIVATE ${CMAKE_DL_LIBS})
endif()
else()
# FATAL_ERROR is the message() mode that stops configuration. The bare word "FATAL" is not a
# mode keyword, so it was treated as message text and configuration carried on.
message(FATAL_ERROR "GRANITE_PLATFORM is not set.")
endif()
Expand Down
11 changes: 2 additions & 9 deletions application/platforms/application_headless.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,19 +248,10 @@ struct WSIPlatformHeadless : Granite::GraniteWSIPlatform
enc_opts.frame_timebase.den = int(frame_rate);

#ifdef HAVE_GRANITE_AUDIO
#if 1
enc_opts.realtime = true;
record_stream.reset(Audio::create_default_audio_record_backend("headless", 44100.0f, 2));
if (record_stream)
encoder.set_audio_record_stream(record_stream.get());
#else
auto *mixer = new Audio::Mixer;
auto *audio_dumper = new Audio::DumpBackend(
mixer, 48000.0f, 2,
unsigned(std::ceil(48000.0f / frame_rate)));
Global::install_audio_system(audio_dumper, mixer);
encoder.set_audio_source(audio_dumper);
#endif
#endif

if (!encoder.init(&app->get_wsi().get_device(), video_encode_path.c_str(), enc_opts))
Expand All @@ -284,7 +275,9 @@ struct WSIPlatformHeadless : Granite::GraniteWSIPlatform
}
#endif

#ifdef HAVE_GRANITE_AUDIO
// record_stream is null-checked where it is created, i.e. backend creation is allowed to fail.
// Apply the same guard here so a missing audio backend does not turn into a null dereference.
if (record_stream)
	record_stream->start();
#endif
}
#endif

Expand Down
15 changes: 15 additions & 0 deletions application/platforms/application_sdl3.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@
#include <windows.h>
#endif

#ifdef __linux__
#include <dlfcn.h>
#endif

namespace Granite
{
static Key sdl_key_to_granite_key(SDL_Keycode key)
Expand Down Expand Up @@ -104,6 +108,17 @@ struct WSIPlatformSDL : GraniteWSIPlatform
if (options.override_height)
height = options.override_height;

#ifdef __linux__
// SDL3 picks Wayland by default, which RenderDoc does not support.
// RTLD_NOLOAD only probes: the handle is non-null if librenderdoc.so is already resident
// in this process, and nothing is loaded otherwise.
// setenv() is called with overwrite == 0, so an SDL_VIDEO_DRIVER already set by the user wins.
void *renderdoc_module = dlopen("librenderdoc.so", RTLD_NOLOAD | RTLD_NOW);
if (renderdoc_module != nullptr)
{
	LOGI("RenderDoc is loaded, disabling Wayland.\n");
	setenv("SDL_VIDEO_DRIVER", "x11", 0);
}
#endif

if (SDL_Init(SDL_INIT_EVENTS | SDL_INIT_GAMEPAD | SDL_INIT_VIDEO) < 0)
{
LOGE("Failed to init SDL.\n");
Expand Down
219 changes: 219 additions & 0 deletions assets/shaders/decode/meshlet_decode.comp
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
#version 450

// Compute shader which decodes meshlet payload streams into index, vertex attribute and
// (optionally) indirect draw buffers. One workgroup handles one meshlet (see main()).

// Needed for the `scalar` layout qualifier used on the index and position output buffers.
#extension GL_EXT_scalar_block_layout : require
#include "../inc/meshlet_payload_constants.h"

// When enabled, the workgroup is 32 x MESHLET_PAYLOAD_NUM_CHUNKS invocations,
// otherwise it is 32 x 1.
#define MESHLET_PAYLOAD_LARGE_WORKGROUP 1

#if MESHLET_PAYLOAD_LARGE_WORKGROUP
#define MESHLET_PAYLOAD_WG_Y MESHLET_PAYLOAD_NUM_CHUNKS
#else
#define MESHLET_PAYLOAD_WG_Y 1
#endif
layout(local_size_x = 32, local_size_y = MESHLET_PAYLOAD_WG_Y) in;

// Specialization constants:
// - NUM_U32_STREAMS: number of encoded u32 streams per meshlet; main() uses it as the
//   per-meshlet stride into the stream table (meshlet_index * NUM_U32_STREAMS).
// - NUM_OUTPUT_U32_STREAMS: in RAW_PAYLOAD mode, the number of streams written out.
//   Otherwise the same value is reinterpreted as MESH_STYLE.
// - RAW_PAYLOAD: selects between raw u32 stream output and decoded attribute output.
layout(constant_id = 0) const uint NUM_U32_STREAMS = MESHLET_PAYLOAD_MAX_STREAMS;
layout(constant_id = 1) const uint NUM_OUTPUT_U32_STREAMS = 1;
layout(constant_id = 2) const bool RAW_PAYLOAD = false;
// These are defined before the include below, presumably as its configuration
// (stream count, descriptor set and bindings) -- confirm in meshlet_payload_decode.h.
#define MESHLET_PAYLOAD_NUM_U32_STREAMS NUM_U32_STREAMS
#define MESHLET_PAYLOAD_DESCRIPTOR_SET 0
#define MESHLET_PAYLOAD_META_BINDING 0
#define MESHLET_PAYLOAD_STREAM_BINDING 1
#define MESHLET_PAYLOAD_PAYLOAD_BINDING 2
#include "../inc/meshlet_payload_decode.h"
#include "../inc/meshlet_attribute_decode.h"

// In decoded (non-RAW_PAYLOAD) mode the NUM_OUTPUT_U32_STREAMS specialization constant
// carries the mesh style. The values are ordered so that main() can test with `>=`:
// each style decodes everything the previous one does, plus more streams.
const int MESH_STYLE = int(NUM_OUTPUT_U32_STREAMS);
const int MESH_STYLE_WIREFRAME = 0; // Indices and position only.
const int MESH_STYLE_UNTEXTURED = 1; // + normal.
const int MESH_STYLE_TEXTURED = 2; // + tangent and UV.
const int MESH_STYLE_SKINNED = 3; // + skinning stream.

// Binding 3 is declared twice with different element types; which view is written depends
// on RAW_PAYLOAD. `scalar` layout keeps the three-component vectors tightly packed.

// 32-bit index triplets, written in RAW_PAYLOAD mode.
layout(set = 0, binding = 3, scalar) writeonly buffer OutputIndices
{
uvec3 data[];
} output_indices32;

// 8-bit index triplets, written in decoded mode.
// NOTE(review): u8vec3 needs an 8-bit type extension which is not enabled in this file;
// presumably meshlet_payload_decode.h enables it -- confirm.
layout(set = 0, binding = 3, scalar) writeonly buffer OutputIndices8
{
u8vec3 data[];
} output_indices8;

// Binding 4 is likewise declared twice.

// Raw u32 attribute words, written in RAW_PAYLOAD mode.
layout(set = 0, binding = 4, std430) writeonly buffer OutputStream0
{
uint data[];
} output_stream_raw;

// Decoded float positions, written in decoded mode.
layout(set = 0, binding = 4, scalar) writeonly buffer OutputStreamPos
{
vec3 data[];
} output_stream_pos;

// Per-vertex attributes for MESH_STYLE_UNTEXTURED.
struct UntexturedAttr
{
uint normal; // Packed by pack_a2bgr10().
};

// Binding 5 is declared twice; main() writes the untextured view when
// MESH_STYLE < MESH_STYLE_TEXTURED and the textured view otherwise.
layout(set = 0, binding = 5, std430) writeonly buffer OutputStreamUntextured
{
UntexturedAttr data[];
} output_stream_untextured_attr;

// Per-vertex attributes for MESH_STYLE_TEXTURED and above.
struct TexturedAttr
{
uint normal; // Packed by pack_a2bgr10().
uint tangent; // Packed by pack_a2bgr10().
vec2 uv; // Decoded by attribute_decode_snorm_exp_uv().
};

layout(set = 0, binding = 5, std430) writeonly buffer OutputStreamTextured
{
TexturedAttr data[];
} output_stream_textured_attr;

// Skinning data; main() copies the 64-bit stream value through without decoding it.
layout(set = 0, binding = 6, std430) writeonly buffer OutputStreamSkin
{
uvec2 data[];
} output_stream_skin;

// Despite the "Output" name this is a read-only input telling main() where to write.
// - Decoded mode: one entry per meshlet, .x = primitive offset, .y = vertex offset.
// - RAW_PAYLOAD mode: one entry per (meshlet, output stream),
//   indexed as meshlet_index * NUM_OUTPUT_U32_STREAMS + stream, .x = offset, .y = stride.
layout(set = 0, binding = 7, std430) readonly buffer OutputOffsets
{
uvec2 data[];
} output_offset_strides;

// Field order matches VkDrawIndexedIndirectCommand.
// NOTE(review): Vulkan declares vertexOffset as int32_t; it is a uint here.
struct IndirectIndexedDraw
{
uint indexCount;
uint instanceCount;
uint firstIndex;
uint vertexOffset;
uint firstInstance;
};

// One draw per meshlet, written only in decoded mode.
layout(set = 0, binding = 8, std430) writeonly buffer IndirectCommands
{
IndirectIndexedDraw draws[];
} indirect_commands;

// Global offsets for this dispatch, added on top of the per-meshlet offsets.
layout(push_constant, std430) uniform Registers
{
uint primitive_offset; // Added to primitive (index triplet) output offsets.
uint vertex_offset; // Added to vertex output offsets and to emitted vertex indices / vertexOffset.
uint meshlet_offset; // Added to the meshlet index when writing indirect draws.
} registers;

// Packs a vector in [-1, 1] into a 32-bit word: R in bits 0-9, G in bits 10-19,
// B in bits 20-29 and A in bits 30-31.
// RGB are scaled by 511 and A by 1, rounded, and stored as two's complement values
// truncated to 10 and 2 bits respectively. Inputs outside [-1, 1] are clamped first.
uint pack_a2bgr10(vec4 v)
{
	const vec4 scale = vec4(511.0, 511.0, 511.0, 1.0);
	const ivec4 field_mask = ivec4(1023, 1023, 1023, 3);

	vec4 clamped = clamp(v, vec4(-1.0), vec4(1.0));
	ivec4 q = ivec4(round(clamped * scale)) & field_mask;

	return (q.w << 30) | (q.z << 20) | (q.y << 10) | (q.x << 0);
}

// Decodes one meshlet per workgroup (gl_WorkGroupID.x selects the meshlet).
// - RAW_PAYLOAD: stream 0 is expanded to 32-bit indices, and streams
//   1 .. NUM_OUTPUT_U32_STREAMS - 1 are copied out as raw u32 words with a per-stream stride.
// - Otherwise: writes an indexed indirect draw for the meshlet, 8-bit local indices, and
//   decoded attributes selected by MESH_STYLE.
// The MESHLET_DECODE_STREAM_32/64 macros and meshlet_init_workgroup() come from
// meshlet_payload_decode.h (not visible here). They are handed a callback macro taking
// (linear_index, decoded value); presumably it is invoked once per decoded element.
void main()
{
uint meshlet_index = gl_WorkGroupID.x;
// The argument is the index of this meshlet's first stream.
meshlet_init_workgroup(meshlet_index * NUM_U32_STREAMS);
MeshletMetaRaw meta = meshlet_metas_raw.data[meshlet_index];

if (!RAW_PAYLOAD)
{
// Emit one indexed draw for this meshlet. There is no invocation guard, so every
// invocation in the workgroup stores the same value.
IndirectIndexedDraw draw;
// The meta stores count - 1.
draw.indexCount = 3 * (meta.num_primitives_minus_1 + 1);
draw.instanceCount = 1;
draw.vertexOffset = meta.base_vertex_offset + registers.vertex_offset;
// Offsets are counted in primitives; firstIndex is counted in indices.
draw.firstIndex = 3 * (output_offset_strides.data[meshlet_index].x + registers.primitive_offset);
draw.firstInstance = 0;
indirect_commands.draws[meshlet_index + registers.meshlet_offset] = draw;
}

// Index stream callback.
// RAW_PAYLOAD: widen the 8-bit indices to 32 bits and rebase them to global vertex indices.
// Otherwise: store the meshlet-local 8-bit indices as-is; the indirect draw's vertexOffset
// carries the rebase instead.
// `<=` is used for the bounds checks since the meta stores count - 1.
#define INDEX(linear_index, packed_indices) { \
uint output_offset; \
if (RAW_PAYLOAD) { \
uvec3 indices = uvec4(unpack8(packed_indices)).xyz; \
indices += meta.base_vertex_offset + registers.vertex_offset; \
output_offset = output_offset_strides.data[meshlet_index * NUM_OUTPUT_U32_STREAMS].x; \
output_offset += registers.primitive_offset; \
if (linear_index <= uint(meta.num_primitives_minus_1)) \
output_indices32.data[output_offset + linear_index] = indices; \
} else { \
output_offset = output_offset_strides.data[meshlet_index].x; \
output_offset += registers.primitive_offset; \
if (linear_index <= uint(meta.num_primitives_minus_1)) \
output_indices8.data[output_offset + linear_index] = unpack8(packed_indices).xyz; \
} \
}

{
MESHLET_DECODE_STREAM_32(meshlet_index * NUM_U32_STREAMS, 0, INDEX);
}

if (RAW_PAYLOAD)
{
// Raw attribute callback. Relies on the loop variable `i` being in scope where the macro
// is expanded. .x of the offset/stride entry is the base offset, .y the element stride.
#define ATTR(linear_index, packed_decoded) { \
uvec2 output_offset_stride0 = output_offset_strides.data[meshlet_index * NUM_OUTPUT_U32_STREAMS + i]; \
output_offset_stride0.x += registers.vertex_offset; \
if (linear_index <= uint(meta.num_attributes_minus_1)) \
output_stream_raw.data[output_offset_stride0.x + linear_index * output_offset_stride0.y] = packed_decoded; \
}

// Stream 0 is the index stream handled above, so attribute streams start at 1.
for (uint i = 1; i < NUM_OUTPUT_U32_STREAMS; i++)
{
MESHLET_DECODE_STREAM_32(meshlet_index * NUM_U32_STREAMS, i, ATTR);
}
}
else
{
// In decoded mode .y of the per-meshlet entry is the vertex output offset,
// shared by all attribute buffers.
uint output_offset = output_offset_strides.data[meshlet_index].y;
output_offset += registers.vertex_offset;

// The callbacks below capture `output_offset` and `meta` from this scope.

#define POS(linear_index, packed_decoded) { \
if (linear_index <= uint(meta.num_attributes_minus_1)) \
output_stream_pos.data[output_offset + linear_index] = attribute_decode_snorm_exp_position(packed_decoded); \
}

// The normal goes to whichever binding 5 view matches the mesh style.
#define NORMAL(linear_index, packed_decoded) { \
if (linear_index <= uint(meta.num_attributes_minus_1)) { \
if (MESH_STYLE >= MESH_STYLE_TEXTURED) \
output_stream_textured_attr.data[output_offset + linear_index].normal = pack_a2bgr10(attribute_decode_oct8_normal_tangent(packed_decoded)); \
else \
output_stream_untextured_attr.data[output_offset + linear_index].normal = pack_a2bgr10(attribute_decode_oct8_normal_tangent(packed_decoded)); \
} \
}

#define TANGENT(linear_index, packed_decoded) { \
if (linear_index <= uint(meta.num_attributes_minus_1)) { \
output_stream_textured_attr.data[output_offset + linear_index].tangent = pack_a2bgr10(attribute_decode_oct8_normal_tangent(packed_decoded)); \
} \
}

#define UV(linear_index, packed_decoded) { \
if (linear_index <= uint(meta.num_attributes_minus_1)) { \
output_stream_textured_attr.data[output_offset + linear_index].uv = attribute_decode_snorm_exp_uv(packed_decoded); \
} \
}

#define SKIN(linear_index, packed_decoded) { \
if (linear_index <= uint(meta.num_attributes_minus_1)) { \
output_stream_skin.data[output_offset + linear_index] = packed_decoded; \
} \
}
// Stream numbering used below: 0 = indices, 1 = position, 3 = normal, 4 = tangent,
// 5 = UV, 7 = skin. The 64-bit streams presumably occupy two consecutive u32 streams,
// which would explain the gaps -- confirm against meshlet_payload_decode.h.
{
MESHLET_DECODE_STREAM_64(meshlet_index * NUM_U32_STREAMS, 1, POS);
}

if (MESH_STYLE >= MESH_STYLE_UNTEXTURED)
{
MESHLET_DECODE_STREAM_32(meshlet_index * NUM_U32_STREAMS, 3, NORMAL);
}

if (MESH_STYLE >= MESH_STYLE_TEXTURED)
{
MESHLET_DECODE_STREAM_32(meshlet_index * NUM_U32_STREAMS, 4, TANGENT);
MESHLET_DECODE_STREAM_64(meshlet_index * NUM_U32_STREAMS, 5, UV);
}

if (MESH_STYLE >= MESH_STYLE_SKINNED)
{
MESHLET_DECODE_STREAM_64(meshlet_index * NUM_U32_STREAMS, 7, SKIN);
}
}
}
39 changes: 39 additions & 0 deletions assets/shaders/inc/meshlet_attribute_decode.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#ifndef MESHLET_ATTRIBUTE_DECODE_H_
#define MESHLET_ATTRIBUTE_DECODE_H_

// Decodes a position stored as three signed 16-bit mantissas sharing one signed 16-bit exponent.
// payload.x holds X in the low half and Y in the high half,
// payload.y holds Z in the low half and the exponent in the high half.
// Each component is returned as mantissa * 2^exponent.
vec3 attribute_decode_snorm_exp_position(uvec2 payload)
{
	int word0 = int(payload.x);
	int word1 = int(payload.y);

	// bitfieldExtract on a signed int sign-extends the extracted field.
	int mant_x = bitfieldExtract(word0, 0, 16);
	int mant_y = bitfieldExtract(word0, 16, 16);
	int mant_z = bitfieldExtract(word1, 0, 16);
	int shared_exp = bitfieldExtract(word1, 16, 16);

	return ldexp(vec3(ivec3(mant_x, mant_y, mant_z)), ivec3(shared_exp));
}

// Decodes a UV stored as two signed 16-bit mantissas sharing one signed 16-bit exponent.
// payload.x holds U in the low half and V in the high half,
// the low half of payload.y holds the exponent; the high half of payload.y is not read.
// Each component is returned as mantissa * 2^exponent + 0.5.
vec2 attribute_decode_snorm_exp_uv(uvec2 payload)
{
	int word0 = int(payload.x);

	// bitfieldExtract on a signed int sign-extends the extracted field.
	int mant_u = bitfieldExtract(word0, 0, 16);
	int mant_v = bitfieldExtract(word0, 16, 16);
	int shared_exp = bitfieldExtract(int(payload.y), 0, 16);

	vec2 scaled = ldexp(vec2(ivec2(mant_u, mant_v)), ivec2(shared_exp));
	return scaled + 0.5;
}

// Octahedral decode of a unit vector stored as 4x8-bit snorm.
// Bytes 0 and 1 are the octahedral XY coordinates. Byte 3 selects the W output:
// non-zero yields -1.0, zero yields +1.0. Byte 2 is not used.
// Adapted from: https://knarkowicz.wordpress.com/2014/04/16/octahedron-normal-vector-encoding/
// https://twitter.com/Stubbesaurus/status/937994790553227264
mediump vec4 attribute_decode_oct8_normal_tangent(uint payload)
{
	mediump vec4 unpacked = unpackSnorm4x8(payload);

	mediump vec3 dir = vec3(unpacked.xy, 1.0 - abs(unpacked.x) - abs(unpacked.y));

	// For points on the lower hemisphere (dir.z < 0) fold XY back across the diagonals.
	mediump float fold = max(-dir.z, 0.0);
	dir.xy += mix(vec2(fold), vec2(-fold), greaterThanEqual(dir.xy, vec2(0.0)));

	mediump float w_sign = unpacked.w != 0.0 ? -1.0 : 1.0;
	return vec4(normalize(dir), w_sign);
}

#endif
8 changes: 8 additions & 0 deletions assets/shaders/inc/meshlet_payload_constants.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#ifndef MESHLET_PAYLOAD_CONSTANTS_H_
#define MESHLET_PAYLOAD_CONSTANTS_H_

// Presumably the maximum number of elements (primitives or vertices) in one meshlet.
// Equals 32 * MESHLET_PAYLOAD_NUM_CHUNKS, i.e. the invocation count of
// meshlet_decode.comp's large workgroup -- confirm against meshlet_payload_decode.h.
#define MESHLET_PAYLOAD_MAX_ELEMENTS 256
// Used by meshlet_decode.comp as local_size_y when MESHLET_PAYLOAD_LARGE_WORKGROUP is set.
#define MESHLET_PAYLOAD_NUM_CHUNKS 8
// Default value of the NUM_U32_STREAMS specialization constant in meshlet_decode.comp.
#define MESHLET_PAYLOAD_MAX_STREAMS 16

#endif
Loading

0 comments on commit 597464c

Please sign in to comment.