Skip to content

Commit

Permalink
Merge pull request #124 from Themaister/mdi-poc-bringup
Browse files Browse the repository at this point in the history
Sketch out MDI/Meshlet bindless path.
  • Loading branch information
Themaister authored Dec 11, 2023
2 parents c4fc808 + fc4f84d commit 7a4230f
Show file tree
Hide file tree
Showing 15 changed files with 423 additions and 122 deletions.
26 changes: 4 additions & 22 deletions assets/shaders/decode/meshlet_decode.comp
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,8 @@ layout(constant_id = 2) const bool RAW_PAYLOAD = false;

const int MESH_STYLE = int(NUM_OUTPUT_U32_STREAMS);
const int MESH_STYLE_WIREFRAME = 0;
const int MESH_STYLE_UNTEXTURED = 1;
const int MESH_STYLE_TEXTURED = 2;
const int MESH_STYLE_SKINNED = 3;
const int MESH_STYLE_TEXTURED = 1;
const int MESH_STYLE_SKINNED = 2;

layout(set = 0, binding = 3, scalar) writeonly buffer OutputIndices
{
Expand All @@ -49,16 +48,6 @@ layout(set = 0, binding = 4, scalar) writeonly buffer OutputStreamPos
vec3 data[];
} output_stream_pos;

struct UntexturedAttr
{
uint normal;
};

layout(set = 0, binding = 5, std430) writeonly buffer OutputStreamUntextured
{
UntexturedAttr data[];
} output_stream_untextured_attr;

struct TexturedAttr
{
uint normal;
Expand Down Expand Up @@ -172,10 +161,7 @@ void main()

#define NORMAL(linear_index, packed_decoded) { \
if (linear_index <= uint(meta.num_attributes_minus_1)) { \
if (MESH_STYLE >= MESH_STYLE_TEXTURED) \
output_stream_textured_attr.data[output_offset + linear_index].normal = pack_a2bgr10(attribute_decode_oct8_normal_tangent(packed_decoded)); \
else \
output_stream_untextured_attr.data[output_offset + linear_index].normal = pack_a2bgr10(attribute_decode_oct8_normal_tangent(packed_decoded)); \
output_stream_textured_attr.data[output_offset + linear_index].normal = pack_a2bgr10(attribute_decode_oct8_normal_tangent(packed_decoded)); \
} \
}

Expand All @@ -200,13 +186,9 @@ void main()
MESHLET_DECODE_STREAM_64(meshlet_index * NUM_U32_STREAMS, 1, POS);
}

if (MESH_STYLE >= MESH_STYLE_UNTEXTURED)
{
MESHLET_DECODE_STREAM_32(meshlet_index * NUM_U32_STREAMS, 3, NORMAL);
}

if (MESH_STYLE >= MESH_STYLE_TEXTURED)
{
MESHLET_DECODE_STREAM_32(meshlet_index * NUM_U32_STREAMS, 3, NORMAL);
MESHLET_DECODE_STREAM_32(meshlet_index * NUM_U32_STREAMS, 4, TANGENT);
MESHLET_DECODE_STREAM_64(meshlet_index * NUM_U32_STREAMS, 5, UV);
}
Expand Down
2 changes: 1 addition & 1 deletion assets/shaders/inc/meshlet_attribute_decode.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ vec2 attribute_decode_snorm_exp_uv(uvec2 payload)
bitfieldExtract(int(payload.x), 0, 16),
bitfieldExtract(int(payload.x), 16, 16));
int exp = bitfieldExtract(int(payload.y), 0, 16);
return vec2(
return 0.5 * vec2(
ldexp(float(sint_value.x), exp),
ldexp(float(sint_value.y), exp)) + 0.5;
}
Expand Down
14 changes: 6 additions & 8 deletions scene-export/meshlet_export.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -486,7 +486,7 @@ static void encode_mesh(Encoded &encoded,
// Handle spill region just in case.
uint64_t vbo_remapping[MaxVertices + 3];
unsigned vbo_index = 0;
for (auto &v: vbo_remap)
for (auto &v : vbo_remap)
{
assert(vbo_index < MaxVertices + 3);
vbo_remapping[vbo_index++] = (uint64_t(v.second) << 32) | v.first;
Expand Down Expand Up @@ -585,6 +585,7 @@ static bool export_encoded_mesh(const std::string &path, const Encoded &encoded)

bool export_mesh_to_meshlet(const std::string &path, SceneFormats::Mesh mesh, MeshStyle style)
{
mesh_deduplicate_vertices(mesh);
if (!mesh_optimize_index_buffer(mesh, {}))
return false;

Expand All @@ -600,22 +601,19 @@ bool export_mesh_to_meshlet(const std::string &path, SceneFormats::Mesh mesh, Me
return false;
case MeshStyle::Textured:
uv = mesh_extract_uv_snorm_scale(mesh);
num_u32_streams += 2;
num_u32_streams += 4;
if (uv.empty())
{
LOGE("No UVs.\n");
return false;
}
// Fallthrough
case MeshStyle::Untextured:
normals = mesh_extract_normal_tangent_oct8(mesh, MeshAttribute::Normal);
tangent = mesh_extract_normal_tangent_oct8(mesh, MeshAttribute::Tangent);
if (normals.empty() || tangent.empty())
{
LOGE("No normal or tangent.\n");
LOGE("No tangent or normal.\n");
return false;
}
num_u32_streams += 2;
// Fallthrough
case MeshStyle::Wireframe:
positions = mesh_extract_position_snorm_exp(mesh);
Expand Down Expand Up @@ -661,7 +659,7 @@ bool export_mesh_to_meshlet(const std::string &path, SceneFormats::Mesh mesh, Me
// Use quantized position to guide the clustering.
std::vector<vec3> position_buffer;
position_buffer.reserve(positions.size());
for (auto &p: positions)
for (auto &p : positions)
position_buffer.push_back(decode_snorm_exp(p));

// Special meshoptimizer limit.
Expand All @@ -685,7 +683,7 @@ bool export_mesh_to_meshlet(const std::string &path, SceneFormats::Mesh mesh, Me
std::vector<uvec3> out_index_buffer;

out_meshlets.reserve(num_meshlets);
for (auto &meshlet: meshlets)
for (auto &meshlet : meshlets)
{
Meshlet m = {};
m.offset = uint32_t(out_index_buffer.size());
Expand Down
69 changes: 69 additions & 0 deletions tests/assets/shaders/meshlet_cull.comp
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#version 450

layout(local_size_x = 32) in;

// Bounding box record made of two vec4 members.
// NOTE(review): presumably lo/hi are the min/max corners with .w unused — confirm against the producer.
struct AABB
{
vec4 lo, hi;
};

// Array of AABB records. main() in this shader does not read it.
layout(set = 0, binding = 0, std430) readonly buffer AABBSSBO
{
AABB data[];
} aabb;

// Array of 4x4 matrices. main() in this shader does not read it.
layout(set = 0, binding = 1, std430) readonly buffer Transforms
{
mat4 data[];
} transforms;

// One uvec4 per task, indexed by gl_GlobalInvocationID.x in main().
// As consumed by main():
//   .w bits [0, 5) : number of draws minus one
//   .w & ~31u      : index of the first draw in input_draws.data
//   .yz            : copied verbatim to output_draw_info for every emitted draw
//   .x             : not read by main()
layout(set = 0, binding = 2, std430) readonly buffer Tasks
{
uvec4 data[];
} task_info;

// Draw record of five 32-bit words, copied as-is from input to output.
// NOTE(review): the size matches a 5 x uint32 indexed indirect draw command — confirm.
struct Draw
{
uint payload[5];
};

// Source draw records, addressed by the offset decoded from task_info.
layout(set = 0, binding = 3, std430) readonly buffer InputDraws
{
Draw data[];
} input_draws;

// Destination for the compacted draw records.
//
// count   : running number of draws written; main() reserves ranges in data[]
//           with atomicAdd() on this member.
// padding : 63 uints, so that count + padding span 256 bytes and data[]
//           begins at byte offset 256.
// data    : draw records copied from input_draws.
//
// The block is deliberately NOT declared writeonly: atomicAdd() is a
// read-modify-write operation, so the memory behind count is read as well as
// written.
layout(set = 0, binding = 4, std430) buffer OutputDraws
{
    uint count;
    uint padding[256 / 4 - 1];
    Draw data[];
} output_draws;

// Per-draw side data, written in lockstep with output_draws.data:
// element [draw_offset + i] receives the .yz components of the task that emitted the draw.
layout(set = 0, binding = 5, std430) writeonly buffer CompactedDraws
{
uvec2 data[];
} output_draw_info;

// Push constants.
// count: number of entries of task_info to process; invocations whose global
// index is >= count do nothing in main().
layout(push_constant, std430) uniform Registers
{
uint count;
} registers;

// One invocation per task: decodes the task's draw range, reserves that many
// slots in the output with a single atomic add, then copies the draw records
// and tags each one with the task's .yz payload.
void main()
{
    uint task = gl_GlobalInvocationID.x;

    // Invocations past the end of the task list have nothing to emit.
    if (task >= registers.count)
        return;

    uvec4 info = task_info.data[task];

    // info.w packs the range: low 5 bits hold (draw count - 1), the remaining
    // bits give the index of the first source draw.
    uint first_draw = info.w & ~31u;
    uint num_draws = (info.w & 31u) + 1u;

    // Reserve a contiguous run of output slots for this task.
    uint base = atomicAdd(output_draws.count, num_draws);

    for (uint n = 0; n < num_draws; n++)
    {
        uint dst = base + n;
        output_draws.data[dst] = input_draws.data[first_draw + n];
        output_draw_info.data[dst] = info.yz;
    }
}
10 changes: 8 additions & 2 deletions tests/assets/shaders/meshlet_debug.frag
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
#version 450
#extension GL_EXT_nonuniform_qualifier : require

layout(location = 0) in mediump vec3 vNormal;
layout(location = 1) in mediump vec4 vTangent;
layout(location = 2) in vec2 vUV;
layout(location = 3) flat in uint MaterialOffset;

layout(location = 0) out vec4 FragColor;
layout(location = 0) out vec3 FragColor;

layout(set = 0, binding = 2) uniform sampler DefaultSampler;
layout(set = 2, binding = 0) uniform texture2D Textures[];

void main()
{
FragColor = vec4(vNormal.xyz * 0.5 + 0.5, 1.0);
vec3 color = texture(sampler2D(Textures[MaterialOffset], DefaultSampler), vUV).rgb;
FragColor = color * (0.01 + clamp(dot(vNormal.xyz, vec3(5, 2, 20)), 0.0, 1.0));
}
43 changes: 28 additions & 15 deletions tests/assets/shaders/meshlet_debug.mesh
Original file line number Diff line number Diff line change
Expand Up @@ -16,48 +16,61 @@ layout(constant_id = 0) const uint NUM_U32_STREAMS = MESHLET_PAYLOAD_MAX_STREAMS
#define MESHLET_PAYLOAD_NUM_U32_STREAMS NUM_U32_STREAMS

#define MESHLET_PAYLOAD_DESCRIPTOR_SET 0
#define MESHLET_PAYLOAD_META_BINDING 0
#define MESHLET_PAYLOAD_STREAM_BINDING 1
#define MESHLET_PAYLOAD_PAYLOAD_BINDING 2
#define MESHLET_PAYLOAD_META_BINDING 3
#define MESHLET_PAYLOAD_STREAM_BINDING 4
#define MESHLET_PAYLOAD_PAYLOAD_BINDING 5
#include "meshlet_payload_decode.h"
#include "meshlet_attribute_decode.h"

layout(location = 0) perprimitiveEXT out uint vMeshletIndex[];
layout(location = 1) out mediump vec3 vNormal[];
layout(location = 2) out mediump vec4 vTangent[];
layout(location = 3) out vec2 vUV[];
layout(location = 0) out mediump vec3 vNormal[];
layout(location = 1) out mediump vec4 vTangent[];
layout(location = 2) out vec2 vUV[];
layout(location = 3) perprimitiveEXT out uint MaterialOffset[];

layout(set = 1, binding = 0) uniform UBO
{
mat4 VP;
};

layout(set = 1, binding = 1) uniform UBOModel
layout(set = 0, binding = 1) readonly buffer Transforms
{
mat4 M;
mat4 data[];
} transforms;

struct MeshTask
{
uint meshlet_index;
uint node_instance;
uint node_count_material_index;
};

layout(push_constant) uniform Registers
struct MeshPayload
{
uint offset;
} registers;
MeshTask meshlet[1024];
};

taskPayloadSharedEXT MeshPayload mesh_payload;

void main()
{
uint meshlet_index = gl_WorkGroupID.x + registers.offset;
MeshletMetaRuntime meta = meshlet_metas_runtime.data[meshlet_index];
MeshTask task = mesh_payload.meshlet[gl_WorkGroupID.x];
MeshletMetaRuntime meta = meshlet_metas_runtime.data[task.meshlet_index];
meshlet_init_workgroup(meta.stream_offset);

SetMeshOutputsEXT(meta.num_attributes, meta.num_primitives);

mat4 M = transforms.data[task.node_instance];

#define INDEX(index, value) \
if (index < meta.num_primitives) \
{ \
gl_PrimitiveTriangleIndicesEXT[index] = uvec4(unpack8(value)).xyz; \
vMeshletIndex[index] = meshlet_index; \
}
MESHLET_DECODE_STREAM_32(meta.stream_offset, 0, INDEX);

if (gl_LocalInvocationIndex < meta.num_primitives)
MaterialOffset[gl_LocalInvocationIndex] = bitfieldExtract(task.node_count_material_index, 8, 24);

#define POSITION(index, value) \
if (index < meta.num_attributes) \
{ \
Expand Down
26 changes: 10 additions & 16 deletions tests/assets/shaders/meshlet_debug.mesh.frag
Original file line number Diff line number Diff line change
@@ -1,25 +1,19 @@
#version 450
#extension GL_EXT_mesh_shader : require
#extension GL_EXT_nonuniform_qualifier : require

layout(location = 0) perprimitiveEXT in flat uint vMeshletIndex;
layout(location = 1) in mediump vec3 vNormal;
layout(location = 2) in mediump vec4 vTangent;
layout(location = 3) in vec2 vUV;
layout(location = 0) in mediump vec3 vNormal;
layout(location = 1) in mediump vec4 vTangent;
layout(location = 2) in vec2 vUV;
layout(location = 3) perprimitiveEXT flat in uint MaterialOffset;

layout(location = 0) out vec4 FragColor;
layout(location = 0) out vec3 FragColor;

vec3 decode_mesh_color()
{
uint index = vMeshletIndex * 1991u;
index ^= (index >> 5u);
uint r = bitfieldExtract(index, 0, 2);
uint g = bitfieldExtract(index, 2, 2);
uint b = bitfieldExtract(index, 4, 2);
//return (vec3(r, g, b) + 1.0 / 3.0) / 4.0;
return vec3(1.0);
}
layout(set = 0, binding = 6) uniform sampler DefaultSampler;
layout(set = 2, binding = 0) uniform texture2D Textures[];

void main()
{
FragColor = vec4(decode_mesh_color() * (vNormal.xyz * 0.5 + 0.5), 1.0);
vec3 color = texture(nonuniformEXT(sampler2D(Textures[MaterialOffset], DefaultSampler)), vUV).rgb;
FragColor = color * (0.01 + clamp(dot(vNormal.xyz, vec3(5, 2, 20)), 0.0, 1.0));
}
65 changes: 65 additions & 0 deletions tests/assets/shaders/meshlet_debug.task
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#version 450
#extension GL_EXT_mesh_shader : require
#extension GL_KHR_shader_subgroup_arithmetic : require
#extension GL_KHR_shader_subgroup_ballot : require
layout(local_size_x = 32) in;

// Per-task record read from the Tasks buffer.
// Fields as used by main():
//   aabb_instance             : not read by main()
//   node_instance             : forwarded unchanged into each MeshTask
//   node_count_material_index : forwarded unchanged into each MeshTask
//   mesh_index_count          : bits [0, 5) hold (meshlet count - 1);
//                               (value & ~31u) is the first meshlet index
struct TaskInfo
{
uint aabb_instance;
uint node_instance;
uint node_count_material_index; // Skinning
uint mesh_index_count;
};

// Task list, indexed by gl_GlobalInvocationID.x.
layout(set = 0, binding = 2, std430) readonly buffer Tasks
{
TaskInfo data[];
} task_info;

// Push constants.
// count: number of valid entries in task_info; invocations at or beyond it emit nothing.
layout(push_constant, std430) uniform Registers
{
uint count;
} registers;

// One entry of the payload: which meshlet to process, plus the
// node_instance and node_count_material_index of the task it came from.
struct MeshTask
{
uint meshlet_index;
uint node_instance;
uint node_count_material_index;
};

// Payload handed to the emitted mesh workgroups.
// 1024 entries = 32 invocations (local_size_x) x at most 32 meshlets per task
// (a 5-bit count field plus one).
struct Payload
{
MeshTask meshlet[1024];
};

taskPayloadSharedEXT Payload payload;

// Expands each in-range task into one MeshTask per meshlet, packs them
// contiguously into the shared payload, and emits one mesh workgroup per
// packed entry.
//
// NOTE(review): the packing offsets come from subgroup-scoped operations, so
// this appears to rely on all 32 invocations of the workgroup belonging to a
// single subgroup — confirm the host pins the subgroup size accordingly.
void main()
{
    uint invocation = gl_GlobalInvocationID.x;

    // Stays zero for invocations beyond the end of the task list.
    uint total_meshlets = 0;

    if (invocation < registers.count)
    {
        TaskInfo info = task_info.data[invocation];

        // mesh_index_count packs the range: low 5 bits hold (count - 1), the
        // remaining bits give the first meshlet index.
        uint first_meshlet = info.mesh_index_count & ~31u;
        uint num_meshlets = (info.mesh_index_count & 31u) + 1u;

        // Exclusive prefix sum gives this invocation's first payload slot;
        // the full sum is the number of mesh workgroups to launch.
        uint write_base = subgroupExclusiveAdd(num_meshlets);
        total_meshlets = subgroupAdd(num_meshlets);

        for (uint n = 0; n < num_meshlets; n++)
        {
            payload.meshlet[write_base + n] =
                MeshTask(first_meshlet + n, info.node_instance, info.node_count_material_index);
        }
    }

    // Called unconditionally, outside the bounds check above.
    EmitMeshTasksEXT(total_meshlets, 1, 1);
}
Loading

0 comments on commit 7a4230f

Please sign in to comment.