Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

clusterizer: Implement experimental meshlet optimizer #673

Merged
merged 7 commits into from
Apr 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,12 @@ meshlets.resize(meshlet_count);

However, depending on the application, other strategies of storing the data can be useful; for example, `meshlet_vertices` serves as indices into the original vertex buffer but it might be worthwhile to generate a mini vertex buffer for each meshlet to remove the extra indirection when accessing vertex data, or it might be desirable to compress vertex data as vertices in each meshlet are likely to be very spatially coherent.

For optimal performance, it is recommended to further optimize each meshlet in isolation for better triangle and vertex locality by calling `meshopt_optimizeMeshlet` on vertex and index data like so:

```c++
meshopt_optimizeMeshlet(&meshlet_vertices[m.vertex_offset], &meshlet_triangles[m.triangle_offset], m.triangle_count, m.vertex_count);
```

After generating the meshlet data, it's also possible to generate extra data for each meshlet that can be saved and used at runtime to perform cluster culling, where each meshlet can be discarded if it's guaranteed to be invisible. To generate the data, `meshopt_computeMeshletBounds` can be used:

```c++
Expand Down
3 changes: 3 additions & 0 deletions demo/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -866,6 +866,9 @@ void meshlets(const Mesh& mesh, bool scan)
else
meshlets.resize(meshopt_buildMeshlets(&meshlets[0], &meshlet_vertices[0], &meshlet_triangles[0], &mesh.indices[0], mesh.indices.size(), &mesh.vertices[0].px, mesh.vertices.size(), sizeof(Vertex), max_vertices, max_triangles, cone_weight));

for (size_t i = 0; i < meshlets.size(); ++i)
meshopt_optimizeMeshlet(&meshlet_vertices[meshlets[i].vertex_offset], &meshlet_triangles[meshlets[i].triangle_offset], meshlets[i].triangle_count, meshlets[i].vertex_count);

if (meshlets.size())
{
const meshopt_Meshlet& last = meshlets.back();
Expand Down
90 changes: 90 additions & 0 deletions src/clusterizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -882,3 +882,93 @@ meshopt_Bounds meshopt_computeMeshletBounds(const unsigned int* meshlet_vertices

return meshopt_computeClusterBounds(indices, triangle_count * 3, vertex_positions, vertex_count, vertex_positions_stride);
}

void meshopt_optimizeMeshlet(unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, size_t triangle_count, size_t vertex_count)
{
using namespace meshopt;

assert(triangle_count <= kMeshletMaxTriangles);
assert(vertex_count <= kMeshletMaxVertices);

unsigned char* indices = meshlet_triangles;
unsigned int* vertices = meshlet_vertices;

// cache tracks vertex timestamps (corresponding to triangle index! all 3 vertices are added at the same time and never removed)
unsigned char cache[kMeshletMaxVertices];
memset(cache, 0, vertex_count);

// note that we start from a value that means all vertices aren't in cache
unsigned char cache_last = 128;
const unsigned char cache_cutoff = 3; // 3 triangles = ~5..9 vertices depending on reuse

for (size_t i = 0; i < triangle_count; ++i)
{
int next = -1;
int next_match = -1;

for (size_t j = i; j < triangle_count; ++j)
{
unsigned char a = indices[j * 3 + 0], b = indices[j * 3 + 1], c = indices[j * 3 + 2];
assert(a < vertex_count && b < vertex_count && c < vertex_count);

// score each triangle by how many vertices are in cache
// note: the distance is computed using unsigned 8-bit values, so cache timestamp overflow is handled gracefully
int aok = (unsigned char)(cache_last - cache[a]) < cache_cutoff;
int bok = (unsigned char)(cache_last - cache[b]) < cache_cutoff;
int cok = (unsigned char)(cache_last - cache[c]) < cache_cutoff;

if (aok + bok + cok > next_match)
{
next = (int)j;
next_match = aok + bok + cok;

// note that we could end up with all 3 vertices in the cache, but 2 is enough for ~strip traversal
if (next_match >= 2)
break;
}
}

assert(next >= 0);

unsigned char a = indices[next * 3 + 0], b = indices[next * 3 + 1], c = indices[next * 3 + 2];

// shift triangles before the next one forward so that we always keep an ordered partition
// note: this could have swapped triangles [i] and [next] but that distorts the order and may skew the output sequence
memmove(indices + (i + 1) * 3, indices + i * 3, (next - i) * 3 * sizeof(unsigned char));

indices[i * 3 + 0] = a;
indices[i * 3 + 1] = b;
indices[i * 3 + 2] = c;

// cache timestamp is the same between all vertices of each triangle to reduce overflow
cache_last++;
cache[a] = cache_last;
cache[b] = cache_last;
cache[c] = cache_last;
}

// reorder meshlet vertices for access locality assuming index buffer is scanned sequentially
unsigned int order[kMeshletMaxVertices];

unsigned char remap[kMeshletMaxVertices];
memset(remap, -1, vertex_count);

size_t vertex_offset = 0;

for (size_t i = 0; i < triangle_count * 3; ++i)
{
unsigned char& r = remap[indices[i]];

if (r == 0xff)
{
r = (unsigned char)(vertex_offset);
order[vertex_offset] = vertices[indices[i]];
vertex_offset++;
}

indices[i] = r;
}

assert(vertex_offset <= vertex_count);
memcpy(vertices, order, vertex_offset * sizeof(unsigned int));
}
10 changes: 10 additions & 0 deletions src/meshoptimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,16 @@ MESHOPTIMIZER_API size_t meshopt_buildMeshlets(struct meshopt_Meshlet* meshlets,
MESHOPTIMIZER_API size_t meshopt_buildMeshletsScan(struct meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles);
MESHOPTIMIZER_API size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles);

/**
 * Experimental: Meshlet optimizer
 * Reorders meshlet vertices and triangles to maximize locality to improve rasterizer throughput
 *
 * Both arrays are modified in place: triangles in meshlet_triangles are re-sequenced and their indices renumbered,
 * and meshlet_vertices is permuted to match the new numbering. Only entries referenced by at least one triangle are
 * rewritten (as a compacted prefix of meshlet_vertices); any remaining tail entries are left untouched.
 *
 * meshlet_triangles and meshlet_vertices must refer to meshlet triangle and vertex index data; when buildMeshlets* is used, these
 * need to be computed from meshlet's vertex_offset and triangle_offset
 * meshlet_triangles must contain triangle_count * 3 indices, each of which must be less than vertex_count
 * triangle_count and vertex_count must not exceed implementation limits (vertex_count <= 255 - not 256!, triangle_count <= 512)
 */
MESHOPTIMIZER_EXPERIMENTAL void meshopt_optimizeMeshlet(unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, size_t triangle_count, size_t vertex_count);

struct meshopt_Bounds
{
/* bounding sphere, useful for frustum and occlusion culling */
Expand Down