First published: 2024-02-25
Last updated: 2024-02-25
About the Mesh Shading Series
This post is part 7 of a series about mesh shading. My intent in this series is to introduce the various parts of mesh shading in an easy to understand fashion. Well, as easy as I can make it. My objective isn’t to convince you to use mesh shading. I assume you’re reading this post because you’re already interested in mesh shading. Instead, my objective is to explain the mechanics of how to do mesh shading in Direct3D 12, Metal, and Vulkan as best I can. My hope is that you’re able to use this information in your own graphics projects and experiments.
- Mesh Shading Part 1: Rendering Meshlets
- Mesh Shading Part 2: Amplification
- Mesh Shading Part 3: Instancing
- Mesh Shading Part 4: Culling
- Mesh Shading Part 5: LOD Selection
- Mesh Shading Part 6: LOD Calculation
- Mesh Shading Part 7: Culling + LOD
- Mesh Shading Part 8: Vertex Attributes (TBD)
- Mesh Shading Part 9: Barycentric Interpolation (TBD)
Sample Projects for This Post
117_mesh_shader_cull_lod - Demonstrates how to calculate LOD using distance to camera during amplification.
The D3D12 version of the above samples displays pipeline statistics. The Metal and Vulkan versions do not display pipeline statistics for different reasons. Metal doesn’t have pipeline statistics. Turning on pipeline statistics on the Vulkan version tanks the performance. I haven’t had a chance to investigate why this is and how it affects the various GPUs.
Introduction
In the previous posts we’ve covered culling and LOD. Since both of these are used together in some cases, it seems natural to combine them.
117_mesh_shader_cull_lod builds on 116_mesh_shader_calc_lod by adding in the data and logic for culling from 114_mesh_shader_culling. The details for the data and logic are discussed in Mesh Shading Part 4: Culling.
The changes are relatively straightforward so this post should be on the shorter side.
Changes to SceneProperties Constant Data
We use the SceneProperties
from 116_mesh_shader_calc_lod as a starting point.
We add in the fields needed for culling: Frustum
, VisibilityFunc
, and EnableLOD
to SceneProperties.
EnableLOD is the only new field here and it’s discussed a bit later in this post.
// -----------------------------------------------------------------------------
// D3D12 and Vulkan
// -----------------------------------------------------------------------------
// CPU-side scene constants for the D3D12 and Vulkan samples.
// NOTE(review): the __pad fields and uint4 array elements presumably mirror the
// shader's constant buffer packing - confirm against the HLSL SceneProperties.
struct SceneProperties
{
float3 EyePosition; // Set from camera.GetEyePosition()
uint __pad0; // Explicit padding between EyePosition and CameraVP
float4x4 CameraVP; // Set from camera.GetViewProjectionMatrix()
FrustumData Frustum; // Frustum planes, sphere, and cone used by the visibility tests
uint InstanceCount; // Number of instance transforms
uint MeshletCount; // Set to the LOD 0 meshlet count
uint VisibilityFunc; // Selects which visibility test is applied per meshlet
float MaxLODDistance; // Use least detail level at or beyond this distance
uint4 Meshlet_LOD_Offsets[5]; // Align array element to 16 bytes; only .x is written
uint4 Meshlet_LOD_Counts[5]; // Align array element to 16 bytes; only .x is written
float3 MeshBoundsMin; // Set from meshBounds.min
uint __pad1; // Explicit padding between MeshBoundsMin and MeshBoundsMax
float3 MeshBoundsMax; // Set from meshBounds.max
uint EnableLOD; // Non-zero: select LOD by camera distance; zero: always LOD 0
};
// -----------------------------------------------------------------------------
// Metal
// -----------------------------------------------------------------------------
//
// NOTE: Unlike D3D12 and Vulkan, it looks like Metal arrays are tightly
// packed for 32-bit scalar types. This means that Meshlet_LOD_Offsets
// and Meshlet_LOD_Counts are uint here instead of uint4/uvec4.
//
// CPU-side scene constants for the Metal sample.
// NOTE(review): this layout presumably has to match the MSL SceneProperties
// byte-for-byte - confirm the float3 type used here is 12 bytes wide.
struct SceneProperties
{
float3 EyePosition; // Set from camera.GetEyePosition()
uint __pad0; // Explicit padding between EyePosition and CameraVP
float4x4 CameraVP; // Set from camera.GetViewProjectionMatrix()
FrustumData Frustum; // Frustum planes, sphere, and cone used by the visibility tests
uint InstanceCount; // Number of instance transforms
uint MeshletCount; // Set to the LOD 0 meshlet count
uint VisibilityFunc; // Selects which visibility test is applied per meshlet
float MaxLODDistance; // Use least detail level at or beyond this distance
uint Meshlet_LOD_Offsets[5]; // Plain uint elements: tightly packed, NOT aligned to 16 bytes
uint Meshlet_LOD_Counts[5]; // Plain uint elements: tightly packed, NOT aligned to 16 bytes
float3 MeshBoundsMin; // Set from meshBounds.min
float3 MeshBoundsMax; // Set from meshBounds.max
uint EnableLOD; // Non-zero: select LOD by camera distance; zero: always LOD 0
uint __pad1[3]; // Make struct size aligned to 16
};
Updating SceneProperties Constant Data
We combine the updates for instancing and culling for SceneProperties
and add an update for scene.EnableLOD
.
// -----------------------------------------------------------------------------
// D3D12 and Vulkan
// -----------------------------------------------------------------------------
scene.EyePosition = camera.GetEyePosition();
scene.CameraVP = camera.GetViewProjectionMatrix();
scene.Frustum.Planes[FRUSTUM_PLANE_LEFT] = {frLeft.Normal, 0.0f, frLeft.Position, 0.0f};
scene.Frustum.Planes[FRUSTUM_PLANE_RIGHT] = {frRight.Normal, 0.0f, frRight.Position, 0.0f};
scene.Frustum.Planes[FRUSTUM_PLANE_TOP] = {frTop.Normal, 0.0f, frTop.Position, 0.0f};
scene.Frustum.Planes[FRUSTUM_PLANE_BOTTOM] = {frBottom.Normal, 0.0f, frBottom.Position, 0.0f};
scene.Frustum.Planes[FRUSTUM_PLANE_NEAR] = {frNear.Normal, 0.0f, frNear.Position, 0.0f};
scene.Frustum.Planes[FRUSTUM_PLANE_FAR] = {frFar.Normal, 0.0f, frFar.Position, 0.0f};
scene.Frustum.Sphere = camera.GetFrustumSphere();
scene.Frustum.Cone.Tip = frCone.Tip;
scene.Frustum.Cone.Height = frCone.Height;
scene.Frustum.Cone.Direction = frCone.Dir;
scene.Frustum.Cone.Angle = frCone.Angle;
scene.InstanceCount = static_cast<uint32_t>(instances.size());
scene.MeshletCount = meshlet_LOD_Counts[0];
scene.VisibilityFunc = gVisibilityFunc;
scene.MaxLODDistance = gMaxLODDistance;
scene.Meshlet_LOD_Offsets[0].x = meshlet_LOD_Offsets[0];
scene.Meshlet_LOD_Offsets[1].x = meshlet_LOD_Offsets[1];
scene.Meshlet_LOD_Offsets[2].x = meshlet_LOD_Offsets[2];
scene.Meshlet_LOD_Offsets[3].x = meshlet_LOD_Offsets[3];
scene.Meshlet_LOD_Offsets[4].x = meshlet_LOD_Offsets[4];
scene.Meshlet_LOD_Counts[0].x = meshlet_LOD_Counts[0];
scene.Meshlet_LOD_Counts[1].x = meshlet_LOD_Counts[1];
scene.Meshlet_LOD_Counts[2].x = meshlet_LOD_Counts[2];
scene.Meshlet_LOD_Counts[3].x = meshlet_LOD_Counts[3];
scene.Meshlet_LOD_Counts[4].x = meshlet_LOD_Counts[4];
scene.MeshBoundsMin = float3(meshBounds.min);
scene.MeshBoundsMax = float3(meshBounds.max);
scene.EnableLOD = gEnableLOD;
// -----------------------------------------------------------------------------
// Metal
// -----------------------------------------------------------------------------
scene.EyePosition = camera.GetEyePosition();
scene.CameraVP = camera.GetViewProjectionMatrix();
scene.Frustum.Planes[FRUSTUM_PLANE_LEFT] = {frLeft.Normal, 0.0f, frLeft.Position, 0.0f};
scene.Frustum.Planes[FRUSTUM_PLANE_RIGHT] = {frRight.Normal, 0.0f, frRight.Position, 0.0f};
scene.Frustum.Planes[FRUSTUM_PLANE_TOP] = {frTop.Normal, 0.0f, frTop.Position, 0.0f};
scene.Frustum.Planes[FRUSTUM_PLANE_BOTTOM] = {frBottom.Normal, 0.0f, frBottom.Position, 0.0f};
scene.Frustum.Planes[FRUSTUM_PLANE_NEAR] = {frNear.Normal, 0.0f, frNear.Position, 0.0f};
scene.Frustum.Planes[FRUSTUM_PLANE_FAR] = {frFar.Normal, 0.0f, frFar.Position, 0.0f};
scene.Frustum.Sphere = camera.GetFrustumSphere();
scene.Frustum.Cone.Tip = frCone.Tip;
scene.Frustum.Cone.Height = frCone.Height;
scene.Frustum.Cone.Direction = frCone.Dir;
scene.Frustum.Cone.Angle = frCone.Angle;
scene.InstanceCount = static_cast<uint32_t>(instances.size());
scene.MeshletCount = meshlet_LOD_Counts[0];
scene.VisibilityFunc = gVisibilityFunc;
scene.MaxLODDistance = gMaxLODDistance;
scene.Meshlet_LOD_Offsets[0] = meshlet_LOD_Offsets[0];
scene.Meshlet_LOD_Offsets[1] = meshlet_LOD_Offsets[1];
scene.Meshlet_LOD_Offsets[2] = meshlet_LOD_Offsets[2];
scene.Meshlet_LOD_Offsets[3] = meshlet_LOD_Offsets[3];
scene.Meshlet_LOD_Offsets[4] = meshlet_LOD_Offsets[4];
scene.Meshlet_LOD_Counts[0] = meshlet_LOD_Counts[0];
scene.Meshlet_LOD_Counts[1] = meshlet_LOD_Counts[1];
scene.Meshlet_LOD_Counts[2] = meshlet_LOD_Counts[2];
scene.Meshlet_LOD_Counts[3] = meshlet_LOD_Counts[3];
scene.Meshlet_LOD_Counts[4] = meshlet_LOD_Counts[4];
scene.MeshBoundsMin = float3(meshBounds.min);
scene.MeshBoundsMax = float3(meshBounds.max);
scene.EnableLOD = gEnableLOD;
Toggle for LOD
We add a toggle for LOD, scene.EnableLOD
, so we can easily see the difference between LOD and no LOD.
If you happen to be running on a GPU with pipeline statistics, you can see the effect that LOD has on the mesh shader invocations and mesh shader primitives. On my system, when LOD is enabled, mesh shader invocations and mesh shader primitives are an order of magnitude less than when LOD is not enabled.
Instances
To get the full effects of culling and LOD control, we’ll use the same number of instances as the culling sample, 114_mesh_shader_culling, which is 1600.
// Instance grid dimensions: 40 columns x 40 rows = 1600 instances,
// with one model transform stored per instance.
constexpr uint32_t kNumInstanceCols = 40;
constexpr uint32_t kNumInstanceRows = 40;

std::vector<float4x4> instances(kNumInstanceCols * kNumInstanceRows);
Instance Positions
We’ll also borrow the instance positions from 114_mesh_shader_culling.
// Update instance transforms
//
// Places the instances on a kNumInstanceCols x kNumInstanceRows grid in the XZ
// plane (y = 0), centered on the origin, and rotates each one about the Y axis
// by a time-based angle. farDist is set to the larger of the grid's total
// X and Z spans.
float farDist = 1000.0f;
{
    // Grid cell size is derived from the larger of the mesh's width and depth
    const float maxSpan       = std::max<float>(meshBounds.Width(), meshBounds.Depth());
    const float instanceSpanX = 4.0f * maxSpan;
    const float instanceSpanZ = 4.5f * maxSpan;
    const float totalSpanX    = kNumInstanceCols * instanceSpanX;
    const float totalSpanZ    = kNumInstanceRows * instanceSpanZ;
    farDist                   = std::max(totalSpanX, totalSpanZ);

    // Sample the clock once per update so every instance is animated from the
    // same timestamp, instead of calling glfwGetTime() once per instance.
    const float now = static_cast<float>(glfwGetTime());

    for (uint32_t j = 0; j < kNumInstanceRows; ++j)
    {
        // z only depends on the row, so compute it once per row
        const float z = j * instanceSpanZ - (totalSpanZ / 2.0f) + instanceSpanZ / 2.0f;

        for (uint32_t i = 0; i < kNumInstanceCols; ++i)
        {
            const float    x     = i * instanceSpanX - (totalSpanX / 2.0f) + instanceSpanX / 2.0f;
            const float    y     = 0;
            const uint32_t index = j * kNumInstanceCols + i;

            // Per-instance phase offset so the instances do not spin in lockstep.
            // Note: '+' binds tighter than '^', so this is i ^ (j + i); the inner
            // parentheses only make that grouping explicit.
            const float t = now + ((i ^ (j + i)) / 10.0f);

            instances[index] = glm::translate(float3(x, y, z)) * glm::rotate(t, float3(0, 1, 0));
        }
    }
}
That’s all for changes in the C++ code. Next, let’s take a look at changes to the amplification shader code.
Amplification Shader
I’m purposely leaving the visibility functions out of this section to make this post shorter. If you need a refresher, you can find them in Mesh Shading Part 4: Culling or HLSL and MSL Shaders.
Amplification shader changes follow the same model as the LOD posts:
- Add fields to SceneProperties.
- Change logic in the amplification shader body to use the added fields.
Should be straightforward at this point.
Changes to SceneProperties Constant Data
Add Frustum
, VisibilityFunc
, and EnableLOD
to SceneProperties
. Again, if you’d like to see what FrustumData
looks like, you can find it in Mesh Shading Part 4: Culling or HLSL and MSL Shaders.
// -----------------------------------------------------------------------------
// HLSL
// -----------------------------------------------------------------------------
// Scene constants as read by the HLSL shaders.
// NOTE(review): field order must stay in sync with the CPU-side struct. The
// layout appears to rely on HLSL constant buffer packing (16-byte registers,
// each array element starting a new register) - confirm before reordering.
struct SceneProperties {
float3 EyePosition; // Camera eye position; used in the LOD distance calculation
float4x4 CameraVP; // Camera view-projection matrix
FrustumData Frustum; // Frustum data consumed by the VisibleFrustum* functions
uint InstanceCount; // Threads with instanceIndex >= InstanceCount are skipped
uint MeshletCount; // Divisor used to split the thread ID into instance and meshlet index
uint VisibilityFunc; // One of the VISIBILITY_FUNC_* values
float MaxLODDistance; // Distance at (or beyond) which the last LOD is selected
uint Meshlet_LOD_Offsets[5]; // Added to meshletIndex to reach the selected LOD's meshlets
uint Meshlet_LOD_Counts[5]; // Number of meshlets in each LOD
uint3 __pad1; // Padding; presumably fills the register left open by the last Meshlet_LOD_Counts element - confirm
float3 MeshBoundsMin; // Mesh bounding box minimum (before the instance transform)
float3 MeshBoundsMax; // Mesh bounding box maximum (before the instance transform)
uint EnableLOD; // Non-zero enables LOD selection; zero keeps LOD 0
};
// -----------------------------------------------------------------------------
// MSL
// -----------------------------------------------------------------------------
// Scene constants as read by the MSL shaders.
// NOTE(review): field order must stay in sync with the CPU-side Metal struct.
// packed_float3 is presumably used so the three-component fields match the
// CPU-side float3 size - confirm before changing any field type.
struct SceneProperties {
packed_float3 EyePosition; // Camera eye position; used in the LOD distance calculation
float4x4 CameraVP; // Camera view-projection matrix
FrustumData Frustum; // Frustum data consumed by the VisibleFrustum* functions
uint InstanceCount; // Threads with instanceIndex >= InstanceCount are skipped
uint MeshletCount; // Divisor used to split the thread ID into instance and meshlet index
uint VisibilityFunc; // One of the VISIBILITY_FUNC_* values
float MaxLODDistance; // Distance at (or beyond) which the last LOD is selected
uint Meshlet_LOD_Offsets[5]; // Added to meshletIndex to reach the selected LOD's meshlets
uint Meshlet_LOD_Counts[5]; // Number of meshlets in each LOD
packed_float3 MeshBoundsMin; // Mesh bounding box minimum (before the instance transform)
packed_float3 MeshBoundsMax; // Mesh bounding box maximum (before the instance transform)
uint EnableLOD; // Non-zero enables LOD selection; zero keeps LOD 0
};
Combining Logic for LOD and Culling
Combining the logic for LOD and culling is also relatively straightforward.
Just as with the LOD-only shaders, the first thing we need to calculate the LOD is the instance’s model transform matrix.
// Instance's model transform matrix
// (used below for both the LOD distance and the meshlet bounding sphere)
float4x4 M = Instances[instanceIndex].M;
Next we calculate the LOD. We start out by assuming that the LOD is 0. If Scene.EnableLOD
is true, then do the LOD calculation to find out which LOD the instance falls into. If the instance’s distance from the camera is equal to or greater than Scene.MaxLODDistance
, it will use the LOD with the lowest level of detail, which is LOD 4 in our case.
// Assume LOD 0
uint lod = 0;
// LOD selection only runs when the toggle is non-zero; otherwise lod stays 0
if (Scene.EnableLOD) {
// Get center of transformed bounding box to use in LOD distance calculation
float4 instanceBoundsMinWS = M * float4(Scene.MeshBoundsMin, 1.0);
float4 instanceBoundsMaxWS = M * float4(Scene.MeshBoundsMax, 1.0);
float4 instanceCenter = (instanceBoundsMinWS + instanceBoundsMaxWS) / 2.0;
// Distance between transformed bounding box and camera eye position
float dist = distance(instanceCenter.xyz, Scene.EyePosition);
// Normalize distance using MaxLODDistance
// (clamped, so anything at or beyond MaxLODDistance maps to 1.0)
float ndist = clamp(dist / Scene.MaxLODDistance, 0.0, 1.0);
// Calculate LOD using normalized distance
// (ndist == 1.0 yields MAX_LOD_COUNT - 1, the last LOD; the cast truncates)
lod = (uint)(pow(ndist, 0.65) * (MAX_LOD_COUNT - 1));
}
And then finally, we determine the visibility of the meshlet at the selected LOD. The if/else if
chunk is straightforward, I’ll trust you can grok it easily.
// Get meshlet count for the LOD
uint lodMeshletCount = Scene.Meshlet_LOD_Counts[lod];
// Threads whose meshlet index falls outside the selected LOD's meshlet count
// skip the visibility test entirely
if (meshletIndex < lodMeshletCount) {
// Rebase the index from "meshlet within this LOD" to the LOD's meshlet range
meshletIndex += Scene.Meshlet_LOD_Offsets[lod];
// Transform meshlet's bounding sphere into world space
float4 meshletBoundingSphere = M * float4(MeshletBounds[meshletIndex].xyz, 1.0);
// The transform only applies to the center; w is copied over unchanged
meshletBoundingSphere.w = MeshletBounds[meshletIndex].w;
// Run the visibility test selected by VisibilityFunc; if no branch matches,
// visible keeps the value it had before this chain
if (Scene.VisibilityFunc == VISIBILITY_FUNC_NONE) {
visible = 1;
}
else if (Scene.VisibilityFunc == VISIBILITY_FUNC_PLANES) {
visible = VisibleFrustumPlanes(Scene, meshletBoundingSphere) ? 1 : 0;
}
else if (Scene.VisibilityFunc == VISIBILITY_FUNC_SPHERE) {
visible = VisibleFrustumSphere(Scene, meshletBoundingSphere) ? 1 : 0;
}
else if (Scene.VisibilityFunc == VISIBILITY_FUNC_CONE) {
visible = VisibleFrustumCone(Scene, meshletBoundingSphere) ? 1 : 0;
}
else if (Scene.VisibilityFunc == VISIBILITY_FUNC_CONE_AND_NEAR_PLANE) {
visible = VisibleFrustumConeAndNearPlane(Scene, meshletBoundingSphere) ? 1 : 0;
}
}
Full Shader Body
HLSL for D3D12 and Vulkan
// Returns the LOD index for an instance with model transform M.
// LOD 0 is returned when Scene.EnableLOD is zero. Otherwise the LOD grows with
// the distance between the camera eye position and the center of the
// transformed mesh bounds, reaching MAX_LOD_COUNT - 1 at Scene.MaxLODDistance.
uint SelectInstanceLOD(float4x4 M)
{
    if (Scene.EnableLOD == 0) {
        return 0;
    }

    // Center of the transformed bounding box corners
    float4 boundsMinWS = mul(M, float4(Scene.MeshBoundsMin, 1.0));
    float4 boundsMaxWS = mul(M, float4(Scene.MeshBoundsMax, 1.0));
    float4 centerWS    = (boundsMinWS + boundsMaxWS) / 2.0;

    // Camera distance, normalized to [0, 1] by MaxLODDistance
    float eyeDist  = distance(centerWS.xyz, Scene.EyePosition);
    float normDist = clamp(eyeDist / Scene.MaxLODDistance, 0.0, 1.0);

    return (uint)(pow(normDist, 0.65) * (MAX_LOD_COUNT - 1));
}

// Runs the visibility test selected by Scene.VisibilityFunc on a world space
// bounding sphere (xyz = center, w = radius). An unrecognized VisibilityFunc
// value reports the meshlet as not visible.
bool IsMeshletVisible(float4 sphereWS)
{
    uint func = Scene.VisibilityFunc;

    if (func == VISIBILITY_FUNC_NONE) {
        return true;
    }
    if (func == VISIBILITY_FUNC_PLANES) {
        return VisibleFrustumPlanes(sphereWS);
    }
    if (func == VISIBILITY_FUNC_SPHERE) {
        return VisibleFrustumSphere(sphereWS);
    }
    if (func == VISIBILITY_FUNC_CONE) {
        return VisibleFrustumCone(sphereWS);
    }
    if (func == VISIBILITY_FUNC_CONE_AND_NEAR_PLANE) {
        return VisibleFrustumConeAndNearPlane(sphereWS);
    }
    return false;
}

// Amplification shader entry point. Each thread handles one (instance, meshlet)
// pair: it selects the instance's LOD, culls the meshlet, and the wave then
// compacts the surviving pairs into the payload and dispatches one mesh shader
// group per visible meshlet.
[numthreads(AS_GROUP_SIZE, 1, 1)]
void asmain(
    uint gtid : SV_GroupThreadID,
    uint dtid : SV_DispatchThreadID,
    uint gid  : SV_GroupID
)
{
    // Split the flat thread ID into an instance index and a meshlet index
    uint instanceIndex = dtid / Scene.MeshletCount;
    uint meshletIndex  = dtid % Scene.MeshletCount;
    bool visible       = false;

    // Threads past the last instance contribute nothing
    if (instanceIndex < Scene.InstanceCount) {
        float4x4 M   = Instances[instanceIndex].M;
        uint     lod = SelectInstanceLOD(M);

        // Only meshlets that exist in the selected LOD are tested
        if (meshletIndex < Scene.Meshlet_LOD_Counts[lod]) {
            meshletIndex += Scene.Meshlet_LOD_Offsets[lod];

            // World space bounding sphere: transformed center, untouched radius
            float4 localSphere = MeshletBounds[meshletIndex];
            float4 worldSphere = float4(mul(M, float4(localSphere.xyz, 1.0)).xyz, localSphere.w);

            visible = IsMeshletVisible(worldSphere);
        }
    }

    // Visible threads write their pair into consecutive payload slots
    if (visible) {
        uint index = WavePrefixCountBits(visible);
        sPayload.InstanceIndices[index] = instanceIndex;
        sPayload.MeshletIndices[index]  = meshletIndex;
    }

    uint visibleCount = WaveActiveCountBits(visible);
    DispatchMesh(visibleCount, 1, 1, sPayload);
}
MSL for Metal
// Returns the LOD index for an instance with model transform M.
// LOD 0 is returned when Scene.EnableLOD is zero. Otherwise the LOD grows with
// the distance between the camera eye position and the center of the
// transformed mesh bounds, reaching MAX_LOD_COUNT - 1 at Scene.MaxLODDistance.
static uint SelectInstanceLOD(constant SceneProperties& Scene, float4x4 M)
{
    if (Scene.EnableLOD == 0) {
        return 0;
    }

    // Center of the transformed bounding box corners
    float4 boundsMinWS = M * float4(Scene.MeshBoundsMin, 1.0);
    float4 boundsMaxWS = M * float4(Scene.MeshBoundsMax, 1.0);
    float4 centerWS    = (boundsMinWS + boundsMaxWS) / 2.0;

    // Camera distance, normalized to [0, 1] by MaxLODDistance
    float eyeDist  = distance(centerWS.xyz, Scene.EyePosition);
    float normDist = clamp(eyeDist / Scene.MaxLODDistance, 0.0, 1.0);

    return (uint)(pow(normDist, 0.65) * (MAX_LOD_COUNT - 1));
}

// Runs the visibility test selected by Scene.VisibilityFunc on a world space
// bounding sphere (xyz = center, w = radius). An unrecognized VisibilityFunc
// value reports the meshlet as not visible.
static bool IsMeshletVisible(constant SceneProperties& Scene, float4 sphereWS)
{
    uint func = Scene.VisibilityFunc;

    if (func == VISIBILITY_FUNC_NONE) {
        return true;
    }
    if (func == VISIBILITY_FUNC_PLANES) {
        return VisibleFrustumPlanes(Scene, sphereWS);
    }
    if (func == VISIBILITY_FUNC_SPHERE) {
        return VisibleFrustumSphere(Scene, sphereWS);
    }
    if (func == VISIBILITY_FUNC_CONE) {
        return VisibleFrustumCone(Scene, sphereWS);
    }
    if (func == VISIBILITY_FUNC_CONE_AND_NEAR_PLANE) {
        return VisibleFrustumConeAndNearPlane(Scene, sphereWS);
    }
    return false;
}

// Object shader entry point. Each thread handles one (instance, meshlet) pair:
// it selects the instance's LOD, culls the meshlet, and the SIMD group then
// compacts the surviving pairs into the payload and requests one mesh
// threadgroup per visible meshlet.
[[object]]
void objectMain(
    constant SceneProperties&  Scene         [[buffer(0)]],
    device const float4*       MeshletBounds [[buffer(1)]],
    device const Instance*     Instances     [[buffer(2)]],
    uint                       gtid          [[thread_position_in_threadgroup]],
    uint                       dtid          [[thread_position_in_grid]],
    object_data Payload&       outPayload    [[payload]],
    mesh_grid_properties       outGrid)
{
    // Split the flat thread ID into an instance index and a meshlet index
    uint instanceIndex = dtid / Scene.MeshletCount;
    uint meshletIndex  = dtid % Scene.MeshletCount;
    uint visible       = 0;

    // Threads past the last instance contribute nothing
    if (instanceIndex < Scene.InstanceCount) {
        float4x4 M   = Instances[instanceIndex].M;
        uint     lod = SelectInstanceLOD(Scene, M);

        // Only meshlets that exist in the selected LOD are tested
        if (meshletIndex < Scene.Meshlet_LOD_Counts[lod]) {
            meshletIndex += Scene.Meshlet_LOD_Offsets[lod];

            // World space bounding sphere: transformed center, untouched radius
            float4 localSphere = MeshletBounds[meshletIndex];
            float4 worldSphere = float4((M * float4(localSphere.xyz, 1.0)).xyz, localSphere.w);

            visible = IsMeshletVisible(Scene, worldSphere) ? 1 : 0;
        }
    }

    // Visible threads write their pair into consecutive payload slots
    if (visible) {
        uint index = simd_prefix_exclusive_sum(visible);
        outPayload.InstanceIndices[index] = instanceIndex;
        outPayload.MeshletIndices[index]  = meshletIndex;
    }

    uint visibleCount = simd_sum(visible);
    outGrid.set_threadgroups_per_grid(uint3(visibleCount, 1, 1));
}
Rendered Image
The 117_mesh_shader_cull_lod renders 1600 instances with the LOD calculated using distance to camera.