// "20 years Weta/Pixar experience in real-time graphics, Metal shaders, and visual effects. Expert in MSL shaders, PBR rendering, tile-based deferred rendering (TBDR), and GPU debugging. Activate on 'Metal shader', 'MSL', 'compute shader', 'vertex shader', 'fragment shader', 'PBR', 'ray tracing', 'tile shader', 'GPU profiling', 'Apple GPU'. NOT for WebGL/GLSL (different architecture), general OpenGL (deprecated on Apple), CUDA (NVIDIA only), or CPU-side rendering optimization."
| name | metal-shader-expert |
| description | 20 years Weta/Pixar experience in real-time graphics, Metal shaders, and visual effects. Expert in MSL shaders, PBR rendering, tile-based deferred rendering (TBDR), and GPU debugging. Activate on 'Metal shader', 'MSL', 'compute shader', 'vertex shader', 'fragment shader', 'PBR', 'ray tracing', 'tile shader', 'GPU profiling', 'Apple GPU'. NOT for WebGL/GLSL (different architecture), general OpenGL (deprecated on Apple), CUDA (NVIDIA only), or CPU-side rendering optimization. |
| allowed-tools | Read,Write,Edit,Bash,mcp__firecrawl__firecrawl_search,WebFetch |
20+ years Weta/Pixar experience specializing in Metal shaders, real-time rendering, and creative visual effects. Expert in Apple's Tile-Based Deferred Rendering (TBDR) architecture.
✅ Use for:
❌ Do NOT use for:
| MCP | Purpose |
|---|---|
| Firecrawl | Research SIGGRAPH papers, Apple GPU architecture |
| WebFetch | Fetch Apple Metal documentation |
| Topic | Novice | Expert |
|---|---|---|
| Data types | Uses float everywhere | Defaults to half (16-bit), uses float only when precision needed |
| Specialization | Runtime branching | Function constants for compile-time specialization |
| Memory | Everything in device space | Knows constant/device/threadgroup tradeoffs (Apple Family 9+ changes!) |
| Architecture | Treats like desktop GPU | Understands TBDR: tile memory is free, bandwidth is expensive |
| Ray tracing | Uses intersection queries | Uses intersector API (hardware-aligned) |
| Debugging | Print debugging | GPU capture, shader profiler, occupancy analysis |
What it looks like: float4 color, float3 normal, float2 uv everywhere
Why it's wrong: Wastes registers, reduces occupancy, doubles bandwidth
What to do instead: Default to half precision, upgrade to float only for positions/depth
How to detect: Shader compiler warnings, GPU profiler showing low occupancy
What it looks like: Treating Apple GPU like immediate-mode renderer Why it's wrong: Apple GPUs use Tile-Based Deferred Rendering - tile memory is essentially free What to do instead:
[[color(n)]] attachments freely (reads are tile-local)store actions
How to detect: Excessive bandwidth in GPU profilerWhat it looks like: if (material.useNormalMap) { ... } checked every fragment
Why it's wrong: Creates divergent warps, wastes ALU
What to do instead: Function constants + pipeline specialization
constant bool useNormalMap [[function_constant(0)]];
// Compiler eliminates dead code paths entirely
What it looks like: Using raytracing::intersector query-based API
Why it's wrong: intersection_query doesn't align with hardware; less efficient grouping
What to do instead: Use intersector API with explicit result handling
Source: Apple Tech Talk 111373 (2023)
Play: "The best shaders come from experimentation and happy accidents."
Exposition: "If you can't explain it clearly, you don't understand it yet."
Tools: "A good debug tool saves 100 hours of guessing."
Craft beautiful, performant Metal shaders with the artistry of film production and the pragmatism of real-time constraints. Teach through clear examples, embrace creative exploration, and build tools that make debugging a joy.
"The best shaders come from experimentation and happy accidents."
"If you can't explain it clearly, you don't understand it yet."
"A good debug tool saves 100 hours of guessing."
#include <metal_stdlib>
using namespace metal;
struct MaterialProperties {
float3 albedo;
float metallic;
float roughness;
float ao; // Ambient occlusion
float3 emission;
};
struct Light {
float3 position;
float3 color;
float intensity;
};
// Fresnel-Schlick approximation
float3 fresnel_schlick(float cos_theta, float3 F0) {
return F0 + (1.0 - F0) * pow(1.0 - cos_theta, 5.0);
}
// GGX/Trowbridge-Reitz normal distribution
float distribution_ggx(float3 N, float3 H, float roughness) {
float a = roughness * roughness;
float a2 = a * a;
float NdotH = max(dot(N, H), 0.0);
float NdotH2 = NdotH * NdotH;
float denom = (NdotH2 * (a2 - 1.0) + 1.0);
denom = M_PI_F * denom * denom;
return a2 / denom;
}
// Smith's Schlick-GGX geometry function
float geometry_schlick_ggx(float NdotV, float roughness) {
float r = (roughness + 1.0);
float k = (r * r) / 8.0;
return NdotV / (NdotV * (1.0 - k) + k);
}
float geometry_smith(float3 N, float3 V, float3 L, float roughness) {
float NdotV = max(dot(N, V), 0.0);
float NdotL = max(dot(N, L), 0.0);
float ggx1 = geometry_schlick_ggx(NdotV, roughness);
float ggx2 = geometry_schlick_ggx(NdotL, roughness);
return ggx1 * ggx2;
}
// PBR lighting calculation
float3 calculate_pbr_lighting(
float3 world_pos,
float3 normal,
float3 view_dir,
MaterialProperties material,
Light light
) {
// Calculate light direction
float3 light_dir = normalize(light.position - world_pos);
float3 halfway = normalize(view_dir + light_dir);
// Distance attenuation
float distance = length(light.position - world_pos);
float attenuation = 1.0 / (distance * distance);
float3 radiance = light.color * light.intensity * attenuation;
// Cook-Torrance BRDF
float3 F0 = mix(float3(0.04), material.albedo, material.metallic);
float3 F = fresnel_schlick(max(dot(halfway, view_dir), 0.0), F0);
float NDF = distribution_ggx(normal, halfway, material.roughness);
float G = geometry_smith(normal, view_dir, light_dir, material.roughness);
float3 numerator = NDF * G * F;
float denominator = 4.0 * max(dot(normal, view_dir), 0.0) *
max(dot(normal, light_dir), 0.0) + 0.0001;
float3 specular = numerator / denominator;
// Energy conservation
float3 kS = F;
float3 kD = (1.0 - kS) * (1.0 - material.metallic);
float NdotL = max(dot(normal, light_dir), 0.0);
return (kD * material.albedo / M_PI_F + specular) * radiance * NdotL;
}
fragment float4 pbr_fragment(
VertexOut in [[stage_in]],
constant MaterialProperties& material [[buffer(0)]],
constant Light* lights [[buffer(1)]],
constant uint& light_count [[buffer(2)]],
constant float3& camera_pos [[buffer(3)]]
) {
float3 normal = normalize(in.world_normal);
float3 view_dir = normalize(camera_pos - in.world_position);
// Accumulate lighting from all lights
float3 Lo = float3(0.0);
for (uint i = 0; i < light_count; i++) {
Lo += calculate_pbr_lighting(
in.world_position,
normal,
view_dir,
material,
lights[i]
);
}
// Ambient lighting (simplified IBL)
float3 ambient = float3(0.03) * material.albedo * material.ao;
float3 color = ambient + Lo + material.emission;
// HDR tone mapping (Reinhard)
color = color / (color + float3(1.0));
// Gamma correction
color = pow(color, float3(1.0/2.2));
return float4(color, 1.0);
}
// Smooth noise for organic effects
float hash(float2 p) {
// Simple 2D hash function
p = fract(p * float2(234.34, 435.345));
p += dot(p, p + 34.23);
return fract(p.x * p.y);
}
float smooth_noise(float2 uv) {
float2 i = floor(uv);
float2 f = fract(uv);
// Smooth interpolation (smoothstep)
f = f * f * (3.0 - 2.0 * f);
// Four corners of grid
float a = hash(i);
float b = hash(i + float2(1.0, 0.0));
float c = hash(i + float2(0.0, 1.0));
float d = hash(i + float2(1.0, 1.0));
// Bilinear interpolation
return mix(mix(a, b, f.x),
mix(c, d, f.x), f.y);
}
// Fractal Brownian Motion - creates organic, natural-looking patterns
float fbm(float2 uv, int octaves) {
float value = 0.0;
float amplitude = 0.5;
float frequency = 2.0;
for (int i = 0; i < octaves; i++) {
value += amplitude * smooth_noise(uv * frequency);
amplitude *= 0.5;
frequency *= 2.0;
}
return value;
}
// Playful: Animated flowing marble effect
fragment float4 flowing_marble_fragment(
VertexOut in [[stage_in]],
constant float& time [[buffer(0)]]
) {
float2 uv = in.texcoord * 5.0;
// Create flowing pattern
float2 flow = float2(
fbm(uv + time * 0.1, 4),
fbm(uv + time * 0.15 + 100.0, 4)
);
// Distort UV with flow
uv += flow * 2.0;
// Create marble veins
float marble = fbm(uv, 6);
marble = abs(sin(marble * 10.0 + time * 0.5));
// Color gradient (purple to gold, very Pixar-esque)
float3 color1 = float3(0.4, 0.1, 0.7); // Purple
float3 color2 = float3(1.0, 0.7, 0.2); // Gold
float3 color = mix(color1, color2, marble);
// Add some shimmer
float shimmer = fbm(uv * 10.0 + time, 3) * 0.3;
color += shimmer;
return float4(color, 1.0);
}
// Heat map for scalar visualization (0=blue, 1=red)
float3 heat_map(float v) {
v = saturate(v);
return v < 0.5
? mix(float3(0,0,1), float3(0,1,0), v*2)
: mix(float3(0,1,0), float3(1,0,0), (v-0.5)*2);
}
// Debug modes: normals, UVs, depth
fragment float4 debug_fragment(VertexOut in [[stage_in]], constant uint& mode [[buffer(0)]]) {
if (mode == 0) return float4(in.world_normal * 0.5 + 0.5, 1.0); // Normals
if (mode == 1) return float4(in.texcoord, 0.0, 1.0); // UVs