Perlin Noise
After hammering at my math circuits while reading up on the GPU Gems 2 article on Improved Perlin Noise, I came up with the following optimized version of their implementation using 3D textures. Note that this technique is not compatible with the sprite batch; at least, I could not get the 3D textures to sample properly when trying to use a sprite batch.
This first part is the CPU-side pre-calculation of the 3D textures. These 3D textures remove a lot of the dependent texture reads that existed in the GPU Gems article by pre-calculating factors up until the lerps. One could likely go even further and optimize out the lerps entirely, but linear filtering on 3D textures is poorly supported on a wide range of hardware.
// Build a random permutation of 0..255 by ordering the range on throwaway GUID keys.
int[] values = Enumerable.Range(0, 256).OrderBy(a => Guid.NewGuid()).ToArray<int>();

// Pre-calculate the permutation hash chain for every (x, y, z) lattice cell.
// pixels0 receives the hashes { B, E, C, F }; pixels1 receives { B+1, E+1, C+1, F+1 }.
Color[] pixels0 = new Color[256 * 256 * 256];
Color[] pixels1 = new Color[256 * 256 * 256];

// x is the fastest-varying term of index (x + y*256 + z*256*256), so it is iterated
// innermost: both arrays are then written sequentially instead of jumping 256*256
// elements per store. Every cell is independent, so the contents are unchanged.
for (int z = 0; z < 256; z++)
{
    for (int y = 0; y < 256; y++)
    {
        for (int x = 0; x < 256; x++)
        {
            int index = x + (y * 256) + (z * 256 * 256);

            // Hash chain starting at x (A) and at x + 1 (D); every lookup wraps at 256.
            int A = values[x] + y;
            int B = values[A % 256] + z;
            int C = values[(A + 1) % 256] + z;
            int D = values[(x + 1) % 256] + y;
            int E = values[D % 256] + z;
            int F = values[(D + 1) % 256] + z;

            // Each hash (0..255) is stored as value / 255 in one colour channel.
            pixels0[index] = new Color((float)values[B % 256] / 255f,
                                       (float)values[E % 256] / 255f,
                                       (float)values[C % 256] / 255f,
                                       (float)values[F % 256] / 255f);
            pixels1[index] = new Color((float)values[(B + 1) % 256] / 255f,
                                       (float)values[(E + 1) % 256] / 255f,
                                       (float)values[(C + 1) % 256] / 255f,
                                       (float)values[(F + 1) % 256] / 255f);
        }
    }
}

m_permutationTexture0 = new Texture3D(GraphicsDevice, 256, 256, 256, 1, TextureUsage.Tiled, SurfaceFormat.Color);
m_permutationTexture0.SetData<Color>(pixels0);
m_permutationTexture1 = new Texture3D(GraphicsDevice, 256, 256, 256, 1, TextureUsage.Tiled, SurfaceFormat.Color);
m_permutationTexture1.SetData<Color>(pixels1);
Now for loading the gradient texture, which is a 16x1 2D texture:
// Gradient table: 16 direction vectors, one per texel of the 16x1 gradient texture.
// The fourth component of every entry is 0.
NormalizedByte4[] gradients =
{
    // gradients with z == 0
    new NormalizedByte4( 1,  1,  0, 0),
    new NormalizedByte4(-1,  1,  0, 0),
    new NormalizedByte4( 1, -1,  0, 0),
    new NormalizedByte4(-1, -1,  0, 0),

    // gradients with y == 0
    new NormalizedByte4( 1,  0,  1, 0),
    new NormalizedByte4(-1,  0,  1, 0),
    new NormalizedByte4( 1,  0, -1, 0),
    new NormalizedByte4(-1,  0, -1, 0),

    // gradients with x == 0
    new NormalizedByte4( 0,  1,  1, 0),
    new NormalizedByte4( 0, -1,  1, 0),
    new NormalizedByte4( 0,  1, -1, 0),
    new NormalizedByte4( 0, -1, -1, 0),

    // four entries repeated from above to bring the table to 16
    new NormalizedByte4( 1,  1,  0, 0),
    new NormalizedByte4( 0, -1,  1, 0),
    new NormalizedByte4(-1,  1,  0, 0),
    new NormalizedByte4( 0, -1, -1, 0),
};

// 16 texels wide, 1 texel high, a single mip level, signed normalized byte channels.
m_gradientTexture = new Texture2D(
    GraphicsDevice,
    16,
    1,
    1,
    TextureUsage.Tiled,
    SurfaceFormat.NormalizedByte4);
m_gradientTexture.SetData<NormalizedByte4>(gradients);
That's all for the CPU side of things, except for passing the textures in and rendering an appropriately sized quad, which I will leave up to you. Now on to the shader code.
// Permutation volume holding the { B, E, C, F } hashes.
// Point-sampled with no mip filtering; wraps on all three axes.
texture PermutationTexture0;
sampler PermutationSampler0 = sampler_state
{
    Texture   = <PermutationTexture0>;
    MinFilter = Point;
    MagFilter = Point;
    MipFilter = None;
    AddressU  = Wrap;
    AddressV  = Wrap;
    AddressW  = Wrap;
};

// Permutation volume holding the { B+1, E+1, C+1, F+1 } hashes.
// Same sampling state as PermutationSampler0.
texture PermutationTexture1;
sampler PermutationSampler1 = sampler_state
{
    Texture   = <PermutationTexture1>;
    MinFilter = Point;
    MagFilter = Point;
    MipFilter = None;
    AddressU  = Wrap;
    AddressV  = Wrap;
    AddressW  = Wrap;
};

// Gradient lookup texture. Point-sampled; only the U address mode is set explicitly.
texture GradientTexture;
sampler GradientSampler = sampler_state
{
    Texture   = <GradientTexture>;
    MinFilter = Point;
    MagFilter = Point;
    MipFilter = None;
    AddressU  = Wrap;
};
// Quintic fade curve 6t^5 - 15t^4 + 10t^3, evaluated per component.
float3 fade(float3 t)
{
    float3 cubed = t * t * t;
    float3 poly = t * (t * 6 - 15) + 10;
    return cubed * poly;
}
// Fetches the gradient stored at coordinate x of the gradient texture and returns
// its dot product with the offset vector p.
float gradient(float x, float3 p)
{
    float4 g = tex1D(GradientSampler, x);
    // Only the first three components take part in the dot product with p.
    return dot(g.xyz, p);
}
// Improved Perlin noise at point p, using the two pre-calculated permutation volumes
// and the gradient texture. Returns the weighted sum of the eight corner gradients.
float inoise(float3 p)
{
    // Integer lattice cell, reduced with fmod 256 and mapped to texture coordinates.
    // The + 0.5 moves the point-sampled lookup from the texel boundary (i / 256) to
    // the texel centre ((i + 0.5) / 256), so rounding in the texture addressing
    // cannot select the neighbouring texel.
    float3 permCoord = (fmod(floor(p), 256) + 0.5) / 256.0;

    // Fractional position inside the cell, and its fade-curve weights.
    p -= floor(p);
    float3 f = fade(p);
    float3 fi = 1 - f;

    // NOTE(review): the scale is 256 although the channels were written as value / 255.
    // The result is passed straight to gradient() as a texture coordinate, so the
    // leftover fraction (value / 255) appears to be what selects the gradient texel
    // under wrap addressing. Confirm before "correcting" this to 255.
    float4 BECF0 = tex3D(PermutationSampler0, permCoord) * 256; // { B, E, C, F }
    float4 BECF1 = tex3D(PermutationSampler1, permCoord) * 256; // { B+1, E+1, C+1, F+1 }

    // Gradient contributions of the four corners on the x = 0 side of the cell.
    float4 a =
    {
        gradient(BECF0.x, p),                       // B
        gradient(BECF0.z, p + float3(0, -1, 0)),    // C
        gradient(BECF1.x, p + float3(0, 0, -1)),    // B + 1
        gradient(BECF1.z, p + float3(0, -1, -1))    // C + 1
    };

    // Gradient contributions of the four corners on the x = 1 side of the cell.
    float4 b =
    {
        gradient(BECF0.y, p + float3(-1, 0, 0)),    // E
        gradient(BECF0.w, p + float3(-1, -1, 0)),   // F
        gradient(BECF1.y, p + float3(-1, 0, -1)),   // E + 1
        gradient(BECF1.w, p + float3(-1, -1, -1))   // F + 1
    };

    // the following is mathematically equivalent to:
    //lerp(
    //    lerp(lerp(a.x, b.x, f.x),
    //         lerp(a.y, b.y, f.x),
    //         f.y),
    //    lerp(lerp(a.z, b.z, f.x),
    //         lerp(a.w, b.w, f.x),
    //         f.y),
    //    f.z)
    // but considerably cheaper to compute in real-time application

    // m holds the combined y/z weight of each lane:
    // { fi.y * fi.z, f.y * fi.z, fi.y * f.z, f.y * f.z }
    float4 m;
    m.xz = fi.y; m.yw = f.y;
    m.xy *= fi.z; m.zw *= f.z;

    // Apply the x weight to each side, then sum the four lanes.
    float4 c = a * fi.x * m + b * f.x * m;
    return c.x + c.y + c.z + c.w;
}
That's all there is to it, the result of inoise is a noise value between -1 and +1.
As an example, here is a shader which uses the above, with zPlane = minutes since game start. It creates a sort-of cloud texture which smoothly changes over time.
// Third noise coordinate; the pixel's texture coordinate supplies the first two.
float zPlane = 0;
// Number of noise octaves summed per pixel.
int Octaves = 12;

// Example pixel shader: sums Octaves layers of inoise, doubling the frequency and
// scaling the amplitude by persistance from one octave to the next, then remaps the
// sum with * 0.5 + 0.5 and returns it in every colour channel.
float4 PS_Sandbox(float2 texCoord : TEXCOORD0) : COLOR0
{
    float nois = 0;
    float persistance = 0.5;

    // Running values replace the per-octave pow() calls. The previous
    // pow(2, i + 1) behind an i > 0 guard produced frequencies 1, 4, 8, ...,
    // skipping 2; this yields 1, 2, 4, 8, ... with amplitudes 1, 0.5, 0.25, ...
    float frequency = 1;
    float amplitude = 1;
    for (int i = 0; i < Octaves; i++)
    {
        float3 p = float3(texCoord, zPlane) * frequency;
        nois += (inoise(p) * amplitude);

        frequency *= 2;
        amplitude *= persistance;
    }

    // NOTE(review): the amplitudes sum to almost 2, so the remapped value can fall
    // outside 0..1 when the octaves line up; left unchanged to keep the look.
    return nois * 0.5 + 0.5;
}