From d5a1aa45ca0eb2e464dde9082d3bffc415f7f8ea Mon Sep 17 00:00:00 2001 From: Francesco Date: Thu, 30 Apr 2026 15:22:09 +0200 Subject: [PATCH 01/32] Optimised image write to use SetData instead of calling SetPixel repeatedly --- src/microbe_stage/CompoundCloudPlane.cs | 37 ++++++++++++++++++++---- src/microbe_stage/CompoundCloudSystem.cs | 1 + 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/src/microbe_stage/CompoundCloudPlane.cs b/src/microbe_stage/CompoundCloudPlane.cs index 2d6ee6a58d8..3ad4390bc57 100644 --- a/src/microbe_stage/CompoundCloudPlane.cs +++ b/src/microbe_stage/CompoundCloudPlane.cs @@ -3,6 +3,7 @@ #define CACHE_WORLD_COORDINATES using System; +using System.Buffers; using System.Collections.Generic; using System.Runtime.CompilerServices; using System.Threading; @@ -85,6 +86,11 @@ public partial class CompoundCloudPlane : MeshInstance3D, ISaveLoadedTracked, IA public bool IsLoadedFromSave { get; set; } + /// + /// This is used in data copy. + /// + public byte[]? TempBuffer { get; private set; } + public ushort CurrentArchiveVersion => SERIALIZATION_VERSION; public ArchiveObjectType ArchiveObjectType => (ArchiveObjectType)ThriveArchiveObjectType.CompoundCloudPlane; public bool CanBeReferencedInArchive => false; @@ -447,7 +453,12 @@ public void QueueAdvectCloud(float delta, List queue) /// public void QueueUpdateTextureImage(List queue) { - var planeChunkSize = PlaneSize / Constants.CLOUD_PLANE_SQUARES_PER_SIDE; + int planeChunkSize = PlaneSize / Constants.CLOUD_PLANE_SQUARES_PER_SIDE; + + int width = image!.GetWidth(); + int height = image.GetHeight(); + int size = width * height * 4; + TempBuffer = ArrayPool.Shared.Rent(size); for (var i = 0; i < Constants.CLOUD_PLANE_SQUARES_PER_SIDE; ++i) { @@ -458,7 +469,8 @@ public void QueueUpdateTextureImage(List queue) var y0 = j * planeChunkSize; // TODO: fix task allocations - var task = new Task(() => PartialUpdateTextureImage(x0, y0, planeChunkSize, planeChunkSize)); + var task = new Task(() => PartialUpdateTextureImage(x0, y0, planeChunkSize, planeChunkSize, + TempBuffer.AsSpan(0, size))); queue.Add(task); } } @@ -466,6 +478,11 @@ public void QueueUpdateTextureImage(List queue) public void UpdateTexture() { + int width = image!.GetWidth(); + int height = image.GetHeight(); + int size = width * height * 4; + + image!.SetData(width, height, false, image.GetFormat(), TempBuffer.AsSpan(0, size)); texture.Update(image); } @@ -883,6 +900,9 @@ protected override void Dispose(bool disposing) { if (image != null) { + if (TempBuffer is not null) + ArrayPool.Shared.Return(TempBuffer); + brightnessParameterName.Dispose(); uvOffsetParameterName.Dispose(); image.Dispose(); @@ -1104,14 +1124,21 @@ private void PartialAdvect(int x0, int y0, int size, float delta) } } - private void PartialUpdateTextureImage(int x0, int y0, int width, int height) + private void PartialUpdateTextureImage(int x0, int y0, int width, int height, Span bufferSpan) { + int imgWidth = image!.GetWidth(); + for (int x = x0; x < x0 + width; ++x) { for (int y = y0; y < y0 + height; ++y) { - var pixel = Density[x, y] * (1 / Constants.CLOUD_MAX_INTENSITY_SHOWN); - image!.SetPixel(x, y, new Color(pixel.X, pixel.Y, pixel.Z, pixel.W)); + var pixel = Density[x, y] * 1.0f / Constants.CLOUD_MAX_INTENSITY_SHOWN; + int offset = (y * imgWidth + x) * 4; + + bufferSpan[offset] = (byte)(Math.Clamp(pixel.X, 0, 1) * 255); + bufferSpan[offset + 1] = (byte)(Math.Clamp(pixel.Y, 0, 1) * 255); + bufferSpan[offset + 2] = (byte)(Math.Clamp(pixel.Z, 0, 1) * 255); + bufferSpan[offset + 3] = (byte)(Math.Clamp(pixel.W, 0, 1) * 255); } } } diff --git a/src/microbe_stage/CompoundCloudSystem.cs b/src/microbe_stage/CompoundCloudSystem.cs index 80a66eaadec..440c6e60814 100644 --- a/src/microbe_stage/CompoundCloudSystem.cs +++ b/src/microbe_stage/CompoundCloudSystem.cs @@ -1,4 +1,5 @@ using System; +using System.Buffers; using System.Collections.Generic; using System.Diagnostics.CodeAnalysis; using System.Threading.Tasks; From a2bf367a92da5fb15ba79854d21a91d8183b7b38 Mon Sep 17 00:00:00 2001 From: Francesco Date: Thu, 30 Apr 2026 15:29:50 +0200 Subject: [PATCH 02/32] Fixed memory leak --- src/microbe_stage/CompoundCloudPlane.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/microbe_stage/CompoundCloudPlane.cs b/src/microbe_stage/CompoundCloudPlane.cs index 3ad4390bc57..dd0da753630 100644 --- a/src/microbe_stage/CompoundCloudPlane.cs +++ b/src/microbe_stage/CompoundCloudPlane.cs @@ -458,7 +458,6 @@ public void QueueUpdateTextureImage(List queue) int width = image!.GetWidth(); int height = image.GetHeight(); int size = width * height * 4; - TempBuffer = ArrayPool.Shared.Rent(size); for (var i = 0; i < Constants.CLOUD_PLANE_SQUARES_PER_SIDE; ++i) { @@ -1190,6 +1189,8 @@ private int GetCompoundIndex(Compound compound) private void CreateDensityTexture() { + TempBuffer = ArrayPool.Shared.Rent(PlaneSize * PlaneSize * 4); + image = Image.CreateEmpty(PlaneSize, PlaneSize, false, Image.Format.Rgba8); texture = ImageTexture.CreateFromImage(image); From 12f08aa7fef17b09c37f10b2159c810349ff22d2 Mon Sep 17 00:00:00 2001 From: Francesco Date: Thu, 30 Apr 2026 15:35:15 +0200 Subject: [PATCH 03/32] Switched to resizing the buffer --- src/microbe_stage/CompoundCloudPlane.cs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/microbe_stage/CompoundCloudPlane.cs b/src/microbe_stage/CompoundCloudPlane.cs index dd0da753630..fe64384bd50 100644 --- a/src/microbe_stage/CompoundCloudPlane.cs +++ b/src/microbe_stage/CompoundCloudPlane.cs @@ -8,6 +8,7 @@ using System.Runtime.CompilerServices; using System.Threading; using System.Threading.Tasks; +using CommunityToolkit.HighPerformance; using Godot; using SharedBase.Archive; using Systems; @@ -70,6 +71,8 @@ public partial class CompoundCloudPlane : MeshInstance3D, ISaveLoadedTracked, IA private Vector4 decayRates; + private byte[]? tempBuffer; + /// /// Which square plane player is in /// @@ -89,7 +92,7 @@ public partial class CompoundCloudPlane : MeshInstance3D, ISaveLoadedTracked, IA /// /// This is used in data copy. /// - public byte[]? TempBuffer { get; private set; } + public byte[]? TempBuffer => tempBuffer; public ushort CurrentArchiveVersion => SERIALIZATION_VERSION; public ArchiveObjectType ArchiveObjectType => (ArchiveObjectType)ThriveArchiveObjectType.CompoundCloudPlane; @@ -469,7 +472,7 @@ public void QueueUpdateTextureImage(List queue) // TODO: fix task allocations var task = new Task(() => PartialUpdateTextureImage(x0, y0, planeChunkSize, planeChunkSize, - TempBuffer.AsSpan(0, size))); + tempBuffer.AsSpan(0, size))); queue.Add(task); } } @@ -481,7 +484,7 @@ public void UpdateTexture() int height = image.GetHeight(); int size = width * height * 4; - image!.SetData(width, height, false, image.GetFormat(), TempBuffer.AsSpan(0, size)); + image!.SetData(width, height, false, image.GetFormat(), tempBuffer.AsSpan(0, size)); texture.Update(image); } @@ -899,8 +902,8 @@ protected override void Dispose(bool disposing) { if (image != null) { - if (TempBuffer is not null) - ArrayPool.Shared.Return(TempBuffer); + if (tempBuffer is not null) + ArrayPool.Shared.Return(tempBuffer); brightnessParameterName.Dispose(); uvOffsetParameterName.Dispose(); @@ -1189,7 +1192,7 @@ private int GetCompoundIndex(Compound compound) private void CreateDensityTexture() { - TempBuffer = ArrayPool.Shared.Rent(PlaneSize * PlaneSize * 4); + ArrayPool.Shared.Resize(ref tempBuffer, PlaneSize * PlaneSize * 4); image = Image.CreateEmpty(PlaneSize, PlaneSize, false, Image.Format.Rgba8); texture = ImageTexture.CreateFromImage(image); From 287b3a7aa025e60e998a74ce3a666c0525f36e21 Mon Sep 17 00:00:00 2001 From: Francesco Date: Thu, 30 Apr 2026 16:31:10 +0200 Subject: [PATCH 04/32] Removed pooling --- src/microbe_stage/CompoundCloudPlane.cs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/microbe_stage/CompoundCloudPlane.cs b/src/microbe_stage/CompoundCloudPlane.cs index fe64384bd50..b7b10bd06e2 100644 --- a/src/microbe_stage/CompoundCloudPlane.cs +++ b/src/microbe_stage/CompoundCloudPlane.cs @@ -3,12 +3,10 @@ #define CACHE_WORLD_COORDINATES using System; -using System.Buffers; using System.Collections.Generic; using System.Runtime.CompilerServices; using System.Threading; using System.Threading.Tasks; -using CommunityToolkit.HighPerformance; using Godot; using SharedBase.Archive; using Systems; @@ -902,9 +900,6 @@ protected override void Dispose(bool disposing) { if (image != null) { - if (tempBuffer is not null) - ArrayPool.Shared.Return(tempBuffer); - brightnessParameterName.Dispose(); uvOffsetParameterName.Dispose(); image.Dispose(); @@ -1192,7 +1187,11 @@ private int GetCompoundIndex(Compound compound) private void CreateDensityTexture() { - ArrayPool.Shared.Resize(ref tempBuffer, PlaneSize * PlaneSize * 4); + int requestedSize = PlaneSize * PlaneSize * 4; + if (tempBuffer is null || requestedSize > tempBuffer.Length) + { + tempBuffer = new byte[requestedSize]; + } image = Image.CreateEmpty(PlaneSize, PlaneSize, false, Image.Format.Rgba8); texture = ImageTexture.CreateFromImage(image); From cc02bebd7697ffcfe7e78d21a76a4eea89da4268 Mon Sep 17 00:00:00 2001 From: Francesco Date: Fri, 1 May 2026 15:59:44 +0200 Subject: [PATCH 05/32] Changed how the clouds are sliced, flattened arrays and used SIMD --- src/microbe_stage/CompoundCloudPlane.cs | 440 +++++++++++++---------- src/microbe_stage/CompoundCloudSystem.cs | 1 - 2 files changed, 259 insertions(+), 182 deletions(-) diff --git a/src/microbe_stage/CompoundCloudPlane.cs b/src/microbe_stage/CompoundCloudPlane.cs index b7b10bd06e2..23cfd4a8ddb 100644 --- a/src/microbe_stage/CompoundCloudPlane.cs +++ b/src/microbe_stage/CompoundCloudPlane.cs @@ -5,6 +5,9 @@ using System; using System.Collections.Generic; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; using System.Threading; using System.Threading.Tasks; using Godot; @@ -34,9 +37,9 @@ public partial class CompoundCloudPlane : MeshInstance3D, ISaveLoadedTracked, IA /// Because this is such a high-priority system, this uses a bit more happily null suppressing than elsewhere /// /// - public Vector4[,] Density = null!; + public Vector4[] Density = null!; - public Vector4[,] OldDensity = null!; + public Vector4[] OldDensity = null!; public Compound[] Compounds = null!; @@ -69,7 +72,7 @@ public partial class CompoundCloudPlane : MeshInstance3D, ISaveLoadedTracked, IA private Vector4 decayRates; - private byte[]? tempBuffer; + private byte[] tempBuffer = null!; /// /// Which square plane player is in @@ -90,7 +93,7 @@ public partial class CompoundCloudPlane : MeshInstance3D, ISaveLoadedTracked, IA /// /// This is used in data copy. /// - public byte[]? TempBuffer => tempBuffer; + public byte[] TempBuffer => tempBuffer; public ushort CurrentArchiveVersion => SERIALIZATION_VERSION; public ArchiveObjectType ArchiveObjectType => (ArchiveObjectType)ThriveArchiveObjectType.CompoundCloudPlane; @@ -134,7 +137,7 @@ public static CompoundCloudPlane ReadFromArchive(ISArchiveReader reader, ushort int dimensions = instance.PlaneSize; - var target = new Vector4[dimensions, dimensions]; + var target = new Vector4[dimensions * dimensions]; for (int x = 0; x < dimensions; ++x) { @@ -163,7 +166,7 @@ public static CompoundCloudPlane ReadFromArchive(ISArchiveReader reader, ushort (uint)buffer[bufferReadOffset++] << 16 | (uint)buffer[bufferReadOffset++] << 24; vector4.W = BitConverter.UInt32BitsToSingle(data); - target[x, y] = vector4; + target[x + y * dimensions] = vector4; } if (bufferReadOffset != buffer.Length) @@ -213,7 +216,7 @@ public void WriteToArchive(ISArchiveWriter writer) // Convert data into the buffer for (int y = 0; y < dimensions; ++y) { - var vector4 = localDensity[x, y]; + var vector4 = localDensity[x + y * dimensions]; var data = BitConverter.SingleToUInt32Bits(vector4.X); @@ -261,8 +264,8 @@ public override void _Ready() CloudResolution = Settings.Instance.CloudResolution; CreateDensityTexture(); - Density = new Vector4[PlaneSize, PlaneSize]; - OldDensity = new Vector4[PlaneSize, PlaneSize]; + Density = new Vector4[PlaneSize * PlaneSize]; + OldDensity = new Vector4[PlaneSize * PlaneSize]; ClearContents(); } else @@ -272,7 +275,7 @@ public override void _Ready() // without starting a new save CreateDensityTexture(); - OldDensity = new Vector4[PlaneSize, PlaneSize]; + OldDensity = new Vector4[PlaneSize * PlaneSize]; SetMaterialUVForPosition(); } @@ -375,54 +378,53 @@ public void UpdatePosition(Vector2I newPosition) /// Updates the edge concentrations of this cloud before the rest of the cloud. /// This is not run in parallel. /// - public void DiffuseEdges(float delta) + public void DiffuseEdges(float deltaTime) { - // Increase diffusion effect - delta *= 100.0f; + deltaTime *= 100.0f; + int planeSize = PlaneSize; int edgeWidth = Constants.CLOUD_PLANE_EDGE_WIDTH; int halfEdgeWidth = edgeWidth / 2; - int planeChunkSize = PlaneSize / Constants.CLOUD_PLANE_SQUARES_PER_SIDE; + int squaresPerSide = Constants.CLOUD_PLANE_SQUARES_PER_SIDE; + int planeChunkSize = planeSize / squaresPerSide; - // Vertical edge columns - PartialDiffuse(0, 0, halfEdgeWidth, PlaneSize, delta); - PartialDiffuse(1 * planeChunkSize - halfEdgeWidth, 0, edgeWidth, PlaneSize, delta); - PartialDiffuse(2 * planeChunkSize - halfEdgeWidth, 0, edgeWidth, PlaneSize, delta); - PartialDiffuse(3 * planeChunkSize - halfEdgeWidth, 0, halfEdgeWidth, PlaneSize, delta); + for (int column = 0; column <= squaresPerSide; column++) + { + int boundaryCenter = column * planeChunkSize; + int horizontalStart = Math.Max(0, boundaryCenter - halfEdgeWidth); + int horizontalEnd = Math.Min(planeSize, boundaryCenter + halfEdgeWidth); + + AreaDiffuse(horizontalStart, horizontalEnd, 0, planeSize, deltaTime); + } - // Horizontal edge rows - for (int square = 0; square < Constants.CLOUD_PLANE_SQUARES_PER_SIDE; ++square) + for (int square = 0; square < squaresPerSide; square++) { - int x = square * planeChunkSize + halfEdgeWidth; - int width = planeChunkSize - edgeWidth; + int horizontalStart = square * planeChunkSize + halfEdgeWidth; + int horizontalEnd = (square + 1) * planeChunkSize - halfEdgeWidth; - PartialDiffuse(x, 3 * planeChunkSize - halfEdgeWidth, width, halfEdgeWidth, delta); - PartialDiffuse(x, 2 * planeChunkSize - halfEdgeWidth, width, edgeWidth, delta); - PartialDiffuse(x, 1 * planeChunkSize - halfEdgeWidth, width, edgeWidth, delta); - PartialDiffuse(x, 0, width, halfEdgeWidth, delta); + for (int row = 0; row <= squaresPerSide; row++) + { + int boundaryCenter = row * planeChunkSize; + int verticalStart = Math.Max(0, boundaryCenter - halfEdgeWidth); + int verticalEnd = Math.Min(planeSize, boundaryCenter + halfEdgeWidth); + + AreaDiffuse(horizontalStart, horizontalEnd, verticalStart, verticalEnd, deltaTime); + } } } /// /// Updates the cloud in parallel. /// - public void QueueDiffuseCloud(float delta, List queue) + public void QueueDiffuseCloud(float deltaTime, List queue) { - delta *= 100.0f; - var planeChunkSize = PlaneSize / Constants.CLOUD_PLANE_SQUARES_PER_SIDE; + deltaTime *= 100.0f; + int slices = Constants.CLOUD_PLANE_SQUARES_PER_SIDE * Constants.CLOUD_PLANE_SQUARES_PER_SIDE; - for (var i = 0; i < Constants.CLOUD_PLANE_SQUARES_PER_SIDE; ++i) + for (int slice = 0; slice < slices; slice++) { - var x0 = i * planeChunkSize; - - for (var j = 0; j < Constants.CLOUD_PLANE_SQUARES_PER_SIDE; ++j) - { - var y0 = j * planeChunkSize; - - // TODO: fix task allocations - var task = new Task(() => PartialDiffuseCenter(x0, y0, planeChunkSize, delta)); - queue.Add(task); - } + int atSlice = slice; + queue.Add(new Task(() => PartialDiffuse(atSlice, slices, deltaTime))); } } @@ -432,20 +434,16 @@ public void QueueDiffuseCloud(float delta, List queue) public void QueueAdvectCloud(float delta, List queue) { delta *= 100.0f; - var planeChunkSize = PlaneSize / Constants.CLOUD_PLANE_SQUARES_PER_SIDE; - for (var i = 0; i < Constants.CLOUD_PLANE_SQUARES_PER_SIDE; ++i) - { - var x0 = i * planeChunkSize; + int slices = Constants.CLOUD_PLANE_SQUARES_PER_SIDE * Constants.CLOUD_PLANE_SQUARES_PER_SIDE; - for (var j = 0; j < Constants.CLOUD_PLANE_SQUARES_PER_SIDE; ++j) - { - var y0 = j * planeChunkSize; + for (int slice = 0; slice < slices; ++slice) + { + int atSlice = slice; - // TODO: fix task allocations - var task = new Task(() => PartialAdvect(x0, y0, planeChunkSize, delta)); - queue.Add(task); - } + // TODO: fix task allocations + var task = new Task(() => PartialAdvect(atSlice, slices, delta)); + queue.Add(task); } } @@ -454,25 +452,12 @@ public void QueueAdvectCloud(float delta, List queue) /// public void QueueUpdateTextureImage(List queue) { - int planeChunkSize = PlaneSize / Constants.CLOUD_PLANE_SQUARES_PER_SIDE; - - int width = image!.GetWidth(); - int height = image.GetHeight(); - int size = width * height * 4; + int slices = Constants.CLOUD_PLANE_SQUARES_PER_SIDE * Constants.CLOUD_PLANE_SQUARES_PER_SIDE; - for (var i = 0; i < Constants.CLOUD_PLANE_SQUARES_PER_SIDE; ++i) + for (int slice = 0; slice < slices; ++slice) { - var x0 = i * planeChunkSize; - - for (var j = 0; j < Constants.CLOUD_PLANE_SQUARES_PER_SIDE; ++j) - { - var y0 = j * planeChunkSize; - - // TODO: fix task allocations - var task = new Task(() => PartialUpdateTextureImage(x0, y0, planeChunkSize, planeChunkSize, - tempBuffer.AsSpan(0, size))); - queue.Add(task); - } + int atSlice = slice; + queue.Add(new Task(() => PartialUpdateTextureImage(atSlice, slices))); } } @@ -515,11 +500,11 @@ public void AddCloudInterlocked(Compound compound, int x, int y, float density) { do { - seenCurrentAmount = Density[x, y].X; + seenCurrentAmount = Density[x + y * PlaneSize].X; newValue = seenCurrentAmount + density; } - while (Interlocked.CompareExchange(ref Density[x, y].X, newValue, seenCurrentAmount) != - seenCurrentAmount); + while (Interlocked.CompareExchange(ref Density[x + y * PlaneSize].X, newValue, + seenCurrentAmount) != seenCurrentAmount); break; } @@ -528,11 +513,11 @@ public void AddCloudInterlocked(Compound compound, int x, int y, float density) { do { - seenCurrentAmount = Density[x, y].Y; + seenCurrentAmount = Density[x + y * PlaneSize].Y; newValue = seenCurrentAmount + density; } - while (Interlocked.CompareExchange(ref Density[x, y].Y, newValue, seenCurrentAmount) != - seenCurrentAmount); + while (Interlocked.CompareExchange(ref Density[x + y * PlaneSize].Y, newValue, + seenCurrentAmount) != seenCurrentAmount); break; } @@ -541,11 +526,11 @@ public void AddCloudInterlocked(Compound compound, int x, int y, float density) { do { - seenCurrentAmount = Density[x, y].Z; + seenCurrentAmount = Density[x + y * PlaneSize].Z; newValue = seenCurrentAmount + density; } - while (Interlocked.CompareExchange(ref Density[x, y].Z, newValue, seenCurrentAmount) != - seenCurrentAmount); + while (Interlocked.CompareExchange(ref Density[x + y * PlaneSize].Z, newValue, + seenCurrentAmount) != seenCurrentAmount); break; } @@ -554,11 +539,11 @@ public void AddCloudInterlocked(Compound compound, int x, int y, float density) { do { - seenCurrentAmount = Density[x, y].W; + seenCurrentAmount = Density[x + y * PlaneSize].W; newValue = seenCurrentAmount + density; } - while (Interlocked.CompareExchange(ref Density[x, y].W, newValue, seenCurrentAmount) != - seenCurrentAmount); + while (Interlocked.CompareExchange(ref Density[x + y * PlaneSize].W, newValue, + seenCurrentAmount) != seenCurrentAmount); break; } @@ -589,11 +574,11 @@ public bool AddCloudInterlockedIfHandlesType(Compound compound, int x, int y, fl { do { - seenCurrentAmount = Density[x, y].X; + seenCurrentAmount = Density[x + y * PlaneSize].X; newValue = seenCurrentAmount + density; } - while (Interlocked.CompareExchange(ref Density[x, y].X, newValue, seenCurrentAmount) != - seenCurrentAmount); + while (Interlocked.CompareExchange(ref Density[x + y * PlaneSize].X, newValue, + seenCurrentAmount) != seenCurrentAmount); return true; } @@ -602,11 +587,11 @@ public bool AddCloudInterlockedIfHandlesType(Compound compound, int x, int y, fl { do { - seenCurrentAmount = Density[x, y].Y; + seenCurrentAmount = Density[x + y * PlaneSize].Y; newValue = seenCurrentAmount + density; } - while (Interlocked.CompareExchange(ref Density[x, y].Y, newValue, seenCurrentAmount) != - seenCurrentAmount); + while (Interlocked.CompareExchange(ref Density[x + y * PlaneSize].Y, newValue, + seenCurrentAmount) != seenCurrentAmount); return true; } @@ -615,11 +600,11 @@ public bool AddCloudInterlockedIfHandlesType(Compound compound, int x, int y, fl { do { - seenCurrentAmount = Density[x, y].Z; + seenCurrentAmount = Density[x + y * PlaneSize].Z; newValue = seenCurrentAmount + density; } - while (Interlocked.CompareExchange(ref Density[x, y].Z, newValue, seenCurrentAmount) != - seenCurrentAmount); + while (Interlocked.CompareExchange(ref Density[x + y * PlaneSize].Z, newValue, + seenCurrentAmount) != seenCurrentAmount); return true; } @@ -628,11 +613,11 @@ public bool AddCloudInterlockedIfHandlesType(Compound compound, int x, int y, fl { do { - seenCurrentAmount = Density[x, y].W; + seenCurrentAmount = Density[x + y * PlaneSize].W; newValue = seenCurrentAmount + density; } - while (Interlocked.CompareExchange(ref Density[x, y].W, newValue, seenCurrentAmount) != - seenCurrentAmount); + while (Interlocked.CompareExchange(ref Density[x + y * PlaneSize].W, newValue, + seenCurrentAmount) != seenCurrentAmount); return true; } @@ -650,17 +635,17 @@ public bool AddCloudInterlockedIfHandlesType(Compound compound, int x, int y, fl /// The amount of compound taken public float TakeCompound(Compound compound, int x, int y, float fraction = 1.0f) { - float amountInCloud = HackyAddress(ref Density[x, y], GetCompoundIndex(compound)); + float amountInCloud = HackyAddress(ref Density[x + y * PlaneSize], GetCompoundIndex(compound)); var amountToGive = amountInCloud * fraction; if (amountInCloud - amountToGive < 0.1f) { // Taking basically everything in the cloud - Density[x, y] += CalculateCloudToAdd(compound, -amountInCloud); + Density[x + y * PlaneSize] += CalculateCloudToAdd(compound, -amountInCloud); } else { - Density[x, y] += CalculateCloudToAdd(compound, -amountToGive); + Density[x + y * PlaneSize] += CalculateCloudToAdd(compound, -amountToGive); } return amountToGive; @@ -694,17 +679,17 @@ public bool TakeCompoundInterlocked(int compoundIndex, int x, int y, float fract switch (compoundIndex) { case 0: - return Interlocked.CompareExchange(ref Density[x, y].X, newValue, seenCurrentAmount) == - seenCurrentAmount; + return Interlocked.CompareExchange(ref Density[x + y * PlaneSize].X, newValue, + seenCurrentAmount) == seenCurrentAmount; case 1: - return Interlocked.CompareExchange(ref Density[x, y].Y, newValue, seenCurrentAmount) == - seenCurrentAmount; + return Interlocked.CompareExchange(ref Density[x + y * PlaneSize].Y, newValue, + seenCurrentAmount) == seenCurrentAmount; case 2: - return Interlocked.CompareExchange(ref Density[x, y].Z, newValue, seenCurrentAmount) == - seenCurrentAmount; + return Interlocked.CompareExchange(ref Density[x + y * PlaneSize].Z, newValue, + seenCurrentAmount) == seenCurrentAmount; case 3: - return Interlocked.CompareExchange(ref Density[x, y].W, newValue, seenCurrentAmount) == - seenCurrentAmount; + return Interlocked.CompareExchange(ref Density[x + y * PlaneSize].W, newValue, + seenCurrentAmount) == seenCurrentAmount; default: throw new ArgumentException("Compound index out of range"); } @@ -718,7 +703,7 @@ public bool TakeCompoundInterlocked(int compoundIndex, int x, int y, float fract /// The amount available for taking public float AmountAvailable(Compound compound, int x, int y, float fraction = 1.0f) { - float amountInCloud = HackyAddress(ref Density[x, y], GetCompoundIndex(compound)); + float amountInCloud = HackyAddress(ref Density[x + y * PlaneSize], GetCompoundIndex(compound)); float amountToGive = amountInCloud * fraction; return amountToGive; } @@ -737,7 +722,7 @@ public void GetCompoundsAt(int x, int y, Dictionary result, boo if (onlyAbsorbable && !compoundDefinitions[i]!.IsAbsorbable) continue; - float amount = HackyAddress(ref Density[x, y], i); + float amount = HackyAddress(ref Density[x + y * PlaneSize], i); if (amount > 0) result[compound] = amount; } @@ -832,7 +817,7 @@ public void AbsorbCompounds(int localX, int localY, CompoundBag storage, while (true) { // Overestimate of how many compounds we get - float cloudAmount = HackyAddress(ref Density[localX, localY], i); + float cloudAmount = HackyAddress(ref Density[localX + localY * PlaneSize], i); float generousAmount = cloudAmount * Constants.SKIP_TRYING_TO_ABSORB_RATIO; // Skip if there isn't enough to absorb @@ -947,29 +932,97 @@ private Vector4 CalculateCloudToAdd(Compound compound, float density) Compounds[3] == compound ? density : 0.0f); } - private void PartialDiffuse(int x0, int y0, int width, int height, float delta) + private void PartialDiffuse(int slice, int slices, float delta) { - float a = delta * Constants.CLOUD_DIFFUSION_RATE; - var cellMultiplier = a * 0.25f; - var planeSize = PlaneSize; + int planeSize = PlaneSize; + int horizontalStart = slice * planeSize / slices; + int horizontalEnd = (slice + 1) * planeSize / slices; - for (int x = x0; x < x0 + width; ++x) + float diffusionAmount = delta * Constants.CLOUD_DIFFUSION_RATE; + float neighborWeight = diffusionAmount * 0.25f; + float centerWeight = 1.0f - diffusionAmount; + + var sourceDensity = Density.AsSpan(); + var destinationDensity = OldDensity.AsSpan(); + + for (int horizontalIndex = horizontalStart; horizontalIndex < horizontalEnd; horizontalIndex++) { - var xMinus = x == 0 ? planeSize - 1 : x - 1; - var xPlus = x == planeSize - 1 ? 0 : x + 1; + int currentRowOffset = horizontalIndex * planeSize; + int previousRowOffset = (horizontalIndex == 0 ? planeSize - 1 : horizontalIndex - 1) * planeSize; + int nextRowOffset = (horizontalIndex == planeSize - 1 ? 0 : horizontalIndex + 1) * planeSize; + + int firstIndex = currentRowOffset; + destinationDensity[firstIndex] = sourceDensity[firstIndex] * centerWeight + + (sourceDensity[currentRowOffset + (planeSize - 1)] + sourceDensity[currentRowOffset + 1] + + sourceDensity[previousRowOffset] + sourceDensity[nextRowOffset]) * neighborWeight; + + int verticalIndex = 1; + int safeLimit = planeSize - 1; + for (; verticalIndex < safeLimit; verticalIndex++) + { + int currentIndex = currentRowOffset + verticalIndex; + destinationDensity[currentIndex] = sourceDensity[currentIndex] * centerWeight + + (sourceDensity[currentIndex - 1] + sourceDensity[currentIndex + 1] + + sourceDensity[previousRowOffset + verticalIndex] + sourceDensity[nextRowOffset + verticalIndex]) + * neighborWeight; + } - for (int y = y0; y < y0 + height; ++y) + if (verticalIndex < planeSize) { - var yMinus = y == 0 ? planeSize - 1 : y - 1; - var yPlus = y == planeSize - 1 ? 0 : y + 1; - - OldDensity[x, y] = - Density[x, y] * (1 - a) + - ( - Density[x, yMinus] + - Density[x, yPlus] + - Density[xMinus, y] + - Density[xPlus, y]) * cellMultiplier; + int lastIndex = currentRowOffset + verticalIndex; + destinationDensity[lastIndex] = sourceDensity[lastIndex] * centerWeight + + (sourceDensity[lastIndex - 1] + sourceDensity[currentRowOffset] + + sourceDensity[previousRowOffset + verticalIndex] + sourceDensity[nextRowOffset + verticalIndex]) + * neighborWeight; + } + } + } + + private void AreaDiffuse(int horizontalStart, int horizontalEnd, int verticalStart, int verticalEnd, + float delta) + { + int planeSize = PlaneSize; + float diffusionAmount = delta * Constants.CLOUD_DIFFUSION_RATE; + float neighborWeight = diffusionAmount * 0.25f; + float centerWeight = 1.0f - diffusionAmount; + + var sourceDensity = Density.AsSpan(); + var destinationDensity = OldDensity.AsSpan(); + + for (int horizontalIndex = horizontalStart; horizontalIndex < horizontalEnd; horizontalIndex++) + { + int currentRowOffset = horizontalIndex * planeSize; + int previousRowOffset = (horizontalIndex == 0 ? planeSize - 1 : horizontalIndex - 1) * planeSize; + int nextRowOffset = (horizontalIndex == planeSize - 1 ? 0 : horizontalIndex + 1) * planeSize; + + int verticalIndex = verticalStart; + + if (verticalIndex == 0 && verticalIndex < verticalEnd) + { + int currentIndex = currentRowOffset + 0; + destinationDensity[currentIndex] = sourceDensity[currentIndex] * centerWeight + + (sourceDensity[currentRowOffset + (planeSize - 1)] + sourceDensity[currentRowOffset + 1] + + sourceDensity[previousRowOffset] + sourceDensity[nextRowOffset]) * neighborWeight; + verticalIndex++; + } + + int safeLimit = Math.Min(verticalEnd, planeSize - 1); + for (; verticalIndex < safeLimit; verticalIndex++) + { + int currentIndex = currentRowOffset + verticalIndex; + destinationDensity[currentIndex] = sourceDensity[currentIndex] * centerWeight + + (sourceDensity[currentIndex - 1] + sourceDensity[currentIndex + 1] + + sourceDensity[previousRowOffset + verticalIndex] + sourceDensity[nextRowOffset + verticalIndex]) + * neighborWeight; + } + + if (verticalIndex == planeSize - 1 && verticalIndex < verticalEnd) + { + int currentIndex = currentRowOffset + verticalIndex; + destinationDensity[currentIndex] = sourceDensity[currentIndex] * centerWeight + + (sourceDensity[currentIndex - 1] + sourceDensity[currentRowOffset] + + sourceDensity[previousRowOffset + verticalIndex] + sourceDensity[nextRowOffset + verticalIndex]) + * neighborWeight; } } } @@ -1066,97 +1119,122 @@ private int GetEdgeShift(int coord, int playerPos) return 0; } - private void PartialAdvect(int x0, int y0, int size, float delta) + private void PartialAdvect(int slice, int slices, float delta) { - var resolution = CloudResolution; - var worldPos = GetWorldPositionForAdvection(x0, y0); + int planeSize = PlaneSize; + int horizontalStart = slice * planeSize / slices; + int horizontalEnd = (slice + 1) * planeSize / slices; - for (int x = x0; x < x0 + size; ++x) - { - var worldX = worldPos.X + x * resolution; + float resolution = CloudResolution; + Vector2 worldPositionBase = GetWorldPositionForAdvection(horizontalStart, 0); - for (int y = y0; y < y0 + size; ++y) - { - var worldY = worldPos.Y + y * resolution; + var oldDensitySpan = OldDensity.AsSpan(); + var densitySpan = Density.AsSpan(); - var oldDensity = OldDensity[x, y]; + for (int x = horizontalStart; x < horizontalEnd; x++) + { + int horizontalOffset = x * planeSize; + float worldX = worldPositionBase.X + (x - horizontalStart) * resolution; - // This is better for performance than checking length squared of oldDensity although - // might cause issues if for some reason density would end up with negative value - if (oldDensity.X + oldDensity.Y + oldDensity.Z + oldDensity.W < 1) + for (int y = 0; y < planeSize; y++) + { + Vector4 oldDensity = oldDensitySpan[horizontalOffset + y]; + if (oldDensity.X + oldDensity.Y + oldDensity.Z + oldDensity.W < 1.0f) continue; - var velocity = - fluidSystem!.VelocityAt(new Vector2(worldX, worldY)); + float worldY = worldPositionBase.Y + y * resolution; + Vector2 velocity = fluidSystem!.VelocityAt(new Vector2(worldX, worldY)); - if (MathF.Abs(velocity.X) + MathF.Abs(velocity.Y) < - Constants.CURRENT_COMPOUND_CLOUD_ADVECT_THRESHOLD) + if (MathF.Abs(velocity.X) + MathF.Abs(velocity.Y) < Constants.CURRENT_COMPOUND_CLOUD_ADVECT_THRESHOLD) { velocity = Vector2.Zero; } velocity *= VISCOSITY; - float dx = x + delta * velocity.X; - float dy = y + delta * velocity.Y; + float destinationX = x + delta * velocity.X; + float destinationY = y + delta * velocity.Y; - CalculateMovementFactors(dx, dy, - out int floorX, out int ceilX, out int floorY, out int ceilY, + CalculateMovementFactors(destinationX, destinationY, + out int floorX, out int ceilingX, out int floorY, out int ceilingY, out float weightRight, out float weightLeft, out float weightBottom, out float weightTop); - floorX = floorX.PositiveModulo(PlaneSize); - ceilX = ceilX.PositiveModulo(PlaneSize); - floorY = floorY.PositiveModulo(PlaneSize); - ceilY = ceilY.PositiveModulo(PlaneSize); - - var oldDensityDecayed = oldDensity * decayRates; - var oldDensityDecayedLeft = oldDensityDecayed * weightLeft; - var oldDensityDecayedRight = oldDensityDecayed * weightRight; - - Density[floorX, floorY] += oldDensityDecayedLeft * weightTop; - Density[floorX, ceilY] += oldDensityDecayedLeft * weightBottom; - Density[ceilX, floorY] += oldDensityDecayedRight * weightTop; - Density[ceilX, ceilY] += oldDensityDecayedRight * weightBottom; + if ((uint)floorX >= (uint)planeSize) + floorX = (floorX < 0) ? floorX + planeSize : floorX - planeSize; + if ((uint)ceilingX >= (uint)planeSize) + ceilingX = (ceilingX < 0) ? ceilingX + planeSize : ceilingX - planeSize; + if ((uint)floorY >= (uint)planeSize) + floorY = (floorY < 0) ? floorY + planeSize : floorY - planeSize; + if ((uint)ceilingY >= (uint)planeSize) + ceilingY = (ceilingY < 0) ? ceilingY + planeSize : ceilingY - planeSize; + + int floorXRow = floorX * planeSize; + int ceilingXRow = ceilingX * planeSize; + + Vector4 decayed = oldDensity * decayRates; + Vector4 decayedLeft = decayed * weightLeft; + Vector4 decayedRight = decayed * weightRight; + + densitySpan[floorXRow + floorY] += decayedLeft * weightTop; + densitySpan[floorXRow + ceilingY] += decayedLeft * weightBottom; + densitySpan[ceilingXRow + floorY] += decayedRight * weightTop; + densitySpan[ceilingXRow + ceilingY] += decayedRight * weightBottom; } } } - private void PartialUpdateTextureImage(int x0, int y0, int width, int height, Span bufferSpan) + private void PartialUpdateTextureImage(int slice, int nOfSlices) { - int imgWidth = image!.GetWidth(); + float invMax = 255.0f / Constants.CLOUD_MAX_INTENSITY_SHOWN; + int rowsPerSlice = PlaneSize / nOfSlices; + int startRow = slice * rowsPerSlice; + int endRow = (slice == nOfSlices - 1) ? PlaneSize : (slice + 1) * rowsPerSlice; + int rowCount = endRow - startRow; + + var densitySpan = Density.AsSpan(startRow * PlaneSize, rowCount * PlaneSize); + var denFloats = MemoryMarshal.Cast(densitySpan); + + var bufferSpan = tempBuffer.AsSpan(startRow * PlaneSize * 4, rowCount * PlaneSize * 4); - for (int x = x0; x < x0 + width; ++x) + var vInvMax = Vector128.Create(invMax); + var vZero = Vector128.Zero; + var v255 = Vector128.Create(255.0f); + + int i = 0; + int floatLength = denFloats.Length; + + for (; i <= floatLength - 4; i += 4) { - for (int y = y0; y < y0 + height; ++y) - { - var pixel = Density[x, y] * 1.0f / Constants.CLOUD_MAX_INTENSITY_SHOWN; - int offset = (y * imgWidth + x) * 4; + Vector128 v = Vector128.LoadUnsafe(ref MemoryMarshal.GetReference(denFloats), (uint)i); - bufferSpan[offset] = (byte)(Math.Clamp(pixel.X, 0, 1) * 255); - bufferSpan[offset + 1] = (byte)(Math.Clamp(pixel.Y, 0, 1) * 255); - bufferSpan[offset + 2] = (byte)(Math.Clamp(pixel.Z, 0, 1) * 255); - bufferSpan[offset + 3] = (byte)(Math.Clamp(pixel.W, 0, 1) * 255); - } + v = Vector128.Multiply(v, vInvMax); + v = Vector128.Max(v, vZero); + v = Vector128.Min(v, v255); + + Vector128 vInt = Vector128.ConvertToInt32(v); + Vector128 packed16 = Sse2.PackSignedSaturate(vInt, vInt); + Vector128 packed8 = Sse2.PackUnsignedSaturate(packed16.AsInt16(), packed16.AsInt16()).AsByte(); + + MemoryMarshal.Write(bufferSpan.Slice(i, 4), packed8.AsUInt32().ToScalar()); + } + + for (; i < floatLength; i++) + { + bufferSpan[i] = (byte)Math.Clamp(denFloats[i] * invMax, 0, 255); } } private void PartialClearDensity(int x0, int y0, int width, int height) { - for (int x = x0; x < x0 + width; ++x) + for (int y = x0; y < x0 + width; ++y) { - for (int y = y0; y < y0 + height; ++y) + for (int x = y0; x < y0 + height; ++x) { - Density[x, y] = Vector4.Zero; + Density[x + y * PlaneSize] = Vector4.Zero; } } } - private void PartialDiffuseCenter(int x0, int y0, int size, float delta) - { - PartialDiffuse(x0 + Constants.CLOUD_PLANE_EDGE_WIDTH / 2, y0 + Constants.CLOUD_PLANE_EDGE_WIDTH / 2, size - - Constants.CLOUD_PLANE_EDGE_WIDTH, size - Constants.CLOUD_PLANE_EDGE_WIDTH, delta); - } - private float HackyAddress(ref Vector4 vector, int index) { switch (index) @@ -1188,7 +1266,7 @@ private int GetCompoundIndex(Compound compound) private void CreateDensityTexture() { int requestedSize = PlaneSize * PlaneSize * 4; - if (tempBuffer is null || requestedSize > tempBuffer.Length) + if (tempBuffer == null! || requestedSize > tempBuffer.Length) { tempBuffer = new byte[requestedSize]; } diff --git a/src/microbe_stage/CompoundCloudSystem.cs b/src/microbe_stage/CompoundCloudSystem.cs index 440c6e60814..80a66eaadec 100644 --- a/src/microbe_stage/CompoundCloudSystem.cs +++ b/src/microbe_stage/CompoundCloudSystem.cs @@ -1,5 +1,4 @@ using System; -using System.Buffers; using System.Collections.Generic; using System.Diagnostics.CodeAnalysis; using System.Threading.Tasks; From 59e285074e7b6c1a61a4005a1c44a789919f9c2d Mon Sep 17 00:00:00 2001 From: Francesco Date: Fri, 1 May 2026 16:02:12 +0200 Subject: [PATCH 06/32] Polishing --- src/microbe_stage/CompoundCloudPlane.cs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/microbe_stage/CompoundCloudPlane.cs b/src/microbe_stage/CompoundCloudPlane.cs index 23cfd4a8ddb..612d50fc9a5 100644 --- a/src/microbe_stage/CompoundCloudPlane.cs +++ b/src/microbe_stage/CompoundCloudPlane.cs @@ -388,7 +388,7 @@ public void DiffuseEdges(float deltaTime) int squaresPerSide = Constants.CLOUD_PLANE_SQUARES_PER_SIDE; int planeChunkSize = planeSize / squaresPerSide; - for (int column = 0; column <= squaresPerSide; column++) + for (int column = 0; column <= squaresPerSide; ++column) { int boundaryCenter = column * planeChunkSize; int horizontalStart = Math.Max(0, boundaryCenter - halfEdgeWidth); @@ -397,12 +397,12 @@ public void DiffuseEdges(float deltaTime) AreaDiffuse(horizontalStart, horizontalEnd, 0, planeSize, deltaTime); } - for (int square = 0; square < squaresPerSide; square++) + for (int square = 0; square < squaresPerSide; ++square) { int horizontalStart = square * planeChunkSize + halfEdgeWidth; int horizontalEnd = (square + 1) * planeChunkSize - halfEdgeWidth; - for (int row = 0; row <= squaresPerSide; row++) + for (int row = 0; row <= squaresPerSide; ++row) { int boundaryCenter = row * planeChunkSize; int verticalStart = Math.Max(0, boundaryCenter - halfEdgeWidth); @@ -421,7 +421,7 @@ public void QueueDiffuseCloud(float deltaTime, List queue) deltaTime *= 100.0f; int slices = Constants.CLOUD_PLANE_SQUARES_PER_SIDE * Constants.CLOUD_PLANE_SQUARES_PER_SIDE; - for (int slice = 0; slice < slices; slice++) + for (int slice = 0; slice < slices; ++slice) { int atSlice = slice; queue.Add(new Task(() => PartialDiffuse(atSlice, slices, deltaTime))); @@ -945,7 +945,7 @@ private void PartialDiffuse(int slice, int slices, float delta) var sourceDensity = Density.AsSpan(); var destinationDensity = OldDensity.AsSpan(); - for (int horizontalIndex = horizontalStart; horizontalIndex < horizontalEnd; horizontalIndex++) + for (int horizontalIndex = horizontalStart; horizontalIndex < horizontalEnd; ++horizontalIndex) { int currentRowOffset = horizontalIndex * planeSize; int previousRowOffset = (horizontalIndex == 0 ? planeSize - 1 : horizontalIndex - 1) * planeSize; @@ -958,7 +958,7 @@ private void PartialDiffuse(int slice, int slices, float delta) int verticalIndex = 1; int safeLimit = planeSize - 1; - for (; verticalIndex < safeLimit; verticalIndex++) + for (; verticalIndex < safeLimit; ++verticalIndex) { int currentIndex = currentRowOffset + verticalIndex; destinationDensity[currentIndex] = sourceDensity[currentIndex] * centerWeight + @@ -989,7 +989,7 @@ private void AreaDiffuse(int horizontalStart, int horizontalEnd, int verticalSta var sourceDensity = Density.AsSpan(); var destinationDensity = OldDensity.AsSpan(); - for (int horizontalIndex = horizontalStart; horizontalIndex < horizontalEnd; horizontalIndex++) + for (int horizontalIndex = horizontalStart; horizontalIndex < horizontalEnd; ++horizontalIndex) { int currentRowOffset = horizontalIndex * planeSize; int previousRowOffset = (horizontalIndex == 0 ? planeSize - 1 : horizontalIndex - 1) * planeSize; @@ -1003,11 +1003,11 @@ private void AreaDiffuse(int horizontalStart, int horizontalEnd, int verticalSta destinationDensity[currentIndex] = sourceDensity[currentIndex] * centerWeight + (sourceDensity[currentRowOffset + (planeSize - 1)] + sourceDensity[currentRowOffset + 1] + sourceDensity[previousRowOffset] + sourceDensity[nextRowOffset]) * neighborWeight; - verticalIndex++; + ++verticalIndex; } int safeLimit = Math.Min(verticalEnd, planeSize - 1); - for (; verticalIndex < safeLimit; verticalIndex++) + for (; verticalIndex < safeLimit; ++verticalIndex) { int currentIndex = currentRowOffset + verticalIndex; destinationDensity[currentIndex] = sourceDensity[currentIndex] * centerWeight + @@ -1131,12 +1131,12 @@ private void PartialAdvect(int slice, int slices, float delta) var oldDensitySpan = OldDensity.AsSpan(); var densitySpan = Density.AsSpan(); - for (int x = horizontalStart; x < horizontalEnd; x++) + for (int x = horizontalStart; x < horizontalEnd; ++x) { int horizontalOffset = x * planeSize; float worldX = worldPositionBase.X + (x - horizontalStart) * resolution; - for (int y = 0; y < planeSize; y++) + for (int y = 0; y < planeSize; ++y) { Vector4 oldDensity = oldDensitySpan[horizontalOffset + y]; if (oldDensity.X + oldDensity.Y + oldDensity.Z + oldDensity.W < 1.0f) @@ -1218,7 +1218,7 @@ private void PartialUpdateTextureImage(int slice, int nOfSlices) MemoryMarshal.Write(bufferSpan.Slice(i, 4), packed8.AsUInt32().ToScalar()); } - for (; i < floatLength; i++) + for (; i < floatLength; ++i) { bufferSpan[i] = (byte)Math.Clamp(denFloats[i] * invMax, 0, 255); } From dcf1fedfc6b5090b801fbe214ee8f1c685522465 Mon Sep 17 00:00:00 2001 From: Francesco Date: Fri, 1 May 2026 17:13:44 +0200 Subject: [PATCH 07/32] Abolished copy step by moving copy into advection loop --- src/microbe_stage/CompoundCloudPlane.cs | 114 +++++++---------------- src/microbe_stage/CompoundCloudSystem.cs | 9 -- 2 files changed, 35 insertions(+), 88 deletions(-) diff --git a/src/microbe_stage/CompoundCloudPlane.cs b/src/microbe_stage/CompoundCloudPlane.cs index 612d50fc9a5..55023e982ae 100644 --- a/src/microbe_stage/CompoundCloudPlane.cs +++ b/src/microbe_stage/CompoundCloudPlane.cs @@ -447,20 +447,6 @@ public void QueueAdvectCloud(float delta, List queue) } } - /// - /// Updates the cloud in parallel. - /// - public void QueueUpdateTextureImage(List queue) - { - int slices = Constants.CLOUD_PLANE_SQUARES_PER_SIDE * Constants.CLOUD_PLANE_SQUARES_PER_SIDE; - - for (int slice = 0; slice < slices; ++slice) - { - int atSlice = slice; - queue.Add(new Task(() => PartialUpdateTextureImage(atSlice, slices))); - } - } - public void UpdateTexture() { int width = image!.GetWidth(); @@ -1122,40 +1108,51 @@ private int GetEdgeShift(int coord, int playerPos) private void PartialAdvect(int slice, int slices, float delta) { int planeSize = PlaneSize; - int horizontalStart = slice * planeSize / slices; - int horizontalEnd = (slice + 1) * planeSize / slices; + int rowStart = slice * planeSize / slices; + int rowEnd = (slice + 1) * planeSize / slices; + int rowCount = rowEnd - rowStart; + + var source = OldDensity.AsSpan(rowStart * planeSize, rowCount * planeSize); + var destination = Density.AsSpan(); + var bufferSpan = tempBuffer.AsSpan(rowStart * planeSize * 4, rowCount * planeSize * 4); float resolution = CloudResolution; - Vector2 worldPositionBase = GetWorldPositionForAdvection(horizontalStart, 0); + Vector2 worldPositionBase = GetWorldPositionForAdvection(0, rowStart); - var oldDensitySpan = OldDensity.AsSpan(); - var densitySpan = Density.AsSpan(); + const float intensityScale = 255.0f / Constants.CLOUD_MAX_INTENSITY_SHOWN; - for (int x = horizontalStart; x < horizontalEnd; ++x) + int bufferIndex = 0; + for (int y = 0; y < rowCount; y++) { - int horizontalOffset = x * planeSize; - float worldX = worldPositionBase.X + (x - horizontalStart) * resolution; + int rowOffset = y * planeSize; + float worldY = worldPositionBase.Y + y * resolution; + int absoluteY = y + rowStart; - for (int y = 0; y < planeSize; ++y) + for (int x = 0; x < planeSize; x++) { - Vector4 oldDensity = oldDensitySpan[horizontalOffset + y]; - if (oldDensity.X + oldDensity.Y + oldDensity.Z + oldDensity.W < 1.0f) + Vector4 currentDensity = source[rowOffset + x]; + + bufferSpan[bufferIndex] = (byte)Math.Clamp(currentDensity.X * intensityScale, 0, 255); + bufferSpan[bufferIndex + 1] = (byte)Math.Clamp(currentDensity.Y * intensityScale, 0, 255); + bufferSpan[bufferIndex + 2] = (byte)Math.Clamp(currentDensity.Z * intensityScale, 0, 255); + bufferSpan[bufferIndex + 3] = (byte)Math.Clamp(currentDensity.W * intensityScale, 0, 255); + bufferIndex += 4; + + if (currentDensity.X + currentDensity.Y + currentDensity.Z + currentDensity.W < 1.0f) continue; - float worldY = worldPositionBase.Y + y * resolution; + float worldX = worldPositionBase.X + x * resolution; Vector2 velocity = fluidSystem!.VelocityAt(new Vector2(worldX, worldY)); if (MathF.Abs(velocity.X) + MathF.Abs(velocity.Y) < Constants.CURRENT_COMPOUND_CLOUD_ADVECT_THRESHOLD) - { velocity = Vector2.Zero; - } velocity *= VISCOSITY; - float destinationX = x + delta * velocity.X; - float destinationY = y + delta * velocity.Y; + float targetX = x + delta * velocity.X; + float targetY = absoluteY + delta * velocity.Y; - CalculateMovementFactors(destinationX, destinationY, + CalculateMovementFactors(targetX, targetY, out int floorX, out int ceilingX, out int floorY, out int ceilingY, out float weightRight, out float weightLeft, out float weightBottom, out float weightTop); @@ -1168,62 +1165,21 @@ private void PartialAdvect(int slice, int slices, float delta) if ((uint)ceilingY >= (uint)planeSize) ceilingY = (ceilingY < 0) ? ceilingY + planeSize : ceilingY - planeSize; - int floorXRow = floorX * planeSize; - int ceilingXRow = ceilingX * planeSize; + int floorYOffset = floorY * planeSize; + int ceilingYOffset = ceilingY * planeSize; - Vector4 decayed = oldDensity * decayRates; + Vector4 decayed = currentDensity * decayRates; Vector4 decayedLeft = decayed * weightLeft; Vector4 decayedRight = decayed * weightRight; - densitySpan[floorXRow + floorY] += decayedLeft * weightTop; - densitySpan[floorXRow + ceilingY] += decayedLeft * weightBottom; - densitySpan[ceilingXRow + floorY] += decayedRight * weightTop; - densitySpan[ceilingXRow + ceilingY] += decayedRight * weightBottom; + destination[floorX + floorYOffset] += decayedLeft * weightTop; + destination[floorX + ceilingYOffset] += decayedLeft * weightBottom; + destination[ceilingX + floorYOffset] += decayedRight * weightTop; + destination[ceilingX + ceilingYOffset] += decayedRight * weightBottom; } } } - private void PartialUpdateTextureImage(int slice, int nOfSlices) - { - float invMax = 255.0f / Constants.CLOUD_MAX_INTENSITY_SHOWN; - int rowsPerSlice = PlaneSize / nOfSlices; - int startRow = slice * rowsPerSlice; - int endRow = (slice == nOfSlices - 1) ? PlaneSize : (slice + 1) * rowsPerSlice; - int rowCount = endRow - startRow; - - var densitySpan = Density.AsSpan(startRow * PlaneSize, rowCount * PlaneSize); - var denFloats = MemoryMarshal.Cast(densitySpan); - - var bufferSpan = tempBuffer.AsSpan(startRow * PlaneSize * 4, rowCount * PlaneSize * 4); - - var vInvMax = Vector128.Create(invMax); - var vZero = Vector128.Zero; - var v255 = Vector128.Create(255.0f); - - int i = 0; - int floatLength = denFloats.Length; - - for (; i <= floatLength - 4; i += 4) - { - Vector128 v = Vector128.LoadUnsafe(ref MemoryMarshal.GetReference(denFloats), (uint)i); - - v = Vector128.Multiply(v, vInvMax); - v = Vector128.Max(v, vZero); - v = Vector128.Min(v, v255); - - Vector128 vInt = Vector128.ConvertToInt32(v); - Vector128 packed16 = Sse2.PackSignedSaturate(vInt, vInt); - Vector128 packed8 = Sse2.PackUnsignedSaturate(packed16.AsInt16(), packed16.AsInt16()).AsByte(); - - MemoryMarshal.Write(bufferSpan.Slice(i, 4), packed8.AsUInt32().ToScalar()); - } - - for (; i < floatLength; ++i) - { - bufferSpan[i] = (byte)Math.Clamp(denFloats[i] * invMax, 0, 255); - } - } - private void PartialClearDensity(int x0, int y0, int width, int height) { for (int y = x0; y < x0 + width; ++y) diff --git a/src/microbe_stage/CompoundCloudSystem.cs b/src/microbe_stage/CompoundCloudSystem.cs index 80a66eaadec..c04959e5917 100644 --- a/src/microbe_stage/CompoundCloudSystem.cs +++ b/src/microbe_stage/CompoundCloudSystem.cs @@ -488,15 +488,6 @@ private void UpdateCloudContents(float delta) cloud.QueueAdvectCloud(delta, tasks); } - executor.RunTasks(tasks); - tasks.Clear(); - - // Update the cloud textures in parallel - foreach (var cloud in clouds) - { - cloud.QueueUpdateTextureImage(tasks); - } - executor.RunTasks(tasks); foreach (var cloud in clouds) From 58515a655d3b70d44b47119c6ab83812b1fee2e0 Mon Sep 17 00:00:00 2001 From: Francesco Date: Fri, 1 May 2026 17:21:26 +0200 Subject: [PATCH 08/32] Removed now unused SIMD imports --- src/microbe_stage/CompoundCloudPlane.cs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/microbe_stage/CompoundCloudPlane.cs b/src/microbe_stage/CompoundCloudPlane.cs index 55023e982ae..4ab75d382a8 100644 --- a/src/microbe_stage/CompoundCloudPlane.cs +++ b/src/microbe_stage/CompoundCloudPlane.cs @@ -5,9 +5,6 @@ using System; using System.Collections.Generic; using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; using System.Threading; using System.Threading.Tasks; using Godot; From ecb7018069c711355a443c8e0f4958ec6b0108bb Mon Sep 17 00:00:00 2001 From: Francesco Date: Fri, 1 May 2026 17:26:29 +0200 Subject: [PATCH 09/32] Polished --- src/microbe_stage/CompoundCloudPlane.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/microbe_stage/CompoundCloudPlane.cs b/src/microbe_stage/CompoundCloudPlane.cs index 4ab75d382a8..8a7659c1417 100644 --- a/src/microbe_stage/CompoundCloudPlane.cs +++ b/src/microbe_stage/CompoundCloudPlane.cs @@ -1119,13 +1119,13 @@ private void PartialAdvect(int slice, int slices, float delta) const float intensityScale = 255.0f / Constants.CLOUD_MAX_INTENSITY_SHOWN; int bufferIndex = 0; - for (int y = 0; y < rowCount; y++) + for (int y = 0; y < rowCount; ++y) { int rowOffset = y * planeSize; float worldY = worldPositionBase.Y + y * resolution; int absoluteY = y + rowStart; - for (int x = 0; x < planeSize; x++) + for (int x = 0; x < planeSize; ++x) { Vector4 currentDensity = source[rowOffset + x]; From 199621f1500931a588c23a3658e9a557fdf55179 Mon Sep 17 00:00:00 2001 From: Francesco Date: Sat, 2 May 2026 11:50:27 +0200 Subject: [PATCH 10/32] Implemented SIMD in the diffusion algorithm. --- src/microbe_stage/CompoundCloudPlane.cs | 46 +++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/src/microbe_stage/CompoundCloudPlane.cs b/src/microbe_stage/CompoundCloudPlane.cs index 8a7659c1417..2be8aaf0e56 100644 --- a/src/microbe_stage/CompoundCloudPlane.cs +++ b/src/microbe_stage/CompoundCloudPlane.cs @@ -5,6 +5,9 @@ using System; using System.Collections.Generic; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; using System.Threading; using System.Threading.Tasks; using Godot; @@ -925,8 +928,16 @@ private void PartialDiffuse(int slice, int slices, float delta) float neighborWeight = diffusionAmount * 0.25f; float centerWeight = 1.0f - diffusionAmount; - var sourceDensity = Density.AsSpan(); - var destinationDensity = OldDensity.AsSpan(); + ReadOnlySpan sourceDensity = Density.AsSpan(); + Span destinationDensity = OldDensity.AsSpan(); + ReadOnlySpan sourceFloats = MemoryMarshal.Cast(sourceDensity); + Span destinationFloats = MemoryMarshal.Cast(destinationDensity); + + var centerWeightVector = Vector256.Create(centerWeight); + var neighborWeightVector = Vector256.Create(neighborWeight); + + ref float sourceReference = ref MemoryMarshal.GetReference(sourceFloats); + ref float destinationReference = ref MemoryMarshal.GetReference(destinationFloats); for (int horizontalIndex = horizontalStart; horizontalIndex < horizontalEnd; ++horizontalIndex) { @@ -941,6 +952,37 @@ private void PartialDiffuse(int slice, int slices, float delta) int verticalIndex = 1; int safeLimit = planeSize - 1; + + if (Avx2.IsSupported) + { + // Use Avx2 SIMD to vectorise diffusion. + + for (; verticalIndex <= safeLimit - 2; verticalIndex += 2) + { + uint offset = (uint)(currentRowOffset + verticalIndex) << 2; + + var center = Vector256.LoadUnsafe(ref sourceReference, offset); + + var up = Vector256.LoadUnsafe(ref sourceReference, offset - 4); + var down = Vector256.LoadUnsafe(ref sourceReference, offset + 4); + var left = Vector256.LoadUnsafe(ref sourceReference, (uint)(previousRowOffset + + verticalIndex) << 2); + var right = Vector256.LoadUnsafe(ref sourceReference, (uint)(nextRowOffset + verticalIndex) << 2); + + var neighbors = Avx.Add(Avx.Add(up, down), Avx.Add(left, right)); + + var result = Avx.Add( + Avx.Multiply(center, centerWeightVector), + Avx.Multiply(neighbors, neighborWeightVector)); + + result.StoreUnsafe(ref destinationReference, offset); + } + } + + // If Avx2 is unsupported, the following loops will take care of the scalar operations. That must be + // executed after the SIMD operations if Avx2 is supported anyway, as we need to take care of a possible + // "tail" if the PlaneSize is not aligned to the previous SIMD algorithm. + for (; verticalIndex < safeLimit; ++verticalIndex) { int currentIndex = currentRowOffset + verticalIndex; From f41f28319136611058177ce4f326a893c6376af8 Mon Sep 17 00:00:00 2001 From: Francesco Date: Sat, 2 May 2026 12:06:35 +0200 Subject: [PATCH 11/32] Added a few comments --- src/microbe_stage/CompoundCloudPlane.cs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/microbe_stage/CompoundCloudPlane.cs b/src/microbe_stage/CompoundCloudPlane.cs index 2be8aaf0e56..681ce8b91e7 100644 --- a/src/microbe_stage/CompoundCloudPlane.cs +++ b/src/microbe_stage/CompoundCloudPlane.cs @@ -957,6 +957,8 @@ private void PartialDiffuse(int slice, int slices, float delta) { // Use Avx2 SIMD to vectorise diffusion. + // Most of the operations are now vectorised, except for a possible final tail that doesn't fit in a + // Vector256. This is taken care of after the current conditional branch. for (; verticalIndex <= safeLimit - 2; verticalIndex += 2) { uint offset = (uint)(currentRowOffset + verticalIndex) << 2; @@ -983,6 +985,8 @@ private void PartialDiffuse(int slice, int slices, float delta) // executed after the SIMD operations if Avx2 is supported anyway, as we need to take care of a possible // "tail" if the PlaneSize is not aligned to the previous SIMD algorithm. + // This is the scalar algorithm. It executes if Avx2 is not supported and on the tail we discussed in the + // previous comments. for (; verticalIndex < safeLimit; ++verticalIndex) { int currentIndex = currentRowOffset + verticalIndex; From 8464c7101933744d16e0f46cad319a55dcb69e6a Mon Sep 17 00:00:00 2001 From: Francesco Date: Sat, 2 May 2026 12:35:40 +0200 Subject: [PATCH 12/32] Moved avx supported variable out of the loop --- src/microbe_stage/CompoundCloudPlane.cs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/microbe_stage/CompoundCloudPlane.cs b/src/microbe_stage/CompoundCloudPlane.cs index 681ce8b91e7..40b7263b89e 100644 --- a/src/microbe_stage/CompoundCloudPlane.cs +++ b/src/microbe_stage/CompoundCloudPlane.cs @@ -939,6 +939,8 @@ private void PartialDiffuse(int slice, int slices, float delta) ref float sourceReference = ref MemoryMarshal.GetReference(sourceFloats); ref float destinationReference = ref MemoryMarshal.GetReference(destinationFloats); + bool avx2Supported = Avx2.IsSupported; + for (int horizontalIndex = horizontalStart; horizontalIndex < horizontalEnd; ++horizontalIndex) { int currentRowOffset = horizontalIndex * planeSize; @@ -953,7 +955,7 @@ private void PartialDiffuse(int slice, int slices, float delta) int verticalIndex = 1; int safeLimit = planeSize - 1; - if (Avx2.IsSupported) + if (avx2Supported) { // Use Avx2 SIMD to vectorise diffusion. From d3a379829632a396858926d402a54dba5749a02c Mon Sep 17 00:00:00 2001 From: Francesco Date: Sat, 2 May 2026 13:15:15 +0200 Subject: [PATCH 13/32] Unrolled advection loop to use SIMD on sparse areas. --- src/microbe_stage/CompoundCloudPlane.cs | 136 +++++++++++++++--------- 1 file changed, 85 insertions(+), 51 deletions(-) diff --git a/src/microbe_stage/CompoundCloudPlane.cs b/src/microbe_stage/CompoundCloudPlane.cs index 40b7263b89e..ebf3d7fc655 100644 --- a/src/microbe_stage/CompoundCloudPlane.cs +++ b/src/microbe_stage/CompoundCloudPlane.cs @@ -1157,70 +1157,104 @@ private void PartialAdvect(int slice, int slices, float delta) int rowEnd = (slice + 1) * planeSize / slices; int rowCount = rowEnd - rowStart; - var source = OldDensity.AsSpan(rowStart * planeSize, rowCount * planeSize); - var destination = Density.AsSpan(); - var bufferSpan = tempBuffer.AsSpan(rowStart * planeSize * 4, rowCount * planeSize * 4); + ReadOnlySpan source = OldDensity.AsSpan(rowStart * planeSize, rowCount * planeSize); + Span destination = Density.AsSpan(); + Span targetBuffer = tempBuffer.AsSpan(rowStart * planeSize * 4, rowCount * planeSize * 4); float resolution = CloudResolution; + float intensityScale = 255.0f / Constants.CLOUD_MAX_INTENSITY_SHOWN; Vector2 worldPositionBase = GetWorldPositionForAdvection(0, rowStart); - const float intensityScale = 255.0f / Constants.CLOUD_MAX_INTENSITY_SHOWN; + var vScale = Vector128.Create(intensityScale); + var vZero = Vector128.Zero; + var v255 = Vector128.Create(255.0f); - int bufferIndex = 0; - for (int y = 0; y < rowCount; ++y) + int sourceIdx = 0; + int bufferIdx = 0; + + for (int y = 0; y < rowCount; y++) { - int rowOffset = y * planeSize; - float worldY = worldPositionBase.Y + y * resolution; int absoluteY = y + rowStart; + float worldY = worldPositionBase.Y + y * resolution; - for (int x = 0; x < planeSize; ++x) - { - Vector4 currentDensity = source[rowOffset + x]; + int x = 0; - bufferSpan[bufferIndex] = (byte)Math.Clamp(currentDensity.X * intensityScale, 0, 255); - bufferSpan[bufferIndex + 1] = (byte)Math.Clamp(currentDensity.Y * intensityScale, 0, 255); - bufferSpan[bufferIndex + 2] = (byte)Math.Clamp(currentDensity.Z * intensityScale, 0, 255); - bufferSpan[bufferIndex + 3] = (byte)Math.Clamp(currentDensity.W * intensityScale, 0, 255); - bufferIndex += 4; + for (; x <= planeSize - 4; x += 4) + { + var p0 = source[sourceIdx].AsVector128(); + var p1 = source[sourceIdx + 1].AsVector128(); + var p2 = source[sourceIdx + 2].AsVector128(); + var p3 = source[sourceIdx + 3].AsVector128(); + + var sums = Vector128.Create( + p0[0] + p0[1] + p0[2] + p0[3], + p1[0] + p1[1] + p1[2] + p1[3], + p2[0] + p2[1] + p2[2] + p2[3], + p3[0] + p3[1] + p3[2] + p3[3]); + + if (Vector128.LessThanAll(sums, Vector128.Create(1.0f))) + { + Vector128.Zero.StoreUnsafe(ref targetBuffer[bufferIdx]); - if (currentDensity.X + currentDensity.Y + currentDensity.Z + currentDensity.W < 1.0f) + sourceIdx += 4; + bufferIdx += 16; continue; + } - float worldX = worldPositionBase.X + x * resolution; - Vector2 velocity = fluidSystem!.VelocityAt(new Vector2(worldX, worldY)); - - if (MathF.Abs(velocity.X) + MathF.Abs(velocity.Y) < Constants.CURRENT_COMPOUND_CLOUD_ADVECT_THRESHOLD) - velocity = Vector2.Zero; - - velocity *= VISCOSITY; - - float targetX = x + delta * velocity.X; - float targetY = absoluteY + delta * velocity.Y; - - CalculateMovementFactors(targetX, targetY, - out int floorX, out int ceilingX, out int floorY, out int ceilingY, - out float weightRight, out float weightLeft, out float weightBottom, out float weightTop); - - if ((uint)floorX >= (uint)planeSize) - floorX = (floorX < 0) ? floorX + planeSize : floorX - planeSize; - if ((uint)ceilingX >= (uint)planeSize) - ceilingX = (ceilingX < 0) ? ceilingX + planeSize : ceilingX - planeSize; - if ((uint)floorY >= (uint)planeSize) - floorY = (floorY < 0) ? floorY + planeSize : floorY - planeSize; - if ((uint)ceilingY >= (uint)planeSize) - ceilingY = (ceilingY < 0) ? ceilingY + planeSize : ceilingY - planeSize; - - int floorYOffset = floorY * planeSize; - int ceilingYOffset = ceilingY * planeSize; - - Vector4 decayed = currentDensity * decayRates; - Vector4 decayedLeft = decayed * weightLeft; - Vector4 decayedRight = decayed * weightRight; + for (int i = 0; i < 4; i++) + { + Vector4 currentDensity = source[sourceIdx++]; + + var vPixel = currentDensity.AsVector128(); + vPixel = Vector128.Multiply(vPixel, vScale); + vPixel = Vector128.Min(Vector128.Max(vPixel, vZero), v255); + var vInt = Vector128.ConvertToInt32(vPixel); + var packed16 = Sse2.PackSignedSaturate(vInt, vInt); + var packed8 = Sse2.PackUnsignedSaturate(packed16, packed16).AsByte(); + + MemoryMarshal.Write(targetBuffer.Slice(bufferIdx, 4), + packed8.AsUInt32().ToScalar()); + + bufferIdx += 4; + + if (currentDensity.X + currentDensity.Y + currentDensity.Z + currentDensity.W < 1.0f) + continue; + + float worldX = worldPositionBase.X + (x + i) * resolution; + Vector2 velocity = fluidSystem!.VelocityAt(new Vector2(worldX, worldY)); + if (MathF.Abs(velocity.X) + MathF.Abs(velocity.Y) < Constants.CURRENT_COMPOUND_CLOUD_ADVECT_THRESHOLD) + velocity = Vector2.Zero; + + velocity *= VISCOSITY; + float targetX = (x + i) + delta * velocity.X; + float targetY = absoluteY + delta * velocity.Y; + + CalculateMovementFactors(targetX, targetY, + out int fX, out int cX, out int fY, out int cY, + out float wR, out float wL, out float wB, out float wT); + + if ((uint)fX >= (uint)planeSize) + fX = (fX < 0) ? fX + planeSize : fX - planeSize; + if ((uint)cX >= (uint)planeSize) + cX = (cX < 0) ? cX + planeSize : cX - planeSize; + if ((uint)fY >= (uint)planeSize) + fY = (fY < 0) ? fY + planeSize : fY - planeSize; + if ((uint)cY >= (uint)planeSize) + cY = (cY < 0) ? cY + planeSize : cY - planeSize; + + int fYRow = fY * planeSize; + int cYRow = cY * planeSize; + Vector4 decayed = currentDensity * decayRates; + + destination[fX + fYRow] += decayed * (wL * wT); + destination[fX + cYRow] += decayed * (wL * wB); + destination[cX + fYRow] += decayed * (wR * wT); + destination[cX + cYRow] += decayed * (wR * wB); + } + } - destination[floorX + floorYOffset] += decayedLeft * weightTop; - destination[floorX + ceilingYOffset] += decayedLeft * weightBottom; - destination[ceilingX + floorYOffset] += decayedRight * weightTop; - destination[ceilingX + ceilingYOffset] += decayedRight * weightBottom; + for (; x < planeSize; x++) + { } } } From 282dfa4c21a9d6670e503bf1314a43ea4f2737a8 Mon Sep 17 00:00:00 2001 From: Francesco Date: Sat, 2 May 2026 13:23:08 +0200 Subject: [PATCH 14/32] Polishing --- src/microbe_stage/CompoundCloudPlane.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/microbe_stage/CompoundCloudPlane.cs b/src/microbe_stage/CompoundCloudPlane.cs index ebf3d7fc655..832d62dde92 100644 --- a/src/microbe_stage/CompoundCloudPlane.cs +++ b/src/microbe_stage/CompoundCloudPlane.cs @@ -1172,7 +1172,7 @@ private void PartialAdvect(int slice, int slices, float delta) int sourceIdx = 0; int bufferIdx = 0; - for (int y = 0; y < rowCount; y++) + for (int y = 0; y < rowCount; ++y) { int absoluteY = y + rowStart; float worldY = worldPositionBase.Y + y * resolution; @@ -1201,7 +1201,7 @@ private void PartialAdvect(int slice, int slices, float delta) continue; } - for (int i = 0; i < 4; i++) + for (int i = 0; i < 4; ++i) { Vector4 currentDensity = source[sourceIdx++]; @@ -1253,7 +1253,7 @@ private void PartialAdvect(int slice, int slices, float delta) } } - for (; x < planeSize; x++) + for (; x < planeSize; ++x) { } } From 6baa6a878c9b2d17f7d18db088c98aba62c42b47 Mon Sep 17 00:00:00 2001 From: Francesco Date: Sat, 2 May 2026 13:24:25 +0200 Subject: [PATCH 15/32] Polishing --- src/microbe_stage/CompoundCloudPlane.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/microbe_stage/CompoundCloudPlane.cs b/src/microbe_stage/CompoundCloudPlane.cs index 832d62dde92..c1b2b5984c3 100644 --- a/src/microbe_stage/CompoundCloudPlane.cs +++ b/src/microbe_stage/CompoundCloudPlane.cs @@ -1222,7 +1222,8 @@ private void PartialAdvect(int slice, int slices, float delta) float worldX = worldPositionBase.X + (x + i) * resolution; Vector2 velocity = fluidSystem!.VelocityAt(new Vector2(worldX, worldY)); - if (MathF.Abs(velocity.X) + MathF.Abs(velocity.Y) < Constants.CURRENT_COMPOUND_CLOUD_ADVECT_THRESHOLD) + if (MathF.Abs(velocity.X) + MathF.Abs(velocity.Y) < + Constants.CURRENT_COMPOUND_CLOUD_ADVECT_THRESHOLD) velocity = Vector2.Zero; velocity *= VISCOSITY; From 3c86d9e07cf4375f4186e8e5ae6030fa9a07aeda Mon Sep 17 00:00:00 2001 From: Francesco Date: Sat, 2 May 2026 13:40:51 +0200 Subject: [PATCH 16/32] Added tail scalar algorithm to the advection --- src/microbe_stage/CompoundCloudPlane.cs | 88 +++++++++++++++---------- 1 file changed, 54 insertions(+), 34 deletions(-) diff --git a/src/microbe_stage/CompoundCloudPlane.cs b/src/microbe_stage/CompoundCloudPlane.cs index c1b2b5984c3..87001ee5d8c 100644 --- a/src/microbe_stage/CompoundCloudPlane.cs +++ b/src/microbe_stage/CompoundCloudPlane.cs @@ -1159,7 +1159,7 @@ private void PartialAdvect(int slice, int slices, float delta) ReadOnlySpan source = OldDensity.AsSpan(rowStart * planeSize, rowCount * planeSize); Span destination = Density.AsSpan(); - Span targetBuffer = tempBuffer.AsSpan(rowStart * planeSize * 4, rowCount * planeSize * 4); + Span bufferSpan = tempBuffer.AsSpan(rowStart * planeSize * 4, rowCount * planeSize * 4); float resolution = CloudResolution; float intensityScale = 255.0f / Constants.CLOUD_MAX_INTENSITY_SHOWN; @@ -1194,7 +1194,7 @@ private void PartialAdvect(int slice, int slices, float delta) if (Vector128.LessThanAll(sums, Vector128.Create(1.0f))) { - Vector128.Zero.StoreUnsafe(ref targetBuffer[bufferIdx]); + Vector128.Zero.StoreUnsafe(ref bufferSpan[bufferIdx]); sourceIdx += 4; bufferIdx += 16; @@ -1212,7 +1212,7 @@ private void PartialAdvect(int slice, int slices, float delta) var packed16 = Sse2.PackSignedSaturate(vInt, vInt); var packed8 = Sse2.PackUnsignedSaturate(packed16, packed16).AsByte(); - MemoryMarshal.Write(targetBuffer.Slice(bufferIdx, 4), + MemoryMarshal.Write(bufferSpan.Slice(bufferIdx, 4), packed8.AsUInt32().ToScalar()); bufferIdx += 4; @@ -1220,46 +1220,66 @@ private void PartialAdvect(int slice, int slices, float delta) if (currentDensity.X + currentDensity.Y + currentDensity.Z + currentDensity.W < 1.0f) continue; - float worldX = worldPositionBase.X + (x + i) * resolution; - Vector2 velocity = fluidSystem!.VelocityAt(new Vector2(worldX, worldY)); - if (MathF.Abs(velocity.X) + MathF.Abs(velocity.Y) < - Constants.CURRENT_COMPOUND_CLOUD_ADVECT_THRESHOLD) - velocity = Vector2.Zero; - - velocity *= VISCOSITY; - float targetX = (x + i) + delta * velocity.X; - float targetY = absoluteY + delta * velocity.Y; - - CalculateMovementFactors(targetX, targetY, - out int fX, out int cX, out int fY, out int cY, - out float wR, out float wL, out float wB, out float wT); - - if ((uint)fX >= (uint)planeSize) - fX = (fX < 0) ? fX + planeSize : fX - planeSize; - if ((uint)cX >= (uint)planeSize) - cX = (cX < 0) ? cX + planeSize : cX - planeSize; - if ((uint)fY >= (uint)planeSize) - fY = (fY < 0) ? fY + planeSize : fY - planeSize; - if ((uint)cY >= (uint)planeSize) - cY = (cY < 0) ? cY + planeSize : cY - planeSize; - - int fYRow = fY * planeSize; - int cYRow = cY * planeSize; - Vector4 decayed = currentDensity * decayRates; - - destination[fX + fYRow] += decayed * (wL * wT); - destination[fX + cYRow] += decayed * (wL * wB); - destination[cX + fYRow] += decayed * (wR * wT); - destination[cX + cYRow] += decayed * (wR * wB); + ProcessPixelAdvection(currentDensity, x + i, absoluteY, worldY, worldXOffset: (x + i) * resolution, + delta, destination, planeSize, worldPositionBase); } } for (; x < planeSize; ++x) { + Vector4 currentDensity = source[sourceIdx++]; + + bufferSpan[bufferIdx] = (byte)Math.Clamp(currentDensity.X * intensityScale, 0, 255); + bufferSpan[bufferIdx + 1] = (byte)Math.Clamp(currentDensity.Y * intensityScale, 0, 255); + bufferSpan[bufferIdx + 2] = (byte)Math.Clamp(currentDensity.Z * intensityScale, 0, 255); + bufferSpan[bufferIdx + 3] = (byte)Math.Clamp(currentDensity.W * intensityScale, 0, 255); + bufferIdx += 4; + + if (currentDensity.X + currentDensity.Y + currentDensity.Z + currentDensity.W < 1.0f) + continue; + + ProcessPixelAdvection(currentDensity, x, absoluteY, worldY, worldXOffset: x * resolution, delta, + destination, planeSize, worldPositionBase); } } } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void ProcessPixelAdvection(Vector4 currentDensity, int x, int absoluteY, float worldY, float worldXOffset, + float delta, Span destination, int planeSize, Vector2 worldPositionBase) + { + float worldX = worldPositionBase.X + worldXOffset; + Vector2 velocity = fluidSystem!.VelocityAt(new Vector2(worldX, worldY)); + + if (MathF.Abs(velocity.X) + MathF.Abs(velocity.Y) < Constants.CURRENT_COMPOUND_CLOUD_ADVECT_THRESHOLD) + velocity = Vector2.Zero; + + velocity *= VISCOSITY; + float targetX = x + delta * velocity.X; + float targetY = absoluteY + delta * velocity.Y; + + CalculateMovementFactors(targetX, targetY, out int fX, out int cX, out int fY, out int cY, out float wR, + out float wL, out float wB, out float wT); + + if ((uint)fX >= (uint)planeSize) + fX = (fX < 0) ? fX + planeSize : fX - planeSize; + if ((uint)cX >= (uint)planeSize) + cX = (cX < 0) ? cX + planeSize : cX - planeSize; + if ((uint)fY >= (uint)planeSize) + fY = (fY < 0) ? fY + planeSize : fY - planeSize; + if ((uint)cY >= (uint)planeSize) + cY = (cY < 0) ? cY + planeSize : cY - planeSize; + + int fYRow = fY * planeSize; + int cYRow = cY * planeSize; + Vector4 decayed = currentDensity * decayRates; + + destination[fX + fYRow] += decayed * (wL * wT); + destination[fX + cYRow] += decayed * (wL * wB); + destination[cX + fYRow] += decayed * (wR * wT); + destination[cX + cYRow] += decayed * (wR * wB); + } + private void PartialClearDensity(int x0, int y0, int width, int height) { for (int y = x0; y < x0 + width; ++y) From 134dc5a0ddf0fdb339c9df642bd968e8cfa668f2 Mon Sep 17 00:00:00 2001 From: Francesco Date: Sun, 3 May 2026 11:47:20 +0200 Subject: [PATCH 17/32] Solved saving issue --- src/microbe_stage/CompoundCloudPlane.cs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/microbe_stage/CompoundCloudPlane.cs b/src/microbe_stage/CompoundCloudPlane.cs index 87001ee5d8c..346d6802fc5 100644 --- a/src/microbe_stage/CompoundCloudPlane.cs +++ b/src/microbe_stage/CompoundCloudPlane.cs @@ -191,9 +191,8 @@ public void WriteToArchive(ISArchiveWriter writer) var localDensity = Density; - // If rank changes square root is not suitable - if (localDensity.Rank != 2) - throw new Exception("Cloud plane densities array rank is not 2"); + if (Math.Abs(Math.Sqrt(localDensity.Length) - (int)Math.Sqrt(localDensity.Length)) > 0.001f) + throw new Exception("Cloud plane densities size is not a perfect square"); int dimensions = (int)Math.Sqrt(localDensity.Length); From d67dc1a5925c7394b953a4f212cb530f96d052ad Mon Sep 17 00:00:00 2001 From: Francesco Date: Sun, 3 May 2026 16:23:24 +0200 Subject: [PATCH 18/32] Corrected wrong for loops in PartialClearDensity --- src/microbe_stage/CompoundCloudPlane.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/microbe_stage/CompoundCloudPlane.cs b/src/microbe_stage/CompoundCloudPlane.cs index 346d6802fc5..a17406caf8f 100644 --- a/src/microbe_stage/CompoundCloudPlane.cs +++ b/src/microbe_stage/CompoundCloudPlane.cs @@ -1281,9 +1281,9 @@ private void ProcessPixelAdvection(Vector4 currentDensity, int x, int absoluteY, private void PartialClearDensity(int x0, int y0, int width, int height) { - for (int y = x0; y < x0 + width; ++y) + for (int x = x0; x < x0 + width; ++x) { - for (int x = y0; x < y0 + height; ++x) + for (int y = y0; y < y0 + height; ++y) { Density[x + y * PlaneSize] = Vector4.Zero; } From 3f1fe64251ee55eb637e209ef39249e5b030df85 Mon Sep 17 00:00:00 2001 From: Francesco Date: Mon, 4 May 2026 10:41:00 +0200 Subject: [PATCH 19/32] Cleanup based on review comments --- src/microbe_stage/CompoundCloudPlane.cs | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/microbe_stage/CompoundCloudPlane.cs b/src/microbe_stage/CompoundCloudPlane.cs index a17406caf8f..b06f962912f 100644 --- a/src/microbe_stage/CompoundCloudPlane.cs +++ b/src/microbe_stage/CompoundCloudPlane.cs @@ -90,11 +90,6 @@ public partial class CompoundCloudPlane : MeshInstance3D, ISaveLoadedTracked, IA public bool IsLoadedFromSave { get; set; } - /// - /// This is used in data copy. - /// - public byte[] TempBuffer => tempBuffer; - public ushort CurrentArchiveVersion => SERIALIZATION_VERSION; public ArchiveObjectType ArchiveObjectType => (ArchiveObjectType)ThriveArchiveObjectType.CompoundCloudPlane; public bool CanBeReferencedInArchive => false; @@ -430,9 +425,9 @@ public void QueueDiffuseCloud(float deltaTime, List queue) /// /// Updates the cloud in parallel. /// - public void QueueAdvectCloud(float delta, List queue) + public void QueueAdvectCloud(float deltaTime, List queue) { - delta *= 100.0f; + deltaTime *= 100.0f; int slices = Constants.CLOUD_PLANE_SQUARES_PER_SIDE * Constants.CLOUD_PLANE_SQUARES_PER_SIDE; @@ -441,7 +436,7 @@ public void QueueAdvectCloud(float delta, List queue) int atSlice = slice; // TODO: fix task allocations - var task = new Task(() => PartialAdvect(atSlice, slices, delta)); + var task = new Task(() => PartialAdvect(atSlice, slices, deltaTime)); queue.Add(task); } } @@ -1219,7 +1214,7 @@ private void PartialAdvect(int slice, int slices, float delta) if (currentDensity.X + currentDensity.Y + currentDensity.Z + currentDensity.W < 1.0f) continue; - ProcessPixelAdvection(currentDensity, x + i, absoluteY, worldY, worldXOffset: (x + i) * resolution, + ProcessPixelAdvection(currentDensity, x + i, absoluteY, worldY, (x + i) * resolution, delta, destination, planeSize, worldPositionBase); } } From 21b4c9052d014b261a3320551d041124e414f563 Mon Sep 17 00:00:00 2001 From: Francesco Date: Mon, 4 May 2026 10:44:06 +0200 Subject: [PATCH 20/32] Cast safety check --- src/microbe_stage/CompoundCloudPlane.cs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/microbe_stage/CompoundCloudPlane.cs b/src/microbe_stage/CompoundCloudPlane.cs index b06f962912f..68c0966ca66 100644 --- a/src/microbe_stage/CompoundCloudPlane.cs +++ b/src/microbe_stage/CompoundCloudPlane.cs @@ -252,6 +252,9 @@ public void WriteToArchive(ISArchiveWriter writer) // Called when the node enters the scene tree for the first time. public override void _Ready() { + if (Marshal.SizeOf() * 4 != Marshal.SizeOf()) + throw new InvalidCastException("The assumption that Vector4 is 4 floats is not valid in this context."); + if (!IsLoadedFromSave) { PlaneSize = Settings.Instance.CloudSimulationWidth; From ffc5fb2f3c5608a60012938cd9c3e1cd9d67134b Mon Sep 17 00:00:00 2001 From: Francesco Date: Mon, 4 May 2026 10:50:16 +0200 Subject: [PATCH 21/32] Modified perfect square check and added comments --- src/microbe_stage/CompoundCloudPlane.cs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/microbe_stage/CompoundCloudPlane.cs b/src/microbe_stage/CompoundCloudPlane.cs index 68c0966ca66..7419c8cf8ef 100644 --- a/src/microbe_stage/CompoundCloudPlane.cs +++ b/src/microbe_stage/CompoundCloudPlane.cs @@ -186,7 +186,7 @@ public void WriteToArchive(ISArchiveWriter writer) var localDensity = Density; - if (Math.Abs(Math.Sqrt(localDensity.Length) - (int)Math.Sqrt(localDensity.Length)) > 0.001f) + if (Math.Abs(Math.Sqrt(localDensity.Length) - (int)Math.Round(Math.Sqrt(localDensity.Length))) > 0.001f) throw new Exception("Cloud plane densities size is not a perfect square"); int dimensions = (int)Math.Sqrt(localDensity.Length); @@ -1258,6 +1258,10 @@ private void ProcessPixelAdvection(Vector4 currentDensity, int x, int absoluteY, CalculateMovementFactors(targetX, targetY, out int fX, out int cX, out int fY, out int cY, out float wR, out float wL, out float wB, out float wT); + // Normally the checks here would be fX < 0 || fX >= planeSize + // By casting the operands to uint here, the first condition (fX < 0) is converted to fX becoming a really large + // number. This makes the comparison always true (as planeSize is usually much smaller than the max integer) + // thus saving us a few more instructions during the calculations. if ((uint)fX >= (uint)planeSize) fX = (fX < 0) ? fX + planeSize : fX - planeSize; if ((uint)cX >= (uint)planeSize) From a491661a397455784a77a902acff3db34a457da8 Mon Sep 17 00:00:00 2001 From: Francesco Date: Mon, 4 May 2026 11:11:31 +0200 Subject: [PATCH 22/32] Removed AggressiveInlining hint on ProcessPixelAdvection --- src/microbe_stage/CompoundCloudPlane.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/microbe_stage/CompoundCloudPlane.cs b/src/microbe_stage/CompoundCloudPlane.cs index 7419c8cf8ef..f77614c398a 100644 --- a/src/microbe_stage/CompoundCloudPlane.cs +++ b/src/microbe_stage/CompoundCloudPlane.cs @@ -1241,7 +1241,6 @@ private void PartialAdvect(int slice, int slices, float delta) } } - [MethodImpl(MethodImplOptions.AggressiveInlining)] private void ProcessPixelAdvection(Vector4 currentDensity, int x, int absoluteY, float worldY, float worldXOffset, float delta, Span destination, int planeSize, Vector2 worldPositionBase) { From 665e6494003ef4f4de3e42e296751b661b7d898b Mon Sep 17 00:00:00 2001 From: Francesco Date: Mon, 4 May 2026 11:39:26 +0200 Subject: [PATCH 23/32] Linter cleanup --- src/microbe_stage/CompoundCloudPlane.cs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/microbe_stage/CompoundCloudPlane.cs b/src/microbe_stage/CompoundCloudPlane.cs index f77614c398a..a3e7fe2a916 100644 --- a/src/microbe_stage/CompoundCloudPlane.cs +++ b/src/microbe_stage/CompoundCloudPlane.cs @@ -1183,8 +1183,7 @@ private void PartialAdvect(int slice, int slices, float delta) var p2 = source[sourceIdx + 2].AsVector128(); var p3 = source[sourceIdx + 3].AsVector128(); - var sums = Vector128.Create( - p0[0] + p0[1] + p0[2] + p0[3], + var sums = Vector128.Create(p0[0] + p0[1] + p0[2] + p0[3], p1[0] + p1[1] + p1[2] + p1[3], p2[0] + p2[1] + p2[2] + p2[3], p3[0] + p3[1] + p3[2] + p3[3]); @@ -1235,8 +1234,8 @@ private void PartialAdvect(int slice, int slices, float delta) if (currentDensity.X + currentDensity.Y + currentDensity.Z + currentDensity.W < 1.0f) continue; - ProcessPixelAdvection(currentDensity, x, absoluteY, worldY, worldXOffset: x * resolution, delta, - destination, planeSize, worldPositionBase); + ProcessPixelAdvection(currentDensity, x, absoluteY, worldY, x * resolution, delta, destination, + planeSize, worldPositionBase); } } } From 1210acb989bb3f8d446c50b3e6b54e6dbc1bfb09 Mon Sep 17 00:00:00 2001 From: Francesco Date: Mon, 4 May 2026 11:46:47 +0200 Subject: [PATCH 24/32] Arm support for SIMD in the advection loop and fallback --- src/microbe_stage/CompoundCloudPlane.cs | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/src/microbe_stage/CompoundCloudPlane.cs b/src/microbe_stage/CompoundCloudPlane.cs index a3e7fe2a916..3da483fe42c 100644 --- a/src/microbe_stage/CompoundCloudPlane.cs +++ b/src/microbe_stage/CompoundCloudPlane.cs @@ -7,6 +7,7 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.X86; using System.Threading; using System.Threading.Tasks; @@ -1205,8 +1206,27 @@ private void PartialAdvect(int slice, int slices, float delta) vPixel = Vector128.Multiply(vPixel, vScale); vPixel = Vector128.Min(Vector128.Max(vPixel, vZero), v255); var vInt = Vector128.ConvertToInt32(vPixel); - var packed16 = Sse2.PackSignedSaturate(vInt, vInt); - var packed8 = Sse2.PackUnsignedSaturate(packed16, packed16).AsByte(); + + // These check are JIT compile-time constants, so they are pruned during execution. + // This makes branching here de-facto non-existent. + Vector128 packed8; + if (Sse2.IsSupported) + { + var packed16 = Sse2.PackSignedSaturate(vInt, vInt); + packed8 = Sse2.PackUnsignedSaturate(packed16, packed16).AsByte(); + } + else if (AdvSimd.IsSupported) + { + var narrow16 = AdvSimd.ExtractNarrowingSaturateLower(vInt); + var narrow8 = AdvSimd.ExtractNarrowingSaturateUnsignedLower(narrow16.ToVector128()); + packed8 = narrow8.ToVector128(); + } + else + { + packed8 = Vector128.Create((byte)vInt.GetElement(0), (byte)vInt.GetElement(1), + (byte)vInt.GetElement(2), (byte)vInt.GetElement(3), + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0).AsByte(); + } MemoryMarshal.Write(bufferSpan.Slice(bufferIdx, 4), packed8.AsUInt32().ToScalar()); From 4fee99b1915f96c7fb0d08c7cb5034d4312e303b Mon Sep 17 00:00:00 2001 From: Francesco Date: Mon, 4 May 2026 12:18:04 +0200 Subject: [PATCH 25/32] Linter cleanup --- src/microbe_stage/CompoundCloudPlane.cs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/microbe_stage/CompoundCloudPlane.cs b/src/microbe_stage/CompoundCloudPlane.cs index 3da483fe42c..684ea6b088b 100644 --- a/src/microbe_stage/CompoundCloudPlane.cs +++ b/src/microbe_stage/CompoundCloudPlane.cs @@ -973,8 +973,7 @@ private void PartialDiffuse(int slice, int slices, float delta) var neighbors = Avx.Add(Avx.Add(up, down), Avx.Add(left, right)); - var result = Avx.Add( - Avx.Multiply(center, centerWeightVector), + var result = Avx.Add(Avx.Multiply(center, centerWeightVector), Avx.Multiply(neighbors, neighborWeightVector)); result.StoreUnsafe(ref destinationReference, offset); From 73d902946d2af2be31900805c7388766296cd488 Mon Sep 17 00:00:00 2001 From: Francesco Date: Thu, 7 May 2026 10:39:48 +0200 Subject: [PATCH 26/32] Replaced DiffuseEdges with old scalar algorithm --- src/microbe_stage/CompoundCloudPlane.cs | 67 +++++++++++++++++-------- 1 file changed, 45 insertions(+), 22 deletions(-) diff --git a/src/microbe_stage/CompoundCloudPlane.cs b/src/microbe_stage/CompoundCloudPlane.cs index 684ea6b088b..3121178ab63 100644 --- a/src/microbe_stage/CompoundCloudPlane.cs +++ b/src/microbe_stage/CompoundCloudPlane.cs @@ -380,34 +380,26 @@ public void DiffuseEdges(float deltaTime) { deltaTime *= 100.0f; - int planeSize = PlaneSize; int edgeWidth = Constants.CLOUD_PLANE_EDGE_WIDTH; int halfEdgeWidth = edgeWidth / 2; - int squaresPerSide = Constants.CLOUD_PLANE_SQUARES_PER_SIDE; - int planeChunkSize = planeSize / squaresPerSide; - - for (int column = 0; column <= squaresPerSide; ++column) - { - int boundaryCenter = column * planeChunkSize; - int horizontalStart = Math.Max(0, boundaryCenter - halfEdgeWidth); - int horizontalEnd = Math.Min(planeSize, boundaryCenter + halfEdgeWidth); + int planeChunkSize = PlaneSize / Constants.CLOUD_PLANE_SQUARES_PER_SIDE; - AreaDiffuse(horizontalStart, horizontalEnd, 0, planeSize, deltaTime); - } + // Vertical edge columns + PartialDiffuseScalar(0, 0, halfEdgeWidth, PlaneSize, deltaTime); + PartialDiffuseScalar(1 * planeChunkSize - halfEdgeWidth, 0, edgeWidth, PlaneSize, deltaTime); + PartialDiffuseScalar(2 * planeChunkSize - halfEdgeWidth, 0, edgeWidth, PlaneSize, deltaTime); + PartialDiffuseScalar(3 * planeChunkSize - halfEdgeWidth, 0, halfEdgeWidth, PlaneSize, deltaTime); - for (int square = 0; square < squaresPerSide; ++square) + // Horizontal edge rows + for (int square = 0; square < Constants.CLOUD_PLANE_SQUARES_PER_SIDE; ++square) { - int horizontalStart = square * planeChunkSize + halfEdgeWidth; - int horizontalEnd = (square + 1) * planeChunkSize - halfEdgeWidth; + int x = square * planeChunkSize + halfEdgeWidth; + int width = planeChunkSize - edgeWidth; - for (int row = 0; row <= squaresPerSide; ++row) - { - int boundaryCenter = row * planeChunkSize; - int verticalStart = Math.Max(0, boundaryCenter - halfEdgeWidth); - int verticalEnd = Math.Min(planeSize, boundaryCenter + halfEdgeWidth); - - AreaDiffuse(horizontalStart, horizontalEnd, verticalStart, verticalEnd, deltaTime); - } + PartialDiffuseScalar(x, 3 * planeChunkSize - halfEdgeWidth, width, halfEdgeWidth, deltaTime); + PartialDiffuseScalar(x, 2 * planeChunkSize - halfEdgeWidth, width, edgeWidth, deltaTime); + PartialDiffuseScalar(x, 1 * planeChunkSize - halfEdgeWidth, width, edgeWidth, deltaTime); + PartialDiffuseScalar(x, 0, width, halfEdgeWidth, deltaTime); } } @@ -1006,6 +998,37 @@ private void PartialDiffuse(int slice, int slices, float delta) } } + /// + /// This is the original implementation of the PartialDiffuse algorithm, which was scalar. + /// It has been kept to diffuse edges. + /// + private void PartialDiffuseScalar(int x0, int y0, int width, int height, float delta) + { + float a = delta * Constants.CLOUD_DIFFUSION_RATE; + var cellMultiplier = a * 0.25f; + var planeSize = PlaneSize; + + for (int x = x0; x < x0 + width; ++x) + { + var xMinus = x == 0 ? planeSize - 1 : x - 1; + var xPlus = x == planeSize - 1 ? 0 : x + 1; + + for (int y = y0; y < y0 + height; ++y) + { + var yMinus = y == 0 ? planeSize - 1 : y - 1; + var yPlus = y == planeSize - 1 ? 0 : y + 1; + + OldDensity[x + y * planeSize] = + Density[x + y * planeSize] * (1 - a) + + ( + Density[x + yMinus * planeSize] + + Density[x + yPlus * planeSize] + + Density[xMinus + y * planeSize] + + Density[xPlus + y * planeSize]) * cellMultiplier; + } + } + } + private void AreaDiffuse(int horizontalStart, int horizontalEnd, int verticalStart, int verticalEnd, float delta) { From 99feb01cf9921b888316ccc4398f934531ba607f Mon Sep 17 00:00:00 2001 From: Francesco Date: Thu, 7 May 2026 11:27:22 +0200 Subject: [PATCH 27/32] Replaced parallel DiffuseEdges to squares again --- src/microbe_stage/CompoundCloudPlane.cs | 81 ++++++++++--------------- 1 file changed, 31 insertions(+), 50 deletions(-) diff --git a/src/microbe_stage/CompoundCloudPlane.cs b/src/microbe_stage/CompoundCloudPlane.cs index 3121178ab63..2a667a8e4b4 100644 --- a/src/microbe_stage/CompoundCloudPlane.cs +++ b/src/microbe_stage/CompoundCloudPlane.cs @@ -409,12 +409,17 @@ public void DiffuseEdges(float deltaTime) public void QueueDiffuseCloud(float deltaTime, List queue) { deltaTime *= 100.0f; - int slices = Constants.CLOUD_PLANE_SQUARES_PER_SIDE * Constants.CLOUD_PLANE_SQUARES_PER_SIDE; + int planeChunkSize = PlaneSize / Constants.CLOUD_PLANE_SQUARES_PER_SIDE; - for (int slice = 0; slice < slices; ++slice) + for (int i = 0; i < Constants.CLOUD_PLANE_SQUARES_PER_SIDE; ++i) { - int atSlice = slice; - queue.Add(new Task(() => PartialDiffuse(atSlice, slices, deltaTime))); + int x0 = i * planeChunkSize; + + for (int j = 0; j < Constants.CLOUD_PLANE_SQUARES_PER_SIDE; ++j) + { + int y0 = j * planeChunkSize; + queue.Add(new Task(() => PartialDiffuse(x0, y0, planeChunkSize, deltaTime))); + } } } @@ -908,12 +913,15 @@ private Vector4 CalculateCloudToAdd(Compound compound, float density) Compounds[3] == compound ? density : 0.0f); } - private void PartialDiffuse(int slice, int slices, float delta) + private void PartialDiffuse(int x0, int y0, int size, float delta) { - int planeSize = PlaneSize; - int horizontalStart = slice * planeSize / slices; - int horizontalEnd = (slice + 1) * planeSize / slices; + int halfEdge = Constants.CLOUD_PLANE_EDGE_WIDTH / 2; + int startX = x0 + halfEdge; + int endX = x0 + size - halfEdge; + int startY = y0 + halfEdge; + int endY = y0 + size - halfEdge; + int planeSize = PlaneSize; float diffusionAmount = delta * Constants.CLOUD_DIFFUSION_RATE; float neighborWeight = diffusionAmount * 0.25f; float centerWeight = 1.0f - diffusionAmount; @@ -931,40 +939,28 @@ private void PartialDiffuse(int slice, int slices, float delta) bool avx2Supported = Avx2.IsSupported; - for (int horizontalIndex = horizontalStart; horizontalIndex < horizontalEnd; ++horizontalIndex) + for (int y = startY; y < endY; ++y) { - int currentRowOffset = horizontalIndex * planeSize; - int previousRowOffset = (horizontalIndex == 0 ? planeSize - 1 : horizontalIndex - 1) * planeSize; - int nextRowOffset = (horizontalIndex == planeSize - 1 ? 0 : horizontalIndex + 1) * planeSize; + int currentColumnOffset = y * planeSize; + int previousColumnOffset = (y == 0 ? planeSize - 1 : y - 1) * planeSize; + int nextColumnOffset = (y == planeSize - 1 ? 0 : y + 1) * planeSize; - int firstIndex = currentRowOffset; - destinationDensity[firstIndex] = sourceDensity[firstIndex] * centerWeight + - (sourceDensity[currentRowOffset + (planeSize - 1)] + sourceDensity[currentRowOffset + 1] + - sourceDensity[previousRowOffset] + sourceDensity[nextRowOffset]) * neighborWeight; - - int verticalIndex = 1; - int safeLimit = planeSize - 1; + int x = startX; if (avx2Supported) { - // Use Avx2 SIMD to vectorise diffusion. - - // Most of the operations are now vectorised, except for a possible final tail that doesn't fit in a - // Vector256. This is taken care of after the current conditional branch. - for (; verticalIndex <= safeLimit - 2; verticalIndex += 2) + for (; x <= endX - 2; x += 2) { - uint offset = (uint)(currentRowOffset + verticalIndex) << 2; + uint offset = (uint)(currentColumnOffset + x) << 2; var center = Vector256.LoadUnsafe(ref sourceReference, offset); + var up = Vector256.LoadUnsafe(ref sourceReference, (uint)(previousColumnOffset + x) << 2); + var down = Vector256.LoadUnsafe(ref sourceReference, (uint)(nextColumnOffset + x) << 2); - var up = Vector256.LoadUnsafe(ref sourceReference, offset - 4); - var down = Vector256.LoadUnsafe(ref sourceReference, offset + 4); - var left = Vector256.LoadUnsafe(ref sourceReference, (uint)(previousRowOffset + - verticalIndex) << 2); - var right = Vector256.LoadUnsafe(ref sourceReference, (uint)(nextRowOffset + verticalIndex) << 2); + var left = Vector256.LoadUnsafe(ref sourceReference, offset - 4); + var right = Vector256.LoadUnsafe(ref sourceReference, offset + 4); var neighbors = Avx.Add(Avx.Add(up, down), Avx.Add(left, right)); - var result = Avx.Add(Avx.Multiply(center, centerWeightVector), Avx.Multiply(neighbors, neighborWeightVector)); @@ -972,28 +968,13 @@ private void PartialDiffuse(int slice, int slices, float delta) } } - // If Avx2 is unsupported, the following loops will take care of the scalar operations. That must be - // executed after the SIMD operations if Avx2 is supported anyway, as we need to take care of a possible - // "tail" if the PlaneSize is not aligned to the previous SIMD algorithm. - - // This is the scalar algorithm. It executes if Avx2 is not supported and on the tail we discussed in the - // previous comments. - for (; verticalIndex < safeLimit; ++verticalIndex) + // Tail fallback + for (; x < endX; ++x) { - int currentIndex = currentRowOffset + verticalIndex; + int currentIndex = currentColumnOffset + x; destinationDensity[currentIndex] = sourceDensity[currentIndex] * centerWeight + (sourceDensity[currentIndex - 1] + sourceDensity[currentIndex + 1] + - sourceDensity[previousRowOffset + verticalIndex] + sourceDensity[nextRowOffset + verticalIndex]) - * neighborWeight; - } - - if (verticalIndex < planeSize) - { - int lastIndex = currentRowOffset + verticalIndex; - destinationDensity[lastIndex] = sourceDensity[lastIndex] * centerWeight + - (sourceDensity[lastIndex - 1] + sourceDensity[currentRowOffset] + - sourceDensity[previousRowOffset + verticalIndex] + sourceDensity[nextRowOffset + verticalIndex]) - * neighborWeight; + sourceDensity[previousColumnOffset + x] + sourceDensity[nextColumnOffset + x]) * neighborWeight; } } } From bfd020afe677fd392b033bedaa3b79c1e49a2d47 Mon Sep 17 00:00:00 2001 From: Francesco Date: Thu, 7 May 2026 11:45:14 +0200 Subject: [PATCH 28/32] Using correct coordinates in PartialAdvect --- src/microbe_stage/CompoundCloudPlane.cs | 132 ++++++++++++------------ 1 file changed, 68 insertions(+), 64 deletions(-) diff --git a/src/microbe_stage/CompoundCloudPlane.cs b/src/microbe_stage/CompoundCloudPlane.cs index 2a667a8e4b4..8714ea24bc7 100644 --- a/src/microbe_stage/CompoundCloudPlane.cs +++ b/src/microbe_stage/CompoundCloudPlane.cs @@ -1164,7 +1164,6 @@ private void PartialAdvect(int slice, int slices, float delta) float resolution = CloudResolution; float intensityScale = 255.0f / Constants.CLOUD_MAX_INTENSITY_SHOWN; - Vector2 worldPositionBase = GetWorldPositionForAdvection(0, rowStart); var vScale = Vector128.Create(intensityScale); var vZero = Vector128.Zero; @@ -1176,90 +1175,95 @@ private void PartialAdvect(int slice, int slices, float delta) for (int y = 0; y < rowCount; ++y) { int absoluteY = y + rowStart; - float worldY = worldPositionBase.Y + y * resolution; int x = 0; - for (; x <= planeSize - 4; x += 4) + for (int chunkX = 0; chunkX < Constants.CLOUD_PLANE_SQUARES_PER_SIDE; ++chunkX) { - var p0 = source[sourceIdx].AsVector128(); - var p1 = source[sourceIdx + 1].AsVector128(); - var p2 = source[sourceIdx + 2].AsVector128(); - var p3 = source[sourceIdx + 3].AsVector128(); + Vector2 worldPositionBase = GetWorldPositionForAdvection(chunkX * planeSize / + Constants.CLOUD_PLANE_SQUARES_PER_SIDE, rowStart); - var sums = Vector128.Create(p0[0] + p0[1] + p0[2] + p0[3], - p1[0] + p1[1] + p1[2] + p1[3], - p2[0] + p2[1] + p2[2] + p2[3], - p3[0] + p3[1] + p3[2] + p3[3]); - - if (Vector128.LessThanAll(sums, Vector128.Create(1.0f))) + for (; x <= planeSize - 4; x += 4) { - Vector128.Zero.StoreUnsafe(ref bufferSpan[bufferIdx]); - - sourceIdx += 4; - bufferIdx += 16; - continue; - } + var p0 = source[sourceIdx].AsVector128(); + var p1 = source[sourceIdx + 1].AsVector128(); + var p2 = source[sourceIdx + 2].AsVector128(); + var p3 = source[sourceIdx + 3].AsVector128(); - for (int i = 0; i < 4; ++i) - { - Vector4 currentDensity = source[sourceIdx++]; + var sums = Vector128.Create(p0[0] + p0[1] + p0[2] + p0[3], + p1[0] + p1[1] + p1[2] + p1[3], + p2[0] + p2[1] + p2[2] + p2[3], + p3[0] + p3[1] + p3[2] + p3[3]); - var vPixel = currentDensity.AsVector128(); - vPixel = Vector128.Multiply(vPixel, vScale); - vPixel = Vector128.Min(Vector128.Max(vPixel, vZero), v255); - var vInt = Vector128.ConvertToInt32(vPixel); - - // These check are JIT compile-time constants, so they are pruned during execution. - // This makes branching here de-facto non-existent. - Vector128 packed8; - if (Sse2.IsSupported) - { - var packed16 = Sse2.PackSignedSaturate(vInt, vInt); - packed8 = Sse2.PackUnsignedSaturate(packed16, packed16).AsByte(); - } - else if (AdvSimd.IsSupported) + if (Vector128.LessThanAll(sums, Vector128.Create(1.0f))) { - var narrow16 = AdvSimd.ExtractNarrowingSaturateLower(vInt); - var narrow8 = AdvSimd.ExtractNarrowingSaturateUnsignedLower(narrow16.ToVector128()); - packed8 = narrow8.ToVector128(); + Vector128.Zero.StoreUnsafe(ref bufferSpan[bufferIdx]); + + sourceIdx += 4; + bufferIdx += 16; + continue; } - else + + for (int i = 0; i < 4; ++i) { - packed8 = Vector128.Create((byte)vInt.GetElement(0), (byte)vInt.GetElement(1), - (byte)vInt.GetElement(2), (byte)vInt.GetElement(3), - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0).AsByte(); + Vector4 currentDensity = source[sourceIdx++]; + + var vPixel = currentDensity.AsVector128(); + vPixel = Vector128.Multiply(vPixel, vScale); + vPixel = Vector128.Min(Vector128.Max(vPixel, vZero), v255); + var vInt = Vector128.ConvertToInt32(vPixel); + + // These check are JIT compile-time constants, so they are pruned during execution. + // This makes branching here de-facto non-existent. + Vector128 packed8; + if (Sse2.IsSupported) + { + var packed16 = Sse2.PackSignedSaturate(vInt, vInt); + packed8 = Sse2.PackUnsignedSaturate(packed16, packed16).AsByte(); + } + else if (AdvSimd.IsSupported) + { + var narrow16 = AdvSimd.ExtractNarrowingSaturateLower(vInt); + var narrow8 = AdvSimd.ExtractNarrowingSaturateUnsignedLower(narrow16.ToVector128()); + packed8 = narrow8.ToVector128(); + } + else + { + packed8 = Vector128.Create((byte)vInt.GetElement(0), (byte)vInt.GetElement(1), + (byte)vInt.GetElement(2), (byte)vInt.GetElement(3), + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0).AsByte(); + } + + MemoryMarshal.Write(bufferSpan.Slice(bufferIdx, 4), + packed8.AsUInt32().ToScalar()); + + bufferIdx += 4; + + if (currentDensity.X + currentDensity.Y + currentDensity.Z + currentDensity.W < 1.0f) + continue; + + ProcessPixelAdvection(currentDensity, x + i, absoluteY, worldPositionBase.Y, + (x + i) * resolution, delta, destination, planeSize, worldPositionBase); } + } - MemoryMarshal.Write(bufferSpan.Slice(bufferIdx, 4), - packed8.AsUInt32().ToScalar()); + for (; x < planeSize; ++x) + { + Vector4 currentDensity = source[sourceIdx++]; + bufferSpan[bufferIdx] = (byte)Math.Clamp(currentDensity.X * intensityScale, 0, 255); + bufferSpan[bufferIdx + 1] = (byte)Math.Clamp(currentDensity.Y * intensityScale, 0, 255); + bufferSpan[bufferIdx + 2] = (byte)Math.Clamp(currentDensity.Z * intensityScale, 0, 255); + bufferSpan[bufferIdx + 3] = (byte)Math.Clamp(currentDensity.W * intensityScale, 0, 255); bufferIdx += 4; if (currentDensity.X + currentDensity.Y + currentDensity.Z + currentDensity.W < 1.0f) continue; - ProcessPixelAdvection(currentDensity, x + i, absoluteY, worldY, (x + i) * resolution, - delta, destination, planeSize, worldPositionBase); + ProcessPixelAdvection(currentDensity, x, absoluteY, worldPositionBase.Y, + x * resolution, delta, destination, planeSize, worldPositionBase); } } - - for (; x < planeSize; ++x) - { - Vector4 currentDensity = source[sourceIdx++]; - - bufferSpan[bufferIdx] = (byte)Math.Clamp(currentDensity.X * intensityScale, 0, 255); - bufferSpan[bufferIdx + 1] = (byte)Math.Clamp(currentDensity.Y * intensityScale, 0, 255); - bufferSpan[bufferIdx + 2] = (byte)Math.Clamp(currentDensity.Z * intensityScale, 0, 255); - bufferSpan[bufferIdx + 3] = (byte)Math.Clamp(currentDensity.W * intensityScale, 0, 255); - bufferIdx += 4; - - if (currentDensity.X + currentDensity.Y + currentDensity.Z + currentDensity.W < 1.0f) - continue; - - ProcessPixelAdvection(currentDensity, x, absoluteY, worldY, x * resolution, delta, destination, - planeSize, worldPositionBase); - } } } From baa31d1e9d490929e4676cedc076010355fec28b Mon Sep 17 00:00:00 2001 From: Francesco Date: Thu, 7 May 2026 11:53:11 +0200 Subject: [PATCH 29/32] Renamed variables for readability --- src/microbe_stage/CompoundCloudPlane.cs | 34 ++++++++++++------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/microbe_stage/CompoundCloudPlane.cs b/src/microbe_stage/CompoundCloudPlane.cs index 8714ea24bc7..107b1145e5c 100644 --- a/src/microbe_stage/CompoundCloudPlane.cs +++ b/src/microbe_stage/CompoundCloudPlane.cs @@ -1280,30 +1280,30 @@ private void ProcessPixelAdvection(Vector4 currentDensity, int x, int absoluteY, float targetX = x + delta * velocity.X; float targetY = absoluteY + delta * velocity.Y; - CalculateMovementFactors(targetX, targetY, out int fX, out int cX, out int fY, out int cY, out float wR, - out float wL, out float wB, out float wT); + CalculateMovementFactors(targetX, targetY, out int floorX, out int ceilX, out int floorY, out int ceilY, + out float weightRight, out float weightLeft, out float weightBottom, out float weightTop); // Normally the checks here would be fX < 0 || fX >= planeSize // By casting the operands to uint here, the first condition (fX < 0) is converted to fX becoming a really large // number. This makes the comparison always true (as planeSize is usually much smaller than the max integer) // thus saving us a few more instructions during the calculations. - if ((uint)fX >= (uint)planeSize) - fX = (fX < 0) ? fX + planeSize : fX - planeSize; - if ((uint)cX >= (uint)planeSize) - cX = (cX < 0) ? cX + planeSize : cX - planeSize; - if ((uint)fY >= (uint)planeSize) - fY = (fY < 0) ? fY + planeSize : fY - planeSize; - if ((uint)cY >= (uint)planeSize) - cY = (cY < 0) ? cY + planeSize : cY - planeSize; - - int fYRow = fY * planeSize; - int cYRow = cY * planeSize; + if ((uint)floorX >= (uint)planeSize) + floorX = (floorX < 0) ? floorX + planeSize : floorX - planeSize; + if ((uint)ceilX >= (uint)planeSize) + ceilX = (ceilX < 0) ? ceilX + planeSize : ceilX - planeSize; + if ((uint)floorY >= (uint)planeSize) + floorY = (floorY < 0) ? floorY + planeSize : floorY - planeSize; + if ((uint)ceilY >= (uint)planeSize) + ceilY = (ceilY < 0) ? ceilY + planeSize : ceilY - planeSize; + + int floorYRow = floorY * planeSize; + int ceilYRow = ceilY * planeSize; Vector4 decayed = currentDensity * decayRates; - destination[fX + fYRow] += decayed * (wL * wT); - destination[fX + cYRow] += decayed * (wL * wB); - destination[cX + fYRow] += decayed * (wR * wT); - destination[cX + cYRow] += decayed * (wR * wB); + destination[floorX + floorYRow] += decayed * (weightLeft * weightTop); + destination[floorX + ceilYRow] += decayed * (weightLeft * weightBottom); + destination[ceilX + floorYRow] += decayed * (weightRight * weightTop); + destination[ceilX + ceilYRow] += decayed * (weightRight * weightBottom); } private void PartialClearDensity(int x0, int y0, int width, int height) From 45edcae7ade635c8e576c4b94080a4b9eb2f925e Mon Sep 17 00:00:00 2001 From: Francesco Date: Thu, 7 May 2026 18:43:09 +0200 Subject: [PATCH 30/32] Fixed x not being relative to chunkX in advection loop --- src/microbe_stage/CompoundCloudPlane.cs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/microbe_stage/CompoundCloudPlane.cs b/src/microbe_stage/CompoundCloudPlane.cs index 107b1145e5c..8adb01e2d5d 100644 --- a/src/microbe_stage/CompoundCloudPlane.cs +++ b/src/microbe_stage/CompoundCloudPlane.cs @@ -1164,6 +1164,7 @@ private void PartialAdvect(int slice, int slices, float delta) float resolution = CloudResolution; float intensityScale = 255.0f / Constants.CLOUD_MAX_INTENSITY_SHOWN; + int chunkWidth = planeSize / Constants.CLOUD_PLANE_SQUARES_PER_SIDE; var vScale = Vector128.Create(intensityScale); var vZero = Vector128.Zero; @@ -1176,14 +1177,17 @@ private void PartialAdvect(int slice, int slices, float delta) { int absoluteY = y + rowStart; - int x = 0; - for (int chunkX = 0; chunkX < Constants.CLOUD_PLANE_SQUARES_PER_SIDE; ++chunkX) { Vector2 worldPositionBase = GetWorldPositionForAdvection(chunkX * planeSize / Constants.CLOUD_PLANE_SQUARES_PER_SIDE, rowStart); - for (; x <= planeSize - 4; x += 4) + int startX = chunkX * chunkWidth; + int endX = startX + chunkWidth; + + int x = startX; + + for (; x <= endX - 4; x += 4) { var p0 = source[sourceIdx].AsVector128(); var p1 = source[sourceIdx + 1].AsVector128(); @@ -1247,7 +1251,7 @@ private void PartialAdvect(int slice, int slices, float delta) } } - for (; x < planeSize; ++x) + for (; x < endX; ++x) { Vector4 currentDensity = source[sourceIdx++]; From 276e4c5e263b7f4fb2c744a5a1d92eef09f8cd79 Mon Sep 17 00:00:00 2001 From: Francesco Date: Fri, 8 May 2026 11:07:27 +0200 Subject: [PATCH 31/32] Polished PartialDiffuseScalar --- src/microbe_stage/CompoundCloudPlane.cs | 38 +++++++++++++------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/src/microbe_stage/CompoundCloudPlane.cs b/src/microbe_stage/CompoundCloudPlane.cs index 8adb01e2d5d..6bcfb3f20d7 100644 --- a/src/microbe_stage/CompoundCloudPlane.cs +++ b/src/microbe_stage/CompoundCloudPlane.cs @@ -983,29 +983,31 @@ private void PartialDiffuse(int x0, int y0, int size, float delta) /// This is the original implementation of the PartialDiffuse algorithm, which was scalar. /// It has been kept to diffuse edges. /// - private void PartialDiffuseScalar(int x0, int y0, int width, int height, float delta) + private void PartialDiffuseScalar(int x0, int y0, int width, int height, float deltaTime) { - float a = delta * Constants.CLOUD_DIFFUSION_RATE; - var cellMultiplier = a * 0.25f; - var planeSize = PlaneSize; + float diffusionAmount = deltaTime * Constants.CLOUD_DIFFUSION_RATE; + float cellMultiplier = diffusionAmount * 0.25f; + float neighbourMultiplier = 1.0f - diffusionAmount; + int planeSize = PlaneSize; - for (int x = x0; x < x0 + width; ++x) + var sourceDensity = Density.AsSpan(); + var destinationDensity = OldDensity.AsSpan(); + + for (int y = y0; y < y0 + height; ++y) { - var xMinus = x == 0 ? planeSize - 1 : x - 1; - var xPlus = x == planeSize - 1 ? 0 : x + 1; + int currentRowOffset = y * planeSize; + int previousRowOffset = (y == 0 ? planeSize - 1 : y - 1) * planeSize; + int nextRowOffset = (y == planeSize - 1 ? 0 : y + 1) * planeSize; - for (int y = y0; y < y0 + height; ++y) + for (int x = x0; x < x0 + width; ++x) { - var yMinus = y == 0 ? planeSize - 1 : y - 1; - var yPlus = y == planeSize - 1 ? 0 : y + 1; - - OldDensity[x + y * planeSize] = - Density[x + y * planeSize] * (1 - a) + - ( - Density[x + yMinus * planeSize] + - Density[x + yPlus * planeSize] + - Density[xMinus + y * planeSize] + - Density[xPlus + y * planeSize]) * cellMultiplier; + int currentIndex = currentRowOffset + x; + int prevX = x == 0 ? planeSize - 1 : x - 1; + int nextX = x == planeSize - 1 ? 0 : x + 1; + + destinationDensity[currentIndex] = sourceDensity[currentIndex] * neighbourMultiplier + + (sourceDensity[currentRowOffset + prevX] + sourceDensity[currentRowOffset + nextX] + + sourceDensity[previousRowOffset + x] + sourceDensity[nextRowOffset + x]) * cellMultiplier; } } } From b64ef0c6f0af1b6221c83ebf2d84e647e4f7214b Mon Sep 17 00:00:00 2001 From: Francesco Date: Fri, 8 May 2026 11:13:50 +0200 Subject: [PATCH 32/32] Polished advection algorithm --- src/microbe_stage/CompoundCloudPlane.cs | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/src/microbe_stage/CompoundCloudPlane.cs b/src/microbe_stage/CompoundCloudPlane.cs index 6bcfb3f20d7..fe0880a53e8 100644 --- a/src/microbe_stage/CompoundCloudPlane.cs +++ b/src/microbe_stage/CompoundCloudPlane.cs @@ -1153,7 +1153,7 @@ private int GetEdgeShift(int coord, int playerPos) return 0; } - private void PartialAdvect(int slice, int slices, float delta) + private void PartialAdvect(int slice, int slices, float deltaTime) { int planeSize = PlaneSize; int rowStart = slice * planeSize / slices; @@ -1187,6 +1187,8 @@ private void PartialAdvect(int slice, int slices, float delta) int startX = chunkX * chunkWidth; int endX = startX + chunkWidth; + float worldY = worldPositionBase.Y + absoluteY * resolution; + int x = startX; for (; x <= endX - 4; x += 4) @@ -1248,8 +1250,9 @@ private void PartialAdvect(int slice, int slices, float delta) if (currentDensity.X + currentDensity.Y + currentDensity.Z + currentDensity.W < 1.0f) continue; - ProcessPixelAdvection(currentDensity, x + i, absoluteY, worldPositionBase.Y, - (x + i) * resolution, delta, destination, planeSize, worldPositionBase); + float worldX = worldPositionBase.X + (x + i) * resolution; + ProcessPixelAdvection(currentDensity, x + i, absoluteY, worldX, worldY, deltaTime, + destination, planeSize); } } @@ -1266,25 +1269,25 @@ private void PartialAdvect(int slice, int slices, float delta) if (currentDensity.X + currentDensity.Y + currentDensity.Z + currentDensity.W < 1.0f) continue; - ProcessPixelAdvection(currentDensity, x, absoluteY, worldPositionBase.Y, - x * resolution, delta, destination, planeSize, worldPositionBase); + float worldX = worldPositionBase.X + x * resolution; + ProcessPixelAdvection(currentDensity, x, absoluteY, worldX, worldY, deltaTime, + destination, planeSize); } } } } - private void ProcessPixelAdvection(Vector4 currentDensity, int x, int absoluteY, float worldY, float worldXOffset, - float delta, Span destination, int planeSize, Vector2 worldPositionBase) + private void ProcessPixelAdvection(Vector4 currentDensity, int absoluteX, int absoluteY, float worldX, float worldY, + float deltaTime, Span destination, int planeSize) { - float worldX = worldPositionBase.X + worldXOffset; Vector2 velocity = fluidSystem!.VelocityAt(new Vector2(worldX, worldY)); if (MathF.Abs(velocity.X) + MathF.Abs(velocity.Y) < Constants.CURRENT_COMPOUND_CLOUD_ADVECT_THRESHOLD) velocity = Vector2.Zero; velocity *= VISCOSITY; - float targetX = x + delta * velocity.X; - float targetY = absoluteY + delta * velocity.Y; + float targetX = absoluteX + deltaTime * velocity.X; + float targetY = absoluteY + deltaTime * velocity.Y; CalculateMovementFactors(targetX, targetY, out int floorX, out int ceilX, out int floorY, out int ceilY, out float weightRight, out float weightLeft, out float weightBottom, out float weightTop);