diff --git a/src/microbe_stage/CompoundCloudPlane.cs b/src/microbe_stage/CompoundCloudPlane.cs
index 2d6ee6a58d8..fe0880a53e8 100644
--- a/src/microbe_stage/CompoundCloudPlane.cs
+++ b/src/microbe_stage/CompoundCloudPlane.cs
@@ -5,6 +5,10 @@
using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.Arm;
+using System.Runtime.Intrinsics.X86;
using System.Threading;
using System.Threading.Tasks;
using Godot;
@@ -34,9 +38,9 @@ public partial class CompoundCloudPlane : MeshInstance3D, ISaveLoadedTracked, IA
/// Because this is such a high-priority system, this uses a bit more happily null suppressing than elsewhere
///
///
- public Vector4[,] Density = null!;
+ public Vector4[] Density = null!;
- public Vector4[,] OldDensity = null!;
+ public Vector4[] OldDensity = null!;
public Compound[] Compounds = null!;
@@ -69,6 +73,8 @@ public partial class CompoundCloudPlane : MeshInstance3D, ISaveLoadedTracked, IA
private Vector4 decayRates;
+ private byte[] tempBuffer = null!;
+
///
/// Which square plane player is in
///
@@ -127,7 +133,7 @@ public static CompoundCloudPlane ReadFromArchive(ISArchiveReader reader, ushort
int dimensions = instance.PlaneSize;
- var target = new Vector4[dimensions, dimensions];
+ var target = new Vector4[dimensions * dimensions];
for (int x = 0; x < dimensions; ++x)
{
@@ -156,7 +162,7 @@ public static CompoundCloudPlane ReadFromArchive(ISArchiveReader reader, ushort
(uint)buffer[bufferReadOffset++] << 16 | (uint)buffer[bufferReadOffset++] << 24;
vector4.W = BitConverter.UInt32BitsToSingle(data);
- target[x, y] = vector4;
+ target[x + y * dimensions] = vector4;
}
if (bufferReadOffset != buffer.Length)
@@ -181,9 +187,8 @@ public void WriteToArchive(ISArchiveWriter writer)
var localDensity = Density;
- // If rank changes square root is not suitable
- if (localDensity.Rank != 2)
- throw new Exception("Cloud plane densities array rank is not 2");
+ if (Math.Abs(Math.Sqrt(localDensity.Length) - (int)Math.Round(Math.Sqrt(localDensity.Length))) > 0.001f)
+ throw new Exception("Cloud plane densities size is not a perfect square");
int dimensions = (int)Math.Sqrt(localDensity.Length);
@@ -206,7 +211,7 @@ public void WriteToArchive(ISArchiveWriter writer)
// Convert data into the buffer
for (int y = 0; y < dimensions; ++y)
{
- var vector4 = localDensity[x, y];
+ var vector4 = localDensity[x + y * dimensions];
var data = BitConverter.SingleToUInt32Bits(vector4.X);
@@ -248,14 +253,17 @@ public void WriteToArchive(ISArchiveWriter writer)
// Called when the node enters the scene tree for the first time.
public override void _Ready()
{
+ if (Marshal.SizeOf() * 4 != Marshal.SizeOf())
+ throw new InvalidCastException("The assumption that Vector4 is 4 floats is not valid in this context.");
+
if (!IsLoadedFromSave)
{
PlaneSize = Settings.Instance.CloudSimulationWidth;
CloudResolution = Settings.Instance.CloudResolution;
CreateDensityTexture();
- Density = new Vector4[PlaneSize, PlaneSize];
- OldDensity = new Vector4[PlaneSize, PlaneSize];
+ Density = new Vector4[PlaneSize * PlaneSize];
+ OldDensity = new Vector4[PlaneSize * PlaneSize];
ClearContents();
}
else
@@ -265,7 +273,7 @@ public override void _Ready()
// without starting a new save
CreateDensityTexture();
- OldDensity = new Vector4[PlaneSize, PlaneSize];
+ OldDensity = new Vector4[PlaneSize * PlaneSize];
SetMaterialUVForPosition();
}
@@ -368,20 +376,19 @@ public void UpdatePosition(Vector2I newPosition)
/// Updates the edge concentrations of this cloud before the rest of the cloud.
/// This is not run in parallel.
///
- public void DiffuseEdges(float delta)
+ public void DiffuseEdges(float deltaTime)
{
- // Increase diffusion effect
- delta *= 100.0f;
+ deltaTime *= 100.0f;
int edgeWidth = Constants.CLOUD_PLANE_EDGE_WIDTH;
int halfEdgeWidth = edgeWidth / 2;
int planeChunkSize = PlaneSize / Constants.CLOUD_PLANE_SQUARES_PER_SIDE;
// Vertical edge columns
- PartialDiffuse(0, 0, halfEdgeWidth, PlaneSize, delta);
- PartialDiffuse(1 * planeChunkSize - halfEdgeWidth, 0, edgeWidth, PlaneSize, delta);
- PartialDiffuse(2 * planeChunkSize - halfEdgeWidth, 0, edgeWidth, PlaneSize, delta);
- PartialDiffuse(3 * planeChunkSize - halfEdgeWidth, 0, halfEdgeWidth, PlaneSize, delta);
+ PartialDiffuseScalar(0, 0, halfEdgeWidth, PlaneSize, deltaTime);
+ PartialDiffuseScalar(1 * planeChunkSize - halfEdgeWidth, 0, edgeWidth, PlaneSize, deltaTime);
+ PartialDiffuseScalar(2 * planeChunkSize - halfEdgeWidth, 0, edgeWidth, PlaneSize, deltaTime);
+ PartialDiffuseScalar(3 * planeChunkSize - halfEdgeWidth, 0, halfEdgeWidth, PlaneSize, deltaTime);
// Horizontal edge rows
for (int square = 0; square < Constants.CLOUD_PLANE_SQUARES_PER_SIDE; ++square)
@@ -389,32 +396,29 @@ public void DiffuseEdges(float delta)
int x = square * planeChunkSize + halfEdgeWidth;
int width = planeChunkSize - edgeWidth;
- PartialDiffuse(x, 3 * planeChunkSize - halfEdgeWidth, width, halfEdgeWidth, delta);
- PartialDiffuse(x, 2 * planeChunkSize - halfEdgeWidth, width, edgeWidth, delta);
- PartialDiffuse(x, 1 * planeChunkSize - halfEdgeWidth, width, edgeWidth, delta);
- PartialDiffuse(x, 0, width, halfEdgeWidth, delta);
+ PartialDiffuseScalar(x, 3 * planeChunkSize - halfEdgeWidth, width, halfEdgeWidth, deltaTime);
+ PartialDiffuseScalar(x, 2 * planeChunkSize - halfEdgeWidth, width, edgeWidth, deltaTime);
+ PartialDiffuseScalar(x, 1 * planeChunkSize - halfEdgeWidth, width, edgeWidth, deltaTime);
+ PartialDiffuseScalar(x, 0, width, halfEdgeWidth, deltaTime);
}
}
///
/// Updates the cloud in parallel.
///
- public void QueueDiffuseCloud(float delta, List queue)
+ public void QueueDiffuseCloud(float deltaTime, List queue)
{
- delta *= 100.0f;
- var planeChunkSize = PlaneSize / Constants.CLOUD_PLANE_SQUARES_PER_SIDE;
+ deltaTime *= 100.0f;
+ int planeChunkSize = PlaneSize / Constants.CLOUD_PLANE_SQUARES_PER_SIDE;
- for (var i = 0; i < Constants.CLOUD_PLANE_SQUARES_PER_SIDE; ++i)
+ for (int i = 0; i < Constants.CLOUD_PLANE_SQUARES_PER_SIDE; ++i)
{
- var x0 = i * planeChunkSize;
+ int x0 = i * planeChunkSize;
- for (var j = 0; j < Constants.CLOUD_PLANE_SQUARES_PER_SIDE; ++j)
+ for (int j = 0; j < Constants.CLOUD_PLANE_SQUARES_PER_SIDE; ++j)
{
- var y0 = j * planeChunkSize;
-
- // TODO: fix task allocations
- var task = new Task(() => PartialDiffuseCenter(x0, y0, planeChunkSize, delta));
- queue.Add(task);
+ int y0 = j * planeChunkSize;
+ queue.Add(new Task(() => PartialDiffuse(x0, y0, planeChunkSize, deltaTime)));
}
}
}
@@ -422,50 +426,29 @@ public void QueueDiffuseCloud(float delta, List queue)
///
/// Updates the cloud in parallel.
///
- public void QueueAdvectCloud(float delta, List queue)
+ public void QueueAdvectCloud(float deltaTime, List queue)
{
- delta *= 100.0f;
- var planeChunkSize = PlaneSize / Constants.CLOUD_PLANE_SQUARES_PER_SIDE;
-
- for (var i = 0; i < Constants.CLOUD_PLANE_SQUARES_PER_SIDE; ++i)
- {
- var x0 = i * planeChunkSize;
-
- for (var j = 0; j < Constants.CLOUD_PLANE_SQUARES_PER_SIDE; ++j)
- {
- var y0 = j * planeChunkSize;
-
- // TODO: fix task allocations
- var task = new Task(() => PartialAdvect(x0, y0, planeChunkSize, delta));
- queue.Add(task);
- }
- }
- }
+ deltaTime *= 100.0f;
- ///
- /// Updates the cloud in parallel.
- ///
- public void QueueUpdateTextureImage(List queue)
- {
- var planeChunkSize = PlaneSize / Constants.CLOUD_PLANE_SQUARES_PER_SIDE;
+ int slices = Constants.CLOUD_PLANE_SQUARES_PER_SIDE * Constants.CLOUD_PLANE_SQUARES_PER_SIDE;
- for (var i = 0; i < Constants.CLOUD_PLANE_SQUARES_PER_SIDE; ++i)
+ for (int slice = 0; slice < slices; ++slice)
{
- var x0 = i * planeChunkSize;
-
- for (var j = 0; j < Constants.CLOUD_PLANE_SQUARES_PER_SIDE; ++j)
- {
- var y0 = j * planeChunkSize;
+ int atSlice = slice;
- // TODO: fix task allocations
- var task = new Task(() => PartialUpdateTextureImage(x0, y0, planeChunkSize, planeChunkSize));
- queue.Add(task);
- }
+ // TODO: fix task allocations
+ var task = new Task(() => PartialAdvect(atSlice, slices, deltaTime));
+ queue.Add(task);
}
}
public void UpdateTexture()
{
+ int width = image!.GetWidth();
+ int height = image.GetHeight();
+ int size = width * height * 4;
+
+ image!.SetData(width, height, false, image.GetFormat(), tempBuffer.AsSpan(0, size));
texture.Update(image);
}
@@ -498,11 +481,11 @@ public void AddCloudInterlocked(Compound compound, int x, int y, float density)
{
do
{
- seenCurrentAmount = Density[x, y].X;
+ seenCurrentAmount = Density[x + y * PlaneSize].X;
newValue = seenCurrentAmount + density;
}
- while (Interlocked.CompareExchange(ref Density[x, y].X, newValue, seenCurrentAmount) !=
- seenCurrentAmount);
+ while (Interlocked.CompareExchange(ref Density[x + y * PlaneSize].X, newValue,
+ seenCurrentAmount) != seenCurrentAmount);
break;
}
@@ -511,11 +494,11 @@ public void AddCloudInterlocked(Compound compound, int x, int y, float density)
{
do
{
- seenCurrentAmount = Density[x, y].Y;
+ seenCurrentAmount = Density[x + y * PlaneSize].Y;
newValue = seenCurrentAmount + density;
}
- while (Interlocked.CompareExchange(ref Density[x, y].Y, newValue, seenCurrentAmount) !=
- seenCurrentAmount);
+ while (Interlocked.CompareExchange(ref Density[x + y * PlaneSize].Y, newValue,
+ seenCurrentAmount) != seenCurrentAmount);
break;
}
@@ -524,11 +507,11 @@ public void AddCloudInterlocked(Compound compound, int x, int y, float density)
{
do
{
- seenCurrentAmount = Density[x, y].Z;
+ seenCurrentAmount = Density[x + y * PlaneSize].Z;
newValue = seenCurrentAmount + density;
}
- while (Interlocked.CompareExchange(ref Density[x, y].Z, newValue, seenCurrentAmount) !=
- seenCurrentAmount);
+ while (Interlocked.CompareExchange(ref Density[x + y * PlaneSize].Z, newValue,
+ seenCurrentAmount) != seenCurrentAmount);
break;
}
@@ -537,11 +520,11 @@ public void AddCloudInterlocked(Compound compound, int x, int y, float density)
{
do
{
- seenCurrentAmount = Density[x, y].W;
+ seenCurrentAmount = Density[x + y * PlaneSize].W;
newValue = seenCurrentAmount + density;
}
- while (Interlocked.CompareExchange(ref Density[x, y].W, newValue, seenCurrentAmount) !=
- seenCurrentAmount);
+ while (Interlocked.CompareExchange(ref Density[x + y * PlaneSize].W, newValue,
+ seenCurrentAmount) != seenCurrentAmount);
break;
}
@@ -572,11 +555,11 @@ public bool AddCloudInterlockedIfHandlesType(Compound compound, int x, int y, fl
{
do
{
- seenCurrentAmount = Density[x, y].X;
+ seenCurrentAmount = Density[x + y * PlaneSize].X;
newValue = seenCurrentAmount + density;
}
- while (Interlocked.CompareExchange(ref Density[x, y].X, newValue, seenCurrentAmount) !=
- seenCurrentAmount);
+ while (Interlocked.CompareExchange(ref Density[x + y * PlaneSize].X, newValue,
+ seenCurrentAmount) != seenCurrentAmount);
return true;
}
@@ -585,11 +568,11 @@ public bool AddCloudInterlockedIfHandlesType(Compound compound, int x, int y, fl
{
do
{
- seenCurrentAmount = Density[x, y].Y;
+ seenCurrentAmount = Density[x + y * PlaneSize].Y;
newValue = seenCurrentAmount + density;
}
- while (Interlocked.CompareExchange(ref Density[x, y].Y, newValue, seenCurrentAmount) !=
- seenCurrentAmount);
+ while (Interlocked.CompareExchange(ref Density[x + y * PlaneSize].Y, newValue,
+ seenCurrentAmount) != seenCurrentAmount);
return true;
}
@@ -598,11 +581,11 @@ public bool AddCloudInterlockedIfHandlesType(Compound compound, int x, int y, fl
{
do
{
- seenCurrentAmount = Density[x, y].Z;
+ seenCurrentAmount = Density[x + y * PlaneSize].Z;
newValue = seenCurrentAmount + density;
}
- while (Interlocked.CompareExchange(ref Density[x, y].Z, newValue, seenCurrentAmount) !=
- seenCurrentAmount);
+ while (Interlocked.CompareExchange(ref Density[x + y * PlaneSize].Z, newValue,
+ seenCurrentAmount) != seenCurrentAmount);
return true;
}
@@ -611,11 +594,11 @@ public bool AddCloudInterlockedIfHandlesType(Compound compound, int x, int y, fl
{
do
{
- seenCurrentAmount = Density[x, y].W;
+ seenCurrentAmount = Density[x + y * PlaneSize].W;
newValue = seenCurrentAmount + density;
}
- while (Interlocked.CompareExchange(ref Density[x, y].W, newValue, seenCurrentAmount) !=
- seenCurrentAmount);
+ while (Interlocked.CompareExchange(ref Density[x + y * PlaneSize].W, newValue,
+ seenCurrentAmount) != seenCurrentAmount);
return true;
}
@@ -633,17 +616,17 @@ public bool AddCloudInterlockedIfHandlesType(Compound compound, int x, int y, fl
/// The amount of compound taken
public float TakeCompound(Compound compound, int x, int y, float fraction = 1.0f)
{
- float amountInCloud = HackyAddress(ref Density[x, y], GetCompoundIndex(compound));
+ float amountInCloud = HackyAddress(ref Density[x + y * PlaneSize], GetCompoundIndex(compound));
var amountToGive = amountInCloud * fraction;
if (amountInCloud - amountToGive < 0.1f)
{
// Taking basically everything in the cloud
- Density[x, y] += CalculateCloudToAdd(compound, -amountInCloud);
+ Density[x + y * PlaneSize] += CalculateCloudToAdd(compound, -amountInCloud);
}
else
{
- Density[x, y] += CalculateCloudToAdd(compound, -amountToGive);
+ Density[x + y * PlaneSize] += CalculateCloudToAdd(compound, -amountToGive);
}
return amountToGive;
@@ -677,17 +660,17 @@ public bool TakeCompoundInterlocked(int compoundIndex, int x, int y, float fract
switch (compoundIndex)
{
case 0:
- return Interlocked.CompareExchange(ref Density[x, y].X, newValue, seenCurrentAmount) ==
- seenCurrentAmount;
+ return Interlocked.CompareExchange(ref Density[x + y * PlaneSize].X, newValue,
+ seenCurrentAmount) == seenCurrentAmount;
case 1:
- return Interlocked.CompareExchange(ref Density[x, y].Y, newValue, seenCurrentAmount) ==
- seenCurrentAmount;
+ return Interlocked.CompareExchange(ref Density[x + y * PlaneSize].Y, newValue,
+ seenCurrentAmount) == seenCurrentAmount;
case 2:
- return Interlocked.CompareExchange(ref Density[x, y].Z, newValue, seenCurrentAmount) ==
- seenCurrentAmount;
+ return Interlocked.CompareExchange(ref Density[x + y * PlaneSize].Z, newValue,
+ seenCurrentAmount) == seenCurrentAmount;
case 3:
- return Interlocked.CompareExchange(ref Density[x, y].W, newValue, seenCurrentAmount) ==
- seenCurrentAmount;
+ return Interlocked.CompareExchange(ref Density[x + y * PlaneSize].W, newValue,
+ seenCurrentAmount) == seenCurrentAmount;
default:
throw new ArgumentException("Compound index out of range");
}
@@ -701,7 +684,7 @@ public bool TakeCompoundInterlocked(int compoundIndex, int x, int y, float fract
/// The amount available for taking
public float AmountAvailable(Compound compound, int x, int y, float fraction = 1.0f)
{
- float amountInCloud = HackyAddress(ref Density[x, y], GetCompoundIndex(compound));
+ float amountInCloud = HackyAddress(ref Density[x + y * PlaneSize], GetCompoundIndex(compound));
float amountToGive = amountInCloud * fraction;
return amountToGive;
}
@@ -720,7 +703,7 @@ public void GetCompoundsAt(int x, int y, Dictionary result, boo
if (onlyAbsorbable && !compoundDefinitions[i]!.IsAbsorbable)
continue;
- float amount = HackyAddress(ref Density[x, y], i);
+ float amount = HackyAddress(ref Density[x + y * PlaneSize], i);
if (amount > 0)
result[compound] = amount;
}
@@ -815,7 +798,7 @@ public void AbsorbCompounds(int localX, int localY, CompoundBag storage,
while (true)
{
// Overestimate of how many compounds we get
- float cloudAmount = HackyAddress(ref Density[localX, localY], i);
+ float cloudAmount = HackyAddress(ref Density[localX + localY * PlaneSize], i);
float generousAmount = cloudAmount * Constants.SKIP_TRYING_TO_ABSORB_RATIO;
// Skip if there isn't enough to absorb
@@ -930,29 +913,150 @@ private Vector4 CalculateCloudToAdd(Compound compound, float density)
Compounds[3] == compound ? density : 0.0f);
}
- private void PartialDiffuse(int x0, int y0, int width, int height, float delta)
+ private void PartialDiffuse(int x0, int y0, int size, float delta)
{
- float a = delta * Constants.CLOUD_DIFFUSION_RATE;
- var cellMultiplier = a * 0.25f;
- var planeSize = PlaneSize;
+ int halfEdge = Constants.CLOUD_PLANE_EDGE_WIDTH / 2;
+ int startX = x0 + halfEdge;
+ int endX = x0 + size - halfEdge;
+ int startY = y0 + halfEdge;
+ int endY = y0 + size - halfEdge;
- for (int x = x0; x < x0 + width; ++x)
+ int planeSize = PlaneSize;
+ float diffusionAmount = delta * Constants.CLOUD_DIFFUSION_RATE;
+ float neighborWeight = diffusionAmount * 0.25f;
+ float centerWeight = 1.0f - diffusionAmount;
+
+ ReadOnlySpan sourceDensity = Density.AsSpan();
+ Span destinationDensity = OldDensity.AsSpan();
+ ReadOnlySpan sourceFloats = MemoryMarshal.Cast(sourceDensity);
+ Span destinationFloats = MemoryMarshal.Cast(destinationDensity);
+
+ var centerWeightVector = Vector256.Create(centerWeight);
+ var neighborWeightVector = Vector256.Create(neighborWeight);
+
+ ref float sourceReference = ref MemoryMarshal.GetReference(sourceFloats);
+ ref float destinationReference = ref MemoryMarshal.GetReference(destinationFloats);
+
+ bool avx2Supported = Avx2.IsSupported;
+
+ for (int y = startY; y < endY; ++y)
{
- var xMinus = x == 0 ? planeSize - 1 : x - 1;
- var xPlus = x == planeSize - 1 ? 0 : x + 1;
+ int currentColumnOffset = y * planeSize;
+ int previousColumnOffset = (y == 0 ? planeSize - 1 : y - 1) * planeSize;
+ int nextColumnOffset = (y == planeSize - 1 ? 0 : y + 1) * planeSize;
- for (int y = y0; y < y0 + height; ++y)
+ int x = startX;
+
+ if (avx2Supported)
+ {
+ for (; x <= endX - 2; x += 2)
+ {
+ uint offset = (uint)(currentColumnOffset + x) << 2;
+
+ var center = Vector256.LoadUnsafe(ref sourceReference, offset);
+ var up = Vector256.LoadUnsafe(ref sourceReference, (uint)(previousColumnOffset + x) << 2);
+ var down = Vector256.LoadUnsafe(ref sourceReference, (uint)(nextColumnOffset + x) << 2);
+
+ var left = Vector256.LoadUnsafe(ref sourceReference, offset - 4);
+ var right = Vector256.LoadUnsafe(ref sourceReference, offset + 4);
+
+ var neighbors = Avx.Add(Avx.Add(up, down), Avx.Add(left, right));
+ var result = Avx.Add(Avx.Multiply(center, centerWeightVector),
+ Avx.Multiply(neighbors, neighborWeightVector));
+
+ result.StoreUnsafe(ref destinationReference, offset);
+ }
+ }
+
+ // Tail fallback
+ for (; x < endX; ++x)
+ {
+ int currentIndex = currentColumnOffset + x;
+ destinationDensity[currentIndex] = sourceDensity[currentIndex] * centerWeight +
+ (sourceDensity[currentIndex - 1] + sourceDensity[currentIndex + 1] +
+ sourceDensity[previousColumnOffset + x] + sourceDensity[nextColumnOffset + x]) * neighborWeight;
+ }
+ }
+ }
+
+ ///
+ /// This is the original implementation of the PartialDiffuse algorithm, which was scalar.
+ /// It has been kept to diffuse edges.
+ ///
+ private void PartialDiffuseScalar(int x0, int y0, int width, int height, float deltaTime)
+ {
+ float diffusionAmount = deltaTime * Constants.CLOUD_DIFFUSION_RATE;
+ float cellMultiplier = diffusionAmount * 0.25f;
+ float neighbourMultiplier = 1.0f - diffusionAmount;
+ int planeSize = PlaneSize;
+
+ var sourceDensity = Density.AsSpan();
+ var destinationDensity = OldDensity.AsSpan();
+
+ for (int y = y0; y < y0 + height; ++y)
+ {
+ int currentRowOffset = y * planeSize;
+ int previousRowOffset = (y == 0 ? planeSize - 1 : y - 1) * planeSize;
+ int nextRowOffset = (y == planeSize - 1 ? 0 : y + 1) * planeSize;
+
+ for (int x = x0; x < x0 + width; ++x)
+ {
+ int currentIndex = currentRowOffset + x;
+ int prevX = x == 0 ? planeSize - 1 : x - 1;
+ int nextX = x == planeSize - 1 ? 0 : x + 1;
+
+ destinationDensity[currentIndex] = sourceDensity[currentIndex] * neighbourMultiplier +
+ (sourceDensity[currentRowOffset + prevX] + sourceDensity[currentRowOffset + nextX] +
+ sourceDensity[previousRowOffset + x] + sourceDensity[nextRowOffset + x]) * cellMultiplier;
+ }
+ }
+ }
+
+ private void AreaDiffuse(int horizontalStart, int horizontalEnd, int verticalStart, int verticalEnd,
+ float delta)
+ {
+ int planeSize = PlaneSize;
+ float diffusionAmount = delta * Constants.CLOUD_DIFFUSION_RATE;
+ float neighborWeight = diffusionAmount * 0.25f;
+ float centerWeight = 1.0f - diffusionAmount;
+
+ var sourceDensity = Density.AsSpan();
+ var destinationDensity = OldDensity.AsSpan();
+
+ for (int horizontalIndex = horizontalStart; horizontalIndex < horizontalEnd; ++horizontalIndex)
+ {
+ int currentRowOffset = horizontalIndex * planeSize;
+ int previousRowOffset = (horizontalIndex == 0 ? planeSize - 1 : horizontalIndex - 1) * planeSize;
+ int nextRowOffset = (horizontalIndex == planeSize - 1 ? 0 : horizontalIndex + 1) * planeSize;
+
+ int verticalIndex = verticalStart;
+
+ if (verticalIndex == 0 && verticalIndex < verticalEnd)
+ {
+ int currentIndex = currentRowOffset + 0;
+ destinationDensity[currentIndex] = sourceDensity[currentIndex] * centerWeight +
+ (sourceDensity[currentRowOffset + (planeSize - 1)] + sourceDensity[currentRowOffset + 1] +
+ sourceDensity[previousRowOffset] + sourceDensity[nextRowOffset]) * neighborWeight;
+ ++verticalIndex;
+ }
+
+ int safeLimit = Math.Min(verticalEnd, planeSize - 1);
+ for (; verticalIndex < safeLimit; ++verticalIndex)
+ {
+ int currentIndex = currentRowOffset + verticalIndex;
+ destinationDensity[currentIndex] = sourceDensity[currentIndex] * centerWeight +
+ (sourceDensity[currentIndex - 1] + sourceDensity[currentIndex + 1] +
+ sourceDensity[previousRowOffset + verticalIndex] + sourceDensity[nextRowOffset + verticalIndex])
+ * neighborWeight;
+ }
+
+ if (verticalIndex == planeSize - 1 && verticalIndex < verticalEnd)
{
- var yMinus = y == 0 ? planeSize - 1 : y - 1;
- var yPlus = y == planeSize - 1 ? 0 : y + 1;
-
- OldDensity[x, y] =
- Density[x, y] * (1 - a) +
- (
- Density[x, yMinus] +
- Density[x, yPlus] +
- Density[xMinus, y] +
- Density[xPlus, y]) * cellMultiplier;
+ int currentIndex = currentRowOffset + verticalIndex;
+ destinationDensity[currentIndex] = sourceDensity[currentIndex] * centerWeight +
+ (sourceDensity[currentIndex - 1] + sourceDensity[currentRowOffset] +
+ sourceDensity[previousRowOffset + verticalIndex] + sourceDensity[nextRowOffset + verticalIndex])
+ * neighborWeight;
}
}
}
@@ -1049,71 +1153,166 @@ private int GetEdgeShift(int coord, int playerPos)
return 0;
}
- private void PartialAdvect(int x0, int y0, int size, float delta)
+ private void PartialAdvect(int slice, int slices, float deltaTime)
{
- var resolution = CloudResolution;
- var worldPos = GetWorldPositionForAdvection(x0, y0);
+ int planeSize = PlaneSize;
+ int rowStart = slice * planeSize / slices;
+ int rowEnd = (slice + 1) * planeSize / slices;
+ int rowCount = rowEnd - rowStart;
+
+ ReadOnlySpan source = OldDensity.AsSpan(rowStart * planeSize, rowCount * planeSize);
+ Span destination = Density.AsSpan();
+ Span bufferSpan = tempBuffer.AsSpan(rowStart * planeSize * 4, rowCount * planeSize * 4);
+
+ float resolution = CloudResolution;
+ float intensityScale = 255.0f / Constants.CLOUD_MAX_INTENSITY_SHOWN;
+ int chunkWidth = planeSize / Constants.CLOUD_PLANE_SQUARES_PER_SIDE;
+
+ var vScale = Vector128.Create(intensityScale);
+ var vZero = Vector128.Zero;
+ var v255 = Vector128.Create(255.0f);
+
+ int sourceIdx = 0;
+ int bufferIdx = 0;
- for (int x = x0; x < x0 + size; ++x)
+ for (int y = 0; y < rowCount; ++y)
{
- var worldX = worldPos.X + x * resolution;
+ int absoluteY = y + rowStart;
- for (int y = y0; y < y0 + size; ++y)
+ for (int chunkX = 0; chunkX < Constants.CLOUD_PLANE_SQUARES_PER_SIDE; ++chunkX)
{
- var worldY = worldPos.Y + y * resolution;
+ Vector2 worldPositionBase = GetWorldPositionForAdvection(chunkX * planeSize /
+ Constants.CLOUD_PLANE_SQUARES_PER_SIDE, rowStart);
- var oldDensity = OldDensity[x, y];
+ int startX = chunkX * chunkWidth;
+ int endX = startX + chunkWidth;
- // This is better for performance than checking length squared of oldDensity although
- // might cause issues if for some reason density would end up with negative value
- if (oldDensity.X + oldDensity.Y + oldDensity.Z + oldDensity.W < 1)
- continue;
+ float worldY = worldPositionBase.Y + absoluteY * resolution;
- var velocity =
- fluidSystem!.VelocityAt(new Vector2(worldX, worldY));
+ int x = startX;
- if (MathF.Abs(velocity.X) + MathF.Abs(velocity.Y) <
- Constants.CURRENT_COMPOUND_CLOUD_ADVECT_THRESHOLD)
+ for (; x <= endX - 4; x += 4)
{
- velocity = Vector2.Zero;
- }
+ var p0 = source[sourceIdx].AsVector128();
+ var p1 = source[sourceIdx + 1].AsVector128();
+ var p2 = source[sourceIdx + 2].AsVector128();
+ var p3 = source[sourceIdx + 3].AsVector128();
+
+ var sums = Vector128.Create(p0[0] + p0[1] + p0[2] + p0[3],
+ p1[0] + p1[1] + p1[2] + p1[3],
+ p2[0] + p2[1] + p2[2] + p2[3],
+ p3[0] + p3[1] + p3[2] + p3[3]);
- velocity *= VISCOSITY;
+ if (Vector128.LessThanAll(sums, Vector128.Create(1.0f)))
+ {
+ Vector128.Zero.StoreUnsafe(ref bufferSpan[bufferIdx]);
+
+ sourceIdx += 4;
+ bufferIdx += 16;
+ continue;
+ }
- float dx = x + delta * velocity.X;
- float dy = y + delta * velocity.Y;
+ for (int i = 0; i < 4; ++i)
+ {
+ Vector4 currentDensity = source[sourceIdx++];
+
+ var vPixel = currentDensity.AsVector128();
+ vPixel = Vector128.Multiply(vPixel, vScale);
+ vPixel = Vector128.Min(Vector128.Max(vPixel, vZero), v255);
+ var vInt = Vector128.ConvertToInt32(vPixel);
+
+ // These check are JIT compile-time constants, so they are pruned during execution.
+ // This makes branching here de-facto non-existent.
+ Vector128 packed8;
+ if (Sse2.IsSupported)
+ {
+ var packed16 = Sse2.PackSignedSaturate(vInt, vInt);
+ packed8 = Sse2.PackUnsignedSaturate(packed16, packed16).AsByte();
+ }
+ else if (AdvSimd.IsSupported)
+ {
+ var narrow16 = AdvSimd.ExtractNarrowingSaturateLower(vInt);
+ var narrow8 = AdvSimd.ExtractNarrowingSaturateUnsignedLower(narrow16.ToVector128());
+ packed8 = narrow8.ToVector128();
+ }
+ else
+ {
+ packed8 = Vector128.Create((byte)vInt.GetElement(0), (byte)vInt.GetElement(1),
+ (byte)vInt.GetElement(2), (byte)vInt.GetElement(3),
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0).AsByte();
+ }
+
+ MemoryMarshal.Write(bufferSpan.Slice(bufferIdx, 4),
+ packed8.AsUInt32().ToScalar());
+
+ bufferIdx += 4;
+
+ if (currentDensity.X + currentDensity.Y + currentDensity.Z + currentDensity.W < 1.0f)
+ continue;
+
+ float worldX = worldPositionBase.X + (x + i) * resolution;
+ ProcessPixelAdvection(currentDensity, x + i, absoluteY, worldX, worldY, deltaTime,
+ destination, planeSize);
+ }
+ }
- CalculateMovementFactors(dx, dy,
- out int floorX, out int ceilX, out int floorY, out int ceilY,
- out float weightRight, out float weightLeft, out float weightBottom, out float weightTop);
+ for (; x < endX; ++x)
+ {
+ Vector4 currentDensity = source[sourceIdx++];
- floorX = floorX.PositiveModulo(PlaneSize);
- ceilX = ceilX.PositiveModulo(PlaneSize);
- floorY = floorY.PositiveModulo(PlaneSize);
- ceilY = ceilY.PositiveModulo(PlaneSize);
+ bufferSpan[bufferIdx] = (byte)Math.Clamp(currentDensity.X * intensityScale, 0, 255);
+ bufferSpan[bufferIdx + 1] = (byte)Math.Clamp(currentDensity.Y * intensityScale, 0, 255);
+ bufferSpan[bufferIdx + 2] = (byte)Math.Clamp(currentDensity.Z * intensityScale, 0, 255);
+ bufferSpan[bufferIdx + 3] = (byte)Math.Clamp(currentDensity.W * intensityScale, 0, 255);
+ bufferIdx += 4;
- var oldDensityDecayed = oldDensity * decayRates;
- var oldDensityDecayedLeft = oldDensityDecayed * weightLeft;
- var oldDensityDecayedRight = oldDensityDecayed * weightRight;
+ if (currentDensity.X + currentDensity.Y + currentDensity.Z + currentDensity.W < 1.0f)
+ continue;
- Density[floorX, floorY] += oldDensityDecayedLeft * weightTop;
- Density[floorX, ceilY] += oldDensityDecayedLeft * weightBottom;
- Density[ceilX, floorY] += oldDensityDecayedRight * weightTop;
- Density[ceilX, ceilY] += oldDensityDecayedRight * weightBottom;
+ float worldX = worldPositionBase.X + x * resolution;
+ ProcessPixelAdvection(currentDensity, x, absoluteY, worldX, worldY, deltaTime,
+ destination, planeSize);
+ }
}
}
}
- private void PartialUpdateTextureImage(int x0, int y0, int width, int height)
+ private void ProcessPixelAdvection(Vector4 currentDensity, int absoluteX, int absoluteY, float worldX, float worldY,
+ float deltaTime, Span destination, int planeSize)
{
- for (int x = x0; x < x0 + width; ++x)
- {
- for (int y = y0; y < y0 + height; ++y)
- {
- var pixel = Density[x, y] * (1 / Constants.CLOUD_MAX_INTENSITY_SHOWN);
- image!.SetPixel(x, y, new Color(pixel.X, pixel.Y, pixel.Z, pixel.W));
- }
- }
+ Vector2 velocity = fluidSystem!.VelocityAt(new Vector2(worldX, worldY));
+
+ if (MathF.Abs(velocity.X) + MathF.Abs(velocity.Y) < Constants.CURRENT_COMPOUND_CLOUD_ADVECT_THRESHOLD)
+ velocity = Vector2.Zero;
+
+ velocity *= VISCOSITY;
+ float targetX = absoluteX + deltaTime * velocity.X;
+ float targetY = absoluteY + deltaTime * velocity.Y;
+
+ CalculateMovementFactors(targetX, targetY, out int floorX, out int ceilX, out int floorY, out int ceilY,
+ out float weightRight, out float weightLeft, out float weightBottom, out float weightTop);
+
+ // Normally the checks here would be fX < 0 || fX >= planeSize
+ // By casting the operands to uint here, the first condition (fX < 0) is converted to fX becoming a really large
+ // number. This makes the comparison always true (as planeSize is usually much smaller than the max integer)
+ // thus saving us a few more instructions during the calculations.
+ if ((uint)floorX >= (uint)planeSize)
+ floorX = (floorX < 0) ? floorX + planeSize : floorX - planeSize;
+ if ((uint)ceilX >= (uint)planeSize)
+ ceilX = (ceilX < 0) ? ceilX + planeSize : ceilX - planeSize;
+ if ((uint)floorY >= (uint)planeSize)
+ floorY = (floorY < 0) ? floorY + planeSize : floorY - planeSize;
+ if ((uint)ceilY >= (uint)planeSize)
+ ceilY = (ceilY < 0) ? ceilY + planeSize : ceilY - planeSize;
+
+ int floorYRow = floorY * planeSize;
+ int ceilYRow = ceilY * planeSize;
+ Vector4 decayed = currentDensity * decayRates;
+
+ destination[floorX + floorYRow] += decayed * (weightLeft * weightTop);
+ destination[floorX + ceilYRow] += decayed * (weightLeft * weightBottom);
+ destination[ceilX + floorYRow] += decayed * (weightRight * weightTop);
+ destination[ceilX + ceilYRow] += decayed * (weightRight * weightBottom);
}
private void PartialClearDensity(int x0, int y0, int width, int height)
@@ -1122,17 +1321,11 @@ private void PartialClearDensity(int x0, int y0, int width, int height)
{
for (int y = y0; y < y0 + height; ++y)
{
- Density[x, y] = Vector4.Zero;
+ Density[x + y * PlaneSize] = Vector4.Zero;
}
}
}
- private void PartialDiffuseCenter(int x0, int y0, int size, float delta)
- {
- PartialDiffuse(x0 + Constants.CLOUD_PLANE_EDGE_WIDTH / 2, y0 + Constants.CLOUD_PLANE_EDGE_WIDTH / 2, size
- - Constants.CLOUD_PLANE_EDGE_WIDTH, size - Constants.CLOUD_PLANE_EDGE_WIDTH, delta);
- }
-
private float HackyAddress(ref Vector4 vector, int index)
{
switch (index)
@@ -1163,6 +1356,12 @@ private int GetCompoundIndex(Compound compound)
private void CreateDensityTexture()
{
+ int requestedSize = PlaneSize * PlaneSize * 4;
+ if (tempBuffer == null! || requestedSize > tempBuffer.Length)
+ {
+ tempBuffer = new byte[requestedSize];
+ }
+
image = Image.CreateEmpty(PlaneSize, PlaneSize, false, Image.Format.Rgba8);
texture = ImageTexture.CreateFromImage(image);
diff --git a/src/microbe_stage/CompoundCloudSystem.cs b/src/microbe_stage/CompoundCloudSystem.cs
index 80a66eaadec..c04959e5917 100644
--- a/src/microbe_stage/CompoundCloudSystem.cs
+++ b/src/microbe_stage/CompoundCloudSystem.cs
@@ -488,15 +488,6 @@ private void UpdateCloudContents(float delta)
cloud.QueueAdvectCloud(delta, tasks);
}
- executor.RunTasks(tasks);
- tasks.Clear();
-
- // Update the cloud textures in parallel
- foreach (var cloud in clouds)
- {
- cloud.QueueUpdateTextureImage(tasks);
- }
-
executor.RunTasks(tasks);
foreach (var cloud in clouds)