I have an implementation of BFS that works just fine, but seems to get really CPU heavy, even at low depth (sub ms for a depth of 4 but 10 ms for a depth of 10). I'm quite confident that this algorithm should run sub ms event at a depth of 100 but i'm not quite sure what I'm missing. Here's the code :
using System.Collections.Generic;
using System.Linq;
using UnityEngine;
public class VisionGraph : MonoBehaviour
{
public Transform Ground;
public int Height;
public int Width;
private MeshFilter mf;
public Vector3[] Vertices;
public float precision;
public Vector3 SelectedVertex;
// Start is called before the first frame update
private void Start()
{
mf = Ground.GetComponent<MeshFilter>();
Matrix4x4 localToWorld = transform.localToWorldMatrix;
Vector3 world_max = localToWorld.MultiplyPoint3x4(mf.mesh.vertices[0]);
Width = Mathf.RoundToInt(world_max.z * (1 / precision));
int maxIndex = Mathf.RoundToInt((world_max.z * (1 / precision)) * (Height * (1 / precision)) * world_max.x);
Vertices = new Vector3[maxIndex];
//This is the graph initialization.
//Indices increment by 1 while actual coordinates increments by `precision`.
//Indices are then reversed to coordinates in BFS with `position/precision`.
int xInd, yInd, zInd; xInd = yInd = zInd = 0;
float x, y, z; x = y = z = 0;
int index = 0;
while (index < Vertices.Length - 1)
{
index = (yInd * (Width * Width)) (zInd * Width) xInd;
Debug.Log(index " " maxIndex);
Vertices[index] = new(x, y, z);
x = precision;
xInd ;
if (x > world_max.x)
{
x = 0;
xInd = 0;
z = precision;
zInd ;
if (z > world_max.z)
{
z = 0;
zInd = 0;
y = precision;
yInd ;
}
}
}
SelectedVertex = Vertices[600];
}
private void OnDrawGizmos()
{
// Needs to be turned into retrieve index from position.
// but i'm not sure how to clamp the continuous position to `precision` steps.
SelectedVertex = Vertices.Where(v => Vector3.Distance(v, SelectedVertex) <= precision - 0.0001 ).FirstOrDefault();
var watch = System.Diagnostics.Stopwatch.StartNew();
List<Vector3Int> closeVertices = BFS(SelectedVertex, 10); // second param is the search depth
watch.Stop();
Debug.Log(watch.ElapsedMilliseconds);
foreach (var vert in closeVertices)
{
var index = (vert.y * (Width * Width)) (vert.z * Width) vert.x;
if (index >= Vertices.Length) continue;
Gizmos.color = Color.red;
Gizmos.DrawSphere(Vertices[index], 0.1f);
}
}
private List<Vector3Int> BFS(Vector3 start, int depth)
{
Vector3Int startIndex = new((int)(start.x / precision), (int)(start.y / precision), (int)(start.z / precision));
Dictionary<Vector3Int, bool> closedList = new();
List<Vector3Int> queue = new() { startIndex };
while (queue.Count > 0)
{
Vector3Int v = queue[^1];
queue.RemoveAt(queue.Count-1);
Vector3Int[] neighbors = new[]
{
v Vector3Int.left,
v Vector3Int.right,
v Vector3Int.up,
v Vector3Int.down,
v Vector3Int.forward,
v Vector3Int.back,
};
foreach (Vector3Int n in neighbors)
{
if (n.x < 0 || n.y < 0 || n.z < 0) continue; // this will alos include the high limit of the grid but i dont "need" it at this point of the tests
//For every implementation of graph search algorithms I make, this always seem to bee the weak part.
if ((n - startIndex).sqrMagnitude > depth*depth || queue.Any(vert => vert == n) || closedList.ContainsKey(n)) continue;
queue.Insert(0, n);
}
closedList.Add(v, true);
}
return closedList.Keys.ToList();
}
}
The use of a Dictionary for the closedList
is a poor attempt at trying to reduce the List searching time, it used to be closedList.Any(vert => vert == n)
but I didn't see great change by the use of each.
I'd be really glad if somebody could pinpoint what really slow this down.
Second question : How do you even MultiThread with BFS ? Both the queue and the closed list are very dynamic, is there a solution to work this out with NativeLists
?
Thank you for your time. Let me know if anything's unclear.
CodePudding user response:
I cannot make out at a glance what your code is doing ( there is no docs and my C# is rusty ) But your BFS has an surprising amount of code ( A BFS usually only takes a few lines ). Why do you need to create a new, large data structure on EVERY pass through the while loop? That is expensive, and as far as I recall C# uses some bedeviled garbage collector thingy that is going to be fighting you all the way
You have a variable named queue, but it is not a queue but rather a list. This is confusing, and probably incorrect.
As far as I can make out, you are creating the neighbors of a node on the fly as you go along. Terrible idea! You should model the graph either with an adjacency matrix ( simplest ) or node adjacency lists ( more complicated, but memory efficient for large graphs ) Then you can move through the graph using the node indices to look up the neighbours without needing to create them over and over again.
For comparison, here is a C code for a BFS that uses a real queue and concerns itself only with node indices.
void cPathFinder::breadth(
std::function<void(int v, int p)> visitor)
{
if (!nodeCount())
throw std::runtime_error("breadth called on empty graph");
std::vector<bool> visited(nodeCount(), false);
std::queue<int> Q;
visited[myStart] = true;
Q.push(myStart);
while (Q.size())
{
int v = Q.front();
Q.pop();
for (int w : adjacent(v))
{
if (!visited[w])
{
// reached a new node
visitor(w, v);
visited[w] = true;
Q.push(w);
}
}
}
}
CodePudding user response:
Upfront, this may, or may not be suitable in your situation, but as an exercise for myself as much as anything, I thought it'd be interesting to see how fast I could get the execution of the given code.
I created three methods in my test.
- The first was your method verbatim.
- The second method was your code, but 'rejigged' but taking the
neighbours
array creation out of the loop and adding in aQueue<Vector3Int>
instead of theList<Vector3Int>
as I suspected that the insertion to element 0 of theList<T>
might be a big part of the issue in your original code also replacing yourDictionary<Vector3Int, bool>
with aHashSet<Vector3Int>
. I also replacedAny(v => v == n)
withContains(v)
.Contains
turned out to be significantly faster. - Then the third method I created, I went even further to add a second
HashSet
to enable a fast lookup of items that were still in the queue.
I then ran the tests against each method using a StopWatch
to record the times. I also ran the last method again, but that time from Task.Run
.
Here are the two 'optimised' methods:
private List<Vector3Int> BFS_Optimised ( Vector3 start, int depth )
{
Vector3Int startIndex = new((int)(start.x / precision), (int)(start.y / precision), (int)(start.z / precision));
HashSet<Vector3Int> closedList = new ();
Queue<Vector3Int> queue = new ();
queue.Enqueue(startIndex);
var dSquared = depth * depth;
Vector3Int[] neighbors =
new[] { Vector3Int.left, Vector3Int.right, Vector3Int.up, Vector3Int.down, Vector3Int.forward, Vector3Int.back };
while ( queue.Count > 0 )
{
var v = queue.Dequeue();
for ( int i = 0; i < 6; i )
{
var n = v neighbors[i];
if ( n.x < 0 || n.y < 0 || n.z < 0 )
continue; // this will alos include the high limit of the grid but i dont "need" it at this point of the tests
if ( ( n - startIndex ).sqrMagnitude > dSquared
|| closedList.Contains ( n )
|| queue.Contains ( n ) ) // queue.Any(v => v == n ) ) //
continue;
queue.Enqueue ( n );
}
closedList.Add ( v );
}
return closedList.ToList ( );
}
private List<Vector3Int> BFS_Optimised2 ( Vector3 start, int depth )
{
Vector3Int startIndex = new((int)(start.x / precision), (int)(start.y / precision), (int)(start.z / precision));
HashSet<Vector3Int> closedList = new ();
Queue<Vector3Int> queue = new ();
queue.Enqueue ( startIndex );
HashSet<Vector3Int> qHash = new ( ) { startIndex };
var dSquared = depth * depth;
Vector3Int[] neighbors =
new[] { Vector3Int.left, Vector3Int.right, Vector3Int.up, Vector3Int.down, Vector3Int.forward, Vector3Int.back };
while ( queue.Count > 0 )
{
var v = queue.Dequeue();
qHash.Remove ( v );
for ( int i = 0; i < 6; i )
{
var n = v neighbors[i];
if ( n.x < 0 || n.y < 0 || n.z < 0 )
continue; // this will alos include the high limit of the grid but i dont "need" it at this point of the tests
if ( ( n - startIndex ).sqrMagnitude > dSquared
|| closedList.Contains ( n )
|| qHash.Contains ( n ) )
continue;
queue.Enqueue ( n );
qHash.Add ( n );
}
closedList.Add ( v );
}
return closedList.ToList ( );
}
Here's the test (excuse the crudeness and lack of depth in tests):
async void Run ( )
{
var iterations = 100;
var d = 10;
var v = new Vector3 ( 10, 10, 10 );
List<Vector3Int> r1 = default;
List<Vector3Int> r2 = default;
List<Vector3Int> r3 = default;
List<Vector3Int> r4 = default;
Debug.Log ( "Waiting ... " );
await Task.Delay ( 2000 );
Debug.Log ( "Run ... " );
Stopwatch sw = new();
sw.Start ( );
for ( int i = 0; i < iterations; i )
r1 = BFS ( v, d );
sw.Stop ( );
var t1 = sw.Elapsed.TotalMilliseconds;
sw.Restart ( );
for ( int i = 0; i < iterations; i )
r2 = BFS_Optimised ( v, d );
sw.Stop ( );
var t2 = sw.Elapsed.TotalMilliseconds;
sw.Restart ( );
for ( int i = 0; i < iterations; i )
r3 = BFS_Optimised2 ( v, d );
sw.Stop ( );
var t3 = sw.Elapsed.TotalMilliseconds;
sw.Restart ( );
r4 = await Task.Run ( ( ) => BFS_Optimised2 ( v, d ) );
sw.Stop ( );
var t4 = sw.Elapsed.TotalMilliseconds;
StringBuilder sb = new();
sb.AppendLine ( $"Original : {t1} ms [{r1.Count}] ({r1 [ 0 ]}) .. ({r1 [ ^1 ]})" );
sb.AppendLine ( $"Optimised : {t2} ms [{r2.Count}] ({r2 [ 0 ]}) .. ({r2 [ ^1 ]})" );
sb.AppendLine ( $"Optimised2 : {t3} ms [{r3.Count}] ({r3 [ 0 ]}) .. ({r3 [ ^1 ]})" );
sb.AppendLine ( $"Optimised2 Task.Run : {t4} ms [{r4.Count}] ({r4 [ 0 ]}) .. ({r4 [ ^1 ]})" );
Debug.Log ( sb.ToString ( ) );
}
And here are the results:
Original : 10701.7465 ms [4169] ((10, 10, 10)) .. ((15, 5, 3))
Optimised : 1830.9519 ms [4169] ((10, 10, 10)) .. ((15, 5, 3))
Optimised2 : 209.1559 ms [4169] ((10, 10, 10)) .. ((15, 5, 3))
Optimised2 Task.Run : 17.7353 ms [4169] ((10, 10, 10)) .. ((15, 5, 3))