I got a challenge today on the CodeSignal website to compute the Euclidean distance between points.
The last solution I tried passes all the tests but fails the performance check.
public static double solution(int[][] p)
{
    double bestDistance = double.MaxValue;
    int plength = p.Length;
    for (int i = 0; i < plength - 1; i++)
    {
        double x = p[i][0];
        double y = p[i][1];
        for (int j = i + 1; j < plength; j++)
        {
            double a = p[j][0];
            double b = p[j][1];
            var distance = Math.Sqrt(Math.Pow(x - a, 2) + Math.Pow(y - b, 2));
            if (distance < bestDistance)
                bestDistance = distance;
        }
    }
    return bestDistance;
}
The outer array holds every point on a Cartesian plane; each inner array always has exactly two elements, at positions 0 and 1, for the x and y coordinates. So I have to iterate over all the points and find the smallest distance between any two of them. All the tests pass, but the performance check fails and I really don't know why.
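To illustrate the input format (a small example of my own), the expected answer here would be 5, the distance between (0, 0) and (3, 4):

int[][] points =
{
    new[] { 0, 0 },
    new[] { 3, 4 },
    new[] { 10, 10 }
};
Console.WriteLine(solution(points)); // prints 5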
CodePudding user response:
Well I am going to trust that we're not cheating together in a test :)
So in this case it's about keeping the CPU off duty as much as possible: we don't want to compute anything twice, unless it's fair to assume that the lookup would be more expensive than the calculation itself.
Below, besides setting up test data and instrumentation, you'll see a consistent improvement of over 55% just from caching the results of the Math.Pow operations. Do play around with it more, I'm sure you can reduce it further.
[Fact]
public void EuclidifyTest() {
    Exception exception = null;
    try{
        int sampleCount = 8000;
        Debug.WriteLine($"Sample count: {sampleCount}, generating unique data");
        var rnd = new Random();
        var samplesArray = new int[sampleCount, 2];
        bool alreadyInSet(int x, int y){
            for (int i = 0; i < sampleCount; i++)
            {
                if(samplesArray[i, 0] == x && samplesArray[i, 1] == y)
                    return true;
            }
            return false;
        }
        for (int i = 0; i < sampleCount; i++)
        {
            var nextX = rnd.Next(1, 1000);
            var nextY = rnd.Next(1, 1000);
            while(alreadyInSet(nextX, nextY)){
                nextX = rnd.Next(1, 1000);
                nextY = rnd.Next(1, 1000);
            }
            samplesArray[i, 0] = nextX;
            samplesArray[i, 1] = nextY;
        }
        Debug.WriteLine($"Samples ready");
        var samplesArrayJaggar = new int[sampleCount][];
        for (int i = 0; i < sampleCount; i++){
            samplesArrayJaggar[i] = new int[]{ samplesArray[i, 0], samplesArray[i, 1] };
        }
        var sw = Stopwatch.StartNew();
        var dblFirst = solution(samplesArrayJaggar);
        sw.Stop();
        var timeFirst = sw.ElapsedMilliseconds;
        Debug.WriteLine($"First way result in {sw.ElapsedMilliseconds} ms, produced shortest distance {dblFirst:0.0000000} ");
        sw.Restart();
        var dblSecond = speedUp(samplesArray);
        sw.Stop();
        var timeSecond = sw.ElapsedMilliseconds;
        Debug.WriteLine($"Second way result in {sw.ElapsedMilliseconds} ms, produced shortest distance {dblSecond:0.0000000} ");
        var diff = timeFirst - timeSecond;
        var pctImprove = (double)diff / timeFirst * 100;
        Debug.WriteLine($"Improvement {pctImprove:0.0}%");
    }catch(Exception ex){
        exception = ex;
        Debug.WriteLine(ex.ToString());
    }
    Assert.Null(exception);
}
static double speedUp(int[,] samplesArray) {
    int samplesLength = samplesArray.GetLength(0);
    // Cache of squared deltas, so Math.Pow is evaluated only once per distinct delta.
    var powersDictionary = new Dictionary<int, double>(samplesLength);
    double getOr(int dist){
        if(powersDictionary.TryGetValue(dist, out double found)){
            return found;
        }
        var exp = Math.Pow(dist, 2);
        powersDictionary.Add(dist, exp);
        return exp;
    }
    int distX;
    int distY;
    // Track the smallest squared distance and take the single Math.Sqrt at the very end.
    double lowestExponent = double.MaxValue;
    double expSum;
    for (int i = 0; i < samplesLength; i++)
    {
        for(int j = i + 1; j < samplesLength; j++){
            distX = samplesArray[i, 0] - samplesArray[j, 0];
            distY = samplesArray[i, 1] - samplesArray[j, 1];
            expSum = getOr(distX) + getOr(distY);
            if(expSum < lowestExponent)
                lowestExponent = expSum;
        }
    }
    return Math.Sqrt(lowestExponent);
}
//Your solution copy pasted in
public static double solution(int[][] p)
{
    double bestDistance = double.MaxValue;
    int plength = p.Length;
    for (int i = 0; i < plength - 1; i++)
    {
        double x = p[i][0];
        double y = p[i][1];
        for (int j = i + 1; j < plength; j++)
        {
            double a = p[j][0];
            double b = p[j][1];
            var distance = Math.Sqrt(Math.Pow(x - a, 2) + Math.Pow(y - b, 2));
            if (distance < bestDistance)
                bestDistance = distance;
        }
    }
    return bestDistance;
}
Update: I got curious and wanted to squeeze out a bit more, so the next step, without giving up too much of the relative simplicity, is to go parallel. Here I'm seeing a relative improvement of consistently above 82%; obviously these gains will grow with the size of the set.
static double speedUpMore(int[,] samplesArray) {
    int samplesLength = samplesArray.GetLength(0);
    // ConcurrentDictionary (System.Collections.Concurrent), since several tasks read and write the cache at the same time.
    var powersDictionary = new ConcurrentDictionary<int, double>();
    double getOr(int dist){
        if(powersDictionary.TryGetValue(dist, out double found)){
            return found;
        }
        var exp = Math.Pow(dist, 2);
        powersDictionary.TryAdd(dist, exp);
        return exp;
    }
    // Each batch scans its own slice of outer indices and returns the smallest squared distance it sees.
    double handleBatch(object batchBorders){
        var fromTo = (int[])batchBorders;
        var sampleCount = fromTo[2];
        double lowestLocal = double.MaxValue;
        for(int i = fromTo[0]; i <= fromTo[1]; i++){
            for(int j = i + 1; j < sampleCount; j++){
                var distX = samplesArray[i, 0] - samplesArray[j, 0];
                var distY = samplesArray[i, 1] - samplesArray[j, 1];
                var expSum = getOr(distX) + getOr(distY);
                if(expSum < lowestLocal)
                    lowestLocal = expSum;
            }
        }
        return lowestLocal;
    }
    double batchNumbers = 10.0;
    var batchSize = (int)Math.Ceiling(samplesLength / batchNumbers);
    var tasksList = new List<Task<double>>(10);
    int idxFrom;
    int idxTo;
    for(int i = 0; i < batchNumbers; i++){
        idxFrom = i + (i * batchSize);
        if(i < (batchNumbers - 1.0))
            idxTo = i + (i * batchSize) + batchSize;
        else
            idxTo = samplesLength - 1;
        tasksList.Add(Task<double>.Factory.StartNew(
            (batchBorders) => handleBatch(batchBorders),
            new int[]{ idxFrom, idxTo, samplesLength })
        );
    }
    Task.WaitAll(tasksList.ToArray());
    // Reduce the per-batch minima and only then take the square root, as in speedUp.
    double lowestExponent = double.MaxValue;
    foreach(var doneTask in tasksList){
        if(!doneTask.IsCompletedSuccessfully) throw new Exception("Unexpected consequences");
        if(doneTask.Result < lowestExponent)
            lowestExponent = doneTask.Result;
    }
    return Math.Sqrt(lowestExponent);
}
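One more thing worth trying, though I haven't benchmarked it here, so treat it as a sketch rather than a measured result: since the coordinates are integers, you can drop Math.Pow (and the cache) entirely, compare plain squared integer distances, and call Math.Sqrt exactly once at the end. Something along these lines (speedUpSimplest is just an illustrative name):

static double speedUpSimplest(int[,] samplesArray) {
    int samplesLength = samplesArray.GetLength(0);
    long lowestSquared = long.MaxValue;
    for (int i = 0; i < samplesLength; i++)
    {
        for (int j = i + 1; j < samplesLength; j++)
        {
            // Plain integer multiplication instead of Math.Pow; no square root inside the loops.
            long dx = samplesArray[i, 0] - samplesArray[j, 0];
            long dy = samplesArray[i, 1] - samplesArray[j, 1];
            long squared = dx * dx + dy * dy;
            if (squared < lowestSquared)
                lowestSquared = squared;
        }
    }
    // A single Math.Sqrt turns the smallest squared distance back into the actual distance.
    return Math.Sqrt(lowestSquared);
}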