I have to compare 2M lists which contains 3136 elements each
The way I do it now is:
function compare_matrices(connectivity_list, configuration_list)
i = 1
while i <= size(connectivity_list, 1)
x = connectivity_list[i]
j = i 1
while j <= size(connectivity_list, 1)
if all(isapprox(x[k], connectivity_list[j][k] ; atol = 0.0001) for k=1:1540)
deleteat!(connectivity_list, j)
deleteat!(configuration_list, j)
else
j = 1
end
end
i = 1
end
return configuration_list
end
For information, these lists are originally matrices that I vectorised and sorted to be able to compare element per elements. As you can guess, this will take quite a long time.
...
push!(connectivity_list, sort(vec(connectivity)))
...
Where connectivity is a 2D (lower triangular) matrix
Is there a faster way to compare lists in such way using Julia? I thought about using Sets and compare them but some elements are similar, so it is not possible.
CodePudding user response:
This is about 70X faster on some contrived data, so expect a different factor on your real data. The main changes are as follows:
- Use a
flag
array to keep track of distinct vs. approximately equal vectors to avoid deleting vectors inside the loop. - Skip vectors whose
flag
becomesfalse
- Use
abs(x[k]-y[k]) <= 0.0001
instead ofisapprox
- Return the modified array as
configuration_list[flag]
function compare_matrices2(connectivity_list, configuration_list)
l = length(connectivity_list)
m = length(connectivity_list[1])
flag = trues(l)
for i = 1:l
flag[i] || continue
x = connectivity_list[i]
for j = i 1:l
flag[j] || continue
y = connectivity_list[j]
if all(abs(x[k]-y[k]) <= 0.0001 for k=1:m)
flag[j] = false
end
end
end
return configuration_list[flag]
end