I have two lists of the same length, and I wish to find out what percentage of all possible pairs of list indices' values have the same relationship for the two lists, in my case greater than (or sign(list[index_1] - list[index_2])).
Here is my slow implementation:
from itertools import combinations
import random
import numpy as np
values_lists = []
for i in range(4):
value_list = []
for j in range(50):
value_list.append(random.random())
values_lists.append(value_list)
#
total = 0.
n = 0
for list_1, list_2 in combinations(values_lists, 2):
for index_1, index_2 in combinations(range(len(list_1)), 2):
if np.sign(list_1[index_2] - list_1[index_1]) == np.sign(list_2[index_2] - list_2[index_1]):
total = 1
n = 1
print(total / n)
I'm wondering if anyone has a faster solution, as this takes some time.
CodePudding user response:
Precomputing the signs for each list and not using NumPy appears to make it about 14 times faster:
11.34 ms solution1
0.82 ms solution2
11.36 ms solution1
0.82 ms solution2
11.34 ms solution1
0.81 ms solution2
Code (Try it online!):
def solution1(values_lists):
total = 0.
n = 0
for list_1, list_2 in combinations(values_lists, 2):
for index_1, index_2 in combinations(range(len(list_1)), 2):
if np.sign(list_1[index_2] - list_1[index_1]) == np.sign(list_2[index_2] - list_2[index_1]):
total = 1
n = 1
return total / n
def solution2(values_lists):
signs_lists = [
[-1 if a < b else 1 if b < a else 0
for a, b in combinations(lst, 2)]
for lst in values_lists
]
total = 0.
n = 0
for signs_1, signs_2 in combinations(signs_lists, 2):
n = len(signs_1)
for sign_1, sign_2 in zip(signs_1, signs_2):
if sign_1 == sign_2:
total = 1
return total / n
funcs = solution1, solution2
from timeit import repeat
from itertools import combinations
import random
import numpy as np
values_lists = []
for i in range(4):
value_list = []
for j in range(50):
value_list.append(random.random())
values_lists.append(value_list)
for func in funcs:
print(func(values_lists))
for _ in range(3):
print()
for func in funcs:
t = min(repeat(lambda: func(values_lists), number=10)) / 10
print('%5.2f ms ' % (t * 1e3), func.__name__)
CodePudding user response:
Expanding on @Kelly Bundy 's answer, you can make solution2
even faster by summing a generator expression rather than the nested for loop:
def generatorway():
signs_lists = (
[-1 if a < b else 1 if b < a else 0
for a, b in combinations(lst, 2)]
for lst in values_lists)
combos = list(combinations(signs_lists, 2))
n = sum(len(i) for i, _ in combos)
total = sum(1 for i, j in combos for k, m in zip(i, j) if k==m)
return total/n
Speed comparison with values_lists = [[random.random() for _ in range(500)] for _ in range(4)]
:
print(generatorway())
print(solution2())
%timeit generatorway()
%timeit solution2()
Output:
0.4931048764195057
0.4931048764195057
66.9 ms ± 58.5 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
68.6 ms ± 38.6 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
CodePudding user response:
I think the best help I can offer is with formatting and organization. You should break your functions into pieces that operate independently.
I also simplified your nested list builder to use a list comprehension.
This doesn't address your code speed concerns, but I couldn't find anything slow when I run the script in question...
from itertools import combinations
import random
import numpy as np
from loguru import logger
@logger.catch
def build_nested_lists(inner_list_size=50, outer_list_size=4):
values_lists = []
for ii in range(outer_list_size):
value_list = [random.random() for jj in range(inner_list_size)]
values_lists.append(value_list)
return values_lists
@logger.catch
def handle_nested_lists(
values_lists=None,
):
assert isinstance(values_lists, (list, tuple))
total = 0.0
n = 0
for list_1, list_2 in combinations(values_lists, 2):
for index_1, index_2 in combinations(range(len(list_1)), 2):
if np.sign(list_1[index_2] - list_1[index_1]) == np.sign(
list_2[index_2] - list_2[index_1]
):
total = 1
n = 1
return total / n
if __name__=="__main__":
print(handle_nested_lists(build_nested_lists(50, 4)))