I have lists a
,b
,c
,... of equal length. I'd like to sort all of them the order obtained by sorting a
, i.e., I could do the decorate-sort-undecorate pattern
a, b, c = map(list, zip(*sorted(zip(a, b, c))))
or something like that. However, I'd like that the lists are sorted in place (I assume that sorted
pulls everything from the temporary iterator passed to it to a temporary list, and then zip
stuff into three output lists, so every datum in the input is copied twice unnecessarily) without creating temporary objects. So what I don't mean is:
a_sorted, b_sorted, c_sorted = map(list, zip(*sorted(zip(a, b, c))))
a[:] = a_sorted
b[:] = b_sorted
c[:] = c_sorted
How can I achieve that?
CodePudding user response:
I think "without creating temporary objects" is impossible, especially since "everything is an object" in Python.
You could get O(1) space / number of objects if you implement some sorting algorithm yourself, though if you want O(n log n) time and stability, it's difficult. If you don't care about stability (seems likely, since you say you want to sort by a
but then actually sort by a
, b
and c
), heapsort is reasonably easy:
def sort_together_heapsort(a, b, c):
n = len(a)
def swap(i, j):
a[i], a[j] = a[j], a[i]
b[i], b[j] = b[j], b[i]
c[i], c[j] = c[j], c[i]
def siftdown(i):
while (kid := 2*i 1) < n:
imax = kid if a[kid] > a[i] else i
kid = 1
if kid < n and a[kid] > a[imax]:
imax = kid
if imax == i:
return
swap(i, imax)
i = imax
for i in range(n // 2)[::-1]:
siftdown(i)
while n := n - 1:
swap(0, n)
siftdown(0)
Anyway, if someone's interested in just saving some amount of memory, that can be done by decorating in-place (building tuples and storing them in a
):
def sort_together_decorate_in_a(a, b, c):
for i, a[i] in enumerate(zip(a, b, c)):
pass
a.sort()
for i, [a[i], b[i], c[i]] in enumerate(a):
pass
Or if you trust that list.sort
will ask for keys for the elements in order (at least in CPython it does, already did so when the key
parameter was introduced 18 years ago, and I suspect will keep doing so):
def sort_together_iter_key(a, b, c):
it = iter(a)
b.sort(key=lambda _: next(it))
it = iter(a)
c.sort(key=lambda _: next(it))
a.sort()
Testing memory and time with three lists of 100,000 elements:
15,072,568 bytes 160 ms sort_together_zip_sort
15,072,320 bytes 140 ms sort_together_zip_sort_2
6,670,748 bytes 126 ms sort_together_decorate_in_a
1,597,408 bytes 84 ms sort_together_iter_key
744 bytes 1464 ms sort_together_heapsort
168 bytes 1303 ms sort_together_heapsort_opti
The second solution is a shortened/improved version of yours, no need for temporary variables and conversions to lists.
The whole code (Try it online!):
import tracemalloc as tm
from random import random
from timeit import timeit
def sort_together_zip_sort(a, b, c):
a_sorted, b_sorted, c_sorted = map(list, zip(*sorted(zip(a, b, c))))
a[:] = a_sorted
b[:] = b_sorted
c[:] = c_sorted
def sort_together_zip_sort_2(a, b, c):
a[:], b[:], c[:] = zip(*sorted(zip(a, b, c)))
def sort_together_decorate_in_a(a, b, c):
for i, a[i] in enumerate(zip(a, b, c)):
pass
a.sort()
for i, [a[i], b[i], c[i]] in enumerate(a):
pass
def sort_together_iter_key(a, b, c):
it = iter(a)
b.sort(key=lambda _: next(it))
it = iter(a)
c.sort(key=lambda _: next(it))
a.sort()
def sort_together_heapsort(a, b, c):
n = len(a)
def swap(i, j):
a[i], a[j] = a[j], a[i]
b[i], b[j] = b[j], b[i]
c[i], c[j] = c[j], c[i]
def siftdown(i):
while (kid := 2*i 1) < n:
imax = kid if a[kid] > a[i] else i
kid = 1
if kid < n and a[kid] > a[imax]:
imax = kid
if imax == i:
return
swap(i, imax)
i = imax
for i in range(n // 2)[::-1]:
siftdown(i)
while n := n - 1:
swap(0, n)
siftdown(0)
def sort_together_heapsort_opti(a, b, c):
# Avoid inner functions and range-loop to minimize memory.
# Makes it faster, too. But duplicates code. Not recommended.
n = len(a)
i0 = n // 2 - 1
while i0 >= 0:
i = i0
while (kid := 2*i 1) < n:
imax = kid if a[kid] > a[i] else i
kid = 1
if kid < n and a[kid] > a[imax]:
imax = kid
if imax == i:
break
a[i], a[imax] = a[imax], a[i]
b[i], b[imax] = b[imax], b[i]
c[i], c[imax] = c[imax], c[i]
i = imax
i0 -= 1
while n := n - 1:
a[0], a[n] = a[n], a[0]
b[0], b[n] = b[n], b[0]
c[0], c[n] = c[n], c[0]
i = 0
while (kid := 2*i 1) < n:
imax = kid if a[kid] > a[i] else i
kid = 1
if kid < n and a[kid] > a[imax]:
imax = kid
if imax == i:
break
a[i], a[imax] = a[imax], a[i]
b[i], b[imax] = b[imax], b[i]
c[i], c[imax] = c[imax], c[i]
i = imax
funcs = [
sort_together_zip_sort,
sort_together_zip_sort_2,
sort_together_decorate_in_a,
sort_together_iter_key,
sort_together_heapsort,
sort_together_heapsort_opti,
]
n = 100000
a0 = [random() for _ in range(n)]
b0 = [x 1 for x in a0]
c0 = [x 2 for x in a0]
for _ in range(3):
for func in funcs:
a, b, c = a0[:], b0[:], c0[:]
time = timeit(lambda: func(a, b, c), number=1)
assert a == sorted(a0)
assert b == sorted(b0)
assert c == sorted(c0)
a, b, c = a0[:], b0[:], c0[:]
tm.start()
func(a, b, c)
memory = tm.get_traced_memory()[1]
tm.stop()
print(f'{memory:10,} bytes {int(time * 1e3):4} ms {func.__name__}')
print()