So I'm trying to make a function in Python to return all of the powers of two used in the binary of a number.
For example: 123 in binary is 1111011. I want my function to return the powers of two corresponding to the True bits of 123 (1, 2, 8, 16, 32 and 64) as fast as possible.
Right now the best I've found is:
def true_bits(num):
while num:
temp = num & -num
num -= temp
yield temp
CodePudding user response:
Some (non-faster) alternatives:
Using numpy
and assuming 8bits unsigned integers:
import numpy as np
def numpy_bits(num):
bits = np.unpackbits(np.uint8(num), bitorder='little')
return 2**np.arange(8)[bits.astype(bool)]
numpy_bits(123)
# array([ 1, 2, 8, 16, 32, 64])
# 6.8 µs ± 293 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
Using a python loop (decreasing bit order):
def python_bits(num):
for i in range(7,-1,-1):
if num >= (x:=2**i):
yield x
num -= x
list(python_bits(123))
# [64, 32, 16, 8, 2, 1]
# 2.26 µs ± 61.3 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
OP's approach:
list(true_bits(123))
# [1, 2, 8, 16, 32, 64]
# 1.14 µs ± 37.1 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
CodePudding user response:
Benchmark with a bunch of random 64-bit numbers with 24 true bits (based on what you said in the comments):
47.7 ms true_bits_original
16.0 ms true_bits_Kelly
45.6 ms true_bits_original
15.7 ms true_bits_Kelly
47.4 ms true_bits_original
16.3 ms true_bits_Kelly
I'm using eight lookup-tables, each responsible for eight of the bits. Full code with benchmark (Try it online!):
intern = {2**i: 2**i for i in range(64)}.get
table0 = [()]
for i in range(8):
table0 = [(*bits, intern(2**i)) for bits in table0]
table0 = tuple(table0)
table1 = tuple(tuple(intern(bit << 8) for bit in bits) for bits in table0)
table2 = tuple(tuple(intern(bit << 8) for bit in bits) for bits in table1)
table3 = tuple(tuple(intern(bit << 8) for bit in bits) for bits in table2)
table4 = tuple(tuple(intern(bit << 8) for bit in bits) for bits in table3)
table5 = tuple(tuple(intern(bit << 8) for bit in bits) for bits in table4)
table6 = tuple(tuple(intern(bit << 8) for bit in bits) for bits in table5)
table7 = tuple(tuple(intern(bit << 8) for bit in bits) for bits in table6)
def true_bits_Kelly(num):
return chain(table0[num & 0xff],
table1[(num >> 8) & 0xff],
table2[(num >> 16) & 0xff],
table3[(num >> 24) & 0xff],
table4[(num >> 32) & 0xff],
table5[(num >> 40) & 0xff],
table6[(num >> 48) & 0xff],
table7[num >> 56])
def true_bits_original(num):
while num:
temp = num & -num
num -= temp
yield temp
funcs = true_bits_original, true_bits_Kelly
import timeit
from itertools import repeat, chain
from random import getrandbits, sample
from collections import deque
# Correctness
for _ in range(1000):
num = getrandbits(64)
expect = list(funcs[0](num))
for func in funcs:
assert list(func(num)) == expect
# Speed
for _ in range(3):
nums = [sum(2**i for i in sample(range(64), 24))
for _ in range(10000)]
for func in funcs:
def run():
gens = map(func, nums)
consumes = map(deque, gens, repeat(0))
deque(consumes, 0)
t = min(timeit.repeat(run, number=1))
print('%4.1f ms ' % (t * 1e3), func.__name__)
print()