I have some Numba-accelerated code in NoPython mode using `numba.njit()`.
At some point I need to parse a `str` (or `bytes`) to get a `float`.
In pure Python, the way I would go about it is with `float()`, but that does not work:
import numba as nb


# Minimal reproduction: calling float() on a str inside an njit-compiled
# function is rejected by Numba's nopython front end (see the TypingError
# reported right after this snippet).
@nb.njit
def str2float(text):
    return float(text)


str2float("1.2")
TypingError: Failed in nopython mode pipeline (step: nopython frontend) No implementation of function Function(<class 'float'>) found for signature:
>>> float(unicode_type)
while I would like it to produce a `float` with value `1.2`.
The following questions are somewhat related:
- this question discusses parsing to `int` (`str`/`bytes`-to-`int`)
- this question discusses the opposite direction, i.e. `float`-to-`str` conversion
CodePudding user response:
While this is not yet supported (as of July 2022), you can implement something manually.
Below are two versions, one for str
and one for bytes
.
In the process of solving the task I use a `str`/`bytes`-to-`int` conversion (`str2int()` / `bytes2int()`), which is used to parse the exponent of inputs written in exponential notation, e.g. `1.0e-02`, and -- potentially -- a `trim()` function to pre-process inputs surrounded by whitespace ("C" whitespace: `" "`, `"\n"`, `"\r"`, `"\t"`, `"\v"`).
Both are presented here (in a linked answer) and are only used, not defined, below.
From str
import math
import numba as nb
@nb.njit
def str2float_helper(text):
    """Parse a plain decimal literal: optional sign, digits, at most one ".".

    The text is scanned right-to-left so that the power of ten of each digit
    is simply the number of digits consumed so far.

    Args:
        text: The mantissa as ``str``. It must not contain an exponent part
            or surrounding whitespace; any such character makes it invalid.

    Returns:
        A ``(number, sep_pos, valid)`` tuple. ``number`` is the signed
        integer formed by all digits with the separator removed, ``sep_pos``
        is the count of digits to the right of the separator (0 if there is
        none) and ``valid`` tells whether ``text`` was well formed. For valid
        input the parsed value is ``number / 10 ** sep_pos``.
    """
    sep = ord(".")
    c_min = ord("0")
    c_max = ord("9")

    n = len(text)
    valid = n > 0
    # determine sign: a leading "+" or "-" is excluded from the digit scan
    start = n - 1
    stop = -1
    sign = 1
    if valid:
        first = ord(text[0])
        if first == ord("+"):
            stop = 0
        elif first == ord("-"):
            sign = -1
            stop = 0
    # parse rest
    sep_pos = 0
    seen_sep = False
    number = 0
    n_digits = 0
    # NOTE(review): under Numba the accumulator is presumably a fixed-width
    # integer, so very long digit strings may overflow -- confirm.
    for i in range(start, stop, -1):
        c = ord(text[i])
        if c_min <= c <= c_max:
            # accumulate: the digit is worth 10 ** (digits already consumed)
            number += (c - c_min) * 10 ** n_digits
            n_digits += 1
        elif c == sep and not seen_sep:
            # an explicit flag is needed: sep_pos is legitimately 0 for a
            # trailing separator ("12."), so it cannot double as "not seen"
            seen_sep = True
            sep_pos = n_digits
        else:
            valid = False
            break
    if n_digits == 0:
        # inputs such as "", "-" or "." contain no digit at all
        valid = False
    return sign * number, sep_pos, valid
@nb.njit
def str2float(text):
    """Convert a ``str`` to ``float`` inside nopython code.

    Supports the literals "nan", "NAN" and "NaN", plain decimals and
    exponential notation with "e" or "E". Surrounding whitespace is not
    handled and makes the input invalid.

    Args:
        text: The text to parse.

    Returns:
        The parsed ``float``, or ``None`` when the mantissa is not valid.
    """
    if text == "nan" or text == "NAN" or text == "NaN":
        return math.nan
    # Iterating the bytes literal yields the integer codes of "e" and "E".
    exp_chars = b"eE"
    exp_pos = -1
    for exp_char in exp_chars:
        # exp_pos is the distance of the exponent marker from the END of
        # the text, i.e. the number of characters in the exponent part.
        for i, c in enumerate(text[::-1]):
            c = ord(c)
            if c == exp_char:
                exp_pos = i
                break
        if exp_pos > -1:
            break
    if exp_pos > 0:
        # str2int() is defined elsewhere; presumably it returns None when
        # the exponent text is not a valid integer -- verify against it.
        exp_number = str2int(text[-exp_pos:])
        if exp_number is None:
            # NOTE(review): an unparsable exponent is silently treated as 0
            # instead of invalidating the whole input.
            exp_number = 0
        number, sep_pos, valid = str2float_helper(text[:-exp_pos-1])
        result = number / 10.0 ** (sep_pos - exp_number) if valid else None
    else:
        # No marker, or a marker as the very last character (exp_pos == 0);
        # in the latter case the helper rejects the "e"/"E" itself.
        number, sep_pos, valid = str2float_helper(text)
        result = number / 10.0 ** sep_pos if valid else None
    return result
This should work similarly to float_()
(defined below) which is a helper function that returns None
instead of raising in case of parsing failure:
def float_(x):
    """Return ``float(x)``, or ``None`` when ``x`` is not a valid literal.

    Reference implementation used to compare the Numba parsers against the
    builtin: parsing failures are reported as ``None`` instead of raising
    ``ValueError``.
    """
    try:
        return float(x)
    except ValueError:
        return None
def is_close(x, y):
    """Tell whether two parse results agree.

    Each argument is either a ``float`` or ``None`` (parsing failure).

    Returns:
        ``True`` when both are ``None``, or when both are numbers that are
        equal, close according to ``math.isclose()``, or both NaN;
        ``False`` otherwise.
    """
    # Test for None explicitly: relying on truthiness would treat 0.0 like
    # None and hand None to math.isclose(), which raises TypeError.
    if x is None or y is None:
        return x is None and y is None
    return x == y or math.isclose(x, y) or (math.isnan(x) and math.isnan(y))
# Compare str2float() with the builtin float() on valid and invalid inputs.
# Printed columns: input, float_() result, str2float() result, agreement.
numbers = (
    "", "NaN", "10", "32.1", "4123.43214e+05", "4123.43214E+05", "4123.43214e-05",
    "-31", "-12.3", "-4123.43214e+05", "-4123.43214E+05", "-4123.43214e-05",
    " 1321.432 \t ", "1 2", "1-2", "1e", "e1", "1.1e-200", "1.1e+200",
)
k = 24
for number in numbers:
    print(f"{number!r:{k}} {float_(number)!s:{k}} {str2float(number)!s:{k}} {is_close(float_(number), str2float(number))}")
# '' None None True
# 'NaN' nan nan True
# '10' 10.0 10.0 True
# '32.1' 32.1 32.1 True
# '4123.43214e+05' 412343214.0 412343214.0 True
# '4123.43214E+05' 412343214.0 412343214.0 True
# '4123.43214e-05' 0.0412343214 0.0412343214 True
# '-31' -31.0 -31.0 True
# '-12.3' -12.3 -12.3 True
# '-4123.43214e+05' -412343214.0 -412343214.0 True
# '-4123.43214E+05' -412343214.0 -412343214.0 True
# '-4123.43214e-05' -0.0412343214 -0.0412343214 True
# ' 1321.432 \t ' 1321.432 None False
# '1 2' None None True
# '1-2' None None True
# '1e' None None True
# 'e1' None None True
# '1.1e-200' 1.1e-200 1.0999999999999995e-200 True
# '1.1e+200' 1.1e+200 1.1000000000000005e+200 True
(except for the trimming whitespaces part which can be added if needed).
Timewise, this is some 12x slower than pure Python:
%timeit -n 32 -r 32 [str2float(number) for number in numbers]
# 32 loops, best of 32: 80.3 µs per loop
%timeit -n 32 -r 32 [float_(number) for number in numbers]
# 32 loops, best of 32: 6.55 µs per loop
and hence only useful if this is needed as part of a more complex njit()
-ed code.
From bytes
This is essentially a rewrite of the above to work with `bytes`, which typically only requires skipping some `ord()` calls (because iterating over `bytes` provides the integer representation directly) and defining an equality helper (`eqb()`), because the `==` operator is not available for `bytes` under Numba.
@nb.njit
def bytes2float_helper(text):
    """Parse a plain decimal literal: optional sign, digits, at most one ".".

    The bytes are scanned right-to-left so that the power of ten of each
    digit is simply the number of digits consumed so far.

    Args:
        text: The mantissa as ``bytes``. It must not contain an exponent
            part or surrounding whitespace; any such byte makes it invalid.

    Returns:
        A ``(number, sep_pos, valid)`` tuple. ``number`` is the signed
        integer formed by all digits with the separator removed, ``sep_pos``
        is the count of digits to the right of the separator (0 if there is
        none) and ``valid`` tells whether ``text`` was well formed. For valid
        input the parsed value is ``number / 10 ** sep_pos``.
    """
    sep = ord(".")
    c_min = ord("0")
    c_max = ord("9")

    n = len(text)
    valid = n > 0
    # determine sign: a leading "+" or "-" is excluded from the digit scan
    start = n - 1
    stop = -1
    sign = 1
    if valid:
        # indexing bytes already yields the integer code, no ord() needed
        first = text[0]
        if first == ord("+"):
            stop = 0
        elif first == ord("-"):
            sign = -1
            stop = 0
    # parse rest
    sep_pos = 0
    seen_sep = False
    number = 0
    n_digits = 0
    # NOTE(review): under Numba the accumulator is presumably a fixed-width
    # integer, so very long digit strings may overflow -- confirm.
    for i in range(start, stop, -1):
        c = text[i]
        if c_min <= c <= c_max:
            # accumulate: the digit is worth 10 ** (digits already consumed)
            number += (c - c_min) * 10 ** n_digits
            n_digits += 1
        elif c == sep and not seen_sep:
            # an explicit flag is needed: sep_pos is legitimately 0 for a
            # trailing separator (b"12."), so it cannot double as "not seen"
            seen_sep = True
            sep_pos = n_digits
        else:
            valid = False
            break
    if n_digits == 0:
        # inputs such as b"", b"-" or b"." contain no digit at all
        valid = False
    return sign * number, sep_pos, valid
@nb.njit
def eqb(text_a, text_b):
    """Return True when both byte sequences have equal length and content.

    Element-wise stand-in for ``text_a == text_b`` that can be called from
    nopython code.
    """
    len_a = len(text_a)
    if len_a != len(text_b):
        return False
    for i in range(len_a):
        if text_a[i] != text_b[i]:
            return False
    return True
@nb.njit
def bytes2float(text):
    """Convert ``bytes`` to ``float`` inside nopython code.

    Supports the literals b"nan", b"NAN" and b"NaN", plain decimals and
    exponential notation with "e" or "E". Surrounding whitespace is not
    handled and makes the input invalid.

    Args:
        text: The bytes to parse.

    Returns:
        The parsed ``float``, or ``None`` when the mantissa is not valid.
    """
    if eqb(text, b"nan") or eqb(text, b"NAN") or eqb(text, b"NaN"):
        return math.nan
    # Iterating the bytes literal yields the integer codes of "e" and "E".
    exp_chars = b"eE"
    exp_pos = -1
    for exp_char in exp_chars:
        # exp_pos is the distance of the exponent marker from the END of
        # the text, i.e. the number of bytes in the exponent part.
        for i, c in enumerate(text[::-1]):
            if c == exp_char:
                exp_pos = i
                break
        if exp_pos > -1:
            break
    if exp_pos > 0:
        # bytes2int() is defined elsewhere; presumably it returns None when
        # the exponent text is not a valid integer -- verify against it.
        exp_number = bytes2int(text[-exp_pos:])
        if exp_number is None:
            # NOTE(review): an unparsable exponent is silently treated as 0
            # instead of invalidating the whole input.
            exp_number = 0
        number, sep_pos, valid = bytes2float_helper(text[:-exp_pos-1])
        result = number / 10.0 ** (sep_pos - exp_number) if valid else None
    else:
        # No marker, or a marker as the very last byte (exp_pos == 0); in
        # the latter case the helper rejects the "e"/"E" itself.
        number, sep_pos, valid = bytes2float_helper(text)
        result = number / 10.0 ** sep_pos if valid else None
    return result
The interesting bit of this is that it has comparable speed to the pure Python counterpart (albeit marginally slower, by some 15%):
# Compare bytes2float() with the builtin float() on valid and invalid inputs.
# Printed columns: input, float_() result, bytes2float() result, agreement.
numbers = (
    b"", b"NaN", b"10", b"32.1", b"4123.43214e+05", b"4123.43214E+05", b"4123.43214e-05",
    b"-31", b"-12.3", b"-4123.43214e+05", b"-4123.43214E+05", b"-4123.43214e-05",
    b" 1321.432 ", b"1 2", b"1-2", b"1e", b"e1", b"1.1e-200", b"1.1e+200",
)
k = 24
for number in numbers:
    print(f"{number!s:{k}} {float_(number)!s:{k}} {bytes2float(number)!s:{k}} {is_close(float_(number), bytes2float(number))}")
# b'' None None True
# b'NaN' nan nan True
# b'10' 10.0 10.0 True
# b'32.1' 32.1 32.1 True
# b'4123.43214e+05' 412343214.0 412343214.0 True
# b'4123.43214E+05' 412343214.0 412343214.0 True
# b'4123.43214e-05' 0.0412343214 0.0412343214 True
# b'-31' -31.0 -31.0 True
# b'-12.3' -12.3 -12.3 True
# b'-4123.43214e+05' -412343214.0 -412343214.0 True
# b'-4123.43214E+05' -412343214.0 -412343214.0 True
# b'-4123.43214e-05' -0.0412343214 -0.0412343214 True
# b' 1321.432 ' 1321.432 None False
# b'1 2' None None True
# b'1-2' None None True
# b'1e' None None True
# b'e1' None None True
# b'1.1e-200' 1.1e-200 1.0999999999999995e-200 True
# b'1.1e+200' 1.1e+200 1.1000000000000005e+200 True
%timeit -n 32 -r 32 [bytes2float(number) for number in numbers]
# 32 loops, best of 32: 8.84 µs per loop
%timeit -n 32 -r 32 [float_(number) for number in numbers]
# 32 loops, best of 32: 7.66 µs per loop