I am trying to make a small fixed-point math library. My fixed point numbers are 32-bit, with 16 bits each for the integral and fractional parts. The trouble comes with adding fixed-point numbers and then seeing the resulting value. The function fixed_from_parts
below takes an integral and fractional part, and emits a fixed-point number, so fixed_from_parts(5, 2)
would equal 0000000000000101.0000000000000010
.
When adding two numbers, as seen in the main
function below, it seems that the integral parts are added as one number, and the fractional part is added as another (5.2 + 3.9 incorrectly becomes 8.11, because 5 + 3 == 8 and 2 + 9 == 11). I think that I need to reverse the order of the bits stored in the fractional part, but I'm not quite sure how to do that. Am I overcomplicating this? How do I make addition work correctly?
#include <stdint.h>
#include <stdio.h>
// 16-bit signed type used for the integral (whole-number) part.
typedef int16_t integral_t;
// 32-bit fixed-point value: upper 16 bits integral, lower 16 bits fractional.
typedef int32_t fixed_t;
fixed_t int_to_fixed(const integral_t x) {
return x << 16;
}
integral_t fixed_to_int(const fixed_t x) {
return x >> 16;
}
// Returns the low 16 (fractional) bits of x as an integral_t.
// NOTE: the cast binds tighter than the shifts, so the expression is
// evaluated as ((integral_t)x << 16) >> 16: x is narrowed to 16 bits first,
// then shifted left and back right. The left shift is undefined when the
// narrowed value is negative, and because the return type is signed, a
// fraction with bit 15 set typically comes back as a negative number.
integral_t get_fixed_fractional(const fixed_t x) {
return (integral_t) x << 16 >> 16;
}
// Combines an integral part and a raw fractional value into one fixed-point
// number by adding `fractional` to the shifted integral part.
// Intended usage: fixed_from_parts(5, 2) == 5.2
// (`fractional` lands unchanged in the low 16 bits of the result.)
fixed_t fixed_from_parts(const integral_t integral, const integral_t fractional) {
    return int_to_fixed(integral) + fractional;
}
// Prints every bit of x, most significant first, with a '.' between the
// integral and fractional halves, followed by a newline.
void print_fixed_base_2(const fixed_t x) {
    const int total_bits = (int)(sizeof(fixed_t) << 3);
    const int fraction_bits = (int)(sizeof(fixed_t) << 2);
    for (int i = total_bits - 1; i >= 0; i--) {
        // The mask is built from an unsigned 1: `1 << 31` would overflow a
        // signed int, which is undefined behavior.
        putchar(((uint32_t)x & ((uint32_t)1 << i)) ? '1' : '0');
        if (i == fraction_bits) {
            putchar('.');
        }
    }
    putchar('\n');
}
void print_fixed_base_10(const fixed_t x) {
printf("%d.%d\n", fixed_to_int(x), get_fixed_fractional(x));
}
// Demo: builds two values, adds them, and prints the operands and the sum.
int main(void) {
    // 5.2 + 3.9 = 9.1
    const fixed_t a = fixed_from_parts(5, 2), b = fixed_from_parts(3, 9);
    print_fixed_base_2(a);
    print_fixed_base_2(b);
    const fixed_t result = a + b;
    print_fixed_base_2(result);
    print_fixed_base_10(result); // why is the result 8.11?
}
CodePudding user response:
Your representation is not really fixed-point.
Example:
// Scale factor of the format: 2^16 fixed-point units per 1.0.
#define MULT (1 << 16)
// Converts a real value to fixed point; the cast truncates toward zero.
// The argument is parenthesized so that an expression such as
// MAKE_FIXED(1 + 1) is scaled as a whole rather than only its last term.
#define MAKE_FIXED(d) ((int32_t)((d) * MULT))
// Converts a fixed-point value back to a double.
#define MAKE_REAL(f) (((double)(f)) / MULT)
// Multiplies two fixed-point values. The 64-bit raw product carries the scale
// factor twice, so one factor of MULT is divided back out before returning.
int32_t mulf(int32_t a, int32_t b)
{
    const int64_t wide_product = (int64_t)a * b;
    const int64_t rescaled = wide_product / MULT;
    return (int32_t)rescaled;
}
// Divides a by b in fixed point. The dividend is pre-scaled by MULT in 64 bits
// so that the quotient keeps its fractional bits. b must be non-zero.
int32_t divf(int32_t a, int32_t b)
{
    const int64_t scaled_dividend = (int64_t)a * MULT;
    return (int32_t)(scaled_dividend / b);
}
// Demo: adds, multiplies and divides 5.2 and 3.9 in fixed point and prints
// each result converted back to a double.
int main(void)
{
    int32_t num1 = MAKE_FIXED(5.2);
    int32_t num2 = MAKE_FIXED(3.9);
    // Addition needs no helper: both raw values share the same scale.
    printf("%f\n", MAKE_REAL(num1 + num2));
    int32_t result = mulf(num1, num2);
    printf("%f\n", MAKE_REAL(result));
    result = divf(num1, num2);
    printf("%f\n", MAKE_REAL(result));
}
CodePudding user response:
There are multiple problems in your code:
the function
get_fixed_fractional
has undefined behavior: to get rid of the integral part, you shift it out with << 16,
which may cause arithmetic overflow. Furthermore, the type integral_t
is signed whereas the fractional part should be unsigned. You should just mask the high bits and return a fixed_t:
// clear the integral bits
fixed_t get_fixed_fractional(fixed_t x) { return x & 0xFFFF; }
you print the fractional part with
%d
, but it produces misleading output: fixed_from_parts(5, 2)
is printed as 5.2
but the value is 5.000030517578125
, which you could round as 5.00003
. The code to print a fixed_t
should be:
void print_fixed_base_10(const fixed_t x) { printf("%d.%05lld\n", fixed_to_int(x), (get_fixed_fractional(x) * 100000LL + 32768) / 65536); }
Here is a modified version:
#include <stdint.h>
#include <stdio.h>
// 16-bit signed type used for the integral (whole-number) part.
typedef int16_t integral_t;
// 32-bit fixed-point value: upper 16 bits integral, lower 16 bits fractional.
typedef int32_t fixed_t;
// Converts a whole number to fixed point (fractional bits all zero).
// Multiplies by 2^16 instead of left-shifting: `x << 16` is undefined
// behavior when x is negative, while the product always fits in 32 bits.
fixed_t int_to_fixed(integral_t x) {
    return (fixed_t)x * 65536;
}
// Extracts the integral part by shifting the 16 fractional bits away.
integral_t fixed_to_int(fixed_t x) {
    const fixed_t whole = x >> 16;
    return (integral_t)whole;
}
// Clears the integral bits and returns the fractional part as a non-negative
// count of 1/65536 units (0..65535).
// The result type is fixed_t rather than the signed 16-bit integral_t:
// narrowing to 16 signed bits would turn any fraction of 0.5 or more
// (bit 15 set) into a negative number.
fixed_t get_fixed_fractional(fixed_t x) {
    return x & 0xFFFF;
}
// Builds a fixed-point value from raw parts. `fractional` is added directly
// into the low 16 bits, so it counts units of 1/65536, not decimal digits:
// fixed_from_parts(5, 2) == 5 + 2/65536 (about 5.00003), not 5.2.
fixed_t fixed_from_parts(integral_t integral, integral_t fractional) {
    return int_to_fixed(integral) + fractional;
}
// Prints the 32 bits of x, most significant first, with a '.' after the
// 16 integral bits, followed by a newline.
void print_fixed_base_2(fixed_t x) {
    int bit = 31;
    while (bit >= 0) {
        const uint32_t mask = (uint32_t)1 << bit;
        putchar((x & mask) ? '1' : '0');
        if (bit == 16) {
            putchar('.');
        }
        bit--;
    }
    putchar('\n');
}
// Prints x in decimal with five fractional digits, followed by a newline.
void print_fixed_base_10(fixed_t x) {
    // Mask to 16 bits so the fraction is non-negative even when the helper
    // hands it back through a signed 16-bit type.
    const long long frac_units = get_fixed_fractional(x) & 0xFFFF;
    // Scale 1/65536 units to 1/100000 units; adding 32768 (half of 65536)
    // before dividing rounds to nearest.
    const long long frac_decimal = (frac_units * 100000LL + 32768) / 65536;
    // %05lld zero-pads the fraction so that e.g. 3 prints as ".00003".
    printf("%d.%05lld\n", fixed_to_int(x), frac_decimal);
}
// Demo: builds two values, adds them, and prints operands and sum in both
// binary and decimal.
int main(void) {
    // 5.2 + 3.9 = 9.1 (not really)
    const fixed_t a = fixed_from_parts(5, 2), b = fixed_from_parts(3, 9);
    const fixed_t result = a + b;
    print_fixed_base_2(a);
    print_fixed_base_2(b);
    print_fixed_base_2(result);
    print_fixed_base_10(a);
    print_fixed_base_10(b);
    print_fixed_base_10(result);
    return 0;
}
Output:
0000000000000101.0000000000000010
0000000000000011.0000000000001001
0000000000001000.0000000000001011
5.00003
3.00014
8.00017
You might want to pass a third argument to fixed_from_parts
to specify the denominator:
// Builds a fixed-point value from an integral part plus the fraction
// fractional/denominator, rounded to the nearest 1/65536.
// fixed_from_parts(5, 2, 10) == 5.2 (to within the format's precision)
// denominator must be non-zero.
fixed_t fixed_from_parts(integral_t integral, unsigned int fractional, unsigned int denominator) {
    // 64-bit arithmetic avoids overflow; adding denominator / 2 before the
    // division rounds to nearest instead of truncating.
    return int_to_fixed(integral) + (fixed_t)((fractional * 65536LL + denominator / 2) / denominator);
}