Trouble in implementing fixed-point numbers in C-CodePudding

I am trying to make a small fixed-point math library. My fixed point numbers are 32-bit, with 16 bits each for the integral and fractional parts. The trouble comes with adding fixed-point numbers and then seeing the resulting value. The function fixed_from_parts below takes an integral and fractional part, and emits a fixed-point number, so fixed_from_parts(5, 2) would equal 0000000000000101.0000000000000010.

When adding two numbers, as seen in the main function below, it seems that the integral parts are added as one number, and the fractional parts are added as another (5.2 + 3.9 incorrectly becomes 8.11, because 5 + 3 == 8 and 2 + 9 == 11). I think that I need to reverse the order of the bits stored in the fractional part, but I'm not quite sure how to do that. Am I overcomplicating this? How do I make addition work correctly?

#include <stdint.h>
#include <stdio.h>

typedef int16_t integral_t;
typedef int32_t fixed_t;

fixed_t int_to_fixed(const integral_t x) {
    return x << 16;
} 

integral_t fixed_to_int(const fixed_t x) {
    return x >> 16;
}

// Intended to return the fractional (low 16) bits of x.
// The cast binds tighter than the shifts, so x is first narrowed to
// integral_t; that value is then promoted to int, shifted left by 16 and
// shifted back right by 16. The left shift is undefined behavior when the
// narrowed value is negative, and the result is signed even though a
// fractional field is naturally unsigned.
integral_t get_fixed_fractional(const fixed_t x) {
    return (integral_t) x << 16 >> 16;
}

// Builds a fixed-point value from an integral part and a raw fractional field.
// fixed_from_parts(5, 2) yields 0000000000000101.0000000000000010: it is
// intended as 5.2, but `fractional` is added as raw low-order bits, so the
// value actually represented is 5 + 2/65536.
fixed_t fixed_from_parts(const integral_t integral, const integral_t fractional) {
    return int_to_fixed(integral) + fractional;
}

// Prints every bit of x, most significant first, with a '.' between the
// integral half and the fractional half, followed by a newline.
void print_fixed_base_2(const fixed_t x) {
    // Test bits on an unsigned copy: `1 << 31` overflows a signed int,
    // which is undefined behavior.
    const uint32_t bits = (uint32_t)x;
    const int total_bits = (int)(sizeof(fixed_t) << 3);

    for (int i = total_bits - 1; i >= 0; i--) {
        putchar(((bits >> i) & 1u) ? '1' : '0');
        if (i == total_bits / 2) putchar('.');
    }
    putchar('\n');
}

void print_fixed_base_10(const fixed_t x) {
    printf("%d.%d\n", fixed_to_int(x), get_fixed_fractional(x));
}

// Demo: builds two values, prints them in binary, then prints their sum in
// binary and in decimal.
int main(void) {
    // 5.2 + 3.9 = 9.1
    const fixed_t a = fixed_from_parts(5, 2), b = fixed_from_parts(3, 9);

    print_fixed_base_2(a);
    print_fixed_base_2(b);

    // fixed-point addition is ordinary integer addition of the raw values
    const fixed_t result = a + b;

    print_fixed_base_2(result);
    print_fixed_base_10(result); // why is the result 8.11?
}

CodePudding user response：

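Your representation is not really fixed-point: the fractional field must hold the fraction scaled by 2^16, not its decimal digits.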

Example:

/* Scale factor of the 16.16 format: one fixed-point unit is 1/65536. */
#define MULT    (1 << 16)

/* Converts a real value to fixed-point, truncating toward zero. The argument
   is parenthesized so that expressions such as MAKE_FIXED(a + b) are scaled
   as a whole. */
#define MAKE_FIXED(d)  ((int32_t)((d) * MULT))
/* Converts a fixed-point value back to a double. */
#define MAKE_REAL(f)   (((double)(f)) / MULT)

/*
 * Multiplies two fixed-point values.
 * The product of two values scaled by MULT is scaled by MULT twice, so it is
 * computed in 64 bits and then divided by MULT once.
 */
int32_t mulf(int32_t a, int32_t b)
{
    const int64_t product = (int64_t)a * (int64_t)b;

    return (int32_t)(product / MULT);
}

/*
 * Divides fixed-point a by fixed-point b.
 * The dividend is pre-scaled by MULT in 64 bits so the quotient keeps the
 * fixed-point scale. b must be non-zero.
 */
int32_t divf(int32_t a, int32_t b)
{
    const int64_t scaled_dividend = (int64_t)a * MULT;

    return (int32_t)(scaled_dividend / b);
}


/* Demo: adds, multiplies and divides 5.2 and 3.9 in fixed-point and prints
   each result converted back to a double. */
int main(void)
{
    int32_t num1 = MAKE_FIXED(5.2);
    int32_t num2 = MAKE_FIXED(3.9);


    /* addition needs no helper: both operands share the same scale */
    printf("%f\n", MAKE_REAL(num1 + num2));
    int32_t result = mulf(num1, num2);
    printf("%f\n", MAKE_REAL(result));
    result = divf(num1,num2);
    printf("%f\n", MAKE_REAL(result));
}

CodePudding user response：

There are multiple problems in your code:

the function get_fixed_fractional has undefined behavior: to get rid of the integral part, you shift it out with << 16 which may cause arithmetic overflow. Furthermore, the type integral_t is signed whereas the fractional part should be unsigned. You should just mask the high bits and return a fixed_t:
```
// keep only the low 16 bits (the fractional field); the integral bits are cleared
fixed_t get_fixed_fractional(fixed_t x) {
    const fixed_t fraction_mask = 0xFFFF;
    return x & fraction_mask;
}
```
you print the fractional part with %d, but it produces misleading output: fixed_from_parts(5, 2) is printed as 5.2 but the value is 5.000030517578125, which you could round as 5.00003. The code to print a fixed_t should be:
```
// Prints x as "<integral>.<fraction>" with the fraction shown as five decimal
// digits, rounded to nearest, followed by a newline.
void print_fixed_base_10(const fixed_t x) {
    // mask so the fraction is treated as unsigned 0..65535 whatever type the helper returns
    const long long frac = get_fixed_fractional(x) & 0xFFFF;

    // frac / 65536 scaled to 5 digits; + 32768 (half the divisor) rounds to nearest
    printf("%d.%05lld\n",
           fixed_to_int(x),
           (frac * 100000LL + 32768) / 65536);
}
```

Here is a modified version:

#include <stdint.h>
#include <stdio.h>

typedef int16_t integral_t;
typedef int32_t fixed_t;

// Converts a 16-bit integer to 16.16 fixed-point by scaling it by 2^16.
// Multiplication is used instead of `x << 16` because left-shifting a
// negative value is undefined behavior; the product always fits in 32 bits.
fixed_t int_to_fixed(integral_t x) {
    return (fixed_t)x * 65536;
}

// Returns the integral part of x by discarding the 16 fractional bits.
integral_t fixed_to_int(fixed_t x) {
    const fixed_t shifted = x >> 16;
    return (integral_t)shifted;
}

// Clears the integral bits and returns the fractional field as a value in
// 0..65535. The result type is fixed_t rather than integral_t: a signed
// 16-bit return would turn any fraction of 0x8000 or more into a negative
// number.
fixed_t get_fixed_fractional(fixed_t x) {
    return x & 0xFFFF;
}

// Builds a fixed-point value from an integral part and a raw fractional field.
// `fractional` is added as raw low-order bits (units of 1/65536), so
// fixed_from_parts(5, 2) is 5 + 2/65536 (about 5.00003), not 5.2.
fixed_t fixed_from_parts(integral_t integral, integral_t fractional) {
    return int_to_fixed(integral) + fractional;
}

// Prints the 32 bits of x, most significant first, with a '.' after the
// 16 integral bits, followed by a newline.
void print_fixed_base_2(fixed_t x) {
    const uint32_t bits = (uint32_t)x;

    for (int pos = 31; pos >= 0; pos--) {
        const int bit_set = (int)((bits >> pos) & 1u);
        putchar(bit_set ? '1' : '0');
        if (pos == 16)
            putchar('.');
    }
    putchar('\n');
}

// Prints x as "<integral>.<fraction>" with the fraction shown as five decimal
// digits, rounded to nearest, followed by a newline.
void print_fixed_base_10(fixed_t x) {
    // mask so the fraction is treated as unsigned 0..65535 even when the
    // helper returns a signed 16-bit value
    const long long frac = get_fixed_fractional(x) & 0xFFFF;

    // frac / 65536 scaled to 5 digits; + 32768 (half the divisor) rounds to nearest
    printf("%d.%05lld\n",
           fixed_to_int(x),
           (frac * 100000LL + 32768) / 65536);
}

// Demo: builds two values and their sum, then prints all three in binary
// and in decimal.
int main(void) {
    // 5.2 + 3.9 = 9.1 (not really)
    const fixed_t a = fixed_from_parts(5, 2), b = fixed_from_parts(3, 9);
    // fixed-point addition is ordinary integer addition of the raw values
    const fixed_t result = a + b;

    print_fixed_base_2(a);
    print_fixed_base_2(b);
    print_fixed_base_2(result);

    print_fixed_base_10(a);
    print_fixed_base_10(b);
    print_fixed_base_10(result);
    return 0;
}

Output:

0000000000000101.0000000000000010
0000000000000011.0000000000001001
0000000000001000.0000000000001011
5.00003
3.00014
8.00017

You might want to pass a third argument to fixed_from_parts to specify the denominator:

// fixed_from_parts(5, 2, 10) == 5.2 (to the nearest 1/65536)
// Builds integral + fractional/denominator as a fixed-point value.
// denominator must be non-zero.
fixed_t fixed_from_parts(integral_t integral, unsigned int fractional, unsigned int denominator) {
    // scale the fraction to 16 bits; adding half the denominator rounds to nearest
    const long long scaled = (fractional * 65536LL + denominator / 2) / denominator;

    return int_to_fixed(integral) + (fixed_t)scaled;
}