Home > Blockchain >  Binary search function that returns key's successor or predecessor if not found
Binary search function that returns key's successor or predecessor if not found

Time:12-26

I have this binary search function written in C that returns the first occurrance of a given key. But I wanted to add the functionality described in the title, using the not_found parameter.

void* binary_search(const void *key, const void *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *), int not_found) {
    const void *p;
    int lo = 0;
    int hi = nmemb - 1;
    int mid, c;
    int found = -1;

    while (lo <= hi) {
        mid = lo   (hi - lo   1)/2;
        p = base   mid * size;
        c = compar(key, p);
        if (c == 0) {
            found = mid;
            hi = mid - 1;
        }
        else if (c > 0)
            lo = mid   1;
        else
            hi = mid - 1;
    }
    
    if (found != -1) {
        p = base   found * size;
        return (void*)p;
    }
    else if (not_found == 0) {
        return NULL;
    }
    else if (not_found == 1) {
        /*
        p = base   mid * size;
        return (void*)p;
        */
    }
    else {
        /*
        p = base   (mid-1) * size;
        return (void*)p;
        */
    }
}

How can I make that if the key is not found and not_found == 1, then return its successor or if not_found == -1, return its predecessor

That commented part of code worked in some cases, but it really depends on the case. From this list of strings, "A", "C", "E", "G", "I", "K", "M", "O", searching for "D" with not_found = -1 returns "A" but searching for "F" it returns correctly "E". The same goes for successor cases, that part of the code doesn't always get it right.

CodePudding user response:

Well, an interesting problem for a slow day.

The following works in its realm and could be adapted to function with the OP notion of an array of generic datatype (ie: void *).

As alluded to in the comments, the problem statement was optimistic. The code below finds an element in an array, or returns an index value so the caller can know where such an element might be if it existed.

Fun challenge.

#include <stdio.h>

int closeTo( const char ch, const char buf[], const int sz ) {
    int lo = 0, hi = sz-1, mid = 0, cmp = 0;
    while( lo <= hi ) {
        mid = (lo hi)/2;
        cmp = ch - buf[mid];
        if( cmp == 0 ) return mid;
        if( cmp > 0 ) lo =   mid; else hi = --mid;
    }
    /* Not found */
    if( cmp < 0 ) mid  ; // last value of buf[mid] was too high
    return mid - sz - 1;
}

int main( void ) {
    char *str = "BDEFGHIJKMNOPQRSTUVWXY";
    int sz = strlen( str );

    for( char *trgts = "ABCLMZ"; *trgts; trgts   ) {
        printf( "Search '%c': ", *trgts );
        int ind = closeTo( *trgts, str, sz );
        if( ind >= 0 )
            printf( "found at str[%d] %c", ind, str[ind] );
        else if( (ind  = sz) < 0 )
            printf( "PRECEDES str[0] %c", str[0] );
        else
            printf( "would follow str[%d] %c", ind, str[ind] );
        putchar( '\n' );
    }
    return 0;
}
Search 'A': PRECEDES str[0] B
Search 'B': found at str[0] B
Search 'C': would follow str[0] B
Search 'L': would follow str[8] K
Search 'M': found at str[9] M
Search 'Z': would follow str[21] Y

In brief, the "phantom" index is returned as a negative index. In the particular case of, for instance an 'A' not already in the list, the phantom index is 'encoded' to 'decode' to -1, the index of the phantom element prior to index 0.

CodePudding user response:

You said you wanted the "first" match which would be the leftmost binary search algorithm. Then I suggest wrap the standard algorithm for handling of the result:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

enum not_found {
    PREDECESSOR = -1,
    CURRENT,
    SUCCESSOR
};

const void *binary_search(const void *key, const void *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *)) {
    size_t l = 0;
    size_t r = nmemb;
    while(l < r) {
        size_t m = (l   r) / 2;
        if(compar((const char *) base   m * size, key) < 0)
            l = m   1;
        else
            r = m;
    }
    return (const char *) base   l * size;
}

const void *binary_search2(const void *key, const void *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *), enum not_found not_found) {
    const void *p = binary_search(key, base, nmemb, size, compar);
    if(!compar(p, key))
        return p;
    switch(not_found) {
        case PREDECESSOR:
            return (const char *) p - size;
        case CURRENT:
            return NULL;
        case SUCCESSOR:
            return p;
    }
}

int charcmp(const void *a, const void *b) {
    const char *a2 = a;
    const char *b2 = b;
    if(*a2 < *b2) return -1;
    if(*a2 > *b2) return 1;
    return 0;
}

int main(void) {
    char *s = "ACEGIKMO";
    enum not_found tests[] = {
        PREDECESSOR,
        CURRENT,
        SUCCESSOR
    };
    for(size_t i = 0; i < sizeof tests / sizeof *tests; i  ) {
        const void *p = binary_search2("D", s,  strlen(s), 1, charcmp, tests[i]);
        if(!p)
            printf("NULL\n");
        else
            printf("%.1s\n", (const char *) p);
    }
}

and example run:

C
NULL
E

A pointer to the element before the first in base is undefined behavior. If you need that then you would need to change the interface. It's ok to point to the element after the last one you just can't dereference it in general. For strings it would be a '\0' which is fine.

  • Related