Home > Software engineering >  Optimize Binary Search in sorted array find number of occurences
Optimize Binary Search in sorted array find number of occurences

Time:05-31

Im trying to do the smallest number of operations possible to find the number of occurences of an element in the array. Save even 1 operation if possible. So far this is the best binary search version I know. I cant use vectors or any other std:: functions

int modifiedbinsearch_low(int* arr, int low, int high , int key){   
    if(low==high) return high ; 

    int mid = low   (high-low) /2;

    if(key >  arr[mid] ) { modifiedbinsearch_low(arr,mid   1 , high,key);  } 
    else  { modifiedbinsearch_low(arr,low,mid,key);  }  
}

int modifiedbinsearch_high(int* arr, int low, int high , int key){   
    if(low==high) return high ; 

    int mid = low   (high-low) /2;

    if(key <  arr[mid] ) { modifiedbinsearch_high(arr,low,mid,key);  } 
    else  { modifiedbinsearch_high(arr,mid 1,high,key);  } 

} 

int low = modifiedbinsearch_low( ...)
int high = modifiedbinsearch_high( ...)

This version collapsed both functions into just one but it takes almost double the time. Im wondering the idea is good for it to become the fastest but the implementation is wrong.

#include<stdio.h>
int binarysearch(int a[],int n,int k,bool searchfirst){
    int result=-1;
    int low=0,high=n-1;
    while(low<=high){
        int mid=(low high)/2;
        if(a[mid]==k)  {
              result=mid; 
           if(searchfirst)
              high=mid-1; 
            else
              low=mid 1;
    }
    else if(k<a[mid])  high=mid-1;
    else low=mid 1;
    }
    return result;
}

int main(){
    int a[]={1,1,1,2,2,3,3,3,6,6,6,6,6,7,7};
    int n=sizeof(a)/sizeof(a[0]);
    int x=6;
    int firstindex=binarysearch(a,n,x,true);
    printf("%d\n",firstindex);
    if(firstindex==-1){
        printf("elment not found in the array:\n ");
    }
    else {
        int lastindex=binarysearch(a,n,x,false);
        printf("%d\n",lastindex);
        printf("count is = %d", lastindex-firstindex 1);
    }

}

Shorter version

      int  binmin(int a[], int start, int end,int val ) {
         if(start<end) {
            int mid = (start end)/2;
            if(a[mid]>=val) 
                binmin(a,start,mid-1,val);
            else if(a[mid]<val)
                binmin(a,mid 1,end,val);

      }
      else if(start>end)
           return start;
}

CodePudding user response:

Here's a performance issue. In the main while loop, you aren't breaking out fo the loop when you find the target value.

while(low<=high){
    int mid=(low high)/2;
    if(a[mid]==k)  {
          result=mid;  // you need to break out of the loop here
       if(searchfirst)
          high=mid-1; 
        else
          low=mid 1;
}

But that's not the only issue. The searchfirst value shouldn't be what dictates adjusting high or low as you are doing it. In a classic binary search, adjust the high or low parameter based on how a[mid] compares to k`. Probably closer to this:

while(low<=high) {
    int mid=(low high)/2;
    if(a[mid]==k)  {
          result=mid;  // you need to break out of the loop here
          break;
    }
    if(a[mid] > k)
        high=mid-1; 
    else
       low=mid 1;
}

You have the right idea. Binary search to find the element. But let me suggest the simpler solution after the initial binary search is to just "scan to the left" and "scan to the right" to count duplicate elements.

Let me know what you think of this:

int binarySearch(int* arr, int start, int end, int value) {
    while (start <= end) {
        int mid = (start   end) / 2;
        if (arr[mid] == value) {
            return mid;
        }
        start = (arr[mid] < value) ? (mid   1) : start;
        end = (arr[mid] > value) ? (mid - 1) : end;
    }
    return -1;
}

int countSide(int* arr, int length, int index, int value, int step) {
    int count = 0;
    while (index >= 0 && index <= (length - 1) && arr[index] == value) {
        count  ;
        index  = step;
    }
    return count;
}

int main() {
    int a[] = { 1,1,1,2,2,3,3,3,6,6,6,6,6,7,7 };
    int n = sizeof(a) / sizeof(a[0]);
    int x = 6;
    int firstindex = binarySearch(a, 0, n - 1, x);
    printf("%d\n", firstindex);
    if (firstindex == -1) {
        printf("elment not found in the array:\n ");
    }
    else {
        int count = countSide(a, n, firstindex, x, -1);
        count  = countSide(a, n, firstindex, x, 1);
        count--; // because we counted the middle element twice
        printf("count is = %d\n", count);
    }
}

Updated

Here's a solution that does two binary searches to find the lower and upper bounds of the target value in the array and simply measures the distance between the indices to get the count:

int bound(int* arr, int length, int value, bool isUpperBound) {

    int best = -1;
    int start = 0;
    int end = start   length - 1;

    while (start <= end) {
        int mid = (start   end) / 2;
        if (arr[mid] == value) {
            best = mid;

            if (isUpperBound) {
                start = mid   1;
            }
            else {
                end = mid - 1;
            }
        }
        else if (arr[mid] < value) {
            start = mid   1;
        }
        else if (arr[mid] > value) {
            end = mid - 1;
        }
    }
    return best;
}


int main() {
    int a[] = { 1,1,1,2,2,3,3,3,6,6,6,6,6,7,7 };
    int n = sizeof(a) / sizeof(a[0]);
    int x = 6;
    int firstindex = bound(a, n, x, false);
    int lastindex = bound(a, n, x, true);
    printf("%d - %d\n", firstindex, lastindex);
    if (firstindex == -1) {
        printf("elment not found in the array:\n ");
    }
    else {
        int count = lastindex-firstindex   1;
        printf("count is = %d\n", count);
    }
}
  • Related