How can i optimize this c code to find common elements in 2 arrays of different sizes-CodePudding

This function f is to find common elements in an array and return result array and i am using 4 four loops to accomplish this task which i feel is no the best use of the loops, Another problem is, how can i determine the size of the returned array so that my loop is within bounds

here is the code

#include <stdio.h>
#include <stdlib.h>

int *f(int first[], int second[], int size_first, int size_second);
int main(void) {
  int arr1[]={1, 8, 3, 2, 6};
  int arr2[]= {2, 6, 1};
  int size1 = sizeof(arr1)/sizeof(arr1[0]);
  int size2 = sizeof(arr2)/sizeof(arr2[0]);
  int *intersection = f(arr1, arr2, size1, size2); 
  for(int i=0;i<3; i  ){
    printf("%d ", intersection[i]);
  }
  return 0;
}

 // function to find common elements in 2 arrays
 int *f(int first[], int second[], int size_first, int size_second){
  int k=0, count=0;

   //loop through the array to find the number common elements and store in count for dynamic memory allocation in future
   for(int i=0;i<size_first;i  ){
     for(int j=0;j<size_second;j  ){
       if(first[i]==second[j]){
        count   ;
       }
     }
   }


  // allocate memory for the common elements by making use of count
  int * common_elements = (int*)malloc(count*sizeof(int));

  // store the common elements in the new memory location
  for(int i=0;i<size_first;i  ){
     for(int j=0;j<size_second;j  ){
       if(first[i]==second[j]){
        common_elements[k]=first[i];
        k  ;
       }
     }
   }
  
  return common_elements;
  free(common_elements);
 }

CodePudding user response：

If you are allowed to waste some memory, note that the intersection cannot have cardinality larger than the number of elements in the smaller set. Therefore, you can allocate more memory than you might need and avoid having to count first and allocate later.

Or, you can realloc as you go.

In general, you need a good data structure for checking set membership more quickly than scanning an entire array although for small sizes which fit in various caches, the linear scan will not perform too shabbily either.

For larger sets, however, you'll want to load the larger of the sets into an AVL tree or Scapegoat tree.

For really large data sets, you'll need to look into Bloom filters and related data structures depending on the use case.

I am including below the most naive improvement in your code which still has the nested loop and wastes memory up to the size of the smaller set to avoid counting common elements first.

#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>

// TODO: What about duplicates in smaller set?
int *
int_set_intersection(
        const int *first,
        const int *second,
        const size_t size_first,
        const size_t size_second,
        size_t *n
        )
{
    size_t K = 0; // number of common elements
    const int is_first_smaller = (size_first < size_second);
    // Done this way so I can declare variables as consts
    const int *set_smaller = is_first_smaller ? first : second;
    const int *set_larger  = is_first_smaller ? second : first;
    const size_t size_smaller   = is_first_smaller ? size_first : size_second;
    const size_t size_larger    = is_first_smaller ? size_second : size_first;

    int *common = malloc(size_smaller * sizeof(*common));

    if (!common) {
        fprintf(stderr, "Failed to allocate memory for %z ints\n", size_smaller);
        perror("Cannot allocate memory for common elements");
        exit(EXIT_FAILURE);
    }

    for (size_t i = 0; i < size_smaller;   i) {
        for (size_t j = 0; j < size_larger;   j) {
            if (set_smaller[i] == set_larger[j]) {
                common[K] = set_smaller[i];
                  K;
                break;
            }
        }
    }

    *n = K;
    return common;
}

void
int_set_print(const int *set, size_t n, FILE *f)
{
    FILE *out = f ? f : stdout;
    size_t i = 0;

    fputs("{ ", out);

    for (i = 0; i < n - 1;   i) {
        fprintf(out, "%d, ", set[i]);
    }

    fprintf(out, "%d }\n", set[i]);
}

int
main(void) {
  int arr1[] = {1, 8, 3, 2, 6};
  int arr2[] = {2, 5, 1};
  size_t n = 0;

  const int *intersection = int_set_intersection(
          arr1,
          arr2,
          sizeof(arr1)/sizeof(arr1[0]),
          sizeof(arr2)/sizeof(arr2[0]),
          &n
          );

  int_set_print(intersection, n, NULL);

  free(intersection); // not really needed, but good hygiene

  return 0;
}

CodePudding user response：

For larger arrays, one option is to sort the contents first to make it easier to check for common elements, as shown in the code below. If the original array contents cannot be changed, first copy them into dynamically allocated memory. Dynamically allocated memory is also needed to hold the list of common elements, but that can use the same storage as one of the copies.

OP's original function returns a pointer to dynamically allocated memory containing the array of common elements, but does not indicate the length of the array. I added a parameter to return the length.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int *f(int first[], int second[], int size_first, int size_second, int *size_common);
int main(void) {
  int arr1[]={1, 8, 3, 2, 6};
  int arr2[]= {2, 6, 1};
  int size1 = sizeof(arr1)/sizeof(arr1[0]);
  int size2 = sizeof(arr2)/sizeof(arr2[0]);
  int size_common;
  int *intersection = f(arr1, arr2, size1, size2, &size_common); 
  for(int i=0;i<size_common; i  ){
    printf("%d ", intersection[i]);
  }
  free(intersection);
  return 0;
}

static int cmp_int(const void *ap, const void *bp) {
  int a = *(const int *)ap;
  int b = *(const int *)bp;
  return (a > b) - (a < b);
}

// function to find common elements in 2 arrays
int *f(int first[], int second[], int size_first, int size_second,
       int *size_common) {
  int *copy1;
  int *copy2;
  int idx1;
  int idx2;
  int count;

  // allocate memory local copies of the arrays
  copy1 = malloc(size_first * sizeof (int));
  copy2 = malloc(size_second * sizeof (int));
  if (!copy1 || !copy2) {
    // allocation error
    free(copy1);
    free(copy2);
    *size_common = -1;  // use -1 to report error
    return NULL;
  }

  // copy the arrays
  memcpy(copy1, first, size_first * sizeof (int));
  memcpy(copy2, second, size_second * sizeof (int));

  // sort the copies in ascending order
  qsort(copy1, size_first, sizeof (int), cmp_int);
  qsort(copy2, size_second, sizeof (int), cmp_int);

  // find common elements
  idx1 = 0;
  idx2 = 0;
  count = 0;
  while (idx1 < size_first && idx2 < size_second) {
    if (copy1[idx1] < copy2[idx2]) {
      idx1  ;
    } else if (copy1[idx1] > copy2[idx2]) {
      idx2  ;
    } else {
      // common element found!
      // use copy1[] to store common elements
      copy1[count] = copy1[idx1];
      count  ;
      idx1  ;
      idx2  ;
    }
  }

  // common elements are in copy1[].
  // finished with copy2, so free it.
  free(copy2);

  if (count == 0) {
    // no common elements
    free(copy1);  // free the memory
    copy1 = NULL; // and make the function return NULL
  } else {
    // try to reduce memory for common elements
    copy2 = realloc(copy1, count * sizeof (int));
    if (copy2) {
      // reallocation successful
      copy1 = copy2;
    } // else, never mind, copy1 is still valid
  }

  // return the common elements
  *size_common = count;
  return copy1;
}