Home > Software engineering >  Implementation issue with merge sort
Implementation issue with merge sort

Time:12-02

I cannot understand the problem in my code. The book where I got my algorithm is: Introduction to Algorithms, Third Edition

I understand how the algorithm works but while coding it, my program only sorts the first 4 numbers.

Code

#include <stdio.h>

void sortArr(int *nums, int arrSize) {
    // nums[start...end]
    // nums[start...mid]  n1
    // nums[mid 1...end]  n2
    int start, mid, end;
    start = 0;
    end = arrSize-1;
    mid = (end   start) / 2;
    int n1, n2;
    n1 = mid - start   1;
    n2 = end - mid;
    int l[n1], r[n2];
    for (int i = 0; i < n1; i  ) {
        l[i] = nums[start   i];
    }

    for (int i = 0; i < n2; i  ) {
        r[i] = nums[mid   1   i];
    }

    int i, j;
    i = 0;
    j = 0;
    for (int k = start; k < arrSize; k  ) {
        if (l[i] <= r[j]) {
            nums[k] = l[i];
            i  ;
        } else {
            nums[k] = r[j];
            j  ;
        }
    }
}

int main() {
    int arr[] = {3, 41, 52, 26, 38, 57, 9, 49};
    int arrsize = sizeof(arr) / sizeof(arr[0]);
    printf("before sorting: \n");
    for (int i = 0; i < arrsize; i  ) {
        printf("%d ", arr[i]);
    }

    sortArr(arr, arrsize);
    printf("\n after sorting: \n");
    for (int i = 0; i < arrsize; i  ) {
        printf("%d ", arr[i]);
    }

    return 0;
}

CodePudding user response:

There's quite a few problems with your code, including the fact that you've only implemented MERGE from the book (a subroutine for MERGESORT), and the book uses a trick of appending infinity to the "halves" of the array to avoid having code that handles when one of the halves is used up when merging. You've not included that trick (eg: by appending INT_MAX to L and R and requiring that the original array didn't include that value), but you haven't replaced it with anything so your code can easily read out of bounds when merging.

Here's a working version based on the book algorithms. Compared to the code in the question, it also avoids the VLAs (variable length arrays) which are likely to fail on large input arrays using a one-time malloc-ed buffer, and uses the more correct size_t for array indexes.

The code adds some extra tests in the if statement for li and ri when merging which is a different way than the book's infinity trick, and works better in C.

The top-level function MERGESORT returns 1 if successful, and 0 if unsuccessful (which can happen if malloc fails). Assertions are used to check assumptions about the various indexes and sizes -- these should fail only if there's a bug in the code and can be compiled out.

It runs on a random array of a configurable size (currently 1234), and prints out ok or failed at the end depending whether the array is actually sorted. (Note: rand is normally to be avoided because it's usually a poor supply of random numbers, but it's fine here).

Note this code is carefully written, but it's still not very well tested so you may find bugs!

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

void MERGE(int *nums, int *buffer, size_t start, size_t mid, size_t end) {
    size_t n1 = mid - start   1;
    size_t n2 = end - mid;
    assert(n1 > 0);
    assert(n2 > 0);
    memcpy(buffer, nums   start, (end - start   1) * sizeof(int));
    int *L = buffer;
    int *R = buffer   n1;
    size_t li = 0, ri = 0;
    for (size_t i = start; i <= end; i   ){
        if (li < n1 && (ri == n2 || L[li] <= R[ri])) {
            assert(li < n1);
            nums[i] = L[li  ];
        } else {
            assert(ri < n2);
            nums[i] = R[ri  ];
        }
    }
}

void MERGESORT0(int *nums, int *buffer, size_t start, size_t end) {
    if (end == start) return;
    size_t mid = start   (end - start) / 2;
    MERGESORT0(nums, buffer, start, mid);
    MERGESORT0(nums, buffer, mid 1, end);
    MERGE(nums, buffer, start, mid, end);
}

int MERGESORT(int *nums, size_t size) {
    if (size == 0) {
        return 1;
    }
    int *buffer = malloc(sizeof(int) * size);
    if (!buffer) return 0;
    MERGESORT0(nums, buffer, 0, size-1);
    free(buffer);
    return 1;
}

#define N 1234
int main(){
    int arr[N];
    for (size_t i = 0; i < N; i  ) {
        arr[i] = rand();
    }
    size_t arrsize = sizeof(arr)/sizeof(arr[0]);
    printf("before sorting: \n");
    for(size_t i=0; i<arrsize; i  ){
        printf("%d ", arr[i]);
    }
    printf("\n");
    if (!MERGESORT(arr, arrsize)) {
        printf("failed\n");
        exit(1);
    }
    printf("after sorting: \n");
    int failed = 0;
    for(size_t i=0; i<arrsize; i  ){
        if (i > 0 && arr[i] < arr[i-1]) failed = 1;
        printf("%d ", arr[i]);
    }
    printf("\n");
    printf("%s\n", failed ? "failed" : "ok");
    return 0;
}

CodePudding user response:

Add this print command to the final loop: printf("%d %d %d\n", k, i, j);

n1 = n2 = 4, so both arrays have values only up to index 3. Yet in the final loop, you run i from 0 to 6. This cannot work.

You can add more print commands, run the algorithm with pen and paper, and hopefully find the place where it went wrong.

  • Related