Home > database >  My InsertionSort Program runs faster than MergeSort Program even with very large input. Why?
My InsertionSort Program runs faster than MergeSort Program even with very large input. Why?

Time:12-08

I am a beginner, I wanted to find out the time taken by my code to execute and find out if Insertion Sort(IS) takes more time than MergeSort(MS) (It is supposed to take more time for sufficiently large input).

So I wrote a Recursive IS algo:

void Insertion_Sort(int *A, int n) {
    if (n < SIZE - 1) {
        int j = n;
        int key = A[j   1];
        while (j >= 0 && A[j] > key) {
            A[j   1] = A[j];
            j--;
        }
        A[j   1] = key;
        n  ;
        Insertion_Sort(A, n);
    }
}

int main() { 
    struct timeval tv1, tv2;
    int array[SIZE];

    printf("Enter the elements to be sorted\n");
    freopen("input1.txt", "r", stdin);
    freopen("output1.txt", "w", stdout);
    for (int i = 0; i < SIZE; i  ) {
        scanf("%d", array   i);
    }
    gettimeofday(&tv1, NULL);
    for (int i = 0; i < 1000; i  ) {
        Insertion_Sort(array, 0);
    }
    gettimeofday(&tv2, NULL);
    printf("The sorted array is :\n");
    for (int i = 0; i < SIZE; i  ) {
        printf("%d\n", array[i]);
    }
    printf("The microsecond 1 is %f\n", (double)(tv2.tv_usec));
    printf("The microsecond 2 is %f\n", (double)(tv1.tv_usec));
    printf("Total time = %f seconds\n",
           (double)(tv2.tv_usec - tv1.tv_usec) / 1000000  
           (double)(tv2.tv_sec - tv1.tv_sec));
}

This was the code. It gave a run time of 0.015s

I also wrote a code for MergeSort:

void Merge(int *Array, int p, int q, int r) {
    int n1 = q - p   1;
    int n2 = r - q;
    int Arr_1[n1   1];
    int Arr_2[n2   1];

    for (int i = 0; i < n1; i  ) {
        Arr_1[i] = Array[p   i];
    }
    for (int i = 0; i < n2; i  ) {
        Arr_2[i] = Array[q   i   1];
    }
    Arr_1[n1] = __INT_MAX__;
    Arr_2[n2] = __INT_MAX__;
    int i = 0;
    int j = 0;
    for (int k = p ; k <= r; k  ) {
        if (Arr_1[i] < Arr_2[j]) {
            Array[k] = Arr_1[i];
            i  ;
        } else {
            Array[k] = Arr_2[j];
            j  ;
        } 
    }
}

void Merge_Sort(int *Array, int p, int r) {
    if (p < r) { 
        int mid;
        mid = (p   r) / 2;
        Merge_Sort(Array, p, mid);
        Merge_Sort(Array, mid   1, r);
        Merge(Array, p, mid, r);
    }
}

//driver code
int main() {
    struct timeval tv1, tv2;
    int Array[SIZE];

    printf("Enter the array for Merging Operation:\n");
    freopen("input1.txt", "r", stdin);
    freopen("output3.txt", "w", stdout);
    for (int i = 0; i < SIZE; i  ) {
        scanf("%d", &Array[i]);
    }
    gettimeofday(&tv1, NULL);

    for (int i = 0; i < 1000; i  ) {
        Merge_Sort(Array, 0, SIZE - 1);
    }
    gettimeofday(&tv2, NULL);
    printf("The sorted array :\n");
    for (int i = 0; i < SIZE; i  ) {
        printf("%d\n", Array[i]);
    }
    printf("The microsecond is %f\n", (double)(tv2.tv_usec));
    printf("The microsecond is %f\n", (double)(tv1.tv_usec));
    printf("\n\nTotal time = %f seconds\n",
           (double)(tv2.tv_usec - tv1.tv_usec) / 1000000  
           (double)(tv2.tv_sec - tv1.tv_sec));
}

This one gave a run time of 0.06s I want to understand why MS is slower than IS even though its supposed to be faster.

I don't really understand how the timing code bit works but it was showing 0 no matter how large of an input I gave it. So, to make it show some result I looped it a 1000 times. I think a possible reason is that the array is sorted already in first invocation in the loop and since IS is linear in best case while MS is n lg(n) in all cases it runs faster. Is that possible? I would be really helpful if someone could show me how to time my code. Thanks in advance

CodePudding user response:

The problem in your test code is you repeatedly sort the same array 1000 times: 999 of these calls involve a sorted array. InsertionSort is very efficient on sorted arrays with a time complexity of O(n), whereas MergeSort has a time complexity of O(n.log(n)) in all cases.

For a more reliable test, you should make a copy of the unsorted array.

Here is a modified benchmark:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/time.h>

#define SIZE    1024
#define REPEAT  1000

void Insertion_Sort(int *A, int n, int last) {
    if (n < last) {
        int j = n;
        int key = A[j   1];
        while (j >= 0 && A[j] > key) {
            A[j   1] = A[j];
            j--;
        }
        A[j   1] = key;
        n  ;
        Insertion_Sort(A, n, last);
    }
}

void Merge(int *Array, int p, int q, int r) {
    int n1 = q - p   1;
    int n2 = r - q;
    int Arr_1[n1   1];
    int Arr_2[n2   1];

    for (int i = 0; i < n1; i  ) {
        Arr_1[i] = Array[p   i];
    }
    for (int i = 0; i < n2; i  ) {
        Arr_2[i] = Array[q   i   1];
    }
    Arr_1[n1] = __INT_MAX__;
    Arr_2[n2] = __INT_MAX__;
    int i = 0;
    int j = 0;
    for (int k = p ; k <= r; k  ) {
        if (Arr_1[i] < Arr_2[j]) {
            Array[k] = Arr_1[i];
            i  ;
        } else {
            Array[k] = Arr_2[j];
            j  ;
        }
    }
}

void Merge_Sort(int *Array, int p, int r) {
    if (p < r) {
        int mid;
        mid = (p   r) / 2;
        Merge_Sort(Array, p, mid);
        Merge_Sort(Array, mid   1, r);
        Merge(Array, p, mid, r);
    }
}

//driver code
int main() {
    struct timeval tv1, tv2;
    double IS_time, MS_time;
    FILE *fp;
    int SampleArray[SIZE];
    int Array1[SIZE];
    int Array3[SIZE];

    fp = fopen("input1.txt", "r");
    if (fp == NULL) {
        fprintf(stderr, "cannot open input1.txt: %s\n", strerror(errno));
        return 1;
    }
    for (int i = 0; i < SIZE; i  ) {
        if (fscanf(fp, "%d", &SampleArray[i]) != 1)
            SampleArray[i] = i * i * i % SIZE;
    }
    fclose(fp);
    gettimeofday(&tv1, NULL);
    for (int i = 0; i < REPEAT; i  ) {
        memcpy(Array1, SampleArray, sizeof Array1);
        Insertion_Sort(Array1, 0, SIZE - 1);
    }
    gettimeofday(&tv2, NULL);
    IS_time = ((double)(tv2.tv_usec - tv1.tv_usec) / 1000000  
               (double)(tv2.tv_sec - tv1.tv_sec));
    for (int i = 1; i < SIZE; i  ) {
        if (Array1[i - 1] > Array1[i]) {
            printf("InsertionSort error at %d: %d <-> %d\n",
                   i, Array1[i - 1], Array1[i]);
            break;
        }
    }

    gettimeofday(&tv1, NULL);
    for (int i = 0; i < REPEAT; i  ) {
        memcpy(Array3, SampleArray, sizeof Array3);
        Merge_Sort(Array3, 0, SIZE - 1);
    }
    gettimeofday(&tv2, NULL);
    MS_time = ((double)(tv2.tv_usec - tv1.tv_usec) / 1000000  
               (double)(tv2.tv_sec - tv1.tv_sec));
    for (int i = 1; i < SIZE; i  ) {
        if (Array3[i - 1] > Array3[i]) {
            printf("MergeSort error at %d: %d <-> %d\n",
                   i, Array3[i - 1], Array3[i]);
            break;
        }
    }

    fp = fopen("output1.txt", "w");
    if (fp == NULL) {
        fprintf(stderr, "cannot open output1.txt: %s\n", strerror(errno));
        return 1;
    }
    for (int i = 0; i < SIZE; i  ) {
        fprintf(fp, "%d\n", Array1[i]);
    }
    fclose(fp);

    fp = fopen("output3.txt", "w");
    if (fp == NULL) {
        fprintf(stderr, "cannot open output3.txt: %s\n", strerror(errno));
        return 1;
    }
    for (int i = 0; i < SIZE; i  ) {
        fprintf(fp, "%d\n", Array3[i]);
    }
    fclose(fp);

    printf("InsertionSort total time = %f seconds\n", IS_time);
    printf("MergeSort total time =     %f seconds\n", MS_time);
    return 0;
}

Output for SIZE=1024:

InsertionSort total time = 0.110109 seconds
MergeSort total time =     0.054474 seconds

Output for SIZE=4096:

InsertionSort total time = 2.057987 seconds
MergeSort total time =     0.276475 seconds

Output for SIZE=16384:

InsertionSort total time = 28.658141 seconds
MergeSort total time =     1.297120 seconds

These timings are consistent with the expected complexity: O(n2) for Insertion Sort and O(n.log(n)) for Merge Sort.

CodePudding user response:

A somewhat optimized top down merge sort that does a one time allocate and copy for the working array, and changes direction of merge depending on level of recursion. Takes less than .001 second to sort 16384 integers on my system (Intel 3770K).

void Merge(int a[], int bgn, int mid, int end, int b[])
{
int i, j, k;
    i = bgn, j = mid, k = bgn;
    while(1){
        if(a[i] <= a[j]){               // if left smaller
            b[k  ] = a[i  ];            //  copy left element
            if(i < mid)                 //  if not end of left run
                continue;               //    continue
            do                          //  else copy rest of right run
                b[k  ] = a[j  ];
            while(j < end);
            break;                      //   and break
        } else {                        // else right smaller
            b[k  ] = a[j  ];            //  copy right element
            if(j < end)                 //  if not end of right run
                continue;               //    continue
            do                          //  else copy rest of left run
                b[k  ] = a[i  ];
            while(i < mid);
            break;                      //   and break
        }
    }
}

void MergeSortR(int b[], int bgn, int end, int a[])
{
    if (end - bgn <= 1)                 // if run size == 1
        return;                         //   consider it sorted
    int mid = (end   bgn) / 2;
    MergeSortR(a, bgn, mid, b);
    MergeSortR(a, mid, end, b);
    Merge(b, bgn, mid, end, a);
}

void MergeSort(int a[], int n)          // n = size (not size-1)
{
    if(n < 2)
        return;
    int *b = malloc(n*sizeof(int));     // 1 time allocate and copy
    for(size_t i = 0; i < n; i  )
        b[i] = a[i];
    MergeSortR(b, 0, n, a);             // sort data from b[] into a[]
    free(b);
}
  • Related