Home > Software design >  Very high memory usage in these Digit Analysis (Hash) algorithms in C
Very high memory usage in these Digit Analysis (Hash) algorithms in C

Time:12-08

I have an university exercise, where I have to compare some hashing methods with their number of colisions in the hash table. Then I made theses Digit Analysis algorithms, but they are using A LOT of memory (I can't even run the code until the end, because it kills my computer). You can ignore the comments, but fell free if you want and knows portuguese.

Digit Analysis function 1 (Using dinamic matrix)

int hashUVDigitAnalysis(int len, int value, int numofdigits, int *arr){
    int keydigits, *digits = getDigits(value, &keydigits);

    if(numofdigits >= keydigits){
        free(digits);                                                        //                                     -ATENÇÃO-
        return value;                                                        //
    }else{                                                                   // Essa função funciona, mas quando testei com o vetor *arr muito grande, ele rapidamente
        int **numqntmatrix = (int **)(malloc(10 * sizeof(int *)));           //          consumiu maior parte da memória do meu pc, e o computador falhou por
        int cont1, cont2, countdigits;                                       //                    falta de memória. Use essa função apenas para
        float *detours = (float *)(malloc(keydigits * sizeof(float)));       //                            testar um único valor (UV).
                                                                             // Como alternativa, tentei realizar a função abaixo desta, mas a mesma deu o mesmo problema.
        for(cont1 = 0; cont1 < 10; cont1  ){                                 //
            numqntmatrix[cont1] = (int *)(malloc(keydigits * sizeof(int)));
            for(cont2 = 0; cont2 < keydigits; cont2  ){
                numqntmatrix[cont1][cont2] = 0;
            }
        }

        for(cont1 = 0; cont1 < len; cont1  ){
            digits = getDigits(arr[cont1], &countdigits);
            for(cont2 = 0; cont2 < keydigits && cont2 < countdigits; cont2  ){
                numqntmatrix[digits[cont2]][cont2]  = 1;
            }
        }

        for(cont1 = 0; cont1 < keydigits; cont1  ){
            detours[cont1] = 0.0;
        }

        for(cont1 = 0; cont1 < keydigits; cont1  ){
            for(cont2 = 0; cont2 < 10; cont2  ){
                detours[cont1]  = (numqntmatrix[cont2][cont1] - (len / 10.0)) * (numqntmatrix[cont2][cont1] - (len / 10.0));
            }
        }

        for(cont1 = 0; cont1 < 10; cont1  ){
            free(numqntmatrix[cont1]);
        }
        free(numqntmatrix);

        int *concatenate = (int *)(malloc(numofdigits * sizeof(int)));

        for(cont1 = 0; cont1 < numofdigits; cont1  ){
            cont2 = 0;
            concatenate[cont1] = cont2;
            for(cont2 = 1; cont2 < keydigits; cont2  ){
                if(detours[cont2] < detours[concatenate[cont1]] && detours[cont2] != -1){
                    concatenate[cont1] = cont2;
                }
            }
            detours[concatenate[cont1]] = -1;
        }

        digits = getDigits(value, &countdigits);
        int valueposition = 0;

        for(cont1 = 0; cont1 < numofdigits; cont1  ){
            valueposition  = digits[concatenate[cont1]] * pow(10, cont1);
        }

        free(detours);
        free(digits);

        return valueposition;
    }
}

Digit Analysis function 2 (Using only arrays)

int hashQuadraticDigitAnalysis(int len, int value, int numofdigits, int *arr){
    int keydigits, *digits = getDigits(value, &keydigits);

    if(numofdigits >= keydigits){
        free(digits);
        return value;
    }else{
        int cont1, cont2, countdigits;
        int *numbers = (int *)(malloc(10 * sizeof(int)));
        float *detours = (float *)(malloc(10 * sizeof(float)));

        for(cont1 = 0; cont1 < 10; cont1  ){
            numbers[cont1] = 0;
            detours[cont1] = 0.0;
        }

        for(cont1 = 0; cont1 < 10; cont1  ){
            for(cont2 = 0; cont2 < len; cont2  ){
                digits = getDigits(arr[cont2], &countdigits);
                if(cont1 < countdigits){
                    numbers[digits[cont1]]  = 1;
                }
            }
            for(cont2 = 0; cont2 < 10; cont2  ){
                detours[cont1]  = (numbers[cont2] - (len / 10.0)) * (numbers[cont2] - (len / 10.0));
                numbers[cont2] = 0;
            }
        }

        int *concatenate = (int *)(malloc(numofdigits * sizeof(int)));

        for(cont1 = 0; cont1 < numofdigits; cont1  ){
            cont2 = 0;
            concatenate[cont1] = cont2;
            for(cont2 = 1; cont2 < keydigits; cont2  ){
                if(detours[cont2] < detours[concatenate[cont1]] && detours[cont2] != -1){
                    concatenate[cont1] = cont2;
                }
            }
            detours[concatenate[cont1]] = -1;
        }

        digits = getDigits(value, &countdigits);
        int valueposition = 0;

        for(cont1 = 0; cont1 < numofdigits; cont1  ){
            valueposition  = digits[concatenate[cont1]] * pow(10, cont1);
        }

        free(concatenate);
        free(detours);
        free(digits);

        return valueposition;
    }
}

getDigits function

int* getDigits(int value, int *addr_countdigits){
    int copyofvalue = value;
    *addr_countdigits = 0;

    while(copyofvalue != 0){
        copyofvalue = copyofvalue / 10;
        *addr_countdigits = *addr_countdigits   1;
    }

    int tmp = *addr_countdigits;
    int *array = (int *)(malloc(tmp * sizeof(int)));
    copyofvalue = value;

    while(copyofvalue > 0){
        array[tmp - 1] = copyofvalue % 10;
        copyofvalue = copyofvalue / 10;
        tmp = tmp-1;
    }

    return array;
}

Main

int main(void){
    int len = 100000, lenarr = 200000, cont, random, numcolision = 0, pos;

    int *randomarr = (int *)(malloc(lenarr * sizeof(int)));

    int *hash_division = (int *)(malloc(len * sizeof(int)));

    for(cont = 0; cont < len; cont  ){
        hash_division[cont] = -1;
    }

    for(cont = 0; cont < lenarr; cont  ){
        random  = (((rand() & 255)<<8 | (rand() & 255))<<8 | (rand() & 255))<<7 | (rand() & 127);
        random = random % 2000000001;
        randomarr[cont] = random;
    }

    for(cont = 0; cont < lenarr; cont  ){
        pos = hashUVDigitAnalysis(lenarr, randomarr[cont], 5, randomarr);
        if(hash_division[pos] == -1){
            hash_division[pos] = randomarr[cont];
        } else{
            numcolision  ;
        }
    }

    printf("%d", numcolision);
    return 0;
}

I have 14 GB of usable RAM memory (total of 16 GB). I tested both functions and there isn't any infinity loop. The code runs when I put lenarr = 10000, but I have to test with len = 100000 and lenarr equals to 200 thousand, 400 thousand, 600 thousand, 800 thousand and 1 million, so only 10 thousand will not works for me. I'm doing something wrong? Is there any way to fix this? I never had a memory problem in any code before, so I'm not good with controlling my memory in code.

CodePudding user response:

Cause of leak

I looked at this in valgrind, and it looks like you're missing five calls to free. This is the largest leak:

    for(cont1 = 0; cont1 < len; cont1  ){
        digits = getDigits(arr[cont1], &countdigits);
        for(cont2 = 0; cont2 < keydigits && cont2 < countdigits; cont2  ){
            numqntmatrix[digits[cont2]][cont2]  = 1;
        }
        // free memory returned by getDigits()
        free(digits);
    }

Every loop through this calls getDigits(), which allocates memory which is never freed.

Other sources of leaks:

  • int keydigits, *digits = getDigits(value, &keydigits);
  • int *concatenate = (int *)(malloc(numofdigits * sizeof(int)));
  • int *randomarr = (int *)(malloc(lenarr * sizeof(int)));
  • int *hash_division = (int *)(malloc(len * sizeof(int)));

How to use valgrind

Here's how I performed the valgrind analysis:

First, I reduced the number of loops the program makes from 100K to 10. This prevents it from running out of memory while valgrind is running.

Second, I compiled the program with -ggdb, to enable debugging information. Here's the command I used:

gcc test027.c -Wall -Werror -lm -ggdb -o test027

Third, I ran valgrind with --leak-check=full:

valgrind --leak-check=full ./test027 

This showed me the stacktraces of the sources of the memory leaks. Each one looks like this:

==174419== 1,501,600 bytes in 40,000 blocks are definitely lost in loss record 1 of 1
==174419==    at 0x483C7F3: malloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==174419==    by 0x10925C: getDigits (test027.c:16)
==174419==    by 0x10940D: hashUVDigitAnalysis (test027.c:50)
==174419==    by 0x109988: main (test027.c:118)

The first thing you see is the size of the memory leak. (1,501,600 bytes). You should solve the largest memory leak first. Next, you see how many allocations were made that were never freed. (40,000 blocks). Next, you see the line numbers of the part of the program which is responsible.

Solve each memory leak one at a time, and re-run valgrind. Once you solve all the leaks, valgrind will show this output:

==174500== HEAP SUMMARY:
==174500==     in use at exit: 0 bytes in 0 blocks
==174500==   total heap usage: 43,003 allocs, 43,003 frees, 1,621,428 bytes allocated
==174500== 
==174500== All heap blocks were freed -- no leaks are possible
  • Related