Home > Back-end >  How to reduce a C array of strings to unique values
How to reduce a C array of strings to unique values

Time:05-17

I am working on a basic framework to dynamically allocate arrays. In this case it is an array of strings. I am trying to create a function to delete all non-unique string values from the array and testing it with Google Test. When I test the function titled unique_string_vec I get the following error. However, to be fair, the code appears to be working correctly, and the compiler itself does not throw an error. Instead it appears that google test is throwing an error. I am trying to determine if this is an issue with google test or if I legitimately have a memory management issue that I need to fix. Any thoughts or suggestions would be appreciated.

unit_tests: malloc.c:2617: sysmalloc: Assertion (old_top == initial_top (av) && old_size == 0)

I am posting the relevant portions of the framework below and would appreciate any thoughts on how to fix the issue.

vector.h

#ifndef ARRAY_H
#define ARRAY_H

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>

typedef enum
{
    FLOAT,
    DOUBLE,
    CHAR,
    INT,
    STRING
} dat_type;
// --------------------------------------------------------------------------------

typedef struct
{
    char **array;
    size_t len;
    int elem;
    dat_type dat;
} StringVector;
// --------------------------------------------------------------------------------

int string_vector_mem_alloc(StringVector *array, size_t num_indices);
// --------------------------------------------------------------------------------

StringVector init_string_vector();
// --------------------------------------------------------------------------------

int append_string_vector(StringVector *s, char *value);
// --------------------------------------------------------------------------------

void pop_string_vector(StringVector *array, int index);
// --------------------------------------------------------------------------------

void unique_string_vec(StringVector *array);
// --------------------------------------------------------------------------------

void free_string_array(StringVector *array);
// --------------------------------------------------------------------------------
#endif /* ARRAY_H */

array.c

int string_vector_mem_alloc(StringVector *array, size_t num_indices) {
    // Determine the total memory allocation and assign to pointer
    void *pointer;
    pointer = malloc(num_indices * array->elem);

    // If memory is full fail gracefully
    if (pointer == NULL) {
        printf("Unable to allocate memory, exiting.\n");
        free(pointer);
        return 0;
    }
    // Allocate resources and instantiate Array
    else {
        array->array = pointer;
        array->len = 0;
        return 1;
    }
}
// --------------------------------------------------------------------------------

StringVector init_string_vector() {
    StringVector array;
    array.dat = STRING;
    array.elem = sizeof(char);
    string_vector_mem_alloc(&array, array.elem);
    return array;
}
// --------------------------------------------------------------------------------

int append_string_vector(StringVector *array, char *value) {
    value = strdup(value);
    if (!value) {
        return -1;
    }
    array->len  ;
    char **resized = realloc(array->array, sizeof(char *)*array->len   1);
    if (!resized) {
        free(value);
        return -1;
    }
    resized[array->len-1] = value;
    array->array = resized;
    return 0;
}
// --------------------------------------------------------------------------------

void pop_string_vector(StringVector *array, int index) {
    if (index >= array->len) {
        printf("Index %d out of bounds for pop_string_vector\n", index);
    }
    unsigned char **dst = (unsigned char **)array->array   index * array->elem;
    memmove(array->array   index, array->array   index   1,  \
            sizeof *array->array * array->len -1);
    array->len -= 1;
}
// --------------------------------------------------------------------------------

void unique_string_vec(StringVector *array) {
    int repeat_status = 0;
    for (int i = 0; ; i  ) {
        if (i >= array->len) break;
        for (int j = i   1; ; j  ) {
            if (j >= array->len) break;
            if (strcmp(array->array[i], array->array[j]) == 0){
                repeat_status = 1;
                pop_string_vector(array, j);
            }
        }
        if (repeat_status == 1) {
            pop_string_vector(array, i);
            repeat_status = 0;
            i -= 1;
        }
    }
}
// --------------------------------------------------------------------------------

void free_string_array(StringVector *array) {
    if (array != NULL) {
        for (int i = 0; i < array->len; i  ) {
            free(array->array[i]);
        }
    }
    free(array->array);
    // Reset all variables in the struct
    array->array = NULL;
    array->len = 0;
    array->elem = 0;
}

test_vector.cpp

#include <gtest/gtest.h>

extern "C" {
#include "vector.h"
}

TEST(string, unique_string_vec) {
    StringVector arr_test = init_string_vector();
    char one[] = "Hello";
    char two[] = "World";
    char three[] = "Hello";
    char four[] = "Goodbye";
    append_string_vector(&arr_test, one);
    append_string_vector(&arr_test, two);
    append_string_vector(&arr_test, three);
    append_string_vector(&arr_test, four);
    // - Even though the code works correctly, I get an error thrown
    //   by google test when I apply this function
    unique_string_vec(&arr_test);
    // These produce the correct value
    printf("%s\n", arr_test.array[0]);
    printf("%s\n", arr_test.array[1]);
    printf("%d\n", arr_test.len);

//  EXPECT_EQ(arr_test.len, 2);
//  int result1 = strcmp(two, arr_test.array[0]);
//  int result2 = strcmp(four, arr_test.array[1]);

//  EXPECT_EQ(result1, 0);
//  EXPECT_EQ(result2, 0);
}

CodePudding user response:

malloc.c:2617: sysmalloc: Assertion (old_top == initial_top (av) && old_size == 0)

This error means: you have corrupted heap. Examples of heap corruption:

  • writing past the end of allocated buffer,
  • free()ing unallocated memory,
  • free()ing some allocated memory twice, etc. etc.

Heap corruption bugs are somewhat hard to find by inspection. Fortunately there are tools which can point you straight at the problem: Valgrind and Address Sanitizer.

Here is what Address Sanitizer had to say about your program:

==148888==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x604000000031 at pc 0x7facb56f055e bp 0x7ffe2ddb46b0 sp 0x7ffe2ddb3e60
READ of size 31 at 0x604000000031 thread T0
    #0 0x7facb56f055d in __interceptor_memmove ../../../../src/libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc:810
    #1 0x5621653f67f3 in pop_string_vector /tmp/array.c:54
    #2 0x5621653f695a in unique_string_vec /tmp/array.c:68
    #3 0x5621653f6e41 in main /tmp/array.c:106
    #4 0x7facb55017fc in __libc_start_main ../csu/libc-start.c:332
    #5 0x5621653f61a9 in _start (/tmp/a.out 0x11a9)

0x604000000031 is located 0 bytes to the right of 33-byte region [0x604000000010,0x604000000031)
allocated by thread T0 here:
    #0 0x7facb5765b48 in __interceptor_realloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:164
    #1 0x5621653f65fe in append_string_vector /tmp/array.c:38
    #2 0x5621653f6e35 in main /tmp/array.c:103
    #3 0x7facb55017fc in __libc_start_main ../csu/libc-start.c:332

SUMMARY: AddressSanitizer: heap-buffer-overflow ../../../../src/libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc:810 in __interceptor_memmove

Above error is not the cause of corruption (reading memory out of bounds can't cause corruption), but it is an indication that you are using heap incorrectly.

Once you fix the first bug, the next one (probably) will be the one that is the root cause of the crash.

CodePudding user response:

As it turns out, in this instance, the problem was caused in the init_string_vector function. The variable titled array.elem was instantiated as sizeof(char) and should have been sizeof(char *). It was a simple mistake, but caused memory allocation errors.

  • Related