I am working on a series of C functions to allow a user to dynamically build an array. The core of the library resides in the Array
struct
which contains a pointer variable array
that contains the array data, len
which contains the length of the array, size
, which is the total memory allocation for the array, elem
, which contains the memory allocation per indices, and pointer variables name
and dtype
which contains strings describing the name of the array and the type of the array. For the moment I have constrained the scope so that only int
, float
, double
, and char
arrays can be considered.
Thus far I have defined, and individually tested the following functions;
array_mem_alloc
which contains code that allocates memory for an array.init_array
which is a wrapper aroundarray_mem_alloc
that instantiates anArray
struct
, determines the data type and returns anArray
data type to a user.append_array
which allows a user to dynamically grow an array one index at a time, or add an already defined array.free_array
which frees all memory and resetsstruct
variablesint_array_val
which typecasts the data at an index and returns to user. I have versions of this function for all relevant data types, but for this problem I will only use this version.find_int_array_indices
which looks for where a specific integer exists in the array and records the index number into another array which is returned to the user.
For the purposes of testing find_int_array_indices
I am calling init_array
for a variable titled arr_test
and appending it with 7 integers int a[7] = {6, 1, 3, 6, 6, 4, 5}
. I pass the Array
container arr_test
to the find_int_array_indices
function and everything works fine, which also returns another Array
container titled p
. However, when I try to retrieve the integer variables with the int_array_val
function it fails, because it does not recognize the variable array->dtype
as containing the string "int"
. However, when I test the container inside of find_int_array_indices
and in the main function, the variable does contain the string "int"
. This tells me that I probably have a pointer error, but I do not see it. Any advice would be very useful. I am wondering if I need to go back to the beginning and define name
and dtype
as fixed length arrays in the Array
struct
instead of as pointer variables.
array.h
typedef struct
{
void *array; // Pointer to array
size_t len; // Active length of array
size_t size; // Number of allocated indizes
int elem; // Memory consumption per indice
char *name; // The array name
char *dtype; // A string representing the datatype
} Array;
void array_mem_alloc(Array *array, size_t num_indices);
Array init_array(char *dtype, size_t num_indices, char *name);
int append_array(Array *array, void *elements, size_t count);
void free_array(Array *array);
int int_array_val(Array *array, int indice);
Array find_int_array_indices(Array *array, int integer);
array.c
void array_mem_alloc(Array *array, size_t num_indices) {
// Determine the total memory allocation and assign to pointer
void *pointer;
pointer = malloc(num_indices * array->elem);
// If memory is full fail gracefully
if (pointer == NULL) {
printf("Unable to allocate memory, exiting.\n");
free(pointer);
exit(0);
}
// Allocate resources and instantiate Array
else {
array->array = pointer;
array->len = 0;
array->size = num_indices;
}
}
// --------------------------------------------------------------------------------
Array init_array(char *dtype, size_t num_indices, char *name) {
// Determine memory blocks based on data type
int size;
if (strcmp(dtype, "float") == 0) size = sizeof(float);
else if (strcmp(dtype, "int") == 0) size = sizeof(int);
else if (strcmp(dtype, "double") == 0) size = sizeof(double);
else if (strcmp(dtype, "char") == 0) size = sizeof(char);
else {
printf("Data type not correctly entered into init_array, exiting program!\n");
exit(0);
}
// Allocate indice size and call array_mem_alloc
Array array;
array.dtype = dtype;
array.elem = size;
array_mem_alloc(&array, num_indices);
array.name = name;
return array;
}
// --------------------------------------------------------------------------------
int append_array(Array *array, void *elements, size_t count) {
// Allocae more memory if necessary
if (array->len count > array->size) {
size_t size = (array->len count) * 2;
void *pointer = realloc(array->array, size * array->elem);
// If memory is full return operations
if (pointer == NULL) {
printf("Unable to allocate memory, exiting.\n");
return 0;
}
// Allocate memory to variables and increment array size
array->array = pointer;
array->size = size;
}
// Append variables and increment the array length
memcpy((char *)array->array array->len * array->elem, elements, count * array->elem);
array->len = count;
return 1;
}
// --------------------------------------------------------------------------------
void free_array(Array *array) {
// Free all memory in the array
free(array->array);
// Reset all variables in the struct
array->array = NULL;
array->size = 0;
array->len = 0;
array->elem = 0;
}
// --------------------------------------------------------------------------------
int int_array_val(Array *array, int indice) {
// Ensure array contains integers
printf("%s\n", array->dtype);
if (strcmp(array->dtype, "int") != 0) {
printf("Function can only return integer values, exiting function!\n");
exit(0);
}
// Cast value to an integer and return
int a = ((int *)array->array)[indice];
return a;
}
Array find_int_array_indices(Array *array, int integer) {
int number = 0;
int input;
for (int i = 0; i < array->len; i ) {
if (integer == int_array_val(array, i)) {
number ;
}
}
char dtype[7] = "int";
char name[9] = "indices";
Array indice_arr = init_array(dtype, number, name);
for (int i = 0; i < array->len; i ) {
input = i;
if (integer == int_array_val(array, i)) {
append_array(&indice_arr, &input, 1);
}
}
return indice_arr;
}
main.c
size_t indices = 10;
char name[6] = "array";
char dtype[7] = "int";
Array arr_test = init_array(dtype, indices, name);
int a[7] = {6, 1, 3, 6, 6, 4, 5};
append_array(&arr_test, a, 7);
Array p = find_int_array_indices(&arr_test, 6);
printf("%s\n", p.dtype); // This shows that p does contain dtype "int"
int d = int_array_val(&p, 0); // This fails in function, because it does not see dtype = "int"???
printf("%d\n", d);
CodePudding user response:
In find_int_array_indices
char dtype[7] = "int";
char name[9] = "indices";
are both local variables, which are invalidated when the function returns. See: Dangling pointer and Lifetime.
init_array
uses these values as if they had a lifetime to match its return value
Array array;
array.dtype = dtype;
array.elem = size;
array_mem_alloc(&array, num_indices);
array.name = name;
return array;
which, as a structure type, is a lifetime determined by the context of its caller (return is copy, after all).
find_int_array_indices
completes the error when it returns indice_arr
to main
.
Some options:
- Strictly use pointers to strings with static storage duration.
- Change your structure definition to include space for these strings (or allocate it), and perform string copies.
- Use an enumerated type instead.
- Ditch this string-based, type limited paradigm all together by supporting all memory sizes generically (the naming feature remains an issue, though).
A rather long-winded continuation, to elaborate on using enumerated types:
The idea is to define a smaller set of acceptable values that your library works with, and making the user more aware of these values. As we can see, you have partially done that using strings but the implementation has some issues, as strings are generally clunky. Some problems with strings:
- you have no control over the strings that users of your library use (this leads you to have to exit1 the program in the event the users enters something unexpected, which is easy to do),
- you must account for their potentially large or excess memory consumption,
- string comparison is O(N),
- strings are generally unsafe in C, requiring more care than other basic constructs when handling them (assignment, comparison, storage).
So instead of using strings ("foo"
, "bar
", "qux"
in these examples), we use an enumerated type
enum OBJECT_TYPE {
OBJECT_FOO,
OBJECT_BAR,
OBJECT_QUX
};
which establishes the following:
- it is more clear what the acceptable values are
- some2 control over what users enter, via type hinting
- comparison is O(1)
- handling is the same as any integral type
The structure definition then looks like
typedef struct {
/* ... whatever members are needed for the structure */
size_t something_based_on_type;
enum OBJECT_TYPE type;
char debug_name[MAX_DEBUG_NAME];
} Object;
Nothing can really be done about the name member of your structure. If you want user defined nametags for things, then yes, as stated previously, you need to allocate space for them.
Our initialization function works similarly, but we can2 take advantage of some properties of integral types.
void object_init(Object *object, enum OBJECT_TYPE type, const char *debug_name) {
/* ... accept other arguments, whatever is needed to initialize */
size_t value_translations[] = { 42, 51, 99 };
object->type = type;
/* while neat, this is somewhat naive, see footnotes */
object->something_based_on_type = value_translations[type];
if (debug_name && strlen(debug_name) < MAX_DEBUG_NAME)
strcpy(object->debug_name, debug_name);
else
*object->debug_name = '\0';
}
Now we want to provide a function that works with our generic data of only type OBJECT_FOO
(like your int_array_val
). Again, the comparison is much easier to understand.
void object_print_foo(Object *o) {
if (OBJECT_FOO != o->type)
/* handle type mismatch */;
}
Although it would be better to provide a generic object_print
function that again branches based on o->type
.
A main function for completeness:
int main(void) {
Object a;
object_init(&a, OBJECT_QUX, "object_a");
object_print_foo(&a);
}
This is the general idea of using enumerated types.
With all that said, I think this is not really any better than just handling arbitrary data sizes, risks included. Something like
const void *array_get(Array *array, size_t index) {
if (index >= array->length)
return NULL;
return (char *) array->array index * array->elem;
}
works, if the user respects the const
contract, and uses the correct types (they would need to remember their typing with specifically typed getters too).
Generic data structures in C are a bit of a leap of faith no matter what.
1. So a note on exit
ing from library code: don't. As a library author, you have no reasonable right to cause user programs to terminate (unless requested, or the user invokes UB outside your control). Delegate upwards, return errors, and let the user exit the program on their own terms, as they may need to perform their own cleanups (or might carry on if the failure is non-critical).
2. C's enumeration type is rather weak. enum
are actually just int
, and users can enter plain integer values outside the specified ranges. This is akin to invoking undefined behavior from a library's point of view, but we may wish to protect the user anyway.