I have the following DAG (Directed Acyclic Graph) data structure:
struct dag {
struct dag **children; // Array of pointers to DAGs
int n_children; // Number of children
};
I want this to be a DAG and not just a tree, which means some of the children of a given DAG may be identical. This raises a problem in the deletion function:
void del(struct dag *d)
{
if (d != NULL) {
for(int i = 0; i < d->n_children; i ) {
del(d->children[i]);
}
free(d->children);
free(d);
}
}
Indeed, I end up freeing stuff which is already freed, as shown by the example below (which raises a segmentation fault). This code creates two DAGs a
and b
; a
has no children, b
has two children which are both a
.
void main()
{
struct dag *a, *b, **b_children;
a = malloc(sizeof(*a));
a->children = NULL;
a->n_children = 0;
b_children = malloc(sizeof(a) * 2);
b_children[0] = a;
b_children[1] = a;
b = malloc(sizeof(*b));
b->children = b_children;
b->n_children = 2;
del(b);
}
What is flawed in this approach ? Can I write a del
function that does what I want ?
CodePudding user response:
For a directed acyclic graph, the nodes may be referenced from multiple parents, which requires extra bookkeeping to handle the life cycle.
There are multiple approaches for this:
you can add a reference count in each node, increment it for every extra pointer that references the node and decrement it whenever a pointer no longer references the node, in particular when the parent node is to be freed. Reference counting is very tricky to implement correctly and reliably, and is not a complete solution if your graph ever becomes more complex, ie: if there are actual cycles.
you can maintain a linked list of allocated nodes and free the full list when the tree is freed. This approach allows for arbitrary node cross references but does not allow for freeing individual nodes. You can only free the whole tree at once. You can still delete individual nodes from the tree but they will remain allocated in the list until you free the whole tree. Also note that the overhead is equivalent: an extra pointer instead vs: a single integer is needed for each node.
Depending on the specifics of the problem you are solving with this tree, one approach might be more appropriate than the other.
In both cases, you should use functions to allocate, update and delete the nodes.
Here is a version with reference counts:
#include <stdio.h>
#include <stdlib.h>
static int total_dag_nodes = 0; // for debugging
struct dag {
int refs;
int n_children; // Number of children
struct dag **children; // Array of pointers to DAGs
};
struct dag *new_dag(int n) {
struct dag *s = malloc(sizeof *s);
if (s == NULL)
return NULL;
total_dag_nodes ;
s->refs = 1;
s->n_children = 0;
s->children = NULL;
if (n > 0) {
s->n_children = n;
s->children = calloc(sizeof(*s->children), n);
}
return s;
}
void free_dag(struct dag *s) {
if (s && --s->refs <= 0) {
for (int i = 0; i < s->n_children; i )
free_dag(s->children[i]);
free(s);
total_dag_nodes--;
}
}
int set_dag_child(struct dag *s, int n, struct dag *child) {
if (s == NULL || n < 0 || n >= s->n_children) {
/* invalid argument */
return -1;
}
struct dag *prev = s->children[n];
s->children[n] = child;
if (child != NULL)
child->refs ;
/* delay freeing the previous node to avoid freeing it
if it is identical to new one. */
free_dag(prev);
return 0;
}
void print_dag(const char *name, struct dag *s) {
printf("%s: %p", name, (void*)s);
if (s) {
printf(" [refs:%d] {", s->refs);
for (int i = 0; i < s->n_children; i )
printf(" %p", (void*)s->children[i]);
printf(" }");
}
printf(";\n");
}
int main() {
struct dag *a = new_dag(0);
struct dag *b = new_dag(2);
printf("\ntotal_dag_nodes=%d\n", total_dag_nodes);
print_dag("a", a);
print_dag("b", b);
set_dag_child(b, 0, a);
set_dag_child(b, 1, a);
printf("\ntotal_dag_nodes=%d\n", total_dag_nodes);
print_dag("a", a);
print_dag("b", b);
free_dag(a); // removes the reference from a to the first dag
printf("\ntotal_dag_nodes=%d\n", total_dag_nodes);
print_dag("b[0]", b->children[0]);
print_dag("b", b);
free_dag(b); // should remove all references to both dags
printf("\ntotal_dag_nodes=%d\n", total_dag_nodes);
return 0;
}
Output:
total_dag_nodes=2 a: 0x7f885f600000 [refs:1] { };
b: 0x7f885f600010 [refs:1] { 0x0 0x0 };
total_dag_nodes=2 a: 0x7f885f600000 [refs:3] { };
b: 0x7f885f600010 [refs:1] { 0x7f885f600000 0x7f885f600000 };
total_dag_nodes=2 b[0]: 0x7f885f600000 [refs:2] { };
b: 0x7f885f600010 [refs:1] { 0x7f885f600000 0x7f885f600000 };
total_dag_nodes=0
Here is an alternative using a global list:
#include <stdio.h>
#include <stdlib.h>
static int total_dag_nodes = 0; // for debugging
static struct dag *dag_list;
struct dag {
struct dag *next; // linked list for maintenance
int n_children; // Number of children
struct dag **children; // Array of pointers to DAGs
};
struct dag *new_dag(int n) {
struct dag *s = malloc(sizeof *s);
if (s == NULL)
return NULL;
total_dag_nodes ;
s->next = dag_list;
dag_list = s;
s->n_children = 0;
s->children = NULL;
if (n > 0) {
s->n_children = n;
s->children = calloc(sizeof(*s->children), n);
}
return s;
}
void free_dag(struct dag *s) {
/* nothing to do because tag could be referenced elsewhere */
}
void free_all_dags(void) {
struct dag *p;
while ((p = dag_list) != NULL) {
dag_list = p->next;
free(p->children);
free(p);
total_dag_nodes--;
}
}
void print_dag(const char *name, struct dag *s) {
printf("%s: %p", name, (void*)s);
if (s) {
printf(" {");
for (int i = 0; i < s->n_children; i )
printf(" %p", (void*)s->children[i]);
printf(" }");
}
printf(";\n");
}
int main() {
struct dag *a = new_dag(0);
struct dag *b = new_dag(2);
printf("\ntotal_dag_nodes=%d\n", total_dag_nodes);
print_dag("a", a);
print_dag("b", b);
b->children[0] = a;
b->children[1] = a;
printf("\ntotal_dag_nodes=%d\n", total_dag_nodes);
print_dag("a", a);
print_dag("b", b);
free_dag(a); // individual frees do nothing
printf("\ntotal_dag_nodes=%d\n", total_dag_nodes);
print_dag("b[0]", b->children[0]);
print_dag("b", b);
free_all_dags();
printf("\ntotal_dag_nodes=%d\n", total_dag_nodes);
return 0;
}
Output:
total_dag_nodes=2 a: 0x7f8d7b402650 { };
b: 0x7f8d7b402670 { 0x0 0x0 };
total_dag_nodes=2 a: 0x7f8d7b402650 { };
b: 0x7f8d7b402670 { 0x7f8d7b402650 0x7f8d7b402650 };
total_dag_nodes=2 b[0]: 0x7f8d7b402650 { };
b: 0x7f8d7b402670 { 0x7f8d7b402650 0x7f8d7b402650 };
total_dag_nodes=0