Home > OS >  Filling a hash table with an linked list
Filling a hash table with an linked list

Time:10-07

I'm working on creating a hash table (array) with linked list collision control for a dictionary of words. The dictionary of words is first loaded into a linked list named "list" using the data structure below. However I have issues in creating the collision control if I have multiple entries per hash code in the array: The linked list is created ad infinitum for multiple entries..

That's the structure I'm using for the entries.

typedef struct node
{
    char word[LENGTH   1];
    int hash;
    struct node *next;
}
node;

And here's the part to fill the hash table:

    while (list->next != NULL)
    {
        if (table[list->hash].hash != list->hash) // No entry in the array yet
        {
            node *tmp = list->next;
            table[list->hash] = *list;
            table[list->hash].next = NULL;
            list = tmp;
        }
        else                                     // If array is already filled
        {
            node *tmp = list->next;
            list->next = &table[list->hash];
            table[list->hash] = *list;
            list = tmp;
        }
    }

Hope you can help, thanks!

See full code here:

#include <ctype.h>
#include <stdbool.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>

#define LENGTH 45

const unsigned int N = 4000;

typedef struct node
{
    char word[LENGTH   1];
    int hash;
    struct node *next;
}
node;

bool load(const char *dictionary);
unsigned int hash(const char *word);

node table[N];

int main(void)
{
    char *dict = "dictionaries/medium";
    bool loaded = load(dict);
    if (!loaded)
    {
        printf("could not load dictionary\n");
        return 1;
    }
    printf("worked\n");
    return 0;
}

bool load(const char *dictionary)
{
    FILE *input = fopen(dictionary, "r");
    if (input == NULL)
    {
        return false;
    }

    node *list = NULL;
    char word[LENGTH   1];
    int wordcount = 0;

    while(fscanf(input, "%s", word) != EOF)
    {
        if (list == NULL)
        {
            node *n = malloc(sizeof(node));
            if (n == NULL)
            {
                return false;
            }
            strcpy(n->word, word);
            n->hash = hash(word);
            n->next = NULL;
            list = n;
            wordcount  ;
        }
        else
        {
            node *n = malloc(sizeof(node));
            if (n == NULL)
            {
                return false;
            }
            strcpy(n->word, word);
            n->hash = hash(word);
            n->next = list;
            list = n;
            wordcount  ;
        }
    }
    while (list->next != NULL)
    {
        if (table[list->hash].hash != list->hash)
        {
            node *tmp = list->next;
            table[list->hash] = *list;
            table[list->hash].next = NULL;
            list = tmp;
        }
        else
        {
            node *tmp = list->next;
            list->next = &table[list->hash];
            table[list->hash] = *list;
            list = tmp;
        }
    }
    // test function to print
    printf("%s\n%s\n%s\n%s\n", table[196].word, table[196].next->word, table[196].next->next->word, table[196].next->next->next->word);
    return true;
    fclose(input);
}

unsigned int hash(const char *word)
{
    int hash = 0;

    for (int i = 0; i < strlen(word); i  )
    {
        if (word[i] != '\'')
        {
            hash  = ((toupper(word[i]) - 64));
        }
        if (i % 2 == 0)
        {
            hash = (hash * 3);
        }
    }

    hash = hash % N;
    return hash;
}

The dictionaries/medium file contains the following items currently:

maxim
maxim
Nina
maxim
Lukas
Nina

That's also why I am using array element 196 to print out which is the hash code for 'Nina'.

Edit: thanks to the replies so far I have removed redundancies and tried to fill the array as the list is created. However I still have an issue which I think is due to self-referencing when an array spot is already filled. Really lost how to resolve that. New code is here:

#include <ctype.h>
#include <stdbool.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>

#define LENGTH 45

const unsigned int N = 4000;

typedef struct node
{
    char word[LENGTH   1];
    int hash;
    struct node *next;
}
node;

bool load(const char *dictionary);
unsigned int hash(const char *word);

node table[N];

int main(void)
{
    char *dict = "dictionaries/medium";
    bool loaded = load(dict);
    if (!loaded)
    {
        printf("could not load dictionary\n");
        return 1;
    }
    printf("worked\n");
    return 0;
}

bool load(const char *dictionary)
{
    FILE *input = fopen(dictionary, "r");
    if (input == NULL)
    {
        return false;
    }

    node *list = NULL;
    char word[LENGTH   1];
    int wordcount = 0;

    while(fscanf(input, "%s", word) != EOF)
    {
        node *n = malloc(sizeof(node));
        if (n == NULL)
        {
            return false;
        }
        strcpy(n->word, word);
        n->hash = hash(word);
        if (table[n->hash].hash != n->hash)
        {
            n->next = list;
            table[n->hash] = *n;
        }
        else
        {
            n->next = &table[n->hash];
            table[n->hash] = *n;
        }
        free(n);
        wordcount  ;
    }

    // test function to print
    printf("%s\n%s\n%s\n", table[196].word, table[196].next->word, table[196].next->next->word);
    return true;
    fclose(input);
}

unsigned int hash(const char *word)
{
    int hash = 0;

    for (int i = 0; i < strlen(word); i  )
    {
        if (word[i] != '\'')
        {
            hash  = ((toupper(word[i]) - 64));
        }
        if (i % 2 == 0)
        {
            hash = (hash * 3);
        }
    }

    hash = hash % N;
    return hash;
}

CodePudding user response:

This part of the code is wrong:

    while (list->next != NULL)
    {
        if (table[list->hash].hash != list->hash)
        {
            node *tmp = list->next;
            table[list->hash] = *list;
            table[list->hash].next = NULL;
            list = tmp;
        }
        else
        {
            node *tmp = list->next;
            list->next = &table[list->hash];
            table[list->hash] = *list;
            list = tmp;
        }
    }

In particular, the else block is wrong, and the loop ignores the final element on the list. It should be something like this:

    while (list != NULL)
    {
        node *next = list->next;
        node copy = table[list->hash];
        list->next = copy.next;
        table[list->hash] = *list;
        *list = copy;
        list = next;
    }

Another problem is that the input file is never closed because these lines are in the wrong order:

    return true;
    fclose(input);

The fclose call is never reached.

The way that the length of the array table is specified is not defined by C:

const unsigned int N = 4000;
/*...*/
node table[N];

The length needs to be an integer constant expression. That could be fixed by replacing const unsigned int N = 4000; with a macro definition: #define N 4000.


The code design can by simplified by changing node table[N]; to node *table[N];, i.e. by turning it into an array of pointers. Also, there is no need to store the hash in the node. The hash is just an index into the hashtable:

#include <ctype.h>
#include <stdbool.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>

#define LENGTH 45

#define N 4000

typedef struct node
{
    char word[LENGTH   1];
    struct node *next;
}
node;

bool load(const char *dictionary);
unsigned int hash(const char *word);

node *table[N];

int main(void)
{
    char *dict = "dictionaries/medium";
    bool loaded = load(dict);
    if (!loaded)
    {
        printf("could not load dictionary\n");
        return 1;
    }
    printf("worked\n");
    return 0;
}

bool load(const char *dictionary)
{
    FILE *input = fopen(dictionary, "r");
    if (input == NULL)
    {
        return false;
    }

    char word[LENGTH   1];
    int wordcount = 0;

    while(fscanf(input, "%s", word) != EOF)
    {
        int wordhash = hash(word);
        node *n = malloc(sizeof(node));
        if (n == NULL)
        {
            return false;
        }
        strcpy(n->word, word);
        n->next = table[wordhash];
        table[wordhash] = n;
        wordcount  ;
    }
    fclose(input);
    return true;
}

unsigned int hash(const char *word)
{
    int hash = 0;

    for (int i = 0; i < strlen(word); i  )
    {
        if (word[i] != '\'')
        {
            hash  = ((toupper(word[i]) - 64));
        }
        if (i % 2 == 0)
        {
            hash = (hash * 3);
        }
    }

    hash = hash % N;
    return hash;
}
  • Related