I'm working on creating a hash table (array) with linked list collision control for a dictionary of words. The dictionary of words is first loaded into a linked list named "list" using the data structure below. However I have issues in creating the collision control if I have multiple entries per hash code in the array: The linked list is created ad infinitum for multiple entries..
That's the structure I'm using for the entries.
typedef struct node
{
char word[LENGTH 1];
int hash;
struct node *next;
}
node;
And here's the part to fill the hash table:
while (list->next != NULL)
{
if (table[list->hash].hash != list->hash) // No entry in the array yet
{
node *tmp = list->next;
table[list->hash] = *list;
table[list->hash].next = NULL;
list = tmp;
}
else // If array is already filled
{
node *tmp = list->next;
list->next = &table[list->hash];
table[list->hash] = *list;
list = tmp;
}
}
Hope you can help, thanks!
See full code here:
#include <ctype.h>
#include <stdbool.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#define LENGTH 45
const unsigned int N = 4000;
typedef struct node
{
char word[LENGTH 1];
int hash;
struct node *next;
}
node;
bool load(const char *dictionary);
unsigned int hash(const char *word);
node table[N];
int main(void)
{
char *dict = "dictionaries/medium";
bool loaded = load(dict);
if (!loaded)
{
printf("could not load dictionary\n");
return 1;
}
printf("worked\n");
return 0;
}
bool load(const char *dictionary)
{
FILE *input = fopen(dictionary, "r");
if (input == NULL)
{
return false;
}
node *list = NULL;
char word[LENGTH 1];
int wordcount = 0;
while(fscanf(input, "%s", word) != EOF)
{
if (list == NULL)
{
node *n = malloc(sizeof(node));
if (n == NULL)
{
return false;
}
strcpy(n->word, word);
n->hash = hash(word);
n->next = NULL;
list = n;
wordcount ;
}
else
{
node *n = malloc(sizeof(node));
if (n == NULL)
{
return false;
}
strcpy(n->word, word);
n->hash = hash(word);
n->next = list;
list = n;
wordcount ;
}
}
while (list->next != NULL)
{
if (table[list->hash].hash != list->hash)
{
node *tmp = list->next;
table[list->hash] = *list;
table[list->hash].next = NULL;
list = tmp;
}
else
{
node *tmp = list->next;
list->next = &table[list->hash];
table[list->hash] = *list;
list = tmp;
}
}
// test function to print
printf("%s\n%s\n%s\n%s\n", table[196].word, table[196].next->word, table[196].next->next->word, table[196].next->next->next->word);
return true;
fclose(input);
}
unsigned int hash(const char *word)
{
int hash = 0;
for (int i = 0; i < strlen(word); i )
{
if (word[i] != '\'')
{
hash = ((toupper(word[i]) - 64));
}
if (i % 2 == 0)
{
hash = (hash * 3);
}
}
hash = hash % N;
return hash;
}
The dictionaries/medium file contains the following items currently:
maxim
maxim
Nina
maxim
Lukas
Nina
That's also why I am using array element 196 to print out which is the hash code for 'Nina'.
Edit: thanks to the replies so far I have removed redundancies and tried to fill the array as the list is created. However I still have an issue which I think is due to self-referencing when an array spot is already filled. Really lost how to resolve that. New code is here:
#include <ctype.h>
#include <stdbool.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#define LENGTH 45
const unsigned int N = 4000;
typedef struct node
{
char word[LENGTH 1];
int hash;
struct node *next;
}
node;
bool load(const char *dictionary);
unsigned int hash(const char *word);
node table[N];
int main(void)
{
char *dict = "dictionaries/medium";
bool loaded = load(dict);
if (!loaded)
{
printf("could not load dictionary\n");
return 1;
}
printf("worked\n");
return 0;
}
bool load(const char *dictionary)
{
FILE *input = fopen(dictionary, "r");
if (input == NULL)
{
return false;
}
node *list = NULL;
char word[LENGTH 1];
int wordcount = 0;
while(fscanf(input, "%s", word) != EOF)
{
node *n = malloc(sizeof(node));
if (n == NULL)
{
return false;
}
strcpy(n->word, word);
n->hash = hash(word);
if (table[n->hash].hash != n->hash)
{
n->next = list;
table[n->hash] = *n;
}
else
{
n->next = &table[n->hash];
table[n->hash] = *n;
}
free(n);
wordcount ;
}
// test function to print
printf("%s\n%s\n%s\n", table[196].word, table[196].next->word, table[196].next->next->word);
return true;
fclose(input);
}
unsigned int hash(const char *word)
{
int hash = 0;
for (int i = 0; i < strlen(word); i )
{
if (word[i] != '\'')
{
hash = ((toupper(word[i]) - 64));
}
if (i % 2 == 0)
{
hash = (hash * 3);
}
}
hash = hash % N;
return hash;
}
CodePudding user response:
This part of the code is wrong:
while (list->next != NULL)
{
if (table[list->hash].hash != list->hash)
{
node *tmp = list->next;
table[list->hash] = *list;
table[list->hash].next = NULL;
list = tmp;
}
else
{
node *tmp = list->next;
list->next = &table[list->hash];
table[list->hash] = *list;
list = tmp;
}
}
In particular, the else
block is wrong, and the loop ignores the final element on the list. It should be something like this:
while (list != NULL)
{
node *next = list->next;
node copy = table[list->hash];
list->next = copy.next;
table[list->hash] = *list;
*list = copy;
list = next;
}
Another problem is that the input file is never closed because these lines are in the wrong order:
return true;
fclose(input);
The fclose
call is never reached.
The way that the length of the array table
is specified is not defined by C:
const unsigned int N = 4000;
/*...*/
node table[N];
The length needs to be an integer constant expression. That could be fixed by replacing const unsigned int N = 4000;
with a macro definition: #define N 4000
.
The code design can by simplified by changing node table[N];
to node *table[N];
, i.e. by turning it into an array of pointers. Also, there is no need to store the hash in the node. The hash is just an index into the hashtable:
#include <ctype.h>
#include <stdbool.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#define LENGTH 45
#define N 4000
typedef struct node
{
char word[LENGTH 1];
struct node *next;
}
node;
bool load(const char *dictionary);
unsigned int hash(const char *word);
node *table[N];
int main(void)
{
char *dict = "dictionaries/medium";
bool loaded = load(dict);
if (!loaded)
{
printf("could not load dictionary\n");
return 1;
}
printf("worked\n");
return 0;
}
bool load(const char *dictionary)
{
FILE *input = fopen(dictionary, "r");
if (input == NULL)
{
return false;
}
char word[LENGTH 1];
int wordcount = 0;
while(fscanf(input, "%s", word) != EOF)
{
int wordhash = hash(word);
node *n = malloc(sizeof(node));
if (n == NULL)
{
return false;
}
strcpy(n->word, word);
n->next = table[wordhash];
table[wordhash] = n;
wordcount ;
}
fclose(input);
return true;
}
unsigned int hash(const char *word)
{
int hash = 0;
for (int i = 0; i < strlen(word); i )
{
if (word[i] != '\'')
{
hash = ((toupper(word[i]) - 64));
}
if (i % 2 == 0)
{
hash = (hash * 3);
}
}
hash = hash % N;
return hash;
}