Goal is to read a web page, store all words in a trie with each node containing one letter and a count of the number of characters, print the words and number of occurrences. I keep getting a segmentation fault and I think the issue is in one of these functions. Thanks!
struct trieNode *indexPage(const char *url) {
if (url == NULL) {
return NULL;
printf("Web link must be provided.");
}
//get text from page and check return value
char *page = NULL;
int bytesRead = getText(url, page, MAX_BUFFER_SIZE);
if (page == NULL) {
printf("Page could not be indexed.");
return NULL;
}
//index buffer into separate words
int i = 0;
char *word = NULL;
struct trieNode *node = malloc(sizeof(struct trieNode));
if (node == NULL) {
printf("Node memory could not be allocated.");
return NULL;
}
while (i < bytesRead) {
while (isalpha(page[i])) {
word[i] = page[i];
}
addWordOccurrence(word, sizeof(word), i);
i ;
}
return node;
}
//Create space for node in heap and add to trie structure
int addWordOccurrence(const char* word, const int wordLength, int index) {
if (word == NULL)
return -1;
//allocate memory for new node
struct trieNode *node = malloc(sizeof(struct trieNode));
if (node == NULL) {
printf("Node memory could not be allocated.");
return -2;
}
//recursively add characters to trie and
//increase count
if (index < wordLength) {
setNodeData(node->child[index], word[index]);
node->count ;
}
addWordOccurrence(word, wordLength, index 1);
return 0;
}
Using gdb I found the fault may be coming from the print function, possibly when trying to access pointers.
//Prints contents
void printTrieContents(struct trieNode *root) {
//if child is found with a non zero count
//add child character to string
char *word = NULL;
for (int i = 0; i < SIZE; i ) {
if ((root->count) && (root->child[i])) {
word[i] = i 'a';
printTrieContents(root->child[i]);
}
}
if (root->child == NULL) {
printf("%s: %d", word, root->count);
}
}
CodePudding user response:
There are multiple issues:
- in
indexPage
,while (isalpha(page[i])) { word[i] = page[i]; }
is potentially an infinite loop. - in
printTrieContents
,word[i] = i 'a'
dereferences a null pointer asword
is never allocated. addWordOccurrence
always recurses, even after reaching the last character. There is no need for recursion, use a loop and a proper test.- more algorithmic issues: the code needs a lot a work.
CodePudding user response:
superficially, it looks like addWordOccurrence(word, sizeof(word), i); should be addWordOccurrence(word, sizeof(word), 0); - the last parameter being the index of each letter that is handled in the recursion.