Home > Enterprise >  What is wrong with my trie suggestion algorithm?
What is wrong with my trie suggestion algorithm?

Time:08-01

I am coding an algorithm that uses the trie data structure. Basically, if input is "he", it will return ["hello","help","held","hen",other_words_starting_with_he]. code:

//NOTE: The expression sizeof(array)/sizeof(node) must always evaluate to 26. Not more; not less, for all the node arrays.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char *chars="abcdefghijklmnopqrstuvwxyz";
struct node{
    char ch;
    struct node *next[26];
};
void init_w_null(struct node **n, int len){
    register int counter=0;
    while(counter<len){
       *(n counter)=NULL;
       counter  ;
    }
}
int index_of_char(char ch){
    register int counter=0;
    while(*(chars counter)!='\0'){
        if(*(chars counter)==ch){
            return counter;
        }
        counter  ;
    }
    return -1;
}
void insert(struct node **root, char *key){
    if(*root==NULL){
        *root=(struct node*)malloc(sizeof(struct node));
        if(*root==NULL){
            perror("[malloc]");
            exit(EXIT_FAILURE);
        }
        init_w_null((**root).next,26);
        struct node *selected=*root;
        (**root).ch=key[0];
        register int counter=1;
        while(counter<strlen(key)){
            int ind=index_of_char(key[counter]);
            (*selected).next[ind]=(struct node *)malloc(sizeof(struct node));
            if(selected==NULL){
                perror("[malloc]");
                exit(EXIT_FAILURE);
            }
            (*(*selected).next[ind]).ch=key[counter];
            selected=(*selected).next[ind];
            counter  ;
        }
        return;
    }
    register int counter=1;
    struct node *selected=*root;
    while(counter<=strlen(key)){
        int ind=index_of_char(key[counter]);
        if((*selected).next[ind]!=NULL){
            selected=(*selected).next[ind];
            counter  ;
            continue;
        }
        (*selected).next[ind]=(struct node*)malloc(sizeof(struct node));
        if((*selected).next[ind]==NULL){
            perror("[malloc]");
            exit(EXIT_FAILURE);
        }
        (*(*selected).next[ind]).ch=key[counter];
        selected=(*selected).next[ind];
        init_w_null((*selected).next,26);
        counter  ;
    }
}
void find(struct node *root, char *key){
    register int counter=1;
    struct node *selected=root;
    int ind=0;
    while(counter<=strlen(key)){
        //if key param ends, and tree doesn't
        if(key[counter]=='\0'){
            printf("List of possible keys:\n");
            construct_str(selected,key);
            return;
        }
        ind=index_of_char(key[counter]);
        //a character of key not found.
        if((*selected).next[ind]==NULL){
            puts("Similar keys not found.");
            return;
        }
        selected=(*selected).next[ind];
        counter  ;
    }
    puts("Key found.");
}
void construct_str(struct node *n, char *str){
    puts("[construct_str]");
    //end of recursion
    if(all_children_null(n)&&n!=NULL){
        printf("%s\n",str);
        return;
    }
    register int counter=0;
    while(counter<26){
        if((*n).next[counter]!=NULL){
            char nstr[2];
            nstr[0]=(*(*n).next[counter]).ch;
            nstr[1]='\0';
            str=strcat(str,nstr);
            construct_str((*n).next[counter],str);
        }
        counter  ;
    }
}
int all_children_null(struct node *n){
    register int counter=0;
    while(counter<26){
        if((*n).next[counter]!=NULL){
            return 0;
        }
        counter  ;
    }
    return 1;
}
void insert_full(struct node **arr, char *key){
    int first=index_of_char(key[0]);
    insert(&arr[first],key);
}
//a debugging function to see whether insertion is successful.
/*void raw_print(struct node *n){
    //puts("[raw_print]");
    if(n!=NULL){
        putchar((*n).ch);
        register int counter=0;
        for(;counter<26;counter  ){
            raw_print((*n).next[counter]);
        }
        if(all_children_null(n)){
            printf("\nAll children of %c are NULL.\n",(*n).ch);
        }
    }
}*/
int main(){
    struct node *nds[26];
    init_w_null(nds,26);
    insert_full(nds,"hello");
    insert_full(nds,"help");

    insert_full(nds,"bruh");
    insert_full(nds,"lmao");
    find(nds[index_of_char('l')],"lm");
    return 0;
}

output:

List of possible keys:
[construct_str]
Segmentation fault (core dumped)

I've narrowed the problem down to construct_str. Please tell me if I'm wrong, though. FOR PEOPLE WHO DON'T KNOW WHAT A TRIE IS:

It's a structure which can store strings with the same prefix, so if we add "hello" and "help", the fourth character in both the strings would be siblings. the node of a trie contains a character and a node array with 26 members.

CodePudding user response:

I see that my crystal ball is well attuned today.

Starting with ...

    find(nds[index_of_char('l')],"lm");

... you are passing a string literal as the second argument to find(). In that function, a pointer to its first character is associated with parameter key.

Within find(), you are forwarding that pointer to construct_str():

            construct_str(selected,key);

, wherein it is associated with parameter str.

In construct_str(), you are passing that pointer as the first argument to strcat():

            str=strcat(str,nstr);

strcat appends the contents of the second string to the array containing the first. It does not create a new array for the concatenated result. Therefore, the left pointer must point (in)to an array that

  1. is modifiable, and
  2. contains enough space after the end of the string to accommodate the extra contents.

String literals do not satisfy either criterion. Oops. Undefined behavior results.

CodePudding user response:

In the line

str=strcat(str,nstr);

str is a pointer to a string literal. You are not allowed to modify them. Attempting to modify a string literal using the function strcat will invoke undefined behavior.

When calling strcat, you must ensure that the first argument is pointing to a memory buffer which

  • you are allowed to write to, and
  • has sufficient space for adding the character(s) to the string.
  • Related