Home > Net >  How would I be able to read every word in a line with an unknown length?
How would I be able to read every word in a line with an unknown length?

Time:03-04

I am trying to use a linked list to hold every word from a text file that contains a paragraph of text. Each line has an unknown number of words, separated by spaces. I thought I could use getline() and strtok() to read each word. However, the program only reads the first word on each line, so I thought I could use a loop that detects the end of each line so that all of the words are read.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// One element of a singly linked list of words.
struct node{
    char *word;         // the word held by this element
    struct node *next;  // the element that follows this one
};

// `link` is an alias for `struct node`, used for the linked list
typedef struct node link;

// A function meant to add a word to the front of the linked list and
// return the new head.
// NOTE(review): `char[] word` is not a valid C parameter declaration
// (`char word[]` or `char *word` would be), so this does not compile as written.
link *addName(char[] word, link *head){
    link *temp1;

    // allocate the new list element; the result is not checked for NULL
    temp1 = (link*)malloc(sizeof(link));
    // store a strdup() copy of word in the new element
    temp1->word = strdup(word);
    // NOTE(review): this assigns the character parameter `word` to the
    // `struct node *` field; the incoming `head` is never linked in.
    temp1->next = word;
    head = temp1;
    return head;
}

// Reads "paragraph.data" line by line with getline() and prints the first
// two space-separated tokens of every line. Nothing is added to the list yet.
int main(){
    FILE *fO; 
    // NOTE(review): the fopen() result is used below without a NULL check
    fO = fopen("paragraph.data", "r");
    // NOTE(review): &size is passed to getline(), which takes a size_t *
    int size = 0;
    int len = 0;   // not used anywhere below

    //initialize it for the getline() and strtok()
    char *line = 0;

    
    //use malloc
    // NOTE(review): allocates sizeof(int) bytes while size is still 0
    line = (char*)malloc(sizeof(int));

    //loop through the file
    while(getline(&line, &size, fO) != -1){
        // first token of the line
        char *word = strtok(line, " ");
        printf("the word: %s\n", word);

        //while(there is no "\n" detected?){}
        // second token only; strtok() returns NULL once no token is left,
        // and there is no loop here to keep asking for more
        word = strtok(NULL, " ");
        printf("the word: %s\n", word);
        //addName()
    }

}

The file looks like this (shortened for the example):

lorem ipsum
dolor
sit amet con sec
euter orci

Each line could have any number of words, which is what confuses me. Does anyone know how to make the while loop detect the end of each line? Right now it just prints out the first word of every line.

the name: lorem
the name: dolor
the name: sit
the name: euter 

CodePudding user response:

A number of issues ...

  1. char[] word is not valid C and won't compile
  2. temp1->next = word; won't compile -- word is a char * pointer and not a pointer to a node
  3. You want: temp1->next = head; to link the new node into the linked list
  4. You're not looping on strtok, so, of course, you'll only get one [or two] tokens.
  5. In main, size must be a size_t and not an int -- the getline call won't even compile.
  6. getline does not strip the newline
  7. line must be freed once, after the loop has finished (getline reuses the same buffer on every iteration)
  8. Your code does not call addName in main
  9. Don't cast the return of malloc; see "Do I cast the result of malloc?"

In the code below, I use C preprocessor (cpp) conditionals to mark old vs. new code:

#if 0
// old code
#else
// new code
#endif

#if 1
// new code
#endif

Here is the refactored code. I've changed link into node to be more descriptive. It is annotated with bugs and fixes:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct node node;
// One element of the singly linked word list.
struct node {
    char *word;                         // strdup() copy of the word
    node *next;                         // next element, NULL at end of list
};

// Push a copy of word onto the front of the linked list.
//
// word: NUL-terminated string to store; it is duplicated, so the caller
//       keeps ownership of its own buffer.
// head: current first element of the list, or NULL for an empty list.
//
// Returns the new head. If memory cannot be allocated, the list is left
// untouched and head is returned unchanged.
// NOTE/BUG: char[] isn't valid C and won't compile
#if 0
node *
addName(char[] word, node *head)
#else
node *
addName(const char *word, node *head)
#endif
{
    node *temp1;

    temp1 = malloc(sizeof(*temp1));
// NOTE/BUG: malloc can fail -- the result must be checked before use
#if 1
    if (temp1 == NULL)
        return head;
#endif

    // add char type word to linked list
    temp1->word = strdup(word);
// NOTE/BUG: strdup can fail too -- don't link a node that has no word
#if 1
    if (temp1->word == NULL) {
        free(temp1);
        return head;
    }
#endif
// NOTE/BUG: word [corrected] is a char* and can't be assigned to next
#if 0
    temp1->next = word;
#else
    temp1->next = head;
#endif
    head = temp1;

    return head;
}

// Read paragraph.data line by line, split every line into space-separated
// words, push each word onto the front of the linked list, then print the
// list (the last word read is printed first).
//
// Returns 0 on success, 1 if the input file cannot be opened.
int
main()
{
    FILE *fO;

    fO = fopen("paragraph.data", "r");
// NOTE/BUG: fopen returns NULL on failure and getline must not be given that
#if 1
    if (fO == NULL) {
        perror("paragraph.data");
        return 1;
    }
#endif
// NOTE: bug size must be size_t or the getline won't compile
#if 0
    int size = 0;
#else
    size_t size = 0;
#endif
// NOTE/BUG: len is never used
#if 0
    int len = 0;
#endif

    // initialize it for the getline() and strtok()
#if 0
    char *line = 0;
#else
    char *line = NULL;
#endif

    // use malloc
// NOTE/BUG: getline expects a null pointer if size is 0
#if 0
    line = (char *) malloc(sizeof(int));
#endif

    // loop through the file
#if 0
    while (getline(&line, &size, fO) != -1) {
        char *word = strtok(line, " ");

        printf("the word: %s\n", word);

        // while(there is no "\n" detected?){}
        word = strtok(NULL, " ");
        printf("the word: %s\n", word);
        // addName()
    }
#else
    node *head = NULL;

    while (getline(&line, &size, fO) != -1) {
        // strip newline (strcspn gives the index of '\n', or of the
        // terminating NUL when the last line has no newline)
        line[strcspn(line,"\n")] = 0;

        char *word = strtok(line," ");

        // strtok returns NULL once the line has no more tokens
        while (word != NULL) {
            printf("DEBUG: %s\n", word);
            head = addName(word,head);
            word = strtok(NULL," ");
        }
    }

    // must be freed at end (getline reuses this one buffer for every line)
    free(line);

// NOTE/BUG: the stream was never closed
#if 1
    fclose(fO);
#endif

    // print linked list
    for (node *cur = head;  cur != NULL;  cur = cur->next)
        printf("Final: %s\n",cur->word);

// NOTE/BUG: the nodes and their strdup'ed words were never released
#if 1
    while (head != NULL) {
        node *next = head->next;

        free(head->word);
        free(head);
        head = next;
    }
#endif
#endif

    return 0;
}

Here is the fully cleaned up code:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct node node;
// One element of the singly linked word list.
struct node {
    char *word;     // strdup() copy of the word
    node *next;     // next element, NULL at the end of the list
};

// Push a copy of word onto the front of the linked list.
//
// word: NUL-terminated string to store; it is duplicated, so the caller
//       keeps ownership of its own buffer.
// head: current first element of the list, or NULL for an empty list.
//
// Returns the new head. If memory cannot be allocated, the list is left
// untouched and head is returned unchanged.
node *
addName(const char *word, node *head)
{
    node *fresh = malloc(sizeof(*fresh));

    if (fresh == NULL) {
        return head;
    }

    // store a private copy so the caller may reuse or free its buffer
    fresh->word = strdup(word);
    if (fresh->word == NULL) {
        free(fresh);
        return head;
    }

    // the old head becomes the second element
    fresh->next = head;

    return fresh;
}

// Read paragraph.data line by line, split every line into space-separated
// words, push each word onto the front of the linked list, then print the
// list (the last word read is printed first).
//
// Returns 0 on success, 1 if the input file cannot be opened.
int
main()
{
    FILE *fO = fopen("paragraph.data", "r");

    // getline must not be handed a NULL stream
    if (fO == NULL) {
        perror("paragraph.data");
        return 1;
    }

    // with line == NULL and size == 0, getline allocates the buffer itself
    // and grows it as needed on later calls
    char *line = NULL;
    size_t size = 0;

    node *head = NULL;

    // loop through the file
    while (getline(&line, &size, fO) != -1) {
        // strip newline (strcspn gives the index of '\n', or of the
        // terminating NUL when the last line has no newline)
        line[strcspn(line,"\n")] = 0;

        // strtok returns NULL once the line has no more tokens
        for (char *word = strtok(line," ");  word != NULL;  word = strtok(NULL," ")) {
            printf("DEBUG: %s\n", word);
            head = addName(word,head);
        }
    }

    // one buffer is reused for every line, so free it once, after the loop
    free(line);
    fclose(fO);

    // print linked list
    for (node *cur = head;  cur != NULL;  cur = cur->next) {
        printf("Final: %s\n",cur->word);
    }

    // release every node together with its copied word
    while (head != NULL) {
        node *next = head->next;

        free(head->word);
        free(head);
        head = next;
    }

    return 0;
}

For your sample input, here is the program output:

DEBUG: lorem
DEBUG: ipsum
DEBUG: dolor
DEBUG: sit
DEBUG: amet
DEBUG: con
DEBUG: sec
DEBUG: euter
DEBUG: orci
Final: orci
Final: euter
Final: sec
Final: con
Final: amet
Final: sit
Final: dolor
Final: ipsum
Final: lorem

CodePudding user response:

Your main issue is that line is allocated while size is zero. If you intend to let getline allocate space for the line, then size must be zero AND line must be null. Also, because getline reuses the same buffer for every line, you should free the allocated memory once, after the loop has finished.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// One element of the singly linked word list; `link` is the typedef name
// for struct node.
typedef struct node {
    char *word;         // strdup() copy of the word
    struct node *next;  // next element, NULL at the end of the list
} link;

//a function to add word to front of linked list
//
// word: NUL-terminated string to store; it is duplicated, so the caller
//       keeps ownership of its own buffer.
// head: current first element of the list, or NULL for an empty list.
//
// Returns the new head. If memory cannot be allocated, the list is left
// untouched and head is returned unchanged.
link *addWord(char *word, link *head){
    link *temp1 = malloc(sizeof *temp1);

    if (temp1 == NULL) {
        return head;
    }

    //add string word to linked list
    temp1->word = strdup(word);
    if (temp1->word == NULL) {
        // don't link a node that has no word
        free(temp1);
        return head;
    }

    temp1->next = head;
    return temp1;
}

int main(){
    FILE *fO; 
    fO = fopen("paragraph.data", "r");
    int size = 0;
    int len = 0;
    char *line;

    //loop through the file
    while(getline(&line, &size, fO) != EOF){
        char *word = strtok(line, " \n");

// addWord(word,head); printf("the word: %s\n", word);

        while(word) {
            word = strtok(NULL, " \n");
            printf("the word: %s\n", word);

// addWord(word,head); } free(line); } }

  • Related