Home > database >  how can i parse this file in c
how can i parse this file in c

Time:11-04

How can I split each word from its meaning? The file contains entries such as: 1. mammoth: large

My code:

/*
 * Read "dictionary.txt" and print each "word:meaning" entry as "word meaning".
 * An entry is the text up to a colon (the word) followed by the text up to
 * the next tab (the meaning). Reading stops at the first entry that does not
 * match this shape or does not fit into the buffers.
 * If the file cannot be opened, an error is reported and nothing is printed.
 */
void ReadFromFile(){
    FILE *dictionary = fopen("dictionary.txt", "r");
    char word[20];
    char meaning[50];

    if (dictionary == NULL) {
        perror("dictionary.txt");
        return;
    }
    /* The field widths (buffer size - 1) stop fscanf from writing past
     * the end of "word" and "meaning" on over-long input. */
    while (fscanf(dictionary, "%19[^:]:%49[^\t]\t", word, meaning) == 2) {
        printf("%s %s\n", word, meaning);
    }
    fclose(dictionary);
}

CodePudding user response:

Assuming the word and the meaning do not contain digits and dots, my approach is the following:

  • First, split the input line on digits and dots into tokens of the form "word: meaning".
  • Next, separate each token at the colon character.
  • Finally, remove the leading and trailing blank characters.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define INFILE "dictionary.txt"

void split(char *str);      // cut a line into tokens and hand each one to separate()
void separate(char *str);   // cut one token at the colon and print both parts
char *trim(char *str);      // strip surrounding blanks; returns a pointer into "str"

/*
 * Cut a line apart at the serial numbers (any run of digits and dots) and
 * pass every piece in between to separate().
 * WARNING: the array of "str" is modified (strtok writes terminators into it)
 */
void
split(char *str)
{
    static const char serial_chars[] = "0123456789.";   // characters that make up a serial number
    char *piece;

    // walk over the pieces lying between the serial numbers
    for (piece = strtok(str, serial_chars);
         piece != NULL;
         piece = strtok(NULL, serial_chars)) {
        separate(piece);
    }
}

/*
 * separate the pair into "word" and "meaning" and print them
 *
 * "str" must have the form "word:meaning". The colon is overwritten with a
 * string terminator, both halves are trimmed, and "word meaning" is printed
 * on its own line. If "str" contains no colon, a message is written to
 * stderr and the program exits with status 1.
 * WARNING: the array of "str" is modified
 */
void
separate(char *str)
{
    char *p = strchr(str, ':');         // search a colon character in "str"

    if (p == NULL) {
        fprintf(stderr, "Illegal format: %s\n", str);
        exit(1);
    }
    *p++ = '\0';                        // terminate the "word" string and step past the colon
                                        // now "p" points to the start of "meaning"
    printf("%s %s\n", trim(str), trim(p));
}

/*
 * tell whether "c" is one of the blank characters trim() removes
 */
static int
trim_is_blank(char c)
{
    return c == ' ' || c == '\t' || c == '\r' || c == '\n';
}

/*
 * remove leading and trailing whitespaces (space, tab, CR, LF)
 *
 * Returns a pointer to the first non-blank character inside "str"; the
 * trailing blanks are cut off by writing a terminator after the last
 * non-blank character. Nothing is written beyond the original terminator,
 * so an empty string is handled safely.
 * WARNING: the array of "str" is modified
 */
char *
trim(char *str)
{
    char *end;

    while (trim_is_blank(*str)) {       // skip leading blanks
        str++;
    }
    end = str + strlen(str);            // jump to the end of "str"
    while (end > str && trim_is_blank(end[-1])) {
        end--;                          // rewind the pointer skipping blanks
    }
    *end = '\0';                        // chop the trailing blanks off
    return str;
}

/*
 * open INFILE, then trim and split every line read from it
 * exits with status 1 if the file cannot be opened
 */
int
main()
{
    char line[BUFSIZ];
    FILE *in = fopen(INFILE, "r");

    if (in == NULL) {
        perror(INFILE);
        exit(1);
    }

    // process the file one line at a time
    while (fgets(line, BUFSIZ, in) != NULL) {
        split(trim(line));
    }

    fclose(in);
    return 0;
}

Output:

foe enemy
vast huge
purchase buy
drowsy sleepy
absent missing
prank trick
[snip]

[Alternative]
C may not be the most suitable language for this kind of string manipulation. High-level languages such as Python, Perl or Ruby can solve it with much less code. Here is an example in Python which produces the same results:

import re

with open("dictionary.txt") as f:
    s = f.read()

# Each entry looks like "<number>. <word>: <meaning>". Group 1 lazily captures
# the word up to the first colon; group 2 captures the meaning as one run of
# non-whitespace characters.
for m in re.finditer(r'\d+\.\s*(.+?):\s*(\S+)', s):
    print(m.group(1) + " " + m.group(2))
  • Related