Home > Mobile >  Parser in C doesn't store values in a array
Parser in C doesn't store values in a array

Time:11-12

I write a parser in C to parse a file that describe a graph, with vertex and edges, the graph file looks like this :

c FILE: graph_test
c
c SOURCE: generator
c 
p edge 10 12
e 1 2
e 2 3
e 6 2

the "c" lines correspond to comments (must be ignored), the "p" line describe the number of nodes and the number of Edges, and the nbEdges "e" following lines describe the edges between the two nodes.

I use strtok() to split the string to get only the values that interest me, but when I try to store the values into a array int edges[nb_adges][2], the array is filled with wrong valued

Here's the entire code ("aretes" = edges, and "sommets" : nodes)

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#define TAILLE_MAX 1000

int main(int argc, char * argv[]) {
    if(argc != 2) {
        printf("\n[UTILISATION] : ./parseur chemin_fichier\n");
        exit(1);
    }

    char* nom_fichier = argv[1];
    FILE* fichier;

    //Ouverture en lecture
    fichier = fopen(nom_fichier,"r");
    if(fichier == NULL) {
        printf("\nImpossible d'ouvrir le fichier '%s'\n", nom_fichier);
        exit(2);
    }

    char ligne[TAILLE_MAX];
    int nb_sommets = 0;
    int nb_aretes = 0;
    int aretes[nb_aretes][2];

    //On lit chaque ligne du fichier
    int i;
    while(fgets(ligne, TAILLE_MAX, fichier) != NULL) {
        //Si la ligne commence par "c" c'est un commentaire on l'ignore
        if(ligne[0] == 'c') {
            continue;
        }
        //Si la ligne commence par "p" c'est la ligne qui contient le nombre de sommets et de aretes
        if(ligne[0] == 'p') {
            i = 0;
            char* tmp = strtok(ligne, " ");
            while(tmp != NULL) {
                if(i == 2) {
                    nb_sommets = atoi(tmp);
                }
                if(i == 3) {
                    nb_aretes = atoi(tmp);
                }
                tmp = strtok(NULL, " ");
                i  ;
            }
            i = 0;
            continue;
        }

        //Si la ligne commence par "e" on récupère les aretes
        if(ligne[0] == 'e') {
            char* tmp = strtok(ligne, " ");
            int j = 0;
            while(tmp != NULL) {
                if(j == 1) {
                    aretes[i][0] = atoi(tmp);
                }
                if(j == 2) {
                    aretes[i][1] = atoi(tmp);
                }
                printf("tmp : %s\n",tmp);
                tmp = strtok(NULL, " ");
                j  ;
            }
            i  ;
            continue;
        }
    }

    printf("\nNombre de sommets : %d\n", nb_sommets);
    printf("Nombre d'aretes : %d\n", nb_aretes);
    printf("Liste des aretes :\n");

    //On affiche toutes les aretes
    for(int i = 0; i < nb_aretes; i  ) {
        printf("%d -> %d\n", aretes[i][0], aretes[i][1]);
    }

    //On ferme le fichier
    fclose(fichier);

    return 0;
}

I tried to print the value of tmp in the 'e' loop, it display the correct value of nodes, but the array int edges[nb_edges][2] is filled with stranges values

> Execution 
List of edges :
1 -> 2
2 -> 3
6 -> 2
8 -> 3
3473509 -> 52
8 -> 7
9 -> 4
5 -> 6
7 -> 9
10 -> 1
1 -> 7
5 -> 4

CodePudding user response:

In addition to my top comments ...

This:

int nb_aretes = 0;
int aretes[nb_aretes][2];

This is [effectively]:

int aretes[0][2];

That's because the size is fixed when the declaration occurs. It does not increase if nb_aretes is increased subsequently.

We need a fixed size array (e.g.):

#define ARETES_MAX 10000
int aretes[ARETES_MAX][2];

The data file does not match up. The p line says 15 edges but there are only 3 e lines.


Although, I studied French [for four years, 40 years ago], I ran your code through google translate. So, some of this may be from that.

I changed the code to ignore the p line for nb_edges and to just count the e lines:

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#define MAX_LINE 1000                   // max line size
#define EDGES_MAX 10000                 // max number of edges/nodes

int
main(int argc, char *argv[])
{
    if (argc != 2) {
        printf("\n[USAGE]: ./parser file_path\n");
        exit(1);
    }

    char *file_name = argv[1];
    FILE *file;

    // Open for reading
    file = fopen(file_name, "r");
    if (file == NULL) {
        printf("\nCould not open file '%s'\n", file_name);
        exit(2);
    }

    char line[MAX_LINE];
    int nb_vertices = 0;

// NOTE/BUG: this produces [effectively]
    int nb_edges = 0;
#if 0
    int edges[nb_edges][2];
#else
    int counted_edges = 0;
    int edges[EDGES_MAX][2];
#endif

    // We read each line of the file
    int i;

    while (fgets(line, MAX_LINE, file) != NULL) {
        // If the line begins with "c" it is a comment, we ignore it
        if (line[0] == 'c') {
            continue;
        }

        // If the line begins with "p" this is the line that contains the
        // number of vertices and edges
        if (line[0] == 'p') {
            i = 0;
            char *tmp = strtok(line, " ");

            while (tmp != NULL) {
                if (i == 2) {
                    nb_vertices = atoi(tmp);
                }
                if (i == 3) {
// NOTE/BUG: in the data file, this does _not_ match the number of "e" lines
                    nb_edges = atoi(tmp);
// NOTE/BUG: this is flagged by the compiler with -Wall
// NOTE/BUG: using [2] is UB (undefined behavior) because the max index can be
// is 1
#if 0
                    edges[nb_edges][2];
#endif
                }
                tmp = strtok(NULL, " ");
                i  ;
            }
            i = 0;
            continue;
        }

        // If the line begins with "e" we retrieve the edges
        if (line[0] == 'e') {
            char *tmp = strtok(line, " ");
            int j = 0;

            while (tmp != NULL) {
                if (j == 1) {
#if 0
                    edges[i][0] = atoi(tmp);
#else
                    edges[counted_edges][0] = atoi(tmp);
#endif
                }
                if (j == 2) {
#if 0
                    edges[i][1] = atoi(tmp);
#else
                    edges[counted_edges][1] = atoi(tmp);
#endif
                }
                printf("tmp: %s\n", tmp);
                tmp = strtok(NULL, " ");
                j  ;
            }
#if 0
            i  ;
#else
              counted_edges;
#endif
            continue;
        }
    }

    printf("\nNumber of vertices: %d\n", nb_vertices);
    printf("Number of edges: %d (expected)\n", nb_edges);
    printf("Number of edges: %d (calculated)\n", counted_edges);
    printf("List of edges:\n");

    // We display all the edges
#if 0
    for (int i = 0; i < nb_edges; i  ) {
#else
    for (int i = 0; i < counted_edges; i  ) {
#endif
        printf("%d -> %d\n", edges[i][0], edges[i][1]);
    }

    // We close the file
    fclose(file);

    return 0;
}

In the above code, I've used cpp conditionals to denote old vs. new code:

#if 0
// old code
#else
// new code
#endif

#if 1
// new code
#endif

Note: this can be cleaned up by running the file through unifdef -k


I used your sample input:

c FILE: graph_test
c
c SOURCE: generator
c
p edge 10 12
e 1 2
e 2 3
e 6 2

Here is the program output:

tmp: e
tmp: 1
tmp: 2

tmp: e
tmp: 2
tmp: 3

tmp: e
tmp: 6
tmp: 2

Number of vertices: 10
Number of edges: 12 (expected)
Number of edges: 3 (calculated)
List of edges:
1 -> 2
2 -> 3
6 -> 2

I think the code can be simplified. There is too much replication of strtok calls under each if statement.

Better to have a loop that splits the line before deciding on the command to process.

And, a switch/case seems better than the if statements.

Here's how I would clean things up:

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#define MAX_LINE 1000                   // max line size
#define EDGES_MAX 10000                 // max number of edges/nodes
#if 1
#define MAX_TOKENS  10                  // max tokens per line
#endif

int
main(int argc, char *argv[])
{
    if (argc != 2) {
        printf("\n[USAGE]: ./parser file_path\n");
        exit(1);
    }

    char *file_name = argv[1];
    FILE *file;

    // Open for reading
    file = fopen(file_name, "r");
    if (file == NULL) {
        printf("\nCould not open file '%s'\n", file_name);
        exit(2);
    }

    char line[MAX_LINE];
    char saved_line[MAX_LINE];

    int nb_vertices = 0;
    int nb_edges = 0;
    int counted_edges = 0;
    int edges[EDGES_MAX][2];

    // We read each line of the file

#if 1
    int ntokens;
    char *tokens[MAX_TOKENS   1];
#endif

    while (fgets(line, MAX_LINE, file) != NULL) {
        // If the line begins with "c" [or "#"] it is a comment, we ignore it
        if (line[0] == 'c')
            continue;
#if 1
        if (line[0] == '#')
            continue;
#endif

        // split line into tokens
#if 1
        // copy in case of error
        strcpy(saved_line,line);

        char *tok = strtok(line," \n");
        for (ntokens = 0;  ntokens < MAX_TOKENS;    ntokens) {
            if (tok == NULL)
                break;
            tokens[ntokens] = tok;
            tok = strtok(NULL," \n");
        }
        tokens[ntokens] = NULL;

        // ignore blank lines
        if (ntokens == 0)
            continue;
#endif

        switch (line[0]) {
        case 'p':
            // If the line begins with "p" this is the line that contains the
            // number of vertices and edges
            if (ntokens != 4) {
                printf("bad p line -- ntokens=%d %s",ntokens,saved_line);
                exit(1);
            }

            nb_vertices = atoi(tokens[2]);
            nb_edges = atoi(tokens[3]);
            break;

        case 'e':
            // If the line begins with "e" we retrieve the edges
            if (ntokens != 3) {
                printf("bad e line -- ntokens=%d %s",ntokens,saved_line);
                exit(1);
            }

            edges[counted_edges][0] = atoi(tokens[1]);
            edges[counted_edges][1] = atoi(tokens[2]);
              counted_edges;
            break;

        default:
            printf("unknown command -- %s",saved_line);
            exit(1);
            break;
        }
    }

    printf("\nNumber of vertices: %d\n", nb_vertices);
    printf("Number of edges: %d (expected)\n", nb_edges);
    printf("Number of edges: %d (calculated)\n", counted_edges);
    printf("List of edges:\n");

    // We display all the edges
    for (int i = 0; i < counted_edges; i  )
        printf("%d -> %d\n", edges[i][0], edges[i][1]);

    // We close the file
    fclose(file);

    return 0;
}

I did not do the following in the code above.

But, if the intention was that there is only one p line at the top of the file, we could use the:

int aretes[nb_aretes][2];

But, we'd have to parse the p line above that definition in a separate block. Then, nb_aretes would be set correctly [dynamically] before we see the definition.

  • Related