I write a parser in C to parse a file that describe a graph, with vertex and edges, the graph file looks like this :
c FILE: graph_test
c
c SOURCE: generator
c
p edge 10 12
e 1 2
e 2 3
e 6 2
the "c" lines correspond to comments (must be ignored), the "p" line describe the number of nodes and the number of Edges, and the nbEdges "e" following lines describe the edges between the two nodes.
I use strtok() to split the string to get only the values that interest me, but when I try to store the values into a array int edges[nb_adges][2]
, the array is filled with wrong valued
Here's the entire code ("aretes" = edges, and "sommets" : nodes)
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define TAILLE_MAX 1000
int main(int argc, char * argv[]) {
if(argc != 2) {
printf("\n[UTILISATION] : ./parseur chemin_fichier\n");
exit(1);
}
char* nom_fichier = argv[1];
FILE* fichier;
//Ouverture en lecture
fichier = fopen(nom_fichier,"r");
if(fichier == NULL) {
printf("\nImpossible d'ouvrir le fichier '%s'\n", nom_fichier);
exit(2);
}
char ligne[TAILLE_MAX];
int nb_sommets = 0;
int nb_aretes = 0;
int aretes[nb_aretes][2];
//On lit chaque ligne du fichier
int i;
while(fgets(ligne, TAILLE_MAX, fichier) != NULL) {
//Si la ligne commence par "c" c'est un commentaire on l'ignore
if(ligne[0] == 'c') {
continue;
}
//Si la ligne commence par "p" c'est la ligne qui contient le nombre de sommets et de aretes
if(ligne[0] == 'p') {
i = 0;
char* tmp = strtok(ligne, " ");
while(tmp != NULL) {
if(i == 2) {
nb_sommets = atoi(tmp);
}
if(i == 3) {
nb_aretes = atoi(tmp);
}
tmp = strtok(NULL, " ");
i ;
}
i = 0;
continue;
}
//Si la ligne commence par "e" on récupère les aretes
if(ligne[0] == 'e') {
char* tmp = strtok(ligne, " ");
int j = 0;
while(tmp != NULL) {
if(j == 1) {
aretes[i][0] = atoi(tmp);
}
if(j == 2) {
aretes[i][1] = atoi(tmp);
}
printf("tmp : %s\n",tmp);
tmp = strtok(NULL, " ");
j ;
}
i ;
continue;
}
}
printf("\nNombre de sommets : %d\n", nb_sommets);
printf("Nombre d'aretes : %d\n", nb_aretes);
printf("Liste des aretes :\n");
//On affiche toutes les aretes
for(int i = 0; i < nb_aretes; i ) {
printf("%d -> %d\n", aretes[i][0], aretes[i][1]);
}
//On ferme le fichier
fclose(fichier);
return 0;
}
I tried to print the value of tmp in the 'e' loop, it display the correct value of nodes, but the array int edges[nb_edges][2]
is filled with stranges values
> Execution
List of edges :
1 -> 2
2 -> 3
6 -> 2
8 -> 3
3473509 -> 52
8 -> 7
9 -> 4
5 -> 6
7 -> 9
10 -> 1
1 -> 7
5 -> 4
CodePudding user response:
In addition to my top comments ...
This:
int nb_aretes = 0;
int aretes[nb_aretes][2];
This is [effectively]:
int aretes[0][2];
That's because the size is fixed when the declaration occurs. It does not increase if nb_aretes
is increased subsequently.
We need a fixed size array (e.g.):
#define ARETES_MAX 10000
int aretes[ARETES_MAX][2];
The data file does not match up. The p
line says 15 edges but there are only 3 e
lines.
Although, I studied French [for four years, 40 years ago], I ran your code through google translate. So, some of this may be from that.
I changed the code to ignore the p
line for nb_edges
and to just count the e
lines:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define MAX_LINE 1000 // max line size
#define EDGES_MAX 10000 // max number of edges/nodes
int
main(int argc, char *argv[])
{
if (argc != 2) {
printf("\n[USAGE]: ./parser file_path\n");
exit(1);
}
char *file_name = argv[1];
FILE *file;
// Open for reading
file = fopen(file_name, "r");
if (file == NULL) {
printf("\nCould not open file '%s'\n", file_name);
exit(2);
}
char line[MAX_LINE];
int nb_vertices = 0;
// NOTE/BUG: this produces [effectively]
int nb_edges = 0;
#if 0
int edges[nb_edges][2];
#else
int counted_edges = 0;
int edges[EDGES_MAX][2];
#endif
// We read each line of the file
int i;
while (fgets(line, MAX_LINE, file) != NULL) {
// If the line begins with "c" it is a comment, we ignore it
if (line[0] == 'c') {
continue;
}
// If the line begins with "p" this is the line that contains the
// number of vertices and edges
if (line[0] == 'p') {
i = 0;
char *tmp = strtok(line, " ");
while (tmp != NULL) {
if (i == 2) {
nb_vertices = atoi(tmp);
}
if (i == 3) {
// NOTE/BUG: in the data file, this does _not_ match the number of "e" lines
nb_edges = atoi(tmp);
// NOTE/BUG: this is flagged by the compiler with -Wall
// NOTE/BUG: using [2] is UB (undefined behavior) because the max index can be
// is 1
#if 0
edges[nb_edges][2];
#endif
}
tmp = strtok(NULL, " ");
i ;
}
i = 0;
continue;
}
// If the line begins with "e" we retrieve the edges
if (line[0] == 'e') {
char *tmp = strtok(line, " ");
int j = 0;
while (tmp != NULL) {
if (j == 1) {
#if 0
edges[i][0] = atoi(tmp);
#else
edges[counted_edges][0] = atoi(tmp);
#endif
}
if (j == 2) {
#if 0
edges[i][1] = atoi(tmp);
#else
edges[counted_edges][1] = atoi(tmp);
#endif
}
printf("tmp: %s\n", tmp);
tmp = strtok(NULL, " ");
j ;
}
#if 0
i ;
#else
counted_edges;
#endif
continue;
}
}
printf("\nNumber of vertices: %d\n", nb_vertices);
printf("Number of edges: %d (expected)\n", nb_edges);
printf("Number of edges: %d (calculated)\n", counted_edges);
printf("List of edges:\n");
// We display all the edges
#if 0
for (int i = 0; i < nb_edges; i ) {
#else
for (int i = 0; i < counted_edges; i ) {
#endif
printf("%d -> %d\n", edges[i][0], edges[i][1]);
}
// We close the file
fclose(file);
return 0;
}
In the above code, I've used cpp
conditionals to denote old vs. new code:
#if 0
// old code
#else
// new code
#endif
#if 1
// new code
#endif
Note: this can be cleaned up by running the file through unifdef -k
I used your sample input:
c FILE: graph_test
c
c SOURCE: generator
c
p edge 10 12
e 1 2
e 2 3
e 6 2
Here is the program output:
tmp: e
tmp: 1
tmp: 2
tmp: e
tmp: 2
tmp: 3
tmp: e
tmp: 6
tmp: 2
Number of vertices: 10
Number of edges: 12 (expected)
Number of edges: 3 (calculated)
List of edges:
1 -> 2
2 -> 3
6 -> 2
I think the code can be simplified. There is too much replication of strtok
calls under each if
statement.
Better to have a loop that splits the line before deciding on the command to process.
And, a switch/case
seems better than the if
statements.
Here's how I would clean things up:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define MAX_LINE 1000 // max line size
#define EDGES_MAX 10000 // max number of edges/nodes
#if 1
#define MAX_TOKENS 10 // max tokens per line
#endif
int
main(int argc, char *argv[])
{
if (argc != 2) {
printf("\n[USAGE]: ./parser file_path\n");
exit(1);
}
char *file_name = argv[1];
FILE *file;
// Open for reading
file = fopen(file_name, "r");
if (file == NULL) {
printf("\nCould not open file '%s'\n", file_name);
exit(2);
}
char line[MAX_LINE];
char saved_line[MAX_LINE];
int nb_vertices = 0;
int nb_edges = 0;
int counted_edges = 0;
int edges[EDGES_MAX][2];
// We read each line of the file
#if 1
int ntokens;
char *tokens[MAX_TOKENS 1];
#endif
while (fgets(line, MAX_LINE, file) != NULL) {
// If the line begins with "c" [or "#"] it is a comment, we ignore it
if (line[0] == 'c')
continue;
#if 1
if (line[0] == '#')
continue;
#endif
// split line into tokens
#if 1
// copy in case of error
strcpy(saved_line,line);
char *tok = strtok(line," \n");
for (ntokens = 0; ntokens < MAX_TOKENS; ntokens) {
if (tok == NULL)
break;
tokens[ntokens] = tok;
tok = strtok(NULL," \n");
}
tokens[ntokens] = NULL;
// ignore blank lines
if (ntokens == 0)
continue;
#endif
switch (line[0]) {
case 'p':
// If the line begins with "p" this is the line that contains the
// number of vertices and edges
if (ntokens != 4) {
printf("bad p line -- ntokens=%d %s",ntokens,saved_line);
exit(1);
}
nb_vertices = atoi(tokens[2]);
nb_edges = atoi(tokens[3]);
break;
case 'e':
// If the line begins with "e" we retrieve the edges
if (ntokens != 3) {
printf("bad e line -- ntokens=%d %s",ntokens,saved_line);
exit(1);
}
edges[counted_edges][0] = atoi(tokens[1]);
edges[counted_edges][1] = atoi(tokens[2]);
counted_edges;
break;
default:
printf("unknown command -- %s",saved_line);
exit(1);
break;
}
}
printf("\nNumber of vertices: %d\n", nb_vertices);
printf("Number of edges: %d (expected)\n", nb_edges);
printf("Number of edges: %d (calculated)\n", counted_edges);
printf("List of edges:\n");
// We display all the edges
for (int i = 0; i < counted_edges; i )
printf("%d -> %d\n", edges[i][0], edges[i][1]);
// We close the file
fclose(file);
return 0;
}
I did not do the following in the code above.
But, if the intention was that there is only one p
line at the top of the file, we could use the:
int aretes[nb_aretes][2];
But, we'd have to parse the p
line above that definition in a separate block. Then, nb_aretes
would be set correctly [dynamically] before we see the definition.