Home > other >  How to read comma-separated csv file with `sscanf()`
How to read comma-separated csv file with `sscanf()`

Time:05-17

I'm attempting to print an array of structures read from a CSV file exported from Excel. However, only the students' IDs are printed correctly; the other fields are printed as well, but they come out as strange, unreadable characters. Can you please tell me what could be wrong with this code?

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* One row of the student table: an ID, a name and a numeric score. */
struct student {
    char ID[8];     /* ID text, at most 7 characters plus the terminating NUL */
    char name[32];  /* name text, at most 31 characters plus the terminating NUL */
    int score;      /* score stored as an integer */
};

typedef struct student student;

/*
 * Reads students.csv line by line into student_list, then prints every
 * record that was stored.
 *
 * This is the listing the question asks about, kept as posted: the
 * feof()-driven loop and the sscanf() format/arguments below are the defects
 * that the answer following this listing diagnoses and corrects.
 */
int main(int argc, char *argv[]) {
    student student_list[100];
    FILE *source = fopen("students.csv", "r");
    if (source == NULL) {
        perror("Unable to open the source file.");
        exit(1);
    }

    char buffer[1024];
    /* Prime the loop with the first line; each iteration reads the next one. */
    fgets(buffer, sizeof(buffer), source);
    int num_student = 0;
    while (!feof(source)) {
        student *one_student = student_list + num_student;
        /* As posted: no commas in the format, widths equal to the full array
         * sizes, and the score is scanned with a character-set conversion. */
        sscanf(buffer, "%8[^,] %32[^,] %3[^,]",
               &one_student->ID, &one_student->name, &one_student->score);
        fgets(buffer, sizeof(buffer), source);
        num_student++;
    }
    for (int i = 0; i < num_student; i++) {
        printf("ID: %s  name: %-9s score: %-3d\n",
               student_list[i].ID, student_list[i].name, student_list[i].score);
    }
    fclose(source);
    return 0;
}

This is a sample input file students.csv:

B213350,John Adam Smith,80
B191835,Mary Elizabeth Smith,71
B201304,Yamazaki Fuyumi,95
B201832,Liam,57
B201834,Alfonso Hernández,65

CodePudding user response:

There are multiple problems:

  • you should not use feof(). Read Why is “while ( !feof (file) )” always wrong?
    Use this loop instead:

      while (fgets(buffer, sizeof buffer, source)) {
          // handle the line
      }
    
  • the sscanf() format string is incorrect: the character counts are too large and the commas are missing. It should be " %7[^,\n], %31[^,\n], %d", and you should check that the return value is 3, the number of successful conversions expected.

  • you should stop when the student array is full.

Here is a modified version:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct student student;

/* A single student record as stored in the in-memory list. */
struct student {
    char ID[8];     /* 7 characters of ID text and a terminating NUL */
    char name[32];  /* up to 31 characters of name text and a terminating NUL */
    int score;      /* integer score */
};

/*
 * Loads student records from students.csv and prints them as a table.
 *
 * Each input line is expected to have the form `ID,name,score`. Lines that
 * do not yield all three fields are reported and skipped. Reading stops at
 * end of file or once student_list is full.
 *
 * Returns 0 on success, or 1 if the file cannot be opened.
 */
int main(int argc, char *argv[]) {
    (void)argc;  /* command-line arguments are not used */
    (void)argv;

    student student_list[100];
    /* Capacity is derived from the array so the loop bound cannot drift. */
    const int max_students =
        (int)(sizeof student_list / sizeof student_list[0]);

    FILE *source = fopen("students.csv", "r");
    if (source == NULL) {
        fprintf(stderr, "Cannot open file students.csv: %s\n", strerror(errno));
        return 1;
    }

    char buffer[1024];
    int num_student = 0;
    while (num_student < max_students && fgets(buffer, sizeof(buffer), source)) {
        student *one_student = &student_list[num_student];
        /* Field widths are one less than the destination sizes to leave room
         * for the terminating NUL; '\n' is excluded so a short line cannot
         * spill into the next field. Only keep the record if all three
         * conversions succeeded. */
        if (sscanf(buffer, " %7[^,\n], %31[^,\n], %d",
                   one_student->ID, one_student->name,
                   &one_student->score) == 3) {
            num_student++;
        } else {
            /* buffer still carries the line's own newline from fgets(). */
            printf("invalid CSV line: %s", buffer);
        }
    }
    for (int i = 0; i < num_student; i++) {
        printf("ID: %-9s  name: %-32s score: %-3d\n",
               student_list[i].ID, student_list[i].name,
               student_list[i].score);
    }
    fclose(source);
    return 0;
}

Note that this approach to parsing CSV files cannot handle empty fields. Parsing the line with strtok() would not work either because consecutive commas would be handled as a single separator. You need a different approach using strcspn() or strchr().

  • Related