I have written code to read a CSV file in C. The file contains data about games, and I am supposed to read it, sort it by score, and print the top 10 rated games. The code is as follows:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define tablesize 18626
/* One record of the games CSV; every column is kept as text. */
typedef struct
{
    char title[200];        /* CSV column 0 */
    char platform[20];      /* CSV column 1 */
    char Score[20];         /* CSV column 2, stored as the raw field text */
    char release_year[20];  /* CSV column 3 */
} dict;
/*
 * Print the first 100 records of `values` to stdout, one record per line.
 *
 * values: array of records; it must hold at least 100 entries, because the
 *         loop below always reads indices 0..99.
 */
void printValues(dict *values)
{
    for (int i = 0; i < 100; i++)
    {
        printf("title->%s,platform->%s,Score->%s,release->%s\n", values[i].title, values[i].platform, values[i].Score, values[i].release_year);
    }
}
void sort(dict *values)
{
for (int i = 0; i < tablesize; i )
{
for (int j = i 1; j < tablesize; j )
{
int a = *values[i].Score - '0';
int b = *values[j].Score - '0';
// printf("%d %d\n",values[i].Score,values[j].Score);
if (a < b)
{
dict temp = values[i];
values[i] = values[j];
values[j] = temp;
}
}
}
}
int main()
{
FILE *fp = fopen("t4_ign.csv", "r");
if (!fp)
{
printf("Error");
return 0;
}
char buff[1024];
int row = 0, column = 0;
int count = 0;
dict *values = NULL;
int i = 0;
while (fgets(buff, 1024, fp))
{
column = 0;
row ;
count ;
values = realloc(values, sizeof(dict) * count);
if (NULL == values)
{
perror("realloc");
break;
}
if (row == 1)
{
continue;
}
char *field = strtok(buff, ",");
while (field)
{
if (column == 0)
{
strcpy(values[i].title, field);
}
if (column == 1)
{
strcpy(values[i].platform, field);
}
if (column == 2)
{
strcpy(values[i].Score, field);
}
if (column == 3)
{
strcpy(values[i].release_year, field);
}
field = strtok(NULL, ",");
column ;
}
i ;
}
fclose(fp);
printf("File loaded!\n", fp);
sort(values);
printValues(values);
free(values);
return 0;
}
The problem I am facing is that the Title field of the CSV file can contain commas, so the parser treats the comma-separated parts of the title as separate columns, which results in the wrong data being loaded into the struct.
Here are two example lines of the input file. Quotes are used when the title contains commas.
"The Chronicles of Narnia: The Lion, The Witch and The Wardrobe",PlayStation 2,8,2005
The Chronicles of Narnia: Prince Caspian,Wireless,5,2008
Any suggestions? Thanks in advance.
CodePudding user response:
Since quotes are used for the title field when it contains commas, I suggest you check whether the line starts with a quotation mark (`"`).
If so, use the quotation mark as the delimiter for the first item.
// First field (the title) of the line held in buff.
char *field;
// A leading quotation mark means the title is quoted and may contain commas.
if(buff[0] == '"') {
// Split on quotation marks: strtok skips the opening quote and ends the
// token at the closing quote, so commas inside the title are kept.
field = strtok(buff, "\"");
}
else {
// Unquoted title: it simply ends at the first comma.
field = strtok(buff, ",");
}
The first call will leave a comma as the first character of the next field, but the next `strtok` call
will skip over it, since `strtok` does not return "empty" fields.
CodePudding user response:
The function `strtok` does not suit your needs, because it treats quotation marks like any other character. Therefore, when `strtok` sees a comma, it does not care whether the comma is inside quotation marks or not.
Also, as someone else pointed out in the comments section, another problem with `strtok` is that it skips empty fields.
Therefore, I do not recommend using `strtok` for what you want to do.
In order to solve your problem, I recommend that you write your own function that does something very similar to `strtok` and `strsep`, but which, if the first non-whitespace character is a quotation mark, treats the next quotation mark as the delimiter instead of the next comma. In the code below, I named this function `my_strsep`.
Here is an example:
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#define NUM_LINES 2
//This function is equivalent to the POSIX function "strsep", except
//that it always uses "," as a delimiter, unless the first
//non-whitespace character is a quotation mark, in which case it
//skips the quotation mark and uses the next quotation mark as a
//delimiter, also consuming the next comma.
//
//stringp: address of the pointer to the rest of the line. The line is
//         modified in place (the delimiter is overwritten with '\0').
//         On return, *stringp points to the start of the next field, or
//         is NULL if the line is exhausted.
//
//Returns the field that was found, with leading whitespace skipped, or
//NULL if *stringp was NULL on entry.
char *my_strsep( char **restrict stringp )
{
    char *p = *stringp;
    char *start;
    char delimiter = ',';

    //do nothing if *stringp is NULL
    if ( p == NULL )
        return NULL;

    //skip all whitespace characters
    while ( isspace( (unsigned char)*p ) )
        p++;

    //determine whether this field uses quotation marks
    if ( *p == '"' )
    {
        //set delimiter to quotation mark instead of comma
        delimiter = '\"';

        //skip the first quotation mark
        p++;
    }

    //remember the start of the field
    start = p;

    while ( *p != delimiter )
    {
        if ( *p == '\0' )
        {
            if ( delimiter == '\"' )
            {
                fprintf( stderr,
                    "Warning: Encountered end of string before the "
                    "second quotation mark!\n"
                );
            }

            //pass information back to calling function
            *stringp = NULL;
            return start;
        }

        p++;
    }

    //overwrite the delimiter with a null character
    *p = '\0';

    //go past the delimiter
    p++;

    //skip the comma too, if quotation marks are being used
    if ( delimiter == '\"' )
    {
        //skip all whitespace characters
        while ( isspace( (unsigned char)*p ) )
            p++;

        if ( *p == ',' )
        {
            //skip the comma
            p++;
        }
        else if ( *p == '\0' )
        {
            //the quoted field was the last one on the line; report the
            //end now, so that the next call does not return a spurious
            //empty field
            *stringp = NULL;
            return start;
        }
    }

    //pass information back to calling function
    *stringp = p;
    return start;
}
//Demonstrates my_strsep on one quoted and one unquoted sample line,
//printing every field that is found.
int main( void )
{
    //my_strsep modifies the line in place, so the samples are stored in
    //writable arrays rather than used as string literals
    char lines[NUM_LINES][200] = {
        "\"The Chronicles of Narnia: The Lion, The Witch and The Wardrobe\",PlayStation 2,8,2005",
        "The Chronicles of Narnia: Prince Caspian,Wireless,5,2008"
    };

    for ( int i = 0; i < NUM_LINES; i++ )
    {
        char *p, *q;

        printf( "Processing line #%d:\n", i + 1 );

        p = lines[i];

        while ( ( q = my_strsep( &p ) ) != NULL )
        {
            printf( "Found field: %s\n", q );
        }

        printf( "\n" );
    }
}
This program has the following output:
Processing line #1:
Found field: The Chronicles of Narnia: The Lion, The Witch and The Wardrobe
Found field: PlayStation 2
Found field: 8
Found field: 2005
Processing line #2:
Found field: The Chronicles of Narnia: Prince Caspian
Found field: Wireless
Found field: 5
Found field: 2008
As you can see, the function `my_strsep` can handle fields both with and without quotation marks.