I have a .csv file. Let's say the data is like this:
Location 1,Location 2,Price,Rooms,Bathrooms,CarParks,Type,Area,Furnish
Upper-East-Side,New-York,310000,3,2,0,Built-up,1000,Partly
West-Village,New-York,278000,2,2,0,Built-up,1000,Partly
Theater-District,New-York,688000,3,2,0,Built-up,1000,Partly
Expected output (alphabetized):
Theater-District
Upper-East-Side
West-Village
How can I only show and alphabetize the first column (Location 1) of the file while also skipping the header?
This is currently my code but it's still in a "read and display" form.
#include <stdio.h>
/*
 * Echo file.csv to standard output while suppressing the text of its first
 * (header) line.
 *
 * Returns 0 in every case; if the file cannot be opened, a message is printed
 * to standard output instead.
 */
int main()
{
    FILE *fh;
    fh = fopen("file.csv", "r");
    if (fh != NULL)
    {
        int line_number = 0;
        /* fgetc() returns an int so that EOF stays distinguishable from every
           valid byte value; a plain char cannot hold that distinction. */
        int c;
        while ( (c = fgetc(fh)) != EOF )
        {
            /* Characters of line 0 (the header) are dropped, but its
               terminating newline is still echoed. */
            if(line_number > 0 || c == '\n'){
                putchar(c);
            }
            /* Each newline ends a line, so advance the counter. */
            if(c == '\n'){
                line_number++;
            }
        }
        fclose(fh);
    } else printf("Error opening file.\n");
    return 0;
}
CodePudding user response:
CSV is not a well-defined format, so I suggest you use an existing CSV library instead of parsing the data yourself. For instance, the code below will not work if the first field has any embedded commas. It relies on fscanf() with the %m modifier (a POSIX extension) to allocate each field, and resizes the lines
array as needed. This means there are no arbitrary limits.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * qsort() comparator for an array whose elements are pointers to C strings.
 *
 * qsort() passes the callback pointers to the array ELEMENTS, so each argument
 * is really a `char **` and must be dereferenced once to reach the string.
 * Comparing the arguments directly would compare the bytes of the pointers.
 *
 * Returns a value less than, equal to, or greater than zero, as strcmp() does.
 */
int strcmp2(const void *a, const void *b) {
    const char *const *lhs = a;
    const char *const *rhs = b;
    return strcmp(*lhs, *rhs);
}
/*
 * Read the first comma-separated field of every line of unsorted.csv, sort
 * all of them except the first (the header) and print them one per line.
 *
 * Returns 1 if the file cannot be opened or memory runs out, 0 otherwise.
 * An empty file produces no output.
 */
int main() {
    FILE *f = fopen("unsorted.csv", "r");
    if(!f) return 1;
    char **lines = NULL;
    size_t n = 0;
    for(;; n++) {
        char *location1;
        // The leading blank skips the newline left over from the previous
        // record (and any blank lines). %m asks fscanf to allocate the
        // field. '\n' is excluded so that a line without a comma cannot
        // swallow the lines that follow it.
        int rv = fscanf(f, " %m[^,\n]", &location1);
        if(rv != 1) break;
        // Discard the remaining fields of this line.
        int c;
        while((c = fgetc(f)) != EOF && c != '\n') { }
        char **tmp = realloc(lines, (n + 1) * sizeof *tmp);
        if(!tmp) {
            // realloc left the old array untouched: release everything.
            free(location1);
            for(size_t i = 0; i < n; i++) free(lines[i]);
            free(lines);
            fclose(f);
            return 1;
        }
        lines = tmp;
        lines[n] = location1;
    }
    fclose(f);
    // Nothing was read: lines is still NULL and n - 1 would wrap around.
    if(n == 0) return 0;
    free(lines[0]); // header
    qsort(&lines[1], n - 1, sizeof *lines, strcmp2);
    for(size_t i = 1; i < n; i++) {
        printf("%s\n", lines[i]);
        free(lines[i]);
    }
    free(lines);
    return 0;
}
It produces the expected output:
Theater-District
Upper-East-Side
West-Village
CodePudding user response:
So, assuming some hard limits on line length and CSV file record count, we can just use arrays.
To read a record, just use fgets(). Add each line of text to the array using the usual method.
We use a simple string search and truncate to isolate the first field. (Assuming no fancy stuff like double-quoted fields. I assume you are doing homework.)
To sort everything except the CSV header record, use qsort() with a little additional mathematics.
#include <iso646.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define unused(x) (void)(x)
#define MAX_LINE_LENGTH 100
#define MAX_RECORD_COUNT 100
// qsort() callback: each element is a whole fixed-width record buffer, so the
// pointers qsort() passes already point at the first character of a string.
static int compare_records( const void * a, const void * b )
{
  return strcmp( (const char *)a, (const char *)b );
}

// Print the first field of every record in the CSV file named by argv[1],
// sorted alphabetically, skipping the header record.
//
// At most MAX_RECORD_COUNT records are read and each field is limited to
// MAX_LINE_LENGTH-1 characters. Returns 1 if no filename was given or the
// file cannot be opened, 0 otherwise.
int main( int argc, char ** argv )
{
  char records[MAX_RECORD_COUNT][MAX_LINE_LENGTH];
  size_t record_count = 0;

  if (argc < 2) return 1;
  const char * filename = argv[1];

  // Read our records from file
  FILE * f = fopen( filename, "r" );
  if (!f) return 1;

  while ((record_count < MAX_RECORD_COUNT)
    and fgets( records[record_count], MAX_LINE_LENGTH, f ))
  {
    // No newline in the buffer means the line did not fit: discard the rest
    // of it so the tail is not read back as an extra, bogus record.
    if (!strchr( records[record_count], '\n' ))
    {
      int c;
      while ((c = fgetc( f )) != EOF and c != '\n') { }
    }
    record_count += 1;
  }

  fclose( f );

  // Truncate the strings to just the first field. The line terminator is
  // included in the search because fgets() keeps it, and a record without
  // any comma would otherwise be printed with a trailing blank line.
  for (size_t n = 0; n < record_count; n++)
    records[n][strcspn( records[n], ",\r\n" )] = '\0';

  // Sort everything but the header
  if (record_count > 2)  // must exist at least two records + header
    qsort( records + 1, record_count - 1, MAX_LINE_LENGTH, compare_records );

  // Print everything but the header
  for (size_t n = 1; n < record_count; n++)
    printf( "%s\n", records[n] );

  return 0;
}