Home > OS >  How can I alphabetize the first column of a .csv file in C?
How can I alphabetize the first column of a .csv file in C?

Time:01-17

I have a .csv file. Let's say the data is like this:

Location 1,Location 2,Price,Rooms,Bathrooms,CarParks,Type,Area,Furnish
Upper-East-Side,New-York,310000,3,2,0,Built-up,1000,Partly
West-Village,New-York,278000,2,2,0,Built-up,1000,Partly
Theater-District,New-York,688000,3,2,0,Built-up,1000,Partly

Expected output (alphabetized):

Theater-District
Upper-East-Side
West-Village

How can I only show and alphabetize the first column (Location 1) of the file while also skipping the header?

This is currently my code but it's still in a "read and display" form.

#include <stdio.h>

/*
 * Echo "file.csv" to stdout with the header line skipped.
 *
 * Characters are copied one at a time; nothing from the first line is
 * printed except its terminating newline (the `c == '\n'` test below lets
 * that one through, so the output starts with an empty line).
 *
 * Returns 0 in all cases; a failure to open the file is only reported
 * with a message on stdout.
 */
int main()
{
  FILE *fh;
  
  fh = fopen("file.csv", "r");
  
  if (fh != NULL)
  {
    int line_number = 0;
    /* Must be int, not char: fgetc() returns EOF as an int value that a
       plain char cannot reliably represent or be compared against. */
    int c;
    while ( (c = fgetc(fh)) != EOF )
    {
        if(line_number > 0 || c == '\n'){
            putchar(c);
        }
        if(c == '\n'){
            /* Advance to the next line; without the increment the header
               would never be considered finished. */
            line_number++;
        }
    }
    fclose(fh);
  
  } else printf("Error opening file.\n");
  
  return 0;
}

CodePudding user response:

CSV is not a well-defined format, so I suggest you use an existing CSV library instead of parsing the data yourself. For instance, the code below will not work if the first field has any embedded commas. It relies on fscanf()'s `%m` modifier (a POSIX/glibc extension) to allocate each field, and resizes the lines array as needed. This means there are no arbitrary limits.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * qsort() comparator for an array of C-string pointers (char *).
 *
 * qsort() passes the comparator pointers to the array *elements*, so for an
 * array of `char *` each argument is really a `char **`. It has to be
 * dereferenced once before being handed to strcmp(); casting it directly to
 * `const char *` would compare the raw pointer bytes instead of the text.
 *
 * Returns a value <0, 0 or >0 with the same meaning as strcmp().
 */
int strcmp2(const void *a, const void *b) {
    const char *const *lhs = a;
    const char *const *rhs = b;
    return strcmp(*lhs, *rhs);
}

/*
 * Read "unsorted.csv", keep only the first comma-separated field of every
 * line, and print those fields (header excluded) in sorted order.
 *
 * Each field is allocated by fscanf() through the `%m` modifier (a
 * POSIX.1-2008 / glibc extension), and the pointer array grows by one slot
 * per record, so there is no fixed limit on field length or record count.
 *
 * Returns 0 on success (including an empty file), 1 if the file cannot be
 * opened or memory runs out.
 */
int main() {
    FILE *f = fopen("unsorted.csv", "r");
    if(!f) return 1;

    char **lines = NULL;
    size_t n = 0;
    for(;; n++) {
        char *location1;
        /* %m[^,]   : allocate and store everything up to the first comma
           %*[^\n]  : skip the remainder of the line without storing it
           \n       : consume the newline (and any following whitespace) */
        int rv = fscanf(f, "%m[^,]%*[^\n]\n", &location1);
        if(rv != 1) break;
        char **tmp = realloc(lines, (n + 1) * sizeof *tmp);
        if(!tmp) {
            /* realloc() left `lines` untouched; release everything we own
               before bailing out. */
            free(location1);
            for(size_t i = 0; i < n; i++) {
                free(lines[i]);
            }
            free(lines);
            fclose(f);
            return 1;
        }
        lines = tmp;
        tmp[n] = location1;
    }
    fclose(f);

    /* Nothing was read: `lines` is still NULL, and `n - 1` below would wrap
       around, so stop here. */
    if(n == 0) return 0;

    free(lines[0]); // header
    qsort(&lines[1], n - 1, sizeof *lines, strcmp2);
    for(size_t i = 1; i < n; i++) {
        printf("%s\n", lines[i]);
        free(lines[i]);
    }
    free(lines);
    return 0;
}

It produces the expected output:

Theater-District
Upper-East-Side
West-Village

CodePudding user response:

So, assuming some hard limits on line length and CSV file record count, we can just use arrays.

To read a record, just use fgets(). Add each line of text to the array using the usual method.

We use a simple string search and truncate to isolate the first field. (Assuming no fancy stuff like double-quoted fields. I assume you are doing homework.)

To sort everything except the CSV header record, use qsort() with a little additional mathematics.

#include <iso646.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Evaluate x and discard the result by casting it to void; used to mark a
// parameter that is not otherwise referenced.
#define unused(x) (void)(x)

// Size in bytes of each record buffer. It is the size handed to fgets(), so a
// record holds at most MAX_LINE_LENGTH - 1 characters plus the terminator.
#define MAX_LINE_LENGTH 100
// Maximum number of lines (header included) that are read from the file.
#define MAX_RECORD_COUNT 100

// qsort() comparator for the rows of a `char [][MAX_LINE_LENGTH]` table.
// Each element is itself a char array, so the pointer qsort() passes in
// already addresses the first character of the string. A real comparator is
// used instead of casting strcmp(), because calling a function through an
// incompatible function-pointer type is undefined behaviour.
static int compare_records( const void * a, const void * b )
{
  return strcmp( (const char *)a, (const char *)b );
}

// Read the CSV file named by argv[1], keep only the first field of every
// line, and print those fields (header excluded) in sorted order.
//
// Limits: at most MAX_RECORD_COUNT lines are read, and only the first
// MAX_LINE_LENGTH - 1 characters of each line are kept.
//
// Returns 0 on success, 1 if no filename was given or the file cannot be
// opened.
int main( int argc, char ** argv )
{
  if (argc < 2) return 1;
  const char * filename = argv[1];

  char records[MAX_RECORD_COUNT][MAX_LINE_LENGTH];
  size_t record_count = 0;

  // Read our records from file
  FILE * f = fopen( filename, "r" );
  if (!f) return 1;

  while ((record_count < MAX_RECORD_COUNT)
      and fgets( records[record_count], MAX_LINE_LENGTH, f ))
  {
    // No newline in the buffer means the line was longer than the buffer:
    // discard the remainder so it is not mistaken for another record.
    if (!strchr( records[record_count], '\n' ))
    {
      int c;
      while ((c = fgetc( f )) != EOF and c != '\n')
        ;
    }
    record_count += 1;
  }

  fclose( f );

  // Truncate the strings to just the first field. Cutting at the line
  // terminator as well keeps a record without any comma from carrying its
  // newline into the output.
  for (size_t n = 0;  n < record_count;  n++)
    records[n][strcspn( records[n], ",\r\n" )] = '\0';

  // Sort everything but the header
  if (record_count > 2)  // must exist at least two records + header
    qsort( records+1, record_count-1, MAX_LINE_LENGTH, compare_records );

  // Print everything but the header
  for (size_t n = 1;  n < record_count;  n++)
    printf( "%s\n", records[n] );

  return 0;
}
  • Related