Home > Back-end >  How to make sorting function simpler (or shorter)
How to make sorting function simpler (or shorter)

Time:09-17

I am writing a code that reads a binary file, reads each struct field separately, sorts the output, then writes the output to a binary file.

Below is my code.

#define MAX_HAT 11
#include<stdio.h>
#include<stdlib.h>

//struct variables
struct test
{
   short int land;
   double experience;
   short int boys;
   int angle;
   float industry;
   unsigned short thread;
   unsigned char shoe;
   double kitty;
   unsigned long price;
   short int father;
   char room;
   int attraction;
   int cake;
   int foot;
   char hat[MAX_HAT];
   char nest;
   double bean;
};

int compare (const void * a, const void * b) 
   {
   
   struct test** a1 = (struct test**) a;
   struct test** b1 = (struct test**) b;

   //experience(asc)
   if ((*a1)->experience > (*b1)->experience)
   {
       return 1;
   }
   if ((*a1)->experience < (*b1)->experience)
   {
       return -1;
   }
   //land(asc)
   if ((*a1)->land > (*b1)->land)
   {
       return 1;
   }
   if ((*a1)->land < (*b1)->land)
   {
       return -1;
   }
   //nest(desc)
   if ((*a1)->nest > (*b1)->nest)
   {
       return -1;
   }
   if ((*a1)->nest < (*b1)->nest)
   {
       return 1;
   }
   //shoe(asc)
   if ((*a1)->shoe > (*b1)->shoe)
   {
       return 1;
   }
   if ((*a1)->shoe < (*b1)->shoe)
   {
       return -1;
   }
   //hat(desc)
   if (strncmp((*b1) -> hat, (*a1) -> hat, MAX_HAT) != 0)
   {
       return strncmp((*b1) -> hat, (*a1) -> hat, MAX_HAT);
   }
   //boys(asc)
   if ((*a1)->boys > (*b1)->boys)
   {
       return 1;
   }
   if ((*a1)->boys < (*b1)->boys)
   {
       return -1;
   }
   //cake(desc)
   if ((*a1)->cake > (*b1)->cake)
   {
       return -1;
   }
   if ((*a1)->cake < (*b1)->cake)
   {
       return 1;
   }
   //price(desc)
   if ((*a1)->price > (*b1)->price)
   {
       return -1;
   }
   if ((*a1)->price < (*b1)->price)
   {
       return 1;
   }
   //bean(desc)
   if ((*a1)->bean > (*b1)->bean)
   {
       return -1;
   }
   if ((*a1)->bean < (*b1)->bean)
   {
       return 1;
   }
   //room(desc)
   if ((*a1)->room > (*b1)->room)
   {
       return -1;
   }
   if ((*a1)->room < (*b1)->room)
   {
       return 1;
   }
   //father(desc)
   if ((*a1)->father > (*b1)->father)
   {
       return -1;
   }
   if ((*a1)->father < (*b1)->father)
   {
       return 1;
   }
   //industry(desc)
   if ((*a1)->industry > (*b1)->industry)
   {
       return -1;
   }
   if ((*a1)->industry < (*b1)->industry)
   {
       return 1;
   }
   //foot(desc)
   if ((*a1)->foot > (*b1)->foot)
   {
       return -1;
   }
   if ((*a1)->foot < (*b1)->foot)
   {
       return 1;
   }
   //angle(asc)
   if ((*a1)->angle > (*b1)->angle)
   {
       return 1;
   }
   if ((*a1)->angle < (*b1)->angle)
   {
       return -1;
   }
   //kitty(asc)
   if ((*a1)->kitty > (*b1)->kitty)
   {
       return 1;
   }
   if ((*a1)->kitty < (*b1)->kitty)
   {
       return -1;
   }
   //attraction(asc)
   if ((*a1)->attraction > (*b1)->attraction)
   {
       return 1;
   }
   if ((*a1)->attraction < (*b1)->attraction)
   {
       return -1;
   }
   //thread(desc)
   if ((*a1)->thread > (*b1)->thread)
   {
       return -1;
   }
   if ((*a1)->thread < (*b1)->thread)
   {
       return 1;
   }
       return 0;
}

int main(int argc, char **argv)
{
   int size = 8;
   int count = 0;
   int i;

   struct test *t1;
   t1 = (struct test*) malloc (size * sizeof(struct test));

   FILE *fp1;
   FILE *fp2;
   
   if (!t1)
   {
       fprintf(stderr, "Could not allocate memory");
       exit(-2) ;
   }

   fp1 = fopen(argv[1], "rb");
   fp2 = fopen(argv[2], "wb");

   if (argc != 3)
   {
       fprintf(stderr, "Incorrect file names");
       exit(1);
   }

   if (!fp2)
   {
       fprintf(stderr, "Could not open file %s\n", argv[2]);
       exit(-3);
   }

   if (!fp1)
   {
       fprintf(stderr, "Could not open file %s\n", argv[1]);
       exit(-3);
   }
   
   while (!feof(fp1))
   {
       if (count >= size)
       {
           size = size * 2;
           t1 = realloc(t1, sizeof(struct test) * size);
       }
   
       fread(&t1[count].land, sizeof(t1[count].land), 1, fp1);
       fread(&t1[count].experience, sizeof(t1[count].experience), 1, fp1);
       fread(&t1[count].boys, sizeof(t1[count].boys), 1, fp1);
       fread(&t1[count].angle, sizeof(t1[count].angle), 1, fp1);
       fread(&t1[count].industry, sizeof(t1[count].industry), 1, fp1);
       fread(&t1[count].thread, sizeof(t1[count].thread), 1, fp1);
       fread(&t1[count].shoe, sizeof(t1[count].shoe), 1, fp1);
       fread(&t1[count].kitty, sizeof(t1[count].kitty), 1, fp1);
       fread(&t1[count].price, sizeof(t1[count].price), 1, fp1);
       fread(&t1[count].father, sizeof(t1[count].father), 1, fp1);
       fread(&t1[count].room, sizeof(t1[count].room), 1, fp1);
       fread(&t1[count].attraction, sizeof(t1[count].attraction), 1, fp1);
       fread(&t1[count].cake, sizeof(t1[count].cake), 1, fp1);
       fread(&t1[count].foot, sizeof(t1[count].foot), 1, fp1);
       fread(&t1[count].hat, sizeof(t1[count].hat), 1, fp1);
       fread(&t1[count].nest, sizeof(t1[count].nest), 1, fp1);
       fread(&t1[count].bean, sizeof(t1[count].bean), 1, fp1);
       
       count  ;
   }
   
   struct test** t2 = (struct test**) malloc (size * 8);
   for (i = 0; i < count; i  )
   {
       t2[i] = &t1[i];
   }
   
   qsort(t2, count - 1, 8, compare);

   for (i = 0; i < count -1; i  )
   {
       fwrite(&t2[i]->land, sizeof(t2[i]->land), 1, fp2);
       fwrite(&t2[i]->experience, sizeof(t2[i]->experience), 1, fp2);
       fwrite(&t2[i]->boys, sizeof(t2[i]->boys), 1, fp2);
       fwrite(&t2[i]->angle, sizeof(t2[i]->angle), 1, fp2);
       fwrite(&t2[i]->industry, sizeof(t2[i]->industry), 1, fp2);
       fwrite(&t2[i]->thread, sizeof(t2[i]->thread), 1, fp2);
       fwrite(&t2[i]->shoe, sizeof(t2[i]->shoe), 1, fp2);
       fwrite(&t2[i]->kitty, sizeof(t2[i]->kitty), 1, fp2);
       fwrite(&t2[i]->price, sizeof(t2[i]->price), 1, fp2);
       fwrite(&t2[i]->father, sizeof(t2[i]->father), 1, fp2);
       fwrite(&t2[i]->room, sizeof(t2[i]->room), 1, fp2);
       fwrite(&t2[i]->attraction, sizeof(t2[i]->attraction), 1, fp2);
       fwrite(&t2[i]->cake, sizeof(t2[i]->cake), 1, fp2);
       fwrite(&t2[i]->foot, sizeof(t2[i]->foot), 1, fp2);
       fwrite(&t2[i]->hat, sizeof(t2[i]->hat), 1, fp2);
       fwrite(&t2[i]->nest, sizeof(t2[i]->nest), 1, fp2);
       fwrite(&t2[i]->bean, sizeof(t2[i]->bean), 1, fp2);
       
       //Print out the outputs
   printf("%d, %f, %i, %i, %f, %d, %d, %f, %li, %d, %d, %d, %d, %x, %s, %c, %f\n", t2[i]->land, t2[i]->experience, t2[i]->boys, t2[i]->angle, t2[i]->industry, t2[i]->thread, t2[i]->shoe, t2[i]->kitty, t2[i]->price, t2[i]->father, t2[i]->room, t2[i]->attraction, t2[i]->cake, t2[i]->foot, t2[i]->hat, t2[i]->nest, t2[i]->bean);
   }

   fclose(fp1);
   fclose(fp2);

   free(t1);
   free(t2);

   return 0;
}

As you can see in the beginning of my code, my int compare function is very long. I would like to make it simpler(or shorter) if possible. Sorting order has following condition.

Sorting order (in order of precedence):
 
experience in ascending order. 
land in ascending order. 
nest in descending order. 
shoe in ascending order. 
hat in descending order. 
boys in ascending order. 
cake in descending order. 
price in descending order. 
bean in descending order. 
room in descending order. 
father in descending order. 
industry in descending order. 
foot in descending order. 
angle in ascending order. 
kitty in ascending order. 
attraction in ascending order. 
thread in descending order. 

Would you please help me out how it can be done. Cheers.

CodePudding user response:

As you can see in the beginning of my code, my int compare function is way too long. I would like to make it simpler if possible.

This sounds as if you consider shorter code to be simpler than longer code. That's not correct. Often it's actually the opposite. Short code often requires complex solutions while writing code which is easy to read and understand leads to more code lines.

Don't use complex, hard to read/understand solutions to reduce the number of code lines. Keep things simple - even if it requires extra typing.

Using more or less complicated macro solution is a bad idea. Use plain simple C so that your code is easy to understand.

Here is one way to reduce the number of code lines while still keeping stuff in simple C code.

Function based solution

int compare_int(long long int a, long long int b)
{
    if (a > b) return 1;
    if (a < b) return -1;
    return 0;
}

int compare_unsigned(long long unsigned a, long long unsigned b)
{
    if (a > b) return 1;
    if (a < b) return -1;
    return 0;
}

int compare_double(double a, double unsigned b)
{
    if (a > b) return 1;
    if (a < b) return -1;
    return 0;
}

Now you can use these function for the fields like:

struct test** a1 = (struct test**) a;
struct test** b1 = (struct test**) b;
struct test* pa = *a1;
struct test* pb = *b1;

int cmp;
if ((cmp = compare_int(pa->land, pb->land))) return cmp;
if ((cmp = compare_double(pa->experience, pb->experience))) return cmp;
if ((cmp = compare_int(pa->boys, pb->boys))) return cmp;
if ((cmp = compare_int(pa->angle, pb->angle))) return cmp;
if ((cmp = compare_int(pa->industry, pb->industry))) return cmp;
...
return 0;

Notice how the compare_xxx function parameter is written with the largest possible type. By doing that they can be safely used for all "smaller" types. This may however have a little performance penalty. Likewise for function call but here you can try with inline

BTW:

char hat[MAX_HAT];

can't be compared using > (etc). You probably want to use strcmp.

Macro based solution

If you really want to use a macro then you could do like:

// Kind of ugly macro but not extremely complex... so perhaps okay
#define CMP_NUMBER(a, b) (((a) > (b)) ? (1) : (((a) < (b)) ? (-1) : (0)))


int cmp;
if ((cmp = CMP_NUMBER(pa->land, pb->land))) return cmp;
if ((cmp = CMP_NUMBER(pa->experience, pb->experience))) return cmp;
if ((cmp = CMP_NUMBER(pa->boys, pb->boys))) return cmp;
if ((cmp = CMP_NUMBER(pa->angle, pb->angle))) return cmp;
if ((cmp = CMP_NUMBER(pa->industry, pb->industry))) return cmp;
...
return 0;

I prefer the function based solution but this kind of macro solution is also pretty okay as it is pretty simple.

CodePudding user response:

As an initial improvement, you can replace every comparison like:

if ((*a1)->experience > (*b1)->experience) {
    return 1;
} else if ((*a1)->experience < (*b1)->experience) {
    return -1;
}

with

res = (*a1)->experience > (*a2)->experience;
if (res) return res;
res = (*a1)->experience < (*a2)->experience;
if (res) return -res;

But this still requires you to write manual comparisons for each of your struct fields. This is cumbersome, and there's no "easy" solution for this in C - the best solution we have is X-macros.

Let's simplify and assume we have:

struct A {
    char a;
    int b;
};

then you can write code like:

#define X_FIELDS  \
    X(char, a)     \
    X(int, b)       \

and define our struct as follows:

typedef struct {
#define X(type, name) type name;
    X_FIELDS
#undef X
} A;

and finally our "automatic" comparison function:

int compare (const void * a, const void * b) {
    A* a1 = a;
    A* b1 = b;
    int res;
#define X(type, name)           \
    res = a1->name > b1->name;   \
    if (res) return res;          \
    res = a1->name < b1->name;     \
    if (res) return -res;                                               
X_FIELDS
#undef X
}

this is fairly complicated, but scales nicely with new fields added to the struct.

Notice that your struct variables must be declared in the same order that you want your comparison to be done as.


Bonus tip: writing X-macros (and macros in general) can get tricky, you can run gcc -E to see if your macros are getting processed the way you expect.

CodePudding user response:

Before looking for simplicity, check correctness

I have found it interesting requests seeking minor performance improvements yet fail to insure function error free code: see far below.

Anyways on to OP's concern.


How to make sorting function simpler (or shorter)

Rather than compare for greatness, compare first with inequality

//if ((*a1)->experience > (*b1)->experience) {
//     return 1;
//}
//if ((*a1)->experience < (*b1)->experience) {
//    return -1;
//}
if ((*a1)->experience != (*b1)->experience) {
  return (*a1)->experience > (*b1)->experience) ? 1 : -1;
}

Not much different.


A subtle problem is with floating point numbers that are not-a-number. To avoid undefined behavior with qsort(), if compare(a,b) returns 0, compare(b,a) must return 0 too. if compare(a,b) returns a positive, compare(b,a) must return a negative. Typical FP compares functions do not have those properties with NANs.

// Suitable FP compare to be called within OP's compare().
// Side effect, all NANs are "greater" than others.
int dbl_cmp(double a, double b) {
  if (isnan(a)) {
    if (isnan(b)) return 0;  // Treat all NAN as same "value"
    return -1;
  }
  if (isnan(b)) {
    return 1;
  }
  return (a > b) - (a- b);
}

Instead of !feof(fp1), check the returns value of fread().

Test return value of malloc().

  • Related