Home > Net >  I want to perform external sorting between two files and the data should print in an order according
I want to perform external sorting between two files and the data should print in an order according

Time:08-19

#include <stdio.h>
#include <stdlib.h>

/*
 * One student record. `s` is the single file-scope instance that main()
 * fills from standard input and then writes out with fprintf().
 */
struct student {
    int roll;         /* roll number */
    int age;          /* age */
    char name[30];    /* student name */
    char branch[30];  /* branch the student studies in */
} s;

/*
 * Read `count` student records from standard input into the global `s`
 * and append each one to `fp` in the 4-line "Name/Branch/Age/Roll.No" format.
 *
 * `sep` is printed in front of every prompt ("" for the first batch, "\n"
 * for the second) so each batch keeps its own prompt spacing.
 *
 * Returns 1 when all records were read, 0 on the first input error.
 */
static int enter_records(FILE *fp, int count, const char *sep) {
    for (int i = 0; i < count; i++) {
        printf("\n%sEnter the Name of the Student: ", sep);
        /* The leading space skips the newline left behind by the previous
         * scanf(); the width 29 keeps the 30-byte buffers from overflowing. */
        if (scanf(" %29[^\n]", s.name) != 1)
            return 0;
        printf("%sEnter the Branch the Student is studying in: ", sep);
        if (scanf(" %29[^\n]", s.branch) != 1)
            return 0;
        printf("%sEnter the Age of the Student: ", sep);
        if (scanf("%d", &s.age) != 1)
            return 0;
        printf("%sEnter the Roll Number: ", sep);
        if (scanf("%d", &s.roll) != 1)
            return 0;
        fprintf(fp, "\nName: %s\nBranch: %s\nAge: %d\nRoll.No: %d",
                s.name, s.branch, s.age, s.roll);
    }
    return 1;
}

/* Copy every remaining byte of `in` to `out`. */
static void copy_stream(FILE *in, FILE *out) {
    int ch; /* int, not char: fgetc() must be able to return EOF distinctly */

    while ((ch = fgetc(in)) != EOF)
        putc(ch, out);
}

/*
 * Prompt for two batches of student records, store each batch in its own
 * text file, then concatenate both files into a third one.
 *
 * Returns 0 on success and EXIT_FAILURE on an input or file error.
 */
int main() {
    FILE *fp1, *fp2, *fp3;
    int num1, num2;

    printf("Enter the Number of Students to compute the data in 1st file: ");
    if (scanf("%d", &num1) != 1) {
        fprintf(stderr, "Invalid number of students\n");
        return EXIT_FAILURE;
    }
    fp1 = fopen("Dbms lab-1 1sttxt.txt", "w");
    if (fp1 == NULL) {
        perror("Dbms lab-1 1sttxt.txt");
        return EXIT_FAILURE;
    }
    if (!enter_records(fp1, num1, "")) {
        fprintf(stderr, "Invalid input\n");
        fclose(fp1);
        return EXIT_FAILURE;
    }
    printf("The Record is stored in the file.");
    fclose(fp1);

    printf("\nEnter the Number of Students to compute data in 2nd file: ");
    if (scanf("%d", &num2) != 1) {
        fprintf(stderr, "Invalid number of students\n");
        return EXIT_FAILURE;
    }
    printf("\n\n Enter Student details in the second file");
    fp2 = fopen("Dbms lab-1 2ndtxt.txt", "w");
    if (fp2 == NULL) {
        perror("Dbms lab-1 2ndtxt.txt");
        return EXIT_FAILURE;
    }
    /* The second batch goes to fp2; fp1 is already closed at this point. */
    if (!enter_records(fp2, num2, "\n")) {
        fprintf(stderr, "Invalid input\n");
        fclose(fp2);
        return EXIT_FAILURE;
    }
    printf("\nThe Record is stored in the file.");
    fclose(fp2);

    fp1 = fopen("Dbms lab-1 1sttxt.txt", "r");
    fp2 = fopen("Dbms lab-1 2ndtxt.txt", "r");
    fp3 = fopen("Dbms lab-1 3rdtxt.txt", "w");
    if (fp1 == NULL || fp2 == NULL || fp3 == NULL) {
        printf("Could not open the file");
        if (fp1 != NULL)
            fclose(fp1);
        if (fp2 != NULL)
            fclose(fp2);
        if (fp3 != NULL)
            fclose(fp3);
        return EXIT_FAILURE;
    }
    copy_stream(fp1, fp3);
    copy_stream(fp2, fp3);
    printf("\nThe two files are successfully merged.");
    fclose(fp1);
    fclose(fp2);
    fclose(fp3);
    return 0;
}

I've merged data from 2 files into a single file. Now I want to perform external sorting in the third file. I want to sort the data based on the value of roll. How do I do it ? Can you help me with the code? Thank you.

CodePudding user response:

There are many problems in your code:

  • ch must be defined with type int to store all possible values returned by fgetc() and properly distinguish EOF from all valid byte values,

  • you should test the return value of scanf() to detect input errors. Each of your calls to scanf() must return 1 for success because there is a single conversion attempted in each call.

  • scanf("%[^\n]s", s.name); is invalid for 2 reasons: the s after the ] is useless, and you should pass the maximum number of characters to store to s.name to avoid buffer overflows on invalid input. Use this instead:

      /* leading space skips pending whitespace (including a leftover newline);
         29 = sizeof s.name - 1, so the terminating '\0' always fits */
      if (scanf(" %29[^\n]", s.name) != 1) {
          /* handle the input error */
      }
    
  • the second scanf("%[^\n]s", s.branch); will fail because the newline was left pending in the input stream by the previous call to scanf(). You should flush the rest of the input line with this:

       int ch;
       while ((ch = getchar()) != EOF && ch != '\n')
           continue;
    
  • you use a different conversion for the second entry: %s, thus not allowing embedded spaces in the name or branch.

  • you output both entries to fp1: the second fprintf() has undefined behavior because fp1 has been closed.

  • fprintf(fp1, "\nName: %s\nBranch: %s\nAge: %d\nRoll.No: %d", ...) outputs the data on 5 separate lines starting with a blank line, and ending the file without a trailing newline. This is inadequate. You should instead output the data on a single line with a trailing newline:

          fprintf(fp1, "Name: '%s'  Branch: '%s'  Age: %d  Roll.No: %d\n",
                  s.name, s.branch, s.age, s.roll);
    

External sorting can be performed using the command line utility sort, but the options needed to sort on the last numeric field are non-trivial. Writing code to perform this sorting in C seems challenging. If you can assume that both files are already sorted by roll number and store one record per line (as suggested above), you can merge them as you read them, one line at a time, comparing the last field as a number and writing the line with the smaller number until both files are exhausted:

// merge the files: read one line from each file,
//       compare the roll numbers and output the
//       line with the lesser number.
//
// fp1, fp2: input streams, one record per line, each line containing
//           "Roll.No: <number>"; both are expected to be sorted by that
//           number already. A line without the marker is treated as roll 0.
// fp3:      output stream receiving the merged lines.
// On equal roll numbers the line from fp1 is written first.
void merge_files(FILE *fp1, FILE *fp2, FILE *fp3) {
    char buf1[200], buf2[200];
    char *p1, *p2;
    int has1 = 0, has2 = 0;   // 1 while bufN holds a line not yet written
    long roll1 = 0, roll2 = 0;
    for (;;) {
        if (!has1 && fgets(buf1, sizeof buf1, fp1)) {
            p1 = strstr(buf1, "Roll.No: ");
            roll1 = p1 ? strtol(p1 + 9, NULL, 10) : 0;   // 9 == strlen("Roll.No: ")
            has1 = 1;
        }
        if (!has2 && fgets(buf2, sizeof buf2, fp2)) {
            p2 = strstr(buf2, "Roll.No: ");
            roll2 = p2 ? strtol(p2 + 9, NULL, 10) : 0;
            has2 = 1;
        }
        if (!has1 && !has2)
            break;
        // buf1 may only be written while it holds a pending line; without the
        // has1 test a stale (or never filled) buf1 would be emitted over and
        // over once fp1 is exhausted.
        if (has1 && (!has2 || roll1 <= roll2)) {
            fputs(buf1, fp3);
            has1 = 0;
        } else {
            fputs(buf2, fp3);
            has2 = 0;
        }
    }
}

CodePudding user response:

What you are attempting is far more complex than your method could achieve. There are too many shortcomings to properly clarify in a brief space. Primarily, the 3rd file should be the combination of records of the first two, but sorted by 'roll'. To attempt to 'merge' them (when records are split across 4 lines in each file) presumes that each 'batch' is sorted to begin with. To make matters worse, the 'sort key' has been 'tainted' with additional text (and is a string of digits, not a number.)

I've written (and trivially tested) a working version. Rather than 'copy/paste' of code, this version 're-uses' "input and storing" functions for the two batches of records.

Notice that the (reliable) library function qsort() is used to sort all the records in memory after the two lists have been joined into one, as you wanted. It's not hard to imagine writing and using a different cmp() function that would sort the records by age or by name, or even subsorting by roll within different branches.

Go very slowly reading through this working example. I hope it helps you become a better 'C' programmer.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* One student record as entered by the user (see enterOne()). */
typedef struct student {
    char name[30];   /* read with fgets(), so it normally keeps its trailing '\n' */
    char branch[30]; /* read with fgets(), so it normally keeps its trailing '\n' */
    int age;
    int roll;        /* sort key used by cmp() */
} s_t;

/* Growable array of student records; grown one element at a time by append(). */
typedef struct {
    s_t *p;     /* heap array of `count` records, or NULL while the list is empty */
    int count;  /* number of records currently stored in p */
} list_t;

/*
 * Append a copy of *s to the end of the list, growing the array by one slot.
 *
 * pl: list to extend; pl->p may be NULL when the list is still empty.
 * s:  record to copy; it is not modified.
 *
 * Terminates the program with exit status 1 if memory cannot be allocated.
 */
void append( list_t *pl, s_t *s ) {
    /* realloc into a temporary so pl->p is never overwritten with NULL */
    s_t *tmp = realloc( pl->p, ((size_t)pl->count + 1) * sizeof *tmp );
    if( tmp == NULL )
        exit( 1 );
    pl->p = tmp;

    pl->p[ pl->count ] = *s;
    pl->count++;
}

/*
 * Write every record of the list to the text file `fname`, replacing any
 * previous content.
 *
 * No '\n' follows the name and branch fields in the format string: both were
 * read with fgets() and normally still carry their own trailing newline.
 *
 * Terminates the program with exit status 1 if the file cannot be opened or
 * cannot be closed cleanly (a failed close means buffered data was lost).
 */
void writeBatch( char *fname, list_t *pl ) {
    FILE *fp = fopen( fname, "w" );

    if( fp == NULL )
        exit( 1 );

    for( int i = 0; i < pl->count; i++ ) {
        const s_t *p = pl->p + i;
        fprintf( fp, "Name: %sBranch: %sAge: %d\nRoll.No: %d\n", p->name, p->branch, p->age, p->roll );
    }

    if( fclose( fp ) != 0 )
        exit( 1 );
}

/*
 * Prompt for one student record on standard input and store it in *one.
 *
 * Returns 1 when a complete record was read.
 * Returns 0 when the entered name starts with "end" (the batch terminator)
 * or when input ends or fails before the record is complete; in that case
 * *one may be only partially filled and must not be used.
 */
int enterOne( s_t *one ) {
    printf("Student Name: ");
    /* Without this check, end-of-input would leave one->name unset and the
     * caller's loop would never terminate. */
    if( fgets( one->name, sizeof one->name, stdin ) == NULL )
        return 0;

    /* only the first 3 characters are compared, so any name that begins
     * with "end" finishes the batch */
    if( strncmp( one->name, "end", 3 ) == 0 )
        return 0;

    printf("Branch where student studies: ");
    if( fgets( one->branch, sizeof one->branch, stdin ) == NULL )
        return 0;

    char tbuf[32];
    printf("Student's age: ");
    if( fgets( tbuf, sizeof tbuf, stdin ) == NULL )
        return 0;
    /* strtol() has defined behavior on out-of-range input, unlike atoi() */
    one->age = (int)strtol( tbuf, NULL, 10 );

    printf("Student's Roll Number: ");
    if( fgets( tbuf, sizeof tbuf, stdin ) == NULL )
        return 0;
    one->roll = (int)strtol( tbuf, NULL, 10 );

    return 1;
}

/*
 * Interactively collect one batch of student records.
 *
 * pl:      list to fill; it is reset to empty first, so any previous
 *          content is discarded without being freed.
 * batchNo: number shown in the banner.
 * fname:   file the finished batch is written to via writeBatch().
 *
 * Records are read with enterOne() until it returns 0.
 */
void enterBatch( list_t *pl, int batchNo, char *fname ) {
    printf( "(Enter \"end\" as student name to finish this batch)\n" );
    printf( "Begin batch No. %d:\n", batchNo );

    memset( pl, 0, sizeof *pl );

    for( ;; ) {
        s_t entry;

        if( !enterOne( &entry ) )
            break;
        append( pl, &entry );
    }

    writeBatch( fname, pl );
    printf( "Records stored to %s.\n\n", fname );
}

int cmp( const void *px, const void *py ) {
    return ((s_t *)px)->roll - ((s_t *)py)->roll;
}

/*
 * Enter two batches of student records, store each batch in its own file,
 * then write the combined records, sorted by roll number, to a third file.
 *
 * Always returns 0; allocation and file errors terminate the program inside
 * the helpers.
 */
int my_main() {
    list_t list1; enterBatch( &list1, 1, "Dbms lab-1 1sttxt.txt" );
    list_t list2; enterBatch( &list2, 2, "Dbms lab-1 2ndtxt.txt" );

    if( list1.count == 0 && list2.count == 0 ) {
        printf( "That was a waste of time.\n" );
        free( list1.p );
        free( list2.p );
        return 0;
    }

    /* join: copy every record of the second batch onto the end of the first */
    for( int i = 0; i < list2.count; i++ )
        append( &list1, list2.p + i );

    qsort( list1.p, (size_t)list1.count, sizeof *list1.p, cmp );

    writeBatch( "Dbms lab-1 3rdtxt.txt", &list1 );

    /* free(NULL) is a no-op, so no guard is needed */
    free( list1.p );
    free( list2.p );

    printf("\nTwo files written. Sorted version in third file.");

    return 0;
}
  • Related