Home > Software engineering >  Please help me Allocate memory for big .txt file in C
Please help me Allocate memory for big .txt file in C

Time:03-23

I need to allocate memory using malloc or calloc, for a large file that looks like this:

2357 VKLYKK
7947 1WTFWZ
3102 F2IXK3
2963 EXMW55
2865 50CJES
2510 8PC1AI

There are around 10K of lines in that .txt file. How can I allocate the required memory?

What is the program supposed to do?? The program has to read the whole .txt file sort it by the first number and send output to out.txt But since the the input of the file is huge it wont let me.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#pragma warning(disable : 4996)

typedef struct {
    int number;
    char order[10];
} Data;

int sorting(const void *a, const void *b)
{
    Data *dataA = (Data *)a;
    Data *dataB = (Data *)b;

    // return (dataA->number - dataB->number);  // Ascending order
    return (dataB->number - dataA->number);  // Descending order
}

int main()
{
    FILE *fp;
    FILE *f = fopen("out.txt", "w");
    Data data[20];
    char *line[150]
    int i = 0;
    char file_name[10] = "";

    printf("enter file name: ");
    scanf("%s", &file_name);
    fp = fopen(file_name, "r");
    if (fp == NULL)
    {
        printf("\n%s\" File not found!", file_name);
        exit(1);
    }
    while (1)
    {
        if (fgets(line, 150, fp) == NULL)
            break;

        char *pch;
        pch = strtok(line, " ");
        data[i].number = atoi(pch);     
        pch = strtok(NULL, " ");
        strcpy(data[i].order, pch);
        i  ;     
    }
    printf("#################\n");
    printf("number\torder\n"); 
    for (int k = 0; k < 10; k  )
    {
        printf("%d\t%s", data[k].number, data[k].order);
    }

    qsort(data, 10, sizeof(Data), sorting);

    printf("\n#################\n");
    printf("number\torder\n"); 
    for (int k = 0; k < 10; k  )
    {
        printf("%d\t%s", data[k].number, data[k].order);
        fprintf(f, "%d\t%s", data[k].number, data[k].order);
    }
    fclose(fp);
    fclose(f);
    return 0;
}

CodePudding user response:

If your file contains 10,000 lines or so, your while loop will quickly overrun your data array (which you declared with only 20 elements). If the number of lines is not known in advance, the best way to do this is with a growing array. Start by initialing data (and new dataSize and 'dataCount` variables) as follows:

int dataSize = 0;
int dataCount = 0;
Data *data = NULL;

Then as you use up the space in the array, when it reaches dataSize entries you will have to grow your array. Something like this:

while (1) {
    if (dataCount >= dataSize) {
        Data *new;
        dataSize  = 1000;
        new = realloc(data,dataSize * sizeof *data);
        if (new == NULL) {
            perror("realloc");
            free(data);
            return 2;
        }
        data = new;
    }     
    int cnt = fscanf(fp,"%d %9s", &data[dataCount].number, data[dataCount].order);
    if (cnt == EOF)
        break;
    if (cnt != 2) {
        printf("Error reading data\n");
        return 1;
    }
    dataCount  ;
}

When the while loop finishes (if there were no errors), the data array will contain all of the data, and dataCount will be the total number of data items found.

Note that I used fscanf instead of fgets, as this eliminates the need for intermediate step like calls to atoi and strcpy. I also put in some simple error checking. I chose 1000 as the growth increment, though you can change that. But too small and it fragments the heap more rapidly, and too big requires larger amounts of memory too quickly.

CodePudding user response:

this line

char* line[150];

creates an array of 150 char pointers, this is not what you want if you are reading one line like this

if (fgets(line, 150, fp) == NULL) break;

I suspect you wanted one line of 150 chars

so do

 char line[150];

CodePudding user response:

You can use qsort to sort the array of lines, but that may not be the best approach. It may be more effective to insert the lines into a data structure that can be easily traversed in order. Although this simple minded solution is very much less than ideal, here's a simple-minded example of inserting into a tree. This sorts the lines lexicographically; modifying it to sort numerically based on the line is a good exercise.

/* Build an (unbalanced) binary search tree of lines in input. */

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void * xrealloc(void *buf, size_t num, size_t siz, void *end);
FILE * xfopen(const char *path, const char *mode);

struct entry {
    const char *line;
    struct entry *node[2];
};

static struct entry *
new_node(const char *line)
{
    struct entry *e = calloc(1, sizeof *e);
    if( e == NULL ){
        perror("calloc");
        exit(EXIT_FAILURE);
    }
    e->line = line;
    return e;
}

/*
 * Note that this tree needs to be rebalanced.  In a real
 * project, we would use existing libraries.
 */
static struct entry *
lookup(struct entry **lines, const char *line)
{
    struct entry *t = *lines;
    if( t ){
        int cmp = strcmp(line, t->line);
        return lookup(&t->node[cmp > 0], line);
    } else {
        return *lines = new_node(line);
    }
}

/* In-order descent of the tree, printing one line per entry */
static void
print_table(const struct entry *t)
{
    if( t ){
        print_table(t->node[0]);
        printf("%s", t->line);
        print_table(t->node[1]);
    }
}

static void *
xrealloc(void *buf, size_t num, size_t siz, void *endvp)
{
    char **endp = endvp;
    ptrdiff_t offset = endp && *endp ? *endp - (char *)buf : 0;
    buf = realloc(buf, num * siz);
    if( buf == NULL ){
        perror("realloc");
        exit(EXIT_FAILURE);
    }
    if( endp != NULL ){
        *endp = buf   offset;
    }
    return buf;
}

int
main(int argc, char **argv)
{
    FILE *ifp = argc > 1 ? xfopen(argv[1], "r") : stdin;
    struct entry *lines = NULL;
    char *line = NULL;
    size_t cap = 0;
    while( getline(&line, &cap, ifp) > 0 ){
        (void) lookup(&lines, line);
        line = NULL;
    }
    print_table(lines);
}

FILE *
xfopen(const char *path, const char *mode)
{
    FILE *fp = path[0] != '-' || path[1] != '\0' ? fopen(path, mode) :
        *mode == 'r' ? stdin : stdout;
    if( fp == NULL ){
        perror(path);
        exit(EXIT_FAILURE);
    }
    return fp;
}
  • Related