Home > Blockchain >  Checking if word is included in file
Checking if word is included in file

Time:03-20

I wrote a function which should check whether a word is included in a file, but my function always returns NOT_EXISTENT. Why? I checked ptr and it's always empty, even though the memory is allocated.

Here is my function:

/*
 * Intended to report whether the string `wort` occurs in "array.txt".
 *
 * Returns ERROR if the file cannot be opened or the buffer cannot be
 * allocated, NOT_EXISTENT if strstr() finds no match, EXISTENT otherwise.
 *
 * NOTE(review): this is the code the question is about. Its known defects
 * are marked with NOTE comments rather than fixed, so the listing still
 * shows the behavior being asked about.
 */
int search_for_word(char wort[]) {
    char *ptr;
    FILE *file;
    unsigned long size_of_file = 0;
    file = fopen("array.txt", "r");
    if (file == NULL) {
        return ERROR;
    }
    /* Seek to the end so that ftell() reports the file size. */
    fseek(file, 0L, SEEK_END);
    size_of_file = ftell(file);
    /* One byte more than the file contents. */
    ptr = malloc(sizeof(char) * size_of_file + 1);
    printf("Size:%li\n", size_of_file);
    if (ptr == NULL) {
        return ERROR; /* NOTE: returns with the file still open */
    }
    /* NOTE: the stream is still positioned at end-of-file (no rewind), so
     * this read starts where there is no data left. */
    fread(ptr, sizeof(char), size_of_file, file);
    /* NOTE: nothing writes a terminating '\0' into ptr before strstr(). */
    if (strstr(ptr, wort) == NULL) {
        return NOT_EXISTENT; /* NOTE: returns without fclose() or free() */
    }
    fclose(file);
    return EXISTENT; /* NOTE: ptr is never freed */
}

CodePudding user response:

Here is a modified version implementing suggestions from chux's answer and with an alternative method for huge files (which should probably be used for all files):

/*
 * Report whether the string `wort` occurs anywhere in the file "array.txt".
 *
 * Returns EXISTENT when a match is found, NOT_EXISTENT when the file was
 * read to the end without a match, and ERROR when the file cannot be opened
 * or the working buffer cannot be allocated (the optional whole-file path
 * also returns ERROR when the file size cannot be determined).
 *
 * By default the file is scanned in CHUNK_SIZE-byte reads. The last
 * strlen(wort) bytes seen so far are kept at the front of the buffer so a
 * match straddling two reads is still found. Matching uses strstr(), so a
 * NUL byte in the file data ends the searchable part of that buffer.
 */
int search_for_word(const char *wort) {
    enum { CHUNK_SIZE = 4096 };
    int res = NOT_EXISTENT;
    FILE *file = fopen("array.txt", "r");
    if (file == NULL) {
        return ERROR;
    }
#if 0  // set to 1 if you want to load the whole file in memory
    if (fseek(file, 0L, SEEK_END) == -1) {
        fclose(file);
        return ERROR;
    }
    long size_of_file = ftell(file);
    if (size_of_file < 0) {
        fclose(file);
        return ERROR;
    }
    rewind(file);
    if ((unsigned long)size_of_file + 1 <= SIZE_MAX) {
        char *ptr = malloc((size_t)size_of_file + 1);
        if (ptr != NULL) {
            /* fread() may return fewer bytes than requested; terminate at
             * the number of bytes actually read. */
            size_t length = fread(ptr, 1, (size_t)size_of_file, file);
            ptr[length] = '\0';
            res = strstr(ptr, wort) ? EXISTENT : NOT_EXISTENT;
            free(ptr);
            fclose(file);
            return res;
        }
    }
    /* Allocation failed or the size does not fit: fall through to the
     * chunked scan below (the stream has already been rewound). */
#endif
    /* use a different method: read 4KB at a time */
    size_t len = strlen(wort);
    if (len > SIZE_MAX - CHUNK_SIZE - 1) {
        fclose(file);
        return ERROR;
    }
    /* Heap buffer rather than a VLA: len is caller-controlled and could
     * otherwise exhaust the stack. Layout: carried tail + chunk + '\0'. */
    char *buf = malloc(CHUNK_SIZE + len + 1);
    if (buf == NULL) {
        fclose(file);
        return ERROR;
    }
    size_t nread, pos = 0;  /* pos = number of carried-over bytes, <= len */
    while ((nread = fread(buf + pos, 1, CHUNK_SIZE, file)) > 0) {
        buf[pos + nread] = '\0';
        if (strstr(buf, wort)) {
            res = EXISTENT;
            break;
        }
        if (pos + nread <= len) {
            /* Not enough data yet to discard anything: keep it all. */
            pos += nread;
        } else {
            /* Keep only the last len bytes for the next round. */
            memmove(buf, buf + pos + nread - len, len);
            pos = len;
        }
    }
    free(buf);
    fclose(file);
    return res;
}

CodePudding user response:

At least these problems:

(Biggest issue) Missing rewind @alinsoar

fread() is attempting a read from the end of the file. Move back to the beginning.

rewind(file); // Add
size_t length = fread(ptr, sizeof(char), size_of_file, file);

Not a string @pm100

ptr is not necessarily a string, as it may lack a null character. strstr() expects 2 strings.

strstr(ptr, wort) // bad

Instead, append a null character to the data read before strstr().

size_t length = fread(ptr, sizeof(char), size_of_file, file);
ptr[length] = '\0'; // Add

Failure to close

Code selectively performs fclose(file). Call fclose() with each successful fopen().

Missing free() @Weather Vane

Free allocated memory when done.

wort[] may be ill formed

Posted code does not show the origin of wort[]. So recommendations are guesses at best.

No check on fseek() success

// fseek(file, 0L, SEEK_END)
if (fseek(file, 0 /* L not needed */, SEEK_END) == -1) {
  Handle_error();
}

Better with a const @chqrlie

This allows passing constant strings.

// int search_for_word(char wort[]){
int search_for_word(const char wort[]) {

Minor

The size sizeof(char) * size_of_file + 1 may exceed SIZE_MAX.

sizeof(char) * size_of_file + 1 is conceptually wrong. Better as sizeof(char) * (size_of_file + 1) or just size_of_file + 1u.


Some rough alternative code - unchecked.

// Search the stream `inf` for the string `word`, reading from the stream's
// current position to end-of-file. The calling code opens (and closes) the
// file.
//
// Return 1 on success (word found).
// Return 0 on no-find.
// Return -1 on other failures: NULL argument, word too long to buffer,
// allocation failure, or a read error on the stream.
//
// Matching uses strstr(), so a NUL byte in the stream data ends the
// searchable part of the current buffer.
int search_for_word(const char *word, FILE *inf) {
  if (word == NULL || inf == NULL) {
    return -1;
  }
  size_t length_word = strlen(word);
  if (length_word >= SIZE_MAX / 2) {
    return -1;  // TBD code to handle this extreme case
  }
  size_t buf_size = 4096;  // Adjust as desired
  if (buf_size <= length_word * 2) {
    buf_size = length_word * 2 + 1;
  }
  // One extra byte so the buffer can always be NUL-terminated, even when a
  // read fills all buf_size data bytes.
  char *buf = malloc(buf_size + 1);
  if (buf == NULL) {
    return -1;
  }
  size_t carried = 0;  // bytes kept from the previous read; < buf_size
  int result;
  for (;;) {
    // Fill only the space left after the carried-over bytes.
    size_t want = buf_size - carried;
    size_t length_read = fread(buf + carried, 1, want, inf);
    size_t total = carried + length_read;
    buf[total] = '\0';
    if (strstr(buf, word)) {
      result = 1;
      break;
    }
    if (length_read < want) {  // short read: end-of-file or error
      result = ferror(inf) ? -1 : 0;
      break;
    }
    // Copy the last length_word bytes to the beginning so that a match
    // straddling two reads is still seen. Here total == buf_size, which is
    // greater than length_word, so the source range is inside the buffer.
    carried = length_word;
    memmove(buf, buf + total - carried, carried);
  }
  free(buf);
  return result;
}
  • Related