I wrote a function which should check whether a word is included in a file, but my function always returns NOT_EXISTENT. Why? I checked ptr and it is always empty, although the memory is allocated.
Here is my function:
int search_for_word(char wort[]) {
char *ptr;
FILE *file;
unsigned long size_of_file = 0;
file = fopen("array.txt", "r");
if (file == NULL) {
return ERROR;
}
fseek(file, 0L, SEEK_END);
size_of_file = ftell(file);
ptr = malloc(sizeof(char) * size_of_file 1);
printf("Size:%li\n", size_of_file);
if (ptr == NULL) {
return ERROR;
}
fread(ptr, sizeof(char), size_of_file, file);
if (strstr(ptr, wort) == NULL) {
return NOT_EXISTENT;
}
fclose(file);
return EXISTENT;
}
CodePudding user response:
Here is a modified version implementing suggestions from chux's answer and with an alternative method for huge files (which should probably be used for all files):
/*
 * Report whether the string `wort` occurs in the file "array.txt".
 *
 * By default the file is scanned in 4 KB chunks; the last strlen(wort) bytes
 * of each chunk are carried over to the front of the buffer so that a match
 * straddling two chunks is still found. Setting the `#if 0` below to 1
 * enables an alternative that loads the whole file into memory first and
 * falls back to the chunked scan if that is not possible.
 *
 * Matching uses strstr(), so within a chunk the search stops at the first
 * NUL byte read from the file.
 *
 * Returns EXISTENT if found, NOT_EXISTENT if not found, and ERROR if `wort`
 * is NULL, the file cannot be opened or read, or memory cannot be allocated.
 */
int search_for_word(const char *wort) {
    int res = NOT_EXISTENT;
    if (wort == NULL) {
        return ERROR;
    }
    FILE *file = fopen("array.txt", "r");
    if (file == NULL) {
        return ERROR;
    }
#if 0 // set to 1 if you want to load the whole file in memory
    if (fseek(file, 0L, SEEK_END) == -1) {
        fclose(file);
        return ERROR;
    }
    long size_of_file = ftell(file);
    if (size_of_file < 0) {
        fclose(file);
        return ERROR;
    }
    rewind(file);
    if ((unsigned long)size_of_file + 1 <= SIZE_MAX) {
        char *ptr = malloc((size_t)size_of_file + 1);
        if (ptr != NULL) {
            size_t length = fread(ptr, 1, (size_t)size_of_file, file);
            ptr[length] = '\0';
            res = strstr(ptr, wort) ? EXISTENT : NOT_EXISTENT;
            free(ptr);
            fclose(file);
            return res;
        }
    }
#endif
    /* use a different method: read 4KB at a time */
    size_t len = strlen(wort);
    /* Reject lengths for which 4096 + len + 1 would wrap around. */
    if (len > (size_t)-1 - 4096 - 1) {
        fclose(file);
        return ERROR;
    }
    /* Heap buffer rather than a VLA: its size depends on caller input.
     * Layout: up to `len` carried-over bytes, 4096 fresh bytes, one NUL. */
    char *buf = malloc(4096 + len + 1);
    if (buf == NULL) {
        fclose(file);
        return ERROR;
    }
    size_t nread, pos = 0;
    while ((nread = fread(buf + pos, 1, 4096, file)) > 0) {
        buf[pos + nread] = '\0';
        if (strstr(buf, wort)) {
            res = EXISTENT;
            break;
        }
        if (pos + nread <= len) {
            /* Not enough data yet to discard anything: keep all of it. */
            pos += nread;
        } else {
            /* Keep only the trailing `len` bytes as overlap for the next chunk. */
            memmove(buf, buf + pos + nread - len, len);
            pos = len;
        }
    }
    /* A read error must not be reported as "word not present". */
    if (res != EXISTENT && ferror(file)) {
        res = ERROR;
    }
    free(buf);
    fclose(file);
    return res;
}
CodePudding user response:
At least these problems:
(Biggest issue) Missing rewind @alinsoar
fread() is attempting to read from the end of the file. Move back to the beginning first.
rewind(file); // Add
size_t length = fread(ptr, sizeof(char), size_of_file, file);
Not a string @pm100
ptr is not necessarily a string, as it may lack a null character. strstr() expects two strings.
strstr(ptr, wort) // bad
Instead, append a null character to the data read before calling strstr().
size_t length = fread(ptr, sizeof(char), size_of_file, file);
ptr[length] = '\0'; // Add
Failure to close
The code performs fclose(file) only on some paths. Call fclose() for each successful fopen().
Missing free() @Weather Vane
Free allocated memory when done.
wort[] may be ill-formed
The posted code does not show the origin of wort[], so recommendations are guesses at best.
No check on fseek() success
// fseek(file, 0L, SEEK_END)
if (fseek(file, 0 /* L not needed */, SEEK_END) == -1) {
Handle_error();
}
Better with a const @chqrlie
This allows passing constant strings.
// int search_for_word(char wort[]){
int search_for_word(const char wort[]) {
Minor
The size sizeof(char) * size_of_file + 1 may exceed SIZE_MAX.
sizeof(char) * size_of_file + 1 is conceptually wrong. Better as sizeof(char) * (size_of_file + 1) or just size_of_file + 1u.
Some rough alternative code - unchecked.
// Search the stream `inf` for the string `word`, starting at the stream's
// current position. Let calling code open (and close) the file.
//
// The stream is read in chunks; the last strlen(word) - 1 bytes of each chunk
// are carried over so that a match straddling two chunks is still found.
// Matching uses strstr(), so within a chunk the search stops at the first
// NUL byte read from the stream.
//
// Return 1 on success (an empty `word` always counts as found).
// Return 0 on no-find.
// Return -1 on other failures (NULL argument, over-long word, allocation
// failure, read error).
int search_for_word(const char *word, FILE *inf) {
    if (word == NULL || inf == NULL) {
        return -1;
    }
    size_t length_word = strlen(word);
    if (length_word == 0) {
        return 1;  // the empty string is a substring of any input
    }
    if (length_word >= SIZE_MAX / 2) {
        return -1;  // TBD code to handle this extreme case
    }
    size_t buf_size = 4096;  // Adjust as desired
    if (buf_size <= length_word * 2) {
        buf_size = length_word * 2 + 1;
    }
    char *buf = malloc(buf_size);
    if (buf == NULL) {
        return -1;
    }

    int result = 0;
    size_t kept = 0;  // bytes carried over from the previous chunk
    for (;;) {
        // Leave room for the carried-over bytes and the terminating NUL so
        // that neither fread() nor the terminator writes past the buffer.
        size_t room = buf_size - 1 - kept;
        size_t length_read = fread(buf + kept, 1, room, inf);
        size_t total = kept + length_read;
        buf[total] = '\0';

        if (strstr(buf, word) != NULL) {
            result = 1;
            break;
        }
        if (length_read < room) {  // short read: end of data or read error
            result = ferror(inf) ? -1 : 0;
            break;
        }

        // Copy the last portion of the buffer to the beginning. A match that
        // crosses the chunk boundary can have at most length_word - 1 of its
        // bytes in the chunk just searched.
        kept = length_word - 1;
        memmove(buf, buf + total - kept, kept);
    }

    free(buf);
    return result;
}