I'm trying to find the smallest and largest file in a directory in C using stat(). Finding the maximum is easy: initialize a counter i to 0 and compare the files one by one. But I don't know how to initialize j for the minimum, so the largest value can be found but not the smallest.
I know there is something wrong with the j part, but I'm not sure how to initialize j to the first file's size and then carry on from there. I suspect there are other issues as well, but I don't know what they are.
Thanks.
EDIT: I added some lines as per David's suggestion, but I seem to have misunderstood how the stat buffer is used, because both results show the largest value. Why does this happen?
EDIT2: Never mind I reentered the Linux environment it's fine now.
#include <stdio.h>
#include <dirent.h>
#include <sys/stat.h>
#include <time.h>
#include <limits.h>
// Scan one directory and print the names of its largest and smallest regular
// files. The directory is argv[1] when given, otherwise "./".
// Returns 1 when more than one argument is passed or the directory cannot be
// opened, 0 otherwise.
int main(int argc, char* argv[]){
DIR *dp;
struct dirent *dirp;
struct stat filestat;
struct stat statbuf;
struct stat statbuf1; // never used by live code; one stat buffer is enough for both comparisons
// NOTE(review): both name pointers start uninitialized and are printed
// unconditionally at the end, even when no regular file was seen.
char*large_name;
char*small_name;
// Sentinels so the first regular file always replaces both values.
// NOTE(review): st_size is an off_t; storing it in an int can truncate
// sizes that do not fit in an int.
int max = INT_MIN;
int min = INT_MAX;
if(argc > 2){
printf("Please enter ONE dir parameter\n");
return 1;
}
else if(argc==1){
dp = opendir("./");
}
else{
dp = opendir(argv[1]);}
if (dp==NULL){
printf("Can't open dir.\n");
return 1;
}
while ((dirp = readdir(dp)) != NULL) {
// NOTE(review): d_name is only the entry's name, so stat() resolves it
// against the current working directory rather than the directory that
// was opened; this only matches when the two are the same. The return
// value is not checked, so filestat may hold stale data after a failure.
stat(dirp->d_name,&filestat);
if(S_ISREG(filestat.st_mode)){
// Second stat() of the same name; filestat already holds this information.
stat(dirp->d_name,&statbuf);
//stat(dirp->d_name,&statbuf1);
if(statbuf.st_size > max) {
max = (statbuf.st_size);
// NOTE(review): this keeps a pointer into the entry returned by readdir();
// POSIX allows that storage to be overwritten by a later readdir() call on
// the same stream, so the name should be copied instead.
large_name = (dirp->d_name);
}
if(statbuf.st_size < min) {
min = (statbuf.st_size);
// Same pointer-lifetime caveat as for large_name above.
small_name = (dirp->d_name);
}
}
}
// Report the results, then close the directory and exit.
printf("the largest file is %s\n",large_name);
printf("the smallest file is %s\n",small_name);
closedir (dp);
return 0;
}
CodePudding user response:
Since st_size
is typically a 64-bit integer, you should use:
int64_t min = INT64_MAX, max = INT64_MIN;
Also, dirp is dynamic (it changes on every iteration of the loop), including what dirp->d_name points to, so both of your file names may end up displaying the last entry read from the directory, regardless of its size. Instead of copying the pointer into small_name and large_name, you should strcpy the names into buffers that you own:
char large_name[256], small_name[256];
:
:
strcpy(large_name,dirp->d_name);
:
:
strcpy(small_name,dirp->d_name);
CodePudding user response:
In addition to giving large_name and small_name storage of PATH_MAX characters, so that the longest path and filename will fit, you need to form the complete relative filename before calling stat(). (PATH_MAX is 4096 on Linux; the Windows equivalent, MAX_PATH, is 260.) For example, if you pass a directory to search to your program, calling stat() with dirp->d_name alone provides only the "name", not "../path/name" relative to the current location, so stat() looks for the file in the current working directory instead of in the directory being searched.
This can be done by storing the path to search (say in srchdir[PATH_MAX]
) and then combining srchdir
with a path separator (e.g. '/'
) and dirp->d_name
using sprintf()
. You can simply define a PATHSEP
, e.g.
#define PATHSEP "/" /* path component separator */
Then at the beginning of your code, fill srchdir
with the path provided or "."
by default, trimming any trailing '/'
from the directory name provided, e.g.
char srchdir[PATH_MAX]; /* to provide full/relative path to stat */
...
if (argc > 1) { /* set srchdir name (remove trailing '/') */
char *end;
size_t len;
strcpy (srchdir, argv[1]); /* copy argv[1] to srchdir */
len = strlen (srchdir); /* get length */
end = strrchr (srchdir, '/'); /* get pointer to last '/' */
/* if '/' found and at last char in string */
if (end && ((size_t)(end - srchdir + 1) == len))
*end = 0; /* overwrite w/nul-terminating char */
}
else
strcpy (srchdir, "."); /* otherwise, use '.' */
dp = opendir (srchdir); /* opendir */
Now to combine srchdir
with PATHSEP
and dirp->d_name
, you can simply use sprintf()
, e.g.
while ((dirp = readdir(dp))) { /* for each file */
char fname[PATH_MAX]; /* storage for srchdir/d_name */
/* combine srchdir PATHSEP dirp->d_name in fname */
sprintf (fname, "%s%s%s", srchdir, PATHSEP, dirp->d_name);
if (stat (fname, &filestat) == -1) { /* stat fname */
perror ("stat");
continue;
}
...
That way no matter what directory you provide, stat
will have the correct path information.
An example of one approach to your code would be:
#include <stdio.h>
#include <sys/types.h> /* for opendir/closedir/readdir */
#include <dirent.h>
#include <sys/stat.h> /* with sys/types.h for stat */
#include <unistd.h>
#include <limits.h> /* max/min macros */
#include <inttypes.h> /* includes stdint.h & provides PRI macros */
#include <string.h> /* for strcpy */
#define PATHSEP "/" /* path component separator */
#ifndef PATH_MAX
#define PATH_MAX 4096 /* fallback: <limits.h> is allowed to leave PATH_MAX undefined */
#endif

/*
 * Report the largest and smallest regular file found directly inside a
 * directory.
 *
 * Usage: prog [dir]    (dir defaults to "."; any further arguments are ignored)
 *
 * Every directory entry is examined through the combined path
 * "<dir>" PATHSEP "<name>", so stat() finds the file regardless of the
 * current working directory.  The names of the current extremes are copied
 * into local buffers because the storage behind dirp->d_name belongs to
 * readdir() and may be overwritten by the next call.
 *
 * Returns 0 on success, 1 when the directory name does not fit in PATH_MAX
 * or the directory cannot be opened.
 */
int main (int argc, char *argv[]){
    struct dirent *dirp;
    struct stat filestat;
    char srchdir[PATH_MAX];             /* directory prefix handed to stat */
    char large_name[PATH_MAX] = "";     /* own copies of the winning paths */
    char small_name[PATH_MAX] = "";
    int64_t max = INT64_MIN;            /* .st_size is off_t (a signed type) */
    int64_t min = INT64_MAX;
    uint64_t nfiles = 0;                /* regular files examined */
    DIR *dp;
    size_t len;
    int n;

    /* bounded copy of the directory name; reject rather than truncate */
    n = snprintf (srchdir, sizeof srchdir, "%s", argc > 1 ? argv[1] : ".");
    if (n < 0 || (size_t)n >= sizeof srchdir) {
        fputs ("error: directory name too long\n", stderr);
        return 1;
    }

    /* drop trailing '/' characters, but keep a lone "/" (the root) intact */
    len = (size_t)n;
    while (len > 1 && srchdir[len - 1] == '/') {
        srchdir[--len] = '\0';
    }

    dp = opendir (srchdir);
    if (!dp) {                          /* errno set on failure, dump error */
        perror ("opendir");
        return 1;
    }

    while ((dirp = readdir (dp))) {     /* for each directory entry */
        char fname[PATH_MAX];           /* storage for srchdir/d_name */

        /* combine srchdir PATHSEP d_name; skip entries that do not fit */
        n = snprintf (fname, sizeof fname, "%s%s%s",
                      srchdir, PATHSEP, dirp->d_name);
        if (n < 0 || (size_t)n >= sizeof fname) {
            fprintf (stderr, "skipping over-long path: %s%s%s\n",
                     srchdir, PATHSEP, dirp->d_name);
            continue;
        }

        if (stat (fname, &filestat) == -1) {
            perror ("stat");
            continue;
        }

        if (!S_ISREG (filestat.st_mode)) {  /* only regular files count */
            continue;
        }

        /* fname is NUL-terminated and n + 1 <= PATH_MAX, so the copies fit */
        if (filestat.st_size > max) {
            max = filestat.st_size;
            memcpy (large_name, fname, (size_t)n + 1);
        }
        if (filestat.st_size < min) {
            min = filestat.st_size;
            memcpy (small_name, fname, (size_t)n + 1);
        }
        nfiles += 1;
    }

    closedir (dp);

    if (nfiles == 0) {                  /* nothing to report: names are empty */
        printf ("searched 0 files\n"
                "no regular files found in %s\n", srchdir);
        return 0;
    }

    /* sizes are signed 64-bit values, so they need PRId64 (not PRIu64) */
    printf ("searched %" PRIu64 " files\n"
            "the largest file is [%" PRId64 " bytes] %s\n"
            "the smallest file is [%" PRId64 " bytes] %s\n",
            nfiles, max, large_name, min, small_name);

    return 0;
}
If compiling on Linux with gcc, you need to #define _GNU_SOURCE
or pass the define in the compile string with -D_GNU_SOURCE
.
Example Use/Output
Using the default directory:
$ ./bin/readdir_max_min
searched 3473 files
the largest file is [176346 bytes] ./hello_world.bmp
the smallest file is [0 bytes] ./myfile.name
Passing a directory to your code:
$ ./bin/readdir_max_min ~/dev/src-cpp/tmp/debug/
searched 938 files
the largest file is [243643 bytes] /home/david/dev/src-cpp/tmp/debug/2021-04-29-042733_838x822_scrot.png
the smallest file is [0 bytes] /home/david/dev/src-cpp/tmp/debug/outfile.txt
Note: since the code looks for and removes any trailing '/' from the end of the path the user provides, it doesn't matter whether the user provides:
$ ./bin/readdir_max_min ~/dev/src-cpp/tmp/debug/
or
$ ./bin/readdir_max_min ~/dev/src-cpp/tmp/debug
Also note, the reason large_name = (dirp->d_name);
fails is the d_name
buffer used by readdir
isn't memory you have control over. It can be statically declared and overwritten with each new filename processed by readdir
. That's why you must copy the actual name when each new max
or min
is found.
Look things over and let me know if you have further questions.