Home > other >  Find the largest and smallest file with one loop in C?
Find the largest and smallest file with one loop in C?

Time:02-17

I'm trying to find the smallest and largest file in C with stat(). It's easy to initialize the counter i with 0 and then compare one by one but IDK how to initialize j. Hence the largest value can be found but not the smallest.

I know there is something wrong with the j part but I'm not sure how to initialize j to the first file and then get going. I feel there is other issues as well but IDK

Thanks.

EDIT: I added some lines as per David's suggestion, but I seem to misunderstand how stat uses its buffer, because both results show the largest value. Why does this happen?

EDIT2: Never mind I reentered the Linux environment it's fine now.

 #include <stdio.h>
#include <dirent.h>
#include <sys/stat.h>
#include <time.h>
#include <limits.h>


/*
 * Asker's original attempt, kept verbatim so the answers below still apply.
 *
 * Opens argv[1] (or "./" when no argument is given), walks the entries with
 * readdir(), and for every entry that stat() reports as a regular file tracks
 * the largest and smallest st_size seen, then prints the two file names.
 * Returns 1 when more than one argument is given or the directory cannot be
 * opened, otherwise 0.
 */
int main(int argc, char* argv[]){

        DIR *dp;
        struct dirent *dirp;
        struct stat filestat;
        struct stat statbuf;
        struct stat statbuf1; //unsure if I should use another buffer for the min
        /* NOTE(review): statbuf1 is never used; one stat buffer is enough. */
        /* NOTE(review): both name pointers are uninitialized and stay so when
         * no regular file is found, yet they are printed unconditionally. */
        char*large_name;
        char*small_name;
        /* NOTE(review): st_size is an off_t, which can be wider than int. */
        int max = INT_MIN;
        int min = INT_MAX;

        /* At most one directory argument is accepted; none means "./". */
        if(argc > 2){
                printf("Please enter ONE dir parameter\n");
                return 1;
        }
        else if(argc==1){
                dp = opendir("./");
        }
        else{
                dp = opendir(argv[1]);}




        if (dp==NULL){
                printf("Can't open dir.\n");
                return 1;
        }




        while ((dirp = readdir(dp)) != NULL) {
                
                /* NOTE(review): d_name is only the entry name, so stat() looks
                 * it up relative to the current working directory rather than
                 * the directory that was opened; the return value is also not
                 * checked, so filestat may hold data from an earlier entry. */
                stat(dirp->d_name,&filestat);

                if(S_ISREG(filestat.st_mode)){
                        /* Second stat() of the same name; statbuf receives the
                         * same information filestat already holds. */
                        stat(dirp->d_name,&statbuf);
                        //stat(dirp->d_name,&statbuf1);
                        if(statbuf.st_size > max) {
                                max = (statbuf.st_size);
                                /* NOTE(review): stores a pointer into the
                                 * dirent returned by readdir(); a later
                                 * readdir() call may overwrite that storage. */
                                large_name = (dirp->d_name);
                                
                        }
                        
                        if(statbuf.st_size < min) {
                                min = (statbuf.st_size);
                                /* NOTE(review): same pointer-lifetime issue as
                                 * large_name above. */
                                small_name = (dirp->d_name);
                                
                        }
                

                }

        }
        // Print the results, then close the directory and exit.

        printf("the largest file is %s\n",large_name);
        printf("the smallest file is %s\n",small_name);
        closedir (dp);
        return 0;

}

CodePudding user response:

Since st_size is typically a 64-bit integer, you should use:

int64_t min = INT64_MAX, max = INT64_MIN;

Also, dirp is dynamic (it changes on every iteration of the loop), including what dirp->d_name points to, so your file names may always display as the last entry in the directory, regardless of its size. Instead of copying the pointer to small_name and large_name, you should strcpy the names (this needs #include <string.h>):

char large_name[256], small_name[256];
    :
    :
strcpy(large_name,dirp->d_name);
    :
    :
strcpy(small_name,dirp->d_name);

CodePudding user response:

In addition to the storage needed for large_name and small_name of PATH_MAX characters to ensure the longest path and filename will fit, you need to form the complete relative filename before calling stat(). (PATH_MAX is 4096 on Linux; the Windows counterpart, MAX_PATH, is 260.) For example, if you pass a directory to search to your program, calling stat() with dirp->d_name only provides the "name", not "../path/name" from the current location.

This can be done by storing the path to search (say in srchdir[PATH_MAX]) and then combining srchdir with a path separator (e.g. '/') and dirp->d_name using sprintf(). You can simply define a PATHSEP, e.g.

#define PATHSEP "/"       /* path component separator */

Then at the beginning of your code, fill srchdir with the path provided or "." by default, trimming any trailing '/' from the directory name provided, e.g.

  char srchdir[PATH_MAX];       /* to provide full/relative path to stat */
  ...
  if (argc > 1) {               /* set srchdir name (remove trailing '/') */
    char *end;
    size_t len;
    
    strcpy (srchdir, argv[1]);        /* copy argv[1] to srchdir */
    len = strlen (srchdir);           /* get length */
    end = strrchr (srchdir, '/');     /* get pointer to last '/' */
    /* if '/' found and at last char in string */
    if (end && ((size_t)(end - srchdir + 1) == len))
      *end = 0;                       /* overwrite w/nul-terminating char */
  }
  else
    strcpy (srchdir, ".");            /* otherwise, use '.' */
  
  dp = opendir (srchdir);       /* opendir */

Now to combine srchdir with PATHSEP and dirp->d_name, you can simply use sprintf(), e.g.

  while ((dirp = readdir(dp))) {      /* for each file */
    
    char fname[PATH_MAX];             /* storage for srchdir/d_name */
    
    /* combine srchdir PATHSEP dirp->d_name in fname */
    sprintf (fname, "%s%s%s", srchdir, PATHSEP, dirp->d_name);
    
    if (stat (fname, &filestat) == -1) {  /* stat fname */
      perror ("stat");
      continue;
    }
    ...

That way no matter what directory you provide, stat will have the correct path information.

An example of one approach to your code would be:

#include <stdio.h>

#include <sys/types.h>    /* for opendir/closedir/readdir */
#include <dirent.h>

#include <sys/stat.h>     /* with sys/types.h for stat */
#include <unistd.h>

#include <limits.h>       /* max/min macros */

#include <inttypes.h>     /* includes stdint.h & provides PRI macros */
#include <string.h>       /* for strcpy */

#define PATHSEP "/"       /* path component separator */

#ifndef PATH_MAX          /* strict ISO modes can hide PATH_MAX in <limits.h> */
#define PATH_MAX 4096
#endif

/*
 * Report the largest and smallest regular file in a directory.
 *
 * Usage: prog [directory]      (directory defaults to ".")
 *
 * Every directory entry is stat()ed through the path "directory/name", so the
 * result is correct no matter what the current working directory is.  Entries
 * that cannot be stat()ed, or whose combined path does not fit in PATH_MAX,
 * are reported on stderr and skipped.  Only regular files are considered;
 * subdirectories are not descended into.
 *
 * Returns 0 on success (including when no regular file is found) and 1 when
 * the directory name is too long or the directory cannot be opened.
 */
int main (int argc, char *argv[]){

  struct dirent *dirp;
  struct stat filestat;
  char srchdir[PATH_MAX];           /* to provide full/relative path to stat */
  char large_name[PATH_MAX] = "";   /* you must provide storage for names:  */
  char small_name[PATH_MAX] = "";   /* dirp->d_name isn't permanent storage */
  int64_t max = INT64_MIN;          /* .st_size is type off_t (signed) */
  int64_t min = INT64_MAX;
  uint64_t nfiles = 0;
  const char *dirarg = argc > 1 ? argv[1] : ".";  /* default to '.' */
  const char *sep = PATHSEP;
  size_t len = strlen (dirarg);
  DIR *dp;

  if (len >= sizeof srchdir) {      /* validate name fits before copying */
    fputs ("error: directory name too long\n", stderr);
    return 1;
  }
  memcpy (srchdir, dirarg, len + 1);  /* copy including nul-terminator */

  /* remove trailing '/' characters, but keep a lone "/" so that the root
   * directory is not reduced to the empty string
   */
  while (len > 1 && srchdir[len - 1] == '/')
    srchdir[--len] = 0;

  /* if srchdir still ends in '/' (it is "/"), don't add a second separator */
  if (len > 0 && srchdir[len - 1] == '/')
    sep = "";

  dp = opendir (srchdir);       /* opendir */

  if (!dp) {                    /* validate directory open */
    perror ("opendir");         /* errno set on failure, dump error */
    return 1;
  }

  while ((dirp = readdir(dp))) {      /* for each file */

    char fname[PATH_MAX];             /* storage for srchdir/d_name */

    /* combine srchdir sep dirp->d_name in fname, checking for truncation */
    int n = snprintf (fname, sizeof fname, "%s%s%s", srchdir, sep,
                      dirp->d_name);

    if (n < 0 || (size_t)n >= sizeof fname) {
      fprintf (stderr, "error: path too long, skipping '%s'\n", dirp->d_name);
      continue;
    }

    if (stat (fname, &filestat) == -1) {  /* stat fname */
      perror ("stat");
      continue;
    }

    if (!S_ISREG (filestat.st_mode))      /* only regular files count */
      continue;

    /* fname is known to fit in PATH_MAX, the size of both name buffers,
     * so the strcpy() calls below cannot overflow
     */
    if (filestat.st_size > max) {         /* check/set max */
      max = filestat.st_size;
      strcpy (large_name, fname);         /* copy large_name */
    }

    if (filestat.st_size < min) {         /* check/set min */
      min = filestat.st_size;
      strcpy (small_name, fname);         /* copy small_name */
    }

    nfiles += 1;
  }

  closedir (dp);

  if (nfiles == 0) {            /* max/min/names were never set, don't print */
    printf ("searched 0 files\n"
            "no regular files found in %s\n", srchdir);
    return 0;
  }

  /* you only need 1 printf for any continual block of output
   * no matter how many lines of output that is.
   * (max and min are signed, so they take PRId64, not PRIu64)
   */
  printf ("searched %" PRIu64 " files\n"
          "the largest file is  [%" PRId64 " bytes] %s\n"
          "the smallest file is [%" PRId64 " bytes] %s\n",
          nfiles, max, large_name, min, small_name);

  return 0;
}

If compiling on Linux with gcc, you need to #define _GNU_SOURCE or pass the define in the compile string with -D_GNU_SOURCE.

Example Use/Output

Using the default directory:

$ ./bin/readdir_max_min
searched 3473 files
the largest file is  [176346 bytes] ./hello_world.bmp
the smallest file is [0 bytes] ./myfile.name

Passing a directory to your code:

$ ./bin/readdir_max_min ~/dev/src-cpp/tmp/debug/
searched 938 files
the largest file is  [243643 bytes] /home/david/dev/src-cpp/tmp/debug/2021-04-29-042733_838x822_scrot.png
the smallest file is [0 bytes] /home/david/dev/src-cpp/tmp/debug/outfile.txt

Note: since the code looks for and removes any trailing '/' from the end of the path the user provides, it doesn't matter if the user provides:

$ ./bin/readdir_max_min ~/dev/src-cpp/tmp/debug/

or

$ ./bin/readdir_max_min ~/dev/src-cpp/tmp/debug

Also note, the reason large_name = (dirp->d_name); fails is the d_name buffer used by readdir isn't memory you have control over. It can be statically declared and overwritten with each new filename processed by readdir. That's why you must copy the actual name when each new max or min is found.

Look things over and let me know if you have further questions.

  •  Tags:  
  • c
  • Related