I am working on a program written in C that recursively walks a given directory in order to print out the Nth largest files and their sizes in bytes. I am using two arrays to account for the filesystem entry names and filesystem entry sizes respectively.
EDIT: I have updated my program to implement the suggestions shared in the comment section. I have also provided a full program instead of a 'bunch of snippets' as suggested in a comment, but this is becoming quite a bit of code. My focus now is on correctly implementing a swap operation within my iSort
function.
#include <stdio.h>
#include <dirent.h>
#include <sys/stat.h>
#include <string.h>
// number of files to display size information for
const int N = 10;
// a struct used to hold filesystem entry names and corresponding sizes in bytes
struct info {
char name[1024];
int size;
};
/* A simple implementation of insertion sort that will operate upon
an array of info structs, sorting them by their member size in
ascending order */
void iSort(struct info *fs_info[], int size, int info_size)
{
int i, j, key;
for (i = 1; i < size; i )
{
key = fs_info[i]->size;
j = i - 1;
while (j >= 0 && fs_info[j]->size > key)
{
printf("info_size: %d\n", info_size);
// TODO complete a swap operation
memmove(fs_info[j 1], fs_info[j], info_size);
j = j - 1;
}
fs_info[j 1]->size = key;
}
}
void get_size(char *path, struct info fs_info[N], int info_size)
{
static int items_added = 0;
static int max_size = 0;
struct stat st;
if (stat(path, &st) == 0)
{
if (items_added < N) // if array capacity will not be exceeded
{
strcpy(fs_info[items_added].name, path);
fs_info[items_added].size = st.st_size;
if (st.st_size > max_size)
max_size = st.st_size;
items_added ;
}
else
{
// do a comparison to determine where to insert
// sort first
iSort(&fs_info, 10, info_size); // this function call results in a seqfault
}
}
else
{
printf("Error getting stat for entry %s: %d\n", path, stat(path, &st));
}
}
void walk(const char *currDir, struct info fs_info[N], int info_size)
{
DIR *dir = opendir(currDir);
struct dirent *entry;
if (dir == NULL)
{
// directory could not be opened
return;
}
while ((entry = readdir(dir)) != NULL)
{
if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0)
{
// if directory is current dir or parent dir
continue;
}
char path_to_entry[1024];
snprintf(path_to_entry, sizeof(path_to_entry), "%s/%s", currDir, entry->d_name);
// use path_to_entry to call stats on the entry
get_size(path_to_entry, fs_info, info_size);
if (entry->d_type == DT_DIR)
{
// recursively visit subdirectories
walk(path_to_entry, fs_info, info_size);
}
}
closedir(dir);
}
int main(int argc, char *argv[])
{
if (argc != 2)
{
printf("Usage: %s <target directory>\n", argv[0]);
}
const char *target_dir = argv[1];
struct info fs_entries[N];
const int info_size = sizeof(struct info);
for (int i = 0; i < N; i )
{
strcpy(fs_entries[i].name, "");
fs_entries[i].size = 0;
}
printf("Finding %d largest files in: %s\n", N, target_dir);
walk(target_dir, fs_entries, info_size);
for (int i = 0; i < N; i )
{
printf("%s : %d\n", fs_entries[i].name, fs_entries[i].size);
}
return 0;
}
Currently, the memmove()
invocation in iSot()
results in a EXC_BAD_ACCESS
error, I am working on improving this function now. Any suggestions in the interim are appreciated. Thanks also to those who commented on this question earlier.
CodePudding user response:
This is not so much an answer as it is an example of how one might code this with a bit less fiddling about with every bit/byte.
I don't run a flavour of UNIX, so this offering is untested and may contain typos and even bugs. I hope not.
#include <stdio.h> // From 'generic' to 'specific'
#include <string.h>
#include <sys/stat.h>
#include <dirent.h>
const int N = 10;
// Global array holding names and sizes
struct {
char name[ MAX_PATH ];
size_t size;
} biggies[ N ], wrk; // and a working buffer.
/* "Global" is bad for large projects.
* In this 'utility' program, global saves a LOT of typing/reading.
* Seek clarity, not conformity,
*
* "wrk" is used to buffer ALL paths encountered.
* Notice that each recursion is an EXTENSION of its parent.
* ONE working buffer to deal with.
*/
void walk() {
size_t len = strlen( wrk.name ); // The path so far...
DIR *dir;
if( ( dir = opendir( wrk.name ) ) == NULL )
return;
wrk.name[ len ] = '/'; // append a slash ahead of strcpy() below
struct dirent *entry;
while( ( entry = readdir( dir ) ) != NULL ) {
if( strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0 )
continue;
// Notice how each 'local' name is written to "the right spot" in one buffer
strcpy( wrk.name len, entry->d_name );
if( entry->d_type == DT_DIR ) // directory, so recursion...
walk();
else {
struct stat st;
if( stat( wrk.name, &st ) != 0 ) {
fprintf( stderr, "Error stat'ing '%s'\n", wrk.name );
exit( EXIT_FAILURE );
}
// Add this info to working buffer.
wrk.size = st.st_size;
// Find where to 'insert' this (if it is larger than descending ordered list
for( int i = 0; i < N && biggies[i].size > wrk.size; i ) {} // loop
if( i < N ) {
// Slide smaller ones down (don't go out of bounds)
memmove( biggies[i 1], biggies[i], (N-i-1) * sizeof biggies[0] );
// Copy this one in place.
memcpy( biggies[i], &wrk, sizeof biggies[0] );
}
}
}
closedir(dir);
}
int main( int argc, char *argv[]) {
char *target_dir = argv[1]; // This is okay, so far...
if( argc != 2 ) {
printf( "Usage: %s <target directory>\n", argv[0] );
puts( "Using current directory..." );
target_dir = ".";
}
strcpy( wrk.name, target_dir );
printf( "Finding %d largest files in: %s\n", N, wrk.name );
walk();
for( size_t i = 0; i < N && biggies[i].size != 0; i )
printf( "%s : %d\n", biggies[i].name, biggies[i].size);
return 0;
}
CodePudding user response:
You don't need to pass the address of fs_info
to iSort()
function. fs_info
is a pointer to first element of fs_entries
array, which is enough to sort the array if the size of array is known. Also, you don't need to pass the size of element of array to iSort()
.
iSort()
function implementation:
void iSort(struct info *fs_info, int size) {
for (int i = 1; i < size; i) {
int key = fs_info[i].size;
struct info x = fs_info[i];
int j = i - 1;
while (j >= 0 && fs_info[j].size > key) {
fs_info[j 1] = fs_info[j];
j--;
}
fs_info[j 1] = x;
}
}
Use memmove()
in iSort()
:
void iSort (struct info *fs_info, int size) {
for (int i = 1; i < size; i) {
int key = fs_info[i].size;
struct info x = fs_info[i];
int j = i - 1;
while (j >= 0 && fs_info[j].size > key) {
j--;
}
if (j != i - 1) {
memmove (&fs_info[j 2], &fs_info[j 1], sizeof(*fs_info) * (i - j - 1));
}
memmove (&fs_info[j 1], &x, sizeof(*fs_info));
}
}
Call iSort()
function like this:
iSort(fs_info, N);
There is a lot of scope of improvement in your code, like, it would be good to have array of pointers to struct info
instead of array of struct info
, so that, during sort simply swapping the pointers required instead of swapping whole structure. Leaving it up to you to identify the improvements and implement them.