When we download the same file from the Internet several times, the filename gets a suffix such as (2), (3), ...
I want to remove these duplicate files with C. As a first step, I want to find the files and print their names. I wrote the code below, but it doesn't work.
// Question's original attempt (kept as posted).
// Intent: for every entry in ~/Downloads whose name does not start with '.',
// print every other entry whose name contains that name as a substring.
int main(){
const char *path;
DIR *dir;
struct dirent* entry;
// Prefer $HOME; fall back to the passwd entry of the current user.
// NOTE(review): getpwuid() may return NULL; it is dereferenced unchecked.
if((path=getenv("HOME"))==NULL){//get HOME path
path = getpwuid(getuid())->pw_dir;
}
const char *downloads = "/Downloads";
// NOTE(review): path points at storage owned by getenv()/getpwuid(); strcat
// appends into it although there is no spare room and it must not be modified.
strcat(path,downloads); //make ~/Downloads
if(chdir(path)!=0){
perror("chdir()");
return -1;
}
if((dir=opendir(path))==NULL){ //open directory
perror("open");
return 1;
}
// Outer pass: each entry is the candidate "original" name.
while((entry=readdir(dir))!=NULL){
struct dirent *cmpentry;
DIR *cmpdir;
// A second stream on the same directory is opened for every outer entry;
// there is no closedir() for it anywhere in this function.
if((cmpdir=opendir(path))==NULL){
perror("opendir");
return -1;
}
// Inner pass: compare every entry against the outer one.
while((cmpentry=readdir(cmpdir))!=NULL){
// Skip outer names starting with '.' and skip comparing a name with itself.
if((entry->d_name[0]!='.')&&strcmp(entry->d_name,cmpentry->d_name)!=0){
// Substring test: is the outer name contained in the inner name?
char *ptr=strstr(cmpentry->d_name,entry->d_name);
if(ptr!=NULL)
printf("%s\n",cmpentry->d_name);
}
}
}
// No closedir(dir) and no explicit return value on the success path.
}
How can I fix it?
CodePudding user response:
readdir() does not return entries in sorted order the way ls does; it returns them in the order in which they are stored in the directory. Here is a variant of your program that compiles and runs, but it still does not do what you want. Please correct it yourself.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/types.h>
#include <dirent.h>
#include <errno.h>
#include <unistd.h>
#include <pwd.h>
// report_errno -- print "<errno> <message>" for a failed call
// (same stream and format the program has always used for errors)
static void report_errno(int err){
    fprintf(stdout,"%d %s\n",err,strerror(err));
}

// main -- for every non-hidden entry in ~/Downloads, print each other entry
// whose name contains that entry's name as a substring.
// RETURNS: 0 on success, 1 if the directory cannot be opened, -1 on any
// other setup failure.
int main(){
    char path[1024];
    const char *home;
    DIR *dir,*cmpdir;
    struct dirent *entry,*cmpentry;

    // get HOME path; check for NULL *before* using the pointer, and fall
    // back to the passwd database only when $HOME is really unset
    home=getenv("HOME");
    if(home==NULL){
        struct passwd *pw=getpwuid(getuid());
        if(pw==NULL){
            fprintf(stdout,"cannot determine home directory\n");
            return -1;
        }
        home=pw->pw_dir;
    }

    // make ~/Downloads in our own buffer, refusing to overflow it
    int len=snprintf(path,sizeof(path),"%s%s",home,"/Downloads");
    if(len<0||(size_t)len>=sizeof(path)){
        fprintf(stdout,"home directory path is too long\n");
        return -1;
    }

    if(chdir(path)!=0){
        report_errno(errno);
        return -1;
    }

    // two independent streams on the same directory: entries returned by
    // one stream stay valid while the other one is being read
    if((dir=opendir(path))==NULL){
        report_errno(errno);
        return 1;
    }
    if((cmpdir=opendir(path))==NULL){
        report_errno(errno);
        closedir(dir);
        return -1;
    }

    while((entry=readdir(dir))!=NULL){
        // skip ".", ".." and hidden files
        if(entry->d_name[0]=='.')
            continue;

        // restart the comparison stream instead of reopening the directory
        // (reopening per entry leaked one descriptor per file)
        rewinddir(cmpdir);
        while((cmpentry=readdir(cmpdir))!=NULL){
            // never compare an entry with itself
            if(strcmp(entry->d_name,cmpentry->d_name)==0)
                continue;

            // diagnostic trace of every compared pair
            fprintf(stdout,"%s %s\n",entry->d_name,cmpentry->d_name);
            fflush(stdout);

            // report names that contain the outer entry's name
            if(strstr(cmpentry->d_name,entry->d_name)!=NULL)
                fprintf(stdout,"%s\n",cmpentry->d_name);
        }
    }

    closedir(cmpdir);
    closedir(dir);
    return 0;
}
CodePudding user response:
A number of issues ...
- `path` does not have enough space for the `strcat`, so you have UB (undefined behavior)
- No need to use `chdir`
- No `closedir` calls, so for a large directory, you'll run out of file descriptors.
- No skip of `.` and `..` entries
- Using `strcmp` and `strstr` is not sufficient. Duplicates and/or misses.
- Opening the same directory repeatedly is slow/wasteful. Better to read the directory once and save the entries in an array.
Some fixes:
- Capture the data in an array
- Use an auxiliary struct (e.g. `struct root` below) that splits up the filenames into component parts (e.g. `foo(1).pdf` --> `foo`, `(1)`, and `.pdf`)
- Added comparison of lengths and file contents
Here is the original code, annotated with the bugs:
// Original program from the question, reproduced unchanged and annotated
// with NOTE/BUG comments. It scans ~/Downloads with two nested directory
// passes and prints entries whose name contains another entry's name.
int
main()
{
const char *path;
DIR *dir;
struct dirent *entry;
// get HOME path
// NOTE(review): the getpwuid() result is dereferenced without a NULL check
if ((path = getenv("HOME")) == NULL) {
path = getpwuid(getuid())->pw_dir;
}
const char *downloads = "/Downloads";
// make ~/Downloads
// NOTE/BUG: not enough space in path
// NOTE/BUG: path is a const
strcat(path, downloads);
// NOTE/BUG: no need to chdir as opendir is enough
if (chdir(path) != 0) {
perror("chdir()");
return -1;
}
// open directory
// NOTE/BUG: no closedir for this
if ((dir = opendir(path)) == NULL) {
perror("open");
return 1;
}
// outer pass: one iteration per directory entry
while ((entry = readdir(dir)) != NULL) {
// NOTE/BUG: no check for "." or ".."
struct dirent *cmpentry;
DIR *cmpdir;
// a fresh stream on the same directory is opened for every outer entry
// NOTE/BUG: no closedir for this
if ((cmpdir = opendir(path)) == NULL) {
perror("opendir");
return -1;
}
// inner pass: compare every entry against the outer one
while ((cmpentry = readdir(cmpdir)) != NULL) {
// NOTE/BUG: strcmp sense is inverted
// NOTE/BUG: strcmp wrong
if ((entry->d_name[0] != '.') &&
strcmp(entry->d_name, cmpentry->d_name) != 0) {
// plain substring test of the outer name inside the inner name
char *ptr = strstr(cmpentry->d_name, entry->d_name);
if (ptr != NULL)
printf("%s\n", cmpentry->d_name);
}
}
}
// falls off the end of main without an explicit return value
}
In the above code, I've used `cpp` conditionals to denote old vs. new code:
#if 0
// old code
#else
// new code
#endif
#if 1
// new code
#endif
Note: this can be cleaned up by running the file through unifdef -k
Here is the refactored code. It is annotated:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <pwd.h>
#include <string.h>
#include <dirent.h>
#include <ctype.h>
#include <sys/stat.h>
// dbgprt -- printf-style debug trace to stderr; expands to an empty statement
// unless DEBUG is defined (standard C99 variadic macro form)
#ifdef DEBUG
#define dbgprt(...) \
    fprintf(stderr,__VA_ARGS__)
#else
#define dbgprt(...) \
    do { } while (0)
#endif

// filename parsing control
struct root {
    struct dirent root_ent;             // private copy of the raw directory entry
    off_t root_size;                    // file size
    int root_paren;                     // 1=has "(1)"
    int root_dup;                       // 1=is a duplicate
    char *root_suf;                     // suffix/extension (e.g. ".pdf"); points
                                        // into root_ent.d_name, NULL if none
    char root_core[256];                // root/core/base name
};

// rootshow -- show root struct contents
// root: entry to dump
// who: label of the caller, appended to the trace line
void
rootshow(const struct root *root,const char *who)
{

    // passing NULL to "%s" is undefined, so an absent suffix prints as ""
    dbgprt("rootshow: d_name='%s' root_dup=%d root_paren=%d root_core='%s' root_suf='%s' (from %s)\n",
        root->root_ent.d_name,
        root->root_dup,root->root_paren,
        root->root_core,
        (root->root_suf != NULL) ? root->root_suf : "",
        who);
}

// rootof -- split up filenames into components
// root: result (fully overwritten)
// ent: directory entry to parse (only read; may be reused by the caller)
// size: file size to record in root_size
//
// "foo(1).pdf" --> root_core="foo" root_paren=1 root_suf=".pdf"
// "foo.pdf"    --> root_core="foo" root_paren=0 root_suf=".pdf"
// "foo"        --> root_core="foo" root_paren=0 root_suf=NULL
//
// NOTE: root_suf points into root's own root_ent.d_name, so it must be
// recomputed if the struct is copied or moved in memory.
void
rootof(struct root *root,struct dirent *ent,off_t size)
{
    char tail[256];

    memset(root,0,sizeof(*root));

    do {
        // get directory entry
        root->root_ent = *ent;

        // remember the file size
        root->root_size = size;

        // get a scratch copy of the filename that we can chop up
        snprintf(tail,sizeof(tail),"%s",ent->d_name);

        // remember and strip the extension
        // the suffix pointer refers to our own copy of the name, not to the
        // caller's entry, which readdir/closedir may overwrite or free
        char *dot = strrchr(tail,'.');
        if (dot != NULL) {
            root->root_suf = &root->root_ent.d_name[dot - tail];
            *dot = 0;
        }

        // get root/base (e.g. "foo.pdf" --> "foo")
        strcpy(root->root_core,tail);

        // rightmost part of file must be "(N)" -- at least three chars
        // (this also covers an empty stem such as ".bashrc")
        size_t len = strlen(tail);
        if (len < 3)
            break;
        size_t rparen = len - 1;
        if (tail[rparen] != ')')
            break;

        // walk left over the digits between the parentheses
        size_t first = rparen;
        while ((first > 0) && isdigit((unsigned char) tail[first - 1]))
            --first;

        // we got something like "()" or "X)" (i.e. no digits)
        if (first == rparen)
            break;

        // we must have the "(" immediately before the digits
        if (first == 0)
            break;
        if (tail[first - 1] != '(')
            break;

        // strip "(1)"
        tail[first - 1] = 0;
        root->root_paren = 1;
        strcpy(root->root_core,tail);
    } while (0);

#ifdef DEBUG
    rootshow(root,"rootof");
#endif
}
// fullpath -- build "dir/tail" into path
// path: destination buffer; the caller must provide at least
//       strlen(dir) + strlen(tail) + 2 bytes (no bounds checking here)
// dir: directory part
// tail: final component
void
fullpath(char *path,const char *dir,const char *tail)
{
    size_t dirlen = strlen(dir);
    size_t taillen = strlen(tail);
    char *out = path;

    // directory part
    memcpy(out,dir,dirlen);
    out += dirlen;

    // separator
    *out++ = '/';

    // final component, including its terminating NUL
    memcpy(out,tail,taillen + 1);
}
// dirload -- load up directory into list
// path: directory to scan
// countp: receives the number of entries in the returned array
// RETURNS: heap array of parsed entries (NULL when the directory holds no
// regular files); the caller owns it and releases it with free()
// Any failure (opendir, realloc, stat, over-long path) prints a message and
// exits with status 1.
struct root *
dirload(const char *path,int *countp)
{
    char file[1024];
    struct root *list = NULL;
    int count = 0;
    int cap = 0;

    // open directory
    DIR *dirp = opendir(path);
    if (dirp == NULL) {
        perror("open");
        exit(1);
    }

    struct dirent *ent;
    while ((ent = readdir(dirp)) != NULL) {
        const char *tail = ent->d_name;

        // skip over "." and ".."
        if ((strcmp(tail,".") == 0) || (strcmp(tail,"..") == 0))
            continue;

        // optional -- only ordinary files
#if 1
        if (ent->d_type != DT_REG)
            continue;
#endif

        // enlarge array in steps of ten entries
        // keep the old pointer until realloc has succeeded
        if (count >= cap) {
            cap += 10;
            struct root *bigger = realloc(list,sizeof(*list) * (size_t) cap);
            if (bigger == NULL) {
                perror("realloc");
                exit(1);
            }
            list = bigger;
        }

        // fullpath does no bounds checking, so make sure "path/tail" fits
        if (strlen(path) + 1 + strlen(tail) + 1 > sizeof(file)) {
            fprintf(stderr,"%s/%s: path too long\n",path,tail);
            exit(1);
        }

        // get file size
        struct stat st;
        fullpath(file,path,tail);
        if (stat(file,&st) < 0) {
            perror(file);
            exit(1);
        }

        // parse the filename
        rootof(&list[count],ent,st.st_size);
        ++count;
    }

    closedir(dirp);

    // the array no longer moves and the readdir buffer is gone, so anchor
    // each suffix pointer to the element's own copy of the name
    // (the suffix is everything from the last '.' onward, NULL if none)
    for (int idx = 0; idx < count; ++idx)
        list[idx].root_suf = strrchr(list[idx].root_ent.d_name,'.');

    // return count to caller
    *countp = count;

    return list;
}
// filematch -- compare the contents of two files in the same directory
// dir: directory containing both files
// lhs, rhs: parsed entries to compare (name and recorded size are used)
// RETURNS: 1=match, 0=mismatch
// An open failure or a read that returns fewer bytes than requested prints
// the file name via perror and exits with status 1.
int
filematch(const char *dir,const struct root *lhs,const struct root *rhs)
{
    char lhsfile[1024];
    char rhsfile[1024];
    char lhsbuf[4096];
    char rhsbuf[4096];

    // files of different size can never be identical -- nothing to open
    if (lhs->root_size != rhs->root_size)
        return 0;

    // open the LHS file
    fullpath(lhsfile,dir,lhs->root_ent.d_name);
    int fdlhs = open(lhsfile,O_RDONLY);
    if (fdlhs < 0) {
        perror(lhsfile);
        exit(1);
    }

    // open the RHS file
    fullpath(rhsfile,dir,rhs->root_ent.d_name);
    int fdrhs = open(rhsfile,O_RDONLY);
    if (fdrhs < 0) {
        perror(rhsfile);
        exit(1);
    }

    // walk both files in lockstep, one buffer-sized chunk at a time
    int same = 1;
    off_t left = lhs->root_size;
    while (left > 0) {
        ssize_t want;
        if (left > sizeof(lhsbuf))
            want = sizeof(lhsbuf);
        else
            want = left;

        // get LHS chunk
        if (read(fdlhs,lhsbuf,want) != want) {
            perror(lhsfile);
            exit(1);
        }

        // get RHS chunk
        if (read(fdrhs,rhsbuf,want) != want) {
            perror(rhsfile);
            exit(1);
        }

        // first differing chunk settles it
        if (memcmp(lhsbuf,rhsbuf,want) != 0) {
            same = 0;
            break;
        }

        left -= want;
    }

    close(fdlhs);
    close(fdrhs);

    return same;
}
// main -- report "name(N).ext" files that are byte-identical copies of
// "name.ext" in a directory
// usage: prog [directory]   (default: $HOME/Downloads)
// Prints one "<copy> is dup of <original>" line per duplicate found.
// RETURNS: 0 on success, 1 if the directory path cannot be built
int
main(int argc,char **argv)
{
    char path[1024];

    // skip over program name
    --argc;
    ++argv;

    // find the directory
    if (argc > 0) {
        // explicit directory on the command line
        if (strlen(*argv) >= sizeof(path)) {
            fprintf(stderr,"%s: path too long\n",*argv);
            return 1;
        }
        strcpy(path,*argv);
    }
    else {
        // get HOME path, falling back to the passwd database
        const char *home = getenv("HOME");
        if (home == NULL) {
            struct passwd *pw = getpwuid(getuid());
            if (pw == NULL) {
                fprintf(stderr,"unable to determine home directory\n");
                return 1;
            }
            home = pw->pw_dir;
        }

        // make ~/Downloads (fullpath does no bounds checking itself)
        if (strlen(home) + sizeof("/Downloads") > sizeof(path)) {
            fprintf(stderr,"%s: path too long\n",home);
            return 1;
        }
        fullpath(path,home,"Downloads");
    }

#ifdef DEBUG
    setlinebuf(stdout);
    setlinebuf(stderr);
#endif

    int count = 0;
    struct root *list = dirload(path,&count);

    for (int lhsidx = 0; lhsidx < count; ++lhsidx) {
        struct root *lhs = &list[lhsidx];

        // must _not_ have "(1)" -- this is the candidate original
        if (lhs->root_paren)
            continue;
        rootshow(lhs,"LHS");

        for (int rhsidx = 0; rhsidx < count; ++rhsidx) {
            // skip over the same entry
            if (rhsidx == lhsidx)
                continue;

            struct root *rhs = &list[rhsidx];
            rootshow(rhs,"RHS");

            // file types must match
            if (rhs->root_ent.d_type != lhs->root_ent.d_type)
                continue;

            // must have "(1)" -- this is the candidate copy
            if (! rhs->root_paren)
                continue;

            // suffix must match
            // both entries must have [or _not_ have] a suffix
            if (lhs->root_suf != NULL) {
                if (rhs->root_suf == NULL)
                    continue;
                if (strcmp(lhs->root_suf,rhs->root_suf) != 0)
                    continue;
            }
            else {
                if (rhs->root_suf != NULL)
                    continue;
            }

            // core must match
            if (strcmp(lhs->root_core,rhs->root_core) != 0)
                continue;

            // contents must match (the expensive check goes last)
            if (! filematch(path,lhs,rhs))
                continue;

            printf("%s is dup of %s\n",
                rhs->root_ent.d_name,lhs->root_ent.d_name);

            // mark it as a removable duplicate
            rhs->root_dup = 1;
        }
    }

    free(list);

    return 0;
}
Here is a test `perl` script:
#!/usr/bin/perl
# dotest -- test program
# Usage: dotest [program]
# Runs the given program (default "duptest" in the current directory) against
# a scratch directory that gains one more file on every step.
master(@ARGV);
exit(0);
# master -- master control
# Sets up the package globals used by dotest ($xfile, $tstdir) and then
# drives one dotest step per test file.
sub master
{
my(@argv) = @_;
# program under test: first argument, or "duptest" if none was given
$xfile = shift(@argv);
$xfile //= "duptest";
# turn it into an absolute path based on $PWD
$pwd = $ENV{PWD};
$xfile = "$pwd/$xfile";
# scratch directory; dotest removes and recreates it on every step
$tstdir = "/tmp/testdir";
# each call adds one (name, contents) pair to the set and reruns the program
dotest("abc","xyz");
dotest("abc.pdf","jkl");
dotest("abc(1).pdf","jkl");
dotest("abc(2)","xyz");
dotest("abc(3)","xx");
# same name as the previous step with different contents: the file is
# written twice per run and the later contents win
dotest("abc(3)","xzy");
dotest("def","blah");
dotest("def(3)","blah");
dotest("def.pdf","blah");
}
# dotest -- add one file to the test set, rebuild the scratch directory and
# run the program under test on it
# Arguments: file name (relative to $tstdir) and the contents to write.
# Relies on the package globals $tstdir, $xfile and @allfiles.
# Dies if a file cannot be created or the program exits with non-zero status.
sub dotest
{
my($file,$body) = @_;
# separator between steps
printf("\n");
printf("%s\n","-" x 80);
# start every step from an empty scratch directory
system("rm -fr $tstdir");
system("mkdir -p $tstdir");
# @allfiles accumulates across calls, so each step recreates all earlier files
push(@allfiles,[$file,$body]);
###@rfiles = shuffle(@allfiles);
@rfiles = @allfiles;
# write every file collected so far
foreach $pair (@rfiles) {
($tail,$body) = @$pair;
printf("dotest: FILE %s '%s'\n",$tail,$body);
$file = sprintf("%s/%s",$tstdir,$tail);
open($xfdst,">$file") or
die("dotest: unable to open '$file' -- $!\n");
print($xfdst $body);
close($xfdst);
}
# NOTE(review): @fsort is not used anywhere in the visible script
@fsort = sort(@allfiles);
# run the program on the scratch directory and capture its output lines
@xfiles = (`$xfile $tstdir`);
# exit status of the program is in the high byte of $?
$code = $? >> 8;
die("dotest: program aborted\n")
if ($code);
# echo each reported duplicate
foreach $tail (@xfiles) {
chomp($tail);
printf("dotest: XDUP %s\n",$tail);
}
}
Here is the output of the test program:
--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'
--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'
dotest: FILE abc.pdf 'jkl'
--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'
dotest: FILE abc.pdf 'jkl'
dotest: FILE abc(1).pdf 'jkl'
dotest: XDUP abc(1).pdf is dup of abc.pdf
--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'
dotest: FILE abc.pdf 'jkl'
dotest: FILE abc(1).pdf 'jkl'
dotest: FILE abc(2) 'xyz'
dotest: XDUP abc(1).pdf is dup of abc.pdf
dotest: XDUP abc(2) is dup of abc
--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'
dotest: FILE abc.pdf 'jkl'
dotest: FILE abc(1).pdf 'jkl'
dotest: FILE abc(2) 'xyz'
dotest: FILE abc(3) 'xx'
dotest: XDUP abc(1).pdf is dup of abc.pdf
dotest: XDUP abc(2) is dup of abc
--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'
dotest: FILE abc.pdf 'jkl'
dotest: FILE abc(1).pdf 'jkl'
dotest: FILE abc(2) 'xyz'
dotest: FILE abc(3) 'xx'
dotest: FILE abc(3) 'xzy'
dotest: XDUP abc(1).pdf is dup of abc.pdf
dotest: XDUP abc(2) is dup of abc
--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'
dotest: FILE abc.pdf 'jkl'
dotest: FILE abc(1).pdf 'jkl'
dotest: FILE abc(2) 'xyz'
dotest: FILE abc(3) 'xx'
dotest: FILE abc(3) 'xzy'
dotest: FILE def 'blah'
dotest: XDUP abc(1).pdf is dup of abc.pdf
dotest: XDUP abc(2) is dup of abc
--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'
dotest: FILE abc.pdf 'jkl'
dotest: FILE abc(1).pdf 'jkl'
dotest: FILE abc(2) 'xyz'
dotest: FILE abc(3) 'xx'
dotest: FILE abc(3) 'xzy'
dotest: FILE def 'blah'
dotest: FILE def(3) 'blah'
dotest: XDUP def(3) is dup of def
dotest: XDUP abc(1).pdf is dup of abc.pdf
dotest: XDUP abc(2) is dup of abc
--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'
dotest: FILE abc.pdf 'jkl'
dotest: FILE abc(1).pdf 'jkl'
dotest: FILE abc(2) 'xyz'
dotest: FILE abc(3) 'xx'
dotest: FILE abc(3) 'xzy'
dotest: FILE def 'blah'
dotest: FILE def(3) 'blah'
dotest: FILE def.pdf 'blah'
dotest: XDUP def(3) is dup of def
dotest: XDUP abc(1).pdf is dup of abc.pdf
dotest: XDUP abc(2) is dup of abc