Description:
I have created a small program that stores the name and checksum of a file in a struct, for each file in a directory. When output is written to stdout with printf, everything seems fine, but if we write to a file with either fputs
or fprintf
, values get overwritten, perhaps because of some buffer overflow?
Output from main with print.
Name: 2.txt. Checksum: fc769d448ed4e08bd855927bad2c8e43efdf5315a6daa9f28577758786d52eaf
Name: 1.txt. Checksum: 2d46cffd0302c5537ddb4952a9cca7d66060dafecd56fe3a7fe8e5e5cabbbbf9
Name: 3.txt. Checksum: 37bb2e5563e94eee68fac6b07501c44f018599482e897a626a94dd88053b4b7e
However, if we print the values of checksumMaps[0]
to a file,
the value checksumMaps[0].filename
gets overwritten (with the last 2 bytes of the checksum string) as seen by:
FILE *fp = fopen("mychecksums.txt", "w");
char formatted_bytes[32*2 1];
char *filename = checksumMaps[0].filename;
format_bytes(formatted_bytes, checksumMaps[0].checksum);
fputs(filename, fp);
fputs(formatted_bytes, fp);
// We print the value of `filename` again in order to see that it has been overwritten.
printf("%s \n", filename);
fclose(fp);
The program writes aftxt
to stdout instead of 2.txt
.
Using gdb, I can see that the value of filename
changes from 2.txt
to aftxt
after the line fputs(formatted_bytes, fp);
. What could be the reason for this?
Minimal Reproducible Example
ArchiveFile.h
typedef struct ArchiveFile{
char *uuid;
char *checksum;
char *relative_path;
int is_binary;
} ArchiveFile;
typedef struct file_content{
unsigned char* bytes;
unsigned long file_size;
} file_content;
void set_uuid(ArchiveFile *file, char* uuid);
char* get_absolute_path(ArchiveFile *file, char* root);
char* get_file_text(ArchiveFile *file, char* root);
void get_bytes(ArchiveFile *file, char* root, unsigned char *buffer, size_t fsize);
long get_file_size(ArchiveFile *file, char *root);
ArchiveFile.c
#include <sys/stat.h>
#include <stdlib.h>
#include <stdio.h>
#include "ArchiveFile.h"
#include <string.h>
void set_uuid(ArchiveFile* file, char* uuid){
file->uuid = uuid;
}
char* get_absolute_path(ArchiveFile *file, char* root){
/* Allocate space according to the relative path
the root path null terminating byte.*/
char* absolute_path = malloc(strlen(file->relative_path) strlen(root) 1);
// Add the root path.
strcpy(absolute_path, root);
// Concatonate the root with the rest of the path.
strcat(absolute_path, file->relative_path);
return absolute_path;
}
char* get_file_text(ArchiveFile *file, char* root){
char* absolute_path = get_absolute_path(file, root);
FILE *fp = fopen(absolute_path, "r");
if(fp == NULL)
printf("Could not open file %s \n", absolute_path);
// Platform independent way of getting the file size in bytes.
fseek(fp, 0, SEEK_END);
long fsize = ftell(fp);
fseek(fp, 0, SEEK_SET); /* same as rewind(f); */
char *buffer = malloc(fsize);
if(fp){
fread(buffer, sizeof(char), fsize, fp);
}
fclose(fp);
free(absolute_path);
return buffer;
}
void print_bytes2(unsigned char* md, size_t size){
for (size_t i = 0; i < size; i ) {
printf("x ", md[i]);
}
printf("\n");
}
void get_bytes(ArchiveFile *file, char *root, unsigned char *buffer, size_t fsize){
char* absolute_path = get_absolute_path(file, root);
FILE *fp = fopen(absolute_path, "rb");
if(fp){
fread(buffer, 1, fsize, fp);
}
free(absolute_path);
fclose(fp);
}
long get_file_size(ArchiveFile *file, char *root){
char* filepath = get_absolute_path(file, root);
FILE *fp = fopen(filepath, "rb");
fseek(fp, 0, SEEK_END);
long fsize = ftell(fp);
fseek(fp, 0, SEEK_SET); /* same as rewind(f); */
free(filepath);
fclose(fp);
return fsize;
}
checksum/checksum.h
// Used to store information about filename and checksum.
typedef struct ChecksumMap{
char* filename;
unsigned char checksum [32];
} ChecksumMap;
int calculate_checksum(void* input, unsigned long length, unsigned char* md);
checksum/checksum.h
#include <stdio.h>
#include <openssl/sha.h>
#include "checksum.h"
int calculate_checksum(void* input, unsigned long length, unsigned char* md){
SHA256_CTX context;
if(!SHA256_Init(&context))
return 0;
if(!SHA256_Update(&context, (unsigned char*)input, length))
return 0;
if(!SHA256_Final(md, &context))
return 0;
return 1;
}
main.c
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <dirent.h>
#include <sys/types.h>
#include "ArchiveFile.h"
#include "checksum/checksum.h"
void format_bytes(char* buffer, unsigned char* md){
for (int i = 0; i < 32; i ) {
sprintf(&buffer[i*2], "x", md[i]);
}
buffer[32*2] = '\0';
}
void *listdir(char *name, int count, ChecksumMap *checksumMaps)
{
DIR *dir;
struct dirent *direntry;
if (!(dir = opendir(name)))
return NULL;
while ((direntry = readdir(dir)) != NULL) {
// If we reach a directory (that is not . or ..) then recursive step.
if (direntry->d_type == DT_DIR) {
char path[1024];
if (strcmp(direntry->d_name, ".") == 0 || strcmp(direntry->d_name, "..") == 0)
continue;
snprintf(path, sizeof(path), "%s/%s", name, direntry->d_name);
listdir(path, count, checksumMaps);
} else {
unsigned char md[32];
ArchiveFile file;
file.relative_path = direntry->d_name;
// Get the full path of the file:
char parent_name[strlen(name) 1];
memset(&parent_name[0], 0, sizeof(parent_name));
strcat(parent_name, name);
strcat(parent_name, "/");
size_t fsize = get_file_size(&file, parent_name);
unsigned char *bytes = malloc(sizeof(char) * fsize);
get_bytes(&file, parent_name, bytes, fsize);
calculate_checksum((void*) bytes, fsize, md);
ChecksumMap checksumMap = {.filename=file.relative_path};
memcpy(checksumMap.checksum, md,
sizeof(checksumMap.checksum));
free(bytes);
}
}
closedir(dir);
return NULL;
}
int main(int argc, char const *argv[]) {
FILE *fp = fopen("mychecksums.txt", "w");
char formatted_bytes[32*2 1];
char *filename = checksumMaps[0].filename;
format_bytes(formatted_bytes, checksumMaps[0].checksum);
fputs(filename, fp);
fputs(formatted_bytes, fp);
// We print the value of `filename` again in order to see that it has been overwritten.
printf("%s \n", filename);
fclose(fp);
}
Compile with gcc:
gcc -Wall -Wextra main.c ArchiveFile.c checksum/checksum.c -lcrypto
CodePudding user response:
The program writes aftxt to stdout instead of 2.txt. Using gdb, I can see that the value of filename changes from 2.txt to aftxt after the line
fputs(formatted_bytes, fp);
. What could be the reason for this?
Hard to say, because we're in the domain of UB (undefined behavior). But there are two obvious candidates here.
formatted_bytes
is not properly terminated, causingfputs
to read past the array, invoking UB.fp
is not a valid stream. The reason could be that it's not initialized, or changed, or the stream is closed or something.
Enable -Wall -Wextra -fsanitize=address
. You could also try -fsanitize=undefined
.
Check all return values. malloc
, fopen
and fputs
returns a value that can be used for error checking.
Replace formatted_bytes
with a hardcoded string that have the value you think it has.
Learn how to create a Minimal, Reproducible Example and how to debug small c programs. It's a guide I wrote a while ago.
CodePudding user response:
Update
It seems that there was some different problems with the code.
First thing to notice is file.relative_path = direntry->d_name;
, however the value that direntry points to changes in each iteration, thus the value file.relative_path
points to, also changes. Furthermore, the size of the string stored in file.relative_path has never been specified, which would be a problem, if we use strcpy.
The solution is to specify a size for file.relative_path
and use strcpy
to copy the value of direntry->d_name
. Also, no need for the checksumMap struct, since ArchiveFile already can store the same information (again, specify a size for the checksum).
Thing to keep in mind when you work with strings, buffers, arrays in C:
Remember that strings in C are based on char arrays, themselves based on a pointer to the first element. Assigning the value of one string to another might return in unexpected behavior when you actually want to copy the value of the string, not the address to the first element.
CodePudding user response:
One bug here:
char parent_name[strlen(name) 1];
memset(&parent_name[0], 0, sizeof(parent_name)); // could have been parent_name[0]='\0'; instead
strcat(parent_name, name); // Now the parent_name buffer is full and null terminated.
strcat(parent_name, "/"); // this overwrites the null terminator and writes a new one out-of-bounds
You should have done something like this:
size_t length = strlen(name);
char parent_name[length 1 1];
memcpy(parent_name, name, length); // copies characters only (fast) but not the null term
parent_name[length] = '/'; // append this single character 1 symbol past "name" string
parent_name[length 1] = '\0'; // add manual null termination