I am writing a program in C to parse the PE structure. I read the file in as binary data and parse it by length. For example, the DOS header is 0x40 (64) bytes long, so I tried to access it as binary_buf[0] to binary_buf[63] (0x3F).
What data type should I use to read the file?
I don't know what to write because it's all the same whether I use an int or a char. And I would appreciate it if you could tell me why I should use that data type.
/* Fragment: read the whole file named by `filename` into a heap buffer.
 * `fp`, `filename` and `size` are declared outside this excerpt. */
unsigned char *binary_buf = NULL;
fp = fopen(filename, "rb"); /* "rb": binary mode, no newline translation */
if (fp == NULL) {
    /* Message text (Korean): "This file cannot be opened." */
    printf("이 파일은 열 수 없습니다.\n");
    return 0;
}
/* Determine the file length by seeking to the end, then rewind. */
fseek(fp, 0, SEEK_END);
size = ftell(fp);
fseek(fp, 0, SEEK_SET);
/* One byte more than the file length is allocated; the extra byte is
 * never written by the fread below. */
binary_buf = malloc(size + 1);
fread(binary_buf, 1, size, fp);
fclose(fp);
printf("%d", binary_buf[0]); //77 = 0x4D == 'M'
Thank you for reading my question. Have a good day!
CodePudding user response:
You are reading a binary file, so an array of unsigned char is exactly what you need.
Note that allocating one extra byte does not seem necessary for this job as you are not creating a C string from the file contents.
Reading the contents of the PE header as single bytes is the most portable approach: the header data has a specific layout that might not be correctly matched by a C structure because of alignment and endianness issues.
After checking the signature, you should extract the relevant values from the known offsets in the header and construct the file offsets using appropriate integer arithmetic.
The DOS Header is 64 bytes long and has the following layout:
/*
 * Layout of the DOS .EXE header found at the very start of the file.
 *
 * The leading hex number in each field comment is the field's byte offset
 * from the start of the header, computed for a 2-byte WORD and a 4-byte
 * LONG with no padding between members: 30 WORDs (60 bytes) followed by
 * one LONG gives 64 (0x40) bytes in total.
 */
typedef struct _IMAGE_DOS_HEADER { // DOS .EXE header
WORD e_magic; // 0x00: Magic number
WORD e_cblp; // 0x02: Bytes on last page of file
WORD e_cp; // 0x04: Pages in file
WORD e_crlc; // 0x06: Relocations
WORD e_cparhdr; // 0x08: Size of header in paragraphs
WORD e_minalloc; // 0x0A: Minimum extra paragraphs needed
WORD e_maxalloc; // 0x0C: Maximum extra paragraphs needed
WORD e_ss; // 0x0E: Initial (relative) SS value
WORD e_sp; // 0x10: Initial SP value
WORD e_csum; // 0x12: Checksum
WORD e_ip; // 0x14: Initial IP value
WORD e_cs; // 0x16: Initial (relative) CS value
WORD e_lfarlc; // 0x18: File address of relocation table
WORD e_ovno; // 0x1A: Overlay number
WORD e_res[4]; // 0x1C-0x23: Reserved words
WORD e_oemid; // 0x24: OEM identifier (for e_oeminfo)
WORD e_oeminfo; // 0x26: OEM information; e_oemid specific
WORD e_res2[10]; // 0x28-0x3B: Reserved words
LONG e_lfanew; // 0x3C: File address of new exe header
} IMAGE_DOS_HEADER, *PIMAGE_DOS_HEADER;
where WORD is a 16-bit little-endian unsigned integer and LONG is a 32-bit little-endian signed integer.
Here is a modified version:
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Load a file into memory, verify its DOS "MZ" signature and decode the
 * offset of the new executable header (e_lfanew).
 *
 * The file is read as raw bytes and the 32-bit little-endian e_lfanew field
 * is assembled byte by byte, so the result does not depend on the host's
 * struct padding or byte order.
 *
 * filename: path of the file to read.
 *
 * Returns 0 on success, after printing the decoded offset to stdout.
 * Returns -1 on any failure (open, seek, short file, allocation, read,
 * missing signature, offset beyond end of file), after printing a
 * diagnostic to stderr. The buffer and the stream are always released
 * before returning.
 */
int read_PE_file(const char *filename) {
    /* The DOS header is 64 bytes long; e_lfanew occupies its last 4 bytes. */
    enum { DOS_HEADER_SIZE = 64, E_LFANEW_OFFSET = 60 };

    int rc = -1;
    unsigned char *binary_buf = NULL;
    long length = -1;
    unsigned long size = 0;
    unsigned long offset = 0;

    FILE *fp = fopen(filename, "rb");
    if (fp == NULL) {
        fprintf(stderr, "Error opening file %s\n", filename);
        return -1;
    }

    /* Determine the file length: seek to the end, ask for the position,
     * then rewind. Any failing step is reported as a seek error. */
    if (fseek(fp, 0, SEEK_END) == 0) {
        length = ftell(fp);
    }
    if (length < 0 || fseek(fp, 0, SEEK_SET) != 0) {
        fprintf(stderr, "Error seeking file %s\n", filename);
        goto cleanup;
    }

    size = (unsigned long)length;
    if (size < DOS_HEADER_SIZE) {
        fprintf(stderr, "file %s too short, size=%lu\n", filename, size);
        goto cleanup;
    }
#if LONG_MAX > SIZE_MAX
    /* Only needed where long is wider than size_t: malloc could not
     * represent the requested size. */
    if (size > SIZE_MAX) {
        fprintf(stderr, "file %s too large, size=%lu\n", filename, size);
        goto cleanup;
    }
#endif

    binary_buf = malloc(size);
    if (binary_buf == NULL) {
        fprintf(stderr, "Error allocating %lu byte buffer for file %s\n", size, filename);
        goto cleanup;
    }
    if (fread(binary_buf, 1, size, fp) != size) {
        fprintf(stderr, "Error reading file %s\n", filename);
        goto cleanup;
    }

    /* The stream is no longer needed. Clearing fp ensures the cleanup code
     * below does not close it a second time. */
    fclose(fp);
    fp = NULL;

    /* 0x4D 0x5A == "MZ" */
    if (binary_buf[0] != 0x4D || binary_buf[1] != 0x5A) {
        fprintf(stderr, "File %s does not have MZ signature\n", filename);
        goto cleanup;
    }

    /* Assemble e_lfanew from its four little-endian bytes. Each byte is
     * widened before shifting so the shift is done in unsigned long. */
    offset = (unsigned long)binary_buf[E_LFANEW_OFFSET]
           | ((unsigned long)binary_buf[E_LFANEW_OFFSET + 1] << 8)
           | ((unsigned long)binary_buf[E_LFANEW_OFFSET + 2] << 16)
           | ((unsigned long)binary_buf[E_LFANEW_OFFSET + 3] << 24);
    if (offset > size) {
        fprintf(stderr, "new executable offset %lu greater than file size %lu for file %s\n", offset, size, filename);
        goto cleanup;
    }
    printf("new executable offset: %lu\n", offset);

    /* [...] further parsing of the headers located at `offset` goes here.
     * NOTE(review): offset == size passes the check above, so any code
     * added here must bounds-check before reading binary_buf[offset]. */

    rc = 0;

cleanup:
    free(binary_buf);
    if (fp != NULL) {
        fclose(fp);
    }
    return rc;
}