I am currently trying to decompile a string-to-memory copy function of an old game from x86 machine code to C.
From the function below I see that the code checks if the string is larger than the destination space reserved for assignment.
If it is not, it goes into the ELSE where string is directly copied.
If it is, then it cuts the string from string[*(this_dest-1) - *this_dest]
location.
I do wonder what can be done to make the pseudocode look more C like?
So far I cannot find solution for *(this_dest-1) - *this_dest
.
I am just using this to learn how to disassemble better so any other tips are welcome!
/*
 * Decompiler pseudocode for copyA2strToA1mem.
 *
 * Copies the bytes in [strStart_a2, strEnd_a3) over the buffer described by
 * the object: the slot at offset 0 (*this, [ebx] in the listing) is where the
 * copy starts, and the slot at offset 4 (*(this + 1), [ebx+4]) is compared
 * against as its end. Returns `this`.
 *
 * NOTE: `this` is typed `char *` here, but the listing reads 32-bit values at
 * byte offsets 0 and 4, so `*this` / `*(this + 1)` stand for those two pointer
 * slots (the pointer casts are elided in this pseudocode).
 */
char *__thiscall copyA2strToA1mem(char *this, const char *strStart_a2, char *strEnd_a3)
{
  void *dest_v4; // edi
  size_t strLength_v5; // eax
  size_t destLength_v6; // ecx
  const void *v7; // edi
  void *v8; // esi
  char *i; // esi

  dest_v4 = *this;
  strLength_v5 = strEnd_a3 - strStart_a2;
  destLength_v6 = *(this + 1) - *this;
  if ( strEnd_a3 - strStart_a2 > destLength_v6 )// string size > destination space ? (unsigned compare, `ja`)
  {
    // Fill the existing space, then hand every remaining character to
    // sub_401D20 one at a time.
    qmemcpy(dest_v4, strStart_a2, destLength_v6);
    for ( i = &strStart_a2[*(this + 1) - *this]; i != strEnd_a3; ++i )
      sub_401D20(this, *i);
  }
  else
  {
    // The whole string fits: copy it, then pull the end slot back to just
    // past the copied data if it was further out.
    qmemcpy(dest_v4, strStart_a2, strLength_v5);
    v7 = *(this + 1);
    v8 = (strLength_v5 + *this);
    if ( v8 != v7 )
    {
      memmove(v8, v7, 1u);          // size is (edi - edi) + 1 in the listing, i.e. 1 byte
      *(this + 1) += v8 - v7;       // `add [ebx+4], esi` with esi = v8 - v7
      return this;
    }
  }
  return this;
}
Here is the assembly code:
; copyA2strToA1mem -- ecx = this, two stack arguments (strStart, strEnd),
; callee cleans 8 bytes (retn 8). Returns this in eax.
; [ebx] and [ebx+4] are the two 32-bit fields of the object that are read here.
00406660 copyA2strToA1mem proc near ; CODE XREF: sub_406430+1AB↑p
00406660 ; sub_409130+1CC↓p ...
00406660
00406660 strStart_a2 = dword ptr 4
00406660 strEnd_a3 = dword ptr 8
00406660
00406660 000 mov edx, [esp+strStart_a2] ; edx = strStart
00406664 000 push ebx
00406665 004 push ebp
00406666 008 mov ebp, [esp+8+strEnd_a3] ; ebp = strEnd
0040666A 008 push esi
0040666B 00C mov ebx, ecx ; ebx = this
0040666D 00C mov ecx, [ebx+4] ; ecx = field at offset 4
00406670 00C push edi
00406671 010 mov edi, [ebx] ; edi = field at offset 0 (copy destination)
00406673 010 mov eax, ebp
00406675 010 sub eax, edx ; eax = strEnd - strStart
00406677 010 sub ecx, edi ; ecx = [ebx+4] - [ebx]
00406679 010 cmp eax, ecx
0040667B 010 mov esi, edx ; esi = copy source
0040667D 010 ja short loc_4066BA ; unsigned: string longer than existing space
0040667F 010 mov ecx, eax ; inline copy of eax bytes: dwords first...
00406681 010 mov edx, ecx
00406683 010 shr ecx, 2
00406686 010 rep movsd
00406688 010 mov ecx, edx ; ...then the remaining 0-3 bytes
0040668A 010 and ecx, 3
0040668D 010 rep movsb
0040668F 010 mov esi, [ebx]
00406691 010 mov edi, [ebx+4]
00406694 010 add esi, eax ; esi = [ebx] + string length
00406696 010 cmp esi, edi
00406698 010 jz short loc_4066E4 ; already equal to [ebx+4]: nothing to adjust
0040669A 010 mov eax, edi
0040669C 010 sub eax, edi
0040669E 010 add eax, 1 ; eax = (edi - edi) + 1 = 1
004066A1 010 push eax ; Size
004066A2 014 push edi ; Src
004066A3 018 push esi ; Dst
004066A4 01C call memmove
004066A9 01C add esp, 0Ch
004066AC 010 sub esi, edi
004066AE 010 add [ebx+4], esi ; [ebx+4] += Dst - Src
004066B1 010 pop edi
004066B2 00C pop esi
004066B3 008 pop ebp
004066B4 004 mov eax, ebx ; return this
004066B6 004 pop ebx
004066B7 000 retn 8
004066BA ; ---------------------------------------------------------------------------
004066BA
004066BA loc_4066BA: ; CODE XREF: copyA2strToA1mem+1D↑j
004066BA 010 mov eax, ecx ; inline copy of ecx = [ebx+4] - [ebx] bytes
004066BC 010 shr ecx, 2
004066BF 010 rep movsd
004066C1 010 mov ecx, eax
004066C3 010 and ecx, 3
004066C6 010 rep movsb
004066C8 010 mov esi, [ebx+4]
004066CB 010 sub esi, [ebx]
004066CD 010 add esi, edx ; esi = strStart + ([ebx+4] - [ebx])
004066CF 010 cmp esi, ebp
004066D1 010 jz short loc_4066E4
004066D3
004066D3 loc_4066D3: ; CODE XREF: copyA2strToA1mem+82↓j
004066D3 010 mov cl, [esi] ; next remaining character
004066D5 010 push ecx
004066D6 014 mov ecx, ebx ; this
004066D8 014 call sub_401D20
004066DD 010 add esi, 1
004066E0 010 cmp esi, ebp
004066E2 010 jnz short loc_4066D3 ; loop until esi reaches strEnd
004066E4
004066E4 loc_4066E4: ; CODE XREF: copyA2strToA1mem+38↑j
004066E4 ; copyA2strToA1mem+71↑j
004066E4 010 pop edi
004066E5 00C pop esi
004066E6 008 pop ebp
004066E7 004 mov eax, ebx ; return this
004066E9 004 pop ebx
004066EA 000 retn 8
004066EA copyA2strToA1mem endp
CodePudding user response:
`ebx` is being set to the initial value of `ecx`, which is the `this` pointer if the calling convention truly is `__thiscall`.
`[ebx]`, i.e. `[ebx+0]`, refers to the data member located at byte offset 0 in `this`.
`[ebx+4]` refers to the data member located at byte offset 4 in `this`.
The way these two data members are being used, we can assume they are 32-bit pointers, and they are related to each other, so they are most likely the start and end pointers of a buffer.
Knowing that, if I translated the assembly correctly, the C code would look something like the following:
/**
 * Overwrites the buffer with the characters in [strStart, strEnd).
 *
 * If the string fits in the current [bufferStart, bufferEnd) range it is
 * copied in place and bufferEnd is pulled back to just past the copied data.
 * Otherwise the existing range is filled and each remaining character is
 * passed to sub_401D20().
 *
 * @param strStart first character to copy
 * @param strEnd   one past the last character to copy
 * @return this
 */
SomeClass* SomeClass::copyA2strToA1mem(char *strStart, char *strEnd)
{
    // NOTE: the disassembly compares these two lengths as unsigned (`ja`).
    int strLen = strEnd - strStart;
    int bufferLen = this->bufferEnd - this->bufferStart;

    if (strLen <= bufferLen)
    {
        qmemcpy(this->bufferStart, strStart, strLen);

        char *src = this->bufferEnd;
        char *dst = this->bufferStart + strLen;
        if (dst != src)
        {
            // Size is (src - src) + 1 in the disassembly, i.e. one byte.
            memmove(dst, src, 1);
            // `add [ebx+4], esi`: afterwards bufferEnd == dst.
            this->bufferEnd += (dst - src);
        }
    }
    else
    {
        qmemcpy(this->bufferStart, strStart, bufferLen);

        // Skip the part that was just copied; the rest goes through
        // sub_401D20() one character at a time.
        strStart += bufferLen;
        while (strStart != strEnd)
        {
            this->sub_401D20(*strStart++);
        }
    }

    return this;
}
The `memmove()` is a little tricky. I think the code is checking if there is a gap between the end of the copied string and the end of the buffer, and if so then it is effectively moving 1 byte from the end of the buffer to the end of the copied string, and then setting `bufferEnd` to point at that byte. Why, who knows.