I'm writing a program for mathematical research. Assembly language is used for speed. Data is represented as managed records with operator overloads. Some data is represented by constants.
program TestMR;
{$APPTYPE CONSOLE}
uses
System.SysUtils;
type
// Record wrapping four Double values (4 x 8 = 32 bytes of data).
TMR = record
public
// The four double-precision elements.
Data: array[0..3] of Double;
// Implicit conversion from a 64-bit integer; the asm implementation in this
// file writes the converted value into all four elements.
class operator Implicit(const Src: Int64): TMR; overload;
// Custom Assign operator, disabled in this version: per the accompanying text,
// enabling it makes the typed constant of this record fail with E2635.
//class operator Assign(var Dest: TMR; const [ref] Src: TMR);
end;
{ TMR }
{
class operator TMR.Assign(var Dest: TMR; const [ref] Src: TMR);
asm
vmovdqu ymm0, dqword ptr [rdx]
vmovdqu dqword ptr [rcx], ymm0
end;
}
// Implicit conversion Int64 -> TMR: every element of Data receives Double(Src).
// Requires a CPU with AVX2 (vpbroadcastq on a YMM register).
// NOTE(review): assumes the Win64 register assignment for this operator -
// RCX = address of the hidden Result record, RDX = Src - confirm for the target.
class operator TMR.Implicit(const Src: Int64): TMR;
asm
  vcvtsi2sd xmm1, xmm1, rdx       // convert the integer in RDX to a double in the low lane of XMM1
  vpbroadcastq ymm0, xmm1         // replicate that 64-bit lane into all four lanes of YMM0
  vmovdqu yword ptr [rcx], ymm0   // unaligned 32-byte store through RCX
  vzeroupper                      // clear the upper YMM halves so later SSE code pays no AVX/SSE transition penalty
  ret
end;
const
// Typed constant of the record type; per the accompanying text this is the
// declaration rejected with E2635 once TMR declares an Assign operator.
cD: TMR = (Data:(1.0, 2.0, 3.0, 4.0));
var
vD: TMR;
begin
// Integer literal -> TMR goes through the Implicit(Int64) operator.
vD := 4;
// Plain record copy; no Assign operator is declared in this version of TMR.
vD := cD;
end.
The standard assignment method is represented by the compiler as ...
000000000042A10F 488D3D8A4E0100 lea rdi,[rel $00014e8a]
000000000042A116 488D7520 lea rsi,[rbp+$20]
000000000042A11A 48A5 movsq
000000000042A11C 48A5 movsq
000000000042A11E 48A5 movsq
000000000042A120 48A5 movsq
It's slow. If I declare an overloaded Assign operator, the compiler reports error E2635 "Declaring a managed record constant in const section is not allowed". How can I define a constant of a managed record type that has an overloaded Assign operator? Also, is there a way to force overloaded operators to be inlined?
CodePudding user response:
Thanks Remy Lebeau for the tip about managed and unmanaged data. I may have found a solution...
program TestMR;
{$APPTYPE CONSOLE}
uses
System.SysUtils;
type
// Record wrapping four Double values (4 x 8 = 32 bytes of data), with
// asm-implemented conversion, addition and assignment operators.
TMR = record
public
// The four double-precision elements.
Data: array[0..3] of Double;
// Int64 -> TMR: the implementation writes the converted value into all four elements.
class operator Implicit(const Src: Int64): TMR; overload;
// TMR -> String: the elements formatted with FloatToStr, each followed by a space.
class operator Implicit(const [ref] Src: TMR): String; overload; inline;
// Element-wise sum of O1 and O2.
class operator Add(const [ref] O1: TMR; const [ref] O2: TMR): TMR; overload;
// Custom copy: the implementation moves the 32 bytes of Src into Dest.
class operator Assign(var Dest: TMR; const [ref] Src: TMR);
end;
{ TMR }
// Element-wise addition of two TMR values (four packed doubles).
// NOTE(review): relies on RCX = address of the hidden Result, RDX = @O1,
// R8 = @O2 - confirm for the target calling convention.
class operator TMR.Add(const [ref] O1: TMR; const [ref] O2: TMR): TMR;
asm
// Load 32 bytes from the address in RDX.
vmovdqu ymm0, yword ptr [rdx]
// Add the four packed doubles found at the address in R8.
vaddpd ymm0, ymm0, yword ptr [r8]
// Store the four sums through RCX.
vmovdqu yword ptr [rcx], ymm0
// Zero the upper halves of the YMM registers before returning.
vzeroupper
ret
end;
// Custom assignment: copies the 32 bytes of Src into Dest with one unaligned
// AVX load and one unaligned AVX store.
// NOTE(review): relies on RCX = @Dest and RDX = @Src - confirm for the target
// calling convention.
class operator TMR.Assign(var Dest: TMR; const [ref] Src: TMR);
asm
// Load 32 bytes from the address in RDX.
vmovdqu ymm0, yword ptr [rdx]
// Store them at the address in RCX.
vmovdqu yword ptr [rcx], ymm0
// Zero the upper halves of the YMM registers before returning.
vzeroupper
ret
end;
// Implicit conversion TMR -> String.
// Returns the four elements of Src.Data formatted with FloatToStr, each one
// followed by a single space (so the result ends with a trailing space).
class operator TMR.Implicit(const [ref] Src: TMR): String;
begin
  Result := '';
  // String concatenation needs explicit '+' operators; without them the
  // statement does not compile.
  for var i: Integer := 0 to 3 do
    Result := Result + FloatToStr(Src.Data[i]) + ' ';
end;
// Implicit conversion Int64 -> TMR: every element of Data receives Double(Src).
// NOTE(review): relies on RCX = address of the hidden Result and RDX = Src -
// confirm for the target calling convention.
class operator TMR.Implicit(const Src: Int64): TMR;
asm
// Convert the 64-bit integer in RDX to a double in the low lane of XMM1.
vcvtsi2sd xmm1, xmm1, rdx
// Replicate that 64-bit lane into all four lanes of YMM0.
vpbroadcastq ymm0, xmm1
// Unaligned 32-byte store through RCX.
vmovdqu yword ptr [rcx], ymm0
// Zero the upper halves of the YMM registers before returning.
vzeroupper
ret
end;
var
  vD: TMR;
const
  // Plain array constant with the same layout as TMR.Data; it is cast to TMR at
  // the point of use instead of declaring a typed TMR constant.
  cA: array[0..3] of Double = (1.0, 2.0, 3.0, 4.0);
begin
  // Reinterpret the array constant as a TMR and assign it.
  vD := TMR(cA);
  // Element-wise sum through the Add operator (the '+' was missing here).
  vD := TMR(cA) + vD;
  // Explicit cast selects the TMR -> String conversion operator.
  WriteLn(String(vD));
  // Keep the console window open until Enter is pressed.
  ReadLn;
end.
Now it would be nice to find a way to make these functions inline. Call and ret take approximately 6 ticks on Haswell.
CodePudding user response:
I don't know C++ very well, so it took some time to write the code. The implementation of the operators is written in an external .asm file. I took this trick from Creel here.
// TestTMR.cpp
#include <iostream>
// Four packed doubles (32 bytes). The assignment and addition operators are
// only declared here; their bodies are supplied by the external assembly file
// (TestTMRasm.asm) under the matching decorated names.
struct TMR {
    double Data[4];

    // Assign from an integer (implemented in assembly).
    inline TMR& operator = (const long long Src);
    // Copy-assign from another TMR (implemented in assembly).
    inline TMR& operator = (const TMR& Src);
    // Element-wise sum (implemented in assembly). The operator token '+' is
    // required: without it the declaration is a syntax error.
    friend inline TMR& operator + (const TMR& O1, const TMR& O2);
    // Stream the four elements separated by single spaces.
    friend std::ostream& operator << (std::ostream& out, const TMR& Src);
};
// Global working variable used by main().
TMR vD;
// Constant operand, aggregate-initialised with the four element values.
TMR const cA = {1.0, 2.0, 3.0, 4.0};
// Writes the four elements of Src to out, separated by single spaces and with
// no trailing separator. Returns the stream to allow chaining.
std::ostream& operator << (std::ostream& out, const TMR& Src)
{
    out << Src.Data[0];
    for (int lane = 1; lane < 4; ++lane) {
        out << " " << Src.Data[lane];
    }
    return out;
}
// Exercises the three assembly-implemented operators and prints the result.
int main()
{
    // operator=(long long)
    vD = 4;
    // operator=(const TMR&)
    vD = cA;
    // operator+ followed by operator=(const TMR&); the '+' was missing here.
    vD = cA + vD;
    std::cout << vD << std::endl;
}
;TestTMRasm.asm
.code
; inline TMR& operator = (const long long Src)
; In:  RCX = this (destination TMR), RDX = Src
; Out: RAX = this, because the C++ declaration returns TMR&
; Stores (double)Src into all four elements of the destination.
??4TMR@@QEAAAEAU0@_J@Z proc
    vcvtsi2sd xmm1, xmm7, rdx           ; integer in RDX -> double in the low lane of XMM1
    vbroadcastsd ymm0, xmm1             ; replicate the low lane into all four lanes
    vmovdqu ymmword ptr [rcx], ymm0     ; unaligned 32-byte store to *this
    mov rax, rcx                        ; return the reference to *this (was left undefined)
    vzeroupper                          ; avoid AVX/SSE transition penalties in the caller
    ret
??4TMR@@QEAAAEAU0@_J@Z endp
; inline TMR& operator = (const TMR& Src);
; In:  RCX = this (destination TMR), RDX = address of Src
; Out: RAX = this, because the C++ declaration returns TMR&
; Copies the 32 bytes of Src into the destination.
??4TMR@@QEAAAEAU0@AEBU0@@Z proc
    vmovdqu ymm0, ymmword ptr [rdx]     ; load the source record
    vmovdqu ymmword ptr [rcx], ymm0     ; store it into *this
    mov rax, rcx                        ; return the reference to *this (was left undefined)
    vzeroupper                          ; avoid AVX/SSE transition penalties in the caller
    ret
??4TMR@@QEAAAEAU0@AEBU0@@Z endp
; inline TMR& operator + (const TMR& O1, const TMR& O2)
; In:  RCX = address of O1, RDX = address of O2
; Out: RAX = address of a temporary holding the element-wise sum
; NOTE(review): the temporary lives in stack space that is released again
; before the return, so the returned reference points below RSP. That memory is
; not guaranteed to survive until the caller reads it. A robust fix is to
; return TMR by value, which changes the decorated name and the register
; contract, so it is deliberately not done here.
??H@YAAEAUTMR@@AEBU0@0@Z proc
    sub rsp, 20h                        ; reserve 32 bytes for the temporary
    vmovdqu ymm1, ymmword ptr [rcx]     ; load O1
    vmovdqu ymm2, ymmword ptr [rdx]     ; load O2
    vaddpd ymm0, ymm1, ymm2             ; four packed double additions
    vmovdqu ymmword ptr [rsp], ymm0     ; spill the sum to the temporary
    lea rax, qword ptr [rsp]            ; return its address
    add rsp, 20h                        ; release the temporary (see NOTE above)
    vzeroupper                          ; avoid AVX/SSE transition penalties in the caller
    ret
??H@YAAEAUTMR@@AEBU0@0@Z endp
end
However, the C++ compiler, like Delphi, does not inline them (see the disassembled code). Maybe I didn't set the compilation options correctly. If anyone achieves a positive result, please let me know. C++ proved to be better than Delphi in that there is no need to implement type conversion operators before assignment, and in that it can return a reference to the result.
Native support for constants is also good, although Delphi will simply pass the reference to cA to the operator when calling TMR(cA).
23: vD = 4;
00007FF7BAC710F4 BA 04 00 00 00 mov edx,4
00007FF7BAC710F9 48 8D 0D 28 45 00 00 lea rcx,[vD (07FF7BAC75628h)]
00007FF7BAC71100 E8 7B 07 00 00 call TMR::operator= (07FF7BAC71880h)
24: vD = cA;
00007FF7BAC71105 48 8D 15 C4 21 00 00 lea rdx,[cA (07FF7BAC732D0h)]
00007FF7BAC7110C 48 8D 0D 15 45 00 00 lea rcx,[vD (07FF7BAC75628h)]
00007FF7BAC71113 E8 77 07 00 00 call TMR::operator= (07FF7BAC7188Fh)
25: vD = cA + vD;
00007FF7BAC71118 48 8D 15 09 45 00 00 lea rdx,[vD (07FF7BAC75628h)]
00007FF7BAC7111F 48 8D 0D AA 21 00 00 lea rcx,[cA (07FF7BAC732D0h)]
00007FF7BAC71126 E8 6D 07 00 00 call operator+ (07FF7BAC71898h)
00007FF7BAC7112B 48 8B D0 mov rdx,rax
00007FF7BAC7112E 48 8D 0D F3 44 00 00 lea rcx,[vD (07FF7BAC75628h)]
00007FF7BAC71135 E8 55 07 00 00 call TMR::operator= (07FF7BAC7188Fh)