Home > Mobile >  Convert multibyte hex back to UTF-8 string
Convert multibyte hex back to UTF-8 string

Time:11-16

In Delphi 10.4, I have a hexadecimal representation of a string:

// Encodes Edit2.Text as a hex string and writes it to Edit3.Text.
// Every character is emitted with at least 2 hex digits, so characters
// above $FF (e.g. the euro sign, $20AC) produce 4 digits. The output carries
// no delimiter, which is why it cannot be decoded unambiguously.
procedure TForm1.Button2Click(Sender: TObject);
var
  i, nr : integer;
  Input, HexStr, h: String;
begin
  HexStr := '';
  Input := Edit2.Text;
  for i:=1 to Length(Input) do begin
    nr := Ord(Input[i]);
    h := IntToHex(nr, 0);
    // Left-pad single-digit values so each character takes at least 2 digits.
    if Length(h) = 1 then
      h := '0' + h;
    HexStr := HexStr + h;
  end;
  Edit3.Text := HexStr;
end;

For "abc€", I get "61626320AC" here. Note the € sign converts to "20AC". Now for converting it back to a normal string, I split that hex into 2-char pairs, using StrToInt() with a '$' prefix. I have no indicator for it being 4 chars long for an € sign, instead of 2 chars, and this breaks the euro sign:

enter image description here

How do I convert back such a hex string without breaking multibyte characters?

CodePudding user response:

I have no indicator for it being 4 chars long for an € sign, instead of 2 chars

And for that reason alone, you simply won't be able to convert "61626320AC" back to "abc€": you don't know which hex codes are 2 digits and which are 4 digits, because you didn't delimit them. For instance, the hex codes 6162 and 6263 also represent valid Unicode characters.

As @TomBrunberg mentioned in comments, you can use 4-digit hex codes for every character, eg:

// Encodes Edit2.Text as a hex string and writes it to Edit3.Text.
// Every character is emitted as exactly 4 hex digits, so the output can be
// split back into fixed-width groups without any delimiter.
procedure TForm1.Button2Click(Sender: TObject);
var
  i, nr : integer;
  Input, HexStr, h: String;
begin
  HexStr := '';
  Input := Edit2.Text;
  for i := Low(Input) to High(Input) do begin
    nr := Ord(Input[i]);
    // Fixed width of 4 digits: 'a' becomes '0061', the euro sign '20AC'.
    h := IntToHex(nr, 4);
    HexStr := HexStr + h;
  end;
  Edit3.Text := HexStr;
end;

Then you can convert back by splitting up the hex string into 4-character groups when calling StrToInt(), eg:

// Decodes the hex string in Edit3.Text back into text and writes it to
// Edit2.Text. The input is read in groups of 4 hex digits, one group per
// character. StrToInt raises EConvertError if a group is not valid hex.
procedure TForm1.Button3Click(Sender: TObject);
var
  i, nr : integer;
  Output, HexStr, h: String;
begin
  HexStr := Edit3.Text;
  Output := '';
  // Copy() takes a 1-based offset, so walk the string with 1-based positions
  // rather than Low()/High(), which follow the string-indexing mode.
  i := 1;
  while i <= Length(HexStr) do begin
    // A trailing group shorter than 4 digits is converted as-is.
    h := Copy(HexStr, i, 4);
    // The '$' prefix makes StrToInt parse the digits as hexadecimal.
    nr := StrToInt('$' + h);
    Output := Output + Char(nr);
    Inc(i, 4);
  end;
  Edit2.Text := Output;
end;

Or, as @AmigoJack hinted at, you can use UTF-8 instead, eg:

// Encodes Edit2.Text as a hex string of its UTF-8 bytes and writes it to
// Edit3.Text. Every byte is emitted as exactly 2 hex digits, so the output
// can be split back into fixed-width pairs without any delimiter.
procedure TForm1.Button2Click(Sender: TObject);
var
  i, nr : integer;
  HexStr, h: String;
  Input: UTF8String;
begin
  HexStr := '';
  // The cast converts the text to UTF-8, so each element of Input is one byte.
  Input := UTF8String(Edit2.Text);
  for i := Low(Input) to High(Input) do begin
    nr := Ord(Input[i]);
    h := IntToHex(nr, 2);
    HexStr := HexStr + h;
  end;
  Edit3.Text := HexStr;
end;

// Decodes the hex string in Edit3.Text back into text and writes it to
// Edit2.Text. The input is read in pairs of hex digits, each pair being one
// UTF-8 byte; the collected bytes are then converted to a native string.
// StrToInt raises EConvertError if a pair is not valid hex.
procedure TForm1.Button3Click(Sender: TObject);
var
  i, nr : integer;
  HexStr, h: String;
  Output: UTF8String;
begin
  HexStr := Edit3.Text;
  Output := '';
  // Copy() takes a 1-based offset, so walk the string with 1-based positions
  // rather than Low()/High(), which follow the string-indexing mode.
  i := 1;
  while i <= Length(HexStr) do begin
    // A trailing single digit is converted as-is.
    h := Copy(HexStr, i, 2);
    // The '$' prefix makes StrToInt parse the digits as hexadecimal.
    nr := StrToInt('$' + h);
    Output := Output + AnsiChar(nr);
    Inc(i, 2);
  end;
  // The cast decodes the accumulated UTF-8 bytes into a native string.
  Edit2.Text := string(Output);
end;
  • Related