Home > Software design >  encode string from json utf-8
encode string from json utf-8

Time:03-03

I have a JSON file {"data":[{"clientName":"Is Такой текст"}]} that is UTF-8 encoded, and I am trying to see the text in the tag clientName using the ShowMessage() function.

Instead of normal text, I get "Is strangeText". I understand that the problem is in the encoding, but I don't understand how to fix it.

// Loads 'clientOrders.json', parses it as JSON, and shows every value of every
// object found in the array held by the root object's first pair.
// Raises Exception when the content cannot be parsed.
// NOTE(review): this is the version that displays garbled non-ASCII text.
procedure TForm1.jsonTest;
var
  JSONData, JSON: TJSONObject;
  jArr: TJSONArray;
  s: TStringList;
  i, j: Integer;
  jValue: TJSonValue;
  JsonArray: TJSONArray;
  jPair: TJSONPair;
begin
  // NOTE(review): this instance is replaced by the parse result below without
  // being freed first.
  json := TJSONObject.Create;
  s := TStringList.Create;
  try
    // No encoding is passed here, so the file is not explicitly read as UTF-8.
    S.LoadFromFile('clientOrders.json');
    // NOTE(review): 'text' is not declared among the locals above; presumably it
    // resolves to a member of the form - confirm.
    text := S.Text;
    // BytesOf() is called without an encoding, while the file is UTF-8.
    JSON := TJSONObject.ParseJSONValue(BytesOf(text),0) as TJSONObject;
    if JSON <> nil then
    begin
      // The first pair of the root object is expected to hold an array.
      jPair := JSON.Get(0);
      jArr := jPair.JsonValue as TJSONArray;

      // Each array element is treated as an object.
      for I := 0 to jArr.Size-1 do
      begin
        JSONData := jArr.Get(I) as TJSONObject;

        // Show the value of every pair in the current object.
        for j := 0 to JSONData.Size - 1 do
        begin
          ShowMessage(JSONData.Get(j).JsonValue.ToString);
        end;
      end;
    end
    else
      raise Exception.Create('This is not a JSON');
  finally
    json.Free;
    s.Free;
    // NOTE(review): jValue is never assigned anywhere in this procedure, so this
    // calls Free on an uninitialized reference.
    jValue.Free;
  end;
end;

CodePudding user response:

Assuming you are running this code on Windows, then the problem is two-fold:

  • you are not telling TStringList.LoadFromFile() what the encoding of the file is. So, unless the file begins with a UTF-8 BOM (which is unlikely with a JSON file), it will be decoded as ANSI, not as UTF-8, thus corrupting any non-ASCII characters.

  • you are converting the decoded text back into bytes without specifying an encoding. The overload of ParseJSONValue() you are using expects UTF-8 encoded bytes, but BytesOf() will encode to ANSI, not to UTF-8, thus corrupting non-ASCII characters even further.

That is why you are getting garbage text from the JSON.

There are other problems with your code, too. Namely, a memory leak and a double-free, due to you mismanaging the initial TJSONObject.

Try this instead.

// Loads 'clientOrders.json' as UTF-8, parses it, and shows every value of every
// object found in the array held by the root object's first pair.
// Raises Exception when the content cannot be parsed as JSON.
procedure TForm1.jsonTest;
var
  JSONData, JSON: TJSONObject;
  jArr: TJSONArray;
  s: TStringList;
  i, j: Integer;
  jValue: TJSonValue;
  data: string;
begin
  // Decode the file explicitly as UTF-8 instead of relying on the default.
  s := TStringList.Create;
  try
    s.LoadFromFile('clientOrders.json', TEncoding.UTF8);
    data := s.Text;
  finally
    s.Free;
  end;
  { Alternatively:
  data := IOUtils.TFile.ReadAllText('clientOrders.json', TEncoding.UTF8);
  }
  // This overload of ParseJSONValue() expects UTF-8 encoded bytes, so encode
  // the string back to UTF-8 explicitly.
  jValue := TJSONObject.ParseJSONValue(TEncoding.UTF8.GetBytes(data), 0);
  if jValue = nil then
    raise Exception.Create('This is not a JSON');
  // jValue is the only object created by the parse; JSON, jArr and JSONData
  // are just typed views into it, so it is the only one freed.
  try
    JSON := jValue as TJSONObject;
    jArr := JSON.Get(0).JsonValue as TJSONArray;
    for i := 0 to jArr.Size - 1 do
    begin
      JSONData := jArr.Get(i) as TJSONObject;
      for j := 0 to JSONData.Size - 1 do
      begin
        ShowMessage(JSONData.Get(j).JsonValue.ToString);
      end;
    end;
  finally
    jValue.Free;
  end;
end;

Alternatively, don't decode the file bytes into a string just to convert them back into bytes; load them as-is into ParseJSONValue(), e.g.:

// Variant that loads the raw file bytes into a TBytesStream and parses them
// directly, without decoding them into a string first.
procedure TForm1.jsonTest;
var
  ...
  jValue: TJSonValue;
  data: TBytesStream;
begin
  data := TBytesStream.Create;
  try
    data.LoadFromFile('clientOrders.json');
    // Bytes exposes the stream's whole internal buffer, which can be longer
    // than the loaded content, so pass Size to parse only the file's bytes.
    jValue := TJSONObject.ParseJSONValue(data.Bytes, 0, Integer(data.Size));
    ...
  finally
    data.Free;
  end;
end;

Or:

// Variant that reads the file's raw bytes in one call and hands them to the
// parser unchanged.
procedure TForm1.jsonTest;
var
  ...
  jValue: TJSonValue;
  data: TBytes;
begin
  // Read the file content as bytes; no string decoding step is involved.
  data := IOUtils.TFile.ReadAllBytes('clientOrders.json');
  // Parse starting at offset 0 of the byte array.
  jValue := TJSONObject.ParseJSONValue(data, 0);
  ...
end;
  • Related