Home > database >  Serializing complex object containing multiple nested objects with data frames
Serializing complex object containing multiple nested objects with data frames

Time:08-01

Below is a runnable code sample. I have a Chart1 object which can contain many panes, and each pane can contain many series. I would like to serialize this to JSON so I can send it to a Flask application to render. To deal with the DataFrames, I am using a custom encoder (ChartEncoder below):

from abc import ABC, abstractmethod
from datetime import datetime
import pandas as pd
import copy
import json
from json import JSONEncoder
import pandas_datareader.data as reader
import datetime as dt

class Series1:
    """Holder for one data series; the payload is kept on ``data`` as given."""

    def __init__(self, data):
        # No copy or validation: the caller's object is stored directly.
        self.data = data

class Pane1(object):
    """One pane of a chart, holding the list of series attached to it.

    Args:
        series: Existing list of ``Series1`` objects to use for this pane.
            When omitted (``None``) the pane starts with its own empty list.
        rel_height: Stored unchanged on the instance; defaults to ``None``.
    """

    # The annotation is a string so it is never evaluated at definition time;
    # it describes a list of series, which is how ``self.series`` is used.
    def __init__(self, series: "list[Series1] | None" = None, rel_height=None):
        # Build the empty list here rather than in the signature so that
        # panes never share one default list.
        self.series = [] if series is None else series
        self.rel_height = rel_height

class Chart1(ABC):
    """Chart made of panes, serializable to JSON through ``ChartEncoder``.

    Extra positional and keyword arguments are accepted and ignored.
    """

    def __init__(self, show_volume = True, *args, **kwargs):
        # Attributes are assigned in the order they appear in the JSON output.
        self.show_volume = show_volume
        self.panes = [Pane1()]
        self.symbol = self.interval = None

    def to_json(self):
        """Return this chart as a JSON string.

        A shallow copy is serialized so that the ``data = None`` attribute
        added for the output is not set on the chart itself.
        """
        snapshot = copy.copy(self)
        snapshot.data = None
        return json.dumps(snapshot, cls=ChartEncoder)

class ChartEncoder(JSONEncoder):
    """Fallback encoder for objects ``json`` cannot serialize by itself."""

    def default(self, obj):
        """Return a stand-in for ``obj`` that ``json`` can encode.

        * exact ``pd.DataFrame`` / ``pd.Series`` -> the ``to_json`` text of the
          index-reset frame (a ``str``, so it is emitted as a JSON string
          value rather than as a nested array);
        * any object with a ``__dict__`` -> that ``__dict__``;
        * anything else -> the empty string.
        """
        kind = type(obj)
        if kind is pd.Series:
            # A series is encoded exactly like the frame built from it.
            obj = pd.DataFrame(obj)
            kind = pd.DataFrame
        if kind is pd.DataFrame:
            flattened = obj.reset_index()
            return flattened.to_json(orient="records", date_format = 'iso')
        return getattr(obj, '__dict__', '')

# Demo: download daily prices for two tickers, attach each as a series to the
# chart's first pane, then print the chart serialized as JSON.
if __name__ == '__main__':
    chart = Chart1()
    start = dt.datetime(2022,7,25)
    end = dt.datetime(2022,7,29)
    tickers = ['AAPL', 'MSFT']
    for t in tickers:
        series = reader.DataReader(t,'yahoo', start, end)
        chart.panes[0].series.append(Series1(series))

    # Deliberately not named ``json``: that would rebind the imported module
    # and make any later ``chart.to_json()`` call fail on ``json.dumps``.
    chart_json = chart.to_json()
    print(chart_json)

After running the code there are two problems with the json string returned:

  1. It looks like escape characters are being added that cannot be read by JavaScript's JSON.parse.
'{"show_volume": true, "panes": [{"series": [{"data": "[{\\"Date\\":\\"2022-07-25T00:00:00.000Z\\",\\"High\\":155.0399932861,\\"Low\\":152.2799987793,\\"Open\\":154.0099945068,\\"Close\\":152.9499969482,\\"Volume\\":53623900,\\"Adj Close\\":152.9499969482},{\\"Date\\":\\"2022-07-26T00:00:00.000Z\\",\\"High\\":153.0899963379,\\"Low\\":150.8000030518,\\"Open\\":152.2599945068,\\"Close\\":151.6000061035,\\"Volume\\":55138700,\\"Adj Close\\":151.6000061035},{\\"Date\\":\\"2022-07-27T00:00:00.000Z\\",\\"High\\":157.3300018311,\\"Low\\":152.1600036621,\\"Open\\":152.5800018311,\\"Close\\":156.7899932861,\\"Volume\\":78620700,\\"Adj Close\\":156.7899932861},{\\"Date\\":\\"2022-07-28T00:00:00.000Z\\",\\"High\\":157.6399993896,\\"Low\\":154.4100036621,\\"Open\\":156.9799957275,\\"Close\\":157.3500061035,\\"Volume\\":81378700,\\"Adj Close\\":157.3500061035},{\\"Date\\":\\"2022-07-29T00:00:00.000Z\\",\\"High\\":163.6300048828,\\"Low\\":159.5,\\"Open\\":161.2400054932,\\"Close\\":162.5099945068,\\"Volume\\":101689200,\\"Adj Close\\":162.5099945068}]"}, {"data": "[{\\"Date\\":\\"2022-07-25T00:00:00.000Z\\",\\"High\\":261.5,\\"Low\\":256.8099975586,\\"Open\\":261.0,\\"Close\\":258.8299865723,\\"Volume\\":21056000,\\"Adj Close\\":258.8299865723},{\\"Date\\":\\"2022-07-26T00:00:00.000Z\\",\\"High\\":259.8800048828,\\"Low\\":249.5700073242,\\"Open\\":259.8599853516,\\"Close\\":251.8999938965,\\"Volume\\":39348000,\\"Adj Close\\":251.8999938965},{\\"Date\\":\\"2022-07-27T00:00:00.000Z\\",\\"High\\":270.049987793,\\"Low\\":258.8500061035,\\"Open\\":261.1600036621,\\"Close\\":268.7399902344,\\"Volume\\":45994000,\\"Adj Close\\":268.7399902344},{\\"Date\\":\\"2022-07-28T00:00:00.000Z\\",\\"High\\":277.8399963379,\\"Low\\":267.8699951172,\\"Open\\":269.75,\\"Close\\":276.4100036621,\\"Volume\\":33459300,\\"Adj 
Close\\":276.4100036621},{\\"Date\\":\\"2022-07-29T00:00:00.000Z\\",\\"High\\":282.0,\\"Low\\":276.6300048828,\\"Open\\":277.700012207,\\"Close\\":280.7399902344,\\"Volume\\":32129400,\\"Adj Close\\":280.7399902344}]"}], "rel_height": null}], "symbol": null, "interval": null, "data": null}'
  2. After stripping away these characters manually, I'm left with the below, but even this is not valid, parseable JSON according to: https://jsonformatter.org/json-parser. You will notice that this is because the series.data property ("data": below) is a quoted string, as opposed to an array.
{"show_volume": true, "panes": [{"series": [{"data": "[{"Date":"2022-07-25T00:00:00.000Z","High":155.0399932861,"Low":152.2799987793,"Open":154.0099945068,"Close":152.9499969482,"Volume":53623900,"Adj Close":152.9499969482},{"Date":"2022-07-26T00:00:00.000Z","High":153.0899963379,"Low":150.8000030518,"Open":152.2599945068,"Close":151.6000061035,"Volume":55138700,"Adj Close":151.6000061035},{"Date":"2022-07-27T00:00:00.000Z","High":157.3300018311,"Low":152.1600036621,"Open":152.5800018311,"Close":156.7899932861,"Volume":78620700,"Adj Close":156.7899932861},{"Date":"2022-07-28T00:00:00.000Z","High":157.6399993896,"Low":154.4100036621,"Open":156.9799957275,"Close":157.3500061035,"Volume":81378700,"Adj Close":157.3500061035},{"Date":"2022-07-29T00:00:00.000Z","High":163.6300048828,"Low":159.5,"Open":161.2400054932,"Close":162.5099945068,"Volume":101689200,"Adj Close":162.5099945068}]"}, {"data": "[{"Date":"2022-07-25T00:00:00.000Z","High":261.5,"Low":256.8099975586,"Open":261.0,"Close":258.8299865723,"Volume":21056000,"Adj Close":258.8299865723},{"Date":"2022-07-26T00:00:00.000Z","High":259.8800048828,"Low":249.5700073242,"Open":259.8599853516,"Close":251.8999938965,"Volume":39348000,"Adj Close":251.8999938965},{"Date":"2022-07-27T00:00:00.000Z","High":270.049987793,"Low":258.8500061035,"Open":261.1600036621,"Close":268.7399902344,"Volume":45994000,"Adj Close":268.7399902344},{"Date":"2022-07-28T00:00:00.000Z","High":277.8399963379,"Low":267.8699951172,"Open":269.75,"Close":276.4100036621,"Volume":33459300,"Adj Close":276.4100036621},{"Date":"2022-07-29T00:00:00.000Z","High":282.0,"Low":276.6300048828,"Open":277.700012207,"Close":280.7399902344,"Volume":32129400,"Adj Close":280.7399902344}]"}], "rel_height": null}], "symbol": null, "interval": null, "data": null}

Any help with avoiding these two issues would be greatly appreciated.

CodePudding user response:

For starters, the JSON that is emitted is perfectly parsable by JavaScript's JSON.parse; the issue is that your default implementation returns a str object, so that gets serialized as a JSON string.

You probably want to use to_dict (which returns plain Python objects; with orient="records", a list of dicts) instead of to_json (which returns a str). You then just have to handle the pd.Timestamp objects:

class ChartEncoder(JSONEncoder):
    """JSON encoder that emits pandas objects as nested JSON, not as strings.

    Returning plain Python containers from ``default`` lets ``json`` encode
    them as real arrays and objects instead of one quoted string.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        * ``pd.DataFrame`` / ``pd.Series`` -> list of row dicts, with the
          index included as a regular column;
        * ``pd.Timestamp`` -> ISO 8601 string;
        * any object with a ``__dict__`` -> that ``__dict__``.

        Raises:
            TypeError: for any other type (raised by ``JSONEncoder.default``).
        """
        if isinstance(obj, pd.Series):
            # Series.to_dict() has no ``orient`` argument, so promote the
            # series to a one-column frame and share the DataFrame path.
            obj = obj.to_frame()
        if isinstance(obj, pd.DataFrame):
            # orient="records" leaves the index out; reset_index() first so
            # index values (e.g. the dates) are kept as a column.
            return obj.reset_index().to_dict(orient="records")
        if isinstance(obj, pd.Timestamp):
            return obj.isoformat()
        if hasattr(obj, '__dict__'):
            return obj.__dict__
        # you probably want this, it doesn't make sense to return an empty string
        return JSONEncoder.default(self, obj)

Note, return obj.__dict__ probably isn't the right way to do this. You should handle your custom types explicitly.

  • Related