Home > front end >  How to convert a JSON structure into a dataclass object tree?
How to convert a JSON structure into a dataclass object tree?

Time:11-12

I have a JSON data structure. Every object has a field called "type".

# Sample input: a JSON document in which every object carries a "type" field
# naming the class it should be converted to.
json_data_str = """
{
    "type" : "Game",
    "levels" : [
        {
            "type": "Level",
            "map" : {
                "type" : "SquareRoom",
                "name" : "Level 1",
                "width" : 100,
                "height" : 100
            },
            "waves" : [
                {
                    "type" : "Wave",
                    "enemies" : [
                        {
                            "type" : "Wizard",
                            "name" : "Gandalf"
                        },
                        {
                            "type" : "Archer",
                            "name" : "Legolass"
                        }
                    ]
                }
            ]
        }
    ]
}
"""

And I want to convert this into an object tree composed of the following classes

from dataclasses import dataclass
from typing import List

@dataclass
class GameObject:
    """Root of the hierarchy; every class of the object tree derives from it."""


@dataclass
class Character(GameObject):
    """A game object identified by a name."""

    name: str


@dataclass
class Wave(GameObject):
    """A group of enemy characters."""

    enemies: List[Character]


@dataclass
class Wizard(Character):
    """Character subtype; adds no fields of its own."""


@dataclass
class Archer(Character):
    """Character subtype; adds no fields of its own."""


@dataclass
class Map(GameObject):
    """A named map."""

    name: str


@dataclass
class SquareRoom(Map):
    """A map that additionally has a width and a height."""

    width: int
    height: int


@dataclass
class Level(GameObject):
    """A level: its waves followed by its map (in constructor order)."""

    waves: List[Wave]
    map: Map


@dataclass
class Game(GameObject):
    """Top of the object tree: the list of levels."""

    levels: List[Level]

I can unpack a simple JSON object into a dataclass quite easily using the ** operator, e.g.:

# Members of a JSON object must be separated by commas; without them
# json.loads() rejects the document with a JSONDecodeError.
json_data_str = """
{
   "type" : "Person",
   "name" : "Bob",
   "age" : 29
}
"""

class GameObject(ABC):
    """Common base class for everything stored in the game object registry."""

@dataclass
class Person(GameObject):
    # The field names match the keys of the sample JSON object (apart from
    # "type"), which is what allows the parsed dict to be unpacked with **.
    name: str
    age: int

# Registry mapping each JSON "type" tag to the class it should build.
game_object_registry: Dict[str, Type[GameObject]] = {'Person': Person}

json_obj = json.loads(json_data_str)
# The tag only selects the class; it is not a constructor argument, so it is
# taken out of the dict before the remaining keys are unpacked.
obj_type = json_obj.pop('type')
ObjType = game_object_registry[obj_type]
ObjType(**json_obj)

But how can I extend this to work with nested objects?

I want it to create this data class instance:

game = Game(
    levels=[
        Level(
            map=SquareRoom(name="Level 1", width=100, height=100),
            waves=[
                Wave(enemies=[Wizard(name="Gandalf"), Archer(name="Legolass")]),
            ],
        ),
    ],
)

Here is my best attempt. I realise the logic doesn't really make sense, but it might be a starting point; I cannot come up with a function that does make sense.

def json_to_game_object(json_obj: Any, game_object_registry: Dict[str, "Type[GameObject]"]) -> Any:
    """Recursively convert parsed JSON into a tree of game objects.

    Args:
        json_obj: A value produced by ``json.loads``: a dict, a list or a
            scalar. It is not modified.
        game_object_registry: Maps each ``"type"`` tag to the class to build.
            (The annotation is a string so that this function does not need
            ``GameObject`` to be defined before it.)

    Returns:
        For a dict, an instance of the class registered for its ``"type"``
        tag, constructed with the remaining keys as keyword arguments after
        their values have been converted; for a list, a new list of converted
        elements; any other value unchanged.

    Raises:
        KeyError: If a dict has no ``"type"`` key, or its tag is not in the
            registry.
    """
    if isinstance(json_obj, dict):
        obj_type: str = json_obj['type']
        obj_class = game_object_registry[obj_type]
        # Children are converted first so the parent constructor receives
        # finished objects. A fresh dict is built (instead of deleting "type"
        # in place) so the caller's data stays intact.
        kwargs = {
            key: json_to_game_object(value, game_object_registry)
            for key, value in json_obj.items()
            if key != 'type'
        }
        logging.debug('Creating object of type %s with args %s', obj_class, kwargs)
        return obj_class(**kwargs)

    if isinstance(json_obj, list):
        logging.debug('Parsing JSON list of %d elements', len(json_obj))
        return [json_to_game_object(elem, game_object_registry) for elem in json_obj]

    # Scalars (str, int, float, bool, None) are already in their final form.
    return json_obj

CodePudding user response:

Here is an example of how to do this conversion in an object-oriented way. In my personal opinion, converting this to pure dataclass objects brings you no benefit over just keeping everything in a dictionary. Here, each object can have its own behavior.

(I've now modified this to start adding repr handlers, so you can print the whole tree at once.)

# Input document; each object's "type" value names the class that the code
# below uses to build it.
json_data_str = """
{
    "type" : "Game",
    "levels" : [
        {
            "type": "Level",
            "map" : {
                "type" : "SquareRoom",
                "name" : "Level 1",
                "width" : 100,
                "height" : 100
            },
            "waves" : [
                {
                    "type" : "Wave",
                    "enemies" : [
                        {
                            "type" : "Wizard",
                            "name" : "Gandalf"
                        },
                        {
                            "type" : "Archer",
                            "name" : "Legolass"
                        }
                    ]
                }
            ]
        }
    ]
}
"""

import json

class GameObject:
    """Common base class of every object in the game tree."""

class Game(GameObject):
    """Root of the game tree, built from the parsed top-level JSON object."""

    def __init__(self, obj):
        """Build one ``Level`` per entry of ``obj['levels']``."""
        self.levels = [Level(level_obj) for level_obj in obj['levels']]

    def __repr__(self):
        """Return a header line followed by the repr of each level."""
        # The header has to be part of the result: assigning the joined level
        # reprs to the same variable would discard it.
        level_reprs = '\n'.join(repr(level) for level in self.levels)
        return f"<Game contains {len(self.levels)} levels:>\n{level_reprs}"

class Character(GameObject):
    """Base class for characters; stores the name found in the JSON object."""

    def __init__(self, obj):
        """Read the character's name from ``obj['name']``."""
        name = obj['name']
        self.name = name


class Wave(GameObject):
    """A wave of enemies, built from the ``enemies`` list of a JSON object."""

    def __init__(self, obj):
        """Build every enemy with the class registered for its "type" tag."""
        # game_object_registry is a module-level name looked up when a Wave is
        # constructed, so it may be defined after this class.
        self.enemies = [
            game_object_registry[enemy_obj['type']](enemy_obj)
            for enemy_obj in obj['enemies']
        ]

    def __repr__(self):
        """Return a one-line summary with the number of enemies."""
        # Closing '>' added so the format matches the Game and Level reprs.
        return f'<Wave contains {len(self.enemies)} enemies>'


class Wizard(Character):
    """Wizard character; construction is inherited unchanged from Character."""

class Archer(Character):
    """Archer character; construction is inherited unchanged from Character."""

class Map(GameObject):
    """Base class for maps; defines no attributes or constructor of its own."""

class SquareRoom(Map):
    """A map described by a name, a width and a height."""

    def __init__(self, obj):
        """Copy ``name``, ``width`` and ``height`` from the JSON object."""
        self.name = obj['name']
        # Attribute name matches the JSON key "width".
        self.width = obj['width']
        self.height = obj['height']

class Level(GameObject):
    """A level: a list of waves plus the map they take place on."""

    def __init__(self, obj):
        """Build the waves and the map from the JSON object."""
        self.waves = [Wave(wave_obj) for wave_obj in obj['waves']]
        # The map's concrete class is chosen by its own "type" tag.
        map_obj = obj['map']
        self.map = game_object_registry[map_obj['type']](map_obj)

    def __repr__(self):
        """Return a header line followed by the repr of each wave."""
        # The header has to be part of the result: assigning the joined wave
        # reprs to the same variable would discard it.
        wave_reprs = '\n'.join(repr(wave) for wave in self.waves)
        return f'<Level contains {len(self.waves)} waves>\n{wave_reprs}'

# Maps each JSON "type" tag to the class that builds it. Wave and Level look
# classes up here by tag and call them with the JSON object.
# NOTE(review): Map defines no __init__ that accepts the JSON object, so a
# "Map" tag would presumably fail at construction time - confirm whether this
# entry is intended.
game_object_registry = {
    'Game': Game,
    'Wave': Wave,
    'Level': Level,
    'SquareRoom': SquareRoom,
    'Archer': Archer,
    'Map': Map,
    'Wizard': Wizard
}

# Parse the JSON text, then let Game build the whole tree from the dict.
json_obj = json.loads(json_data_str)

g = Game(json_obj)
print(g)
# Name of the first enemy in the first wave of the first level.
print(g.levels[0].waves[0].enemies[0].name)

CodePudding user response:

Assuming you have control over the JSON / dict structure, you can use a framework like dacite.

It will let you map the data into your dataclasses.

Example (taken from dacite github) below:

# Inner dataclass: the type of B's only field.
@dataclass
class A:
    x: str
    y: int


# Outer dataclass whose field is itself a dataclass.
@dataclass
class B:
    a: A


# Plain nested dict whose keys mirror the dataclass field names.
data = {
    'a': {
        'x': 'test',
        'y': 1,
    }
}

# NOTE(review): from_dict is not defined in this snippet; presumably it is
# dacite's `from dacite import from_dict` - confirm.
result = from_dict(data_class=B, data=data)

# The nested dict has been turned into an A instance inside B.
assert result == B(a=A(x='test', y=1))

CodePudding user response:

As an alternative, you could also use the dataclass-wizard library for this.

This should support dataclasses in Union types as of a recent version. Note, however, that the tag field name is not configurable as of yet, so in the example below I've renamed the type field that appears in the JSON object. I've also removed this type field entirely in cases where it was not really needed -- note that you'd only need such a tag field when you have a field that maps to one or more dataclass types, via a Union declaration. The one main benefit of using a custom tag for each class is that if you later decide to rename the class, for instance, any existing JSON data can still be de-serialized into the nested dataclass model as expected.

The example below should work for Python 3.7 with the included __future__ import. This allows you to use PEP 585- and PEP 604-style annotations, for a more convenient shorthand syntax.

from __future__ import annotations

from dataclasses import dataclass

from dataclass_wizard import JSONWizard


# Root of the hierarchy; declares no fields.
@dataclass
class GameObject:
    ...


# Shared base for the enemy classes: contributes the `name` field.
@dataclass
class Character(GameObject):
    name: str


# The tag set in the inner Meta class equals the "__tag__" value used for
# wizards in the sample JSON document.
@dataclass
class Wizard(Character, JSONWizard):

    class _(JSONWizard.Meta):
        tag = 'Wizard'

    ...


# Same pattern as Wizard, with the tag used for archers.
@dataclass
class Archer(Character, JSONWizard):

    class _(JSONWizard.Meta):
        tag = 'Archer'

    ...


# Top-level model; main() calls Game.from_json on the whole document.
# `Level` is defined further down; the annotation is not evaluated here
# because of `from __future__ import annotations`.
@dataclass
class Game(GameObject, JSONWizard):
    levels: list[Level]


# `Wave`, `SquareRoom` and `Map` are likewise defined later in the file.
@dataclass
class Level(GameObject):
    waves: list[Wave]
    # TODO: define other map classes
    map: SquareRoom | Map


# Base map type: contributes the `name` field.
@dataclass
class Map(GameObject):
    name: str


# Tagged like the enemy classes; the sample JSON uses "__tag__": "SquareRoom"
# for the level's map.
@dataclass
class SquareRoom(Map, JSONWizard):

    class _(JSONWizard.Meta):
        tag = 'SquareRoom'

    width: int
    height: int


# Each enemy is one of the two tagged classes.
# NOTE(review): presumably the library uses the "__tag__" value to choose
# between the Union members - confirm against the dataclass-wizard docs.
@dataclass
class Wave(GameObject):
    enemies: list[Wizard | Archer]


def main():
    """Load the sample document with ``Game.from_json`` and print its repr."""
    payload = """
    {
        "levels": [
            {
                "map": {
                    "__tag__": "SquareRoom",
                    "name": "Level 1",
                    "width": 100,
                    "height": 100
                },
                "waves": [
                    {
                        "enemies": [
                            {
                                "__tag__": "Wizard",
                                "name": "Gandalf"
                            },
                            {
                                "__tag__": "Archer",
                                "name": "Legolass"
                            }
                        ]
                    }
                ]
            }
        ]
    }
    """

    # Only objects that need disambiguation carry a "__tag__" entry.
    print(repr(Game.from_json(payload)))


if __name__ == '__main__':
    main()

Output:

Game(levels=[Level(waves=[Wave(enemies=[Wizard(name='Gandalf'), Archer(name='Legolass')])], map=SquareRoom(name='Level 1', width=100, height=100))])

If you need data validation or if you want to retain the type field in the JSON object, I'd also suggest pydantic as another solution. In addition, you can use pydantic drop-in dataclasses and retain the @dataclass usage for the rest of the model classes, as shown below.

from typing import List, Union

from pydantic import BaseModel
from pydantic.dataclasses import dataclass
from typing_extensions import Literal


# Root of the hierarchy; declares no fields.
@dataclass
class GameObject:
    ...


# Shared base for the enemy classes: contributes the `name` field.
@dataclass
class Character(GameObject):
    name: str


# `type` is a real dataclass field here, restricted to the literal 'Wizard';
# it appears after the inherited `name` field in the printed repr.
@dataclass
class Wizard(Character):
    type: Literal['Wizard']


# Same pattern as Wizard, restricted to the literal 'Archer'.
@dataclass
class Archer(Character):
    type: Literal['Archer']


# Each enemy is one of the two Literal-tagged classes.
# NOTE(review): presumably pydantic uses the Literal `type` value to pick the
# matching Union member - confirm against the pydantic docs.
@dataclass
class Wave(GameObject):
    enemies: List[Union[Wizard, Archer]]


# Base map type: contributes the `name` field.
@dataclass
class Map(GameObject):
    name: str


# Map subtype tagged with the literal 'SquareRoom'.
@dataclass
class SquareRoom(Map):
    type: Literal['SquareRoom']
    width: int
    height: int


@dataclass
class Level(GameObject):
    waves: List[Wave]
    # TODO: define other map classes
    map: Union[SquareRoom, Map]


# The top-level model derives from BaseModel instead of using @dataclass;
# main() calls Game.parse_raw on the whole document.
class Game(BaseModel, GameObject):
    levels: List[Level]


def main():
    """Load the sample document with ``Game.parse_raw`` and print its repr."""
    payload = """
    {
        "levels": [
            {
                "map": {
                    "type": "SquareRoom",
                    "name": "Level 1",
                    "width": 100,
                    "height": 100
                },
                "waves": [
                    {
                        "enemies": [
                            {
                                "type": "Wizard",
                                "name": "Gandalf"
                            },
                            {
                                "type": "Archer",
                                "name": "Legolass"
                            }
                        ]
                    }
                ]
            }
        ]
    }
    """

    # The "type" entries are kept in the JSON and end up as fields of the
    # resulting objects.
    print(repr(Game.parse_raw(payload)))


if __name__ == '__main__':
    main()

The output in this case is slightly different - note that when you print the repr of the Game object, you also see the type fields printed out, since technically it is a dataclass field.

Game(levels=[Level(waves=[Wave(enemies=[Wizard(name='Gandalf', type='Wizard'), Archer(name='Legolass', type='Archer')])], map=SquareRoom(name='Level 1', type='SquareRoom', width=100, height=100))])
  • Related