I am trying to instantiate an array of python objects from YAML, inside a struct. Outside of a struct I am able to do this easily, but it seems that the YAML BaseLoader is failing to recursively search the sub nodes of my object.
import yaml
import ruamel.yaml
class Person:
    """A named person that can be loaded from a ``!Person`` YAML mapping."""

    def __init__(self, name: str = 'JohnDoe'):
        """Store *name*; falls back to ``'JohnDoe'`` when none is given."""
        self.name = name

    @classmethod
    def from_yaml(cls, constructor, node):
        """Build a Person from the mapping stored in *node*.

        Args:
            constructor: object providing ``construct_yaml_map(node)``, a
                generator whose last yielded value is the mapping for *node*.
            node: the YAML node tagged ``!Person``.

        Returns:
            A Person named after the mapping's ``'Name'`` entry, or a Person
            with the default name when that key is absent.
        """
        # Drain the generator; the last yielded object is the mapping.
        for m in constructor.construct_yaml_map(node):
            pass
        if 'Name' in m:
            return cls(name=m['Name'])
        # Previously `name` was left unbound on this path (UnboundLocalError).
        return cls()

    def __repr__(self):
        return f'Person(name={self.name})'
class Car:
    """A car whose ``passengers`` list holds the driver first, then riders."""

    def __init__(self):
        self.passengers = []

    # The annotation is quoted so this class does not need ``Person`` to be
    # defined at the moment the method is created.
    def add_person(self, person: "Person" = None):
        """Append *person* to the end of the passenger list."""
        self.passengers.append(person)

    @classmethod
    def from_yaml(cls, constructor, node):
        """Build a Car from the mapping stored in *node*.

        Args:
            constructor: object providing ``construct_yaml_map(node)``, a
                generator whose last yielded value is the mapping for *node*.
            node: the YAML node tagged ``!Car``.

        Returns:
            A Car with ``'Driver'`` (if present) placed first, followed by
            every entry of ``'Passengers'`` (if present).
        """
        # Drain the generator; the last yielded object is the mapping.
        # NOTE: the output reported for this snippet shows 'Passengers'
        # arriving here as an empty list, which is the problem being asked
        # about; this version intentionally keeps that construction path.
        for m in constructor.construct_yaml_map(node):
            pass
        inst = cls()
        if 'Driver' in m:
            # The driver always goes in front of whoever is already aboard.
            inst.passengers = [m['Driver']] + inst.passengers
        if 'Passengers' in m:
            foo = m['Passengers']
            print(f'm[\'Passengers\'] = {foo}')
            for person in m['Passengers']:
                inst.add_person(person)
        return inst

    def __repr__(self):
        return f'Car(passengers={self.passengers})'
if __name__ == "__main__":
    # NOTE: this rebinds the name ``yaml``, shadowing the ``yaml`` module
    # imported at the top of the file.
    yaml = ruamel.yaml.YAML(typ='safe')
    # Registering the classes makes the loader use their from_yaml methods
    # for the !Person and !Car tags.
    yaml.register_class(Person)
    yaml.register_class(Car)
    # The document's nesting is restored here: each mapping key is indented
    # under the sequence entry (and tag/anchor) it belongs to.
    data = yaml.load("""
- !Person &0
  Name: 'Paul'
- !Person &1
  Name: 'George'
- !Person &3
  Name: 'John'
- !Car
  Driver: *0
  Passengers: [*1]
- !Car
  Driver: *3
  Passengers:
  - !Person &4
    Name: 'Ringo'
""")
    print(f'data = {data}')
the above code prints the following to the console on execution:
m['Passengers'] = []
m['Passengers'] = []
data = [Person(name=Paul), Person(name=George), Person(name=John), Car(passengers=[Person(name=Paul)]), Car(passengers=[Person(name=John)])]
whereas I would expect the output to be:
m['Passengers'] = [Person(name=George)]
m['Passengers'] = [Person(name=Ringo)]
data = [Person(name=Paul), Person(name=George), Person(name=John), Car(passengers=[Person(name=Paul), Person(name=George)]), Car(passengers=[Person(name=John), Person(name=Ringo)])]
No matter what, even with an array of strings, the value associated with the key 'Passengers' is always [] in the dictionary m.
Do I have to manually tell the constructor to traverse the rest of the node first in the from_yaml function, or does the YAML loader work recursively from the bottom up?
CodePudding user response:
I was able to find a partial answer in the following post: How do I handle recursion in a custom PyYAML constructor?
In the case of ruamel.yaml, it looks like our constructor is our from_yaml function, which gets added when the class is registered. All we have to do is add a yield after the initialization of our class in from_yaml, and before we retrieve our recursive item 'Passengers':
class Car:
    """A car whose ``passengers`` list holds the driver first, then riders."""

    def __init__(self):
        self.passengers = []

    # The annotation is quoted so this snippet can be defined without
    # ``Person`` already being in scope.
    def add_person(self, person: "Person" = None):
        """Append *person* to the end of the passenger list."""
        self.passengers.append(person)

    @classmethod
    def from_yaml(cls, constructor, node):
        """Two-step constructor: yield an empty Car, then fill it in.

        Args:
            constructor: object providing ``construct_yaml_map(node)``, a
                generator whose last yielded value is the mapping for *node*.
            node: the YAML node tagged ``!Car``.

        Yields:
            The new Car instance, once, before its passengers are populated.
            The remaining statements run when the generator is resumed.
        """
        for m in constructor.construct_yaml_map(node):
            print(f'm{type(m)} = {m}')
        inst = cls()
        yield inst  # <-- This yield statement fixes our issue
        if 'Driver' in m:
            # The driver always goes in front of whoever is already aboard.
            inst.passengers = [m['Driver']] + inst.passengers
        if 'Passengers' in m:
            foo = m['Passengers']
            print(f'm[\'Passengers\'] = {foo}')
            for person in m['Passengers']:
                inst.add_person(person)

    def __repr__(self):
        return f'Car(passengers={self.passengers})'
CodePudding user response:
There is no need to import yaml in your example.
During the construction of the car, its passengers are not yet known. So what you need to do is construct potentially recursive data such as Person and Car in a two-step process: first constructing and yielding the "empty" Car, then filling in the Driver and any Passengers on the already yielded instance. The loader knows how to handle this, so you don't have to recurse into anything in your from_yaml.
Additionally, you'll need to call constructor.construct_mapping(node, deep=True) in from_yaml, instead of your iteration over constructor.construct_yaml_map(node):
import ruamel.yaml
class Person:
    """A named person that can be loaded from a ``!Person`` YAML mapping."""

    def __init__(self, name: str = 'JohnDoe'):
        """Store *name*; falls back to ``'JohnDoe'`` when none is given."""
        self.name = name

    @classmethod
    def from_yaml(cls, constructor, node):
        """Two-step constructor: yield a default Person, then set its name.

        Args:
            constructor: object providing
                ``construct_mapping(node, deep=True)``, returning the mapping
                for *node*.
            node: the YAML node tagged ``!Person``.

        Yields:
            The new Person instance, once, before its name is read from the
            mapping. The name is assigned when the generator is resumed.
        """
        inst = cls()
        # Hand the instance out first; the mapping is only read on resume.
        yield inst
        m = constructor.construct_mapping(node, deep=True)
        if 'Name' in m:
            inst.name = m['Name']

    def __repr__(self):
        return f'Person(name={self.name})'
class Car:
    """A car whose ``passengers`` list holds the driver first, then riders."""

    def __init__(self):
        self.passengers = []

    # The annotation is quoted so this class does not need ``Person`` to be
    # defined at the moment the method is created.
    def add_person(self, person: "Person" = None):
        """Append *person* to the end of the passenger list."""
        self.passengers.append(person)

    @classmethod
    def from_yaml(cls, constructor, node):
        """Two-step constructor: yield an empty Car, then fill it in.

        Args:
            constructor: object providing
                ``construct_mapping(node, deep=True)``, returning the mapping
                for *node*.
            node: the YAML node tagged ``!Car``.

        Yields:
            The new Car instance, once, before its passengers are populated.
            On resume, ``'Driver'`` (if present) is placed first and every
            entry of ``'Passengers'`` (if present) is appended after it.
        """
        inst = cls()
        # Hand the instance out first; the mapping is only read on resume.
        yield inst
        m = constructor.construct_mapping(node, deep=True)
        if 'Driver' in m:
            # The driver always goes in front of whoever is already aboard.
            inst.passengers = [m['Driver']] + inst.passengers
        if 'Passengers' in m:
            foo = m['Passengers']
            print(f'm[\'Passengers\'] = {foo}')
            for person in m['Passengers']:
                inst.add_person(person)

    def __repr__(self):
        return f'Car(passengers={self.passengers})'
if __name__ == "__main__":
    yaml = ruamel.yaml.YAML(typ='safe')
    # Registering the classes makes the loader use their from_yaml methods
    # for the !Person and !Car tags.
    yaml.register_class(Person)
    yaml.register_class(Car)
    # The document's nesting is restored here: each mapping key is indented
    # under the sequence entry (and tag/anchor) it belongs to.
    data = yaml.load("""
- !Person &0
  Name: 'Paul'
- !Person &1
  Name: 'George'
- !Person &3
  Name: 'John'
- !Car
  Driver: *0
  Passengers: [*1]
- !Car
  Driver: *3
  Passengers:
  - !Person &4
    Name: 'Ringo'
""")
    print(f'data = {data}')
which gives:
m['Passengers'] = [Person(name=George)]
m['Passengers'] = [Person(name=Ringo)]
data = [Person(name=Paul), Person(name=George), Person(name=John), Car(passengers=[Person(name=Paul), Person(name=George)]), Car(passengers=[Person(name=John), Person(name=Ringo)])]
Although it is allowed to write the tag followed by the anchor, it is IMO more appropriate to write the anchor followed by the tag, because you'll get an anchored instance of the tagged object.
So that leaves me wondering what the name is of &2 !Person
(probably the same as for !Person &2
), is it Pete?