Let's say I have a model and I want to do some preprocessing on it. (For this problem it does not matter if this is a pydantic model or some kind of nested iterable; it's a general question.)
def preprocess(string: str) -> str:
    """Placeholder for the preprocessing step.

    Accepts a string, applies some preprocessing to it and returns the
    processed string. The implementation is omitted in this example.
    """
class OtherModel(BaseModel):
    # Nested model: DummyModel holds one instance directly and a list of them.
    other_id: int
    some_name: str
class DummyModel(BaseModel):
    # Example model mixing plain fields, a list of nested models and a
    # single nested model.
    location_id: int
    other_models: List[OtherModel]
    name: str
    surname: str
    one_other_model: OtherModel
I want to make a recursive function that will iterate through every attribute of a model and run some preprocessing function on it. For example, that function could remove some letter from a string.
I got this far and I don't know how to proceed:
from collections.abc import Iterable
def preprocess_item(request: BaseModel) -> BaseModel:
    """Preprocess every string reachable from ``request`` and return it.

    The model is modified in place: string fields are passed through
    ``_remove_html_tag``, nested models are processed recursively, and
    lists, tuples, sets and dicts are rebuilt with their items processed.
    Values of any other type (ints, bytes, ``None``, ...) are left untouched.

    Args:
        request: Model to process. Iterating it must yield
            ``(field name, value)`` pairs, as pydantic models do.

    Returns:
        The same ``request`` object that was passed in.
    """
    for attribute_key, attribute_value in request:
        processed = _preprocess_value(attribute_value)
        # Nested models are changed in place and untouched values come back
        # as the very same object, so only reassign when a new value was built.
        if processed is not attribute_value:
            setattr(request, attribute_key, processed)
    return request


def _preprocess_value(value):
    """Return ``value`` with every string it contains preprocessed."""
    # NOTE(review): _remove_html_tag is not defined in this snippet; it is
    # assumed to take a str and return the cleaned str -- confirm.
    if isinstance(value, str):
        return _remove_html_tag(value)
    if isinstance(value, BaseModel):
        return preprocess_item(value)
    if isinstance(value, dict):
        # Keys are kept as they are; only the values are preprocessed.
        return {key: _preprocess_value(item) for key, item in value.items()}
    for container_type in (list, tuple, set, frozenset):
        if isinstance(value, container_type):
            # Items may be strings, models or further containers, so each one
            # is dispatched on its own type instead of being treated as a
            # model. Subclass instances come back as the plain builtin type.
            return container_type(_preprocess_value(item) for item in value)
    return value
This gives me the wrong result: it basically unpacks every value. I want the same request object returned, but with its string fields preprocessed.
CodePudding user response:
If you are actually dealing with Pydantic models, I would argue this is one of the use cases for validators.
There is not really any need for recursion because you can just define the validator on your own base model, if you want it to apply to all models (that inherit from it):
from pydantic import BaseModel as PydanticBaseModel
from pydantic import validator
def process_string(string: str) -> str:
    """Return ``string`` with every lowercase "a" removed."""
    without_a = string.replace("a", "")
    return without_a
class BaseModel(PydanticBaseModel):
    # Shared base class: every model inheriting from it gets the string
    # preprocessing validator below.

    @validator("*", pre=True, each_item=True)
    def preprocess(cls, v: object) -> object:
        """Run ``process_string`` on string values; pass others through.

        Registered for all fields ("*"), before pydantic's own validation
        (``pre=True``) and per item for container fields (``each_item=True``).
        """
        return process_string(v) if isinstance(v, str) else v
class OtherModel(BaseModel):
    # Nested model; its validators come from the custom BaseModel it inherits.
    other_id: int
    some_name: str
class DummyModel(BaseModel):
    # Top-level example model; defines no validator of its own and relies on
    # the one declared on the custom BaseModel.
    location_id: int
    other_models: list[OtherModel]
    name: str
    surname: str
    one_other_model: OtherModel
If you want to be more selective and apply the same validator only to specific models, validators can be made reusable as well:
from pydantic import BaseModel, validator
def preprocess(v: object) -> object:
    """Strip every lowercase "a" from string values; return others as is."""
    return v.replace("a", "") if isinstance(v, str) else v
class OtherModel(BaseModel):
    other_id: int
    some_name: str

    # Attach the shared module-level `preprocess` function to every field
    # ("*"), ahead of pydantic's own validation (pre=True). allow_reuse=True
    # lets the same function be registered on more than one model.
    _preprocess = validator(
        "*",
        pre=True,
        allow_reuse=True,
    )(preprocess)
class DummyModel(BaseModel):
    location_id: int
    other_models: list[OtherModel]
    name: str
    surname: str
    one_other_model: OtherModel

    # Same reusable validator as on OtherModel. each_item=True makes it run
    # on the individual items of container fields such as `other_models`
    # instead of on the container as a whole.
    _preprocess = validator("*", pre=True, each_item=True, allow_reuse=True)(
        preprocess
    )
class NotProcessed(BaseModel):
    # Counter-example: no preprocessing validator is attached to this model.
    field: str
We can test both versions like this:
if __name__ == "__main__":
    # Raw input containing plain strings, a list of nested models and a
    # single nested model, so each kind of field is exercised.
    payload = {
        "location_id": 1,
        "other_models": [
            {"other_id": 1, "some_name": "foo"},
            {"other_id": 2, "some_name": "spam"},
        ],
        "name": "bar",
        "surname": "baz",
        "one_other_model": {"other_id": 2, "some_name": "eggs"},
    }
    # Parsing triggers the validators; the result is printed as JSON.
    dummy = DummyModel.parse_obj(payload)
    print(dummy.json(indent=4))
The output in both cases is the same:
{
"location_id": 1,
"other_models": [
{
"other_id": 1,
"some_name": "foo"
},
{
"other_id": 2,
"some_name": "spm"
}
],
"name": "br",
"surname": "bz",
"one_other_model": {
"other_id": 2,
"some_name": "eggs"
}
}