I am trying to use decorators from the Python package "Pandera", and I am having trouble getting them to work with classes.
First I create schemas for Pandera:
# Build the input and output schemas and persist each one to a YAML file.
import pandera as pa  # `pa` is referenced below, so it must be imported
import yaml
from pandera import Column, Check

# Schema for the DataFrame passed into the function.
in_ = pa.DataFrameSchema(
    {
        "Name": Column(object, nullable=True),
        "Height": Column(object, nullable=True),
    }
)
# NOTE(review): yaml.dump() serializes the Python object itself; this is not
# pandera's own YAML schema format (see DataFrameSchema.to_yaml) -- confirm
# that this on-disk format is what the reader of the file expects.
with open("./in_.yml", "w") as file:
    yaml.dump(in_, file)

# Schema for the DataFrame returned by the function (same columns as in_).
out_ = pa.DataFrameSchema(
    {
        "Name": Column(object, nullable=True),
        "Height": Column(object, nullable=True),
    }
)
with open("./out_.yml", "w") as file:
    yaml.dump(out_, file)
Next I create a test.py file with a class:
from pandera import check_io
import pandas as pd


class TransformClass():
    """Holds `func`, a DataFrame pass-through decorated with pandera's check_io."""

    # These statements run once, while the class body is executed.
    # file.read() returns the file's text, so in_ and out_ are plain `str`
    # objects at the point where they are handed to check_io below.
    with open("./in_.yml", "r") as file:
        in_ = file.read()
    with open("./out_.yml", "r") as file:
        out_ = file.read()

    @staticmethod
    @check_io(df=in_, out=out_)
    def func(df: pd.DataFrame) -> pd.DataFrame:
        """Return `df` unchanged; check_io is given `in_` for `df` and `out_` for the result."""
        return df
Finally, I import this class:
import numpy as np  # provides np.nan used in the sample data
import pandas as pd  # provides pd.DataFrame

from test import TransformClass

# Sample frame: 'Name' contains a missing value, and 'Qualification' is a
# column that is not declared in either schema.
data = {
    'Name': [np.nan, 'Princi', 'Gaurav', 'Anuj'],
    'Height': [5.1, 6.2, 5.1, 5.2],
    'Qualification': ['Msc', 'MA', 'Msc', 'Msc'],
}
df = pd.DataFrame(data)
TransformClass.func(df)
I am getting:
File C:\Anaconda3\envs\py310\lib\site-packages\pandera\decorators.py:464, in check_io.<locals>._wrapper(fn, instance, args, kwargs)
462 out_schemas = []
463 else:
--> 464 raise TypeError(
465 f"type of out argument not recognized: {type(out)}"
466 )
468 wrapped_fn = fn
469 for input_getter, input_schema in inputs.items():
470 # pylint: disable=no-value-for-parameter
TypeError: type of out argument not recognized: <class 'str'>
Any help would be much appreciated.
CodePudding user response:
The check_io decorator expects arguments of type pandera.DataFrameSchema. However, it is being passed out_, which is of type str since it is the output of file.read().
The Pandera docs explain which types the check_io decorator is expecting.
A solution would be to pass the output of the file.read() line to the Pandera constructor, possibly with some transformation:
# NOTE: yaml.safe_load() returns plain Python data (e.g. str/dict/list), not a
# DataFrameSchema; the result still has to be converted, e.g. with
# pa.DataFrameSchema.from_yaml(), before it is passed to check_io.
out_ = yaml.safe_load(file.read())
CodePudding user response:
Thanks to @grbeazley, here is the full solution:
# Round-trip a pandera schema through a YAML file.
import pandera as pa  # `pa` is referenced below, so it must be imported
import yaml
from pandera import Column, Check

in_ = pa.DataFrameSchema(
    {
        "Name": Column(object, nullable=True),
        "Height": Column(object, nullable=True),
    }
)

# to_yaml() is called without a stream, and its result (pandera's YAML text
# for the schema) is then written to the file through yaml.dump().
with open("in_.yml", "w") as file:
    yaml.dump(in_.to_yaml(), file)

# Reading reverses the two steps: yaml.safe_load() recovers what was dumped,
# and from_yaml() rebuilds the DataFrameSchema from it.
with open("./in_.yml", "r") as file:
    in_ = yaml.safe_load(file.read())
    # `_` now holds the reconstructed schema; `in_` is the intermediate value.
    _ = pa.DataFrameSchema.from_yaml(in_)