I have a code that
- reads the data from CSV,
- replaces the columns from space to underscore, and
- replaces nan with None.
def read_file_and_transform(local_file_path):
""" """
try:
data_df = pd.read_csv(local_file_path)
data_df.columns = data_df.columns.str.replace(' ', '_')
clean_df = data_df.where((pd.notnull(data_df)), None)
except Exception as e:
logger.error("Failure in read file and transform method {}".format(e))
raise e
I am writing a unit test case for these three lines and facing the error with line 3
Here is my test case:
class MockPandas:
def __init__(self):
pass
def read_csv(self, *args, **kwargs):
""" """
return pd.DataFrame([{"a b": np.nan, "b": 2.33}])
def notnull(self, *args, **kwargs):
""" """
return pd.DataFrame([{"a_b": "None", "b": 2.33}])
@patch("path", MockPandas())
def test_read_file_and_transform(self):
""" """
result = self.obj.read_file_and_transform("/file_path")
assert result == [{"a": None, "b": 2.33}]
The error I am facing is :
ValueError: Boolean array expected for the condition, not object
Can anyone help me here? Thanks
CodePudding user response:
pandas.notnull
returns a new data frame with the same size as the original data frame, where each cell has a boolean value indicating if the respective value is not nan.
Therefore you should change the return value of your mocked version of notnull
to match the expected return value.
For example, if the original df is:
A B C D
0 Sandy NaN 20.0 14.8
1 alex olivia 20.0 3.0
2 brook terica 7.0 NaN
3 kelly dan NaN 2.3
4 NaN amanda 8.0 6.0
Then df.notnull would be:
A B C D
0 True False True True
1 True True True True
2 True True True False
3 True True False True
4 False True True True
Panda's notnull documentation can be found here.