I am trying to compare two columns in the same dataframe row by row. As a result, I also need to produce a new dataframe containing a calculated value that indicates whether the data differ or not.
test_df:
id_a | id_b | value_a | value_b |
---|---|---|---|
123 | abc | 1.0 | 0.0 |
345 | def | 1.0 | 1.0 |
456 | jgk | NaN | 0.0 |
654 | mngk | 1.0 | NaN |
This is the code I use to compare the value_a and value_b columns. I have to compare 10 more pairs of columns in the same way (number_a vs number_b -> calc_number, name_a vs name_b -> calc_name, and so on).
I have written a common function, and at each step I pass it the dataframe and the list of columns that need to be compared.
First, I pass test_df and the columns list containing value_a and value_b. For each row, the value in column value_a needs to be compared with the value in column value_b; if they match, a new column should be created in the new dataframe with the value Test1. The new dataframe should also store the respective id_a and id_b.
Expected output df:
id_a | id_b | calc_value |
---|---|---|
123 | abc | Test4 |
345 | def | Test1 |
456 | jgk | Test2 |
654 | mngk | Test3 |
# Compares the columns named by cols[2] and cols[3] and returns one of the
# labels "Test1" .. "Test4".
# NOTE(review): every x[<mask>] below is boolean-mask indexing. When x is a whole
# DataFrame (as in the call shown with test_df), each operand of & is a filtered
# DataFrame rather than a single True/False, which matches the
# DataFrame.__and__ frames in the traceback quoted further down.
def que_fn(x,cols):
# Apparent intent: both values present and equal.
if (x[(x[cols[2]]==x[cols[3]])]) & (x[x[cols[2]].notna()]) & (x[x[cols[3]].notna()]):
return "Test1"
# Apparent intent: both values missing.
elif (x[x[cols[2]].isnull()]) & (x[x[cols[3]].isnull()]):
return "Test1"
# Apparent intent: only the first compared value missing.
elif (x[x[cols[2]].isnull()]) & (x[x[cols[3]].notna()]):
return "Test2"
# Apparent intent: only the second compared value missing.
elif (x[x[cols[2]].notna()]) & (x[x[cols[3]].isnull()]):
return "Test3"
# Fallback when none of the conditions above holds.
else:
return "Test4"
# Column names handed to que_fn; it reads positions 2 and 3 (value_a, value_b).
check_cols = ['id_a','id_b','value_a','value_b']
# NOTE(review): que_fn(test_df, check_cols) is evaluated right here with the whole
# DataFrame as x; apply() is handed its return value, not the function itself.
new_df['calc_value'] = test_df.apply(que_fn(test_df,check_cols),axis=1)
The above causes the following error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
File C:\Program Files\Python310\lib\site-packages\pandas\core\ops\array_ops.py:301, in na_logical_op(x, y, op)
292 try:
293 # For exposition, write:
294 # yarr = isinstance(y, np.ndarray)
(...)
299 # Then Cases where this goes through without raising include:
300 # (xint or xbool) and (yint or bool)
--> 301 result = op(x, y)
302 except TypeError:
TypeError: ufunc 'bitwise_and' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
Input In [94], in <cell line: 1>()
----> 1 FactLeaseActivity_disc_df['TransferMoveIn_disc'] = sample_df['TransferMoveIn_EDP'].apply(validate_fn(sample_df,move_in_cols),axis=1)
Input In [89], in validate_fn(x, cols)
6 print(x[cols[3]])
7 print(cols)
----> 8 if (x[(x[cols[2]]==x[cols[3]])]) & (x[x[cols[2]].notna()]) & (x[x[cols[3]].notna()]):
9 # if (x[(x[cols[2]]==x[cols[3]]) and (x[cols[2]].notna()) and (x[cols[3]].notna())]):
10 return 0
11 elif (x[x[cols[2]].isnull()]) & (x[x[cols[3]].isnull()]):
File C:\Program Files\Python310\lib\site-packages\pandas\core\ops\common.py:70, in _unpack_zerodim_and_defer.<locals>.new_method(self, other)
66 return NotImplemented
68 other = item_from_zerodim(other)
---> 70 return method(self, other)
File C:\Program Files\Python310\lib\site-packages\pandas\core\arraylike.py:70, in OpsMixin.__and__(self, other)
68 @unpack_zerodim_and_defer("__and__")
69 def __and__(self, other):
---> 70 return self._logical_method(other, operator.and_)
File C:\Program Files\Python310\lib\site-packages\pandas\core\frame.py:6946, in DataFrame._arith_method(self, other, op)
6942 other = ops.maybe_prepare_scalar_for_op(other, (self.shape[axis],))
6944 self, other = ops.align_method_FRAME(self, other, axis, flex=True, level=None)
-> 6946 new_data = self._dispatch_frame_op(other, op, axis=axis)
6947 return self._construct_result(new_data)
File C:\Program Files\Python310\lib\site-packages\pandas\core\frame.py:6985, in DataFrame._dispatch_frame_op(self, right, func, axis)
6979 # TODO: The previous assertion `assert right._indexed_same(self)`
6980 # fails in cases with empty columns reached via
6981 # _frame_arith_method_with_reindex
6982
6983 # TODO operate_blockwise expects a manager of the same type
6984 with np.errstate(all="ignore"):
-> 6985 bm = self._mgr.operate_blockwise(
6986 # error: Argument 1 to "operate_blockwise" of "ArrayManager" has
6987 # incompatible type "Union[ArrayManager, BlockManager]"; expected
6988 # "ArrayManager"
6989 # error: Argument 1 to "operate_blockwise" of "BlockManager" has
6990 # incompatible type "Union[ArrayManager, BlockManager]"; expected
6991 # "BlockManager"
6992 right._mgr, # type: ignore[arg-type]
6993 array_op,
6994 )
6995 return self._constructor(bm)
6997 elif isinstance(right, Series) and axis == 1:
6998 # axis=1 means we want to operate row-by-row
File C:\Program Files\Python310\lib\site-packages\pandas\core\internals\managers.py:1409, in BlockManager.operate_blockwise(self, other, array_op)
1405 def operate_blockwise(self, other: BlockManager, array_op) -> BlockManager:
1406 """
1407 Apply array_op blockwise with another (aligned) BlockManager.
1408 """
-> 1409 return operate_blockwise(self, other, array_op)
File C:\Program Files\Python310\lib\site-packages\pandas\core\internals\ops.py:63, in operate_blockwise(left, right, array_op)
61 res_blks: list[Block] = []
62 for lvals, rvals, locs, left_ea, right_ea, rblk in _iter_block_pairs(left, right):
---> 63 res_values = array_op(lvals, rvals)
64 if left_ea and not right_ea and hasattr(res_values, "reshape"):
65 res_values = res_values.reshape(1, -1)
File C:\Program Files\Python310\lib\site-packages\pandas\core\ops\array_ops.py:391, in logical_op(left, right, op)
387 # For int vs int `^`, `|`, `&` are bitwise operators and return
388 # integer dtypes. Otherwise these are boolean ops
389 filler = fill_int if is_self_int_dtype and is_other_int_dtype else fill_bool
--> 391 res_values = na_logical_op(lvalues, rvalues, op)
392 # error: Cannot call function of unknown type
393 res_values = filler(res_values) # type: ignore[operator]
File C:\Program Files\Python310\lib\site-packages\pandas\core\ops\array_ops.py:308, in na_logical_op(x, y, op)
306 x = ensure_object(x)
307 y = ensure_object(y)
--> 308 result = libops.vec_binop(x.ravel(), y.ravel(), op)
309 else:
310 # let null fall thru
311 assert lib.is_scalar(y)
File C:\Program Files\Python310\lib\site-packages\pandas\_libs\ops.pyx:252, in pandas._libs.ops.vec_binop()
File C:\Program Files\Python310\lib\site-packages\pandas\_libs\ops.pyx:245, in pandas._libs.ops.vec_binop()
TypeError: unsupported operand type(s) for &: 'float' and 'float'
If I change the operator & to and, then I get the error "ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all()."
Please help me to resolve these errors.
CodePudding user response:
How about this:
def que_fn(x, cols):
    """Classify how two compared values relate within a single row.

    Intended to be called once per row, e.g. via
    ``df.apply(lambda row: que_fn(row, cols), axis=1)``.

    Args:
        x: One row (a ``pandas.Series`` or any mapping) indexable by column name.
        cols: Column names; ``cols[2]`` and ``cols[3]`` are the pair to compare.

    Returns:
        ``"Test1"`` if both values are equal or both are missing,
        ``"Test2"`` if only the first value is missing,
        ``"Test3"`` if only the second value is missing,
        ``"Test4"`` if both are present but different.
    """
    # Local import keeps the snippet self-contained (no import block is shown).
    import pandas as pd

    first, second = x[cols[2]], x[cols[3]]
    # pd.isna handles NaN, None and NaT, and (unlike np.isnan) does not raise
    # on strings, so the same function works for name_a/name_b style columns.
    first_missing, second_missing = pd.isna(first), pd.isna(second)

    if first_missing and second_missing:
        return "Test1"
    if first_missing:
        return "Test2"
    if second_missing:
        return "Test3"
    # Both values are present here, so a plain scalar comparison is safe.
    return "Test1" if first == second else "Test4"
# Columns handed to que_fn; positions 2 and 3 are the pair being compared.
check_cols = ['id_a', 'id_b', 'value_a', 'value_b']
# axis=1 makes apply call the lambda once per row, so que_fn sees a single row
# (scalar values) instead of the whole DataFrame.
row_labels = test_df.apply(lambda row: que_fn(row, check_cols), axis=1)
new_df['calc_value'] = row_labels
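The problem is that you were using DataFrame-level boolean indexing inside the function, whereas with `apply(..., axis=1)` each call receives a single row whose entries are scalar numeric/NaN values. Moreover, you need to pass a function (here a lambda) to `apply`, rather than the result of calling `que_fn`, so that it is invoked once for each row of the dataframe.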