looks like I need your help, I am trying to merge the datasets into a single dataset. By using this codes
import pandas as pd
import numpy as np
Total_Transfer = pd.read_excel('total_transfer.xlsx')
Total_Issued = pd.read_excel('total_issued.xlsx')
Total_Retirement = pd.read_excel('retirement_finalist.xlsx')
pd.merge(Total_Issued,Total_Retirement,Total_Transfer, on ='Vintage')
However it shows an error like this
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-44-053aa3dcdb13> in <module>
----> 1 pd.merge(Total_Issued,Total_Retirement,Total_Transfer, on ='Vintage')
~\anaconda3\lib\site-packages\pandas\core\reshape\merge.py in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)
87 validate=validate,
88 )
---> 89 return op.get_result()
90
91
~\anaconda3\lib\site-packages\pandas\core\reshape\merge.py in get_result(self)
666 self.left, self.right = self._indicator_pre_merge(self.left, self.right)
667
--> 668 join_index, left_indexer, right_indexer = self._get_join_info()
669
670 llabels, rlabels = _items_overlap_with_suffix(
~\anaconda3\lib\site-packages\pandas\core\reshape\merge.py in _get_join_info(self)
882 )
883 else:
--> 884 (left_indexer, right_indexer) = self._get_join_indexers()
885
886 if self.right_index:
~\anaconda3\lib\site-packages\pandas\core\reshape\merge.py in _get_join_indexers(self)
860 def _get_join_indexers(self):
861 """ return the join indexers """
--> 862 return _get_join_indexers(
863 self.left_join_keys, self.right_join_keys, sort=self.sort, how=self.how
864 )
~\anaconda3\lib\site-packages\pandas\core\reshape\merge.py in _get_join_indexers(left_keys, right_keys, sort, how, **kwargs)
1326 for n in range(len(left_keys))
1327 )
-> 1328 zipped = zip(*mapped)
1329 llab, rlab, shape = [list(x) for x in zipped]
1330
~\anaconda3\lib\site-packages\pandas\core\reshape\merge.py in <genexpr>(.0)
1323 # get left & right join labels and num. of levels at each location
1324 mapped = (
-> 1325 _factorize_keys(left_keys[n], right_keys[n], sort=sort, how=how)
1326 for n in range(len(left_keys))
1327 )
~\anaconda3\lib\site-packages\pandas\core\reshape\merge.py in _factorize_keys(lk, rk, sort, how)
1993 count = 1
1994
-> 1995 if how == "right":
1996 return rlab, llab, count
1997 return llab, rlab, count
~\anaconda3\lib\site-packages\pandas\core\generic.py in __nonzero__(self)
1327
1328 def __nonzero__(self):
-> 1329 raise ValueError(
1330 f"The truth value of a {type(self).__name__} is ambiguous. "
1331 "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
I don't understand where the ambiguity comes, but I am sure that I have change the data type in to numeric for the key value of every datasets.
I am very appreciate for your support on this one. Thanks!
CodePudding user response:
I think this error appears because you are not using correctly pandas merge() function - you can merge only two DataFrames at a time.
Maybe it helps:
Total_Issued.merge(Total_Retirement, on ='Vintage').merge(Total_Transfer, on='Vintage')
or
pd.merge(pd.merge(Total_Issued, Total_Retirement, on = 'Vintage'), Total_Transfer, on='Vintage')