I was trying to create the 3-month and 6-month data frames, and the following error occurred. My data contains entries from January to September. I have imported the relevant files. Please help me.
Code I tried:
tx_data = pd.read_excel('final2.xlsx')
tx_data['InvoiceDate'] = pd.to_datetime(tx_data['InvoiceDate'])
#create 3m and 6m dataframes
tx_3m = tx_data[(tx_data.InvoiceDate >= date(2021,1,1)) & (tx_data.InvoiceDate < date(2021,3,1))].reset_index(drop=True)
tx_6m = tx_data[(tx_data.InvoiceDate >= date(2021,3,1)) & (tx_data.InvoiceDate < date(2021,9,1))].reset_index(drop=True)
#create tx_user for assigning clustering
tx_user = pd.DataFrame(tx_3m['CustomerID'].unique())
tx_user.columns = ['CustomerID']
Error StackTrace:
InvalidComparison Traceback (most recent call last)
~\anaconda3\lib\site-packages\pandas\core\arrays\datetimelike.py in wrapper(self, other)
115 try:
--> 116 other = _validate_comparison_value(self, other)
117 except InvalidComparison:
~\anaconda3\lib\site-packages\pandas\core\arrays\datetimelike.py in _validate_comparison_value(self, other)
95 elif not is_list_like(other):
---> 96 raise InvalidComparison(other)
97
InvalidComparison: 2021-01-01
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-10-f685d23cc10a> in <module>
6 #create 3m and 6m dataframes
7
----> 8 tx_3m = tx_data[(tx_data.InvoiceDate >= date(2021,1,1)) & (tx_data.InvoiceDate < date(2021,3,1))].reset_index(drop=True)
9 tx_6m = tx_data[(tx_data.InvoiceDate >= date(2021,3,1)) & (tx_data.InvoiceDate < date(2021,9,1))].reset_index(drop=True)
10
~\anaconda3\lib\site-packages\pandas\core\ops\common.py in new_method(self, other)
63 other = item_from_zerodim(other)
64
---> 65 return method(self, other)
66
67 return new_method
~\anaconda3\lib\site-packages\pandas\core\ops\__init__.py in wrapper(self, other)
368 rvalues = extract_array(other, extract_numpy=True)
369
--> 370 res_values = comparison_op(lvalues, rvalues, op)
371
372 return self._construct_result(res_values, name=res_name)
~\anaconda3\lib\site-packages\pandas\core\ops\array_ops.py in comparison_op(left, right, op)
228 if should_extension_dispatch(lvalues, rvalues):
229 # Call the method on lvalues
--> 230 res_values = op(lvalues, rvalues)
231
232 elif is_scalar(rvalues) and isna(rvalues):
~\anaconda3\lib\site-packages\pandas\core\ops\common.py in new_method(self, other)
63 other = item_from_zerodim(other)
64
---> 65 return method(self, other)
66
67 return new_method
~\anaconda3\lib\site-packages\pandas\core\arrays\datetimelike.py in wrapper(self, other)
116 other = _validate_comparison_value(self, other)
117 except InvalidComparison:
--> 118 return invalid_comparison(self, other, op)
119
120 dtype = getattr(other, "dtype", None)
~\anaconda3\lib\site-packages\pandas\core\ops\invalid.py in invalid_comparison(left, right, op)
32 else:
33 typ = type(right).__name__
---> 34 raise TypeError(f"Invalid comparison between dtype={left.dtype} and {typ}")
35 return res_values
36
TypeError: Invalid comparison between dtype=datetime64[ns] and date
CodePudding user response:
If you want to keep both sides of the comparison as `datetime.date` objects, convert the column while comparing, like this:
tx_data.InvoiceDate.dt.date >= date(2021,1,1)
The issue in your code is that `tx_data.InvoiceDate` has dtype `datetime64[ns]`, while `date(2021,1,1)` is of type `datetime.date`, and pandas refuses to compare the two directly (hence the `TypeError`).
CodePudding user response:
You can replace your calls to `date` with `pd.Timestamp`, as follows:
tx_3m = tx_data[(tx_data.InvoiceDate >= pd.Timestamp(2021,1,1)) & (tx_data.InvoiceDate < pd.Timestamp(2021,3,1))].reset_index(drop=True)
tx_6m = tx_data[(tx_data.InvoiceDate >= pd.Timestamp(2021,3,1)) & (tx_data.InvoiceDate < pd.Timestamp(2021,9,1))].reset_index(drop=True)
`pd.Timestamp` produces an object of type `pandas._libs.tslibs.timestamps.Timestamp`, which can be compared with pandas objects of dtype `datetime64[ns]`.