I have two dataframes, telemetry and errors1. I am performing a pandas join operation on these two dataframes.
The telemetry dataframe looks like this
and the errors1 dataframe looks like this
Now the join operation is done like this
error_count= telemetry.join(errors1, on= ((telemetry['machineID'] == errors1['machineID'])
& (telemetry['datetime'] == errors1['datetime'])),
how='left')
which is giving the following error
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-222-84983d093331> in <module>
----> 1 error_count= telemetry.join(errors1, on= ((telemetry['machineID'] == errors1['machineID'])
2 & (telemetry['datetime'] == errors1['datetime'])),
3 how='left')
/anaconda/envs/azureml_44cb7df5d7402b6a151767e96abfe35d/lib/python3.6/site-packages/pandas/core/ops/common.py in new_method(self, other)
62 other = item_from_zerodim(other)
63
---> 64 return method(self, other)
65
66 return new_method
/anaconda/envs/azureml_44cb7df5d7402b6a151767e96abfe35d/lib/python3.6/site-packages/pandas/core/ops/__init__.py in wrapper(self, other)
519
520 if isinstance(other, ABCSeries) and not self._indexed_same(other):
--> 521 raise ValueError("Can only compare identically-labeled Series objects")
522
523 lvalues = extract_array(self, extract_numpy=True)
ValueError: Can only compare identically-labeled Series objects
Edit 1: If I instead use error_count= telemetry.join(errors1.set_index(['machineID','datetime']), on=['machineID', 'datetime'], how='left'), it gives the following error.
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-228-845bbda5ab1b> in <module>
----> 1 error_count= telemetry.join(errors1.set_index(['machineID','datetime']), on=['machineID', 'datetime'], how='left')
/anaconda/envs/azureml_44cb7df5d7402b6a151767e96abfe35d/lib/python3.6/site-packages/pandas/core/frame.py in join(self, other, on, how, lsuffix, rsuffix, sort)
7204 """
7205 return self._join_compat(
-> 7206 other, on=on, how=how, lsuffix=lsuffix, rsuffix=rsuffix, sort=sort
7207 )
7208
/anaconda/envs/azureml_44cb7df5d7402b6a151767e96abfe35d/lib/python3.6/site-packages/pandas/core/frame.py in _join_compat(self, other, on, how, lsuffix, rsuffix, sort)
7227 right_index=True,
7228 suffixes=(lsuffix, rsuffix),
-> 7229 sort=sort,
7230 )
7231 else:
/anaconda/envs/azureml_44cb7df5d7402b6a151767e96abfe35d/lib/python3.6/site-packages/pandas/core/reshape/merge.py in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)
84 copy=copy,
85 indicator=indicator,
---> 86 validate=validate,
87 )
88 return op.get_result()
/anaconda/envs/azureml_44cb7df5d7402b6a151767e96abfe35d/lib/python3.6/site-packages/pandas/core/reshape/merge.py in __init__(self, left, right, how, on, left_on, right_on, axis, left_index, right_index, sort, suffixes, copy, indicator, validate)
629 # validate the merge keys dtypes. We may need to coerce
630 # to avoid incompat dtypes
--> 631 self._maybe_coerce_merge_keys()
632
633 # If argument passed to validate,
/anaconda/envs/azureml_44cb7df5d7402b6a151767e96abfe35d/lib/python3.6/site-packages/pandas/core/reshape/merge.py in _maybe_coerce_merge_keys(self)
1148 # datetimelikes must match exactly
1149 elif needs_i8_conversion(lk) and not needs_i8_conversion(rk):
-> 1150 raise ValueError(msg)
1151 elif not needs_i8_conversion(lk) and needs_i8_conversion(rk):
1152 raise ValueError(msg)
ValueError: You are trying to merge on datetime64[ns] and object columns. If you wish to proceed you should use pd.concat
CodePudding user response:
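I suggest you use pd.merge, but the key columns must have the same dtype first. The first error occurs because `on` expects column names, not a boolean expression: comparing telemetry['machineID'] with errors1['machineID'] fails as the two Series have different indexes. The second error shows that telemetry['datetime'] is datetime64[ns] while errors1['datetime'] is object (strings); pd.merge performs the same dtype check, so convert the column before merging:
errors1['datetime'] = pd.to_datetime(errors1['datetime'])
df = pd.merge(telemetry, errors1, how='left', left_on=['machineID','datetime'], right_on = ['machineID','datetime'])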