In one column of my df I have values together with their units of measurement, so I extracted the values into another column; however, the dtype
of the new column is still object.
My table:
cost | uom_value |
---|---|
23226.8835 kg | 23226.8835 |
244.09 kg | 244.09 |
24226.5 kg | 24226.5 |
255.01 kg | 255.01 |
I wanted to convert them to float
to use them in my calculations; however, I am getting the error below, even when doing a simple calculation such as df['uom_value'].astype(float).sum().
Any help is appreciated.
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Input In [22], in <cell line: 1>()
----> 1 df['uom_value'].astype(float).sum()
File ~\Anaconda3\envs\general\lib\site-packages\pandas\core\generic.py:5912, in NDFrame.astype(self, dtype, copy, errors)
5905 results = [
5906 self.iloc[:, i].astype(dtype, copy=copy)
5907 for i in range(len(self.columns))
5908 ]
5910 else:
5911 # else, only a single dtype is given
-> 5912 new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors)
5913 return self._constructor(new_data).__finalize__(self, method="astype")
5915 # GH 33113: handle empty frame or series
File ~\Anaconda3\envs\general\lib\site-packages\pandas\core\internals\managers.py:419, in BaseBlockManager.astype(self, dtype, copy, errors)
418 def astype(self: T, dtype, copy: bool = False, errors: str = "raise") -> T:
--> 419 return self.apply("astype", dtype=dtype, copy=copy, errors=errors)
File ~\Anaconda3\envs\general\lib\site-packages\pandas\core\internals\managers.py:304, in BaseBlockManager.apply(self, f, align_keys, ignore_failures, **kwargs)
302 applied = b.apply(f, **kwargs)
303 else:
--> 304 applied = getattr(b, f)(**kwargs)
305 except (TypeError, NotImplementedError):
306 if not ignore_failures:
File ~\Anaconda3\envs\general\lib\site-packages\pandas\core\internals\blocks.py:580, in Block.astype(self, dtype, copy, errors)
562 """
563 Coerce to the new dtype.
564
(...)
576 Block
577 """
578 values = self.values
--> 580 new_values = astype_array_safe(values, dtype, copy=copy, errors=errors)
582 new_values = maybe_coerce_values(new_values)
583 newb = self.make_block(new_values)
File ~\Anaconda3\envs\general\lib\site-packages\pandas\core\dtypes\cast.py:1292, in astype_array_safe(values, dtype, copy, errors)
1289 dtype = dtype.numpy_dtype
1291 try:
-> 1292 new_values = astype_array(values, dtype, copy=copy)
1293 except (ValueError, TypeError):
1294 # e.g. astype_nansafe can fail on object-dtype of strings
1295 # trying to convert to float
1296 if errors == "ignore":
File ~\Anaconda3\envs\general\lib\site-packages\pandas\core\dtypes\cast.py:1237, in astype_array(values, dtype, copy)
1234 values = values.astype(dtype, copy=copy)
1236 else:
-> 1237 values = astype_nansafe(values, dtype, copy=copy)
1239 # in pandas we don't store numpy str dtypes, so convert to object
1240 if isinstance(dtype, np.dtype) and issubclass(values.dtype.type, str):
File ~\Anaconda3\envs\general\lib\site-packages\pandas\core\dtypes\cast.py:1181, in astype_nansafe(arr, dtype, copy, skipna)
1177 raise ValueError(msg)
1179 if copy or is_object_dtype(arr.dtype) or is_object_dtype(dtype):
1180 # Explicit copy, or required since NumPy can't view from / to object.
-> 1181 return arr.astype(dtype, copy=True)
1183 return arr.astype(dtype, copy=copy)
ValueError: could not convert string to float: 'you can see diemension in the order, it was 3'
CodePudding user response:
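Use the pandas.to_numeric function. Choose its errors argument based on your use case: errors='coerce' converts any value that cannot be parsed as a number (such as the free-text string shown in the error message) to NaN instead of raising, e.g. pd.to_numeric(df['uom_value'], errors='coerce').sum(). For more detail, read about it in the pandas docs.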