Home > Net >  How to convert object data type to Float64 and Int64 after updating the Pandas for interpolation for
How to convert object data type to Float64 and Int64 for interpolation after updating Pandas

Time:08-04

I have updated my Anaconda environment and hence the associated libraries such as Pandas have been updated. I had a working code that now gives me the following error

ValueError: Invalid fill method. Expecting pad (ffill) or backfill (bfill). Got linear

When I inspect the data types (df.dtypes), all columns are reported as object, whereas with the previous Pandas version they were reported as int64 and float64.

I have looked at several threads on Stack Overflow with similar issues and found that the issue might be with the data column; however, following those threads did not solve the issue.

My code looks like this:

# Parse the 'Timestamp' column into pandas datetimes so that its values can be
# compared against pd.Timestamp bounds and accessed through the .dt accessor.
df['Timestamp'] = pd.to_datetime(df['Timestamp'])
def fault_mapper_FD(faultDate):
    """Map a timestamp to its FD label using hard-coded date windows.

    Each window is a closed interval ``start <= faultDate <= end`` whose
    bounds are given as ``pd.Timestamp`` constructor arguments
    (year, month, day, hour[, minute]).  An end bound of ``23, 59`` means
    23:59:00, so later seconds of that minute fall outside the window.

    Parameters
    ----------
    faultDate
        Value comparable with ``pd.Timestamp``.

    Returns
    -------
    0 if ``faultDate`` lies in one of the label-0 windows, 1 if it lies in
    one of the label-1 windows, otherwise ``None``.
    """
    label_zero_windows = (
        ((2017, 8, 27, 0), (2017, 8, 28, 0)),
        ((2017, 8, 29, 0), (2017, 8, 29, 23, 59)),
        ((2017, 12, 1, 0), (2017, 12, 1, 23, 59)),
        ((2017, 12, 3, 0), (2017, 12, 3, 23, 59)),
        ((2017, 12, 7, 0), (2017, 12, 8, 0)),
        ((2017, 12, 14, 0), (2017, 12, 14, 23, 59)),
        ((2018, 2, 7, 0), (2018, 2, 7, 23, 59)),
        ((2018, 2, 9, 0), (2018, 2, 9, 23, 59)),
        ((2017, 12, 20, 0), (2017, 12, 20, 23, 59)),
        ((2018, 2, 18, 0), (2018, 2, 18, 23, 59)),
        ((2018, 2, 1, 0), (2018, 2, 1, 23, 59)),
        ((2018, 1, 31, 0), (2018, 1, 31, 23, 59)),
        ((2018, 1, 28, 0), (2018, 1, 28, 23, 59)),
        ((2018, 1, 27, 0), (2018, 1, 27, 23, 59)),
    )
    label_one_windows = (
        ((2017, 9, 1, 0), (2017, 9, 1, 23, 59)),
        ((2017, 11, 30, 0), (2017, 11, 30, 23, 59)),
        ((2017, 12, 9, 0), (2017, 12, 9, 23, 59)),
        ((2017, 12, 10, 0), (2017, 12, 11, 0)),
        ((2017, 12, 24, 0), (2017, 12, 24, 23, 59)),
        ((2018, 2, 4, 0), (2018, 2, 4, 23, 59)),
        ((2018, 2, 5, 0), (2018, 2, 6, 0)),
    )

    # Label-0 windows are checked before label-1 windows, in the listed order.
    for label, windows in ((0, label_zero_windows), (1, label_one_windows)):
        for begin, end in windows:
            if pd.Timestamp(*begin) <= faultDate <= pd.Timestamp(*end):
                return label

    # No window matched: the caller receives None (a missing value).
    return None

# Label every row by passing its timestamp through fault_mapper_FD.
df['FD'] = df['Timestamp'].apply(fault_mapper_FD)

# Rows whose time of day is strictly before 07:00 or strictly after 22:00.
cond = (df['Timestamp'].dt.time < dt.time(7, 0)) | (df['Timestamp'].dt.time > dt.time(22, 0))
# Only within those rows, replace missing values with 0.
df[cond] = df[cond].fillna(0, axis=1)

When I try to interpolate

**df.interpolate(method ='linear', limit_direction ='backward', inplace=True)**

I get the error:

    ---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Input In [6], in <cell line: 1>()
----> 1 df.interpolate(method ='linear', limit_direction ='backward', inplace=True)

File ~\anaconda3\lib\site-packages\pandas\util\_decorators.py:311, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
    305 if len(args) > num_allow_args:
    306     warnings.warn(
    307         msg.format(arguments=arguments),
    308         FutureWarning,
    309         stacklevel=stacklevel,
    310     )
--> 311 return func(*args, **kwargs)

File ~\anaconda3\lib\site-packages\pandas\core\frame.py:10931, in DataFrame.interpolate(self, method, axis, limit, inplace, limit_direction, limit_area, downcast, **kwargs)
  10919 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "method"])
  10920 def interpolate(
  10921     self: DataFrame,
   (...)
  10929     **kwargs,
  10930 ) -> DataFrame | None:
> 10931     return super().interpolate(
  10932         method,
  10933         axis,
  10934         limit,
  10935         inplace,
  10936         limit_direction,
  10937         limit_area,
  10938         downcast,
  10939         **kwargs,
  10940     )

File ~\anaconda3\lib\site-packages\pandas\core\generic.py:7034, in NDFrame.interpolate(self, method, axis, limit, inplace, limit_direction, limit_area, downcast, **kwargs)
   7028 if isna(index).any():
   7029     raise NotImplementedError(
   7030         "Interpolation with NaNs in the index "
   7031         "has not been implemented. Try filling "
   7032         "those NaNs before interpolating."
   7033     )
-> 7034 new_data = obj._mgr.interpolate(
   7035     method=method,
   7036     axis=axis,
   7037     index=index,
   7038     limit=limit,
   7039     limit_direction=limit_direction,
   7040     limit_area=limit_area,
   7041     inplace=inplace,
   7042     downcast=downcast,
   7043     **kwargs,
   7044 )
   7046 result = self._constructor(new_data)
   7047 if should_transpose:

File ~\anaconda3\lib\site-packages\pandas\core\internals\managers.py:359, in BaseBlockManager.interpolate(self, **kwargs)
    358 def interpolate(self: T, **kwargs) -> T:
--> 359     return self.apply("interpolate", **kwargs)

File ~\anaconda3\lib\site-packages\pandas\core\internals\managers.py:304, in BaseBlockManager.apply(self, f, align_keys, ignore_failures, **kwargs)
    302         applied = b.apply(f, **kwargs)
    303     else:
--> 304         applied = getattr(b, f)(**kwargs)
    305 except (TypeError, NotImplementedError):
    306     if not ignore_failures:

File ~\anaconda3\lib\site-packages\pandas\core\internals\blocks.py:1482, in EABackedBlock.interpolate(self, method, axis, inplace, limit, fill_value, **kwargs)
   1480     new_values = values.T.fillna(value=fill_value, method=method, limit=limit).T
   1481 else:
-> 1482     new_values = values.fillna(value=fill_value, method=method, limit=limit)
   1483 return self.make_block_same_class(new_values)

File ~\anaconda3\lib\site-packages\pandas\core\arrays\_mixins.py:300, in NDArrayBackedExtensionArray.fillna(self, value, method, limit)
    296 @doc(ExtensionArray.fillna)
    297 def fillna(
    298     self: NDArrayBackedExtensionArrayT, value=None, method=None, limit=None
    299 ) -> NDArrayBackedExtensionArrayT:
--> 300     value, method = validate_fillna_kwargs(
    301         value, method, validate_scalar_dict_value=False
    302     )
    304     mask = self.isna()
    305     # error: Argument 2 to "check_value_size" has incompatible type
    306     # "ExtensionArray"; expected "ndarray"

File ~\anaconda3\lib\site-packages\pandas\util\_validators.py:378, in validate_fillna_kwargs(value, method, validate_scalar_dict_value)
    376     raise ValueError("Must specify a fill 'value' or 'method'.")
    377 elif value is None and method is not None:
--> 378     method = clean_fill_method(method)
    380 elif value is not None and method is None:
    381     if validate_scalar_dict_value and isinstance(value, (list, tuple)):

File ~\anaconda3\lib\site-packages\pandas\core\missing.py:125, in clean_fill_method(method, allow_nearest)
    123     expecting = "pad (ffill), backfill (bfill) or nearest"
    124 if method not in valid_methods:
--> 125     raise ValueError(f"Invalid fill method. Expecting {expecting}. Got {method}")
    126 return method

ValueError: Invalid fill method. Expecting pad (ffill) or backfill (bfill). Got linear

I have also tried the following solution found on Stack Overflow, but it did not help:

# Re-assign each column of df after passing it through pd.to_numeric;
# errors='coerce' turns values that cannot be parsed into NaN.
# NOTE(review): the loop visits every column of df — confirm that coercing
# non-numeric columns (e.g. 'Timestamp') is intended.
for col in df:
    df[col] = pd.to_numeric(df[col], errors='coerce')

CodePudding user response:

import pandas as pd

# Build a single-column frame of datetimes; 'foo' cannot be parsed, so
# errors='coerce' turns it into NaT (a missing value).
df = pd.DataFrame({'time': pd.to_datetime(['2010', '2011', 'foo', '2012', '2013'], 
                                          errors='coerce')})
# Localize the column to UTC, then convert it to the Asia/Kolkata time zone.
df['time'] = df.time.dt.tz_localize('UTC').dt.tz_convert('Asia/Kolkata')
# Interpolate with default arguments.
# NOTE(review): behavior on a tz-aware datetime column depends on the pandas
# version — verify against the installed release.
df.interpolate()

CodePudding user response:

By default, df.interpolate(method='linear') forward-fills NaNs after the last valid value. That is rather surprising given that the method name only mentions "interpolate".

To restrict df.interpolate to only interpolate NaNs between valid (non-NaN) values, as of Pandas version 0.23.0 (Reference), use limit_area='inside'.

import pandas as pd
import numpy as np
# Sample column with NaNs at the start, between valid values, and at the end.
a = pd.DataFrame({'col1': [np.nan, 1, np.nan, 3, np.nan, 5, np.nan]})
# Default linear interpolation: gaps are filled and the trailing NaN is
# filled with the last valid value as well.
a['linear'] = a['col1'].interpolate(method='linear')
# limit_area='inside' fills only NaNs surrounded by valid values.
a['linear inside'] = a['col1'].interpolate(method='linear', limit_area='inside')
print(a)
  • Related