I updated my Anaconda environment, which also updated the associated libraries such as Pandas. Code that used to work now gives me the following error:
ValueError: Invalid fill method. Expecting pad (ffill) or backfill (bfill). Got linear
When I inspect the data types (df.dtypes), every column is reported as object, whereas with the previous Pandas version they were shown as int64 and float64.
I have looked at several threads on Stack Overflow with similar issues and found that the problem might be with a data column; however, following the suggestions in those threads did not solve the issue.
My code looks like this:
# Parse the 'Timestamp' column into pandas datetimes so that the pd.Timestamp
# comparisons and the .dt accessor used further down work on it.
df['Timestamp'] = pd.to_datetime(df['Timestamp'])
# (start, end) windows, both ends inclusive, that map to label 0.
# Built once at import time instead of on every call.
_FD_LABEL_0_WINDOWS = (
    (pd.Timestamp(2017, 8, 27, 0), pd.Timestamp(2017, 8, 28, 0)),
    (pd.Timestamp(2017, 8, 29, 0), pd.Timestamp(2017, 8, 29, 23, 59)),
    (pd.Timestamp(2017, 12, 1, 0), pd.Timestamp(2017, 12, 1, 23, 59)),
    (pd.Timestamp(2017, 12, 3, 0), pd.Timestamp(2017, 12, 3, 23, 59)),
    (pd.Timestamp(2017, 12, 7, 0), pd.Timestamp(2017, 12, 8, 0)),
    (pd.Timestamp(2017, 12, 14, 0), pd.Timestamp(2017, 12, 14, 23, 59)),
    (pd.Timestamp(2018, 2, 7, 0), pd.Timestamp(2018, 2, 7, 23, 59)),
    (pd.Timestamp(2018, 2, 9, 0), pd.Timestamp(2018, 2, 9, 23, 59)),
    (pd.Timestamp(2017, 12, 20, 0), pd.Timestamp(2017, 12, 20, 23, 59)),
    (pd.Timestamp(2018, 2, 18, 0), pd.Timestamp(2018, 2, 18, 23, 59)),
    (pd.Timestamp(2018, 2, 1, 0), pd.Timestamp(2018, 2, 1, 23, 59)),
    (pd.Timestamp(2018, 1, 31, 0), pd.Timestamp(2018, 1, 31, 23, 59)),
    (pd.Timestamp(2018, 1, 28, 0), pd.Timestamp(2018, 1, 28, 23, 59)),
    (pd.Timestamp(2018, 1, 27, 0), pd.Timestamp(2018, 1, 27, 23, 59)),
)

# (start, end) windows, both ends inclusive, that map to label 1.
_FD_LABEL_1_WINDOWS = (
    (pd.Timestamp(2017, 9, 1, 0), pd.Timestamp(2017, 9, 1, 23, 59)),
    (pd.Timestamp(2017, 11, 30, 0), pd.Timestamp(2017, 11, 30, 23, 59)),
    (pd.Timestamp(2017, 12, 9, 0), pd.Timestamp(2017, 12, 9, 23, 59)),
    (pd.Timestamp(2017, 12, 10, 0), pd.Timestamp(2017, 12, 11, 0)),
    (pd.Timestamp(2017, 12, 24, 0), pd.Timestamp(2017, 12, 24, 23, 59)),
    (pd.Timestamp(2018, 2, 4, 0), pd.Timestamp(2018, 2, 4, 23, 59)),
    (pd.Timestamp(2018, 2, 5, 0), pd.Timestamp(2018, 2, 6, 0)),
)


def fault_mapper_FD(faultDate):
    """Map a timestamp to its 'FD' label using fixed date windows.

    Args:
        faultDate: A value comparable with ``pd.Timestamp`` (the windows are
            timezone-naive).

    Returns:
        0 if ``faultDate`` falls inside one of the label-0 windows, 1 if it
        falls inside one of the label-1 windows, and ``None`` otherwise.

    Both ends of every window are inclusive. Windows written as ending at
    23:59 end at exactly 23:59:00, so a timestamp later in that final minute
    is not matched by that window.
    """
    # Label-0 windows are checked first, mirroring the original order of the
    # if-chain.
    for label, windows in ((0, _FD_LABEL_0_WINDOWS), (1, _FD_LABEL_1_WINDOWS)):
        for start, end in windows:
            if start <= faultDate <= end:
                return label
    # No window matched: make the "unlabelled" result explicit.
    return None
# Label every row by passing its timestamp through the fault-date lookup.
df['FD'] = df['Timestamp'].apply(fault_mapper_FD)

# Select night-time rows: strictly after 22:00 or strictly before 07:00.
time_of_day = df['Timestamp'].dt.time
cond = (time_of_day > dt.time(22, 0)) | (time_of_day < dt.time(7, 0))

# Replace missing values with 0 in the selected rows only.
df[cond] = df[cond].fillna(0, axis=1)
When I try to interpolate
# Linear interpolation of missing values, in place, with
# limit_direction='backward'; this is the call that raises the ValueError.
df.interpolate(method ='linear', limit_direction ='backward', inplace=True)
I get the error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Input In [6], in <cell line: 1>()
----> 1 df.interpolate(method ='linear', limit_direction ='backward', inplace=True)
File ~\anaconda3\lib\site-packages\pandas\util\_decorators.py:311, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
305 if len(args) > num_allow_args:
306 warnings.warn(
307 msg.format(arguments=arguments),
308 FutureWarning,
309 stacklevel=stacklevel,
310 )
--> 311 return func(*args, **kwargs)
File ~\anaconda3\lib\site-packages\pandas\core\frame.py:10931, in DataFrame.interpolate(self, method, axis, limit, inplace, limit_direction, limit_area, downcast, **kwargs)
10919 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "method"])
10920 def interpolate(
10921 self: DataFrame,
(...)
10929 **kwargs,
10930 ) -> DataFrame | None:
> 10931 return super().interpolate(
10932 method,
10933 axis,
10934 limit,
10935 inplace,
10936 limit_direction,
10937 limit_area,
10938 downcast,
10939 **kwargs,
10940 )
File ~\anaconda3\lib\site-packages\pandas\core\generic.py:7034, in NDFrame.interpolate(self, method, axis, limit, inplace, limit_direction, limit_area, downcast, **kwargs)
7028 if isna(index).any():
7029 raise NotImplementedError(
7030 "Interpolation with NaNs in the index "
7031 "has not been implemented. Try filling "
7032 "those NaNs before interpolating."
7033 )
-> 7034 new_data = obj._mgr.interpolate(
7035 method=method,
7036 axis=axis,
7037 index=index,
7038 limit=limit,
7039 limit_direction=limit_direction,
7040 limit_area=limit_area,
7041 inplace=inplace,
7042 downcast=downcast,
7043 **kwargs,
7044 )
7046 result = self._constructor(new_data)
7047 if should_transpose:
File ~\anaconda3\lib\site-packages\pandas\core\internals\managers.py:359, in BaseBlockManager.interpolate(self, **kwargs)
358 def interpolate(self: T, **kwargs) -> T:
--> 359 return self.apply("interpolate", **kwargs)
File ~\anaconda3\lib\site-packages\pandas\core\internals\managers.py:304, in BaseBlockManager.apply(self, f, align_keys, ignore_failures, **kwargs)
302 applied = b.apply(f, **kwargs)
303 else:
--> 304 applied = getattr(b, f)(**kwargs)
305 except (TypeError, NotImplementedError):
306 if not ignore_failures:
File ~\anaconda3\lib\site-packages\pandas\core\internals\blocks.py:1482, in EABackedBlock.interpolate(self, method, axis, inplace, limit, fill_value, **kwargs)
1480 new_values = values.T.fillna(value=fill_value, method=method, limit=limit).T
1481 else:
-> 1482 new_values = values.fillna(value=fill_value, method=method, limit=limit)
1483 return self.make_block_same_class(new_values)
File ~\anaconda3\lib\site-packages\pandas\core\arrays\_mixins.py:300, in NDArrayBackedExtensionArray.fillna(self, value, method, limit)
296 @doc(ExtensionArray.fillna)
297 def fillna(
298 self: NDArrayBackedExtensionArrayT, value=None, method=None, limit=None
299 ) -> NDArrayBackedExtensionArrayT:
--> 300 value, method = validate_fillna_kwargs(
301 value, method, validate_scalar_dict_value=False
302 )
304 mask = self.isna()
305 # error: Argument 2 to "check_value_size" has incompatible type
306 # "ExtensionArray"; expected "ndarray"
File ~\anaconda3\lib\site-packages\pandas\util\_validators.py:378, in validate_fillna_kwargs(value, method, validate_scalar_dict_value)
376 raise ValueError("Must specify a fill 'value' or 'method'.")
377 elif value is None and method is not None:
--> 378 method = clean_fill_method(method)
380 elif value is not None and method is None:
381 if validate_scalar_dict_value and isinstance(value, (list, tuple)):
File ~\anaconda3\lib\site-packages\pandas\core\missing.py:125, in clean_fill_method(method, allow_nearest)
123 expecting = "pad (ffill), backfill (bfill) or nearest"
124 if method not in valid_methods:
--> 125 raise ValueError(f"Invalid fill method. Expecting {expecting}. Got {method}")
126 return method
ValueError: Invalid fill method. Expecting pad (ffill) or backfill (bfill). Got linear
I have also tried the following solution found on Stack Overflow, but it did not help:
# Convert every column with pd.to_numeric; errors='coerce' turns values that
# cannot be parsed into NaN instead of raising.
for col in df:
    df[col] = pd.to_numeric(df[col], errors='coerce')
CodePudding user response:
import pandas as pd
# Parse the year strings; errors='coerce' turns the unparseable 'foo' into NaT.
parsed_times = pd.to_datetime(
    ['2010', '2011', 'foo', '2012', '2013'],
    errors='coerce',
)
df = pd.DataFrame({'time': parsed_times})

# Make the column timezone-aware: localize to UTC, then convert to Asia/Kolkata.
df['time'] = df['time'].dt.tz_localize('UTC').dt.tz_convert('Asia/Kolkata')

# Interpolate with the default settings on the tz-aware datetime column.
df.interpolate()
CodePudding user response:
By default, df.interpolate(method='linear') forward-fills NaNs after the last valid value. That is rather surprising given that the method name only mentions "interpolate".
To restrict df.interpolate to only interpolate NaNs between valid (non-NaN) values, as of Pandas version 0.23.0 (Reference), use limit_area='inside'.
import pandas as pd
import numpy as np
# Sample column with NaNs at the start, between valid values, and at the end.
samples = [np.nan, 1, np.nan, 3, np.nan, 5, np.nan]
a = pd.DataFrame({'col1': samples})

# Default linear interpolation of col1.
a['linear'] = a['col1'].interpolate(method='linear')

# Same interpolation, restricted to NaNs surrounded by valid values.
a['linear inside'] = a['col1'].interpolate(method='linear', limit_area='inside')

print(a)