what could I be doing qrong in relation to the code below? Well, I am building a stock prediction model using LSTM and everytime I try to run the code below to normalize the new filtered dataset I get the error shown bellow afert the code;
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from keras.models import Sequential
from keras.layers import LSTM,Dropout,Dense
from matplotlib.pylab import rcParams
rcParams['figure.figsize']=20,10
from sklearn.preprocessing import MinMaxScaler
scaler=MinMaxScaler(feature_range=(0,1))
scaler=MinMaxScaler(feature_range=(0,1))
final_dataset=new_dataset.values
train_data=final_dataset[0:987,:]
valid_data=final_dataset[987:,:]
new_dataset.index=new_dataset.Date
new_dataset.drop("Date",axis=1,inplace=True)
scaler=MinMaxScaler(feature_range=(0,1))
scaled_data=scaler.fit_transform(final_dataset)
x_train_data,y_train_data=[],[]
for i in range(60,len(train_data)):
x_train_data.append(scaled_data[i-60:i,0])
y_train_data.append(scaled_data[i,0])
x_train_data,y_train_data=np.array(x_train_data),np.array(y_train_data)
x_train_data=np.reshape(x_train_data,(x_train_data.shape[0],x_train_data.shape[1],1))
Every time I run it I get this error below and I have try to correct it several time but it keeps popping up. error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-37-15343d926050> in <module>
8 new_dataset.drop("Date",axis=1,inplace=True)
9 scaler=MinMaxScaler(feature_range=(0,1))
---> 10 scaled_data=scaler.fit_transform(final_dataset)
11
12 x_train_data,y_train_data=[],[]
~\anaconda3\lib\site-packages\sklearn\base.py in fit_transform(self, X, y, **fit_params)
697 if y is None:
698 # fit method of arity 1 (unsupervised transformation)
--> 699 return self.fit(X, **fit_params).transform(X)
700 else:
701 # fit method of arity 2 (supervised transformation)
~\anaconda3\lib\site-packages\sklearn\preprocessing\_data.py in fit(self, X, y)
361 # Reset internal state before fitting
362 self._reset()
--> 363 return self.partial_fit(X, y)
364
365 def partial_fit(self, X, y=None):
~\anaconda3\lib\site-packages\sklearn\preprocessing\_data.py in partial_fit(self, X, y)
394
395 first_pass = not hasattr(self, 'n_samples_seen_')
--> 396 X = self._validate_data(X, reset=first_pass,
397 estimator=self, dtype=FLOAT_DTYPES,
398 force_all_finite="allow-nan")
~\anaconda3\lib\site-packages\sklearn\base.py in _validate_data(self, X, y, reset, validate_separately, **check_params)
419 out = X
420 elif isinstance(y, str) and y == 'no_validation':
--> 421 X = check_array(X, **check_params)
422 out = X
423 else:
~\anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
61 extra_args = len(args) - len(all_args)
62 if extra_args <= 0:
---> 63 return f(*args, **kwargs)
64
65 # extra_args > 0
~\anaconda3\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)
614 array = array.astype(dtype, casting="unsafe", copy=False)
615 else:
--> 616 array = np.asarray(array, order=order, dtype=dtype)
617 except ComplexWarning as complex_warning:
618 raise ValueError("Complex data not supported\n"
~\AppData\Roaming\Python\Python38\site-packages\numpy\core\_asarray.py in asarray(a, dtype, order)
81
82 """
---> 83 return array(a, dtype, copy=False, order=order)
84
85
TypeError: float() argument must be a string or a number, not 'Timestamp'
CodePudding user response:
sklearn expects floating point values, i.e. numbers, and you’re giving it Timestamp
objects. The error TypeError: float() argument must be a string or a number, not 'Timestamp
is saying that the python built-in float()
does not now how to convert a Timestamp to a float.
To avoid this issue, you can transform Timestamps to numbers yourself, before passing them to your function.
- if all dates are ≥ 1970 and you want a resolution less than a day, use
timestamp
:new_dataset['Date'] = new_dataset['Date'].apply(pd.Timestamp.timestamp)
- if you have at most a daily resolution and want no date restrictions use
toordinal
:new_dataset['Date'] = new_dataset['Date'].apply(pd.Timestamp.toordinal)