I am trying to use scikit-learn (sklearn) to preprocess my data:
import math
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pandas_datareader import data
import pandas_datareader.data as web
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM
# Fetch price data for ticker "1211.HK" from the "yahoo" source,
# from 2011-01-01 up to today's date.
start = datetime.datetime(2011,1,1)
end = datetime.date.today()
df = web.DataReader("1211.HK", "yahoo", start, end)
# Plot the 'Close' column of the downloaded frame.
plt.figure(figsize=(16,8))
plt.title('BYD close price',fontsize=18)
plt.plot(df['Close'])
plt.xlabel('Date',fontsize=18)
plt.ylabel('Close price HK($)',fontsize=18)
plt.show()
# Keep only the listed column label(s) in a new frame.
# NOTE(review): the label requested here is lowercase 'close', while the plot
# above reads df['Close'] -- confirm which casing the frame actually uses.
# This assignment also rebinds the name `data` imported from pandas_datareader.
data = df.filter(['close'])
# Underlying array of the filtered frame.
dataset = data.values
# 80% of the row count, rounded up (presumably the training-set length).
trainning_data_len =math.ceil(len (dataset)*.8)
# Fit a default-configured MinMaxScaler on the array and transform it.
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(dataset)
The following error was raised when I tried to compute and check `scaled_data`:
ValueError: Found array with 0 feature(s) (shape=(2698, 0)) while a minimum of 1 is required by MinMaxScaler.
I have no idea how to solve this problem.
Thanks in advance.
UPDATE: I am running this in JupyterLab 1.2.6, and the following is the full error log:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-9-146c8eeabe3c> in <module>
1 scaler = MinMaxScaler()
----> 2 scaled_data = scaler.fit_transform(dataset)
/opt/anaconda3/lib/python3.7/site-packages/sklearn/base.py in fit_transform(self, X, y, **fit_params)
569 if y is None:
570 # fit method of arity 1 (unsupervised transformation)
--> 571 return self.fit(X, **fit_params).transform(X)
572 else:
573 # fit method of arity 2 (supervised transformation)
/opt/anaconda3/lib/python3.7/site-packages/sklearn/preprocessing/_data.py in fit(self, X, y)
337 # Reset internal state before fitting
338 self._reset()
--> 339 return self.partial_fit(X, y)
340
341 def partial_fit(self, X, y=None):
/opt/anaconda3/lib/python3.7/site-packages/sklearn/preprocessing/_data.py in partial_fit(self, X, y)
371 X = check_array(X,
372 estimator=self, dtype=FLOAT_DTYPES,
--> 373 force_all_finite="allow-nan")
374
375 data_min = np.nanmin(X, axis=0)
/opt/anaconda3/lib/python3.7/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
592 " a minimum of %d is required%s."
593 % (n_features, array.shape, ensure_min_features,
--> 594 context))
595
596 if warn_on_dtype and dtype_orig is not None and array.dtype != dtype_orig:
ValueError: Found array with 0 feature(s) (shape=(2698, 0)) while a minimum of 1 is required by MinMaxScaler.
CodePudding user response:
Your data frame has the following columns:
Index(['High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close'], dtype='object')
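Column labels are case-sensitive, so it should be df.filter(['Close']) instead of df.filter(['close']). When no label matches, filter returns a frame with zero columns instead of raising an error, which is why MinMaxScaler reported an array with 0 features (shape=(2698, 0)). With the corrected label: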
# Select the closing-price column; the label must match the frame's 'Close'.
data = df.filter(['Close'])
# Pull out the underlying array that the scaler will consume.
dataset = data.values
# 80% of the row count, rounded up.
trainning_data_len = math.ceil(0.8 * len(dataset))
# Fit a default MinMaxScaler on the prices, then rescale the same array.
scaler = MinMaxScaler().fit(dataset)
scaled_data = scaler.transform(dataset)
scaled_data[:5]
array([[0.09673202],
[0.10424837],
[0.10441177],
[0.10571895],
[0.10571895]])