# importing libraries
import numpy as np
import pandas
import pandas as pd
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
# Polynomial regression of Salary on level_id (degree 4), followed by a plot
# of the raw data points and the fitted curve.

# Read the data. read_csv already returns a DataFrame; the extra DataFrame()
# wrapper is kept only so the `dataset` and `data` names both stay available.
dataset = pd.read_csv("Salary_Levels.csv")
data = pd.DataFrame(dataset)

# Independent and dependent variables.
# scikit-learn estimators/transformers require a 2D feature matrix of shape
# (n_samples, n_features). Selecting with double brackets keeps the single
# column two-dimensional; `.values` turns it into a plain (n_samples, 1)
# array. Passing the 1D Series here is what raised
# "ValueError: Expected 2D array, got 1D array instead".
x = data[["level_id"]].astype(int).values
y = data["Salary"].astype(int)

# Polynomial features: expand x into the columns [1, x, x^2, x^3, x^4].
poly = PolynomialFeatures(degree=4)
x_poly = poly.fit_transform(x)

# Ordinary least squares on the expanded features.
pilreg = LinearRegression()
pilreg.fit(x_poly, y)

# Predict the salary for level 10. Use transform(), not fit_transform():
# the transformer is already fitted and must not be refitted on new data.
# NOTE: the result of this bare expression is only displayed in an
# interactive session; in a script it is discarded.
pilreg.predict(poly.transform([[10]]))

# Plot: raw observations in red, fitted polynomial curve in blue.
# x[:, 0] flattens the single feature column back to 1D for matplotlib.
plt.scatter(x[:, 0], y, color='r', s=5)
plt.plot(x[:, 0], pilreg.predict(x_poly), color='blue')
plt.show()
Traceback (most recent call last):
File "/Users/david/desktop/code/Python/PolyReg/main.py", line 23, in <module>
x_poly=poly.fit_transform(x)
File "/Users/david/Desktop/Code/Python/PolyReg/venv/desktop/code/Python/Python/lib/python2.7/site-packages/sklearn/base.py", line 464, in fit_transform
return self.fit(X, **fit_params).transform(X)
File "/Users/david/Desktop/Code/Python/PolyReg/venv/desktop/code/Python/Python/lib/python2.7/site-packages/sklearn/preprocessing/data.py", line 1460, in fit
n_samples, n_features = check_array(X, accept_sparse=True).shape
File "/Users/david/Desktop/Code/Python/PolyReg/venv/desktop/code/Python/Python/lib/python2.7/site-packages/sklearn/utils/validation.py", line 552, in check_array
"if it contains a single sample.".format(array))
ValueError: Expected 2D array, got 1D array instead:
array=[1 2 3 4 5 6].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.
I'm not sure why I'm getting this error message. Any ideas? I'm trying to do polynomial regression, but there seems to be a problem with the x values. The code and the error message are above. I tried reshaping, but that didn't work.
CodePudding user response:
`fit_transform` expects a 2D input, so you have to provide one (even if the second dimension has size 1).
I don't have an example of your data, but selecting your column as a DataFrame (note the double brackets) instead of as a Series should do the trick:
x=data[["level_id"]].astype(int)
CodePudding user response:
You're really close. The problem is that the fit_transform function expects a 2D input (such as a DataFrame), not a 1D Series. Change the line that defines x to x=pd.DataFrame(data["level_id"].astype(int))
and you're good to go.