The following is my code. I am running it in IDLE with Python 3.8:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction import DictVectorizer
from sklearn import trees
from sklearn.metrics import accuracy_score,classification_report
import warnings
from sklearn.preprocessing import StandardScalar
from sklearn.neural_networks import MLPClassifier
warnings.filterwarnings(action='ignore',category=DeprecationWarning)
data=pd.read_csv('data.csv')
cols_to_retain=[]
x-feature=data[cols_to_retain]
x_dict=x_feature.T.to_dict.values()
vect=DictVectorizer(sparse=False)
x_vector=vect.fit_transform(x_dict)
print(x_vector)
x_train=[:-1]
x_test=[-1:]
print('Train set')
print(x_train)
print('Test set')
print(x_test)
le=LabelEncoder
y_train=le.fit_transform(data['Goal'][:-1])
clf=tree.DecisionTreeClassifier(criteron='entropy')
clf=clf.fit_transform(x_train,y_train)
print('Test Data')
print(le.inverse_transform(clf.predict(x_test)))
It shows me an error for these particular lines. It only says "invalid syntax":
x_train=[:-1]
x_test=[-1:]
The packages are imported correctly.
CodePudding user response:
Your code contains multiple issues:
- The import should be `StandardScaler`, not `StandardScalar`.
- You have unused imports, such as `MLPClassifier`.
- `cols_to_retain` is empty. Thus, `data[cols_to_retain]` will return an empty data frame.
- `to_dict` should be `to_dict()`.
- The variable names `x-feature` and `x_feature` do not match.
- `LabelEncoder` is missing brackets `()`.
- `x_train=[:-1]` and `x_test=[-1:]` are not valid. You probably wanted to select a subset, like `x_train = x_vector[:-1]` or `x_test = x_vector[-1:]`.

Please add additional sample data if you need help with this selection.
Here is an updated version of your code:
import numpy as np
import pandas as pd
from sklearn.feature_extraction import DictVectorizer
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.tree import DecisionTreeClassifier
# Train a decision tree on every row except the last one, then predict the
# "Goal" label of the held-out last row.
data = pd.read_csv("data.csv")
print(data)

# TODO: list the feature columns to train on. An empty list selects no
# columns, so the classifier would receive no features.
cols_to_retain = []
x_feature = data[cols_to_retain]

# One {column: value} dict per row, which is the input shape DictVectorizer
# expects.
x_dict = x_feature.to_dict(orient="records")
vect = DictVectorizer(sparse=False)
x_vector = vect.fit_transform(x_dict)
print(x_vector)

# Hold out the last row as the single test sample.
x_train = x_vector[:-1]
x_test = x_vector[-1:]
print("Train set")
print(x_train)
print("Test set")
print(x_test)

# Encode the target labels as integers; the slice matches x_train's rows.
le = LabelEncoder()
y_train = le.fit_transform(data["Goal"][:-1])

# The keyword is `criterion`, and classifiers are trained with `fit`
# (they have no `fit_transform`).
clf = DecisionTreeClassifier(criterion="entropy")
clf.fit(x_train, y_train)

print("Test Data")
# Map the predicted integer class back to the original label.
print(le.inverse_transform(clf.predict(x_test)))