How can I find out which features are causing this error:
c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\model_selection\_validation.py:614: FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters will be set to nan. Details:
Traceback (most recent call last):
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 58, in _wrapfunc
return bound(*args, **kwds)
TypeError: '<' not supported between instances of 'str' and 'int'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\pipeline.py", line 346, in fit
self._final_estimator.fit(Xt, y, **fit_params_last_step)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\ensemble\_forest.py", line 331, in fit
y, expanded_class_weight = self._validate_y_class_weight(y)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\ensemble\_forest.py", line 605, in _validate_y_class_weight
y_original)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\validation.py", line 63, in inner_f
return f(*args, **kwargs)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\class_weight.py", line 167, in compute_sample_weight
y=y_full)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\validation.py", line 63, in inner_f
return f(*args, **kwargs)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\class_weight.py", line 66, in compute_class_weight
i = np.searchsorted(classes, c)
File "<__array_function__ internals>", line 6, in searchsorted
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 1343, in searchsorted
return _wrapfunc(a, 'searchsorted', v, side=side, sorter=sorter)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 67, in _wrapfunc
return _wrapit(obj, method, *args, **kwds)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 44, in _wrapit
result = getattr(asarray(obj), method)(*args, **kwds)
TypeError: '<' not supported between instances of 'str' and 'int'
FitFailedWarning)
c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\model_selection\_validation.py:614: FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters will be set to nan. Details:
Traceback (most recent call last):
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 58, in _wrapfunc
return bound(*args, **kwds)
TypeError: '<' not supported between instances of 'str' and 'int'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\pipeline.py", line 346, in fit
self._final_estimator.fit(Xt, y, **fit_params_last_step)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\ensemble\_forest.py", line 331, in fit
y, expanded_class_weight = self._validate_y_class_weight(y)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\ensemble\_forest.py", line 605, in _validate_y_class_weight
y_original)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\validation.py", line 63, in inner_f
return f(*args, **kwargs)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\class_weight.py", line 167, in compute_sample_weight
y=y_full)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\validation.py", line 63, in inner_f
return f(*args, **kwargs)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\class_weight.py", line 66, in compute_class_weight
i = np.searchsorted(classes, c)
File "<__array_function__ internals>", line 6, in searchsorted
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 1343, in searchsorted
return _wrapfunc(a, 'searchsorted', v, side=side, sorter=sorter)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 67, in _wrapfunc
return _wrapit(obj, method, *args, **kwds)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 44, in _wrapit
result = getattr(asarray(obj), method)(*args, **kwds)
TypeError: '<' not supported between instances of 'str' and 'int'
FitFailedWarning)
c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\model_selection\_validation.py:614: FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters will be set to nan. Details:
Traceback (most recent call last):
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 58, in _wrapfunc
return bound(*args, **kwds)
TypeError: '<' not supported between instances of 'str' and 'int'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\pipeline.py", line 346, in fit
self._final_estimator.fit(Xt, y, **fit_params_last_step)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\ensemble\_forest.py", line 331, in fit
y, expanded_class_weight = self._validate_y_class_weight(y)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\ensemble\_forest.py", line 605, in _validate_y_class_weight
y_original)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\validation.py", line 63, in inner_f
return f(*args, **kwargs)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\class_weight.py", line 167, in compute_sample_weight
y=y_full)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\validation.py", line 63, in inner_f
return f(*args, **kwargs)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\class_weight.py", line 66, in compute_class_weight
i = np.searchsorted(classes, c)
File "<__array_function__ internals>", line 6, in searchsorted
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 1343, in searchsorted
return _wrapfunc(a, 'searchsorted', v, side=side, sorter=sorter)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 67, in _wrapfunc
return _wrapit(obj, method, *args, **kwds)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 44, in _wrapit
result = getattr(asarray(obj), method)(*args, **kwds)
TypeError: '<' not supported between instances of 'str' and 'int'
FitFailedWarning)
c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\model_selection\_validation.py:614: FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters will be set to nan. Details:
Traceback (most recent call last):
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 58, in _wrapfunc
return bound(*args, **kwds)
TypeError: '<' not supported between instances of 'str' and 'int'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\pipeline.py", line 346, in fit
self._final_estimator.fit(Xt, y, **fit_params_last_step)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\ensemble\_forest.py", line 331, in fit
y, expanded_class_weight = self._validate_y_class_weight(y)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\ensemble\_forest.py", line 605, in _validate_y_class_weight
y_original)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\validation.py", line 63, in inner_f
return f(*args, **kwargs)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\class_weight.py", line 167, in compute_sample_weight
y=y_full)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\validation.py", line 63, in inner_f
return f(*args, **kwargs)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\class_weight.py", line 66, in compute_class_weight
i = np.searchsorted(classes, c)
File "<__array_function__ internals>", line 6, in searchsorted
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 1343, in searchsorted
return _wrapfunc(a, 'searchsorted', v, side=side, sorter=sorter)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 67, in _wrapfunc
return _wrapit(obj, method, *args, **kwds)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 44, in _wrapit
result = getattr(asarray(obj), method)(*args, **kwds)
TypeError: '<' not supported between instances of 'str' and 'int'
FitFailedWarning)
c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\model_selection\_validation.py:614: FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters will be set to nan. Details:
Traceback (most recent call last):
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 58, in _wrapfunc
return bound(*args, **kwds)
TypeError: '<' not supported between instances of 'str' and 'int'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\pipeline.py", line 346, in fit
self._final_estimator.fit(Xt, y, **fit_params_last_step)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\ensemble\_forest.py", line 331, in fit
y, expanded_class_weight = self._validate_y_class_weight(y)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\ensemble\_forest.py", line 605, in _validate_y_class_weight
y_original)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\validation.py", line 63, in inner_f
return f(*args, **kwargs)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\class_weight.py", line 167, in compute_sample_weight
y=y_full)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\validation.py", line 63, in inner_f
return f(*args, **kwargs)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\class_weight.py", line 66, in compute_class_weight
i = np.searchsorted(classes, c)
File "<__array_function__ internals>", line 6, in searchsorted
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 1343, in searchsorted
return _wrapfunc(a, 'searchsorted', v, side=side, sorter=sorter)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 67, in _wrapfunc
return _wrapit(obj, method, *args, **kwds)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 44, in _wrapit
result = getattr(asarray(obj), method)(*args, **kwds)
TypeError: '<' not supported between instances of 'str' and 'int'
FitFailedWarning)
Cross validation scores with F1 scoring [nan nan nan nan nan]
AVG Cross validation score with F1 scoring nan
c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\model_selection\_validation.py:614: FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters will be set to nan. Details:
Traceback (most recent call last):
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 58, in _wrapfunc
return bound(*args, **kwds)
TypeError: '<' not supported between instances of 'str' and 'int'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\pipeline.py", line 346, in fit
self._final_estimator.fit(Xt, y, **fit_params_last_step)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\ensemble\_forest.py", line 331, in fit
y, expanded_class_weight = self._validate_y_class_weight(y)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\ensemble\_forest.py", line 605, in _validate_y_class_weight
y_original)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\validation.py", line 63, in inner_f
return f(*args, **kwargs)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\class_weight.py", line 167, in compute_sample_weight
y=y_full)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\validation.py", line 63, in inner_f
return f(*args, **kwargs)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\class_weight.py", line 66, in compute_class_weight
i = np.searchsorted(classes, c)
File "<__array_function__ internals>", line 6, in searchsorted
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 1343, in searchsorted
return _wrapfunc(a, 'searchsorted', v, side=side, sorter=sorter)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 67, in _wrapfunc
return _wrapit(obj, method, *args, **kwds)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 44, in _wrapit
result = getattr(asarray(obj), method)(*args, **kwds)
TypeError: '<' not supported between instances of 'str' and 'int'
FitFailedWarning)
c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\model_selection\_validation.py:614: FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters will be set to nan. Details:
Traceback (most recent call last):
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 58, in _wrapfunc
return bound(*args, **kwds)
TypeError: '<' not supported between instances of 'str' and 'int'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\pipeline.py", line 346, in fit
self._final_estimator.fit(Xt, y, **fit_params_last_step)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\ensemble\_forest.py", line 331, in fit
y, expanded_class_weight = self._validate_y_class_weight(y)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\ensemble\_forest.py", line 605, in _validate_y_class_weight
y_original)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\validation.py", line 63, in inner_f
return f(*args, **kwargs)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\class_weight.py", line 167, in compute_sample_weight
y=y_full)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\validation.py", line 63, in inner_f
return f(*args, **kwargs)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\class_weight.py", line 66, in compute_class_weight
i = np.searchsorted(classes, c)
File "<__array_function__ internals>", line 6, in searchsorted
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 1343, in searchsorted
return _wrapfunc(a, 'searchsorted', v, side=side, sorter=sorter)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 67, in _wrapfunc
return _wrapit(obj, method, *args, **kwds)
File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 44, in _wrapit
result = getattr(asarray(obj), method)(*args, **kwds)
TypeError: '<' not supported between instances of 'str' and 'int'
I'm creating an ML model, and whenever I try to train it I get the error above. My column data types look like this:
label object
f1 object
f2 object
f3 object
f4 object
f5 object
f6 object
f7 object
f8 float64
f9 float64
f10 float64
f11 float64
f12 float64
f13 int64
f14 float64
f15 object
f16 object
f17 int64
f18 int64
f19 int64
f20 int64
f21 int64
f22 int64
I do not have NaN values in any column, and I do not have columns with mixed values (columns containing both strings and numbers).
Now I'm transforming the columns:
# Object-dtype feature columns that get one-hot encoded.
columns_for_encoding = [
    'f1',
    'f2',
    'f3',
    'f4',
    'f5',
    'f6',
    'f7',
    'f15',
    'f16',
]

# Numeric feature columns handed to the Normalizer. Each column is listed
# exactly once: the original list named 'f20' twice, which would select that
# column twice for the Normalizer step.
# NOTE(review): sklearn's Normalizer rescales each sample (row) to unit norm;
# if per-column scaling was intended, a scaler may be the better fit - confirm.
columns_for_scaling = [
    'f8', 'f9', 'f10', 'f11', 'f12', 'f13', 'f14',
    'f17', 'f18', 'f19', 'f20', 'f21', 'f22',
]

# Apply each transformer to its own column subset; any column not named
# above is passed through unchanged.
transformerVectoriser = ColumnTransformer(
    transformers=[
        ('Vector Cat', OneHotEncoder(handle_unknown="ignore"), columns_for_encoding),
        ('Normalizer', Normalizer(), columns_for_scaling),
    ],
    remainder='passthrough',
)
Now I'm training the model:
# Candidate models, each stored as a [display name, estimator] pair.
# NOTE(review): class_weight is keyed by the integers 1 and 0, while the
# `label` column is reported with dtype object. A str-vs-int key mismatch is
# consistent with the "'<' not supported between instances of 'str' and 'int'"
# TypeError raised inside compute_class_weight - confirm the label values.
classifiers = [
    ["RandomForestClassifier 30",
     RandomForestClassifier(max_depth=30, n_estimators=175, random_state=42,
                            class_weight={1: 3.5, 0: 1})],
    ["LogisticRegression",
     LogisticRegression(max_iter=5000, class_weight={1: 3.5, 0: 1})],
    ["GradientBoostingClassifier",
     GradientBoostingClassifier(max_depth=25, n_estimators=175, random_state=42)],
]

# Evaluate every candidate with 5-fold cross-validation, once scored by F1
# and once by accuracy. The loop body is indented here; the pasted snippet
# had lost its indentation and was not valid Python.
for name, clf in classifiers:
    print(name)

    # Preprocessing and model are bundled so the transformers are re-fitted
    # on each training fold.
    pipeline = Pipeline([('transformer', transformerVectoriser),
                         ('classifier', clf)])

    cv_score_f1 = cross_val_score(pipeline, features, results, cv=5, scoring='f1')
    cv_score_f1.sort()  # in-place sort so the per-fold scores print in ascending order
    print('Cross validation scores with F1 scoring', cv_score_f1)
    cv_score_f1 = round(np.average(cv_score_f1), 5)
    print("AVG Cross validation score with F1 scoring", cv_score_f1, '\n')

    cv_score_acc = cross_val_score(pipeline, features, results, cv=5, scoring='accuracy')
    cv_score_acc.sort()
    print('Cross validation scores with accuracy scoring', cv_score_acc)
    cv_score_acc = round(np.average(cv_score_acc), 5)
    print("AVG Cross validation score with accuracy scoring", cv_score_acc, '\n')
    print()
Is there a way to find out what column is causing my error?
CodePudding user response:
I see that your label column is of type object, meaning it is stored as strings. But in your class weights you use integer keys, class_weight = {1: 3.5, 0: 1}, so you need to either specify the classes correctly (using the actual label values as keys) or label-encode the target.
For example, with a dataset where the labels are "yes" or "no":
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import OneHotEncoder, Normalizer, LabelEncoder
from sklearn.compose import ColumnTransformer
import pandas as pd
import numpy as np
# Toy data set: 100 rows with one numeric feature, one categorical feature
# and a string-valued binary label. The draws happen in the same order as
# the columns are listed (f1, f2, label).
n_rows = 100
numeric_feature = np.random.uniform(0, 1, n_rows)
categorical_feature = np.random.choice(['a', 'b', 'c'], n_rows)
string_labels = np.random.choice(['yes', 'no'], n_rows)

df = pd.DataFrame({
    'f1': numeric_feature,
    'f2': categorical_feature,
    'label': string_labels,
})
df.dtypes
f1 float64
f2 object
label object
If I set up the pipeline the way you did, I get a similar error:
# Reproduce the failure: the class_weight keys below are the integers 1 and 0,
# but the values in df['label'] are the strings 'yes' and 'no', so this fit is
# expected to fail.
columns_for_encoding = ['f2']
columns_for_scaling = ['f1']

transformerVectoriser = ColumnTransformer(
    transformers=[
        ('Vector Cat', OneHotEncoder(handle_unknown="ignore"), columns_for_encoding),
        ('Normalizer', Normalizer(), columns_for_scaling),
    ],
    remainder='passthrough',
)

mismatched_forest = RandomForestClassifier(class_weight={1: 3.5, 0: 1})
pipeline = Pipeline([
    ('transformer', transformerVectoriser),
    ('classifier', mismatched_forest),
])
pipeline.fit(df[['f1', 'f2']], df['label'])
Let's define the weights properly and it works:
# Key the class weights by the label values that actually occur in
# df['label'] ('yes' / 'no') instead of by integers.
weighted_forest = RandomForestClassifier(class_weight={'yes': 3.5, 'no': 1})
pipeline = Pipeline(steps=[
    ('transformer', transformerVectoriser),
    ('classifier', weighted_forest),
])
pipeline.fit(df[['f1', 'f2']], df['label'])
Pipeline(steps=[('transformer',
ColumnTransformer(remainder='passthrough',
transformers=[('Vector Cat',
OneHotEncoder(handle_unknown='ignore'),
['f2']),
('Normalizer', Normalizer(),
['f1'])])),
('classifier',
RandomForestClassifier(class_weight={'no': 1, 'yes': 3.5}))])