What features are causing TypeError: '<' not supported between instances of 'str' and 'int' - CodePudding

How can I find what features are causing this error:

c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\model_selection\_validation.py:614: FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters will be set to nan. Details: 
Traceback (most recent call last):
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 58, in _wrapfunc
    return bound(*args, **kwds)
TypeError: '<' not supported between instances of 'str' and 'int'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\pipeline.py", line 346, in fit
    self._final_estimator.fit(Xt, y, **fit_params_last_step)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\ensemble\_forest.py", line 331, in fit
    y, expanded_class_weight = self._validate_y_class_weight(y)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\ensemble\_forest.py", line 605, in _validate_y_class_weight
    y_original)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\validation.py", line 63, in inner_f
    return f(*args, **kwargs)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\class_weight.py", line 167, in compute_sample_weight
    y=y_full)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\validation.py", line 63, in inner_f
    return f(*args, **kwargs)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\class_weight.py", line 66, in compute_class_weight
    i = np.searchsorted(classes, c)
  File "<__array_function__ internals>", line 6, in searchsorted
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 1343, in searchsorted
    return _wrapfunc(a, 'searchsorted', v, side=side, sorter=sorter)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 67, in _wrapfunc
    return _wrapit(obj, method, *args, **kwds)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 44, in _wrapit
    result = getattr(asarray(obj), method)(*args, **kwds)
TypeError: '<' not supported between instances of 'str' and 'int'

  FitFailedWarning)
c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\model_selection\_validation.py:614: FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters will be set to nan. Details: 
Traceback (most recent call last):
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 58, in _wrapfunc
    return bound(*args, **kwds)
TypeError: '<' not supported between instances of 'str' and 'int'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\pipeline.py", line 346, in fit
    self._final_estimator.fit(Xt, y, **fit_params_last_step)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\ensemble\_forest.py", line 331, in fit
    y, expanded_class_weight = self._validate_y_class_weight(y)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\ensemble\_forest.py", line 605, in _validate_y_class_weight
    y_original)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\validation.py", line 63, in inner_f
    return f(*args, **kwargs)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\class_weight.py", line 167, in compute_sample_weight
    y=y_full)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\validation.py", line 63, in inner_f
    return f(*args, **kwargs)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\class_weight.py", line 66, in compute_class_weight
    i = np.searchsorted(classes, c)
  File "<__array_function__ internals>", line 6, in searchsorted
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 1343, in searchsorted
    return _wrapfunc(a, 'searchsorted', v, side=side, sorter=sorter)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 67, in _wrapfunc
    return _wrapit(obj, method, *args, **kwds)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 44, in _wrapit
    result = getattr(asarray(obj), method)(*args, **kwds)
TypeError: '<' not supported between instances of 'str' and 'int'

  FitFailedWarning)
c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\model_selection\_validation.py:614: FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters will be set to nan. Details: 
Traceback (most recent call last):
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 58, in _wrapfunc
    return bound(*args, **kwds)
TypeError: '<' not supported between instances of 'str' and 'int'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\pipeline.py", line 346, in fit
    self._final_estimator.fit(Xt, y, **fit_params_last_step)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\ensemble\_forest.py", line 331, in fit
    y, expanded_class_weight = self._validate_y_class_weight(y)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\ensemble\_forest.py", line 605, in _validate_y_class_weight
    y_original)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\validation.py", line 63, in inner_f
    return f(*args, **kwargs)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\class_weight.py", line 167, in compute_sample_weight
    y=y_full)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\validation.py", line 63, in inner_f
    return f(*args, **kwargs)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\class_weight.py", line 66, in compute_class_weight
    i = np.searchsorted(classes, c)
  File "<__array_function__ internals>", line 6, in searchsorted
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 1343, in searchsorted
    return _wrapfunc(a, 'searchsorted', v, side=side, sorter=sorter)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 67, in _wrapfunc
    return _wrapit(obj, method, *args, **kwds)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 44, in _wrapit
    result = getattr(asarray(obj), method)(*args, **kwds)
TypeError: '<' not supported between instances of 'str' and 'int'

  FitFailedWarning)
c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\model_selection\_validation.py:614: FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters will be set to nan. Details: 
Traceback (most recent call last):
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 58, in _wrapfunc
    return bound(*args, **kwds)
TypeError: '<' not supported between instances of 'str' and 'int'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\pipeline.py", line 346, in fit
    self._final_estimator.fit(Xt, y, **fit_params_last_step)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\ensemble\_forest.py", line 331, in fit
    y, expanded_class_weight = self._validate_y_class_weight(y)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\ensemble\_forest.py", line 605, in _validate_y_class_weight
    y_original)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\validation.py", line 63, in inner_f
    return f(*args, **kwargs)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\class_weight.py", line 167, in compute_sample_weight
    y=y_full)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\validation.py", line 63, in inner_f
    return f(*args, **kwargs)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\class_weight.py", line 66, in compute_class_weight
    i = np.searchsorted(classes, c)
  File "<__array_function__ internals>", line 6, in searchsorted
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 1343, in searchsorted
    return _wrapfunc(a, 'searchsorted', v, side=side, sorter=sorter)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 67, in _wrapfunc
    return _wrapit(obj, method, *args, **kwds)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 44, in _wrapit
    result = getattr(asarray(obj), method)(*args, **kwds)
TypeError: '<' not supported between instances of 'str' and 'int'

  FitFailedWarning)
c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\model_selection\_validation.py:614: FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters will be set to nan. Details: 
Traceback (most recent call last):
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 58, in _wrapfunc
    return bound(*args, **kwds)
TypeError: '<' not supported between instances of 'str' and 'int'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\pipeline.py", line 346, in fit
    self._final_estimator.fit(Xt, y, **fit_params_last_step)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\ensemble\_forest.py", line 331, in fit
    y, expanded_class_weight = self._validate_y_class_weight(y)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\ensemble\_forest.py", line 605, in _validate_y_class_weight
    y_original)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\validation.py", line 63, in inner_f
    return f(*args, **kwargs)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\class_weight.py", line 167, in compute_sample_weight
    y=y_full)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\validation.py", line 63, in inner_f
    return f(*args, **kwargs)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\class_weight.py", line 66, in compute_class_weight
    i = np.searchsorted(classes, c)
  File "<__array_function__ internals>", line 6, in searchsorted
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 1343, in searchsorted
    return _wrapfunc(a, 'searchsorted', v, side=side, sorter=sorter)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 67, in _wrapfunc
    return _wrapit(obj, method, *args, **kwds)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 44, in _wrapit
    result = getattr(asarray(obj), method)(*args, **kwds)
TypeError: '<' not supported between instances of 'str' and 'int'

  FitFailedWarning)
Cross validation scores with F1 scoring [nan nan nan nan nan]
AVG Cross validation score with F1 scoring nan 

c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\model_selection\_validation.py:614: FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters will be set to nan. Details: 
Traceback (most recent call last):
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 58, in _wrapfunc
    return bound(*args, **kwds)
TypeError: '<' not supported between instances of 'str' and 'int'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\pipeline.py", line 346, in fit
    self._final_estimator.fit(Xt, y, **fit_params_last_step)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\ensemble\_forest.py", line 331, in fit
    y, expanded_class_weight = self._validate_y_class_weight(y)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\ensemble\_forest.py", line 605, in _validate_y_class_weight
    y_original)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\validation.py", line 63, in inner_f
    return f(*args, **kwargs)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\class_weight.py", line 167, in compute_sample_weight
    y=y_full)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\validation.py", line 63, in inner_f
    return f(*args, **kwargs)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\class_weight.py", line 66, in compute_class_weight
    i = np.searchsorted(classes, c)
  File "<__array_function__ internals>", line 6, in searchsorted
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 1343, in searchsorted
    return _wrapfunc(a, 'searchsorted', v, side=side, sorter=sorter)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 67, in _wrapfunc
    return _wrapit(obj, method, *args, **kwds)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 44, in _wrapit
    result = getattr(asarray(obj), method)(*args, **kwds)
TypeError: '<' not supported between instances of 'str' and 'int'

  FitFailedWarning)
c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\model_selection\_validation.py:614: FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters will be set to nan. Details: 
Traceback (most recent call last):
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 58, in _wrapfunc
    return bound(*args, **kwds)
TypeError: '<' not supported between instances of 'str' and 'int'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\pipeline.py", line 346, in fit
    self._final_estimator.fit(Xt, y, **fit_params_last_step)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\ensemble\_forest.py", line 331, in fit
    y, expanded_class_weight = self._validate_y_class_weight(y)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\ensemble\_forest.py", line 605, in _validate_y_class_weight
    y_original)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\validation.py", line 63, in inner_f
    return f(*args, **kwargs)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\class_weight.py", line 167, in compute_sample_weight
    y=y_full)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\validation.py", line 63, in inner_f
    return f(*args, **kwargs)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\class_weight.py", line 66, in compute_class_weight
    i = np.searchsorted(classes, c)
  File "<__array_function__ internals>", line 6, in searchsorted
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 1343, in searchsorted
    return _wrapfunc(a, 'searchsorted', v, side=side, sorter=sorter)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 67, in _wrapfunc
    return _wrapit(obj, method, *args, **kwds)
  File "c:\users\pc\appdata\local\programs\python\python37\lib\site-packages\numpy\core\fromnumeric.py", line 44, in _wrapit
    result = getattr(asarray(obj), method)(*args, **kwds)
TypeError: '<' not supported between instances of 'str' and 'int'

I'm creating an ML model, and whenever I try to train it I get this error. My data types look like this:

label          object
f1             object
f2             object
f3             object
f4             object
f5             object
f6             object
f7             object
f8             float64
f9             float64
f10            float64
f11            float64
f12            float64
f13            int64
f14            float64
f15            object
f16            object
f17            int64
f18            int64
f19            int64
f20            int64
f21            int64
f22            int64

I do not have NaN values in columns, I do not have columns with mixed values (columns with strings and numbers). Now I'm transforming the columns:

# Object-dtype (categorical) columns that are one-hot encoded.
columns_for_encoding = ['f1',
                       'f2',
                       'f3',
                       'f4',
                       'f5',
                       'f6',
                       'f7',
                       'f15',
                       'f16']

# Numeric (float64 / int64) columns handed to Normalizer. Each column is
# listed exactly once: a repeated name is selected twice by the
# ColumnTransformer and would be fed to Normalizer as two identical features.
columns_for_scaling = ['f8','f9', 'f10', 'f11', 'f12', 'f13', 'f14', 'f17', 'f18', 'f19', 'f20', 'f21', 'f22']

# Only the feature columns are transformed here; the target ('label') is not
# touched by this transformer, so its dtype reaches the classifier unchanged.
transformerVectoriser = ColumnTransformer(transformers=[('Vector Cat', OneHotEncoder(handle_unknown = "ignore"), columns_for_encoding),
                                                        ('Normalizer', Normalizer(), columns_for_scaling)],
                                          remainder='passthrough')

Now I'm training the model:

# [display name, estimator] pairs to evaluate.
# NOTE(review): the class_weight dicts are keyed by the ints 1 and 0. These keys
# have to match the values in the target exactly; if the labels are strings
# (object dtype) the int keys cannot be compared with them — confirm the label dtype.
classifiers = [["RandomForestClassifier 30", RandomForestClassifier(max_depth = 30, n_estimators = 175, random_state = 42, class_weight = {1: 3.5, 0: 1})],
               ["LogisticRegression", LogisticRegression(max_iter = 5000, class_weight = {1: 3.5, 0: 1})], 
               ["GradientBoostingClassifier", GradientBoostingClassifier(max_depth = 25, n_estimators = 175, random_state = 42)]]

# Cross-validate every classifier inside the same preprocessing pipeline.
for class_ in classifiers:
    
    name = class_[0]
    clf = class_[1]
    print(name)
    
    # Preprocessing is part of the pipeline, so it is re-fitted on each training fold.
    pipeline = Pipeline([('transformer', transformerVectoriser),
                         ('classifier', clf)])

    # 5-fold F1 scores; sorted in place only for display, the average is unaffected.
    cv_score_f1 = cross_val_score(pipeline, features, results, cv=5, scoring = 'f1')
    cv_score_f1.sort()
    print('Cross validation scores with F1 scoring', cv_score_f1)
    cv_score_f1 = round(np.average(cv_score_f1), 5)
    print("AVG Cross validation score with F1 scoring", cv_score_f1, '\n')

    # Same evaluation with accuracy as the metric.
    cv_score_acc = cross_val_score(pipeline, features, results, cv=5, scoring = 'accuracy')
    cv_score_acc.sort()
    print('Cross validation scores with accuracy scoring', cv_score_acc)
    cv_score_acc = round(np.average(cv_score_acc), 5)
    print("AVG Cross validation score with accuracy scoring", cv_score_acc, '\n')
    print()

Is there a way to find out what column is causing my error?

CodePudding user response：

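I see that your label column is of type object, meaning it holds strings. But in your class weights you use integer keys, class_weight = {1: 3.5, 0: 1}, so the keys do not match the actual class labels. You need to either specify the classes in class_weight exactly as they appear in the label column, or encode the labels as integers first (e.g. with LabelEncoder). Note that scoring = 'f1' also treats the label 1 as the positive class by default, so encoding the labels to 0/1 is likely the simpler fix for your cross-validation loop.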

With an example dataset, where my labels are "yes" or "no" :

from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import OneHotEncoder, Normalizer, LabelEncoder
from sklearn.compose import ColumnTransformer
import pandas as pd
import numpy as np

# Toy dataset: one numeric feature (f1), one categorical feature (f2) and a
# string-valued target ('yes' / 'no'), 100 rows each, drawn at random.
df = pd.DataFrame({'f1':np.random.uniform(0,1,100),
'f2':np.random.choice(['a','b','c'],100),
'label':np.random.choice(['yes','no'],100)})

df.dtypes
f1       float64
f2        object
label     object

If we set up the pipeline like you did, I get a similar error:

# Same preprocessing layout as in the question, reduced to the toy columns.
columns_for_encoding = ['f2']
columns_for_scaling = ['f1']

transformerVectoriser = ColumnTransformer(
transformers=[('Vector Cat', OneHotEncoder(handle_unknown = "ignore"), columns_for_encoding),
('Normalizer', Normalizer(), columns_for_scaling)],
remainder='passthrough') 

# Deliberately reproduces the problem: class_weight is keyed by the ints 1 and 0
# while the labels in df['label'] are the strings 'yes' and 'no'.
pipeline = Pipeline([('transformer', transformerVectoriser),
                         ('classifier', RandomForestClassifier(class_weight = {1: 3.5, 0: 1}))])

# This fit call is the one that produces the similar error.
pipeline.fit(df[['f1','f2']],df['label'])

Let's define the weights properly and it works:

# class_weight keys now use the same string values that occur in df['label'],
# so the weights can be matched to the classes.
pipeline = Pipeline([('transformer', transformerVectoriser),
                             ('classifier', RandomForestClassifier(class_weight = {'yes': 3.5, 'no': 1}))])

pipeline.fit(df[['f1','f2']],df['label'])

Pipeline(steps=[('transformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('Vector Cat',
                                                  OneHotEncoder(handle_unknown='ignore'),
                                                  ['f2']),
                                                 ('Normalizer', Normalizer(),
                                                  ['f1'])])),
                ('classifier',
                 RandomForestClassifier(class_weight={'no': 1, 'yes': 3.5}))])