I am working on feature selection based on the LOFO Importance selection algorithm. I have defined the target value but it gives an error that the target value is not defined.
import pandas as pd
from sklearn.model_selection import KFold
from lofo import LOFOImportance, Dataset, plot_importance
%matplotlib inline
# Load the CSV into a DataFrame and preview its first rows.
train_df = pd.read_csv("/content/heat_pipe.csv")
train_df.head()
# Draw a reproducible 1% random sample (fixed random_state) and sort it
# in place by "Date".
sample_df = train_df.sample(frac=0.01, random_state=0)
sample_df.sort_values("Date", inplace=True)
# Define the validation scheme: 4 shuffled folds with a fixed seed.
cv = KFold(n_splits=4, shuffle=True, random_state=0)
# # define the binary target and the features
# NOTE: `target="day_of_the_week"` is only a keyword argument passed to
# Dataset(); it does not create a variable named `target`. The bare name
# `target` inside the list comprehension is therefore undefined in this
# snippet, which is what raises
# "NameError: name 'target' is not defined".
dataset = Dataset(df=sample_df, target="day_of_the_week", features=[col for col in train_df.columns if col != target])
# Define the validation scheme and scorer. The default model is LightGBM
lofo_imp = LOFOImportance(dataset, cv=cv, scoring="roc_auc")
# Get the mean and standard deviation of the importances in pandas format.
importance_df = lofo_imp.get_importance()
# Plot the means and standard deviations of the importances.
plot_importance(importance_df, figsize=(12, 20))
Error
NameError Traceback (most recent call last)
<ipython-input-33-cca2efb2dc97> in <module>()
34
35 # # define the binary target and the features
---> 36 dataset = Dataset(df=sample_df, target="day_of_the_week", features=[col for col in train_df.columns if col != target])
37
38 # # # define the validation scheme and scorer. The default model is LightGBM
<ipython-input-33-cca2efb2dc97> in <listcomp>(.0)
34
35 # # define the binary target and the features
---> 36 dataset = Dataset(df=sample_df, target="day_of_the_week", features=[col for col in train_df.columns if col != target])
37
38 # # # define the validation scheme and scorer. The default model is LightGBM
NameError: name 'target' is not defined
CodePudding user response:
There is some confusion in how you are using
dataset = Dataset(df=sample_df, target="day_of_the_week", features=[col for col in train_df.columns if col != target])
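The target="day_of_the_week"
is a kwarg (a named parameter) for the Dataset() class. It doesn't assign "day_of_the_week" to a variable named target, so the name target used inside the list comprehension is undefined.
Try adding target = "day_of_the_week"
before that line, and pass target=target to Dataset():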
import pandas as pd
from sklearn.model_selection import KFold
from lofo import LOFOImportance, Dataset, plot_importance
%matplotlib inline
# Load the CSV into a DataFrame and preview its first rows.
train_df = pd.read_csv("/content/heat_pipe.csv")
train_df.head()
# Draw a reproducible 1% random sample (fixed random_state) and sort it
# in place by "Date".
sample_df = train_df.sample(frac=0.01, random_state=0)
sample_df.sort_values("Date", inplace=True)
# Define the validation scheme: 4 shuffled folds with a fixed seed.
cv = KFold(n_splits=4, shuffle=True, random_state=0)
# Bind the target column name to a real variable so it can be used both as
# the Dataset() keyword argument and inside the list comprehension below.
target = "day_of_the_week"
# Define the target and the features: every column except the target.
# NOTE(review): this keeps "Date" (the column used for sorting above) as a
# feature - confirm that is intended.
dataset = Dataset(df=sample_df, target=target, features=[col for col in train_df.columns if col != target])
# Define the validation scheme and scorer. The default model is LightGBM
# NOTE(review): the original comment calls the target binary and the scorer
# is "roc_auc" - confirm day_of_the_week really has only two classes.
lofo_imp = LOFOImportance(dataset, cv=cv, scoring="roc_auc")
# Get the mean and standard deviation of the importances in pandas format.
importance_df = lofo_imp.get_importance()
# Plot the means and standard deviations of the importances.
plot_importance(importance_df, figsize=(12, 20))