I am fairly new to both the mlxtend and Keras packages, so please bear with me. I have been trying to combine the predictions of various models, i.e., Random Forest, Logistic Regression, and a Neural Network, using StackingCVClassifier. I am trying to stack these classifiers so that each one operates on a different feature subset. Please see the code below.
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from tensorflow import keras
from keras import layers
from keras.constraints import maxnorm
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, Input
from mlxtend.classifier import StackingCVClassifier
from mlxtend.feature_selection import ColumnSelector
from sklearn.pipeline import make_pipeline
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.neural_network import MLPClassifier
X, y = make_classification()
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=0)
# defining neural network model
def create_model():
    # create model
    model = Sequential()
    model.add(Dense(10, input_dim=10, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Flatten())
    optimizer = keras.optimizers.RMSprop(lr=0.001)
    model.add(Dense(units=1, activation='sigmoid'))
    # compile model
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer, metrics=[keras.metrics.AUC(), 'accuracy'])
    return model
# using KerasClassifier on the neural network model
NN_clf = KerasClassifier(build_fn=create_model, epochs=5, batch_size=5)
NN_clf._estimator_type = "classifier"
# stacking of classifiers that operate on different feature subsets
pipeline1 = make_pipeline(ColumnSelector(cols=(np.arange(0, 5, 1))), LogisticRegression())
pipeline2 = make_pipeline(ColumnSelector(cols=(np.arange(5, 10, 1))), RandomForestClassifier())
pipeline3 = make_pipeline(ColumnSelector(cols=(np.arange(10, 20, 1))), NN_clf)
# final stacking
clf = StackingCVClassifier(classifiers=[pipeline1, pipeline2, pipeline3], meta_classifier=MLPClassifier())
clf.fit(X_train, y_train)
print("Stacking model score: %.3f" % clf.score(X_val, y_val))
However, I am getting this error:
ValueError Traceback (most recent call last)
<ipython-input-11-ef342536824f> in <module>
42 # final stacking
43 clf = StackingCVClassifier(classifiers=[pipeline1, pipeline2, pipeline3], meta_classifier=MLPClassifier())
---> 44 clf.fit(X_train, y_train)
45
46 print("Stacking model score: %.3f" % clf.score(X_val, y_val))
~\anaconda3\lib\site-packages\mlxtend\classifier\stacking_cv_classification.py in fit(self, X, y, groups, sample_weight)
282 meta_features = prediction
283 else:
--> 284 meta_features = np.column_stack((meta_features, prediction))
285
286 if self.store_train_meta_features:
~\anaconda3\lib\site-packages\numpy\core\overrides.py in column_stack(*args, **kwargs)
~\anaconda3\lib\site-packages\numpy\lib\shape_base.py in column_stack(tup)
654 arr = array(arr, copy=False, subok=True, ndmin=2).T
655 arrays.append(arr)
--> 656 return _nx.concatenate(arrays, 1)
657
658
~\anaconda3\lib\site-packages\numpy\core\overrides.py in concatenate(*args, **kwargs)
ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 3 dimension(s)
Please help me. Thanks!
CodePudding user response:
The error is happening because you are combining predictions from traditional ML models and a DL model. The ML models give predictions with shape (80, 1), whereas the DL model predicts with shape (80, 1, 1), so there is a mismatch when the predictions are stacked together.
The common workaround is to strip the extra dimension from the DL model's predictions so they become (80, 1) instead of (80, 1, 1).
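To see the mismatch in isolation, here is a minimal numpy sketch (the shapes are only illustrative, chosen to mirror the traceback):

import numpy as np

ml_preds = np.zeros((80, 1))     # e.g. stacked output of a scikit-learn model
dl_preds = np.zeros((80, 1, 1))  # e.g. stacked output of the Keras model

try:
    np.column_stack((ml_preds, dl_preds))
except ValueError as e:
    print(e)  # "all the input arrays must have same number of dimensions..."

# squeezing the extra axis makes the shapes compatible again
print(np.column_stack((ml_preds, dl_preds.squeeze(axis=1))).shape)  # (80, 2)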
So, open the py file located at:
anaconda3\lib\site-packages\mlxtend\classifier\stacking_cv_classification.py
At lines 280 and 356, outside of the if block, add this:
prediction = prediction.squeeze(axis=1) if len(prediction.shape) > 2 else prediction
So, it will look something like this:
...
...
...
if not self.use_probas:
    prediction = prediction[:, np.newaxis]
elif self.drop_proba_col == "last":
    prediction = prediction[:, :-1]
elif self.drop_proba_col == "first":
    prediction = prediction[:, 1:]
prediction = prediction.squeeze(axis=1) if len(prediction.shape) > 2 else prediction
if meta_features is None:
    meta_features = prediction
else:
    meta_features = np.column_stack((meta_features, prediction))
...
...
...
for model in self.clfs_:
    if not self.use_probas:
        prediction = model.predict(X)[:, np.newaxis]
    else:
        if self.drop_proba_col == "last":
            prediction = model.predict_proba(X)[:, :-1]
        elif self.drop_proba_col == "first":
            prediction = model.predict_proba(X)[:, 1:]
        else:
            prediction = model.predict_proba(X)
    prediction = prediction.squeeze(axis=1) if len(prediction.shape) > 2 else prediction
    per_model_preds.append(prediction)
...
...
...
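Note that editing a file inside site-packages is fragile: the change disappears on the next mlxtend upgrade. A less invasive sketch of the same idea (the subclass name FlatKerasClassifier is mine, not part of any library) is to wrap the Keras classifier so it returns 1-D predictions the way scikit-learn estimators do:

import numpy as np
from keras.wrappers.scikit_learn import KerasClassifier

class FlatKerasClassifier(KerasClassifier):
    def predict(self, x, **kwargs):
        # Keras returns an (n, 1) column; flatten it to (n,) so that
        # mlxtend's prediction[:, np.newaxis] yields (n, 1), not (n, 1, 1)
        preds = super().predict(x, **kwargs)
        return np.asarray(preds).reshape(-1)

Then NN_clf = FlatKerasClassifier(build_fn=create_model, epochs=5, batch_size=5) can be used in pipeline3 without touching mlxtend's source.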
CodePudding user response:
Prakash's answer raises really good points.
If you want to get this running without too many changes, you can roll your own version of a scikit-learn BaseEstimator/ClassifierMixin object, or wrap the model in the recommended KerasClassifier object.
For example, you can roll your own estimator like this:
class MyKerasModel(BaseEstimator, ClassifierMixin):

    def fit(self, X, y):
        model = keras.Sequential()
        model.add(layers.Input(shape=(X.shape[1],)))
        model.add(layers.Dense(10, activation='relu'))
        model.add(layers.Dropout(0.2))
        model.add(layers.Flatten())
        model.add(layers.Dense(units=1, activation='sigmoid'))
        optimizer = keras.optimizers.RMSprop(learning_rate=0.001)
        model.compile(loss='binary_crossentropy',
                      optimizer=optimizer, metrics=[keras.metrics.AUC(), 'accuracy'])
        model.fit(X, y)
        self.model = model
        return self

    def predict(self, X):
        return (self.model.predict(X) > 0.5).flatten()
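A note on the design: predict returns (self.model.predict(X) > 0.5).flatten(), i.e. 1-D hard class labels, which is what StackingCVClassifier consumes when use_probas=False (its default), and that flattening is what avoids the dimension mismatch from the question. If you wanted use_probas=True, the custom estimator would also need a predict_proba method.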
And putting all the pieces together allows you to stack the predictions:
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers
from mlxtend.classifier import StackingCVClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
X, y = make_classification()
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=0)
class MyKerasModel(BaseEstimator, ClassifierMixin):

    def fit(self, X, y):
        model = keras.Sequential()
        model.add(layers.Input(shape=(X.shape[1],)))
        model.add(layers.Dense(10, activation='relu'))
        model.add(layers.Dropout(0.2))
        model.add(layers.Flatten())
        model.add(layers.Dense(units=1, activation='sigmoid'))
        optimizer = keras.optimizers.RMSprop(learning_rate=0.001)
        model.compile(loss='binary_crossentropy',
                      optimizer=optimizer, metrics=[keras.metrics.AUC(), 'accuracy'])
        model.fit(X, y)
        self.model = model
        return self

    def predict(self, X):
        return (self.model.predict(X) > 0.5).flatten()
clf = StackingCVClassifier(
    classifiers=[RandomForestClassifier(), LogisticRegression(), MyKerasModel()],
    meta_classifier=MLPClassifier(),
).fit(X_train, y_train)
print("Stacking model score: %.3f" % clf.score(X_val, y_val))
Output:
2/2 [==============================] - 0s 11ms/step - loss: 0.8580 - auc: 0.5050 - accuracy: 0.5500
2/2 [==============================] - 0s 1ms/step
2/2 [==============================] - 0s 4ms/step - loss: 0.6955 - auc_1: 0.5777 - accuracy: 0.5750
2/2 [==============================] - 0s 1ms/step
3/3 [==============================] - 0s 3ms/step - loss: 0.7655 - auc_2: 0.6037 - accuracy: 0.6125
Stacking model score: 1.000