I'm developing a CNN for a binary image classification problem (Cats/Dogs). My goal is to use K-Fold CV (in this case I'd apply 5 folds) to find the best parameters (batch size, epochs).
so far my code is this
# Imports (assuming TensorFlow 2.x; on newer versions the scikit-learn wrapper lives in scikeras: from scikeras.wrappers import KerasClassifier)
import os
import numpy as np
import tensorflow as tf
from sklearn.model_selection import GridSearchCV, KFold
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
# Defining the loss (as a string so Keras resolves it at compile time)
loss = 'binary_crossentropy'
# Creating the grid of parameters
batches = [32, 64, 128, 256]
epochs = [20, 30, 40, 50]
params_grid = dict(batch_size = batches, epochs = epochs)
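With four batch sizes and four epoch values this grid has 4 × 4 = 16 combinations, so the 5-fold search below trains 16 × 5 = 80 models (plus one final refit on the best parameters, since GridSearchCV refits by default).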
# Creating the model
def model_cnn_three_layer(optimizer='adam'):
    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(32, (3, 3), padding="same", use_bias=False, input_shape=(64, 64, 1), activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
        tf.keras.layers.Conv2D(32, (3, 3), padding="same", use_bias=False, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
        tf.keras.layers.Dropout(0.25),
        tf.keras.layers.Conv2D(64, (3, 3), padding="same", use_bias=False, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
        tf.keras.layers.Dropout(0.25),
        tf.keras.layers.Conv2D(64, (3, 3), padding="same", use_bias=False, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
        tf.keras.layers.Dropout(0.25),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, use_bias=False, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(0.5),
        # note: a 2-unit softmax expects one-hot labels; with binary_crossentropy the usual head is Dense(1, activation='sigmoid')
        tf.keras.layers.Dense(2, activation='softmax')
    ])
    # Compiling the model
    model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
    model.summary()
    return model
# Create the sklearn CV model for the network
model_cnn_three_layer_CV = KerasClassifier(build_fn=model_cnn_three_layer, verbose=1)
grid = GridSearchCV(estimator=model_cnn_three_layer_CV,
                    param_grid=params_grid,
                    cv=5)
grid_result = grid.fit(X_train, y_train)
# Print results
print(f'Best Accuracy for {grid_result.best_score_:.4} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f'mean={mean:.4}, std={stdev:.4} using {param}')
Is this approach correct?
If I wanted to compute the CV 'manually' (not using sklearn), how would I change the code? I found an answer to a similar question that does something like this:
# parameters
epochs = 20
batch_size = 64
# Defining callback(s)
early_callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)
# Defining plots
legend_size = 14
# Define the K-fold Cross Validator
num_folds = 5
kfold = KFold(n_splits=num_folds, shuffle=True)
loss_cnn_three_layer = []
acc_cnn_three_layer = []
fold_no = 1
for train, test in kfold.split(X, y):
    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(32, (3, 3), padding="same", use_bias=False, input_shape=(64, 64, 1), activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
        tf.keras.layers.Conv2D(32, (3, 3), padding="same", use_bias=False, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
        tf.keras.layers.Dropout(0.25),
        tf.keras.layers.Conv2D(64, (3, 3), padding="same", use_bias=False, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
        tf.keras.layers.Dropout(0.25),
        tf.keras.layers.Conv2D(64, (3, 3), padding="same", use_bias=False, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
        tf.keras.layers.Dropout(0.25),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, use_bias=False, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(2, activation='softmax')
    ])
    # compiling the model
    model.compile(optimizer='adam', loss=loss, metrics=['accuracy'])
    net_name = "CNN_three_layers_batch_and_dropout"
    model.summary()
    # log dir for saving TensorBoard logs
    logdir = os.path.join("CNN_nets", net_name)
    # callback to run TensorBoard
    tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)
    callbacks = [tensorboard_callback, early_callback]
    # note: this fits on the full training set every time; the fold indices (train, test) are never used
    history = model.fit(X_train, y_train, epochs=epochs, validation_data=(X_test, y_test),
                        batch_size=batch_size, callbacks=callbacks, verbose=1)
    scores = model.evaluate(X_test, y_test)
    print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1] * 100}%')
    acc_cnn_three_layer.append(scores[1] * 100)
    loss_cnn_three_layer.append(scores[0])
    # Increase fold number
    fold_no = fold_no + 1
# == Provide average scores ==
print('------------------------------------------------------------------------')
print('Score per fold')
for i in range(0, len(loss_cnn_three_layer)):
    print('------------------------------------------------------------------------')
    print(f'> Fold {i + 1} - Loss: {loss_cnn_three_layer[i]} - Accuracy: {acc_cnn_three_layer[i]}%')
print('------------------------------------------------------------------------')
print('Average scores for all folds:')
print(f'> Accuracy: {np.mean(acc_cnn_three_layer)} (+/- {np.std(acc_cnn_three_layer)})')
print(f'> Loss: {np.mean(loss_cnn_three_layer)}')
print('------------------------------------------------------------------------')
But I'm not convinced by this approach, because it simply runs the model 5 times on the same data rather than on different splits of the training data. How would this be changed to actually run the CV on split portions of the training data and then evaluate on the test data? Moreover, how would I loop this last network over the values of the parameters of the grid?
CodePudding user response:
from sklearn.model_selection import StratifiedKFold as kfold
x = ...  # features
y = ...  # labels
batches = [32, 64, 128, 256]
epochs = [20, 30, 40, 50]
splits = 5
kf = kfold(splits, shuffle=True)
indices = kf.split(x, y)
loss_cnn_three_layer = []
acc_cnn_three_layer = []
preds = []
for train, test in indices:
    x_train, x_test, y_train, y_test = x[train], x[test], y[train], y[test]
    # do model stuff
    history = model.fit(x_train, y_train, shuffle=True, epochs=10, verbose=1)
    prediction = model.predict(x_test)
    loss_cnn_three_layer.append(history.history["loss"])
    acc_cnn_three_layer.append(history.history["accuracy"])
    preds.append(prediction)
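Note that history.history["loss"] and history.history["accuracy"] record the training metrics per epoch, not the score on the held-out fold. A minimal sketch of scoring each fold on its test split instead (same loop as above, swapping what gets appended):
    test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0)
    loss_cnn_three_layer.append(test_loss)
    acc_cnn_three_layer.append(test_acc)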
Edit to include iterable of parameters:
from sklearn.model_selection import StratifiedKFold as kfold
x = # features
y = # labels
splits = 5
kf = kfold(splits, shuffle=True)
indices = kf.split(x, y)
loss_cnn_three_layer = []
acc_cnn_three_layer = []
preds = []
for batch, epoch in zip(batches, epochs):  # loop variable renamed so it doesn't shadow the epochs list
    for train, test in indices:
        x_train, x_test, y_train, y_test = x[train], x[test], y[train], y[test]
        # do model stuff
        history = model.fit(x_train, y_train, shuffle=True, batch_size=batch, epochs=epoch, verbose=1)
        prediction = model.predict(x_test)
        loss_cnn_three_layer.append(history.history["loss"])
        acc_cnn_three_layer.append(history.history["accuracy"])
        preds.append(prediction)
If you want to iterate over the batch and epoch values inside each fold instead, just swap the order of the two for loops but leave everything else within them, as sketched below.
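A minimal sketch of the swapped nesting (assuming the same x, y, kf, batches and epochs as above; kf.split returns a one-shot generator, so it is materialized into a list to be safe):
splits_list = list(kf.split(x, y))
for train, test in splits_list:
    for batch, epoch in zip(batches, epochs):
        x_train, x_test, y_train, y_test = x[train], x[test], y[train], y[test]
        # build, compile and fit a fresh model here with batch_size=batch, epochs=epoch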
If you want to use the params_grid dictionary instead, do something like this:
for i, j in zip(*params_grid.values()):  # assuming batch and epoch lists have the same length
    # where i is batch, and j is epochs
    # do stuff
If you wanted to train the model based on the number of epochs per batch size (or vice versa*), do something like this:
for k, l in [(i, j) for j in epochs for i in batches]:  # swap batches and epochs for vice versa*
    # where k is batch, and l is epochs
    # do stuff
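The list comprehension builds the full Cartesian product of the two lists; a standard-library alternative (not in the original answer) that does the same:
from itertools import product
for k, l in product(batches, epochs):
    # where k is batch, and l is epochs
    # do stuff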
CodePudding user response:
I tried the following solution
loss_cnn_three_layer = []
acc_cnn_three_layer = []
# create the first loop for batches and epochs
for batch, epoch in zip(batches, epochs):
    # second loop for training the model on each split
    for train, test in indices:
        X_train, X_test, y_train, y_test = X[train], X[test], y[train], y[test]
        # model = tf.keras.Sequential([ ... ])
        # compiling the model
        model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
        net_name = "CNN_three_layers_batch_and_dropout"
        model.summary()
        # log dir for saving TensorBoard logs
        logdir = os.path.join("CNN_nets", net_name)
        # callback to run TensorBoard
        tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)
        callbacks = [tensorboard_callback, early_callback]
        # fitting the network
        history = model.fit(X_train, y_train, epochs=epoch,
                            batch_size=batch, callbacks=callbacks, verbose=1)
        # evaluating the performance
        scores = model.evaluate(X_test, y_test)
        # printing accuracy and loss
        print(f'Score per batch {batch} and epochs {epoch}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1] * 100}%')
        acc_cnn_three_layer.append(scores[1] * 100)
        loss_cnn_three_layer.append(scores[0])
However, by doing this it runs the model and the cross-validation only on the first combination of batch size and epochs (32, 20) and then stops.
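This is most likely because kf.split(X, y) returns a one-shot generator: the inner for train, test in indices: loop exhausts it during the first (batch, epoch) pair, so every later pair iterates over an empty sequence. A minimal fix (assuming the same kf, X and y as above) is to materialize the splits once, or call kf.split inside the outer loop:
indices = list(kf.split(X, y))  # a list can be iterated repeatedly, unlike the generator
for batch, epoch in zip(batches, epochs):
    for train, test in indices:
        # ... rest of the loop unchanged
Note also that zip(batches, epochs) only pairs the lists elementwise (four combinations), while the GridSearchCV version above tries all sixteen; the Cartesian-product loop from the answer gives the full grid.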