I want to tune my LSTM Model. Playing around with different optimizers, I stumbled on an issue with the Adamax optimizer. My code:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN, LSTM, Dropout
import keras_tuner
from tensorflow.keras.callbacks import EarlyStopping
def build_lstm_for_tuning(hp):
activation=['relu','sigmoid']
lossfct='binary_crossentropy'
hidden_units_first_layer = hp.Choice('neurons first layer',[32,64,128,256,512,1024])
lr = hp.Choice('learning_rate', [0.0005]), #0.005,0.001,,0.0001,5e-05,1e-05
optimizer_name = hp.Choice('optimizer', ["Adamax"])#,"Ftrl","Adadelta","Adagrad","RMSprop","Nadam","SGD"
model = Sequential()
model.add(LSTM(hidden_units_first_layer,input_shape=(24, 237),activation=activation[0]))
model.add(Dense(units=21, activation=activation[1]))
optimizer = {"Ftrl":tf.keras.optimizers.Ftrl(lr),"Adadelta":tf.keras.optimizers.Adadelta(lr),"Adagrad":tf.keras.optimizers.Adagrad(lr),\
"Adamax":tf.keras.optimizers.Adamax(lr),"RMSprop":tf.keras.optimizers.RMSprop(lr),\
"Nadam":tf.keras.optimizers.Nadam(lr),"SGD":tf.keras.optimizers.SGD(lr)}[optimizer_name]
model.compile(loss=lossfct, optimizer= optimizer,\
metrics=[tf.keras.metrics.Precision(),tf.keras.metrics.Recall(),tf.keras.metrics.TruePositives(),tf.keras.metrics.AUC(multi_label=True)])
return model
tuner = keras_tuner.RandomSearch(
build_lstm_for_tuning,
objective=keras_tuner.Objective("val_auc", direction="max"),
max_trials=20,
overwrite=True)
tuner.search(input_data['X_train'], input_data['Y_train'], epochs=1, batch_size=512,
validation_data=(input_data['X_valid'], input_data['Y_valid']))
Output:
WARNING:tensorflow:Layer lstm_1 will not use cuDNN kernels since it doesn't meet the criteria. It will use a generic GPU kernel as fallback when running on GPU.
Search: Running Trial #1
Value |Best Value So Far |Hyperparameter
1024 |? |neurons first layer
0.0005 |? |learning_rate
Adamax |? |optimizer
WARNING:tensorflow:Layer lstm will not use cuDNN kernels since it doesn't meet the criteria. It will use a generic GPU kernel as fallback when running on GPU.
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
/tmp/ipykernel_263/2162334881.py in <module>
27 overwrite=True)
28 tuner.search(input_data['X_train'], input_data['Y_train'], epochs=1, batch_size=512,
---> 29 validation_data=(input_data['X_valid'], input_data['Y_valid']))
~/.local/lib/python3.7/site-packages/keras_tuner/engine/base_tuner.py in search(self, *fit_args, **fit_kwargs)
177
178 self.on_trial_begin(trial)
--> 179 results = self.run_trial(trial, *fit_args, **fit_kwargs)
180 # `results` is None indicates user updated oracle in `run_trial()`.
181 if results is None:
~/.local/lib/python3.7/site-packages/keras_tuner/engine/tuner.py in run_trial(self, trial, *args, **kwargs)
292 callbacks.append(model_checkpoint)
293 copied_kwargs["callbacks"] = callbacks
--> 294 obj_value = self._build_and_fit_model(trial, *args, **copied_kwargs)
295
296 histories.append(obj_value)
~/.local/lib/python3.7/site-packages/keras_tuner/engine/tuner.py in _build_and_fit_model(self, trial, *args, **kwargs)
220 hp = trial.hyperparameters
221 model = self._try_build(hp)
--> 222 results = self.hypermodel.fit(hp, model, *args, **kwargs)
223 return tuner_utils.convert_to_metrics_dict(
224 results, self.oracle.objective, "HyperModel.fit()"
~/.local/lib/python3.7/site-packages/keras_tuner/engine/hypermodel.py in fit(self, hp, model, *args, **kwargs)
135 If return a float, it should be the `objective` value.
136 """
--> 137 return model.fit(*args, **kwargs)
138
139
~/.local/lib/python3.7/site-packages/keras/utils/traceback_utils.py in error_handler(*args, **kwargs)
65 except Exception as e: # pylint: disable=broad-except
66 filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67 raise e.with_traceback(filtered_tb) from None
68 finally:
69 del filtered_tb
~/.local/lib/python3.7/site-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
57 ctx.ensure_initialized()
58 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 59 inputs, attrs, num_outputs)
60 except core._NotOkStatusException as e:
61 if name is not None:
InvalidArgumentError: lr is not a scalar : [1]
[[node Adamax/Adamax/update_4/ResourceApplyAdaMax
(defined at /home/cdsw/.local/lib/python3.7/site-packages/keras/optimizer_v2/adamax.py:141)
]] [Op:__inference_train_function_1833827]
Errors may have originated from an input operation.
...
Does anyone have an idea what is causing the error and how to avoid it? My guess is that is connected to hp.Choice
. The tuning framework might change the dtype of lr
or something similar, but I did not manage to find solid proof for that.
CodePudding user response:
I found the issue in my code. The problem was that
lr = hp.Choice('learning_rate', [0.0005]), #0.005,0.001,,0.0001,5e-05,1e-05
turns lr
into a tuple due to the ,
at the end of the line. Removing the comma makes sure lr
remains an integer as expected. So
lr = hp.Choice('learning_rate', [0.0005]) #0.005,0.001,,0.0001,5e-05,1e-05
solves the problem.