I wrote the following model function:
from tensorflow.keras.layers import Dense, LSTM, Dropout, Input, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import tensorflow_addons as tfa
import tensorflow as tf
def get_model(num_features, output_size, output_bias=None):
    """Build and compile an LSTM classifier that consumes ragged sequences.

    Args:
        num_features: Number of features per time step.
        output_size: Number of units in the final sigmoid layer.
        output_bias: Optional constant used to initialise the bias of the
            output layer. When ``None`` the bias starts at zero, which is
            the Keras default for ``Dense``.

    Returns:
        The compiled Keras model. Its summary is printed as a side effect.
    """
    # Only wrap a real value: Constant(None) cannot be converted to a tensor
    # when the output layer creates its bias, so the None default would fail.
    if output_bias is not None:
        bias_initializer = tf.keras.initializers.Constant(output_bias)
    else:
        bias_initializer = "zeros"

    opt = Adam(learning_rate=0.0008)
    inputs = Input(shape=[None, num_features], dtype=tf.float32, ragged=True)

    # The ragged input is padded to a dense tensor; the mask built from the
    # row lengths tells the LSTM which time steps are padding.
    layers = LSTM(32, activation='tanh')(
        inputs.to_tensor(), mask=tf.sequence_mask(inputs.row_lengths()))
    layers = BatchNormalization()(layers)
    layers = Dropout(0.05)(layers)
    layers = Dense(32, activation='relu')(layers)
    layers = BatchNormalization()(layers)
    layers = Dropout(0.05)(layers)
    layers = Dense(32, activation='relu')(layers)
    layers = BatchNormalization()(layers)
    layers = Dropout(0.05)(layers)
    layers = Dense(output_size, activation='sigmoid',
                   bias_initializer=bias_initializer)(layers)

    model = Model(inputs, layers)
    # NOTE(review): F1Score(num_classes=2) is kept exactly as posted. With a
    # single sigmoid output it is what produces the "[2] and [1]" shape error
    # reported below; the answer's version uses num_classes=1, threshold=0.5.
    model.compile(
        loss=tf.keras.losses.BinaryCrossentropy(),
        optimizer=opt,
        metrics=[tfa.metrics.F1Score(num_classes=2)],
    )
    model.summary()
    return model
Here is the model summary:
Model: "model_5"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_6 (InputLayer) [(None, None, 11)] 0
__________________________________________________________________________________________________
input.row_lengths_5 (InstanceMe (None,) 0 input_6[0][0]
__________________________________________________________________________________________________
input.to_tensor_5 (InstanceMeth (None, None, 11) 0 input_6[0][0]
__________________________________________________________________________________________________
tf.sequence_mask_5 (TFOpLambda) (None, None) 0 input.row_lengths_5[0][0]
__________________________________________________________________________________________________
lstm_5 (LSTM) (None, 32) 5632 input.to_tensor_5[0][0]
tf.sequence_mask_5[0][0]
__________________________________________________________________________________________________
batch_normalization_15 (BatchNo (None, 32) 128 lstm_5[0][0]
__________________________________________________________________________________________________
dropout_15 (Dropout) (None, 32) 0 batch_normalization_15[0][0]
__________________________________________________________________________________________________
dense_15 (Dense) (None, 32) 1056 dropout_15[0][0]
__________________________________________________________________________________________________
batch_normalization_16 (BatchNo (None, 32) 128 dense_15[0][0]
__________________________________________________________________________________________________
dropout_16 (Dropout) (None, 32) 0 batch_normalization_16[0][0]
__________________________________________________________________________________________________
dense_16 (Dense) (None, 32) 1056 dropout_16[0][0]
__________________________________________________________________________________________________
batch_normalization_17 (BatchNo (None, 32) 128 dense_16[0][0]
__________________________________________________________________________________________________
dropout_17 (Dropout) (None, 32) 0 batch_normalization_17[0][0]
__________________________________________________________________________________________________
dense_17 (Dense) (None, 1) 33 dropout_17[0][0]
==================================================================================================
Total params: 8,161
Trainable params: 7,969
Non-trainable params: 192
__________________________________________________________________________________________________
And here are the shapes of my data:
print(train_x.shape,train_y.shape)
print(val_x.shape,val_y.shape)
(52499, None, 11) (52499,)
(17500, None, 11) (17500,)
When trying to fit my model, I get the following error:
model.fit(train_x, train_y, epochs=300, batch_size=500, validation_data=(val_x, val_y))
ValueError: Dimension 0 in both shapes must be equal, but are 2 and 1. Shapes are [2] and [1].
I can't understand what is wrong with the shapes.
CodePudding user response:
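Your model seems fine. The problem is that you are running into an open issue with `tfa.metrics.F1Score`. With `num_classes=2` the metric keeps its internal counters with shape `[2]`, but your model emits a single sigmoid output of shape `[1]`, which is exactly the mismatch the error reports. For your binary case, you will have to change the parameters of the F1Score to `tfa.metrics.F1Score(num_classes=1, threshold=0.5)`.
Here is a complete working example: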
from tensorflow.keras.layers import Dense, LSTM, Dropout, Input, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import tensorflow_addons as tfa
import tensorflow as tf
def get_model(num_features, output_size, output_bias=0.001):
    """Assemble and compile the ragged-input LSTM classifier.

    Args:
        num_features: Number of features per time step.
        output_size: Number of units in the final sigmoid layer.
        output_bias: Constant used to initialise the output layer's bias.

    Returns:
        The compiled Keras model. Its summary is printed as a side effect.
    """
    sequence_input = Input(
        shape=[None, num_features], dtype=tf.float32, ragged=True)

    # Pad the ragged batch to a dense tensor and mask out the padded steps.
    recurrent = LSTM(32, activation='tanh')
    padded = sequence_input.to_tensor()
    valid_steps = tf.sequence_mask(sequence_input.row_lengths())
    hidden = recurrent(padded, mask=valid_steps)
    hidden = BatchNormalization()(hidden)
    hidden = Dropout(0.05)(hidden)

    # Two identical fully connected stages: Dense -> BatchNorm -> Dropout.
    for _ in range(2):
        hidden = Dense(32, activation='relu')(hidden)
        hidden = BatchNormalization()(hidden)
        hidden = Dropout(0.05)(hidden)

    prediction = Dense(
        output_size,
        activation='sigmoid',
        bias_initializer=tf.keras.initializers.Constant(output_bias),
    )(hidden)

    model = Model(sequence_input, prediction)
    model.compile(
        loss=tf.keras.losses.BinaryCrossentropy(),
        optimizer=Adam(learning_rate=0.0008),
        # One class with an explicit threshold matches the single sigmoid output.
        metrics=[tfa.metrics.F1Score(num_classes=1, threshold=0.5)],
    )
    model.summary()
    return model
# Smoke test on a tiny ragged batch: row_splits [0, 2, 5] yields two
# sequences of lengths 2 and 3, each step carrying 11 features.
model = get_model(num_features=11, output_size=1)
rt = tf.RaggedTensor.from_row_splits(
    values=tf.ones([5, 11], tf.int32),
    row_splits=[0, 2, 5],
)
# One random target per sequence, shape (2, 1).
labels = tf.random.uniform((2, 1), maxval=2)
model.fit(rt, labels, epochs=300, batch_size=2, verbose=2)
Alternatively, you can define your own F1 score metric and pass it in the `metrics` list when compiling your model. See this post for more information.