Shape mismatch in a TensorFlow model - CodePudding

I wrote the following function to build my model:

from tensorflow.keras.layers import Dense, LSTM, Dropout, Input, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import tensorflow_addons as tfa
import tensorflow as tf

def get_model(num_features, output_size, output_bias=None):
    """Build, compile and summarise an LSTM classifier over ragged sequences.

    Args:
        num_features: Number of features carried by every time step.
        output_size: Number of units in the final sigmoid layer.
        output_bias: Constant used to initialise the bias of the final layer.

    Returns:
        The compiled Keras model. Its summary is printed as a side effect.
    """

    def regularize(tensor):
        # Each stage is followed by batch normalisation and a light dropout.
        normalized = BatchNormalization()(tensor)
        return Dropout(0.05)(normalized)

    final_bias_init = tf.keras.initializers.Constant(output_bias)

    # Ragged input: a variable number of steps, num_features values per step.
    inputs = Input(shape=[None, num_features], dtype=tf.float32, ragged=True)

    # The LSTM reads the padded dense view; the mask marks the real steps.
    encoder = LSTM(32, activation='tanh')
    padded = inputs.to_tensor()
    step_mask = tf.sequence_mask(inputs.row_lengths())
    hidden = regularize(encoder(padded, mask=step_mask))

    # Two identical fully connected stages.
    for _ in range(2):
        hidden = regularize(Dense(32, activation='relu')(hidden))

    outputs = Dense(
        output_size,
        activation='sigmoid',
        bias_initializer=final_bias_init,
    )(hidden)

    model = Model(inputs, outputs)
    model.compile(
        loss=tf.keras.losses.BinaryCrossentropy(),
        optimizer=Adam(learning_rate=0.0008),
        metrics=[tfa.metrics.F1Score(num_classes=2)],
    )
    model.summary()
    return model

Here is the model summary:

Model: "model_5"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
==================================================================================================
input_6 (InputLayer)            [(None, None, 11)]   0                                            
__________________________________________________________________________________________________
input.row_lengths_5 (InstanceMe (None,)              0           input_6[0][0]                    
__________________________________________________________________________________________________
input.to_tensor_5 (InstanceMeth (None, None, 11)     0           input_6[0][0]                    
__________________________________________________________________________________________________
tf.sequence_mask_5 (TFOpLambda) (None, None)         0           input.row_lengths_5[0][0]        
__________________________________________________________________________________________________
lstm_5 (LSTM)                   (None, 32)           5632        input.to_tensor_5[0][0]          
                                                                 tf.sequence_mask_5[0][0]         
__________________________________________________________________________________________________
batch_normalization_15 (BatchNo (None, 32)           128         lstm_5[0][0]                     
__________________________________________________________________________________________________
dropout_15 (Dropout)            (None, 32)           0           batch_normalization_15[0][0]     
__________________________________________________________________________________________________
dense_15 (Dense)                (None, 32)           1056        dropout_15[0][0]                 
__________________________________________________________________________________________________
batch_normalization_16 (BatchNo (None, 32)           128         dense_15[0][0]                   
__________________________________________________________________________________________________
dropout_16 (Dropout)            (None, 32)           0           batch_normalization_16[0][0]     
__________________________________________________________________________________________________
dense_16 (Dense)                (None, 32)           1056        dropout_16[0][0]                 
__________________________________________________________________________________________________
batch_normalization_17 (BatchNo (None, 32)           128         dense_16[0][0]                   
__________________________________________________________________________________________________
dropout_17 (Dropout)            (None, 32)           0           batch_normalization_17[0][0]     
__________________________________________________________________________________________________
dense_17 (Dense)                (None, 1)            33          dropout_17[0][0]                 
==================================================================================================
Total params: 8,161
Trainable params: 7,969
Non-trainable params: 192
__________________________________________________________________________________________________

And here are the shapes of my data:

# Print the feature and label shapes of the training and validation sets.
print(train_x.shape,train_y.shape)
print(val_x.shape,val_y.shape)

(52499, None, 11) (52499,)
(17500, None, 11) (17500,)

When trying to fit my model, I get the following error:

# Train for 300 epochs in batches of 500, using (val_x, val_y) as validation data.
model.fit(train_x, train_y, epochs=300, batch_size=500, validation_data=(val_x, val_y))

ValueError: Dimension 0 in both shapes must be equal, but are 2 and 1. Shapes are [2] and [1].

I can't understand what is wrong with the shapes.

CodePudding user response：

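Your model seems fine. The problem is that you are running into an open issue with tfa.metrics.F1Score: with num_classes=2 the metric keeps per-class state of shape [2], but your model has a single sigmoid output, so the values it tries to accumulate have shape [1] — hence the error about shapes [2] and [1]. For your binary case, you will have to change the parameters of the F1Score to tfa.metrics.F1Score(num_classes=1, threshold=0.5), where threshold=0.5 turns the sigmoid output into a 0/1 prediction. Here is a complete working example: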

from tensorflow.keras.layers import Dense, LSTM, Dropout, Input, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import tensorflow_addons as tfa
import tensorflow as tf

def get_model(num_features, output_size, output_bias=0.001):
    """Build, compile and summarise an LSTM classifier over ragged sequences.

    Args:
        num_features: Number of features carried by every time step.
        output_size: Number of sigmoid units in the final layer. The F1
            metric is sized from this value so both always agree.
        output_bias: Constant used to initialise the bias of the final
            layer. ``None`` falls back to Keras' default zero bias.

    Returns:
        The compiled Keras model. Its summary is printed as a side effect.
    """
    # Constant(None) cannot produce a bias tensor, so treat None as
    # "no custom bias" and use the Dense layer's default initializer.
    if output_bias is None:
        bias_initializer = 'zeros'
    else:
        bias_initializer = tf.keras.initializers.Constant(output_bias)

    opt = Adam(learning_rate=0.0008)

    # Ragged input: a variable number of steps, num_features values per step.
    inputs = Input(shape=[None, num_features], dtype=tf.float32, ragged=True)
    # The LSTM reads the padded dense view; the mask marks the real steps.
    layers = LSTM(32, activation='tanh')(
        inputs.to_tensor(), mask=tf.sequence_mask(inputs.row_lengths()))

    layers = BatchNormalization()(layers)
    layers = Dropout(0.05)(layers)

    layers = Dense(32, activation='relu')(layers)
    layers = BatchNormalization()(layers)
    layers = Dropout(0.05)(layers)

    layers = Dense(32, activation='relu')(layers)
    layers = BatchNormalization()(layers)
    layers = Dropout(0.05)(layers)

    layers = Dense(output_size, activation='sigmoid',
                   bias_initializer=bias_initializer)(layers)
    model = Model(inputs, layers)

    # The metric's class count must equal the width of the model output;
    # a mismatch is what raises "Shapes are [2] and [1]". For a single
    # sigmoid unit this is F1Score(num_classes=1, threshold=0.5).
    f1_metric = tfa.metrics.F1Score(num_classes=output_size, threshold=0.5)
    model.compile(loss=tf.keras.losses.BinaryCrossentropy(), optimizer=opt,
                  metrics=[f1_metric])
    model.summary()
    return model


# Minimal end-to-end run on a toy batch of two ragged sequences.
model = get_model(11, 1)
# Row splits [0, 2, 5] give sequences of length 2 and 3; values are float32
# to match the dtype declared on the model's Input layer.
rt = tf.RaggedTensor.from_row_splits(values=tf.ones([5, 11], tf.float32),
                                     row_splits=[0, 2, 5])
# Binary targets must be exactly 0 or 1, so draw integers in {0, 1} and cast
# them (a float uniform draw with maxval=2 would yield values in [0, 2)).
labels = tf.cast(tf.random.uniform((2, 1), maxval=2, dtype=tf.int32), tf.float32)
model.fit(rt, labels, epochs=300, batch_size=2, verbose=2)

Alternatively, you can define your own F1 score function and pass it as a metric to your model. See this post for more information.