I am working on a binary classification problem using Keras.
The input shape is (None, 2, 94, 3), with channels as the last dimension.
I have the following architecture:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Masking, ConvLSTM1D, Dropout, Flatten, Dense
from tensorflow.keras.models import Model

time, n_rows, n_channels = 2, 94, 3   # matches the (None, 2, 94, 3) input shape
n_outputs = 1                         # single sigmoid unit for the binary target

input1 = Input(shape=(time, n_rows, n_channels))
masking = Masking(mask_value=-999)(input1)
convlstm = ConvLSTM1D(filters=16, kernel_size=15,
                      data_format='channels_last',
                      activation="tanh")(masking)
dropout = Dropout(0.2)(convlstm)
flatten1 = Flatten()(dropout)
outputs = Dense(n_outputs, activation='sigmoid')(flatten1)
model = Model(inputs=input1, outputs=outputs)
model.compile(loss=keras.losses.BinaryCrossentropy(),
              optimizer=tf.keras.optimizers.Adam(learning_rate=0.01))
However, when training I get this error:

Dimensions must be equal, but are 94 and 80 for '{{node conv_lstm1d/while/SelectV2}} = SelectV2[T=DT_FLOAT](conv_lstm1d/while/Tile, conv_lstm1d/while/mul_5, conv_lstm1d/while/Placeholder_2)' with input shapes: [?,94,16], [?,80,16], [?,80,16].

If I remove the masking layer the error disappears. What is the masking doing that triggers this error? Also, the only way I was able to run the above architecture was with a kernel_size of 1.
CodePudding user response:
It seems the ConvLSTM1D layer, like any recurrent layer in Keras, expects a mask of shape (samples, timesteps), according to the docs. The mask your Masking layer computes has shape (samples, time, rows), i.e. (None, 2, 94), because Masking only collapses the channel axis. With kernel_size=15 and the default padding='valid', the convolution shrinks the rows axis to 94 - 15 + 1 = 80, so the 94-wide mask no longer lines up with the 80-wide recurrent state; that is exactly the 94-versus-80 mismatch in the error, and it is also why kernel_size=1, which leaves the width at 94, was the only setting that ran. Here is one solution to fix your problem, reducing the mask over the rows axis before handing it to the layer, but I am not sure if it is the 'correct' way to go.
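To see where the 80 in the error comes from, a quick standalone sketch checks the ConvLSTM1D output width without any masking:

import tensorflow as tf

inp = tf.keras.layers.Input(shape=(2, 94, 3))
out = tf.keras.layers.ConvLSTM1D(filters=16, kernel_size=15)(inp)
# With the default padding='valid', only 94 - 15 + 1 = 80 rows survive:
print(out.shape)   # (None, 80, 16)

And the solution itself: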
import tensorflow as tf

input1 = tf.keras.layers.Input(shape=(2, 94, 3))
masking = tf.keras.layers.Masking(mask_value=-999)(input1)
# Reduce the (samples, time, rows) mask over the rows axis so the RNN
# receives the (samples, timesteps) mask it expects.
convlstm = tf.keras.layers.ConvLSTM1D(
    filters=16, kernel_size=15,
    data_format='channels_last',
    activation="tanh")(inputs=masking,
                       mask=tf.reduce_all(masking._keras_mask, axis=-1))
dropout = tf.keras.layers.Dropout(0.2)(convlstm)
flatten1 = tf.keras.layers.Flatten()(dropout)
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(flatten1)
model = tf.keras.Model(inputs=input1, outputs=outputs)
model.compile(loss=tf.keras.losses.BinaryCrossentropy(),
              optimizer=tf.keras.optimizers.Adam(learning_rate=0.01))
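You can double-check the mask shapes this relies on with a quick sketch (compute_mask is the public way to get the same mask that _keras_mask carries; the batch size of 4 is arbitrary):

import tensorflow as tf

x = tf.fill((4, 2, 94, 3), -999.0)   # dummy, fully padded batch
full_mask = tf.keras.layers.Masking(mask_value=-999).compute_mask(x)
print(full_mask.shape)                           # (4, 2, 94)
print(tf.reduce_all(full_mask, axis=-1).shape)   # (4, 2)

Because _keras_mask is a private attribute, here is a second variant that computes the boolean mask explicitly in a small custom layer instead: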
import tensorflow as tf

class Reduce(tf.keras.layers.Layer):
    def __init__(self):
        super(Reduce, self).__init__()

    def call(self, inputs):
        # True wherever at least one channel differs from the -999 sentinel,
        # then collapse the rows axis (not axis=1, which would collapse time)
        # to get the (samples, timesteps) mask.
        return tf.reduce_all(tf.reduce_any(tf.not_equal(inputs, -999), axis=-1), axis=-1)

input1 = tf.keras.layers.Input(shape=(2, 94, 3))
reduce_layer = Reduce()
boolean_mask = reduce_layer(input1)
convlstm = tf.keras.layers.ConvLSTM1D(
    filters=16, kernel_size=15,
    data_format='channels_last',
    activation="tanh")(inputs=input1, mask=boolean_mask)
dropout = tf.keras.layers.Dropout(0.2)(convlstm)
flatten1 = tf.keras.layers.Flatten()(dropout)
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(flatten1)
model = tf.keras.Model(inputs=input1, outputs=outputs)
model.compile(loss=tf.keras.losses.BinaryCrossentropy(),
              optimizer=tf.keras.optimizers.Adam(learning_rate=0.01))
model.summary(expand_nested=True)
x = tf.random.normal((50, 2, 94, 3))
y = tf.random.uniform((50,), maxval=2, dtype=tf.int32)   # binary labels in {0, 1}
model.fit(x, y)
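As a final (hypothetical) end-to-end check, fully pad one timestep of one sample with the -999 sentinel and make sure prediction still runs:

x_pad = x.numpy()
x_pad[0, 1, :, :] = -999.0          # second timestep of the first sample is all padding
print(model.predict(x_pad).shape)   # (50, 1)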