Input 0 of layer "lstm" is incompatible with the layer: expected shape=(128, None, 256), found shape=(32, 187, 256)

Below is the error traceback I get when I call predict() on my trained model:


ValueError                                Traceback (most recent call last)
\<ipython-input-51-5ae18e06838a\> in \<module\>
7 for input_example_batch, target_example_batch in ds_series_batch_test:
8
\----\> 9   pred=model.predict(input_example_batch)
10   pred_max=tf.argmax(tf.nn.softmax(pred),2).numpy().flatten()
11   y_true=target_example_batch.numpy().flatten()

1 frames
/usr/local/lib/python3.7/dist-packages/keras/engine/training.py in tf__predict_function(iterator)
13                 try:
14                     do_return = True
\---\> 15                     retval\_ = ag_\_.converted_call(ag_\_.ld(step_function), (ag_\_.ld(self), ag_\_.ld(iterator)), None, fscope)
16                 except:
17                     do_return = False

ValueError: in user code:

    File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1845, in predict_function  *
        return step_function(self, iterator)
    File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1834, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1823, in run_step  **
        outputs = model.predict_step(data)
    File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1791, in predict_step
        return self(x, training=False)
    File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/usr/local/lib/python3.7/dist-packages/keras/engine/input_spec.py", line 264, in assert_input_compatibility
        raise ValueError(f'Input {input_index} of layer "{layer_name}" is '
    
    ValueError: Exception encountered when calling layer "sequential" (type Sequential).
    
    Input 0 of layer "lstm" is incompatible with the layer: expected shape=(128, None, 256), found shape=(32, 187, 256)
    
    Call arguments received by layer "sequential" (type Sequential):
      • inputs=tf.Tensor(shape=(32, 187), dtype=int32)
      • training=False
      • mask=None

below is the code used for generating train and test data

X_total, X_test, y_total, y_test = train_test_split(train, test,
    test_size=0.2, shuffle = True, random_state = 8)

X_train, X_val, y_train, y_val = train_test_split(X_total, y_total,
    test_size=0.25, shuffle = True, random_state = 8)  # 0.25 x 0.8 = 0.2

print("X_train shape: {}".format(X_train.shape))
print("X_test shape: {}".format(X_test.shape))
print("X_val shape: {}".format(X_val.shape))
print("y_train shape: {}".format(y_train.shape))
print("y_test shape: {}".format(y_test.shape))
print("y val shape: {}".format(y_val.shape))

X_train shape: (173424,)
X_test shape: (57809,)
X_val shape: (57809,)
y_train shape: (173424,)
y_test shape: (57809,)
y val shape: (57809,)

# Pair every input sequence with its target sequence, one list per split.
# Each entry is an (input, target) tuple, in the same order as the split arrays.
train_formatted = list(zip(X_train, y_train))
test_formatted = list(zip(X_test, y_test))
valid_formatted = list(zip(X_val, y_val))
# training generator
def gen_train_series():
    """Yield the (input, target) pairs of ``train_formatted`` in order."""
    for features, targets in train_formatted:
        yield features, targets

# validation generator
def gen_valid_series():
    """Yield the (input, target) pairs of ``valid_formatted`` in order."""
    for features, targets in valid_formatted:
        yield features, targets

# test generator
def gen_test_series():
    """Yield the (input, target) pairs of ``test_formatted`` in order."""
    for features, targets in test_formatted:
        yield features, targets
  
  
# Create Dataset objects for the train, validation and test sets.
# Each generator yields an (input, target) pair; both components are declared
# as int32 and their shapes are left unspecified (None) because the sequences
# have different lengths.
series = tf.data.Dataset.from_generator(gen_train_series,output_types=(tf.int32, tf.int32),output_shapes = ((None, None)))
series_valid = tf.data.Dataset.from_generator(gen_valid_series,output_types=(tf.int32, tf.int32),output_shapes = ((None, None)))
series_test = tf.data.Dataset.from_generator(gen_test_series,output_types=(tf.int32, tf.int32),output_shapes = ((None, None)))

# Number of sequences per batch; also passed to build_model() as the fixed
# batch dimension of the model.
BATCH_SIZE = 128
# Size of the shuffle buffer used for the training set.
BUFFER_SIZE=1000

# Create padded batches for the train, validation and test sets.
# padded_shapes=([None], [None]) pads every input and every target to the
# longest sequence in its batch. drop_remainder=True discards a final batch
# that has fewer than BATCH_SIZE elements, so every batch has exactly
# BATCH_SIZE rows. Only the training set is shuffled.
ds_series_batch = series.shuffle(BUFFER_SIZE).padded_batch(BATCH_SIZE, padded_shapes=([None], [None]), drop_remainder=True)
ds_series_batch_valid = series_valid.padded_batch(BATCH_SIZE, padded_shapes=([None], [None]), drop_remainder=True)
ds_series_batch_test = series_test.padded_batch(BATCH_SIZE, padded_shapes=([None], [None]), drop_remainder=True)

# print example batches
for input_example_batch, target_example_batch in ds_series_batch_valid.take(1):
  print(input_example_batch)
  print(target_example_batch)

# below are the shapes of my input and target batch tensor shape

input_example_batch - shape passed to predict() -  as follows
tf.Tensor(
[[36 26 37 ...  0  0  0]
 [40 40 43 ...  0  0  0]
 [26 39 26 ...  0  0  0]
 ...
 [11  8 12 ...  0  0  0]
 [44 28 33 ...  0  0  0]
 [46  1 38 ...  0  0  0]], shape=(128, 160), dtype=int32)
tf.Tensor(
[[6 6 6 ... 0 0 0]
 [6 6 6 ... 0 0 0]
 [6 6 6 ... 0 0 0]
 ...
 [2 6 2 ... 0 0 0]
 [2 2 2 ... 0 0 0]
 [2 6 2 ... 0 0 0]], shape=(128, 160), dtype=int32)

I am training my model as follows -

vocab_size = len(vocabulary) #53

# The embedding dimension
embedding_dim = 256

# Number of RNN units
rnn_units = 1024

label_size = len(labels)  # - 0 to 7

def build_model(vocab_size,label_size, embedding_dim, rnn_units, batch_size):
    """Build the Embedding -> LSTM -> Dense sequence-labelling model.

    Args:
        vocab_size: Number of rows in the embedding table.
        label_size: Number of output units of the final Dense layer.
        embedding_dim: Width of each embedding vector.
        rnn_units: Number of LSTM units.
        batch_size: Fixed batch dimension baked into the model's input shape.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model. The Dense layer has no
        activation, so the model emits raw scores for each time step.
    """
    layers = tf.keras.layers

    # Index 0 is treated as padding and masked out (mask_zero=True); the
    # batch dimension is fixed while the sequence length stays variable.
    embedding = layers.Embedding(
        vocab_size,
        embedding_dim,
        batch_input_shape=[batch_size, None],
        mask_zero=True,
    )
    # Stateful LSTM that returns one output per time step.
    recurrent = layers.LSTM(
        rnn_units,
        return_sequences=True,
        stateful=True,
        recurrent_initializer='glorot_uniform',
    )
    classifier = layers.Dense(label_size)

    return tf.keras.Sequential([embedding, recurrent, classifier])


# vocab_size + 1: the Embedding layer uses mask_zero=True, so index 0 is
# reserved for padding and the embedding table needs one extra row on top of
# the real vocabulary.
# passing label_size - because it already includes - 'OTHER' label

model = build_model(
    vocab_size=vocab_size + 1,
    label_size=label_size,
    embedding_dim=embedding_dim,
    rnn_units=rnn_units,
    batch_size=BATCH_SIZE,
)

# define loss function
def loss(labels, logits):
    """Return the sparse categorical cross-entropy of ``logits`` vs ``labels``.

    ``logits`` are treated as raw, un-normalised scores (from_logits=True).
    """
    crossentropy = tf.keras.losses.sparse_categorical_crossentropy
    return crossentropy(labels, logits, from_logits=True)

model.compile(optimizer='adam', loss=loss,metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])


checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True, save_freq = 'epoch')

# fitting the model as follows

# using just 1 epoch - as I am debugging

EPOCHS = 1
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
      filepath=checkpoint_prefix,
      save_weights_only=True, save_freq = 'epoch')
  
ic("Fitting the Model...")
history = model.fit(ds_series_batch, epochs=EPOCHS, validation_data=ds_series_batch_valid,callbacks=[checkpoint_callback])

below displayed is the model summary

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 embedding (Embedding)       (128, None, 256)          13824     
                                                                 
 lstm (LSTM)                 (128, 1024)               5246976   
                                                                 
 dense (Dense)               (128, 7)                  7175      
                                                                 
=================================================================
Total params: 5,267,975
Trainable params: 5,267,975
Non-trainable params: 0
_______________________________

from sklearn.metrics import classification_report, confusion_matrix

preds = np.array([])
y_trues= np.array([])

# iterate through test set, make predictions based on trained model
for input_example_batch, target_example_batch in ds_series_batch_test:

  # The model was built with a fixed batch dimension (BATCH_SIZE) and a
  # stateful LSTM. predict() re-batches its input using batch_size=32 by
  # default, which splits each 128-row batch into 32-row chunks and triggers
  # "expected shape=(128, None, 256), found shape=(32, 187, 256)".
  # Passing batch_size=BATCH_SIZE keeps every batch at the size the model expects.
  pred=model.predict(input_example_batch, batch_size=BATCH_SIZE)
  pred_max=tf.argmax(tf.nn.softmax(pred),2).numpy().flatten()
  y_true=target_example_batch.numpy().flatten()

the above code is what I use to call predict() - which results in the error mentioned in the title

The code I am following is at the link below, which tests out a character-level RNN implementation: http://alexminnaar.com/2019/08/22/ner-rnns-tensorflow.html I did pretty much the same as the Colab notebook there, but trained on different input data.

Many of the suggested solutions were not specific to this problem. The one I tried was setting return_sequences = True, which I already had when building the model. Nothing has worked so far. With the above code, training works fine, but I am unable to run predict() because of the shape mismatch. I am very new to TensorFlow, so any help in understanding the issue and ways to resolve it would be greatly appreciated. Thank you in advance.

    encoded_input_features                              output_features
0   [11, 11, 1, 39, 40, 43, 45, 33, 1, 44, 45, 43,...   [2, 2, 6, 3, 3, 3, 3, 3, 6, 3, 3, 3, 3, 3, 3, ...
1   [29, 5, 10, 5, 44, 33, 34, 47, 26, 37, 34, 36,...   [6, 6, 2, 6, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 2, ...
2   [10, 14, 5, 41, 43, 26, 28, 33, 34, 1, 27, 46,...   [2, 2, 6, 2, 2, 2, 2, 2, 2, 6, 2, 2, 2, 2, 2, ...
3   [48, 51, 1, 14, 16, 10, 1, 41, 26, 29, 26, 38,...   [6, 6, 6, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...
4   [32, 31, 1, 9, 11, 5, 45, 15, 5, 32, 43, 26, 2...   [6, 6, 6, 2, 2, 6, 2, 2, 6, 2, 2, 2, 2, 2, 2, ...
... ... ...
289041  [47, 30, 39, 36, 30, 45, 30, 44, 33, 1, 39, 26...   [6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...
289042  [43, 26, 27, 34, 39, 29, 43, 26, 1, 36, 30, 39...   [6, 6, 6, 6, 6, 6, 6, 6, 6, 0, 0, 0, 0, 0, 0, ...
289043  [41, 26, 37, 35, 26, 1, 31, 26, 37, 34, 50, 46...   [6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...
289044  [17, 13, 16, 10, 9, 15, 1, 36, 26, 37, 34, 1, ...   [2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...
289045  [36, 40, 39, 32, 46, 1, 38, 30, 44, 44, 5, 36,...   [6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...

Now, after generating the train, test and validation splits from the above dataset, I am trying to convert the input features into a list of NumPy arrays:

import numpy as np
address_character_list = []

def convert_input_features(x):
  """Convert ``x`` to a float32 NumPy array and append it to the
  module-level ``address_character_list``. Returns None."""
  as_float_array = np.asarray(x, dtype=np.float32)
  address_character_list.append(as_float_array)

pd.DataFrame(y_train)['output_features'].swifter.apply(lambda x: convert_input_features(x))
y_train_list = np.array(address_character_list)

import numpy as np
address_character_list = []

def convert_input_features(x):
  """Convert ``x`` to a float32 NumPy array and append it to the
  module-level ``address_character_list``. Returns None."""
  as_float_array = np.asarray(x, dtype=np.float32)
  address_character_list.append(as_float_array)

pd.DataFrame(X_train)['encoded_input_features'].swifter.apply(lambda x: convert_input_features(x))
X_train_list = np.array(address_character_list)

this gives

type(X_train) = numpy.ndarray
type(y_train) = numpy.ndarray

for eg. X_train looks like

array([array([...],
             dtype=float32)                                                ,[array([...], dtype = float32),
      dtype=object)

Similarly, I have y_train as well. Now, if I need to reshape my training examples using the code below, how do I do that? I ask because from_tensor_slices() gives the error below.

# train_examples = tf.data.Dataset.from_tensor_slices((X_train_list, y_train_list))
train_examples = tf.data.Dataset.from_tensor_slices((X_train_list, y_train_list))
# x_train = np.random.randint(0,10, size=(289042))
# y_train = np.random.randint(0,7, size=(289042))
# train_examples = tf.data.Dataset.from_tensor_slices((x_train, y_train))

def preprocess(ds):
    """Return ``ds`` cached, shuffled, batched and prefetched, in that order.

    Uses a shuffle buffer of 1000 elements and batches of 128.
    """
    cached = ds.cache()
    shuffled = cached.shuffle(buffer_size=1000)
    batched = shuffled.batch(128)
    return batched.prefetch(buffer_size=tf.data.AUTOTUNE)

train_examples = preprocess(train_examples)

error as follows

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type list).

and the solutions given for this error didn't work.

CodePudding user response：

Updating the answer... Now, let's suppose that, as you said, you have

x_train = np.random.randint(0,10, size=(289042))
y_train = np.random.randint(0,7, size=(289042))

You don't need a generator to yield the dataset. If you have a NumPy array, simply load it with tf.data.Dataset.from_tensor_slices():

train_examples = tf.data.Dataset.from_tensor_slices((x_train, y_train))

def preprocess(ds):
    """Return ``ds`` cached, shuffled, batched and prefetched, in that order.

    Uses a shuffle buffer of 1000 elements and batches of 128.
    """
    cached = ds.cache()
    shuffled = cached.shuffle(buffer_size=1000)
    batched = shuffled.batch(128)
    return batched.prefetch(buffer_size=tf.data.AUTOTUNE)

train_examples = preprocess(train_examples)

One more thing: if you want stateful=True in the LSTM, then every batch must have exactly the same batch size. Your last batch may contain fewer than 128 samples, and in that case it will throw an error.

model = tf.keras.Sequential([
          tf.keras.layers.Input(shape=(1)),
          tf.keras.layers.Embedding(1024, 512, mask_zero=True),
          tf.keras.layers.LSTM(200,
                      return_sequences=False,
                      recurrent_initializer='glorot_uniform'),
          tf.keras.layers.Dense(7)
          ])

model(next(iter(train_examples.take(1)))[0]).shape
model.summary()

Furthermore, use tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True):

model.compile(optimizer='adam', loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
              ,metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

history = model.fit(train_examples, epochs=1)

Output:

[==============================] - 7s 30ms/step - loss: 1.9473 - sparse_categorical_accuracy: 0.1398