Below is the error traceback I get when I call predict() on my trained model:
ValueError Traceback (most recent call last)
<ipython-input-51-5ae18e06838a> in <module>
7 for input_example_batch, target_example_batch in ds_series_batch_test:
8
----> 9 pred=model.predict(input_example_batch)
10 pred_max=tf.argmax(tf.nn.softmax(pred),2).numpy().flatten()
11 y_true=target_example_batch.numpy().flatten()
1 frames
/usr/local/lib/python3.7/dist-packages/keras/engine/training.py in tf__predict_function(iterator)
13 try:
14 do_return = True
---> 15 retval_ = ag__.converted_call(ag__.ld(step_function), (ag__.ld(self), ag__.ld(iterator)), None, fscope)
16 except:
17 do_return = False
ValueError: in user code:
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1845, in predict_function *
return step_function(self, iterator)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1834, in step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1823, in run_step **
outputs = model.predict_step(data)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1791, in predict_step
return self(x, training=False)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 67, in error_handler
raise e.with_traceback(filtered_tb) from None
File "/usr/local/lib/python3.7/dist-packages/keras/engine/input_spec.py", line 264, in assert_input_compatibility
raise ValueError(f'Input {input_index} of layer "{layer_name}" is '
ValueError: Exception encountered when calling layer "sequential" (type Sequential).
Input 0 of layer "lstm" is incompatible with the layer: expected shape=(128, None, 256), found shape=(32, 187, 256)
Call arguments received by layer "sequential" (type Sequential):
• inputs=tf.Tensor(shape=(32, 187), dtype=int32)
• training=False
• mask=None
Below is the code I use to generate the train, validation and test data:
# First split: hold out 20% of the examples as the test set.
X_total, X_test, y_total, y_test = train_test_split(
    train, test, test_size=0.2, shuffle=True, random_state=8
)
# Second split: 25% of the remaining 80% becomes validation (0.25 x 0.8 = 0.2).
X_train, X_val, y_train, y_val = train_test_split(
    X_total, y_total, test_size=0.25, shuffle=True, random_state=8
)
# Report the size of every split, in the same order and wording as before.
for label, split in (
    ("X_train", X_train),
    ("X_test", X_test),
    ("X_val", X_val),
    ("y_train", y_train),
    ("y_test", y_test),
    ("y val", y_val),
):
    print("{} shape: {}".format(label, split.shape))
X_train shape: (173424,)
X_test shape: (57809,)
X_val shape: (57809,)
y_train shape: (173424,)
y_test shape: (57809,)
y val shape: (57809,)
# Pair every input sequence with its label sequence, giving one
# (features, labels) tuple per example for each of the three splits.
train_formatted = list(zip(X_train, y_train))
test_formatted = list(zip(X_test, y_test))
valid_formatted = list(zip(X_val, y_val))
# training generator
def gen_train_series():
    """Yield the two items of every pair stored in ``train_formatted``, in order."""
    for features, labels in train_formatted:
        yield features, labels
# validation generator
def gen_valid_series():
    """Yield the two items of every pair stored in ``valid_formatted``, in order."""
    for features, labels in valid_formatted:
        yield features, labels
# test generator
def gen_test_series():
    """Yield the two items of every pair stored in ``test_formatted``, in order."""
    for features, labels in test_formatted:
        yield features, labels
# create Dataset objects for train, test and validation sets; each generator
# yields an (int32, int32) pair whose shapes are left unspecified
series = tf.data.Dataset.from_generator(
    gen_train_series,
    output_types=(tf.int32, tf.int32),
    output_shapes=(None, None),
)
series_valid = tf.data.Dataset.from_generator(
    gen_valid_series,
    output_types=(tf.int32, tf.int32),
    output_shapes=(None, None),
)
series_test = tf.data.Dataset.from_generator(
    gen_test_series,
    output_types=(tf.int32, tf.int32),
    output_shapes=(None, None),
)

BATCH_SIZE = 128
BUFFER_SIZE = 1000

# create padded batch series objects for train, test and validation sets;
# only the training set is shuffled, and incomplete final batches are dropped
ds_series_batch = (
    series
    .shuffle(BUFFER_SIZE)
    .padded_batch(BATCH_SIZE, padded_shapes=([None], [None]), drop_remainder=True)
)
ds_series_batch_valid = series_valid.padded_batch(
    BATCH_SIZE, padded_shapes=([None], [None]), drop_remainder=True
)
ds_series_batch_test = series_test.padded_batch(
    BATCH_SIZE, padded_shapes=([None], [None]), drop_remainder=True
)

# print example batches
for input_example_batch, target_example_batch in ds_series_batch_valid.take(1):
    print(input_example_batch)
    print(target_example_batch)
# Below are the shapes of my input and target batch tensors.
The input_example_batch passed to predict() looks as follows (the matching target_example_batch is printed after it):
tf.Tensor(
[[36 26 37 ... 0 0 0]
[40 40 43 ... 0 0 0]
[26 39 26 ... 0 0 0]
...
[11 8 12 ... 0 0 0]
[44 28 33 ... 0 0 0]
[46 1 38 ... 0 0 0]], shape=(128, 160), dtype=int32)
tf.Tensor(
[[6 6 6 ... 0 0 0]
[6 6 6 ... 0 0 0]
[6 6 6 ... 0 0 0]
...
[2 6 2 ... 0 0 0]
[2 2 2 ... 0 0 0]
[2 6 2 ... 0 0 0]], shape=(128, 160), dtype=int32)
I am building and training my model as follows:
# Size of the character vocabulary (53 for this data)
vocab_size = len(vocabulary)
# Number of output labels (original note: "0 to 7")
label_size = len(labels)
# The embedding dimension
embedding_dim = 256
# Number of RNN units
rnn_units = 1024
def build_model(vocab_size, label_size, embedding_dim, rnn_units, batch_size):
    """Build the tagging network: Embedding -> stateful LSTM -> Dense.

    Args:
        vocab_size: Input dimension of the embedding layer.
        label_size: Number of units in the final Dense layer.
        embedding_dim: Size of each embedding vector.
        rnn_units: Number of units in the LSTM layer.
        batch_size: Fixed batch size baked into the model's input shape.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.
    """
    embedding = tf.keras.layers.Embedding(
        vocab_size,
        embedding_dim,
        batch_input_shape=[batch_size, None],
        mask_zero=True,  # id 0 is treated as padding and masked out
    )
    recurrent = tf.keras.layers.LSTM(
        rnn_units,
        return_sequences=True,
        stateful=True,
        recurrent_initializer='glorot_uniform',
    )
    # No activation here: the layer emits raw scores for each label.
    classifier = tf.keras.layers.Dense(label_size)
    return tf.keras.Sequential([embedding, recurrent, classifier])
# The embedding gets vocab_size + 1 rows: mask_zero=True reserves id 0 for
# padding, so the real character ids need one extra slot.
# NOTE(review): assumes character ids start at 1 - confirm against the encoder.
# label_size is passed unchanged because it already includes the 'OTHER' label.
model = build_model(
    vocab_size=vocab_size + 1,
    label_size=label_size,
    embedding_dim=embedding_dim,
    rnn_units=rnn_units,
    batch_size=BATCH_SIZE,
)
# define loss function
def loss(labels, logits):
    """Return the sparse categorical cross-entropy of ``logits`` against ``labels``.

    ``logits`` are treated as raw scores (``from_logits=True``).
    """
    return tf.keras.losses.sparse_categorical_crossentropy(
        labels, logits, from_logits=True
    )


model.compile(
    optimizer='adam',
    loss=loss,
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
)
# fitting the model as follows
# using just 1 epoch - as I am debugging
EPOCHS = 1
# Save only the weights, once per epoch. The callback is built a single time;
# the earlier identical definition was redundant.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True,
    save_freq='epoch',
)
ic("Fitting the Model...")
history = model.fit(
    ds_series_batch,
    epochs=EPOCHS,
    validation_data=ds_series_batch_valid,
    callbacks=[checkpoint_callback],
)
The model summary is displayed below:
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
embedding (Embedding) (128, None, 256) 13824
lstm (LSTM) (128, 1024) 5246976
dense (Dense) (128, 7) 7175
=================================================================
Total params: 5,267,975
Trainable params: 5,267,975
Non-trainable params: 0
_______________________________
from sklearn.metrics import classification_report, confusion_matrix
preds = np.array([])
y_trues = np.array([])
# iterate through test set, make predictions based on trained model
for input_example_batch, target_example_batch in ds_series_batch_test:
    # The model is stateful and was built with a fixed batch dimension of
    # BATCH_SIZE, but predict() splits its input into chunks of 32 by default.
    # Pass the batch size explicitly so the LSTM sees the shape it expects.
    pred = model.predict(input_example_batch, batch_size=BATCH_SIZE)
    pred_max = tf.argmax(tf.nn.softmax(pred), 2).numpy().flatten()
    y_true = target_example_batch.numpy().flatten()
    # Accumulate the flattened predictions and labels across all batches.
    preds = np.concatenate([preds, pred_max])
    y_trues = np.concatenate([y_trues, y_true])
The code above is what I use to call predict(); it results in the error mentioned in the title.
The link below has the code I am following, a character-level RNN implementation: http://alexminnaar.com/2019/08/22/ner-rnns-tensorflow.html I did pretty much the same as the Colab notebook, but trained on different input data.
Many of the suggested solutions were not specific to this problem. The one I tried was setting return_sequences=True, which I already had when building the model. Nothing has worked so far: with the code above, training works fine, but predict() fails because of a shape mismatch. I am very new to TensorFlow, so any help in understanding the issue and ways to resolve it would be greatly appreciated. Thank you in advance.
encoded_input_features output_features
0 [11, 11, 1, 39, 40, 43, 45, 33, 1, 44, 45, 43,... [2, 2, 6, 3, 3, 3, 3, 3, 6, 3, 3, 3, 3, 3, 3, ...
1 [29, 5, 10, 5, 44, 33, 34, 47, 26, 37, 34, 36,... [6, 6, 2, 6, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 2, ...
2 [10, 14, 5, 41, 43, 26, 28, 33, 34, 1, 27, 46,... [2, 2, 6, 2, 2, 2, 2, 2, 2, 6, 2, 2, 2, 2, 2, ...
3 [48, 51, 1, 14, 16, 10, 1, 41, 26, 29, 26, 38,... [6, 6, 6, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...
4 [32, 31, 1, 9, 11, 5, 45, 15, 5, 32, 43, 26, 2... [6, 6, 6, 2, 2, 6, 2, 2, 6, 2, 2, 2, 2, 2, 2, ...
... ... ...
289041 [47, 30, 39, 36, 30, 45, 30, 44, 33, 1, 39, 26... [6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...
289042 [43, 26, 27, 34, 39, 29, 43, 26, 1, 36, 30, 39... [6, 6, 6, 6, 6, 6, 6, 6, 6, 0, 0, 0, 0, 0, 0, ...
289043 [41, 26, 37, 35, 26, 1, 31, 26, 37, 34, 50, 46... [6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...
289044 [17, 13, 16, 10, 9, 15, 1, 36, 26, 37, 34, 1, ... [2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...
289045 [36, 40, 39, 32, 46, 1, 38, 30, 44, 44, 5, 36,... [6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...
After generating the train, test and validation split from the dataset above, I am trying to convert the input features into a list of NumPy arrays:
import numpy as np

address_character_list = []


def convert_input_features(x):
    """Append ``x`` to ``address_character_list`` as a float32 array.

    Args:
        x: One encoded sequence (a list of numbers).

    Returns:
        The float32 NumPy array that was appended.
    """
    converted = np.asarray(x, dtype=np.float32)
    address_character_list.append(converted)
    return converted


def _pack_ragged(arrays):
    """Pack arrays of possibly different lengths into a 1-D object array."""
    # np.array() on ragged input is deprecated / an error in newer NumPy,
    # so the object array is filled one element at a time instead.
    packed = np.empty(len(arrays), dtype=object)
    for position, array in enumerate(arrays):
        packed[position] = array
    return packed


# A plain loop calls the converter exactly once per row, in order.
# NOTE(review): swifter may call the function extra times on sample rows while
# choosing a strategy, which would add spurious list entries - confirm.
for sequence in pd.DataFrame(y_train)['output_features']:
    convert_input_features(sequence)
y_train_list = _pack_ragged(address_character_list)

# Start from an empty list again before converting the input features.
address_character_list = []
for sequence in pd.DataFrame(X_train)['encoded_input_features']:
    convert_input_features(sequence)
X_train_list = _pack_ragged(address_character_list)
this gives
type(X_train) = numpy.ndarray
type(y_train) = numpy.ndarray
For example, X_train looks like this:
array([array([...], dtype=float32),
array([...], dtype=float32),
...], dtype=object)
I have y_train in the same form. How do I build my training dataset with the code below? from_tensor_slices() raises the error shown further down.
# from_tensor_slices() needs rectangular tensors, but the examples have
# different lengths (object arrays of arrays), which is what triggers
# "Failed to convert a NumPy array to a Tensor". Pad every sequence with
# trailing zeros to a common length first.
# NOTE(review): assumes each input sequence and its label sequence have the
# same length, so both padded arrays end up the same width - confirm.
X_train_padded = tf.keras.preprocessing.sequence.pad_sequences(
    X_train_list, padding='post', dtype='int32'
)
y_train_padded = tf.keras.preprocessing.sequence.pad_sequences(
    y_train_list, padding='post', dtype='int32'
)
train_examples = tf.data.Dataset.from_tensor_slices((X_train_padded, y_train_padded))
# x_train = np.random.randint(0,10, size=(289042))
# y_train = np.random.randint(0,7, size=(289042))
# train_examples = tf.data.Dataset.from_tensor_slices((x_train, y_train))


def preprocess(ds):
    """Return ``ds`` cached, shuffled (buffer of 1000), batched by 128 and prefetched."""
    return (
        ds
        .cache()
        .shuffle(buffer_size=1000)
        .batch(128)
        .prefetch(buffer_size=tf.data.AUTOTUNE)
    )


train_examples = preprocess(train_examples)
The error is as follows:
ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type list).
The solutions suggested for this error did not work for me.
CodePudding user response:
Updated answer: suppose, as you said, that I have
# Random integer stand-ins for the real data: 289042 ids in [0, 10) and
# 289042 labels in [0, 7).
x_train = np.random.randint(0, 10, size=289042)
y_train = np.random.randint(0, 7, size=289042)
You don't need a generator to yield the dataset; if you have a NumPy array, simply load it with tf.data.Dataset.from_tensor_slices():
train_examples = tf.data.Dataset.from_tensor_slices((x_train, y_train))


def preprocess(ds):
    """Return ``ds`` cached, shuffled (buffer of 1000), batched by 128 and prefetched."""
    cached = ds.cache()
    shuffled = cached.shuffle(buffer_size=1000)
    batched = shuffled.batch(128)
    return batched.prefetch(buffer_size=tf.data.AUTOTUNE)


train_examples = preprocess(train_examples)
One more thing: if you want stateful=True in the LSTM, every batch must contain the same number of samples. Your last batch may well have fewer than 128 samples, in which case the model will throw an error.
# The input shape is written as a real one-element tuple: "(1)" is just the
# integer 1, whereas Input documents ``shape`` as a tuple.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(1,)),
    tf.keras.layers.Embedding(1024, 512, mask_zero=True),
    tf.keras.layers.LSTM(
        200,
        return_sequences=False,
        recurrent_initializer='glorot_uniform',
    ),
    tf.keras.layers.Dense(7),
])
# Run one batch through the model to inspect the output shape
# (the value is only displayed when run as a notebook cell).
model(next(iter(train_examples.take(1)))[0]).shape
model.summary()
Furthermore, use tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True):
# The Dense layer has no activation, so the loss is told to expect raw logits.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
)
history = model.fit(train_examples, epochs=1)
Output:
[==============================] - 7s 30ms/step - loss: 1.9473 - sparse_categorical_accuracy: 0.1398