I'm working on a project using Keras Model Subclassing to create a model with 2 inputs and 2 outputs. The training data for this model is essentially a dataset of other image-classification datasets, with each image paired with its corresponding label; a dataset of datasets. One input of the network receives the label, the other receives the image.
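Concretely, "datasets" is a list with one (images, answers) pair per sub-dataset; this is the structure implied by the generate_tensors function further down, and the shapes are the ones that show up in the error trace:
# datasets = [
#     (images, answers),  # images: (100, 224, 224, 1), answers: (100, 2, 95)
#     (images, answers),
#     ...
# ]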
train_img = generate_tensors(train, 0)
train_ans = generate_tensors(train, 1)
val_img = generate_tensors(val, 0)
val_ans = generate_tensors(val, 1)
train_img_b = train_img.batch(batch_size) # b for batched
train_ans_b = train_ans.batch(batch_size)
structuremodel = StructureModel()
hnet_output, anet_output = structuremodel([train_img_b, train_ans_b])
In the above code, I'm trying to perform a single forward pass through my custom "StructureModel" class. "train_img" and "train_ans" are of shapes (None, 100, 224, 224, 1) and (None, 100, 2, 95) respectively (both shapes also appear in the error trace below). I have set the batch_size to 1.
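For reference, these shapes can be read straight off the element specs of the batched datasets (they match the TensorSpecs printed in the error trace below):
print(train_img_b.element_spec)  # TensorSpec(shape=(None, 100, 224, 224, 1), dtype=tf.float32, name=None)
print(train_ans_b.element_spec)  # TensorSpec(shape=(None, 100, 2, 95), dtype=tf.float64, name=None)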
The model itself is defined as follows:
class StructureModel(keras.Model):
num_images = 100 # images per timestep
resolution = [224, 224]
hnet_pred_vars = 9
anet_pred_vars = 25 # the thing on my whiteboard didn't include a stopping node
alphabet = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()* ,-./:;<=>?@[\\]^_`{|}~ "
def __init__(self):
super().__init__()
self.anet_layer = ArchitectureNet(self.anet_pred_vars)
def call(self, inputs):
# CNN-RNN/CNN-LSTM for processing images and corresponding answers
# Copied VGG16 for structure
# Image processing
# shape=(timesteps,resolution,resolution,rgb channels)
images = inputs[0]
answers = inputs[1]
x = TimeDistributed(Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu'))(images)
x = TimeDistributed(Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu'))(x)
x = TimeDistributed(MaxPooling2D(pool_size=(2, 2), strides=2))(x)
filters_convs = [(128, 2), (256, 3), (512, 3), (512, 3)]
for n_filters, n_convs in filters_convs:
for _ in range(n_convs):
x = TimeDistributed(Conv2D(filters=n_filters, kernel_size=(3, 3), padding='same', activation='relu'))(x)
x = TimeDistributed(MaxPooling2D(pool_size=(2, 2), strides=2))(x)
x = TimeDistributed(Flatten())(x)
img_embed = TimeDistributed(Dense(units=1000), name='Image_Preprocessing')(x)
# Answer embedding
# Number of image-answer pairs, characters in answer, single character
x = TimeDistributed(LSTM(units=500))(answers) # All answers, shape (100, None, 95)
answer_embed = TimeDistributed(Dense(units=1000), name='Answer_Preprocessing/Embed')(x)
# Combines both models
merge = Concatenate(axis=2)([img_embed, answer_embed])
x = LSTM(units=100)(merge)
dataset_embed = Dense(units=100, activation='relu', name='Dataset_Embed')(x)
# hnet
x = Dense(units=50)(dataset_embed)
hnet_output = Dense(units=self.hnet_pred_vars, name='Hyperparameters')(x)
# anet
anet_output = self.anet_layer(dataset_embed)
return hnet_output, anet_output
There's a lot of extra fluff in it, and I'm sure there are many other errors in the model, but the main one I care about is the TypeError I keep receiving. Without resolving that, I can't get to debugging anything else. The error is as follows:
File ~\Documents\Programming\Python\HYPAT\NetworksV2.py:83 in call
x = TimeDistributed(Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu'))(images)
TypeError: Exception encountered when calling layer "structure_model_7" (type StructureModel).
'<' not supported between instances of 'NoneType' and 'int'
Call arguments received by layer "structure_model_7" (type StructureModel):
• inputs=['<BatchDataset element_spec=TensorSpec(shape=(None, 100, 224, 224, 1), dtype=tf.float32, name=None)>', '<BatchDataset element_spec=TensorSpec(shape=(None, 100, 2, 95), dtype=tf.float64, name=None)>']
If it would be of any use, here's the entirety of the code.
import keras
from keras.layers import TimeDistributed, Conv2D, Dense, MaxPooling2D, Flatten, LSTM, Concatenate
from tensorflow.keras.utils import plot_model
import pickle
import tqdm
import tensorflow as tf
from varname import nameof
# constants/hyperparameters
batch_size = 1
epochs = 10
train_test_split = 0.25
with open("datasets", "rb") as fp:
datasets = pickle.load(fp)
class ArchitectureNet(keras.layers.Layer):
def __init__(self, anet_pred_vars, **kwargs):
super().__init__()
self.anet_pred_vars = anet_pred_vars
self.concat = Concatenate(axis=1)
self.dense1 = Dense(units=50, activation='relu')
self.dense2 = Dense(units=50, activation='relu')
self.anet_output = Dense(units=self.anet_pred_vars, name='Architecture')
self.stopping_node = Dense(units=1, activation='sigmoid')
def call(self, prev_output, dataset_embed):
x = self.concat([prev_output, dataset_embed])
x = self.dense1(x)
x = self.dense2(x)
anet_output = self.anet_output(x)
stop_node_output = self.stopping_node(x)
print(tf.make_ndarray(stop_node_output))
return anet_output
class StructureModel(keras.Model):
num_images = 100 # images per timestep
resolution = [224, 224]
hnet_pred_vars = 9
anet_pred_vars = 25 # the thing on my whiteboard didn't include a stopping node
alphabet = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()* ,-./:;<=>?@[\\]^_`{|}~ "
def __init__(self):
super().__init__()
self.anet_layer = ArchitectureNet(self.anet_pred_vars)
def call(self, inputs):
# CNN-RNN/CNN-LSTM for processing images and corresponding answers
# Copied VGG16 for structure
# Image processing
# shape=(timesteps,resolution,resolution,rgb channels)
images = inputs[0]
answers = inputs[1]
x = TimeDistributed(Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu'))(images)
x = TimeDistributed(Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu'))(x)
x = TimeDistributed(MaxPooling2D(pool_size=(2, 2), strides=2))(x)
filters_convs = [(128, 2), (256, 3), (512, 3), (512, 3)]
for n_filters, n_convs in filters_convs:
for _ in range(n_convs):
x = TimeDistributed(Conv2D(filters=n_filters, kernel_size=(3, 3), padding='same', activation='relu'))(x)
x = TimeDistributed(MaxPooling2D(pool_size=(2, 2), strides=2))(x)
x = TimeDistributed(Flatten())(x)
img_embed = TimeDistributed(Dense(units=1000), name='Image_Preprocessing')(x)
# Answer embedding
# Number of image-answer pairs, characters in answer, single character
x = TimeDistributed(LSTM(units=500))(answers) # All answers, shape (100, None, 95)
answer_embed = TimeDistributed(Dense(units=1000), name='Answer_Preprocessing/Embed')(x)
# Combines both models
merge = Concatenate(axis=2)([img_embed, answer_embed])
x = LSTM(units=100)(merge)
dataset_embed = Dense(units=100, activation='relu', name='Dataset_Embed')(x)
# hnet
x = Dense(units=50)(dataset_embed)
hnet_output = Dense(units=self.hnet_pred_vars, name='Hyperparameters')(x)
# anet
anet_output = self.anet_layer(dataset_embed)
return hnet_output, anet_output
def compile(self):
super().compile()
# Reserve 10,000 samples for validation
ratio = int(train_test_split * len(datasets))
val = datasets[:ratio]
train = datasets[ratio:]
if len(val) == 0: # look at me mom i'm a real programmer
raise IndexError('List \"x_val\" is empty; \"train_test_split\" is set too small')
# Prepare the training and testing datasets
def generate_tensors(data, img_or_ans): # 0 for image, 1 for ans
# technically the images aren't ragged arrays, but for simplicity's sake we'll keep them all as ragged tensors
column = [i[img_or_ans] for i in data]
tensor_data = tf.ragged.constant(column)
tensor_data = tensor_data.to_tensor()
tensor_dataset = tf.data.Dataset.from_tensor_slices(tensor_data)
return tensor_dataset
train_img = generate_tensors(train, 0)
train_ans = generate_tensors(train, 1)
val_img = generate_tensors(val, 0)
val_ans = generate_tensors(val, 1)
# TODO: Test if CIFAR 100 dataset (which has variable length answers) will work
#train_dataset = tf.data.Dataset.zip((train_img, train_ans))
#train_dataset = train_dataset.batch(batch_size)
train_img_b = train_img.batch(batch_size) # b for batched
train_ans_b = train_ans.batch(batch_size)
structuremodel = StructureModel()
hnet_output, anet_output = structuremodel([train_img_b, train_ans_b])
plot_model(structuremodel, to_file='aeu.png', show_shapes=True)
"""
for epoch in tqdm.trange(epochs, desc="Epochs"):
# Iterate over the batches of the dataset.
for step, (x_batch_train, y_batch_train) in tqdm.tqdm(enumerate(train_dataset), leave=False):
# Open a GradientTape to record the operations run
# during the forward pass, which enables auto-differentiation.
with tf.GradientTape() as tape:
# Run the forward pass of the layer.
# The operations that the layer applies
# to its inputs are going to be recorded
# on the GradientTape.
# Logits for this minibatch
logits = model(x_batch_train, training=True)
# Compute the loss value for this minibatch.
loss_value = loss_fn(y_batch_train, logits)
# Use the gradient tape to automatically retrieve
# the gradients of the trainable variables with respect to the loss.
grads = tape.gradient(loss_value, model.trainable_weights)
# Run one step of gradient descent by updating
# the value of the variables to minimize the loss.
optimizer.apply_gradients(zip(grads, model.trainable_weights))
# Log every 200 batches.
if step % 200 == 0:
print(
"Training loss (for one batch) at step %d: %.4f"
% (step, float(loss_value))
)
print("Seen so far: %s samples" % ((step + 1) * batch_size))
"""
CodePudding user response:
You cannot feed tf.data.Dataset objects directly to Keras layers; a layer expects concrete tensors, so you have to iterate over the dataset and pass the model the tensors it yields. Try this:
dataset1 = tf.data.Dataset.from_tensor_slices((tf.random.uniform((5, 100, 224, 224, 1)))).batch(1)
dataset2 = tf.data.Dataset.from_tensor_slices((tf.random.uniform((5, 100, 2, 95)))).batch(1)
structuremodel = StructureModel()
for (x1, x2) in zip(dataset1.take(1), dataset2.take(1)):
hnet_output, anet_output = structuremodel([x1, x2])
Note, however, that StructureModel is buggy, but I'm sure you know that.
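Applied to your own pipeline, the same idea would look roughly like this (a sketch, assuming the train_img_b and train_ans_b datasets defined in the question; it zips the two batched datasets so each iteration yields a pair of concrete tensors rather than Dataset objects):
train_dataset = tf.data.Dataset.zip((train_img_b, train_ans_b))
for x1, x2 in train_dataset.take(1):
    # x1: (1, 100, 224, 224, 1) image batch, x2: (1, 100, 2, 95) answer batch
    hnet_output, anet_output = structuremodel([x1, x2])
This is essentially the tf.data.Dataset.zip line you already have commented out, with the iteration added so the model receives tensors instead of datasets.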