I'm working on a project using Keras Model Subclassing to create a model with 2 inputs and 2 outputs. The training data for this model is essentially a dataset of other image-classification datasets, with each image paired with its corresponding label; a dataset of datasets. One input of the network receives the label, the other receives the image.
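Concretely, "datasets" is a list with one (images, answers) pair per sub-dataset; this is the structure implied by the generate_tensors function further down, and the shapes are the ones that show up in the error trace:
# datasets = [
#     (images, answers),  # images: (100, 224, 224, 1), answers: (100, 2, 95)
#     (images, answers),
#     ...
# ]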
train_img = generate_tensors(train, 0)
train_ans = generate_tensors(train, 1)
val_img = generate_tensors(val, 0)
val_ans = generate_tensors(val, 1)
train_img_b = train_img.batch(batch_size) # b for batched
train_ans_b = train_ans.batch(batch_size)
structuremodel = StructureModel()
hnet_output, anet_output = structuremodel([train_img_b, train_ans_b])
In the above code, I'm trying to perform a single forward pass through my custom "StructureModel" class. "train_img" and "train_ans" are of shapes (None, 100, 224, 224, 1) and (None, 100, 2, 95) respectively (both shapes also appear in the error trace below). I have set the batch_size to 1.
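For reference, these shapes can be read straight off the element specs of the batched datasets (they match the TensorSpecs printed in the error trace below):
print(train_img_b.element_spec)  # TensorSpec(shape=(None, 100, 224, 224, 1), dtype=tf.float32, name=None)
print(train_ans_b.element_spec)  # TensorSpec(shape=(None, 100, 2, 95), dtype=tf.float64, name=None)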
The model itself is defined as follows:
class StructureModel(keras.Model):
num_images = 100 # images per timestep
resolution = [224, 224]
hnet_pred_vars = 9
anet_pred_vars = 25 # the thing on my whiteboard didn't include a stopping node
alphabet = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()* ,-./:;<=>?@[\\]^_`{|}~ "
def __init__(self):
super().__init__()
self.anet_layer = ArchitectureNet(self.anet_pred_vars)
def call(self, inputs):
# CNN-RNN/CNN-LSTM for processing images and corresponding answers
# Copied VGG16 for structure
# Image processing
# shape=(timesteps,resolution,resolution,rgb channels)
images = inputs[0]
answers = inputs[1]
x = TimeDistributed(Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu'))(images)
x = TimeDistributed(Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu'))(x)
x = TimeDistributed(MaxPooling2D(pool_size=(2, 2), strides=2))(x)
filters_convs = [(128, 2), (256, 3), (512, 3), (512, 3)]
for n_filters, n_convs in filters_convs:
for _ in range(n_convs):
x = TimeDistributed(Conv2D(filters=n_filters, kernel_size=(3, 3), padding='same', activation='relu'))(x)
x = TimeDistributed(MaxPooling2D(pool_size=(2, 2), strides=2))(x)
x = TimeDistributed(Flatten())(x)
img_embed = TimeDistributed(Dense(units=1000), name='Image_Preprocessing')(x)
# Answer embedding
# Number of image-answer pairs, characters in answer, single character
x = TimeDistributed(LSTM(units=500))(answers) # All answers, shape (100, None, 95)
answer_embed = TimeDistributed(Dense(units=1000), name='Answer_Preprocessing/Embed')(x)
# Combines both models
merge = Concatenate(axis=2)([img_embed, answer_embed])
x = LSTM(units=100)(merge)
dataset_embed = Dense(units=100, activation='relu', name='Dataset_Embed')(x)
# hnet
x = Dense(units=50)(dataset_embed)
hnet_output = Dense(units=self.hnet_pred_vars, name='Hyperparameters')(x)
# anet
anet_output = self.anet_layer(dataset_embed)
return hnet_output, anet_output
There's a lot of extra fluff in it, and I'm sure there are many other errors in the model, but the main one I care about is the TypeError I keep receiving. Without resolving that, I can't get to debugging anything else. The error is as follows:
File ~\Documents\Programming\Python\HYPAT\NetworksV2.py:83 in call
x = TimeDistributed(Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu'))(images)
TypeError: Exception encountered when calling layer "structure_model_7" (type StructureModel).
'<' not supported between instances of 'NoneType' and 'int'
Call arguments received by layer "structure_model_7" (type StructureModel):
• inputs=['<BatchDataset element_spec=TensorSpec(shape=(None, 100, 224, 224, 1), dtype=tf.float32, name=None)>', '<BatchDataset element_spec=TensorSpec(shape=(None, 100, 2, 95), dtype=tf.float64, name=None)>']
If it would be of any use, here's the entirety of the code.
import keras
from keras.layers import TimeDistributed, Conv2D, Dense, MaxPooling2D, Flatten, LSTM, Concatenate
from tensorflow.keras.utils import plot_model
import pickle
import tqdm
import tensorflow as tf
from varname import nameof
# constants/hyperparameters
batch_size = 1
epochs = 10
train_test_split = 0.25
with open("datasets", "rb") as fp:
datasets = pickle.load(fp)
class ArchitectureNet(keras.layers.Layer):
def __init__(self, anet_pred_vars, **kwargs):
super().__init__()
self.anet_pred_vars = anet_pred_vars
self.concat = Concatenate(axis=1)
self.dense1 = Dense(units=50, activation='relu')
self.dense2 = Dense(units=50, activation='relu')
self.anet_output = Dense(units=self.anet_pred_vars, name='Architecture')
self.stopping_node = Dense(units=1, activation='sigmoid')
def call(self, prev_output, dataset_embed):
x = self.concat([prev_output, dataset_embed])
x = self.dense1(x)
x = self.dense2(x)
anet_output = self.anet_output(x)
stop_node_output = self.stopping_node(x)
print(tf.make_ndarray(stop_node_output))
return anet_output
class StructureModel(keras.Model):
num_images = 100 # images per timestep
resolution = [224, 224]
hnet_pred_vars = 9
anet_pred_vars = 25 # the thing on my whiteboard didn't include a stopping node
alphabet = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()* ,-./:;<=>?@[\\]^_`{|}~ "
def __init__(self):
super().__init__()
self.anet_layer = ArchitectureNet(self.anet_pred_vars)
def call(self, inputs):
# CNN-RNN/CNN-LSTM for processing images and corresponding answers
# Copied VGG16 for structure
# Image processing
# shape=(timesteps,resolution,resolution,rgb channels)
images = inputs[0]
answers = inputs[1]
x = TimeDistributed(Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu'))(images)
x = TimeDistributed(Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu'))(x)
x = TimeDistributed(MaxPooling2D(pool_size=(2, 2), strides=2))(x)
filters_convs = [(128, 2), (256, 3), (512, 3), (512, 3)]
for n_filters, n_convs in filters_convs:
for _ in range(n_convs):
x = TimeDistributed(Conv2D(filters=n_filters, kernel_size=(3, 3), padding='same', activation='relu'))(x)
x = TimeDistributed(MaxPooling2D(pool_size=(2, 2), strides=2))(x)
x = TimeDistributed(Flatten())(x)
img_embed = TimeDistributed(Dense(units=1000), name='Image_Preprocessing')(x)
# Answer embedding
# Number of image-answer pairs, characters in answer, single character
x = TimeDistributed(LSTM(units=500))(answers) # All answers, shape (100, None, 95)
answer_embed = TimeDistributed(Dense(units=1000), name='Answer_Preprocessing/Embed')(x)
# Combines both models
merge = Concatenate(axis=2)([img_embed, answer_embed])
x = LSTM(units=100)(merge)
dataset_embed = Dense(units=100, activation='relu', name='Dataset_Embed')(x)
# hnet
x = Dense(units=50)(dataset_embed)
hnet_output = Dense(units=self.hnet_pred_vars, name='Hyperparameters')(x)
# anet
anet_output = self.anet_layer(dataset_embed)
return hnet_output, anet_output
def compile(self):
super().compile()
# Reserve 10,000 samples for validation
ratio = int(train_test_split * len(datasets))
val = datasets[:ratio]
train = datasets[ratio:]
if len(val) == 0: # look at me mom i'm a real programmer
raise IndexError('List \"x_val\" is empty; \"train_test_split\" is set too small')
# Prepare the training and testing datasets
def generate_tensors(data, img_or_ans): # 0 for image, 1 for ans
# technically the images aren't ragged arrays, but for simplicity's sake we'll keep them all as ragged tensors
column = [i[img_or_ans] for i in data]
tensor_data = tf.ragged.constant(column)
tensor_data = tensor_data.to_tensor()
tensor_dataset = tf.data.Dataset.from_tensor_slices(tensor_data)
return tensor_dataset
train_img = generate_tensors(train, 0)
train_ans = generate_tensors(train, 1)
val_img = generate_tensors(val, 0)
val_ans = generate_tensors(val, 1)
# TODO: Test if CIFAR 100 dataset (which has variable length answers) will work
#train_dataset = tf.data.Dataset.zip((train_img, train_ans))
#train_dataset = train_dataset.batch(batch_size)
train_img_b = train_img.batch(batch_size) # b for batched
train_ans_b = train_ans.batch(batch_size)
structuremodel = StructureModel()
hnet_output, anet_output = structuremodel([train_img_b, train_ans_b])
plot_model(structuremodel, to_file='aeu.png', show_shapes=True)
"""
for epoch in tqdm.trange(epochs, desc="Epochs"):
# Iterate over the batches of the dataset.
for step, (x_batch_train, y_batch_train) in tqdm.tqdm(enumerate(train_dataset), leave=False):
# Open a GradientTape to record the operations run
# during the forward pass, which enables auto-differentiation.
with tf.GradientTape() as tape:
# Run the forward pass of the layer.
# The operations that the layer applies
# to its inputs are going to be recorded
# on the GradientTape.
# Logits for this minibatch
logits = model(x_batch_train, training=True)
# Compute the loss value for this minibatch.
loss_value = loss_fn(y_batch_train, logits)
# Use the gradient tape to automatically retrieve
# the gradients of the trainable variables with respect to the loss.
grads = tape.gradient(loss_value, model.trainable_weights)
# Run one step of gradient descent by updating
# the value of the variables to minimize the loss.
optimizer.apply_gradients(zip(grads, model.trainable_weights))
# Log every 200 batches.
if step % 200 == 0:
print(
"Training loss (for one batch) at step %d: %.4f"
% (step, float(loss_value))
)
print("Seen so far: %s samples" % ((step + 1) * batch_size))
"""
CodePudding user response:
You cannot feed tf.data.Dataset objects directly to Keras layers; a layer expects concrete tensors, so you have to iterate over the dataset and pass the model the tensors it yields. Try this:
dataset1 = tf.data.Dataset.from_tensor_slices((tf.random.uniform((5, 100, 224, 224, 1)))).batch(1)
dataset2 = tf.data.Dataset.from_tensor_slices((tf.random.uniform((5, 100, 2, 95)))).batch(1)
structuremodel = StructureModel()
for (x1, x2) in zip(dataset1.take(1), dataset2.take(1)):
hnet_output, anet_output = structuremodel([x1, x2])
Note, however, that StructureModel is buggy, but I'm sure you know that.
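Applied to your own pipeline, the same idea would look roughly like this (a sketch, assuming the train_img_b and train_ans_b datasets defined in the question; it zips the two batched datasets so each iteration yields a pair of concrete tensors rather than Dataset objects):
train_dataset = tf.data.Dataset.zip((train_img_b, train_ans_b))
for x1, x2 in train_dataset.take(1):
    # x1: (1, 100, 224, 224, 1) image batch, x2: (1, 100, 2, 95) answer batch
    hnet_output, anet_output = structuremodel([x1, x2])
This is essentially the tf.data.Dataset.zip line you already have commented out, with the iteration added so the model receives tensors instead of datasets.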