Using TFDS datasets with Keras Functional API


I'm trying to train a neural network built with the Keras Functional API on one of the standard TFDS datasets, but I keep getting dataset-related errors.

The eventual goal is an object detection model, but for this first draft I'm trying plain image classification on (image, label) pairs. The input is (256, 256, 3) images. The input layer is as follows:

img_inputs = keras.Input(shape=[256, 256, 3], name='image')

Then I'm trying to use the voc/2007 dataset as available in TFDS (a very old and lightweight version, to make things faster):

(train_ds, test_ds), ds_info = tfds.load(
    'voc/2007',
    split=['train', 'test'],
    data_dir="/content/drive/My Drive",
    with_info=True)
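
Inspecting what tfds.load returns can help before writing the preprocessing; this is a quick debugging check using the ds_info object and the dataset itself, not part of the training code:

print(ds_info.features)       # dict-like feature structure: 'image', 'image/filename', 'labels', 'labels_no_difficult', 'objects'
print(train_ds.element_spec)  # each element is a dict, not an (image, label) tuple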

Then I'm preprocessing the data as follows:

def resize_and_normalize_img(example):
  """Normalizes images: `uint8` -> `float32`."""
  example['image'] = tf.image.resize(example['image'], [256, 256])
  example['image'] = tf.cast(example['image'], tf.float32) / 255.
  return example

def reduce_for_classification(example):
  for key in ['image/filename', 'labels_no_difficult', 'objects']:
    example.pop(key)
  return example

train_ds_class = train_ds.map(reduce_for_classification, num_parallel_calls=tf.data.AUTOTUNE)
train_ds_class = train_ds_class.map(resize_and_normalize_img, num_parallel_calls=tf.data.AUTOTUNE)
train_ds_class = train_ds_class.cache()
train_ds_class = train_ds_class.shuffle(ds_info.splits['train'].num_examples)
train_ds_class = train_ds_class.batch(64)
train_ds_class = train_ds_class.prefetch(tf.data.AUTOTUNE)

test_ds_class = test_ds.map(reduce_for_classification, num_parallel_calls=tf.data.AUTOTUNE)
test_ds_class = test_ds_class.map(resize_and_normalize_img, num_parallel_calls=tf.data.AUTOTUNE)
test_ds_class = test_ds_class.batch(64)
test_ds_class = test_ds_class.cache()
test_ds_class = test_ds_class.prefetch(tf.data.AUTOTUNE)
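
At this point it's worth checking what the pipeline actually yields; element_spec makes that visible (a quick debugging check):

# If this prints a dict of tensors rather than an (image, label) tuple,
# model.fit() has no way to tell inputs apart from targets.
print(train_ds_class.element_spec)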

And then I'm fitting the model like this:

epochs = 8
history = model.fit(
    x=train_x, y=train_y,
    validation_data=test_ds_class,
    epochs=epochs
)

And after doing this I get an error saying that my model expects an input of shape [None, 256, 256, 3] but is getting an input of shape [256, 256, 3].

I think it's an issue with the label. Earlier I had problems with the extra keys in the dictionary-like format that tfds returns, so I tried removing everything except the image and the label, but I'm still getting this error and don't know how to move forward. I feel like once the dataset is prepared with tfds it should be ready to feed to a model, but after looking through the documentation, tutorials, and Stack Overflow I haven't found the answer. I hope someone who comes across this can help.
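
For reference, Keras expects each dataset element to be an (inputs, targets) tuple rather than a dict, so a mapping step along these lines is still missing from the pipeline above (a minimal sketch; to_tuple and train_ds_tuples are illustrative names, not from the original code):

# Turn the TFDS dict elements into (image, label) tuples so that
# model.fit() can tell inputs apart from targets.
def to_tuple(example):
  return example['image'], example['labels']

train_ds_tuples = train_ds_class.map(to_tuple, num_parallel_calls=tf.data.AUTOTUNE)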

Update: To give a bit more information, this is the model I'm using:

TL;DR: a 256x256x3 image input, a succession of convolutions and residual blocks, ending with average pooling, a fully connected layer, and a softmax that produces a (None, 1280) tensor. I'm using sparse categorical cross-entropy as the loss and accuracy as the metric.

img_inputs = keras.Input(shape=[256, 256, 3], name='image')

# first convolution
conv_first = tf.keras.layers.Conv2D(32, kernel_size=(3, 3), padding='same', name='first_conv')
x = conv_first(img_inputs)

# Second convolution
x = tf.keras.layers.Conv2D(64, kernel_size=(3, 3), strides=2, padding='same', name='second_conv')(x)

# First residual block
res = tf.keras.layers.Conv2D(32, kernel_size=(1, 1), name='res_block1_conv1')(x)
res = tf.keras.layers.Conv2D(64, kernel_size=(3, 3), padding='same', name='res_block1_conv2')(res)
x = x + res

# Convolution after First residual block
x = tf.keras.layers.Conv2D(128, kernel_size=3, strides=2, padding='same', name='first_post_res_conv')(x)

# Second residual Block
for i in range(2):
  shortcut = x
  res = tf.keras.layers.Conv2D(64, kernel_size=1, name=f'res_block2_conv1_loop{i}')(x)
  res = tf.keras.layers.Conv2D(128, kernel_size=3, padding='same', name=f'res_block2_conv2_loop{i}')(res)

  x = res + shortcut

# Convolution after Second residual block
x = tf.keras.layers.Conv2D(256, 3, strides=2, padding='same', name='second_post_res_conv')(x)

# Third residual Block
for i in range(8):
  shortcut = x
  res = tf.keras.layers.Conv2D(128, kernel_size=1, name=f'res_block3_conv1_loop{i}')(x)
  res = tf.keras.layers.Conv2D(256, kernel_size=3, padding='same', name=f'res_block3_conv2_loop{i}')(res)

  x = res + shortcut

# Convolution after Third residual block
x = tf.keras.layers.Conv2D(512, 3, strides=2, padding='same', name='third_post_res_conv')(x)

# Fourth residual Block
for i in range(8):
  shortcut = x
  res = tf.keras.layers.Conv2D(256, kernel_size=1, name=f'res_block4_conv1_loop{i}')(x)
  res = tf.keras.layers.Conv2D(512, kernel_size=3, padding='same', name=f'res_block4_conv2_loop{i}')(res)

  x = res + shortcut

# Convolution after Fourth residual block
x = tf.keras.layers.Conv2D(1024, 3, strides=2, padding='same', name='fourth_post_res_conv')(x)

# Fifth residual Block
for i in range(4):
  shortcut = x
  res = tf.keras.layers.Conv2D(512, kernel_size=1, name=f'res_block5_conv1_loop{i}')(x)
  res = tf.keras.layers.Conv2D(1024, kernel_size=3, padding='same', name=f'res_block5_conv2_loop{i}')(res)

  x = res + shortcut

# Global avg pooling
x = tf.keras.layers.GlobalAveragePooling2D(name='average_pooling')(x)

# Fully connected layer
x = tf.keras.layers.Dense(1280, name='fully_connected_layer')(x)

# Softmax
end_result = tf.keras.layers.Softmax(name='softmax')(x)

model = tf.keras.Model(inputs=img_inputs, outputs=end_result, name="darknet53")

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
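
As an aside, the model ends in an explicit Softmax layer while the loss is built with from_logits=True, which would apply softmax twice; one consistent pairing looks like this (a minimal sketch that keeps the Softmax layer):

# The network's outputs are already probabilities (Softmax layer above),
# so the loss should not apply softmax a second time.
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])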

After trying the solution proposed by AloneTogether, I'm getting the following errors (I tried changing the axis in the tf.one_hot() function several times, with the same result):

Node: 'sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits'
logits and labels must have the same first dimension, got logits shape [64,1280] and labels shape [1280]
     [[{{node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits}}]] [Op:__inference_train_function_20172]

This seems to be related to the batching, but I don't know exactly how to fix it.
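
For what it's worth, SparseCategoricalCrossentropy expects integer class indices of shape (batch,), while the one-hot/multi-hot mapping produces label vectors of shape (batch, 20), so the loss and the labels no longer match. With multi-hot labels, a binary cross-entropy setup is the usual pairing (a sketch assuming the 20 VOC classes, in line with the answer below):

# Multi-hot labels of shape (batch, 20) pair with an element-wise loss;
# the final layer should then have 20 sigmoid units, as in the answer below.
model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=[tf.keras.metrics.BinaryAccuracy()])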

The whole issue really seems related to the label encoding, because when I run that mapping line without the tf.reduce_sum() call I get the same error, but with:

First element had shape [2,20] and element 1 had shape [1,20].

And if I run the same code without the one-hot encoding line, I get this error:

Node: 'IteratorGetNext' Cannot batch tensors with different shapes in component 1.
First element had shape [4] and element 1 had shape [1].
     [[{{node IteratorGetNext}}]] [Op:__inference_train_function_18534]

CodePudding user response:

I think the problem is that each image can belong to multiple classes, so I would recommend one-hot encoding the labels and summing them into a single multi-hot vector per image. It should then work. Here is an example:

import tensorflow as tf
import tensorflow_datasets as tfds 

def resize_and_normalize_img(example):
  """Normalizes images: `uint8` -> `float32`."""
  example['image'] = tf.image.resize(example['image'], [256, 256])
  example['image'] = tf.cast(example['image'], tf.float32) / 255.
  return example['image'], example['labels']

def reduce_for_classification(example):
  for key in ['image/filename', 'labels_no_difficult', 'objects']:
    example.pop(key)
  return example

(train_ds, test_ds), ds_info = tfds.load('voc/2007', split=['train', 'test'], with_info=True)

train_ds_class = train_ds.map(reduce_for_classification, num_parallel_calls=tf.data.AUTOTUNE)
train_ds_class = train_ds_class.map(resize_and_normalize_img, num_parallel_calls=tf.data.AUTOTUNE)
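# `labels` is a variable-length vector of class indices per image; one-hot
# each index into a (num_labels, 20) tensor, then sum the rows into a
# single fixed-size multi-hot vector of shape (20,).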
train_ds_class = train_ds_class.map(lambda x, y: (x, tf.reduce_sum(tf.one_hot(y, 20, axis=-1), axis=0)))

train_ds_class = train_ds_class.cache()
train_ds_class = train_ds_class.shuffle(ds_info.splits['train'].num_examples)
train_ds_class = train_ds_class.batch(64)
train_ds_class = train_ds_class.prefetch(tf.data.AUTOTUNE)

inputs = tf.keras.layers.Input(shape=[256, 256, 3], name='image')
x = tf.keras.layers.Flatten()(inputs)
x = tf.keras.layers.Dense(50, activation='relu')(x)
outputs = tf.keras.layers.Dense(20, activation='sigmoid')(x)
model = tf.keras.Model(inputs, outputs)
model.compile(loss='binary_crossentropy', optimizer='adam')

model.fit(train_ds_class, epochs=5)

Epoch 1/5
40/40 [==============================] - 16s 124ms/step - loss: 3.0883
Epoch 2/5
40/40 [==============================] - 5s 115ms/step - loss: 0.9750
Epoch 3/5
40/40 [==============================] - 5s 115ms/step - loss: 0.4578
Epoch 4/5
40/40 [==============================] - 5s 115ms/step - loss: 0.6004
Epoch 5/5
40/40 [==============================] - 5s 115ms/step - loss: 0.3534
<keras.callbacks.History at 0x7f0e59513f50>
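
To use the test split as validation data, the question's test pipeline needs the same extra label step; a sketch reusing the names from the question:

# Same preprocessing as the training pipeline, including the multi-hot step.
test_ds_class = test_ds.map(reduce_for_classification, num_parallel_calls=tf.data.AUTOTUNE)
test_ds_class = test_ds_class.map(resize_and_normalize_img, num_parallel_calls=tf.data.AUTOTUNE)
test_ds_class = test_ds_class.map(lambda x, y: (x, tf.reduce_sum(tf.one_hot(y, 20, axis=-1), axis=0)))
test_ds_class = test_ds_class.batch(64).cache().prefetch(tf.data.AUTOTUNE)

model.fit(train_ds_class, validation_data=test_ds_class, epochs=5)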