Task: Keras CAPTCHA OCR model training.
Problem: I am trying to print CAPTCHAs from my validation set, but doing so causes the following error:
InvalidArgumentError Traceback (most recent call last)
<ipython-input-6-df1fce607804> in <module>()
1
2 #_, ax = plt.subplots(1, 4, figsize=(10, 5))
----> 3 for batch in validation_dataset.take(1):
4 images = batch["image"]
5 labels = batch["label"]
3 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/ops.py in raise_from_not_ok_status(e, name)
7105 def raise_from_not_ok_status(e, name):
7106   e.message += (" name: " + name if name is not None else "")
-> 7107   raise core._status_to_exception(e) from None  # pylint: disable=protected-access
7108
7109
InvalidArgumentError: Cannot add tensor to the batch: number of elements does not match. Shapes are: [tensor]: [4], [batch]: [5] [Op:IteratorGetNext]
This is the code I tried for printing my output:
#_, ax = plt.subplots(1, 4, figsize=(10, 5))
for batch in validation_dataset.take(1):
    images = batch["image"]
    labels = batch["label"]
    for i in range(batch_size):
        img = (images[i] * 255).numpy().astype("uint8")
        label = tf.strings.reduce_join(num_to_char(labels[i])).numpy().decode("utf-8")
        plt.title(label)
        plt.imshow(img[:, :, 0].T, cmap="gray")
        plt.show()
For this task I tried setting the batch size to 1, but I want to train my model with a higher batch size (my batch size is 16).
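The error message suggests that two encoded labels have different lengths (4 elements vs. 5), so .batch() cannot stack them into one tensor. As a quick check (a minimal sketch, assuming x_valid, y_valid, char_to_num and encode_single_sample as defined further below), the label lengths in the unbatched validation data can be counted before batching:

import collections
import tensorflow as tf

# Minimal sketch: count encoded label lengths before batching
unbatched = tf.data.Dataset.from_tensor_slices((x_valid, y_valid)).map(encode_single_sample)
length_counts = collections.Counter(int(tf.shape(s["label"])[0]) for s in unbatched)
print(length_counts)  # a mix of lengths (e.g. both 4 and 5) would explain the 4-vs-5 error

If all CAPTCHA strings are supposed to be the same length, a mixed count would mean some labels in y_valid are shorter or longer than expected.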
# Mapping integers back to original characters
num_to_char = layers.StringLookup(
    vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True
)
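For reference, a round trip through the two lookup layers (a minimal sketch, assuming char_to_num is the forward StringLookup built from the same vocabulary) looks like this:

import tensorflow as tf

# Minimal sketch: encode a hypothetical label and decode it back; "2b8n4" is just
# an example string, assuming its characters are in char_to_num's vocabulary
encoded = char_to_num(tf.strings.unicode_split("2b8n4", input_encoding="UTF-8"))
decoded = tf.strings.reduce_join(num_to_char(encoded)).numpy().decode("utf-8")
print(encoded.numpy(), decoded)  # decoded should be "2b8n4" again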
This is the code, taken from the TensorFlow docs, that converts the data into tf.data.Dataset objects (Creating Dataset objects):
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = (
    train_dataset.map(
        encode_single_sample, num_parallel_calls=tf.data.AUTOTUNE
    )
    .batch(batch_size)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
    .repeat(10)
)

validation_dataset = tf.data.Dataset.from_tensor_slices((x_valid, y_valid))
validation_dataset = (
    validation_dataset.map(
        encode_single_sample, num_parallel_calls=tf.data.AUTOTUNE
    )
    .batch(batch_size)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)
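If the label lengths really do differ, .batch(batch_size) cannot stack them; one alternative (a minimal sketch, not what I currently use, and assuming that padding the labels with 0 is acceptable for the downstream loss) is padded_batch, which pads each label up to the longest one in the batch:

# Minimal sketch: replace .batch(batch_size) with padded_batch so that labels of
# different lengths are padded (with 0) up to the longest label in each batch
validation_dataset = (
    tf.data.Dataset.from_tensor_slices((x_valid, y_valid))
    .map(encode_single_sample, num_parallel_calls=tf.data.AUTOTUNE)
    .padded_batch(batch_size)  # images are already a fixed size, only labels get padded
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)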
This code reads each image and preprocesses it to bring all the images to a uniform shape (function encode_single_sample):
def encode_single_sample(img_path, label):
    # 1. Read image
    img = tf.io.read_file(img_path)
    # 2. Decode the PNG (3 channels)
    img = tf.io.decode_png(img, channels=3)
    # 3. Convert to float32 in [0, 1] range
    img = tf.image.convert_image_dtype(img, tf.float32)
    # 4. Resize to the desired size
    img = tf.image.resize(img, [img_height, img_width])
    # 5. Transpose the image because we want the time
    #    dimension to correspond to the width of the image.
    img = tf.transpose(img, perm=[1, 0, 2])
    # 6. Map the characters in label to numbers
    label = char_to_num(tf.strings.unicode_split(label, input_encoding="UTF-8"))
    # 7. Return a dict as our model is expecting two inputs
    return {"image": img, "label": label}
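To see what a single encoded sample looks like before batching, the function can also be called directly (a minimal sketch, assuming x_valid/y_valid hold image paths and label strings as above):

# Minimal sketch: encode one sample directly to check its shapes
sample = encode_single_sample(x_valid[0], y_valid[0])
print(sample["image"].shape)    # (img_width, img_height, 3) after the transpose
print(sample["label"].numpy())  # one integer per character of y_valid[0]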