Is it possible to crop/resize images per batch?
I'm using the TensorFlow Dataset API as below:
dataset = dataset.shuffle().repeat().batch(batch_size, drop_remainder=True)
Within a batch, all images should have the same size, but sizes can differ across batches.
For example, the first batch could have images of shape (batch_size, 300, 300, 3), the next batch images of shape (batch_size, 224, 224, 3), and another batch images of shape (batch_size, 400, 400, 3).
Basically, I want dynamically shaped batches, where all images within a batch have static shapes.
If we do the following:
dataset = dataset.shuffle().repeat().batch(batch_size, drop_remainder=True).map(lambda x, y: map_fn(x, y))
Does the above .map() apply to each batch separately, or over the entire dataset?
If the above .map() doesn't apply to each batch separately, how can we do this? Can we define an iterator after dataset.batch(), apply tf.image.crop_and_resize() over each image per batch, and later use dataset.concatenate() to combine the transformed batches?
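For reference, a minimal sketch with toy data (hypothetical shapes) to check what the mapped function receives when map() is called after batch():
import tensorflow as tf
ds = tf.data.Dataset.from_tensor_slices(tf.zeros((8, 300, 300, 3)))
ds = ds.batch(4, drop_remainder=True)
def show(x):
    tf.print('shape seen by map:', tf.shape(x))  # prints [4 300 300 3]
    return x
for _ in ds.map(show):
    pass  # the function sees whole batches, not single images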
I'm creating the dataset as below:
# Dataset creation (read image data from files of COCO dataset)
dataset = tf.data.Dataset.list_files(self._file_pattern, shuffle=False)
dataset = dataset.shard(dataset_num_shards, dataset_shard_index)
dataset = dataset.shuffle(tf.cast(256 / dataset_num_shards, tf.int64))
dataset = dataset.interleave(map_func=lambda filename: tf.data.TFRecordDataset(filename).prefetch(1), cycle_length=32, block_length=1, num_parallel_calls=tf.data.experimental.AUTOTUNE)
dataset = dataset.map(tf_example_decoder.TfExampleDecoder().decode, num_parallel_calls=64)
dataset = dataset.shuffle(64).repeat()
# Parse each image for preprocessing
dataset = dataset.map(lambda data, _: _parse_example(data), num_parallel_calls=64)
dataset = dataset.batch(batch_size=batch_size, drop_remainder=True)
# Code you suggested to resize images to a fixed shape in each batch
def resize_data(images, labels):
    tf.print('Original shape -->', tf.shape(images))
    SIZE = (300, 300)
    return tf.image.resize(images, SIZE), labels
dataset = dataset.map(resize_data)
dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
tf.estimator.Estimator(...).train(
    input_fn=lambda: dataset,  # input_fn must be a callable that returns the dataset
    steps=steps,
    hooks=train_hooks)
Answer:
You could try something like this:
import tensorflow as tf
import numpy as np
dataset1 = tf.data.Dataset.from_tensor_slices(np.random.random((32, 300, 300, 3)))
dataset2 = tf.data.Dataset.from_tensor_slices(np.random.random((32, 224, 224, 3)))
dataset3 = tf.data.Dataset.from_tensor_slices(np.random.random((32, 400, 400, 3)))
dataset = dataset1.concatenate(dataset2.concatenate(dataset3))
dataset = dataset.shuffle(1).repeat().batch(32, drop_remainder=True)
def resize_data(images):
    tf.print('Original shape -->', tf.shape(images))
    SIZE = (180, 180)
    return tf.image.resize(images, SIZE)
dataset = dataset.map(resize_data)
for images in dataset.take(3):
    tf.print('New shape -->', tf.shape(images))
Original shape --> [32 300 300 3]
New shape --> [32 180 180 3]
Original shape --> [32 224 224 3]
New shape --> [32 180 180 3]
Original shape --> [32 400 400 3]
New shape --> [32 180 180 3]
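Since the map() runs after batch(), resize_data receives one whole (batch_size, height, width, channels) tensor per call, so tf.image.resize is applied once per batch rather than once per image.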
You could also use tf.image.resize_with_crop_or_pad if you want:
def resize_data(images):
    tf.print('Original shape -->', tf.shape(images))
    SIZE = (180, 180)
    return tf.image.resize_with_crop_or_pad(images, SIZE[0], SIZE[1])
dataset = dataset.map(resize_data)
for images in dataset.take(3):
    tf.print('New shape -->', tf.shape(images))
Note that using repeat() will create an infinite dataset.
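If you need a finite iterator for debugging, a minimal sketch is to cap the repeated dataset with take():
for images in dataset.take(5):  # stop after 5 batches of the infinite stream
    tf.print(tf.shape(images))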
Update: If you want a random size for each batch, try something like this:
import tensorflow as tf
import numpy as np
dataset1 = tf.data.Dataset.from_tensor_slices(np.random.random((32, 300, 300, 3)))
dataset2 = tf.data.Dataset.from_tensor_slices(np.random.random((32, 224, 224, 3)))
dataset3 = tf.data.Dataset.from_tensor_slices(np.random.random((32, 400, 400, 3)))
dataset = dataset1.concatenate(dataset2.concatenate(dataset3))
dataset = dataset.batch(32, drop_remainder=True).shuffle(96)
def resize_data(images):
    batch_size = tf.shape(images)[0]
    images_resized = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
    # One random target size per batch; AutoGraph converts this Python loop into a tf.while_loop
    SIZE = tf.random.uniform((2,), minval=300, maxval=500, dtype=tf.int32)
    for i in range(batch_size):
        images_resized = images_resized.write(images_resized.size(), tf.image.resize(images[i], SIZE))
    return images_resized.stack()
dataset = dataset.map(resize_data)
for images in dataset:
    tf.print('New shape -->', tf.shape(images))
New shape --> [32 392 385 3]
New shape --> [32 468 459 3]
New shape --> [32 466 461 3]
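Since tf.image.resize also accepts a full 4-D batch, a simpler sketch of the same idea (assuming no per-image logic is needed) draws one random target size per batch and avoids the TensorArray loop entirely:
def resize_data(images):
    # Draw one random (height, width) per batch and resize the whole 4-D batch at once
    SIZE = tf.random.uniform((2,), minval=300, maxval=500, dtype=tf.int32)
    return tf.image.resize(images, SIZE)
dataset = dataset.map(resize_data)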