def _bytes_feature(value):
    """Wrap a string / byte value in a ``tf.train.Feature`` backed by a bytes_list.

    When ``value`` has the same type as ``tf.constant(0)`` (i.e. it is a tensor),
    its content is extracted with ``.numpy()`` before wrapping.
    """
    tensor_type = type(tf.constant(0))
    payload = value.numpy() if isinstance(value, tensor_type) else value
    bytes_list = tf.train.BytesList(value=[payload])
    return tf.train.Feature(bytes_list=bytes_list)
def _int64_feature(value):
    """Wrap a bool / enum / int / uint in a ``tf.train.Feature`` backed by an int64_list."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)
def write_tfrecords(data_list, output_file):
    """Write (image, label) pairs to a TFRecord file for nearest neighbor evaluation.

    Each pair becomes one ``tf.train.Example`` with an ``"image"`` bytes feature
    and a ``"label"`` int64 feature.

    Args:
        data_list: Iterable of ``(image, label)`` pairs. ``image`` must expose
            ``.numpy()`` (presumably an eager tensor -- confirm against callers);
            ``label`` is handed to ``_int64_feature`` unchanged.
        output_file: Path passed to ``tf.io.TFRecordWriter``.

    Returns:
        The number of examples written.

    Note:
        ``ndarray.tobytes()`` stores only the raw buffer. Shape and dtype are not
        recorded, so the reader has to know them to rebuild the array.
    """
    total_samples = 0
    with tf.io.TFRecordWriter(output_file) as writer:
        for image, label in tqdm(data_list):
            data = {
                # NumPy arrays have no ``bytes()`` method; ``tobytes()`` returns
                # the raw buffer.
                "image": _bytes_feature(image.numpy().tobytes()),
                "label": _int64_feature(label),
            }
            out = tf.train.Example(features=tf.train.Features(feature=data))
            writer.write(out.SerializeToString())
            # Accumulate rather than overwrite so the count covers every record.
            total_samples += 1
    return total_samples
The `image` object on this line, `"image": _bytes_feature(image.numpy().bytes())`, is a tensor.
I used numpy bytes here, but when I decode the data the shape of the array is missing, so I have to specify the array shape myself. This is how numpy.bytes() and numpy.frombuffer() work (see: "Convert byte array back to numpy array").
Is there a better way, so that the array shape is serialized as well?
CodePudding user response:
There are a few options:
- `tf.io.serialize_tensor()` and `tf.io.parse_tensor()` for arbitrary tensors.
- `tf.io.encode_png()` (or `tf.io.encode_jpeg()`) and `tf.io.decode_image()` for images.
As an example:
# Build a random float tensor of shape [24, 24, 3] to stand in for a real image.
image = tf.random.uniform([24, 24, 3], 0.0, 1.0)

# Store the serialized tensor under the 'image' key of a tf.train.Example.
image_feature = _bytes_feature(tf.io.serialize_tensor(image))
features = tf.train.Features(feature={'image': image_feature})
example = tf.train.Example(features=features)

# Round-trip through the serialized form (this mirrors writing to and reading
# from a TFRecord).
example = tf.train.Example.FromString(example.SerializeToString())

# Recover the original image from the stored bytes.
image_bytes = example.features.feature['image'].bytes_list.value[0]
image = tf.io.parse_tensor(image_bytes, out_type=tf.float32)
In the first case, the tensor is converted to a serialized TensorProto, which preserves shape information. In the second case, the shape is preserved in the PNG/JPEG header.