The following code created a TFRecordDataset
from test_filenames
, and it contains 10000 records:
test_dataset = tf.data.TFRecordDataset([test_filenames])
I want to keep the first record in the test_dataset and remove all other records for testing.
Here is the dummy
code:
test_dataset = test_dataset.removeAllExceptFirst()
...
first_record = test_dataset.getItem(0)
test_dataset = test_dataset.removeAll()
test_dataset = test_dataset.add(first_record)
Is there any existing method for implementing this feature?
Here is the test for using "test_dataset.batch(1).take(1)
", it does not work as expected:
def test_function(record):
keys_to_features = {
"test1": tf.io.FixedLenFeature((), tf.string, default_value=""),
'test2': tf.io.FixedLenFeature([], tf.string),
"test3": tf.io.FixedLenFeature((), tf.string)
}
features = tf.io.parse_single_example(record, keys_to_features)
print("features: {}".format(features))
return None, None
test_dataset = tf.data.TFRecordDataset([test_filenames])
test_dataset = test_dataset.batch(1).take(1)
test_dataset = test_dataset.map(test_function)
Here is the error
:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/tmp/ipykernel_11776/3885954589.py in <cell line: 3>()
1 test_dataset = tf.data.TFRecordDataset([test_filenames])
2 test_dataset = test_dataset.batch(1).take(1)
----> 3 test_dataset = test_dataset.map(test_function)
/usr/local/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py in map(self, map_func, num_parallel_calls, deterministic, name)
2014 warnings.warn("The `deterministic` argument has no effect unless the "
2015 "`num_parallel_calls` argument is specified.")
-> 2016 return MapDataset(self, map_func, preserve_cardinality=True, name=name)
2017 else:
2018 return ParallelMapDataset(
/usr/local/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py in __init__(self, input_dataset, map_func, use_inter_op_parallelism, preserve_cardinality, use_legacy_function, name)
5189 self._use_inter_op_parallelism = use_inter_op_parallelism
5190 self._preserve_cardinality = preserve_cardinality
-> 5191 self._map_func = structured_function.StructuredFunctionWrapper(
5192 map_func,
5193 self._transformation_name(),
/usr/local/lib/python3.8/site-packages/tensorflow/python/data/ops/structured_function.py in __init__(self, func, transformation_name, dataset, input_classes, input_shapes, input_types, input_structure, add_to_graph, use_legacy_function, defun_kwargs)
269 fn_factory = trace_tf_function(defun_kwargs)
270
--> 271 self._function = fn_factory()
272 # There is no graph to add in eager mode.
273 add_to_graph &= not context.executing_eagerly()
/usr/local/lib/python3.8/site-packages/tensorflow/python/eager/function.py in get_concrete_function(self, *args, **kwargs)
3068 or `tf.Tensor` or `tf.TensorSpec`.
3069 """
-> 3070 graph_function = self._get_concrete_function_garbage_collected(
3071 *args, **kwargs)
3072 graph_function._garbage_collector.release() # pylint: disable=protected-access
/usr/local/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _get_concrete_function_garbage_collected(self, *args, **kwargs)
3034 args, kwargs = None, None
3035 with self._lock:
-> 3036 graph_function, _ = self._maybe_define_function(args, kwargs)
3037 seen_names = set()
3038 captured = object_identity.ObjectIdentitySet(
/usr/local/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _maybe_define_function(self, args, kwargs)
3290
3291 self._function_cache.add_call_context(cache_key.call_context)
-> 3292 graph_function = self._create_graph_function(args, kwargs)
3293 self._function_cache.add(cache_key, cache_key_deletion_observer,
3294 graph_function)
/usr/local/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
3128 arg_names = base_arg_names missing_arg_names
3129 graph_function = ConcreteFunction(
-> 3130 func_graph_module.func_graph_from_py_func(
3131 self._name,
3132 self._python_function,
/usr/local/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes, acd_record_initial_resource_uses)
1159 _, original_func = tf_decorator.unwrap(python_func)
1160
-> 1161 func_outputs = python_func(*func_args, **func_kwargs)
1162
1163 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
/usr/local/lib/python3.8/site-packages/tensorflow/python/data/ops/structured_function.py in wrapped_fn(*args)
246 attributes=defun_kwargs)
247 def wrapped_fn(*args): # pylint: disable=missing-docstring
--> 248 ret = wrapper_helper(*args)
249 ret = structure.to_tensor_list(self._output_structure, ret)
250 return [ops.convert_to_tensor(t) for t in ret]
/usr/local/lib/python3.8/site-packages/tensorflow/python/data/ops/structured_function.py in wrapper_helper(*args)
175 if not _should_unpack(nested_args):
176 nested_args = (nested_args,)
--> 177 ret = autograph.tf_convert(self._func, ag_ctx)(*nested_args)
178 if _should_pack(ret):
179 ret = tuple(ret)
/usr/local/lib/python3.8/site-packages/tensorflow/python/autograph/impl/api.py in wrapper(*args, **kwargs)
690 except Exception as e: # pylint:disable=broad-except
691 if hasattr(e, 'ag_error_metadata'):
--> 692 raise e.ag_error_metadata.to_exception(e)
693 else:
694 raise
ValueError: in user code:
File "/tmp/ipykernel_11776/3804092897.py", line 8, in test_function *
features = tf.io.parse_single_example(record, keys_to_features)
ValueError: Input serialized must be a scalar
CodePudding user response:
You need to create Dataset
first. for creating the dataset you need to change your test_function
like below then use .map()
and at the end use batch(1).take(1)
like below:
def test_function(record):
keys_to_features = {
"test1": tf.io.FixedLenFeature((), tf.string, default_value=""),
'test2': tf.io.FixedLenFeature([], tf.string),
"test3": tf.io.FixedLenFeature((), tf.string)
}
features = tf.io.parse_single_example(record, keys_to_features)
return (feature['test1'], feature['test2'])
test_dataset = tf.data.TFRecordDataset([test_filenames])
test_dataset = test_dataset.map(test_function)
test_dataset = test_dataset.batch(1).take(1)