In a dataset pipeline (`.map()`), I am converting an int tensor into a file-pattern string for `make_csv_dataset(...)`.
I get an error:
ValueError: No files match `file_pattern` dataset/PAMAP2_Dataset/train/*_Tensor("strided_slice:0", shape=(), dtype=int32).csv.
The problem is the `Tensor("strided_slice:0", shape=(), dtype=int32)` part of the pattern: it should be an integer number, not this text.
Code
labels = [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 16, 17, 18, 19, 20, 24]


def _make_dataset(idx):
    """Build a batched CSV dataset for the label at position ``idx``.

    The labels are shuffled on every call, so ``idx`` selects a random
    activity ID whose files are then matched by glob pattern.

    Args:
        idx: index into the shuffled ``labels``; supplied by
            ``Dataset.interleave`` when used as its ``map_func``.

    Returns:
        The dataset produced by ``make_csv_dataset`` (rows read one at a
        time), re-batched to 64 with the remainder dropped, then shuffled.
    """
    # shuffle
    activityID = tf.random.shuffle(labels)
    dataset = (
        tf.data.experimental.make_csv_dataset(
            # NOTE: this is the line that triggers the reported ValueError.
            # `activityID[idx]` is a symbolic tensor here, so `str(...)`
            # yields its repr ('Tensor("strided_slice:0", ...)') instead of
            # the integer value.
            file_pattern=("dataset/PAMAP2_Dataset/train/*_" + str(activityID[idx]) + ".csv"),
            batch_size=1,
            num_epochs=1,
            shuffle=False,
        ).batch(64, drop_remainder=True).shuffle(64)
    )
    return dataset
# Feed a single index (0) through `_make_dataset` and flatten the dataset it
# returns; AUTOTUNE lets tf.data pick the cycle length and parallelism, and
# element order is not required to be deterministic.
dataset = tf.data.Dataset.range(1).interleave(
    map_func=_make_dataset,
    cycle_length=tf.data.AUTOTUNE,
    num_parallel_calls=tf.data.AUTOTUNE,
    deterministic=False,
)
CodePudding user response:
There is a bug related to using `tf.data.Dataset.interleave` together with `make_csv_dataset`. The current recommendation is to use the `CsvDataset` API instead. Try something like this, using `tf.io.matching_files` to resolve the file pattern:
import pandas as pd
import tensorflow as tf
labels = [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 16, 17, 18, 19, 20, 24]
data = {
    "id": [420, 380, 390],
    "duration": [50, 40, 45],
}

# Create dummy data: one identical two-column CSV per label, written to the
# current working directory as test_<label>.csv.
# NOTE(review): `get_random_path` globs under /content, so this presumably
# assumes the working directory is /content (e.g. Colab) -- confirm.
df = pd.DataFrame(data)
for i in labels:
    df.to_csv(f"test_{i}.csv", index=False)
def get_random_path(x, idx):
    """Return the glob pattern for the CSV file(s) of the label at ``x[idx]``.

    Called through ``tf.py_function`` so that ``x[idx]`` is an eager value
    whose ``.numpy()`` gives the concrete label rather than a symbolic tensor.

    Args:
        x: indexable collection whose elements expose ``.numpy()``.
        idx: position in ``x`` of the label to use.

    Returns:
        The string ``"/content/*_<label>.csv"``.
    """
    return "/content/*_" + str(x[idx].numpy()) + ".csv"
def _make_dataset(idx):
    """Return a ``CsvDataset`` over the files of one randomly chosen label.

    Args:
        idx: index into the freshly shuffled ``labels``; supplied by
            ``Dataset.interleave``.

    Returns:
        A ``tf.data.experimental.CsvDataset`` reading two int32 columns from
        every file matching the pattern built by ``get_random_path``.
    """
    # shuffle
    activityID = tf.random.shuffle(labels)
    # Building the pattern needs the concrete label value, so it is done
    # inside py_function rather than with str() on a symbolic tensor.
    path = tf.py_function(get_random_path, [activityID, idx], Tout=[tf.string])
    dataset = tf.data.experimental.CsvDataset(
        filenames=tf.io.matching_files(path),
        record_defaults=[tf.int32, tf.int32],
        header=True,
    )
    return dataset
# Expand a single index (0) through `_make_dataset` and flatten the resulting
# CsvDataset into one stream of rows.
index_free_kwargs = dict(
    cycle_length=tf.data.AUTOTUNE,
    num_parallel_calls=tf.data.AUTOTUNE,
    deterministic=False,
)
dataset = tf.data.Dataset.range(1).interleave(_make_dataset, **index_free_kwargs)
# Each element is one CSV row, yielded as a tuple of scalar tensors.
for x in dataset:
    print(x)
(<tf.Tensor: shape=(), dtype=int32, numpy=420>, <tf.Tensor: shape=(), dtype=int32, numpy=50>)
(<tf.Tensor: shape=(), dtype=int32, numpy=380>, <tf.Tensor: shape=(), dtype=int32, numpy=40>)
(<tf.Tensor: shape=(), dtype=int32, numpy=390>, <tf.Tensor: shape=(), dtype=int32, numpy=45>)
For more details, check the docs.