Add single tensor in model.fit()-CodePudding

I have a dataset made of tensors. A sample tensor looks like this:

(<tf.Tensor: shape=(1,), dtype=string, numpy=
array([b"Some text"],
      dtype=object)>, <tf.Tensor: shape=(), dtype=int64, numpy=0>)

Instead of passing the whole dataset to the model at once, I would like to iterate over the dataset and feed the tensors to the model one at a time.

I tried the loop shown below, but it raises the following error:

IndexError: list index out of range

# NOTE(review): `dataset` is iterated here without batching, so each `element`
# is a (text, label) tuple like the sample shown above, and the whole tuple is
# passed as `x` with no `y`. This is the call reported to raise
# "IndexError: list index out of range".
for element in dataset:
  model.fit(x=element)

What is the best way to achieve the desired output?

Thank you in advance!

You can find my model here:

import pandas as pd
import tensorflow as tf

# Hyperparameters for the text vectorizer and the input pipeline.
VOCAB_SIZE = 1000
BUFFER_SIZE = 2
BATCH_SIZE = 1

# Read the CSV and split the 'class' column off as the labels; the remaining
# columns and the labels are sliced together into (features, label) pairs.
df = pd.read_csv('labeled_tweets_processed.csv')
labels = df.pop('class')
dataset = tf.data.Dataset.from_tensor_slices((df, labels))

# Build the vocabulary from the feature side of each pair only.
encoder = tf.keras.layers.TextVectorization(max_tokens=VOCAB_SIZE)
encoder.adapt(dataset.map(lambda features, _label: features))

# Input pipeline, one stage per statement: shuffle -> batch -> prefetch.
train_dataset = dataset.shuffle(BUFFER_SIZE)
train_dataset = train_dataset.batch(BATCH_SIZE)
train_dataset = train_dataset.prefetch(tf.data.AUTOTUNE)

# Assemble the network layer by layer.
model = tf.keras.Sequential()
model.add(encoder)
model.add(tf.keras.layers.Embedding(
    input_dim=len(encoder.get_vocabulary()),
    output_dim=64,
    mask_zero=True,  # Use masking to handle the variable sequence lengths
))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)))
model.add(tf.keras.layers.Dense(64, activation='relu'))
# Single output unit; the loss below is configured with from_logits=True.
model.add(tf.keras.layers.Dense(1))

model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-4),
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

And here are a few samples from my dataset:

(<tf.Tensor: shape=(1,), dtype=string, numpy=
array([b'text1'],
      dtype=object)>, <tf.Tensor: shape=(), dtype=int64, numpy=1>)
(<tf.Tensor: shape=(1,), dtype=string, numpy=
array([b"text2"],
      dtype=object)>, <tf.Tensor: shape=(), dtype=int64, numpy=0>)
(<tf.Tensor: shape=(1,), dtype=string, numpy=
array([b"text3"],
      dtype=object)>, <tf.Tensor: shape=(), dtype=int64, numpy=0>)

CodePudding user response：

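I'm not sure why you want to call model.fit in a loop, but you can try something like the code below. The key change is to iterate over the batched dataset and unpack each element into its features and labels before passing them to model.fit: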

import pandas as pd
import tensorflow as tf

# Hyperparameters for the text vectorizer and the input pipeline.
VOCAB_SIZE = 1000
BUFFER_SIZE = 2
BATCH_SIZE = 1

# Small in-memory stand-in for the real data: five identical texts with labels.
df = pd.DataFrame(data={
    'texts': ['Some text'] * 5,
    'class': [0, 0, 1, 1, 1],
})

# Split the 'class' column off as the labels and slice (features, label) pairs.
labels = df.pop('class')
dataset = tf.data.Dataset.from_tensor_slices((df, labels))

# Build the vocabulary from the feature side of each pair only.
encoder = tf.keras.layers.TextVectorization(max_tokens=VOCAB_SIZE)
encoder.adapt(dataset.map(lambda features, _label: features))

# Input pipeline, one stage per statement: shuffle -> batch -> prefetch.
train_dataset = dataset.shuffle(BUFFER_SIZE)
train_dataset = train_dataset.batch(BATCH_SIZE)
train_dataset = train_dataset.prefetch(tf.data.AUTOTUNE)

# Assemble the network layer by layer.
model = tf.keras.Sequential()
model.add(encoder)
model.add(tf.keras.layers.Embedding(
    input_dim=len(encoder.get_vocabulary()),
    output_dim=64,
    mask_zero=True,  # Use masking to handle the variable sequence lengths
))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)))
model.add(tf.keras.layers.Dense(64, activation='relu'))
# Single output unit; the loss below is configured with from_logits=True.
model.add(tf.keras.layers.Dense(1))

model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-4),
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# Walk the batched dataset, unpacking every batch into its features and labels,
# and run a separate two-epoch fit on each batch.
for features, targets in train_dataset:
    model.fit(x=features, y=targets, epochs=2)