I would like to use a sequence of documents to predict a target label:
['some text here', 'some more text here'] --> label
Initially my text sequences have a fixed length, which I want to get working before moving on to padded sequences. The architecture is as follows:
Input -> HubLayer -> LSTM -> Dense
The following code starts running and then fails:
hub_model = 'https://tfhub.dev/google/nnlm-en-dim50/2'
hub_layer = hub.KerasLayer(hub_model, input_shape=(), dtype='string', trainable=False)
def build_model():
    inputs = tf.keras.Input(shape=(), dtype='string')
    inputs_1d = tf.reshape(inputs, [-1])
    x = hub_layer(inputs_1d)
    x = tf.reshape(x, [BATCH_SIZE, 2, 50])
    x = tf.keras.layers.LSTM(32, activation='relu')(x)
    outputs = tf.keras.layers.Dense(y.shape[1], activation='sigmoid')(x)
    return tf.keras.Model(inputs, outputs)
I believe the problem is in how I pass a sequence to the Keras hub layer.
Error:
2021-11-02 19:34:34.360697: W tensorflow/core/framework/op_kernel.cc:1680] Invalid argument: required broadcastable shapes
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
/tmp/ipykernel_9371/20784351.py in <module>
----> 1 history = model.fit(train, epochs=2, validation_data=test)
/opt/conda/lib/python3.7/site-packages/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1182 _r=1):
1183 callbacks.on_train_batch_begin(step)
-> 1184 tmp_logs = self.train_function(iterator)
1185 if data_handler.should_sync:
1186 context.async_wait()
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds)
883
884 with OptionalXlaContext(self._jit_compile):
--> 885 result = self._call(*args, **kwds)
886
887 new_tracing_count = self.experimental_get_tracing_count()
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in _call(self, *args, **kwds)
915 # In this case we have created variables on the first call, so we run the
916 # defunned version which is guaranteed to never create variables.
--> 917 return self._stateless_fn(*args, **kwds) # pylint: disable=not-callable
918 elif self._stateful_fn is not None:
919 # Release the lock early so that multiple threads can perform the call
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/function.py in __call__(self, *args, **kwargs)
3038 filtered_flat_args) = self._maybe_define_function(args, kwargs)
3039 return graph_function._call_flat(
-> 3040 filtered_flat_args, captured_inputs=graph_function.captured_inputs) # pylint: disable=protected-access
3041
3042 @property
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
1962 # No tape is watching; skip to running the function.
1963 return self._build_call_outputs(self._inference_function.call(
-> 1964 ctx, args, cancellation_manager=cancellation_manager))
1965 forward_backward = self._select_forward_and_backward_functions(
1966 args,
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/function.py in call(self, ctx, args, cancellation_manager)
594 inputs=args,
595 attrs=attrs,
--> 596 ctx=ctx)
597 else:
598 outputs = execute.execute_with_cancellation(
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
58 ctx.ensure_initialized()
59 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 60 inputs, attrs, num_outputs)
61 except core._NotOkStatusException as e:
62 if name is not None:
InvalidArgumentError: 2 root error(s) found.
(0) Invalid argument: required broadcastable shapes
[[node gradient_tape/binary_crossentropy/logistic_loss/mul/Mul (defined at tmp/ipykernel_9371/484917154.py:1) ]]
(1) Invalid argument: required broadcastable shapes
[[node gradient_tape/binary_crossentropy/logistic_loss/mul/Mul (defined at tmp/ipykernel_9371/484917154.py:1) ]]
[[model_1/keras_layer_1/StatefulPartitionedCall/StatefulPartitionedCall/StatefulPartitionedCall/tokenize/StringSplit/StringSplit/_23]]
0 successful operations.
0 derived errors ignored. [Op:__inference_train_function_4634]
Function call stack:
train_function -> train_function
CodePudding user response:
You just need to make sure that you feed both your sentences and your labels during training, and that your model's input and output shapes match your data: the input takes two sentences per sample (shape (2,)), and the output layer has a single sigmoid unit for the binary label. Here is a simple, working example where each input sample contains two sentences and a corresponding label:
import tensorflow_hub as hub
import tensorflow as tf
hub_model = 'https://tfhub.dev/google/nnlm-en-dim50/2'
hub_layer = hub.KerasLayer(hub_model, input_shape=(), dtype='string', trainable=False)
def build_model():
    inputs = tf.keras.Input(shape=(2,), dtype='string')
    # The hub layer expects a 1-D batch of strings, so flatten the sentence pairs.
    inputs_1d = tf.reshape(inputs, [-1])
    x = hub_layer(inputs_1d)
    # Restore (batch, timesteps, features) so the LSTM sees each pair as a sequence of two 50-dim embeddings.
    x = tf.reshape(x, [BATCH_SIZE, 2, 50])
    x = tf.keras.layers.LSTM(32, activation='relu')(x)
    outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x)
    return tf.keras.Model(inputs, outputs)
BATCH_SIZE = 2

sentences = tf.constant([
    ["Improve the physical fitness of your goldfish by getting him a bicycle",
     "You are unsure whether or not to trust him but very thankful that you wore a turtle neck"],
    ["Not all people who wander are lost",
     "There is a reason that roses have thorns"],
    ["Charles ate the french fries knowing they would be his last meal",
     "He hated that he loved what she hated about hate"],
    ["Charles ate the french fries knowing they would be his last meal",
     "He hated that he loved what she hated about hate"],
    ["Charles ate the french fries knowing they would be his last meal",
     "He hated that he loved what she hated about hate"],
    ["Charles ate the french fries knowing they would be his last meal",
     "He hated that he loved what she hated about hate"]
])
labels = tf.random.uniform((6,), minval=0, maxval=2, dtype=tf.dtypes.int32)

model = build_model()
model.compile(optimizer='adam', loss=tf.losses.BinaryCrossentropy())

train_dataset = tf.data.Dataset.from_tensor_slices(
    (sentences, labels)).shuffle(sentences.shape[0]).batch(BATCH_SIZE)

model.fit(x=train_dataset, epochs=2)
Epoch 1/2
3/3 [==============================] - 1s 8ms/step - loss: 0.6965
Epoch 2/2
3/3 [==============================] - 0s 6ms/step - loss: 0.6916
<keras.callbacks.History at 0x7fe851c4a090>
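As a quick sanity check (a sketch assuming the model and train_dataset built above), you can predict on one batch and confirm that each pair of sentences maps to a single sigmoid score:

# Sketch: predict on the first training batch (assumes model, train_dataset, BATCH_SIZE from above).
for batch_sentences, batch_labels in train_dataset.take(1):
    preds = model.predict(batch_sentences)
    print(batch_sentences.shape)  # (2, 2) -> BATCH_SIZE pairs of sentences
    print(preds.shape)            # (2, 1) -> one sigmoid score per pair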
Your LSTM requires inputs of shape (timesteps, features), so you need to reshape x into that shape. The first reshape is also necessary, because hub_layer takes a batch of sentences as a 1-D tensor of strings.
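To make the shape bookkeeping concrete, here is a small sketch (assuming tf and the hub_layer defined above; nnlm-en-dim50 returns one 50-dimensional embedding per string) showing how the two reshapes take a batch of sentence pairs through the hub layer and back into (batch, timesteps, features) for the LSTM:

# Sketch of the shape bookkeeping (assumes `tf` and `hub_layer` from the example above).
pairs = tf.constant([["first sentence", "second sentence"],
                     ["third sentence", "fourth sentence"]])   # (batch=2, timesteps=2) strings
flat = tf.reshape(pairs, [-1])                 # (4,)   1-D batch of strings for the hub layer
embedded = hub_layer(flat)                     # (4, 50) one 50-dim embedding per sentence
lstm_input = tf.reshape(embedded, [2, 2, 50])  # (batch, timesteps, features) for the LSTM
print(flat.shape, embedded.shape, lstm_input.shape)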