I tried to build MemN2N with TensorFlow 2, but I got an error that confuses me. The error is:
ValueError: in user code:

    File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1021, in train_function  *
        return step_function(self, iterator)
    File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1010, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1000, in run_step  **
        outputs = model.train_step(data)
    File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 860, in train_step
        loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 919, in compute_loss
        y, y_pred, sample_weight, regularization_losses=self.losses)
    File "/usr/local/lib/python3.7/dist-packages/keras/engine/compile_utils.py", line 201, in __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "/usr/local/lib/python3.7/dist-packages/keras/losses.py", line 143, in __call__
        losses, sample_weight, reduction=self._get_reduction())
    File "/usr/local/lib/python3.7/dist-packages/keras/utils/losses_utils.py", line 322, in compute_weighted_loss
        losses, None, sample_weight)
    File "/usr/local/lib/python3.7/dist-packages/keras/utils/losses_utils.py", line 211, in squeeze_or_expand_dimensions
        sample_weight = tf.squeeze(sample_weight, [-1])

    ValueError: Can not squeeze dim[1], expected a dimension of 1, got 1400 for '{{node sparse_categorical_crossentropy/weighted_loss/Squeeze}} = Squeeze[T=DT_FLOAT, squeeze_dims=[-1]](Cast)' with input shapes: [100,1400].
What I understand is that this means my output and my labels don't match in shape, but my last layer produces a tensor of shape (100, 5244), i.e. (batch_size, vocab_size) after the softmax, and my labels are a (100,) array of integers, with 100 being the batch size. The only place 1400 appears in my setup is sentence_size, and I don't understand how it ends up in the loss.
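For reference, here is the shape contract I think SparseCategoricalCrossentropy expects, as a minimal standalone sketch (random tensors with my batch and vocab sizes, not my actual model):
import tensorflow as tf

# (batch_size, vocab_size) "probabilities", like my softmax output
y_pred = tf.random.uniform((100, 5244))
# (batch_size,) integer class labels, like my labels
y_true = tf.random.uniform((100,), maxval=5244, dtype=tf.int32)

loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
print(loss_fn(y_true, y_pred))  # scalar loss, no shape error with these shapes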
The build code is:
def build_model(ontology_shape, query_shape):
    ontology_input = tf.keras.layers.Input(ontology_shape, name='ontology_input')
    query_input = tf.keras.layers.Input(query_shape, name='query_input')

    m_embedded = Embedding(dim_input=vocab_size,
                           dim_output=embeddings_size,
                           initializer='he_uniform',
                           input_length=vocab_size,
                           name='embedding_A')(ontology_input)
    c_embedded = Embedding(dim_input=vocab_size,
                           dim_output=embeddings_size,
                           initializer='he_uniform',
                           input_length=None,
                           name='embedding_B')(ontology_input)
    u_embedded = Embedding(dim_input=vocab_size,
                           dim_output=embeddings_size,
                           initializer='he_uniform',
                           input_length=None,
                           name='embedding_C')(query_input)

    m = Encoding(name='positional_encoding')(m_embedded)
    c = Encoding(name='positional_encoding2')(c_embedded)
    u = Encoding(name='positional_encoding3')(u_embedded)

    memory_input = [m, u, c]
    outputs = Memory(nhop=define_hop,
                     dim_input=vocab_size,
                     dim_output=embeddings_size,
                     initializer='he_uniform',
                     name='memory_layer')(memory_input)

    model = Model(inputs=[ontology_input, query_input], outputs=outputs)

    # Compile the model.
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate, epsilon=0.0, clipvalue=40.0),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])
    model.summary()
    return model
And the last layer, the Memory layer, is:
class Memory(Layer):
    def __init__(self,
                 nhop,
                 dim_input,
                 dim_output,
                 initializer,
                 **kwargs):
        super().__init__(**kwargs)
        self.nhop = nhop
        self.dim_input = dim_input
        self.dim_output = dim_output
        self.initializer = initializer

    def build(self, input_shape):
        self.W = self.add_weight(
            shape=(self.dim_output, self.dim_input),
            initializer=self.initializer,
            name='W',
            trainable=True,
            experimental_autocast=False)
        self.built = True
        super().build(input_shape)

    def call(self, inputs, mask=None):
        m = inputs[0]  # [batch_size, sentence_size, embedding_size]
        u = inputs[1]  # [batch_size, query_size, embedding_size]
        c = inputs[2]  # [batch_size, sentence_size, embedding_size]
        u = tf.reduce_sum(u, 1)  # [batch_size, embedding_size]
        for h in range(self.nhop):
            u_expand = tf.expand_dims(u, 1)  # [batch_size, 1, embedding_size]
            p = tf.matmul(m, tf.transpose(u_expand, (0, 2, 1)))  # [batch_size, sentence_size, embedding_size] x [batch_size, embedding_size, 1] = [batch_size, sentence_size, 1]
            p = tf.reduce_sum(p, -1)  # [batch_size, sentence_size]
            o = tf.matmul(p, c)  # [batch_size, 1, sentence_size] x [batch_size, sentence_size, embedding_size] = [batch_size, 1, embedding_size]
            o = tf.reduce_sum(o, 1)  # [batch_size, embedding_size]
            u = tf.add(o, u)  # [batch_size, embedding_size]
        a = tf.matmul(u, self.W)  # [batch_size, embedding_size] x [embedding_size, vocab_size] = [batch_size, vocab_size]
        a_hat = tf.nn.softmax(a)  # [batch_size, vocab_size]
        return a_hat

    def compute_mask(self, inputs, mask=None):
        return mask
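As a quick standalone sanity check (small made-up sizes, not my real data), the layer can be called directly on random tensors; this is how I expect it to behave:
import tensorflow as tf

batch_size, sentence_size, query_size, embedding_size, test_vocab_size = 4, 10, 3, 8, 50

m = tf.random.normal((batch_size, sentence_size, embedding_size))
u = tf.random.normal((batch_size, query_size, embedding_size))
c = tf.random.normal((batch_size, sentence_size, embedding_size))

layer = Memory(nhop=3, dim_input=test_vocab_size, dim_output=embedding_size, initializer='he_uniform')
print(layer([m, u, c]).shape)  # expecting (4, 50), i.e. (batch_size, vocab_size)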
Model summary: (output omitted)
Import Library:
import random
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn import metrics
from tensorflow.keras import Model
from tensorflow.keras.layers import TimeDistributed, Layer, Dense, Embedding
from tensorflow.keras.constraints import Constraint
from tensorflow.keras.callbacks import Callback
Encoding Layer:
class Encoding(Layer):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        self.sentence_size = input_shape[-2]
        self.embedding_size = input_shape[-1]
        super().build(input_shape)

    def call(self, inputs, mask=None):
        if mask is not None:
            mask = tf.cast(mask, dtype=self.dtype)
            mask = tf.expand_dims(mask, axis=-1)
            return tf.reduce_sum(mask * position_encoding(self.sentence_size, self.embedding_size) * inputs, axis=-2)
        else:
            return tf.reduce_sum(position_encoding(self.sentence_size, self.embedding_size) * inputs, axis=-2)

    def compute_mask(self, inputs, mask=None):
        if mask is None:
            return None
        return tf.reduce_any(mask, axis=-1)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[-1])
Positional Encoding:
def position_encoding(sentence_size, embedding_size):
    """Position Encoding.

    Encodes the position of words within the sentence (implementation based on
    https://arxiv.org/pdf/1503.08895.pdf [1]).

    Arguments:
        sentence_size: int, the size of the sentence (number of words).
        embedding_size: int, the size of the word embedding.

    Returns:
        An encoding matrix represented by a NumPy array with shape `[sentence_size, embedding_size]`.
    """
    encoding = np.ones((embedding_size, sentence_size), dtype=np.float32)
    length_of_sentence = sentence_size + 1
    length_of_embedding = embedding_size + 1
    for i in range(1, length_of_embedding):
        for j in range(1, length_of_sentence):
            encoding[i - 1, j - 1] = (i - (embedding_size + 1) / 2) * (j - (sentence_size + 1) / 2)
    encoding = 1 + 4 * encoding / embedding_size / sentence_size
    return np.transpose(encoding)
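A quick standalone check of the two pieces above, on random data with made-up sizes:
import numpy as np
import tensorflow as tf

pe = position_encoding(sentence_size=10, embedding_size=8)
print(pe.shape)  # (10, 8) after the transpose

x = tf.random.normal((4, 10, 8))  # [batch_size, sentence_size, embedding_size]
print(Encoding()(x).shape)        # (4, 8): the second-to-last (sentence) axis is summed out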
CodePudding user response:
You seem to be mixing up the parameters of the Embedding layers. Maybe try something like this:
import random
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn import metrics
from tensorflow.keras import Model
from tensorflow.keras.layers import TimeDistributed, Layer, Dense, Embedding
from tensorflow.keras.constraints import Constraint
from tensorflow.keras.callbacks import Callback
vocab_size = 5422
embeddings_size = 20

def build_model(ontology_shape=(1400, 3), query_shape=(1, 3)):
    ontology_input = tf.keras.layers.Input(ontology_shape, name='ontology_input')
    query_input = tf.keras.layers.Input(query_shape, name='query_input')

    m_embedded = Embedding(input_dim=vocab_size,
                           output_dim=embeddings_size,
                           embeddings_initializer='he_uniform',
                           input_length=1400,
                           name='embedding_A')(ontology_input)
    c_embedded = Embedding(input_dim=vocab_size,
                           output_dim=embeddings_size,
                           embeddings_initializer='he_uniform',
                           input_length=1400,
                           name='embedding_B')(ontology_input)
    u_embedded = Embedding(input_dim=vocab_size,
                           output_dim=embeddings_size,
                           embeddings_initializer='he_uniform',
                           input_length=1,
                           name='embedding_C')(query_input)

    m = Encoding(name='positional_encoding')(m_embedded)
    c = Encoding(name='positional_encoding2')(c_embedded)
    u = Encoding(name='positional_encoding3')(u_embedded)

    memory_input = [m, u, c]
    outputs = Memory(nhop=10,
                     dim_input=vocab_size,
                     dim_output=embeddings_size,
                     initializer='he_uniform',
                     name='memory_layer')(memory_input)

    model = Model(inputs=[ontology_input, query_input], outputs=outputs)

    # Compile the model.
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001, epsilon=0.0, clipvalue=40.0),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
                  metrics=['accuracy'])
    model.summary()
    return model

model = build_model()

x1 = tf.random.uniform((500, 1400, 3), maxval=vocab_size, dtype=tf.int32)
x2 = tf.random.uniform((500, 1, 3), maxval=vocab_size, dtype=tf.int32)
y = tf.random.uniform((500, 1), maxval=vocab_size, dtype=tf.int32)
model.fit([x1, x2], y)
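The important changes: the Keras Embedding layer takes input_dim, output_dim and embeddings_initializer (rather than dim_input, dim_output and initializer), and the loss uses from_logits=False because your Memory layer already applies tf.nn.softmax to its output. The labels passed to fit are integer class ids of shape (batch, 1), which is what SparseCategoricalCrossentropy expects next to a (batch, vocab_size) prediction.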