I tried to build MemN2N with TensorFlow 2, but I got an error that confuses me. The error is:
ValueError: in user code:

    File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1021, in train_function  *
        return step_function(self, iterator)
    File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1010, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1000, in run_step  **
        outputs = model.train_step(data)
    File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 860, in train_step
        loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 919, in compute_loss
        y, y_pred, sample_weight, regularization_losses=self.losses)
    File "/usr/local/lib/python3.7/dist-packages/keras/engine/compile_utils.py", line 201, in __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "/usr/local/lib/python3.7/dist-packages/keras/losses.py", line 143, in __call__
        losses, sample_weight, reduction=self._get_reduction())
    File "/usr/local/lib/python3.7/dist-packages/keras/utils/losses_utils.py", line 322, in compute_weighted_loss
        losses, None, sample_weight)
    File "/usr/local/lib/python3.7/dist-packages/keras/utils/losses_utils.py", line 211, in squeeze_or_expand_dimensions
        sample_weight = tf.squeeze(sample_weight, [-1])

    ValueError: Can not squeeze dim[1], expected a dimension of 1, got 1400 for '{{node sparse_categorical_crossentropy/weighted_loss/Squeeze}} = Squeeze[T=DT_FLOAT, squeeze_dims=[-1]](Cast)' with input shapes: [100,1400].
What I understand is that this means my output and my labels don't match in shape, but my last layer produces a tensor of shape (100, 5244), i.e. (batch_size, vocab_size) after the softmax, and my labels are a (100,) array of integers, with 100 being the batch size. The only place 1400 appears in my setup is sentence_size, and I don't understand how it ends up in the loss.
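For reference, here is the shape contract I think SparseCategoricalCrossentropy expects, as a minimal standalone sketch (random tensors with my batch and vocab sizes, not my actual model):
import tensorflow as tf

# (batch_size, vocab_size) "probabilities", like my softmax output
y_pred = tf.random.uniform((100, 5244))
# (batch_size,) integer class labels, like my labels
y_true = tf.random.uniform((100,), maxval=5244, dtype=tf.int32)

loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
print(loss_fn(y_true, y_pred))  # scalar loss, no shape error with these shapes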
The build code is:
def build_model(ontology_shape, query_shape):
    ontology_input = tf.keras.layers.Input(ontology_shape, name='ontology_input')
    query_input = tf.keras.layers.Input(query_shape, name='query_input')

    m_embedded = Embedding(dim_input=vocab_size,
                           dim_output=embeddings_size,
                           initializer='he_uniform',
                           input_length=vocab_size,
                           name='embedding_A')(ontology_input)
    c_embedded = Embedding(dim_input=vocab_size,
                           dim_output=embeddings_size,
                           initializer='he_uniform',
                           input_length=None,
                           name='embedding_B')(ontology_input)
    u_embedded = Embedding(dim_input=vocab_size,
                           dim_output=embeddings_size,
                           initializer='he_uniform',
                           input_length=None,
                           name='embedding_C')(query_input)

    m = Encoding(name='positional_encoding')(m_embedded)
    c = Encoding(name='positional_encoding2')(c_embedded)
    u = Encoding(name='positional_encoding3')(u_embedded)

    memory_input = [m, u, c]
    outputs = Memory(nhop=define_hop,
                     dim_input=vocab_size,
                     dim_output=embeddings_size,
                     initializer='he_uniform',
                     name='memory_layer')(memory_input)

    model = Model(inputs=[ontology_input, query_input], outputs=outputs)

    # Compile the model.
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate, epsilon=0.0, clipvalue=40.0),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])
    model.summary()
    return model
And the last layer, the Memory layer, is:
class Memory(Layer):
    def __init__(self,
                 nhop,
                 dim_input,
                 dim_output,
                 initializer,
                 **kwargs):
        super().__init__(**kwargs)
        self.nhop = nhop
        self.dim_input = dim_input
        self.dim_output = dim_output
        self.initializer = initializer

    def build(self, input_shape):
        self.W = self.add_weight(
            shape=(self.dim_output, self.dim_input),
            initializer=self.initializer,
            name='W',
            trainable=True,
            experimental_autocast=False)
        self.built = True
        super().build(input_shape)

    def call(self, inputs, mask=None):
        m = inputs[0]  # [batch_size, sentence_size, embedding_size]
        u = inputs[1]  # [batch_size, query_size, embedding_size]
        c = inputs[2]  # [batch_size, sentence_size, embedding_size]
        u = tf.reduce_sum(u, 1)  # [batch_size, embedding_size]
        for h in range(self.nhop):
            u_expand = tf.expand_dims(u, 1)  # [batch_size, 1, embedding_size]
            p = tf.matmul(m, tf.transpose(u_expand, (0, 2, 1)))  # [batch_size, sentence_size, embedding_size] x [batch_size, embedding_size, 1] = [batch_size, sentence_size, 1]
            p = tf.reduce_sum(p, -1)  # [batch_size, sentence_size]
            o = tf.matmul(p, c)  # [batch_size, 1, sentence_size] x [batch_size, sentence_size, embedding_size] = [batch_size, 1, embedding_size]
            o = tf.reduce_sum(o, 1)  # [batch_size, embedding_size]
            u = tf.add(o, u)  # [batch_size, embedding_size]
        a = tf.matmul(u, self.W)  # [batch_size, embedding_size] x [embedding_size, vocab_size] = [batch_size, vocab_size]
        a_hat = tf.nn.softmax(a)  # [batch_size, vocab_size]
        return a_hat

    def compute_mask(self, inputs, mask=None):
        return mask
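As a quick standalone sanity check (small made-up sizes, not my real data), the layer can be called directly on random tensors; this is how I expect it to behave:
import tensorflow as tf

batch_size, sentence_size, query_size, embedding_size, test_vocab_size = 4, 10, 3, 8, 50

m = tf.random.normal((batch_size, sentence_size, embedding_size))
u = tf.random.normal((batch_size, query_size, embedding_size))
c = tf.random.normal((batch_size, sentence_size, embedding_size))

layer = Memory(nhop=3, dim_input=test_vocab_size, dim_output=embedding_size, initializer='he_uniform')
print(layer([m, u, c]).shape)  # expecting (4, 50), i.e. (batch_size, vocab_size)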
Model summary: (output omitted)
Import Library:
import random
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn import metrics
from tensorflow.keras import Model
from tensorflow.keras.layers import TimeDistributed, Layer, Dense, Embedding
from tensorflow.keras.constraints import Constraint
from tensorflow.keras.callbacks import Callback
Encoding Layer:
class Encoding(Layer):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        self.sentence_size = input_shape[-2]
        self.embedding_size = input_shape[-1]
        super().build(input_shape)

    def call(self, inputs, mask=None):
        if mask is not None:
            mask = tf.cast(mask, dtype=self.dtype)
            mask = tf.expand_dims(mask, axis=-1)
            return tf.reduce_sum(mask * position_encoding(self.sentence_size, self.embedding_size) * inputs, axis=-2)
        else:
            return tf.reduce_sum(position_encoding(self.sentence_size, self.embedding_size) * inputs, axis=-2)

    def compute_mask(self, inputs, mask=None):
        if mask is None:
            return None
        return tf.reduce_any(mask, axis=-1)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[-1])
Positional Encoding:
def position_encoding(sentence_size, embedding_size):
    """Position Encoding.

    Encodes the position of words within the sentence (implementation based on
    https://arxiv.org/pdf/1503.08895.pdf [1]).

    Arguments:
        sentence_size: int, the size of the sentence (number of words).
        embedding_size: int, the size of the word embedding.

    Returns:
        An encoding matrix represented by a NumPy array with shape `[sentence_size, embedding_size]`.
    """
    encoding = np.ones((embedding_size, sentence_size), dtype=np.float32)
    length_of_sentence = sentence_size + 1
    length_of_embedding = embedding_size + 1
    for i in range(1, length_of_embedding):
        for j in range(1, length_of_sentence):
            encoding[i - 1, j - 1] = (i - (embedding_size + 1) / 2) * (j - (sentence_size + 1) / 2)
    encoding = 1 + 4 * encoding / embedding_size / sentence_size
    return np.transpose(encoding)
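A quick standalone check of the two pieces above, on random data with made-up sizes:
import numpy as np
import tensorflow as tf

pe = position_encoding(sentence_size=10, embedding_size=8)
print(pe.shape)  # (10, 8) after the transpose

x = tf.random.normal((4, 10, 8))  # [batch_size, sentence_size, embedding_size]
print(Encoding()(x).shape)        # (4, 8): the second-to-last (sentence) axis is summed out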
CodePudding user response:
You seem to be mixing up the parameters of the Embedding layers. Maybe try something like this:
import random
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn import metrics
from tensorflow.keras import Model
from tensorflow.keras.layers import TimeDistributed, Layer, Dense, Embedding
from tensorflow.keras.constraints import Constraint
from tensorflow.keras.callbacks import Callback
vocab_size = 5422
embeddings_size = 20

def build_model(ontology_shape=(1400, 3), query_shape=(1, 3)):
    ontology_input = tf.keras.layers.Input(ontology_shape, name='ontology_input')
    query_input = tf.keras.layers.Input(query_shape, name='query_input')

    m_embedded = Embedding(input_dim=vocab_size,
                           output_dim=embeddings_size,
                           embeddings_initializer='he_uniform',
                           input_length=1400,
                           name='embedding_A')(ontology_input)
    c_embedded = Embedding(input_dim=vocab_size,
                           output_dim=embeddings_size,
                           embeddings_initializer='he_uniform',
                           input_length=1400,
                           name='embedding_B')(ontology_input)
    u_embedded = Embedding(input_dim=vocab_size,
                           output_dim=embeddings_size,
                           embeddings_initializer='he_uniform',
                           input_length=1,
                           name='embedding_C')(query_input)

    m = Encoding(name='positional_encoding')(m_embedded)
    c = Encoding(name='positional_encoding2')(c_embedded)
    u = Encoding(name='positional_encoding3')(u_embedded)

    memory_input = [m, u, c]
    outputs = Memory(nhop=10,
                     dim_input=vocab_size,
                     dim_output=embeddings_size,
                     initializer='he_uniform',
                     name='memory_layer')(memory_input)

    model = Model(inputs=[ontology_input, query_input], outputs=outputs)

    # Compile the model.
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001, epsilon=0.0, clipvalue=40.0),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
                  metrics=['accuracy'])
    model.summary()
    return model

model = build_model()

x1 = tf.random.uniform((500, 1400, 3), maxval=vocab_size, dtype=tf.int32)
x2 = tf.random.uniform((500, 1, 3), maxval=vocab_size, dtype=tf.int32)
y = tf.random.uniform((500, 1), maxval=vocab_size, dtype=tf.int32)
model.fit([x1, x2], y)
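The important changes: the Keras Embedding layer takes input_dim, output_dim and embeddings_initializer (rather than dim_input, dim_output and initializer), and the loss uses from_logits=False because your Memory layer already applies tf.nn.softmax to its output. The labels passed to fit are integer class ids of shape (batch, 1), which is what SparseCategoricalCrossentropy expects next to a (batch, vocab_size) prediction.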