I am trying to add an Attention layer to my model for text classification, but I get an error after adding the layer and then fitting the model. Here is my code:
model = Sequential()
for i in range(len(kernel_size)):
    model.add(Conv1D(filters=nb_filter, kernel_size=kernel_size[i], padding='valid', activation='relu',
                     input_shape=(data_batch_size, emb_dim)))
    model.add(MaxPooling1D(pool_size=pool_size))
model.add(Bidirectional(LSTM(units=lstm_out, return_sequences=True), merge_mode='concat',
                        input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Bidirectional(LSTM(units=lstm_out, go_backwards=True)))
# ------------------------------------------------------------------------------------------------------------
# ------------------------------------------------------------------------------------------------------------
model.add(Attention(return_sequences=True))
# ------------------------------------------------------------------------------------------------------------
# ------------------------------------------------------------------------------------------------------------
model.add(Dropout(DropoutP))
model.add(Dense(cat_output, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
Y_tmp = np.zeros([Y_train.shape[0], 2])
Y_tmp[:, 0] = 2 - Y_train
Y_tmp[:, 1] = Y_train - 1
Y_train = Y_tmp
history = model.fit(X_train, Y_train, validation_split=test_size, epochs=nb_epoch, verbose=1,
                    callbacks=[EarlyStopping(monitor='val_accuracy', patience=0, restore_best_weights=True)])
And this is the Attention class:
class Attention(Layer):
    def __init__(self, return_sequences=True):
        self.return_sequences = return_sequences
        super(Attention, self).__init__()

    def build(self, input_shape):
        self.W = self.add_weight(name="att_weight", shape=(input_shape[-1], 1), initializer="normal")
        self.b = self.add_weight(name="att_bias", shape=(input_shape[1], 1), initializer="zeros")
        super(Attention, self).build(input_shape)

    def call(self, x):
        e = K.tanh(K.dot(x, self.W) + self.b)
        a = K.softmax(e, axis=1)
        output = x * a
        if self.return_sequences:
            return output
        return K.sum(output, axis=1)
And this is the error: Incompatible shapes: [32,2] vs. [1200,2]
What am I doing wrong?
CodePudding user response:
There is a problem with this line:
self.b = self.add_weight(name="att_bias", shape=(input_shape[1], 1), initializer="zeros")
which should be:
self.b = self.add_weight(name="att_bias", shape=(1,), initializer="zeros")
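For reference, here is a minimal sketch of your layer with that single change applied (assuming tensorflow.keras imports, which your snippet does not show):

from tensorflow.keras.layers import Layer
from tensorflow.keras import backend as K

class Attention(Layer):
    def __init__(self, return_sequences=True, **kwargs):
        self.return_sequences = return_sequences
        super(Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        # one score weight per input feature, plus a single scalar bias
        self.W = self.add_weight(name="att_weight", shape=(input_shape[-1], 1),
                                 initializer="random_normal")
        self.b = self.add_weight(name="att_bias", shape=(1,),
                                 initializer="zeros")
        super(Attention, self).build(input_shape)

    def call(self, x):
        # compute attention scores, normalise them along axis 1, and reweight the inputs
        e = K.tanh(K.dot(x, self.W) + self.b)
        a = K.softmax(e, axis=1)
        output = x * a
        if self.return_sequences:
            return output
        return K.sum(output, axis=1)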
In fact, with the original shapes you were redefining a Dense layer. To see this for yourself, compare your build() and call() with the custom Linear layer in the Keras guide on making new layers and models via subclassing. A custom attention layer built from Dense layers (a Bahdanau-style attention layer) is actually what you want, and is more general.
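If you want to go that route, a rough sketch of a Bahdanau-style additive attention layer built from Dense layers could look like this (the class name, the units argument, and the query/values split are illustrative, not taken from your code):

import tensorflow as tf

class BahdanauAttention(tf.keras.layers.Layer):
    # additive attention: score(h_t) = V^T tanh(W1 h_t + W2 q)
    def __init__(self, units, **kwargs):
        super(BahdanauAttention, self).__init__(**kwargs)
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, query, values):
        # query:  (batch, hidden)            e.g. the last LSTM state
        # values: (batch, timesteps, hidden) e.g. the full LSTM output sequence
        query_with_time_axis = tf.expand_dims(query, 1)
        score = self.V(tf.nn.tanh(self.W1(values) + self.W2(query_with_time_axis)))
        attention_weights = tf.nn.softmax(score, axis=1)
        # weighted sum over the time axis gives one context vector per example
        context_vector = tf.reduce_sum(attention_weights * values, axis=1)
        return context_vector, attention_weights

Note that this form takes a separate query (for example the last LSTM state) and values (the full LSTM output sequence), so it fits a functional-API model rather than a plain Sequential stack.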