I'm trying to do binary classification of text with a bi-LSTM model, but I'm getting this error: ValueError: Dimensions must be equal, but are 2 and 64 for '{{node binary_crossentropy/mul}} = Mul[T=DT_FLOAT](binary_crossentropy/Cast, binary_crossentropy/Log)' with input shapes: [?,2], [?,64]. I am a beginner, so please suggest a solution.
text = df['text']
label = df['label']
X = pad_sequences(X, maxlen=max_len, padding=pad_type, truncating=trunc_type)
Y = pd.get_dummies(label).values
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20)
print(X_train.shape, Y_train.shape)
print(X_test.shape, Y_test.shape)
# model creation
model = tf.keras.Sequential([
    # add an embedding layer
    tf.keras.layers.Embedding(word_count, 16, input_length=max_len),
    tf.keras.layers.Dropout(0.2),
    # add a bi-lstm layer
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(2, return_sequences=True)),
    # add dense layers
    tf.keras.layers.Dense(32, activation=tf.keras.activations.relu),
    tf.keras.layers.Dense(32, activation=tf.keras.activations.relu),
    tf.keras.layers.Dense(32, activation=tf.keras.activations.relu),
    tf.keras.layers.Dense(32, activation=tf.keras.activations.softmax),
    # add the prediction layer
    tf.keras.layers.Dense(1, activation=tf.keras.activations.sigmoid),
])
model.compile(loss=tf.keras.losses.BinaryCrossentropy(), optimizer=tf.keras.optimizers.Adam(), metrics=['accuracy'])
model.summary()
history = model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=10, batch_size=batch_size, callbacks=[callback_func], verbose=1)
CodePudding user response:
Because the labels are built with pd.get_dummies, Y is one-hot encoded with two columns, so the prediction layer needs 2 output units to match:
# add the prediction layer
tf.keras.layers.Dense(2, activation=tf.keras.activations.sigmoid)
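To see why, here is a quick check with a made-up binary label column (hypothetical values, not your data):
import pandas as pd

label = pd.Series(['pos', 'neg', 'pos', 'neg'])  # hypothetical binary labels
Y = pd.get_dummies(label).values
print(Y.shape)  # (4, 2) -- so the model's final layer must output 2 values per sample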
Flatten: the Bidirectional LSTM is created with return_sequences=True, so it outputs a 3D tensor of shape (batch_size, max_len, 4). Adding a Flatten layer before the Dense layers collapses this to one vector per sample, which fixes the shape mismatch in the loss:
# model creation
model = tf.keras.Sequential([
    # add an embedding layer
    tf.keras.layers.Embedding(word_count, 16, input_length=max_len),
    tf.keras.layers.Dropout(0.2),
    # add a bi-lstm layer
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(2, return_sequences=True)),
    # flatten (batch_size, max_len, 4) into (batch_size, max_len * 4)
    tf.keras.layers.Flatten(),  # <-- added
    # add dense layers
    tf.keras.layers.Dense(32, activation=tf.keras.activations.relu),
    tf.keras.layers.Dense(32, activation=tf.keras.activations.relu),
    tf.keras.layers.Dense(32, activation=tf.keras.activations.relu),
    tf.keras.layers.Dense(32, activation=tf.keras.activations.softmax),
    # add the prediction layer
    tf.keras.layers.Dense(2, activation=tf.keras.activations.sigmoid),  # <-- 2 units to match the one-hot labels
])
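If you want to verify the shapes end to end, here is a minimal, self-contained sketch (word_count, max_len, and the random dummy data are assumptions, not your real dataset) that builds the corrected model and runs one training step:
import numpy as np
import tensorflow as tf

word_count, max_len = 10000, 64  # assumed values; use your own vocabulary size and sequence length

model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(max_len,)),
    tf.keras.layers.Embedding(word_count, 16),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(2, return_sequences=True)),
    tf.keras.layers.Flatten(),                        # (batch, max_len, 4) -> (batch, max_len * 4)
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(2, activation='sigmoid'),   # matches the two one-hot label columns
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# random dummy data, only to confirm that labels of shape (batch, 2) are accepted
X_dummy = np.random.randint(0, word_count, size=(8, max_len))
Y_dummy = tf.keras.utils.to_categorical(np.random.randint(0, 2, size=8), num_classes=2)
model.fit(X_dummy, Y_dummy, epochs=1, verbose=0)
print(model.predict(X_dummy).shape)  # (8, 2)
An alternative worth knowing: keep a single Dense(1, activation='sigmoid') output and train on the raw 0/1 labels instead of the one-hot Y. Both approaches are valid; the sketch above simply follows the shapes used in the question.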