Training Accuracy Stuck at 0.50 --CodePudding

I have been working on a comparison of the CNN and RNN deep learning models for sentimental analysis.

I built the CNN following this guide: https://machinelearningmastery.com/develop-word-embedding-model-predicting-movie-review-sentiment/ , and I got an accuracy of 90 in CNN.

However, when I tried to recreate a LSTM model, the accuracy seems to hover around 0.5 -, and doesnt seems to improve over time. I wonder what is wrong with my codes I the only thing I have done is to replace the existing CNN model with LSTM in the model.add section. I have tried to change the loss from "binary" to "categorical", and different activation function. It still doesn't resolve the issue.

CNN RESULTS LSTM RESULTS

This is my CNN model which worked fine

 # load the vocabulary

    vocab_filename = 'vocab.txt'
    vocab = load_doc(vocab_filename)
    vocab = vocab.split()
    vocab = set(vocab)

    # load all training reviews

    positive_docs = process_docs('C:/Users/zenhu/OneDrive/Desktop/Final Year Project/archive/review_polarity/txt_sentoken/pos', vocab, True)
    negative_docs = process_docs('C:/Users/zenhu/OneDrive/Desktop/Final Year Project/archive/review_polarity/txt_sentoken/neg', vocab, True)
    train_docs = negative_docs   positive_docs

    # create the tokenizer

    tokenizer = Tokenizer()

    # fit the tokenizer on the documents

    tokenizer.fit_on_texts(train_docs)

    # sequence encode

    encoded_docs = tokenizer.texts_to_sequences(train_docs)

    # pad sequences

    max_length = max([len(s.split()) for s in train_docs])
    Xtrain = pad_sequences(encoded_docs, maxlen=max_length, padding='post')

    # define training labels, TRAINING DATASET

    ytrain = array([0 for _ in range(900)]   [1 for _ in range(900)])

    # load all test reviews

    positive_docs = process_docs('C:/Users/zenhu/OneDrive/Desktop/Final Year Project/archive/review_polarity/txt_sentoken/pos', vocab, False)
    negative_docs = process_docs('C:/Users/zenhu/OneDrive/Desktop/Final Year Project/archive/review_polarity/txt_sentoken/neg', vocab, False)
    test_docs = negative_docs   positive_docs

    # sequence encode

    encoded_docs = tokenizer.texts_to_sequences(test_docs)

    # pad sequences

    Xtest = pad_sequences(encoded_docs, maxlen=max_length, padding='post')

    # define test labels, TESTING DATASET

    ytest = array([0 for _ in range(100)]   [1 for _ in range(100)])

    # define vocabulary size (largest integer value)

    vocab_size = len(tokenizer.word_index)   1
    print(vocab_size)

    # define model

    model = Sequential()
    model.add(Embedding(vocab_size, 100, input_length=max_length))
    model.add(Conv1D(filters=32, kernel_size=8, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    print(model.summary())

    # compile network

    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    # fit network

    history=model.fit(Xtrain, ytrain, epochs=10, verbose=2)

    # evaluate

    loss, acc = model.evaluate(Xtest, ytest, verbose=0)
    print('Test Accuracy: %f' % (acc*100))
    print(history.history.keys())

#summarize history for accuracy

    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

# summarize history for loss

    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

This is my current LSTM model

    # load the vocabulary

    vocab_filename = 'vocab.txt'
    vocab = load_doc(vocab_filename)
    vocab = vocab.split()
    vocab = set(vocab)

    # load all training reviews

    positive_docs = process_docs('C:/Users/zenhu/OneDrive/Desktop/Final Year Project/archive/review_polarity/txt_sentoken/pos', vocab, True)
    negative_docs = process_docs('C:/Users/zenhu/OneDrive/Desktop/Final Year Project/archive/review_polarity/txt_sentoken/neg', vocab, True)
    train_docs = negative_docs   positive_docs

    # create the tokenizer

    tokenizer = Tokenizer()

    # fit the tokenizer on the documents

    tokenizer.fit_on_texts(train_docs)

    # sequence encode

    encoded_docs = tokenizer.texts_to_sequences(train_docs)

    # pad sequences

    max_length = max([len(s.split()) for s in train_docs])
    Xtrain = pad_sequences(encoded_docs, maxlen=max_length, padding='post')

    # define training labels, TRAINING DATASET

    ytrain = array([0 for _ in range(900)]   [1 for _ in range(900)])

    # load all test reviews

    positive_docs = process_docs('C:/Users/zenhu/OneDrive/Desktop/Final Year Project/archive/review_polarity/txt_sentoken/pos', vocab, False)
    negative_docs = process_docs('C:/Users/zenhu/OneDrive/Desktop/Final Year Project/archive/review_polarity/txt_sentoken/neg', vocab, False)
    test_docs = negative_docs   positive_docs

    # sequence encode

    encoded_docs = tokenizer.texts_to_sequences(test_docs)

    # pad sequences

    Xtest = pad_sequences(encoded_docs, maxlen=max_length, padding='post')

    # define test labels, TESTING DATASET

    ytest = array([0 for _ in range(100)]   [1 for _ in range(100)])

    # define vocabulary size (largest integer value)

    vocab_size = len(tokenizer.word_index)   1

    # define model

    model = Sequential()
    model.add(Embedding(vocab_size, 100, input_length=max_length))
    model.add(LSTM(units=100))
    model.add(Dense(10))
    model.add(Dense(1, activation='sigmoid'))
    print(model.summary())

    # compile network

    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    # fit network

    history=model.fit(Xtrain, ytrain, epochs=10, verbose=2)

    # evaluate

    loss, acc = model.evaluate(Xtest, ytest, verbose=0)
    print('Test Accuracy: %f' % (acc*100))
    
    print(history.history.keys())`

#summarize history for accuracy

    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

# summarize history for loss

    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

CodePudding user response：

The problem is in your LSTM layer. It is not returning a sequence of the same length. You must set return_sequences=True when stacking layer so that the second layer has a three-dimensional sequence input. After adding return_sequences = True parameter in your LSTM layer, it will give you around 90% accuracy for sure.

model.add(LSTM(units=100, return_sequences = True))