my CNN model claims it has 70 % accurcy but when i test it its actually 6%-CodePudding

im having an odd bug that i cant seem to debug

#import all the necessary libraries and be specific so as to avoid wasting time importing everything 
import sys
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras.applications import VGG16
from tensorflow.keras import optimizers
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import sklearn.metrics as metrics
from sklearn.metrics import confusion_matrix
import seaborn as sns  
model_type = 'vgg16'


# Loading the VGG Model

vgg_model = VGG16(weights='imagenet', include_top=False, input_shape=(200,200,3))
vgg_model.trainable = False
model = tf.keras.Sequential([vgg_model,
                                 tf.keras.layers.GlobalAveragePooling2D(),
                                 tf.keras.layers.Dropout(0.1),
                                 tf.keras.layers.Dense(512, activation= "relu"),
                                 tf.keras.layers.BatchNormalization(),
                                 tf.keras.layers.Dropout(0.1),
                                 tf.keras.layers.Dense(16, activation="softmax")                                     
                                ])

datagen = ImageDataGenerator(featurewise_center=True)
#retrieve the trian data using imagedatagen 
train = datagen.flow_from_directory('/content/16_flowers/Train/',
  class_mode='categorical', color_mode= 'rgb',batch_size=64, target_size=(200, 200))
test = datagen.flow_from_directory('/content/16_flowers/Test/',
  class_mode='categorical', color_mode= 'rgb',batch_size=10, target_size=(200, 200))

base_learning_rate = 0.00005
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=base_learning_rate),loss = tf.keras.metrics.categorical_crossentropy,metrics=['accuracy'])

history = model.fit(train,epochs = 100 , validation_data = test)

#summarise_diagnostics(history)


model.save("vgg16CNN.model")


_, acc = model.evaluate(test, steps=len(test), verbose=0)
print('> %.3f' % (acc * 100.0))

plot_cm(model)

my dataset is a dataset of 16 flowers, the training data is 70 images per class and my validation/ test data is 10 images per class. when i run the code i get approx 99% training accuracy and 76% validation accuracy with 0.07 training loss and 0.7 validation loss.

but when i take the model which is produced and i use it to make predictions on the test dataset and then compare its predictions to the true classes i get around 6.25% accuracy, can anyone tell me why this might be?

below is my code to make predictions and retrun the accuracy of those predictions

datagen = ImageDataGenerator(featurewise_center=True)
  test = datagen.flow_from_directory('/content/16_flowers/Test/',
  class_mode='categorical', color_mode= 'rgb',batch_size=10, target_size=(200, 200))

  predictions = model.predict(test)
  predicted_classes = np.argmax(predictions, axis=1)

  true_classes = test.classes
  count = 0
  for i in range(len(predicted_classes)):
    print(true_classes[i],predicted_classes[i])
    if true_classes[i] == predicted_classes[i]:
      count  =1
  print(count/160*100)

CodePudding user response：

For test in flow_from_directory set shuffle=False to preserve the order of the predictions with respect to the files. Then use

classes=list(train_gen.class_indices.keys())
count=0
predictions = model.predict(test)
for i,p in enumerate(predictions):
    index=np.argmax(p)
    predicted_class=classes[index]
    true_index=test.labels[i]
    true_class= classes[true_index]
    print('True class is ', true_class, .  '  Predicted class is ', predicted class)
    if index == true_index:
        count  =1
accuracy = count * 100/len(predictions)
print('Accuracy om Test set is ', accuracy)