I'm trying to build an image classification model on the flower image dataset using tf.data, with 4 different classes. When I test the trained model I get the same class prediction even for images from different classes, although training goes smoothly with good training and validation accuracy, and the model also reports good accuracy on the test dataset.
My training and testing pipelines are implemented as below:
import os
import pathlib

import tensorflow as tf

dataset_url = "https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz"
data_dir = tf.keras.utils.get_file(origin=dataset_url,
                                   fname='flower_photos',
                                   untar=True)
data_dir = pathlib.Path(data_dir)
data_dir = pathlib.Path(r'C:\Users\Hilary\.keras\datasets\flower_photos')

slide_labels = os.listdir(data_dir)
CLASS_NAMES = slide_labels
NUM_CLASSES = len(CLASS_NAMES)
num_examples = len(list(data_dir.glob('*/*.jpg')))
def get_label(file_path):
    # convert the path to a list of path components
    parts = tf.strings.split(file_path, os.path.sep)
    # the second-to-last component is the class directory
    return tf.where(parts[-2] == CLASS_NAMES)[0][0]

def decode_img(img):
    # convert the compressed string to a 3D uint8 tensor
    img = tf.image.decode_jpeg(img, channels=3)
    return img

def process_path(file_path):
    label = get_label(file_path)
    # load the raw data from the file as a string
    img = tf.io.read_file(file_path)
    img = decode_img(img)
    features = {'image': img, 'label': label}
    return features
list_ds = tf.data.Dataset.list_files(str(data_dir/'*/*'))
ds = list_ds.map(process_path, num_parallel_calls=tf.data.experimental.AUTOTUNE)

print("Total number of images", len(ds))
print("Total number of classes: ", NUM_CLASSES)

train_size = int(0.90 * num_examples)
val_size = int(0.09 * num_examples)
test_size = int(0.01 * num_examples)

full_dataset = ds.shuffle(reshuffle_each_iteration=False, buffer_size=len(ds))
train_dataset = full_dataset.take(train_size)
test_val_dataset = full_dataset.skip(train_size)
val_dataset = test_val_dataset.take(val_size)
test_dataset = test_val_dataset.skip(val_size)

print("Number of examples on training set is ", len(train_dataset))
When I run inference on images from a single class, as below:
from imutils import paths
import cv2
import numpy as np

for img_f in list(paths.list_images(r'C:\Users\hillary\.keras\datasets\flower_photos\sunflowers')):
    img = cv2.imread(img_f)
    test_img = [img]
    # test_img = [np.expand_dims(img, axis=0) for img in test_img]
    test_img = tf.concat(test_img, axis=0)
    test_img = tf.image.resize(test_img, [128, 128])
    test_img = tf.cast(image, tf.float32) / 255.0
    # test_img = tf.expand_dims(image, axis = 0)

    logits = model(test_img)
    y_probabilities = tf.nn.softmax(logits).numpy()[0]
    print(y_probabilities)
    index_max_proba = np.argmax(tf.nn.softmax(logits))
    print(class_labels[index_max_proba])
I get results like this:
[1.2498085e-03 1.7629927e-01 8.2240731e-01 3.1031032e-05 1.2520954e-05]
roses
[1.2498085e-03 1.7629927e-01 8.2240731e-01 3.1031032e-05 1.2520954e-05]
roses
[1.2498085e-03 1.7629927e-01 8.2240731e-01 3.1031032e-05 1.2520954e-05]
roses
...
The exact same probability vector and the "roses" prediction come out for every sunflower image, and the same thing happens with images from the other classes as well.
I tested this pipeline with different datasets and models and got the same behaviour: a single class predicted for images from every class.
Any help or suggestion to rectify my mistake would be appreciated.
Answer:
You're loading the file with OpenCV, which reads images in BGR channel order, while the original training pipeline loads them with tf.io (which decodes JPEGs to RGB). Try converting to RGB with the following code:
img = cv2.imread(img_f)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
test_img = [img]
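Alternatively, you can sidestep the mismatch entirely by decoding the file with tf.io, exactly as your training pipeline does. This is just a sketch, assuming the model expects 128x128 inputs scaled to [0, 1] (as in your inference code) and that model and CLASS_NAMES are already in scope:

import numpy as np
import tensorflow as tf
from imutils import paths

for img_f in paths.list_images(r'C:\Users\hillary\.keras\datasets\flower_photos\sunflowers'):
    # decode with tf.io, like process_path() in the training pipeline (gives RGB)
    img = tf.io.read_file(img_f)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, [128, 128])
    img = tf.cast(img, tf.float32) / 255.0   # assumed to match the training-time scaling
    img = tf.expand_dims(img, axis=0)        # add the batch dimension

    logits = model(img)
    probs = tf.nn.softmax(logits).numpy()[0]
    print(probs, CLASS_NAMES[np.argmax(probs)])

Keeping the decode path identical between training and inference rules out channel order (and any other decoding difference) as a source of the wrong predictions.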