ValueError: Exception encountered when calling layer "dense" (type Dense)

I get this error when I try to train my model via transfer learning from a model I already trained:

ValueError: Exception encountered when calling layer "dense" (type Dense).

Dimensions must be equal, but are 100352 and 2048 for '{{node dense/MatMul}} = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false](Placeholder, dense/MatMul/ReadVariableOp)' with input shapes: [?,100352], [2048,256].

Call arguments received by layer "dense" (type Dense): inputs=tf.Tensor(shape=(None, 100352), dtype=float32)

This is my code for the model:

import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from keras import layers
from sklearn.model_selection import train_test_split
import pickle

#loading data
IMG_SIZE = 225
pickle_in = open("X.pickle", "rb")
X = pickle.load(pickle_in)
pickle_in = open("Y.pickle", "rb")
Y = pickle.load(pickle_in)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20, random_state=42)
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.20, random_state=42)

#normalize
X_train /= 255.
X_test /= 255.

#loading VGG16
VGG16 = keras.models.load_model("VGG16.h5", compile=False, custom_objects = {"sin": tf.math.sin})

#model
model = keras.Sequential()
model.add(keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3)))
for idx, layer in enumerate(VGG16.layers[0:-1]):
    model.add(layer)
    layer._name = "VGG16" + str(idx)
for layer in model.layers[1:]:
    layer.trainable = False
model.add(layers.Flatten())
model.add(layers.Conv2D(64, kernel_size=(3,3), padding="SAME", activation=tf.math.sin))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D(pool_size=(2,2), padding="SAME"), strides=2)
model.add(layers.Conv2D(32, kernel_size=(3,3), padding="SAME", activation=tf.math.sin))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D(pool_size=(2,2), padding="SAME"), strides=2)
model.add(layers.Flatten())
model.add(layers.Dense(1, activation="sigmoid"))
print(model.summary())

#training model
loss = keras.losses.BinaryCrossentropy()
optim = keras.optimizers.Adam(learning_rate=0.001)
model.compile(loss=loss, optimizer=optim, metrics=["accuracy"])
model.fit(X_train, Y_train, epochs=15, validation_data=(X_val, Y_val))
model.evaluate(X_test, Y_test, verbose=2)

And this is my code for the model I already trained:

import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import tensorflow as tf
from tensorflow import keras
from tensorflow import math
from keras import layers
from keras.datasets import cifar10

def main():
    #loading data
    (X_train, Y_train), (X_test, Y_test) = keras.datasets.cifar10.load_data()
    X_test, X_train = X_test.astype("float32") / 255., X_train.astype("float32") / 255.
    Y_train, Y_test = keras.utils.to_categorical(Y_train, 10), keras.utils.to_categorical(Y_test, 10)

    #VGG16 model with SIREN
    model = keras.Sequential()
    model.add(layers.Conv2D(64, (3, 3), padding='same', kernel_initializer="he_uniform", activation=tf.math.sin, input_shape=(32,32,1)))
    model.add(layers.Conv2D(64, (3, 3), padding='same', activation=math.sin))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))
    model.add(layers.Conv2D(128, (3, 3), padding='same', activation=math.sin))
    model.add(layers.Conv2D(128, (3, 3), padding='same', activation=math.sin))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))
    model.add(layers.Conv2D(256, (3, 3), padding='same', activation=math.sin))
    model.add(layers.Conv2D(256, (3, 3), padding='same', activation=math.sin))
    model.add(layers.Conv2D(256, (3, 3), padding='same', activation=math.sin))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))
    model.add(layers.Conv2D(512, (3, 3), padding='same', activation=math.sin))
    model.add(layers.Conv2D(512, (3, 3), padding='same', activation=math.sin))
    model.add(layers.Conv2D(512, (3, 3), padding='same', activation=math.sin))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))
    model.add(layers.Flatten())
    model.add(layers.Dense(256, activation=math.sin))
    model.add(layers.Dense(128, activation=math.sin))
    model.add(layers.Dense(10, activation="softmax"))

    #training model
    lr = 0.0001
    loss = keras.losses.CategoricalCrossentropy()
    decayed_lr = tf.keras.optimizers.schedules.ExponentialDecay(lr, 10000, 0.85, True)
    optim = keras.optimizers.Adam(decayed_lr)
    model.compile(loss=loss, optimizer=optim, metrics=["accuracy"])
    checkpoint_filepath = '/Users/JamesRONewton/Documents/Programming/MachineLearning/SIREN_projects/BrainTumor/checkpoint.hdf5'
    checkpoint = keras.callbacks.ModelCheckpoint(filepath = checkpoint_filepath, monitor='accuracy', verbose=2, save_best_only=True, save_weights_only=True, mode='auto', save_freq ="epoch")
    try:
        model.load_weights(checkpoint_filepath)
    except Exception as e:
        pass
    model.fit(X_train, Y_train, batch_size=128, epochs=25, callbacks = [checkpoint])
    model.evaluate(X_test, Y_test, verbose=2)

    #saving model
    model.save("VGG16.h5")

if __name__ == '__main__':
    main()

I'm using different input sizes for the two models, but I've read that this shouldn't be a problem as long as the strides are the same. I'm not 100% sure the strides match, but I think they do.

CodePudding user response:

If you want to do transfer learning with a different input shape, you can reuse the Conv2D layers but not the Dense layers: convolutions work on any spatial size, whereas a Dense layer's weights are fixed to the flattened feature size produced by the original input, which is why the MatMul fails here with 100352 inputs against a (2048, 256) kernel. If you run VGG16.summary() you will see that the last 4 layers are not convolutional (a Flatten followed by three Dense layers). So the first mistake is that for idx, layer in enumerate(VGG16.layers[0:-1]): should be for idx, layer in enumerate(VGG16.layers[0:-4]):
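
To see where the two numbers in the error come from, here is a rough check (this assumes VGG16.h5 was trained on the 32x32 CIFAR-10 inputs shown in the second script, with four stride-2, valid-padded poolings):

# Hypothetical helper, not part of the original code: traces the spatial size
# through four MaxPooling2D(pool_size=2, strides=2, padding="valid") layers.
def flat_size(side, channels=512, n_pools=4):
    for _ in range(n_pools):
        side = (side - 2) // 2 + 1
    return side * side * channels

print(flat_size(32))   # 2048   -> the input size the saved Dense(256) kernel expects
print(flat_size(225))  # 100352 -> what Flatten produces for a 225x225 image

So the reused Dense layer has a (2048, 256) kernel but receives a (None, 100352) tensor, which is exactly the shape mismatch reported in the traceback.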

After adding the VGG16 layers you add more Conv2D layers, so you should not call Flatten before them. Also, the strides argument is passed outside the MaxPooling2D constructor instead of inside it. With all of these fixed, the model becomes:

model = keras.Sequential()
model.add(keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3)))
for idx, layer in enumerate(VGG16.layers[0:-4]):
    model.add(layer)
    layer._name = "VGG16" + str(idx)
for layer in model.layers[1:]:
    layer.trainable = False
#model.add(layers.Flatten())
model.add(layers.Conv2D(64, kernel_size=(3,3), padding="SAME", activation=tf.math.sin))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D(pool_size=(2,2), padding="SAME",strides=2))
model.add(layers.Conv2D(32, kernel_size=(3,3), padding="SAME", activation=tf.math.sin))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D(pool_size=(2,2), padding="SAME", strides=2))
model.add(layers.Flatten())
model.add(layers.Dense(1, activation="sigmoid"))
print(model.summary())
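
As a quick sanity check (assuming the architecture from the training script above), you can print the four layers that the [0:-4] slice now drops; they should be the Flatten and the three Dense layers whose weights are tied to the 32x32 input:

# Optional check, not in the original answer:
for layer in VGG16.layers[-4:]:
    print(layer.name, type(layer).__name__)
# Expected: a Flatten followed by three Dense layers (exact names may differ).

Because the copied part now ends at a pooling layer, its output is still a 4D feature map, and the new Conv2D/BatchNormalization/MaxPooling2D head plus the final Flatten and Dense(1) build their weight shapes from the 225x225 input, so the mismatch disappears.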