I trained two models with the same number of trainable parameters and the same structure, but the Functional model performs better than the Sequential model. I am trying to predict a vector from a given image; the input features come from a VGG16 model with the top layer excluded. When I compare the original vector with the predicted one, the Functional model's prediction has a noticeably higher cosine similarity with the original. Can someone explain why this happens?
Code below:
from keras.models import Sequential
import numpy as np
from keras.models import Model
from keras.layers import Input
from keras.layers import Dense
from tensorflow import keras
from numpy import random
from sklearn.metrics.pairwise import cosine_similarity
epochs=2000
x = random.random_sample((1, 4096))
y = np.array([ 0.01897711, 0.00196044, -0.0100884 , 0.08048831, 0.07945059, -0.13450155, -0.00228113, 0.30315322, -0.2170798 , 0.12462355, -0.12226178, -0.19237731, -0.14406398, 0.11556922, 0.04466464, -0.22505943, -0.07492258, -0.05925079, 0.02871693, -0.32403016, 0.16885516, -0.01677704, 0.03490563, 0.08720589, -0.03105724, -0.10850648, 0.04820024, -0.1348836 , -0.26358405, 0.08388387, 0.13177398, 0.00133367, -0.01074621, -0.01703981, 0.14912938, 0.13562258, 0.12910905, -0.02097122, -0.05823291, -0.21523051, -0.1051832 , -0.0112495 , -0.02306462, 0.30883443, 0.24211378, -0.01332151, -0.04171557, -0.07624041, 0.05742156, 0.17561561, -0.05971769, -0.22914584, -0.2354534 , -0.12413627, -0.02892042, -0.08661073, 0.14135012, -0.15514424, -0.09965582, -0.13770337, 0.09548005, 0.0925705 , -0.10030732, 0.16057852, -0.17537649, 0.23076315, -0.12471516, 0.2811343 , -0.1576465 , 0.17364068, 0.0658261 , 0.044597 , 0.27390295, -0.04520088, 0.00317772, 0.05926268, 0.06897669, -0.2579084 , -0.30417407, -0.08170868, -0.10205928, -0.14339833, -0.2291172 , 0.1584655 , -0.108877 , 0.03841971, -0.02097263, -0.00477816, -0.08784705, 0.00944081, 0.01409219, 0.1655657 , 0.09393094, 0.233216 , 0.28611556, -0.00573498, 0.1374636 , -0.19641444, 0.14472656, 0.254758 , -0.26166946, 0.30998066, 0.1026804 , -0.0578127 , -0.0882837 , -0.25514072, 0.12337176, 0.1786545 , 0.04052542, -0.17535737, -0.05401937, -0.27649277, -0.04952267, 0.08122452, 0.04374097, -0.07044917, 0.0653659 , -0.36983526, -0.02356564, -0.01144519, 0.1440273 , 0.12321867, 0.10163002, -0.13444787, -0.06148207, 0.11309719, -0.24679276, -0.04028287, -0.0930292 , -0.06392674, 0.10477038, 0.00828285, -0.11968364, -0.16145884, -0.08808196, 0.14231506, -0.02768413, -0.24046096, 0.02477906, -0.3868386 , 0.08224358, -0.30728677, -0.31634584, -0.24805053, -0.19289431, -0.04890246, -0.23479757, 0.13149938, 0.02801071, 0.12761658, 0.02897108, -0.14499697, 0.05322106, 0.06153642, -0.21517622, 0.255269 , 0.08573797, 0.09940388, -0.10590497, 0.13063994, 0.11253715, 0.15636472, -0.19782121, 0.01258014, -0.04391019, 0.16168897, -0.05669969, -0.17957021, -0.04841055, -0.00175814, -0.25425357, 0.14485207, 0.08319512, -0.20990393, 0.04344559, 0.20995931, -0.16608813, 0.28736553, 0.12240092, 0.12146739, 0.05718496, 0.01994314, 0.09686041, 0.13452487, 0.1052431 , 0.10266875, -0.01051683, 0.01536175, 0.25623122, 0.11273847, 0.06577922, -0.09992851, -0.02046986, -0.11516961, 0.12051879, 0.00518495, 0.0988002 , -0.279763 , -0.09997523, -0.04474135])
y = y.reshape(1,-1)
inputs = Input(shape=(4096,))
decoder = Dense(256, activation="sigmoid")(inputs)
decoder = Dense(256, activation="sigmoid")(decoder)
decoder = Dense(256, activation="sigmoid")(decoder)
outputs = Dense(200, activation="sigmoid")(decoder)
functional = Model(inputs=inputs, outputs=outputs)
opt = keras.optimizers.Adam(learning_rate=0.01)
functional.compile(loss="mse", optimizer=opt)
sequen = Sequential()
sequen.add(Dense(256, input_shape=(4096,), activation="sigmoid"))
sequen.add(Dense(256, activation="sigmoid"))
sequen.add(Dense(256, activation="sigmoid"))
sequen.add(Dense(200, activation="sigmoid"))
sequen.compile(loss="mse", optimizer=opt)
functional.fit(x, y, verbose=1, validation_data=(x, y), epochs=epochs)
sequen.fit(x, y, verbose=1, validation_data=(x, y), epochs=epochs)
functional_output = cosine_similarity(functional.predict(x), y)
sequential_output = cosine_similarity(sequen.predict(x), y)
print(functional_output, sequential_output)
# Cosine similarity between each prediction and the original vector; the
# Functional API model gives a clearly better result.
# output - array([[0.65056009]]), array([[0.19631703]])
Functional Model Structure
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
input_1 (InputLayer)         [(None, 4096)]            0
dense (Dense)                (None, 256)               1048832
dense_1 (Dense)              (None, 256)               65792
dense_2 (Dense)              (None, 256)               65792
dense_3 (Dense)              (None, 200)               51400
=================================================================
Total params: 1,231,816
Trainable params: 1,231,816
Non-trainable params: 0
_________________________________________________________________
Sequential Model Structure
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
dense_4 (Dense)              (None, 256)               1048832
dense_5 (Dense)              (None, 256)               65792
dense_6 (Dense)              (None, 256)               65792
dense_7 (Dense)              (None, 200)               51400
=================================================================
Total params: 1,231,816
Trainable params: 1,231,816
Non-trainable params: 0
_________________________________________________________________
CodePudding user response:
I think the main problem is that you use the same optimizer instance to train both models: after training the first model, the optimizer already has an internal state (Adam keeps a step counter and per-weight moment estimates). Using two separate optimizers yields (almost) identical results:
...
opt1 = keras.optimizers.Adam(learning_rate=0.01)
opt2 = keras.optimizers.Adam(learning_rate=0.01)
...
...
[[0.65034289]] [[0.65033581]]
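For completeness, a minimal sketch of how the two optimizers are wired in, reusing the variable names from the question's code (everything else stays unchanged):
# Each model gets its own fresh Adam instance, so the second training run
# does not inherit the first one's step counter or moment estimates.
opt1 = keras.optimizers.Adam(learning_rate=0.01)
opt2 = keras.optimizers.Adam(learning_rate=0.01)
functional.compile(loss="mse", optimizer=opt1)
sequen.compile(loss="mse", optimizer=opt2)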
CodePudding user response:
The two models are initialized with different random weights and biases. You can initialize the weights and biases of both models to zeros by passing kernel_initializer=tf.keras.initializers.Zeros() and bias_initializer=tf.keras.initializers.Zeros() to every Dense layer. With that change alone you will see similar results, but not identical ones. As pointed out by @AloneTogether, after training the first model the optimizer already has an internal state, so the second model also needs a freshly created optimizer. With both changes, running the code below gives identical results:
from keras.models import Sequential
import numpy as np
from keras.models import Model
from keras.layers import Input
from keras.layers import Dense
from tensorflow import keras
import tensorflow as tf
from numpy import random
from sklearn.metrics.pairwise import cosine_similarity
epochs=200
x = random.random_sample((1, 4096))
y = np.array([ 0.01897711, 0.00196044, -0.0100884 , 0.08048831, 0.07945059, -0.13450155, -0.00228113, 0.30315322, -0.2170798 , 0.12462355, -0.12226178, -0.19237731, -0.14406398, 0.11556922, 0.04466464, -0.22505943, -0.07492258, -0.05925079, 0.02871693, -0.32403016, 0.16885516, -0.01677704, 0.03490563, 0.08720589, -0.03105724, -0.10850648, 0.04820024, -0.1348836 , -0.26358405, 0.08388387, 0.13177398, 0.00133367, -0.01074621, -0.01703981, 0.14912938, 0.13562258, 0.12910905, -0.02097122, -0.05823291, -0.21523051, -0.1051832 , -0.0112495 , -0.02306462, 0.30883443, 0.24211378, -0.01332151, -0.04171557, -0.07624041, 0.05742156, 0.17561561, -0.05971769, -0.22914584, -0.2354534 , -0.12413627, -0.02892042, -0.08661073, 0.14135012, -0.15514424, -0.09965582, -0.13770337, 0.09548005, 0.0925705 , -0.10030732, 0.16057852, -0.17537649, 0.23076315, -0.12471516, 0.2811343 , -0.1576465 , 0.17364068, 0.0658261 , 0.044597 , 0.27390295, -0.04520088, 0.00317772, 0.05926268, 0.06897669, -0.2579084 , -0.30417407, -0.08170868, -0.10205928, -0.14339833, -0.2291172 , 0.1584655 , -0.108877 , 0.03841971, -0.02097263, -0.00477816, -0.08784705, 0.00944081, 0.01409219, 0.1655657 , 0.09393094, 0.233216 , 0.28611556, -0.00573498, 0.1374636 , -0.19641444, 0.14472656, 0.254758 , -0.26166946, 0.30998066, 0.1026804 , -0.0578127 , -0.0882837 , -0.25514072, 0.12337176, 0.1786545 , 0.04052542, -0.17535737, -0.05401937, -0.27649277, -0.04952267, 0.08122452, 0.04374097, -0.07044917, 0.0653659 , -0.36983526, -0.02356564, -0.01144519, 0.1440273 , 0.12321867, 0.10163002, -0.13444787, -0.06148207, 0.11309719, -0.24679276, -0.04028287, -0.0930292 , -0.06392674, 0.10477038, 0.00828285, -0.11968364, -0.16145884, -0.08808196, 0.14231506, -0.02768413, -0.24046096, 0.02477906, -0.3868386 , 0.08224358, -0.30728677, -0.31634584, -0.24805053, -0.19289431, -0.04890246, -0.23479757, 0.13149938, 0.02801071, 0.12761658, 0.02897108, -0.14499697, 0.05322106, 0.06153642, -0.21517622, 0.255269 , 0.08573797, 0.09940388, -0.10590497, 0.13063994, 0.11253715, 0.15636472, -0.19782121, 0.01258014, -0.04391019, 0.16168897, -0.05669969, -0.17957021, -0.04841055, -0.00175814, -0.25425357, 0.14485207, 0.08319512, -0.20990393, 0.04344559, 0.20995931, -0.16608813, 0.28736553, 0.12240092, 0.12146739, 0.05718496, 0.01994314, 0.09686041, 0.13452487, 0.1052431 , 0.10266875, -0.01051683, 0.01536175, 0.25623122, 0.11273847, 0.06577922, -0.09992851, -0.02046986, -0.11516961, 0.12051879, 0.00518495, 0.0988002 , -0.279763 , -0.09997523, -0.04474135])
y = y.reshape(1,-1)
inputs = Input(shape=(4096,))
decoder = Dense(256, activation="sigmoid", kernel_initializer=tf.keras.initializers.Zeros(), bias_initializer=tf.keras.initializers.Zeros())(inputs)
decoder = Dense(256, activation="sigmoid", kernel_initializer=tf.keras.initializers.Zeros(), bias_initializer=tf.keras.initializers.Zeros())(decoder)
decoder = Dense(256, activation="sigmoid", kernel_initializer=tf.keras.initializers.Zeros(), bias_initializer=tf.keras.initializers.Zeros())(decoder)
outputs = Dense(200, activation="sigmoid", kernel_initializer=tf.keras.initializers.Zeros(), bias_initializer=tf.keras.initializers.Zeros())(decoder)
functional = Model(inputs=inputs, outputs=outputs)
opt = keras.optimizers.Adam(learning_rate=0.01)
functional.compile(loss="mse", optimizer=opt)
sequen = Sequential()
sequen.add(Input(shape=(4096,)))
sequen.add(Dense(256, activation="sigmoid", kernel_initializer=tf.keras.initializers.Zeros(), bias_initializer=tf.keras.initializers.Zeros()))
sequen.add(Dense(256, activation="sigmoid", kernel_initializer=tf.keras.initializers.Zeros(), bias_initializer=tf.keras.initializers.Zeros()))
sequen.add(Dense(256, activation="sigmoid", kernel_initializer=tf.keras.initializers.Zeros(), bias_initializer=tf.keras.initializers.Zeros()))
sequen.add(Dense(200, activation="sigmoid", kernel_initializer=tf.keras.initializers.Zeros(), bias_initializer=tf.keras.initializers.Zeros()))
opt2 = keras.optimizers.Adam(learning_rate=0.01)
sequen.compile(loss="mse", optimizer=opt2)
functional.fit(x, y, verbose=1, validation_data=(x, y), epochs=epochs)
sequen.fit(x, y, verbose=1, validation_data=(x, y), epochs=epochs)
functional_output = cosine_similarity(functional.predict(x), y)
sequential_output = cosine_similarity(sequen.predict(x), y)
print(functional_output, sequential_output)
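If you want to see the optimizer state referred to above, you can inspect Adam's step counter after the first fit. A minimal sketch (iterations is the optimizer's internal step count; with one sample this script runs one batch per epoch, so 200 steps):
# After functional.fit(...), the first optimizer's step counter is no longer
# zero. A second model compiled with the same instance would therefore start
# mid-way through Adam's trajectory, which is the bug in the question's code.
print(int(opt.iterations))   # 200 here: one batch per epoch for 200 epochs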