Keras Sequential and Functional models show different results


The Keras Sequential and Functional implementations of the same model, trained on the same dataset, show different results.

The code for the Sequential:

import random
import os
import numpy as np
from sklearn.utils import shuffle
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.models import Sequential
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

SEED = 42
os.environ['PYTHONHASHSEED']=str(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)
random.seed(SEED)


dataset = np.load("/content/drive/MyDrive/Test_Performance/dataset.npz")
x = dataset['x']
y = dataset['y']
encoder = LabelEncoder()
encoder.fit(y)
label = encoder.transform(y)


model_1 = Sequential([
  layers.Dense(256, activation='relu'),
  layers.Dense(128, activation='relu'),
  layers.Dense(64, activation='relu'),
  layers.Dense(32, activation='relu'),
  layers.Dense(16, activation='relu'),
  layers.Dense(1)
])

def get_model_2():
    inputs1 = layers.Input(256)
    x1 = layers.Dense(units=256, activation="relu")(inputs1)
    x2 = layers.Dense(units=128, activation="relu")(x1)
    x3 = layers.Dense(units=64, activation="relu")(x2)
    x4 = layers.Dense(units=32, activation="relu")(x3)
    x5 = layers.Dense(units=16, activation="relu")(x4)
    outputs = layers.Dense(1)(x5)

    model = models.Model(inputs=inputs1, outputs=outputs, name="mymodel")
    optimizer2 = keras.optimizers.Adam(learning_rate=0.0001)
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer2,
                  metrics=['accuracy'])
    return model

print(y)
print(y.shape)
np.savez_compressed("dataset.npz", x=x, y=y)
x, y = shuffle(x, y, random_state=SEED)

# ========= Model 1 ==========
optimizer2 = keras.optimizers.Adam(learning_rate=0.0001)
model_1.compile(loss='binary_crossentropy',
                optimizer=optimizer2,
                metrics=['accuracy'])
estimator = KerasClassifier(build_fn=lambda: model_1, epochs=200, batch_size=100, verbose=1)
# =============================


kfold = StratifiedKFold(n_splits=10, random_state=SEED, shuffle=True)
results = cross_val_score(estimator, x, y, scoring='accuracy', cv=kfold, verbose=2)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

The code for the Functional:

import random
import os
import numpy as np
from sklearn.utils import shuffle
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.models import Sequential
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

SEED = 42
os.environ['PYTHONHASHSEED']=str(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)
random.seed(SEED)

dataset = np.load("/content/drive/MyDrive/Test_Performance/dataset.npz")
x = dataset['x']
y = dataset['y']
encoder = LabelEncoder()
encoder.fit(y)
label = encoder.transform(y)


model_1 = Sequential([
  layers.Dense(256, activation='relu'),
  layers.Dense(128, activation='relu'),
  layers.Dense(64, activation='relu'),
  layers.Dense(32, activation='relu'),
  layers.Dense(16, activation='relu'),
  layers.Dense(1)
])

def get_model_2():
    inputs1 = layers.Input(256)
    x1 = layers.Dense(units=256, activation="relu")(inputs1)
    x2 = layers.Dense(units=128, activation="relu")(x1)
    x3 = layers.Dense(units=64, activation="relu")(x2)
    x4 = layers.Dense(units=32, activation="relu")(x3)
    x5 = layers.Dense(units=16, activation="relu")(x4)
    outputs = layers.Dense(1)(x5)

    model = models.Model(inputs=inputs1, outputs=outputs, name="mymodel")
    optimizer2 = keras.optimizers.Adam(learning_rate=0.0001)
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer2,
                  metrics=['accuracy'])
    return model

print(y)
print(y.shape)
np.savez_compressed("dataset.npz", x=x, y=y)
x, y = shuffle(x, y, random_state=SEED)

# ====== Model 2 ========
estimator = KerasClassifier(build_fn=lambda: get_model_2(), epochs=200, batch_size=100, verbose=1)
# =======================


kfold = StratifiedKFold(n_splits=10, random_state=SEED, shuffle=True)
results = cross_val_score(estimator, x, y, scoring='accuracy', cv=kfold, verbose=2)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

I already set a fixed seed for both scripts. You can also find the code in this Google Colab, and here is the dataset.

The Sequential results are almost always above 0.9, while the Functional results are always around 0.5!

CodePudding user response:

The layers have default initialization weights that depend on the current random number generator (RNG) state, which changes with every random draw. The default initial weights of the (Dense) layers are therefore different for model 1 and model 2, because the RNG state is different when each model is created. A simple fix is to seed the RNG just before creating the model.
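
To see the effect in isolation, here is a minimal sketch (assuming TF 2.x eager execution): two identically configured Dense layers created one after the other get different initial weights, while re-seeding just before creation restores identical initialization.

import numpy as np
import tensorflow as tf
from tensorflow.keras import layers

tf.random.set_seed(42)
a = layers.Dense(4)
a.build((None, 8))  # building the layer consumes RNG draws for its initializer
b = layers.Dense(4)
b.build((None, 8))  # the RNG state has moved on, so the weights differ
print(np.allclose(a.kernel.numpy(), b.kernel.numpy()))  # False

tf.random.set_seed(42)  # re-seed just before creating the layer
c = layers.Dense(4)
c.build((None, 8))
print(np.allclose(a.kernel.numpy(), c.kernel.numpy()))  # True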

Fixed sample code:

import os, random
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.models import Sequential

SEED = 42

# Dummy data, just to demonstrate identical convergence of the two models
x = np.random.randn(100, 256)
y = np.random.randn(100, 1)

def get_model1():
  os.environ['PYTHONHASHSEED']=str(SEED)
  np.random.seed(SEED)
  tf.random.set_seed(SEED)
  random.seed(SEED)

  model = Sequential([
    layers.Dense(256, activation='relu'),
    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(16, activation='relu'),
    layers.Dense(1)
  ])

  model.compile(loss='binary_crossentropy',
                optimizer=keras.optimizers.Adam(learning_rate=0.0001),
                metrics=['accuracy'])
  return model


get_model1().fit(x, y, epochs=4)
print("--" * 10)
def get_model2():
  os.environ['PYTHONHASHSEED']=str(SEED)
  np.random.seed(SEED)
  tf.random.set_seed(SEED)
  random.seed(SEED)

  inputs1 = layers.Input(256)
  x1 = layers.Dense(units=256, activation="relu")(inputs1)
  x2 = layers.Dense(units=128, activation="relu")(x1)
  x3 = layers.Dense(units=64, activation="relu")(x2)
  x4 = layers.Dense(units=32, activation="relu")(x3)
  x5 = layers.Dense(units=16, activation="relu")(x4)
  outputs = layers.Dense(1)(x5)

  model = models.Model(inputs=inputs1, outputs=outputs, name="mymodel")
  model.compile(loss='binary_crossentropy',
                optimizer=keras.optimizers.Adam(learning_rate=0.0001),
                metrics=['accuracy'])
  return model

get_model2().fit(x, y, epochs=4)

output:

Epoch 1/4
4/4 [==============================] - 1s 6ms/step - loss: 1.4076 - accuracy: 0.0000e+00
Epoch 2/4
4/4 [==============================] - 0s 9ms/step - loss: 1.2012 - accuracy: 0.0000e+00
Epoch 3/4
4/4 [==============================] - 0s 5ms/step - loss: 1.1590 - accuracy: 0.0000e+00
Epoch 4/4
4/4 [==============================] - 0s 5ms/step - loss: 1.1359 - accuracy: 0.0000e+00
--------------------
Epoch 1/4
4/4 [==============================] - 1s 6ms/step - loss: 1.4076 - accuracy: 0.0000e+00
Epoch 2/4
4/4 [==============================] - 0s 5ms/step - loss: 1.2012 - accuracy: 0.0000e+00
Epoch 3/4
4/4 [==============================] - 0s 5ms/step - loss: 1.1590 - accuracy: 0.0000e+00
Epoch 4/4
4/4 [==============================] - 0s 5ms/step - loss: 1.1359 - accuracy: 0.0000e+00

As you can see, the loss of both models converges identically.

Reproducibility becomes tricky with GPU operations because some of them are non-deterministic. However, since you are only using Dense layers, your runs are deterministic (and reproducible).
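
For completeness, newer TensorFlow releases make both concerns (seeding every RNG and GPU determinism) easier to handle. A minimal sketch, assuming TF 2.8 or later:

import tensorflow as tf

# Seeds Python's random module, NumPy and TensorFlow in one call (TF >= 2.7)
tf.keras.utils.set_random_seed(42)

# Makes ops run deterministically where supported (TF >= 2.8); ops without a
# deterministic implementation raise an error instead of silently varying
tf.config.experimental.enable_op_determinism()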
