How to reuse the layers of keras functional model-CodePudding

import tensorflow as tf
import keras

def get_model():

    x1 = keras.layers.Dense(6, activation='relu',input_shape=(10,))
    x2 = keras.layers.Dense(3, activation='relu')(x1)
    output_ = keras.layers.Dense(10,acitvation='sigmoid')(x2)

    model = keras.model(inputs=[x1], outputs=[output_])

    return model

model = get_model()

model.compile(...)

chk_point = keras.callbacks.ModelCheckpoint(f'./best_model.h5', 
                            monitor='val_loss', save_best_only=True, mode='min')
model.fit(..., callbacks=[chk_point])


def new_model():
    old = '../best_model.h5' #using old model for training new model

(i am trying to do transfer learning with pretrained functional model)

now I want to get all the layers of the best_model. if possible I want to remove the last layer of my best_model. and I want to freeze all the layers of best_model i.e trainable = False. and add new layers to that model.

I am trying to do denoising autoencoder on train dataset where the input and output of the best_model.h5 is same (eg.input_shape=(100,) and output_shape=(100,)). then I am thinking of freezing all that layers and removing the last layer of that model after that adding new layers to that model. then training that model on X and y as usual

CodePudding user response：

You can do something like this:

import tensorflow as tf
import keras

def get_model():
    
    _input = tf.keras.layers.Input(shape=(10,))
    x1 = tf.keras.layers.Dense(6, activation='relu')(_input)
    x2 = tf.keras.layers.Dense(3, activation='relu')(x1)
    _output = tf.keras.layers.Dense(10, activation='sigmoid')(x2)

    model = tf.keras.Model(inputs=_input, outputs=_output)

    return model

model = get_model()

model.compile(optimizer='adam', loss='binary_crossentropy')

chk_point = keras.callbacks.ModelCheckpoint(f'./best_model.h5', 
                            monitor='val_loss', save_best_only=True, mode='min')

x_train, y_train = tf.random.normal((100, 10)), tf.random.uniform((100, 10), maxval=2)
x_valid, y_valid = tf.random.normal((100, 10)), tf.random.uniform((100, 10), maxval=2)

model.fit(x_train, y_train, validation_data=(x_valid, y_valid),callbacks=[chk_point], epochs= 10)

old_model = tf.keras.models.load_model('best_model.h5')
for layer in old_model.layers[:-1]:
    layer.trainable = False

_input = tf.keras.layers.Input(shape=(10,))
x = old_model(_input)
x1 = tf.keras.layers.Dense(6, activation='relu')(x)
x2 = tf.keras.layers.Dense(3, activation='relu')(x1)
_output = tf.keras.layers.Dense(10, activation='sigmoid')(x2)

new_model = tf.keras.Model(inputs=_input, outputs=_output)
new_model.compile(optimizer='adam', loss='binary_crossentropy')

x_train, y_train = tf.random.normal((100, 10)), tf.random.uniform((100, 10), maxval=2)
new_model.fit(x_train, y_train, epochs= 10)

Note that only the last layer in the old_model is trainable in the new_model.

CodePudding user response：

One way to do this is to define the new model, then copy the layer weights from the old model (except for the last layer) and set trainable to False. For example, let's say you want to remove the last layer and add two dense layers (this is just an example). Note that the input and output size of your current model is (10,). Also note that the first layer in the functional API is an input layer. Here is the code:

import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
import numpy as np


def get_model():

    inputs = keras.Input(shape=(10, ))
    x1 = keras.layers.Dense(6, activation='relu')(inputs)
    x2 = keras.layers.Dense(3, activation='relu')(x1)
    output_ = keras.layers.Dense(10,activation='sigmoid')(x2)

    model = keras.Model(inputs=inputs, outputs=[output_])

    return model


def get_new_model():

    inputs = keras.Input(shape=(10, ))
    x1 = keras.layers.Dense(6, activation='relu')(inputs)
    x2 = keras.layers.Dense(3, activation='relu')(x1)

    # new layers
    x3 = keras.layers.Dense(15, activation='relu')(x2)
    output_ = keras.layers.Dense(10, activation='sigmoid')(x3)

    model = keras.Model(inputs=inputs, outputs=[output_])

    return model


model = get_model()
model.compile(optimizer='adam', loss='mse')
batch_size = 16
_ = model.call(inputs=tf.random.normal(shape=(batch_size, 10)))
model.summary()

# create x data using two normal distributions with different mean
# y data is unused in auto encoder
x0 = np.random.normal(loc=0.0, size=(100, 10))
x1 = np.random.normal(loc=0.3, size=(100, 10))
x = np.concatenate((x0, x1), axis=0)
# y is unused
y0 = np.zeros((100, 10))
y1 = np.ones((100, 10))
y = np.concatenate((y0, y1), axis=0)
# split train/validation data
x_train, x_val, y_train, y_val = train_test_split(x, y, train_size=0.7) 
print(x_train.shape)
print(y_train.shape)

chk_point = keras.callbacks.ModelCheckpoint(f'./best_model.h5', 
                            monitor='val_loss', save_best_only=True, mode='min')
history = model.fit(x=x_train, y=x_train, batch_size=batch_size, epochs=100, callbacks=[chk_point], validation_data=(x_val, x_val))

# reload old model
model_old = keras.models.load_model('./best_model.h5')
model_old.summary()

# get new model
model_new = get_new_model()
model_new.compile(optimizer='adam', loss='mse')
_ = model_new.call(inputs=tf.random.normal(shape=(batch_size, 10)))
model_new.summary()

# copy the two dense layer weights and set trainable to False
# skip the first layer which is an InputLayer
for count, (layer_old, layer_new) in enumerate(zip(model_old.layers[1:3], model_new.layers[1:3])):  
  layer_new.trainable = False
  layer_new.set_weights(layer_old.get_weights())
  model_new.layers[count 1] = layer_new

model_new.summary()