How to fine-tune the last layers of a neural network in the target model for transfer learning?


I am learning how transfer learning works using this data: https://www.kaggle.com/competitions/santander-customer-satisfaction/data. This is my simple source model code in TensorFlow, and I am saving this model:

import pandas as pd
pd.set_option('display.max_rows', None)
import numpy  as np
from tensorflow import keras
import matplotlib.pyplot as plt
import tensorflow as tf
""" # Read in the csv data using pandas 
train  = pd.read_csv('Z:\ADwork2\python\PM/train.csv',index_col=0)
test   = pd.read_csv('Z:\ADwork2\python\PM/test.csv', index_col=0)
sample = pd.read_csv('Z:\ADwork2\python\PM/sample_submission.csv')
 """
# Read in the csv data using pandas 
train  = pd.read_csv('train.csv',index_col=0)
test   = pd.read_csv('test.csv', index_col=0)
sample = pd.read_csv('sample_submission.csv')


train.dtypes.value_counts()

train.select_dtypes(include=['int64']).nunique()

features_to_drop = train.nunique()
features_to_drop = features_to_drop.loc[features_to_drop.values==1].index
# now drop these columns from both the training and the test datasets
train = train.drop(features_to_drop,axis=1)
test  = test.drop(features_to_drop,axis=1)

train.isnull().values.any()


X = train.iloc[:,:-1]
y = train['TARGET']

y.value_counts().to_frame().T


from imblearn.over_sampling import SMOTE
X_resampled, y_resampled = SMOTE().fit_resample(X, y)


y_resampled.value_counts().to_frame().T

from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(X_resampled, y_resampled, 
                                                  train_size=0.5,
                                                  test_size=0.2, 
                                                  random_state=42, 
                                                  shuffle=True)


from sklearn.preprocessing import MinMaxScaler
scaler  = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_val   = scaler.transform(X_val)
test    = scaler.transform(test)


model = keras.Sequential(
    [
        keras.layers.Dense(units=9, activation="relu", input_shape=(X_train.shape[-1],) ),
        # randomly drop 30% of the previous layer's outputs during training
        keras.layers.Dropout(0.3),
        keras.layers.Dense(units=9, activation="relu"),
        # the output layer, with a single neuron
        keras.layers.Dense(units=1, activation="sigmoid"),
    ]
)

# save the initial weights for later
initial_weights = model.get_weights()
model.summary()

#keras.utils.plot_model(model, show_shapes=True)

learning_rate = 0.001

model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate), 
              loss="binary_crossentropy", 
              metrics=[keras.metrics.AUC()]
             )

history = model.fit(X_train, y_train, 
          epochs=500, 
          batch_size=1000, 
          validation_data=(X_val, y_val),
          verbose=0)

from tensorflow.keras.callbacks import EarlyStopping

early_stopping = EarlyStopping(
    min_delta = 0.0002, # minimum amount of change to count as an improvement
    patience  = 20,     # how many epochs to wait before stopping
    restore_best_weights=True,
)

model.set_weights(initial_weights)
history = model.fit(X_train, y_train, 
          epochs=500, 
          batch_size=1000, 
          validation_data=(X_val, y_val),
          verbose=0,
          # add in our early stopping callback
          callbacks=[early_stopping]
        )

sample['TARGET'] = model.predict(test)

sample.to_csv('submission.csv',index=False)
model.save('modelcentral.h5')

I save this model and then load it in a new Python file as the target model:

import pandas as pd
pd.set_option('display.max_rows', None)
import numpy  as np
from tensorflow import keras
import matplotlib.pyplot as plt
import tensorflow as tf

# Read in the csv data using pandas 
train  = pd.read_csv('train.csv',index_col=0)
test   = pd.read_csv('test.csv', index_col=0)
sample = pd.read_csv('sample_submission.csv')


train.dtypes.value_counts()

train.select_dtypes(include=['int64']).nunique()

features_to_drop = train.nunique()
features_to_drop = features_to_drop.loc[features_to_drop.values==1].index
# now drop these columns from both the training and the test datasets
train = train.drop(features_to_drop,axis=1)
test  = test.drop(features_to_drop,axis=1)

train.isnull().values.any()


X = train.iloc[:,:-1]
y = train['TARGET']

y.value_counts().to_frame().T


from imblearn.over_sampling import SMOTE
X_resampled, y_resampled = SMOTE().fit_resample(X, y)


y_resampled.value_counts().to_frame().T

from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(X_resampled, y_resampled, 
                                                  train_size=0.5,
                                                  test_size=0.2, 
                                                  random_state=42, 
                                                  shuffle=True)


from sklearn.preprocessing import MinMaxScaler
scaler  = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_val   = scaler.transform(X_val)
test    = scaler.transform(test)

# reload the saved source model; load_model reconstructs it identically
model = keras.models.load_model("modelcentral.h5")
model.trainable = False  # freeze every layer of the loaded model

learning_rate = 0.001
model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate), 
              loss="binary_crossentropy", 
              metrics=[keras.metrics.AUC()]
             )

history = model.fit(X_train, y_train, 
          epochs=500, 
          batch_size=1000, 
          validation_data=(X_val, y_val),
          verbose=0)
model.summary()

For now I am just freezing all model layers, but what if I need to fine-tune the last layers? For example, I have binary classification in the source model; what if the target model needs multi-class classification? How can I fine-tune the last layers? I am following this repo https://github.com/rasbt/stat453-deep-learning-ss21/blob/main/L14/5-transfer-learning-vgg16_small.ipynb to learn fine-tuning of the final layers for transfer learning, but that code is in PyTorch and works on image data, so I am confused:

model.classifier[1].requires_grad = True
model.classifier[3].requires_grad = True
# For the last layer, because the number of class labels differs from ImageNet, we replace the output layer with our own output layer:

model.classifier[6] = torch.nn.Linear(4096, 10)

Please help, and if there is any mistake in the current code, guide me.

CodePudding user response:

Given your source model:

import tensorflow as tf

model = tf.keras.Sequential(
    [
        tf.keras.layers.Dense(units=9, activation="relu", input_shape=(10,) ),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.Dense(units=9, activation="relu"),
        tf.keras.layers.Dense(units=1, activation="sigmoid"),
    ])

model.save('model.h5')

You can do something like this to replace your last layer with some other layer:

model = tf.keras.models.load_model("model.h5")

transfer_model = tf.keras.Sequential()

for idx, l in enumerate(model.layers):
    if idx == len(model.layers) - 1:
        # swap the binary sigmoid head for an output layer with 10 different classes
        transfer_model.add(tf.keras.layers.Dense(units=10, activation="softmax"))
    else:
        # reuse the pretrained layer, keeping its learned weights
        transfer_model.add(l)

print(transfer_model.summary())

You can decide which layers you then want to freeze or make trainable by setting l.trainable = True / False.
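For example, a minimal sketch that keeps every transferred layer frozen and trains only the new softmax head:

for l in transfer_model.layers[:-1]:
    # freeze each transferred layer; only the new output layer remains trainable
    l.trainable = False

You could also do this all without the for loop if you prefer: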

# unfreeze the two hidden Dense layers (index 1 is the Dropout layer, which has no weights)
model.layers[0].trainable = True
model.layers[2].trainable = True

# attach a new 10-class softmax head to the output of the last hidden layer
outputs = tf.keras.layers.Dense(units=10, activation="softmax")(model.layers[-2].output)
transfer_model = tf.keras.Model(inputs=model.input, outputs=outputs)
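
Either way, the new model can then be compiled and trained as usual. A minimal sketch, assuming hypothetical multi-class targets y_train_mc and y_val_mc encoded as integers 0-9 (hence sparse_categorical_crossentropy):

transfer_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    # integer labels 0-9; use categorical_crossentropy instead for one-hot labels
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

history = transfer_model.fit(X_train, y_train_mc,
                             epochs=100,
                             batch_size=1000,
                             validation_data=(X_val, y_val_mc))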