I am learning how transfer learning works using this data: https://www.kaggle.com/competitions/santander-customer-satisfaction/data. This is my simple source model code in TensorFlow, which I save at the end:
import pandas as pd
pd.set_option('display.max_rows', None)
import numpy as np
from tensorflow import keras
import matplotlib.pyplot as plt
import tensorflow as tf
""" # Read in the csv data using pandas
train = pd.read_csv('Z:\ADwork2\python\PM/train.csv',index_col=0)
test = pd.read_csv('Z:\ADwork2\python\PM/test.csv', index_col=0)
sample = pd.read_csv('Z:\ADwork2\python\PM/sample_submission.csv')
"""
# Read in the csv data using pandas
train = pd.read_csv('train.csv',index_col=0)
test = pd.read_csv('test.csv', index_col=0)
sample = pd.read_csv('sample_submission.csv')
# quick look at the dtypes and at the cardinality of the integer columns
train.dtypes.value_counts()
train.select_dtypes(include=['int64']).nunique()
# find the constant columns (only one unique value); they carry no information
features_to_drop = train.nunique()
features_to_drop = features_to_drop.loc[features_to_drop.values==1].index
# now drop these columns from both the training and the test datasets
train = train.drop(features_to_drop,axis=1)
test = test.drop(features_to_drop,axis=1)
# check for missing values
train.isnull().values.any()
# split into features and target (TARGET is the last column)
X = train.iloc[:,:-1]
y = train['TARGET']
y.value_counts().to_frame().T
from imblearn.over_sampling import SMOTE
# balance the classes by oversampling the minority class
# (caveat: resampling before the train/validation split lets synthetic
# neighbours of validation rows leak into the training fold)
X_resampled, y_resampled = SMOTE().fit_resample(X, y)
y_resampled.value_counts().to_frame().T
from sklearn.model_selection import train_test_split
# 50% of the data for training, 20% for validation (the remaining 30% is unused)
X_train, X_val, y_train, y_val = train_test_split(X_resampled, y_resampled,
                                                  train_size=0.5,
                                                  test_size=0.2,
                                                  random_state=42,
                                                  shuffle=True)
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
test = scaler.transform(test)
model = keras.Sequential(
    [
        keras.layers.Dense(units=9, activation="relu", input_shape=(X_train.shape[-1],)),
        # randomly drop 30% of the previous layer's activations during training
        keras.layers.Dropout(0.3),
        keras.layers.Dense(units=9, activation="relu"),
        # the output layer, with a single neuron
        keras.layers.Dense(units=1, activation="sigmoid"),
    ]
)
# save the initial weights so the early-stopping run below starts from the same initialization
initial_weights = model.get_weights()
model.summary()
#keras.utils.plot_model(model, show_shapes=True)
learning_rate = 0.001
model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
              loss="binary_crossentropy",
              metrics=[keras.metrics.AUC()])
# baseline run: train for the full 500 epochs without early stopping
history = model.fit(X_train, y_train,
                    epochs=500,
                    batch_size=1000,
                    validation_data=(X_val, y_val),
                    verbose=0)
from tensorflow.keras.callbacks import EarlyStopping
early_stopping = EarlyStopping(
    min_delta=0.0002,           # minimum amount of change to count as an improvement
    patience=20,                # how many epochs to wait before stopping
    restore_best_weights=True,  # monitors val_loss by default
)
# reset to the initial weights and retrain, this time with early stopping
model.set_weights(initial_weights)
history = model.fit(X_train, y_train,
                    epochs=500,
                    batch_size=1000,
                    validation_data=(X_val, y_val),
                    verbose=0,
                    # add in our early stopping callback
                    callbacks=[early_stopping])
sample['TARGET'] = model.predict(test)
sample.to_csv('submission.csv',index=False)
model.save('modelcentral.h5')
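A quick note on the save format: 'modelcentral.h5' uses the legacy HDF5 format, which stores the architecture, weights, and optimizer state in one file and works fine for this workflow. Newer Keras releases prefer the native format instead, e.g.:
# equivalent save in the newer native Keras format
# model.save('modelcentral.keras')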
I save this model and then load it in a new Python file for the target model:
import pandas as pd
pd.set_option('display.max_rows', None)
import numpy as np
from tensorflow import keras
import matplotlib.pyplot as plt
import tensorflow as tf
# Read in the csv data using pandas
train = pd.read_csv('train.csv',index_col=0)
test = pd.read_csv('test.csv', index_col=0)
sample = pd.read_csv('sample_submission.csv')
train.dtypes.value_counts()
train.select_dtypes(include=['int64']).nunique()
features_to_drop = train.nunique()
features_to_drop = features_to_drop.loc[features_to_drop.values==1].index
# now drop these columns from both the training and the test datasets
train = train.drop(features_to_drop,axis=1)
test = test.drop(features_to_drop,axis=1)
train.isnull().values.any()
X = train.iloc[:,:-1]
y = train['TARGET']
y.value_counts().to_frame().T
from imblearn.over_sampling import SMOTE
X_resampled, y_resampled = SMOTE().fit_resample(X, y)
y_resampled.value_counts().to_frame().T
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(X_resampled, y_resampled,
                                                  train_size=0.5,
                                                  test_size=0.2,
                                                  random_state=42,
                                                  shuffle=True)
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
test = scaler.transform(test)
# reconstruct the model identically from the saved file
model = keras.models.load_model("modelcentral.h5")
# freeze every layer in the loaded model
model.trainable = False
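Note that changing trainable only takes effect once the model is recompiled, so the compile call below is required for the freeze to stick.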
learning_rate = 0.001
model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
              loss="binary_crossentropy",
              metrics=[keras.metrics.AUC()])
history = model.fit(X_train, y_train,
                    epochs=500,
                    batch_size=1000,
                    validation_data=(X_val, y_val),
                    verbose=0)
model.summary()
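Because every layer is frozen here, the fit call above does not actually update any weights; the summary confirms this by listing all parameters as non-trainable.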
For now I am just freezing all the model's layers, but what if I need to fine-tune the last layers? For example, I have binary classification in the source model; what if the target model is a multi-class classification problem? How can I fine-tune the last layers? I am following this repo https://github.com/rasbt/stat453-deep-learning-ss21/blob/main/L14/5-transfer-learning-vgg16_small.ipynb to learn fine-tuning of the final layers for transfer learning, but that code is in PyTorch and works on image data, so I am confused:
model.classifier[1].requires_grad = True
model.classifier[3].requires_grad = True
# For the last layer, because the number of class labels differs from ImageNet, we replace the output layer with our own:
model.classifier[6] = torch.nn.Linear(4096, 10)
Please help, and if there is any mistake in my current code, please point it out.
CodePudding user response:
Given your source model:
import tensorflow as tf
model = tf.keras.Sequential(
    [
        tf.keras.layers.Dense(units=9, activation="relu", input_shape=(10,)),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.Dense(units=9, activation="relu"),
        tf.keras.layers.Dense(units=1, activation="sigmoid"),
    ])
model.save('model.h5')
You can do something like this to replace your last layer with some other layer:
model = tf.keras.models.load_model("model.h5")
transfer_model = tf.keras.Sequential()
for idx, l in enumerate(model.layers):
    if idx == len(model.layers) - 1:
        # replace the binary head with an output layer for 10 classes
        transfer_model.add(tf.keras.layers.Dense(units=10, activation="softmax"))
    else:
        transfer_model.add(l)
print(transfer_model.summary())
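Note that the loop adds the original layer objects to transfer_model, so all copied layers keep the weights they learned in the source model; only the new softmax head starts from random initialization.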
You can decide which layers you then want to freeze or make trainable using l.trainable = True / False. You could also do this all without the for loop if you prefer:
model.layers[0].trainable = True
model.layers[2].trainable = True
outputs = tf.keras.layers.Dense(units=10, activation="softmax")(model.layers[-2].output)
transfer_model = tf.keras.Model(inputs=model.input, outputs=outputs)
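Either way, the new model has to be compiled before training, this time with a multi-class loss. A minimal sketch, assuming a 10-class target task with integer labels (the names X_train, y_train, X_val, y_val here are placeholders for your target-task data):
# freeze everything except the newly added head
for layer in transfer_model.layers[:-1]:
    layer.trainable = False

transfer_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss="sparse_categorical_crossentropy",  # use "categorical_crossentropy" for one-hot labels
    metrics=["accuracy"],
)
history = transfer_model.fit(X_train, y_train,
                             validation_data=(X_val, y_val),
                             epochs=100, batch_size=1000, verbose=0)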