The following model is defined in TF1; I am trying to migrate it to TF2 without using the compat API.
# Define the tensorflow neural network
# 1. Input:
self.input_states = tf.placeholder(
tf.float32, shape=[None, 4, board_height, board_width])
self.input_state = tf.transpose(self.input_states, [0, 2, 3, 1])
# 2. Common Networks Layers
self.conv1 = tf.layers.conv2d(inputs=self.input_state,
filters=32, kernel_size=[3, 3],
padding="same", data_format="channels_last",
activation=tf.nn.relu)
self.conv2 = tf.layers.conv2d(inputs=self.conv1, filters=64,
kernel_size=[3, 3], padding="same",
data_format="channels_last",
activation=tf.nn.relu)
self.conv3 = tf.layers.conv2d(inputs=self.conv2, filters=128,
kernel_size=[3, 3], padding="same",
data_format="channels_last",
activation=tf.nn.relu)
# 3-1 Action Networks
self.action_conv = tf.layers.conv2d(inputs=self.conv3, filters=4,
kernel_size=[1, 1], padding="same",
data_format="channels_last",
activation=tf.nn.relu)
# Flatten the tensor
self.action_conv_flat = tf.reshape(
self.action_conv, [-1, 4 * board_height * board_width])
# 3-2 Fully connected layer; the output is the log probability of moves
# on each slot on the board
self.action_fc = tf.layers.dense(inputs=self.action_conv_flat,
units=board_height * board_width,
activation=tf.nn.log_softmax)
# 4 Evaluation Networks
self.evaluation_conv = tf.layers.conv2d(inputs=self.conv3, filters=2,
kernel_size=[1, 1],
padding="same",
data_format="channels_last",
activation=tf.nn.relu)
self.evaluation_conv_flat = tf.reshape(
self.evaluation_conv, [-1, 2 * board_height * board_width])
self.evaluation_fc1 = tf.layers.dense(inputs=self.evaluation_conv_flat,
units=64, activation=tf.nn.relu)
# output the score of evaluation on current state
self.evaluation_fc2 = tf.layers.dense(inputs=self.evaluation_fc1,
units=1, activation=tf.nn.tanh)
# Define the Loss function
# 1. Label: the array indicating, for each state, whether the game was won
self.labels = tf.placeholder(tf.float32, shape=[None, 1])
# 2. Predictions: the array containing the evaluation score of each state
# which is self.evaluation_fc2
# 3-1. Value Loss function
self.value_loss = tf.losses.mean_squared_error(self.labels,
self.evaluation_fc2)
# 3-2. Policy Loss function
self.mcts_probs = tf.placeholder(
tf.float32, shape=[None, board_height * board_width])
self.policy_loss = tf.negative(tf.reduce_mean(
tf.reduce_sum(tf.multiply(self.mcts_probs, self.action_fc), 1)))
# 3-3. L2 penalty (regularization)
l2_penalty_beta = 1e-4
vars = tf.trainable_variables()
l2_penalty = l2_penalty_beta * tf.add_n(
[tf.nn.l2_loss(v) for v in vars if 'bias' not in v.name.lower()])
# 3-4 Add up to be the Loss function
self.loss = self.value_loss + self.policy_loss + l2_penalty
# Define the optimizer we use for training
self.learning_rate = tf.placeholder(tf.float32)
self.optimizer = tf.train.AdamOptimizer(
learning_rate=self.learning_rate).minimize(self.loss)
And here is my TF2 code:
l2_penalty_beta = 1e-4
# Define the tensorflow neural network
# 1. Input:
self.inputs = tf.keras.Input( shape=(4, board_height, board_width), dtype=tf.dtypes.float32)
self.transposed_inputs = tf.keras.layers.Lambda( lambda x: tf.transpose(x, [0, 2, 3, 1]) )(self.inputs)
# 2. Common Networks Layers
self.conv1 = tf.keras.layers.Conv2D( name="conv1",
filters=32,
kernel_size=(3, 3),
padding="same",
data_format="channels_last",
activation=tf.keras.activations.relu,
kernel_regularizer=tf.keras.regularizers.L2(l2_penalty_beta))(self.transposed_inputs)
self.conv2 = tf.keras.layers.Conv2D( name="conv2",
filters=64,
kernel_size=(3, 3),
padding="same",
data_format="channels_last",
activation=tf.keras.activations.relu,
kernel_regularizer=tf.keras.regularizers.L2(l2_penalty_beta))(self.conv1)
self.conv3 = tf.keras.layers.Conv2D( name="conv3",
filters=128,
kernel_size=(3, 3),
padding="same",
data_format="channels_last",
activation=tf.keras.activations.relu,
kernel_regularizer=tf.keras.regularizers.L2(l2_penalty_beta))(self.conv2)
# 3-1 Action Networks
self.action_conv = tf.keras.layers.Conv2D( name="action_conv",
filters=4,
kernel_size=(1, 1),
padding="same",
data_format="channels_last",
activation=tf.keras.activations.relu,
kernel_regularizer=tf.keras.regularizers.L2(l2_penalty_beta))(self.conv3)
# flatten tensor
self.action_conv_flat = tf.keras.layers.Reshape( (-1, 4 * board_height * board_width), name="action_conv_flat" )(self.action_conv)
# 3-2 Fully connected layer; the output is the log probability of moves
# on each slot on the board
self.action_fc = tf.keras.layers.Dense( board_height * board_width,
activation=tf.nn.log_softmax,
name="action_fc",
kernel_regularizer=tf.keras.regularizers.L2(l2_penalty_beta))(self.action_conv_flat)
# 4 Evaluation Networks
self.evaluation_conv = tf.keras.layers.Conv2D( name="evaluation_conv",
filters=2,
kernel_size=(1, 1),
padding="same",
data_format="channels_last",
activation=tf.keras.activations.relu,
kernel_regularizer=tf.keras.regularizers.L2(l2_penalty_beta))(self.conv3)
self.evaluation_conv_flat = tf.keras.layers.Reshape( (-1, 2 * board_height * board_width),
name="evaluation_conv_flat" )(self.evaluation_conv)
self.evaluation_fc1 = tf.keras.layers.Dense( 64,
activation=tf.keras.activations.relu,
name="evaluation_fc1",
kernel_regularizer=tf.keras.regularizers.L2(l2_penalty_beta))(self.evaluation_conv_flat)
self.evaluation_fc2 = tf.keras.layers.Dense( 1,
activation=tf.keras.activations.tanh,
name="evaluation_fc2",
kernel_regularizer=tf.keras.regularizers.L2(l2_penalty_beta))(self.evaluation_fc1)
self.outputs = tf.keras.layers.Concatenate()([self.action_fc, self.evaluation_fc2])
self.model = tf.keras.Model(inputs=self.inputs, outputs=self.outputs, name="policy_value_model")
self.model.summary()
def custom_loss(labels, predictions):
expected_act_probs, expected_value = tf.split(labels, [self.board_height*self.board_width, -1], axis=2)
pred_act_probs, pred_value = tf.split(predictions, [self.board_height*self.board_width, -1], axis=2)
value_loss = tf.losses.mean_squared_error(expected_value[0], pred_value[0])
policy_loss = tf.negative(tf.reduce_mean(
tf.reduce_sum(tf.multiply(expected_act_probs, pred_act_probs), 1)))
return value_loss + policy_loss
#print(tf.autograph.to_code(custom_loss))
self.model.compile(optimizer=tf.keras.optimizers.Adam(),
loss=tf.function(custom_loss),
metrics=['accuracy'])
Here is the summary of this model:
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_1 (InputLayer) [(None, 4, 15, 15)] 0 []
lambda (Lambda) (None, 15, 15, 4) 0 ['input_1[0][0]']
conv1 (Conv2D) (None, 15, 15, 32) 1184 ['lambda[0][0]']
conv2 (Conv2D) (None, 15, 15, 64) 18496 ['conv1[0][0]']
conv3 (Conv2D) (None, 15, 15, 128) 73856 ['conv2[0][0]']
evaluation_conv (Conv2D) (None, 15, 15, 2) 258 ['conv3[0][0]']
action_conv (Conv2D) (None, 15, 15, 4) 516 ['conv3[0][0]']
evaluation_conv_flat (Reshape) (None, 1, 450) 0 ['evaluation_conv[0][0]']
action_conv_flat (Reshape) (None, 1, 900) 0 ['action_conv[0][0]']
evaluation_fc1 (Dense) (None, 1, 64) 28864 ['evaluation_conv_flat[0][0]']
action_fc (Dense) (None, 1, 225) 202725 ['action_conv_flat[0][0]']
evaluation_fc2 (Dense) (None, 1, 1) 65 ['evaluation_fc1[0][0]']
concatenate (Concatenate) (None, 1, 226) 0 ['action_fc[0][0]',
'evaluation_fc2[0][0]']
==================================================================================================
As you can see, the TF1 model outputs two tensors (action_fc and evaluation_fc2). In the TF2 model I added a Concatenate layer at the end to combine them into a single tensor, so that I can use a single loss function on them.
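For completeness, the training targets then have to be packed the same way before calling fit(). A sketch of what I mean, assuming mcts_probs has shape (batch, 225) and winners has shape (batch, 1):

# pack the targets to mirror the concatenated model output
labels = tf.concat([mcts_probs, winners], axis=-1)
self.model.fit(states, labels, epochs=1, verbose=2)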
The loss function in the TF1 model sums up three parts. The l2_penalty is the L2 loss of all the weights in the model:
l2_penalty_beta = 1e-4
vars = tf.trainable_variables()
l2_penalty = l2_penalty_beta * tf.add_n(
[tf.nn.l2_loss(v) for v in vars if 'bias' not in v.name.lower()])
self.loss = self.value_loss + self.policy_loss + l2_penalty
In the TF2 model, every trainable layer is given kernel_regularizer=tf.keras.regularizers.L2(l2_penalty_beta). Is that the same as the TF1 model, or have I made a mistake?
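For reference, my reading of the docs is that tf.nn.l2_loss includes a factor of 1/2 that the Keras regularizer does not, so an exact match might need beta/2:

import tensorflow as tf

beta = 1e-4
v = tf.constant([1.0, 2.0, 3.0])  # stand-in for a kernel tensor

# TF1 penalty: tf.nn.l2_loss(v) == sum(v**2) / 2       -> beta * 7.0
tf1_penalty = beta * tf.nn.l2_loss(v)

# Keras regularizer: L2(beta)(v) == beta * sum(v**2)   -> beta * 14.0
tf2_penalty = tf.keras.regularizers.L2(beta)(v)

# halving the coefficient reproduces the TF1 value     -> beta * 7.0
tf2_matched = tf.keras.regularizers.L2(beta / 2)(v)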
The TF2 model will be executed in an environment without a Python interpreter; that is, the model will be compiled into a graph. I guess my loss function has to be stateless -- it must not rely on any variable outside the function scope.
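If my understanding is right, capturing plain Python ints (like the board size) is safe because they are frozen into the traced graph as constants. A sketch of the stateless version I have in mind:

num_moves = board_height * board_width  # plain Python int, baked in at trace time

@tf.function
def stateless_loss(labels, predictions):
    # split the concatenated (log-probs, value) tensors back apart
    true_probs, true_value = tf.split(labels, [num_moves, 1], axis=-1)
    pred_log_probs, pred_value = tf.split(predictions, [num_moves, 1], axis=-1)
    value_loss = tf.reduce_mean(tf.square(true_value - pred_value))
    policy_loss = -tf.reduce_mean(
        tf.reduce_sum(true_probs * pred_log_probs, axis=-1))
    return value_loss + policy_loss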
UPDATE:
Here are the training logs from the fresh TF2 model. The loss looks abnormal: it is too small at the start, whereas the loss is greater than 4.0 in the TF1 model. Will the L2 regularizer be part of the loss in TF2?
batch i:2, episode_len:113
1/1 - 0s - loss: 0.7342 - accuracy: 0.0000e+00 - 18ms/epoch - 18ms/step
1/1 - 0s - loss: 0.4714 - accuracy: 0.0000e+00 - 12ms/epoch - 12ms/step
1/1 - 0s - loss: 0.1785 - accuracy: 0.0000e+00 - 14ms/epoch - 14ms/step
1/1 - 0s - loss: 0.0829 - accuracy: 0.0000e+00 - 30ms/epoch - 30ms/step
1/1 - 0s - loss: 0.0743 - accuracy: 0.0000e+00 - 13ms/epoch - 13ms/step
kl:0.00178,lr_multiplier:2.250,loss:[0.07430928945541382]
batch i:3, episode_len:92
1/1 - 0s - loss: 0.0764 - accuracy: 1.0000 - 19ms/epoch - 19ms/step
1/1 - 0s - loss: 0.0718 - accuracy: 1.0000 - 28ms/epoch - 28ms/step
1/1 - 0s - loss: 0.0705 - accuracy: 1.0000 - 12ms/epoch - 12ms/step
1/1 - 0s - loss: 0.0693 - accuracy: 1.0000 - 12ms/epoch - 12ms/step
1/1 - 0s - loss: 0.0681 - accuracy: 1.0000 - 34ms/epoch - 34ms/step
kl:0.00068,lr_multiplier:3.375,loss:[0.06813239306211472]
batch i:4, episode_len:118
1/1 - 0s - loss: 0.0676 - accuracy: 1.0000 - 16ms/epoch - 16ms/step
1/1 - 0s - loss: 0.0665 - accuracy: 1.0000 - 12ms/epoch - 12ms/step
1/1 - 0s - loss: 0.0654 - accuracy: 1.0000 - 12ms/epoch - 12ms/step
1/1 - 0s - loss: 0.0643 - accuracy: 1.0000 - 15ms/epoch - 15ms/step
1/1 - 0s - loss: 0.0631 - accuracy: 1.0000 - 11ms/epoch - 11ms/step
kl:0.00425,lr_multiplier:5.062,loss:[0.06307009607553482]
batch i:5, episode_len:84
1/1 - 0s - loss: 4.0628 - accuracy: 0.0000e+00 - 17ms/epoch - 17ms/step
1/1 - 0s - loss: 4.0618 - accuracy: 0.0000e+00 - 12ms/epoch - 12ms/step
1/1 - 0s - loss: 4.0606 - accuracy: 0.0000e+00 - 16ms/epoch - 16ms/step
1/1 - 0s - loss: 4.0592 - accuracy: 0.0000e+00 - 11ms/epoch - 11ms/step
1/1 - 0s - loss: 4.0577 - accuracy: 0.0000e+00 - 13ms/epoch - 13ms/step
kl:0.07804,lr_multiplier:3.375,loss:[4.057666778564453]
batch i:6, episode_len:96
1/1 - 0s - loss: 0.0599 - accuracy: 1.0000 - 17ms/epoch - 17ms/step
1/1 - 0s - loss: 0.0589 - accuracy: 1.0000 - 11ms/epoch - 11ms/step
1/1 - 0s - loss: 0.0579 - accuracy: 1.0000 - 12ms/epoch - 12ms/step
1/1 - 0s - loss: 0.0568 - accuracy: 1.0000 - 31ms/epoch - 31ms/step
1/1 - 0s - loss: 0.0556 - accuracy: 1.0000 - 11ms/epoch - 11ms/step
kl:0.06898,lr_multiplier:2.250,loss:[0.055556993931531906]
batch i:7, episode_len:62
1/1 - 0s - loss: 0.0577 - accuracy: 1.0000 - 28ms/epoch - 28ms/step
1/1 - 0s - loss: 0.0569 - accuracy: 1.0000 - 11ms/epoch - 11ms/step
1/1 - 0s - loss: 0.0559 - accuracy: 1.0000 - 14ms/epoch - 14ms/step
1/1 - 0s - loss: 0.0549 - accuracy: 1.0000 - 16ms/epoch - 16ms/step
1/1 - 0s - loss: 0.0538 - accuracy: 1.0000 - 19ms/epoch - 19ms/step
kl:0.03346,lr_multiplier:2.250,loss:[0.05379907414317131]
batch i:8, episode_len:118
1/1 - 0s - loss: 0.0552 - accuracy: 1.0000 - 28ms/epoch - 28ms/step
1/1 - 0s - loss: 0.0543 - accuracy: 1.0000 - 48ms/epoch - 48ms/step
1/1 - 0s - loss: 0.0532 - accuracy: 1.0000 - 12ms/epoch - 12ms/step
1/1 - 0s - loss: 0.0521 - accuracy: 1.0000 - 11ms/epoch - 11ms/step
1/1 - 0s - loss: 0.0510 - accuracy: 1.0000 - 11ms/epoch - 11ms/step
kl:0.04336,lr_multiplier:1.500,loss:[0.051018256694078445]
CodePudding user response:
Yes, I think that's the right way to add L2 regularization to the weights (but not the biases) in TF2.
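On the UPDATE question: Keras collects each layer's regularization penalty in model.losses and adds their sum to the loss reported by fit(), on top of whatever your compiled loss returns. Roughly:

# each kernel_regularizer contributes one scalar tensor to model.losses;
# fit() adds their sum to the training loss automatically
reg_terms = self.model.losses
print(len(reg_terms), float(tf.add_n(reg_terms)))  # count and total penalty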
My only thought is that you don't have to concatenate the output tensors in TF2; you can define
self.outputs = [self.action_fc, self.evaluation_fc2]
if that's easier. (Then also specify your losses as a length-2 list, etc.)
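For example, a minimal sketch of the two-output setup (the layer names follow your code; the per-output losses replace the concatenate-and-split in your custom_loss):

self.model = tf.keras.Model(inputs=self.inputs,
                            outputs=[self.action_fc, self.evaluation_fc2])

def policy_loss(mcts_probs, pred_log_probs):
    # cross-entropy between the MCTS visit probabilities and the
    # predicted log-probabilities (same as the TF1 policy_loss)
    return -tf.reduce_mean(
        tf.reduce_sum(mcts_probs * pred_log_probs, axis=-1))

self.model.compile(optimizer=tf.keras.optimizers.Adam(),
                   loss=[policy_loss, tf.keras.losses.MeanSquaredError()])
# fit() then takes a matching list of targets:
# self.model.fit(states, [mcts_probs, winners], ...)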