I am trying to train a network with a custom loss function and I am getting an error:
ValueError: No gradients provided for any variable: ['conv2d/kernel:0', 'conv2d/bias:0', 'conv2d_1/kernel:0', 'conv2d_1/bias:0', 'conv2d_2/kernel:0', 'conv2d_2/bias:0', 'conv2d_3/kernel:0', 'conv2d_3/bias:0', 'conv2d_4/kernel:0', 'conv2d_4/bias:0', 'dense/kernel:0', 'dense/bias:0', 'dense_1/kernel:0', 'dense_1/bias:0'].
The custom loss function is:
def cosine_sim_cal(self, vec1, vec2):
    """Return the cosine similarity of two vectors via a Keras metric object.

    Args:
        vec1: First vector.
        vec2: Second vector.

    Returns:
        The value reported by ``tf.keras.metrics.CosineSimilarity`` after a
        single ``update_state`` call on the pair.
    """
    # Wrapping each vector in a list adds a leading axis, so the metric's
    # ``axis=1`` runs along the vector's own entries.
    batch_a = tf.convert_to_tensor([vec1])
    batch_b = tf.convert_to_tensor([vec2])

    # A fresh metric is built on every call; the returned value is read back
    # from the metric's accumulated state rather than computed directly.
    metric = tf.keras.metrics.CosineSimilarity(axis=1)
    metric.update_state(batch_a, batch_b)
    return metric.result()
def triplets_loss(self, y_pred, m):
    """Sum a margin-based hinge term over every triplet in the batch.

    Args:
        y_pred: Tensor indexed as ``y_pred[i, k, :]``; slot ``k`` is 0, 1 or 2
            (presumably anchor, positive and negative embeddings, judging by
            the ``d_a_p`` / ``d_a_n`` names -- confirm against the model).
        m: Margin added inside the hinge.

    Returns:
        Scalar: the sum over ``i`` of ``max(d_a_p - d_a_n + m, eps)``.
    """
    eps = tf.keras.backend.epsilon()
    loss = 0.0
    for i in range(len(y_pred)):
        reference = y_pred[i, 0, :]
        d_a_p = self.cosine_sim_cal(reference, y_pred[i, 1, :])
        d_a_n = self.cosine_sim_cal(reference, y_pred[i, 2, :])
        # Accumulate (not overwrite) so the result covers the whole batch,
        # and add the margin inside the hinge.
        # NOTE(review): d_a_p / d_a_n are cosine *similarities* here, yet the
        # hinge treats them like distances -- confirm the intended sign.
        loss += tf.math.maximum((d_a_p - d_a_n + m), eps)
    return loss
The shape of y_pred is TensorShape([180, 3, 128])
and m is a float value. The loss function computes a loss value that looks like tf.Tensor(37.054775, shape=(), dtype=float32)
My training loop is:
# Custom training loop: one optimizer step per generated triplet batch, with
# the mean training loss of each epoch recorded in ``train_loss_list``.
# ``m`` (margin) and ``optimizer`` are expected to be defined by the
# surrounding code.
model = self.model
train_loss_list = []
validation_loss_list = []
train_triplet_gen_instance = Triplet_Generator(x_data=self.train_class_dict, batch=self.batch)
val_triplet_gen_instance = Triplet_Generator(x_data=self.val_class_dict, batch=self.batch)
for epoch in range(self.epochs):
    total_train_loss = 0.0
    total_val_loss = 0.0
    for step in range(self.training_steps):
        x_train_batch = train_triplet_gen_instance.generate_batch()
        # Only the forward pass and the loss need to be recorded on the tape.
        with tf.GradientTape() as tape:
            train_logits = model(x_train_batch, training=True)
            train_loss_value = self.triplets_loss(train_logits, m)
        # Accumulate so the per-epoch mean below covers every step.
        total_train_loss += train_loss_value
        grads = tape.gradient(train_loss_value, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))
        if step % 20 == 0:
            # ``step`` is zero-based, so ``step + 1`` batches have been seen;
            # dividing by ``step`` itself would divide by zero at step 0.
            print('Epoch: {}, Step: {}, training_loss:{}'.format(epoch, step, str(total_train_loss / (step + 1))))
    mean_training_loss = tf.divide(total_train_loss, self.training_steps)
    train_loss_list.append(mean_training_loss.numpy())
x_train_batch is a tuple of length 3. Every element of this tuple has shape (180, 200, 200, 3).
I am not able to figure out the bug in the code. If I change my loss function to a distance-based loss function, the code works.
CodePudding user response:
I found the problem in the custom loss function. It seems that tf.keras.metrics.CosineSimilarity(axis=1) is not differentiable, which is why no gradients were being calculated. To fix this, I rewrote the function as:
def triplets_loss(self, y_pred, m):
    """Sum a margin-based hinge term over every triplet in the batch.

    The cosine similarities are computed with plain tensor ops (element-wise
    product, ``reduce_sum``, ``tf.norm``), so the result stays connected to
    ``y_pred`` for gradient computation.

    Args:
        y_pred: Tensor indexed as ``y_pred[:, k, :]``; slot ``k`` is 0, 1 or 2
            (presumably anchor, positive and negative embeddings, judging by
            the ``d_a_p`` / ``d_a_n`` names -- confirm against the model).
        m: Margin added inside the hinge.

    Returns:
        Scalar: the sum over the batch of ``max(d_a_p - d_a_n + m, eps)``.
    """
    eps = tf.keras.backend.epsilon()

    first = y_pred[:, 0, :]
    second = y_pred[:, 1, :]
    third = y_pred[:, 2, :]

    # Row-wise cosine similarity: dot product over the last axis divided by
    # the product of the two Euclidean norms. Computed for the whole batch at
    # once instead of mapping a Python lambda over individual rows.
    first_norm = tf.norm(first, axis=-1)
    d_a_p = tf.reduce_sum(first * second, axis=-1) / (first_norm * tf.norm(second, axis=-1))
    d_a_n = tf.reduce_sum(first * third, axis=-1) / (first_norm * tf.norm(third, axis=-1))

    # NOTE(review): d_a_p / d_a_n are cosine *similarities* here, yet the
    # hinge treats them like distances -- confirm the intended sign.
    loss = tf.reduce_sum(tf.math.maximum((d_a_p - d_a_n + m), eps))
    return loss
With the new loss function, I was able to continue with the training.