No gradients provided for any variables - Custom loss function - CodePudding

I am trying to train a network with a custom loss function, and I am getting this error:

ValueError: No gradients provided for any variable: ['conv2d/kernel:0', 'conv2d/bias:0', 'conv2d_1/kernel:0', 'conv2d_1/bias:0', 'conv2d_2/kernel:0', 'conv2d_2/bias:0', 'conv2d_3/kernel:0', 'conv2d_3/bias:0', 'conv2d_4/kernel:0', 'conv2d_4/bias:0', 'dense/kernel:0', 'dense/bias:0', 'dense_1/kernel:0', 'dense_1/bias:0'].

The custom loss function is:

def cosine_sim_cal(self, vec1, vec2):
    """Return the cosine similarity of ``vec1`` and ``vec2`` via a Keras metric.

    Each input is wrapped in a one-element list before conversion, so the
    metric receives tensors with an extra leading axis and reduces over
    ``axis=1``.

    NOTE(review): the returned value comes from a metric object's
    ``update_state`` / ``result`` pair rather than from plain tensor ops on
    the inputs. This looks like the reason ``tape.gradient`` finds no path
    from the loss back to the model weights ("No gradients provided for any
    variable") -- confirm.
    """
    vec1 = tf.convert_to_tensor([vec1])
    vec2 = tf.convert_to_tensor([vec2])
    # A fresh metric instance is created on every call, so nothing is
    # accumulated across calls.
    cosine_loss = tf.keras.metrics.CosineSimilarity(axis=1)
    cosine_loss.update_state(vec1,vec2)
    return cosine_loss.result()

def triplets_loss(self, y_pred, m):
    """Sum a margin-based triplet loss over every triplet in ``y_pred``.

    Args:
        y_pred: Tensor indexed as ``y_pred[i, k, :]``; ``k`` selects the
            member of triplet ``i`` (presumably 0 = anchor, 1 = positive,
            2 = negative, judging by the ``d_a_p`` / ``d_a_n`` names).
        m: Margin added to the similarity difference.

    Returns:
        The sum over all triplets of ``max(sim_0_1 - sim_0_2 + m, eps)``,
        where ``sim`` is whatever ``self.cosine_sim_cal`` returns.
    """
    # NOTE(review): with a similarity (higher = closer), minimising
    # ``d_a_p - d_a_n + m`` pushes index 1 *away* from index 0. If index 1
    # is the positive sample the two terms may need swapping -- confirm.
    eps = tf.keras.backend.epsilon()
    loss = 0.0
    for i in range(len(y_pred)):
        d_a_p = self.cosine_sim_cal(y_pred[i, 0, :], y_pred[i, 1, :])
        d_a_n = self.cosine_sim_cal(y_pred[i, 0, :], y_pred[i, 2, :])
        # Accumulate the per-triplet hinge term; clamping at eps (not 0)
        # keeps every term strictly positive.
        loss += tf.math.maximum((d_a_p - d_a_n + m), eps)
    return loss

The shape of y_pred is TensorShape([180, 3, 128]) and m is a float value. The loss function itself runs and returns a scalar such as tf.Tensor(37.054775, shape=(), dtype=float32).

My training loop is:

 # Custom training loop: for every epoch, draw `self.training_steps` triplet
 # batches, take one optimizer step per batch, and record the mean training
 # loss of the epoch. `m` and `optimizer` are expected to be defined by the
 # surrounding code.
 model = self.model
 train_loss_list = []
 validation_loss_list = []
 train_triplet_gen_instance = Triplet_Generator(x_data=self.train_class_dict, batch=self.batch)
 val_triplet_gen_instance = Triplet_Generator(x_data=self.val_class_dict, batch=self.batch)

 for epoch in range(self.epochs):
    total_train_loss = 0.0
    total_val_loss = 0.0

    for step in range(self.training_steps):
        x_train_batch = train_triplet_gen_instance.generate_batch()
        # The forward pass and the loss must both run inside the tape so
        # that the loss can be differentiated w.r.t. the model weights.
        with tf.GradientTape() as tape:
            train_logits = model(x_train_batch, training=True)
            train_loss_value = self.triplets_loss(train_logits, m)
        # Running sum of the batch losses for this epoch (bookkeeping only,
        # so it does not need to be recorded on the tape).
        total_train_loss += train_loss_value
        grads = tape.gradient(train_loss_value, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))
        if step ==0:
            # `step` is zero-based: divide by the number of batches seen so
            # far (step + 1) to avoid dividing by zero.
            print('Epoch: {}, Step: {}, training_loss:{}'.format(epoch, step, str(total_train_loss/(step + 1))))
    # Mean loss over the whole epoch, recorded once per epoch.
    mean_training_loss = tf.divide(total_train_loss, self.training_steps)
    train_loss_list.append(mean_training_loss.numpy())

x_train_batch is a tuple of length 3. Every element of this tuple has shape (180, 200, 200, 3).

I am not able to figure out the bug in the code. If I change my loss function to a distance-based loss function, the code works.

CodePudding user response:

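I found the problem in the custom loss function. It seems that tf.keras.metrics.CosineSimilarity(axis=1) is not differentiable: a metric object accumulates its result through update_state() rather than returning a tensor computed directly from its inputs, so no gradients could be calculated through it. To fix this, I rewrote the loss function using plain tensor operations: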

def triplets_loss(self, y_pred, m):
    """Sum a cosine-similarity triplet loss over a batch, using only tensor ops.

    Every step is an ordinary TensorFlow op applied to ``y_pred``, so the
    result stays connected to the model output and can be differentiated by
    ``tf.GradientTape``.

    Args:
        y_pred: Tensor indexed as ``y_pred[:, k, :]``; ``k`` selects the
            member of each triplet (presumably 0 = anchor, 1 = positive,
            2 = negative, judging by the ``d_a_p`` / ``d_a_n`` names).
        m: Margin added to the similarity difference.

    Returns:
        Scalar tensor: the sum over the batch of
        ``max(cos(y0, y1) - cos(y0, y2) + m, eps)``.
    """
    # NOTE(review): with a similarity (higher = closer), minimising
    # ``d_a_p - d_a_n + m`` pushes index 1 *away* from index 0. If index 1
    # is the positive sample the two terms may need swapping -- confirm.
    eps = tf.keras.backend.epsilon()
    anchor = y_pred[:, 0, :]
    positive = y_pred[:, 1, :]
    negative = y_pred[:, 2, :]
    # Row-wise cosine similarity, computed for the whole batch at once
    # instead of mapping a Python lambda over individual rows.
    anchor_norm = tf.norm(anchor, axis=-1)
    d_a_p = tf.reduce_sum(anchor * positive, axis=-1) / (anchor_norm * tf.norm(positive, axis=-1))
    d_a_n = tf.reduce_sum(anchor * negative, axis=-1) / (anchor_norm * tf.norm(negative, axis=-1))
    loss = tf.reduce_sum(tf.math.maximum(d_a_p - d_a_n + m, eps))
    return loss

With the new loss function, I was able to continue with the training.