No gradients provided for loss with custom gradient


I'm trying to define a custom gradient for an argmax that I need to call in a loss function. However, with the following code the gradients computed for the model's weights come back as None.

Error raised during .fit(...):

ValueError: No gradients provided for any variable: (['dense/kernel:0', 'dense/bias:0'],). Provided `grads_and_vars` is ((None, <tf.Variable 'dense/kernel:0' shape=(10, 10) dtype=float32>), (None, <tf.Variable 'dense/bias:0' shape=(10,) dtype=float32>)).

Code:

import numpy as np
import tensorflow as tf
keras = tf.keras


@tf.custom_gradient
def dummy_argmax(x):
  assert len(x.shape) == 2
  max_index = tf.cast(tf.argmax(x, axis=1), tf.int32)

  @tf.function
  def dummy_grad(dy):
    return tf.ones_like(x) # Just to reproduce the error

  return max_index, dummy_grad


def loss(y_true, y_pred):
  return tf.abs(tf.cast(dummy_argmax(y_pred), tf.float32) - y_true)


train_size = 100
input_dim = 10
label = 7

train_x = np.random.rand(train_size, input_dim)
train_y = np.ones(train_size) * label

dummy_model = keras.Sequential()
dummy_model.add(keras.Input((input_dim,)))
dummy_model.add(keras.layers.Dense(input_dim))
dummy_model.add(keras.layers.Softmax())

dummy_model.compile(optimizer="Adam", loss=loss)
dummy_model.fit(x=train_x, y=train_y, epochs=10)

Is this the correct way to define and use a custom gradient? If not, what am I missing? I suspect that TensorFlow fails to detect that the model's variables are being used to compute the loss, but if so, I'm not sure how to specify the correct data dependency in the dummy_argmax definition.
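
For what it's worth, the same None gradients show up outside of fit. Here is a minimal check using the model, loss, and data defined above (train_y is cast to float32 so the subtraction dtypes match):

with tf.GradientTape() as tape:
  y_pred = dummy_model(train_x, training=True)
  loss_value = tf.reduce_mean(loss(train_y.astype("float32"), y_pred))

# Prints [None, None]: no gradient reaches the Dense kernel or bias.
print(tape.gradient(loss_value, dummy_model.trainable_variables))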

CodePudding user response:

Almost there. The cast is the problem: dummy_argmax returns an int32 tensor, and casting from int to float is also not differentiable, so the gradient chain is cut before it ever reaches your custom gradient. Move the cast inside the custom gradient block as well:

import numpy as np
import tensorflow as tf
keras = tf.keras


@tf.custom_gradient
def dummy_argmax(x):
  assert len(x.shape) == 2
  max_index = tf.cast(tf.argmax(x, axis=1), tf.int32)

  @tf.function
  def dummy_grad(dy):
    return tf.ones_like(x) # Dummy gradient, kept only for demonstration

  return tf.cast(max_index, tf.float32), dummy_grad


def loss(y_true, y_pred):
  return tf.abs(dummy_argmax(y_pred) - y_true)


train_size = 100
input_dim = 10
label = 7

train_x = np.random.rand(train_size, input_dim)
train_y = np.ones(train_size) * label

dummy_model = keras.Sequential()
dummy_model.add(keras.Input((input_dim,)))
dummy_model.add(keras.layers.Dense(input_dim))
dummy_model.add(keras.layers.Softmax())

dummy_model.compile(optimizer="Adam", loss=loss)
dummy_model.fit(x=train_x, y=train_y, epochs=10)
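
One further note: the dummy gradient above ignores dy, the upstream gradient, so every weight receives the same constant surrogate signal. If you want the surrogate to scale with the incoming gradient, a minimal sketch along the same lines (argmax_with_surrogate is just an illustrative name, not part of the code above) would be:

@tf.custom_gradient
def argmax_with_surrogate(x):
  max_index = tf.cast(tf.argmax(x, axis=1), tf.int32)

  def grad(dy):
    # dy has shape (batch,); broadcast it across each row so every
    # logit receives the upstream gradient of its sample.
    return dy[:, tf.newaxis] * tf.ones_like(x)

  return tf.cast(max_index, tf.float32), grad

Swapping this into the loss keeps the fix above intact while letting the gradient of tf.abs flow through the surrogate instead of being replaced by a constant.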