Tensorflow: Custom Metrics: ValueError: None values not supported-CodePudding

I'm trying to extend this model to include custom metrics. I added false positive count, true positive count, and accuracy. The accuracy step is failing and throwing errors. I've tried a bunch of modifications but was unable to get it working. This is my code:

class SiameseModel(Model):
    """
    The Siamese Network model with a custom training and testing loops.

    Computes the triplet loss using the three embeddings produced by the
    Siamese Network.

    The triplet loss is defined as:
       L(A, P, N) = max(‖f(A) - f(P)‖² - ‖f(A) - f(N)‖²   margin, 0)
    """

    def __init__(self, siamese_network, margin=0.5):
        super().__init__()
        self.siamese_network = siamese_network
        self.margin = margin
        self.loss_tracker = metrics.Mean(name="loss")
        self.tp_tracker = metrics.Mean(name="tp")
        self.fp_tracker = metrics.Mean(name="fp")
        self.accuracy_tracker = metrics.Mean(name="accuracy")

    def call(self, inputs):
        return self.siamese_network(inputs)

    def train_step(self, data):
        # GradientTape is a context manager that records every operation that
        # you do inside. We are using it here to compute the loss so we can get
        # the gradients and apply them using the optimizer specified in
        # `compile()`.
        with tf.GradientTape() as tape:
            loss = self._compute_loss(data)
            tp, fp, accuracy = self._compute_metrics(data)

        # Storing the gradients of the loss function with respect to the
        # weights/parameters.
        gradients = tape.gradient(loss, self.siamese_network.trainable_weights)

        # Applying the gradients on the model using the specified optimizer
        self.optimizer.apply_gradients(
            zip(gradients, self.siamese_network.trainable_weights)
        )

        # Let's update and return the training loss metric.
        self.loss_tracker.update_state(loss)
        self.tp_tracker.update_state(tp)
        self.fp_tracker.update_state(fp)
        self.accuracy_tracker.update_state(accuracy)
        return {
            "loss": self.loss_tracker.result(),
            "tp": self.tp_tracker.result(),
            "fp": self.fp_tracker.result(),
            "accuracy": self.accuracy_tracker.result()
        }

    def test_step(self, data):
        loss = self._compute_loss(data)
        tp, fp, accuracy = self._compute_metrics(data)

        # Let's update and return the loss metric.
        self.loss_tracker.update_state(loss)
        self.tp_tracker.update_state(tp)
        self.fp_tracker.update_state(fp)
        self.accuracy_tracker.update_state(accuracy)

        return {
            "loss": self.loss_tracker.result(),
            "tp": self.tp_tracker.result(),
            "fp": self.fp_tracker.result(),
            "accuracy": self.accuracy_tracker.result()
        }

    def _compute_metrics(self, data):
        ap_distance, an_distance = self.siamese_network(data)

        tp = tf.reduce_sum(tf.cast(tf.math.greater(ap_distance, an_distance), tf.int8))
        fp = tf.reduce_sum(tf.cast(tf.math.less_equal(ap_distance, an_distance), tf.int8))
        accuracy = tf.cond(tp == 0, lambda: 0, lambda: tf.math.divide_no_nan(fp, ap_distance.get_shape()[0]))

        return tp, fp, accuracy

    def _compute_loss(self, data):
        # The output of the network is a tuple containing the distances
        # between the anchor and the positive example, and the anchor and
        # the negative example.
        ap_distance, an_distance = self.siamese_network(data)

        # Computing the Triplet Loss by subtracting both distances and
        # making sure we don't get a negative value.
        loss = ap_distance - an_distance
        loss = tf.maximum(loss   self.margin, 0.0)

        return loss

    @property
    def metrics(self):
        # We need to list our metrics here so the `reset_states()` can be
        # called automatically.
        return [self.loss_tracker, self.tp_tracker, self.fp_tracker, self.accuracy_tracker]

And this is the error:

Epoch 1/100
Traceback (most recent call last):
  File "/code/main.py", line 136, in <module>
    args.func(args)  # call the default function
  File "/code/functions.py", line 53, in train_triplets
    train_triplets_(split_path)
  File "/code/model/train_triplets/model.py", line 235, in train
    siamese_model.fit(
  File "/usr/local/lib/python3.9/dist-packages/keras/engine/training.py", line 1184, in fit
    tmp_logs = self.train_function(iterator)
  File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/eager/def_function.py", line 885, in __call__
    result = self._call(*args, **kwds)
  File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/eager/def_function.py", line 933, in _call
    self._initialize(args, kwds, add_initializers_to=initializers)
  File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/eager/def_function.py", line 759, in _initialize
    self._stateful_fn._get_concrete_function_internal_garbage_collected(  # pylint: disable=protected-access
  File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/eager/function.py", line 3066, in _get_concrete_function_internal_garbage_collected
    graph_function, _ = self._maybe_define_function(args, kwargs)
  File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/eager/function.py", line 3463, in _maybe_define_function
    graph_function = self._create_graph_function(args, kwargs)
  File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/eager/function.py", line 3298, in _create_graph_function
    func_graph_module.func_graph_from_py_func(
  File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/framework/func_graph.py", line 1007, in func_graph_from_py_func
    func_outputs = python_func(*func_args, **func_kwargs)
  File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/eager/def_function.py", line 668, in wrapped_fn
    out = weak_wrapped_fn().__wrapped__(*args, **kwds)
  File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/framework/func_graph.py", line 994, in wrapper
    raise e.ag_error_metadata.to_exception(e)
ValueError: in user code:

    /usr/local/lib/python3.9/dist-packages/keras/engine/training.py:853 train_function  *
        return step_function(self, iterator)
    /usr/local/lib/python3.9/dist-packages/keras/engine/training.py:842 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /usr/local/lib/python3.9/dist-packages/tensorflow/python/distribute/distribute_lib.py:1286 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /usr/local/lib/python3.9/dist-packages/tensorflow/python/distribute/distribute_lib.py:2849 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /usr/local/lib/python3.9/dist-packages/tensorflow/python/distribute/distribute_lib.py:3632 _call_for_each_replica
        return fn(*args, **kwargs)
    /usr/local/lib/python3.9/dist-packages/keras/engine/training.py:835 run_step  **
        outputs = model.train_step(data)
    /code/model/train_triplets/model.py:61 train_step
        tp, fp, accuracy = self._compute_metrics(data)
    /code/model/train_triplets/model.py:109 _compute_metrics
        accuracy = tf.cond(tf.cast(ap_distance.get_shape() == None, tf.int8), lambda: 0, lambda: tf.math.divide_no_nan(fp, ap_distance.get_shape()[0]))
    /usr/local/lib/python3.9/dist-packages/tensorflow/python/util/dispatch.py:206 wrapper
        return target(*args, **kwargs)
    /usr/local/lib/python3.9/dist-packages/tensorflow/python/ops/control_flow_ops.py:1438 cond_for_tf_v2
        return cond(pred, true_fn=true_fn, false_fn=false_fn, strict=True, name=name)
    /usr/local/lib/python3.9/dist-packages/tensorflow/python/util/dispatch.py:206 wrapper
        return target(*args, **kwargs)
    /usr/local/lib/python3.9/dist-packages/tensorflow/python/util/deprecation.py:549 new_func
        return func(*args, **kwargs)
    /usr/local/lib/python3.9/dist-packages/tensorflow/python/ops/control_flow_ops.py:1254 cond
        return cond_v2.cond_v2(pred, true_fn, false_fn, name)
    /usr/local/lib/python3.9/dist-packages/tensorflow/python/ops/cond_v2.py:89 cond_v2
        false_graph = func_graph_module.func_graph_from_py_func(
    /usr/local/lib/python3.9/dist-packages/tensorflow/python/framework/func_graph.py:1007 func_graph_from_py_func
        func_outputs = python_func(*func_args, **func_kwargs)
    /code/model/train_triplets/model.py:109 <lambda>
        accuracy = tf.cond(tf.cast(ap_distance.get_shape() == None, tf.int8), lambda: 0, lambda: tf.math.divide_no_nan(fp, ap_distance.get_shape()[0]))
    /usr/local/lib/python3.9/dist-packages/tensorflow/python/util/dispatch.py:206 wrapper
        return target(*args, **kwargs)
    /usr/local/lib/python3.9/dist-packages/tensorflow/python/util/dispatch.py:206 wrapper
        return target(*args, **kwargs)
    /usr/local/lib/python3.9/dist-packages/tensorflow/python/ops/math_ops.py:1598 div_no_nan
        y = ops.convert_to_tensor(y, name="y", dtype=x.dtype.base_dtype)
    /usr/local/lib/python3.9/dist-packages/tensorflow/python/profiler/trace.py:163 wrapped
        return func(*args, **kwargs)
    /usr/local/lib/python3.9/dist-packages/tensorflow/python/framework/ops.py:1566 convert_to_tensor
        ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
    /usr/local/lib/python3.9/dist-packages/tensorflow/python/framework/constant_op.py:346 _constant_tensor_conversion_function
        return constant(v, dtype=dtype, name=name)
    /usr/local/lib/python3.9/dist-packages/tensorflow/python/framework/constant_op.py:271 constant
        return _constant_impl(value, dtype, shape, name, verify_shape=False,
    /usr/local/lib/python3.9/dist-packages/tensorflow/python/framework/constant_op.py:288 _constant_impl
        tensor_util.make_tensor_proto(
    /usr/local/lib/python3.9/dist-packages/tensorflow/python/framework/tensor_util.py:445 make_tensor_proto
        raise ValueError("None values not supported.")

    ValueError: None values not supported.

I was assuming ap_distance.get_shape()[0] is giving an error because shape is None. So I tried to check it first.

CodePudding user response：

I think you have two minor issues, that can easily be fixed:

def _compute_metrics(self, data):

  ap_distance, an_distance = self.siamese_network(data)
  tp = tf.reduce_sum(tf.cast(tf.math.greater(ap_distance, an_distance), tf.int8))
  fp = tf.reduce_sum(tf.cast(tf.math.less_equal(ap_distance, an_distance), tf.float32))
  accuracy = tf.cond(tp == 0, lambda: 0.0, lambda: tf.math.divide_no_nan(fp, tf.cast(tf.shape(ap_distance), tf.float32)[0]))
    
  return tp, fp, accuracy

First, a direct call to ap_distance.get_shape() returns the value None, which you cannot work with. So call tf.shape(ap_distance) instead, which will be evaluated when the fit(*) method is called. Alternatively, you could just hard-code the value 32, since that is your batch_size. Second, tf.math.divide_no_nan expects float values, which is why I replaced your int values with floats. If you really need to work with int then you are going to have use a different method, but I don't think it makes much of a difference in your case. You just need to make sure both operations in tf.cond return the same data type. After changing your code, you should get outputs like this:

1/151 [..............................] - ETA: 1:55:24 - loss: 0.6733 - tp: 10.0000 - fp: 22.0000 - accuracy: 0.687513
2/151 [..............................] - ETA: 1:24:23 - loss: 0.9405 - tp: 11.5000 - fp: 20.5000 - accuracy: 0.640618
3/151 [..............................] - ETA: 1:20:25 - loss: 1.1410 - tp: 13.6667 - fp: 18.3333 - accuracy: 0.572912