I'm trying to extend this model to include custom metrics. I added false positive count, true positive count, and accuracy. The accuracy step is failing and throwing errors. I've tried a bunch of modifications but was unable to get it working. This is my code:
class SiameseModel(Model):
"""
The Siamese Network model with a custom training and testing loops.
Computes the triplet loss using the three embeddings produced by the
Siamese Network.
The triplet loss is defined as:
L(A, P, N) = max(‖f(A) - f(P)‖² - ‖f(A) - f(N)‖² margin, 0)
"""
def __init__(self, siamese_network, margin=0.5):
super().__init__()
self.siamese_network = siamese_network
self.margin = margin
self.loss_tracker = metrics.Mean(name="loss")
self.tp_tracker = metrics.Mean(name="tp")
self.fp_tracker = metrics.Mean(name="fp")
self.accuracy_tracker = metrics.Mean(name="accuracy")
def call(self, inputs):
return self.siamese_network(inputs)
def train_step(self, data):
# GradientTape is a context manager that records every operation that
# you do inside. We are using it here to compute the loss so we can get
# the gradients and apply them using the optimizer specified in
# `compile()`.
with tf.GradientTape() as tape:
loss = self._compute_loss(data)
tp, fp, accuracy = self._compute_metrics(data)
# Storing the gradients of the loss function with respect to the
# weights/parameters.
gradients = tape.gradient(loss, self.siamese_network.trainable_weights)
# Applying the gradients on the model using the specified optimizer
self.optimizer.apply_gradients(
zip(gradients, self.siamese_network.trainable_weights)
)
# Let's update and return the training loss metric.
self.loss_tracker.update_state(loss)
self.tp_tracker.update_state(tp)
self.fp_tracker.update_state(fp)
self.accuracy_tracker.update_state(accuracy)
return {
"loss": self.loss_tracker.result(),
"tp": self.tp_tracker.result(),
"fp": self.fp_tracker.result(),
"accuracy": self.accuracy_tracker.result()
}
def test_step(self, data):
loss = self._compute_loss(data)
tp, fp, accuracy = self._compute_metrics(data)
# Let's update and return the loss metric.
self.loss_tracker.update_state(loss)
self.tp_tracker.update_state(tp)
self.fp_tracker.update_state(fp)
self.accuracy_tracker.update_state(accuracy)
return {
"loss": self.loss_tracker.result(),
"tp": self.tp_tracker.result(),
"fp": self.fp_tracker.result(),
"accuracy": self.accuracy_tracker.result()
}
def _compute_metrics(self, data):
ap_distance, an_distance = self.siamese_network(data)
tp = tf.reduce_sum(tf.cast(tf.math.greater(ap_distance, an_distance), tf.int8))
fp = tf.reduce_sum(tf.cast(tf.math.less_equal(ap_distance, an_distance), tf.int8))
accuracy = tf.cond(tp == 0, lambda: 0, lambda: tf.math.divide_no_nan(fp, ap_distance.get_shape()[0]))
return tp, fp, accuracy
def _compute_loss(self, data):
# The output of the network is a tuple containing the distances
# between the anchor and the positive example, and the anchor and
# the negative example.
ap_distance, an_distance = self.siamese_network(data)
# Computing the Triplet Loss by subtracting both distances and
# making sure we don't get a negative value.
loss = ap_distance - an_distance
loss = tf.maximum(loss self.margin, 0.0)
return loss
@property
def metrics(self):
# We need to list our metrics here so the `reset_states()` can be
# called automatically.
return [self.loss_tracker, self.tp_tracker, self.fp_tracker, self.accuracy_tracker]
And this is the error:
Epoch 1/100
Traceback (most recent call last):
File "/code/main.py", line 136, in <module>
args.func(args) # call the default function
File "/code/functions.py", line 53, in train_triplets
train_triplets_(split_path)
File "/code/model/train_triplets/model.py", line 235, in train
siamese_model.fit(
File "/usr/local/lib/python3.9/dist-packages/keras/engine/training.py", line 1184, in fit
tmp_logs = self.train_function(iterator)
File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/eager/def_function.py", line 885, in __call__
result = self._call(*args, **kwds)
File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/eager/def_function.py", line 933, in _call
self._initialize(args, kwds, add_initializers_to=initializers)
File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/eager/def_function.py", line 759, in _initialize
self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/eager/function.py", line 3066, in _get_concrete_function_internal_garbage_collected
graph_function, _ = self._maybe_define_function(args, kwargs)
File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/eager/function.py", line 3463, in _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/eager/function.py", line 3298, in _create_graph_function
func_graph_module.func_graph_from_py_func(
File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/framework/func_graph.py", line 1007, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/eager/def_function.py", line 668, in wrapped_fn
out = weak_wrapped_fn().__wrapped__(*args, **kwds)
File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/framework/func_graph.py", line 994, in wrapper
raise e.ag_error_metadata.to_exception(e)
ValueError: in user code:
/usr/local/lib/python3.9/dist-packages/keras/engine/training.py:853 train_function *
return step_function(self, iterator)
/usr/local/lib/python3.9/dist-packages/keras/engine/training.py:842 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/usr/local/lib/python3.9/dist-packages/tensorflow/python/distribute/distribute_lib.py:1286 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/usr/local/lib/python3.9/dist-packages/tensorflow/python/distribute/distribute_lib.py:2849 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/usr/local/lib/python3.9/dist-packages/tensorflow/python/distribute/distribute_lib.py:3632 _call_for_each_replica
return fn(*args, **kwargs)
/usr/local/lib/python3.9/dist-packages/keras/engine/training.py:835 run_step **
outputs = model.train_step(data)
/code/model/train_triplets/model.py:61 train_step
tp, fp, accuracy = self._compute_metrics(data)
/code/model/train_triplets/model.py:109 _compute_metrics
accuracy = tf.cond(tf.cast(ap_distance.get_shape() == None, tf.int8), lambda: 0, lambda: tf.math.divide_no_nan(fp, ap_distance.get_shape()[0]))
/usr/local/lib/python3.9/dist-packages/tensorflow/python/util/dispatch.py:206 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.9/dist-packages/tensorflow/python/ops/control_flow_ops.py:1438 cond_for_tf_v2
return cond(pred, true_fn=true_fn, false_fn=false_fn, strict=True, name=name)
/usr/local/lib/python3.9/dist-packages/tensorflow/python/util/dispatch.py:206 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.9/dist-packages/tensorflow/python/util/deprecation.py:549 new_func
return func(*args, **kwargs)
/usr/local/lib/python3.9/dist-packages/tensorflow/python/ops/control_flow_ops.py:1254 cond
return cond_v2.cond_v2(pred, true_fn, false_fn, name)
/usr/local/lib/python3.9/dist-packages/tensorflow/python/ops/cond_v2.py:89 cond_v2
false_graph = func_graph_module.func_graph_from_py_func(
/usr/local/lib/python3.9/dist-packages/tensorflow/python/framework/func_graph.py:1007 func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
/code/model/train_triplets/model.py:109 <lambda>
accuracy = tf.cond(tf.cast(ap_distance.get_shape() == None, tf.int8), lambda: 0, lambda: tf.math.divide_no_nan(fp, ap_distance.get_shape()[0]))
/usr/local/lib/python3.9/dist-packages/tensorflow/python/util/dispatch.py:206 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.9/dist-packages/tensorflow/python/util/dispatch.py:206 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.9/dist-packages/tensorflow/python/ops/math_ops.py:1598 div_no_nan
y = ops.convert_to_tensor(y, name="y", dtype=x.dtype.base_dtype)
/usr/local/lib/python3.9/dist-packages/tensorflow/python/profiler/trace.py:163 wrapped
return func(*args, **kwargs)
/usr/local/lib/python3.9/dist-packages/tensorflow/python/framework/ops.py:1566 convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
/usr/local/lib/python3.9/dist-packages/tensorflow/python/framework/constant_op.py:346 _constant_tensor_conversion_function
return constant(v, dtype=dtype, name=name)
/usr/local/lib/python3.9/dist-packages/tensorflow/python/framework/constant_op.py:271 constant
return _constant_impl(value, dtype, shape, name, verify_shape=False,
/usr/local/lib/python3.9/dist-packages/tensorflow/python/framework/constant_op.py:288 _constant_impl
tensor_util.make_tensor_proto(
/usr/local/lib/python3.9/dist-packages/tensorflow/python/framework/tensor_util.py:445 make_tensor_proto
raise ValueError("None values not supported.")
ValueError: None values not supported.
I was assuming ap_distance.get_shape()[0]
is giving an error because shape is None. So I tried to check it first.
CodePudding user response:
I think you have two minor issues, that can easily be fixed:
def _compute_metrics(self, data):
ap_distance, an_distance = self.siamese_network(data)
tp = tf.reduce_sum(tf.cast(tf.math.greater(ap_distance, an_distance), tf.int8))
fp = tf.reduce_sum(tf.cast(tf.math.less_equal(ap_distance, an_distance), tf.float32))
accuracy = tf.cond(tp == 0, lambda: 0.0, lambda: tf.math.divide_no_nan(fp, tf.cast(tf.shape(ap_distance), tf.float32)[0]))
return tp, fp, accuracy
First, a direct call to ap_distance.get_shape()
returns the value None, which you cannot work with. So call tf.shape(ap_distance)
instead, which will be evaluated when the fit(*)
method is called. Alternatively, you could just hard-code the value 32, since that is your batch_size
. Second, tf.math.divide_no_nan
expects float values, which is why I replaced your int
values with floats. If you really need to work with int
then you are going to have use a different method, but I don't think it makes much of a difference in your case. You just need to make sure both operations in tf.cond
return the same data type. After changing your code, you should get outputs like this:
1/151 [..............................] - ETA: 1:55:24 - loss: 0.6733 - tp: 10.0000 - fp: 22.0000 - accuracy: 0.687513
2/151 [..............................] - ETA: 1:24:23 - loss: 0.9405 - tp: 11.5000 - fp: 20.5000 - accuracy: 0.640618
3/151 [..............................] - ETA: 1:20:25 - loss: 1.1410 - tp: 13.6667 - fp: 18.3333 - accuracy: 0.572912