Custom Keras model with custom loss function gives error - CodePudding

I'm building a custom three-layered neural network with a custom loss and activation function. However, it gives me the following error:

File "C:\Users\untitled1.py", line 196, in <module>
    cModel.fit(X_train, y_train, batch_size=64, epochs=2)

  File "C:\Users\\Anaconda3\lib\site-packages\keras\engine\training.py", line 1184, in fit
    tmp_logs = self.train_function(iterator)

  File "C:\Users\\Anaconda3\lib\site-packages\tensorflow\python\eager\def_function.py", line 885, in __call__
    result = self._call(*args, **kwds)

  File "C:\Users\\Anaconda3\lib\site-packages\tensorflow\python\eager\def_function.py", line 933, in _call
    self._initialize(args, kwds, add_initializers_to=initializers)

  File "C:\Users\\Anaconda3\lib\site-packages\tensorflow\python\eager\def_function.py", line 759, in _initialize
    self._stateful_fn._get_concrete_function_internal_garbage_collected(  # pylint: disable=protected-access

  File "C:\Users\\Anaconda3\lib\site-packages\tensorflow\python\eager\function.py", line 3066, in _get_concrete_function_internal_garbage_collected
    graph_function, _ = self._maybe_define_function(args, kwargs)

  File "C:\Users\\Anaconda3\lib\site-packages\tensorflow\python\eager\function.py", line 3463, in _maybe_define_function
    graph_function = self._create_graph_function(args, kwargs)

  File "C:\Users\\Anaconda3\lib\site-packages\tensorflow\python\eager\function.py", line 3298, in _create_graph_function
    func_graph_module.func_graph_from_py_func(

  File "C:\Users\\Anaconda3\lib\site-packages\tensorflow\python\framework\func_graph.py", line 1007, in func_graph_from_py_func
    func_outputs = python_func(*func_args, **func_kwargs)

  File "C:\Users\\Anaconda3\lib\site-packages\tensorflow\python\eager\def_function.py", line 668, in wrapped_fn
    out = weak_wrapped_fn().__wrapped__(*args, **kwds)

  File "C:\Users\\Anaconda3\lib\site-packages\tensorflow\python\framework\func_graph.py", line 994, in wrapper
    raise e.ag_error_metadata.to_exception(e)

TypeError: in user code:

    C:\Users\\Anaconda3\lib\site-packages\keras\engine\training.py:853 train_function  *
        return step_function(self, iterator)

    TypeError: tf__compile() missing 1 required positional argument: 'loss'

The code I use is shown below. I tried it with my own custom loss and with a loss from Keras, and both gave the same error. If I just initialize a Keras Sequential model and incorporate my loss, it works fine, but with my custom model it does not. I use a custom model because I also want to customize the optimization method later. How can this error be resolved?

#%% Functions
class CustomModel(keras.Model):
    """Subclassed Keras model from the question: a tanh hidden layer and a frozen linear output.

    This is the code exactly as posted, i.e. the version that raises the
    reported TypeError; only comments have been added.
    """
    #initialize the model with the beta needed and the dimensions
    def __init__(self, b, input_dim):
        """Create the layers.

        b: factor multiplied with the hidden pre-activation inside tanh.
        input_dim: number of input features, used for the keras.Input placeholder.
        """
        #first i initialized it as self, model and without the call function
        #but this gave me an error that said i needed a call function thus i changed it to this
        super(CustomModel, self).__init__()
        # Symbolic placeholder; `call` below feeds this instead of real data.
        self.dim = keras.Input( shape=(input_dim,))
        # `initializer` is a module-level global assigned further down the script.
        self.dense1 = keras.layers.Dense(20, name='hidden', kernel_initializer=initializer, 
                 bias_initializer=initializer, activation = lambda x: K.tanh(b*x))
        # Output kernel is frozen (trainable=False) and filled by initializeOutputWeights.
        self.dense2 = keras.layers.Dense(2, activation='linear', name='output', use_bias=False, trainable=False,kernel_initializer= lambda shape, 
                 dtype: initializeOutputWeights(shape, dtype))
    
    # NOTE(review): `call` declares no inputs parameter, yet train_step invokes
    # `self(x, training=True)`; the batch `x` never reaches the layers here.
    def call(self):
        x1 = self.dense1(self.dim)
        return self.dense2(x1)
    
    # NOTE(review): nothing is forwarded to the base compile(); optimizer and loss
    # are stored as given. The script passes optimizer='adam', so self.optimizer
    # is presumably still a plain string when train_step uses it - confirm.
    def compile(self, optimizer, loss):
        #for the use of the custom loss function
        super(CustomModel, self).compile()
        self.optimizer = optimizer
        self.loss = loss
        
    def train_step(self, data):
        """Run one optimisation step on a batch and return the metric results."""
        # Unpack the data. Its structure depends on your model and
        # on what you pass to `fit()`.
        x, y = data
        

        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)  # Forward pass
            # Compute the loss value
            # (the loss function is configured in `compile()`)
            # NOTE(review): customLoss.call only declares (y_true, y_pred);
            # `regularization_losses` looks carried over from the compiled_loss API - confirm.
            loss = self.loss(y, y_pred, regularization_losses=self.losses)

        # Compute gradients
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        # Update metrics (includes the metric that tracks the loss)
        self.compiled_metrics.update_state(y, y_pred)
        # Return a dict mapping metric names to current value
        return {m.name: m.result() for m in self.metrics}

def initializeOutputWeights(shape, dtype=None):
    """Return a backend variable of the given shape filled with -1 or +1.

    The entries are sampled with NumPy's global random generator; the
    output layer that uses this initializer is created with
    ``trainable=False``, so the values stay fixed during training.
    """
    # randint(0, 2) yields 0 or 1; 2 * bit - 1 maps 0 -> -1 and 1 -> +1.
    coin_flips = np.random.randint(0, 2, size=shape)
    signs = 2 * coin_flips - 1
    return K.variable(signs, dtype=dtype)

class customLoss(keras.losses.Loss):
    """Squared-margin loss of the GVM.

    Every element of ``y_pred * y_true`` that does not exceed the margin
    ``d`` is penalised by ``(y_pred * y_true - d) ** 2``; elements above
    the margin contribute zero. The result is the mean over all elements.
    """

    def __init__(self, d=10, name = "CustomLoss"):
        """Store the margin ``d`` and register the loss under ``name``."""
        super().__init__(name=name)
        self.d = d

    def call(self, y_true, y_pred):
        """Return the scalar loss for one batch.

        For 2-D ``(batch, outputs)`` input the mean taken here equals the
        sum divided by ``batch * outputs``.
        """
        y_pred = tf.convert_to_tensor(y_pred)
        # Align dtypes so integer labels can be multiplied with float outputs.
        y_true = tf.cast(y_true, y_pred.dtype)

        y_dot = y_pred * y_true
        squared_gap = tf.square(y_dot - self.d)
        # Mask with tensor ops only: no Python len() on tensors and no
        # scatter update, so this does not depend on AutoGraph rewriting.
        y_loss = tf.where(y_dot > self.d, tf.zeros_like(squared_gap), squared_gap)
        return tf.reduce_mean(y_loss)

# Seed the random generators so runs are repeatable.
# NOTE(review): `seed` is imported outside this excerpt - presumably numpy.random.seed; confirm.
seed(1)
tf.random.set_seed(2)
# NOTE(review): acc_metric is created here but never handed to compile() in this excerpt.
acc_metric = keras.metrics.SparseCategoricalAccuracy(name="accuracy")
# Uniform [-1, 1] initializer; CustomModel.__init__ reads it as a module-level global.
initializer = tf.keras.initializers.RandomUniform(minval=-1, maxval=1)
# One tanh factor per hidden unit (the hidden layer has 20 units).
b = np.ones(20)
# 9 is the input_dim argument.
cModel = CustomModel(b, 9)
Losscustom = customLoss(d=16)
cModel.compile(optimizer='adam',loss=Losscustom)
# This is the call that raises the TypeError shown in the traceback above.
# X_train / y_train are defined outside this excerpt.
cModel.fit(X_train, y_train, batch_size=64, epochs=2)

CodePudding user response：

You seem to have mixed up a few constructs that don't fit together. I suggest you define your own custom training loop that will give you the flexibility you need:

First define your model:

import tensorflow as tf
import random
import numpy as np

def initializeOutputWeights(shape, dtype=None):
    """Create the output-layer kernel: random entries that are either -1 or 1.

    Values come from NumPy's global random generator and are wrapped in a
    Keras backend variable of the requested dtype.
    """
    # Draw 0/1 per entry, then turn every 0 into -1 and keep the 1s.
    zero_or_one = np.random.randint(0, 2, size=shape)
    plus_minus_one = np.where(zero_or_one > 0, zero_or_one, -1)
    return tf.keras.backend.variable(plus_minus_one, dtype=dtype)

class CustomModel(tf.keras.Model):
    """Two-layer network: a 20-unit tanh hidden layer and a frozen 2-unit linear output.

    The hidden activation is ``tanh(b * x)``. The output layer has no bias,
    is created with ``trainable=False`` and gets its kernel from
    ``initializeOutputWeights``.
    """

    def __init__(self, b, input_dim):
        """Build both layers.

        b: factor multiplied with the hidden pre-activation inside tanh.
        input_dim: number of input features, passed as ``input_shape`` of the
            hidden layer.
        """
        super().__init__()

        uniform_init = tf.keras.initializers.RandomUniform(minval=-1, maxval=1)
        self.dense = tf.keras.layers.Dense(
            20,
            name='hidden',
            kernel_initializer=uniform_init,
            bias_initializer=uniform_init,
            activation=lambda pre_act: tf.tanh(b * pre_act),
            input_shape=(input_dim,),
        )
        self._output = tf.keras.layers.Dense(
            2,
            activation='linear',
            name='output',
            use_bias=False,
            trainable=False,
            kernel_initializer=lambda shape, dtype: initializeOutputWeights(shape, dtype),
        )

    def call(self, inputs):
        """Forward pass: hidden layer followed by the output layer."""
        hidden = self.dense(inputs)
        return self._output(hidden)

Then define your loss function:

def compute_loss(d, y_pred, y_true):
    """Return the squared-margin loss for one batch as a scalar tensor.

    d: margin. Elements of ``y_pred * y_true`` above ``d`` contribute zero;
        all others contribute ``(y_pred * y_true - d) ** 2``.
    y_pred: model outputs.
    y_true: targets, broadcastable against ``y_pred``.

    For 2-D ``(batch, outputs)`` input the mean taken here equals the sum
    divided by ``batch * outputs``.
    """
    y_pred = tf.convert_to_tensor(y_pred)
    # Align dtypes so integer labels can be multiplied with float outputs.
    y_true = tf.cast(y_true, y_pred.dtype)

    y_dot = y_pred * y_true
    squared_gap = tf.square(y_dot - d)
    # Mask with tensor ops only: no Python len() on tensors and no scatter
    # update, so this does not depend on AutoGraph rewriting.
    y_loss = tf.where(y_dot > d, tf.zeros_like(squared_gap), squared_gap)
    return tf.reduce_mean(y_loss)

Afterwards define your training loop:

@tf.function
def train_step(model, batch, optimizer, d=16):
    """Run one optimisation step on a single batch and return its loss.

    model: the model to update; called with ``training=True``.
    batch: an ``(x, y)`` pair of inputs and targets.
    optimizer: optimizer whose ``apply_gradients`` updates the trainable variables.
    d: margin forwarded to ``compute_loss`` (defaults to the previously
        hard-coded 16).

    Also updates the module-level ``acc_metric`` and prints the batch loss.
    """
    x, y = batch
    with tf.GradientTape() as tape:
        # Only the forward pass and the loss need to be recorded on the tape.
        y_pred = model(x, training=True)
        loss = compute_loss(d, y_pred, y)

    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    # update your metrics here how you want.
    acc_metric.update_state(y, y_pred)
    tf.print("Training loss (for one batch): ", loss)
    return loss

def train(dataset, optimizer, epochs=25):
    """Train a fresh ``CustomModel`` on ``dataset`` and return it.

    dataset: iterable of ``(x, y)`` batches.
    optimizer: optimizer handed to ``train_step``.
    epochs: number of passes over ``dataset``.

    After every epoch the accuracy accumulated in the module-level
    ``acc_metric`` is printed and the metric is reset.

    Returns the trained model, so it is no longer lost when the function exits.
    """
    b = np.ones(20)
    custom_model = CustomModel(b, 9)
    for _ in range(epochs):
        for batch in dataset:
            train_step(custom_model, batch, optimizer)

        train_acc = acc_metric.result()
        tf.print("Training acc over epoch: %.4f" % (float(train_acc),))

        # Reset training metrics at the end of each epoch
        acc_metric.reset_states()

    return custom_model

And finally define your dataset and other important variables and train your model:

# Seed every generator in play. initializeOutputWeights draws from NumPy's
# global generator, which random.seed() alone does not touch.
random.seed(1)
np.random.seed(1)
tf.random.set_seed(2)
# Module-level metric read by train_step() and train().
acc_metric = tf.keras.metrics.SparseCategoricalAccuracy(name="accuracy")
opt = tf.keras.optimizers.Adam()

#### Dummy data ####
# 500 labels in {0, 1} stored as float32, and 500 samples with 9 features each.
y_train = tf.cast(tf.random.uniform((500, 1), minval=0, maxval=2, dtype=tf.int32), tf.float32)
X_train = tf.random.normal((500, 9))
BATCH_SIZE = 64
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .shuffle(X_train.shape[0])
    .batch(BATCH_SIZE)
)

train(train_dataset, opt)