I'm trying to calculate the gradient in TensorFlow, but it returns None. I have already adjusted the type to tensorflow.python.framework.ops.EagerTensor; however, this did not solve the problem.
This is the code so far:
accuracy = tf.keras.metrics.CategoricalAccuracy('accuracy')
loss = tf.keras.metrics.CategoricalCrossentropy('loss')

for epoch in range(epochs):
    accuracy.reset_states()
    loss.reset_states()

    for batch in iterate_minibatches(X_train, y_train, batch_size):
        imgs = batch[0]
        labels = batch[1]

        with tf.GradientTape() as tape:
            preds = model(imgs)
            labels = tf.convert_to_tensor(labels, dtype=tf.float32)
            #print(loss(labels, preds))
            # Loss is crossentropy loss with regularization term for each parameter
            total_loss = loss(labels, preds)  # + l2_penalty(model, theta_A)

        grads = tape.gradient(total_loss, model.trainable_variables)
        model.optimizer.apply_gradients(zip(grads, model.trainable_variables))

        accuracy.update_state(labels, preds)
        loss.update_state(labels, preds)

        print("\rEpoch: {}, Batch: {}, Loss: {:.3f}, Accuracy: {:.3f}".format(
            epoch + 1, batch + 1, loss.result().numpy(), accuracy.result().numpy()), flush=True, end='')
    print("")

print("Task B accuracy after training trained model on Task B: {}".format(model.evaluate(task_B_test)))
print("Task A accuracy after training trained model on Task B: {}".format(model.evaluate(task_A_test)))
Does anybody know why it's returning None, or how I can fix this?
Edit: My error message looks like the following:
AttributeError                            Traceback (most recent call last)
C:\Users\DC5DE~1.ALB\AppData\Local\Temp/ipykernel_13300/818221091.py in <module>
     34         grads = tape.gradient(total_loss, model.trainable_variables)
     35 
---> 36         model.optimizer.apply_gradients(zip(grads, model.trainable_variables))
     37 
     38         accuracy.update_state(labels, preds)

AttributeError: 'NoneType' object has no attribute 'apply_gradients'
Since I am not sure whether it has to do with how I pass my image data to GradientTape, here is my function for creating the minibatches:
def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    assert inputs.shape[0] == targets.shape[0]
    if shuffle:
        indices = np.arange(inputs.shape[0])
        np.random.shuffle(indices)
    for start_idx in range(0, inputs.shape[0] - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]
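For context, I consume the generator like this (the shapes here are just an example, not my real data):

import numpy as np

# dummy data with example shapes, only to show how the generator is called
X_train = np.random.rand(100, 28, 28, 1).astype("float32")
y_train = np.eye(10)[np.random.randint(0, 10, size=100)].astype("float32")

for imgs, labels in iterate_minibatches(X_train, y_train, batchsize=32):
    print(imgs.shape, labels.shape)  # (32, 28, 28, 1) (32, 10)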
Also: a similar problem was mentioned here, but without any solution that works.
CodePudding user response:
You need to use tf.keras.losses.CategoricalCrossentropy for the loss computation instead of tf.keras.metrics.CategoricalCrossentropy, which works differently and will stop gradient propagation: a metric accumulates its result in internal state variables, so the value it returns is not differentiable with respect to the model's weights.
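A minimal sketch of the difference, using a toy model and random data (both purely illustrative):

import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(3)])
x = tf.random.normal((4, 2))
y = tf.one_hot([0, 1, 2, 0], depth=3)

metric_ce = tf.keras.metrics.CategoricalCrossentropy(from_logits=True)  # a metric
loss_ce = tf.keras.losses.CategoricalCrossentropy(from_logits=True)     # a loss

with tf.GradientTape() as tape:
    preds = model(x)
    bad = metric_ce(y, preds)  # result is read from the metric's state variables
print(tape.gradient(bad, model.trainable_variables))   # [None, None]

with tf.GradientTape() as tape:
    preds = model(x)
    good = loss_ce(y, preds)   # differentiable path from the weights to the loss
print(tape.gradient(good, model.trainable_variables))  # real gradient tensors

Keep the metric only for monitoring via update_state/result, the way the accuracy metric is already used.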
CodePudding user response:
You are getting a few things mixed up. You either need to call model.compile or define your own optimizer: model.optimizer is None until the model has been compiled, which is exactly why apply_gradients raises the AttributeError above. Also, you should not mix up your metrics with your loss function. Here is a working example:
import tensorflow as tf

accuracy = tf.keras.metrics.CategoricalAccuracy('accuracy')
metric = tf.keras.metrics.CategoricalCrossentropy('metric_categorical_crossentropy')
loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
epochs = 2

model = tf.keras.Sequential([
    tf.keras.layers.Dense(units=3, input_shape=(1,))
])
optimizer = tf.keras.optimizers.Adam()
dataset = tf.data.Dataset.from_tensor_slices((tf.random.normal((50, 1)), tf.random.normal((50, 3)))).batch(5)

for epoch in range(epochs):
    accuracy.reset_states()
    metric.reset_states()

    for i, batch in enumerate(dataset):
        imgs = batch[0]
        labels = batch[1]
        print(imgs.shape, labels.shape)

        with tf.GradientTape() as tape:
            preds = model(imgs)
            #print(loss(labels, preds))
            # Loss is crossentropy loss with regularization term for each parameter
            total_loss = loss(labels, preds)  # + l2_penalty(model, theta_A)

        grads = tape.gradient(total_loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

        accuracy.update_state(labels, preds)
        metric.update_state(labels, preds)

        print("\rEpoch: {}, Batch: {}, Loss: {:.3f}, Accuracy: {:.3f}".format(
            epoch + 1, i + 1, metric.result().numpy(), accuracy.result().numpy()), flush=True, end='')
    print("")
Epoch: 1, Batch: 1, Loss: 4.209, Accuracy: 0.200
Epoch: 1, Batch: 2, Loss: 1.641, Accuracy: 0.400
Epoch: 1, Batch: 3, Loss: 1.294, Accuracy: 0.333
Epoch: 1, Batch: 4, Loss: 1.025, Accuracy: 0.300
Epoch: 1, Batch: 5, Loss: -0.110, Accuracy: 0.320
Epoch: 1, Batch: 6, Loss: 0.316, Accuracy: 0.267
Epoch: 1, Batch: 7, Loss: -0.118, Accuracy: 0.257
Epoch: 1, Batch: 8, Loss: -0.284, Accuracy: 0.225
Epoch: 1, Batch: 9, Loss: -0.249, Accuracy: 0.244
Epoch: 1, Batch: 10, Loss: -0.464, Accuracy: 0.260
Epoch: 2, Batch: 1, Loss: 4.468, Accuracy: 0.200
Epoch: 2, Batch: 2, Loss: 1.578, Accuracy: 0.400
Epoch: 2, Batch: 3, Loss: 1.012, Accuracy: 0.400
Epoch: 2, Batch: 4, Loss: 0.836, Accuracy: 0.350
Epoch: 2, Batch: 5, Loss: -0.294, Accuracy: 0.360
Epoch: 2, Batch: 6, Loss: 0.168, Accuracy: 0.300
Epoch: 2, Batch: 7, Loss: -0.201, Accuracy: 0.286
Epoch: 2, Batch: 8, Loss: -0.634, Accuracy: 0.250
Epoch: 2, Batch: 9, Loss: -0.552, Accuracy: 0.267
Epoch: 2, Batch: 10, Loss: -0.920, Accuracy: 0.280
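If you would rather keep the model.optimizer.apply_gradients call from the question, compile the model first so that model.optimizer is actually set. A minimal sketch (it reuses the toy model and the grads from the loop above):

model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True))

# model.optimizer is now an Adam instance instead of None,
# so this call no longer raises the AttributeError:
model.optimizer.apply_gradients(zip(grads, model.trainable_variables))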