I have a model that classifies samples from two related datasets: the first dataset has 4 classes and the second has 2 classes. This is the model I have implemented:
def build_model():
    """Build and compile the two-input, two-output multi-task model.

    Two structurally identical convolutional branches (suffixes ``A`` and
    ``B``) each take an image of shape ``IMG_SHAPE``. Their outputs are joined
    with ``concatenate`` and fed to two Dense heads: ``'4cls'`` (softmax over
    ``tsk1_CLASSES_NUM`` units) and ``'2cls'`` (a single unit, no activation).

    Returns:
        The compiled ``tf.keras.Model`` named ``"multi_task_model"``.
    """
    keras_layers = tf.keras.layers

    def _make_branch(conv_name, norm_name):
        # One feature extractor: rescale -> dropout -> 13x13 conv -> batch norm.
        image_in = tf.keras.Input(shape=IMG_SHAPE)
        # x * (1 / 127.5) - 1; presumably maps [0, 255] pixels to [-1, 1] -- TODO confirm.
        features = keras_layers.experimental.preprocessing.Rescaling(
            1./127.5, offset=-1
        )(image_in)
        features = keras_layers.Dropout(0.3)(features)
        features = keras_layers.Conv2D(
            filters=128, kernel_size=13, activation='swish', name=conv_name
        )(features)
        features = keras_layers.BatchNormalization(name=norm_name)(features)
        return tf.keras.Model(inputs=image_in, outputs=features)

    # Branch A is created before branch B so the layer creation order is unchanged.
    first_branch = _make_branch("base_conv_A", "base_batch_normalization_A")
    second_branch = _make_branch("base_conv_B", "base_batch_normalization_B")

    # NOTE(review): the heads below are applied directly to the concatenated
    # feature map (no Flatten / global pooling in between), so their outputs
    # keep the spatial dimensions of the conv output.
    joined = concatenate([first_branch.output, second_branch.output])
    four_class_head = keras_layers.Dense(
        tsk1_CLASSES_NUM, activation='softmax', name='4cls'
    )(joined)
    two_class_head = keras_layers.Dense(1, name='2cls')(joined)

    multi_task = tf.keras.Model(
        inputs=[first_branch.input, second_branch.input],
        outputs=[four_class_head, two_class_head],
        name="multi_task_model",
    )

    adam = tf.keras.optimizers.get('adam')
    adam.learning_rate = 0.001

    # Loss and metric dictionaries are keyed by output-layer name.
    # NOTE(review): '4cls' already applies softmax, yet its loss is configured
    # with from_logits=True.
    per_head_loss = {
        '4cls': tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        '2cls': tf.keras.losses.BinaryCrossentropy(from_logits=True),
    }
    per_head_metrics = {"4cls": 'accuracy', "2cls": 'accuracy'}

    multi_task.compile(optimizer=adam, loss=per_head_loss, metrics=per_head_metrics)
    return multi_task
# Instantiate the compiled multi-task model returned by build_model().
model = build_model()
when I tried to train the model using
# Attempt 1: pass the two task datasets to fit() as a plain Python list.
# This is the call that raises the "Failed to find data adapter" ValueError
# quoted below.
history = model.fit([tsk1_train_ds,tsk2_train_ds],
epochs=initial_epochs)
I got the error
ValueError: Failed to find data adapter that can handle input: (<class 'list'> containing values of types {"<class 'tensorflow.python.data.ops.dataset_ops.PrefetchDataset'>"}), <class 'NoneType'>
and when I tried
# Attempt 2: take the first (images, labels) batch from each dataset and fit on
# those tensors directly, one image tensor per model input and one label tensor
# per model output. This is the call that raises the TypeError quoted below.
tsk1_image_batch, tsk1_label_batch = next(iter(tsk1_train_ds))
tsk2_image_batch, tsk2_label_batch = next(iter(tsk2_train_ds))
history = model.fit(x = [tsk1_image_batch, tsk2_image_batch],
y = [tsk1_label_batch, tsk2_label_batch],
epochs=initial_epochs)
I got the error
TypeError: 'NoneType' object is not callable
the data in both tsk1_image_batch
and tsk2_image_batch
looks something like:
<tf.Tensor: shape=(8, 166, 166, 3), dtype=float32, numpy=
array([[[[ 38.996986 , 38.996986 , 38.996986 ],
[ 73.22591 , 73.22591 , 73.22591 ],
[ 85.44268 , 85.44268 , 85.44268 ],
...,
[ 85.927734 , 85.927734 , 85.927734 ],
[ 75.0845 , 75.0845 , 75.0845 ],
[ 60.244205 , 60.244205 , 60.244205 ]],
[[ 9.421633 , 9.421633 , 9.421633 ],
[ 53.4908 , 53.4908 , 53.4908 ],
[ 64.668945 , 64.668945 , 64.668945 ],
...,
[ 82.186516 , 82.186516 , 82.186516 ],
[ 69.15674 , 69.15674 , 69.15674 ],
[ 59.0754 , 59.0754 , 59.0754 ]],
the data in tsk1_label_batch looks like:
<tf.Tensor: shape=(8,), dtype=int64, numpy=array([0, 2, 3, 2, 3, 3, 0, 0], dtype=int64)>
and the data in tsk2_label_batch looks like:
<tf.Tensor: shape=(8,), dtype=int64, numpy=array([0, 1, 1, 1, 1, 0, 1, 0], dtype=int64)>
I am not sure what I am missing. Any help is appreciated.
EDIT
From this answer, it seems I shouldn't use from_logits=True when the output layer already applies softmax. Hence I updated the relevant code:
# Per-output losses, keyed by output-layer name. The '4cls' entry takes
# probabilities (from_logits=False); the '2cls' entry takes raw logits
# (from_logits=True).
losses = {
    '4cls': tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    '2cls': tf.keras.losses.BinaryCrossentropy(from_logits=True),
}
and now I get the error
ValueError: `logits` and `labels` must have the same shape, received ((None, 154, 154, 1) vs (None,)).
CodePudding user response:
Following the comment by marco cerliani on this question, I should use either a Flatten or a GlobalPooling layer before the Dense heads. I used a GlobalAveragePooling2D layer.
The code should look like this:
# Replacement for the merge/head lines inside build_model().
# Global average pooling collapses the spatial dimensions of the concatenated
# feature map, so each Dense head emits one prediction per sample rather than
# one per spatial position.
merge = concatenate([branch_A.output, branch_B.output])
gap = tf.keras.layers.GlobalAveragePooling2D()(merge)
# Uses the same class-count constant as the model in the question
# (tsk1_CLASSES_NUM) so these lines drop into build_model() unchanged.
output_A = tf.keras.layers.Dense(tsk1_CLASSES_NUM, activation='softmax', name='4cls')(gap)
# NOTE: this head now applies a sigmoid, so the '2cls' loss should be
# BinaryCrossentropy(from_logits=False); keeping from_logits=True would apply
# the sigmoid twice.
output_B = tf.keras.layers.Dense(1, activation='sigmoid', name='2cls')(gap)