Found unvisited input tensors that are disconnected from the outputs-CodePudding

I'm trying to concatenate my inputs which consists of acoustic data and auxilary variable. The model is based on a transformer architecture. My problem is that I keep on getting the output:

Found unvisited input tensors that are disconnected from the outputs

This is my code:

def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    # Normalization and Attention
    x = layers.LayerNormalization(epsilon=1e-6)(inputs)
    x = layers.MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )(x, x)
    x = layers.Dropout(dropout)(x)
    res = x   inputs

    # Feed Forward Part
    x = layers.LayerNormalization(epsilon=1e-6)(res)
    x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(x)
    x = layers.Dropout(dropout)(x)
    x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
    return x   res


def build_model(
    input_shape_acoustic,
    input_shape_fotography,
    #input_weight,
    head_size,
    num_heads,
    ff_dim,
    num_transformer_blocks,
    num_vison_transformer_blocks,
    mlp_units,
    dropout=0,
    mlp_dropout=0,
):
    input_acoustic = keras.Input(shape=input_shape_acoustic)
    input_fotography = keras.Input(shape=input_shape_fotography)
    #input_weight = keras.Input(shape=input_weight)
    
    #building acoustic model
    x = input_acoustic
    print(x)
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)
    #input_acoustic = layers.Concatenate()([input_acoustic, input_weight])
    input_acoustic = layers.GlobalAveragePooling1D(data_format="channels_first")(x)
    
    
    #buidling vision transformer
    patches = Patches(patch_size)(input_fotography)
    encoded_patches = PatchEncoder(num_patches, projection_dim)(patches)

    for _ in range(num_vison_transformer_blocks):
        # Layer normalization 1.
        x1 = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
        # Create a multi-head attention layer.
        attention_output = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=projection_dim, dropout=0.1
        )(x1, x1)
        # Skip connection 1.
        x2 = layers.Add()([attention_output, encoded_patches])
        # Layer normalization 2.
        x3 = layers.LayerNormalization(epsilon=1e-6)(x2)
        # MLP.
        x3 = mlp(x3, hidden_units=transformer_units, dropout_rate=0.1)
        # Skip connection 2.
        encoded_patches = layers.Add()([x3, x2])

    representation = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
    representation = layers.Flatten()(representation)
    representation = layers.Dropout(0.5)(representation)
    
    #fusiing information
    x = layers.Concatenate()([input_acoustic, representation])
    print(x.shape)
    for dim in mlp_units:
        x = layers.Dense(dim, activation="relu")(x)
        x = layers.Dropout(mlp_dropout)(x)
    outputs = layers.Dense(n_classes, activation="softmax")(x)
    
    return keras.Model(inputs=[input_acoustic, input_fotography], outputs=outputs)

import tensorflow as tf
from tensorflow import keras

input_shape = np.array(a).reshape(-1, 132096,1).shape[1:]
patch_size = 6
num_patches = (100 // patch_size) ** 2
projection_dim=64
transformer_units = [
    projection_dim * 2,
    projection_dim,
]  # Size of the transformer layers
transformer_layers = 4
mlp_head_units = [2048, 1024]
n_classes = 3


model = build_model(
    np.array(a).shape[1:],
    np.array(f).shape[1:],
    #np.array(w).shape[0],
    head_size=256,
    num_heads=4,
    ff_dim=4,
    num_transformer_blocks=1,
    num_vison_transformer_blocks=1,
    mlp_units=[128],
    mlp_dropout=0.4,
    dropout=0.25,
)

model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=keras.optimizers.Adam(learning_rate=1e-5),
    metrics=["sparse_categorical_accuracy"],
)

#callbacks = [keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)]

history = model.fit(
    [np.array(a), np.array(f)],
    np.array(s.reshape(-1,1)),
    validation_split=0.3,
    epochs=20,
    batch_size=64,
    #callbacks=callbacks,
)

I understand the error message according to the forum post of enter link description here however, I initialited my variable correctly via layers.Input()``` and I do not see that I overwrite my variable for input_weight```. Does anybody see what is causing the problem?

CodePudding user response：

You are overwriting input_acoustic:

 input_acoustic = layers.GlobalAveragePooling1D(data_format="channels_first")(x)

As a result, you do not provide the input layer to Model. It cannot update the weights of the layers before GlobalAveragePooling1D.

Change the variable name to something else, e.g.

acoustic_global_pool = layers.GlobalAveragePooling1D(data_format="channels_first")(x)

Do not overwrite input_acoustic, since it's your input layer, which you need as input to Model.