I am trying to do a time-series forecast with a Transformer. The input shape is (None, 30). However, an error occurs in the code below.
x = layers.MultiHeadAttention(
5 key_dim=1, num_heads=1, dropout=dropout
----> 6 )(inputs, inputs)
7 x = layers.Dropout(dropout)(x)
8 x = layers.LayerNormalization(epsilon=1e-6)(x)
At that point the following error is raised: `IndexError: tuple index out of range`.
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    """One Transformer encoder block: self-attention + position-wise feed-forward.

    Args:
        inputs: 3-D tensor of shape (batch, time_steps, channels).
            NOTE(review): MultiHeadAttention requires rank-3 input; a rank-2
            (None, 30) tensor is what triggers "IndexError: tuple index out
            of range" — expand the data to (None, 30, 1) before calling.
        head_size: key/query dimension per attention head.
        num_heads: number of attention heads.
        ff_dim: filter count of the hidden Conv1D in the feed-forward part.
        dropout: dropout rate applied after attention and inside the FFN.

    Returns:
        Tensor with the same shape as `inputs`.
    """
    # Attention and normalization (post-norm variant).
    x = layers.MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )(inputs, inputs)
    x = layers.Dropout(dropout)(x)
    x = layers.LayerNormalization(epsilon=1e-6)(x)
    # Fixed: residual connection was written `x inputs` (missing `+`).
    res = x + inputs

    # Feed-forward part: two 1x1 convolutions acting position-wise.
    x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(res)
    x = layers.Dropout(dropout)(x)
    # Project back to the input channel count so the residual add is valid.
    x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
    x = layers.LayerNormalization(epsilon=1e-6)(x)
    # Fixed: second residual connection was written `x res` (missing `+`).
    return x + res
def build_model(
    input_shape,
    head_size,
    num_heads,
    ff_dim,
    num_transformer_blocks,
    mlp_units,
    dropout=0,
    mlp_dropout=0,
):
    """Stack Transformer encoder blocks, pool, and attach an MLP head.

    Args:
        input_shape: shape of one sample, excluding the batch dimension.
        head_size: per-head key dimension passed to each encoder block.
        num_heads: attention heads per encoder block.
        ff_dim: feed-forward filter count per encoder block.
        num_transformer_blocks: how many encoder blocks to stack.
        mlp_units: iterable of Dense layer sizes for the head.
        dropout: dropout rate inside the encoder blocks.
        mlp_dropout: dropout rate between head Dense layers.

    Returns:
        A `keras.Model` mapping the input to the final Dense output.
    """
    model_input = keras.Input(shape=input_shape)

    # Encoder stack.
    features = model_input
    for _block in range(num_transformer_blocks):
        features = transformer_encoder(
            features, head_size, num_heads, ff_dim, dropout
        )

    # Collapse the channel axis, then run the MLP head.
    features = layers.GlobalAveragePooling1D(data_format="channels_first")(features)
    for units in mlp_units:
        features = layers.Dense(units, activation="relu")(features)
        features = layers.Dropout(mlp_dropout)(features)

    # NOTE(review): `n_classes` is a module-level name defined elsewhere
    # in the original script — confirm it is set before calling build_model.
    model_output = layers.Dense(n_classes)(features)
    return keras.Model(model_input, model_output)
from tensorflow import keras
from tensorflow.keras import layers
# Build and compile the model from the training data's per-sample shape.
# NOTE(review): X_train must be rank-3 (samples, time_steps, channels) for
# MultiHeadAttention — a (samples, 30) array will raise IndexError; verify.
input_shape = X_train.shape[1:]
model_mlp = build_model(
input_shape,
head_size=256,
num_heads=1,
ff_dim=1,
num_transformer_blocks=4,
mlp_units=[128],
mlp_dropout=0.4,
dropout=0.25,
)
# NOTE(review): `adam` and `root_mean_squared_error` are not defined in this
# snippet — presumably an optimizer instance and a custom loss defined
# elsewhere; confirm before running.
model_mlp.compile(optimizer = adam, loss = root_mean_squared_error)
model_mlp.summary()
I am trying to do a time-series forecast with a Transformer. The input shape is (None, 30). However, the error above occurs.
CodePudding user response:
Make the following changes:
# Add a trailing channel axis so each sample becomes (30, 1) — MultiHeadAttention
# needs rank-3 input (batch, time_steps, channels), which fixes the IndexError.
# NOTE(review): assumes `tf` (tensorflow) is imported in the caller's session.
X_train = tf.expand_dims(X_train, -1) #change your input
input_shape = X_train.shape[1:] #input shape should change to (30,1)
model_mlp = build_model(
input_shape,
head_size=256,
num_heads=1,
ff_dim=1,
num_transformer_blocks=4,
mlp_units=[128],
mlp_dropout=0.4,
dropout=0.25,
)
# NOTE(review): `adam` and `root_mean_squared_error` come from the asker's
# session; confirm they are defined before compiling.
model_mlp.compile(optimizer = adam, loss = root_mean_squared_error)
model_mlp.summary()
Then check that a forward pass works:
# Sanity check: run one forward pass over the (reshaped) training data.
model_mlp(X_train)