I have defined a custom LSTM Layer as follows:
class LSTMModel(tf.keras.Model):
def __init__(self, CNN_model, num_classes):
super().__init__()
self.cnn_model = CNN_model
self.lstm = tf.keras.layers.LSTM(units=64, return_state=True, dropout=0.3)
self.dense = tf.keras.layers.Dense(num_classes, activation="softmax")
def call(self, input):
pass
However, I am unclear what needs too occur in the call function here. I also wrote a generic CNN class like below:
class generic_vns_function(tf.keras.Model):
# Where would we use layer_units here?
def __init__(self, input_shape, layers, layer_units):
super().__init__()
self.convolutions = []
# Dynamically create Convolutional layers and MaxPools
for layer in range(len(layers)):
self.convolutions.append(tf.keras.layers.Conv2D(layer, 3, padding="same",
input_shape=input_shape, activation="relu"))
# Add MaxPooling layer
self.convolutions.append(tf.keras.layers.MaxPooling2D((2,2)))
# Flatten
self.flatten = tf.keras.layers.Flatten()
# Dense layer
self.dense1 = tf.keras.layers.Dense(1024, activation="relu")
def call(self, input):
x = input
for layer in self.convolutions:
x = layer(x)
x = self.flatten(x)
x = self.dense1(x)
return x
but here the required structure makes a lot more sense to me. I am just initializing all of the layers. What do I need to do to initialize my LSTM layers?
CodePudding user response:
You could write it like this:
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras import Model
class LSTMModel(Model):
def __init__(self, num_classes, num_units=64, drop_prob=0.3):
super().__init__()
self.num_classes = num_classes
self.num_units = num_units
self.drop_prob = drop_prob
self.lstm = LSTM(
units=self.num_units,
return_state=True,
dropout=self.drop_prob)
self.dense = Dense(
num_classes,
activation="softmax")
def call(self, x, training=True):
x, *state = self.lstm(x, training=training)
x = self.dense(x)
return x
And then you would use it like:
model = LSTMModel(num_classes=2)
time_series = tf.random.normal((32, 64, 128))
x_pred = model(time_series)
# loss and gradients calculations ...
It is a common tensorflow idom to instantiate layers when initializing a custom layer/model, and then execute their call()
methods by passing data through them in your custom call implementation.