I've created an LSTM in PyTorch and I need it to accept a variable sequence length. The following is my code:
class Seq2SeqSingle(nn.Module):
    """LSTM-based classifier returning softmax probabilities per sample.

    Pipeline: ``fc_i`` -> ``fc_o`` (applied per time step) -> LSTM -> head
    (``fc_0`` -> ``fc_1``) -> softmax over the last dimension.

    Args:
        input_size: Number of features per time step of the input.
        hidden_size: Hidden size of the LSTM.
        num_layers: Number of stacked LSTM layers.
        in_features: Unused; kept so existing call sites keep working.
        out_features: Width of the linear layers and of the final output.
        seq_len: Number of time steps the head expects. With an integer, the
            LSTM outputs of all steps are flattened, so ``fc_0`` takes
            ``hidden_size * seq_len`` inputs and every batch must have exactly
            ``seq_len`` steps. With ``None``, only the output of the last time
            step is fed to the head, so ``fc_0`` takes ``hidden_size`` inputs
            and the sequence length may vary from batch to batch. The default
            of 11 matches the sequence length that was previously hard-coded.
    """

    def __init__(self, input_size, hidden_size, num_layers, in_features, out_features, seq_len=11):
        super(Seq2SeqSingle, self).__init__()
        self.out_features = out_features
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.seq_len = seq_len
        self.fc_i = nn.Linear(input_size, out_features)
        self.fc_o = nn.Linear(out_features, input_size)
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        # Derive the head's input width from the configuration instead of the
        # literal 128 * 11, which only worked for hidden_size=128 and 11 steps.
        head_in = hidden_size if seq_len is None else hidden_size * seq_len
        self.fc_0 = nn.Linear(head_in, out_features)
        self.fc_1 = nn.Linear(out_features, out_features)

    def forward(self, x):
        """Run the model on ``x`` of shape ``(batch, steps, input_size)``.

        Returns:
            Tensor of shape ``(batch, out_features)`` whose rows sum to 1.
        """
        batch_size = x.size(0)
        output = self.fc_i(torch.relu(x))
        output = self.fc_o(torch.relu(output))
        # Allocate the zero initial states on the same device/dtype as the
        # activations, so no module-level ``device`` global is required.
        h_0 = output.new_zeros(self.num_layers, batch_size, self.hidden_size)
        c_0 = output.new_zeros(self.num_layers, batch_size, self.hidden_size)
        output, _ = self.lstm(output, (h_0, c_0))
        if self.seq_len is None:
            # Last time step only: width is hidden_size regardless of steps.
            output = output[:, -1, :]
        else:
            # All time steps flattened: width is hidden_size * steps.
            output = output.reshape(batch_size, -1)
        output = self.fc_0(torch.relu(output))
        output = self.fc_1(torch.relu(output))
        return nn.functional.softmax(output, dim=1)
To match the size of the LSTM layer's output, I need to multiply 128 (the hidden size) by 11 (the sequence length). Obviously, if I change the sequence length, it crashes. How can I avoid specifying this fixed size?
CodePudding user response:
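You can't avoid giving the linear layer a fixed input size. As joe32140 said in a comment, a common approach is to feed only the hidden state of the last time step into the linear layer, so its input size no longer depends on the number of steps (for example, with `batch_first=True`, use `output[:, -1, :]` and define the layer as `nn.Linear(hidden_size, out_features)`).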