I am trying to build a multiclass text classifier using PyTorch and torchtext, but I am receiving this error whenever the output dimension of the last layer is 2; it runs fine with an output dimension of 1. I suspect the problem is with the batch size and data shape, but I don't know the fix. Please suggest what to do.
Constructing the iterator:
# set batch size
BATCH_SIZE = 16

train_iterator, valid_iterator = BucketIterator.splits(
    (train_data, valid_data),
    batch_size=BATCH_SIZE,
    sort_key=lambda x: len(x.text),
    sort_within_batch=True,
    device=device)
Model class:
class classifier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers,
                 bidirectional, dropout):
        super(classifier, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.gru = nn.GRU(embedding_dim,
                          hidden_dim,
                          num_layers=n_layers,
                          bidirectional=bidirectional,
                          dropout=dropout,
                          batch_first=True)
        self.fc1 = nn.Linear(hidden_dim * 2, 128)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(128, 64)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(64, 16)
        self.relu3 = nn.ReLU()
        self.fc4 = nn.Linear(16, output_dim)
        self.act = nn.Sigmoid()

    def forward(self, text, text_lengths):
        embedded = self.embedding(text)
        # embedded = [batch size, sent_len, emb dim]
        packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths.to('cpu'), batch_first=True)
        packed_output, hidden = self.gru(packed_embedded)
        # concatenate the final forward and backward hidden states
        hidden = torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1)
        dense_1 = self.fc1(hidden)
        x = self.relu1(dense_1)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        x = self.relu3(x)
        dense_outputs = self.fc4(x)
        # final activation function
        outputs = self.act(dense_outputs)
        return outputs
Instantiating the model:
size_of_vocab = len(TEXT.vocab)
embedding_dim = 300
num_hidden_nodes = 256
num_output_nodes = 2
num_layers = 4
bidirection = True
dropout = 0.2

model = classifier(size_of_vocab, embedding_dim, num_hidden_nodes, num_output_nodes, num_layers,
                   bidirectional=True, dropout=dropout).to(device)
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')
pretrained_embeddings = TEXT.vocab.vectors
model.embedding.weight.data.copy_(pretrained_embeddings)
print(pretrained_embeddings.shape)
Optimizer and criterion used:
optimizer = optim.Adam(model.parameters())
criterion = nn.BCELoss()
model = model.to(device)
criterion = criterion.to(device)
Training function:
import torchmetrics as tm
metrics = tm.Accuracy()

def train(model, iterator, optimizer, criterion):
    # initialize every epoch
    epoch_loss = 0
    epoch_acc = 0

    # set the model in training phase
    model.train()

    for batch in iterator:
        # reset the gradients after every batch
        optimizer.zero_grad()

        # retrieve text and no. of words
        text, text_lengths = batch.text

        # convert to 1D tensor
        predictions = model(text, text_lengths).squeeze()

        # compute the loss
        loss = criterion(predictions, batch.label)

        # compute the accuracy
        # acc = binary_accuracy(predictions, batch.label)
        acc = metrics(predictions, batch.label)

        # backpropagate the loss and compute the gradients
        loss.backward()

        # update the weights
        optimizer.step()

        # accumulate loss and accuracy
        epoch_loss += loss.item()
        epoch_acc += acc.item()

    return epoch_loss / len(iterator), epoch_acc / len(iterator)
Full error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-60-eeabf5bacadf> in <module>()
5
6 #train the model
----> 7 train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
8
9 #evaluate the model
3 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in binary_cross_entropy(input, target, weight, size_average, reduce, reduction)
2906 raise ValueError(
2907 "Using a target size ({}) that is different to the input size ({}) is deprecated. "
-> 2908 "Please ensure they have the same size.".format(target.size(), input.size())
2909 )
2910
ValueError: Using a target size (torch.Size([16])) that is different to the input size (torch.Size([16, 2])) is deprecated. Please ensure they have the same size.
CodePudding user response:
What you want is CrossEntropyLoss instead of BCELoss.

BCELoss requires the input and target to have the same shape, so with num_output_nodes = 2 your predictions come out as [16, 2] while batch.label is [16] — exactly the mismatch the traceback reports. CrossEntropyLoss is designed for multiclass outputs: it takes raw logits of shape [batch, num_classes] and integer class labels of shape [batch], applying the softmax internally. That also means you should drop the final nn.Sigmoid() and return the unnormalized fc4 outputs from forward.
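Here is a minimal, self-contained sketch of the shape contract (the tensors are stand-ins for your variables, not your real pipeline):

import torch
import torch.nn as nn

batch_size, num_classes = 16, 2

# stand-ins: what forward() should return (no sigmoid) and batch.label as class ids
logits = torch.randn(batch_size, num_classes)          # raw fc4 outputs, shape [16, 2]
labels = torch.randint(0, num_classes, (batch_size,))  # class indices, shape [16], dtype int64

criterion = nn.CrossEntropyLoss()  # applies log-softmax + NLL internally
loss = criterion(logits, labels)   # input [16, 2] vs target [16] is exactly what it expects
print(loss.item())

In your posted code this amounts to returning dense_outputs directly from forward, replacing nn.BCELoss() with nn.CrossEntropyLoss(), and casting the labels with batch.label.long() if they are not already integers. Depending on your torchmetrics version, the accuracy metric may also need to be constructed as tm.Accuracy(task='multiclass', num_classes=2).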