I am trying to build a multiclass text classifier using PyTorch and torchtext, but I am receiving this error whenever the output dimension of the last layer is 2; it runs fine with an output dimension of 1. I suspect the problem is with the batch size and data shape, but I don't know the fix. Please suggest what to do.
Constructing the iterator:
# set batch size
BATCH_SIZE = 16

train_iterator, valid_iterator = BucketIterator.splits(
    (train_data, valid_data),
    batch_size=BATCH_SIZE,
    sort_key=lambda x: len(x.text),
    sort_within_batch=True,
    device=device)
Model class:
class classifier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers,
                 bidirectional, dropout):
        super(classifier, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.gru = nn.GRU(embedding_dim,
                          hidden_dim,
                          num_layers=n_layers,
                          bidirectional=bidirectional,
                          dropout=dropout,
                          batch_first=True)
        self.fc1 = nn.Linear(hidden_dim * 2, 128)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(128, 64)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(64, 16)
        self.relu3 = nn.ReLU()
        self.fc4 = nn.Linear(16, output_dim)
        self.act = nn.Sigmoid()

    def forward(self, text, text_lengths):
        embedded = self.embedding(text)
        # embedded = [batch size, sent_len, emb dim]
        packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths.to('cpu'), batch_first=True)
        packed_output, hidden = self.gru(packed_embedded)
        # concatenate the final forward and backward hidden states
        hidden = torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1)
        dense_1 = self.fc1(hidden)
        x = self.relu1(dense_1)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        x = self.relu3(x)
        dense_outputs = self.fc4(x)
        # final activation function
        outputs = self.act(dense_outputs)
        return outputs
Instantiating the model:
size_of_vocab = len(TEXT.vocab)
embedding_dim = 300
num_hidden_nodes = 256
num_output_nodes = 2
num_layers = 4
bidirection = True
dropout = 0.2

model = classifier(size_of_vocab, embedding_dim, num_hidden_nodes, num_output_nodes, num_layers,
                   bidirectional=True, dropout=dropout).to(device)
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')
pretrained_embeddings = TEXT.vocab.vectors
model.embedding.weight.data.copy_(pretrained_embeddings)
print(pretrained_embeddings.shape)
Optimizer and criterion used:
optimizer = optim.Adam(model.parameters())
criterion = nn.BCELoss()
model = model.to(device)
criterion = criterion.to(device)
Training function:
import torchmetrics as tm
metrics = tm.Accuracy()

def train(model, iterator, optimizer, criterion):
    # initialize every epoch
    epoch_loss = 0
    epoch_acc = 0

    # set the model in training phase
    model.train()

    for batch in iterator:
        # reset the gradients after every batch
        optimizer.zero_grad()

        # retrieve text and no. of words
        text, text_lengths = batch.text

        # convert to 1D tensor
        predictions = model(text, text_lengths).squeeze()

        # compute the loss
        loss = criterion(predictions, batch.label)

        # compute the accuracy
        # acc = binary_accuracy(predictions, batch.label)
        acc = metrics(predictions, batch.label)

        # backpropagate the loss and compute the gradients
        loss.backward()

        # update the weights
        optimizer.step()

        # accumulate loss and accuracy
        epoch_loss += loss.item()
        epoch_acc += acc.item()

    return epoch_loss / len(iterator), epoch_acc / len(iterator)
Full error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-60-eeabf5bacadf> in <module>()
5
6 #train the model
----> 7 train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
8
9 #evaluate the model
3 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in binary_cross_entropy(input, target, weight, size_average, reduce, reduction)
2906 raise ValueError(
2907 "Using a target size ({}) that is different to the input size ({}) is deprecated. "
-> 2908 "Please ensure they have the same size.".format(target.size(), input.size())
2909 )
2910
ValueError: Using a target size (torch.Size([16])) that is different to the input size (torch.Size([16, 2])) is deprecated. Please ensure they have the same size.
CodePudding user response:
What you want is CrossEntropyLoss instead of BCELoss.

BCELoss requires the input and target to have the same shape, so with num_output_nodes = 2 your predictions come out as [16, 2] while batch.label is [16] — exactly the mismatch the traceback reports. CrossEntropyLoss is designed for multiclass outputs: it takes raw logits of shape [batch, num_classes] and integer class labels of shape [batch], applying the softmax internally. That also means you should drop the final nn.Sigmoid() and return the unnormalized fc4 outputs from forward.
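Here is a minimal, self-contained sketch of the shape contract (the tensors are stand-ins for your variables, not your real pipeline):

import torch
import torch.nn as nn

batch_size, num_classes = 16, 2

# stand-ins: what forward() should return (no sigmoid) and batch.label as class ids
logits = torch.randn(batch_size, num_classes)          # raw fc4 outputs, shape [16, 2]
labels = torch.randint(0, num_classes, (batch_size,))  # class indices, shape [16], dtype int64

criterion = nn.CrossEntropyLoss()  # applies log-softmax + NLL internally
loss = criterion(logits, labels)   # input [16, 2] vs target [16] is exactly what it expects
print(loss.item())

In your posted code this amounts to returning dense_outputs directly from forward, replacing nn.BCELoss() with nn.CrossEntropyLoss(), and casting the labels with batch.label.long() if they are not already integers. Depending on your torchmetrics version, the accuracy metric may also need to be constructed as tm.Accuracy(task='multiclass', num_classes=2).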