Good day!
I've been struggling with the following error: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument target in method wrapper_nll_loss_forward)
I've been unable to locate where I need to add a part of my model to the GPU. From the error message I gather it should be in the loss function but I've tried all places I could think off, related to the loss function and have been unable to solve it.
Would love some help with this.
My full code can be found here:
https://huggingface.co/AFAD85/CNN_apples/blob/main/CNN paper clone pytorch.ipynb
I tried to isolate all possibly relevant code below:
`transformer = transforms.Compose([
transforms.Resize((350,350)),
transforms.ToTensor(),
transforms.Normalize([0.5,0.5,0.5],
[0.5,0.5,0.5])
])`
`class ConvNet(nn.Module):
def __init__(self,num_classes=4):
super(ConvNet,self).__init__()
self.conv1 = nn.Conv2d(in_channels=3,out_channels=128,kernel_size=3,stride=1,padding='valid')
self.bn1 = nn.BatchNorm2d(num_features=128)
self.relu1 = nn.ReLU()
self.pool1 = nn.MaxPool2d(kernel_size=2)
self.conv2 = nn.Conv2d(in_channels=128,out_channels=64,kernel_size=3,stride=1,padding='valid')
self.bn2 = nn.BatchNorm2d(num_features=64)
self.relu2 = nn.ReLU()
self.pool2 = nn.MaxPool2d(kernel_size=2)
self.conv3 = nn.Conv2d(in_channels=64,out_channels=64,kernel_size=3,stride=1,padding='valid')
self.bn3 = nn.BatchNorm2d(num_features=64)
self.relu3 = nn.ReLU()
self.pool3 = nn.MaxPool2d(kernel_size=2)
self.conv4 = nn.Conv2d(in_channels=64,out_channels=32,kernel_size=3,stride=1,padding='valid')
self.bn4 = nn.BatchNorm2d(num_features=32)
self.relu4 = nn.ReLU()
self.pool4 = nn.MaxPool2d(kernel_size=2)
self.conv5 = nn.Conv2d(in_channels=32,out_channels=32,kernel_size=3,stride=1,padding='valid')
self.bn5 = nn.BatchNorm2d(num_features=32)
self.relu5 = nn.ReLU()
self.pool5 = nn.MaxPool2d(kernel_size=2)
self.flat = nn.Flatten()
self.fc1 = nn.Linear(in_features=2592, out_features = 256)
self.fc2 = nn.Linear(in_features=256, out_features = num_classes)
def forward(self,input):
output = self.conv1(input)
output = self.bn1(output)
output = self.relu1(output)
output = self.pool1(output)
output = self.conv2(output)
output = self.bn2(output)
output = self.relu2(output)
output = self.pool2(output)
output = self.conv3(output)
output = self.bn3(output)
output = self.relu3(output)
output = self.pool3(output)
output = self.conv4(output)
output = self.bn4(output)
output = self.relu4(output)
output = self.pool4(output)
output = self.conv5(output)
output = self.bn5(output)
output = self.relu5(output)
output = self.pool5(output)
# output = output.view(-1,32,9,9)
output = self.flat(output)
output = self.fc1(output)
output = self.fc2(output)
return output`
model = ConvNet(num_classes=4).to(device)
optimizer = Adam(model.parameters(),lr=0.001,weight_decay=0.0001)
loss_function = nn.CrossEntropyLoss()
`best_accuracy = 0.0
for epoch in range(num_epochs):
model.train()
train_accuracy = 0.0
train_loss = 0.0
for i, (images,labels) in enumerate(train_loader):
if torch.cuda.is_available():
images = Variable(images.cuda())
lables = Variable(labels.cuda())
optimizer.zero_grad()
outputs = model(images)
loss = loss_function(outputs,labels)
loss.backward()
optimizer.step()
train_loss = loss.cpu().data*images.size(0)
_.prediction = torch.max(outputs.data,1)
train_accuracy = int(torch.sum(prediction==labels.data))
train_accuracy = train_accuracy/train_count
train_loss = train_loss/train_count
#test set evalueren
model.eval()
test_accuracy = 0.0
for i, (images,labels) in enumerate(train_loader):
if torch.cuda.is_available():
images = Variable(images.cuda())
lables = Variable(labels.cuda())
outputs = model(images)
_.prediction = torch.max(outputs.data,1)
test_accuracy = test_accuracy/test_count
print('Epoch: ' str(epoch) ' Train Loss: ' str(int(train_loss))) ' Train Accuracy: ' str(train_accuracy) ' Test Accuracy: ' str(test_accuracy)
if test_accuracy > best_accuracy:
torch.save(model.state_dict(), 'best_checkpoint.model')`
I tried to have the model run an epoch, expecting it to run on the GPU.
I tried adding .cuda() and .to_device() in all places where I expected the problem might lie, but was unable to find the correct one.
CodePudding user response:
It was due to a typo
lables = Variable(labels.cuda())
where lables
should be labels
.