I am new to PyTorch and Machine Learning so I try to follow the tutorial from here: https://medium.com/@nutanbhogendrasharma/pytorch-convolutional-neural-network-with-mnist-dataset-4e8a4265e118
After copying the code step by step, I got the following error for no apparent reason. I tried the program on another computer and it gives a syntax error there, even though my IDE didn't warn me about any syntax problems. I am really confused about how to fix the issue. Any help is appreciated.
RuntimeError: DataLoader worker exited unexpectedly
Here is the code.
import torch
from torchvision import datasets
from torchvision.transforms import ToTensor
import torch.nn as nn
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from torch import optim
from torch.autograd import Variable
# Load the MNIST training split from ./data (fetched on first use) with
# every image converted to a tensor.
train_data = datasets.MNIST(root='data', train=True, download=True, transform=ToTensor())
# The test split is read from the same root directory; no download flag is
# passed for it.
test_data = datasets.MNIST(root='data', train=False, transform=ToTensor())

# Print a summary of each dataset, then the shapes of the raw training
# images and of their labels.
print(train_data)
print(test_data)
print(train_data.data.shape)
print(train_data.targets.shape)

# Preview the first training image, using its label as the title.
plt.imshow(train_data.data[0], cmap='gray')
plt.title('%i' % train_data.targets[0])
plt.show()
# Show a cols x rows grid of randomly chosen training digits, each titled
# with its label.
figure = plt.figure(figsize=(10, 8))
cols, rows = 5, 5
# Subplot positions are 1-based, so iterate 1..cols*rows inclusive.
for i in range(1, cols * rows + 1):
    # Pick a random index into the training set.
    sample_idx = torch.randint(len(train_data), size=(1,)).item()
    img, label = train_data[sample_idx]
    figure.add_subplot(rows, cols, i)
    plt.title(label)
    plt.axis("off")
    # squeeze() removes size-1 dimensions so imshow receives a 2-D image.
    plt.imshow(img.squeeze(), cmap="gray")
plt.show()
# Batch iterators for both splits, keyed by split name.
#
# num_workers=0 loads batches in the main process. The original value of 1
# starts a worker subprocess; in a script without an
# `if __name__ == "__main__":` guard (or inside a notebook) that worker can
# fail to start, which surfaces as
# "RuntimeError: DataLoader worker exited unexpectedly".
loaders = {
    'train': DataLoader(train_data,
                        batch_size=100,
                        shuffle=True,
                        num_workers=0),
    'test': DataLoader(test_data,
                       batch_size=100,
                       shuffle=True,
                       num_workers=0),
}
class CNN(nn.Module):
    """Two-stage convolutional classifier with a 10-way linear output layer.

    Each stage is Conv2d(kernel 5, stride 1, padding 2) -> ReLU ->
    MaxPool2d(2). The padding keeps the spatial size through the
    convolution and each pooling halves it, so a 1x28x28 input becomes
    32x7x7 before the linear layer.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 1 input channel -> 16 feature maps, then 2x2 max-pooling.
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=1,
                out_channels=16,
                kernel_size=5,
                stride=1,
                padding=2,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        # Stage 2: 16 -> 32 feature maps, same kernel/stride/padding.
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        # fully connected layer, output 10 classes
        self.out = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        """Return ``(logits, features)`` for a batch of images.

        ``logits`` has shape (batch_size, 10); ``features`` is the
        flattened conv2 output of shape (batch_size, 32 * 7 * 7), returned
        so callers can visualize it.
        """
        x = self.conv1(x)
        x = self.conv2(x)
        # flatten the output of conv2 to (batch_size, 32 * 7 * 7)
        x = x.view(x.size(0), -1)
        output = self.out(x)
        return output, x  # return x for visualization
# Number of passes over the training set.
num_epochs = 10

# Build the network, the cross-entropy loss, and an Adam optimizer
# (learning rate 0.01) over the network's parameters.
cnn = CNN()
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(cnn.parameters(), lr=0.01)

# Echo each object's configuration, in the same order as it was created.
print(cnn)
print(loss_func)
print(optimizer)
def train(num_epochs, cnn, loaders, criterion=None, opt=None):
    """Train ``cnn`` on ``loaders['train']`` for ``num_epochs`` epochs.

    Args:
        num_epochs: number of full passes over the training loader.
        cnn: model whose forward pass returns a tuple with the logits as
            its first element.
        loaders: mapping with a ``'train'`` entry that yields
            ``(images, labels)`` batches and supports ``len()``.
        criterion: loss callable ``criterion(logits, labels)``. Defaults
            to the module-level ``loss_func``.
        opt: optimizer providing ``zero_grad()`` and ``step()``. Defaults
            to the module-level ``optimizer``.

    Prints the current loss every 100 steps. Returns ``None``.
    """
    # Fall back to the module-level objects the original script relied on.
    if criterion is None:
        criterion = loss_func
    if opt is None:
        opt = optimizer

    cnn.train()

    # Train the model
    total_step = len(loaders['train'])
    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(loaders['train']):
            # Batches are used directly; the deprecated Variable wrapper is
            # not needed for autograd.
            output = cnn(images)[0]
            loss = criterion(output, labels)

            # clear gradients for this training step
            opt.zero_grad()
            # backpropagation, compute gradients
            loss.backward()
            # apply gradients
            opt.step()

            # Steps and epochs are reported 1-based.
            if (i + 1) % 100 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                      .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))
# Run the training loop: num_epochs passes over loaders['train'].
train(num_epochs, cnn, loaders)
def evalFunc(model=None, data_loader=None):
    """Evaluate a model over a whole loader and print its overall accuracy.

    Args:
        model: module whose forward pass returns ``(logits, features)``.
            Defaults to the module-level ``cnn``.
        data_loader: iterable of ``(images, labels)`` batches. Defaults to
            the module-level ``loaders['test']``.

    Returns:
        The fraction of correctly classified samples over all batches, or
        0.0 when the loader yields no samples.
    """
    # Fall back to the module-level objects the original script relied on.
    if model is None:
        model = cnn
    if data_loader is None:
        data_loader = loaders['test']

    # Test the model
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in data_loader:
            test_output, _ = model(images)
            # Index of the largest logit per row is the predicted class.
            pred_y = torch.max(test_output, 1)[1]
            # Accumulate over every batch rather than keeping only the
            # last batch's accuracy.
            correct += (pred_y == labels).sum().item()
            total += labels.size(0)

    # Avoid dividing by zero when the loader is empty.
    accuracy = correct / total if total else 0.0
    print('Test Accuracy of the model on the 10000 test images: %.2f' % accuracy)
    return accuracy
# Evaluate the network on the test loader and print the resulting accuracy.
evalFunc()
# Take one batch from the test loader and compare the model's predictions
# with the true labels for its first ten images.
sample = next(iter(loaders['test']))
imgs, lbls = sample[0], sample[1]

# Forward pass on the first ten images; the second output is the flattened
# feature tensor.
test_output, last_layer = cnn(imgs[:10])
# The index of the largest logit in each row is the predicted digit.
pred_y = torch.max(test_output, dim=1).indices.data.numpy().squeeze()
actual_number = lbls[:10].numpy()

print(f'Prediction number: {pred_y}')
print(f'Actual number: {actual_number}')
CodePudding user response:
If you are working in a Jupyter notebook, the problem is most likely the num_workers argument of DataLoader. You should set num_workers=0. You can find here some solutions to follow. Unfortunately, Jupyter notebooks have some issues with running multiprocessing.