I have been trying to use this image classification model I found online to classify traffic signs. The original code classifies image files saved on the device. I am trying to make it classify live video from my webcam, but I ran into this error. What changes should I make to the webcam image or the model?
test_transforms = transforms.Compose([
    transforms.Resize([112, 112]),
    transforms.ToTensor()
])

while True:
    with torch.no_grad():
        model.eval()
        isTrue, frame = capture.read()
        cv2.imshow('Video', frame)
        cv2.waitKey(16)
        PIL_image = Image.fromarray(np.uint8(frame)).convert('RGB')
        PIL_image = Image.fromarray(frame.astype('uint8'), 'RGB')
        img = test_transforms(PIL_image)
        print(img.shape)
        y_test_pred = model(img)
The error:
RuntimeError Traceback (most recent call last)
Cell In [8], line 40
27 print(img.shape)
28 # img.size
29 # numpyimg = asarray(frame)
30
(...)
38
39 # print(image)
---> 40 y_test_pred = model(img)
41 # print(y_test_pred)
42 # y_pred_softmax = torch.log_softmax(y_test_pred[0], dim=1)
43 # _, y_pred_tags = torch.max(y_pred_softmax, dim=1)
File c:\Users\lenovo\AppData\Local\Programs\Python\Python39\lib\site-packages\torch\nn\modules\module.py:1194, in Module._call_impl(self, *input, **kwargs)
1190 # If we don't have any hooks, we want to skip the rest of the logic in
1191 # this function, and just call forward.
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
Cell In [5], line 42, in AlexnetTS.forward(self, x)
40 x = self.features(x)
41 h = x.view(x.shape[0], -1)
---> 42 x = self.classifier(h)
43 return x, h
File c:\Users\lenovo\AppData\Local\Programs\Python\Python39\lib\site-packages\torch\nn\modules\module.py:1194, in Module._call_impl(self, *input, **kwargs)
1190 # If we don't have any hooks, we want to skip the rest of the logic in
1191 # this function, and just call forward.
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
File c:\Users\lenovo\AppData\Local\Programs\Python\Python39\lib\site-packages\torch\nn\modules\container.py:204, in Sequential.forward(self, input)
202 def forward(self, input):
203 for module in self:
--> 204 input = module(input)
205 return input
File c:\Users\lenovo\AppData\Local\Programs\Python\Python39\lib\site-packages\torch\nn\modules\module.py:1194, in Module._call_impl(self, *input, **kwargs)
1190 # If we don't have any hooks, we want to skip the rest of the logic in
1191 # this function, and just call forward.
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
File c:\Users\lenovo\AppData\Local\Programs\Python\Python39\lib\site-packages\torch\nn\modules\linear.py:114, in Linear.forward(self, input)
113 def forward(self, input: Tensor) -> Tensor:
--> 114 return F.linear(input, self.weight, self.bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (256x196 and 12544x1000)
The neural network model:
class AlexnetTS(nn.Module):
    def __init__(self, output_dim):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=2, padding=1),
            nn.MaxPool2d(kernel_size=2),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=64, out_channels=192, kernel_size=3, padding=1),
            nn.MaxPool2d(kernel_size=2),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=192, out_channels=384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.MaxPool2d(kernel_size=2),
            nn.ReLU(inplace=True),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(256*7*7, 1000),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(in_features=1000, out_features=256),
            nn.ReLU(inplace=True),
            nn.Linear(256, output_dim)
        )

    def forward(self, x):
        x = self.features(x)
        h = x.view(x.shape[0], -1)
        x = self.classifier(h)
        return x, h
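(For reference: the first nn.Linear layer expects 256*7*7 = 12544 input features, which is exactly the feature-map size this network produces for a 112x112 input. A quick shape check confirms it; output_dim=43 below is only an illustrative value, e.g. the 43 GTSRB traffic-sign classes, not something taken from the post.)

    # Sanity check: flattened feature size for a single 112x112 RGB input.
    model = AlexnetTS(output_dim=43)        # 43 is an assumed class count
    dummy = torch.randn(1, 3, 112, 112)     # a batch of one image
    out = model.features(dummy)
    print(out.shape)                        # torch.Size([1, 256, 7, 7]) -> 256*7*7 = 12544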
Answer:
You should pass the image through the model as a batch of one, i.e. with shape [batch_size, channels, height, width] where batch_size=1. As written, img has only three dimensions ([channels, height, width]), so h = x.view(x.shape[0], -1) in forward treats the 256 feature channels as the batch dimension, and the classifier receives a 256-row matrix instead of a single 12544-element row. That is the shape mismatch in the error. Add the batch dimension before the forward pass with img = img.unsqueeze(0), as follows:
test_transforms = transforms.Compose([
    transforms.Resize([112, 112]),
    transforms.ToTensor()
])

while True:
    with torch.no_grad():
        model.eval()  # could also be called once, before the loop
        isTrue, frame = capture.read()
        cv2.imshow('Video', frame)
        cv2.waitKey(16)
        PIL_image = Image.fromarray(frame.astype('uint8'), 'RGB')  # numpy frame -> PIL image
        img = test_transforms(PIL_image)
        print(img.shape)        # torch.Size([3, 112, 112])
        img = img.unsqueeze(0)  # add the batch dimension -> torch.Size([1, 3, 112, 112])
        y_test_pred = model(img)
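Two side notes, both assumptions about the surrounding code rather than part of the fix above: capture.read() returns frames in BGR channel order, so converting to RGB before building the PIL image keeps the colors consistent with PIL/torchvision conventions, and because forward returns the tuple (x, h), the predicted class has to be read from the first element. A minimal sketch:

    # Sketch, reusing 'frame', 'test_transforms', and 'model' from above.
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # OpenCV gives BGR; PIL expects RGB
    img = test_transforms(Image.fromarray(rgb_frame)).unsqueeze(0)
    logits, features = model(img)                       # forward returns (x, h)
    pred_class = torch.argmax(logits, dim=1).item()     # index of the highest-scoring class
    print(pred_class)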