I have been trying to use this image classification model I found online to classify traffic signs. The original code classifies image files saved on the device. I am trying to make it classify live video from my webcam, but I ran into this error. What changes should I make to the webcam image or the model?
test_transforms = transforms.Compose([
    transforms.Resize([112, 112]),
    transforms.ToTensor()
])

while True:
    with torch.no_grad():
        model.eval()
        isTrue, frame = capture.read()
        cv2.imshow('Video', frame)
        cv2.waitKey(16)
        PIL_image = Image.fromarray(np.uint8(frame)).convert('RGB')
        PIL_image = Image.fromarray(frame.astype('uint8'), 'RGB')
        img = test_transforms(PIL_image)
        print(img.shape)
        y_test_pred = model(img)
The error:
RuntimeError Traceback (most recent call last)
Cell In [8], line 40
27 print(img.shape)
28 # img.size
29 # numpyimg = asarray(frame)
30
(...)
38
39 # print(image)
---> 40 y_test_pred = model(img)
41 # print(y_test_pred)
42 # y_pred_softmax = torch.log_softmax(y_test_pred[0], dim=1)
43 # _, y_pred_tags = torch.max(y_pred_softmax, dim=1)
File c:\Users\lenovo\AppData\Local\Programs\Python\Python39\lib\site-packages\torch\nn\modules\module.py:1194, in Module._call_impl(self, *input, **kwargs)
1190 # If we don't have any hooks, we want to skip the rest of the logic in
1191 # this function, and just call forward.
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
Cell In [5], line 42, in AlexnetTS.forward(self, x)
40 x = self.features(x)
41 h = x.view(x.shape[0], -1)
---> 42 x = self.classifier(h)
43 return x, h
File c:\Users\lenovo\AppData\Local\Programs\Python\Python39\lib\site-packages\torch\nn\modules\module.py:1194, in Module._call_impl(self, *input, **kwargs)
1190 # If we don't have any hooks, we want to skip the rest of the logic in
1191 # this function, and just call forward.
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
File c:\Users\lenovo\AppData\Local\Programs\Python\Python39\lib\site-packages\torch\nn\modules\container.py:204, in Sequential.forward(self, input)
202 def forward(self, input):
203 for module in self:
--> 204 input = module(input)
205 return input
File c:\Users\lenovo\AppData\Local\Programs\Python\Python39\lib\site-packages\torch\nn\modules\module.py:1194, in Module._call_impl(self, *input, **kwargs)
1190 # If we don't have any hooks, we want to skip the rest of the logic in
1191 # this function, and just call forward.
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
File c:\Users\lenovo\AppData\Local\Programs\Python\Python39\lib\site-packages\torch\nn\modules\linear.py:114, in Linear.forward(self, input)
113 def forward(self, input: Tensor) -> Tensor:
--> 114 return F.linear(input, self.weight, self.bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (256x196 and 12544x1000)
The neural network model:
class AlexnetTS(nn.Module):
    def __init__(self, output_dim):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=2, padding=1),
            nn.MaxPool2d(kernel_size=2),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=64, out_channels=192, kernel_size=3, padding=1),
            nn.MaxPool2d(kernel_size=2),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=192, out_channels=384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.MaxPool2d(kernel_size=2),
            nn.ReLU(inplace=True),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(256*7*7, 1000),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(in_features=1000, out_features=256),
            nn.ReLU(inplace=True),
            nn.Linear(256, output_dim)
        )

    def forward(self, x):
        x = self.features(x)
        h = x.view(x.shape[0], -1)
        x = self.classifier(h)
        return x, h
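(For reference: the first nn.Linear layer expects 256*7*7 = 12544 input features, which is exactly the feature-map size this network produces for a 112x112 input. A quick shape check confirms it; output_dim=43 below is only an illustrative value, e.g. the 43 GTSRB traffic-sign classes, not something taken from the post.)

    # Sanity check: flattened feature size for a single 112x112 RGB input.
    model = AlexnetTS(output_dim=43)        # 43 is an assumed class count
    dummy = torch.randn(1, 3, 112, 112)     # a batch of one image
    out = model.features(dummy)
    print(out.shape)                        # torch.Size([1, 256, 7, 7]) -> 256*7*7 = 12544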
Answer:
You should pass the image through the model as a batch of one, i.e. with shape [batch_size, channels, height, width] where batch_size=1. As written, img has only three dimensions ([channels, height, width]), so h = x.view(x.shape[0], -1) in forward treats the 256 feature channels as the batch dimension, and the classifier receives a 256-row matrix instead of a single 12544-element row. That is the shape mismatch in the error. Add the batch dimension before the forward pass with img = img.unsqueeze(0), as follows:
test_transforms = transforms.Compose([
    transforms.Resize([112, 112]),
    transforms.ToTensor()
])

while True:
    with torch.no_grad():
        model.eval()  # could also be called once, before the loop
        isTrue, frame = capture.read()
        cv2.imshow('Video', frame)
        cv2.waitKey(16)
        PIL_image = Image.fromarray(frame.astype('uint8'), 'RGB')  # numpy frame -> PIL image
        img = test_transforms(PIL_image)
        print(img.shape)        # torch.Size([3, 112, 112])
        img = img.unsqueeze(0)  # add the batch dimension -> torch.Size([1, 3, 112, 112])
        y_test_pred = model(img)
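Two side notes, both assumptions about the surrounding code rather than part of the fix above: capture.read() returns frames in BGR channel order, so converting to RGB before building the PIL image keeps the colors consistent with PIL/torchvision conventions, and because forward returns the tuple (x, h), the predicted class has to be read from the first element. A minimal sketch:

    # Sketch, reusing 'frame', 'test_transforms', and 'model' from above.
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # OpenCV gives BGR; PIL expects RGB
    img = test_transforms(Image.fromarray(rgb_frame)).unsqueeze(0)
    logits, features = model(img)                       # forward returns (x, h)
    pred_class = torch.argmax(logits, dim=1).item()     # index of the highest-scoring class
    print(pred_class)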