Expected scalar type Double but found Float


I am getting this error while trying to feed an input image batch to my PyTorch model:

"RuntimeError: Given groups=1, weight of size [64, 3, 4, 4], expected input[5, 96, 96, 3] to have 3 channels, but got 96 channels instead".

I read the images with skimage. My images are 96x96 and the batch size is 5. Here is my Generator class:

import torch.nn as nn

class Generator(nn.Module):
  def __init__(self):
    super().__init__()

    def downsample(input_filters, output_filters, normalize=True):
      layers = [nn.Conv2d(input_filters, output_filters, kernel_size=4, padding=1, stride=2)]

      if normalize:
        layers.append(nn.BatchNorm2d(output_filters, 0.8))
      
      layers.append(nn.LeakyReLU(0.2))

      return layers
    
    def upsample(input_filters, output_filters, normalize=True, last_layer=False):
      layers = [nn.ConvTranspose2d(input_filters, output_filters, kernel_size=4, stride=2, padding=1)]

      if normalize:
        layers.append(nn.BatchNorm2d(output_filters, 0.8))

      if not last_layer:
        layers.append(nn.ReLU())

      return layers
    
    self.model = nn.Sequential(
        *downsample(3, 64, normalize=False), # 96x96
        *downsample(64, 64), # 48x48
        *downsample(64, 128), # 24x24
        *downsample(128, 256), # 12x12
        *downsample(256, 512), # 6x6
        nn.Conv2d(512, 4000, kernel_size=4), # 3x3
        *upsample(4000, 512), # 6x6
        *upsample(512, 256), # 12x12
        *upsample(256, 128), # 24x24
        *upsample(128, 64), # 48x48
        *upsample(64, 64), # 96x96
        *upsample(64, 3, last_layer=True), # 192x192
        nn.Tanh()
    )

  def forward(self, x):
    return self.model(x)
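
For reference, a quick shape check with a random channels-first batch (a minimal sketch, assuming the masked 192x192 image is the intended input) reproduces the sizes noted in the comments:

import torch

gen = Generator()
dummy = torch.randn(5, 3, 192, 192)   # batch of 5 RGB images, channels-first
print(gen(dummy).shape)               # torch.Size([5, 3, 192, 192])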
  

Here is my Dataset Class:

import glob

from skimage import io, transform
from torch.utils.data import Dataset

class OutpaintDataset(Dataset):
  def __init__(self, data_path, input_size, output_size):
    self.data_path = data_path
    self.input_size = input_size
    self.output_size = output_size
    self.image_names = glob.glob(data_path)

  def outpaint(self, image):
    # white out a border of width input_size/2 around the image,
    # keeping only the central input_size x input_size region visible
    masked = image
    mask_size = int(self.input_size/2)

    masked[:, :mask_size, :] = 1
    masked[:, -1*mask_size:, :] = 1
    masked[:mask_size, :, :] = 1
    masked[-1*mask_size:, :, :] = 1

    return masked

  def custom_resize(self, image, size):
    return transform.resize(image, (size, size))
  
  def __len__(self):
    return len(self.image_names)
  
  def __getitem__(self, index):
    image = io.imread(self.image_names[index])

    # image to size of (96, 96)
    input_image = self.custom_resize(image, self.input_size)

    # image to size of (192, 192)
    ground_image = self.custom_resize(image, self.output_size)
    masked_image = self.outpaint(image=ground_image.copy())

    return input_image, masked_image, ground_image
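
A side note on dtypes, which matters for the error in the title: skimage.transform.resize returns float64 arrays, so the default collate in a DataLoader stacks the samples into double-precision, channels-last tensors. A minimal sketch (the glob pattern and sizes below are placeholders):

from torch.utils.data import DataLoader

dataset = OutpaintDataset("images/*.jpg", input_size=96, output_size=192)  # hypothetical path
loader = DataLoader(dataset, batch_size=5, shuffle=True)

input_image, masked_image, ground_image = next(iter(loader))
print(masked_image.shape)   # torch.Size([5, 192, 192, 3]) -- channels-last
print(masked_image.dtype)   # torch.float64, because transform.resize returns float64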

--UPDATE--


I changed the shape of the given image batch from torch.Size([5, 192, 192, 3]) to torch.Size([5, 3, 192, 192]). Now I am getting a new error: RuntimeError: expected scalar type Double but found Float.

I reshape and use my images in the following code block:

for i, data in enumerate(train_loader, 0):
  input_image, masked_image, ground_image = data

  reshaped = masked_image.permute(0, 3, 1, 2)   # NHWC -> NCHW
  reshaped = reshaped.type(torch.double)        # cast to float64

  output = generator(reshaped)
  break
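
For what it's worth, a quick dtype check at this point (a sketch, not a definitive fix) shows where the mismatch comes from: the generator's weights are float32 by default, while .type(torch.double) casts the input to float64.

print(next(generator.parameters()).dtype)  # torch.float32 (default parameter dtype)
print(reshaped.dtype)                      # torch.float64 after .type(torch.double)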

CodePudding user response:

I usually print the model summary with the torchinfo library to debug such errors. Your input should have the shape [5, 3, 192, 192]. If the image size is 96x96, the feature map is already smaller than 4x4 before the Conv2d with kernel_size=4 is applied, so that layer raises an error. The summary shows the output size after each layer along with the number of parameters.
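
For example, a summary call might look like this (a sketch, assuming the Generator class from the question):

from torchinfo import summary

summary(Generator(), input_size=(5, 3, 192, 192))  # prints output shape and parameter count per layer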

CodePudding user response:

You need to convert 96x96 images to 1x96x96 for one channel or 3x96x96 for three channels. Try indexing with [None, ...] for each image, like below:

>>> torch.rand(96,96).size()
torch.Size([96, 96])

>>> torch.rand(96,96)[None, ...].size()
torch.Size([1, 96, 96])
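
For three-channel images read in (H, W, C) order, the same idea applies, but with permute to move the channel axis to the front:

>>> torch.rand(96, 96, 3).permute(2, 0, 1).size()
torch.Size([3, 96, 96])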