How to increase the size of generated images in a DCGAN in Python?


I am trying to generate artificial images of size 128x128 with a DCGAN, but in all of the examples on the internet the generated image size is 64x64, and I don't know where I am making a mistake. Here is the code; I took it from the internet. When I run it for 64x64 images it runs perfectly fine, but for 128x128 images I get an error at the line disc_fake = disc(fake.detach()).reshape(-1) (line 90 in the traceback below).

Discriminator and Generator implementation from the DCGAN paper

import torch
import torch.nn as nn


class Discriminator(nn.Module):
    def __init__(self, channels_img, features_d):
        super(Discriminator, self).__init__()
        self.disc = nn.Sequential(
            # input: N x channels_img x 128 x 128
            nn.Conv2d(
                channels_img, features_d, kernel_size=4, stride=2, padding=1
            ),
            nn.LeakyReLU(0.2),
            # _block(in_channels, out_channels, kernel_size, stride, padding)
            self._block(features_d, features_d * 2, 4, 2, 1),
            self._block(features_d * 2, features_d * 4, 4, 2, 1),
            self._block(features_d * 4, features_d * 8, 4, 2, 1),
            self._block(features_d * 8, features_d * 16, 4, 2, 1),
            # After all _block img output is 4x4 (Conv2d below makes into 1x1)
            nn.Conv2d(features_d * 16, 1, kernel_size=4, stride=2, padding=0),
            nn.Sigmoid(),
        )

    def _block(self, in_channels, out_channels, kernel_size, stride, padding):
        return nn.Sequential(
            nn.Conv2d(
                in_channels,
                out_channels,
                kernel_size,
                stride,
                padding,
                bias=False,
            ),
            #nn.BatchNorm2d(out_channels),
            nn.LeakyReLU(0.2),
        )

    def forward(self, x):
        return self.disc(x)


class Generator(nn.Module):
    def __init__(self, channels_noise, channels_img, features_g):
        super(Generator, self).__init__()
        self.net = nn.Sequential(
            # Input: N x channels_noise x 1 x 1
            self._block(channels_noise, features_g * 32, 4, 1, 0),
            self._block(features_g * 32, features_g * 16, 4, 1, 1),  # img: 4x4
            self._block(features_g * 16, features_g * 8, 4, 2, 1),  # img: 8x8
            self._block(features_g * 8, features_g * 4, 4, 2, 1),  # img: 16x16
            self._block(features_g * 4, features_g * 2, 4, 2, 1),  # img: 32x32
            nn.ConvTranspose2d(
                features_g * 2, channels_img, kernel_size=4, stride=2, padding=1
            ),
            # Output: N x channels_img x 64 x 64
            nn.Tanh(),
        )

    def _block(self, in_channels, out_channels, kernel_size, stride, padding):
        return nn.Sequential(
            nn.ConvTranspose2d(
                in_channels,
                out_channels,
                kernel_size,
                stride,
                padding,
                bias=False,
            ),
            #nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        )

    def forward(self, x):
        return self.net(x)


def initialize_weights(model):
    # Initializes weights according to the DCGAN paper
    for m in model.modules():
        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d, nn.BatchNorm2d)):
            nn.init.normal_(m.weight.data, 0.0, 0.02)

Training of the DCGAN on a COVID dataset, with the Discriminator and Generator imported from models.py


import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.animation as animation
from IPython.display import HTML

from torch.utils.tensorboard import SummaryWriter




device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
LEARNING_RATE = 0.00025  # could also use two lrs, one for gen and one for disc
BATCH_SIZE = 128
IMAGE_SIZE = 128
CHANNELS_IMG = 3
NOISE_DIM = 128
NUM_EPOCHS = 7500
FEATURES_DISC = 128
FEATURES_GEN = 128


img_list = []
G_loss = []
D_loss = []

transforms = transforms.Compose(
    [
        transforms.Resize(IMAGE_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(
            [0.5 for _ in range(CHANNELS_IMG)], [0.5 for _ in range(CHANNELS_IMG)]
        ),
    ]
)

dataset = datasets.ImageFolder(root="/content/subset", transform=transforms)
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
gen = Generator(NOISE_DIM, CHANNELS_IMG, FEATURES_GEN).to(device)
disc = Discriminator(CHANNELS_IMG, FEATURES_DISC).to(device)
initialize_weights(gen)
initialize_weights(disc)

opt_gen = optim.Adam(gen.parameters(), lr=LEARNING_RATE, betas=(0.5, 0.999))
opt_disc = optim.Adam(disc.parameters(), lr=LEARNING_RATE, betas=(0.5, 0.999))
criterion = nn.BCELoss()

fixed_noise = torch.randn(128, NOISE_DIM, 1, 1).to(device)



writer_real = SummaryWriter(f"generated/real11")
writer_fake = SummaryWriter(f"generated/fake11")



step = 0

gen.train()
disc.train()

for epoch in range(NUM_EPOCHS):
    # Target labels not needed! <3 unsupervised
    for batch_idx, (real, _) in enumerate(dataloader):
        real = real.to(device)
        noise = torch.randn(BATCH_SIZE, NOISE_DIM, 1, 1).to(device)
        fake = gen(noise)


        ### Train Discriminator: max log(D(x)) + log(1 - D(G(z)))
        disc_real = disc(real).reshape(-1)
        loss_disc_real = criterion(disc_real, torch.ones_like(disc_real))
      

        disc_fake = disc(fake.detach()).reshape(-1)
        loss_disc_fake = criterion(disc_fake, torch.zeros_like(disc_fake))
        loss_disc = (loss_disc_real + loss_disc_fake) / 2
        disc.zero_grad()
        loss_disc.backward()
        opt_disc.step()

        ### Train Generator: min log(1 - D(G(z))) <-> max log(D(G(z)))
        output = disc(fake).reshape(-1)
        loss_gen = criterion(output, torch.ones_like(output))
        gen.zero_grad()
        loss_gen.backward()
        opt_gen.step()

        G_loss.append(loss_gen.item())
        D_loss.append(loss_disc.item())

        # Print losses occasionally and print to tensorboard
        if batch_idx in range(BATCH_SIZE):
            print(
                f"Epoch [{epoch}/{NUM_EPOCHS}] Batch {batch_idx}/{len(dataloader)} \
                  Loss D: {loss_disc:.4f}, loss G: {loss_gen:.4f}"
            )
    
            with torch.no_grad():
                fake = gen(fixed_noise)
                # take out (up to) 32 examples
                img_grid_real = torchvision.utils.make_grid(
                    real[:32], normalize=True
                )
                img_grid_fake = torchvision.utils.make_grid(
                    fake[:1], normalize=True
                )
                writer_real.add_image("Real1", img_grid_real, global_step=step)
                for batch_idx in range(BATCH_SIZE):
                  torchvision.utils.save_image(img_grid_fake, f"/content/generated/generated_image/Fake_image-{batch_idx}.png")

                writer_fake.add_image("Fake1", img_grid_fake, global_step=step)

            step += 1


This is the error I get when I run this code:

RuntimeError                              Traceback (most recent call last)
<ipython-input-5-8a323fce319d> in <module>()
     88 
     89 
---> 90         disc_fake = disc(fake.detach()).reshape(-1)
     91         loss_disc_fake = criterion(disc_fake, torch.zeros_like(disc_fake))
     92         loss_disc = (loss_disc_real + loss_disc_fake) / 2
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/conv.py in _conv_forward(self, input, weight, bias)
    442                             _pair(0), self.dilation, self.groups)
    443         return F.conv2d(input, weight, bias, self.stride,
--> 444                         self.padding, self.dilation, self.groups)
    445 
    446     def forward(self, input: Tensor) -> Tensor:

RuntimeError: Calculated padded input size per channel: (2 x 2). Kernel size: (4 x 4). Kernel size can't be greater than actual input size

CodePudding user response:

Your generator is in charge of the output size of the image: it builds the image up through its stack of upsampling blocks (the model layers).

As such, if you want to output a different size, you need to make the block layers match your desired output shape, for example by adding another block like so:

self._block(features_g * 4, features_g * 2, 4, 2, 1),  # img: 32x32
self._block(features_g * 2, features_g, 4, 2, 1),  # img: 64x64  # ADD THIS LINE
nn.ConvTranspose2d(
    features_g, channels_img, kernel_size=4, stride=2, padding=1  # in_channels changed to match the new block
),
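Whichever way you adjust the layers, a quick sanity check before training is to push a small dummy noise batch through both networks and confirm the shapes line up. This is only a minimal sketch; it assumes the hyperparameters defined in the training script above (NOISE_DIM = 128, CHANNELS_IMG = 3, FEATURES_GEN = FEATURES_DISC = 128):

gen = Generator(NOISE_DIM, CHANNELS_IMG, FEATURES_GEN).to(device)
disc = Discriminator(CHANNELS_IMG, FEATURES_DISC).to(device)

with torch.no_grad():
    noise = torch.randn(2, NOISE_DIM, 1, 1).to(device)  # tiny dummy batch
    fake = gen(noise)
    print(fake.shape)        # expect torch.Size([2, 3, 128, 128]) for a 128x128 setup
    print(disc(fake).shape)  # expect torch.Size([2, 1, 1, 1])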

CodePudding user response:

I checked the code, and the issue stems from the fact that you are expecting your Generator to generate images of dimension 128x3x128x128 (batch_size x channels x image_dim x image_dim). However, the way you have written the ConvTranspose2d operations, that is not the case.

I checked the output from the intermediate layers, and your Generator is actually producing images of dimension 128x3x80x80, which is a size mismatch, since your Discriminator expects input images of dimension 128x3x128x128.

Here are the shapes of the intermediate outputs from your Generator's ConvTranspose2d operations (a short sketch for printing them yourself follows the list) -

torch.Size([128, 4096, 4, 4])
torch.Size([128, 2048, 5, 5])
torch.Size([128, 1024, 10, 10])
torch.Size([128, 512, 20, 20])
torch.Size([128, 256, 40, 40])
torch.Size([128, 3, 80, 80])
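For reference, one way to print those intermediate shapes is to walk through the layers of gen.net with a dummy noise batch. A minimal sketch, assuming the hyperparameters from the training script above (BATCH_SIZE = 128, NOISE_DIM = 128):

with torch.no_grad():
    x = torch.randn(BATCH_SIZE, NOISE_DIM, 1, 1).to(device)
    for layer in gen.net:
        x = layer(x)
        # print after each upsampling stage, skipping the final Tanh
        if not isinstance(layer, nn.Tanh):
            print(x.shape)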

I'd suggest you modify your Generator's ConvTranspose2d parameters as follows -

class Generator(nn.Module):
    def __init__(self, channels_noise, channels_img, features_g):
        super(Generator, self).__init__()
        self.net = nn.Sequential(
            self._block(channels_noise, features_g*32, 4, 1, 0),
            self._block(features_g*32, features_g*16, 4, 2, 1),
            self._block(features_g*16, features_g*8, 4, 2, 1),
            self._block(features_g*8, features_g*4, 4, 2, 1),
            self._block(features_g*4, features_g*2, 4, 2, 1),
            nn.ConvTranspose2d(features_g*2, channels_img, kernel_size=4, stride=2, padding=1),
            # Output: N x channels_img x 128 x 128
            nn.Tanh(),
        )

    def _block(self, in_channels, out_channels, kernel_size, stride, padding):
        return nn.Sequential(
            nn.ConvTranspose2d(
                in_channels,
                out_channels,
                kernel_size,
                stride,
                padding,
                bias=False,
            ),
            #nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        )

    def forward(self, x):
        return self.net(x)

This produces the required dimension of 128x3x128x128. The intermediate dimensions are as follows -

torch.Size([128, 4096, 4, 4])
torch.Size([128, 2048, 8, 8])
torch.Size([128, 1024, 16, 16])
torch.Size([128, 512, 32, 32])
torch.Size([128, 256, 64, 64])
torch.Size([128, 3, 128, 128])

Just replace the Generator with this one and your code should work for images of dimension 3x128x128.
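For completeness, the reason this works: with dilation 1 and no output padding, a ConvTranspose2d layer produces an output of spatial size (H_in - 1) * stride - 2 * padding + kernel_size. A kernel-4 / stride-2 / padding-1 block therefore exactly doubles the resolution, while the kernel-4 / stride-1 / padding-1 block in the original code only grew 4x4 to 5x5, which is where the odd 80x80 output came from. A small sketch of that arithmetic for the corrected Generator (the helper function is just for illustration):

def convtranspose2d_out(h_in, kernel_size=4, stride=2, padding=1):
    # standard ConvTranspose2d output-size formula (dilation=1, output_padding=0)
    return (h_in - 1) * stride - 2 * padding + kernel_size

h = 1  # the noise enters as a 1x1 "image"
for stride, padding in [(1, 0), (2, 1), (2, 1), (2, 1), (2, 1), (2, 1)]:
    h = convtranspose2d_out(h, 4, stride, padding)
    print(h)  # 4, 8, 16, 32, 64, 128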
