Using unlabelled custom images instead of MNIST and CIFAR for a simple GAN with PyTorch


I am trying to replace the standardized datasets that ship with PyTorch, such as MNIST and CIFAR, with unlabeled custom images in PNG format in a simple GAN. Unfortunately, most examples use exactly these datasets and don't show the process of preparing and feeding custom data into a GAN. I have stored my PNG images (336x336, RGB) in the working directory of VS Code. Could you please suggest how to proceed? Below is my current code, in which I would like to replace MNIST with my own images to generate new ones (from # PREPARING TRAINING DATA to # PLOTTING SAMPLES):

import torch
from torch import nn

import math
import matplotlib.pyplot as plt
import torchvision
import torchvision.transforms as transforms

torch.manual_seed(111)

# DEVICE
device = ""
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(device)

# PREPARING TRAINING DATA

transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
)

#  LOADING DATA
train_set = torchvision.datasets.MNIST(
    root=".", train=True, download=True, transform=transform
)

# CREATE DATALOADER

batch_size = 32
train_loader = torch.utils.data.DataLoader(
    train_set, batch_size=batch_size, shuffle=True
)

# PLOTTING SAMPLES

real_samples, mnist_labels = next(iter(train_loader))
for i in range(16):
    ax = plt.subplot(4, 4, i + 1)
    plt.imshow(real_samples[i].reshape(28, 28), cmap="gray_r")
    plt.xticks([])
    plt.yticks([])
plt.show()

# IMPLEMENTING DISCRIMINATOR AND GENERATOR


class Discriminator(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(784, 1024),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        x = x.view(x.size(0), 784)
        output = self.model(x)
        return output


discriminator = Discriminator().to(device=device)


class Generator(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(100, 256),
            nn.ReLU(),
            nn.Linear(256, 512),
            nn.ReLU(),
            nn.Linear(512, 1024),
            nn.ReLU(),
            nn.Linear(1024, 784),
            nn.Tanh(),
        )

    def forward(self, x):
        output = self.model(x)
        output = output.view(x.size(0), 1, 28, 28)
        return output


generator = Generator().to(device=device)

# TRAINING PARAMS

lr = 0.0001
num_epochs = 100
loss_function = nn.BCELoss()

optimizer_discriminator = torch.optim.Adam(discriminator.parameters(), lr=lr)
optimizer_generator = torch.optim.Adam(generator.parameters(), lr=lr)

# TRAINING LOOP

for epoch in range(num_epochs):
    for n, (real_samples, mnist_labels) in enumerate(train_loader):
        # Data for training the discriminator
        real_samples = real_samples.to(device=device)
        real_samples_labels = torch.ones((batch_size, 1)).to(
            device=device
        )
        latent_space_samples = torch.randn((batch_size, 100)).to(
            device=device
        )
        generated_samples = generator(latent_space_samples)
        generated_samples_labels = torch.zeros((batch_size, 1)).to(
            device=device
        )
        all_samples = torch.cat((real_samples, generated_samples))
        all_samples_labels = torch.cat(
            (real_samples_labels, generated_samples_labels)
        )

        # Training the discriminator
        discriminator.zero_grad()
        output_discriminator = discriminator(all_samples)
        loss_discriminator = loss_function(
            output_discriminator, all_samples_labels
        )
        loss_discriminator.backward()
        optimizer_discriminator.step()

        # Data for training the generator
        latent_space_samples = torch.randn((batch_size, 100)).to(
            device=device
        )

        # Training the generator
        generator.zero_grad()
        generated_samples = generator(latent_space_samples)
        output_discriminator_generated = discriminator(generated_samples)
        loss_generator = loss_function(
            output_discriminator_generated, real_samples_labels
        )
        loss_generator.backward()
        optimizer_generator.step()

        # Show loss once per epoch, on the last batch
        if n == len(train_loader) - 1:
            print(f"Epoch: {epoch} Loss D.: {loss_discriminator}")
            print(f"Epoch: {epoch} Loss G.: {loss_generator}")

# SAMPLES

latent_space_samples = torch.randn(batch_size, 100).to(device=device)
generated_samples = generator(latent_space_samples)

generated_samples = generated_samples.cpu().detach()
for i in range(16):
    ax = plt.subplot(4, 4, i + 1)
    plt.imshow(generated_samples[i].reshape(28, 28), cmap="gray_r")
    plt.xticks([])
    plt.yticks([])
plt.show()

CodePudding user response:

In the example you shared, you are training your GAN on single-channel images. Specifically, your Generator and Discriminator layers are written to handle images of dimension 1x28x28, which are the dimensions of the MNIST and Fashion-MNIST datasets.

I am assuming that you want to train on color images (3 channels) of a different size, in your case 3x336x336. In the example below, I have added a transform that first resizes an input image of any size down to dimension 3x28x28.

Here are the code examples for creating the custom dataset and custom dataloader.

from glob import glob
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from skimage import io

path = 'your/image/path'
image_paths = glob(path + '/*.png')  # the question uses PNG images; change to '/*.jpg' for JPEGs

img_size = 28
batch_size = 32

transform = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.Resize(img_size),
        transforms.CenterCrop(img_size),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
    ]
)

class ImageDataset(Dataset):
    def __init__(self, paths, transform):
        self.paths = paths
        self.transform = transform

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, index):
        image_path = self.paths[index]
        image = io.imread(image_path)  # numpy array of shape HxWxC

        if self.transform:
            image = self.transform(image)

        return image

dataset = ImageDataset(image_paths, transform)

# drop_last=True discards an incomplete final batch, so the fixed-size
# label tensors created in the training loop always match the batch dimension
train_loader = DataLoader(dataset, batch_size=batch_size, num_workers=1, shuffle=True, drop_last=True)

The dataloader generates image tensors of dimension - batch_size x img_channels x img_dim x img_dim which in this case would be - 32x3x28x28.
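As a quick sanity check (a minimal sketch, assuming the dataset and dataloader defined above), you can pull one batch from the loader and confirm its shape:

real_batch = next(iter(train_loader))
print(real_batch.shape)  # expected: torch.Size([32, 3, 28, 28])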

import torch
import torch.nn as nn

device = 'cuda' if torch.cuda.is_available() else 'cpu'

class Discriminator(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(784*3, 2048),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(2048, 1024),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        x = x.view(x.size(0), 784*3) # change required for 3 channel image
        output = self.model(x)
        return output


discriminator = Discriminator().to(device=device)


class Generator(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(100, 256),
            nn.ReLU(),
            nn.Linear(256, 512),
            nn.ReLU(),
            nn.Linear(512, 1024),
            nn.ReLU(),
            nn.Linear(1024, 2048),
            nn.ReLU(),
            nn.Linear(2048, 784*3),
            nn.Tanh(),
        )

    def forward(self, x):
        output = self.model(x)
        output = output.view(x.size(0), 3, 28, 28)
        return output


generator = Generator().to(device=device)

# TRAINING PARAMS

lr = 0.0001
num_epochs = 100
loss_function = nn.BCELoss()

optimizer_discriminator = torch.optim.Adam(discriminator.parameters(), lr=lr)
optimizer_generator = torch.optim.Adam(generator.parameters(), lr=lr)

This is the code for the Generator and Discriminator. I have made slight modifications to both. Notice the addition of the following layers in the Discriminator:

nn.Linear(784*3, 2048),
nn.ReLU(),
nn.Dropout(0.3),
nn.Linear(2048, 1024),

and these in the Generator:

nn.Linear(1024, 2048),
nn.ReLU(),
nn.Linear(2048, 784*3)

This is required to generate and discriminate images of the correct dimension.
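To verify this, a quick forward pass with a dummy batch (a sketch using the classes defined above) should produce the expected shapes:

dummy_noise = torch.randn(4, 100, device=device)
fake_images = generator(dummy_noise)
print(fake_images.shape)                 # torch.Size([4, 3, 28, 28])
print(discriminator(fake_images).shape)  # torch.Size([4, 1])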

Finally, this is your training loop -

for epoch in range(num_epochs):
    for n, real_samples in enumerate(train_loader):
        # Data for training the discriminator
        real_samples = real_samples.to(device=device)
        real_samples_labels = torch.ones((batch_size, 1)).to(
            device=device
        )
        latent_space_samples = torch.randn((batch_size, 100)).to(
            device=device
        )
        print(f'Latent space samples : {latent_space_samples.shape}')
        generated_samples = generator(latent_space_samples)
        generated_samples_labels = torch.zeros((batch_size, 1)).to(
            device=device
        )
        all_samples = torch.cat((real_samples, generated_samples))
        print(f'Real samples : {real_samples.shape}, generated samples : {generated_samples.shape}')
        all_samples_labels = torch.cat(
            (real_samples_labels, generated_samples_labels)
        )

        # Training the discriminator
        discriminator.zero_grad()
        output_discriminator = discriminator(all_samples)
        loss_discriminator = loss_function(
            output_discriminator, all_samples_labels
        )
        loss_discriminator.backward()
        optimizer_discriminator.step()

        # Data for training the generator
        latent_space_samples = torch.randn((batch_size, 100)).to(
            device=device
        )

        # Training the generator
        generator.zero_grad()
        generated_samples = generator(latent_space_samples)
        output_discriminator_generated = discriminator(generated_samples)
        loss_generator = loss_function(
            output_discriminator_generated, real_samples_labels
        )
        loss_generator.backward()
        optimizer_generator.step()

        # Show loss once per epoch, on the last batch
        if n == len(train_loader) - 1:
            print(f"Epoch: {epoch} Loss D.: {loss_discriminator}")
            print(f"Epoch: {epoch} Loss G.: {loss_generator}")

This works because each image is flattened to a 784*3 vector for the Discriminator and reshaped back to 3x28x28 by the Generator.
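Note that the sample-plotting code from the question assumes grayscale 28x28 images; for 3-channel outputs you need to move the channel axis last and undo the [-1, 1] normalization before calling imshow. A minimal sketch, assuming the generator above and matplotlib imported as in the question:

latent_space_samples = torch.randn(batch_size, 100).to(device=device)
generated_samples = generator(latent_space_samples).cpu().detach()

for i in range(16):
    ax = plt.subplot(4, 4, i + 1)
    # 3x28x28 -> 28x28x3, then rescale from [-1, 1] to [0, 1] for imshow
    img = generated_samples[i].permute(1, 2, 0) * 0.5 + 0.5
    plt.imshow(img)
    plt.xticks([])
    plt.yticks([])
plt.show()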

This works, but for 3-channel images at higher resolutions you would normally use Conv2d operations in your Discriminator to downsample the image and ConvTranspose2d operations in your Generator to upsample it (a DCGAN-style architecture).
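If you want to go that route, here is a minimal DCGAN-style sketch. The layer sizes below are illustrative assumptions for 3x64x64 images (you would pair this with Resize(64)/CenterCrop(64) in the transform above), not the architecture from the linked notebook:

import torch
import torch.nn as nn

nz = 100  # latent vector size (assumption)

class ConvGenerator(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            # nz x 1 x 1 -> 512 x 4 x 4
            nn.ConvTranspose2d(nz, 512, 4, 1, 0, bias=False),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            # 512 x 4 x 4 -> 256 x 8 x 8
            nn.ConvTranspose2d(512, 256, 4, 2, 1, bias=False),
            nn.BatchNorm2d(256),
            nn.ReLU(True),
            # 256 x 8 x 8 -> 128 x 16 x 16
            nn.ConvTranspose2d(256, 128, 4, 2, 1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
            # 128 x 16 x 16 -> 64 x 32 x 32
            nn.ConvTranspose2d(128, 64, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            # 64 x 32 x 32 -> 3 x 64 x 64
            nn.ConvTranspose2d(64, 3, 4, 2, 1, bias=False),
            nn.Tanh(),
        )

    def forward(self, z):
        return self.model(z.view(z.size(0), nz, 1, 1))

class ConvDiscriminator(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            # 3 x 64 x 64 -> 64 x 32 x 32
            nn.Conv2d(3, 64, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            # 64 x 32 x 32 -> 128 x 16 x 16
            nn.Conv2d(64, 128, 4, 2, 1, bias=False),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2, inplace=True),
            # 128 x 16 x 16 -> 256 x 8 x 8
            nn.Conv2d(128, 256, 4, 2, 1, bias=False),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.2, inplace=True),
            # 256 x 8 x 8 -> 1 x 1 x 1
            nn.Conv2d(256, 1, 8, 1, 0, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x):
        return self.model(x).view(x.size(0), 1)

Because the convolutional layers operate on the image grid directly, there is no flattening to 784*3; the same training loop works as long as the image size matches what the layers expect.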

If you are interested in an example that uses ConvTranspose2d and Conv2d for processing multidimensional images, here it is - https://drive.google.com/file/d/1gYiBHPu-r3kialO0klsTdE2RjBR50rMs/view?usp=sharing. To handle images of different dimensions, you would have to modify the layers in the Generator and Discriminator classes.
