I am trying to replace standardized datasets from PyTorch, such as MNIST and CIFAR, with unlabeled custom images in PNG format in a simple GAN. Unfortunately, most examples use exactly these datasets and don't show the process of preparing and feeding custom data into a GAN. I have stored my PNG images (336*336, RGB) in the working directory of VS Code. Could you please suggest how to go forward? Below is my current code, where I would like to replace MNIST with my own images in order to generate new images (from # PREPARING TRAINING DATA to # PLOTTING SAMPLES):
import torch
from torch import nn
import math
import matplotlib.pyplot as plt
import torchvision
import torchvision.transforms as transforms

torch.manual_seed(111)

# DEVICE
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

# PREPARING TRAINING DATA
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
)

# LOADING DATA
train_set = torchvision.datasets.MNIST(
    root=".", train=True, download=True, transform=transform
)

# CREATE DATALOADER
batch_size = 32
train_loader = torch.utils.data.DataLoader(
    train_set, batch_size=batch_size, shuffle=True
)

# PLOTTING SAMPLES
real_samples, mnist_labels = next(iter(train_loader))
for i in range(16):
    ax = plt.subplot(4, 4, i + 1)
    plt.imshow(real_samples[i].reshape(28, 28), cmap="gray_r")
    plt.xticks([])
    plt.yticks([])
plt.show()
# IMPLEMENTING DISCRIMINATOR AND GENERATOR
class Discriminator(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(784, 1024),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        x = x.view(x.size(0), 784)
        output = self.model(x)
        return output
discriminator = Discriminator().to(device=device)
class Generator(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(100, 256),
            nn.ReLU(),
            nn.Linear(256, 512),
            nn.ReLU(),
            nn.Linear(512, 1024),
            nn.ReLU(),
            nn.Linear(1024, 784),
            nn.Tanh(),
        )

    def forward(self, x):
        output = self.model(x)
        output = output.view(x.size(0), 1, 28, 28)
        return output
generator = Generator().to(device=device)
# TRAINING PARAMS
lr = 0.0001
num_epochs = 100
loss_function = nn.BCELoss()
optimizer_discriminator = torch.optim.Adam(discriminator.parameters(), lr=lr)
optimizer_generator = torch.optim.Adam(generator.parameters(), lr=lr)
# TRAINING LOOP
for epoch in range(num_epochs):
    for n, (real_samples, mnist_labels) in enumerate(train_loader):
        # Data for training the discriminator
        real_samples = real_samples.to(device=device)
        real_samples_labels = torch.ones((batch_size, 1)).to(device=device)
        latent_space_samples = torch.randn((batch_size, 100)).to(device=device)
        generated_samples = generator(latent_space_samples)
        generated_samples_labels = torch.zeros((batch_size, 1)).to(device=device)
        all_samples = torch.cat((real_samples, generated_samples))
        all_samples_labels = torch.cat(
            (real_samples_labels, generated_samples_labels)
        )

        # Training the discriminator
        discriminator.zero_grad()
        output_discriminator = discriminator(all_samples)
        loss_discriminator = loss_function(
            output_discriminator, all_samples_labels
        )
        loss_discriminator.backward()
        optimizer_discriminator.step()

        # Data for training the generator
        latent_space_samples = torch.randn((batch_size, 100)).to(device=device)

        # Training the generator
        generator.zero_grad()
        generated_samples = generator(latent_space_samples)
        output_discriminator_generated = discriminator(generated_samples)
        loss_generator = loss_function(
            output_discriminator_generated, real_samples_labels
        )
        loss_generator.backward()
        optimizer_generator.step()

        # Show loss
        if n == batch_size - 1:
            print(f"Epoch: {epoch} Loss D.: {loss_discriminator}")
            print(f"Epoch: {epoch} Loss G.: {loss_generator}")
# SAMPLES
latent_space_samples = torch.randn(batch_size, 100).to(device=device)
generated_samples = generator(latent_space_samples)
generated_samples = generated_samples.cpu().detach()
for i in range(16):
    ax = plt.subplot(4, 4, i + 1)
    plt.imshow(generated_samples[i].reshape(28, 28), cmap="gray_r")
    plt.xticks([])
    plt.yticks([])
plt.show()
CodePudding user response:
In the example that you shared, you are training on single-channel images: your Generator and Discriminator layers are written to handle images of dimension 1x28x28, which are the dimensions of the MNIST and Fashion-MNIST datasets. I am assuming that you want to train on color images (3 channels) of a different size, in your case 3x336x336. In the example below, I have added a tensor transform that first converts an input image of any size to an image of dimension 3x28x28.
Here are the code examples for creating the custom dataset and custom dataloader.
from glob import glob
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from skimage import io

path = 'your/image/path'
image_paths = glob(path + '/*.png')  # use '/*.jpg' instead if your images are JPEGs
img_size = 28
batch_size = 32

transform = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.Resize(img_size),
        transforms.CenterCrop(img_size),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
    ]
)

class ImageDataset(Dataset):
    def __init__(self, paths, transform):
        self.paths = paths
        self.transform = transform

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, index):
        image_path = self.paths[index]
        image = io.imread(image_path)
        # if your PNGs carry an alpha channel, drop it: image = image[:, :, :3]
        if self.transform:
            image = self.transform(image)
        return image

dataset = ImageDataset(image_paths, transform)
# drop_last=True discards a smaller final batch, which would otherwise clash
# with the fixed-size label tensors created in the training loop below
train_loader = DataLoader(dataset, batch_size=batch_size, num_workers=1, shuffle=True, drop_last=True)
The dataloader generates image tensors of dimension batch_size x img_channels x img_dim x img_dim, which in this case is 32x3x28x28.
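If you want to sanity-check the pipeline before training, a quick shape check like this minimal sketch (assuming the train_loader defined above) confirms the batches match what the networks expect:

# Fetch one batch and confirm its dimensions
real_samples = next(iter(train_loader))
print(real_samples.shape)                      # expected: torch.Size([32, 3, 28, 28])
print(real_samples.min(), real_samples.max())  # roughly within [-1, 1] after Normalize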
import torch
import torch.nn as nn
device = 'cuda' if torch.cuda.is_available() else 'cpu'
class Discriminator(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(784*3, 2048),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(2048, 1024),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        x = x.view(x.size(0), 784*3)  # change required for 3 channel image
        output = self.model(x)
        return output

discriminator = Discriminator().to(device=device)

class Generator(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(100, 256),
            nn.ReLU(),
            nn.Linear(256, 512),
            nn.ReLU(),
            nn.Linear(512, 1024),
            nn.ReLU(),
            nn.Linear(1024, 2048),
            nn.ReLU(),
            nn.Linear(2048, 784*3),
            nn.Tanh(),
        )

    def forward(self, x):
        output = self.model(x)
        output = output.view(x.size(0), 3, 28, 28)
        return output

generator = Generator().to(device=device)
# TRAINING PARAMS
lr = 0.0001
num_epochs = 100
loss_function = nn.BCELoss()
optimizer_discriminator = torch.optim.Adam(discriminator.parameters(), lr=lr)
optimizer_generator = torch.optim.Adam(generator.parameters(), lr=lr)
This is the code for the Generator and Discriminator, with slight modifications to both. Notice the addition of the following layers in the Discriminator:
nn.Linear(784*3, 2048),
nn.ReLU(),
nn.Dropout(0.3),
nn.Linear(2048, 1024),
and these in the Generator:
nn.Linear(1024, 2048),
nn.ReLU(),
nn.Linear(2048, 784*3)
This is required to generate and discriminate images of the correct dimension.
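To verify that the layer sizes line up, you can push a dummy batch through both networks; a minimal sketch, assuming the generator and discriminator defined above:

# Hypothetical shape check: noise -> generator -> discriminator
z = torch.randn(32, 100).to(device)
fake = generator(z)            # expected: torch.Size([32, 3, 28, 28])
scores = discriminator(fake)   # expected: torch.Size([32, 1])
print(fake.shape, scores.shape)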
Finally, this is your training loop -
for epoch in range(num_epochs):
    for n, real_samples in enumerate(train_loader):
        # Data for training the discriminator
        real_samples = real_samples.to(device=device)
        real_samples_labels = torch.ones((batch_size, 1)).to(device=device)
        latent_space_samples = torch.randn((batch_size, 100)).to(device=device)
        # print(f'Latent space samples : {latent_space_samples.shape}')  # optional shape check
        generated_samples = generator(latent_space_samples)
        generated_samples_labels = torch.zeros((batch_size, 1)).to(device=device)
        all_samples = torch.cat((real_samples, generated_samples))
        # print(f'Real samples : {real_samples.shape}, generated samples : {generated_samples.shape}')  # optional shape check
        all_samples_labels = torch.cat(
            (real_samples_labels, generated_samples_labels)
        )

        # Training the discriminator
        discriminator.zero_grad()
        output_discriminator = discriminator(all_samples)
        loss_discriminator = loss_function(
            output_discriminator, all_samples_labels
        )
        loss_discriminator.backward()
        optimizer_discriminator.step()

        # Data for training the generator
        latent_space_samples = torch.randn((batch_size, 100)).to(device=device)

        # Training the generator
        generator.zero_grad()
        generated_samples = generator(latent_space_samples)
        output_discriminator_generated = discriminator(generated_samples)
        loss_generator = loss_function(
            output_discriminator_generated, real_samples_labels
        )
        loss_generator.backward()
        optimizer_generator.step()

        # Show loss once per epoch, at the last batch
        if n == len(train_loader) - 1:
            print(f"Epoch: {epoch} Loss D.: {loss_discriminator}")
            print(f"Epoch: {epoch} Loss G.: {loss_generator}")
This works because the generator's flat 784*3 output is reshaped to dimension 3x28x28, and the discriminator flattens each 3x28x28 image back to 784*3.
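The reshape is lossless in both directions, as this minimal round-trip sketch shows:

x = torch.randn(32, 784*3)     # flat generator output
img = x.view(32, 3, 28, 28)    # unflattened into images
flat = img.view(32, 784*3)     # flattened again inside the discriminator
print(torch.equal(x, flat))    # True: no information is lost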
This would work, but if you are handling 3-channel images at larger dimensions, you would need to write ConvTranspose2d and Conv2d operations in your Generator and Discriminator, for upsampling and downsampling the images respectively.
If you are interested in an example that uses ConvTranspose2d and Conv2d for processing multidimensional images, here it is - https://drive.google.com/file/d/1gYiBHPu-r3kialO0klsTdE2RjBR50rMs/view?usp=sharing. To handle images of different dimensions, you would have to modify the layers in the Generator and Discriminator classes accordingly.
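As a rough illustration of what that convolutional variant could look like, here is a minimal DCGAN-style sketch for 3x28x28 images (this is not the code from the linked notebook; the layer sizes are assumptions you would tune for your data):

import torch
from torch import nn

class ConvGenerator(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            # (100, 1, 1) -> (256, 7, 7)
            nn.ConvTranspose2d(100, 256, kernel_size=7, stride=1, padding=0),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            # (256, 7, 7) -> (128, 14, 14)
            nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            # (128, 14, 14) -> (3, 28, 28)
            nn.ConvTranspose2d(128, 3, kernel_size=4, stride=2, padding=1),
            nn.Tanh(),
        )

    def forward(self, z):
        # reshape flat noise (batch, 100) into (batch, 100, 1, 1) feature maps
        return self.model(z.view(z.size(0), 100, 1, 1))

class ConvDiscriminator(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            # (3, 28, 28) -> (128, 14, 14)
            nn.Conv2d(3, 128, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2),
            # (128, 14, 14) -> (256, 7, 7)
            nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2),
            # (256, 7, 7) -> (1, 1, 1)
            nn.Conv2d(256, 1, kernel_size=7, stride=1, padding=0),
            nn.Sigmoid(),
        )

    def forward(self, x):
        return self.model(x).view(x.size(0), 1)

Both classes drop into the same training loop shown above; only the model definitions change.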