Convolutional Neural Network model predicts no cats-CodePudding

I have trained my first CNN model. I took the first 100 images of cats and the first 100 images of dogs from the Kaggle dataset as my custom dataset.

After the model is trained, I feed the same images back to the model to see its predictions. As a result I get scores from 0.5 to 0.6 on all images, while I thought they should be <0.5 for cats and >0.5 for dogs. Is it a problem with my model architecture or the training process, or is my dataset just too small? Why does no image score below 0.5 at all?

Here is my code:

First I generate the .csv file to be processed:

import pandas as pd
import os
import torch

# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
device = ("cuda" if torch.cuda.is_available() else "cpu")

# List the directory once so file names and labels come from the same listing.
image_names = os.listdir("train/")

# Label by file name: 0 for "cat", 1 for "dog" ("dog" wins if both substrings
# occur); names matching neither keep a missing label.
labels = [
    1 if "dog" in name else 0 if "cat" in name else None
    for name in image_names
]

# Build the frame in one step. Element-wise writes through
# train_df["label"][idx] would be chained assignment, which pandas warns about
# and which does not update the frame when copy-on-write is enabled.
train_df = pd.DataFrame(
    {"img_name": image_names, "label": pd.Series(labels, dtype=object)}
)

# Persist the annotations (header row, no index column) for the Dataset class.
train_df.to_csv(r'train_csv.csv', index=False, header=True)

Then I prepare the dataset:

from torch.utils.data import Dataset
import pandas as pd
import os
from PIL import Image
import torch

class CatsAndDogsDataset(Dataset):
    """Image dataset driven by a CSV annotation file.

    Column 0 of the CSV is read as the image file name (joined onto
    ``root_dir``) and column 1 as the label, which is converted to a float
    tensor.
    """

    def __init__(self, root_dir, annotation_file, transform=None):
        """Load the annotation table and remember where the images live.

        Args:
            root_dir: Directory that the file names in the CSV are joined to.
            annotation_file: Path of the CSV file read with ``pd.read_csv``.
            transform: Optional callable applied to each loaded PIL image.
        """
        self.transform = transform
        self.annotations = pd.read_csv(annotation_file)
        self.root_dir = root_dir

    def __len__(self):
        """Return the number of rows in the annotation table."""
        return self.annotations.shape[0]

    def __getitem__(self, index):
        """Return ``(image, label)`` for the row at position ``index``."""
        file_name = self.annotations.iloc[index, 0]
        image_path = os.path.join(self.root_dir, file_name)
        # Force three channels so every sample has the same layout.
        img = Image.open(image_path).convert("RGB")
        target = torch.tensor(float(self.annotations.iloc[index, 1]))

        if self.transform is None:
            return (img, target)
        return (self.transform(img), target)

This is my model:

import torch.nn as nn
import torchvision.models as models

class CNN(nn.Module):
    """Inception-v3 backbone with a replaced final layer and a sigmoid output.

    NOTE(review): ``forward`` applies ReLU (and dropout) to the single output
    of the final linear layer before the sigmoid. ReLU never returns a
    negative value, so the sigmoid result can never fall below 0.5. This is
    the behaviour the question describes and is deliberately left unchanged
    here.
    """

    def __init__(self, train_CNN=False, num_classes=1):
        """Build the backbone and swap its ``fc`` layer for a new linear head.

        Args:
            train_CNN: Stored on the instance; not otherwise used by the class.
            num_classes: Number of outputs of the replacement ``fc`` layer.
        """
        super().__init__()
        self.train_CNN = train_CNN

        backbone = models.inception_v3(pretrained=True, aux_logits=False)
        # Replace the original classifier with one sized for this task.
        backbone.fc = nn.Linear(backbone.fc.in_features, num_classes)
        self.inception = backbone

        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.sigmoid = nn.Sigmoid()

    def forward(self, images):
        """Return one score per image, with dimension 1 squeezed away."""
        head_output = self.inception(images)
        rectified = self.relu(head_output)
        dropped = self.dropout(rectified)
        scores = self.sigmoid(dropped)
        return scores.squeeze(1)

These are my hyperparameters, transformations and dataloaders:

from torch.utils.data import DataLoader
import torchvision.transforms as transforms

# ---- Hyper-parameters and loader settings ----
num_epochs = 10
learning_rate = 0.00001
train_CNN = False  # when False, only the replaced fc layer is trainable
batch_size = 32
shuffle = True
pin_memory = True
num_workers = 0

# ---- Preprocessing: resize, centre-crop to 299x299, tensorise, normalise ----
transform = transforms.Compose([
    transforms.Resize((356, 356)),
    transforms.CenterCrop((299, 299)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# ---- Dataset and 80/20 train/validation split ----
dataset = CatsAndDogsDataset("train", "train_csv.csv", transform=transform)
train_size = int(0.8 * len(dataset))
validation_size = len(dataset) - train_size
train_set, validation_set = torch.utils.data.random_split(
    dataset, [train_size, validation_size]
)

# Both loaders share the same settings (the validation loader shuffles too).
loader_options = dict(
    shuffle=shuffle,
    batch_size=batch_size,
    num_workers=num_workers,
    pin_memory=pin_memory,
)
train_loader = DataLoader(dataset=train_set, **loader_options)
validation_loader = DataLoader(dataset=validation_set, **loader_options)

# ---- Model, loss and optimiser ----
model = CNN().to(device)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Always train the replaced fc layer; every other backbone parameter follows
# the train_CNN switch.
for name, param in model.inception.named_parameters():
    is_classifier_head = "fc.weight" in name or "fc.bias" in name
    param.requires_grad = True if is_classifier_head else train_CNN

And this is the accuracy check:

def check_accuracy(loader, model):
    """Return the model's accuracy over ``loader`` as a percentage string.

    A score of 0.5 or more counts as class 1, anything lower as class 0.
    The model is put in eval mode for the pass and switched back to training
    mode before returning.

    Args:
        loader: Iterable of ``(inputs, labels)`` batches.
        model: Module returning one score per input sample.

    Returns:
        The accuracy formatted with two decimals (e.g. ``"66.67"``), or
        ``"0.00"`` when the loader yields no samples.
    """
    # Use the device the model's weights live on instead of a module-level
    # global, so the function works on its own.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    num_correct = 0
    num_samples = 0
    model.eval()
    try:
        with torch.no_grad():
            for x, y in loader:
                if target_device is not None:
                    x = x.to(device=target_device)
                    y = y.to(device=target_device)

                scores = model(x)
                # Vectorised threshold; no Python loop over single scores.
                predictions = (scores >= 0.5).float()
                # Accumulate across batches (plain assignment would only
                # report the final batch).
                num_correct += (predictions == y).sum().item()
                num_samples += predictions.size(0)
    finally:
        # Callers rely on the model being back in training mode afterwards.
        model.train()

    if num_samples == 0:
        return "0.00"
    return f"{float(num_correct)/float(num_samples)*100:.2f}"

And this is my training function:

from tqdm import tqdm

def train():
    """Run the training loop for ``num_epochs`` epochs with a progress bar.

    Uses the module-level ``model``, ``train_loader``, ``criterion``,
    ``optimizer`` and ``validation_loader``.

    NOTE(review): ``check_accuracy`` is called after every optimisation step,
    so the whole validation loader is evaluated once per training batch.
    """
    model.train()
    for epoch in range(num_epochs):
        progress = tqdm(train_loader, total=len(train_loader), leave=True)
        for batch_imgs, batch_labels in progress:
            batch_imgs = batch_imgs.to(device)
            batch_labels = batch_labels.to(device)

            # Forward pass and loss for this batch.
            loss = criterion(model(batch_imgs), batch_labels)

            # Standard step: clear old gradients, backpropagate, update.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            progress.set_description(f"Epoch [{epoch}/{num_epochs}]")
            batch_loss = loss.item()
            val_acc = check_accuracy(validation_loader, model)
            progress.set_postfix(loss=batch_loss, val_acc=val_acc)


if __name__ == "__main__":
    train()

Epoch [0/10]: 100%|██████████| 6/6 [12:00<00:00, 120.10s/it, loss=0.652, val_acc=39.02]
Epoch [1/10]: 100%|██████████| 6/6 [11:51<00:00, 118.61s/it, loss=0.497, val_acc=39.02]
Epoch [2/10]: 100%|██████████| 6/6 [11:27<00:00, 114.51s/it, loss=0.693, val_acc=39.02]
Epoch [3/10]: 100%|██████████| 6/6 [11:04<00:00, 110.77s/it, loss=0.531, val_acc=39.02]
Epoch [4/10]: 100%|██████████| 6/6 [10:58<00:00, 109.68s/it, loss=0.693, val_acc=39.02]
Epoch [5/10]: 100%|██████████| 6/6 [12:03<00:00, 120.51s/it, loss=0.803, val_acc=39.02]
Epoch [6/10]: 100%|██████████| 6/6 [11:33<00:00, 115.62s/it, loss=0.693, val_acc=39.02]
Epoch [7/10]: 100%|██████████| 6/6 [11:27<00:00, 114.56s/it, loss=0.675, val_acc=39.02]
Epoch [8/10]: 100%|██████████| 6/6 [11:42<00:00, 117.10s/it, loss=0.806, val_acc=39.02]
Epoch [9/10]: 100%|██████████| 6/6 [12:15<00:00, 122.58s/it, loss=0.768, val_acc=39.02]

Then I loop through the model checking predictions on each image (the dataset variable is available because it is in the same Jupyter Notebook):

import numpy as np

# Score every image in the dataset and print its true label next to the score.
# NOTE(review): nothing here switches the model to eval mode; if it is still
# in training mode, dropout stays active. Confirm whether model.eval() should
# be called first.
with torch.no_grad():
    for index in range(len(dataset)):
        image_tensor, true_target = dataset[index]
        # Add the batch dimension and move the input to the model's device.
        image = torch.unsqueeze(image_tensor, 0).to(device)
        prediction = model(image)
        predicted_class = prediction[0]
        print(f"class: {true_target.item()} score: {predicted_class.item()}")

The output:

class: 0.0 score: 0.547210156917572
class: 0.0 score: 0.5
class: 0.0 score: 0.5348594188690186
class: 0.0 score: 0.5336627960205078
class: 0.0 score: 0.5178861618041992
class: 0.0 score: 0.5692692995071411
class: 0.0 score: 0.5
class: 0.0 score: 0.5381814241409302
class: 0.0 score: 0.54604572057724
class: 0.0 score: 0.5157472491264343
class: 0.0 score: 0.5257323980331421
class: 0.0 score: 0.5137990713119507
class: 0.0 score: 0.5247158408164978
class: 0.0 score: 0.5320644378662109
class: 0.0 score: 0.5775637626647949
class: 0.0 score: 0.528205156326294
class: 0.0 score: 0.5457945466041565
class: 0.0 score: 0.5301501154899597
class: 0.0 score: 0.5102765560150146
class: 0.0 score: 0.5069065690040588
class: 0.0 score: 0.519408106803894
class: 0.0 score: 0.5414850115776062
class: 0.0 score: 0.5041879415512085
class: 0.0 score: 0.5055546760559082
show more (open the raw output data in a text editor) ...
class: 1.0 score: 0.5
class: 1.0 score: 0.5
class: 1.0 score: 0.5166758894920349
class: 1.0 score: 0.5343206524848938
class: 1.0 score: 0.5716230869293213

So no cats get predicted.

CodePudding user response：

Can you change your model architecture to this (just remove the dropout and ReLU)?

import torch.nn as nn
import torchvision.models as models

class CNN(nn.Module):
    """Inception-v3 backbone whose final layer feeds straight into a sigmoid.

    With no ReLU between the linear head and the sigmoid, negative head
    outputs map to scores below 0.5.
    """

    def __init__(self, train_CNN=False, num_classes=1):
        """Build the backbone and swap its ``fc`` layer for a new linear head.

        Args:
            train_CNN: Stored on the instance; not otherwise used by the class.
            num_classes: Number of outputs of the replacement ``fc`` layer.
        """
        super().__init__()
        self.train_CNN = train_CNN

        backbone = models.inception_v3(pretrained=True, aux_logits=False)
        backbone.fc = nn.Linear(backbone.fc.in_features, num_classes)
        self.inception = backbone

        # Kept as an attribute, but forward() does not apply it.
        self.dropout = nn.Dropout(0.5)
        self.sigmoid = nn.Sigmoid()

    def forward(self, images):
        """Return one sigmoid score per image, with dimension 1 squeezed."""
        head_output = self.inception(images)
        scores = self.sigmoid(head_output)
        return scores.squeeze(1)

Also, call model.eval() before running inference, since you have used dropout.