In [2]:
import torch
import torch.nn.functional as F
import torchvision.transforms as transforms
# import torchvision.models as models
import torchvision
import torch.nn as nn

# import captum
# from captum.attr import IntegratedGradients, Occlusion, LayerGradCam, LayerAttribution
# from captum.attr import visualization as viz

import os
# import sys
# import json

# import numpy as np
# from PIL import Image
import matplotlib.pyplot as plt
# from matplotlib.colors import LinearSegmentedColormap

In [3]:
# Root of the dogs-vs-cats subset; every split lives in a sub-folder of it.
base_dir = "dogscats/subset"

# Training split and its per-class folders.
train_dir = os.path.join(base_dir, "train")
train_cats_dir = os.path.join(train_dir, "cats")
train_dogs_dir = os.path.join(train_dir, "dogs")

# Held-out splits.
validation_dir = os.path.join(base_dir, "validation")
test_dir = os.path.join(base_dir, "test")

In [4]:
# Preprocessing applied to every image: convert to a float tensor, resize to
# 150x150, then normalise each channel.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Resize((150, 150)),
        # ImageNet channel statistics, as expected by torchvision's pretrained
        # models. The red-channel std is 0.229 (it was mistyped as 0.299).
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# ImageFolder derives the class labels from the sub-directory names.
# NOTE(review): these literal paths ("subset/...") do not go through `base_dir`
# ("dogscats/subset") defined in the configuration cell -- confirm which root
# is the intended one.
trainset = torchvision.datasets.ImageFolder("subset/train", transform=transform)
testset = torchvision.datasets.ImageFolder("subset/test", transform=transform)

batch_size = 64
trainloader = torch.utils.data.DataLoader(trainset, batch_size, shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size, shuffle=True)

In [5]:
def make_train_step(model, optimizer, loss_fn):
    """Build a closure that runs one optimisation step on a single batch.

    Args:
        model: Module being trained; invoked as ``model(x)``.
        optimizer: Optimizer whose ``step()`` and ``zero_grad()`` are called
            after the backward pass.
        loss_fn: Callable invoked as ``loss_fn(yhat, y)``; its result must
            support ``backward()``.

    Returns:
        A function ``train_step(x, y)`` that performs the forward pass,
        backward pass and parameter update, and returns the batch loss.
    """

    def train_step(x, y):
        # Enter training mode *before* the forward pass. Doing it afterwards
        # lets a preceding ``model.eval()`` leak into this step's forward
        # (affecting mode-dependent layers such as BatchNorm and Dropout).
        model.train()

        yhat = model(x)
        loss = loss_fn(yhat, y)

        loss.backward()
        optimizer.step()
        # Clear gradients so they do not accumulate into the next step.
        optimizer.zero_grad()

        return loss

    return train_step

In [6]:
class CatDogClassifier(nn.Module):
    """Small convolutional network emitting one sigmoid probability per image.

    Four ``Conv2d -> ReLU -> MaxPool2d`` stages are followed by a flatten and
    two linear layers; the final ``Sigmoid`` squashes the single output.
    The first linear layer expects 10368 flattened features.
    """

    def __init__(self):
        super().__init__()

        # Output channel count of each convolutional stage, in order.
        stage_channels = (32, 64, 128, 128)

        layers = []
        in_channels = 3
        for out_channels in stage_channels:
            layers.append(
                nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
            )
            layers.append(nn.ReLU())
            layers.append(nn.MaxPool2d(2, 2))
            in_channels = out_channels

        # Classifier head.
        layers.extend(
            [
                nn.Flatten(),
                nn.Linear(10368, 512),
                nn.ReLU(),
                nn.Linear(512, 1),
                nn.Sigmoid(),
            ]
        )

        self.network = nn.Sequential(*layers)

    def forward(self, xb):
        """Run the batch ``xb`` through the layer stack and return its output."""
        return self.network(xb)

In [8]:
from torchvision import datasets, models

# model = CatDogClassifier()
# model.load_state_dict(torch.load("catdogmodel", weights_only=True))

# Transfer learning: start from a ResNet-18 with pretrained weights.
model = models.resnet18(pretrained=True)

# Freeze the backbone. `requires_grad` is the flag to set; assigning to
# `requires_grad_` only overwrites the in-place *method* on each tensor and
# leaves every parameter trainable.
for params in model.parameters():
    params.requires_grad = False

# Swap the classification head for a single-output layer. The new layer is
# created after the freeze, so its parameters keep requires_grad=True.
nr_filters = model.fc.in_features
model.fc = nn.Linear(nr_filters, 1)



In [9]:
from torch.nn.modules.loss import BCEWithLogitsLoss
from torch.optim import lr_scheduler

# BCEWithLogitsLoss applies the sigmoid internally, so it is fed raw model
# outputs (logits).
loss_fn = BCEWithLogitsLoss()

# Only the parameters of the `fc` head are handed to the optimizer.
# The learning rate is spelled out: some PyTorch releases require `lr` for SGD,
# and an explicit value keeps the run independent of the library default.
optimizer = torch.optim.SGD(model.fc.parameters(), lr=1e-3)

train_step = make_train_step(model, optimizer, loss_fn)

In [None]:
from tqdm import tqdm


# Per-batch histories, stored as plain Python floats so they carry no autograd
# state and can be plotted directly.
losses = []
val_losses = []

# Per-epoch mean losses.
epoch_train_losses = []
epoch_test_losses = []

n_epochs = 10
early_stopping_tolerance = 3  # consecutive non-improving epochs allowed
early_stopping_threshold = 0.03  # stop once the best val loss is this low

# Early-stopping state lives OUTSIDE the epoch loop so it persists across
# epochs (resetting the counter every epoch would disable early stopping).
best_loss = float("inf")
best_model_wts = None
early_stopping_counter = 0

for epoch in range(n_epochs):
    # ---- training ----
    epoch_loss = 0.0
    for x_batch, y_batch in tqdm(
        trainloader, total=len(trainloader)
    ):  # iterate over batches
        y_batch = y_batch.unsqueeze(1).float()  # convert target to same nn output shape

        loss = float(train_step(x_batch, y_batch))
        epoch_loss += loss / len(trainloader)
        losses.append(loss)

    epoch_train_losses.append(epoch_loss)
    print("\nEpoch : {}, train loss : {}".format(epoch + 1, epoch_loss))

    # ---- validation (no gradients needed) ----
    model.eval()  # once per epoch is enough
    cum_loss = 0.0
    with torch.no_grad():
        for x_batch, y_batch in testloader:
            y_batch = y_batch.unsqueeze(1).float()  # convert target to same nn output shape

            val_loss = loss_fn(model(x_batch), y_batch).item()
            # Accumulate the *validation* loss, not the last training loss.
            cum_loss += val_loss / len(testloader)
            val_losses.append(val_loss)

    epoch_test_losses.append(cum_loss)
    print("Epoch : {}, val loss : {}".format(epoch + 1, cum_loss))

    if cum_loss <= best_loss:
        # New best: snapshot the weights. state_dict() returns references to
        # the live tensors, so clone them or later updates would overwrite the
        # "best" copy.
        best_loss = cum_loss
        best_model_wts = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }
        early_stopping_counter = 0
    else:
        early_stopping_counter += 1

    if (early_stopping_counter == early_stopping_tolerance) or (
        best_loss <= early_stopping_threshold
    ):
        print("\nTerminating: early stopping")
        break  # terminate training

# load best model (nothing to restore if no epoch ran)
if best_model_wts is not None:
    model.load_state_dict(best_model_wts)


In [10]:
import torchsummary
from torchsummary import summary
# Print a layer-by-layer summary of `model` (layer type, output shape and
# parameter count) for an input of size (3, 150, 150).
summary(model, input_size=(3, 150, 150))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 75, 75]           9,408
       BatchNorm2d-2           [-1, 64, 75, 75]             128
              ReLU-3           [-1, 64, 75, 75]               0
         MaxPool2d-4           [-1, 64, 38, 38]               0
            Conv2d-5           [-1, 64, 38, 38]          36,864
       BatchNorm2d-6           [-1, 64, 38, 38]             128
              ReLU-7           [-1, 64, 38, 38]               0
            Conv2d-8           [-1, 64, 38, 38]          36,864
       BatchNorm2d-9           [-1, 64, 38, 38]             128
             ReLU-10           [-1, 64, 38, 38]               0
       BasicBlock-11           [-1, 64, 38, 38]               0
           Conv2d-12           [-1, 64, 38, 38]          36,864
      BatchNorm2d-13           [-1, 64, 38, 38]             128
             ReLU-14           [-1, 64,