In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim

import torchvision
from torchvision.transforms import v2
import torchvision.models as models
from torchvision.models import resnet152, ResNet152_Weights

from PIL import Image

## Prepare Data

In [2]:
train_data_path = './train'
test_data_path = './test'
val_data_path = './val'

transform = v2.Compose([
    v2.Resize((64, 64)),
    v2.ToImage(),
    v2.ToDtype(torch.float32, scale=True),
    v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

train_data = torchvision.datasets.ImageFolder(root=train_data_path, transform=transform)
test_data = torchvision.datasets.ImageFolder(root=test_data_path, transform=transform)
val_data = torchvision.datasets.ImageFolder(root=val_data_path, transform=transform)


In [3]:
batch_size = 64
num_workers = 4

train_loader = torch.utils.data.DataLoader(
    dataset=train_data,
    shuffle=True,
    batch_size=batch_size,
    num_workers=num_workers
)

test_loader = torch.utils.data.DataLoader(
    dataset=test_data,
    shuffle=True,
    batch_size=batch_size,
    num_workers=num_workers
)

val_loader = torch.utils.data.DataLoader(
    dataset=val_data,
    shuffle=True,
    batch_size=batch_size,
    num_workers=num_workers
)

## CNN Model (AlexNet from Scratch)

In [4]:
class CNNNet(nn.Module):
    def __init__(self, num_classes=2):
        super(CNNNet, self).__init__()

        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((6,6))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256*6*6, 4096),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Linear(4096, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)

        return x


In [5]:
device = torch.device('mps') if torch.backends.mps.is_available() else torch.device('cpu')
device


device(type='mps')

In [6]:
cnnnet = CNNNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(cnnnet.parameters(), lr=0.001)

## Model Training

In [7]:
epochs=20
def train(model=cnnnet, optimizer=optimizer, loss_fn=criterion, train_loader=train_loader, val_loader=val_loader, device=device, epochs=epochs):
    for epoch in range(epochs):
        train_loss = 0.0
        val_loss = 0.0
        model.train()
        for inputs, target in train_loader:
            optimizer.zero_grad()
            inputs = inputs.to(device)
            target = target.to(device)
            output = model(inputs)
            loss = loss_fn(output, target)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
        train_loss /= len(train_loader)

        num_correct = 0.0
        num_examples = 0.0
        model.eval()
        for inputs, target in val_loader:
            inputs = inputs.to(device)
            target = target.to(device)
            output = model(inputs)
            loss = loss_fn(output, target)

            val_loss += loss.item()
            correct = torch.eq(torch.max(F.softmax(output), dim=1)[1], target).view(-1)
            num_correct += torch.sum(correct).item()
            num_examples += correct.shape[0]
        val_loss /= len(val_loader)

        print(f"Epoch: {epoch+1}, Train Loss: {train_loss:.2f}, Val Loss: {val_loss:.2f}, Accuracy: {(num_correct/num_examples):.2f}")

train()

  correct = torch.eq(torch.max(F.softmax(output), dim=1)[1], target).view(-1)


Epoch: 1, Train Loss: 1.41, Val Loss: 0.66, Accuracy: 0.87
Epoch: 2, Train Loss: 0.68, Val Loss: 0.93, Accuracy: 0.29
Epoch: 3, Train Loss: 0.61, Val Loss: 0.81, Accuracy: 0.46
Epoch: 4, Train Loss: 0.52, Val Loss: 0.42, Accuracy: 0.79
Epoch: 5, Train Loss: 0.48, Val Loss: 0.33, Accuracy: 0.89
Epoch: 6, Train Loss: 0.50, Val Loss: 0.46, Accuracy: 0.71
Epoch: 7, Train Loss: 0.44, Val Loss: 0.32, Accuracy: 0.83
Epoch: 8, Train Loss: 0.42, Val Loss: 0.40, Accuracy: 0.77
Epoch: 9, Train Loss: 0.38, Val Loss: 0.37, Accuracy: 0.81
Epoch: 10, Train Loss: 0.39, Val Loss: 0.30, Accuracy: 0.87
Epoch: 11, Train Loss: 0.39, Val Loss: 0.42, Accuracy: 0.78
Epoch: 12, Train Loss: 0.35, Val Loss: 0.38, Accuracy: 0.84
Epoch: 13, Train Loss: 0.33, Val Loss: 0.27, Accuracy: 0.89
Epoch: 14, Train Loss: 0.29, Val Loss: 0.85, Accuracy: 0.57
Epoch: 15, Train Loss: 0.44, Val Loss: 0.60, Accuracy: 0.59
Epoch: 16, Train Loss: 0.37, Val Loss: 0.64, Accuracy: 0.68
Epoch: 17, Train Loss: 0.28, Val Loss: 0.45, Accu

## PreTrained Models

In [None]:
alexnet = models.alexnet(weights=models.AlexNet_Weights.IMAGENET1K_V1)
alexnet.classifier[6] = nn.Linear(alexnet.classifier[6].in_features, 2)
alexnet.to(device)

In [51]:
resnet = resnet152(weights=ResNet152_Weights.IMAGENET1K_V2)
resnet

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

## Test Model

In [50]:
labels = ['Cat', 'Fish']

img = Image.open('./test/cat/41775356_59e1d64431.jpg')
img = transform(img)
img = img.unsqueeze(0)

output = cnnnet(img.to(device))
pred = output.argmax()
print(f"CNNNet Predicted: {labels[pred.item()]}")

output = alexnet(img.to(device))
pred = output.argmax()
print(f"AlexNet Predicted: {labels[pred.item()]}")

CNNNet Predicted: Cat
AlexNet Predicted: Cat


In [49]:
img = Image.open('./test/fish/50081802_bf4d207cd5.jpg')
img = transform(img)
img = img.unsqueeze(0)

output = cnnnet(img.to(device))
pred = output.argmax()
print(f"CNNNet Predicted: {labels[pred.item()]}")

output = alexnet(img.to(device))
pred = output.argmax()
print(f"AlexNet Predicted: {labels[pred.item()]}")

CNNNet Predicted: Fish
AlexNet Predicted: Cat
