In [1]:
import torch
import torch.nn as nn
from torchvision import models, transforms, datasets
from torch.utils.data import DataLoader, random_split
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np

In [None]:
import os 
from PIL import Image
import matplotlib.pyplot as plt
from collections import Counter

In [5]:
dataset_dir = '/kaggle/input/eggs-images-classification-damaged-or-not/Eggs Classification'

classes = os.listdir(dataset_dir)
print(f"classes: {classes}")

for cls in classes:
    cls_path = os.path.join(dataset_dir, cls)
    print(f"{cls}: {len(os.listdir(cls_path))} images")

classes: ['Damaged', 'Not Damaged']
Damaged: 632 images
Not Damaged: 162 images


In [11]:
train_transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(), # Change shape (C, H, W)
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                        std=[0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

In [12]:
# Load full dataset
full_dataset = datasets.ImageFolder(root='/kaggle/input/eggs-images-classification-damaged-or-not/Eggs Classification', transform=train_transform)

# Split data 
train_ratio = 0.8
val_ratio = 0.2
total_size = len(full_dataset)
train_size = int(total_size * train_ratio)
val_size = total_size -  train_size

train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])
val_dataset.dataset.transform = val_transform # Override train_transform

# Create dataloader
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [13]:
# Load model (a prestrain model)
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 2) # for normal and break

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [14]:
num_epochs = 20

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images, labels =  images.to(device), labels.to(device)
        optimizer.zero_grad() # Clear the previous gradient
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward() # Compute gradient (backpropagation)
        optimizer.step() # update parameter (w - n*gradient)

        running_loss += loss.item()
        _, predicted = outputs.max(1) # take the label have the highest value, ignore the valuye 
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    train_acc = 100 * correct / total
    print(f"Epoch [{epoch+1}/{num_epochs}] " 
          f"Loss: {running_loss/len(train_loader):.4f} " 
          f"Train Acc: {train_acc:.2f}%")

Epoch [1/20] Loss: 0.4309 Train Acc: 78.43%
Epoch [2/20] Loss: 0.0470 Train Acc: 99.37%
Epoch [3/20] Loss: 0.0308 Train Acc: 99.21%
Epoch [4/20] Loss: 0.0115 Train Acc: 99.84%
Epoch [5/20] Loss: 0.0087 Train Acc: 99.69%
Epoch [6/20] Loss: 0.0111 Train Acc: 99.69%
Epoch [7/20] Loss: 0.0050 Train Acc: 99.69%
Epoch [8/20] Loss: 0.0066 Train Acc: 99.84%
Epoch [9/20] Loss: 0.0080 Train Acc: 99.69%
Epoch [10/20] Loss: 0.0038 Train Acc: 99.84%
Epoch [11/20] Loss: 0.0039 Train Acc: 99.84%
Epoch [12/20] Loss: 0.0056 Train Acc: 99.69%
Epoch [13/20] Loss: 0.0084 Train Acc: 99.53%
Epoch [14/20] Loss: 0.0601 Train Acc: 97.80%
Epoch [15/20] Loss: 0.0435 Train Acc: 99.06%
Epoch [16/20] Loss: 0.0235 Train Acc: 99.21%
Epoch [17/20] Loss: 0.0110 Train Acc: 99.53%
Epoch [18/20] Loss: 0.0063 Train Acc: 99.69%
Epoch [19/20] Loss: 0.0050 Train Acc: 99.84%
Epoch [20/20] Loss: 0.0037 Train Acc: 99.84%


In [15]:
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for images, labels in val_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

val_acc = 100 * correct / total
print(f"Validation Accuracy: {val_acc:.2f}%")


Validation Accuracy: 98.74%
