# Data augmentation

In [1]:
# import 
import os
import torch
from torchvision import transforms, datasets
from PIL import Image

# Where the original images are read from, and where the resized + augmented
# copies are written to.
dataset_old = '/kaggle/input/eggs-images-classification-damaged-or-not/Eggs Classification'
dataset_new = '/kaggle/working/egg-augument'

# Random augmentation pipeline. The steps run in list order and the final
# RandomResizedCrop brings every result back to 224x224.
_augmentation_steps = [
    transforms.RandomRotation(degrees=15),
    transforms.ColorJitter(
        brightness=0.2,
        contrast=0.2,
        saturation=0.2,
        hue=0.2,
    ),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomResizedCrop(size=224, scale=(0.8, 1.0)),
]
augumentation = transforms.Compose(_augmentation_steps)

# Deterministic preprocessing for the un-augmented copy: resize to 224x224.
original_transform = transforms.Compose([transforms.Resize((224, 224))])

# Number of augmented variants generated per original image, per class.
# (The counts differ between classes, presumably to even out the class
# sizes after augmentation -- confirm against the raw class counts.)
damaged_iter = 3
not_damaged_iter = 12

# One lookup instead of one copy-pasted branch per class. Classes that are not
# listed here only get the resized copy of each original image.
augment_counts = {
    'Damaged': damaged_iter,
    'Not Damaged': not_damaged_iter,
}

# torchvision's random transforms draw their parameters from torch's global
# RNG (torchvision >= 0.8), so seeding it -- together with the sorted directory
# listings below -- makes a re-run produce the same augmented files.
torch.manual_seed(42)

# NOTE(review): augmented variants are written next to their source image. If
# this folder is later split randomly into train/val, variants of one source
# image can land on both sides of the split (leakage). Consider splitting the
# originals first and augmenting only the training part.
for class_name in sorted(os.listdir(dataset_old)):
    class_path = os.path.join(dataset_old, class_name)
    if not os.path.isdir(class_path):
        # Stray files next to the class folders are not classes; skip them
        # instead of crashing in os.listdir(class_path).
        continue

    # Mirror the class folder in the output directory.
    output_path = os.path.join(dataset_new, class_name)
    os.makedirs(output_path, exist_ok=True)

    n_augmented = augment_counts.get(class_name, 0)

    for file_name in sorted(os.listdir(class_path)):
        file_path = os.path.join(class_path, file_name)

        # The context manager releases the source file handle as soon as the
        # RGB copy has been made.
        with Image.open(file_path) as source_image:
            image = original_transform(source_image.convert('RGB'))

        # Resized original keeps its original file name.
        image.save(os.path.join(output_path, file_name))

        # Augmented variants: <stem>_aug_<i>.jpg, each derived from the
        # resized original (not from the previous variant).
        stem = os.path.splitext(file_name)[0]
        for i in range(n_augmented):
            augumented_image = augumentation(image)
            new_name = f"{stem}_aug_{i}.jpg"
            augumented_image.save(os.path.join(output_path, new_name))
        

In [None]:
# Recursively archive the augmented dataset folder into a single zip file under
# /kaggle/working (presumably so it can be downloaded or re-uploaded as a
# Kaggle dataset -- confirm).
!zip -r /kaggle/working/egg-augument.zip /kaggle/working/egg-augument 

# Split data

In [3]:
# Sanity check: number of files per class folder after augmentation.
not_damaged_total = len(os.listdir('/kaggle/working/egg-augument/Not Damaged'))
print(f"Not damage: {not_damaged_total}")

damaged_total = len(os.listdir('/kaggle/working/egg-augument/Damaged'))
print(f"Damage: {damaged_total}")

Not damage: 2106
Damage: 2528


In [5]:
from torch.utils.data import random_split, DataLoader

# Convert PIL images to float tensors and normalise each channel with the
# mean/std conventionally used for ImageNet-pretrained torchvision models.
transform_tensor = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# ImageFolder derives the label of each image from its sub-folder name.
dataset = datasets.ImageFolder(root='/kaggle/input/new-egg/egg-augument', transform=transform_tensor)

# 80 % train, remainder validation (the remainder absorbs the rounding).
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

print(f"train size: {train_size}")
print(f"val size: {val_size}")

# A dedicated, seeded generator makes the split identical on every run, so
# validation metrics stay comparable between runs.
# NOTE(review): this folder appears to already contain augmented variants; a
# random split can then put variants of one source image into both train and
# val, which inflates validation accuracy -- confirm and consider splitting by
# source image instead.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = random_split(
    dataset,
    [train_size, val_size],
    generator=split_generator,
)

train size: 3707
val size: 927


# Load VGG16 (unfreeze the last CNN layer and classification layers)

In [8]:
from torchvision import models
import torch.nn as nn
import torch.optim as optim

# Load torchvision's VGG16 with pretrained weights and print its layer
# structure; the printed indices are what the later freezing / head-replacement
# code refers to (features[...], classifier[...]).
# NOTE(review): `pretrained=True` is deprecated in torchvision >= 0.13 in favour
# of the `weights=` argument -- confirm the installed version before switching.
vgg16 = models.vgg16(pretrained=True)
print(vgg16)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [10]:
# Freeze the early part of the feature extractor: parameters in features[:24]
# stop receiving gradients, so only features[24:] and the classifier are
# fine-tuned. (In torchvision's VGG16 features[24:] should be the last
# convolutional block -- the printed architecture above is truncated, so verify.)
for param in vgg16.features[:24].parameters():
    param.requires_grad = False

# Replace the final classifier layer with a 2-class head (Damaged / Not Damaged).
# The input width is read from the layer being replaced instead of hard-coding 4096.
num_classes = 2
vgg16.classifier[6] = nn.Linear(vgg16.classifier[6].in_features, num_classes)

# Move the model to its device before building the optimizer, as recommended
# by the torch.optim documentation.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vgg16 = vgg16.to(device)

# Optimise only the parameters that are still trainable.
# Fix: the learning rate was 1e-14, which is so small that the weights
# effectively never change; 1e-4 is a typical fine-tuning rate for Adam.
optimizer = optim.Adam(
    [p for p in vgg16.parameters() if p.requires_grad],
    lr=1e-4,
)

# Training