In [None]:
# Environment switch read by the setup cell: True takes the Kaggle branch
# (kaggle_secrets, data under /kaggle/input); False takes the branch that
# pip-installs dependencies, downloads the dataset and uses google.colab.
KAGGLE_NOTEBOOK = True

# Imports

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from torchvision import datasets, transforms, utils
import os
import random
from collections import defaultdict
from PIL import Image
from PIL import ImageFile
# Pillow flag: tolerate image files whose data is truncated instead of failing on load.
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Non-Kaggle branch (uses google.colab, so presumably Colab): install the helper
# packages, download the dataset and read the W&B key from the Colab user data.
if not KAGGLE_NOTEBOOK:
  !pip install opendatasets

  import opendatasets as od
  import pandas

  # Downloads the Kaggle dataset; the root_dir set below points at the resulting folder.
  od.download("https://www.kaggle.com/datasets/mittalshubham/images256/data")

  !pip install wandb
  from google.colab import userdata
  wandb_api_key = userdata.get('wandb-api-key')

  # Dataset root scanned by PlacesDataset.
  root_dir = '/content/images256'

# Kaggle branch: the W&B key comes from Kaggle secrets and the data is read from
# the read-only input mount.
else:
  from kaggle_secrets import UserSecretsClient
  user_secrets = UserSecretsClient()
  wandb_api_key = user_secrets.get_secret("wandb-api-key")

  # Dataset root scanned by PlacesDataset.
  root_dir = '/kaggle/input'

# wandb is imported only here because the non-Kaggle branch installs it first.
import wandb
wandb.login(key=wandb_api_key)

# Hyperparameters

In [None]:
# Mini-batch size used by both the train and the test DataLoader.
batch_size = 32
# Channel count of the first ResNet18 stage; later stages use 2x, 4x and 8x this value.
n_kernels = 64
# Number of passes over the training set performed by train().
n_epochs = 500
# Learning rate handed to the Adam optimizer in train().
learning_rate = 1e-3
# Dropout probability applied to the pooled features before the classifier layer.
dropout = 0.2
# Fraction of each class's images assigned to the training split (rest goes to test).
train_size = 0.8
# Cap on the number of images collected per class by PlacesDataset.
limit_per_class = 0  # 0 to disable
# Seed for the per-class shuffle that decides the train/test split.
seed = 42
# Checkpoint frequency in epochs: a model file is written when epoch % save_every == 0.
save_every = 1

# WandB

In [None]:
model_name = "ResNet"

# Open the W&B run and record every hyperparameter of this experiment in its config.
wandb.init(
    project="deep-learning",
    config=dict(
        model=model_name,
        batch_size=batch_size,
        n_kernels=n_kernels,
        n_epochs=n_epochs,
        learning_rate=learning_rate,
        dropout=dropout,
        train_size=train_size,
        seed=seed,
        limit_per_class=limit_per_class,
    ),
)

# PlacesDataset

In [None]:
class PlacesDataset(data.Dataset):
    """Image-classification dataset with a seeded, per-class train/test split.

    Expected layout under ``root_dir``::

        root_dir/<letter folder>/<class folder>/<image files>
        root_dir/<letter folder>/<class folder>/<sub-category folder>/<image files>

    Every class folder gets one integer label. Images found inside a known
    sub-category folder are labelled with the parent class. Each candidate
    file is checked with ``PIL.Image.verify`` and skipped (with a printed
    message) when it is not a readable image.

    Args:
        root_dir: Directory that contains the letter folders.
        train: ``True`` selects the training part of the split, ``False`` the
            test part.
        train_size: Fraction of every class assigned to the training part.
        seed: Seed of the shuffle that decides the split. Note that the
            global ``random`` generator is reseeded with it.
        limit_per_class: Maximum number of images kept per class
            (sub-category images included); ``0`` disables the cap.

    Attributes:
        image_paths / labels: Parallel sequences for the selected split.
        image_per_class: Mapping label -> list of all accepted image paths.
        classes: Class folder names, indexed by label.
        number_of_classes: Number of class folders found.
        transform: Preprocessing applied in ``__getitem__``.
    """

    # Folder names that denote a nested sub-category of the enclosing class.
    _SUB_CATEGORIES = frozenset([
        'shop', 'outdoor', 'cultivated', 'wild', 'coral_reef', 'indoor',
        'football', 'baseball', 'platform', 'south_asia', 'east_asia',
        'home', 'sand', 'vegetation',
    ])

    def __init__(self, root_dir=root_dir, train=True, train_size=train_size, seed=seed, limit_per_class=limit_per_class):
        super().__init__()

        # params dataset
        self.root_dir = root_dir
        self.train = train
        self.limit_per_class = limit_per_class

        # data and labels
        self.image_paths = []
        self.labels = []
        self.image_per_class = defaultdict(list)

        self.number_of_classes = 0
        self.classes = []

        to_normalized_tensor = [
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ]
        if train:
            # Random augmentation is only meaningful while fitting the model.
            self.transform = transforms.Compose([
                transforms.Resize((256, 256)),
                transforms.RandomHorizontalFlip(),
                transforms.RandomVerticalFlip(),
                transforms.RandomRotation(degrees=30),  # degrees = range of rotation
                transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # parameters are ranges
                transforms.RandomGrayscale(p=0.1),  # p = probability of applying the transform
                *to_normalized_tensor,
            ])
        else:
            # The test split must be deterministic, otherwise the reported
            # loss/accuracy change from one evaluation to the next.
            self.transform = transforms.Compose([
                transforms.Resize((256, 256)),
                *to_normalized_tensor,
            ])

        # Directory listings are sorted: os.listdir returns entries in
        # arbitrary order, and both the label numbering and the seeded split
        # below depend on the order in which classes are visited. The train
        # and test datasets are built by two separate scans, so they must see
        # the same order to stay disjoint.
        class_idx = 0
        for letter_folder in sorted(os.listdir(root_dir)):
            letter_folder_path = os.path.join(root_dir, letter_folder)
            if not os.path.isdir(letter_folder_path):
                continue  # Skip if not a directory

            # Iterate through class folders within the alphabetical folder
            for class_name in sorted(os.listdir(letter_folder_path)):
                class_dir = os.path.join(letter_folder_path, class_name)
                if not os.path.isdir(class_dir):
                    continue  # Skip if not a directory

                class_paths = self._collect_class_images(class_dir, class_name, class_idx)
                if class_paths:
                    self.image_per_class[class_idx].extend(class_paths)

                self.number_of_classes += 1
                self.classes.append(class_name)
                class_idx += 1

        # Lists to hold training and testing data
        train_img_paths = []
        train_labels = []
        test_img_paths = []
        test_labels = []

        # Determining train/test split per class
        random.seed(seed)
        for label, paths in self.image_per_class.items():
            random.shuffle(paths)  # Shuffling within each class
            # Number of items in this class for train set
            nb_train = int(train_size * len(paths))

            # Splitting the data for this class into train and test
            train_img_paths.extend(paths[:nb_train])
            train_labels.extend([label] * nb_train)
            test_img_paths.extend(paths[nb_train:])
            test_labels.extend([label] * (len(paths) - nb_train))

        # Applying the split
        if self.train:
            self.image_paths = train_img_paths
            self.labels = train_labels
        else:
            self.image_paths = test_img_paths
            self.labels = test_labels

        combined = list(zip(self.image_paths, self.labels))
        random.shuffle(combined)
        if combined:
            self.image_paths, self.labels = zip(*combined)
        else:
            # zip(*[]) yields nothing to unpack; keep two empty sequences instead.
            self.image_paths, self.labels = (), ()

    @staticmethod
    def _is_valid_image(path):
        """Return True when PIL can open and verify ``path`` as an image."""
        try:
            # The context manager closes the file handle that Image.open keeps.
            with Image.open(path) as candidate:
                candidate.verify()
        except (IOError, SyntaxError):
            return False
        return True

    def _collect_class_images(self, class_dir, class_name, class_idx):
        """Return the verified image paths of one class, honouring the per-class cap."""
        limit = self.limit_per_class
        collected = []

        for filename in sorted(os.listdir(class_dir)):
            if limit and len(collected) >= limit:
                break

            entry_path = os.path.join(class_dir, filename)

            # sub category is detected
            if filename in self._SUB_CATEGORIES and os.path.isdir(entry_path):
                print(
                    f"Sub-category detected '{filename}' and images label with parent category {class_name} {class_idx}")

                for sub_filename in sorted(os.listdir(entry_path)):
                    # The cap also applies inside sub-category folders.
                    if limit and len(collected) >= limit:
                        break

                    sub_img_path = os.path.join(entry_path, sub_filename)
                    if self._is_valid_image(sub_img_path):
                        collected.append(sub_img_path)
                    else:
                        print(
                            'SUB Corrupted image or non-image file detected and skipped:', sub_filename)
            elif self._is_valid_image(entry_path):
                collected.append(entry_path)
            else:
                print(
                    'Corrupted image or non-image file detected and skipped:', filename)

        return collected

    def __len__(self):
        """Return the number of samples in the selected split."""
        return len(self.image_paths)

    def __getitem__(self, index):
        """Load sample ``index`` and return ``(transformed image, label)``.

        Raises:
            RuntimeError: If the image file cannot be opened or converted to RGB.
        """
        image_path = self.image_paths[index]
        label = self.labels[index]

        try:
            with Image.open(image_path) as raw_image:
                # convert() loads the pixel data, so the file can be closed afterwards.
                image = raw_image.convert('RGB')
        except (IOError, SyntaxError) as err:
            # Fail with the offending path instead of continuing without an image.
            raise RuntimeError(
                f"Failed to load image as RGB: {image_path} (label {label})") from err

        image = self.transform(image)

        return image, label

# ResNet

In [None]:
# Run on the GPU whenever CUDA is usable, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

print(device)

In [None]:
class ResidualBlock(nn.Module):
    """Two-convolution residual block: conv-BN-ReLU, conv-BN, add shortcut, ReLU.

    Args:
        input_size: Number of input channels.
        output_size: Number of output channels.
        stride: Stride of the first convolution (and of the projection shortcut).
        kernel_size: Kernel size of both main-path convolutions.
        padding: Padding of both main-path convolutions.
        bias: Whether the convolutions carry a bias term. Defaults to ``False``
            because each convolution is followed by batch normalisation.

    When ``stride != 1`` or the channel count changes, the shortcut is a 1x1
    convolution followed by batch normalisation; otherwise it is the identity.
    All convolution weights are initialised with ``kaiming_normal_``.
    """

    def __init__(self, input_size, output_size, stride=1, kernel_size=3, padding=1, bias=False):
        super(ResidualBlock, self).__init__()
        self.cnn1 = nn.Sequential(
            nn.Conv2d(input_size, output_size, kernel_size,
                      stride, padding, bias=bias),
            nn.BatchNorm2d(output_size),
            nn.ReLU(True)
        )
        self.cnn2 = nn.Sequential(
            nn.Conv2d(output_size, output_size,
                      kernel_size, 1, padding, bias=bias),
            nn.BatchNorm2d(output_size)
        )

        # Apply He initialization to the convolutional layers
        nn.init.kaiming_normal_(self.cnn1[0].weight)
        nn.init.kaiming_normal_(self.cnn2[0].weight)

        # If the block changes dimensions, the shortcut must change them too.
        if stride != 1 or input_size != output_size:
            self.shortcut = nn.Sequential(
                nn.Conv2d(input_size, output_size, kernel_size=1,
                          stride=stride, bias=bias),
                nn.BatchNorm2d(output_size)
            )
            # Apply He initialization to the shortcut convolutional layers
            nn.init.kaiming_normal_(self.shortcut[0].weight)
        else:
            # Empty Sequential acts as the identity.
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        residual = x
        x = self.cnn1(x)
        x = self.cnn2(x)
        x += self.shortcut(residual)
        # Functional ReLU avoids constructing a new nn.ReLU module on every call.
        return nn.functional.relu(x, inplace=True)

In [None]:
class ResNet18(nn.Module):
    """ResNet-18 style classifier assembled from eight ``ResidualBlock`` modules.

    The stem is a 3x3 stride-1 convolution with batch norm and ReLU. The
    residual stages use ``n_kernels`` channels times 1, 2, 4 and 8, followed
    by global average pooling, dropout and a linear layer with
    ``num_classes`` outputs. ``n_kernels`` and ``dropout`` are read from the
    notebook globals.
    """

    def __init__(self,num_classes):
        super().__init__()
        self.in_channels = n_kernels

        stem_conv = nn.Conv2d(3, n_kernels, kernel_size=3,
                              stride=1, padding=1, bias=False)
        self.features = nn.Sequential(
            stem_conv,
            nn.BatchNorm2d(n_kernels),
            nn.ReLU()
        )

        # He initialization for the stem convolution
        nn.init.kaiming_normal_(stem_conv.weight)

        # (in_channels, out_channels, stride) for each residual block, in order
        w1, w2, w4, w8 = (n_kernels * factor for factor in (1, 2, 4, 8))
        block_specs = [
            (w1, w1, 1),
            (w1, w1, 1),
            (w1, w2, 2),
            (w2, w2, 1),
            (w2, w4, 2),
            (w4, w4, 1),
            (w4, w8, 2),
            (w8, w8, 1),
        ]
        self.net = nn.Sequential(*[
            ResidualBlock(in_ch, out_ch, stride=block_stride)
            for in_ch, out_ch, block_stride in block_specs
        ])

        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(dropout)
        self.classifer = nn.Linear(w8, num_classes)

    def forward(self, x):
        """Map an image batch to class logits."""
        pooled = self.avg_pool(self.net(self.features(x)))
        dropped = self.dropout(pooled)

        # flatten to (batch, features) before the linear layer
        flat = dropped.view(dropped.size(0), -1)

        return self.classifer(flat)

# Train loop

In [None]:
def train(model, trainloader, testloader, n_epochs=n_epochs, learning_rate=learning_rate):
    """Train ``model`` with Adam and cross-entropy, evaluating after every epoch.

    For each epoch the function runs one pass over ``trainloader`` in training
    mode, then one pass over ``testloader`` in evaluation mode without
    gradients. It prints and logs the mean batch losses and the test accuracy
    to Weights & Biases, and writes a checkpoint of the state dict whenever
    ``epoch % save_every == 0`` (``save_every`` is a notebook global; a falsy
    value disables checkpointing).

    Args:
        model: Network to optimise; it is moved to the global ``device``.
        trainloader: Iterable of ``(inputs, labels)`` training batches.
        testloader: Iterable of ``(inputs, labels)`` evaluation batches; its
            ``dataset`` length is the accuracy denominator.
        n_epochs: Number of epochs to run.
        learning_rate: Learning rate passed to Adam.

    Returns:
        Three lists with one scalar tensor per epoch: mean training loss,
        mean test loss and test accuracy.
    """
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()

    train_avg_loss = []
    test_avg_loss = []
    test_accuracy = []

    for i in range(n_epochs):

        print(f"Epoch : {i}")

        train_losses = []
        test_losses = []

        # train
        # Training mode: Dropout is active and BatchNorm uses batch statistics.
        model.train()
        for x, y in trainloader:
            # send to device
            x = x.to(device)
            y = y.to(device)

            # predict
            pred = model(x)
            loss = criterion(pred, y)
            train_losses.append(loss.detach())

            # step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # test
        # Evaluation mode: Dropout is disabled and BatchNorm uses (and no longer
        # updates) its running statistics, so test data cannot leak into the model.
        model.eval()
        with torch.no_grad():
            correct = 0

            for x, y in testloader:
                x = x.to(device)
                y = y.to(device)

                pred = model(x)
                loss = criterion(pred, y)
                test_losses.append(loss.detach())

                y_pred = pred.argmax(dim=-1)
                correct = correct + (y_pred == y).sum()

            accuracy = (correct / len(testloader.dataset))

        train_loss = torch.stack(train_losses).mean()
        test_loss = torch.stack(test_losses).mean()

        print(f"train_losses : {train_loss}")
        print(f"test_losses : {test_loss}")
        print(f"accuracy : {accuracy}")

        # Log plain Python floats rather than device tensors.
        wandb.log({
            "epoch": i,
            "train loss": float(train_loss),
            "test loss": float(test_loss),
            "accuracy": float(accuracy),
        })

        # `save_every and ...` avoids a modulo-by-zero when checkpointing is disabled.
        if save_every and i % save_every == 0:
            checkpoint_path = f"epoch_{i}_model.pt"
            torch.save(model.state_dict(), checkpoint_path)
            wandb.save(checkpoint_path)

        train_avg_loss.append(train_loss)
        test_avg_loss.append(test_loss)
        test_accuracy.append(accuracy)

    return train_avg_loss, test_avg_loss, test_accuracy

# Create dataset / dataloader

In [None]:
# Build the two splits. Each constructor call rescans the image folders with the
# default seed and keeps either the training part or the test part.
train_dataset, test_dataset = PlacesDataset(train=True), PlacesDataset(train=False)

In [None]:
# Wrap each split in a DataLoader: training batches are reshuffled every epoch,
# test batches keep a fixed order.
train_loader = data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)

test_loader = data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)

# Define Model

In [None]:
# Channels (assuming RGB images), Height, Width
input_features = [3, 256, 256]
# One output logit per class folder found by the dataset scan.
output_features = train_dataset.number_of_classes

network = ResNet18(num_classes=output_features)
network = network.to(device)
print(network)

# Train Model

In [None]:
# Run the full training schedule and keep the per-epoch metric histories.
train_avg_loss, test_avg_loss, test_accuracy = train(
    network,
    train_loader,
    test_loader,
    n_epochs=n_epochs,
    learning_rate=learning_rate,
)

# Plot

In [None]:
# Turn the per-epoch metric lists into NumPy arrays for matplotlib.
train_avg_loss_np = torch.tensor(train_avg_loss).detach().cpu().numpy()
test_avg_loss_np = torch.tensor(test_avg_loss).detach().cpu().numpy()
test_accuracy_np = torch.tensor(test_accuracy).detach().cpu().numpy()

# Left panel: both loss curves. Right panel: test accuracy.
fig, (loss_ax, accuracy_ax) = plt.subplots(1, 2, figsize=(12, 4))

loss_ax.plot(train_avg_loss_np, label='Training Loss')
loss_ax.plot(test_avg_loss_np, label='Testing Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Training and Testing Loss')
loss_ax.legend()

accuracy_ax.plot(test_accuracy_np, label='Test Accuracy')
accuracy_ax.set_xlabel('Epoch')
accuracy_ax.set_ylabel('Accuracy')
accuracy_ax.set_title('Test Accuracy')
accuracy_ax.legend()

fig.tight_layout()
plt.show()

# Finish wandb run

In [None]:
# Close the W&B run opened by wandb.init above; this has to be called
# explicitly in notebooks.
wandb.finish()