In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision import models
import os
import random
from PIL import Image
from torchvision.transforms import v2
import numpy as np


# Pick the compute device first so the checkpoint below can be mapped onto it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ResNet-18 backbone initialised with ImageNet weights (the same weights that
# the deprecated `pretrained=True` flag selected); the classifier head is
# replaced by a 30-way linear layer.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
model.fc = nn.Linear(model.fc.in_features, 30)

# Load the initially trained model from initial_train.ipynb (optional).
# map_location makes a checkpoint that was saved on a GPU loadable on a
# CPU-only runtime as well.
model.load_state_dict(
    torch.load(
        "/content/drive/MyDrive/MAIS202/initial_train_resnet18_mnist.pth",
        map_location=device,
    )
)

# Move the model to the GPU if one is available. Train/eval mode is not set
# here; the training loop is expected to call model.train() / model.eval().
model = model.to(device)

# Transform images to 224x224 and normalize with the ImageNet statistics.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    # Author's note: changing this resulted in the initial loss falling from
    # ~50 to 0.0013!!
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


# Load the Recyclable and House Waste dataset

class WasteDataset():
    """Map-style dataset over the Recyclable and Household Waste image tree.

    Expected layout: ``root_dir/<class_name>/{default,real_world}/<image>``.
    Class indices are assigned by enumerating the sorted sub-directory names
    of ``root_dir``.

    Every (class, subfolder) file list is sorted and then shuffled with a
    generator seeded by ``seed``, so instances built with the same seed see
    the same permutation. The three splits are slices of that one permutation
    and therefore never overlap:

    * ``'train'`` -- first 5% of each subfolder
    * ``'val'``   -- the 60%-80% slice
    * anything else (``'test'``) -- the final 20%

    Args:
        split: ``'train'``, ``'val'`` or ``'test'``.
        root_dir: Directory that holds one sub-directory per class.
        transform: Optional callable applied to the RGB PIL image.
        seed: Seed of the per-instance shuffle. Use the same value for all
            splits that must stay disjoint.
    """

    def __init__(self, split, root_dir="/content/drive/MyDrive/MAIS202/dataset/images/images", transform=None, seed=0):
        self.root_dir = root_dir
        self.transform = transform
        self.classes = sorted(os.listdir(root_dir))
        self.image_paths = []
        self.labels = []
        # Numeric class index -> human-readable class name.
        # NOTE(review): presumably mirrors the sorted folder names stored in
        # self.classes -- confirm against the dataset on disk.
        self.classDict = {0 : 'aerosol_cans',
                            1 : 'aluminum_food_cans',
                            2 : 'aluminum_soda_cans',
                            3 : 'cardboard_boxes',
                            4 : 'cardboard_packaging',
                            5 : 'clothing',
                            6 : 'coffee_grounds',
                            7 : 'disposable_plastic_cutlery',
                            8 : 'eggshells',
                            9 : 'food_waste',
                            10 : 'glass_beverage_bottles',
                            11 : 'glass_cosmetic_containers',
                            12 : 'glass_food_jars',
                            13 : 'magazines',
                            14 : 'newspaper',
                            15 : 'office_paper',
                            16 : 'paper_cups',
                            17 : 'plastic_cup_lids',
                            18 : 'plastic_detergent_bottles',
                            19 : 'plastic_food_containers',
                            20 : 'plastic_shopping_bags',
                            21 : 'plastic_soda_bottles',
                            22 : 'plastic_straws',
                            23 : 'plastic_trash_bags',
                            24 : 'plastic_water_bottles',
                            25 : 'shoes',
                            26 : 'steel_food_cans',
                            27 : 'styrofoam_cups',
                            28 : 'styrofoam_food_containers',
                            29 : 'tea_bags'}
        # Human-readable class name -> waste stream it is sorted into.
        self.wasteDict = {"aerosol_cans": "Landfill",
                                "aluminum_food_cans": "Plastic, Glass, Metal Recyclable",
                                "aluminum_soda_cans": "Plastic, Glass, Metal Recyclable",
                                "cardboard_boxes": "Paper, Cardboard Recyclable",
                                "cardboard_packaging": "Paper, Cardboard Recyclable",
                                "clothing": "Landfill",
                                "coffee_grounds": "Compostable",
                                "disposable_plastic_cutlery": "Landfill",
                                "eggshells": "Compostable",
                                "food_waste": "Compostable",
                                "glass_beverage_bottles": "Plastic, Glass, Metal Recyclable",
                                "glass_cosmetic_containers": "Plastic, Glass, Metal Recyclable",
                                "glass_food_jars": "Plastic, Glass, Metal Recyclable",
                                "magazines": "Paper, Cardboard Recyclable",
                                "newspaper": "Paper, Cardboard Recyclable",
                                "office_paper": "Paper, Cardboard Recyclable",
                                "paper_cups": "Paper, Cardboard Recyclable",
                                "plastic_cup_lids": "Landfill",
                                "plastic_detergent_bottles": "Plastic, Glass, Metal Recyclable",
                                "plastic_food_containers": "Plastic, Glass, Metal Recyclable",
                                "plastic_shopping_bags": "Landfill",
                                "plastic_soda_bottles": "Plastic, Glass, Metal Recyclable",
                                "plastic_straws": "Landfill",
                                "plastic_trash_bags": "Landfill",
                                "plastic_water_bottles": "Plastic, Glass, Metal Recyclable",
                                "shoes": "Landfill",
                                "steel_food_cans": "Plastic, Glass, Metal Recyclable",
                                "styrofoam_cups": "Landfill",
                                "styrofoam_food_containers": "Landfill",
                                "tea_bags": "Compostable",}

        # A private, seeded generator keeps the permutation identical across
        # the train/val/test instances; the global `random` state (which
        # differs between instantiations) would make the slices overlap.
        rng = random.Random(seed)

        for i, class_name in enumerate(self.classes):
            class_dir = os.path.join(root_dir, class_name)
            for subfolder in ['default', 'real_world']:
                subfolder_dir = os.path.join(class_dir, subfolder)
                # os.listdir order is arbitrary, so sort before shuffling to
                # get a reproducible starting point.
                image_names = sorted(os.listdir(subfolder_dir))
                rng.shuffle(image_names)
                count = len(image_names)

                if split == 'train':
                    # note!! only 5% of the dataset is used for the initial train
                    image_names = image_names[:int(0.05 * count)]
                elif split == 'val':
                    image_names = image_names[int(0.6 * count):int(0.8 * count)]
                else:  # split == 'test'
                    image_names = image_names[int(0.8 * count):]

                for image_name in image_names:
                    self.image_paths.append(os.path.join(subfolder_dir, image_name))
                    self.labels.append(i)

    def __len__(self):
        """Return the number of images selected for this split."""
        return len(self.image_paths)

    def __getitem__(self, index):
        """Load one sample.

        Returns:
            dict with ``"image"`` (the RGB image, passed through
            ``self.transform`` when one was given) and ``"label"`` (the
            integer class index).
        """
        image_path = self.image_paths[index]
        label = self.labels[index]
        image = Image.open(image_path).convert('RGB')

        if self.transform:
            image = self.transform(image)

        return {
            "image": image,
            "label": label,
        }


# Prepare datasets and dataloaders

def _to_normalized_tensor():
    """Return the shared tail of every pipeline: PIL -> float tensor in [-1, 1]."""
    return [
        v2.PILToTensor(),
        # scale=True rescales uint8 0-255 to float 0-1; without it ToDtype only
        # casts, and Normalize(0.5, 0.5) would receive values up to 255.
        # NOTE(review): a checkpoint trained on the previous unscaled inputs
        # sees a different input range after this change -- confirm that
        # fine-tuning from it is still the intent.
        v2.ToDtype(torch.float32, scale=True),
        v2.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]


# Training pipeline: photometric + geometric augmentation, then resize and blur.
train_pil_transform = v2.Compose([
    v2.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.2),
    v2.RandomAffine(degrees=5, translate=(0.1, 0.1), scale=(0.8, 1.3),
                    interpolation=torchvision.transforms.InterpolationMode.BILINEAR),
    v2.Resize(size=(256, 256)),
    v2.GaussianBlur(kernel_size=(7, 13), sigma=(0.1, 0.2)),
    *_to_normalized_tensor(),
])

# Evaluation pipelines: deterministic resize + normalisation only.
val_pil_transform = v2.Compose([
    v2.Resize(size=(256, 256)),
    *_to_normalized_tensor(),
])

test_pil_transform = v2.Compose([
    v2.Resize(size=(256, 256)),
    *_to_normalized_tensor(),
])


data_transforms = {
    "train": train_pil_transform,
    "val": val_pil_transform,
    "test": test_pil_transform,
}

train_dataset = WasteDataset(split="train", transform=data_transforms["train"])
val_dataset = WasteDataset(split="val", transform=data_transforms["val"])
test_dataset = WasteDataset(split="test", transform=data_transforms["test"])


# Load the Recyclable and House Waste dataset.
# Only the training loader shuffles; evaluation metrics do not depend on the
# sample order, so val/test are read in a fixed order.
trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)

validateloader = torch.utils.data.DataLoader(val_dataset, batch_size=64, shuffle=False)

testloader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False)

# translates the numerical classes outputted by the model to their original human-readable class
classtranslater = train_dataset.classDict

# sorts the human-readable class into one of McGill's 4 nonhazardous waste streams
wastetranslater = train_dataset.wasteDict

def wastesort(classInputted):
    """Return the waste stream for a numeric class index.

    The index is first resolved to its class name through ``classtranslater``
    and that name is then looked up in ``wastetranslater``. A ``KeyError``
    propagates if either lookup misses.
    """
    class_name = classtranslater[classInputted]
    waste_stream = wastetranslater[class_name]
    return waste_stream

# Loss: cross-entropy between the model's logits and the integer class labels.
criterion = nn.CrossEntropyLoss()

# Optimizer: Adam over every parameter of the model, learning rate 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Scheduler: multiply the learning rate by 0.1 after every 7 scheduler steps.
scheduler = optim.lr_scheduler.StepLR(
    optimizer=optimizer,
    step_size=7,
    gamma=0.1,
)



Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 90.3MB/s]


In [None]:
# Fine-tuning
num_epochs = 50
save_path = "/content/drive/MyDrive/MAIS202/"

# Per-epoch history: one entry is appended to each list at the end of an epoch.
val_losses = []
train_losses = []
val_accuracies = []

for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    running_loss = 0.0   # sum of per-sample losses over the epoch
    seen = 0             # number of training samples processed

    for data in trainloader:  # each entry in trainloader is a batch of up to 64 images and their labels
        images, labels = data["image"].to(device), data["label"].to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean, so weight it by the batch size to
        # keep the epoch average exact when the last batch is smaller.
        running_loss += loss.item() * images.size(0)
        seen += images.size(0)

    # Normalise once per epoch (not once per batch) to get the mean sample loss.
    train_loss = running_loss / max(seen, 1)
    train_losses.append(train_loss)

    # step the scheduler after each epoch
    scheduler.step()

    # ---- validation pass ----
    # Evaluate on the validation split; the test split stays untouched so it
    # can serve as a final, unbiased check.
    model.eval()

    val_loss = 0.0
    correct = 0
    total = 0
    #correctSort = 0

    with torch.no_grad():
        for data in validateloader:  # each entry in validateloader is a batch of up to 64 images and their labels
            images, labels = data["image"].to(device), data["label"].to(device)
            outputs = model(images)

            predicted = torch.argmax(outputs, dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            #predictedwaste = [wastesort(val.item()) for val in predicted]
            #labelwaste = [wastesort(val.item()) for val in labels]
            #predictedwaste = np.array(predictedwaste)
            #labelwaste = np.array(labelwaste)
            #correctSort += np.sum(predictedwaste == labelwaste)

            loss = criterion(outputs, labels)
            val_loss += loss.item() * images.size(0)

    # Divide by the number of samples actually evaluated; max(..., 1) avoids a
    # ZeroDivisionError when the split is empty.
    val_loss /= max(total, 1)
    val_losses.append(val_loss)

    val_accuracy = 100 * correct / max(total, 1)
    val_accuracies.append(val_accuracy)

    print(f'Accuracy of the fine-tuned model on the validation images: {val_accuracy:.2f}%')
    #print(f'Accuracy of the fine-tuned model on the task (sorting objects into their waste categories): {100 * correctSort / total:.2f}%')
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {train_loss:.4f}")

    # Checkpoint after every epoch (file names use the 0-based epoch index).
    torch.save(model.state_dict(), os.path.join(save_path, 'final_train_resnet18_epoch-{}.pth'.format(epoch)))


print('Fine-tuning complete')



print('Model saved')

# One epoch takes about 24 minutes to run.
# Results after epoch 1:
# Accuracy of the fine-tuned model on the test images: 61.97%
# Epoch [1/1], Loss: 0.0013
# Fine-tuning complete
# Model saved



Accuracy of the fine-tuned model on the test images: 62.87%
Epoch [1/50], Loss: 0.0013
Accuracy of the fine-tuned model on the test images: 68.63%
Epoch [2/50], Loss: 0.0027
Accuracy of the fine-tuned model on the test images: 70.23%
Epoch [3/50], Loss: 0.0007
Accuracy of the fine-tuned model on the test images: 75.90%
Epoch [4/50], Loss: 0.0006
Accuracy of the fine-tuned model on the test images: 75.13%
Epoch [5/50], Loss: 0.0002
Accuracy of the fine-tuned model on the test images: 77.53%
Epoch [6/50], Loss: 0.0005
Accuracy of the fine-tuned model on the test images: 73.13%
Epoch [7/50], Loss: 0.0010
Accuracy of the fine-tuned model on the test images: 77.73%
Epoch [8/50], Loss: 0.0002
Accuracy of the fine-tuned model on the test images: 79.33%
Epoch [9/50], Loss: 0.0005
Accuracy of the fine-tuned model on the test images: 80.00%
Epoch [10/50], Loss: 0.0002
Accuracy of the fine-tuned model on the test images: 80.33%
Epoch [11/50], Loss: 0.0002
Accuracy of the fine-tuned model on the t