In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision import models
import os
import random
from PIL import Image
from torchvision.transforms import v2



# Load a pre-trained ResNet18 model
model = models.resnet18(pretrained=True)

# Modify the last layer to match waste labels (30 labels)
model.fc = nn.Linear(model.fc.in_features, 30)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Transform images to 224x224 and normalize
transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])


# Load the Recyclable and House Waste dataset

class WasteDataset():
    def __init__(self, split, root_dir="/content/drive/MyDrive/MAIS202/dataset/images/images", transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.classes = sorted(os.listdir(root_dir))
        self.image_paths = []
        self.labels = []

        for i, class_name in enumerate(self.classes):
            class_dir = os.path.join(root_dir, class_name)
            for subfolder in ['default', 'real_world']:
                subfolder_dir = os.path.join(class_dir, subfolder)
                image_names = os.listdir(subfolder_dir)
                random.shuffle(image_names)

                if split == 'train':
                    image_names = image_names[:int(0.6 * len(image_names))]
                elif split == 'val':
                    image_names = image_names[int(0.6 * len(image_names)):int(0.8 * len(image_names))]
                else:  # split == 'test'
                    image_names = image_names[int(0.8 * len(image_names)):]

                for image_name in image_names:
                    self.image_paths.append(os.path.join(subfolder_dir, image_name))
                    self.labels.append(i)


    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, index):
        image_path = self.image_paths[index]
        label = self.labels[index]

        image = Image.open(image_path).convert('RGB')

        if self.transform:
            image = self.transform(image)

        data = {
            "image":image,
            "label":label,
        }
        return data


# Prepare datasets and dataloaders

train_pil_transform = v2.Compose([
        v2.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.2),
        v2.RandomAffine(degrees=5, translate=(0.1, 0.1),scale=(0.8,1.3),
                        interpolation=torchvision.transforms.InterpolationMode.BILINEAR),
        v2.Resize(size=(256, 256)),
        v2.GaussianBlur(kernel_size=(7, 13), sigma=(0.1, 0.2)),
        v2.PILToTensor(),
        v2.ToDtype(torch.float32),
        v2.Normalize(mean=(0.5,0.5,0.5),std=(0.5,0.5,0.5))
    ])

val_pil_transform = v2.Compose([
    v2.Resize(size=(256, 256)),
    v2.PILToTensor(),
    v2.ToDtype(torch.float32),
    v2.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
])

test_pil_transform = v2.Compose([
    v2.Resize(size=(256, 256)),
    v2.PILToTensor(),
    v2.ToDtype(torch.float32),
    v2.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
])


data_transforms = {
    "train":train_pil_transform,
    "val":val_pil_transform,
    "test":test_pil_transform,
}

train_dataset = WasteDataset(split="train", transform=data_transforms["train"])
val_dataset = WasteDataset(split="val", transform=data_transforms["val"])
test_dataset = WasteDataset(split="test", transform=data_transforms["test"])



# Load the Recyclable and House Waste dataset
trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)

validateloader = torch.utils.data.DataLoader(val_dataset, batch_size=64, shuffle=True)

testloader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=True)






In [11]:
# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Learning rate scheduler to adjust the learning rate
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)


In [12]:
# Fine-tuning
num_epochs = 5
# the number of epochs is set to 5 for the initial train
for epoch in range(num_epochs):
    running_loss = 0.0

    for idx, data in enumerate(trainloader): # each entry in trainloader (each idx) is a batch of 64 images and their labels
        images, labels = data["image"].to(device), data["label"].to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()* images.size(0)

    # Step the scheduler after each epoch
    scheduler.step()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(trainloader):.4f}")

print('Fine-tuning complete')
# took 5 hours to run!!

Epoch [1/5], Loss: 102.0208
Epoch [2/5], Loss: 68.6975
Epoch [3/5], Loss: 55.4733
Epoch [4/5], Loss: 47.3633
Epoch [5/5], Loss: 40.0380
Fine-tuning complete


In [36]:
save_path = "/content/drive/MyDrive/MAIS202/initial_train_resnet18_mnist.pth"

torch.save(model.state_dict(), save_path) # in the future, save after each epoch so that training on more than 5 epochs can be done stepwise
print('Model saved')

Model saved


In [17]:
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, 30)
model.load_state_dict(torch.load("/content/drive/MyDrive/MAIS202/initial_train_resnet18_mnist.pth"))

model.eval()

correct = 0
total = 0
correctSort = 0


with torch.no_grad():
    for idx, data in enumerate(testloader): # each entry in testloader (each idx) is a batch of 64 images and their labels
        images, labels = data["image"].to(device), data["label"].to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the fine-tuned model on the test images: {100 * correct / total:.2f}%')




Accuracy of the fine-tuned model on the test images: 77.70%


In [34]:
sample_batch = next(iter(testloader))

with torch.no_grad():
    sample_outputs = model(sample_batch["image"].to(device))
    print(torch.argmax(sample_outputs, dim=1))
    print(sample_batch["label"].to(device))

tensor([12, 20, 13, 13, 14, 20,  8, 17, 22, 11,  0, 13,  3,  4,  9, 10, 13, 16,
        25,  6, 12, 24, 23,  8,  4,  5,  5,  3, 11, 10, 22, 27, 17, 25, 23, 17,
        17,  6,  4, 18, 22,  6, 15, 26,  4,  6, 22, 23, 25,  5,  1, 27, 15,  4,
        29, 24,  2, 27, 26, 12,  3, 29, 14,  9])
tensor([12,  7, 13, 22, 24, 20,  8, 17, 22, 11,  0, 15, 15,  4,  9, 10, 13, 16,
        25,  6, 12, 22, 20,  8, 15,  5,  5,  3, 11, 10, 22, 27, 17, 25, 26, 17,
        17,  6,  4, 18, 22,  6, 15, 26,  4,  6, 22, 23, 25, 22, 11, 27, 16,  4,
        14, 24,  2, 27,  1, 12,  4, 29, 14,  9])


In [35]:
print(sample_outputs.min(), sample_outputs.max()) # maybe fixing the logit values will reduce the loss

tensor(-16.1039) tensor(22.6542)


In [None]:
# code adapted from this tutorial: https://dev.to/santoshpremi/fine-tuning-a-pre-trained-model-in-pytorch-a-step-by-step-guide-for-beginners-4p6l