In [5]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split, Subset
from torchvision import datasets
import random

This dataset contains a comprehensive collection of 15,000 images (each 256x256 pixels) depicting various recyclable materials, general waste, and household items across 30 distinct categories.

In [3]:
import os
from PIL import Image
import numpy as np

def compute_mean_std(image_folder):
    """Compute the per-channel mean and standard deviation of all images under a folder.

    Walks ``image_folder`` recursively, loads every ``.png``/``.jpg``/``.jpeg``
    file (extension matched case-insensitively) as RGB scaled to [0, 1], and
    accumulates running sums so only one image is held in memory at a time.

    Args:
        image_folder: Root directory to scan; subfolders are included.

    Returns:
        Tuple ``(mean, std)`` of NumPy arrays of shape (3,), in R, G, B order.
        ``std`` is the population standard deviation (divides by the pixel count).

    Raises:
        ValueError: If no image pixels were found under ``image_folder``.
    """
    image_exts = ('.png', '.jpg', '.jpeg')
    # float64 accumulators: the totals span every pixel of the dataset.
    channel_sum = np.zeros(3, dtype=np.float64)
    channel_sum_sq = np.zeros(3, dtype=np.float64)
    num_pixels = 0

    for root, _, files in os.walk(image_folder):
        for fname in files:
            if not fname.lower().endswith(image_exts):
                continue
            img_path = os.path.join(root, fname)
            # The context manager guarantees the underlying file is closed
            # even if decoding or conversion raises.
            with Image.open(img_path) as img:
                img_np = np.asarray(img.convert('RGB'), dtype=np.float64) / 255.0
            num_pixels += img_np.shape[0] * img_np.shape[1]
            channel_sum += img_np.sum(axis=(0, 1))
            channel_sum_sq += np.square(img_np).sum(axis=(0, 1))

    if num_pixels == 0:
        # Without this guard the divisions below would silently return NaN.
        raise ValueError(f"No image pixels found under {image_folder!r}")

    mean = channel_sum / num_pixels
    # E[x^2] - E[x]^2 can dip marginally below zero from rounding; clamp so
    # the square root never produces NaN.
    variance = np.maximum(channel_sum_sq / num_pixels - mean ** 2, 0.0)
    std = np.sqrt(variance)
    return mean, std

# Scan the top-level image folder (all category subfolders included) and
# report the resulting per-channel statistics.
mean, std = compute_mean_std("../data/images")
for stat_label, stat_values in zip(("mean:", "std:"), (mean, std)):
    print(stat_label, stat_values)


mean: [0.74856972 0.72743281 0.70510449]
std: [0.30954896 0.31524588 0.33628213]


The dataset's per-channel means are around 0.70–0.75, which is expected for images dominated by bright backgrounds or light-coloured materials.

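The per-channel standard deviations of about 0.31–0.34 show that pixel values vary substantially around the mean (for comparison, a uniform distribution on [0, 1] has a standard deviation of about 0.29).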

In [None]:
# Per-channel (R, G, B) normalisation constants shared by both pipelines.
NORM_MEAN = (0.74856972, 0.72743281, 0.70510449)
NORM_STD = (0.30954896, 0.31524588, 0.33628213)

# Training pipeline: random augmentation first, then tensor conversion and
# normalisation.
train_steps = [
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomCrop(256, padding=16),
    # Rotates the image by a random angle between -30 and +30 degrees.
    transforms.RandomRotation(degrees=30),
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
]
transform_train = transforms.Compose(train_steps)

# Evaluation pipeline: no augmentation, only tensor conversion and the same
# normalisation as training.
test_steps = [
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
]
transform_test = transforms.Compose(test_steps)

In [7]:
# TODO: Load Datasets (1 mark)
data_dir = "../data/images"  # top-level folder containing all categories

# ImageFolder yields raw PIL images unless a transform is supplied, and the
# default DataLoader collate function cannot batch PIL images. Two views of the
# same folder are created so training samples are augmented while
# validation/test samples are only converted to tensors and normalised.
full_dataset = datasets.ImageFolder(root=data_dir, transform=transform_train)
eval_dataset = datasets.ImageFolder(root=data_dir, transform=transform_test)
# Indices are shared between the two views, so they must list identical samples.
assert full_dataset.samples == eval_dataset.samples

# -----------------------------
# 3. Split per class
# -----------------------------
# Seed both RNGs: `random` drives the index splits below, torch drives
# DataLoader shuffling and the random augmentations.
random.seed(42)  # for reproducibility
torch.manual_seed(42)

train_indices = []
test_indices = []

# Map from class index to list of sample indices
class_to_indices = {i: [] for i in range(len(full_dataset.classes))}
for idx, (_, label) in enumerate(full_dataset.samples):
    class_to_indices[label].append(idx)

# Hold out 20% of every class for testing so the test set is stratified.
for class_idx, indices in class_to_indices.items():
    n_total = len(indices)
    n_test = int(0.2 * n_total)
    shuffled = indices.copy()
    random.shuffle(shuffled)
    test_indices.extend(shuffled[:n_test])
    train_indices.extend(shuffled[n_test:])

# TODO: Split train into train + validation (1 mark)
# Split on indices using the seeded `random` module rather than
# torch's random_split: the split is then reproducible, and validation samples
# can be drawn from the non-augmented view.
shuffled_train = train_indices.copy()
random.shuffle(shuffled_train)
train_size = int(0.8 * len(shuffled_train))
val_size = len(shuffled_train) - train_size

# -----------------------------
# 4. Create Subset datasets
# -----------------------------
train_dataset = Subset(full_dataset, shuffled_train[:train_size])
val_dataset = Subset(eval_dataset, shuffled_train[train_size:])
test_dataset = Subset(eval_dataset, test_indices)

# TODO: Data loaders (1 mark)
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")