## Color CNN implementation

If the `colorcnn` kernel is available, select it for this notebook.

Otherwise, create the environment from the terminal:

conda create -n yournamefortheenv python=3.10 #more recent versions don't work with pytorch

conda install pytorch

conda install torchvision

conda install cuda -c nvidia

Install any other package that an error message reports as missing in the same way; if `conda install` does not work, try **pip install** or **pip3 install**.

**before running this notebook**, do in the terminal:

module load cuda/11.8 #To start using gpu

conda activate colorcnn #activate the env (use the name you gave it when creating it)

then restart the kernel and reopen the notebook

In [1]:
# use gpu if available
import torch
# Prefer CUDA when PyTorch can see a GPU; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

Using device: cpu


In [2]:
# import packages
import os
import shutil
from torchvision import datasets, transforms, models
from torchvision.models import VGG16_Weights
from torch.utils.data import DataLoader

## Dataset preprocessing
(You can skip this cell if the `val_processed` folder already exists.)

In [None]:
# Locations inside the Tiny ImageNet download, all derived from data_root
data_root = "./tiny-imagenet-200-data"
val_images_dir, val_ann_file, words_file, processed_val_dir = (
    os.path.join(data_root, leaf)
    for leaf in (
        "val/images",               # validation images as shipped
        "val/val_annotations.txt",  # annotation file read below (filename → synset)
        "words.txt",                # label file read below (synset → human label)
        "val_processed",            # output: one sub-folder per class
    )
)

# Loader parameter (not tuned; another batch size may work as well)
batch_size = 32

# 2. Read synset → human label map
# Each non-empty line is parsed as "<synset>\t<label>".
synset_to_label = {}
with open(words_file, 'r') as f:
    for line in f:
        line = line.strip()
        if not line:
            continue  # tolerate blank lines instead of failing on the unpack below
        # Split on the first tab only, so a label that itself contains a tab
        # does not break the two-value unpack.
        synset, label = line.split('\t', 1)
        synset_to_label[synset] = label

# 3. Read filename → synset map
# Only the first two tab-separated columns are used; any further columns are ignored.
filename_to_synset = {}
with open(val_ann_file, 'r') as f:
    for line in f:
        line = line.strip()
        if not line:
            continue  # same blank-line tolerance as above
        fname, syn, *_ = line.split('\t')
        filename_to_synset[fname] = syn

# 4. Reorganize: copy each image into val_processed/<synset>_<label>/
for image_name, wnid in filename_to_synset.items():
    source = os.path.join(val_images_dir, image_name)
    if os.path.exists(source):
        # Folder name = synset id + first comma-separated token of the human
        # label, with spaces turned into underscores.
        short_label = synset_to_label[wnid].split(',')[0].replace(' ', '_')
        target_dir = os.path.join(processed_val_dir, f"{wnid}_{short_label}")
        os.makedirs(target_dir, exist_ok=True)
        shutil.copy(source, os.path.join(target_dir, image_name))
    # images listed in the annotations but absent on disk are skipped

print("Reorganized val into:", processed_val_dir)


# 5. Preprocessing pipeline, normalised with the original ImageNet mean/std.
# Statistics computed on our own subset were considered, but the pretrained
# model was trained on the complete dataset, so the standard values are kept.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

normalize = transforms.Normalize(mean=imagenet_mean, std=imagenet_std)
val_transform = transforms.Compose(
    [
        transforms.Resize(256),      # resize first ...
        transforms.CenterCrop(224),  # ... then take the central 224x224 crop
        transforms.ToTensor(),
        normalize,
    ]
)

# 6. Create data loaders
# ImageFolder treats every sub-folder of processed_val_dir as one class.
val_dataset = datasets.ImageFolder(processed_val_dir, transform=val_transform)
val_loader  = DataLoader(val_dataset,
                         batch_size=batch_size,
                         shuffle=False,   # keep a fixed order for evaluation
                         num_workers=4,
                         # Pinned host memory only speeds up host→GPU copies; asking
                         # for it without CUDA has no benefit and makes PyTorch warn.
                         pin_memory=torch.cuda.is_available())

print("Number of classes in val:", len(val_dataset.classes))
print("Number of val images:", len(val_dataset))


## Load dataset to VGG16
Build the validation data loader and create the VGG16 model.

In [3]:
# Paths into the Tiny ImageNet download. They are defined in this cell as well as
# in the preprocessing cell so that this cell runs when preprocessing is skipped.
data_root            = "./tiny-imagenet-200-data"
val_images_dir       = os.path.join(data_root, "val/images")
val_ann_file         = os.path.join(data_root, "val/val_annotations.txt")
words_file           = os.path.join(data_root, "words.txt")
processed_val_dir    = os.path.join(data_root, "val_processed")
# paras
batch_size = 32  # not tuned; another value may work as well

# Standard ImageNet channel statistics, used by the Normalize step below
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std  = [0.229, 0.224, 0.225]

val_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std)
])

# Create the dataset and its loader once.
# ImageFolder treats every sub-folder of processed_val_dir as one class.
val_dataset = datasets.ImageFolder(processed_val_dir, transform=val_transform)
val_loader  = DataLoader(val_dataset,
                         batch_size=batch_size,
                         shuffle=False,   # keep a fixed order for evaluation
                         num_workers=4,
                         # Pinned host memory only speeds up host→GPU copies; asking
                         # for it without CUDA has no benefit and makes PyTorch warn.
                         pin_memory=torch.cuda.is_available())

# `dataset` / `loader` were previously a second ImageFolder/DataLoader built with
# exactly the same arguments. Keep the names as aliases so existing references
# still work, without scanning the image directory a second time.
dataset = val_dataset
loader = val_loader


In [None]:
# Build VGG-16 with torchvision's default pretrained weights and switch it to
# evaluation (inference) mode; eval() returns the module itself, so it can be chained.
model = models.vgg16(weights=VGG16_Weights.DEFAULT).eval()
model  # show the architecture as the cell output

In [None]:
# Sanity check: run one batch from the DataLoader through the model
images, labels = next(iter(val_loader))  # shape: [B, 3, 224, 224]

# Put the batch on whichever device the model's weights currently live on,
# so the forward pass works whether or not the model has been moved to the GPU.
images = images.to(next(model.parameters()).device)

# Forward pass (gradients are not needed for inference)
with torch.no_grad():
    outputs = model(images)              # shape: [B, 1000]

print("Batch input shape :", images.shape)
print("Output logits shape:", outputs.shape)

# Decode top-5 predictions for the first image.
# The model's outputs index the category list of the pretrained weights, not
# val_dataset.classes (the val_processed folders), so the names are looked up
# in the weights' metadata.
imagenet_categories = VGG16_Weights.DEFAULT.meta["categories"]
probs = torch.softmax(outputs, dim=1)
top5_prob, top5_lbl = probs[0].topk(5)
for prob, idx in zip(top5_prob, top5_lbl):
    print(f"{imagenet_categories[idx.item()]:>30s} : {prob.item():.4f}")

In [None]:
# 4. Inference loop: collect the top-1 prediction and the dataset label of every image
#
# NOTE(review): `preds` are indices into the model's 1000 output classes, whereas
# `labels` are ImageFolder indices over the val_processed class folders. These are
# different index spaces, so comparing them directly does not yield a meaningful
# accuracy until the folder synsets are mapped to the model's class indices — TODO.
all_preds = []
all_labels = []

# Use the VGG-16 created in the earlier cell (`model`); `vgg` is only defined in a
# later cell, so referring to it here fails on a fresh top-to-bottom run.
model.to(device)

with torch.no_grad():
    # `val_loader` replaces `dataloader`, which is not defined anywhere in the notebook
    for images, labels in val_loader:
        images = images.to(device)
        outputs = model(images)               # raw logits
        preds = outputs.argmax(dim=1)         # predicted class indices

        all_preds.extend(preds.cpu().tolist())
        all_labels.extend(labels.tolist())


In [None]:
# Instantiate VGG-16 with torchvision's default ImageNet-pretrained weights
vgg = models.vgg16(weights=VGG16_Weights.DEFAULT)
print("Successfully loaded vgg")
# eval() and to() both return the module itself, so they are chained here;
# the trailing expression keeps the model summary as the cell output.
vgg.eval().to(device)

In [None]:
# 5. Compute simple accuracy: share of predictions equal to their label
correct = sum(p==t for p, t in zip(all_preds, all_labels))
total   = len(all_labels)
if total == 0:
    # No labels were collected (inference cell not run, or empty dataset);
    # report that instead of raising ZeroDivisionError.
    print("No predictions collected; run the inference loop first.")
else:
    print(f"Top‐1 Accuracy: {correct/total*100:.2f}%")