In [2]:
import numpy

def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        data = pickle.load(fo, encoding='bytes')
    return data

In [3]:
data_batch = unpickle('../cifar-10-batches-py/data_batch_1')
print(data_batch.keys())

dict_keys([b'batch_label', b'labels', b'data', b'filenames'])


## 1. Image
Original Data Batch Shape: 
1. When you load a data_batch file, the shape of data_batch[b'data'] is (10000, 3072), where:
    - 10000 is the number of images.
    - 3072 is the total number of pixels in each image (32 * 32 * 3)

2. Reshaped Images: After reshaping the data into (10000, 3, 32, 32), where:
    - 10000 is the number of images.
    - 3 represents the three color channels (RGB).
    - 32 is the height (32 pixels).
    - 32 is the width (32 pixels).

## 2. Image Data Representation:
In PyTorch, images are usually represented with the channels-first format, i.e., (C, H, W), where:
- C is the number of channels (3 for RGB images).
- H is the height of the image.
- W is the width of the image.

So, for CIFAR-10, the image data should be reshaped to (3, 32, 32) — this means:
- 3 color channels (RGB),
- each image has a height of 32 pixels and a width of 32 pixels.

In [4]:
images = data_batch[b'data']
labels = data_batch[b'labels']
print(images.shape) # 10,000 x 3072 

# Reshape images to (3, 32, 32) and convert to float32
images = images.reshape(-1, 3, 32, 32).astype('float32')
print(images.shape) # 10,000 x 3 x 32 x 32

(10000, 3072)
(10000, 3, 32, 32)


In [6]:
import torchvision.transforms as transforms
transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

import torch
from PIL import Image

# Apply the transformations correctly
images = torch.stack([transform(Image.fromarray((img.transpose(1, 2, 0) * 255).astype('uint8'))) for img in images])


In [7]:
# Check device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


# Move images to the selected device (GPU/CPU)
images = images.to(device)

print(images.shape)

Using device: cpu
torch.Size([10000, 3, 224, 224])


In [8]:
from torchvision.models import resnet18
import torch

weights_path = "../weights/resnet18-f37072fd.pth"

# Load the weights manually
state_dict = torch.load(weights_path)

# Load the model and assign the weights
model = resnet18()
model.load_state_dict(state_dict)

<All keys matched successfully>

In [11]:
# Modify the model's fully connected layer to match CIFAR-10's 10 classes
# The original ResNet-18 model has 1000 classes (for ImageNet), so we modify the last layer
model.fc = torch.nn.Linear(model.fc.in_features, 10)

# Freeze the initial layers of the model (all layers before the last fully connected layer)
for param in model.parameters():
    param.requires_grad = False

# Only the last fully connected layer will have its weights updated
for param in model.fc.parameters():
    param.requires_grad = True

# Move the model to the selected device (GPU/CPU)
model = model.to(device)

# Put the model in evaluation mode (since we're just testing)
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [12]:
# Make predictions on the images
with torch.no_grad():
    outputs = model(images)
    _, predicted = torch.max(outputs, 1)

# Display the first 10 predictions
print(f"Predicted labels: {predicted[:10]}")

Predicted labels: tensor([4, 4, 4, 4, 0, 4, 4, 4, 4, 4])


In [13]:
# Convert labels to a tensor and move to the selected device
labels_tensor = torch.tensor(labels).to(device)

# Calculate the accuracy
correct = (predicted == labels_tensor).sum().item()
accuracy = correct / len(labels_tensor) * 100

print(f"Accuracy: {accuracy:.2f}%")

Accuracy: 11.76%
