In [2]:
import sys

sys.executable

'/Users/wba/repos/godshatter/cifar-100/.venv/bin/python'

In [3]:
import torch
import torch.nn as nn
import torchvision
import numpy as np

In [14]:
dataset = torchvision.datasets.CIFAR100(
    root="/Users/wba/repos/godshatter/cifar-100/datasets",
    transform=torchvision.transforms.ToTensor(),
    download=True,
)

Files already downloaded and verified


In [7]:
dataset.class_to_idx

{'apple': 0,
 'aquarium_fish': 1,
 'baby': 2,
 'bear': 3,
 'beaver': 4,
 'bed': 5,
 'bee': 6,
 'beetle': 7,
 'bicycle': 8,
 'bottle': 9,
 'bowl': 10,
 'boy': 11,
 'bridge': 12,
 'bus': 13,
 'butterfly': 14,
 'camel': 15,
 'can': 16,
 'castle': 17,
 'caterpillar': 18,
 'cattle': 19,
 'chair': 20,
 'chimpanzee': 21,
 'clock': 22,
 'cloud': 23,
 'cockroach': 24,
 'couch': 25,
 'crab': 26,
 'crocodile': 27,
 'cup': 28,
 'dinosaur': 29,
 'dolphin': 30,
 'elephant': 31,
 'flatfish': 32,
 'forest': 33,
 'fox': 34,
 'girl': 35,
 'hamster': 36,
 'house': 37,
 'kangaroo': 38,
 'keyboard': 39,
 'lamp': 40,
 'lawn_mower': 41,
 'leopard': 42,
 'lion': 43,
 'lizard': 44,
 'lobster': 45,
 'man': 46,
 'maple_tree': 47,
 'motorcycle': 48,
 'mountain': 49,
 'mouse': 50,
 'mushroom': 51,
 'oak_tree': 52,
 'orange': 53,
 'orchid': 54,
 'otter': 55,
 'palm_tree': 56,
 'pear': 57,
 'pickup_truck': 58,
 'pine_tree': 59,
 'plain': 60,
 'plate': 61,
 'poppy': 62,
 'porcupine': 63,
 'possum': 64,
 'rabbit': 65,

In [6]:
dataloader = torch.utils.data.DataLoader(dataset)

In [25]:
class AlexNet(nn.Module):
    """
    AlexNet on CIFAR-100.

    Seems like there are several different networks (A-E). Smallest one (A) has 11
    different layers.

    Their input size was 224x224. Ours is 32x32.
    Spatial resolution is preserved in conv layers, ie output is also 224x224.
    Conv2D - 3x3 - 64 channels
    relu
    maxpool
    Conv2D - 3x3 - 128 channels
    relu
    maxpool
    Conv2D - 3x3 - 256
    relu
    Conv2D - 3x3 - 256
    relu
    maxpool
    Conv2D - 3x3 - 512
    relu
    Conv2D - 512
    relu
    maxpool
    Conv2D - 512
    relu
    Conv2D - 512
    relu
    maxpool
    FC - out features: 4096
    ReLU
    FC - out features: 4096
    ReLU
    FC - out features: 1000 (one for each class, so we need 100)
    softmax
    """

    def __init__(self, n_classes=100):
        super().__init__()
        self.layers = nn.Sequential(
            # (b, 3, 32, 32) -> (b, 64, 32, 32
            nn.Conv2d(3, 64, (3, 3), stride=1, padding="same"),
            nn.ReLU(),
            # (b, 64, 32, 32) -> (b, 64, 16, 16)
            nn.MaxPool2d((2, 2), stride=2),
            # (b, 64, 16, 16) -> (b, 128, 16, 16)
            nn.Conv2d(64, 128, (3, 3), stride=1, padding="same"),
            nn.ReLU(),
            # (b, 128, 16, 16) -> (b, 128, 8, 8)
            nn.MaxPool2d((2, 2), stride=2),
            # (b, 128, 8, 8) -> (b, 256, 8, 8)
            nn.Conv2d(128, 256, (3, 3), stride=1, padding="same"),
            nn.ReLU(),
            # (b, 256, 8, 8) -> (b, 256, 8, 8)
            nn.Conv2d(256, 256, (3, 3), stride=1, padding="same"),
            nn.ReLU(),
            # (b, 256, 8, 8) -> (b, 256, 4, 4
            nn.MaxPool2d((2, 2), stride=2),
            # (b, 256, 4, 4) -> (b, 512, 4, 4)
            nn.Conv2d(256, 512, (3, 3), stride=1, padding="same"),
            nn.ReLU(),
            # (b, 512, 4, 4) -> (b, 512, 4, 4)
            nn.Conv2d(512, 512, (3, 3), stride=1, padding="same"),
            nn.ReLU(),
            # (b, 512, 4, 4) -> (b, 512, 2, 2)
            nn.MaxPool2d((2, 2), stride=2),
            # (b, 512, 2, 2) -> (b, 2048)
            nn.Flatten(),
            # (b, 2048) -> (b, 4096)
            nn.Dropout(0.5),
            nn.Linear(2048, 4096),
            nn.ReLU(),
            # (b, 4096) -> (b, 4096)
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            # (b, 4096) -> (b, n_classes)
            nn.Linear(4096, n_classes),
            # (b, 100) -> (b, n_classes)
            nn.Softmax(dim=1),
        )

        # Initialization procedure from 3.1: weights are sampled from N(0, 0.01)
        # and biases are 0
        for layer in self.layers:
            if isinstance(layer, (nn.Conv2d, nn.Linear)):
                nn.init.normal_(layer.weight, mean=0, std=0.01)
                nn.init.constant_(layer.bias, 0)

    def forward(self, x):
        return self.layers(x)


# net = AlexNet(n_classes=2)
# Add batch size 1
# x = dataset[0][0].unsqueeze(0)
# net(x)

In [26]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


count_parameters(net)

9951874

In [27]:
# Subset of CIFAR-100 with 2 classes
indices = [i for i, (_, label) in enumerate(dataset) if label in [0, 1]]
print(len(indices))
binary_dataset = torch.utils.data.Subset(dataset, indices)

1000


In [28]:
print(len(dataset))

50000


In [62]:
LR = 0.003
BATCH_SIZE = 128
N_EPOCHS = 90

# net = AlexNet(n_classes=2)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=LR)

dataloader = torch.utils.data.DataLoader(binary_dataset, batch_size=BATCH_SIZE)
for epoch in range(N_EPOCHS):
    for batch, (X, y) in enumerate(dataloader):
        pred = net(X)
        loss = loss_fn(pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            print(f"{loss=}")

loss=tensor(0.6931, grad_fn=<NllLossBackward0>)
loss=tensor(0.6781, grad_fn=<NllLossBackward0>)
loss=tensor(0.5984, grad_fn=<NllLossBackward0>)
loss=tensor(0.5751, grad_fn=<NllLossBackward0>)
loss=tensor(0.6040, grad_fn=<NllLossBackward0>)


KeyboardInterrupt: 

In [38]:
pred.shape, y.shape

(torch.Size([4, 2]), torch.Size([4]))

In [32]:
# Example of target with class indices
loss = nn.CrossEntropyLoss()
input = torch.randn(3, 5, requires_grad=True)
target = torch.empty(3, dtype=torch.long).random_(5)
output = loss(input, target)
# output.backward()
# Example of target with class probabilities
# input = torch.randn(3, 5, requires_grad=True)
# target = torch.randn(3, 5).softmax(dim=1)
# output = loss(input, target)
# output.backward()

In [39]:
input

tensor([[ 0.3311, -1.3027, -0.0989, -0.3307,  0.3001],
        [-0.0284,  0.1879, -0.9803,  1.9988, -0.9456],
        [-2.4115, -0.9480,  0.1922,  1.0942,  0.6149]], requires_grad=True)

In [34]:
output

tensor(0.9940, grad_fn=<NllLossBackward0>)

In [37]:
target.shape

torch.Size([3])

In [27]:
net(dataset[0][0].unsqueeze(0))

tensor([[0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100,
         0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100,
         0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100,
         0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100,
         0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100,
         0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100,
         0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100,
         0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100,
         0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100,
         0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100,
         0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100, 0.0100,
         0.0100]], grad_fn=<SoftmaxBackward0>)