In [10]:
import numpy as np
from keras.datasets import cifar10
import pandas as pd 
import matplotlib.pyplot as plt
import sys

def load_cifar10(subset_percentage=10):
    # Load the CIFAR-10 dataset
    (train_images, train_labels), (test_images, test_labels) = cifar10.load_data()

    # Select a random subset of the data
    num_train_samples = int(len(train_images) * (subset_percentage / 100))
    num_test_samples = int(len(test_images) * (subset_percentage / 100))

    train_indices = np.random.choice(len(train_images), num_train_samples, replace=False)
    test_indices = np.random.choice(len(test_images), num_test_samples, replace=False)

    train_images = train_images[train_indices]
    train_labels = train_labels[train_indices]
    test_images = test_images[test_indices]
    test_labels = test_labels[test_indices]

    # Normalize pixel values to be between 0 and 1
    train_images = train_images.astype('float32') / 255.0
    test_images = test_images.astype('float32') / 255.0

    return (train_images, train_labels), (test_images, test_labels)

(train_images, train_labels), (test_images, test_labels) = load_cifar10()
print(train_images.shape, train_labels.shape, test_images.shape, test_labels.shape)

from layers import *
from solver import sgd, sgd_momentum, adam
from loss import SoftmaxLoss, l2_regularization, delta_l2_regularization
from utils import softmax

class CNN:
    def __init__(self, layers, loss_func=SoftmaxLoss):
        self.layers = layers
        self.params = []
        for layer in self.layers:
            self.params.append(layer.params)
        self.loss_func = loss_func
        
    def forward(self, X):
        for layer in self.layers:
            X = layer.forward(X)
        return X

    def backward(self, dout):
        grads = []
        for layer in reversed(self.layers):
            dout, grad = layer.backward(dout)
            grads.append(grad)
        return grads

    def train_step(self, X, y):
        out = self.forward(X)
        loss, dout = self.loss_func(out, y)
        loss += l2_regularization(self.layers)
        grads = self.backward(dout)
        grads = delta_l2_regularization(self.layers, grads)
        return loss, grads

    def predict(self, X):
        X = self.forward(X)
        return np.argmax(softmax(X), axis=1)
    
    
    def accuracy(y_true, y_pred):
        return np.mean(y_pred == y_true) 

def make_cnn(X_dim, num_class):
    conv = Conv(X_dim, n_filter=32, h_filter=3,
                w_filter=3, stride=1, padding=1)
    relu_conv = ReLU()
    maxpool = Maxpool(conv.out_dim, size=2, stride=1)
    flat = Flatten()
    fc = FullyConnected(np.prod(maxpool.out_dim), num_class)
    return [conv, relu_conv, maxpool, flat, fc]


cifar_dims = (32, 32, 3)
cnn = CNN(make_cnn(cifar_dims, num_class=10))
cnn = sgd_momentum(cnn, train_images, train_labels, minibatch_size=256, epoch=15,
                   learning_rate=0.01, X_test=test_images, y_test=test_labels)

(5000, 32, 32, 3) (5000, 1) (1000, 32, 32, 3) (1000, 1)
Epoch 1
Loss = inf | Training Accuracy = 0.1072 | Test Accuracy = 0.1
Epoch 2
Loss = inf | Training Accuracy = 0.1072 | Test Accuracy = 0.1
Epoch 3
Loss = inf | Training Accuracy = 0.1072 | Test Accuracy = 0.1
Epoch 4
Loss = inf | Training Accuracy = 0.1072 | Test Accuracy = 0.1
Epoch 5
Loss = inf | Training Accuracy = 0.1072 | Test Accuracy = 0.1
Epoch 6
Loss = inf | Training Accuracy = 0.1072 | Test Accuracy = 0.1
Epoch 7
Loss = inf | Training Accuracy = 0.1072 | Test Accuracy = 0.1
Epoch 8
Loss = inf | Training Accuracy = 0.1072 | Test Accuracy = 0.1
Epoch 9
Loss = inf | Training Accuracy = 0.1072 | Test Accuracy = 0.1
Epoch 10
Loss = inf | Training Accuracy = 0.1072 | Test Accuracy = 0.1
Epoch 11
Loss = inf | Training Accuracy = 0.1072 | Test Accuracy = 0.1
Epoch 12
Loss = inf | Training Accuracy = 0.1072 | Test Accuracy = 0.1
Epoch 13
Loss = inf | Training Accuracy = 0.1072 | Test Accuracy = 0.1
Epoch 14
Loss = inf | Training

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms


class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(32 * 16 * 16, 128)
        self.relu2 = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.pool1(x)
        x = x.view(-1, 32 * 16 * 16)  # Flatten the output
        x = self.fc1(x)
        x = self.relu2(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x

def load_cifar10(batch_size):
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
    test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

    # Loading smaller subset of CIFAR-10
    train_subset = torch.utils.data.Subset(train_dataset, torch.arange(0, 5000))  # 10% of 50000
    test_subset = torch.utils.data.Subset(test_dataset, torch.arange(0, 1000))  # 10% of 10000

    train_loader = torch.utils.data.DataLoader(train_subset, batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_subset, batch_size=batch_size, shuffle=False)

    return train_loader, test_loader


def train_model(model, train_loader, test_loader, epochs):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.001, weight_decay=0.001)  # L2 regularization as weight_decay

    for epoch in range(epochs):
        model.train()
        for data, target in train_loader:
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)

            # Manual L1 regularization
            l1_penalty = sum(p.abs().sum() for p in model.parameters())
            loss += 0.01 * l1_penalty  # Apply L1 regularization

            loss.backward()
            optimizer.step()

        model.eval()
        test_loss = 0
        correct = 0
        with torch.no_grad():
            for data, target in test_loader:
                output = model(data)
                test_loss += criterion(output, target).item()
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()

        test_loss /= len(test_loader.dataset)
        print(
            f'Epoch {epoch + 1}: Test Loss: {test_loss:.4f}, Accuracy: {100. * correct / len(test_loader.dataset):.2f}%')


if __name__ == "__main__":
    train_loader, test_loader = load_cifar10(batch_size=64)
    model = CNNModel()
    train_model(model, train_loader, test_loader, epochs=5)


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
Epoch 1: Test Loss: 0.0365, Accuracy: 19.90%
Epoch 2: Test Loss: 0.0363, Accuracy: 22.90%
Epoch 3: Test Loss: 0.0362, Accuracy: 23.80%
Epoch 4: Test Loss: 0.0361, Accuracy: 24.40%
Epoch 5: Test Loss: 0.0360, Accuracy: 25.10%
