<a href="https://colab.research.google.com/github/venomouscyanide/dl_sain/blob/master/week5/week_5_cnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Setup NN base + Hyper Tuning class

### All Imports

In [1]:
import itertools
from typing import List, Dict, Type
import copy
import time
from abc import abstractmethod


import pandas as pd
import numpy as np

import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST, CIFAR10, CIFAR100
from torchvision.transforms import ToTensor, transforms
from torch.utils.data.dataset import random_split

In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Using {device} device')
# TODO; reproducibility is only guarenteed on cpu.
# For some reason, using cuda, each run produces different results regarless of me setting manual_seed().
# However, reproducibility is consistent on cpu.
# Time taken for experiments increases due to this

Using cpu device


### Setup a factory that returns the dataloader for the Dataset that you want
Using this I can conduct multiple experiments across CIFAR10, CIFAR100 and MNIST using the same `Module()` base class

In [None]:
class DataLoaderWrapper:
    @abstractmethod
    def get_data(self):
        pass

    @abstractmethod
    def get_validation_splits(self) -> List[int]:
        pass


class GetDataLoaderFactory:
    def get(self, dataset: str) -> Type[DataLoaderWrapper]:
        if dataset == "MNIST":
            return MNISTLoader
        elif dataset == "CIFAR10":
            return CIFAR10Loader
        elif dataset == "CIFAR100":
            return CIFAR100Loader
        else:
            raise NotImplementedError(f"Dataset {dataset} not configured")


class MNISTLoader(DataLoaderWrapper):
    def get_data(self):
        train_data = MNIST(root='mnist_torch_data', train=True, download=True, transform=ToTensor())
        test_data = MNIST(root='mnist_torch_data', train=False, download=True, transform=ToTensor())
        return train_data, test_data

    def get_validation_splits(self) -> List[int]:
        return [50000, 10000]


cifar_transform = transforms.Compose([transforms.ToTensor(),
                                      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])


class CIFAR10Loader(DataLoaderWrapper):
    def get_data(self):
        train_data = CIFAR10(root='cifar10', train=True, download=True, transform=cifar_transform)
        test_data = CIFAR10(root='cifar10', train=False, download=True, transform=cifar_transform)
        return train_data, test_data

    def get_validation_splits(self) -> List[int]:
        return [40000, 10000]


class CIFAR100Loader(DataLoaderWrapper):
    def get_data(self):
        train_data = CIFAR100(root='cifar100', train=True, download=True, transform=cifar_transform)
        test_data = CIFAR100(root='cifar100', train=False, download=True, transform=cifar_transform)
        return train_data, test_data

    def get_validation_splits(self) -> List[int]:
        return [40000, 10000]


### Hypertuner & EvalData classes
- `EvalData` holds the data obtained from each iteration inside a Dataframe
- `HyperTuner` takes a catesian product of all combinations and adds a row to EvalData after 3 iterations on each combination(each iteration has a fixed random seed value). `Hypertuner` internally calls `TorchCNN` for training and evaluation on any given dataset

Work here is highly inspired by work done by Isaac last week 

In [None]:
class EvalData:
    def __init__(self):
        self.data = pd.DataFrame(
            columns=["dataset", "epochs", "nn_stack", "loss_func",
                     "optimizer", "learning_rate", "weight_decay", "batch_size", "momentum",
                     "testing_dataset_type", "training_size", "testing_size",
                     "best_accuracy", "avg_accuracy",
                     "avg_time_taken"])

    def add_record(self, data_dict: Dict):
        self.data = self.data.append(data_dict, ignore_index=True)
        print(f"Added record to eval DF. Total records so far: {self.data.shape[0]}")

    def get(self, rearrange: bool) -> pd.DataFrame:
        if rearrange:
            # push "best_accuracy", "avg_accuracy", "avg_time_taken" cols to the front and sort by "avg_accuracy"
            curr_cols = self.data.columns.tolist()
            updated_order = curr_cols[-3:] + curr_cols[:-3]
            self.data = self.data[updated_order]
        return self.data.sort_values(by="avg_accuracy", ascending=False)


class HyperTuner:
    def tune(self, config: Dict, verbose: bool = True) -> pd.DataFrame:
        eval_data = EvalData()

        all_combinations = list(itertools.product(*config.values()))
        print(f"Total combinations for exp: {len(all_combinations)}")
        for combination in all_combinations:
            # reconstruct the dict using the combination
            combination_dict = {k: v for k, v in zip(config.keys(), combination)}

            data_loader = GetDataLoaderFactory().get(combination_dict["dataset"])()
            train_data, test_data = data_loader.get_data()

            accuracies = np.array([])
            time_consumed = np.array([])

            for seed in [28, 35, 42]:
                batch_size = combination_dict["batch_size"]
                testing_dataset_type = combination_dict["testing_dataset_type"]

                training_subset = train_data
                torch.manual_seed(seed)
                training_loader = DataLoader(training_subset, batch_size=batch_size, shuffle=True)
                testing_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True)

                if testing_dataset_type == "validation":
                    splits = data_loader.get_validation_splits()
                    training_subset, validation_subset = random_split(train_data, lengths=splits)
                    validation_loader = DataLoader(validation_subset, batch_size=batch_size, shuffle=True)
                    testing_loader = validation_loader if testing_dataset_type == "validation" else testing_loader

                model = TorchCNN(
                    loss_func=combination_dict["loss_func"],
                    optimizer=combination_dict["optimizer"],
                    learning_rate=combination_dict["learning_rate"],
                    lmda_wt_decay=combination_dict["weight_decay"],
                    batch_size=batch_size,
                    momentum=combination_dict["momentum"],
                    training_size=combination_dict["training_size"],
                    testing_size=combination_dict["testing_size"],
                    seed=seed,
                    nn_stack=combination_dict['nn_stack']
                ).to(device)

                num_epochs = combination_dict["epochs"]
                time_epoch_start = time.time()
                for epoch in range(num_epochs):
                    if verbose:
                        print(f"Training for epoch: {epoch}")
                    model.train_model(training_loader, verbose)
                    accuracy = model.evaluate(testing_loader, model.testing_size, "testing", verbose)
                    accuracies = np.append(accuracies, accuracy)
                time_for_seed = time.time() - time_epoch_start
                time_consumed = np.append(time_consumed, time_for_seed)

                self._reset_params(model)

            avg_time = np.mean(time_consumed)
            avg_accuracy = np.mean(accuracies)
            best_accuracy = np.max(accuracies)

            combination_dict.update({
                "best_accuracy": best_accuracy,
                "avg_accuracy": avg_accuracy,
                "avg_time_taken": avg_time
            })

            eval_data.add_record(combination_dict)

            if verbose:
                print(eval_data.get(rearrange=False))
        return eval_data.get(rearrange=True)

    def _reset_params(self, model: nn.Module):
        for layer in list(model.children())[0]:
            if hasattr(layer, 'reset_parameters'):
                layer.reset_parameters()


def _get_nn_stacks_with_dropouts(base_architectures, dropout_options):
    archs_with_dropout = []
    for base_architecture in base_architectures:
        for dropout_option in dropout_options:
            arch_copy = copy.deepcopy(base_architecture)
            for index, module in enumerate(arch_copy):
                if type(module) == nn.Dropout:
                    arch_copy[index] = nn.Dropout(dropout_option)
            archs_with_dropout += [arch_copy]
    return archs_with_dropout

### The Neural Network Module which takes in different architectures and hyperparameters by argument
This class gets called by `HyperTuner` on different values of hyperparameters, datasets and even architectures

In [None]:
class TorchCNN(nn.Module):
    def __init__(self, loss_func: str,
                 optimizer: str, learning_rate: float, lmda_wt_decay: float, batch_size: int,
                 momentum: float, nn_stack: List[nn.Module], training_size: int,
                 testing_size: int, seed: int = 42):
        super().__init__()
        self.loss_func = loss_func
        self.optimizer = optimizer
        self.learning_rate = learning_rate
        self.lmda_wt_decay = lmda_wt_decay
        self.mlp = nn.Sequential(*nn_stack)
        self.momentum = momentum
        optimizer_params = self._get_optimizer_params()
        self.optimizer = getattr(torch.optim, self.optimizer)(**optimizer_params)
        self.loss_function = getattr(nn, self.loss_func)()
        self.batch_size = batch_size
        self.training_size = training_size
        self.testing_size = testing_size
        self.seed = seed

    def _get_optimizer_params(self):
        opt_params = {
            "params": self.parameters(),
            "lr": self.learning_rate,
            "weight_decay": self.lmda_wt_decay,
            "momentum": self.momentum,
        }
        return opt_params

    def forward(self, data: torch.Tensor) -> torch.Tensor:
        logits = self.mlp(data)
        return logits

    def train_model(self, training_loader: DataLoader, verbose: int = True):
        torch.manual_seed(self.seed)
        for input, labels in itertools.islice(training_loader, self.training_size // self.batch_size):
            prediction = self(input.to(device))
            labels = labels.to(device)
            if self.loss_function._get_name() == 'MSELoss':
                # TODO: This will not work for CIFAR100
                labels = torch.nn.functional.one_hot(labels, 10).float()
            loss = self.loss_function(prediction, labels)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
        if verbose:
            self.evaluate(training_loader, self.training_size, verbose=verbose)

    def evaluate(self, data_loader: DataLoader, dataset_size: int, data_type: str = "training",
                 verbose: bool = True) -> float:
        correct_classifications = 0
        with torch.no_grad():
            torch.manual_seed(self.seed)
            for input, labels in itertools.islice(data_loader, dataset_size // self.batch_size):
                prediction = self(input.to(device))
                labels = labels.to(device)
                correct_classifications += (prediction.argmax(1) == labels).type(torch.float).sum().item()
        numerator = int(correct_classifications)
        denominator = dataset_size
        accuracy = round((numerator / denominator) * 100, 2)
        if verbose:
            print(f'Accuracy on {data_type} data {accuracy}({numerator}/{denominator})%')
        return accuracy

In [2]:
# get rid of the UserWarning from being shown
import warnings
warnings.simplefilter("ignore", UserWarning)

# MNIST Tuning on CNN

Try out the hyperparameters from previous week which gave 98% accuracy on test data + introduce Conv2d and pooling layers.
The archtecture for CNN is taken from Neilson's work in [chapter 6](http://neuralnetworksanddeeplearning.com/chap6.html#convolutional_neural_networks_in_practice)


---


First try with different values of dropouts - [0.00, 0.10, 0.20, 0.30, 0.40, 0.50]
<br><br>
Note: all trials are on "validation" dataset and we only consume 10,000 training samples + 1,000 testing sample and lower number of epochs.

In [None]:
class TestingConfig:
    torch.manual_seed(35)
    base_architectures = [[nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5, stride=1),
                            nn.ReLU(),
                            nn.MaxPool2d(kernel_size=2, stride=2),
                            nn.Dropout(0.0),
                            nn.Conv2d(in_channels=20, out_channels=40, kernel_size=5, stride=1),
                            nn.ReLU(),
                            nn.MaxPool2d(kernel_size=2, stride=2),
                            nn.Dropout(0.0),
                            nn.Flatten(),
                            nn.Linear(40 * 4 * 4, 40 * 4 * 4 * 2),
                            nn.ReLU(),
                            nn.Dropout(0.0),
                            nn.Linear(40 * 4 * 4 * 2, 10),
                            nn.ReLU()]]
    CONFIG = {
        "dataset": ["MNIST"],
        "epochs": [10],
        "nn_stack": _get_nn_stacks_with_dropouts(base_architectures, dropout_options=[0.00, 0.10, 0.20, 0.30, 0.40, 0.50]),
        "loss_func": ["CrossEntropyLoss"],
        "optimizer": ["SGD"],
        "learning_rate": [1e-2],
        "weight_decay": [1e-4],
        "batch_size": [10],
        "testing_dataset_type": ["validation"],
        "training_size": [10000],
        "testing_size": [1000],
        "momentum": [0.9]
    }
eval_data = HyperTuner().tune(TestingConfig.CONFIG, True)

Total combinations for exp: 6
Training for epoch: 0
Accuracy on training data 56.76(5676/10000)%
Accuracy on testing data 56.4(564/1000)%
Training for epoch: 1
Accuracy on training data 57.63(5763/10000)%
Accuracy on testing data 57.1(571/1000)%
Training for epoch: 2
Accuracy on training data 57.73(5773/10000)%
Accuracy on testing data 57.7(577/1000)%
Training for epoch: 3
Accuracy on training data 68.76(6876/10000)%
Accuracy on testing data 67.8(678/1000)%
Training for epoch: 4
Accuracy on training data 79.62(7962/10000)%
Accuracy on testing data 79.5(795/1000)%
Training for epoch: 5
Accuracy on training data 79.78(7978/10000)%
Accuracy on testing data 79.4(794/1000)%
Training for epoch: 6
Accuracy on training data 79.9(7990/10000)%
Accuracy on testing data 79.9(799/1000)%
Training for epoch: 7
Accuracy on training data 79.93(7993/10000)%
Accuracy on testing data 79.9(799/1000)%
Training for epoch: 8
Accuracy on training data 90.02(9002/10000)%
Accuracy on testing data 89.5(895/1000)%

In [None]:
eval_data

Unnamed: 0,best_accuracy,avg_accuracy,avg_time_taken,dataset,epochs,nn_stack,loss_func,optimizer,learning_rate,weight_decay,batch_size,momentum,testing_dataset_type,training_size,testing_size
3,96.4,89.486667,203.551767,MNIST,10,"[Conv2d(1, 20, kernel_size=(5, 5), stride=(1, ...",CrossEntropyLoss,SGD,0.01,0.0001,10,0.9,validation,10000,1000
4,93.9,87.066667,198.729376,MNIST,10,"[Conv2d(1, 20, kernel_size=(5, 5), stride=(1, ...",CrossEntropyLoss,SGD,0.01,0.0001,10,0.9,validation,10000,1000
2,97.7,83.176667,211.172637,MNIST,10,"[Conv2d(1, 20, kernel_size=(5, 5), stride=(1, ...",CrossEntropyLoss,SGD,0.01,0.0001,10,0.9,validation,10000,1000
5,89.7,71.96,198.976652,MNIST,10,"[Conv2d(1, 20, kernel_size=(5, 5), stride=(1, ...",CrossEntropyLoss,SGD,0.01,0.0001,10,0.9,validation,10000,1000
1,97.8,59.683333,201.010889,MNIST,10,"[Conv2d(1, 20, kernel_size=(5, 5), stride=(1, ...",CrossEntropyLoss,SGD,0.01,0.0001,10,0.9,validation,10000,1000
0,89.7,58.973333,194.402864,MNIST,10,"[Conv2d(1, 20, kernel_size=(5, 5), stride=(1, ...",CrossEntropyLoss,SGD,0.01,0.0001,10,0.9,validation,10000,1000


In [None]:
eval_data['nn_stack'][0]

[Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1)),
 ReLU(),
 MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
 Dropout(p=0.0, inplace=False),
 Conv2d(20, 40, kernel_size=(5, 5), stride=(1, 1)),
 ReLU(),
 MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
 Dropout(p=0.0, inplace=False),
 Flatten(start_dim=1, end_dim=-1),
 Linear(in_features=640, out_features=1280, bias=True),
 ReLU(),
 Dropout(p=0.0, inplace=False),
 Linear(in_features=1280, out_features=10, bias=True),
 ReLU()]

Dropout value of **0.20** performs best during the evaluation. 

---
Next, Try out two different architectures
- Same as before
- Add more hidden neurons to the Linear layers


In [None]:
class TestingConfig:
    torch.manual_seed(35)
    base_architectures = [ # more hidden neurons
                            [nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5, stride=1),
                            nn.ReLU(),
                            nn.MaxPool2d(kernel_size=2, stride=2),
                            nn.Dropout(0.0),
                            nn.Conv2d(in_channels=20, out_channels=40, kernel_size=5, stride=1),
                            nn.ReLU(),
                            nn.MaxPool2d(kernel_size=2, stride=2),
                            nn.Dropout(0.0),
                            nn.Flatten(),
                            nn.Linear(40 * 4 * 4, 40 * 4 * 4 * 20),
                            nn.ReLU(),
                            nn.Dropout(0.0),
                            nn.Linear(40 * 4 * 4 * 20, 10),
                            nn.ReLU()],  
                            # same config as above
                            [nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5, stride=1),
                            nn.ReLU(),
                            nn.MaxPool2d(kernel_size=2, stride=2),
                            nn.Dropout(0.0),
                            nn.Conv2d(in_channels=20, out_channels=40, kernel_size=5, stride=1),
                            nn.ReLU(),
                            nn.MaxPool2d(kernel_size=2, stride=2),
                            nn.Dropout(0.0),
                            nn.Flatten(),
                            nn.Linear(40 * 4 * 4, 40 * 4 * 4 * 2),
                            nn.ReLU(),
                            nn.Dropout(0.0),
                            nn.Linear(40 * 4 * 4 * 2, 10),
                            nn.ReLU()],
                            ]
    CONFIG = {
    "dataset": ["MNIST"],
    "epochs": [10],
    "nn_stack": _get_nn_stacks_with_dropouts(base_architectures, dropout_options=[0.20]),
    "loss_func": ["CrossEntropyLoss"],
    "optimizer": ["SGD"],
    "learning_rate": [1e-2],
    "weight_decay": [1e-4],
    "batch_size": [10],
    "testing_dataset_type": ["validation"],
    "training_size": [10000],
    "testing_size": [1000],
    "momentum": [0.9]
    }
eval_data = HyperTuner().tune(TestingConfig.CONFIG, True)

Total combinations for exp: 2
Training for epoch: 0
Accuracy on training data 59.62(5962/10000)%
Accuracy on testing data 58.4(584/1000)%
Training for epoch: 1
Accuracy on training data 68.9(6890/10000)%
Accuracy on testing data 66.0(660/1000)%
Training for epoch: 2
Accuracy on training data 69.64(6964/10000)%
Accuracy on testing data 66.6(666/1000)%
Training for epoch: 3
Accuracy on training data 69.89(6989/10000)%
Accuracy on testing data 66.3(663/1000)%
Training for epoch: 4
Accuracy on training data 69.71(6971/10000)%
Accuracy on testing data 66.4(664/1000)%
Training for epoch: 5
Accuracy on training data 70.05(7005/10000)%
Accuracy on testing data 67.0(670/1000)%
Training for epoch: 6
Accuracy on training data 69.08(6908/10000)%
Accuracy on testing data 65.7(657/1000)%
Training for epoch: 7
Accuracy on training data 69.34(6934/10000)%
Accuracy on testing data 66.1(661/1000)%
Training for epoch: 8
Accuracy on training data 70.03(7003/10000)%
Accuracy on testing data 66.3(663/1000)%

In [None]:
eval_data

Unnamed: 0,best_accuracy,avg_accuracy,avg_time_taken,dataset,epochs,nn_stack,loss_func,optimizer,learning_rate,weight_decay,batch_size,momentum,testing_dataset_type,training_size,testing_size
1,97.7,78.18,206.669593,MNIST,10,"[Conv2d(1, 20, kernel_size=(5, 5), stride=(1, ...",CrossEntropyLoss,SGD,0.01,0.0001,10,0.9,validation,10000,1000
0,67.0,39.62,970.561377,MNIST,10,"[Conv2d(1, 20, kernel_size=(5, 5), stride=(1, ...",CrossEntropyLoss,SGD,0.01,0.0001,10,0.9,validation,10000,1000


Initial architecture performs better.


---

Full Run on 60, 00 training samples and 10,000 testing samples for best observed architecture and hyperparameters 

In [None]:
model = TorchCNN(
    loss_func="CrossEntropyLoss",
    optimizer="SGD",
    learning_rate=1e-2,
    lmda_wt_decay=1e-4,
    batch_size=10,
    training_size=60000,
    testing_size=10000,
    seed=35,
    momentum=0.9,
    nn_stack=[nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5, stride=1),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2),
                nn.Dropout(0.20),
                nn.Conv2d(in_channels=20, out_channels=40, kernel_size=5, stride=1),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2),
                nn.Dropout(0.20),
                nn.Flatten(),
                nn.Linear(40 * 4 * 4, 40 * 4 * 4 * 2),
                nn.ReLU(),
                nn.Dropout(0.20),
                nn.Linear(40 * 4 * 4 * 2, 10),
                nn.ReLU()]
).to(device)
train_data = MNIST(root='mnist_torch_data', train=True, download=True, transform=ToTensor())
test_data = MNIST(root='mnist_torch_data', train=False, download=True, transform=ToTensor())
torch.manual_seed(35)
training_loader = DataLoader(train_data, batch_size=10, shuffle=True)
testing_loader = DataLoader(test_data, batch_size=10, shuffle=True)
accuracies = []
for epoch in range(20):
    print(f"Training for epoch: {epoch}")
    model.train_model(training_loader)
    accuracies.append(model.evaluate(testing_loader, model.testing_size, "testing"))
print(max(accuracies))

Training for epoch: 0
Accuracy on training data 98.52(59109/60000)%
Accuracy on testing data 97.92(9792/10000)%
Training for epoch: 1
Accuracy on training data 99.11(59464/60000)%
Accuracy on testing data 98.19(9819/10000)%
Training for epoch: 2
Accuracy on training data 98.88(59330/60000)%
Accuracy on testing data 97.84(9784/10000)%
Training for epoch: 3
Accuracy on training data 99.64(59787/60000)%
Accuracy on testing data 98.55(9855/10000)%
Training for epoch: 4
Accuracy on training data 99.39(59634/60000)%
Accuracy on testing data 98.24(9824/10000)%
Training for epoch: 5
Accuracy on training data 99.68(59810/60000)%
Accuracy on testing data 98.58(9858/10000)%
Training for epoch: 6
Accuracy on training data 99.83(59900/60000)%
Accuracy on testing data 98.59(9859/10000)%
Training for epoch: 7
Accuracy on training data 99.78(59868/60000)%
Accuracy on testing data 98.51(9851/10000)%
Training for epoch: 8
Accuracy on training data 99.88(59928/60000)%
Accuracy on testing data 98.73(9873/

Observe Peak accuracy of **99.0%** on MNIST testing data

# CIFAR10 Tuning

Start tuning using the same layers as MNIST with two different dropout values `[0.00, 0.10]`

---

Note: all trials are on "validation" dataset and we only consume 10,000 training samples + 1,000 testing sample and lower number of epochs.

In [None]:
class TestingConfig:
    torch.manual_seed(21)
    base_architectures = [[nn.Conv2d(in_channels=3, out_channels=20, kernel_size=5, stride=1),
                            nn.ReLU(),
                            nn.MaxPool2d(kernel_size=2, stride=2),
                            nn.Dropout(0.0),
                            nn.Conv2d(in_channels=20, out_channels=40, kernel_size=5, stride=1),
                            nn.ReLU(),
                            nn.MaxPool2d(kernel_size=2, stride=2),
                            nn.Dropout(0.0),
                            nn.Flatten(),
                            nn.Linear(40 * 5 * 5, 40 * 5 * 5 * 5),
                            nn.ReLU(),
                            nn.Dropout(0.0),
                            nn.Linear(40 * 5 * 5 * 5, 10),
                            nn.ReLU()]]
    CONFIG = {
        "dataset": ["CIFAR10"],
        "epochs": [20],
        "nn_stack": _get_nn_stacks_with_dropouts(base_architectures, dropout_options=[0.0, 0.10]),
        "loss_func": ["CrossEntropyLoss"],
        "optimizer": ["SGD"],
        "learning_rate": [1e-2],
        "weight_decay": [1e-4],
        "batch_size": [10],
        "testing_dataset_type": ["validation"],
        "training_size": [10000],
        "testing_size": [1000],
        "momentum": [0.9]
    }


eval_data = HyperTuner().tune(TestingConfig.CONFIG, True)

Total combinations for exp: 2
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to cifar10/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=0.0, max=170498071.0), HTML(value='')))


Extracting cifar10/cifar-10-python.tar.gz to cifar10
Files already downloaded and verified
Training for epoch: 0
Accuracy on training data 10.12(1012/10000)%
Accuracy on testing data 11.0(110/1000)%
Training for epoch: 1
Accuracy on training data 15.91(1591/10000)%
Accuracy on testing data 16.4(164/1000)%
Training for epoch: 2
Accuracy on training data 21.4(2140/10000)%
Accuracy on testing data 22.9(229/1000)%
Training for epoch: 3
Accuracy on training data 29.73(2973/10000)%
Accuracy on testing data 29.6(296/1000)%
Training for epoch: 4
Accuracy on training data 30.02(3002/10000)%
Accuracy on testing data 28.5(285/1000)%
Training for epoch: 5
Accuracy on training data 43.34(4334/10000)%
Accuracy on testing data 39.1(391/1000)%
Training for epoch: 6
Accuracy on training data 52.33(5233/10000)%
Accuracy on testing data 44.9(449/1000)%
Training for epoch: 7
Accuracy on training data 65.77(6577/10000)%
Accuracy on testing data 52.5(525/1000)%
Training for epoch: 8
Accuracy on training da

In [None]:
eval_data

Unnamed: 0,best_accuracy,avg_accuracy,avg_time_taken,dataset,epochs,nn_stack,loss_func,optimizer,learning_rate,weight_decay,batch_size,momentum,testing_dataset_type,training_size,testing_size
0,54.7,33.728333,1222.037093,CIFAR10,20,"[Conv2d(3, 20, kernel_size=(5, 5), stride=(1, ...",CrossEntropyLoss,SGD,0.01,0.0001,10,0.9,validation,10000,1000
1,46.2,24.11,1214.860022,CIFAR10,20,"[Conv2d(3, 20, kernel_size=(5, 5), stride=(1, ...",CrossEntropyLoss,SGD,0.01,0.0001,10,0.9,validation,10000,1000


In [None]:
eval_data['nn_stack'][0]

[Conv2d(3, 20, kernel_size=(5, 5), stride=(1, 1)),
 ReLU(),
 MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
 Dropout(p=0.0, inplace=False),
 Conv2d(20, 40, kernel_size=(5, 5), stride=(1, 1)),
 ReLU(),
 MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
 Dropout(p=0.0, inplace=False),
 Flatten(start_dim=1, end_dim=-1),
 Linear(in_features=1000, out_features=5000, bias=True),
 ReLU(),
 Dropout(p=0.0, inplace=False),
 Linear(in_features=5000, out_features=10, bias=True),
 ReLU()]

No dropout works better as of now. Maybe because of lower epochs. <br>Try different learning rates; `[5, 1, 1e-1, 1e-2]`

In [None]:
class TestingConfig:
    torch.manual_seed(21)
    base_architectures = [[nn.Conv2d(in_channels=3, out_channels=20, kernel_size=5, stride=1),
                            nn.ReLU(),
                            nn.MaxPool2d(kernel_size=2, stride=2),
                            nn.Dropout(0.0),
                            nn.Conv2d(in_channels=20, out_channels=40, kernel_size=5, stride=1),
                            nn.ReLU(),
                            nn.MaxPool2d(kernel_size=2, stride=2),
                            nn.Dropout(0.0),
                            nn.Flatten(),
                            nn.Linear(40 * 5 * 5, 40 * 5 * 5 * 2),
                            nn.ReLU(),
                            nn.Dropout(0.0),
                            nn.Linear(40 * 5 * 5 * 2, 10),
                            nn.ReLU()]]
    CONFIG = {
        "dataset": ["CIFAR10"],
        "epochs": [10],
        "nn_stack": _get_nn_stacks_with_dropouts(base_architectures, dropout_options=[0.0]),
        "loss_func": ["CrossEntropyLoss"],
        "optimizer": ["SGD"],
        "learning_rate": [5, 1, 1e-1, 1e-2],
        "weight_decay": [1e-4],
        "batch_size": [10],
        "testing_dataset_type": ["validation"],
        "training_size": [10000],
        "testing_size": [1000],
        "momentum": [0.9]
    }


eval_data = HyperTuner().tune(TestingConfig.CONFIG, True)

Total combinations for exp: 4
Files already downloaded and verified
Files already downloaded and verified
Training for epoch: 0
Accuracy on training data 10.12(1012/10000)%
Accuracy on testing data 11.0(110/1000)%
Training for epoch: 1
Accuracy on training data 10.12(1012/10000)%
Accuracy on testing data 11.0(110/1000)%
Training for epoch: 2
Accuracy on training data 10.12(1012/10000)%
Accuracy on testing data 11.0(110/1000)%
Training for epoch: 3
Accuracy on training data 10.12(1012/10000)%
Accuracy on testing data 11.0(110/1000)%
Training for epoch: 4
Accuracy on training data 10.12(1012/10000)%
Accuracy on testing data 11.0(110/1000)%
Training for epoch: 5
Accuracy on training data 10.12(1012/10000)%
Accuracy on testing data 11.0(110/1000)%
Training for epoch: 6
Accuracy on training data 10.12(1012/10000)%
Accuracy on testing data 11.0(110/1000)%
Training for epoch: 7
Accuracy on training data 10.12(1012/10000)%
Accuracy on testing data 11.0(110/1000)%
Training for epoch: 8
Accuracy

In [None]:
eval_data

Unnamed: 0,best_accuracy,avg_accuracy,avg_time_taken,dataset,epochs,nn_stack,loss_func,optimizer,learning_rate,weight_decay,batch_size,momentum,testing_dataset_type,training_size,testing_size
3,53.5,45.043333,350.749481,CIFAR10,10,"[Conv2d(3, 20, kernel_size=(5, 5), stride=(1, ...",CrossEntropyLoss,SGD,0.01,0.0001,10,0.9,validation,10000,1000
0,11.0,10.2,360.559661,CIFAR10,10,"[Conv2d(3, 20, kernel_size=(5, 5), stride=(1, ...",CrossEntropyLoss,SGD,5.0,0.0001,10,0.9,validation,10000,1000
1,11.0,10.2,358.672987,CIFAR10,10,"[Conv2d(3, 20, kernel_size=(5, 5), stride=(1, ...",CrossEntropyLoss,SGD,1.0,0.0001,10,0.9,validation,10000,1000
2,11.0,10.2,357.240163,CIFAR10,10,"[Conv2d(3, 20, kernel_size=(5, 5), stride=(1, ...",CrossEntropyLoss,SGD,0.1,0.0001,10,0.9,validation,10000,1000


In [None]:
eval_data['nn_stack'][0]

[Conv2d(3, 20, kernel_size=(5, 5), stride=(1, 1)),
 ReLU(),
 MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
 Dropout(p=0.0, inplace=False),
 Conv2d(20, 40, kernel_size=(5, 5), stride=(1, 1)),
 ReLU(),
 MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
 Dropout(p=0.0, inplace=False),
 Flatten(start_dim=1, end_dim=-1),
 Linear(in_features=1000, out_features=2000, bias=True),
 ReLU(),
 Dropout(p=0.0, inplace=False),
 Linear(in_features=2000, out_features=10, bias=True),
 ReLU()]

The lower, the better performance.
Try more combinations with lower learning rates `[1e-3, 1e-4, 1e-5]`

In [None]:
class TestingConfig:
    torch.manual_seed(21)
    base_architectures = [[nn.Conv2d(in_channels=3, out_channels=20, kernel_size=5, stride=1),
                            nn.ReLU(),
                            nn.MaxPool2d(kernel_size=2, stride=2),
                            nn.Dropout(0.0),
                            nn.Conv2d(in_channels=20, out_channels=40, kernel_size=5, stride=1),
                            nn.ReLU(),
                            nn.MaxPool2d(kernel_size=2, stride=2),
                            nn.Dropout(0.0),
                            nn.Flatten(),
                            nn.Linear(40 * 5 * 5, 40 * 5 * 5 * 2),
                            nn.ReLU(),
                            nn.Dropout(0.0),
                            nn.Linear(40 * 5 * 5 * 2, 10),
                            nn.ReLU()]]
    CONFIG = {
        "dataset": ["CIFAR10"],
        "epochs": [10],
        "nn_stack": _get_nn_stacks_with_dropouts(base_architectures, dropout_options=[0.0]),
        "loss_func": ["CrossEntropyLoss"],
        "optimizer": ["SGD"],
        "learning_rate": [1e-3, 1e-4, 1e-5],
        "weight_decay": [1e-4],
        "batch_size": [10],
        "testing_dataset_type": ["validation"],
        "training_size": [10000],
        "testing_size": [1000],
        "momentum": [0.9]
    }


eval_data = HyperTuner().tune(TestingConfig.CONFIG, True)

Total combinations for exp: 3
Files already downloaded and verified
Files already downloaded and verified
Training for epoch: 0
Accuracy on training data 23.95(2395/10000)%
Accuracy on testing data 24.3(243/1000)%
Training for epoch: 1
Accuracy on training data 35.05(3505/10000)%
Accuracy on testing data 34.6(346/1000)%
Training for epoch: 2
Accuracy on training data 45.03(4503/10000)%
Accuracy on testing data 47.2(472/1000)%
Training for epoch: 3
Accuracy on training data 51.15(5115/10000)%
Accuracy on testing data 52.5(525/1000)%
Training for epoch: 4
Accuracy on training data 55.21(5521/10000)%
Accuracy on testing data 55.3(553/1000)%
Training for epoch: 5
Accuracy on training data 59.22(5922/10000)%
Accuracy on testing data 58.1(581/1000)%
Training for epoch: 6
Accuracy on training data 62.71(6271/10000)%
Accuracy on testing data 58.3(583/1000)%
Training for epoch: 7
Accuracy on training data 66.52(6652/10000)%
Accuracy on testing data 59.5(595/1000)%
Training for epoch: 8
Accuracy

In [None]:
eval_data

Unnamed: 0,best_accuracy,avg_accuracy,avg_time_taken,dataset,epochs,nn_stack,loss_func,optimizer,learning_rate,weight_decay,batch_size,momentum,testing_dataset_type,training_size,testing_size
0,62.1,46.31,347.395857,CIFAR10,10,"[Conv2d(3, 20, kernel_size=(5, 5), stride=(1, ...",CrossEntropyLoss,SGD,0.001,0.0001,10,0.9,validation,10000,1000
1,37.9,26.213333,353.097072,CIFAR10,10,"[Conv2d(3, 20, kernel_size=(5, 5), stride=(1, ...",CrossEntropyLoss,SGD,0.0001,0.0001,10,0.9,validation,10000,1000
2,17.2,13.323333,348.533622,CIFAR10,10,"[Conv2d(3, 20, kernel_size=(5, 5), stride=(1, ...",CrossEntropyLoss,SGD,1e-05,0.0001,10,0.9,validation,10000,1000


Learning rate of `1e-3` seems to work best. Try different batch sizes `[10, 100, 250]`

In [None]:
class TestingConfig:
    torch.manual_seed(21)
    base_architectures = [[nn.Conv2d(in_channels=3, out_channels=20, kernel_size=5, stride=1),
                            nn.ReLU(),
                            nn.MaxPool2d(kernel_size=2, stride=2),
                            nn.Dropout(0.0),
                            nn.Conv2d(in_channels=20, out_channels=40, kernel_size=5, stride=1),
                            nn.ReLU(),
                            nn.MaxPool2d(kernel_size=2, stride=2),
                            nn.Dropout(0.0),
                            nn.Flatten(),
                            nn.Linear(40 * 5 * 5, 40 * 5 * 5 * 2),
                            nn.ReLU(),
                            nn.Dropout(0.0),
                            nn.Linear(40 * 5 * 5 * 2, 10),
                            nn.ReLU()]]
    CONFIG = {
        "dataset": ["CIFAR10"],
        "epochs": [10],
        "nn_stack": _get_nn_stacks_with_dropouts(base_architectures, dropout_options=[0.0]),
        "loss_func": ["CrossEntropyLoss"],
        "optimizer": ["SGD"],
        "learning_rate": [1e-3],
        "weight_decay": [1e-4],
        "batch_size": [10, 100, 250],
        "testing_dataset_type": ["validation"],
        "training_size": [10000],
        "testing_size": [1000],
        "momentum": [0.9]
    }


eval_data = HyperTuner().tune(TestingConfig.CONFIG, True)

Total combinations for exp: 3
Files already downloaded and verified
Files already downloaded and verified
Training for epoch: 0
Accuracy on training data 23.95(2395/10000)%
Accuracy on testing data 24.3(243/1000)%
Training for epoch: 1
Accuracy on training data 35.05(3505/10000)%
Accuracy on testing data 34.6(346/1000)%
Training for epoch: 2
Accuracy on training data 45.03(4503/10000)%
Accuracy on testing data 47.2(472/1000)%
Training for epoch: 3
Accuracy on training data 51.15(5115/10000)%
Accuracy on testing data 52.5(525/1000)%
Training for epoch: 4
Accuracy on training data 55.21(5521/10000)%
Accuracy on testing data 55.3(553/1000)%
Training for epoch: 5
Accuracy on training data 59.22(5922/10000)%
Accuracy on testing data 58.1(581/1000)%
Training for epoch: 6
Accuracy on training data 62.71(6271/10000)%
Accuracy on testing data 58.3(583/1000)%
Training for epoch: 7
Accuracy on training data 66.52(6652/10000)%
Accuracy on testing data 59.5(595/1000)%
Training for epoch: 8
Accuracy

In [None]:
eval_data

Unnamed: 0,best_accuracy,avg_accuracy,avg_time_taken,dataset,epochs,nn_stack,loss_func,optimizer,learning_rate,weight_decay,batch_size,momentum,testing_dataset_type,training_size,testing_size
0,62.1,46.31,341.212751,CIFAR10,10,"[Conv2d(3, 20, kernel_size=(5, 5), stride=(1, ...",CrossEntropyLoss,SGD,0.001,0.0001,10,0.9,validation,10000,1000
1,37.0,25.416667,198.163208,CIFAR10,10,"[Conv2d(3, 20, kernel_size=(5, 5), stride=(1, ...",CrossEntropyLoss,SGD,0.001,0.0001,100,0.9,validation,10000,1000
2,29.0,19.413333,180.172441,CIFAR10,10,"[Conv2d(3, 20, kernel_size=(5, 5), stride=(1, ...",CrossEntropyLoss,SGD,0.001,0.0001,250,0.9,validation,10000,1000


Lowest batch size worked best. Try with batch sizes 5 as well(1 is too slow)

In [None]:
class TestingConfig:
    torch.manual_seed(21)
    base_architectures = [
                          [nn.Conv2d(in_channels=3, out_channels=20, kernel_size=5, stride=1),
                            nn.ReLU(),
                            nn.MaxPool2d(kernel_size=2, stride=2),
                            nn.Dropout(0.0),
                            nn.Conv2d(in_channels=20, out_channels=40, kernel_size=5, stride=1),
                            nn.ReLU(),
                            nn.MaxPool2d(kernel_size=2, stride=2),
                            nn.Dropout(0.0),
                            nn.Flatten(),
                            nn.Linear(40 * 5 * 5, 40 * 5 * 5 * 2),
                            nn.ReLU(),
                            nn.Dropout(0.0),
                            nn.Linear(40 * 5 * 5 * 2, 10),
                            nn.ReLU()]]
    CONFIG = {
        "dataset": ["CIFAR10"],
        "epochs": [10],
        "nn_stack": _get_nn_stacks_with_dropouts(base_architectures, dropout_options=[0.0]),
        "loss_func": ["CrossEntropyLoss"],
        "optimizer": ["SGD"],
        "learning_rate": [1e-3],
        "weight_decay": [1e-4],
        "batch_size": [5],
        "testing_dataset_type": ["validation"],
        "training_size": [10000],
        "testing_size": [1000],
        "momentum": [0.9]
    }


eval_data = HyperTuner().tune(TestingConfig.CONFIG, True)

Total combinations for exp: 1
Files already downloaded and verified
Files already downloaded and verified
Training for epoch: 0
Accuracy on training data 29.69(2969/10000)%
Accuracy on testing data 30.2(302/1000)%
Training for epoch: 1
Accuracy on training data 45.39(4539/10000)%
Accuracy on testing data 48.3(483/1000)%
Training for epoch: 2
Accuracy on training data 52.84(5284/10000)%
Accuracy on testing data 52.8(528/1000)%
Training for epoch: 3
Accuracy on training data 58.16(5816/10000)%
Accuracy on testing data 54.3(543/1000)%
Training for epoch: 4
Accuracy on training data 62.9(6290/10000)%
Accuracy on testing data 56.8(568/1000)%
Training for epoch: 5
Accuracy on training data 67.63(6763/10000)%
Accuracy on testing data 57.8(578/1000)%
Training for epoch: 6
Accuracy on training data 72.32(7232/10000)%
Accuracy on testing data 59.0(590/1000)%
Training for epoch: 7
Accuracy on training data 77.7(7770/10000)%
Accuracy on testing data 60.5(605/1000)%
Training for epoch: 8
Accuracy o

In [None]:
eval_data

Unnamed: 0,best_accuracy,avg_accuracy,avg_time_taken,dataset,epochs,nn_stack,loss_func,optimizer,learning_rate,weight_decay,batch_size,momentum,testing_dataset_type,training_size,testing_size
0,63.4,52.796667,513.717378,CIFAR10,10,"[Conv2d(3, 20, kernel_size=(5, 5), stride=(1, ...",CrossEntropyLoss,SGD,0.001,0.0001,5,0.9,validation,10000,1000


Batch size of 5 seems to work best, albeit slower.

---
Try adding one more hidden Linear Layer



In [None]:
class TestingConfig:
    torch.manual_seed(21)
    base_architectures = [
            # more linear layers
            [nn.Conv2d(in_channels=3, out_channels=20, kernel_size=5, stride=1),
             nn.ReLU(),
             nn.MaxPool2d(kernel_size=2, stride=2),
             nn.Dropout(0.0),
             nn.Conv2d(in_channels=20, out_channels=40, kernel_size=5, stride=1),
             nn.ReLU(),
             nn.MaxPool2d(kernel_size=2, stride=2),
             nn.Dropout(0.0),
             nn.Flatten(),
             nn.Linear(40 * 5 * 5, 40 * 5 * 5 * 2),
             nn.ReLU(),
             nn.Dropout(0.0),
             nn.Linear(40 * 5 * 5 * 2, 1000),
             nn.ReLU(),
             nn.Dropout(0.0),
             nn.Linear(1000, 10),
             nn.ReLU()],
        ]
    CONFIG = {
        "dataset": ["CIFAR10"],
        "epochs": [10],
        "nn_stack": _get_nn_stacks_with_dropouts(base_architectures, dropout_options=[0.0]),
        "loss_func": ["CrossEntropyLoss"],
        "optimizer": ["SGD"],
        "learning_rate": [1e-3],
        "weight_decay": [1e-4],
        "batch_size": [5],
        "testing_dataset_type": ["validation"],
        "training_size": [10000],
        "testing_size": [1000],
        "momentum": [0.9]
    }


eval_data = HyperTuner().tune(TestingConfig.CONFIG, True)

Total combinations for exp: 1
Files already downloaded and verified
Files already downloaded and verified
Training for epoch: 0
Accuracy on training data 31.34(3134/10000)%
Accuracy on testing data 32.4(324/1000)%
Training for epoch: 1
Accuracy on training data 38.73(3873/10000)%
Accuracy on testing data 39.4(394/1000)%
Training for epoch: 2
Accuracy on training data 44.95(4495/10000)%
Accuracy on testing data 44.8(448/1000)%
Training for epoch: 3
Accuracy on training data 50.15(5015/10000)%
Accuracy on testing data 50.0(500/1000)%
Training for epoch: 4
Accuracy on training data 54.69(5469/10000)%
Accuracy on testing data 51.8(518/1000)%
Training for epoch: 5
Accuracy on training data 59.38(5938/10000)%
Accuracy on testing data 52.9(529/1000)%
Training for epoch: 6
Accuracy on training data 63.17(6317/10000)%
Accuracy on testing data 55.0(550/1000)%
Training for epoch: 7
Accuracy on training data 67.31(6731/10000)%
Accuracy on testing data 55.6(556/1000)%
Training for epoch: 8
Accuracy

In [None]:
eval_data

Unnamed: 0,best_accuracy,avg_accuracy,avg_time_taken,dataset,epochs,nn_stack,loss_func,optimizer,learning_rate,weight_decay,batch_size,momentum,testing_dataset_type,training_size,testing_size
0,60.0,44.666667,807.469093,CIFAR10,10,"[Conv2d(3, 20, kernel_size=(5, 5), stride=(1, ...",CrossEntropyLoss,SGD,0.001,0.0001,5,0.9,validation,10000,1000


Does not seem to improve the accuracy.
<br> Try a full run using previous config on training/testing datasets

In [None]:
model = TorchCNN(
    loss_func="CrossEntropyLoss",
    optimizer="SGD",
    learning_rate=1e-3,
    lmda_wt_decay=1e-4,
    batch_size=5,
    training_size=50000,
    testing_size=10000,
    seed=35,
    momentum=0.9,
    nn_stack=[  nn.Conv2d(in_channels=3, out_channels=20, kernel_size=5, stride=1),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2),
                nn.Dropout(0.0),
                nn.Conv2d(in_channels=20, out_channels=40, kernel_size=5, stride=1),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2),
                nn.Dropout(0.0),
                nn.Flatten(),
                nn.Linear(40 * 5 * 5, 40 * 5 * 5 * 2),
                nn.ReLU(),
                nn.Dropout(0.0),
                nn.Linear(40 * 5 * 5 * 2, 10),
                nn.ReLU()]
).to(device)
train_data = CIFAR10(root='cifar10', train=True, download=True, transform=cifar_transform)
test_data = CIFAR10(root='cifar10', train=False, download=True, transform=cifar_transform)
torch.manual_seed(35)
training_loader = DataLoader(train_data, batch_size=5, shuffle=True)
testing_loader = DataLoader(test_data, batch_size=5, shuffle=True)
accuracies = []
for epoch in range(25):
    print(f"Training for epoch: {epoch}")
    model.train_model(training_loader)
    accuracies.append(model.evaluate(testing_loader, model.testing_size, "testing"))
print(max(accuracies))

Files already downloaded and verified
Files already downloaded and verified
Training for epoch: 0
Accuracy on training data 46.14(23071/50000)%
Accuracy on testing data 44.79(4479/10000)%
Training for epoch: 1
Accuracy on training data 63.77(31885/50000)%
Accuracy on testing data 60.96(6096/10000)%
Training for epoch: 2
Accuracy on training data 71.92(35959/50000)%
Accuracy on testing data 65.95(6595/10000)%
Training for epoch: 3
Accuracy on training data 76.65(38326/50000)%
Accuracy on testing data 67.28(6728/10000)%
Training for epoch: 4
Accuracy on training data 81.17(40583/50000)%
Accuracy on testing data 67.73(6773/10000)%
Training for epoch: 5
Accuracy on training data 84.92(42461/50000)%
Accuracy on testing data 68.27(6827/10000)%
Training for epoch: 6
Accuracy on training data 87.29(43646/50000)%
Accuracy on testing data 67.93(6793/10000)%
Training for epoch: 7
Accuracy on training data 89.86(44929/50000)%
Accuracy on testing data 68.6(6860/10000)%
Training for epoch: 8
Accurac

TypeError: ignored

Reaches around 68-69% on CIFAR10 testing dataset. Stopping the execution manually.

In [None]:
class TestingConfig:
    torch.manual_seed(21)
    base_architectures = [
            # more conv layers
            [nn.Conv2d(in_channels=3, out_channels=20, kernel_size=3, stride=1, padding=1),
             nn.ReLU(),
             nn.MaxPool2d(kernel_size=2, stride=2),
             nn.Dropout(0.0),
             nn.Conv2d(in_channels=20, out_channels=40, kernel_size=3, stride=1, padding=1),
             nn.ReLU(),
             nn.MaxPool2d(kernel_size=2, stride=2),
             nn.Dropout(0.0),
             nn.Conv2d(in_channels=40, out_channels=60, kernel_size=3, stride=1, padding=1),
             nn.ReLU(),
             nn.MaxPool2d(kernel_size=2, stride=2),
             nn.Dropout(0.0),
             nn.Flatten(),
             nn.Linear(60 * 4 * 4, 1000),
             nn.ReLU(),
             nn.Dropout(0.0),
             nn.Linear(1000, 10),
             nn.ReLU()],
        ]
    CONFIG = {
        "dataset": ["CIFAR10"],
        "epochs": [10],
        "nn_stack": _get_nn_stacks_with_dropouts(base_architectures, dropout_options=[0.00]),
        "loss_func": ["CrossEntropyLoss"],
        "optimizer": ["SGD"],
        "learning_rate": [1e-3],
        "weight_decay": [1e-4],
        "batch_size": [5],
        "testing_dataset_type": ["validation"],
        "training_size": [10000],
        "testing_size": [1000],
        "momentum": [0.9]
    }


eval_data = HyperTuner().tune(TestingConfig.CONFIG, True)

Total combinations for exp: 1
Files already downloaded and verified
Files already downloaded and verified
Training for epoch: 0
Accuracy on training data 24.79(2479/10000)%
Accuracy on testing data 26.6(266/1000)%
Training for epoch: 1
Accuracy on training data 36.79(3679/10000)%
Accuracy on testing data 38.9(389/1000)%
Training for epoch: 2
Accuracy on training data 46.85(4685/10000)%
Accuracy on testing data 47.6(476/1000)%
Training for epoch: 3
Accuracy on training data 52.54(5254/10000)%
Accuracy on testing data 51.6(516/1000)%
Training for epoch: 4
Accuracy on training data 58.33(5833/10000)%
Accuracy on testing data 56.5(565/1000)%
Training for epoch: 5
Accuracy on training data 62.71(6271/10000)%
Accuracy on testing data 58.3(583/1000)%
Training for epoch: 6
Accuracy on training data 66.46(6646/10000)%
Accuracy on testing data 59.8(598/1000)%
Training for epoch: 7
Accuracy on training data 70.06(7006/10000)%
Accuracy on testing data 60.2(602/1000)%
Training for epoch: 8
Accuracy

In [None]:
eval_data

Unnamed: 0,best_accuracy,avg_accuracy,avg_time_taken,dataset,epochs,nn_stack,loss_func,optimizer,learning_rate,weight_decay,batch_size,momentum,testing_dataset_type,training_size,testing_size
0,64.0,50.376667,464.986883,CIFAR10,10,"[Conv2d(3, 20, kernel_size=(3, 3), stride=(1, ...",CrossEntropyLoss,SGD,0.001,0.0001,5,0.9,validation,10000,1000


In [None]:
class TestingConfig:
    torch.manual_seed(21)
    base_architectures = [
            # more conv layers
            [nn.Conv2d(in_channels=3, out_channels=20, kernel_size=3, stride=1, padding=1),
             nn.ReLU(),
             nn.MaxPool2d(kernel_size=2, stride=2),
             nn.Dropout(0.0),
             nn.Conv2d(in_channels=20, out_channels=40, kernel_size=3, stride=1, padding=1),
             nn.ReLU(),
             nn.MaxPool2d(kernel_size=2, stride=2),
             nn.Dropout(0.0),
             nn.Conv2d(in_channels=40, out_channels=60, kernel_size=3, stride=1, padding=1),
             nn.ReLU(),
             nn.MaxPool2d(kernel_size=2, stride=2),
             nn.Dropout(0.0),
             nn.Conv2d(in_channels=60, out_channels=80, kernel_size=3, stride=1, padding=1),
             nn.ReLU(),
             nn.MaxPool2d(kernel_size=2, stride=2),
             nn.Dropout(0.0),
             nn.Flatten(),
             nn.Linear(80 * 2 * 2, 80 * 2 * 2 * 2),
             nn.ReLU(),
             nn.Dropout(0.0),
             nn.Linear(80 * 2 * 2 * 2, 10),
             nn.ReLU()],
        ]
    CONFIG = {
        "dataset": ["CIFAR10"],
        "epochs": [10],
        "nn_stack": _get_nn_stacks_with_dropouts(base_architectures, dropout_options=[0.00]),
        "loss_func": ["CrossEntropyLoss"],
        "optimizer": ["SGD"],
        "learning_rate": [1e-3],
        "weight_decay": [1e-4],
        "batch_size": [5],
        "testing_dataset_type": ["validation"],
        "training_size": [10000],
        "testing_size": [1000],
        "momentum": [0.9]
    }


eval_data = HyperTuner().tune(TestingConfig.CONFIG, True)

Total combinations for exp: 1
Files already downloaded and verified
Files already downloaded and verified
Training for epoch: 0
Accuracy on training data 18.29(1829/10000)%
Accuracy on testing data 18.7(187/1000)%
Training for epoch: 1
Accuracy on training data 30.3(3030/10000)%
Accuracy on testing data 31.2(312/1000)%
Training for epoch: 2
Accuracy on training data 38.82(3882/10000)%
Accuracy on testing data 40.7(407/1000)%
Training for epoch: 3
Accuracy on training data 42.73(4273/10000)%
Accuracy on testing data 45.3(453/1000)%
Training for epoch: 4
Accuracy on training data 46.47(4647/10000)%
Accuracy on testing data 47.9(479/1000)%
Training for epoch: 5
Accuracy on training data 50.73(5073/10000)%
Accuracy on testing data 50.3(503/1000)%
Training for epoch: 6
Accuracy on training data 54.27(5427/10000)%
Accuracy on testing data 53.0(530/1000)%
Training for epoch: 7
Accuracy on training data 57.62(5762/10000)%
Accuracy on testing data 55.9(559/1000)%
Training for epoch: 8
Accuracy 

In [None]:
eval_data

Unnamed: 0,best_accuracy,avg_accuracy,avg_time_taken,dataset,epochs,nn_stack,loss_func,optimizer,learning_rate,weight_decay,batch_size,momentum,testing_dataset_type,training_size,testing_size
0,60.8,42.533333,359.428771,CIFAR10,10,"[Conv2d(3, 20, kernel_size=(3, 3), stride=(1, ...",CrossEntropyLoss,SGD,0.001,0.0001,5,0.9,validation,10000,1000


In [None]:
class TestingConfig:
    torch.manual_seed(21)
    base_architectures = [
            # more conv layers
            [nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1),
             nn.ReLU(),
             nn.MaxPool2d(kernel_size=2, stride=2),
             nn.Dropout(0.0),
             nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1),
             nn.ReLU(),
             nn.MaxPool2d(kernel_size=2, stride=2),
             nn.Dropout(0.0),
             nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1),
             nn.ReLU(),
             nn.MaxPool2d(kernel_size=2, stride=2),
             nn.Dropout(0.0),
             nn.Flatten(),
             nn.Linear(64 * 4 * 4, 64 * 4 * 4 * 2),
             nn.ReLU(),
             nn.Dropout(0.0),
             nn.Linear(64 * 4 * 4 * 2, 10),
             nn.ReLU()],
        ]
    CONFIG = {
        "dataset": ["CIFAR10"],
        "epochs": [10],
        "nn_stack": _get_nn_stacks_with_dropouts(base_architectures, dropout_options=[0.00]),
        "loss_func": ["CrossEntropyLoss"],
        "optimizer": ["SGD"],
        "learning_rate": [1e-3],
        "weight_decay": [1e-4],
        "batch_size": [5],
        "testing_dataset_type": ["validation"],
        "training_size": [10000],
        "testing_size": [1000],
        "momentum": [0.9]
    }


eval_data = HyperTuner().tune(TestingConfig.CONFIG, True)

Total combinations for exp: 1
Files already downloaded and verified
Files already downloaded and verified
Training for epoch: 0
Accuracy on training data 22.43(2243/10000)%
Accuracy on testing data 23.5(235/1000)%
Training for epoch: 1
Accuracy on training data 37.0(3700/10000)%
Accuracy on testing data 38.6(386/1000)%
Training for epoch: 2
Accuracy on training data 44.91(4491/10000)%
Accuracy on testing data 45.5(455/1000)%
Training for epoch: 3
Accuracy on training data 51.45(5145/10000)%
Accuracy on testing data 52.0(520/1000)%
Training for epoch: 4
Accuracy on training data 56.82(5682/10000)%
Accuracy on testing data 55.9(559/1000)%
Training for epoch: 5
Accuracy on training data 61.4(6140/10000)%
Accuracy on testing data 57.8(578/1000)%
Training for epoch: 6
Accuracy on training data 65.83(6583/10000)%
Accuracy on testing data 58.4(584/1000)%
Training for epoch: 7
Accuracy on training data 70.25(7025/10000)%
Accuracy on testing data 60.3(603/1000)%
Training for epoch: 8
Accuracy o

In [None]:
eval_data

Unnamed: 0,best_accuracy,avg_accuracy,avg_time_taken,dataset,epochs,nn_stack,loss_func,optimizer,learning_rate,weight_decay,batch_size,momentum,testing_dataset_type,training_size,testing_size
0,63.6,50.416667,572.154505,CIFAR10,10,"[Conv2d(3, 16, kernel_size=(3, 3), stride=(1, ...",CrossEntropyLoss,SGD,0.001,0.0001,5,0.9,validation,10000,1000


In [None]:
model = TorchCNN(
    loss_func="CrossEntropyLoss",
    optimizer="SGD",
    learning_rate=1e-3,
    lmda_wt_decay=1e-4,
    batch_size=5,
    training_size=50000,
    testing_size=10000,
    seed=35,
    momentum=0.9,
    nn_stack=[nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1),
             nn.ReLU(),
             nn.MaxPool2d(kernel_size=2, stride=2),
             nn.Dropout(0.0),
             nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1),
             nn.ReLU(),
             nn.MaxPool2d(kernel_size=2, stride=2),
             nn.Dropout(0.0),
             nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1),
             nn.ReLU(),
             nn.MaxPool2d(kernel_size=2, stride=2),
             nn.Dropout(0.0),
             nn.Flatten(),
             nn.Linear(64 * 4 * 4, 64 * 4 * 4 * 2),
             nn.ReLU(),
             nn.Dropout(0.0),
             nn.Linear(64 * 4 * 4 * 2, 10),
             nn.ReLU()]
).to(device)
train_data = CIFAR10(root='cifar10', train=True, download=True, transform=cifar_transform)
test_data = CIFAR10(root='cifar10', train=False, download=True, transform=cifar_transform)
torch.manual_seed(35)
training_loader = DataLoader(train_data, batch_size=5, shuffle=True)
testing_loader = DataLoader(test_data, batch_size=5, shuffle=True)
accuracies = []
for epoch in range(25):
    print(f"Training for epoch: {epoch}")
    model.train_model(training_loader)
    accuracies.append(model.evaluate(testing_loader, model.testing_size, "testing"))
print(max(accuracies))

Files already downloaded and verified
Files already downloaded and verified
Training for epoch: 0
Accuracy on training data 51.75(25873/50000)%
Accuracy on testing data 50.7(5070/10000)%
Training for epoch: 1
Accuracy on training data 65.94(32968/50000)%
Accuracy on testing data 62.95(6295/10000)%
Training for epoch: 2
Accuracy on training data 72.72(36362/50000)%
Accuracy on testing data 67.26(6726/10000)%
Training for epoch: 3
Accuracy on training data 77.63(38814/50000)%
Accuracy on testing data 69.51(6951/10000)%
Training for epoch: 4
Accuracy on training data 80.9(40451/50000)%
Accuracy on testing data 69.83(6983/10000)%
Training for epoch: 5
Accuracy on training data 83.74(41872/50000)%
Accuracy on testing data 69.92(6992/10000)%
Training for epoch: 6
Accuracy on training data 85.12(42558/50000)%
Accuracy on testing data 68.77(6877/10000)%
Training for epoch: 7
Accuracy on training data 89.2(44600/50000)%
Accuracy on testing data 70.03(7003/10000)%
Training for epoch: 8
Accuracy 

Max observation of **73.62**. Try increasing epochs to 40.

In [None]:
model = TorchCNN(
    loss_func="CrossEntropyLoss",
    optimizer="SGD",
    learning_rate=1e-3,
    lmda_wt_decay=1e-4,
    batch_size=5,
    training_size=50000,
    testing_size=10000,
    seed=35,
    momentum=0.9,
    nn_stack=[nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1),
             nn.ReLU(),
             nn.MaxPool2d(kernel_size=2, stride=2),
             nn.Dropout(0.0),
             nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1),
             nn.ReLU(),
             nn.MaxPool2d(kernel_size=2, stride=2),
             nn.Dropout(0.0),
             nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1),
             nn.ReLU(),
             nn.MaxPool2d(kernel_size=2, stride=2),
             nn.Dropout(0.0),
             nn.Flatten(),
             nn.Linear(64 * 4 * 4, 64 * 4 * 4 * 2),
             nn.ReLU(),
             nn.Dropout(0.0),
             nn.Linear(64 * 4 * 4 * 2, 10),
             nn.ReLU()]
).to(device)
train_data = CIFAR10(root='cifar10', train=True, download=True, transform=cifar_transform)
test_data = CIFAR10(root='cifar10', train=False, download=True, transform=cifar_transform)
torch.manual_seed(35)
training_loader = DataLoader(train_data, batch_size=5, shuffle=True)
testing_loader = DataLoader(test_data, batch_size=5, shuffle=True)
accuracies = []
for epoch in range(40):
    print(f"Training for epoch: {epoch}")
    model.train_model(training_loader)
    accuracies.append(model.evaluate(testing_loader, model.testing_size, "testing"))
print(max(accuracies))

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to cifar10/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=0.0, max=170498071.0), HTML(value='')))


Extracting cifar10/cifar-10-python.tar.gz to cifar10
Files already downloaded and verified
Training for epoch: 0
Accuracy on training data 51.62(25808/50000)%
Accuracy on testing data 50.52(5052/10000)%
Training for epoch: 1
Accuracy on training data 64.86(32429/50000)%
Accuracy on testing data 62.55(6255/10000)%
Training for epoch: 2
Accuracy on training data 71.66(35832/50000)%
Accuracy on testing data 66.83(6683/10000)%
Training for epoch: 3
Accuracy on training data 74.95(37475/50000)%
Accuracy on testing data 67.58(6758/10000)%
Training for epoch: 4
Accuracy on training data 76.94(38469/50000)%
Accuracy on testing data 67.62(6762/10000)%
Training for epoch: 5
Accuracy on training data 80.54(40268/50000)%
Accuracy on testing data 68.22(6822/10000)%
Training for epoch: 6
Accuracy on training data 83.44(41718/50000)%
Accuracy on testing data 68.78(6878/10000)%
Training for epoch: 7
Accuracy on training data 84.54(42270/50000)%
Accuracy on testing data 68.56(6856/10000)%
Training for

In [None]:
model = TorchCNN(
    loss_func="CrossEntropyLoss",
    optimizer="SGD",
    learning_rate=1e-3,
    lmda_wt_decay=1e-4,
    batch_size=5,
    training_size=50000,
    testing_size=10000,
    seed=35,
    momentum=0.9,
    nn_stack=[nn.Conv2d(in_channels=3, out_channels=20, kernel_size=3, stride=1, padding=1),
             nn.ReLU(),
             nn.MaxPool2d(kernel_size=2, stride=2),
             nn.Dropout(0.00),
             nn.Conv2d(in_channels=20, out_channels=40, kernel_size=3, stride=1, padding=1),
             nn.ReLU(),
             nn.MaxPool2d(kernel_size=2, stride=2),
             nn.Dropout(0.00),
             nn.Conv2d(in_channels=40, out_channels=60, kernel_size=3, stride=1, padding=1),
             nn.ReLU(),
             nn.MaxPool2d(kernel_size=2, stride=2),
             nn.Dropout(0.00),
             nn.Flatten(),
             nn.Linear(60 * 4 * 4, 60 * 4 * 4 * 2),
             nn.ReLU(),
             nn.Dropout(0.00),
             nn.Linear(60 * 4 * 4 * 2, 10),
             nn.ReLU()]
).to(device)
train_data = CIFAR10(root='cifar10', train=True, download=True, transform=cifar_transform)
test_data = CIFAR10(root='cifar10', train=False, download=True, transform=cifar_transform)
torch.manual_seed(35)
training_loader = DataLoader(train_data, batch_size=5, shuffle=True)
testing_loader = DataLoader(test_data, batch_size=5, shuffle=True)
accuracies = []
for epoch in range(100):
    print(f"Training for epoch: {epoch}")
    model.train_model(training_loader)
    accuracies.append(model.evaluate(testing_loader, model.testing_size, "testing"))
print(max(accuracies))

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to cifar10/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=0.0, max=170498071.0), HTML(value='')))


Extracting cifar10/cifar-10-python.tar.gz to cifar10
Files already downloaded and verified
Training for epoch: 0
Accuracy on training data 46.17(23084/50000)%
Accuracy on testing data 45.28(4528/10000)%
Training for epoch: 1
Accuracy on training data 65.56(32779/50000)%
Accuracy on testing data 62.78(6278/10000)%
Training for epoch: 2
Accuracy on training data 72.79(36393/50000)%
Accuracy on testing data 67.89(6789/10000)%
Training for epoch: 3
Accuracy on training data 76.72(38359/50000)%
Accuracy on testing data 69.12(6912/10000)%
Training for epoch: 4
Accuracy on training data 78.43(39216/50000)%
Accuracy on testing data 68.97(6897/10000)%
Training for epoch: 5
Accuracy on training data 81.6(40801/50000)%
Accuracy on testing data 69.43(6943/10000)%
Training for epoch: 6
Accuracy on training data 84.94(42471/50000)%
Accuracy on testing data 70.14(7014/10000)%
Training for epoch: 7
Accuracy on training data 82.66(41331/50000)%
Accuracy on testing data 67.62(6762/10000)%
Training for 

Best observed accuracy of 76.39. Network was still learning, but at a slower pace(maybe because I was using CPU runtime)

# CIFAR100 Tuning

Try the same architecture on CIFAR100, but will 100 logics instead.

In [None]:
device="cuda"
model = TorchCNN(
    loss_func="CrossEntropyLoss",
    optimizer="SGD",
    learning_rate=1e-3,
    lmda_wt_decay=1e-4,
    batch_size=5,
    training_size=50000,
    testing_size=10000,
    seed=35,
    momentum=0.9,
    nn_stack=[nn.Conv2d(in_channels=3, out_channels=20, kernel_size=3, stride=1, padding=1),
             nn.ReLU(),
             nn.MaxPool2d(kernel_size=2, stride=2),
             nn.Dropout(0.00),
             nn.Conv2d(in_channels=20, out_channels=40, kernel_size=3, stride=1, padding=1),
             nn.ReLU(),
             nn.MaxPool2d(kernel_size=2, stride=2),
             nn.Dropout(0.00),
             nn.Conv2d(in_channels=40, out_channels=60, kernel_size=3, stride=1, padding=1),
             nn.ReLU(),
             nn.MaxPool2d(kernel_size=2, stride=2),
             nn.Dropout(0.00),
             nn.Flatten(),
             nn.Linear(60 * 4 * 4, 60 * 4 * 4 * 2),
             nn.ReLU(),
             nn.Dropout(0.00),
             nn.Linear(60 * 4 * 4 * 2, 100),
             nn.ReLU()]
).to(device)
train_data = CIFAR100(root='cifar10', train=True, download=True, transform=cifar_transform)
test_data = CIFAR100(root='cifar100', train=False, download=True, transform=cifar_transform)
torch.manual_seed(35)
training_loader = DataLoader(train_data, batch_size=5, shuffle=True)
testing_loader = DataLoader(test_data, batch_size=5, shuffle=True)
accuracies = []
for epoch in range(25):
    print(f"Training for epoch: {epoch}")
    model.train_model(training_loader)
    accuracies.append(model.evaluate(testing_loader, model.testing_size, "testing"))
print(max(accuracies))

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to cifar10/cifar-100-python.tar.gz


HBox(children=(FloatProgress(value=0.0, max=169001437.0), HTML(value='')))


Extracting cifar10/cifar-100-python.tar.gz to cifar10
Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to cifar100/cifar-100-python.tar.gz


HBox(children=(FloatProgress(value=0.0, max=169001437.0), HTML(value='')))


Extracting cifar100/cifar-100-python.tar.gz to cifar100
Training for epoch: 0
Accuracy on training data 11.04(5518/50000)%
Accuracy on testing data 10.6(1060/10000)%
Training for epoch: 1
Accuracy on training data 25.31(12656/50000)%
Accuracy on testing data 23.14(2314/10000)%
Training for epoch: 2
Accuracy on training data 35.75(17873/50000)%
Accuracy on testing data 29.97(2997/10000)%
Training for epoch: 3
Accuracy on training data 44.65(22324/50000)%
Accuracy on testing data 33.98(3398/10000)%
Training for epoch: 4
Accuracy on training data 52.8(26400/50000)%
Accuracy on testing data 34.78(3478/10000)%
Training for epoch: 5
Accuracy on training data 60.41(30203/50000)%
Accuracy on testing data 35.07(3507/10000)%
Training for epoch: 6
Accuracy on training data 62.45(31227/50000)%
Accuracy on testing data 32.77(3277/10000)%
Training for epoch: 7
Accuracy on training data 63.32(31662/50000)%
Accuracy on testing data 30.8(3080/10000)%
Training for epoch: 8
Accuracy on training data 69.

# CIFAR10 using Alexnet

In [8]:
from torchvision.models import AlexNet
from torch.optim import SGD
from torch.nn import CrossEntropyLoss
device = "cuda"

In [14]:
class AlexNetCNN(AlexNet):
    def train_model(self, training_loader: DataLoader, verbose: bool = True):
        optimizer = SGD(params=self.parameters(), lr=1e-3, momentum=0.9)
        loss_function = CrossEntropyLoss()
        for input, labels in training_loader:
            prediction = self(input.to(device))
            labels = labels.to(device)
            loss = loss_function(prediction, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        if verbose:
            self.evaluate(training_loader, verbose=verbose)

    def evaluate(self, data_loader: DataLoader, data_type: str = "training",
                 verbose: bool = True) -> float:
        correct_classifications = 0
        dataset_size = len(data_loader.dataset)
        with torch.no_grad():
            for input, labels in data_loader:
                prediction = self(input.to(device))
                labels = labels.to(device)
                correct_classifications += (prediction.argmax(1) == labels).type(torch.float).sum().item()
        accuracy = round((correct_classifications / dataset_size) * 100, 2)
        if verbose:
            print(f'Accuracy on {data_type} data {accuracy}%')
        return accuracy


def run():
    model = AlexNetCNN(10).to(device)
    # need input size of 63x63 for alexnet
    transform = transforms.Compose(
        [
            transforms.Resize((63, 63)),
            transforms.ToTensor()
        ])
    train_data = CIFAR10(root='cifar10', train=True, download=True, transform=transform)
    test_data = CIFAR10(root='cifar10', train=False, download=True, transform=transform)

    training_loader = DataLoader(train_data, batch_size=4, shuffle=True)
    testing_loader = DataLoader(test_data, batch_size=4, shuffle=True)

    accuracies = []
    for epoch in range(25):
        print(f"Training for epoch: {epoch}")
        model.train_model(training_loader)
        accuracies.append(model.evaluate(testing_loader, "testing"))
    print(max(accuracies))

In [15]:
run()

Files already downloaded and verified
Files already downloaded and verified
Training for epoch: 0
Accuracy on training data 26.71%
Accuracy on testing data 26.25%
Training for epoch: 1
Accuracy on training data 46.72%
Accuracy on testing data 45.91%
Training for epoch: 2
Accuracy on training data 57.22%
Accuracy on testing data 54.9%
Training for epoch: 3
Accuracy on training data 65.81%
Accuracy on testing data 61.6%
Training for epoch: 4
Accuracy on training data 69.53%
Accuracy on testing data 63.45%
Training for epoch: 5
Accuracy on training data 72.15%
Accuracy on testing data 65.0%
Training for epoch: 6
Accuracy on training data 77.82%
Accuracy on testing data 68.8%
Training for epoch: 7
Accuracy on training data 81.38%
Accuracy on testing data 70.05%
Training for epoch: 8
Accuracy on training data 81.35%
Accuracy on testing data 68.26%
Training for epoch: 9
Accuracy on training data 85.86%
Accuracy on testing data 70.09%
Training for epoch: 10
Accuracy on training data 85.7%
Acc

KeyboardInterrupt: ignored

# CIFAR100 using Alexnet

In [16]:
def run():
    model = AlexNetCNN(100).to(device)
    # need input size of 63x63 for alexnet
    transform = transforms.Compose(
        [
            transforms.Resize((63, 63)),
            transforms.ToTensor()
        ])
    train_data = CIFAR100(root='cifar100', train=True, download=True, transform=transform)
    test_data = CIFAR100(root='cifar100', train=False, download=True, transform=transform)

    training_loader = DataLoader(train_data, batch_size=4, shuffle=True)
    testing_loader = DataLoader(test_data, batch_size=4)

    accuracies = []
    for epoch in range(25):
        print(f"Training for epoch: {epoch}")
        model.train_model(training_loader)
        accuracies.append(model.evaluate(testing_loader, "testing"))
    print(max(accuracies))

In [17]:
run()

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to cifar100/cifar-100-python.tar.gz


HBox(children=(FloatProgress(value=0.0, max=169001437.0), HTML(value='')))


Extracting cifar100/cifar-100-python.tar.gz to cifar100
Files already downloaded and verified
Training for epoch: 0
Accuracy on training data 1.94%
Accuracy on testing data 1.85%
Training for epoch: 1
Accuracy on training data 3.36%
Accuracy on testing data 3.48%
Training for epoch: 2
Accuracy on training data 7.32%
Accuracy on testing data 7.2%
Training for epoch: 3
Accuracy on training data 13.48%
Accuracy on testing data 13.51%
Training for epoch: 4
Accuracy on training data 18.82%
Accuracy on testing data 17.85%
Training for epoch: 5
Accuracy on training data 21.85%
Accuracy on testing data 20.41%
Training for epoch: 6
Accuracy on training data 30.39%
Accuracy on testing data 26.03%
Training for epoch: 7
Accuracy on training data 34.39%
Accuracy on testing data 29.22%
Training for epoch: 8
Accuracy on training data 37.84%
Accuracy on testing data 29.72%
Training for epoch: 9
Accuracy on training data 42.79%
Accuracy on testing data 33.27%
Training for epoch: 10
Accuracy on trainin