<a href="https://colab.research.google.com/github/venomouscyanide/dl_sain/blob/master/week4/week4_pytorch_tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import itertools
import time
from typing import List, Any, Dict

# third party
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.optim.sgd import SGD
from torchvision.datasets import MNIST
from torch.utils.data.dataset import random_split
from torchvision.transforms import ToTensor

In [2]:
# Use Nvidia CUDA if available
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Using {device} device')


Using cpu device


In [4]:
class TorchMLP(nn.Module):
    def __init__(self, size: List[int], loss_func: str, hidden_act_function: str, output_act_function: str,
                 output_act_function_kwargs: Dict[Any, Any],
                 optimizer: str, learning_rate: float, lmda_wt_decay: float, p_to_be_zeroed: float, batch_size: int,
                 momentum: float, training_size: int = 60000,
                 testing_size: int = 10000, seed: int = 42, dropout_on_input: bool = False):
        super().__init__()
        self.size = size
        self.loss_func = loss_func
        self.hidden_act_function = hidden_act_function
        self.output_act_function = output_act_function
        self.output_act_function_kwargs = output_act_function_kwargs
        self.optimizer = optimizer
        self.learning_rate = learning_rate
        self.lmda_wt_decay = lmda_wt_decay
        self.p_to_be_zeroed = p_to_be_zeroed
        self.dropout_on_input = dropout_on_input
        self.flatten = nn.Flatten()
        nn_stack = self._form_nn_stack()
        self.mlp = nn.Sequential(*nn_stack)
        self.momentum = momentum
        optimizer_params = self._get_optimizer_params()
        self.optimizer = getattr(torch.optim, self.optimizer)(**optimizer_params)
        self.loss_function = getattr(nn, self.loss_func)()
        self.batch_size = batch_size
        self.training_size = training_size
        self.testing_size = testing_size
        self.seed = seed
        
    def _form_nn_stack(self):
        nn_stack = []
        # hidden layers
        for layer in range(len(self.size) - 2):
            nn_stack.append(nn.Linear(self.size[layer], self.size[layer + 1]))
            if (layer == 0 and self.dropout_on_input and self.p_to_be_zeroed > 0) or \
                    (layer > 0 and self.p_to_be_zeroed > 0):
                nn_stack.append(nn.Dropout(self.p_to_be_zeroed))
            nn_stack.append(getattr(nn, self.hidden_act_function)())
        # output layer
        nn_stack.append(nn.Linear(self.size[-2], self.size[-1]))
        nn_stack.append(getattr(nn, self.output_act_function)(**self.output_act_function_kwargs))
        return nn_stack

    def _get_optimizer_params(self):
        opt_params = {
            "params": self.parameters(),
            "lr": self.learning_rate,
            "weight_decay": self.lmda_wt_decay,
            "momentum": self.momentum,
        }
        return opt_params

    def forward(self, data: torch.Tensor) -> torch.Tensor:
        data = self.flatten(data)
        logits = self.mlp(data)
        return logits

    def train_model(self, training_loader: DataLoader, verbose: int = True):
        torch.manual_seed(self.seed)
        for input, labels in itertools.islice(training_loader, self.training_size // self.batch_size):
            prediction = self(input.to(device))
            labels = labels.to(device)
            if self.loss_function._get_name() == 'MSELoss':
                labels = torch.nn.functional.one_hot(labels, 10).float()
            loss = self.loss_function(prediction, labels)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
        if verbose:
            self.evaluate(training_loader, self.training_size, verbose=verbose)

    def evaluate(self, data_loader: DataLoader, dataset_size: int, data_type: str = "training",
                 verbose: bool = True) -> float:
        correct_classifications = 0
        with torch.no_grad():
            torch.manual_seed(self.seed)
            for input, labels in itertools.islice(data_loader, dataset_size // self.batch_size):
                prediction = self(input.to(device))
                labels = labels.to(device)
                correct_classifications += (prediction.argmax(1) == labels).type(torch.float).sum().item()
        accuracy = round((correct_classifications / dataset_size) * 100, 2)
        if verbose:
            print(f'Accuracy on {data_type} data {accuracy}%')
        return accuracy


In [5]:
def one_hot_encode(y):
    return torch.zeros(10, dtype=torch.float).scatter_(0, torch.tensor(y), value=1).reshape(1, 10)


In [7]:
class EvalData:
    def __init__(self):
        self.data = pd.DataFrame(
            columns=["size", "epochs", "hidden_act_function", "output_act_function", "loss_func",
                     "optimizer", "learning_rate", "weight_decay", "batch_size", "momentum",
                     "testing_dataset_type", "training_size", "testing_size",
                     "p_to_be_zeroed", "dropout_on_input_layer",
                     "best_accuracy", "avg_accuracy",
                     "avg_time_taken"])

    def add_record(self, data_dict: Dict):
        self.data = self.data.append(data_dict, ignore_index=True)
        print(f"Added record to eval DF. Total records so far: {self.data.shape[0]}")

    def get(self, rearrange: bool) -> pd.DataFrame:
        if rearrange:
            # push "best_accuracy", "avg_accuracy", "avg_time_taken" cols to the front and sort by "avg_accuracy"
            curr_cols = self.data.columns.tolist()
            updated_order = curr_cols[-3:] + curr_cols[:-3]
            self.data = self.data[updated_order]
        return self.data.sort_values(by="avg_accuracy", ascending=False)

In [8]:
class HyperTuner:
    def tune(self, config: Dict, verbose: bool = True) -> pd.DataFrame:
        eval_data = EvalData()
        train_data = MNIST(root='mnist_torch_data', train=True, download=True, transform=ToTensor())
        test_data = MNIST(root='mnist_torch_data', train=False, download=True, transform=ToTensor())

        all_combinations = list(itertools.product(*config.values()))
        print(f"Total combinations for exp: {len(all_combinations)}")
        for combination in all_combinations:
            # reconstruct the dict using the combination
            combination_dict = {k: v for k, v in zip(config.keys(), combination)}
            accuracies = np.array([])
            time_consumed = np.array([])

            for seed in [28, 35, 42]:
                batch_size = combination_dict["batch_size"]

                torch.manual_seed(seed)
                training_subset, validation_subset = random_split(train_data, lengths=[50000, 10000])
                training_loader = DataLoader(training_subset, batch_size=batch_size, shuffle=True)
                validation_loader = DataLoader(validation_subset, batch_size=batch_size, shuffle=True)
                testing_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True)

                testing_dataset_type = combination_dict["testing_dataset_type"]
                testing_loader = validation_loader if testing_dataset_type == "validation" else testing_loader
                output_act_fn = combination_dict["output_act_function"]

                model = TorchMLP(
                    size=combination_dict["size"],
                    loss_func=combination_dict["loss_func"],
                    hidden_act_function=combination_dict["hidden_act_function"],
                    output_act_function=combination_dict["output_act_function"],
                    output_act_function_kwargs={"dim": 1} if output_act_fn in ["Softmax", "LogSoftmax"] else {},
                    optimizer=combination_dict["optimizer"],
                    learning_rate=combination_dict["learning_rate"],
                    lmda_wt_decay=combination_dict["weight_decay"],
                    p_to_be_zeroed=combination_dict["p_to_be_zeroed"],
                    batch_size=batch_size,
                    momentum=combination_dict["momentum"],
                    training_size=combination_dict["training_size"],
                    testing_size=combination_dict["testing_size"],
                    seed=seed,
                    dropout_on_input=combination_dict["dropout_on_input_layer"],
                ).to(device)

                num_epochs = combination_dict["epochs"]
                time_epoch_start = time.time()
                for epoch in range(num_epochs):
                    if verbose:
                        print(f"Training for epoch: {epoch}")
                    model.train_model(training_loader, verbose)
                    accuracy = model.evaluate(testing_loader, model.testing_size, "testing", verbose)
                    accuracies = np.append(accuracies, accuracy)
                time_for_seed = time.time() - time_epoch_start
                time_consumed = np.append(time_consumed, time_for_seed)

            avg_time = np.mean(time_consumed)
            last_5_accuracies_of_three_seeds = np.concatenate((accuracies[epoch - 5: epoch], accuracies[epoch * 2 - 5: epoch * 2], accuracies[epoch * 3 - 5: epoch * 3]))
            avg_accuracy = np.mean(last_5_accuracies_of_three_seeds)
            best_accuracy = np.max(accuracies)
            combination_dict.update({
                "best_accuracy": best_accuracy,
                "avg_accuracy": avg_accuracy,
                "avg_time_taken": avg_time
            })

            eval_data.add_record(combination_dict)

            if verbose:
                print(eval_data.get(rearrange=False))
        return eval_data.get(rearrange=True)

Find best activation function using MSELoss using different learning rates

In [None]:
class TestingConfig:
    CONFIG = {
        "size": [[784, 30, 10]], 
        "epochs": [25],  
        "hidden_act_function": ["Sigmoid", "ReLU", "Tanh"],  
        "output_act_function": ["Sigmoid", "ReLU", "Tanh"],  
        "loss_func": ["MSELoss"],  
        "optimizer": ["SGD"],  
        "learning_rate": [0.01, 0.1, 1], 
        "weight_decay": [0.0],  
        "batch_size": [10],  
        "testing_dataset_type": ["validation"],  
        "training_size": [5000],  
        "testing_size": [2000],  
        "p_to_be_zeroed": [0.0],  
        "dropout_on_input_layer": [False],  
    }
eval_data = HyperTuner().tune(TestingConfig.CONFIG, False)

In [None]:
eval_data

Unnamed: 0,best_accuracy,avg_accuracy,avg_time_taken,size,epochs,hidden_act_function,output_act_function,loss_func,optimizer,learning_rate,weight_decay,batch_size,testing_dataset_type,training_size,testing_size,p_to_be_zeroed,dropout_on_input_layer
11,92.95,91.385333,37.135675,"[784, 30, 10]",25,ReLU,Sigmoid,MSELoss,SGD,1.0,0.0,10,validation,5000,2000,0.0,False
20,92.65,91.064667,36.975883,"[784, 30, 10]",25,Tanh,Sigmoid,MSELoss,SGD,1.0,0.0,10,validation,5000,2000,0.0,False
17,91.5,90.421333,37.064687,"[784, 30, 10]",25,ReLU,Tanh,MSELoss,SGD,1.0,0.0,10,validation,5000,2000,0.0,False
8,92.6,90.273333,37.660565,"[784, 30, 10]",25,Sigmoid,Tanh,MSELoss,SGD,1.0,0.0,10,validation,5000,2000,0.0,False
16,92.05,90.099333,37.112951,"[784, 30, 10]",25,ReLU,Tanh,MSELoss,SGD,0.1,0.0,10,validation,5000,2000,0.0,False
14,93.9,89.464667,37.613673,"[784, 30, 10]",25,ReLU,ReLU,MSELoss,SGD,1.0,0.0,10,validation,5000,2000,0.0,False
26,90.8,88.983333,36.760986,"[784, 30, 10]",25,Tanh,Tanh,MSELoss,SGD,1.0,0.0,10,validation,5000,2000,0.0,False
25,91.35,88.768,36.856509,"[784, 30, 10]",25,Tanh,Tanh,MSELoss,SGD,0.1,0.0,10,validation,5000,2000,0.0,False
23,92.95,88.038,37.147289,"[784, 30, 10]",25,Tanh,ReLU,MSELoss,SGD,1.0,0.0,10,validation,5000,2000,0.0,False
2,91.95,86.14,37.207622,"[784, 30, 10]",25,Sigmoid,Sigmoid,MSELoss,SGD,1.0,0.0,10,validation,5000,2000,0.0,False


Relu hidden and Softmax output seems to work best at a learning rate of 1.
<br>
Fix ReLU as the hidden layer. Try with different learning rates for Relu, Tanh and Sigmoid functions as output activations and loss functions

In [None]:
class TestingConfig:
    CONFIG = {
        "size": [[784, 30, 10]], 
        "epochs": [25],  
        "hidden_act_function": ["ReLU"],  
        "output_act_function": ["Sigmoid", "ReLU", "Tanh", "Softmax"],  
        "loss_func": ["MSELoss", "CrossEntropyLoss"],  
        "optimizer": ["SGD"],  
        "learning_rate": [0.01, 1, 3, 5], 
        "weight_decay": [0.0],  
        "batch_size": [10],  
        "testing_dataset_type": ["validation"],  
        "training_size": [5000],  
        "testing_size": [2000],  
        "p_to_be_zeroed": [0.0],  
        "dropout_on_input_layer": [False],  
    }
eval_data = HyperTuner().tune(TestingConfig.CONFIG, False)

Total combinations for exp: 32
Added record to eval DF. Total records so far: 1
Added record to eval DF. Total records so far: 2
Added record to eval DF. Total records so far: 3
Added record to eval DF. Total records so far: 4
Added record to eval DF. Total records so far: 5
Added record to eval DF. Total records so far: 6
Added record to eval DF. Total records so far: 7
Added record to eval DF. Total records so far: 8
Added record to eval DF. Total records so far: 9
Added record to eval DF. Total records so far: 10
Added record to eval DF. Total records so far: 11
Added record to eval DF. Total records so far: 12
Added record to eval DF. Total records so far: 13
Added record to eval DF. Total records so far: 14
Added record to eval DF. Total records so far: 15
Added record to eval DF. Total records so far: 16
Added record to eval DF. Total records so far: 17
Added record to eval DF. Total records so far: 18
Added record to eval DF. Total records so far: 19
Added record to eval DF. Tot

In [None]:
eval_data

Unnamed: 0,best_accuracy,avg_accuracy,avg_time_taken,size,epochs,hidden_act_function,output_act_function,loss_func,optimizer,learning_rate,weight_decay,batch_size,testing_dataset_type,training_size,testing_size,p_to_be_zeroed,dropout_on_input_layer
2,93.7,92.07,37.466634,"[784, 30, 10]",25,ReLU,Sigmoid,MSELoss,SGD,3.0,0.0,10,validation,5000,2000,0.0,False
3,93.65,91.826,37.385592,"[784, 30, 10]",25,ReLU,Sigmoid,MSELoss,SGD,5.0,0.0,10,validation,5000,2000,0.0,False
26,93.75,91.802,37.312673,"[784, 30, 10]",25,ReLU,Softmax,MSELoss,SGD,3.0,0.0,10,validation,5000,2000,0.0,False
25,93.0,91.405333,37.305377,"[784, 30, 10]",25,ReLU,Softmax,MSELoss,SGD,1.0,0.0,10,validation,5000,2000,0.0,False
1,92.95,91.385333,37.356164,"[784, 30, 10]",25,ReLU,Sigmoid,MSELoss,SGD,1.0,0.0,10,validation,5000,2000,0.0,False
5,93.7,90.724,36.034096,"[784, 30, 10]",25,ReLU,Sigmoid,CrossEntropyLoss,SGD,1.0,0.0,10,validation,5000,2000,0.0,False
27,93.35,90.690667,37.35746,"[784, 30, 10]",25,ReLU,Softmax,MSELoss,SGD,5.0,0.0,10,validation,5000,2000,0.0,False
17,91.5,90.421333,37.027907,"[784, 30, 10]",25,ReLU,Tanh,MSELoss,SGD,1.0,0.0,10,validation,5000,2000,0.0,False
9,93.9,89.464667,37.054228,"[784, 30, 10]",25,ReLU,ReLU,MSELoss,SGD,1.0,0.0,10,validation,5000,2000,0.0,False
20,91.3,88.831333,35.774298,"[784, 30, 10]",25,ReLU,Tanh,CrossEntropyLoss,SGD,0.01,0.0,10,validation,5000,2000,0.0,False


Remove tanh, from output function. MSE still seems to be the best with a combination of (ReLU and Sigmoid) 
<br>
Add more epochs and training data size to confirm which output function to take and learning rate as well

In [None]:
class TestingConfig:
    CONFIG = {
        "size": [[784, 30, 10]], 
        "epochs": [25, 50],  
        "hidden_act_function": ["ReLU"],  
        "output_act_function": ["Sigmoid", "ReLU", "Softmax"],  
        "loss_func": ["MSELoss", "CrossEntropyLoss"],  
        "optimizer": ["SGD"],  
        "learning_rate": [0.01, 0.1, 1, 5], 
        "weight_decay": [0.0],  
        "batch_size": [10],  
        "testing_dataset_type": ["validation"],  
        "training_size": [10000],  
        "testing_size": [2000],  
        "p_to_be_zeroed": [0.0],  
        "dropout_on_input_layer": [False],  
    }
eval_data = HyperTuner().tune(TestingConfig.CONFIG, False)

Total combinations for exp: 48
Added record to eval DF. Total records so far: 1
Added record to eval DF. Total records so far: 2
Added record to eval DF. Total records so far: 3
Added record to eval DF. Total records so far: 4
Added record to eval DF. Total records so far: 5
Added record to eval DF. Total records so far: 6
Added record to eval DF. Total records so far: 7
Added record to eval DF. Total records so far: 8
Added record to eval DF. Total records so far: 9
Added record to eval DF. Total records so far: 10
Added record to eval DF. Total records so far: 11
Added record to eval DF. Total records so far: 12
Added record to eval DF. Total records so far: 13
Added record to eval DF. Total records so far: 14
Added record to eval DF. Total records so far: 15
Added record to eval DF. Total records so far: 16
Added record to eval DF. Total records so far: 17
Added record to eval DF. Total records so far: 18
Added record to eval DF. Total records so far: 19
Added record to eval DF. Tot

In [None]:
eval_data

Unnamed: 0,best_accuracy,avg_accuracy,avg_time_taken,size,epochs,hidden_act_function,output_act_function,loss_func,optimizer,learning_rate,weight_decay,batch_size,testing_dataset_type,training_size,testing_size,p_to_be_zeroed,dropout_on_input_layer
42,95.5,94.031,112.802789,"[784, 30, 10]",50,ReLU,Softmax,MSELoss,SGD,1.0,0.0,10,validation,10000,2000,0.0,False
26,95.05,93.844667,110.338788,"[784, 30, 10]",50,ReLU,Sigmoid,MSELoss,SGD,1.0,0.0,10,validation,10000,2000,0.0,False
27,94.8,93.723667,112.778563,"[784, 30, 10]",50,ReLU,Sigmoid,MSELoss,SGD,5.0,0.0,10,validation,10000,2000,0.0,False
34,95.2,93.674667,111.357,"[784, 30, 10]",50,ReLU,ReLU,MSELoss,SGD,1.0,0.0,10,validation,10000,2000,0.0,False
10,95.2,93.5,55.335983,"[784, 30, 10]",25,ReLU,ReLU,MSELoss,SGD,1.0,0.0,10,validation,10000,2000,0.0,False
30,95.55,93.349,108.933675,"[784, 30, 10]",50,ReLU,Sigmoid,CrossEntropyLoss,SGD,1.0,0.0,10,validation,10000,2000,0.0,False
3,94.6,93.336,55.869598,"[784, 30, 10]",25,ReLU,Sigmoid,MSELoss,SGD,5.0,0.0,10,validation,10000,2000,0.0,False
2,94.7,93.23,55.601242,"[784, 30, 10]",25,ReLU,Sigmoid,MSELoss,SGD,1.0,0.0,10,validation,10000,2000,0.0,False
18,94.9,93.178,55.683721,"[784, 30, 10]",25,ReLU,Softmax,MSELoss,SGD,1.0,0.0,10,validation,10000,2000,0.0,False
6,94.8,92.615333,55.162861,"[784, 30, 10]",25,ReLU,Sigmoid,CrossEntropyLoss,SGD,1.0,0.0,10,validation,10000,2000,0.0,False


Softmax + CrossEntropyLoss performs really bad here.
 <br>
If using Softmax Activation; loss function cannot be CrossEntropyLoss
We can use LogSoftmax at output layer + NLLLoss instead? https://pytorch.org/docs/stable/generated/torch.nn.NLLLoss.html

In [None]:
class TestingConfig:
    CONFIG = {
        "size": [[784, 30, 10]], 
        "epochs": [50],  
        "hidden_act_function": ["ReLU"],  
        "output_act_function": ["LogSoftmax"],  
        "loss_func": ["NLLLoss"],  
        "optimizer": ["SGD"],  
        "learning_rate": [1], 
        "weight_decay": [0.0],  
        "batch_size": [10],  
        "testing_dataset_type": ["validation"],  
        "training_size": [10000],  
        "testing_size": [2000],  
        "p_to_be_zeroed": [0.0],  
        "dropout_on_input_layer": [False],  
    }
eval_data = HyperTuner().tune(TestingConfig.CONFIG, False)

Total combinations for exp: 1
Added record to eval DF. Total records so far: 1


In [None]:
eval_data

Unnamed: 0,best_accuracy,avg_accuracy,avg_time_taken,size,epochs,hidden_act_function,output_act_function,loss_func,optimizer,learning_rate,weight_decay,batch_size,testing_dataset_type,training_size,testing_size,p_to_be_zeroed,dropout_on_input_layer
0,28.5,14.027333,107.508718,"[784, 30, 10]",50,ReLU,LogSoftmax,NLLLoss,SGD,1,0.0,10,validation,10000,2000,0.0,False


ReLU + ReLU using CrossEntropyLoss

In [None]:
class TestingConfig:
    CONFIG = {
        "size": [[784, 30, 10]], 
        "epochs": [100],  
        "hidden_act_function": ["ReLU"],  
        "output_act_function": ["Softmax"],  
        "loss_func": ["MSELoss"],  
        "optimizer": ["SGD"],  
        "learning_rate": [1], 
        "weight_decay": [0.0],  
        "batch_size": [10],  
        "testing_dataset_type": ["validation"],  
        "training_size": [10000],  
        "testing_size": [2000],  
        "p_to_be_zeroed": [0.0],  
        "dropout_on_input_layer": [False],  
    }
eval_data = HyperTuner().tune(TestingConfig.CONFIG, True)

Total combinations for exp: 1
Training for epoch: 0
Accuracy on training data 88.22%
Accuracy on testing data 88.45%
Training for epoch: 1
Accuracy on training data 91.3%
Accuracy on testing data 91.25%
Training for epoch: 2
Accuracy on training data 92.77%
Accuracy on testing data 92.2%
Training for epoch: 3
Accuracy on training data 92.9%
Accuracy on testing data 92.2%
Training for epoch: 4
Accuracy on training data 93.94%
Accuracy on testing data 92.5%
Training for epoch: 5
Accuracy on training data 94.66%
Accuracy on testing data 92.8%
Training for epoch: 6
Accuracy on training data 94.73%
Accuracy on testing data 93.0%
Training for epoch: 7
Accuracy on training data 95.64%
Accuracy on testing data 93.6%
Training for epoch: 8
Accuracy on training data 95.53%
Accuracy on testing data 93.25%
Training for epoch: 9
Accuracy on training data 96.5%
Accuracy on testing data 93.7%
Training for epoch: 10
Accuracy on training data 96.13%
Accuracy on testing data 93.4%
Training for epoch: 11


KeyboardInterrupt: ignored

In [None]:
class TestingConfig:
    CONFIG = {
        "size": [[784, 30, 10]], 
        "epochs": [50],  
        "hidden_act_function": ["ReLU"],  
        "output_act_function": ["Softmax"],  
        "loss_func": ["MSELoss"],  
        "optimizer": ["SGD"],  
        "learning_rate": [1], 
        "weight_decay": [0.0, 0.1, 0.5, 1, 5],  
        "batch_size": [10],  
        "testing_dataset_type": ["validation"],  
        "training_size": [5000],  
        "testing_size": [2000],  
        "p_to_be_zeroed": [0.0],  
        "dropout_on_input_layer": [False],  
    }
eval_data = HyperTuner().tune(TestingConfig.CONFIG, False)

In [None]:
eval_data

Unnamed: 0,hidden_act_function,output_act_function,loss_func,optimizer,learning_rate,weight_decay,batch_size,testing_dataset_type,training_size,testing_size,p_to_be_zeroed,dropout_on_input_layer,best_accuracy,avg_accuracy,avg_time_taken,size,epochs
0,ReLU,Softmax,MSELoss,SGD,1,0.0,10,validation,10000,2000,0.0,False,95.5,94.031,186.375182,"[784, 30, 10]",50
2,ReLU,Softmax,MSELoss,SGD,1,1.0,10,validation,10000,2000,0.0,False,11.75,10.866667,187.869351,"[784, 30, 10]",50
1,ReLU,Softmax,MSELoss,SGD,1,0.5,10,validation,10000,2000,0.0,False,11.75,10.233333,239.849532,"[784, 30, 10]",50
3,ReLU,Softmax,MSELoss,SGD,1,5.0,10,validation,10000,2000,0.0,False,10.4,10.166667,188.426046,"[784, 30, 10]",50


In [None]:
class TestingConfig:
    CONFIG = {
        "size": [[784, 30, 10], [784, 100, 10], [784, 100, 100, 10]], 
        "epochs": [50],  
        "hidden_act_function": ["ReLU"],  
        "output_act_function": ["ReLU"],  
        "loss_func": ["CrossEntropyLoss"],  
        "optimizer": ["SGD"],  
        "learning_rate": [0.01], 
        "weight_decay": [0.0],  
        "batch_size": [10],  
        "testing_dataset_type": ["validation"],  
        "training_size": [10000],  
        "testing_size": [2000],  
        "p_to_be_zeroed": [0.0],  
        "dropout_on_input_layer": [False],  
    }
eval_data = HyperTuner().tune(TestingConfig.CONFIG, True)

Total combinations for exp: 3
Training for epoch: 0
Accuracy on training data 56.98%
Accuracy on testing data 56.9%
Training for epoch: 1
Accuracy on training data 71.78%
Accuracy on testing data 71.55%
Training for epoch: 2
Accuracy on training data 75.86%
Accuracy on testing data 75.75%
Training for epoch: 3
Accuracy on training data 77.94%
Accuracy on testing data 78.0%
Training for epoch: 4
Accuracy on training data 79.27%
Accuracy on testing data 78.85%
Training for epoch: 5
Accuracy on training data 80.19%
Accuracy on testing data 80.1%
Training for epoch: 6
Accuracy on training data 80.86%
Accuracy on testing data 80.8%
Training for epoch: 7
Accuracy on training data 81.65%
Accuracy on testing data 81.45%
Training for epoch: 8
Accuracy on training data 82.2%
Accuracy on testing data 81.7%
Training for epoch: 9
Accuracy on training data 82.56%
Accuracy on testing data 81.8%
Training for epoch: 10
Accuracy on training data 82.86%
Accuracy on testing data 82.1%
Training for epoch: 

TypeError: ignored

In [None]:
class TestingConfig:
    CONFIG = {
        "size": [[784, 100, 100, 10]], 
        "epochs": [50],  
        "hidden_act_function": ["ReLU"],  
        "output_act_function": ["ReLU"],  
        "loss_func": ["CrossEntropyLoss"],  
        "optimizer": ["SGD"],  
        "learning_rate": [0.01], 
        "weight_decay": [0.0],  
        "batch_size": [10],  
        "testing_dataset_type": ["validation"],  
        "training_size": [10000],  
        "testing_size": [2000],  
        "p_to_be_zeroed": [0.0, 0.15, 0.20, 0.25],  
        "dropout_on_input_layer": [True, False],  
    }
eval_data = HyperTuner().tune(TestingConfig.CONFIG, False)

Total combinations for exp: 8
Added record to eval DF. Total records so far: 1
Added record to eval DF. Total records so far: 2
Added record to eval DF. Total records so far: 3
Added record to eval DF. Total records so far: 4
Added record to eval DF. Total records so far: 5
Added record to eval DF. Total records so far: 6
Added record to eval DF. Total records so far: 7
Added record to eval DF. Total records so far: 8


In [None]:
eval_data

Unnamed: 0,best_accuracy,avg_accuracy,avg_time_taken,size,epochs,hidden_act_function,output_act_function,loss_func,optimizer,learning_rate,weight_decay,batch_size,testing_dataset_type,training_size,testing_size,p_to_be_zeroed,dropout_on_input_layer
3,95.2,91.75,134.366458,"[784, 100, 100, 10]",50,ReLU,ReLU,CrossEntropyLoss,SGD,0.01,0.0,10,validation,10000,2000,0.15,False
7,94.45,91.457333,134.786392,"[784, 100, 100, 10]",50,ReLU,ReLU,CrossEntropyLoss,SGD,0.01,0.0,10,validation,10000,2000,0.25,False
2,93.75,90.518667,137.006188,"[784, 100, 100, 10]",50,ReLU,ReLU,CrossEntropyLoss,SGD,0.01,0.0,10,validation,10000,2000,0.15,True
4,92.35,89.725,136.551217,"[784, 100, 100, 10]",50,ReLU,ReLU,CrossEntropyLoss,SGD,0.01,0.0,10,validation,10000,2000,0.2,True
5,95.0,89.156,132.419563,"[784, 100, 100, 10]",50,ReLU,ReLU,CrossEntropyLoss,SGD,0.01,0.0,10,validation,10000,2000,0.2,False
6,91.2,88.756333,138.792837,"[784, 100, 100, 10]",50,ReLU,ReLU,CrossEntropyLoss,SGD,0.01,0.0,10,validation,10000,2000,0.25,True
0,96.15,83.360667,123.694075,"[784, 100, 100, 10]",50,ReLU,ReLU,CrossEntropyLoss,SGD,0.01,0.0,10,validation,10000,2000,0.0,True
1,96.15,83.360667,124.731418,"[784, 100, 100, 10]",50,ReLU,ReLU,CrossEntropyLoss,SGD,0.01,0.0,10,validation,10000,2000,0.0,False


In [None]:
class TestingConfig:
    CONFIG = {
        "size": [[784, 100, 100, 10]], 
        "epochs": [50],  
        "hidden_act_function": ["ReLU"],  
        "output_act_function": ["ReLU"],  
        "loss_func": ["CrossEntropyLoss"],  
        "optimizer": ["SGD"],  
        "learning_rate": [1e-2], 
        "weight_decay": [1e-4, 1e-3, 1e-2],  
        "batch_size": [10],  
        "testing_dataset_type": ["validation"],  
        "training_size": [10000],  
        "testing_size": [2000],  
        "p_to_be_zeroed": [0.0],  
        "dropout_on_input_layer": [False],  
    }
eval_data = HyperTuner().tune(TestingConfig.CONFIG, False)

In [None]:
eval_data

Unnamed: 0,best_accuracy,avg_accuracy,avg_time_taken,size,epochs,hidden_act_function,output_act_function,loss_func,optimizer,learning_rate,weight_decay,batch_size,testing_dataset_type,training_size,testing_size,p_to_be_zeroed,dropout_on_input_layer
0,96.0,85.453333,134.261753,"[784, 100, 100, 10]",50,ReLU,ReLU,CrossEntropyLoss,SGD,0.01,0.0001,10,validation,10000,2000,0.0,False
1,96.0,81.793333,130.313463,"[784, 100, 100, 10]",50,ReLU,ReLU,CrossEntropyLoss,SGD,0.01,0.001,10,validation,10000,2000,0.0,False
2,93.8,76.666667,130.804522,"[784, 100, 100, 10]",50,ReLU,ReLU,CrossEntropyLoss,SGD,0.01,0.01,10,validation,10000,2000,0.0,False


In [None]:
class TestingConfig:
    CONFIG = {
        "size": [[784, 100, 100, 10]], 
        "epochs": [50],  
        "hidden_act_function": ["ReLU"],  
        "output_act_function": ["ReLU"],  
        "loss_func": ["CrossEntropyLoss"],  
        "optimizer": ["SGD"],  
        "learning_rate": [1e-2], 
        "weight_decay": [1e-4],  
        "batch_size": [10, 100, 250],  
        "testing_dataset_type": ["validation"],  
        "training_size": [10000],  
        "testing_size": [2000],  
        "p_to_be_zeroed": [0.0],  
        "dropout_on_input_layer": [False],  
    }
eval_data = HyperTuner().tune(TestingConfig.CONFIG, False)

Total combinations for exp: 3
Added record to eval DF. Total records so far: 1
Added record to eval DF. Total records so far: 2
Added record to eval DF. Total records so far: 3


In [None]:
eval_data

Unnamed: 0,best_accuracy,avg_accuracy,avg_time_taken,size,epochs,hidden_act_function,output_act_function,loss_func,optimizer,learning_rate,weight_decay,batch_size,testing_dataset_type,training_size,testing_size,p_to_be_zeroed,dropout_on_input_layer
0,96.0,85.453333,129.98956,"[784, 100, 100, 10]",50,ReLU,ReLU,CrossEntropyLoss,SGD,0.01,0.0001,10,validation,10000,2000,0.0,False
1,91.95,79.076667,70.63768,"[784, 100, 100, 10]",50,ReLU,ReLU,CrossEntropyLoss,SGD,0.01,0.0001,100,validation,10000,2000,0.0,False
2,86.9,69.423333,65.616332,"[784, 100, 100, 10]",50,ReLU,ReLU,CrossEntropyLoss,SGD,0.01,0.0001,250,validation,10000,2000,0.0,False


In [None]:
model = TorchMLP(
        size=[784, 100, 100, 10],
        loss_func="CrossEntropyLoss",
        hidden_act_function="ReLU",
        output_act_function="ReLU",
        output_act_function_kwargs={},
        optimizer="SGD",
        learning_rate=1e-2,
        lmda_wt_decay=1e-4,
        p_to_be_zeroed=0.0,
        batch_size=10,
        training_size=60000,
        testing_size=10000,
        seed=21,
        dropout_on_input=False
    ).to(device)
train_data = MNIST(root='mnist_torch_data', train=True, download=True, transform=ToTensor())
test_data = MNIST(root='mnist_torch_data', train=False, download=True, transform=ToTensor())
training_loader = DataLoader(train_data, batch_size=10, shuffle=True)
testing_loader = DataLoader(test_data, batch_size=10, shuffle=True)
for epoch in range(125):
    print(f"Training for epoch: {epoch}")
    model.train_model(training_loader)
    model.evaluate(testing_loader, model.testing_size, "testing")

Training for epoch: 0
Accuracy on training data 74.51%
Accuracy on testing data 75.17%
Training for epoch: 1
Accuracy on training data 92.27%
Accuracy on testing data 92.41%
Training for epoch: 2
Accuracy on training data 94.48%
Accuracy on testing data 94.05%
Training for epoch: 3
Accuracy on training data 95.71%
Accuracy on testing data 95.12%
Training for epoch: 4
Accuracy on training data 96.5%
Accuracy on testing data 95.92%
Training for epoch: 5
Accuracy on training data 97.05%
Accuracy on testing data 96.37%
Training for epoch: 6
Accuracy on training data 97.48%
Accuracy on testing data 96.65%
Training for epoch: 7
Accuracy on training data 97.86%
Accuracy on testing data 96.94%
Training for epoch: 8
Accuracy on training data 98.05%
Accuracy on testing data 97.18%
Training for epoch: 9
Accuracy on training data 98.28%
Accuracy on testing data 97.22%
Training for epoch: 10
Accuracy on training data 98.45%
Accuracy on testing data 97.27%
Training for epoch: 11
Accuracy on training

In [None]:
model = TorchMLP(
        size=[784, 100, 100, 10],
        loss_func="CrossEntropyLoss",
        hidden_act_function="ReLU",
        output_act_function="ReLU",
        output_act_function_kwargs={},
        optimizer="SGD",
        learning_rate=1e-2,
        lmda_wt_decay=1e-4,
        p_to_be_zeroed=0.20,
        batch_size=10,
        training_size=60000,
        testing_size=10000,
        seed=21,
        dropout_on_input=False
    ).to(device)
train_data = MNIST(root='mnist_torch_data', train=True, download=True, transform=ToTensor())
test_data = MNIST(root='mnist_torch_data', train=False, download=True, transform=ToTensor())
training_loader = DataLoader(train_data, batch_size=10, shuffle=True)
testing_loader = DataLoader(test_data, batch_size=10, shuffle=True)
for epoch in range(125):
    print(f"Training for epoch: {epoch}")
    model.train_model(training_loader)
    model.evaluate(testing_loader, model.testing_size, "testing")

Training for epoch: 0
Accuracy on training data 83.66%
Accuracy on testing data 83.87%
Training for epoch: 1
Accuracy on training data 92.9%
Accuracy on testing data 92.52%
Training for epoch: 2
Accuracy on training data 94.67%
Accuracy on testing data 94.03%
Training for epoch: 3
Accuracy on training data 95.67%
Accuracy on testing data 94.81%
Training for epoch: 4
Accuracy on training data 96.43%
Accuracy on testing data 95.32%
Training for epoch: 5
Accuracy on training data 96.89%
Accuracy on testing data 95.69%
Training for epoch: 6
Accuracy on training data 97.24%
Accuracy on testing data 96.04%
Training for epoch: 7
Accuracy on training data 97.55%
Accuracy on testing data 96.28%
Training for epoch: 8
Accuracy on training data 97.84%
Accuracy on testing data 96.57%
Training for epoch: 9
Accuracy on training data 98.04%
Accuracy on testing data 96.75%
Training for epoch: 10
Accuracy on training data 98.24%
Accuracy on testing data 96.81%
Training for epoch: 11
Accuracy on training

In [None]:
model = TorchMLP(
        size=[784, 100, 100, 10],
        loss_func="CrossEntropyLoss",
        hidden_act_function="ReLU",
        output_act_function="ReLU",
        output_act_function_kwargs={},
        optimizer="SGD",
        learning_rate=1e-2,
        lmda_wt_decay=1e-4,
        p_to_be_zeroed=0.10,
        batch_size=10,
        training_size=60000,
        testing_size=10000,
        seed=21,
        dropout_on_input=False
    ).to(device)
train_data = MNIST(root='mnist_torch_data', train=True, download=True, transform=ToTensor())
test_data = MNIST(root='mnist_torch_data', train=False, download=True, transform=ToTensor())
training_loader = DataLoader(train_data, batch_size=10, shuffle=True)
testing_loader = DataLoader(test_data, batch_size=10, shuffle=True)
for epoch in range(125):
    print(f"Training for epoch: {epoch}")
    model.train_model(training_loader)
    model.evaluate(testing_loader, model.testing_size, "testing")

Training for epoch: 0
Accuracy on training data 90.37%
Accuracy on testing data 90.31%
Training for epoch: 1
Accuracy on training data 93.55%
Accuracy on testing data 93.26%
Training for epoch: 2
Accuracy on training data 95.16%
Accuracy on testing data 94.6%
Training for epoch: 3
Accuracy on training data 96.1%
Accuracy on testing data 95.47%
Training for epoch: 4
Accuracy on training data 96.74%
Accuracy on testing data 96.0%
Training for epoch: 5
Accuracy on training data 97.21%
Accuracy on testing data 96.35%
Training for epoch: 6
Accuracy on training data 97.51%
Accuracy on testing data 96.62%
Training for epoch: 7
Accuracy on training data 97.81%
Accuracy on testing data 96.83%
Training for epoch: 8
Accuracy on training data 98.05%
Accuracy on testing data 96.82%
Training for epoch: 9
Accuracy on training data 98.3%
Accuracy on testing data 96.99%
Training for epoch: 10
Accuracy on training data 98.46%
Accuracy on testing data 97.05%
Training for epoch: 11
Accuracy on training da

In [None]:
model = TorchMLP(
        size=[784, 100, 100, 10],
        loss_func="CrossEntropyLoss",
        hidden_act_function="ReLU",
        output_act_function="ReLU",
        output_act_function_kwargs={},
        optimizer="SGD",
        learning_rate=1e-2,
        lmda_wt_decay=1e-4,
        p_to_be_zeroed=0.0,
        batch_size=10,
        training_size=60000,
        testing_size=10000,
        seed=21,
        dropout_on_input=False
    ).to(device)
torch.manual_seed(35)
train_data = MNIST(root='mnist_torch_data', train=True, download=True, transform=ToTensor())
test_data = MNIST(root='mnist_torch_data', train=False, download=True, transform=ToTensor())
training_loader = DataLoader(train_data, batch_size=10, shuffle=True)
testing_loader = DataLoader(test_data, batch_size=10, shuffle=True)
for epoch in range(200):
    print(f"Training for epoch: {epoch}")
    model.train_model(training_loader)
    model.evaluate(testing_loader, model.testing_size, "testing")

Training for epoch: 0
Accuracy on training data 84.16%
Accuracy on testing data 84.57%
Training for epoch: 1
Accuracy on training data 85.75%
Accuracy on testing data 85.74%
Training for epoch: 2
Accuracy on training data 86.9%
Accuracy on testing data 86.56%
Training for epoch: 3
Accuracy on training data 87.65%
Accuracy on testing data 87.14%
Training for epoch: 4
Accuracy on training data 88.19%
Accuracy on testing data 87.74%
Training for epoch: 5
Accuracy on training data 88.58%
Accuracy on testing data 88.16%
Training for epoch: 6
Accuracy on training data 88.86%
Accuracy on testing data 88.42%
Training for epoch: 7
Accuracy on training data 89.13%
Accuracy on testing data 88.61%
Training for epoch: 8
Accuracy on training data 89.26%
Accuracy on testing data 88.69%
Training for epoch: 9
Accuracy on training data 89.43%
Accuracy on testing data 88.83%
Training for epoch: 10
Accuracy on training data 89.6%
Accuracy on testing data 88.9%
Training for epoch: 11
Accuracy on training d

KeyboardInterrupt: ignored

In [None]:
class TestingConfig:
    CONFIG = {
        "size": [[784, 100, 100, 10]], 
        "epochs": [50],  
        "hidden_act_function": ["ReLU"],  
        "output_act_function": ["ReLU"],  
        "loss_func": ["CrossEntropyLoss"],  
        "optimizer": ["SGD"],  
        "learning_rate": [1e-2], 
        "weight_decay": [1e-4],  
        "batch_size": [10],  
        "testing_dataset_type": ["validation"],  
        "momentum": [0.6, 0.7, 0.8, 0.9], 
        "training_size": [10000],  
        "testing_size": [2000],  
        "p_to_be_zeroed": [0.0],  
        "dropout_on_input_layer": [False], 
    }
eval_data = HyperTuner().tune(TestingConfig.CONFIG, False)

Total combinations for exp: 4
Added record to eval DF. Total records so far: 1
Added record to eval DF. Total records so far: 2
Added record to eval DF. Total records so far: 3
Added record to eval DF. Total records so far: 4
['size', 'epochs', 'hidden_act_function', 'output_act_function', 'loss_func', 'optimizer', 'learning_rate', 'weight_decay', 'batch_size', 'momentumtesting_dataset_type', 'training_size', 'testing_size', 'p_to_be_zeroed', 'dropout_on_input_layer', 'best_accuracy', 'avg_accuracy', 'avg_time_taken', 'momentum', 'testing_dataset_type']


In [None]:
eval_data

Unnamed: 0,avg_time_taken,momentum,testing_dataset_type,size,epochs,hidden_act_function,output_act_function,loss_func,optimizer,learning_rate,weight_decay,batch_size,momentumtesting_dataset_type,training_size,testing_size,p_to_be_zeroed,dropout_on_input_layer,best_accuracy,avg_accuracy
3,105.358101,0.9,validation,"[784, 100, 100, 10]",50,ReLU,ReLU,CrossEntropyLoss,SGD,0.01,0.0001,10,,10000,2000,0.0,False,96.85,93.116667
2,104.933231,0.8,validation,"[784, 100, 100, 10]",50,ReLU,ReLU,CrossEntropyLoss,SGD,0.01,0.0001,10,,10000,2000,0.0,False,96.15,88.916667
0,104.26332,0.6,validation,"[784, 100, 100, 10]",50,ReLU,ReLU,CrossEntropyLoss,SGD,0.01,0.0001,10,,10000,2000,0.0,False,95.55,82.006667
1,104.454231,0.7,validation,"[784, 100, 100, 10]",50,ReLU,ReLU,CrossEntropyLoss,SGD,0.01,0.0001,10,,10000,2000,0.0,False,87.8,79.65


In [None]:
model = TorchMLP(
        size=[784, 100, 100, 10],
        loss_func="CrossEntropyLoss",
        hidden_act_function="ReLU",
        output_act_function="ReLU",
        output_act_function_kwargs={},
        optimizer="SGD",
        learning_rate=1e-2,
        lmda_wt_decay=1e-4,
        p_to_be_zeroed=0.0,
        batch_size=10,
        training_size=60000,
        testing_size=10000,
        seed=21,
        dropout_on_input=False,
        momentum=0.9
    ).to(device)
train_data = MNIST(root='mnist_torch_data', train=True, download=True, transform=ToTensor())
test_data = MNIST(root='mnist_torch_data', train=False, download=True, transform=ToTensor())
training_loader = DataLoader(train_data, batch_size=10, shuffle=True)
testing_loader = DataLoader(test_data, batch_size=10, shuffle=True)
for epoch in range(200):
    print(f"Training for epoch: {epoch}")
    model.train_model(training_loader)
    accuracies.append(model.evaluate(testing_loader, model.testing_size, "testing"))
print(max(accuracies))

Training for epoch: 0
Accuracy on training data 87.94%
Accuracy on testing data 87.74%
Training for epoch: 1
Accuracy on training data 97.24%
Accuracy on testing data 96.63%
Training for epoch: 2
Accuracy on training data 98.1%
Accuracy on testing data 97.27%
Training for epoch: 3
Accuracy on training data 98.17%
Accuracy on testing data 97.23%
Training for epoch: 4
Accuracy on training data 98.42%
Accuracy on testing data 97.25%
Training for epoch: 5
Accuracy on training data 98.83%
Accuracy on testing data 97.43%
Training for epoch: 6
Accuracy on training data 98.52%
Accuracy on testing data 97.34%
Training for epoch: 7
Accuracy on training data 98.92%
Accuracy on testing data 97.42%
Training for epoch: 8
Accuracy on training data 99.08%
Accuracy on testing data 98.02%
Training for epoch: 9
Accuracy on training data 99.18%
Accuracy on testing data 97.95%
Training for epoch: 10
Accuracy on training data 98.91%
Accuracy on testing data 97.57%
Training for epoch: 11
Accuracy on training

Conclude with a peak of **98.53** accuracy

In [None]:
model = TorchMLP(
        size=[784, 100, 100, 10],
        loss_func="CrossEntropyLoss",
        hidden_act_function="ReLU",
        output_act_function="ReLU",
        output_act_function_kwargs={},
        optimizer="SGD",
        learning_rate=1e-2,
        lmda_wt_decay=1e-4,
        p_to_be_zeroed=0.20,
        batch_size=10,
        training_size=60000,
        testing_size=10000,
        seed=21,
        dropout_on_input=False,
        momentum=0.9
    ).to(device)
train_data = MNIST(root='mnist_torch_data', train=True, download=True, transform=ToTensor())
test_data = MNIST(root='mnist_torch_data', train=False, download=True, transform=ToTensor())
training_loader = DataLoader(train_data, batch_size=10, shuffle=True)
testing_loader = DataLoader(test_data, batch_size=10, shuffle=True)
accuracies = []
for epoch in range(200):
    print(f"Training for epoch: {epoch}")
    model.train_model(training_loader)
    accuracies.append(model.evaluate(testing_loader, model.testing_size, "testing"))
print(max(accuracies))

Training for epoch: 0
Accuracy on training data 96.08%
Accuracy on testing data 95.29%
Training for epoch: 1
Accuracy on training data 97.43%
Accuracy on testing data 96.48%
Training for epoch: 2
Accuracy on training data 97.52%
Accuracy on testing data 96.31%
Training for epoch: 3
Accuracy on training data 98.17%
Accuracy on testing data 96.71%
Training for epoch: 4
Accuracy on training data 98.31%
Accuracy on testing data 96.72%
Training for epoch: 5
Accuracy on training data 98.41%
Accuracy on testing data 96.7%
Training for epoch: 6
Accuracy on training data 98.84%
Accuracy on testing data 97.02%
Training for epoch: 7
Accuracy on training data 98.66%
Accuracy on testing data 96.96%
Training for epoch: 8
Accuracy on training data 98.36%
Accuracy on testing data 96.59%
Training for epoch: 9
Accuracy on training data 98.96%
Accuracy on testing data 97.04%
Training for epoch: 10
Accuracy on training data 99.11%
Accuracy on testing data 97.23%
Training for epoch: 11
Accuracy on training

Introducing dropout does not seem to help. Better with no dropout

In [10]:
class TestingConfig:
    CONFIG = {
        "size": [[784, 100, 100, 10]], 
        "epochs": [50],  
        "hidden_act_function": ["ReLU"],  
        "output_act_function": ["ReLU"],  
        "loss_func": ["CrossEntropyLoss"],  
        "optimizer": ["SGD"],  
        "learning_rate": [1e-2], 
        "weight_decay": [1e-4],  
        "batch_size": [10],  
        "testing_dataset_type": ["validation"],  
        "momentum": [0.6, 0.7, 0.8, 0.9, 1.0], 
        "training_size": [10000],  
        "testing_size": [2000],  
        "p_to_be_zeroed": [0.0],  
        "dropout_on_input_layer": [False], 
    }
eval_data = HyperTuner().tune(TestingConfig.CONFIG, False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to mnist_torch_data/MNIST/raw/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to mnist_torch_data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=9912422.0), HTML(value='')))


Extracting mnist_torch_data/MNIST/raw/train-images-idx3-ubyte.gz to mnist_torch_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to mnist_torch_data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=28881.0), HTML(value='')))


Extracting mnist_torch_data/MNIST/raw/train-labels-idx1-ubyte.gz to mnist_torch_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to mnist_torch_data/MNIST/raw/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to mnist_torch_data/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=1648877.0), HTML(value='')))


Extracting mnist_torch_data/MNIST/raw/t10k-images-idx3-ubyte.gz to mnist_torch_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to mnist_torch_data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=4542.0), HTML(value='')))


Extracting mnist_torch_data/MNIST/raw/t10k-labels-idx1-ubyte.gz to mnist_torch_data/MNIST/raw

Processing...


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


Done!
Total combinations for exp: 5
Added record to eval DF. Total records so far: 1
Added record to eval DF. Total records so far: 2
Added record to eval DF. Total records so far: 3
Added record to eval DF. Total records so far: 4
Added record to eval DF. Total records so far: 5


In [11]:
eval_data

Unnamed: 0,best_accuracy,avg_accuracy,avg_time_taken,size,epochs,hidden_act_function,output_act_function,loss_func,optimizer,learning_rate,weight_decay,batch_size,momentum,testing_dataset_type,training_size,testing_size,p_to_be_zeroed,dropout_on_input_layer
3,96.4,93.036667,123.20046,"[784, 100, 100, 10]",50,ReLU,ReLU,CrossEntropyLoss,SGD,0.01,0.0001,10,0.9,validation,10000,2000,0.0,False
2,96.2,89.003333,125.999803,"[784, 100, 100, 10]",50,ReLU,ReLU,CrossEntropyLoss,SGD,0.01,0.0001,10,0.8,validation,10000,2000,0.0,False
1,88.0,83.046667,122.066078,"[784, 100, 100, 10]",50,ReLU,ReLU,CrossEntropyLoss,SGD,0.01,0.0001,10,0.7,validation,10000,2000,0.0,False
0,95.6,82.066667,125.920994,"[784, 100, 100, 10]",50,ReLU,ReLU,CrossEntropyLoss,SGD,0.01,0.0001,10,0.6,validation,10000,2000,0.0,False
4,11.3,10.166667,121.889036,"[784, 100, 100, 10]",50,ReLU,ReLU,CrossEntropyLoss,SGD,0.01,0.0001,10,1.0,validation,10000,2000,0.0,False
