# Imports

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import SubsetRandomSampler, Subset
import datasets as ds
from weighted_random_search import wrs

In [3]:
from cnn_models import Simple_CNN, Complex_CNN, PretrainedAlexNet
from hyperparameter_search import Net_wrapper, GridSearch, RandomSearch, WeightedRandomSearch
from datasets import cifar_train_AN , cifar_val_AN

### Sample for testing GridSearch

In [4]:
train_dataset = ds.cifar_train
val_dataset = ds.cifar_val

In [11]:
subset_indices = list(range(500))
subset_sampler = SubsetRandomSampler(subset_indices)

subset_train_dataset = Subset(train_dataset, subset_indices)
subset_val_dataset = Subset(val_dataset, subset_indices)

### GridSearch / RandomSearch test

In [14]:
test_hyper_params = {'batch_size': [8, 16, 32, 64], 'learning_rate': [0.0001, 0.0005, 0.001, 0.01]}
my_net = Net_wrapper(model = PretrainedAlexNet, optimizer=optim.Adam, max_epochs=5, learning_rate = 0.0001)
gs = GridSearch(net=my_net, param_grid=test_hyper_params, step_by_step=True, verbose=1)
gs = gs.fit(cifar_train_AN, cifar_val_AN)



Current parameter: batch_size: 8   val_accuracy: 0.8649
Best parameters till now:{'batch_size': 8}
Current parameter: batch_size: 16   val_accuracy: 0.8834
Best parameters till now:{'batch_size': 16}
Current parameter: batch_size: 32   val_accuracy: 0.8804
Best parameters till now:{'batch_size': 16}
Current parameter: batch_size: 64   val_accuracy: 0.8935
Best parameters till now:{'batch_size': 64}
64


In [13]:
print(gs.best_score)
print(gs.best_params)

0.3333333333333333
{'learning_rate': 0.0005, 'batch_size': 16}


In [14]:
test_hyper_params = {'learning_rate': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1], 'batch_size': [8, 16, 32, 64, 128]}
rs = RandomSearch(my_net, test_hyper_params, verbose=1)
rs.fit(subset_train_dataset, subset_val_dataset, n_trials = 5)

Parameter set: {'learning_rate': 0.0003195254015709299, 'batch_size': 13}
val_accuracy: 0.3353
Parameter set: {'learning_rate': 0.0002668088018810296, 'batch_size': 12}
val_accuracy: 0.3552
Parameter set: {'learning_rate': 0.0002743979608568015, 'batch_size': 13}
val_accuracy: 0.3478
Parameter set: {'learning_rate': 0.0003203191610298302, 'batch_size': 9}
val_accuracy: 0.3448
Parameter set: {'learning_rate': 0.0004868119356054707, 'batch_size': 15}
val_accuracy: 0.3510


<__main__.RandomSearch at 0x1bd4aedeb90>

In [15]:
print(rs.best_score)
print(rs.best_params)

0.35515872903522994
{'learning_rate': 0.0002668088018810296, 'batch_size': 12}


### Weighted random search test

In [28]:
my_net = Net_wrapper()
param_grid = {'lr': [0.01, 0.05, 0.1, 0.5, 1, 2], 'batch_size': [16, 32, 64, 128, 256]}
w = WeightedRandomSearch(net=my_net, param_grid=test_hyper_params)
N, N_0 = 10, 3
w.fit(subset_train_dataset, subset_val_dataset, N, N_0)
print(w.best_params)

{'learning_rate': 0.01, 'batch_size': 64, 'no_neurons': 64} 0.11944110598415136
{'learning_rate': 0.005, 'batch_size': 64, 'no_neurons': 64} 0.28305288404226303
{'learning_rate': 0.005, 'batch_size': 64, 'no_neurons': 64} 0.28305288404226303
{'learning_rate': 0.005, 'batch_size': 64, 'no_neurons': 64} 0.28305288404226303
{'learning_rate': 0.005, 'batch_size': 64, 'no_neurons': 64} 0.28305288404226303
{'learning_rate': 0.005, 'batch_size': 64, 'no_neurons': 64} 0.28305288404226303
{'learning_rate': 0.005, 'batch_size': 64, 'no_neurons': 64} 0.28305288404226303
{'learning_rate': 0.005, 'batch_size': 64, 'no_neurons': 64} 0.28305288404226303
{'learning_rate': 0.005, 'batch_size': 64, 'no_neurons': 64} 0.28305288404226303
{'learning_rate': 0.005, 'batch_size': 64, 'no_neurons': 64} 0.28305288404226303
{'learning_rate': 0.005, 'batch_size': 64, 'no_neurons': 64} 0.28305288404226303


<__main__.WeightedRandomSearch at 0x23a32821750>

# Hyperparameter Tuning

## Simple_CNN model on original dataset (without augmentation)

1. Learning rate + batch size

In [16]:
my_net = Net_wrapper(model=Simple_CNN, max_epochs=5)
param_grid = {'learning_rate': [0.0001, 0.0005, 0.001, 0.005, 0.01], 'batch_size': [8, 16, 32, 64, 128]}
gs = GridSearch(net=my_net, param_grid=param_grid, step_by_step=True, verbose=1)
gs = gs.fit(train_dataset, val_dataset)

Current parameter: learning_rate: 0.0001   val_accuracy: 0.5830
Best parameters till now:{'learning_rate': 0.0001}
Current parameter: learning_rate: 0.0005   val_accuracy: 0.6887
Best parameters till now:{'learning_rate': 0.0005}
Current parameter: learning_rate: 0.001   val_accuracy: 0.6761
Best parameters till now:{'learning_rate': 0.0005}
Current parameter: learning_rate: 0.005   val_accuracy: 0.5991
Best parameters till now:{'learning_rate': 0.0005}
Current parameter: learning_rate: 0.01   val_accuracy: 0.1000
Best parameters till now:{'learning_rate': 0.0005}
0.0005
Current parameter: batch_size: 8   val_accuracy: 0.6954
Best parameters till now:{'learning_rate': 0.0005, 'batch_size': 8}
Current parameter: batch_size: 16   val_accuracy: 0.6823
Best parameters till now:{'learning_rate': 0.0005, 'batch_size': 8}
Current parameter: batch_size: 32   val_accuracy: 0.6785
Best parameters till now:{'learning_rate': 0.0005, 'batch_size': 8}
Current parameter: batch_size: 64   val_accuracy

2. Dropout, weight_decay

In [20]:
my_net = Net_wrapper(model=Simple_CNN, max_epochs=5, learning_rate=0.0005, batch_size=8)
param_grid = {'dr': [nn.Dropout(0), nn.Dropout(0.1), nn.Dropout(0.25), nn.Dropout(0.4), nn.Dropout(0.5)], 'weight_decay': [0, 1e-5, 1e-4, 1e-3]}
gs = GridSearch(net=my_net, param_grid=param_grid, step_by_step=True, verbose=1)
gs = gs.fit(train_dataset, val_dataset)

Current parameter: dr: Dropout(p=0, inplace=False)   val_accuracy: 0.7061
Best parameters till now:{'dr': Dropout(p=0, inplace=False)}
Current parameter: dr: Dropout(p=0.1, inplace=False)   val_accuracy: 0.6770
Best parameters till now:{'dr': Dropout(p=0, inplace=False)}
Current parameter: dr: Dropout(p=0.25, inplace=False)   val_accuracy: 0.6614
Best parameters till now:{'dr': Dropout(p=0, inplace=False)}
Current parameter: dr: Dropout(p=0.4, inplace=False)   val_accuracy: 0.6550
Best parameters till now:{'dr': Dropout(p=0, inplace=False)}
Current parameter: dr: Dropout(p=0.5, inplace=False)   val_accuracy: 0.6351
Best parameters till now:{'dr': Dropout(p=0, inplace=False)}
Current parameter: weight_decay: 0   val_accuracy: 0.7047
Best parameters till now:{'dr': Dropout(p=0, inplace=False), 'weight_decay': 0}
Current parameter: weight_decay: 1e-05   val_accuracy: 0.6925
Best parameters till now:{'dr': Dropout(p=0, inplace=False), 'weight_decay': 0}
Current parameter: weight_decay: 0.0

3. Net architecture (kernel size, number of filters)

In [10]:
my_net = Net_wrapper(model=Simple_CNN, max_epochs=5, learning_rate=0.0005, batch_size=8)
param_grid = {'number_of_filters0': [32, 64, 128, 256], 'number_of_filters1': [32, 64, 128, 256]}
gs = GridSearch(net=my_net, param_grid=param_grid, step_by_step=True, verbose=1)
gs = gs.fit(train_dataset, val_dataset)

Current parameter: number_of_filters0: 32   val_accuracy: 0.7007
Best parameters till now:{'number_of_filters0': 32}
Current parameter: number_of_filters0: 64   val_accuracy: 0.6869
Best parameters till now:{'number_of_filters0': 32}
Current parameter: number_of_filters0: 128   val_accuracy: 0.7005
Best parameters till now:{'number_of_filters0': 32}
Current parameter: number_of_filters0: 256   val_accuracy: 0.6403
Best parameters till now:{'number_of_filters0': 32}
Current parameter: number_of_filters1: 32   val_accuracy: 0.6976
Best parameters till now:{'number_of_filters0': 32, 'number_of_filters1': 32}
Current parameter: number_of_filters1: 64   val_accuracy: 0.7197
Best parameters till now:{'number_of_filters0': 32, 'number_of_filters1': 64}
Current parameter: number_of_filters1: 128   val_accuracy: 0.7115
Best parameters till now:{'number_of_filters0': 32, 'number_of_filters1': 64}
Current parameter: number_of_filters1: 256   val_accuracy: 0.7270
Best parameters till now:{'number_

TypeError: CNN_3_class.__init__() got an unexpected keyword argument 'kernel_size1'

4. Number of neurons

In [19]:
my_net = Net_wrapper(model=Simple_CNN, max_epochs=5, learning_rate=0.0005, batch_size=8, 
                     number_of_filters0=32, number_of_filters1=256)
param_grid = {'kernel_size1': [3,4,5], 'kernel_size2':[3,4,5], 'no_neurons': [32, 64, 128, 256]}
gs = GridSearch(net=my_net, param_grid=param_grid, step_by_step=True, verbose=1)
gs = gs.fit(train_dataset, val_dataset)

5. Num of epochs (run many epochs and stop when validation accuracy starts to decrease / validation loss starts to increase)

In [43]:
my_net = Net_wrapper(model=Simple_CNN, max_epochs=20, learning_rate=0.0005, batch_size=8, verbose=1)
my_net.score(train_dataset, val_dataset)

Epoch 1/20


100%|██████████| 5000/5000 [02:22<00:00, 35.16it/s]


train_loss:1.367179244017601, train_acc: 0.5074


100%|██████████| 1250/1250 [00:19<00:00, 62.74it/s]


val_loss:1.1311261782169342, val_acc: 0.6056
Epoch 2/20


100%|██████████| 5000/5000 [02:23<00:00, 34.88it/s]


train_loss:1.0221099519059063, train_acc: 0.637175


100%|██████████| 1250/1250 [00:18<00:00, 66.59it/s]


val_loss:0.993101014137268, val_acc: 0.6527
Epoch 3/20


100%|██████████| 5000/5000 [02:30<00:00, 33.33it/s]


train_loss:0.8751001523837447, train_acc: 0.690425


100%|██████████| 1250/1250 [00:21<00:00, 58.75it/s]


val_loss:0.9432699997067452, val_acc: 0.6715
Epoch 4/20


100%|██████████| 5000/5000 [02:15<00:00, 37.02it/s]


train_loss:0.7678421761438251, train_acc: 0.73045


100%|██████████| 1250/1250 [00:16<00:00, 73.90it/s]


val_loss:0.901851232546568, val_acc: 0.6923
Epoch 5/20


100%|██████████| 5000/5000 [02:03<00:00, 40.53it/s]


train_loss:0.6812195054091513, train_acc: 0.76025


100%|██████████| 1250/1250 [00:17<00:00, 70.63it/s]


val_loss:0.8833921184927225, val_acc: 0.6964
Epoch 6/20


100%|██████████| 5000/5000 [02:06<00:00, 39.62it/s]


train_loss:0.6047580602146685, train_acc: 0.7869


100%|██████████| 1250/1250 [00:18<00:00, 69.33it/s]


val_loss:0.9163684277355671, val_acc: 0.6984
Epoch 7/20


100%|██████████| 5000/5000 [02:24<00:00, 34.71it/s]


train_loss:0.5349723679063841, train_acc: 0.8111


100%|██████████| 1250/1250 [00:17<00:00, 73.16it/s]


val_loss:0.9288069393068552, val_acc: 0.7
Epoch 8/20


100%|██████████| 5000/5000 [02:08<00:00, 38.93it/s]


train_loss:0.46668085316661745, train_acc: 0.836475


100%|██████████| 1250/1250 [00:16<00:00, 74.35it/s]


val_loss:0.9727133320122957, val_acc: 0.7042
Epoch 9/20


100%|██████████| 5000/5000 [02:14<00:00, 37.22it/s]


train_loss:0.40310598198021763, train_acc: 0.859075


100%|██████████| 1250/1250 [00:16<00:00, 75.02it/s]


val_loss:1.0612843801558018, val_acc: 0.6872
Epoch 10/20


100%|██████████| 5000/5000 [02:31<00:00, 32.94it/s]


train_loss:0.34871114134127273, train_acc: 0.87725


100%|██████████| 1250/1250 [00:27<00:00, 45.46it/s]


val_loss:1.1207977756097913, val_acc: 0.6886
Epoch 11/20


100%|██████████| 5000/5000 [02:43<00:00, 30.67it/s]


train_loss:0.21719751578164578, train_acc: 0.927925


100%|██████████| 1250/1250 [00:21<00:00, 58.99it/s]


val_loss:1.1960707105569541, val_acc: 0.7002
Epoch 12/20


100%|██████████| 5000/5000 [02:29<00:00, 33.53it/s]


train_loss:0.17249927212976035, train_acc: 0.944875


100%|██████████| 1250/1250 [00:23<00:00, 54.08it/s]


val_loss:1.335640177924186, val_acc: 0.7
Epoch 13/20


100%|██████████| 5000/5000 [02:25<00:00, 34.45it/s]


train_loss:0.13608480416923704, train_acc: 0.9582


100%|██████████| 1250/1250 [00:19<00:00, 62.87it/s]


val_loss:1.465663416982442, val_acc: 0.6915
Epoch 14/20


100%|██████████| 5000/5000 [02:25<00:00, 34.36it/s]


train_loss:0.10815417145311804, train_acc: 0.968275


100%|██████████| 1250/1250 [00:19<00:00, 65.38it/s]


val_loss:1.598927933447808, val_acc: 0.6911
Epoch 15/20


100%|██████████| 5000/5000 [02:28<00:00, 33.61it/s]


train_loss:0.08646666240554222, train_acc: 0.9746


100%|██████████| 1250/1250 [00:17<00:00, 71.20it/s]


val_loss:1.7931744132095948, val_acc: 0.6888
Epoch 16/20


  7%|▋         | 356/5000 [00:11<02:25, 31.96it/s]


KeyboardInterrupt: 

# Complex_CNN model hyperparameter tuning

Learning rate and batch size

In [None]:
my_net = Net_wrapper(model=Complex_CNN, max_epochs=4)
param_grid = {'learning_rate': [0.0001, 0.0005, 0.001], 'batch_size': [64, 32, 8]}
gs = GridSearch(my_net, param_grid=param_grid, step_by_step=True, verbose=1)
gs.fit(train_dataset, val_dataset)