In [23]:
%run ../talktools.py

# Monitoring and Optimization of NNs (continued)

(UCB Datalab AY 128/256; 2021)

Clearly the parameters we choose will correlate with the quality of the network we build and train
<img src="https://miro.medium.com/max/2000/1*XgAAoiQ14z8vz-_PMvEVIA.png">
from: https://towardsdatascience.com/hyperparameter-optimization-with-keras-b82e6364ca53

We'd like a principled way to "search" for the best set of parameters that also protects against overfitting. The simplest thing to do is a *grid search* over all possible parameter combinations. This can be both expensive and somewhat dangerous as this "greedy search" will select for a hyperparameter set that happens to get the best score on the validation set and may not generalize well. The next possibility is to do a *random* search over a finite random set of hyperparameters. This has been shown to be superior to grid searches (see Bergstra & Bengio https://www.jmlr.org/papers/v13/bergstra12a.html)

Below is adapted from https://notebook.community/lukas/ml-class/examples/keras-fashion/sweeps

In [14]:
import datetime
import warnings
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
import wandb
from wandb.keras import WandbCallback

warnings.simplefilter(action='ignore', category=FutureWarning)

fashion_mnist = tf.keras.datasets.fashion_mnist

labels=["T-shirt/top","Trouser","Pullover","Dress","Coat",
        "Sandal","Shirt","Sneaker","Bag","Ankle boot"]

(x_train, y_train),(x_test, y_test) = fashion_mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

img_width=28
img_height=28

# reshape input data
X_train = x_train.reshape(x_train.shape[0], img_width, img_height, 1)
X_test = x_test.reshape(x_test.shape[0], img_width, img_height, 1)

# one hot encode outputs
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
num_classes = y_test.shape[1]

Establish the parameter ranges in a config file like this:

In [18]:
sweep_config = {
    'method': 'random', #grid, random
    'metric': {
      'name': 'accuracy',
      'goal': 'maximize'   
    },
    'parameters': {
        'epochs': {
            'values': [2, 5, 10]
        },
        'batch_size': {
            'values': [256, 128, 64, 32]
        },
        'dropout': {
            'values': [0.1, 0.3, 0.4]
        },
        'conv_layer_size': {
            'values': [16, 32, 64]
        },
        'weight_decay': {
            'values': [0.0005, 0.005]
        },
        'learning_rate': {
            'values': [1e-2, 1e-3, 1e-4]
        },
        'optimizer': {
            'values': ['adam', 'nadam', 'sgd']
        },
        'activation': {
            'values': ['relu', 'selu', 'softmax']
        }
    }
}

Start a *sweep* project

In [19]:
sweep_id = wandb.sweep(sweep_config, project='ucb-datalab-sweep-2021', entity='profjsb')

Create sweep with ID: gfomdoco
Sweep URL: https://wandb.ai/profjsb/ucb-datalab-sweep-2021/sweeps/gfomdoco


In [20]:
def train():
    # Default values for hyper-parameters we're going to sweep over
    config_defaults = {
        'epochs': 5,
        'batch_size': 128,
        'weight_decay': 0.0005,
        'learning_rate': 1e-3,
        'activation': 'relu',
        'optimizer': 'nadam',
        'hidden_layer_size': 128,
        'conv_layer_size': 16,
        'dropout': 0.5,
        'momentum': 0.9,
        'seed': 42
    }

    # Initialize a new wandb run
    wandb.init(config=config_defaults)
    
    # Config is a variable that holds and saves hyperparameters and inputs
    config = wandb.config
    
    # Define the model architecture - This is a simplified version of the VGG19 architecture
    model = tf.keras.models.Sequential()
    
    # Set of Conv2D, Conv2D, MaxPooling2D layers with 32 and 64 filters
    model.add(tf.keras.layers.Conv2D(filters = config.conv_layer_size, kernel_size = (3, 3), padding = 'same', 
                     activation ='relu', input_shape=(img_width, img_height,1)))
    model.add(tf.keras.layers.Dropout(config.dropout))

    model.add(tf.keras.layers.Conv2D(filters = config.conv_layer_size, kernel_size = (3, 3),
                     padding = 'same', activation ='relu'))
    model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))

    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(config.hidden_layer_size, activation ='relu'))

    model.add(tf.keras.layers.Dense(num_classes, activation = "softmax"))

    # Define the optimizer
    if config.optimizer=='sgd':
      optimizer = tf.keras.optimizers.SGD(lr=config.learning_rate, decay=1e-5, momentum=config.momentum, nesterov=True)
    elif config.optimizer=='rmsprop':
      optimizer = tf.keras.optimizers.RMSprop(lr=config.learning_rate, decay=1e-5)
    elif config.optimizer=='adam':
      optimizer = tf.keras.optimizers.Adam(lr=config.learning_rate, beta_1=0.9, beta_2=0.999, clipnorm=1.0)
    elif config.optimizer=='nadam':
      optimizer = tf.keras.optimizers.Nadam(lr=config.learning_rate, beta_1=0.9, beta_2=0.999, clipnorm=1.0)

    model.compile(loss = "categorical_crossentropy", optimizer = optimizer, metrics=['accuracy'])

    model.fit(X_train, y_train, batch_size=config.batch_size,
              epochs=config.epochs,
              validation_data=(X_test, y_test),
              callbacks=[WandbCallback(data_type="image", validation_data=(X_test, y_test), labels=labels),
                          tf.keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)])

In [21]:
wandb.agent(sweep_id, train)

[34m[1mwandb[0m: Agent Starting Run: 6eehnr3i with config:
[34m[1mwandb[0m: 	activation: selu
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	conv_layer_size: 32
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005


Train on 60000 samples, validate on 10000 samples
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


VBox(children=(Label(value=' 9.44MB of 9.44MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.99951259289…

0,1
epoch,4.0
loss,0.16829
acc,0.93712
val_loss,0.31556
val_acc,0.8934
_runtime,468.0
_timestamp,1619471091.0
_step,4.0
best_val_loss,0.27864
best_epoch,1.0


0,1
epoch,▁▃▅▆█
loss,█▃▂▁▁
acc,▁▆▇██
val_loss,█▁▃▅▃
val_acc,▁█▇▅▇
_runtime,▁▃▄▆█
_timestamp,▁▃▄▆█
_step,▁▃▅▆█


[34m[1mwandb[0m: Agent Starting Run: ofi3nmsy with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	conv_layer_size: 64
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.005


Train on 60000 samples, validate on 10000 samples
Epoch 1/5

[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


Let's see the results: https://wandb.ai/profjsb/ucb-datalab-sweep-2021