# Tune Tutorial

In [None]:
from helper import load_data
import numpy as np

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.preprocessing.image import ImageDataGenerator

%load_ext autoreload
%autoreload 2

We want to start off by creating a model:

In [None]:
def make_model(args):
    """Build and compile the convolutional classifier described by ``args``.

    The network stacks two ReLU convolutions, max pooling, dropout, a
    flatten, one ReLU dense layer, a second dropout and a 10-way softmax
    output. It is compiled with categorical cross-entropy, SGD and an
    accuracy metric.

    Args:
        args: Object exposing the attributes ``kernel1``, ``kernel2``,
            ``poolsize``, ``dropout1``, ``hidden``, ``dropout2``, ``lr``
            and ``momentum``.

    Returns:
        The compiled ``Sequential`` model.
    """
    num_classes = 10

    # Layers are created in the same order in which they are stacked.
    layers = [
        Conv2D(32, kernel_size=(args.kernel1, args.kernel1),
               activation='relu', input_shape=(28, 28, 1)),
        Conv2D(64, (args.kernel2, args.kernel2), activation='relu'),
        MaxPooling2D(pool_size=(args.poolsize, args.poolsize)),
        Dropout(args.dropout1),
        Flatten(),
        Dense(args.hidden, activation='relu'),
        Dropout(args.dropout2),
        Dense(num_classes, activation='softmax'),
    ]

    model = Sequential()
    for layer in layers:
        model.add(layer)

    sgd = keras.optimizers.SGD(lr=args.lr, momentum=args.momentum)
    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=sgd,
                  metrics=['accuracy'])
    return model

def train_mnist(args):
    """Fit a fresh model on every batch from ``load_data`` and save it.

    Args:
        args: Hyperparameter object forwarded unchanged to ``make_model``.

    The trained weights are written to ``./weights.h5``.
    """
    batches = load_data()
    network = make_model(args)
    for inputs, targets in batches:
        # One silent fit call per batch yielded by the loader.
        network.fit(inputs, targets, verbose=0)
    network.save_weights("./weights.h5")

*Then*, we want to train this model (trying out the default hyperparameters).

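Now, let's try running a simple search to find the best hyperparameters. First, we define the default hyperparameters with `argparse` and train once with them as a baseline.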

In [None]:
import argparse

# Default hyperparameters for the Keras MNIST example, exposed as
# command-line style options so they can be read as attributes of ``args``.
parser = argparse.ArgumentParser(description='Keras MNIST Example')
# NOTE(review): '--steps' was a copy-paste of '--lr' (float type, 'LR'
# metavar, learning-rate help). It is now an integer step count; the cells
# shown in this tutorial do not read it.
parser.add_argument('--steps', type=int, default=10, metavar='N',
                    help='number of training steps (default: 10)')
parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                    help='learning rate (default: 0.01)')
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                    help='SGD momentum (default: 0.5)')
parser.add_argument('--kernel1', type=int, default=3,
                    help='Size of first kernel (default: 3)')
parser.add_argument('--kernel2', type=int, default=3,
                    help='Size of second kernel (default: 3)')
parser.add_argument('--poolsize', type=int, default=2,
                    help='Size of Pooling (default: 2)')
parser.add_argument('--dropout1', type=float, default=0.25,
                    help='Dropout rate after the pooling layer '
                         '(default: 0.25)')
parser.add_argument('--hidden', type=int, default=128,
                    help='Size of Hidden Layer (default: 128)')
parser.add_argument('--dropout2', type=float, default=0.5,
                    help='Dropout rate after the hidden layer '
                         '(default: 0.5)')

# parse_known_args tolerates the extra argv entries a notebook kernel passes;
# only the parsed namespace (element 0) is kept.
args = parser.parse_known_args()[0]

In [None]:
# Train once using the hyperparameters held in ``args``.
train_mnist(args)

Now, let's use the CPUs on this single machine to run several training trials in parallel and find the best hyperparameters.

In [None]:
import ray
from ray import tune

In [None]:
def train_mnist_tune(config, reporter):
    """Train one Tune trial and report its accuracy after every batch.

    The model is built from the default ``args`` overlaid with this trial's
    ``config``. Building it from ``args`` alone would make every trial train
    the same hyperparameters regardless of what Tune sampled.

    Args:
        config: Mapping of hyperparameter name to the value chosen for this
            trial; keys override the attributes of the same name in ``args``.
        reporter: Callable invoked as ``reporter(mean_accuracy=...)`` after
            each batch.

    The trained weights are written to ``./weights_tune.h5``.
    """
    import argparse

    # Work on a copy so the shared defaults are never mutated by a trial.
    hparams = argparse.Namespace(**vars(args))
    vars(hparams).update(config)

    data_generator = load_data()
    model = make_model(hparams)
    for x_batch, y_batch in data_generator:
        result = model.fit(x_batch, y_batch, verbose=0)
        # history["acc"] holds one value per epoch; each fit call runs one.
        reporter(mean_accuracy=result.history["acc"][0])
    model.save_weights("./weights_tune.h5")

In [None]:
# ignore_reinit_error=True is passed, presumably so that re-running this
# cell after Ray has already started is harmless.
ray.init(ignore_reinit_error=True)

# One experiment around ``train_mnist_tune``: it stops once the reported
# mean_accuracy reaches 0.99, and each hyperparameter is drawn at random
# from the range given below.
configuration = tune.Experiment(
    "experiment_name",
    run=train_mnist_tune,
    stop=dict(mean_accuracy=0.99),
    config=dict(
        lr=lambda spec: np.random.uniform(0.001, 0.1),
        momentum=lambda spec: np.random.uniform(0.1, 0.9),
        hidden=lambda spec: np.random.randint(32, 512),
        dropout1=lambda spec: np.random.uniform(0.2, 0.8),
    ),
)
tune.run_experiments(configuration)

## Try using a scheduler

In [None]:
from ray.tune.schedulers import AsyncHyperBandScheduler

# Request ten sampled configurations from the experiment defined earlier.
configuration.spec["num_samples"] = 10

# Scheduler keyed on the reported "mean_accuracy", with progress measured
# by "training_iteration".
hyperband = AsyncHyperBandScheduler(
    reward_attr="mean_accuracy",
    time_attr="training_iteration",
)

In [None]:
# Run the same experiment again, this time passing the scheduler.
tune.run_experiments(configuration, scheduler=hyperband)

## Try using a search algorithm

In [None]:
# ``hp`` was used below without ever being imported.
from hyperopt import hp
from hyperopt.pyll import scope
from ray.tune.suggest import HyperOptSearch

# HyperOpt search space covering the same ranges as the random search.
space = {
    "lr": hp.uniform("lr", 0.001, 0.1),
    "momentum": hp.uniform("momentum", 0.1, 0.9),
    # quniform yields floats; cast so the hidden-layer width is an integer.
    "hidden": scope.int(hp.quniform("hidden", 32, 512, 1)),
    "dropout1": hp.uniform("dropout1", 0.2, 0.8),
}
# Optimise the metric the trainable actually reports ("mean_accuracy");
# "neg_mean_loss" is never reported by the training function.
hyperopt = HyperOptSearch(space, max_concurrent=4, reward_attr="mean_accuracy")
tune.run_experiments(configuration, search_alg=hyperopt, scheduler=hyperband)

## (Optional) Fault Tolerance

In [None]:
import argparse
import os


class Model(tune.Trainable):
    """Class-based trainable that checkpoints the Keras model's weights.

    Each ``_train`` call fits the model on one batch and returns its
    accuracy. ``_save`` and ``_restore`` write and read the weights.
    """

    def _setup(self):
        """Build the model from ``args`` overlaid with this trial's config."""
        # Copy the defaults so the shared ``args`` object is not mutated.
        hparams = argparse.Namespace(**vars(args))
        vars(hparams).update(self.config)
        self.model = make_model(hparams)
        self.data_generator = load_data()

    def _train(self):
        """Fit on the next batch and return its accuracy.

        Returns:
            A dict with the single key ``"mean_accuracy"``.
        """
        # The built-in next() works for plain generators as well as objects
        # that define __next__; the names now match the ones passed to fit().
        x_batch, y_batch = next(self.data_generator)
        result = self.model.fit(x_batch, y_batch, verbose=0)
        return {"mean_accuracy": result.history["acc"][0]}

    def _save(self, checkpoint_dir):
        """Write the weights into ``checkpoint_dir`` and return their path.

        Args:
            checkpoint_dir: Directory in which to store ``weights.h5``.

        Returns:
            Path of the written weights file, which is what ``_restore``
            expects to receive.
        """
        checkpoint_path = os.path.join(checkpoint_dir, "weights.h5")
        self.model.save_weights(checkpoint_path)
        return checkpoint_path

    def _restore(self, checkpoint_path):
        """Load weights previously written by ``_save``.

        Args:
            checkpoint_path: Path of the weights file to load.
        """
        self.model.load_weights(checkpoint_path)

In [None]:
# ignore_reinit_error=True is passed, presumably so that re-running this
# cell after Ray has already started is harmless.
ray.init(ignore_reinit_error=True)

# Same random search as before, but driven by the ``Model`` trainable and
# with a checkpoint requested at the end of each trial.
configuration = tune.Experiment(
    "experiment_name",
    run=Model,
    stop=dict(mean_accuracy=0.99),
    config=dict(
        lr=lambda spec: np.random.uniform(0.001, 0.1),
        momentum=lambda spec: np.random.uniform(0.1, 0.9),
        hidden=lambda spec: np.random.randint(32, 512),
        dropout1=lambda spec: np.random.uniform(0.2, 0.8),
    ),
    checkpoint_at_end=True,
)
tune.run_experiments(configuration)