## Imports

In [364]:
import gc
import logging
import math
import random
import warnings
from collections import defaultdict
from datetime import datetime
from importlib import reload
from warnings import catch_warnings, simplefilter

import matplotlib.pyplot as plt
import numpy as np
import torch
from scipy.stats import qmc, norm
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.metrics import accuracy_score
from torch.autograd import Variable
from torch.nn import *
from torch.optim import Adam
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
from tqdm import tqdm

reload(logging)
logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', level=logging.INFO, datefmt='%I:%M:%S')

## Neural Network Classes

In [365]:
class SimpleCNN(Module):
    def __init__(self, config, verbose):
        super(SimpleCNN, self).__init__()
        img_dim = config['shape'][2]
        channels = config['shape'][1]
        conv_layers = np.array([
            [LazyConv2d(channels * 3 ** (depth + 1), kernel_size=3, stride=1, padding=1),
             LazyBatchNorm2d(),
             ReLU(inplace=True),
             MaxPool2d(kernel_size=2, stride=2)]
            for depth in range(config['factors'].get('nr_conv_layers', 2))
        ]).flatten().tolist()
        linear_layers = [
            LazyLinear(max(math.floor(channels * img_dim ** 2 / 2 ** (depth + 1)), channels)) for depth in
            range(config['factors'].get('nr_linear_layers', 3) - 1)
        ]
        self.layers = Sequential(
            *conv_layers,
            Flatten(start_dim=1),
            *linear_layers,
            LazyLinear(config['labels'])
        )
        if verbose:
            logging.info(self.layers)

    def forward(self, x):
        x = self.layers(x)
        return x


class DensePolyNN(Module):
    def __init__(self, config, verbose):
        super(DensePolyNN, self).__init__()

        img_dim = config['shape'][2]
        channels = config['shape'][1]
        linear_layers = [
            LazyLinear(max(math.floor(channels * img_dim ** 2 / 2 ** (depth + 1)), channels)) for depth in
            range(config['factors'].get('nr_linear_layers', 3) - 1)
        ]

        self.layers = Sequential(
            Flatten(start_dim=1),
            *linear_layers,
            LazyLinear(config['labels'])
        )
        if verbose:
            logging.info(self.layers)

    def forward(self, x):
        x = self.layers(x)
        return x


class DenseLinearNN(Module):
    def __init__(self, config, verbose):
        super(DenseLinearNN, self).__init__()

        img_dim = config['shape'][2]
        channels = config['shape'][1]
        linear_layers = [
            LazyLinear(max(math.floor(
                channels * img_dim ** 2 - (depth + 1) * (
                        channels * img_dim ** 2 / config['factors'].get('nr_linear_layers', 3))),
                channels)) for depth in range(config['factors'].get('nr_linear_layers', 3) - 1)]

        self.layers = Sequential(
            Flatten(start_dim=1),
            *linear_layers,
            LazyLinear(config['labels'])
        )
        if verbose:
            logging.info(self.layers)

    def forward(self, x):
        x = self.layers(x)
        return x

## Model operations

In [366]:
def create_model(net, config, verbose):
    model = net(config=config, verbose=verbose)
    optimizer = Adam(model.parameters(),
                     lr=config['factors'].get('learning_rate', 1e-3),
                     betas=(config['factors'].get('beta1', 0.9), config.get('beta2', 0.999)),
                     eps=config['factors'].get('epsilon', 1e-8),
                     weight_decay=config['factors'].get('weight_decay', 0)
                     )
    criterion = CrossEntropyLoss()
    if torch.cuda.is_available():
        logging.info('Using GPU')
        model = model.cuda()
        criterion = criterion.cuda()
    return model, optimizer, criterion


def train(net, config, batches, tag, verbose=False):
    logging.info(f'Using {config["factors"]}')
    logging.info(f'Constructing {tag}')
    model, optimizer, criterion = create_model(net, config, verbose)
    train_losses = []
    logging.info('Training the model')
    for _ in tqdm(range(config['epochs'])):
        for batch_id, batch in batches:
            train_x = batch[0]
            train_y = batch[1]
            model.train()
            train_x, train_y = Variable(train_x), Variable(train_y)
            if torch.cuda.is_available():
                train_x = train_x.cuda()
                train_y = train_y.cuda()

            # clearing the Gradients of the model parameters
            optimizer.zero_grad()

            # prediction for training set
            output_train = model(train_x)

            # computing the training loss
            loss_train = criterion(output_train, train_y)
            train_losses.append(loss_train.item())

            # computing the updated weights of all the model parameters
            loss_train.backward()
            optimizer.step()
    if verbose:
        plt.plot(train_losses, label='Training loss')
        plt.yscale('log')
        plt.legend()
        plt.show()
    return model, tag, config['factors']


def test(models, test_x, test_y):
    logging.info('Generating predictions and calculating accuracy')
    accuracies = []
    for model, tag, factors in models:
        with torch.no_grad():
            output = model(test_x.cuda())

        softmax = torch.exp(output).cpu()
        prob = list(softmax.numpy())
        predictions = np.argmax(prob, axis=1)
        accuracy = accuracy_score(test_y, predictions)
        accuracies.append((tag, accuracy, factors))
        logging.info(f'{tag}: {accuracy=}')
    return accuracies


def predict(model, test_x, predictions):
    logging.info('Generating predictions')
    with torch.no_grad():
        output = model(test_x.cuda())

    softmax = torch.exp(output).cpu()
    prob = list(softmax.numpy())
    predictions['label'] = np.argmax(prob, axis=1)
    return predictions

## Training strategies

In [367]:
def random_strategy(factors_bounds, random_function, tag):
    logging.info('Applying the random strategy...')

    factors = []
    lowers = []
    uppers = []
    is_ints = []
    for factor, (low, up, is_int) in factors_bounds.items():
        factors.append(factor)
        lowers.append(low)
        uppers.append(up)
        is_ints.append(is_int)

    return random_function(factors, 1, lowers, uppers, is_ints)[0]


# def run_grid_experiments(args):
#     logging.info('Applying a grid search strategy...')
#     return args

def bayesian_pi(factor_bounds, random_function, tag):
    logging.info('Applying the bayesian strategy with probability of improvement...')

    factors = []
    lowers = []
    uppers = []
    is_ints = []
    for factor, (low, up, is_int) in factor_bounds.items():
        factors.append(factor)
        lowers.append(low)
        uppers.append(up)
        is_ints.append(is_int)

    # Return a random sample and initialize the regression model if it does not exist
    if tag not in regression_models:
        random_factors = random_function(factors, 1, lowers, uppers, is_ints)[0]
        regression_models[tag] = {
            "model": GaussianProcessRegressor(),
            "X": [],
            "y": []
        }
        return random_factors

    # Generate 1000 random samples and return the one with the highest probability of improvement, update the regression model
    regression_models[tag]['model'].fit(regression_models[tag]['X'], regression_models[tag]['y'])
    random_samples = random_function(factors, 1000, lowers, uppers, is_ints)
    candidate_sample = best_probability_of_improvement(factors, random_samples, regression_models[tag]['model'],
                                                       max(regression_models[tag]['y']))
    return candidate_sample

## Random functions

In [368]:
def get_quasi_random_samples(factors, length, lower, upper, is_int, sampler=qmc.Halton):
    sampler = sampler(len(factors))
    sample = sampler.random(length)
    return qmc.scale(sample, lower, upper)


def get_random_samples(factors, length, lower, upper, is_int):
    return [
        {factors[i]: round(random.uniform(lower[i], upper[i])) if is_int[i] else random.uniform(lower[i], upper[i]) for
         i in range(len(factors))} for _ in range(length)]

## Acquisition functions

In [369]:
def best_probability_of_improvement(factors, samples, model, y_best, maximize=True):
    best = -1
    x_next = None
    for x in samples:
        x = list(x.values())
        with catch_warnings():
            # ignore generated warnings
            simplefilter("ignore")
            mu_, std_ = model.predict([x], return_std=True)
            pi = norm.cdf((mu_ - y_best) / (std_ + 1e-9)) if maximize else norm.cdf((y_best - mu_) / (std_ + 1e-9))
            if pi > best:
                best = pi
                x_next = x
    return {factors[i]: x_next[i] for i in range(len(factors))}

## Configuration

In [370]:
# Configurable parameters:
# nr_linear_layers: default 3
# nr_conv_layers: default 2
# learning_rate: default 1e-3
# beta1: default 0.9
# beta2: default 0.999
# epsilon: default 1e-8
# weight_decay: default 0
config = {
    'experiments': 128,
    'repetitions': 3,
    'epochs': 10,
    'batch_size': 512,
    'dataset': datasets.FashionMNIST,
    'random_function': get_random_samples,
    'optimization_strategy': bayesian_pi,
    'factor_bounds': {
        'nr_linear_layers': (1, 15, True),
        'learning_rate': (1e-5, 5e-3, False),
        'beta1': (0.4, 1, False),
        'beta2': (0.9, 1, False),
        'epsilon': (1e-9, 1e-7, False),
        'weight_decay': (0, 0.1, False)
    }
}

## Load data

In [371]:
logging.info('Loading data...')
training_data = config['dataset'](
    root="data",
    train=True,
    download=True,
    transform=ToTensor()
)
train_dataloader = DataLoader(training_data, batch_size=config['batch_size'], shuffle=True)
batches = [(batch_id, batch) for batch_id, batch in enumerate(train_dataloader)]
logging.info('Training data loaded')

test_data = config['dataset'](
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)
test_dataloader = DataLoader(test_data, batch_size=len(test_data), shuffle=False)
test_x, test_y = next(iter(test_dataloader))
logging.info('Test data loaded')
config['shape'] = batches[0][1][0].shape
config['labels'] = len(training_data.classes)

03:26:20 INFO: Loading data...
03:26:25 INFO: Training data loaded
03:26:26 INFO: Test data loaded


## Train models

In [372]:
seed = 43
torch.manual_seed(seed)
random.seed(seed)
results = defaultdict(list)
regression_models = {}
####################
nets_to_train = [DensePolyNN, DenseLinearNN, SimpleCNN]
####################
for experiment in range(config['experiments']):
    logging.info(f'\n\n#### RUNNING EXPERIMENT {experiment + 1}/{config["experiments"]} ####\n')
    models = []
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        for net in nets_to_train:
            config['factors'] = config['optimization_strategy'](config['factor_bounds'], config['random_function'], net.__name__)
            for _ in range(config['repetitions']):
                models.append(train(DensePolyNN, config, batches, net.__name__))

    res = test(models, test_x, test_y)
    for tag, acc, used_config in res:
        if tag in regression_models:
            regression_models[tag]['X'].append(list(used_config.values()))
            regression_models[tag]['y'].append(acc)
        results[tag].append((experiment, acc, used_config))
    gc.collect()
    torch.cuda.empty_cache()

03:26:26 INFO: 

#### RUNNING EXPERIMENT 1/128 ####

03:26:26 INFO: Applying the bayesian strategy with probability of improvement...
03:26:26 INFO: Using {'nr_linear_layers': 2, 'learning_rate': 0.003484159369958894, 'beta1': 0.4863599328372166, 'beta2': 0.9462532254829088, 'epsilon': 6.749302964765893e-08, 'weight_decay': 0.07929512716552944}
03:26:26 INFO: Constructing DensePolyNN
03:26:26 INFO: Using GPU
03:26:26 INFO: Training the model
100%|██████████| 10/10 [00:01<00:00,  6.47it/s]
03:26:27 INFO: Using {'nr_linear_layers': 2, 'learning_rate': 0.003484159369958894, 'beta1': 0.4863599328372166, 'beta2': 0.9462532254829088, 'epsilon': 6.749302964765893e-08, 'weight_decay': 0.07929512716552944}
03:26:27 INFO: Constructing DensePolyNN
03:26:27 INFO: Using GPU
03:26:27 INFO: Training the model
100%|██████████| 10/10 [00:01<00:00,  6.59it/s]
03:26:29 INFO: Using {'nr_linear_layers': 2, 'learning_rate': 0.003484159369958894, 'beta1': 0.4863599328372166, 'beta2': 0.9462532254829088, 'eps

## Write Results

In [373]:
file_name = f'exp_results_{datetime.today().strftime("%d%m%Y_%H%M%S")}.csv'
file = open(f'results/{file_name}', 'a')
for tag in results:
    print(f'{tag}:')
    file.write(f'{tag}:\n')
    print(f'experiment,accuracy,{"".join(key + "," for key in results[tag][0][2])[:-1]}')
    file.write(f'experiment,accuracy,{"".join(key + "," for key in results[tag][0][2])[:-1]}\n')
    [print(f'{experiment},{acc},{"".join(str(val) + "," for val in used_config.values())[:-1]}') for experiment, acc, used_config in results[tag]]
    file.writelines([f'{experiment},{acc},{"".join(str(val) + "," for val in used_config.values())[:-1]}\n' for experiment, acc, used_config in results[tag]])
file.close()
logging.info(f'Results logged to: results/{file_name}')

04:11:10 INFO: Results logged to: results/exp_results_22122021_161110.csv


DensePolyNN:
experiment,accuracy,nr_linear_layers,learning_rate,beta1,beta2,epsilon,weight_decay
0,0.2472,2,0.003484159369958894,0.4863599328372166,0.9462532254829088,6.749302964765893e-08,0.07929512716552944
0,0.6735,2,0.003484159369958894,0.4863599328372166,0.9462532254829088,6.749302964765893e-08,0.07929512716552944
0,0.6723,2,0.003484159369958894,0.4863599328372166,0.9462532254829088,6.749302964765893e-08,0.07929512716552944
1,0.8097,2,0.004112943986953834,0.9981253850642523,0.9944071363494472,1.633935940961457e-08,0.01351199873787684
1,0.79,2,0.004112943986953834,0.9981253850642523,0.9944071363494472,1.633935940961457e-08,0.01351199873787684
1,0.8041,2,0.004112943986953834,0.9981253850642523,0.9944071363494472,1.633935940961457e-08,0.01351199873787684
2,0.8375,2,0.0005237824229049575,0.9955869473367974,0.9035232558717647,5.1724460664000074e-08,0.0030435482731696606
2,0.8369,2,0.0005237824229049575,0.9955869473367974,0.9035232558717647,5.1724460664000074e-08,0.0030435482731696606
2