## Imports

In [261]:
import gc
import logging
import math
import random
import warnings
from collections import defaultdict
from importlib import reload
from warnings import catch_warnings, simplefilter

import matplotlib.pyplot as plt
import numpy as np
import torch
from scipy.stats import qmc, norm
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.metrics import accuracy_score
from torch.autograd import Variable
from torch.nn import *
from torch.optim import Adam
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
from tqdm import tqdm

reload(logging)
logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', level=logging.INFO, datefmt='%I:%M:%S')

## Neural Network Classes

In [262]:
class SimpleCNN(Module):
    def __init__(self, config, verbose):
        super(SimpleCNN, self).__init__()
        img_dim = config['shape'][2]
        channels = config['shape'][1]
        conv_layers = np.array([
            [LazyConv2d(channels * 3 ** (depth + 1), kernel_size=3, stride=1, padding=1),
             LazyBatchNorm2d(),
             ReLU(inplace=True),
             MaxPool2d(kernel_size=2, stride=2)]
            for depth in range(config['factors']['nr_conv_layers'])
        ]).flatten().tolist()
        linear_layers = [
            LazyLinear(max(math.floor(channels * img_dim ** 2 / 2 ** (depth + 1)), channels)) for depth in
            range(config['factors']['nr_linear_layers'] - 1)
        ]
        self.layers = Sequential(
            *conv_layers,
            Flatten(start_dim=1),
            *linear_layers,
            LazyLinear(config['labels'])
        )
        if verbose:
            logging.info(self.layers)

    def forward(self, x):
        x = self.layers(x)
        return x


class DensePolyNN(Module):
    def __init__(self, config, verbose):
        super(DensePolyNN, self).__init__()

        img_dim = config['shape'][2]
        channels = config['shape'][1]
        linear_layers = [
            LazyLinear(max(math.floor(channels * img_dim ** 2 / 2 ** (depth + 1)), channels)) for depth in
            range(config['factors']['nr_linear_layers'] - 1)
        ]

        self.layers = Sequential(
            Flatten(start_dim=1),
            *linear_layers,
            LazyLinear(config['labels'])
        )
        if verbose:
            logging.info(self.layers)

    def forward(self, x):
        x = self.layers(x)
        return x


class DenseLinearNN(Module):
    def __init__(self, config, verbose):
        super(DenseLinearNN, self).__init__()

        img_dim = config['shape'][2]
        channels = config['shape'][1]
        linear_layers = [
            LazyLinear(max(math.floor(
                channels * img_dim ** 2 - (depth + 1) * (
                        channels * img_dim ** 2 / config['factors']['nr_linear_layers'])),
                channels)) for depth in range(config['factors']['nr_linear_layers'] - 1)]

        self.layers = Sequential(
            Flatten(start_dim=1),
            *linear_layers,
            LazyLinear(config['labels'])
        )
        if verbose:
            logging.info(self.layers)

    def forward(self, x):
        x = self.layers(x)
        return x

## Model operations

In [263]:
def create_model(net, config, verbose):
    model = net(config=config, verbose=verbose)
    optimizer = Adam(model.parameters(), lr=0.005)
    criterion = CrossEntropyLoss()
    if torch.cuda.is_available():
        logging.info('Using GPU')
        model = model.cuda()
        criterion = criterion.cuda()
    return model, optimizer, criterion


def train(net, config, batches, tag, verbose=False):
    logging.info(f'Using {config["factors"]}')
    logging.info(f'Constructing {tag}')
    model, optimizer, criterion = create_model(net, config, verbose)
    train_losses = []
    logging.info('Training the model')
    for _ in tqdm(range(config['epochs'])):
        for batch_id, batch in batches:
            train_x = batch[0]
            train_y = batch[1]
            model.train()
            x_train, y_train = Variable(train_x), Variable(train_y)
            if torch.cuda.is_available():
                x_train = x_train.cuda()
                y_train = y_train.cuda()

            # clearing the Gradients of the model parameters
            optimizer.zero_grad()

            # prediction for training set
            output_train = model(x_train)

            # computing the training loss
            loss_train = criterion(output_train, y_train)
            train_losses.append(loss_train.item())

            # computing the updated weights of all the model parameters
            loss_train.backward()
            optimizer.step()
    if verbose:
        plt.plot(train_losses, label='Training loss')
        plt.yscale('log')
        plt.legend()
        plt.show()
    return model, tag


def test(models, test_x, test_y):
    logging.info('Generating predictions and calculating accuracy')
    accuracies = []
    for model, tag in models:
        with torch.no_grad():
            output = model(test_x.cuda())

        softmax = torch.exp(output).cpu()
        prob = list(softmax.numpy())
        predictions = np.argmax(prob, axis=1)
        accuracy = accuracy_score(test_y, predictions)
        accuracies.append((tag, accuracy))
        logging.info(f'{tag}: {accuracy=}')
    return accuracies


def predict(model, test_x, predictions):
    logging.info('Generating predictions')
    with torch.no_grad():
        output = model(test_x.cuda())

    softmax = torch.exp(output).cpu()
    prob = list(softmax.numpy())
    predictions['label'] = np.argmax(prob, axis=1)
    return predictions

## Training strategies

In [264]:
def random_strategy(factors_bounds, random_function, tag):
    logging.info('Applying the random strategy...')

    factors = []
    lowers = []
    uppers = []
    is_ints = []
    for factor, (low, up, is_int) in factors_bounds.items():
        factors.append(factor)
        lowers.append(low)
        uppers.append(up)
        is_ints.append(is_int)

    return random_function(factors, 1, lowers, uppers, is_ints)[0]

# def run_grid_experiments(args):
#     logging.info('Applying a grid search strategy...')
#     return args

def bayesian_pi(factor_bounds, random_function, tag):
    logging.info('Applying the bayesian strategy with probability of improvement...')

    factors = []
    lowers = []
    uppers = []
    is_ints = []
    for factor, (low, up, is_int) in factor_bounds.items():
        factors.append(factor)
        lowers.append(low)
        uppers.append(up)
        is_ints.append(is_int)

    if tag not in regression_models:
        random_factors = random_function(factors, 1, lowers, uppers, is_ints)[0]
        regression_models[tag] = {
            "model": GaussianProcessRegressor(),
            "X": [list(random_factors.values())],
            "y": []
        }
        return random_factors


    regression_models[tag]['model'].fit(regression_models[tag]['X'], regression_models[tag]['y'])
    random_samples = random_function(factors, 1000, lowers, uppers, is_ints)
    candidate_sample = best_probability_of_improvement(factors, random_samples, regression_models[tag]['model'], max(regression_models[tag]['y']))
    regression_models[tag]['X'].append(list(candidate_sample.values()))
    return candidate_sample

    # X.append(acquisition_function(candidate_samples, model, max(y)))
    # random.seed(INITIAL_SEED + i)
    # # TODO: Make compatible for non-numeric values!
    # factor_values = {key: value for key, value in zip(factors, X[i])}
    # set_variables(args, {**factor_values, **constants}, i)
    # acc, loss = __run_deberta(args)
    # results.append(f'{i}\t{acc}\t{loss}\t{factor_values}')
    # y.append(acc)
    # print(f'Best accuracy found for bayesian: {max(y)}')
    # return None

## Random functions

In [265]:
def get_quasi_random_samples(factors, length, lower, upper, is_int, sampler=qmc.Halton):
    sampler = sampler(len(factors))
    sample = sampler.random(length)
    return qmc.scale(sample, lower, upper)


def get_random_samples(factors, length, lower, upper, is_int):
    return [{factors[i]: round(random.uniform(lower[i], upper[i])) if is_int[i] else random.uniform(lower[i], upper[i]) for i in range(len(factors))} for _ in range(length)]

## Acquisition functions

In [266]:
def best_probability_of_improvement(factors, samples, model, y_best, maximize=True):
    best = -1
    x_next = None
    for x in samples:
        x = list(x.values())
        with catch_warnings():
            # ignore generated warnings
            simplefilter("ignore")
            mu_, std_ = model.predict([x], return_std=True)
            pi = norm.cdf((mu_ - y_best) / (std_ + 1e-9)) if maximize else norm.cdf((y_best - mu_) / (std_ + 1e-9))
            if pi > best:
                best = pi
                x_next = x
    return {factors[i]: x_next[i] for i in range(len(factors))}

## Configuration

In [267]:
config = {
    'experiments': 20,
    'epochs': 10,
    'batch_size': 512,
    'dataset': datasets.FashionMNIST,
    'random_function': get_random_samples,
    'optimization_strategy': bayesian_pi,
    'factor_bounds': {
        'nr_linear_layers': (1, 20, True),
        'nr_conv_layers': (1, 4, True),
    }
}

## Load data

In [268]:
logging.info('Loading data...')
training_data = config['dataset'](
    root="data",
    train=True,
    download=True,
    transform=ToTensor()
)
train_dataloader = DataLoader(training_data, batch_size=config['batch_size'], shuffle=True)
batches = [(batch_id, batch) for batch_id, batch in enumerate(train_dataloader)]
logging.info('Training data loaded')

test_data = config['dataset'](
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)
test_dataloader = DataLoader(test_data, batch_size=len(test_data), shuffle=False)
test_x, test_y = next(iter(test_dataloader))
logging.info('Test data loaded')
config['shape'] = batches[0][1][0].shape
config['labels'] = len(training_data.classes)

05:43:56 INFO: Loading data...
05:44:01 INFO: Training data loaded
05:44:01 INFO: Test data loaded


## Train model

In [269]:
seed = 42
torch.manual_seed(seed)
random.seed(seed)
accuracies = defaultdict(list)
regression_models = {}
for experiment in range(config['experiments']):
    logging.info(f'\n\n#### RUNNING EXPERIMENT {experiment + 1}/{config["experiments"]} ####\n')
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        models = []

        tag = 'DensePolyNN'
        config['factors'] = config['optimization_strategy'](config['factor_bounds'], config['random_function'], tag)
        model = train(DensePolyNN, config, batches, tag)
        models.append(model)

        # tag = 'DenseLinearNN'
        # config['factors'] = config['optimization_strategy'](config['factor_bounds'], config['random_function'], tag)
        # model = train(DenseLinearNN, config, batches, tag)
        # models.append(model)

        # config['factors'] = config['optimization_strategy'](config['factor_bounds'], config['random_function'], tag)
        # model = train(SimpleCNN, config, batches, 'SimpleCNN')
        # models.append(model)

    accs = test(models, test_x, test_y)
    for tag, acc in accs:
        if tag in regression_models:
            regression_models[tag]['y'].append(acc)
        accuracies[tag].append(acc)
    gc.collect()
    torch.cuda.empty_cache()
accuracies

05:44:01 INFO: 

#### RUNNING EXPERIMENT 1/20 ####

05:44:01 INFO: Applying the bayesian strategy with probability of improvement...
05:44:01 INFO: Using {'nr_linear_layers': 13, 'nr_conv_layers': 1}
05:44:01 INFO: Constructing DensePolyNN
05:44:01 INFO: Using GPU
05:44:01 INFO: Training the model
100%|██████████| 10/10 [00:04<00:00,  2.37it/s]
05:44:06 INFO: Generating predictions and calculating accuracy
05:44:06 INFO: DensePolyNN: accuracy=0.4735
05:44:06 INFO: 

#### RUNNING EXPERIMENT 2/20 ####

05:44:06 INFO: Applying the bayesian strategy with probability of improvement...
05:44:06 INFO: Using {'nr_linear_layers': 13, 'nr_conv_layers': 1}
05:44:06 INFO: Constructing DensePolyNN
05:44:06 INFO: Using GPU
05:44:06 INFO: Training the model
100%|██████████| 10/10 [00:03<00:00,  2.69it/s]
05:44:10 INFO: Generating predictions and calculating accuracy
05:44:10 INFO: DensePolyNN: accuracy=0.5165
05:44:10 INFO: 

#### RUNNING EXPERIMENT 3/20 ####

05:44:10 INFO: Applying the bayesian str

defaultdict(list,
            {'DensePolyNN': [0.4735,
              0.5165,
              0.4913,
              0.4549,
              0.527,
              0.4981,
              0.4571,
              0.5007,
              0.4281,
              0.4698,
              0.461,
              0.4966,
              0.4891,
              0.4613,
              0.4531,
              0.5052,
              0.4967,
              0.4873,
              0.4893,
              0.515]})